#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Scrape the first page of an SKTorrent listing into MySQL and mail a report.

Workflow (all of it runs at import/execution time, top to bottom):

1. Connect to the ``torrents`` MySQL database.
2. Start Chrome through Selenium, load saved cookies from ``COOKIE_FILE``
   and mirror the browser cookies into a ``requests.Session``.
3. Open ``START_URL``, parse every 7-cell table row with ``parse_row`` and
   upsert the result into the ``torrents`` table.
4. Send a plain-text summary e-mail via ``send_mail``.

The browser and the database connection are released in a ``finally`` block,
so they are cleaned up even when a step in between raises.
"""

import datetime
import json
import os
import re
import sys
import time
import urllib.parse as urlparse
from pathlib import Path

import pymysql
import requests
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

from EmailMessagingGraph import send_mail

# ============================================================
# RUNTIME INFO
# ============================================================
RUN_START = datetime.datetime.now()
processed_count = 0
new_torrent_count = 0        # rows whose .torrent file was downloaded in this run
existing_torrent_count = 0   # rows whose .torrent content was already stored
failed_download_count = 0    # rows without stored content whose download failed
new_titles = []

print(f"🕒 Run started at {RUN_START:%Y-%m-%d %H:%M:%S}")
sys.stdout.flush()

# ============================================================
# CONFIGURATION
# ============================================================
COOKIE_FILE = Path("sktorrent_cookies.json")
START_URL = (
    "https://sktorrent.eu/torrent/torrents.php"
    "?search=&category=24&zaner=&jazyk=&active=0"
)

# Pause before every .torrent download (throttling between requests).
DOWNLOAD_DELAY_SECONDS = 3
# Without a timeout a stalled connection would block the run forever.
DOWNLOAD_TIMEOUT_SECONDS = 30

# Patterns applied to the whitespace-normalised innerText of the title cell.
SIZE_RE = re.compile(r"Velkost ([0-9\.]+ ?[KMG]B)", re.IGNORECASE)
ADDED_RE = re.compile(r"Pridany (.+?)(?:\sObrázok|$)", re.IGNORECASE)
# Pattern applied to the "onmouseover" attribute of the preview link.
PREVIEW_SRC_RE = re.compile(r"src=([^ ]+)")

# ============================================================
# INSERT SQL
# ============================================================
# Upsert keyed on the table's unique key. An existing torrent_content is kept
# when the new value is NULL (COALESCE), so a row parsed without a fresh
# download never erases a previously stored file.
insert_sql = """
INSERT INTO torrents (
    torrent_hash, details_link, category, title_visible, title_full,
    size_pretty, added_datetime, preview_image,
    seeders, seeders_link, leechers, leechers_link,
    torrent_filename, torrent_content
) VALUES (
    %(torrent_hash)s, %(details_link)s, %(category)s, %(title_visible)s, %(title_full)s,
    %(size_pretty)s, %(added_datetime)s, %(preview_image)s,
    %(seeders)s, %(seeders_link)s, %(leechers)s, %(leechers_link)s,
    %(torrent_filename)s, %(torrent_content)s
)
ON DUPLICATE KEY UPDATE
    details_link = VALUES(details_link),
    category = VALUES(category),
    title_visible = VALUES(title_visible),
    title_full = VALUES(title_full),
    size_pretty = VALUES(size_pretty),
    added_datetime = VALUES(added_datetime),
    preview_image = VALUES(preview_image),
    seeders = VALUES(seeders),
    seeders_link = VALUES(seeders_link),
    leechers = VALUES(leechers),
    leechers_link = VALUES(leechers_link),
    torrent_filename = VALUES(torrent_filename),
    torrent_content = COALESCE(VALUES(torrent_content), torrent_content);
"""


# ============================================================
# Popup handler
# ============================================================
def close_popup_if_any():
    """Best-effort attempt to close the site's interstitial popup.

    Runs ``interstitialBox.closeit()`` in the page (JavaScript errors are
    swallowed by the script itself) and waits half a second. WebDriver
    errors are ignored on purpose: a missing popup must not stop the run.
    """
    try:
        driver.execute_script("try { interstitialBox.closeit(); } catch(e) {}")
        time.sleep(0.5)
    except WebDriverException:
        pass


# ============================================================
# Row-parsing helpers
# ============================================================
def _to_mysql_datetime(added_pretty):
    """Convert ``"29/11/2025 o 02:29"`` into ``"2025-11-29 02:29:00"``.

    Raises:
        ValueError: if the date part is not three ``/``-separated fields.
    """
    # "29/11/2025 o 02:29" -> "29/11/2025 02:29"
    clean = added_pretty.replace(" o ", " ").strip()
    parts = clean.split(" ")
    date_part = parts[0]
    time_part = parts[1] if len(parts) > 1 else "00:00:00"

    # Append seconds when the site only shows HH:MM.
    if len(time_part.split(":")) == 2:
        time_part += ":00"

    day, month, year = date_part.split("/")
    return f"{year}-{month}-{day} {time_part}"


def _find_preview_image(title_cell):
    """Return the preview image URL found in *title_cell*, or ``None``.

    The URL is taken from the ``src=...`` fragment inside the ``onmouseover``
    attribute of the link whose text contains "Obrázok". Protocol-relative
    URLs (``//host/...``) are prefixed with ``https:``. The preview is
    optional, so WebDriver errors simply yield ``None``.
    """
    try:
        image_a = title_cell.find_element(
            By.XPATH, ".//a[contains(text(),'Obrázok')]"
        )
        mouseover = image_a.get_attribute("onmouseover")
    except WebDriverException:
        return None

    if not mouseover:
        return None

    img_match = PREVIEW_SRC_RE.search(mouseover)
    if not img_match:
        return None

    img_link = img_match.group(1).replace("'", "").strip()
    if img_link.startswith("//"):
        img_link = "https:" + img_link
    return img_link


def _download_torrent(download_link):
    """Download the .torrent file and return its bytes, or ``None`` on failure.

    Sleeps ``DOWNLOAD_DELAY_SECONDS`` first, then fetches the file through
    ``requests_session``. Any ``requests`` error (including HTTP error
    statuses and timeouts) is reported on stdout and turned into ``None``.
    """
    time.sleep(DOWNLOAD_DELAY_SECONDS)
    try:
        resp = requests_session.get(download_link, timeout=DOWNLOAD_TIMEOUT_SECONDS)
        resp.raise_for_status()
        return resp.content
    except requests.RequestException as exc:
        print(f"⚠️ Torrent download failed ({download_link}): {exc}")
        return None


# ============================================================
# Parse one torrent row
# ============================================================
def parse_row(cells):
    """Parse one listing row into a dict matching the ``insert_sql`` parameters.

    Args:
        cells: the ``<td>`` WebElements of one table row. Index 0 holds the
            category text, 1 the download link, 2 the title/details cell,
            4 the seeders link and 5 the leechers link.

    Returns:
        A dict with every ``insert_sql`` parameter plus ``is_new_torrent``
        (True when the database holds no torrent content for this hash yet),
        or ``None`` when the row has no download link, no title link, or no
        ``id`` parameter in its details link.

    Raises:
        Exceptions from Selenium, ``int()`` conversion of the seeder/leecher
        counts, date conversion, or the database lookup propagate to the
        caller.
    """
    category = cells[0].text.strip()

    try:
        download_a = cells[1].find_element(By.TAG_NAME, "a")
        download_link = download_a.get_attribute("href")
    except NoSuchElementException:
        # Rows without a download link are not torrent rows.
        return None

    dl_query = urlparse.parse_qs(urlparse.urlparse(download_link).query)
    torrent_filename = dl_query.get("f", ["unknown.torrent"])[0]

    title_links = cells[2].find_elements(By.TAG_NAME, "a")
    if not title_links:
        return None

    a_tag = title_links[0]
    visible_name = a_tag.text.strip()
    full_title = a_tag.get_attribute("title")
    details_link = a_tag.get_attribute("href")

    # The "id" query parameter of the details link is used as the torrent hash.
    query = urlparse.parse_qs(urlparse.urlparse(details_link).query)
    if "id" not in query:
        return None
    torrent_hash = query["id"][0]

    # Collapse all whitespace so the patterns can match across line breaks.
    text_block_clean = " ".join(cells[2].get_attribute("innerText").split())
    size_match = SIZE_RE.search(text_block_clean)
    added_match = ADDED_RE.search(text_block_clean)

    size_pretty = size_match.group(1) if size_match else None
    added_pretty = added_match.group(1) if added_match else None
    added_mysql = _to_mysql_datetime(added_pretty) if added_pretty else None

    img_link = _find_preview_image(cells[2])

    seeders_a = cells[4].find_element(By.TAG_NAME, "a")
    seeders_number = int(seeders_a.text.strip())
    seeders_link = seeders_a.get_attribute("href")

    leechers_a = cells[5].find_element(By.TAG_NAME, "a")
    leechers_number = int(leechers_a.text.strip())
    leechers_link = leechers_a.get_attribute("href")

    # Only download the .torrent file when no content is stored for this hash.
    cursor.execute(
        "SELECT torrent_content FROM torrents WHERE torrent_hash=%s",
        (torrent_hash,),
    )
    row = cursor.fetchone()
    already_have_torrent = row is not None and row[0] is not None

    torrent_content = None
    if not already_have_torrent:
        torrent_content = _download_torrent(download_link)

    return {
        "torrent_hash": torrent_hash,
        "details_link": details_link,
        "category": category,
        "title_visible": visible_name,
        "title_full": full_title,
        "size_pretty": size_pretty,
        "added_datetime": added_mysql,
        "preview_image": img_link,
        "seeders": seeders_number,
        "seeders_link": seeders_link,
        "leechers": leechers_number,
        "leechers_link": leechers_link,
        "torrent_filename": torrent_filename,
        "torrent_content": torrent_content,
        "is_new_torrent": not already_have_torrent,
    }


# ============================================================
# 1) MySQL CONNECTION
# ============================================================
# NOTE(review): the credentials are hard-coded in the source. The password can
# be overridden with the SKTORRENT_DB_PASSWORD environment variable; the
# literal default is kept only for backward compatibility and should be
# removed (and the password rotated) once the environment is configured.
db = pymysql.connect(
    host="192.168.1.76",
    port=3307,
    user="root",
    password=os.environ.get("SKTORRENT_DB_PASSWORD", "Vlado9674+"),
    database="torrents",
    charset="utf8mb4",
    autocommit=True,
)
cursor = db.cursor()

# ============================================================
# 2) Selenium setup
# ============================================================
chrome_options = Options()
chrome_options.add_argument("--start-maximized")
chrome_options.add_argument("--disable-notifications")
chrome_options.add_argument("--disable-popup-blocking")
chrome_options.add_argument("--disable-extensions")

driver = webdriver.Chrome(options=chrome_options)

try:
    driver.set_window_position(380, 50)
    driver.set_window_size(1350, 1000)

    # The site is opened before the cookies are added to the browser.
    driver.get("https://sktorrent.eu")

    if COOKIE_FILE.exists():
        with open(COOKIE_FILE, "r", encoding="utf-8") as f:
            cookies = json.load(f)
        for c in cookies:
            driver.add_cookie(c)
        print("🍪 Cookies loaded.")
    else:
        print("⚠️ Cookie file not found – login may be required.")

    # ============================================================
    # 3) requests.Session from Selenium cookies
    # ============================================================
    requests_session = requests.Session()
    for ck in driver.get_cookies():
        requests_session.cookies.set(ck["name"], ck["value"])

    print("🔗 Requests session initialized.")

    # ============================================================
    # 4) PROCESS FIRST PAGE ONLY
    # ============================================================
    print("\n🌐 Loading FIRST page")
    driver.get(START_URL)
    time.sleep(2)
    close_popup_if_any()

    # Torrent rows are the table rows with exactly seven cells; look the
    # cells up once per row instead of twice.
    rows = driver.find_elements(By.CSS_SELECTOR, "table tr")
    real_rows = []
    for r in rows:
        row_cells = r.find_elements(By.TAG_NAME, "td")
        if len(row_cells) == 7:
            real_rows.append(row_cells)

    print(f"📄 Found {len(real_rows)} torrent rows")

    for cells in real_rows:
        try:
            data = parse_row(cells)
        except Exception as e:
            # One broken row must not abort the whole page.
            print(f"⚠️ parse_row failed: {e}")
            continue

        if not data:
            continue

        processed_count += 1

        if not data["is_new_torrent"]:
            existing_torrent_count += 1
        elif data["torrent_content"] is not None:
            new_torrent_count += 1
            new_titles.append(data["title_visible"])
        else:
            # No stored content and the download failed: do not report it as
            # a downloaded file.
            failed_download_count += 1

        print("💾 Saving:", data["title_visible"])
        cursor.execute(insert_sql, data)

    # ============================================================
    # 5) SEND EMAIL REPORT
    # ============================================================
    RUN_END = datetime.datetime.now()

    subject = f"SKTorrent hourly run – {RUN_START:%Y-%m-%d %H:%M}"

    lines = [
        f"Run started: {RUN_START:%Y-%m-%d %H:%M:%S}",
        f"Run finished: {RUN_END:%Y-%m-%d %H:%M:%S}",
        "",
        f"Processed torrents: {processed_count}",
        f"New torrent files downloaded: {new_torrent_count}",
        f"Already known torrents: {existing_torrent_count}",
    ]

    if failed_download_count:
        lines.append(f"Torrent downloads failed: {failed_download_count}")

    if new_titles:
        lines.append("")
        lines.append("New torrents:")
        lines.extend(f"- {t}" for t in new_titles)

    body = "\n".join(lines)

    send_mail(
        to="vladimir.buzalka@buzalka.cz",
        subject=subject,
        body=body,
        html=False,
    )

    print("📧 Email report sent.")
finally:
    # Always release the browser and the database connection, even when a
    # step above raised.
    try:
        driver.quit()
    finally:
        db.close()

print("🎉 DONE")