#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Scrape one sktorrent.eu torrent listing page and upsert its rows into MySQL.

The script opens the listing at ``URL`` in Chrome (re-using cookies saved in
``COOKIE_FILE`` when that file exists), dismisses the interstitial popup,
parses every 7-cell table row with ``parse_row`` and writes the result to the
``torrents`` table with ``INSERT ... ON DUPLICATE KEY UPDATE``.
"""

import json
import re
import time
import urllib.parse as urlparse
from pathlib import Path

import pymysql
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# ============================================================
# Configuration
# ============================================================
COOKIE_FILE = Path("sktorrent_cookies.json")
URL = "https://sktorrent.eu/torrent/torrents.php?active=0&category=24&order=data&by=DESC&zaner=&jazyk=&page=0"

# Patterns applied to the whitespace-normalised text of the title cell.
# Compiled once here because parse_row runs for every table row.
_SIZE_RE = re.compile(r"Velkost ([0-9\.]+ ?[KMG]B)", re.IGNORECASE)
_ADDED_RE = re.compile(r"Pridany (.+?)(?:\sObrázok|$)", re.IGNORECASE)
_IMG_SRC_RE = re.compile(r"src=([^ ]+)")

# ============================================================
# MySQL upsert statement (named parameters match parse_row's dict keys)
# ============================================================
insert_sql = """
INSERT INTO torrents (
    torrent_hash, details_link, category, title_visible, title_full,
    size_pretty, added_datetime, preview_image,
    seeders, seeders_link, leechers, leechers_link
) VALUES (
    %(torrent_hash)s, %(details_link)s, %(category)s, %(title_visible)s, %(title_full)s,
    %(size_pretty)s, %(added_datetime)s, %(preview_image)s,
    %(seeders)s, %(seeders_link)s, %(leechers)s, %(leechers_link)s
)
ON DUPLICATE KEY UPDATE
    details_link = VALUES(details_link),
    category = VALUES(category),
    title_visible = VALUES(title_visible),
    title_full = VALUES(title_full),
    size_pretty = VALUES(size_pretty),
    added_datetime = VALUES(added_datetime),
    preview_image = VALUES(preview_image),
    seeders = VALUES(seeders),
    seeders_link = VALUES(seeders_link),
    leechers = VALUES(leechers),
    leechers_link = VALUES(leechers_link);
"""


# ============================================================
# Row parsing helpers
# ============================================================
def _to_mysql_datetime(added_pretty):
    """Convert a site date such as "18/11/2025 o 07:00" to "2025-11-18 07:00:00".

    Also accepts "29/11/2025 02:29:18" (no " o " separator, seconds present).
    Returns None, after printing a warning, when the date part is not three
    numeric fields separated by "/", so one odd row cannot abort the run.
    """
    clean = added_pretty.replace(" o ", " ").strip()
    date_part, *time_parts = clean.split(" ")

    # Only the first token after the date is treated as the time.
    time_part = time_parts[0] if time_parts else "00:00"
    if len(time_part.split(":")) == 2:
        # Seconds are missing: pad to HH:MM:SS.
        time_part += ":00"

    date_fields = date_part.split("/")
    if len(date_fields) != 3 or not all(field.isdigit() for field in date_fields):
        print("⚠️ Unrecognised date format — leaving added_datetime empty →", added_pretty)
        return None

    day, month, year = date_fields
    return f"{year}-{month}-{day} {time_part}"


def _extract_preview_image(cell):
    """Return the preview image URL taken from the 'Obrázok' link, or None.

    The URL is read from the ``src=...`` fragment of the link's
    ``onmouseover`` attribute. The lookup is best-effort: a missing link, a
    missing attribute or any WebDriver error yields None.
    """
    try:
        image_a = cell.find_element(By.XPATH, ".//a[contains(text(),'Obrázok')]")
        mouseover = image_a.get_attribute("onmouseover")
    except WebDriverException:
        return None

    if not mouseover:
        return None
    img_match = _IMG_SRC_RE.search(mouseover)
    if not img_match:
        return None

    img_link = img_match.group(1).replace("'", "").strip()
    if img_link.startswith("//"):
        # Protocol-relative URL: make it absolute.
        img_link = "https:" + img_link
    return img_link


def _read_peer_count(cell):
    """Return ``(count, href)`` from the first link inside a peers cell.

    Raises NoSuchElementException when the cell has no link and ValueError
    when the link text is not an integer.
    """
    link = cell.find_element(By.TAG_NAME, "a")
    return int(link.text.strip()), link.get_attribute("href")


def parse_row(cells):
    """Extract one torrent record from the ``<td>`` elements of a listing row.

    Reads the category from ``cells[0]``, the title link, size, date and
    preview image from ``cells[2]``, seeders from ``cells[4]`` and leechers
    from ``cells[5]``.

    Returns a dict whose keys match the named parameters of ``insert_sql``,
    or None (after printing a warning) when the row has no title link, no
    details URL, no ``id`` query parameter, or unreadable peer counts.
    """
    # 1) Category
    category = cells[0].text.strip()

    # 2) Title + details link (first link inside cells[2])
    title_links = cells[2].find_elements(By.TAG_NAME, "a")
    if not title_links:
        print("⚠️ Missing title link — skipping row")
        return None

    a_tag = title_links[0]
    visible_name = a_tag.text.strip()
    full_title = a_tag.get_attribute("title")
    details_link = a_tag.get_attribute("href")
    if not details_link:
        print("⚠️ Row has no details link — skipping")
        return None

    # 3) Torrent identifier: the "id" query parameter of the details URL
    query = urlparse.parse_qs(urlparse.urlparse(details_link).query)
    if "id" not in query:
        print("⚠️ Skipping row with no torrent ID →", details_link)
        return None
    torrent_hash = query["id"][0]

    # 4) Size and date from the cell text; innerText may come back as None
    text_block = cells[2].get_attribute("innerText") or ""
    text_block_clean = " ".join(text_block.split())

    size_match = _SIZE_RE.search(text_block_clean)
    added_match = _ADDED_RE.search(text_block_clean)
    size_pretty = size_match.group(1) if size_match else None
    added_pretty = added_match.group(1) if added_match else None
    added_mysql = _to_mysql_datetime(added_pretty) if added_pretty else None

    # 5) Image preview (best-effort)
    img_link = _extract_preview_image(cells[2])

    # 6) Seeders and 7) leechers. A row without readable counts is skipped
    #    instead of aborting the whole scrape.
    try:
        seeders_number, seeders_link = _read_peer_count(cells[4])
        leechers_number, leechers_link = _read_peer_count(cells[5])
    except (NoSuchElementException, ValueError):
        print("⚠️ Row has unreadable seeders/leechers — skipping →", details_link)
        return None

    return {
        "torrent_hash": torrent_hash,
        "details_link": details_link,
        "category": category,
        "title_visible": visible_name,
        "title_full": full_title,
        "size_pretty": size_pretty,
        "added_datetime": added_mysql,
        "preview_image": img_link,
        "seeders": seeders_number,
        "seeders_link": seeders_link,
        "leechers": leechers_number,
        "leechers_link": leechers_link,
    }


# ============================================================
# Browser helpers
# ============================================================
def _load_cookies(driver):
    """Add the cookies stored in COOKIE_FILE to the driver, if the file exists."""
    if not COOKIE_FILE.exists():
        return
    with open(COOKIE_FILE, "r", encoding="utf-8") as f:
        cookies = json.load(f)
    for c in cookies:
        driver.add_cookie(c)
    print("🍪 Cookies loaded.")


def _close_interstitial(driver):
    """Ask the page to close its interstitial popup through its own JS handler.

    The injected snippet catches JavaScript errors itself, so the Python-side
    handler only fires when the WebDriver call as a whole fails.
    """
    try:
        driver.execute_script("try { interstitialBox.closeit(); } catch(e) {}")
        print("🧹 Popup closed via JS fallback.")
        time.sleep(1)
    except WebDriverException:
        print("ℹ️ Popup JS handler not found (probably no popup).")


def _collect_torrent_rows(driver):
    """Return the ``<td>`` lists of every table row that has exactly 7 cells.

    Rows with any other cell count are ignored; the original author's rule is
    that real torrent rows always have exactly 7 ``<td>`` cells.
    """
    rows = driver.find_elements(By.CSS_SELECTOR, "table tr")
    print("Total rows found:", len(rows))

    real_rows = []
    for row in rows:
        cells = row.find_elements(By.TAG_NAME, "td")
        if len(cells) == 7:
            real_rows.append(cells)

    print("Real torrent rows:", len(real_rows))
    print("")
    return real_rows


# ============================================================
# Script flow
# ============================================================
# NOTE(review): the root password is hard-coded in source control; consider
# moving the credentials to environment variables or a config file.
db = pymysql.connect(
    host="192.168.1.76",
    port=3307,
    user="root",
    password="Vlado9674+",
    database="torrents",
    charset="utf8mb4",
    autocommit=True,
)
try:
    cursor = db.cursor()

    chrome_options = Options()
    chrome_options.add_argument("--start-maximized")
    chrome_options.add_argument("--disable-notifications")
    chrome_options.add_argument("--disable-popup-blocking")
    chrome_options.add_argument("--disable-extensions")

    driver = webdriver.Chrome(options=chrome_options)
    try:
        # Cookies can only be added for the domain currently loaded, so the
        # site root is opened before the cookies are injected.
        driver.get("https://sktorrent.eu")
        _load_cookies(driver)

        driver.get(URL)
        time.sleep(2)

        # Give the interstitial a moment to appear before closing it.
        time.sleep(1)
        _close_interstitial(driver)

        real_rows = _collect_torrent_rows(driver)

        for cells in real_rows:
            data = parse_row(cells)
            if not data:
                continue
            print("💾 Saving:", data["title_visible"])
            cursor.execute(insert_sql, data)

        print("\n✅ DONE — All torrents saved to MySQL.")
    finally:
        # Always shut the browser down, even when scraping or saving fails.
        driver.quit()
finally:
    db.close()