#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Scrape the sktorrent.eu torrent listing with Selenium and store rows in MySQL."""

import json
import re
import time
import urllib.parse as urlparse
from pathlib import Path

import pymysql
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# ============================================================
# 1) MySQL CONNECTION
# ============================================================
# NOTE(review): the root credentials are hard-coded in source; consider moving
# them to environment variables or a config file kept out of version control.
db = pymysql.connect(
    host="192.168.1.76",
    port=3307,
    user="root",
    password="Vlado9674+",
    database="torrents",
    charset="utf8mb4",
    autocommit=True,  # every INSERT is committed immediately
)
cursor = db.cursor()

# ============================================================
# 2) Selenium setup
# ============================================================
COOKIE_FILE = Path("sktorrent_cookies.json")
URL = "https://sktorrent.eu/torrent/torrents.php?active=0"

chrome_options = Options()
for chrome_flag in (
    "--start-maximized",
    "--disable-notifications",
    "--disable-popup-blocking",
    "--disable-extensions",
):
    chrome_options.add_argument(chrome_flag)

driver = webdriver.Chrome(options=chrome_options)

# Open the site root first: Selenium only accepts cookies for the domain the
# browser is currently on.
driver.get("https://sktorrent.eu")

# Load cookies saved by a previous session, if any.
if COOKIE_FILE.exists():
    with COOKIE_FILE.open("r", encoding="utf-8") as f:
        cookies = json.load(f)
    for c in cookies:
        driver.add_cookie(c)
    print("🍪 Cookies loaded.")

driver.get(URL)
time.sleep(2)  # crude wait for the listing page to finish rendering

# Try to close the inline popup if present. This is best-effort: a missing or
# non-clickable popup is not an error, but Ctrl+C must still propagate, so
# only Selenium errors are suppressed.
try:
    driver.find_element(By.XPATH, "//a[text()='CLOSE X']").click()
except WebDriverException:
    pass
else:
    print("🧹 Popup closed.")

# ============================================================
# 3) Extract table rows
# ============================================================
rows = driver.find_elements(By.CSS_SELECTOR, "table tr")
print("Total rows found:", len(rows))

# Rows with fewer than 5 <td> cells are treated as layout/header rows.
real_rows = []
for row in rows:
    cells = row.find_elements(By.TAG_NAME, "td")
    if len(cells) >= 5:  # real torrent rows
        real_rows.append(cells)

print("Real data rows:", len(real_rows))
print("")
# ============================================================
# 4) Function to extract all fields from one row
# ============================================================
# Size as shown after the "Velkost" label, e.g. "1.4 GB".
_SIZE_RE = re.compile(r"Velkost ([0-9.]+ ?[KMGT]B)", re.IGNORECASE)

# Timestamp after the "Pridany" label, e.g. "18/11/2025 o 07:00".
# The " o " separator and a trailing ":SS" part are both optional.
_ADDED_RE = re.compile(
    r"Pridany\s+(\d{1,2})/(\d{1,2})/(\d{4})\s+(?:o\s+)?(\d{1,2}):(\d{2})(?::(\d{2}))?",
    re.IGNORECASE,
)

# Image URL embedded in the preview link's onmouseover handler.
_IMG_SRC_RE = re.compile(r"src=([^ ]+)")


def _extract_size(text):
    """Return the size string following 'Velkost' in *text*, or None."""
    match = _SIZE_RE.search(text)
    return match.group(1) if match else None


def _extract_added_datetime(text):
    """Return the 'Pridany' timestamp in *text* as 'YYYY-MM-DD HH:MM:SS', or None."""
    match = _ADDED_RE.search(text)
    if match is None:
        return None
    day, month, year, hour, minute, second = match.groups()
    return (
        f"{year}-{int(month):02d}-{int(day):02d} "
        f"{int(hour):02d}:{minute}:{second or '00'}"
    )


def _extract_preview_url(mouseover):
    """Return the image URL found in an onmouseover attribute value, or None."""
    if not mouseover:
        return None
    match = _IMG_SRC_RE.search(mouseover)
    if match is None:
        return None
    url = match.group(1).replace("'", "").strip()
    if url.startswith("//"):
        # Protocol-relative URL: make it absolute.
        url = "https:" + url
    return url or None


def parse_row(cells):
    """Extract all stored fields from the cells of one listing row.

    Args:
        cells: Sequence of cell elements of one table row. Index 0 is read as
            the category, index 1 as the title/details cell, index 3 as the
            seeders cell and index 4 as the leechers cell.

    Returns:
        A dict whose keys match the named placeholders of ``insert_sql``, or
        ``None`` when the details link carries no ``id`` query parameter.

    Raises:
        ValueError: If the seeders or leechers text is not an integer.
        Whatever ``find_element`` raises when a required link is missing.
    """
    # --------------------------
    # 1) CATEGORY
    # --------------------------
    category = cells[0].text.strip()

    # --------------------------
    # 2) TITLES + DETAILS LINK
    # --------------------------
    title_cell = cells[1]
    a_tag = title_cell.find_element(By.TAG_NAME, "a")
    visible_name = a_tag.text.strip()
    full_title = a_tag.get_attribute("title")
    details_link = a_tag.get_attribute("href")

    # --------------------------
    # 3) TORRENT HASH (the ?id= parameter of the details link)
    # --------------------------
    query = urlparse.parse_qs(urlparse.urlparse(details_link or "").query)

    # skip rows without ?id=
    if "id" not in query:
        print("⚠️ Skipping row with no torrent ID →", details_link)
        return None

    torrent_hash = query["id"][0]

    # --------------------------
    # 4) TEXT BLOCK (size + date)
    # --------------------------
    # Collapse all whitespace so the regexes can rely on single spaces.
    text_block = title_cell.get_attribute("innerText") or ""
    text_block_clean = " ".join(text_block.split())

    size_pretty = _extract_size(text_block_clean)
    # Convert "18/11/2025 o 07:00" -> "2025-11-18 07:00:00"
    added_mysql = _extract_added_datetime(text_block_clean)

    # --------------------------
    # 5) IMAGE PREVIEW (optional)
    # --------------------------
    # find_elements returns an empty list instead of raising when the row has
    # no preview link, so no exception handling is needed here.
    preview_links = title_cell.find_elements(
        By.XPATH, ".//a[contains(text(),'Obrázok')]"
    )
    img_link = None
    if preview_links:
        img_link = _extract_preview_url(
            preview_links[0].get_attribute("onmouseover")
        )

    # --------------------------
    # 6) SEEDERS
    # --------------------------
    seeders_a = cells[3].find_element(By.TAG_NAME, "a")
    seeders_number = int(seeders_a.text.strip())
    seeders_link = seeders_a.get_attribute("href")

    # --------------------------
    # 7) LEECHERS
    # --------------------------
    leechers_a = cells[4].find_element(By.TAG_NAME, "a")
    leechers_number = int(leechers_a.text.strip())
    leechers_link = leechers_a.get_attribute("href")

    # --------------------------
    # Return dictionary for MySQL
    # --------------------------
    return {
        "torrent_hash": torrent_hash,
        "details_link": details_link,
        "category": category,
        "title_visible": visible_name,
        "title_full": full_title,
        "size_pretty": size_pretty,
        "added_datetime": added_mysql,
        "preview_image": img_link,
        "seeders": seeders_number,
        "seeders_link": seeders_link,
        "leechers": leechers_number,
        "leechers_link": leechers_link,
    }


# ============================================================
# 5) MySQL INSERT
# ============================================================
# Upsert: a row whose key already exists has every other column refreshed.
insert_sql = """
INSERT INTO torrents (
    torrent_hash, details_link, category, title_visible, title_full,
    size_pretty, added_datetime, preview_image,
    seeders, seeders_link, leechers, leechers_link
) VALUES (
    %(torrent_hash)s, %(details_link)s, %(category)s, %(title_visible)s,
    %(title_full)s, %(size_pretty)s, %(added_datetime)s, %(preview_image)s,
    %(seeders)s, %(seeders_link)s, %(leechers)s, %(leechers_link)s
)
ON DUPLICATE KEY UPDATE
    details_link = VALUES(details_link),
    category = VALUES(category),
    title_visible = VALUES(title_visible),
    title_full = VALUES(title_full),
    size_pretty = VALUES(size_pretty),
    added_datetime = VALUES(added_datetime),
    preview_image = VALUES(preview_image),
    seeders = VALUES(seeders),
    seeders_link = VALUES(seeders_link),
    leechers = VALUES(leechers),
    leechers_link = VALUES(leechers_link);
"""
# ============================================================
# 6) PROCESS ALL REAL ROWS
# ============================================================
saved = 0
failed = 0
try:
    for cells in real_rows:
        # Top-level boundary: one malformed row (missing link, non-numeric
        # seeders, failed INSERT, ...) is reported and skipped instead of
        # aborting the whole run.
        try:
            data = parse_row(cells)
            if not data:
                continue
            print("💾 Saving:", data["title_visible"])
            cursor.execute(insert_sql, data)
        except Exception as exc:
            failed += 1
            print("❌ Row failed:", repr(exc))
        else:
            saved += 1

    if failed:
        print(f"\n⚠️ DONE — {saved} torrents saved, {failed} rows failed.")
    else:
        print("\n✅ DONE — All torrents saved to MySQL.")
finally:
    # Always release the browser and the DB connection, even on Ctrl+C or an
    # unexpected error.
    try:
        driver.quit()
    finally:
        cursor.close()
        db.close()