#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import re
import requests
from bs4 import BeautifulSoup
import pymysql
from datetime import datetime

# ==============================
# CONFIG
# ==============================
BASE_URL = "https://sktorrent.eu/torrent/torrents_v2.php?active=0"
COOKIES_FILE = "sktorrent_cookies.txt"  # Your exported cookies.txt (Netscape format)
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
)
HEADERS = {"User-Agent": USER_AGENT}

DB_CFG = {
    "host": "192.168.1.76",
    "port": 3307,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
    "cursorclass": pymysql.cursors.DictCursor,
}

# ==============================
# COOKIE LOADER
# ==============================
def load_cookies(path):
    """Parse a Netscape-format cookies.txt: seven tab-separated fields per
    record, with the cookie name in field 6 and its value in field 7."""
    cookies = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            # Skip comment lines and anything that is not a tab-separated record
            if line.startswith("#") or "\t" not in line:
                continue
            parts = line.strip().split("\t")
            if len(parts) >= 7:
                cookies[parts[5]] = parts[6]
    print(f"🍪 Loaded {len(cookies)} cookies.")
    return cookies

# ==============================
# MYSQL INSERT
# ==============================
def insert_torrent(db, t):
    # INSERT IGNORE silently skips rows whose unique key (the torrent hash)
    # is already present, so re-running the scraper is idempotent.
    sql = """
        INSERT IGNORE INTO torrents (
            category, title_visible, title_full, size_pretty, added_datetime,
            seeders, seeders_link, leechers, leechers_link,
            preview_image, details_link, torrent_hash
        ) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
    """
    with db.cursor() as cur:
        cur.execute(sql, (
            t["category"], t["title_visible"], t["title_full"],
            t["size_pretty"], t["added_datetime"],
            t["seeders"], t["seeders_link"],
            t["leechers"], t["leechers_link"],
            t["preview_image"], t["details_link"], t["torrent_hash"],
        ))
    db.commit()
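# ==============================
# SCHEMA SKETCH (assumption)
# ==============================
# The INSERT IGNORE above implies a unique key on torrent_hash. The DDL below
# is a minimal sketch reconstructed from the inserted columns; the column
# types and key name are assumptions, not taken from the live database.
def ensure_schema(db):
    """Create the torrents table if it does not exist (sketch; types assumed)."""
    ddl = """
        CREATE TABLE IF NOT EXISTS torrents (
            id INT AUTO_INCREMENT PRIMARY KEY,
            category VARCHAR(64),
            title_visible VARCHAR(255),
            title_full VARCHAR(512),
            size_pretty VARCHAR(32),
            added_datetime DATETIME,
            seeders INT,
            seeders_link VARCHAR(512),
            leechers INT,
            leechers_link VARCHAR(512),
            preview_image VARCHAR(512),
            details_link VARCHAR(512),
            torrent_hash CHAR(40),
            UNIQUE KEY uq_torrent_hash (torrent_hash)
        ) CHARACTER SET utf8mb4
    """
    with db.cursor() as cur:
        cur.execute(ddl)
    db.commit()
# If the table may not exist yet, call ensure_schema(db) right after
# pymysql.connect(**DB_CFG) in main().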
# ==============================
# PARSER
# ==============================
def parse_torrent_row(cols):
    """Parse the <td> cells of a <tr> with exactly the structure of a torrent row."""
    # --- category ---
    category = cols[0].get_text(strip=True)

    # --- download link (ignore) ---
    # the second <td> only holds the download.gif icon

    # --- main column ---
    main_td = cols[2]
    a_title = main_td.find("a", href=re.compile("details.php"))
    if not a_title:
        return None

    title_visible = a_title.get_text(strip=True)
    title_full = a_title.get("title", "").strip()
    details_link = "https://sktorrent.eu/torrent/" + a_title.get("href")

    # Extract the torrent hash from ?id=<40 hex characters>
    m = re.search(r"id=([A-Fa-f0-9]{40})", a_title.get("href"))
    if not m:
        return None
    torrent_hash = m.group(1)

    # Extract size + added date from the text below the title link
    text = main_td.get_text(" ", strip=True)
    # example: "GR ... Velkost 1.7 GB | Pridany 18/11/2025 o 07:00"
    size_match = re.search(r"Velkost ([\d\.]+ ?[GMK]B)", text)
    date_match = re.search(r"Pridany (\d{2}/\d{2}/\d{4}) o (\d{2}:\d{2})", text)
    size_pretty = size_match.group(1) if size_match else None

    added_datetime = None
    if date_match:
        d, t = date_match.groups()
        added_datetime = datetime.strptime(d + " " + t, "%d/%m/%Y %H:%M")

    # Extract the preview image URL from the onmouseover attribute
    img = None
    img_a = main_td.find("a", onmouseover=True)
    if img_a:
        html = img_a.get("onmouseover", "")
        m2 = re.search(r"img src=//([^ ]+)", html)
        if m2:
            img = "https://" + m2.group(1)

    # --- seeders ---
    seed_a = cols[4].find("a")
    seeders = int(seed_a.get_text(strip=True)) if seed_a else 0
    seeders_link = ("https://sktorrent.eu/torrent/" + seed_a.get("href")) if seed_a else None

    # --- leechers ---
    leech_a = cols[5].find("a")
    leechers = int(leech_a.get_text(strip=True)) if leech_a else 0
    leechers_link = ("https://sktorrent.eu/torrent/" + leech_a.get("href")) if leech_a else None

    return {
        "category": category,
        "title_visible": title_visible,
        "title_full": title_full,
        "size_pretty": size_pretty,
        "added_datetime": added_datetime,
        "seeders": seeders,
        "seeders_link": seeders_link,
        "leechers": leechers,
        "leechers_link": leechers_link,
        "preview_image": img,
        "details_link": details_link,
        "torrent_hash": torrent_hash,
    }

# ==============================
# MAIN
# ==============================
def main():
    cookies = load_cookies(COOKIES_FILE)

    session = requests.Session()
    session.headers.update(HEADERS)
    session.cookies.update(cookies)

    print("🌍 Downloading HTML...")
    r = session.get(BASE_URL, timeout=30)
    r.raise_for_status()

    soup = BeautifulSoup(r.text, "html.parser")
    tbody = soup.find("tbody")
    if not tbody:
        print("❌ Could not find <tbody>.")
        return

    rows = tbody.find_all("tr")
    print(f"Found {len(rows)} rows.")

    db = pymysql.connect(**DB_CFG)
    inserted = 0
    skipped = 0

    for tr in rows:
        cols = tr.find_all("td")
        if len(cols) != 7:
            continue  # ignore header & separator rows

        data = parse_torrent_row(cols)
        if not data:
            skipped += 1
            continue

        insert_torrent(db, data)
        inserted += 1
        print(f"✔ Inserted {data['torrent_hash']}")

    db.close()

    print("\n===== DONE =====")
    print(f"Inserted: {inserted}")
    print(f"Skipped: {skipped}")

if __name__ == "__main__":
    main()
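# ==============================
# OPTIONAL OFFLINE CHECK (sketch)
# ==============================
# The sample row below is hypothetical, but mirrors the 7-<td> layout that
# parse_torrent_row() expects (category, download icon, main column with a
# details.php link and "Velkost ... | Pridany ..." text, seeders, leechers),
# so the parser can be exercised without hitting the site. Call _selftest()
# manually, e.g. from an interactive session.
def _selftest():
    sample = (
        '<tr>'
        '<td>Filmy DVD</td>'
        '<td><img src="download.gif"></td>'
        '<td><a href="details.php?id=0123456789abcdef0123456789abcdef01234567" '
        'title="Example Movie 2025 1080p">Example Movie</a> '
        'Velkost 1.7 GB | Pridany 18/11/2025 o 07:00</td>'
        '<td></td>'
        '<td><a href="peers.php?id=x">12</a></td>'
        '<td><a href="peers.php?id=x">3</a></td>'
        '<td></td>'
        '</tr>'
    )
    # html.parser keeps <tr>/<td> even outside a <table>, unlike stricter parsers
    cols = BeautifulSoup(sample, "html.parser").find("tr").find_all("td")
    from pprint import pprint
    pprint(parse_torrent_row(cols))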