#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import re
from datetime import datetime

import pymysql
import requests
from bs4 import BeautifulSoup


# ==============================
# CONFIG
# ==============================

BASE_URL = "https://sktorrent.eu/torrent/torrents_v2.php?active=0"

# Exported cookies in Netscape cookies.txt format (tab-separated);
# load_cookies() below parses that format regardless of the file extension.
COOKIES_FILE = "sktorrent_cookies.json"

USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
)

HEADERS = {"User-Agent": USER_AGENT}

DB_CFG = {
    "host": "192.168.1.76",
    "port": 3307,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
    "cursorclass": pymysql.cursors.DictCursor,
}


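# ------------------------------
# The `torrents` table is assumed to already exist; this script never creates
# it. The CREATE TABLE below is only a sketch inferred from the INSERT in
# insert_torrent() further down -- column types and sizes are guesses:
#
#   CREATE TABLE IF NOT EXISTS torrents (
#       id             INT AUTO_INCREMENT PRIMARY KEY,
#       category       VARCHAR(64),
#       title_visible  VARCHAR(255),
#       title_full     VARCHAR(512),
#       size_pretty    VARCHAR(32),
#       added_datetime DATETIME,
#       seeders        INT,
#       seeders_link   VARCHAR(512),
#       leechers       INT,
#       leechers_link  VARCHAR(512),
#       preview_image  VARCHAR(512),
#       details_link   VARCHAR(512),
#       torrent_hash   CHAR(40),
#       UNIQUE KEY uq_torrent_hash (torrent_hash)
#   ) DEFAULT CHARSET = utf8mb4;
#
# A unique key on torrent_hash (or similar) is what lets INSERT IGNORE skip
# duplicates; without one, every run would re-insert every row.
# ------------------------------

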
# ==============================
# COOKIE LOADER
# ==============================

def load_cookies(path):
    """Load cookies from a Netscape-format cookies.txt export."""
    cookies = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            # Skip comment lines and anything that is not tab-separated.
            if line.startswith("#") or "\t" not in line:
                continue
            parts = line.strip().split("\t")
            if len(parts) >= 7:
                # Field 6 is the cookie name, field 7 its value.
                cookies[parts[5]] = parts[6]
    print(f"🍪 Loaded {len(cookies)} cookies.")
    return cookies


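# ------------------------------
# For reference, a Netscape cookies.txt line has seven tab-separated fields
# (the sample values below are made up):
#
#   .sktorrent.eu   TRUE    /   FALSE   1767225600  uid 123456
#
# Fields: domain, include-subdomains flag, path, secure flag, expiry epoch,
# cookie name (parts[5]) and cookie value (parts[6]) -- the last two are all
# that load_cookies() keeps.
# ------------------------------

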
# ==============================
# MYSQL INSERT
# ==============================

def insert_torrent(db, t):
    """Insert one parsed torrent row, skipping duplicates via INSERT IGNORE."""
    sql = """
        INSERT IGNORE INTO torrents (
            category,
            title_visible,
            title_full,
            size_pretty,
            added_datetime,
            seeders,
            seeders_link,
            leechers,
            leechers_link,
            preview_image,
            details_link,
            torrent_hash
        ) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
    """
    with db.cursor() as cur:
        cur.execute(sql, (
            t["category"],
            t["title_visible"],
            t["title_full"],
            t["size_pretty"],
            t["added_datetime"],
            t["seeders"],
            t["seeders_link"],
            t["leechers"],
            t["leechers_link"],
            t["preview_image"],
            t["details_link"],
            t["torrent_hash"],
        ))
    db.commit()


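# ------------------------------
# Because insert_torrent() uses INSERT IGNORE, re-running the script skips
# rows whose torrent_hash already exists and never refreshes their counters.
# If up-to-date seeder/leecher numbers matter more, one option (a sketch, not
# what this script does) is a MySQL upsert:
#
#   INSERT INTO torrents (...) VALUES (...)
#   ON DUPLICATE KEY UPDATE
#       seeders  = VALUES(seeders),
#       leechers = VALUES(leechers);
# ------------------------------

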
# ==============================
# PARSER
# ==============================

def parse_torrent_row(cols):
    """Parse the <td> cells of a single torrent <tr>; return a dict or None."""

    # --- category ---
    category = cols[0].get_text(strip=True)

    # --- download link (ignored) ---
    # The second <td> only holds the download.gif icon.

    # --- main column ---
    main_td = cols[2]

    a_title = main_td.find("a", href=re.compile(r"details\.php"))
    if not a_title:
        return None

    title_visible = a_title.get_text(strip=True)
    title_full = a_title.get("title", "").strip()
    details_link = "https://sktorrent.eu/torrent/" + a_title.get("href")

    # Extract the 40-hex-digit torrent hash from ?id=...
    m = re.search(r"id=([A-Fa-f0-9]{40})", a_title.get("href"))
    if not m:
        return None
    torrent_hash = m.group(1)

    # Extract size + added date from the text below <br>.
    # Example: "GR ... Velkost 1.7 GB | Pridany 18/11/2025 o 07:00"
    # (Slovak: "Size 1.7 GB | Added 18/11/2025 at 07:00")
    text = main_td.get_text(" ", strip=True)
    size_match = re.search(r"Velkost ([\d\.]+ ?[GMK]B)", text)
    date_match = re.search(r"Pridany (\d{2}/\d{2}/\d{4}) o (\d{2}:\d{2})", text)

    size_pretty = size_match.group(1) if size_match else None

    added_datetime = None
    if date_match:
        d, t = date_match.groups()
        added_datetime = datetime.strptime(d + " " + t, "%d/%m/%Y %H:%M")

    # Extract the preview image URL from the onmouseover tooltip markup.
    img = None
    img_a = main_td.find("a", onmouseover=True)
    if img_a:
        html = img_a.get("onmouseover", "")
        m2 = re.search(r"img src=//([^ ]+)", html)
        if m2:
            img = "https://" + m2.group(1)

    # --- seeders ---
    seed_a = cols[4].find("a")
    seeders = int(seed_a.get_text(strip=True)) if seed_a else 0
    seeders_link = ("https://sktorrent.eu/torrent/" + seed_a.get("href")) if seed_a else None

    # --- leechers ---
    leech_a = cols[5].find("a")
    leechers = int(leech_a.get_text(strip=True)) if leech_a else 0
    leechers_link = ("https://sktorrent.eu/torrent/" + leech_a.get("href")) if leech_a else None

    return {
        "category": category,
        "title_visible": title_visible,
        "title_full": title_full,
        "size_pretty": size_pretty,
        "added_datetime": added_datetime,
        "seeders": seeders,
        "seeders_link": seeders_link,
        "leechers": leechers,
        "leechers_link": leechers_link,
        "preview_image": img,
        "details_link": details_link,
        "torrent_hash": torrent_hash,
    }


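# ------------------------------
# A reconstructed sketch of the row markup parse_torrent_row() expects. It is
# inferred from the selectors above, not copied from the live site, so the
# attributes and sample values are guesses:
#
#   <tr>
#     <td>Filmy CZ/SK</td>                              <!-- cols[0] category -->
#     <td><img src="download.gif"></td>                 <!-- cols[1] ignored -->
#     <td>                                              <!-- cols[2] main -->
#       <a href="details.php?id=<40 hex chars>&..." title="Full.Release.Name"
#          onmouseover="...img src=//host/preview.jpg...">Visible title</a>
#       <br>Velkost 1.7 GB | Pridany 18/11/2025 o 07:00
#     </td>
#     <td>...</td>
#     <td><a href="...">12</a></td>                     <!-- cols[4] seeders -->
#     <td><a href="...">3</a></td>                      <!-- cols[5] leechers -->
#     <td>...</td>
#   </tr>
# ------------------------------

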
# ==============================
# MAIN
# ==============================

def main():
    cookies = load_cookies(COOKIES_FILE)

    session = requests.Session()
    session.headers.update(HEADERS)
    session.cookies.update(cookies)

    print("🌍 Downloading HTML...")
    r = session.get(BASE_URL, timeout=30)
    r.raise_for_status()

    soup = BeautifulSoup(r.text, "html.parser")
    tbody = soup.find("tbody")
    if not tbody:
        print("❌ Could not find <tbody>")
        return

    rows = tbody.find_all("tr")
    print(f"Found {len(rows)} <tr> rows.")

    db = pymysql.connect(**DB_CFG)

    inserted = 0
    skipped = 0

    try:
        for tr in rows:
            cols = tr.find_all("td")
            if len(cols) != 7:
                continue  # ignore header & separator rows

            data = parse_torrent_row(cols)
            if not data:
                skipped += 1
                continue

            insert_torrent(db, data)
            inserted += 1
            print(f"✔ Inserted {data['torrent_hash']}")
    finally:
        db.close()

    print("\n===== DONE =====")
    print(f"Inserted: {inserted}")
    print(f"Skipped: {skipped}")


if __name__ == "__main__":
    main()
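
# ------------------------------
# Usage sketch (the script name is whatever you saved this file as):
#
#   pip install requests beautifulsoup4 pymysql
#   python3 sktorrent_scraper.py
#
# The script expects COOKIES_FILE in the working directory and a MySQL server
# reachable with the credentials in DB_CFG.
# ------------------------------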