vbnotebook
ParseviaRequests.py · 219 additions · new file
@@ -0,0 +1,219 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import re
import requests
from bs4 import BeautifulSoup
import pymysql
from datetime import datetime


# ==============================
# CONFIG
# ==============================

BASE_URL = "https://sktorrent.eu/torrent/torrents_v2.php?active=0"

COOKIES_FILE = "sktorrent_cookies.json"  # your exported cookies.txt (Netscape format)
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
)

HEADERS = {"User-Agent": USER_AGENT}

DB_CFG = {
    "host": "192.168.1.76",
    "port": 3307,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
    "cursorclass": pymysql.cursors.DictCursor,
}


# ==============================
# COOKIE LOADER
# ==============================
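
# load_cookies() below expects the Netscape cookies.txt layout: seven
# tab-separated fields per line,
#   domain  include_subdomains  path  secure  expiry  name  value
# which is why parts[5] (name) and parts[6] (value) are the fields kept.
# Lines starting with "#" and lines without a tab are skipped.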
def load_cookies(path):
    cookies = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            if line.startswith("#") or "\t" not in line:
                continue
            parts = line.strip().split("\t")
            if len(parts) >= 7:
                cookies[parts[5]] = parts[6]
    print(f"🍪 Loaded {len(cookies)} cookies.")
    return cookies


# ==============================
# MYSQL INSERT
# ==============================
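
# The script assumes the `torrents` table already exists. A minimal sketch of a
# compatible schema (column types and the unique key are assumptions, not part
# of this commit) could look like:
#
#   CREATE TABLE IF NOT EXISTS torrents (
#       id INT AUTO_INCREMENT PRIMARY KEY,
#       category VARCHAR(255),
#       title_visible VARCHAR(512),
#       title_full VARCHAR(1024),
#       size_pretty VARCHAR(32),
#       added_datetime DATETIME,
#       seeders INT,
#       seeders_link VARCHAR(1024),
#       leechers INT,
#       leechers_link VARCHAR(1024),
#       preview_image VARCHAR(1024),
#       details_link VARCHAR(1024),
#       torrent_hash CHAR(40),
#       UNIQUE KEY uq_torrent_hash (torrent_hash)
#   ) CHARACTER SET utf8mb4;
#
# A unique key of this kind is what lets the INSERT IGNORE below skip rows
# that were already stored on a previous run.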
def insert_torrent(db, t):
    sql = """
        INSERT IGNORE INTO torrents (
            category,
            title_visible,
            title_full,
            size_pretty,
            added_datetime,
            seeders,
            seeders_link,
            leechers,
            leechers_link,
            preview_image,
            details_link,
            torrent_hash
        ) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
    """
    with db.cursor() as cur:
        cur.execute(sql, (
            t["category"],
            t["title_visible"],
            t["title_full"],
            t["size_pretty"],
            t["added_datetime"],
            t["seeders"],
            t["seeders_link"],
            t["leechers"],
            t["leechers_link"],
            t["preview_image"],
            t["details_link"],
            t["torrent_hash"],
        ))
    db.commit()
# ==============================
|
||||
# PARSER
|
||||
# ==============================
|
||||
|
||||
def parse_torrent_row(cols):
|
||||
"""Parse a <tr> with exactly the structure of a torrent row."""
|
||||
|
||||
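
    # Expected layout of the 7 <td> cells, inferred from the selectors below
    # (the unused cells are an assumption about the live page, not verified):
    #   cols[0]  category text
    #   cols[1]  download icon (ignored)
    #   cols[2]  main cell: title link, size/date text, preview hover
    #   cols[4]  seeders count/link
    #   cols[5]  leechers count/link
    # cols[3] and cols[6] are not used by this parser.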
    # --- category ---
    category = cols[0].get_text(strip=True)

    # --- download link (ignore) ---
    # second <td> is download.gif

    # --- main column ---
    main_td = cols[2]

    a_title = main_td.find("a", href=re.compile("details.php"))
    if not a_title:
        return None

    title_visible = a_title.get_text(strip=True)
    title_full = a_title.get("title", "").strip()
    details_link = "https://sktorrent.eu/torrent/" + a_title.get("href")

    # Extract torrent hash from ?id=.....
    m = re.search(r"id=([A-Fa-f0-9]{40})", a_title.get("href"))
    if not m:
        return None
    torrent_hash = m.group(1)

    # Extract size + added date from the text below <br>
    text = main_td.get_text(" ", strip=True)
    # example: "GR ... Velkost 1.7 GB | Pridany 18/11/2025 o 07:00"
    size_match = re.search(r"Velkost ([\d\.]+ ?[GMK]B)", text)
    date_match = re.search(r"Pridany (\d{2}/\d{2}/\d{4}) o (\d{2}:\d{2})", text)

    size_pretty = size_match.group(1) if size_match else None

    added_datetime = None
    if date_match:
        d, t = date_match.groups()
        added_datetime = datetime.strptime(d + " " + t, "%d/%m/%Y %H:%M")

    # Extract preview img from onmouseover
    img = None
    img_a = main_td.find("a", onmouseover=True)
    if img_a:
        html = img_a.get("onmouseover", "")
        m2 = re.search(r"img src=//([^ ]+)", html)
        if m2:
            img = "https://" + m2.group(1)

    # --- seeders ---
    seed_a = cols[4].find("a")
    seeders = int(seed_a.get_text(strip=True)) if seed_a else 0
    seeders_link = ("https://sktorrent.eu/torrent/" + seed_a.get("href")) if seed_a else None

    # --- leechers ---
    leech_a = cols[5].find("a")
    leechers = int(leech_a.get_text(strip=True)) if leech_a else 0
    leechers_link = ("https://sktorrent.eu/torrent/" + leech_a.get("href")) if leech_a else None

    return {
        "category": category,
        "title_visible": title_visible,
        "title_full": title_full,
        "size_pretty": size_pretty,
        "added_datetime": added_datetime,
        "seeders": seeders,
        "seeders_link": seeders_link,
        "leechers": leechers,
        "leechers_link": leechers_link,
        "preview_image": img,
        "details_link": details_link,
        "torrent_hash": torrent_hash,
    }
# ==============================
|
||||
# MAIN
|
||||
# ==============================
|
||||
|
||||
def main():
|
||||
|
||||
cookies = load_cookies(COOKIES_FILE)
|
||||
|
||||
session = requests.Session()
|
||||
session.headers.update(HEADERS)
|
||||
session.cookies.update(cookies)
|
||||
|
||||
print("🌍 Downloading HTML...")
|
||||
r = session.get(BASE_URL, timeout=30)
|
||||
r.raise_for_status()
|
||||
|
||||
soup = BeautifulSoup(r.text, "html.parser")
|
||||
tbody = soup.find("tbody")
|
||||
if not tbody:
|
||||
print("❌ Could not find <tbody>")
|
||||
return
|
||||
|
||||
rows = tbody.find_all("tr")
|
||||
print(f"Found {len(rows)} <tr> rows.")
|
||||
|
||||
db = pymysql.connect(**DB_CFG)
|
||||
|
||||
inserted = 0
|
||||
skipped = 0
|
||||
|
||||
for tr in rows:
|
||||
cols = tr.find_all("td")
|
||||
if len(cols) != 7:
|
||||
continue # ignore header & separator rows
|
||||
|
||||
data = parse_torrent_row(cols)
|
||||
if not data:
|
||||
skipped += 1
|
||||
continue
|
||||
|
||||
insert_torrent(db, data)
|
||||
inserted += 1
|
||||
print(f"✔ Inserted {data['torrent_hash']}")
|
||||
|
||||
print(f"\n===== DONE =====")
|
||||
print(f"Inserted: {inserted}")
|
||||
print(f"Skipped: {skipped}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
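
# How to run (a rough sketch; assumes the MySQL server from DB_CFG is reachable,
# the `torrents` table exists, and the cookies file was exported from a
# logged-in sktorrent.eu browser session):
#   pip install requests beautifulsoup4 pymysql
#   python3 ParseviaRequests.py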