# Origin: git patch d57f7d75ceb0fda2116d24139fd8fac7129666e5
#   Subject: Add Seedbox/60 AktualizaceSeeders.py — scrape seeders/leechers
#            from sktorrent.eu
#   Author:  Vladimir Buzalka, Sun, 1 Mar 2026 11:45:43 +0100
"""Scrape seeders/leechers from sktorrent.eu and store them in the database.

The script walks the torrent listing from the highest page number down to
page 0, parses every listing row and updates the ``seeders``, ``leechers``
and ``qb_last_update`` columns of the matching ``torrents`` row (matched by
``torrent_hash``).
"""

import json
import re
import sys
import time
from datetime import datetime

import pymysql
import requests
from bs4 import BeautifulSoup

# ============================================================
# CONFIG
# ============================================================

COOKIE_FILE = "sktorrent_cookies.json"

BASE_URL = "https://sktorrent.eu/torrent/torrents.php?active=0&category=24&order=data&by=DESC"

# Seconds to wait between page requests.
SLEEP_BETWEEN_PAGES = 2.0
# Fallback for the highest page number when none can be read from page 0.
# NOTE(review): the original comment described this as a hard cap on the run,
# but the code only ever uses it as the fallback start page — confirm intent.
MAX_PAGES = 300

# Number of consecutive pages without a single DB match after which the run
# stops (we have reached torrents that are not in the database).
STOP_AFTER_EMPTY_PAGES = 5
# Number of consecutive 403 responses after which the run is aborted
# (the site is blocking us).
STOP_AFTER_403 = 3

# NOTE(review): credentials are hard-coded in source; consider moving them to
# an environment variable or an untracked config file.
DB_CONFIG = {
    "host": "192.168.1.76",
    "port": 3306,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
    "autocommit": True,
}

# Compiled once; they are used for every row / page.
_DOWNLOAD_HREF_RE = re.compile(r"download\.php\?id=")
# Accept both upper- and lower-case hex digits; the value is lower-cased later.
_HASH_RE = re.compile(r"id=([0-9a-fA-F]+)")
_PAGE_NUMBER_RE = re.compile(r"page=(\d+)")

_UPDATE_SQL = """
    UPDATE torrents
    SET
        seeders = %s,
        leechers = %s,
        qb_last_update = NOW()
    WHERE torrent_hash = %s
"""


# ============================================================
# CONNECT
# ============================================================

def connect_db():
    """Open and return a PyMySQL connection built from ``DB_CONFIG``."""
    return pymysql.connect(**DB_CONFIG)


def build_session():
    """Return a ``requests.Session`` carrying the cookies from ``COOKIE_FILE``.

    The cookie file is JSON: an iterable of objects with ``name`` and
    ``value`` keys and an optional ``domain`` key (defaults to ``""``).
    A browser-like ``User-Agent`` header is set on the session.
    """
    with open(COOKIE_FILE, "r", encoding="utf-8") as f:
        cookies = json.load(f)

    session = requests.Session()
    session.headers["User-Agent"] = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
    )
    for cookie in cookies:
        session.cookies.set(
            cookie["name"], cookie["value"], domain=cookie.get("domain", "")
        )
    return session


# ============================================================
# PARSE ONE PAGE
# ============================================================

def _to_int(text):
    """Return *text* converted to ``int``, or 0 when it is not an integer."""
    try:
        return int(text)
    except ValueError:
        return 0


def parse_page(html):
    """Extract torrent statistics from one listing page.

    Only table rows with exactly 7 ``<td>`` cells whose second cell contains
    a ``download.php?id=<hex>`` link are considered. Seeders are read from
    the fifth cell and leechers from the sixth; non-numeric text counts as 0.

    Args:
        html: HTML source of the listing page.

    Returns:
        A list of dicts with the keys ``hash`` (lower-case hex string),
        ``seeders`` (int) and ``leechers`` (int).
    """
    soup = BeautifulSoup(html, "html.parser")
    results = []

    for row in soup.select("table tr"):
        cells = row.find_all("td")
        if len(cells) != 7:
            continue

        # td[1] must contain a download.php?id= link.
        dl_link = cells[1].find("a", href=_DOWNLOAD_HREF_RE)
        if not dl_link:
            continue

        match = _HASH_RE.search(dl_link["href"])
        if not match:
            continue

        results.append({
            "hash": match.group(1).lower(),
            "seeders": _to_int(cells[4].get_text(strip=True)),
            "leechers": _to_int(cells[5].get_text(strip=True)),
        })

    return results


# ============================================================
# MAIN
# ============================================================

def _apply_updates(cursor, rows):
    """Run the UPDATE for every parsed row; return how many rows matched."""
    updated = 0
    for item in rows:
        cursor.execute(
            _UPDATE_SQL, (item["seeders"], item["leechers"], item["hash"])
        )
        if cursor.rowcount > 0:
            updated += 1
    return updated


def main():
    """Walk the listing from the last page to page 0 and update the database.

    The run ends early when:
      * ``STOP_AFTER_403`` consecutive requests return HTTP 403,
      * the response looks like the login page (cookies expired),
      * a page yields no parsable rows, or
      * ``STOP_AFTER_EMPTY_PAGES`` consecutive pages update nothing.

    A summary is printed when the loop finishes; the database connection is
    closed even if an unexpected error interrupts the run.
    """
    sys.stdout.reconfigure(encoding="utf-8")

    print("=" * 60)
    print("AKTUALIZACE SEEDERS / LEECHERS — sktorrent.eu")
    print(f"Spuštěno: {datetime.now():%Y-%m-%d %H:%M:%S}")
    print("=" * 60)

    session = build_session()
    db = connect_db()

    try:
        with db.cursor() as cursor:
            # Find the highest page number referenced on the first page.
            r0 = session.get(f"{BASE_URL}&page=0", timeout=15)
            max_page = max(
                (int(m.group(1)) for m in _PAGE_NUMBER_RE.finditer(r0.text)),
                default=MAX_PAGES,
            )
            print(f"Max stránka na webu: {max_page}")
            print(f"Prochází od stránky {max_page} směrem dolů...\n")

            total_pages = 0
            total_parsed = 0
            total_updated = 0
            total_skipped = 0
            consecutive_empty = 0  # pages in a row without any DB match
            consecutive_403 = 0    # 403 responses in a row

            # Iterate from the last listing page down to page 0.
            for page in range(max_page, -1, -1):

                url = f"{BASE_URL}&page={page}"
                try:
                    r = session.get(url, timeout=15)
                    r.raise_for_status()
                    consecutive_403 = 0  # reset after a success
                except requests.exceptions.HTTPError as e:
                    if e.response is not None and e.response.status_code == 403:
                        consecutive_403 += 1
                        print(f"⚠️ Stránka {page} — 403 Forbidden ({consecutive_403}/{STOP_AFTER_403})")
                        if consecutive_403 >= STOP_AFTER_403:
                            print(f"\n🛑 {STOP_AFTER_403}× 403 za sebou — web nás blokuje, přerušuji.")
                            break
                        time.sleep(5)  # back off after a 403
                    else:
                        print(f"⚠️ Stránka {page} — chyba: {e}")
                        # Keep the normal pacing so a failing site is not
                        # hit in a tight loop.
                        time.sleep(SLEEP_BETWEEN_PAGES)
                    continue
                except Exception as e:
                    # Deliberately best-effort: report, pace, try next page.
                    print(f"⚠️ Stránka {page} — chyba: {e}")
                    time.sleep(SLEEP_BETWEEN_PAGES)
                    continue

                if "login.php" in r.url or "Prihlas sa" in r.text:
                    print("❌ Cookies expiraly — je potřeba se znovu přihlásit (spusť Selenium skript)")
                    break

                rows = parse_page(r.text)

                if not rows:
                    print(f" Stránka {page:3d} → prázdná, konec paginace.")
                    break

                total_pages += 1
                total_parsed += len(rows)

                page_updated = _apply_updates(cursor, rows)
                total_updated += page_updated
                total_skipped += len(rows) - page_updated

                print(f" Stránka {page:3d} → {len(rows):2d} torrentů, "
                      f"updatováno: {page_updated:2d} (celkem: {total_updated})")

                # Stop once several pages in a row contain nothing that is
                # present in the database.
                if page_updated == 0:
                    consecutive_empty += 1
                    if consecutive_empty >= STOP_AFTER_EMPTY_PAGES:
                        print(f"\n⏹ {STOP_AFTER_EMPTY_PAGES} stránek po sobě bez shody → "
                              f"dorazili jsme k novějším torrentům, které nejsou v DB. Konec.")
                        break
                else:
                    consecutive_empty = 0

                time.sleep(SLEEP_BETWEEN_PAGES)

            # ====================================================
            # SUMMARY
            # ====================================================
            print()
            print("=" * 60)
            print(f"Hotovo: {datetime.now():%Y-%m-%d %H:%M:%S}")
            print(f"Stránek zpracováno : {total_pages}")
            print(f"Záznamů parsováno : {total_parsed}")
            print(f"DB řádků updatováno: {total_updated}")
            print(f"Nebylo v DB : {total_skipped}")
            print("=" * 60)
    finally:
        db.close()


if __name__ == "__main__":
    main()