@@ -61,17 +61,28 @@ def get_data():
 # ==============================
 def auto_adjust_columns(writer, df, sheet_name):
-    """Helper that automatically widens the columns in Excel"""
+    """Safe automatic column-width adjustment"""
     worksheet = writer.sheets[sheet_name]

     for idx, col in enumerate(df.columns):
-        max_len = max(
-            df[col].astype(str).map(len).max(),
-            len(str(col))
-        ) + 2
-        if max_len > 60: max_len = 60
+        series = df[col]
+        max_len = len(str(col))  # at least the header length
+        for val in series:
+            if val is None or (isinstance(val, float) and pd.isna(val)):
+                length = 0
+            else:
+                length = len(str(val))
+            if length > max_len:
+                max_len = length
+        max_len = min(max_len + 2, 60)
         worksheet.set_column(idx, idx, max_len)

 # ==============================
 # 🚀 MAIN LOGIC
 # ==============================
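
For context, a minimal usage sketch of the rewritten helper (the DataFrame and file name are made-up illustrations; it assumes pandas with the xlsxwriter engine, whose worksheet objects expose set_column). Unlike the old one-liner, the explicit loop tolerates None/NaN cells and empty columns by falling back to the header width:

# Hypothetical usage of auto_adjust_columns (names and data are illustrative).
import pandas as pd

df = pd.DataFrame({"title": ["short", "a much longer torrent title"], "seeders": [3, None]})
with pd.ExcelWriter("report.xlsx", engine="xlsxwriter") as writer:
    df.to_excel(writer, sheet_name="torrents", index=False)
    auto_adjust_columns(writer, df, "torrents")  # widths capped at 60 characters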
@@ -96,24 +96,25 @@ def close_popup_if_any():
 # ============================================================
-# 5) Parse one torrent row
+# 5) Parse one torrent row (MODIFIED)
 # ============================================================

 def parse_row(cells):
-    # Column 0: Category icon/text
+    # --- 1. INITIALIZE ---
+    torrent_hash = None
+    download_url = None
     category = cells[0].text.strip()

     try:
-        # Column 1: Download icon link
+        # --- 2. EXTRACT DOWNLOAD URL (Column 1) ---
         download_a = cells[1].find_element(By.TAG_NAME, "a")
-        download_link = download_a.get_attribute("href")
-    except:
-        return None
-
-    parsed_dl = urlparse.urlparse(download_link)
+        download_url = download_a.get_attribute("href")
+
+        parsed_dl = urlparse.urlparse(download_url)
         dl_query = urlparse.parse_qs(parsed_dl.query)
         torrent_filename = dl_query.get("f", ["unknown.torrent"])[0]

-        # Column 2: Name and info
+        # --- 3. EXTRACT DETAILS & HASH (Column 2) ---
         title_links = cells[2].find_elements(By.TAG_NAME, "a")
         if not title_links:
             return None
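
As a standalone check of the query-string handling above: urllib.parse.parse_qs returns each parameter as a list, which is why the code indexes [0] (the URL below is a made-up example):

# Standalone check of the filename extraction (URL is illustrative).
import urllib.parse as urlparse

url = "https://example.org/torrent/download.php?id=abc123&f=Some.Movie.2024.torrent"
dl_query = urlparse.parse_qs(urlparse.urlparse(url).query)
print(dl_query.get("f", ["unknown.torrent"])[0])  # -> Some.Movie.2024.torrent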
@@ -130,34 +131,27 @@ def parse_row(cells):
         torrent_hash = query["id"][0]

-        # Use innerText for robust text extraction
+        # --- 4. EXTRACT SIZE & DATE ---
         text_block = cells[2].get_attribute("innerText")
         text_block_clean = " ".join(text_block.split())

-        # Regex for Size and Date
         size_match = re.search(r"Velkost ([0-9\.]+ ?[KMG]B)", text_block_clean, re.IGNORECASE)
         added_match = re.search(r"Pridany (.+?)(?:\sObrázok|$)", text_block_clean, re.IGNORECASE)

         size_pretty = size_match.group(1) if size_match else None
         added_pretty = added_match.group(1) if added_match else None

-        # Date conversion: "29/11/2025 o 02:29" -> MySQL format
         added_mysql = None
         if added_pretty:
             clean = added_pretty.replace(" o ", " ").strip()
             parts = clean.split(" ")
             if len(parts) >= 2:
-                date_part = parts[0]
-                time_part = parts[1]
-                if len(time_part.split(":")) == 2:
-                    time_part += ":00"
+                date_part, time_part = parts[0], parts[1]
+                if len(time_part.split(":")) == 2: time_part += ":00"
                 try:
-                    day, month, year = date_part.split("/")
-                    added_mysql = f"{year}-{month}-{day} {time_part}"
-                except:
-                    added_mysql = None
+                    d, m, y = date_part.split("/")
+                    added_mysql = f"{y}-{m}-{d} {time_part}"
+                except: pass

-        # Column 2: Image preview (if exists)
+        # --- 5. IMAGE & STATS ---
         img_link = None
         try:
             image_a = cells[2].find_element(By.XPATH, ".//a[contains(text(),'Obrázok')]")
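
The date rewrite is easy to sanity-check in isolation; the input below is the example format quoted in the old comment:

# Quick check of the "29/11/2025 o 02:29" -> MySQL datetime conversion.
added_pretty = "29/11/2025 o 02:29"
clean = added_pretty.replace(" o ", " ").strip()
date_part, time_part = clean.split(" ")[:2]
if len(time_part.split(":")) == 2:
    time_part += ":00"
d, m, y = date_part.split("/")
print(f"{y}-{m}-{d} {time_part}")  # -> 2025-11-29 02:29:00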
@@ -165,39 +159,33 @@ def parse_row(cells):
             img_match = re.search(r"src=([^ ]+)", mouseover)
             if img_match:
                 img_link = img_match.group(1).replace("'", "").strip()
-                if img_link.startswith("//"):
-                    img_link = "https:" + img_link
-        except:
-            pass
+                if img_link.startswith("//"): img_link = "https:" + img_link
+        except: pass

-        # Column 4: Seeders
-        seeders_a = cells[4].find_element(By.TAG_NAME, "a")
-        seeders_number = int(seeders_a.text.strip())
-        seeders_link = seeders_a.get_attribute("href")
-
-        # Column 5: Leechers
-        leechers_a = cells[5].find_element(By.TAG_NAME, "a")
-        leechers_number = int(leechers_a.text.strip())
-        leechers_link = leechers_a.get_attribute("href")
-
-        # Check database for existing binary content
+        seeders_number = int(cells[4].find_element(By.TAG_NAME, "a").text.strip())
+        seeders_link = cells[4].find_element(By.TAG_NAME, "a").get_attribute("href")
+        leechers_number = int(cells[5].find_element(By.TAG_NAME, "a").text.strip())
+        leechers_link = cells[5].find_element(By.TAG_NAME, "a").get_attribute("href")
+
+        # --- 6. DATABASE CHECK & DOWNLOAD ---
         cursor.execute("SELECT torrent_content FROM torrents WHERE torrent_hash=%s", (torrent_hash,))
-        row = cursor.fetchone()
-        already_have_torrent = row is not None and row[0] is not None
+        db_row = cursor.fetchone()
+        already_have_torrent = db_row is not None and db_row[0] is not None

         torrent_content = None
         if not already_have_torrent:
-            time.sleep(3)  # Politeness delay
+            time.sleep(2)
             try:
-                resp = requests_session.get(download_link)
+                resp = requests_session.get(download_url, timeout=10)
                 resp.raise_for_status()
                 torrent_content = resp.content
-            except:
-                torrent_content = None
+            except Exception as e:
+                print(f" ⚠️ Download failed for {visible_name}: {e}")

         return {
             "torrent_hash": torrent_hash,
             "details_link": details_link,
+            "download_url": download_url,
             "category": category,
             "title_visible": visible_name,
             "title_full": full_title,
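
One hedged hardening idea, not part of this commit: the bare int(...) casts on the seeder/leecher cells raise ValueError on non-numeric text, which now aborts the whole row via the new outer except. A tolerant parser could default to 0 instead:

# Hypothetical guard (not in the commit): tolerate non-numeric counts.
def safe_int(text, default=0):
    try:
        return int(str(text).strip())
    except (ValueError, TypeError):
        return default

# e.g. seeders_number = safe_int(cells[4].find_element(By.TAG_NAME, "a").text)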
@@ -212,19 +200,20 @@ def parse_row(cells):
             "torrent_content": torrent_content if not already_have_torrent else None,
             "is_new_torrent": not already_have_torrent,
         }
+    except Exception as e:
+        print(f"⚠️ parse_row logic failed: {e}")
+        return None

 # ============================================================
-# 6) INSERT SQL
+# 6) INSERT SQL (MODIFIED)
 # ============================================================
 insert_sql = """
 INSERT INTO torrents (
-    torrent_hash, details_link, category, title_visible, title_full,
+    torrent_hash, details_link, download_url, category, title_visible, title_full,
     size_pretty, added_datetime, preview_image,
     seeders, seeders_link, leechers, leechers_link,
     torrent_filename, torrent_content
 ) VALUES (
-    %(torrent_hash)s, %(details_link)s, %(category)s, %(title_visible)s, %(title_full)s,
+    %(torrent_hash)s, %(details_link)s, %(download_url)s, %(category)s, %(title_visible)s, %(title_full)s,
     %(size_pretty)s, %(added_datetime)s, %(preview_image)s,
     %(seeders)s, %(seeders_link)s, %(leechers)s, %(leechers_link)s,
     %(torrent_filename)s, %(torrent_content)s
@@ -232,9 +221,12 @@ INSERT INTO torrents (
 ON DUPLICATE KEY UPDATE
     seeders = VALUES(seeders),
     leechers = VALUES(leechers),
+    download_url = VALUES(download_url),
     torrent_content = COALESCE(VALUES(torrent_content), torrent_content);
 """
+# Note: COALESCE(VALUES(torrent_content), torrent_content)
+# keeps the old value if the new one is NULL,
+# but updates it if the old one was NULL and the new one is binary.

 # ============================================================
 # 7) PROCESS ALL PAGES
 # ============================================================
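
A runnable mimic of that upsert rule, purely to illustrate why stored blobs survive a re-crawl that downloads nothing:

# Python mimic of COALESCE(VALUES(torrent_content), torrent_content).
def upsert_content(old, new):
    return new if new is not None else old

assert upsert_content(b"blob", None) == b"blob"  # re-crawl without a download keeps the blob
assert upsert_content(None, b"blob") == b"blob"  # first successful download fills the gap
assert upsert_content(b"old", b"new") == b"new"  # a fresh download wins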
@@ -250,17 +242,27 @@ for page_num in range(0, TOTAL_PAGES):

         # Find table rows
         rows = driver.find_elements(By.CSS_SELECTOR, "table tr")
-        # v1 table usually has 7 cells for a data row
-        real_rows = [r.find_elements(By.TAG_NAME, "td") for r in rows if len(r.find_elements(By.TAG_NAME, "td")) == 7]
+        # FILTER: Only keep rows that have 7 columns AND a link in the 2nd column (index 1).
+        # This automatically discards headers and empty spacer rows.
+        real_rows = []
+        for r in rows:
+            cells = r.find_elements(By.TAG_NAME, "td")
+            if len(cells) == 7 and cells[1].find_elements(By.TAG_NAME, "a"):
+                real_rows.append(cells)

         if not real_rows:
             print("⚠️ No data rows found on this page. Ending loop.")
             break

+        # === INSERT THIS LINE HERE ===
         page_new_items = 0
+        # =============================

         for cells in real_rows:
             try:
                 data = parse_row(cells)
+                # ... rest of your logic ...
             except Exception as e:
                 print(f"⚠️ parse_row failed: {e}")
                 continue
@@ -279,10 +281,10 @@ for page_num in range(0, TOTAL_PAGES):

             cursor.execute(insert_sql, data)

-    # If an entire page is old news, we can stop the deep crawl
-    if page_new_items == 0 and page_num > 0:
-        print("🛑 Page contained only known items. Sync complete.")
-        break
+    # # If an entire page is old news, we can stop the deep crawl
+    # if page_new_items == 0 and page_num > 0:
+    #     print("🛑 Page contained only known items. Sync complete.")
+    #     break

     time.sleep(1)
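
The early-stop logic is commented out wholesale above; a softer option, not in the commit (the flag name is made up), keeps it behind a switch so incremental syncs and full deep crawls share one code path. The fragment below belongs inside the page loop:

# Hypothetical alternative to deleting the early stop (loop-body fragment).
STOP_WHEN_PAGE_KNOWN = False  # True = incremental sync, False = full deep crawl
if STOP_WHEN_PAGE_KNOWN and page_new_items == 0 and page_num > 0:
    print("🛑 Page contained only known items. Sync complete.")
    break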

5threaddownloader.py (new file, 292 lines)
@@ -0,0 +1,292 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import pymysql
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
import re
import urllib.parse as urlparse
from pathlib import Path
import json
import requests
import datetime
import sys
import threading
from concurrent.futures import ThreadPoolExecutor

# Ensure this file exists in your directory
from EmailMessagingGraph import send_mail

# ============================================================
# CONFIGURATION
# ============================================================
TOTAL_PAGES = 226
THREADS = 5
COOKIE_FILE = Path("sktorrent_cookies.json")

# Database settings
DB_CONFIG = {
    "host": "192.168.1.50",
    "port": 3306,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
    "autocommit": True,
}

BASE_URL = (
    "https://sktorrent.eu/torrent/torrents.php"
    "?active=0&category=24&order=data&by=DESC&zaner=&jazyk="
)

# Global counters for reporting (thread-safe lock needed)
stats_lock = threading.Lock()
stats = {
    "processed": 0,
    "new": 0,
    "existing": 0,
    "new_titles": []
}


# ============================================================
# 1) WORKER FUNCTION (runs inside each thread)
# ============================================================
def process_page_chunk(page_indices, thread_id):
    """
    This function creates its OWN browser and OWN database connection.
    It processes the specific list of page numbers assigned to it.
    """
    print(f"🧵 [Thread-{thread_id}] Starting. Assigned {len(page_indices)} pages.")

    # --- A. Setup independent DB connection ---
    try:
        db = pymysql.connect(**DB_CONFIG)
        cursor = db.cursor()
    except Exception as e:
        print(f"❌ [Thread-{thread_id}] DB connection failed: {e}")
        return

    # --- B. Setup independent Selenium driver ---
    chrome_options = Options()
    # HEADLESS MODE is safer for 5 threads, to avoid popping up 5 windows
    chrome_options.add_argument("--headless=new")
    chrome_options.add_argument("--disable-notifications")
    chrome_options.add_argument("--disable-popup-blocking")
    chrome_options.add_argument("--disable-extensions")
    chrome_options.add_argument("--log-level=3")  # Reduce noise

    driver = webdriver.Chrome(options=chrome_options)
    driver.set_window_size(1350, 1000)

    # --- C. Login / cookies ---
    driver.get("https://sktorrent.eu")
    if COOKIE_FILE.exists():
        with open(COOKIE_FILE, "r", encoding="utf-8") as f:
            cookies = json.load(f)
        for c in cookies:
            driver.add_cookie(c)

    # --- D. Requests session ---
    requests_session = requests.Session()
    for ck in driver.get_cookies():
        requests_session.cookies.set(ck["name"], ck["value"])

    # --- E. Helper: parse one row (local scope) ---
    def parse_row(cells):
        try:
            category = cells[0].text.strip()

            # Download URL
            download_a = cells[1].find_element(By.TAG_NAME, "a")
            download_url = download_a.get_attribute("href")

            parsed_dl = urlparse.urlparse(download_url)
            dl_query = urlparse.parse_qs(parsed_dl.query)
            torrent_filename = dl_query.get("f", ["unknown.torrent"])[0]

            # Details & hash
            title_links = cells[2].find_elements(By.TAG_NAME, "a")
            if not title_links: return None
            a_tag = title_links[0]
            visible_name = a_tag.text.strip()
            full_title = a_tag.get_attribute("title")
            details_link = a_tag.get_attribute("href")

            parsed = urlparse.urlparse(details_link)
            query = urlparse.parse_qs(parsed.query)
            if "id" not in query: return None
            torrent_hash = query["id"][0]

            # Size & date
            text_block = cells[2].get_attribute("innerText")
            clean_text = " ".join(text_block.split())
            size_match = re.search(r"Velkost ([0-9\.]+ ?[KMG]B)", clean_text, re.IGNORECASE)
            added_match = re.search(r"Pridany (.+?)(?:\sObrázok|$)", clean_text, re.IGNORECASE)
            size_pretty = size_match.group(1) if size_match else None

            added_mysql = None
            if added_match:
                clean = added_match.group(1).replace(" o ", " ").strip()
                parts = clean.split(" ")
                if len(parts) >= 2:
                    t = parts[1] + ":00" if len(parts[1].split(":")) == 2 else parts[1]
                    try:
                        # split inside the try so a malformed date doesn't abort the row
                        d, m, y = parts[0].split("/")
                        added_mysql = f"{y}-{m}-{d} {t}"
                    except ValueError:
                        pass

            # Image
            img_link = None
            try:
                img_a = cells[2].find_element(By.XPATH, ".//a[contains(text(),'Obrázok')]")
                img_src = re.search(r"src=([^ ]+)", img_a.get_attribute("onmouseover"))
                if img_src:
                    img_link = img_src.group(1).replace("'", "").strip()
                    if img_link.startswith("//"): img_link = "https:" + img_link
            except:
                pass

            # Stats
            seeders = int(cells[4].find_element(By.TAG_NAME, "a").text.strip())
            seeders_link = cells[4].find_element(By.TAG_NAME, "a").get_attribute("href")
            leechers = int(cells[5].find_element(By.TAG_NAME, "a").text.strip())
            leechers_link = cells[5].find_element(By.TAG_NAME, "a").get_attribute("href")

            # Check DB
            cursor.execute("SELECT torrent_content FROM torrents WHERE torrent_hash=%s", (torrent_hash,))
            row = cursor.fetchone()
            already_have_file = row is not None and row[0] is not None

            content = None
            if not already_have_file:
                # Politeness sleep only if downloading
                time.sleep(1)
                try:
                    r = requests_session.get(download_url, timeout=10)
                    r.raise_for_status()
                    content = r.content
                except:
                    pass

            return {
                "torrent_hash": torrent_hash, "details_link": details_link, "download_url": download_url,
                "category": category, "title_visible": visible_name, "title_full": full_title,
                "size_pretty": size_pretty, "added_datetime": added_mysql, "preview_image": img_link,
                "seeders": seeders, "seeders_link": seeders_link, "leechers": leechers, "leechers_link": leechers_link,
                "torrent_filename": torrent_filename, "torrent_content": content,
                "is_new_torrent": not already_have_file
            }
        except Exception:
            return None

    # --- F. Loop through assigned pages ---
    for page_num in page_indices:
        url = f"{BASE_URL}&page={page_num}"
        print(f" 🔄 [Thread-{thread_id}] Scraping page {page_num}")

        try:
            driver.get(url)
            # Close popup (simplified JS)
            driver.execute_script("try { interstitialBox.closeit(); } catch(e) {}")

            # Row filtering
            rows = driver.find_elements(By.CSS_SELECTOR, "table tr")
            real_rows = []
            for r in rows:
                cs = r.find_elements(By.TAG_NAME, "td")
                if len(cs) == 7 and cs[1].find_elements(By.TAG_NAME, "a"):
                    real_rows.append(cs)

            if not real_rows:
                print(f" ⚠️ [Thread-{thread_id}] Page {page_num} empty.")
                continue

            # Process rows
            for cells in real_rows:
                data = parse_row(cells)
                if not data: continue

                # Update global stats safely
                with stats_lock:
                    stats["processed"] += 1
                    if data["is_new_torrent"]:
                        stats["new"] += 1
                        stats["new_titles"].append(data["title_visible"])
                    else:
                        stats["existing"] += 1

                # Insert SQL
                sql = """
                INSERT INTO torrents (
                    torrent_hash, details_link, download_url, category, title_visible, title_full,
                    size_pretty, added_datetime, preview_image,
                    seeders, seeders_link, leechers, leechers_link,
                    torrent_filename, torrent_content
                ) VALUES (
                    %(torrent_hash)s, %(details_link)s, %(download_url)s, %(category)s, %(title_visible)s, %(title_full)s,
                    %(size_pretty)s, %(added_datetime)s, %(preview_image)s,
                    %(seeders)s, %(seeders_link)s, %(leechers)s, %(leechers_link)s,
                    %(torrent_filename)s, %(torrent_content)s
                )
                ON DUPLICATE KEY UPDATE
                    seeders = VALUES(seeders),
                    leechers = VALUES(leechers),
                    download_url = VALUES(download_url),
                    torrent_content = COALESCE(VALUES(torrent_content), torrent_content);
                """
                cursor.execute(sql, data)

        except Exception as e:
            print(f" 💥 [Thread-{thread_id}] Error on page {page_num}: {e}")

    # Cleanup
    driver.quit()
    db.close()
    print(f"🏁 [Thread-{thread_id}] Finished assigned pages.")


# ============================================================
# 2) MAIN EXECUTION
# ============================================================
if __name__ == "__main__":
    RUN_START = datetime.datetime.now()
    print(f"🚀 Starting multithreaded scraper with {THREADS} threads...")

    # 1. Distribute pages among threads
    # Example: with 226 pages and 5 threads, each gets ~46 pages
    all_pages = list(range(TOTAL_PAGES))
    chunk_size = len(all_pages) // THREADS + 1
    chunks = [all_pages[i:i + chunk_size] for i in range(0, len(all_pages), chunk_size)]

    # 2. Start threads
    with ThreadPoolExecutor(max_workers=THREADS) as executor:
        futures = []
        for i, page_chunk in enumerate(chunks):
            if page_chunk:  # Only start if the chunk is not empty
                futures.append(executor.submit(process_page_chunk, page_chunk, i + 1))

        # Wait for all to finish
        for f in futures:
            f.result()

    # 3. Final report
    RUN_END = datetime.datetime.now()
    print("\n✅ All threads completed.")

    body = (
        f"Run started: {RUN_START:%Y-%m-%d %H:%M:%S}\n"
        f"Run finished: {RUN_END:%Y-%m-%d %H:%M:%S}\n\n"
        f"Processed torrents: {stats['processed']}\n"
        f"New torrents saved: {stats['new']}\n"
        f"Existing torrents updated: {stats['existing']}\n"
    )
    if stats["new_titles"]:
        body += "\nNew torrents list:\n- " + "\n- ".join(stats["new_titles"])

    send_mail(to="vladimir.buzalka@buzalka.cz", subject="SKTorrent Multi-Thread Run", body=body, html=False)
    print("📧 Email report sent.")
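
The main block's contiguous chunking is worth a quick check with the script's own numbers (226 pages, 5 threads):

# Quick check of the contiguous page chunking (matches the script's config).
all_pages = list(range(226))
chunk_size = len(all_pages) // 5 + 1  # 46
chunks = [all_pages[i:i + chunk_size] for i in range(0, len(all_pages), chunk_size)]
print([len(c) for c in chunks])  # -> [46, 46, 46, 46, 42]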

5threaddownloadtorrentfiles.py (new file, 212 lines)
@@ -0,0 +1,212 @@
import pymysql
import requests
import json
import time
import random
import os
import re
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor
from threading import Lock
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# ============================================================
# CONFIGURATION
# ============================================================
DB_CONFIG = {
    "host": "192.168.1.50",
    "port": 3306,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
    "autocommit": True,
}

COOKIE_FILE = Path("sktorrent_cookies.json")
BACKUP_DIR = "saved_torrents"  # Directory for the local backup
THREADS = 5  # Number of threads

# Global lock for console output so lines don't interleave
print_lock = Lock()
stats = {"fixed": 0, "failed": 0, "saved_to_disk": 0}


# ============================================================
# HELPER FUNCTIONS
# ============================================================

def sanitize_filename(name):
    """Strips disallowed characters from a file name"""
    # Allow only letters, digits, dots, dashes and spaces
    clean = re.sub(r'[^\w\s\.-]', '', name)
    return clean.strip()[:100]  # Trim to 100 characters to be safe


def ensure_backup_dir():
    """Creates the torrent directory if it does not exist"""
    if not os.path.exists(BACKUP_DIR):
        os.makedirs(BACKUP_DIR)
        print(f"📁 Created backup directory: {os.path.abspath(BACKUP_DIR)}")


def get_browser_identity():
    """
    Starts Selenium (Chrome) JUST ONCE to obtain a valid
    User-Agent and fresh cookies for the threads.
    """
    print("🤖 Starting Selenium to obtain the browser identity...")

    opts = Options()
    opts.add_argument("--headless=new")
    opts.add_argument("--disable-gpu")

    driver = webdriver.Chrome(options=opts)

    # Visit the site to set the cookie domain
    driver.get("https://sktorrent.eu")

    # Load cookies from the file
    if COOKIE_FILE.exists():
        with open(COOKIE_FILE, "r", encoding="utf-8") as f:
            cookies_list = json.load(f)
        for c in cookies_list:
            driver.add_cookie(c)
        driver.refresh()
        time.sleep(2)

    # Export the identity
    user_agent = driver.execute_script("return navigator.userAgent;")
    browser_cookies = driver.get_cookies()

    driver.quit()
    print("✅ Identity obtained.")
    return user_agent, browser_cookies


# ============================================================
# WORKER (worker thread)
# ============================================================
def worker_task(rows_chunk, thread_id, user_agent, cookies_list):
    """
    This function runs separately in each thread.
    """
    # 1. Create this thread's own session
    session = requests.Session()
    session.headers.update({"User-Agent": user_agent})
    for c in cookies_list:
        session.cookies.set(c['name'], c['value'])

    # 2. Own DB connection (required for thread safety)
    try:
        db = pymysql.connect(**DB_CONFIG)
        cursor = db.cursor()
    except Exception as e:
        with print_lock:
            print(f"❌ [Thread-{thread_id}] DB connection error: {e}")
        return

    for row in rows_chunk:
        t_hash, url, title = row

        # Protection: a short random pause so 5 threads don't kill the server
        time.sleep(random.uniform(0.5, 2.0))

        try:
            # Download
            resp = session.get(url, timeout=15)

            if resp.status_code == 403:
                with print_lock:
                    print(f"⛔ [Thread-{thread_id}] 403 Forbidden! {title[:20]}...")
                    stats["failed"] += 1
                continue

            resp.raise_for_status()
            content = resp.content

            if len(content) > 100:
                # A) Save to the DB (BLOB)
                sql = "UPDATE torrents SET torrent_content = %s WHERE torrent_hash = %s"
                cursor.execute(sql, (content, t_hash))

                # B) Save to DISK (file)
                clean_name = sanitize_filename(title)
                # Add a piece of the hash to the name so same-named files don't overwrite each other
                filename = f"{clean_name}_{t_hash[:6]}.torrent"
                file_path = os.path.join(BACKUP_DIR, filename)

                with open(file_path, "wb") as f:
                    f.write(content)

                with print_lock:
                    print(f"✅ [Thread-{thread_id}] OK: {clean_name}")
                    stats["fixed"] += 1
                    stats["saved_to_disk"] += 1
            else:
                with print_lock:
                    print(f"⚠️ [Thread-{thread_id}] Empty file: {title}")
                    stats["failed"] += 1

        except Exception as e:
            with print_lock:
                print(f"❌ [Thread-{thread_id}] Error: {title[:20]}... -> {e}")
                stats["failed"] += 1

    db.close()
    with print_lock:
        print(f"🏁 [Thread-{thread_id}] Finished its work.")


# ============================================================
# MAIN LOOP
# ============================================================
if __name__ == "__main__":
    ensure_backup_dir()

    # 1. Fetch the data from the DB
    print("🔍 Loading the list of missing files from the DB...")
    main_db = pymysql.connect(**DB_CONFIG)
    with main_db.cursor() as c:
        # Look for rows that have a URL but no content
        c.execute(
            "SELECT torrent_hash, download_url, title_visible FROM torrents WHERE torrent_content IS NULL AND download_url IS NOT NULL")
        all_rows = c.fetchall()
    main_db.close()

    total = len(all_rows)
    print(f"📋 To fix: {total} items.")

    if total == 0:
        print("🎉 Nothing to fix.")
        exit()

    # 2. Obtain the "super identity" via Selenium (just once)
    u_agent, browser_cookies = get_browser_identity()

    # 3. Split the work across 5 threads
    chunk_size = total // THREADS + 1
    chunks = [all_rows[i:i + chunk_size] for i in range(0, total, chunk_size)]

    print(f"🚀 Starting {THREADS} threads (saving to the DB + to the '{BACKUP_DIR}' folder)...")

    # 4. Run the multithreading
    with ThreadPoolExecutor(max_workers=THREADS) as executor:
        futures = []
        for i, chunk in enumerate(chunks):
            if chunk:
                # Hand each thread a slice of the work + the browser identity
                futures.append(executor.submit(worker_task, chunk, i + 1, u_agent, browser_cookies))

        # Wait for completion
        for f in futures:
            f.result()

    print("\n" + "=" * 40)
    print("🏁 DONE")
    print(f"✅ Fixed in the DB: {stats['fixed']}")
    print(f"💾 Saved to disk: {stats['saved_to_disk']}")
    print(f"❌ Errors: {stats['failed']}")
    print(f"📁 You will find the files in: {os.path.abspath(BACKUP_DIR)}")
    print("=" * 40)
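
sanitize_filename (defined above) is easy to eyeball with a worked input; the title is made up:

# Worked example of sanitize_filename (input is illustrative).
print(sanitize_filename("Movie: Title? (2024) *FINAL*"))  # -> Movie Title 2024 FINAL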

Final SingleThreaded Cleanup.py (new file, 133 lines)
@@ -0,0 +1,133 @@
import pymysql
import requests
import json
import time
import random
import os
import re
from pathlib import Path
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# ============================================================
# CONFIGURATION
# ============================================================
DB_CONFIG = {
    "host": "192.168.1.50",
    "port": 3306,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
    "autocommit": True,
}

COOKIE_FILE = Path("sktorrent_cookies.json")
BACKUP_DIR = "saved_torrents"


# ============================================================
# HELPER FUNCTIONS
# ============================================================
def sanitize_filename(name):
    clean = re.sub(r'[^\w\s\.-]', '', name)
    return clean.strip()[:100]


def get_browser_identity():
    print("🤖 Starting Selenium (single-thread mode)...")
    opts = Options()
    opts.add_argument("--headless=new")
    opts.add_argument("--disable-gpu")
    driver = webdriver.Chrome(options=opts)
    driver.get("https://sktorrent.eu")

    if COOKIE_FILE.exists():
        with open(COOKIE_FILE, "r", encoding="utf-8") as f:
            cookies_list = json.load(f)
        for c in cookies_list:
            driver.add_cookie(c)
        driver.refresh()
        time.sleep(2)

    user_agent = driver.execute_script("return navigator.userAgent;")
    browser_cookies = driver.get_cookies()
    driver.quit()
    return user_agent, browser_cookies


# ============================================================
# MAIN
# ============================================================
if __name__ == "__main__":
    if not os.path.exists(BACKUP_DIR):
        os.makedirs(BACKUP_DIR)

    # 1. Load the remaining failures
    db = pymysql.connect(**DB_CONFIG)
    cursor = db.cursor()
    cursor.execute(
        "SELECT torrent_hash, download_url, title_visible FROM torrents WHERE torrent_content IS NULL AND download_url IS NOT NULL")
    rows = cursor.fetchall()

    print(f"📋 Remaining to fix: {len(rows)} items.")
    if not rows:
        print("🎉 Done! Everything is downloaded.")
        exit()

    # 2. Obtain the identity
    ua, cookies = get_browser_identity()

    session = requests.Session()
    session.headers.update({"User-Agent": ua})
    for c in cookies:
        session.cookies.set(c['name'], c['value'])

    # 3. Slow loop (1 thread)
    success = 0
    dead_links = 0

    print("🚀 Starting the gentle cleanup...")

    for i, row in enumerate(rows):
        t_hash, url, title = row
        print(f"[{i + 1}/{len(rows)}] {title[:50]}...", end=" ")

        try:
            # A longer pause, for stability
            time.sleep(random.uniform(1.5, 3.0))

            resp = session.get(url, timeout=20)  # Longer timeout

            if resp.status_code == 404:
                print("❌ 404 Not Found (the file does not exist on the server)")
                dead_links += 1
                continue

            if resp.status_code != 200:
                print(f"❌ Error {resp.status_code}")
                continue

            content = resp.content
            if len(content) > 100:
                # DB
                cursor.execute("UPDATE torrents SET torrent_content = %s WHERE torrent_hash = %s", (content, t_hash))

                # Disk
                fname = f"{sanitize_filename(title)}_{t_hash[:6]}.torrent"
                with open(os.path.join(BACKUP_DIR, fname), "wb") as f:
                    f.write(content)

                print("✅ OK")
                success += 1
            else:
                print("⚠️ Empty file")

        except Exception as e:
            print(f"❌ Failed: {e}")

    db.close()
    print("\n" + "=" * 30)
    print(f"🏁 FINAL: Fixed {success} out of {len(rows)}")
    if dead_links > 0:
        print(f"💀 Dead links (404): {dead_links} (those can no longer be fixed)")
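
Both download scripts repeat the same Selenium-to-requests identity handoff; a hedged generalization of that pattern (the helper name is made up):

# Hypothetical helper generalizing the identity handoff used above.
import requests

def session_from_driver(driver):
    """Build a requests.Session reusing the browser's User-Agent and cookies."""
    session = requests.Session()
    ua = driver.execute_script("return navigator.userAgent;")
    session.headers.update({"User-Agent": ua})
    for c in driver.get_cookies():
        session.cookies.set(c["name"], c["value"])
    return session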

WhatWehaveAlreadyDownloaded.py (new file, 158 lines)
@@ -0,0 +1,158 @@
import pymysql
import bencodepy
import os
from pathlib import Path

# ============================================================
# CONFIGURATION
# ============================================================
# Your network path (use a raw string r"..." for backslashes)
# PHYSICAL_DIR = Path(r"\\tower\torrents\downloads")
PHYSICAL_DIR = Path(r"\\tower1\#Colddata\Porno")

DB_CONFIG = {
    "host": "192.168.1.50",
    "port": 3306,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
    "autocommit": True,
}


# ============================================================
# HELPER FUNCTIONS
# ============================================================
def decode_bytes(b):
    """
    Decodes bytes from Bencode into a string.
    Tries UTF-8 first, then common fallbacks.
    """
    if isinstance(b, str): return b
    encodings = ['utf-8', 'windows-1250', 'latin-1', 'cp1252']
    for enc in encodings:
        try:
            return b.decode(enc)
        except:
            continue
    return b.decode('utf-8', errors='ignore')


def check_torrent_in_filesystem(torrent_blob, root_path):
    """
    Parses the binary BLOB, calculates expected paths,
    and checks if they exist in the root_path.
    """
    try:
        # Decode the binary BLOB
        data = bencodepy.decode(torrent_blob)
        info = data.get(b'info')
        if not info: return False

        # Get the name of the root file/folder defined in the torrent
        name = decode_bytes(info.get(b'name'))

        # Calculate the expected location
        target_path = root_path / name

        # 1. Check if the main path exists
        if not target_path.exists():
            return False

        # 2. Size verification (basic)
        # If it's a single file
        if b'files' not in info:
            expected_size = info[b'length']
            real_size = target_path.stat().st_size
            # Allow up to 4 KB variance (filesystems sometimes differ slightly)
            if abs(real_size - expected_size) < 4096:
                return True
            return False

        # If it's a multi-file torrent (folder)
        else:
            # If the folder exists, we assume it's mostly good,
            # but let's check at least one file inside to be sure it's not empty.
            files = info[b'files']
            if not files: return True  # Empty folder torrent? Rare but possible.

            # Check the first file in the list
            first_file_path = target_path.joinpath(*[decode_bytes(p) for p in files[0][b'path']])
            return first_file_path.exists()

    except Exception:
        # If Bencode fails or the path is weird
        return False


# ============================================================
# MAIN EXECUTION
# ============================================================
if __name__ == "__main__":
    if not PHYSICAL_DIR.exists():
        print(f"❌ ERROR: Cannot access path: {PHYSICAL_DIR}")
        print("Make sure the drive is mapped or the network path is accessible.")
        exit()

    print(f"📂 Scanning storage: {PHYSICAL_DIR}")
    print("🚀 Connecting to database...")

    db = pymysql.connect(**DB_CONFIG)
    cursor = db.cursor()

    # 1. Get all torrents that have content (BLOB)
    # We only select the ID and content to keep memory usage reasonable
    cursor.execute(
        "SELECT torrent_hash, title_visible, torrent_content FROM torrents WHERE torrent_content IS NOT NULL")

    rows = cursor.fetchall()
    total = len(rows)
    print(f"📋 Analysing {total} torrents from the database against disk files...")

    found_count = 0
    missing_count = 0

    # 2. Iterate and check
    updates = []  # Store successful hashes to batch-update later

    for index, row in enumerate(rows):
        t_hash, title, blob = row

        is_downloaded = check_torrent_in_filesystem(blob, PHYSICAL_DIR)

        if is_downloaded:
            found_count += 1
            updates.append(t_hash)
            # Print only occasionally to reduce clutter
            # print(f"✅ Found: {title[:50]}")
        else:
            missing_count += 1

        if index % 100 == 0:
            print(f" Processed {index}/{total} ... (Found: {found_count})")

    # 3. Batch-update the database
    print(f"\n💾 Updating database: marking {len(updates)} torrents as 'physical_exists = 1'...")

    # Reset everything to 0 first (in case you deleted files since the last run)
    cursor.execute("UPDATE torrents SET physical_exists = 0")

    if updates:
        # Update in chunks of 1000 to be safe
        chunk_size = 1000
        for i in range(0, len(updates), chunk_size):
            chunk = updates[i:i + chunk_size]
            format_strings = ','.join(['%s'] * len(chunk))
            cursor.execute(f"UPDATE torrents SET physical_exists = 1 WHERE torrent_hash IN ({format_strings})",
                           tuple(chunk))
        db.commit()

    db.close()

    print("\n" + "=" * 40)
    print("🏁 SCAN COMPLETE")
    print(f"✅ Physically available: {found_count}")
    print(f"❌ Missing / not downloaded: {missing_count}")
    print(f"📊 Completion rate: {int((found_count / total) * 100)}%")
    print("=" * 40)
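
For reference, the shape of what check_torrent_in_filesystem reads from a blob: bencodepy decodes to a dict keyed by bytes. The torrent below is a tiny hand-made single-file example:

# Tiny hand-made single-file torrent, illustrating the decode step.
import bencodepy

blob = bencodepy.encode({b"info": {b"name": b"movie.mkv", b"length": 12345}})
info = bencodepy.decode(blob)[b"info"]
print(info[b"name"].decode(), info[b"length"])  # -> movie.mkv 12345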

Seedbox/10 Nahraniexistujicich.py (new file, 150 lines)
@@ -0,0 +1,150 @@
import pymysql
import re
import time
import qbittorrentapi

# ============================================================
# CONFIGURATION
# ============================================================
MAX_SIZE_GB = 950
QBT_URL = "https://vladob.zen.usbx.me/qbittorrent"
QBT_USER = "vladob"
QBT_PASS = "jCni3U6d#y4bfcm"

DB_CONFIG = {
    "host": "192.168.1.50",
    "port": 3306,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
    "autocommit": True,
}


# ============================================================
# HELPER FUNCTIONS
# ============================================================
def parse_size_to_gb(size_str):
    """Converts text like '1.5 GB' or '500 MB' to a float in GB"""
    if not size_str: return 0.0
    s = str(size_str).upper().replace(",", ".").strip()
    match = re.search(r"([\d\.]+)", s)
    if not match: return 0.0
    val = float(match.group(1))

    if "TB" in s: return val * 1024
    if "GB" in s: return val
    if "MB" in s: return val / 1024
    if "KB" in s: return val / 1024 / 1024
    return 0.0


# ============================================================
# MAIN LOGIC
# ============================================================
def main():
    print(f"🚀 Planning a direct upload from the DB (limit: {MAX_SIZE_GB} GB, ordered by seeders)...")

    # 1. Load the data from the DB
    # We also fetch the BLOB (torrent_content), so this can take a while
    db = pymysql.connect(**DB_CONFIG)
    cursor = db.cursor()

    print("⏳ Loading data from MySQL...")
    sql = """
        SELECT torrent_hash, title_visible, size_pretty, seeders, torrent_content
        FROM torrents
        WHERE physical_exists = 0 AND torrent_content IS NOT NULL
        ORDER BY seeders DESC
    """
    cursor.execute(sql)
    rows = cursor.fetchall()
    db.close()

    print(f"🔍 Found {len(rows)} candidates. Picking the best ones...")

    # 2. Select up to the 950 GB capacity
    selected_torrents = []
    total_size_gb = 0.0

    for row in rows:
        t_hash, title, size_str, seeders, content = row
        size_gb = parse_size_to_gb(size_str)

        # Safety net against absurdly large files or parsing errors
        if size_gb == 0 and "MB" not in str(size_str).upper() and "KB" not in str(size_str).upper():
            pass

        # Capacity check
        if total_size_gb + size_gb > MAX_SIZE_GB:
            # As soon as something doesn't fit, stop selecting (the list is sorted by priority)
            print(f"🛑 Limit reached! '{title}' ({size_gb:.2f} GB) would exceed {MAX_SIZE_GB} GB.")
            break

        selected_torrents.append({
            "filename": f"{t_hash}.torrent",  # Virtual file name
            "content": content,  # Binary data
            "title": title,
            "size": size_gb,
            "seeders": seeders
        })
        total_size_gb += size_gb

    # 3. Report
    print("-" * 40)
    print(f"📦 Selected: {len(selected_torrents)} torrents")
    print(f"💾 Total size: {total_size_gb:.2f} GB / {MAX_SIZE_GB} GB")
    if selected_torrents:
        avg_seeders = sum(t['seeders'] for t in selected_torrents) / len(selected_torrents)
        print(f"⚡ Average seeders: {avg_seeders:.1f}")
    print("-" * 40)

    if not selected_torrents:
        print("Nothing to upload.")
        exit()

    confirm = input("❓ Upload this selection to the seedbox? (yes/no): ")
    if confirm.lower() not in ['ano', 'y', 'yes']:
        print("❌ Cancelled.")
        exit()

    # 4. Connect to qBittorrent
    try:
        qbt = qbittorrentapi.Client(
            host=QBT_URL,
            username=QBT_USER,
            password=QBT_PASS,
            VERIFY_WEBUI_CERTIFICATE=False
        )
        qbt.auth_log_in()
        print("✅ Connected to the seedbox.")
    except Exception as e:
        print(f"❌ Connection error: {e}")
        exit()

    # 5. Send the data
    print("🚀 Uploading...")
    success_count = 0

    for i, item in enumerate(selected_torrents):
        try:
            # Send the binary data directly (pretending to upload a file)
            # format: {'file_name.torrent': b'binary_data...'}
            file_dict = {item['filename']: item['content']}

            qbt.torrents_add(torrent_files=file_dict, is_paused=False)

            print(f"[{i + 1}/{len(selected_torrents)}] 📤 {item['title']} ({item['size']:.1f} GB)")
            success_count += 1
            time.sleep(0.2)  # Small pause for API stability

        except Exception as e:
            print(f"❌ Error with {item['title']}: {e}")

    print("\n✅ DONE.")
    print("The torrents are on the seedbox. Once they finish downloading, pull them home and run the 99_Scan... script.")


if __name__ == "__main__":
    main()
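
Worked examples of parse_size_to_gb (inputs are illustrative):

# Worked examples of parse_size_to_gb.
print(parse_size_to_gb("1.5 GB"))  # -> 1.5
print(parse_size_to_gb("500 MB"))  # -> 0.48828125
print(parse_size_to_gb("2 TB"))    # -> 2048.0
print(parse_size_to_gb(None))      # -> 0.0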