git

2026-02-01 07:18:20 +01:00
parent 7b0404bfe3
commit 3d11661997
7 changed files with 1074 additions and 116 deletions
@@ -61,17 +61,28 @@ def get_data():
 # ==============================

 def auto_adjust_columns(writer, df, sheet_name):
-    """Pomocná funkce pro automatické rozšíření sloupců v Excelu"""
+    """Bezpečné automatické nastavení šířky sloupců"""
    worksheet = writer.sheets[sheet_name]
+
    for idx, col in enumerate(df.columns):
-        max_len = max(
-            df[col].astype(str).map(len).max(),
-            len(str(col))
-        ) + 2
-        if max_len > 60: max_len = 60
+        series = df[col]
+
+        max_len = len(str(col))  # minimálně délka hlavičky
+
+        for val in series:
+            if val is None or (isinstance(val, float) and pd.isna(val)):
+                length = 0
+            else:
+                length = len(str(val))
+
+            if length > max_len:
+                max_len = length
+
+        max_len = min(max_len + 2, 60)
        worksheet.set_column(idx, idx, max_len)


+
 # ==============================
 # 🚀 HLAVNÍ LOGIKA
 # ==============================
@@ -96,135 +96,124 @@ def close_popup_if_any():


 # ============================================================
-# 5) Parse one torrent row
+# 5) Parse one torrent row (MODIFIED)
 # ============================================================
+
 def parse_row(cells):
-    # Column 0: Category icon/text
+    # --- 1. INITIALIZE ---
+    torrent_hash = None
+    download_url = None
    category = cells[0].text.strip()

    try:
-        # Column 1: Download icon link
+        # --- 2. EXTRACT DOWNLOAD URL (Column 1) ---
        download_a = cells[1].find_element(By.TAG_NAME, "a")
-        download_link = download_a.get_attribute("href")
-    except:
-        return None
+        download_url = download_a.get_attribute("href")

-    parsed_dl = urlparse.urlparse(download_link)
-    dl_query = urlparse.parse_qs(parsed_dl.query)
-    torrent_filename = dl_query.get("f", ["unknown.torrent"])[0]
+        parsed_dl = urlparse.urlparse(download_url)
+        dl_query = urlparse.parse_qs(parsed_dl.query)
+        torrent_filename = dl_query.get("f", ["unknown.torrent"])[0]

-    # Column 2: Name and info
-    title_links = cells[2].find_elements(By.TAG_NAME, "a")
-    if not title_links:
-        return None
+        # --- 3. EXTRACT DETAILS & HASH (Column 2) ---
+        title_links = cells[2].find_elements(By.TAG_NAME, "a")
+        if not title_links:
+            return None

-    a_tag = title_links[0]
-    visible_name = a_tag.text.strip()
-    full_title = a_tag.get_attribute("title")
-    details_link = a_tag.get_attribute("href")
+        a_tag = title_links[0]
+        visible_name = a_tag.text.strip()
+        full_title = a_tag.get_attribute("title")
+        details_link = a_tag.get_attribute("href")

-    parsed = urlparse.urlparse(details_link)
-    query = urlparse.parse_qs(parsed.query)
-    if "id" not in query:
-        return None
+        parsed = urlparse.urlparse(details_link)
+        query = urlparse.parse_qs(parsed.query)
+        if "id" not in query:
+            return None

-    torrent_hash = query["id"][0]
+        torrent_hash = query["id"][0]

-    # Use innerText for robust text extraction
-    text_block = cells[2].get_attribute("innerText")
-    text_block_clean = " ".join(text_block.split())
+        # --- 4. EXTRACT SIZE & DATE ---
+        text_block = cells[2].get_attribute("innerText")
+        text_block_clean = " ".join(text_block.split())
+        size_match = re.search(r"Velkost ([0-9\.]+ ?[KMG]B)", text_block_clean, re.IGNORECASE)
+        added_match = re.search(r"Pridany (.+?)(?:\sObrázok|$)", text_block_clean, re.IGNORECASE)
+        size_pretty = size_match.group(1) if size_match else None
+        added_pretty = added_match.group(1) if added_match else None

-    # Regex for Size and Date
-    size_match = re.search(r"Velkost ([0-9\.]+ ?[KMG]B)", text_block_clean, re.IGNORECASE)
-    added_match = re.search(r"Pridany (.+?)(?:\sObrázok|$)", text_block_clean, re.IGNORECASE)
+        added_mysql = None
+        if added_pretty:
+            clean = added_pretty.replace(" o ", " ").strip()
+            parts = clean.split(" ")
+            if len(parts) >= 2:
+                date_part, time_part = parts[0], parts[1]
+                if len(time_part.split(":")) == 2: time_part += ":00"
+                try:
+                    d, m, y = date_part.split("/")
+                    added_mysql = f"{y}-{m}-{d} {time_part}"
+                except: pass

-    size_pretty = size_match.group(1) if size_match else None
-    added_pretty = added_match.group(1) if added_match else None
-
-    # Date conversion: "29/11/2025 o 02:29" -> MySQL format
-    added_mysql = None
-    if added_pretty:
-        clean = added_pretty.replace(" o ", " ").strip()
-        parts = clean.split(" ")
-        if len(parts) >= 2:
-            date_part = parts[0]
-            time_part = parts[1]
-            if len(time_part.split(":")) == 2:
-                time_part += ":00"
-            try:
-                day, month, year = date_part.split("/")
-                added_mysql = f"{year}-{month}-{day} {time_part}"
-            except:
-                added_mysql = None
-
-    # Column 2: Image preview (if exists)
-    img_link = None
-    try:
-        image_a = cells[2].find_element(By.XPATH, ".//a[contains(text(),'Obrázok')]")
-        mouseover = image_a.get_attribute("onmouseover")
-        img_match = re.search(r"src=([^ ]+)", mouseover)
-        if img_match:
-            img_link = img_match.group(1).replace("'", "").strip()
-            if img_link.startswith("//"):
-                img_link = "https:" + img_link
-    except:
-        pass
-
-    # Column 4: Seeders
-    seeders_a = cells[4].find_element(By.TAG_NAME, "a")
-    seeders_number = int(seeders_a.text.strip())
-    seeders_link = seeders_a.get_attribute("href")
-
-    # Column 5: Leechers
-    leechers_a = cells[5].find_element(By.TAG_NAME, "a")
-    leechers_number = int(leechers_a.text.strip())
-    leechers_link = leechers_a.get_attribute("href")
-
-    # Check database for existing binary content
-    cursor.execute("SELECT torrent_content FROM torrents WHERE torrent_hash=%s", (torrent_hash,))
-    row = cursor.fetchone()
-    already_have_torrent = row is not None and row[0] is not None
-
-    torrent_content = None
-    if not already_have_torrent:
-        time.sleep(3)  # Politeness delay
+        # --- 5. IMAGE & STATS ---
+        img_link = None
        try:
-            resp = requests_session.get(download_link)
-            resp.raise_for_status()
-            torrent_content = resp.content
-        except:
-            torrent_content = None
+            image_a = cells[2].find_element(By.XPATH, ".//a[contains(text(),'Obrázok')]")
+            mouseover = image_a.get_attribute("onmouseover")
+            img_match = re.search(r"src=([^ ]+)", mouseover)
+            if img_match:
+                img_link = img_match.group(1).replace("'", "").strip()
+                if img_link.startswith("//"): img_link = "https:" + img_link
+        except: pass

-    return {
-        "torrent_hash": torrent_hash,
-        "details_link": details_link,
-        "category": category,
-        "title_visible": visible_name,
-        "title_full": full_title,
-        "size_pretty": size_pretty,
-        "added_datetime": added_mysql,
-        "preview_image": img_link,
-        "seeders": seeders_number,
-        "seeders_link": seeders_link,
-        "leechers": leechers_number,
-        "leechers_link": leechers_link,
-        "torrent_filename": torrent_filename,
-        "torrent_content": torrent_content if not already_have_torrent else None,
-        "is_new_torrent": not already_have_torrent,
-    }
+        seeders_number = int(cells[4].find_element(By.TAG_NAME, "a").text.strip())
+        seeders_link = cells[4].find_element(By.TAG_NAME, "a").get_attribute("href")
+        leechers_number = int(cells[5].find_element(By.TAG_NAME, "a").text.strip())
+        leechers_link = cells[5].find_element(By.TAG_NAME, "a").get_attribute("href")

+        # --- 6. DATABASE CHECK & DOWNLOAD ---
+        cursor.execute("SELECT torrent_content FROM torrents WHERE torrent_hash=%s", (torrent_hash,))
+        db_row = cursor.fetchone()
+        already_have_torrent = db_row is not None and db_row[0] is not None

+        torrent_content = None
+        if not already_have_torrent:
+            time.sleep(2)
+            try:
+                resp = requests_session.get(download_url, timeout=10)
+                resp.raise_for_status()
+                torrent_content = resp.content
+            except Exception as e:
+                print(f"   ⚠️ Download failed for {visible_name}: {e}")
+
+        return {
+            "torrent_hash": torrent_hash,
+            "details_link": details_link,
+            "download_url": download_url,
+            "category": category,
+            "title_visible": visible_name,
+            "title_full": full_title,
+            "size_pretty": size_pretty,
+            "added_datetime": added_mysql,
+            "preview_image": img_link,
+            "seeders": seeders_number,
+            "seeders_link": seeders_link,
+            "leechers": leechers_number,
+            "leechers_link": leechers_link,
+            "torrent_filename": torrent_filename,
+            "torrent_content": torrent_content if not already_have_torrent else None,
+            "is_new_torrent": not already_have_torrent,
+        }
+    except Exception as e:
+        print(f"⚠️ parse_row logic failed: {e}")
+        return None
 # ============================================================
-# 6) INSERT SQL
+# 6) INSERT SQL (MODIFIED)
 # ============================================================
 insert_sql = """
 INSERT INTO torrents (
-    torrent_hash, details_link, category, title_visible, title_full,
+    torrent_hash, details_link, download_url, category, title_visible, title_full,
    size_pretty, added_datetime, preview_image,
    seeders, seeders_link, leechers, leechers_link,
    torrent_filename, torrent_content
 ) VALUES (
-    %(torrent_hash)s, %(details_link)s, %(category)s, %(title_visible)s, %(title_full)s,
+    %(torrent_hash)s, %(details_link)s, %(download_url)s, %(category)s, %(title_visible)s, %(title_full)s,
    %(size_pretty)s, %(added_datetime)s, %(preview_image)s,
    %(seeders)s, %(seeders_link)s, %(leechers)s, %(leechers_link)s,
    %(torrent_filename)s, %(torrent_content)s
@@ -232,9 +221,12 @@ INSERT INTO torrents (
 ON DUPLICATE KEY UPDATE
    seeders = VALUES(seeders),
    leechers = VALUES(leechers),
+    download_url = VALUES(download_url),
    torrent_content = COALESCE(VALUES(torrent_content), torrent_content);
 """
-
+# Note: COALESCE(VALUES(torrent_content), torrent_content)
+# keeps the old value if the new one is NULL,
+# and overwrites it whenever the new one is not NULL.
 # ============================================================
 # 7) PROCESS ALL PAGES
 # ============================================================
@@ -250,17 +242,27 @@ for page_num in range(0, TOTAL_PAGES):

    # Find table rows
    rows = driver.find_elements(By.CSS_SELECTOR, "table tr")
-    # v1 table usually has 7 cells for a data row
-    real_rows = [r.find_elements(By.TAG_NAME, "td") for r in rows if len(r.find_elements(By.TAG_NAME, "td")) == 7]
+
+    # FILTER: Only keep rows that have 7 columns AND a link in the 2nd column (index 1)
+    # This automatically discards headers and empty space rows.
+    real_rows = []
+    for r in rows:
+        cells = r.find_elements(By.TAG_NAME, "td")
+        if len(cells) == 7 and cells[1].find_elements(By.TAG_NAME, "a"):
+            real_rows.append(cells)

    if not real_rows:
        print("⚠️ No data rows found on this page. Ending loop.")
        break

+    # Per-page counter of new items; reset before the rows of each page are processed
     page_new_items = 0
+
    for cells in real_rows:
        try:
            data = parse_row(cells)
+            # parse_row() returns None for rows it cannot parse
        except Exception as e:
            print(f"⚠️ parse_row failed: {e}")
            continue
@@ -279,10 +281,10 @@ for page_num in range(0, TOTAL_PAGES):

        cursor.execute(insert_sql, data)

-    # If an entire page is old news, we can stop the deep crawl
-    if page_new_items == 0 and page_num > 0:
-        print("🛑 Page contained only known items. Sync complete.")
-        break
+    # # If an entire page is old news, we can stop the deep crawl
+    # if page_new_items == 0 and page_num > 0:
+    #     print("🛑 Page contained only known items. Sync complete.")
+    #     break

    time.sleep(1)

@@ -0,0 +1,292 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import pymysql
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.chrome.options import Options
+import time
+import re
+import urllib.parse as urlparse
+from pathlib import Path
+import json
+import requests
+import datetime
+import sys
+import threading
+from concurrent.futures import ThreadPoolExecutor
+
+# Ensure this file exists in your directory
+from EmailMessagingGraph import send_mail
+
+# ============================================================
+# CONFIGURATION
+# ============================================================
+# Number of listing pages to crawl; pages are numbered 0 .. TOTAL_PAGES - 1
+TOTAL_PAGES = 226
+# Number of worker threads; each worker opens its own browser and DB connection
+THREADS = 5
+# JSON file with saved browser cookies; loaded into each driver when it exists
+COOKIE_FILE = Path("sktorrent_cookies.json")
+
+# Database settings
+# NOTE(review): credentials are hard-coded in source — consider loading them
+# from the environment or an untracked config file.
+DB_CONFIG = {
+    "host": "192.168.1.50",
+    "port": 3306,
+    "user": "root",
+    "password": "Vlado9674+",
+    "database": "torrents",
+    "charset": "utf8mb4",
+    "autocommit": True,
+}
+
+# Listing URL; "&page=<n>" is appended for every page that is scraped
+BASE_URL = (
+    "https://sktorrent.eu/torrent/torrents.php"
+    "?active=0&category=24&order=data&by=DESC&zaner=&jazyk="
+)
+
+# Global counters for reporting (Thread-safe lock needed)
+# Workers update `stats` only while holding `stats_lock`.
+stats_lock = threading.Lock()
+stats = {
+    "processed": 0,
+    "new": 0,
+    "existing": 0,
+    "new_titles": []
+}
+
+
+# ============================================================
+# 1) WORKER FUNCTION (Runs inside each thread)
+# ============================================================
+def process_page_chunk(page_indices, thread_id):
+    """
+    Scrape the assigned listing pages and upsert every torrent row into the DB.
+
+    The function creates its OWN browser, DB connection and requests session,
+    so several calls can run in parallel threads.
+
+    Args:
+        page_indices: Page numbers to scrape; each one is appended to BASE_URL
+            as "&page=<n>".
+        thread_id: Number used only to tag the console messages of this worker.
+
+    Returns:
+        None. The module-level ``stats`` counters are updated under
+        ``stats_lock``. If the DB connection cannot be opened, the error is
+        printed and the function returns without scraping anything.
+    """
+    print(f"🧵 [Thread-{thread_id}] Starting. Assigned {len(page_indices)} pages.")
+
+    # Upsert statement, built once because it is identical for every row.
+    # COALESCE keeps the stored torrent_content when the new value is NULL.
+    sql = """
+    INSERT INTO torrents (
+        torrent_hash, details_link, download_url, category, title_visible, title_full,
+        size_pretty, added_datetime, preview_image,
+        seeders, seeders_link, leechers, leechers_link,
+        torrent_filename, torrent_content
+    ) VALUES (
+        %(torrent_hash)s, %(details_link)s, %(download_url)s, %(category)s, %(title_visible)s, %(title_full)s,
+        %(size_pretty)s, %(added_datetime)s, %(preview_image)s,
+        %(seeders)s, %(seeders_link)s, %(leechers)s, %(leechers_link)s,
+        %(torrent_filename)s, %(torrent_content)s
+    )
+    ON DUPLICATE KEY UPDATE
+        seeders = VALUES(seeders),
+        leechers = VALUES(leechers),
+        download_url = VALUES(download_url),
+        torrent_content = COALESCE(VALUES(torrent_content), torrent_content);
+    """
+
+    # --- A. Setup Independent DB Connection ---
+    try:
+        db = pymysql.connect(**DB_CONFIG)
+        cursor = db.cursor()
+    except Exception as e:
+        print(f"❌ [Thread-{thread_id}] DB Connection failed: {e}")
+        return
+
+    driver = None
+    try:
+        # --- B. Setup Independent Selenium Driver ---
+        chrome_options = Options()
+        # HEADLESS MODE is safer for 5 threads to avoid popping up 5 windows
+        chrome_options.add_argument("--headless=new")
+        chrome_options.add_argument("--disable-notifications")
+        chrome_options.add_argument("--disable-popup-blocking")
+        chrome_options.add_argument("--disable-extensions")
+        chrome_options.add_argument("--log-level=3")  # Reduce noise
+
+        driver = webdriver.Chrome(options=chrome_options)
+        driver.set_window_size(1350, 1000)
+
+        # --- C. Login / Cookies ---
+        # The site is opened first so the saved cookies can be added for its domain
+        driver.get("https://sktorrent.eu")
+        if COOKIE_FILE.exists():
+            with open(COOKIE_FILE, "r", encoding="utf-8") as f:
+                cookies = json.load(f)
+            for c in cookies:
+                driver.add_cookie(c)
+
+        # --- D. Requests Session ---
+        # Copy the browser cookies so plain HTTP downloads share the same login
+        requests_session = requests.Session()
+        for ck in driver.get_cookies():
+            requests_session.cookies.set(ck["name"], ck["value"])
+
+        # --- E. Helper: Parse Row (Local scope) ---
+        def parse_row(cells):
+            """Return a dict describing one table row, or None when it cannot be parsed."""
+            try:
+                category = cells[0].text.strip()
+
+                # Download URL
+                download_a = cells[1].find_element(By.TAG_NAME, "a")
+                download_url = download_a.get_attribute("href")
+
+                parsed_dl = urlparse.urlparse(download_url)
+                dl_query = urlparse.parse_qs(parsed_dl.query)
+                torrent_filename = dl_query.get("f", ["unknown.torrent"])[0]
+
+                # Details & Hash
+                title_links = cells[2].find_elements(By.TAG_NAME, "a")
+                if not title_links:
+                    return None
+                a_tag = title_links[0]
+                visible_name = a_tag.text.strip()
+                full_title = a_tag.get_attribute("title")
+                details_link = a_tag.get_attribute("href")
+
+                parsed = urlparse.urlparse(details_link)
+                query = urlparse.parse_qs(parsed.query)
+                if "id" not in query:
+                    return None
+                torrent_hash = query["id"][0]
+
+                # Size & Date
+                text_block = cells[2].get_attribute("innerText")
+                clean_text = " ".join(text_block.split())
+                size_match = re.search(r"Velkost ([0-9\.]+ ?[KMG]B)", clean_text, re.IGNORECASE)
+                added_match = re.search(r"Pridany (.+?)(?:\sObrázok|$)", clean_text, re.IGNORECASE)
+                size_pretty = size_match.group(1) if size_match else None
+
+                # Convert "DD/MM/YYYY o HH:MM" into "YYYY-MM-DD HH:MM:SS"
+                added_mysql = None
+                if added_match:
+                    clean = added_match.group(1).replace(" o ", " ").strip()
+                    parts = clean.split(" ")
+                    if len(parts) >= 2:
+                        t = parts[1] + ":00" if len(parts[1].split(":")) == 2 else parts[1]
+                        try:
+                            # The unpacking is what can fail, so it must sit inside the try;
+                            # an unexpected date layout then leaves the date NULL instead
+                            # of discarding the whole row.
+                            d, m, y = parts[0].split("/")
+                            added_mysql = f"{y}-{m}-{d} {t}"
+                        except ValueError:
+                            pass
+
+                # Image (optional; the row is kept when no preview can be extracted)
+                img_link = None
+                try:
+                    img_a = cells[2].find_element(By.XPATH, ".//a[contains(text(),'Obrázok')]")
+                    img_src = re.search(r"src=([^ ]+)", img_a.get_attribute("onmouseover"))
+                    if img_src:
+                        img_link = img_src.group(1).replace("'", "").strip()
+                        if img_link.startswith("//"):
+                            img_link = "https:" + img_link
+                except Exception:
+                    pass
+
+                # Stats
+                seeders_a = cells[4].find_element(By.TAG_NAME, "a")
+                seeders = int(seeders_a.text.strip())
+                seeders_link = seeders_a.get_attribute("href")
+                leechers_a = cells[5].find_element(By.TAG_NAME, "a")
+                leechers = int(leechers_a.text.strip())
+                leechers_link = leechers_a.get_attribute("href")
+
+                # Check DB
+                cursor.execute("SELECT torrent_content FROM torrents WHERE torrent_hash=%s", (torrent_hash,))
+                row = cursor.fetchone()
+                already_have_file = row is not None and row[0] is not None
+
+                content = None
+                if not already_have_file:
+                    # Politeness sleep only if downloading
+                    time.sleep(1)
+                    try:
+                        r = requests_session.get(download_url, timeout=10)
+                        r.raise_for_status()
+                        content = r.content
+                    except Exception as e:
+                        # Keep the row (content stays NULL) but report the failure
+                        print(f"   ⚠️ [Thread-{thread_id}] Download failed for {visible_name}: {e}")
+
+                return {
+                    "torrent_hash": torrent_hash, "details_link": details_link, "download_url": download_url,
+                    "category": category, "title_visible": visible_name, "title_full": full_title,
+                    "size_pretty": size_pretty, "added_datetime": added_mysql, "preview_image": img_link,
+                    "seeders": seeders, "seeders_link": seeders_link, "leechers": leechers, "leechers_link": leechers_link,
+                    "torrent_filename": torrent_filename, "torrent_content": content,
+                    "is_new_torrent": not already_have_file
+                }
+            except Exception as e:
+                print(f"   ⚠️ [Thread-{thread_id}] parse_row failed: {e}")
+                return None
+
+        # --- F. Loop through Assigned Pages ---
+        for page_num in page_indices:
+            url = f"{BASE_URL}&page={page_num}"
+            print(f"   🔄 [Thread-{thread_id}] Scraping Page {page_num}")
+
+            try:
+                driver.get(url)
+                # Close popup (simplified JS)
+                driver.execute_script("try { interstitialBox.closeit(); } catch(e) {}")
+
+                # Row Filtering: keep rows with 7 cells and a link in the 2nd cell
+                rows = driver.find_elements(By.CSS_SELECTOR, "table tr")
+                real_rows = []
+                for r in rows:
+                    cs = r.find_elements(By.TAG_NAME, "td")
+                    if len(cs) == 7 and cs[1].find_elements(By.TAG_NAME, "a"):
+                        real_rows.append(cs)
+
+                if not real_rows:
+                    print(f"   ⚠️ [Thread-{thread_id}] Page {page_num} empty.")
+                    continue
+
+                # Process Rows
+                for cells in real_rows:
+                    data = parse_row(cells)
+                    if not data:
+                        continue
+
+                    # Update Global Stats safely
+                    with stats_lock:
+                        stats["processed"] += 1
+                        if data["is_new_torrent"]:
+                            stats["new"] += 1
+                            stats["new_titles"].append(data["title_visible"])
+                        else:
+                            stats["existing"] += 1
+
+                    # Insert SQL
+                    cursor.execute(sql, data)
+
+            except Exception as e:
+                # One broken page must not stop the remaining pages of this worker
+                print(f"   💥 [Thread-{thread_id}] Error on page {page_num}: {e}")
+    finally:
+        # Cleanup: always release the browser and the DB connection, even after an error
+        try:
+            if driver is not None:
+                driver.quit()
+        finally:
+            db.close()
+
+    print(f"🏁 [Thread-{thread_id}] Finished assigned pages.")
+
+
+# ============================================================
+# 2) MAIN EXECUTION
+# ============================================================
+if __name__ == "__main__":
+    RUN_START = datetime.datetime.now()
+    print(f"🚀 Starting Multithreaded Scraper with {THREADS} threads...")
+
+    # 1. Split the page numbers into consecutive chunks, one per worker
+    # Example: 226 pages and 5 threads give a chunk size of 46 pages
+    all_pages = list(range(TOTAL_PAGES))
+    page_count = len(all_pages)
+    chunk_size = page_count // THREADS + 1
+    chunks = [
+        all_pages[start:start + chunk_size]
+        for start in range(0, page_count, chunk_size)
+    ]
+
+    # 2. Submit one task per non-empty chunk; workers are numbered from 1
+    with ThreadPoolExecutor(max_workers=THREADS) as executor:
+        futures = [
+            executor.submit(process_page_chunk, page_chunk, worker_no)
+            for worker_no, page_chunk in enumerate(chunks, start=1)
+            if page_chunk
+        ]
+
+        # Block until every worker is done; result() re-raises a worker's exception
+        for future in futures:
+            future.result()
+
+    # 3. Build and send the final report
+    RUN_END = datetime.datetime.now()
+    print("\n✅ All threads completed.")
+
+    report_lines = [
+        f"Run started:  {RUN_START:%Y-%m-%d %H:%M:%S}",
+        f"Run finished: {RUN_END:%Y-%m-%d %H:%M:%S}",
+        "",
+        f"Processed torrents: {stats['processed']}",
+        f"New torrents saved: {stats['new']}",
+        f"Existing torrents updated: {stats['existing']}",
+    ]
+    body = "\n".join(report_lines) + "\n"
+    new_titles = stats["new_titles"]
+    if new_titles:
+        body += "\nNew torrents list:\n- " + "\n- ".join(new_titles)
+
+    send_mail(to="vladimir.buzalka@buzalka.cz", subject="SKTorrent Multi-Thread Run", body=body, html=False)
+    print("📧 Email report sent.")
@@ -0,0 +1,212 @@
+import pymysql
+import requests
+import json
+import time
+import random
+import os
+import re
+from pathlib import Path
+from concurrent.futures import ThreadPoolExecutor
+from threading import Lock
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+
+# ============================================================
+# KONFIGURACE
+# ============================================================
+# NOTE(review): credentials are hard-coded in source — consider loading them
+# from the environment or an untracked config file.
+DB_CONFIG = {
+    "host": "192.168.1.50",
+    "port": 3306,
+    "user": "root",
+    "password": "Vlado9674+",
+    "database": "torrents",
+    "charset": "utf8mb4",
+    "autocommit": True,
+}
+
+# JSON file with saved browser cookies; loaded into Selenium when it exists
+COOKIE_FILE = Path("sktorrent_cookies.json")
+BACKUP_DIR = "saved_torrents"  # Directory for the local backup
+THREADS = 5  # Number of threads
+
+# Global lock for console output so that messages do not overwrite each other
+print_lock = Lock()
+# Counters shared by all worker threads and printed in the final summary
+stats = {"fixed": 0, "failed": 0, "saved_to_disk": 0}
+
+
+# ============================================================
+# POMOCNÉ FUNKCE
+# ============================================================
+
+def sanitize_filename(name):
+    """Return ``name`` with characters that are not allowed in a file name removed.
+
+    Only word characters, whitespace, dots and hyphens are kept. The result is
+    stripped of surrounding whitespace and then cut to at most 100 characters.
+    """
+    allowed_only = re.sub(r'[^\w\s\.-]', '', name)
+    trimmed = allowed_only.strip()
+    return trimmed[:100]
+
+
+def ensure_backup_dir():
+    """Create the torrent backup directory (BACKUP_DIR) when it does not exist yet."""
+    if os.path.exists(BACKUP_DIR):
+        return
+    os.makedirs(BACKUP_DIR)
+    print(f"📁 Vytvořen adresář pro zálohu: {os.path.abspath(BACKUP_DIR)}")
+
+
+def get_browser_identity():
+    """
+    Start Selenium (Chrome) ONCE to obtain a valid User-Agent and fresh
+    cookies that the worker threads can reuse.
+
+    Returns:
+        tuple: ``(user_agent, browser_cookies)`` — the browser's
+        ``navigator.userAgent`` string and the list of cookie dicts returned
+        by ``driver.get_cookies()``.
+    """
+    print("🤖 Startuji Selenium pro získání identity prohlížeče...")
+
+    opts = Options()
+    opts.add_argument("--headless=new")
+    opts.add_argument("--disable-gpu")
+
+    driver = webdriver.Chrome(options=opts)
+    try:
+        # Open the site first so the cookie domain is set
+        driver.get("https://sktorrent.eu")
+
+        # Load the saved cookies from the file
+        if COOKIE_FILE.exists():
+            with open(COOKIE_FILE, "r", encoding="utf-8") as f:
+                cookies_list = json.load(f)
+            for c in cookies_list:
+                driver.add_cookie(c)
+            # Reload so the page is requested again with the cookies applied
+            driver.refresh()
+            time.sleep(2)
+
+        # Export the identity
+        user_agent = driver.execute_script("return navigator.userAgent;")
+        browser_cookies = driver.get_cookies()
+    finally:
+        # Quit even when a step above raises, so no Chrome process is left behind
+        driver.quit()
+
+    print("✅ Identita získána.")
+    return user_agent, browser_cookies
+
+
+# ============================================================
+# WORKER (Pracovní vlákno)
+# ============================================================
+def worker_task(rows_chunk, thread_id, user_agent, cookies_list):
+    """
+    Download the missing .torrent files for one chunk of rows (runs in its own thread).
+
+    Args:
+        rows_chunk: Sequence of ``(torrent_hash, download_url, title)`` rows.
+        thread_id: Number used only to tag the console messages of this worker.
+        user_agent: Value sent as the User-Agent header with every request.
+        cookies_list: Cookie dicts with 'name' and 'value' keys, copied into the session.
+
+    Returns:
+        None. Every downloaded file is stored in ``torrents.torrent_content``
+        and written to BACKUP_DIR; the shared ``stats`` counters are updated
+        while holding ``print_lock``.
+    """
+    # 1. Create a separate Session for this thread
+    session = requests.Session()
+    session.headers.update({"User-Agent": user_agent})
+    for c in cookies_list:
+        session.cookies.set(c['name'], c['value'])
+
+    # 2. Own DB connection (required for thread-safety)
+    try:
+        db = pymysql.connect(**DB_CONFIG)
+        cursor = db.cursor()
+    except Exception as e:
+        with print_lock:
+            print(f"❌ [Thread-{thread_id}] Chyba DB připojení: {e}")
+        return
+
+    try:
+        for row in rows_chunk:
+            t_hash, url, title = row
+
+            # Protection: short random pause so that 5 threads do not overload the server
+            time.sleep(random.uniform(0.5, 2.0))
+
+            try:
+                # Download
+                resp = session.get(url, timeout=15)
+
+                if resp.status_code == 403:
+                    # The counter is changed under the lock because several
+                    # threads update the shared dict at the same time.
+                    with print_lock:
+                        print(f"⛔ [Thread-{thread_id}] 403 Forbidden! {title[:20]}...")
+                        stats["failed"] += 1
+                    continue
+
+                resp.raise_for_status()
+                content = resp.content
+
+                # Responses of 100 bytes or less are treated as an empty file
+                if len(content) > 100:
+                    # A) Store in the DB (BLOB)
+                    sql = "UPDATE torrents SET torrent_content = %s WHERE torrent_hash = %s"
+                    cursor.execute(sql, (content, t_hash))
+
+                    # B) Store on DISK (file)
+                    clean_name = sanitize_filename(title)
+                    # Part of the hash is appended so files with the same title do not overwrite each other
+                    filename = f"{clean_name}_{t_hash[:6]}.torrent"
+                    file_path = os.path.join(BACKUP_DIR, filename)
+
+                    with open(file_path, "wb") as f:
+                        f.write(content)
+
+                    with print_lock:
+                        print(f"✅ [Thread-{thread_id}] OK: {clean_name}")
+                        stats["fixed"] += 1
+                        stats["saved_to_disk"] += 1
+                else:
+                    with print_lock:
+                        print(f"⚠️ [Thread-{thread_id}] Prázdný soubor: {title}")
+                        stats["failed"] += 1
+
+            except Exception as e:
+                with print_lock:
+                    print(f"❌ [Thread-{thread_id}] Chyba: {title[:20]}... -> {e}")
+                    stats["failed"] += 1
+    finally:
+        # Close the connection even when an unexpected error ends the loop
+        db.close()
+
+    with print_lock:
+        print(f"🏁 [Thread-{thread_id}] Dokončil práci.")
+
+
+# ============================================================
+# HLAVNÍ LOOP
+# ============================================================
+if __name__ == "__main__":
+    ensure_backup_dir()
+
+    # 1. Load the work list from the DB
+    print("🔍 Načítám seznam chybějících souborů z DB...")
+    main_db = pymysql.connect(**DB_CONFIG)
+    try:
+        with main_db.cursor() as c:
+            # Rows that have a download URL but no stored content yet
+            c.execute(
+                "SELECT torrent_hash, download_url, title_visible FROM torrents WHERE torrent_content IS NULL AND download_url IS NOT NULL")
+            all_rows = c.fetchall()
+    finally:
+        # Close the connection even when the query fails
+        main_db.close()
+
+    total = len(all_rows)
+    print(f"📋 K opravě: {total} položek.")
+
+    if total == 0:
+        print("🎉 Není co opravovat.")
+        # exit() is injected by the site module and is not meant for programs;
+        # raising SystemExit ends the script the same way with status 0.
+        raise SystemExit(0)
+
+    # 2. Obtain the browser identity through Selenium (only once)
+    u_agent, browser_cookies = get_browser_identity()
+
+    # 3. Split the work into consecutive chunks for the worker threads
+    chunk_size = total // THREADS + 1
+    chunks = [all_rows[i:i + chunk_size] for i in range(0, total, chunk_size)]
+
+    print(f"🚀 Spouštím {THREADS} vláken (ukládání do DB + do složky '{BACKUP_DIR}')...")
+
+    # 4. Run the workers
+    with ThreadPoolExecutor(max_workers=THREADS) as executor:
+        futures = []
+        for i, chunk in enumerate(chunks):
+            if chunk:
+                # Every worker gets its share of rows plus the browser identity
+                futures.append(executor.submit(worker_task, chunk, i + 1, u_agent, browser_cookies))
+
+        # Wait for completion; result() re-raises an exception from a worker
+        for f in futures:
+            f.result()
+
+    print("\n" + "=" * 40)
+    print("🏁 DOKONČENO")
+    print(f"✅ Opraveno v DB:    {stats['fixed']}")
+    print(f"💾 Uloženo na disk:  {stats['saved_to_disk']}")
+    print(f"❌ Chyby:            {stats['failed']}")
+    print(f"📁 Soubory najdeš v: {os.path.abspath(BACKUP_DIR)}")
+    print("=" * 40)
@@ -0,0 +1,133 @@
+import pymysql
+import requests
+import json
+import time
+import random
+import os
+import re
+from pathlib import Path
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+
+# ============================================================
+# KONFIGURACE
+# ============================================================
+# Keyword arguments passed to pymysql.connect().
+# NOTE(review): the host and root credentials are hard-coded in source control;
+# consider loading them from the environment or an untracked config file.
+DB_CONFIG = {
+    "host": "192.168.1.50",
+    "port": 3306,
+    "user": "root",
+    "password": "Vlado9674+",
+    "database": "torrents",
+    "charset": "utf8mb4",
+    "autocommit": True,
+}
+
+# JSON file with saved browser cookies; get_browser_identity() loads it when it exists.
+COOKIE_FILE = Path("sktorrent_cookies.json")
+# Directory that receives a copy of every downloaded .torrent file.
+BACKUP_DIR = "saved_torrents"
+
+
+# ============================================================
+# POMOCNÉ FUNKCE
+# ============================================================
+def sanitize_filename(name):
+    """Return ``name`` reduced to characters that are safe in a file name.
+
+    Keeps word characters, dots, dashes and spaces; every other character is
+    dropped. Tabs, newlines and runs of whitespace are collapsed to a single
+    space so control characters never end up in the file name. The result is
+    stripped and limited to 100 characters.
+
+    Args:
+        name: Title to clean up; ``None`` or an empty value yields ``""``.
+
+    Returns:
+        The cleaned string (possibly empty).
+    """
+    if not name:
+        return ""
+    clean = re.sub(r'[^\w\s\.-]', '', str(name))
+    # \s above also lets \t and \n through; normalise them to plain spaces
+    clean = re.sub(r'\s+', ' ', clean)
+    return clean.strip()[:100]
+
+
+def get_browser_identity():
+    """Start headless Chrome once and return its user agent and cookies.
+
+    Opens https://sktorrent.eu, injects the cookies stored in COOKIE_FILE (when
+    the file exists), refreshes the page and then reads the identity back from
+    the browser.
+
+    Returns:
+        tuple: ``(user_agent, browser_cookies)`` where ``user_agent`` is the
+        value of ``navigator.userAgent`` and ``browser_cookies`` is the list
+        returned by ``driver.get_cookies()``.
+    """
+    print("🤖 Startuji Selenium (Single Thread Mode)...")
+    opts = Options()
+    opts.add_argument("--headless=new")
+    opts.add_argument("--disable-gpu")
+    driver = webdriver.Chrome(options=opts)
+    try:
+        # Cookies can only be added for the domain that is currently loaded
+        driver.get("https://sktorrent.eu")
+
+        if COOKIE_FILE.exists():
+            with open(COOKIE_FILE, "r", encoding="utf-8") as f:
+                cookies_list = json.load(f)
+            for c in cookies_list:
+                driver.add_cookie(c)
+            driver.refresh()
+            # Give the page a moment to settle after the refresh
+            time.sleep(2)
+
+        user_agent = driver.execute_script("return navigator.userAgent;")
+        browser_cookies = driver.get_cookies()
+    finally:
+        # Always shut the browser down, otherwise a failure leaves Chrome running
+        driver.quit()
+    return user_agent, browser_cookies
+
+
+# ============================================================
+# MAIN
+# ============================================================
+if __name__ == "__main__":
+    # Single-threaded retry pass: download the torrents that still have no
+    # content in the DB, store them in the DB and as files in BACKUP_DIR.
+    os.makedirs(BACKUP_DIR, exist_ok=True)
+
+    success = 0
+    dead_links = 0
+
+    db = pymysql.connect(**DB_CONFIG)
+    try:
+        # 1. Load the remaining rows
+        cursor = db.cursor()
+        cursor.execute(
+            "SELECT torrent_hash, download_url, title_visible FROM torrents WHERE torrent_content IS NULL AND download_url IS NOT NULL")
+        rows = cursor.fetchall()
+
+        print(f"📋 Zbývá opravit: {len(rows)} položek.")
+        if not rows:
+            print("🎉 Hotovo! Vše je staženo.")
+            # SystemExit instead of exit(): exit() is only injected by the site module
+            raise SystemExit(0)
+
+        # 2. Reuse the browser's user agent and cookies for plain HTTP requests
+        ua, cookies = get_browser_identity()
+
+        session = requests.Session()
+        session.headers.update({"User-Agent": ua})
+        for c in cookies:
+            session.cookies.set(c['name'], c['value'])
+
+        # 3. Slow loop (one request at a time)
+        print("🚀 Spouštím jemné dočištění...")
+
+        for i, row in enumerate(rows, start=1):
+            t_hash, url, title = row
+            # title_visible may be NULL in the DB; never let that abort the run
+            label = title or ""
+            print(f"[{i}/{len(rows)}] {label[:50]}...", end=" ")
+
+            try:
+                # Longer pause between requests for stability
+                time.sleep(random.uniform(1.5, 3.0))
+
+                resp = session.get(url, timeout=20)  # longer timeout
+
+                if resp.status_code == 404:
+                    print("❌ 404 Nenalezeno (soubor na serveru neexistuje)")
+                    dead_links += 1
+                    continue
+
+                if resp.status_code != 200:
+                    print(f"❌ Chyba {resp.status_code}")
+                    continue
+
+                content = resp.content
+                # Bodies of 100 bytes or less are treated as empty/invalid
+                if len(content) > 100:
+                    # DB
+                    cursor.execute("UPDATE torrents SET torrent_content = %s WHERE torrent_hash = %s",
+                                   (content, t_hash))
+
+                    # Disk
+                    fname = f"{sanitize_filename(label)}_{t_hash[:6]}.torrent"
+                    with open(os.path.join(BACKUP_DIR, fname), "wb") as f:
+                        f.write(content)
+
+                    print("✅ OK")
+                    success += 1
+                else:
+                    print("⚠️ Prázdný soubor")
+
+            except Exception as e:
+                # Best effort: report the failure and continue with the next row
+                print(f"❌ Selhalo: {e}")
+    finally:
+        # Close the connection on every exit path (early exit, error, success)
+        db.close()
+
+    print("\n" + "=" * 30)
+    print(f"🏁 FINÁLE: Opraveno {success} z {len(rows)}")
+    if dead_links > 0:
+        print(f"💀 Mrtvé odkazy (404): {dead_links} (ty už opravit nejdou)")
@@ -0,0 +1,158 @@
+import pymysql
+import bencodepy
+import os
+from pathlib import Path
+
+# ============================================================
+# CONFIGURATION
+# ============================================================
+# Your network path (Use raw string r"..." for backslashes)
+# PHYSICAL_DIR = Path(r"\\tower\torrents\downloads")
+# Root directory (UNC network path) that is checked for downloaded torrent payloads.
+PHYSICAL_DIR = Path(r"\\tower1\#Colddata\Porno")
+
+# Keyword arguments passed to pymysql.connect().
+# NOTE(review): the host and root credentials are hard-coded in source control;
+# consider loading them from the environment or an untracked config file.
+DB_CONFIG = {
+    "host": "192.168.1.50",
+    "port": 3306,
+    "user": "root",
+    "password": "Vlado9674+",
+    "database": "torrents",
+    "charset": "utf8mb4",
+    "autocommit": True,
+}
+
+
+# ============================================================
+# HELPER FUNCTIONS
+# ============================================================
+def decode_bytes(b):
+    """Decode a bencoded byte string into ``str``.
+
+    Tries UTF-8 first, then windows-1250, and finally latin-1. latin-1 maps
+    every byte value, so the last step always succeeds for ``bytes`` input.
+
+    Args:
+        b: ``bytes`` to decode; a ``str`` is returned unchanged.
+
+    Returns:
+        The decoded text.
+
+    Raises:
+        AttributeError: If ``b`` is neither ``str`` nor a bytes-like object
+            with a ``decode`` method (for example ``None``).
+    """
+    if isinstance(b, str):
+        return b
+    for enc in ('utf-8', 'windows-1250'):
+        try:
+            return b.decode(enc)
+        except UnicodeDecodeError:
+            # Only decoding errors mean "try the next encoding"
+            continue
+    # latin-1 cannot fail, so it is the unconditional fallback
+    return b.decode('latin-1')
+
+
+def check_torrent_in_filesystem(torrent_blob, root_path):
+    """Check whether the payload described by a .torrent blob exists on disk.
+
+    The blob is bencode-decoded and the torrent's ``name`` is looked up directly
+    under ``root_path``.
+
+    * Single-file torrent: the path must be a regular file whose size differs
+      from the declared ``length`` by less than 4096 bytes.
+    * Multi-file torrent: the path must be a directory; only the first file
+      listed in the torrent is checked for existence. An empty file list counts
+      as present.
+
+    Args:
+        torrent_blob: Raw bytes of the .torrent file.
+        root_path: ``pathlib.Path`` of the directory to search in.
+
+    Returns:
+        bool: ``True`` when the payload looks present, ``False`` otherwise,
+        including when the blob cannot be parsed or the path cannot be checked.
+    """
+    try:
+        data = bencodepy.decode(torrent_blob)
+        info = data.get(b'info')
+        if not info:
+            return False
+
+        # Name of the root file/folder defined in the torrent
+        name = decode_bytes(info.get(b'name'))
+        target_path = root_path / name
+
+        if not target_path.exists():
+            return False
+
+        if b'files' not in info:
+            # Single-file torrent. Require a regular file: a directory of the
+            # same name has its own small st_size and could pass the size check.
+            if not target_path.is_file():
+                return False
+            expected_size = info[b'length']
+            real_size = target_path.stat().st_size
+            # Accept a difference of less than 4096 bytes
+            return abs(real_size - expected_size) < 4096
+
+        # Multi-file torrent: the root entry has to be a folder
+        if not target_path.is_dir():
+            return False
+
+        files = info[b'files']
+        if not files:
+            return True  # empty folder torrent: rare but possible
+
+        # Spot-check the first listed file so an empty folder does not count
+        first_file_path = target_path.joinpath(*[decode_bytes(p) for p in files[0][b'path']])
+        return first_file_path.exists()
+
+    except Exception:
+        # Deliberate best effort: an unparsable blob or an odd path means "not found"
+        return False
+
+
+# ============================================================
+# MAIN EXECUTION
+# ============================================================
+if __name__ == "__main__":
+    # Scan: compare every torrent stored in the DB against PHYSICAL_DIR and
+    # record the result in the physical_exists column.
+    if not PHYSICAL_DIR.exists():
+        print(f"❌ ERROR: Cannot access path: {PHYSICAL_DIR}")
+        print("Make sure the drive is mapped or the network path is accessible.")
+        # SystemExit instead of exit(): exit() is only injected by the site module
+        raise SystemExit(0)
+
+    print(f"📂 Scanning storage: {PHYSICAL_DIR}")
+    print("🚀 Connecting to Database...")
+
+    db = pymysql.connect(**DB_CONFIG)
+    try:
+        cursor = db.cursor()
+
+        # 1. Get all torrents that have content (BLOB)
+        # NOTE(review): fetchall() holds every BLOB in memory at once
+        cursor.execute(
+            "SELECT torrent_hash, title_visible, torrent_content FROM torrents WHERE torrent_content IS NOT NULL")
+
+        rows = cursor.fetchall()
+        total = len(rows)
+        print(f"📋 Analysing {total} torrents from database against disk files...")
+
+        found_count = 0
+        missing_count = 0
+
+        # 2. Iterate and check
+        updates = []  # hashes found on disk, written back in batches below
+
+        for index, row in enumerate(rows):
+            t_hash, title, blob = row
+
+            if check_torrent_in_filesystem(blob, PHYSICAL_DIR):
+                found_count += 1
+                updates.append(t_hash)
+            else:
+                missing_count += 1
+
+            # Progress line every 100 rows
+            if index % 100 == 0:
+                print(f"   Processed {index}/{total} ... (Found: {found_count})")
+
+        # 3. Batch update database
+        print(f"\n💾 Updating Database: Marking {len(updates)} torrents as 'physical_exists = 1'...")
+
+        # Reset everything to 0 first (in case files were deleted since the last run).
+        # NOTE(review): with autocommit enabled the reset and the updates below are
+        # not atomic; an interruption in between leaves flags cleared.
+        cursor.execute("UPDATE torrents SET physical_exists = 0")
+
+        # Update in chunks of 1000 to keep the IN (...) list bounded
+        chunk_size = 1000
+        for i in range(0, len(updates), chunk_size):
+            chunk = updates[i:i + chunk_size]
+            format_strings = ','.join(['%s'] * len(chunk))
+            cursor.execute(f"UPDATE torrents SET physical_exists = 1 WHERE torrent_hash IN ({format_strings})",
+                           tuple(chunk))
+            db.commit()
+    finally:
+        # Close the connection on every exit path
+        db.close()
+
+    # Guard the percentage: an empty table would otherwise divide by zero
+    completion = int((found_count / total) * 100) if total else 0
+
+    print("\n" + "=" * 40)
+    print(f"🏁 SCAN COMPLETE")
+    print(f"✅ Physically Available: {found_count}")
+    print(f"❌ Missing / Not Downloaded: {missing_count}")
+    print(f"📊 Completion Rate: {completion}%")
+    print("=" * 40)
@@ -0,0 +1,150 @@
+import pymysql
+import re
+import time
+import qbittorrentapi
+
+# ============================================================
+# KONFIGURACE
+# ============================================================
+# Upper bound (in GB) for the summed size of the torrents selected in main().
+MAX_SIZE_GB = 950
+# qBittorrent Web UI endpoint and login used by main().
+# NOTE(review): these credentials are hard-coded in source control; consider
+# loading them from the environment or an untracked config file.
+QBT_URL = "https://vladob.zen.usbx.me/qbittorrent"
+QBT_USER = "vladob"
+QBT_PASS = "jCni3U6d#y4bfcm"
+
+# Keyword arguments passed to pymysql.connect().
+# NOTE(review): same concern as above, root credentials in source.
+DB_CONFIG = {
+    "host": "192.168.1.50",
+    "port": 3306,
+    "user": "root",
+    "password": "Vlado9674+",
+    "database": "torrents",
+    "charset": "utf8mb4",
+    "autocommit": True,
+}
+
+
+# ============================================================
+# POMOCNÉ FUNKCE
+# ============================================================
+def parse_size_to_gb(size_str):
+    """Convert a human-readable size such as '1.5 GB' or '500 MB' to GB.
+
+    A decimal comma is accepted ('1,5 GB'). The unit is detected
+    case-insensitively; TB, GB, MB and KB are supported with a factor of 1024
+    between neighbouring units.
+
+    Args:
+        size_str: Size text; any value is converted with ``str()``.
+
+    Returns:
+        float: Size in GB, or ``0.0`` when the value is empty, contains no
+        number, or carries no supported unit.
+    """
+    if not size_str:
+        return 0.0
+    s = str(size_str).upper().replace(",", ".").strip()
+    # First well-formed decimal number; a bare "." or "1.2.3" can no longer
+    # reach float() and raise ValueError
+    match = re.search(r"\d+(?:\.\d*)?|\.\d+", s)
+    if not match:
+        return 0.0
+    val = float(match.group(0))
+
+    # Checked in this order; the first unit found in the text wins
+    for unit, factor in (("TB", 1024.0), ("GB", 1.0), ("MB", 1 / 1024), ("KB", 1 / 1024 / 1024)):
+        if unit in s:
+            return val * factor
+    return 0.0
+
+
+# ============================================================
+# HLAVNÍ LOGIKA
+# ============================================================
+def main():
+    """Select missing torrents by seeder count and upload them to qBittorrent.
+
+    Steps:
+        1. Load torrents with ``physical_exists = 0`` and stored content from
+           MySQL, ordered by seeders (highest first).
+        2. Take rows in that order until the next one would push the summed
+           size above MAX_SIZE_GB.
+        3. Print a summary and ask for confirmation on stdin.
+        4. Log in to qBittorrent and add each torrent from its binary content.
+
+    Returns:
+        None. Returns early when there is nothing to upload, when the user
+        declines, or when the qBittorrent login fails.
+    """
+    print(f"🚀 Plánuji přímý upload z DB (Limit: {MAX_SIZE_GB} GB, řazeno dle seederů)...")
+
+    # 1. Load the data from the DB
+    # The BLOB column (torrent_content) is fetched too, so this may take a while
+    db = pymysql.connect(**DB_CONFIG)
+    try:
+        cursor = db.cursor()
+
+        print("⏳ Načítám data z MySQL...")
+        sql = """
+        SELECT torrent_hash, title_visible, size_pretty, seeders, torrent_content
+        FROM torrents
+        WHERE physical_exists = 0 AND torrent_content IS NOT NULL
+        ORDER BY seeders DESC
+    """
+        cursor.execute(sql)
+        rows = cursor.fetchall()
+    finally:
+        # Close the connection even when the query fails
+        db.close()
+
+    print(f"🔍 Nalezeno {len(rows)} kandidátů. Vybírám ty nejlepší...")
+
+    # 2. Select torrents up to the capacity limit
+    selected_torrents = []
+    total_size_gb = 0.0
+
+    for t_hash, title, size_str, seeders, content in rows:
+        size_gb = parse_size_to_gb(size_str)
+
+        # Rows are ordered by priority, so stop at the first one that does not fit
+        if total_size_gb + size_gb > MAX_SIZE_GB:
+            print(f"🛑 Limit naplněn! '{title}' ({size_gb:.2f} GB) by přesáhl {MAX_SIZE_GB} GB.")
+            break
+
+        selected_torrents.append({
+            "filename": f"{t_hash}.torrent",  # virtual file name
+            "content": content,  # binary .torrent data
+            "title": title,
+            "size": size_gb,
+            "seeders": seeders
+        })
+        total_size_gb += size_gb
+
+    # 3. Report
+    print("-" * 40)
+    print(f"📦 Vybráno: {len(selected_torrents)} torrentů")
+    print(f"💾 Celková velikost: {total_size_gb:.2f} GB / {MAX_SIZE_GB} GB")
+    if selected_torrents:
+        # seeders may be NULL in the DB; count those rows as 0
+        avg_seeders = sum((t['seeders'] or 0) for t in selected_torrents) / len(selected_torrents)
+        print(f"⚡ Průměrně seederů: {avg_seeders:.1f}")
+    print("-" * 40)
+
+    if not selected_torrents:
+        print("Nic k nahrání.")
+        return
+
+    confirm = input("❓ Nahrát tento výběr na Seedbox? (ano/ne): ")
+    if confirm.lower() not in ['ano', 'y', 'yes']:
+        print("❌ Zrušeno.")
+        return
+
+    # 4. Connect to qBittorrent
+    try:
+        # NOTE(review): TLS certificate verification is disabled for this client
+        qbt = qbittorrentapi.Client(
+            host=QBT_URL,
+            username=QBT_USER,
+            password=QBT_PASS,
+            VERIFY_WEBUI_CERTIFICATE=False
+        )
+        qbt.auth_log_in()
+        print("✅ Připojeno k Seedboxu.")
+    except Exception as e:
+        print(f"❌ Chyba připojení: {e}")
+        return
+
+    # 5. Send the data
+    print("🚀 Odesílám...")
+    success_count = 0
+
+    for i, item in enumerate(selected_torrents, start=1):
+        try:
+            # The binary data is sent directly, as if it were a file upload:
+            # {'file_name.torrent': b'binary_data...'}
+            file_dict = {item['filename']: item['content']}
+
+            result = qbt.torrents_add(torrent_files=file_dict, is_paused=False)
+
+            # torrents_add reports rejection through its return value
+            # ("Ok." / "Fails.") rather than by raising
+            if result != "Ok.":
+                print(f"❌ Chyba u {item['title']}: {result}")
+                continue
+
+            print(f"[{i}/{len(selected_torrents)}] 📤 {item['title']} ({item['size']:.1f} GB)")
+            success_count += 1
+            time.sleep(0.2)  # short pause to keep the API stable
+
+        except Exception as e:
+            # Best effort: report and continue with the next torrent
+            print(f"❌ Chyba u {item['title']}: {e}")
+
+    print("\n✅ HOTOVO.")
+    print(f"📤 Odesláno: {success_count}/{len(selected_torrents)}")
+    print("Torrenty jsou na Seedboxu. Až se stáhnou, stáhni je domů a spusť skript 99_Scan...")
+
+
+if __name__ == "__main__":
+    main()