@@ -61,17 +61,28 @@ def get_data():
 # ==============================
 def auto_adjust_columns(writer, df, sheet_name):
-    """Helper that automatically widens the columns in Excel"""
+    """Safe automatic column-width adjustment"""
     worksheet = writer.sheets[sheet_name]

     for idx, col in enumerate(df.columns):
-        max_len = max(
-            df[col].astype(str).map(len).max(),
-            len(str(col))
-        ) + 2
-        if max_len > 60: max_len = 60
+        series = df[col]
+        max_len = len(str(col))  # at least the header length
+        for val in series:
+            if val is None or (isinstance(val, float) and pd.isna(val)):
+                length = 0
+            else:
+                length = len(str(val))
+            if length > max_len:
+                max_len = length
+        max_len = min(max_len + 2, 60)
         worksheet.set_column(idx, idx, max_len)

 # ==============================
 # 🚀 MAIN LOGIC
 # ==============================
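
For context, a minimal usage sketch of the rewritten helper (the DataFrame and file name are made-up illustrations; it assumes pandas with the xlsxwriter engine, whose worksheet objects expose set_column). Unlike the old one-liner, the explicit loop tolerates None/NaN cells and empty columns by falling back to the header width:

# Hypothetical usage of auto_adjust_columns (names and data are illustrative).
import pandas as pd

df = pd.DataFrame({"title": ["short", "a much longer torrent title"], "seeders": [3, None]})
with pd.ExcelWriter("report.xlsx", engine="xlsxwriter") as writer:
    df.to_excel(writer, sheet_name="torrents", index=False)
    auto_adjust_columns(writer, df, "torrents")  # widths capped at 60 characters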
@@ -96,24 +96,25 @@ def close_popup_if_any():
 # ============================================================
-# 5) Parse one torrent row
+# 5) Parse one torrent row (MODIFIED)
 # ============================================================

 def parse_row(cells):
-    # Column 0: Category icon/text
+    # --- 1. INITIALIZE ---
+    torrent_hash = None
+    download_url = None
     category = cells[0].text.strip()

     try:
-        # Column 1: Download icon link
+        # --- 2. EXTRACT DOWNLOAD URL (Column 1) ---
         download_a = cells[1].find_element(By.TAG_NAME, "a")
-        download_link = download_a.get_attribute("href")
-    except:
-        return None
-
-    parsed_dl = urlparse.urlparse(download_link)
+        download_url = download_a.get_attribute("href")
+
+        parsed_dl = urlparse.urlparse(download_url)
         dl_query = urlparse.parse_qs(parsed_dl.query)
         torrent_filename = dl_query.get("f", ["unknown.torrent"])[0]

-        # Column 2: Name and info
+        # --- 3. EXTRACT DETAILS & HASH (Column 2) ---
         title_links = cells[2].find_elements(By.TAG_NAME, "a")
         if not title_links:
             return None
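
As a standalone check of the query-string handling above: urllib.parse.parse_qs returns each parameter as a list, which is why the code indexes [0] (the URL below is a made-up example):

# Standalone check of the filename extraction (URL is illustrative).
import urllib.parse as urlparse

url = "https://example.org/torrent/download.php?id=abc123&f=Some.Movie.2024.torrent"
dl_query = urlparse.parse_qs(urlparse.urlparse(url).query)
print(dl_query.get("f", ["unknown.torrent"])[0])  # -> Some.Movie.2024.torrent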
@@ -130,34 +131,27 @@ def parse_row(cells):
         torrent_hash = query["id"][0]

-        # Use innerText for robust text extraction
+        # --- 4. EXTRACT SIZE & DATE ---
         text_block = cells[2].get_attribute("innerText")
         text_block_clean = " ".join(text_block.split())

-        # Regex for Size and Date
         size_match = re.search(r"Velkost ([0-9\.]+ ?[KMG]B)", text_block_clean, re.IGNORECASE)
         added_match = re.search(r"Pridany (.+?)(?:\sObrázok|$)", text_block_clean, re.IGNORECASE)

         size_pretty = size_match.group(1) if size_match else None
         added_pretty = added_match.group(1) if added_match else None

-        # Date conversion: "29/11/2025 o 02:29" -> MySQL format
         added_mysql = None
         if added_pretty:
             clean = added_pretty.replace(" o ", " ").strip()
             parts = clean.split(" ")
             if len(parts) >= 2:
-                date_part = parts[0]
-                time_part = parts[1]
-                if len(time_part.split(":")) == 2:
-                    time_part += ":00"
+                date_part, time_part = parts[0], parts[1]
+                if len(time_part.split(":")) == 2: time_part += ":00"
                 try:
-                    day, month, year = date_part.split("/")
-                    added_mysql = f"{year}-{month}-{day} {time_part}"
-                except:
-                    added_mysql = None
+                    d, m, y = date_part.split("/")
+                    added_mysql = f"{y}-{m}-{d} {time_part}"
+                except: pass

-        # Column 2: Image preview (if exists)
+        # --- 5. IMAGE & STATS ---
         img_link = None
         try:
             image_a = cells[2].find_element(By.XPATH, ".//a[contains(text(),'Obrázok')]")
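
The date rewrite is easy to sanity-check in isolation; the input below is the example format quoted in the old comment:

# Quick check of the "29/11/2025 o 02:29" -> MySQL datetime conversion.
added_pretty = "29/11/2025 o 02:29"
clean = added_pretty.replace(" o ", " ").strip()
date_part, time_part = clean.split(" ")[:2]
if len(time_part.split(":")) == 2:
    time_part += ":00"
d, m, y = date_part.split("/")
print(f"{y}-{m}-{d} {time_part}")  # -> 2025-11-29 02:29:00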
@@ -165,39 +159,33 @@ def parse_row(cells):
             img_match = re.search(r"src=([^ ]+)", mouseover)
             if img_match:
                 img_link = img_match.group(1).replace("'", "").strip()
-                if img_link.startswith("//"):
-                    img_link = "https:" + img_link
-        except:
-            pass
+                if img_link.startswith("//"): img_link = "https:" + img_link
+        except: pass

-        # Column 4: Seeders
-        seeders_a = cells[4].find_element(By.TAG_NAME, "a")
-        seeders_number = int(seeders_a.text.strip())
-        seeders_link = seeders_a.get_attribute("href")
-
-        # Column 5: Leechers
-        leechers_a = cells[5].find_element(By.TAG_NAME, "a")
-        leechers_number = int(leechers_a.text.strip())
-        leechers_link = leechers_a.get_attribute("href")
-
-        # Check database for existing binary content
+        seeders_number = int(cells[4].find_element(By.TAG_NAME, "a").text.strip())
+        seeders_link = cells[4].find_element(By.TAG_NAME, "a").get_attribute("href")
+        leechers_number = int(cells[5].find_element(By.TAG_NAME, "a").text.strip())
+        leechers_link = cells[5].find_element(By.TAG_NAME, "a").get_attribute("href")
+
+        # --- 6. DATABASE CHECK & DOWNLOAD ---
         cursor.execute("SELECT torrent_content FROM torrents WHERE torrent_hash=%s", (torrent_hash,))
-        row = cursor.fetchone()
-        already_have_torrent = row is not None and row[0] is not None
+        db_row = cursor.fetchone()
+        already_have_torrent = db_row is not None and db_row[0] is not None

         torrent_content = None
         if not already_have_torrent:
-            time.sleep(3)  # Politeness delay
+            time.sleep(2)
             try:
-                resp = requests_session.get(download_link)
+                resp = requests_session.get(download_url, timeout=10)
                 resp.raise_for_status()
                 torrent_content = resp.content
-            except:
-                torrent_content = None
+            except Exception as e:
+                print(f" ⚠️ Download failed for {visible_name}: {e}")

         return {
             "torrent_hash": torrent_hash,
             "details_link": details_link,
+            "download_url": download_url,
             "category": category,
             "title_visible": visible_name,
             "title_full": full_title,
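
One hedged hardening idea, not part of this commit: the bare int(...) casts on the seeder/leecher cells raise ValueError on non-numeric text, which now aborts the whole row via the new outer except. A tolerant parser could default to 0 instead:

# Hypothetical guard (not in the commit): tolerate non-numeric counts.
def safe_int(text, default=0):
    try:
        return int(str(text).strip())
    except (ValueError, TypeError):
        return default

# e.g. seeders_number = safe_int(cells[4].find_element(By.TAG_NAME, "a").text)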
@@ -212,19 +200,20 @@ def parse_row(cells):
             "torrent_content": torrent_content if not already_have_torrent else None,
             "is_new_torrent": not already_have_torrent,
         }
+    except Exception as e:
+        print(f"⚠️ parse_row logic failed: {e}")
+        return None

 # ============================================================
-# 6) INSERT SQL
+# 6) INSERT SQL (MODIFIED)
 # ============================================================
 insert_sql = """
 INSERT INTO torrents (
-    torrent_hash, details_link, category, title_visible, title_full,
+    torrent_hash, details_link, download_url, category, title_visible, title_full,
     size_pretty, added_datetime, preview_image,
     seeders, seeders_link, leechers, leechers_link,
     torrent_filename, torrent_content
 ) VALUES (
-    %(torrent_hash)s, %(details_link)s, %(category)s, %(title_visible)s, %(title_full)s,
+    %(torrent_hash)s, %(details_link)s, %(download_url)s, %(category)s, %(title_visible)s, %(title_full)s,
     %(size_pretty)s, %(added_datetime)s, %(preview_image)s,
     %(seeders)s, %(seeders_link)s, %(leechers)s, %(leechers_link)s,
     %(torrent_filename)s, %(torrent_content)s
@@ -232,9 +221,12 @@ INSERT INTO torrents (
 ON DUPLICATE KEY UPDATE
     seeders = VALUES(seeders),
     leechers = VALUES(leechers),
+    download_url = VALUES(download_url),
     torrent_content = COALESCE(VALUES(torrent_content), torrent_content);
 """
+# Note: COALESCE(VALUES(torrent_content), torrent_content)
+# keeps the old value if the new one is NULL,
+# but updates it if the old one was NULL and the new one is binary.

 # ============================================================
 # 7) PROCESS ALL PAGES
 # ============================================================
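
A runnable mimic of that upsert rule, purely to illustrate why stored blobs survive a re-crawl that downloads nothing:

# Python mimic of COALESCE(VALUES(torrent_content), torrent_content).
def upsert_content(old, new):
    return new if new is not None else old

assert upsert_content(b"blob", None) == b"blob"  # re-crawl without a download keeps the blob
assert upsert_content(None, b"blob") == b"blob"  # first successful download fills the gap
assert upsert_content(b"old", b"new") == b"new"  # a fresh download wins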
@@ -250,17 +242,27 @@ for page_num in range(0, TOTAL_PAGES):

         # Find table rows
         rows = driver.find_elements(By.CSS_SELECTOR, "table tr")
-        # v1 table usually has 7 cells for a data row
-        real_rows = [r.find_elements(By.TAG_NAME, "td") for r in rows if len(r.find_elements(By.TAG_NAME, "td")) == 7]
+        # FILTER: Only keep rows that have 7 columns AND a link in the 2nd column (index 1).
+        # This automatically discards headers and empty spacer rows.
+        real_rows = []
+        for r in rows:
+            cells = r.find_elements(By.TAG_NAME, "td")
+            if len(cells) == 7 and cells[1].find_elements(By.TAG_NAME, "a"):
+                real_rows.append(cells)

         if not real_rows:
             print("⚠️ No data rows found on this page. Ending loop.")
             break

+        # === INSERT THIS LINE HERE ===
         page_new_items = 0
+        # =============================

         for cells in real_rows:
             try:
                 data = parse_row(cells)
+                # ... rest of your logic ...
             except Exception as e:
                 print(f"⚠️ parse_row failed: {e}")
                 continue
@@ -279,10 +281,10 @@ for page_num in range(0, TOTAL_PAGES):

             cursor.execute(insert_sql, data)

-    # If an entire page is old news, we can stop the deep crawl
-    if page_new_items == 0 and page_num > 0:
-        print("🛑 Page contained only known items. Sync complete.")
-        break
+    # # If an entire page is old news, we can stop the deep crawl
+    # if page_new_items == 0 and page_num > 0:
+    #     print("🛑 Page contained only known items. Sync complete.")
+    #     break

     time.sleep(1)
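
The early-stop logic is commented out wholesale above; a softer option, not in the commit (the flag name is made up), keeps it behind a switch so incremental syncs and full deep crawls share one code path. The fragment below belongs inside the page loop:

# Hypothetical alternative to deleting the early stop (loop-body fragment).
STOP_WHEN_PAGE_KNOWN = False  # True = incremental sync, False = full deep crawl
if STOP_WHEN_PAGE_KNOWN and page_new_items == 0 and page_num > 0:
    print("🛑 Page contained only known items. Sync complete.")
    break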

5threaddownloader.py (new file, 292 lines)
@@ -0,0 +1,292 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import pymysql
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
import re
import urllib.parse as urlparse
from pathlib import Path
import json
import requests
import datetime
import sys
import threading
from concurrent.futures import ThreadPoolExecutor

# Ensure this file exists in your directory
from EmailMessagingGraph import send_mail

# ============================================================
# CONFIGURATION
# ============================================================
TOTAL_PAGES = 226
THREADS = 5
COOKIE_FILE = Path("sktorrent_cookies.json")

# Database settings
DB_CONFIG = {
    "host": "192.168.1.50",
    "port": 3306,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
    "autocommit": True,
}

BASE_URL = (
    "https://sktorrent.eu/torrent/torrents.php"
    "?active=0&category=24&order=data&by=DESC&zaner=&jazyk="
)

# Global counters for reporting (thread-safe lock needed)
stats_lock = threading.Lock()
stats = {
    "processed": 0,
    "new": 0,
    "existing": 0,
    "new_titles": []
}


# ============================================================
# 1) WORKER FUNCTION (runs inside each thread)
# ============================================================
def process_page_chunk(page_indices, thread_id):
    """
    This function creates its OWN browser and OWN database connection.
    It processes the specific list of page numbers assigned to it.
    """
    print(f"🧵 [Thread-{thread_id}] Starting. Assigned {len(page_indices)} pages.")

    # --- A. Setup independent DB connection ---
    try:
        db = pymysql.connect(**DB_CONFIG)
        cursor = db.cursor()
    except Exception as e:
        print(f"❌ [Thread-{thread_id}] DB connection failed: {e}")
        return

    # --- B. Setup independent Selenium driver ---
    chrome_options = Options()
    # HEADLESS MODE is safer for 5 threads, to avoid popping up 5 windows
    chrome_options.add_argument("--headless=new")
    chrome_options.add_argument("--disable-notifications")
    chrome_options.add_argument("--disable-popup-blocking")
    chrome_options.add_argument("--disable-extensions")
    chrome_options.add_argument("--log-level=3")  # Reduce noise

    driver = webdriver.Chrome(options=chrome_options)
    driver.set_window_size(1350, 1000)

    # --- C. Login / cookies ---
    driver.get("https://sktorrent.eu")
    if COOKIE_FILE.exists():
        with open(COOKIE_FILE, "r", encoding="utf-8") as f:
            cookies = json.load(f)
        for c in cookies:
            driver.add_cookie(c)

    # --- D. Requests session ---
    requests_session = requests.Session()
    for ck in driver.get_cookies():
        requests_session.cookies.set(ck["name"], ck["value"])

    # --- E. Helper: parse one row (local scope) ---
    def parse_row(cells):
        try:
            category = cells[0].text.strip()

            # Download URL
            download_a = cells[1].find_element(By.TAG_NAME, "a")
            download_url = download_a.get_attribute("href")

            parsed_dl = urlparse.urlparse(download_url)
            dl_query = urlparse.parse_qs(parsed_dl.query)
            torrent_filename = dl_query.get("f", ["unknown.torrent"])[0]

            # Details & hash
            title_links = cells[2].find_elements(By.TAG_NAME, "a")
            if not title_links: return None
            a_tag = title_links[0]
            visible_name = a_tag.text.strip()
            full_title = a_tag.get_attribute("title")
            details_link = a_tag.get_attribute("href")

            parsed = urlparse.urlparse(details_link)
            query = urlparse.parse_qs(parsed.query)
            if "id" not in query: return None
            torrent_hash = query["id"][0]

            # Size & date
            text_block = cells[2].get_attribute("innerText")
            clean_text = " ".join(text_block.split())
            size_match = re.search(r"Velkost ([0-9\.]+ ?[KMG]B)", clean_text, re.IGNORECASE)
            added_match = re.search(r"Pridany (.+?)(?:\sObrázok|$)", clean_text, re.IGNORECASE)
            size_pretty = size_match.group(1) if size_match else None

            added_mysql = None
            if added_match:
                clean = added_match.group(1).replace(" o ", " ").strip()
                parts = clean.split(" ")
                if len(parts) >= 2:
                    t = parts[1] + ":00" if len(parts[1].split(":")) == 2 else parts[1]
                    try:
                        # split inside the try so a malformed date doesn't abort the row
                        d, m, y = parts[0].split("/")
                        added_mysql = f"{y}-{m}-{d} {t}"
                    except ValueError:
                        pass

            # Image
            img_link = None
            try:
                img_a = cells[2].find_element(By.XPATH, ".//a[contains(text(),'Obrázok')]")
                img_src = re.search(r"src=([^ ]+)", img_a.get_attribute("onmouseover"))
                if img_src:
                    img_link = img_src.group(1).replace("'", "").strip()
                    if img_link.startswith("//"): img_link = "https:" + img_link
            except:
                pass

            # Stats
            seeders = int(cells[4].find_element(By.TAG_NAME, "a").text.strip())
            seeders_link = cells[4].find_element(By.TAG_NAME, "a").get_attribute("href")
            leechers = int(cells[5].find_element(By.TAG_NAME, "a").text.strip())
            leechers_link = cells[5].find_element(By.TAG_NAME, "a").get_attribute("href")

            # Check DB
            cursor.execute("SELECT torrent_content FROM torrents WHERE torrent_hash=%s", (torrent_hash,))
            row = cursor.fetchone()
            already_have_file = row is not None and row[0] is not None

            content = None
            if not already_have_file:
                # Politeness sleep only if downloading
                time.sleep(1)
                try:
                    r = requests_session.get(download_url, timeout=10)
                    r.raise_for_status()
                    content = r.content
                except:
                    pass

            return {
                "torrent_hash": torrent_hash, "details_link": details_link, "download_url": download_url,
                "category": category, "title_visible": visible_name, "title_full": full_title,
                "size_pretty": size_pretty, "added_datetime": added_mysql, "preview_image": img_link,
                "seeders": seeders, "seeders_link": seeders_link, "leechers": leechers, "leechers_link": leechers_link,
                "torrent_filename": torrent_filename, "torrent_content": content,
                "is_new_torrent": not already_have_file
            }
        except Exception:
            return None

    # --- F. Loop through assigned pages ---
    for page_num in page_indices:
        url = f"{BASE_URL}&page={page_num}"
        print(f" 🔄 [Thread-{thread_id}] Scraping page {page_num}")

        try:
            driver.get(url)
            # Close popup (simplified JS)
            driver.execute_script("try { interstitialBox.closeit(); } catch(e) {}")

            # Row filtering
            rows = driver.find_elements(By.CSS_SELECTOR, "table tr")
            real_rows = []
            for r in rows:
                cs = r.find_elements(By.TAG_NAME, "td")
                if len(cs) == 7 and cs[1].find_elements(By.TAG_NAME, "a"):
                    real_rows.append(cs)

            if not real_rows:
                print(f" ⚠️ [Thread-{thread_id}] Page {page_num} empty.")
                continue

            # Process rows
            for cells in real_rows:
                data = parse_row(cells)
                if not data: continue

                # Update global stats safely
                with stats_lock:
                    stats["processed"] += 1
                    if data["is_new_torrent"]:
                        stats["new"] += 1
                        stats["new_titles"].append(data["title_visible"])
                    else:
                        stats["existing"] += 1

                # Insert SQL
                sql = """
                INSERT INTO torrents (
                    torrent_hash, details_link, download_url, category, title_visible, title_full,
                    size_pretty, added_datetime, preview_image,
                    seeders, seeders_link, leechers, leechers_link,
                    torrent_filename, torrent_content
                ) VALUES (
                    %(torrent_hash)s, %(details_link)s, %(download_url)s, %(category)s, %(title_visible)s, %(title_full)s,
                    %(size_pretty)s, %(added_datetime)s, %(preview_image)s,
                    %(seeders)s, %(seeders_link)s, %(leechers)s, %(leechers_link)s,
                    %(torrent_filename)s, %(torrent_content)s
                )
                ON DUPLICATE KEY UPDATE
                    seeders = VALUES(seeders),
                    leechers = VALUES(leechers),
                    download_url = VALUES(download_url),
                    torrent_content = COALESCE(VALUES(torrent_content), torrent_content);
                """
                cursor.execute(sql, data)

        except Exception as e:
            print(f" 💥 [Thread-{thread_id}] Error on page {page_num}: {e}")

    # Cleanup
    driver.quit()
    db.close()
    print(f"🏁 [Thread-{thread_id}] Finished assigned pages.")


# ============================================================
# 2) MAIN EXECUTION
# ============================================================
if __name__ == "__main__":
    RUN_START = datetime.datetime.now()
    print(f"🚀 Starting multithreaded scraper with {THREADS} threads...")

    # 1. Distribute pages among threads
    # Example: with 226 pages and 5 threads, each gets ~46 pages
    all_pages = list(range(TOTAL_PAGES))
    chunk_size = len(all_pages) // THREADS + 1
    chunks = [all_pages[i:i + chunk_size] for i in range(0, len(all_pages), chunk_size)]

    # 2. Start threads
    with ThreadPoolExecutor(max_workers=THREADS) as executor:
        futures = []
        for i, page_chunk in enumerate(chunks):
            if page_chunk:  # Only start if the chunk is not empty
                futures.append(executor.submit(process_page_chunk, page_chunk, i + 1))

        # Wait for all to finish
        for f in futures:
            f.result()

    # 3. Final report
    RUN_END = datetime.datetime.now()
    print("\n✅ All threads completed.")

    body = (
        f"Run started: {RUN_START:%Y-%m-%d %H:%M:%S}\n"
        f"Run finished: {RUN_END:%Y-%m-%d %H:%M:%S}\n\n"
        f"Processed torrents: {stats['processed']}\n"
        f"New torrents saved: {stats['new']}\n"
        f"Existing torrents updated: {stats['existing']}\n"
    )
    if stats["new_titles"]:
        body += "\nNew torrents list:\n- " + "\n- ".join(stats["new_titles"])

    send_mail(to="vladimir.buzalka@buzalka.cz", subject="SKTorrent Multi-Thread Run", body=body, html=False)
    print("📧 Email report sent.")
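
The main block's contiguous chunking is worth a quick check with the script's own numbers (226 pages, 5 threads):

# Quick check of the contiguous page chunking (matches the script's config).
all_pages = list(range(226))
chunk_size = len(all_pages) // 5 + 1  # 46
chunks = [all_pages[i:i + chunk_size] for i in range(0, len(all_pages), chunk_size)]
print([len(c) for c in chunks])  # -> [46, 46, 46, 46, 42]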

5threaddownloadtorrentfiles.py (new file, 212 lines)
@@ -0,0 +1,212 @@
import pymysql
import requests
import json
import time
import random
import os
import re
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor
from threading import Lock
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# ============================================================
# CONFIGURATION
# ============================================================
DB_CONFIG = {
    "host": "192.168.1.50",
    "port": 3306,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
    "autocommit": True,
}

COOKIE_FILE = Path("sktorrent_cookies.json")
BACKUP_DIR = "saved_torrents"  # Directory for the local backup
THREADS = 5  # Number of threads

# Global lock for console output so lines don't interleave
print_lock = Lock()
stats = {"fixed": 0, "failed": 0, "saved_to_disk": 0}


# ============================================================
# HELPER FUNCTIONS
# ============================================================

def sanitize_filename(name):
    """Strips disallowed characters from a file name"""
    # Allow only letters, digits, dots, dashes and spaces
    clean = re.sub(r'[^\w\s\.-]', '', name)
    return clean.strip()[:100]  # Trim to 100 characters to be safe


def ensure_backup_dir():
    """Creates the torrent directory if it does not exist"""
    if not os.path.exists(BACKUP_DIR):
        os.makedirs(BACKUP_DIR)
        print(f"📁 Created backup directory: {os.path.abspath(BACKUP_DIR)}")


def get_browser_identity():
    """
    Starts Selenium (Chrome) JUST ONCE to obtain a valid
    User-Agent and fresh cookies for the threads.
    """
    print("🤖 Starting Selenium to obtain the browser identity...")

    opts = Options()
    opts.add_argument("--headless=new")
    opts.add_argument("--disable-gpu")

    driver = webdriver.Chrome(options=opts)

    # Visit the site to set the cookie domain
    driver.get("https://sktorrent.eu")

    # Load cookies from the file
    if COOKIE_FILE.exists():
        with open(COOKIE_FILE, "r", encoding="utf-8") as f:
            cookies_list = json.load(f)
        for c in cookies_list:
            driver.add_cookie(c)
        driver.refresh()
        time.sleep(2)

    # Export the identity
    user_agent = driver.execute_script("return navigator.userAgent;")
    browser_cookies = driver.get_cookies()

    driver.quit()
    print("✅ Identity obtained.")
    return user_agent, browser_cookies


# ============================================================
# WORKER (worker thread)
# ============================================================
def worker_task(rows_chunk, thread_id, user_agent, cookies_list):
    """
    This function runs separately in each thread.
    """
    # 1. Create this thread's own session
    session = requests.Session()
    session.headers.update({"User-Agent": user_agent})
    for c in cookies_list:
        session.cookies.set(c['name'], c['value'])

    # 2. Own DB connection (required for thread safety)
    try:
        db = pymysql.connect(**DB_CONFIG)
        cursor = db.cursor()
    except Exception as e:
        with print_lock:
            print(f"❌ [Thread-{thread_id}] DB connection error: {e}")
        return

    for row in rows_chunk:
        t_hash, url, title = row

        # Protection: a short random pause so 5 threads don't kill the server
        time.sleep(random.uniform(0.5, 2.0))

        try:
            # Download
            resp = session.get(url, timeout=15)

            if resp.status_code == 403:
                with print_lock:
                    print(f"⛔ [Thread-{thread_id}] 403 Forbidden! {title[:20]}...")
                    stats["failed"] += 1
                continue

            resp.raise_for_status()
            content = resp.content

            if len(content) > 100:
                # A) Save to the DB (BLOB)
                sql = "UPDATE torrents SET torrent_content = %s WHERE torrent_hash = %s"
                cursor.execute(sql, (content, t_hash))

                # B) Save to DISK (file)
                clean_name = sanitize_filename(title)
                # Add a piece of the hash to the name so same-named files don't overwrite each other
                filename = f"{clean_name}_{t_hash[:6]}.torrent"
                file_path = os.path.join(BACKUP_DIR, filename)

                with open(file_path, "wb") as f:
                    f.write(content)

                with print_lock:
                    print(f"✅ [Thread-{thread_id}] OK: {clean_name}")
                    stats["fixed"] += 1
                    stats["saved_to_disk"] += 1
            else:
                with print_lock:
                    print(f"⚠️ [Thread-{thread_id}] Empty file: {title}")
                    stats["failed"] += 1

        except Exception as e:
            with print_lock:
                print(f"❌ [Thread-{thread_id}] Error: {title[:20]}... -> {e}")
                stats["failed"] += 1

    db.close()
    with print_lock:
        print(f"🏁 [Thread-{thread_id}] Finished its work.")


# ============================================================
# MAIN LOOP
# ============================================================
if __name__ == "__main__":
    ensure_backup_dir()

    # 1. Fetch the data from the DB
    print("🔍 Loading the list of missing files from the DB...")
    main_db = pymysql.connect(**DB_CONFIG)
    with main_db.cursor() as c:
        # Look for rows that have a URL but no content
        c.execute(
            "SELECT torrent_hash, download_url, title_visible FROM torrents WHERE torrent_content IS NULL AND download_url IS NOT NULL")
        all_rows = c.fetchall()
    main_db.close()

    total = len(all_rows)
    print(f"📋 To fix: {total} items.")

    if total == 0:
        print("🎉 Nothing to fix.")
        exit()

    # 2. Obtain the "super identity" via Selenium (just once)
    u_agent, browser_cookies = get_browser_identity()

    # 3. Split the work across 5 threads
    chunk_size = total // THREADS + 1
    chunks = [all_rows[i:i + chunk_size] for i in range(0, total, chunk_size)]

    print(f"🚀 Starting {THREADS} threads (saving to the DB + to the '{BACKUP_DIR}' folder)...")

    # 4. Run the multithreading
    with ThreadPoolExecutor(max_workers=THREADS) as executor:
        futures = []
        for i, chunk in enumerate(chunks):
            if chunk:
                # Hand each thread a slice of the work + the browser identity
                futures.append(executor.submit(worker_task, chunk, i + 1, u_agent, browser_cookies))

        # Wait for completion
        for f in futures:
            f.result()

    print("\n" + "=" * 40)
    print("🏁 DONE")
    print(f"✅ Fixed in the DB: {stats['fixed']}")
    print(f"💾 Saved to disk: {stats['saved_to_disk']}")
    print(f"❌ Errors: {stats['failed']}")
    print(f"📁 You will find the files in: {os.path.abspath(BACKUP_DIR)}")
    print("=" * 40)
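
sanitize_filename (defined above) is easy to eyeball with a worked input; the title is made up:

# Worked example of sanitize_filename (input is illustrative).
print(sanitize_filename("Movie: Title? (2024) *FINAL*"))  # -> Movie Title 2024 FINAL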

Final SingleThreaded Cleanup.py (new file, 133 lines)
@@ -0,0 +1,133 @@
import pymysql
import requests
import json
import time
import random
import os
import re
from pathlib import Path
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# ============================================================
# CONFIGURATION
# ============================================================
DB_CONFIG = {
    "host": "192.168.1.50",
    "port": 3306,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
    "autocommit": True,
}

COOKIE_FILE = Path("sktorrent_cookies.json")
BACKUP_DIR = "saved_torrents"


# ============================================================
# HELPER FUNCTIONS
# ============================================================
def sanitize_filename(name):
    clean = re.sub(r'[^\w\s\.-]', '', name)
    return clean.strip()[:100]


def get_browser_identity():
    print("🤖 Starting Selenium (single-thread mode)...")
    opts = Options()
    opts.add_argument("--headless=new")
    opts.add_argument("--disable-gpu")
    driver = webdriver.Chrome(options=opts)
    driver.get("https://sktorrent.eu")

    if COOKIE_FILE.exists():
        with open(COOKIE_FILE, "r", encoding="utf-8") as f:
            cookies_list = json.load(f)
        for c in cookies_list:
            driver.add_cookie(c)
        driver.refresh()
        time.sleep(2)

    user_agent = driver.execute_script("return navigator.userAgent;")
    browser_cookies = driver.get_cookies()
    driver.quit()
    return user_agent, browser_cookies


# ============================================================
# MAIN
# ============================================================
if __name__ == "__main__":
    if not os.path.exists(BACKUP_DIR):
        os.makedirs(BACKUP_DIR)

    # 1. Load the remaining failures
    db = pymysql.connect(**DB_CONFIG)
    cursor = db.cursor()
    cursor.execute(
        "SELECT torrent_hash, download_url, title_visible FROM torrents WHERE torrent_content IS NULL AND download_url IS NOT NULL")
    rows = cursor.fetchall()

    print(f"📋 Remaining to fix: {len(rows)} items.")
    if not rows:
        print("🎉 Done! Everything is downloaded.")
        exit()

    # 2. Obtain the identity
    ua, cookies = get_browser_identity()

    session = requests.Session()
    session.headers.update({"User-Agent": ua})
    for c in cookies:
        session.cookies.set(c['name'], c['value'])

    # 3. Slow loop (1 thread)
    success = 0
    dead_links = 0

    print("🚀 Starting the gentle cleanup...")

    for i, row in enumerate(rows):
        t_hash, url, title = row
        print(f"[{i + 1}/{len(rows)}] {title[:50]}...", end=" ")

        try:
            # A longer pause, for stability
            time.sleep(random.uniform(1.5, 3.0))

            resp = session.get(url, timeout=20)  # Longer timeout

            if resp.status_code == 404:
                print("❌ 404 Not Found (the file does not exist on the server)")
                dead_links += 1
                continue

            if resp.status_code != 200:
                print(f"❌ Error {resp.status_code}")
                continue

            content = resp.content
            if len(content) > 100:
                # DB
                cursor.execute("UPDATE torrents SET torrent_content = %s WHERE torrent_hash = %s", (content, t_hash))

                # Disk
                fname = f"{sanitize_filename(title)}_{t_hash[:6]}.torrent"
                with open(os.path.join(BACKUP_DIR, fname), "wb") as f:
                    f.write(content)

                print("✅ OK")
                success += 1
            else:
                print("⚠️ Empty file")

        except Exception as e:
            print(f"❌ Failed: {e}")

    db.close()
    print("\n" + "=" * 30)
    print(f"🏁 FINAL: Fixed {success} out of {len(rows)}")
    if dead_links > 0:
        print(f"💀 Dead links (404): {dead_links} (those can no longer be fixed)")
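
Both download scripts repeat the same Selenium-to-requests identity handoff; a hedged generalization of that pattern (the helper name is made up):

# Hypothetical helper generalizing the identity handoff used above.
import requests

def session_from_driver(driver):
    """Build a requests.Session reusing the browser's User-Agent and cookies."""
    session = requests.Session()
    ua = driver.execute_script("return navigator.userAgent;")
    session.headers.update({"User-Agent": ua})
    for c in driver.get_cookies():
        session.cookies.set(c["name"], c["value"])
    return session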

WhatWehaveAlreadyDownloaded.py (new file, 158 lines)
@@ -0,0 +1,158 @@
import pymysql
import bencodepy
import os
from pathlib import Path

# ============================================================
# CONFIGURATION
# ============================================================
# Your network path (use a raw string r"..." for backslashes)
# PHYSICAL_DIR = Path(r"\\tower\torrents\downloads")
PHYSICAL_DIR = Path(r"\\tower1\#Colddata\Porno")

DB_CONFIG = {
    "host": "192.168.1.50",
    "port": 3306,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
    "autocommit": True,
}


# ============================================================
# HELPER FUNCTIONS
# ============================================================
def decode_bytes(b):
    """
    Decodes bytes from Bencode into a string.
    Tries UTF-8 first, then common fallbacks.
    """
    if isinstance(b, str): return b
    encodings = ['utf-8', 'windows-1250', 'latin-1', 'cp1252']
    for enc in encodings:
        try:
            return b.decode(enc)
        except:
            continue
    return b.decode('utf-8', errors='ignore')


def check_torrent_in_filesystem(torrent_blob, root_path):
    """
    Parses the binary BLOB, calculates expected paths,
    and checks if they exist in the root_path.
    """
    try:
        # Decode the binary BLOB
        data = bencodepy.decode(torrent_blob)
        info = data.get(b'info')
        if not info: return False

        # Get the name of the root file/folder defined in the torrent
        name = decode_bytes(info.get(b'name'))

        # Calculate the expected location
        target_path = root_path / name

        # 1. Check if the main path exists
        if not target_path.exists():
            return False

        # 2. Size verification (basic)
        # If it's a single file
        if b'files' not in info:
            expected_size = info[b'length']
            real_size = target_path.stat().st_size
            # Allow up to 4 KB variance (filesystems sometimes differ slightly)
            if abs(real_size - expected_size) < 4096:
                return True
            return False

        # If it's a multi-file torrent (folder)
        else:
            # If the folder exists, we assume it's mostly good,
            # but let's check at least one file inside to be sure it's not empty.
            files = info[b'files']
            if not files: return True  # Empty folder torrent? Rare but possible.

            # Check the first file in the list
            first_file_path = target_path.joinpath(*[decode_bytes(p) for p in files[0][b'path']])
            return first_file_path.exists()

    except Exception:
        # If Bencode fails or the path is weird
        return False


# ============================================================
# MAIN EXECUTION
# ============================================================
if __name__ == "__main__":
    if not PHYSICAL_DIR.exists():
        print(f"❌ ERROR: Cannot access path: {PHYSICAL_DIR}")
        print("Make sure the drive is mapped or the network path is accessible.")
        exit()

    print(f"📂 Scanning storage: {PHYSICAL_DIR}")
    print("🚀 Connecting to database...")

    db = pymysql.connect(**DB_CONFIG)
    cursor = db.cursor()

    # 1. Get all torrents that have content (BLOB)
    # We only select the ID and content to keep memory usage reasonable
    cursor.execute(
        "SELECT torrent_hash, title_visible, torrent_content FROM torrents WHERE torrent_content IS NOT NULL")

    rows = cursor.fetchall()
    total = len(rows)
    print(f"📋 Analysing {total} torrents from the database against disk files...")

    found_count = 0
    missing_count = 0

    # 2. Iterate and check
    updates = []  # Store successful hashes to batch-update later

    for index, row in enumerate(rows):
        t_hash, title, blob = row

        is_downloaded = check_torrent_in_filesystem(blob, PHYSICAL_DIR)

        if is_downloaded:
            found_count += 1
            updates.append(t_hash)
            # Print only occasionally to reduce clutter
            # print(f"✅ Found: {title[:50]}")
        else:
            missing_count += 1

        if index % 100 == 0:
            print(f" Processed {index}/{total} ... (Found: {found_count})")

    # 3. Batch-update the database
    print(f"\n💾 Updating database: marking {len(updates)} torrents as 'physical_exists = 1'...")

    # Reset everything to 0 first (in case you deleted files since the last run)
    cursor.execute("UPDATE torrents SET physical_exists = 0")

    if updates:
        # Update in chunks of 1000 to be safe
        chunk_size = 1000
        for i in range(0, len(updates), chunk_size):
            chunk = updates[i:i + chunk_size]
            format_strings = ','.join(['%s'] * len(chunk))
            cursor.execute(f"UPDATE torrents SET physical_exists = 1 WHERE torrent_hash IN ({format_strings})",
                           tuple(chunk))
        db.commit()

    db.close()

    print("\n" + "=" * 40)
    print("🏁 SCAN COMPLETE")
    print(f"✅ Physically available: {found_count}")
    print(f"❌ Missing / not downloaded: {missing_count}")
    print(f"📊 Completion rate: {int((found_count / total) * 100)}%")
    print("=" * 40)
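
For reference, the shape of what check_torrent_in_filesystem reads from a blob: bencodepy decodes to a dict keyed by bytes. The torrent below is a tiny hand-made single-file example:

# Tiny hand-made single-file torrent, illustrating the decode step.
import bencodepy

blob = bencodepy.encode({b"info": {b"name": b"movie.mkv", b"length": 12345}})
info = bencodepy.decode(blob)[b"info"]
print(info[b"name"].decode(), info[b"length"])  # -> movie.mkv 12345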

Seedbox/10 Nahraniexistujicich.py (new file, 150 lines)
@@ -0,0 +1,150 @@
import pymysql
import re
import time
import qbittorrentapi

# ============================================================
# CONFIGURATION
# ============================================================
MAX_SIZE_GB = 950
QBT_URL = "https://vladob.zen.usbx.me/qbittorrent"
QBT_USER = "vladob"
QBT_PASS = "jCni3U6d#y4bfcm"

DB_CONFIG = {
    "host": "192.168.1.50",
    "port": 3306,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
    "autocommit": True,
}


# ============================================================
# HELPER FUNCTIONS
# ============================================================
def parse_size_to_gb(size_str):
    """Converts text like '1.5 GB' or '500 MB' to a float in GB"""
    if not size_str: return 0.0
    s = str(size_str).upper().replace(",", ".").strip()
    match = re.search(r"([\d\.]+)", s)
    if not match: return 0.0
    val = float(match.group(1))

    if "TB" in s: return val * 1024
    if "GB" in s: return val
    if "MB" in s: return val / 1024
    if "KB" in s: return val / 1024 / 1024
    return 0.0


# ============================================================
# MAIN LOGIC
# ============================================================
def main():
    print(f"🚀 Planning a direct upload from the DB (limit: {MAX_SIZE_GB} GB, ordered by seeders)...")

    # 1. Load the data from the DB
    # We also fetch the BLOB (torrent_content), so this can take a while
    db = pymysql.connect(**DB_CONFIG)
    cursor = db.cursor()

    print("⏳ Loading data from MySQL...")
    sql = """
        SELECT torrent_hash, title_visible, size_pretty, seeders, torrent_content
        FROM torrents
        WHERE physical_exists = 0 AND torrent_content IS NOT NULL
        ORDER BY seeders DESC
    """
    cursor.execute(sql)
    rows = cursor.fetchall()
    db.close()

    print(f"🔍 Found {len(rows)} candidates. Picking the best ones...")

    # 2. Select up to the 950 GB capacity
    selected_torrents = []
    total_size_gb = 0.0

    for row in rows:
        t_hash, title, size_str, seeders, content = row
        size_gb = parse_size_to_gb(size_str)

        # Safety net against absurdly large files or parsing errors
        if size_gb == 0 and "MB" not in str(size_str).upper() and "KB" not in str(size_str).upper():
            pass

        # Capacity check
        if total_size_gb + size_gb > MAX_SIZE_GB:
            # As soon as something doesn't fit, stop selecting (the list is sorted by priority)
            print(f"🛑 Limit reached! '{title}' ({size_gb:.2f} GB) would exceed {MAX_SIZE_GB} GB.")
            break

        selected_torrents.append({
            "filename": f"{t_hash}.torrent",  # Virtual file name
            "content": content,  # Binary data
            "title": title,
            "size": size_gb,
            "seeders": seeders
        })
        total_size_gb += size_gb

    # 3. Report
    print("-" * 40)
    print(f"📦 Selected: {len(selected_torrents)} torrents")
    print(f"💾 Total size: {total_size_gb:.2f} GB / {MAX_SIZE_GB} GB")
    if selected_torrents:
        avg_seeders = sum(t['seeders'] for t in selected_torrents) / len(selected_torrents)
        print(f"⚡ Average seeders: {avg_seeders:.1f}")
    print("-" * 40)

    if not selected_torrents:
        print("Nothing to upload.")
        exit()

    confirm = input("❓ Upload this selection to the seedbox? (yes/no): ")
    if confirm.lower() not in ['ano', 'y', 'yes']:
        print("❌ Cancelled.")
        exit()

    # 4. Connect to qBittorrent
    try:
        qbt = qbittorrentapi.Client(
            host=QBT_URL,
            username=QBT_USER,
            password=QBT_PASS,
            VERIFY_WEBUI_CERTIFICATE=False
        )
        qbt.auth_log_in()
        print("✅ Connected to the seedbox.")
    except Exception as e:
        print(f"❌ Connection error: {e}")
        exit()

    # 5. Send the data
    print("🚀 Uploading...")
    success_count = 0

    for i, item in enumerate(selected_torrents):
        try:
            # Send the binary data directly (pretending to upload a file)
            # format: {'file_name.torrent': b'binary_data...'}
            file_dict = {item['filename']: item['content']}

            qbt.torrents_add(torrent_files=file_dict, is_paused=False)

            print(f"[{i + 1}/{len(selected_torrents)}] 📤 {item['title']} ({item['size']:.1f} GB)")
            success_count += 1
            time.sleep(0.2)  # Small pause for API stability

        except Exception as e:
            print(f"❌ Error with {item['title']}: {e}")

    print("\n✅ DONE.")
    print("The torrents are on the seedbox. Once they finish downloading, pull them home and run the 99_Scan... script.")


if __name__ == "__main__":
    main()
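
Worked examples of parse_size_to_gb (inputs are illustrative):

# Worked examples of parse_size_to_gb.
print(parse_size_to_gb("1.5 GB"))  # -> 1.5
print(parse_size_to_gb("500 MB"))  # -> 0.48828125
print(parse_size_to_gb("2 TB"))    # -> 2048.0
print(parse_size_to_gb(None))      # -> 0.0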