git
This commit is contained in:
212
92 5threaddownloadtorrentfiles.py
Normal file
212
92 5threaddownloadtorrentfiles.py
Normal file
@@ -0,0 +1,212 @@
|
||||
import pymysql
|
||||
import requests
|
||||
import json
|
||||
import time
|
||||
import random
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from threading import Lock
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
|
||||
# ============================================================
# CONFIGURATION
# ============================================================
|
||||
# Connection parameters passed verbatim to pymysql.connect(**DB_CONFIG).
# Each value can be overridden through an environment variable; the literals
# are the previous hard-coded settings, kept as fallbacks so existing
# deployments keep working unchanged.
# NOTE(review): the fallback password is still stored in source control —
# set TORRENTS_DB_PASSWORD and remove the literal once that is in place.
DB_CONFIG = {
    "host": os.environ.get("TORRENTS_DB_HOST", "192.168.1.50"),
    "port": int(os.environ.get("TORRENTS_DB_PORT", "3306")),
    "user": os.environ.get("TORRENTS_DB_USER", "root"),
    "password": os.environ.get("TORRENTS_DB_PASSWORD", "Vlado9674+"),
    "database": os.environ.get("TORRENTS_DB_NAME", "torrents"),
    "charset": "utf8mb4",
    "autocommit": True,
}

COOKIE_FILE = Path("sktorrent_cookies.json")
BACKUP_DIR = "saved_torrents"  # Directory for the local backup copies
THREADS = 5  # Number of worker threads

# Global lock for console output so that messages from different threads
# do not overwrite each other.
print_lock = Lock()
# Run-wide counters, reported in the final summary.
stats = {"fixed": 0, "failed": 0, "saved_to_disk": 0}
|
||||
|
||||
|
||||
# ============================================================
# HELPER FUNCTIONS
# ============================================================
|
||||
|
||||
def sanitize_filename(name, max_length=100):
    """Return *name* reduced to characters that are safe in a file name.

    Only word characters, whitespace, dots and hyphens are kept. Runs of
    whitespace (including tabs and newlines) are collapsed to one space,
    and the result is limited to *max_length* characters.

    Args:
        name: Raw title to clean.
        max_length: Maximum length of the returned string (default 100).

    Returns:
        The cleaned string, without leading or trailing whitespace. May be
        empty when *name* contains no permitted characters.
    """
    # Keep only letters, digits, underscores, whitespace, dots and hyphens.
    clean = re.sub(r'[^\w\s.-]', '', name)
    # Tabs and newlines pass the filter above but do not belong in a file
    # name, so fold every whitespace run into a single space.
    clean = re.sub(r'\s+', ' ', clean)
    # Strip again after truncating: the cut may land right after a space.
    return clean.strip()[:max_length].strip()
|
||||
|
||||
|
||||
def ensure_backup_dir():
    """Create the torrent backup directory (BACKUP_DIR) if it is missing.

    Prints the absolute path of the directory when it had to be created;
    does nothing when the directory is already there.
    """
    if os.path.isdir(BACKUP_DIR):
        return
    # exist_ok=True tolerates the directory appearing between the check
    # above and this call.
    os.makedirs(BACKUP_DIR, exist_ok=True)
    print(f"📁 Vytvořen adresář pro zálohu: {os.path.abspath(BACKUP_DIR)}")
|
||||
|
||||
|
||||
def get_browser_identity():
    """
    Start headless Chrome once and export its identity for the worker threads.

    The browser opens https://sktorrent.eu, loads the cookies stored in
    COOKIE_FILE (when that file exists), refreshes the page, and then reads
    back the User-Agent string and the browser's current cookies.

    Returns:
        tuple: ``(user_agent, browser_cookies)`` — the value of
        ``navigator.userAgent`` and the list returned by
        ``driver.get_cookies()``.
    """
    print("🤖 Startuji Selenium pro získání identity prohlížeče...")

    opts = Options()
    opts.add_argument("--headless=new")
    opts.add_argument("--disable-gpu")

    driver = webdriver.Chrome(options=opts)
    try:
        # Visit the site first so the cookie domain is established.
        driver.get("https://sktorrent.eu")

        # Load the saved cookies from the file, if there is one.
        if COOKIE_FILE.exists():
            with open(COOKIE_FILE, "r", encoding="utf-8") as f:
                cookies_list = json.load(f)
            for c in cookies_list:
                driver.add_cookie(c)
            driver.refresh()
            time.sleep(2)

        # Export the identity.
        user_agent = driver.execute_script("return navigator.userAgent;")
        browser_cookies = driver.get_cookies()
    finally:
        # Always shut the browser down, even when a step above fails;
        # otherwise the Chrome process would be left running.
        driver.quit()

    print("✅ Identita získána.")
    return user_agent, browser_cookies
|
||||
|
||||
|
||||
# ============================================================
# WORKER (worker thread)
# ============================================================
|
||||
def worker_task(rows_chunk, thread_id, user_agent, cookies_list):
    """
    Download the torrent files for one chunk of rows; runs once per thread.

    Every URL is fetched through a thread-private ``requests.Session``. A
    response body longer than 100 bytes is stored in the ``torrent_content``
    column of the matching row and also written to BACKUP_DIR. Outcomes are
    counted in the module-level ``stats`` dict.

    Args:
        rows_chunk: Iterable of ``(torrent_hash, download_url, title)`` rows.
        thread_id: Label used to tag this worker's console messages.
        user_agent: Value for the ``User-Agent`` request header.
        cookies_list: Iterable of cookie dicts; the ``name`` and ``value``
            keys of each are copied into the session.

    Returns:
        None. Per-row failures are reported on the console and counted
        rather than raised.
    """
    # 1. A private HTTP session for this thread.
    session = requests.Session()
    session.headers.update({"User-Agent": user_agent})
    for c in cookies_list:
        session.cookies.set(c['name'], c['value'])

    db = None
    try:
        # 2. A private DB connection (required for thread safety).
        try:
            db = pymysql.connect(**DB_CONFIG)
            cursor = db.cursor()
        except Exception as e:
            with print_lock:
                print(f"❌ [Thread-{thread_id}] Chyba DB připojení: {e}")
            return

        for t_hash, url, title in rows_chunk:
            # The caller's query does not exclude NULL titles; normalise so
            # the slicing in the messages below cannot raise.
            title = title or ""

            # Protection: a short random pause so the threads together do
            # not overwhelm the server.
            time.sleep(random.uniform(0.5, 2.0))

            try:
                # Download
                resp = session.get(url, timeout=15)

                if resp.status_code == 403:
                    with print_lock:
                        print(f"⛔ [Thread-{thread_id}] 403 Forbidden! {title[:20]}...")
                        # stats is shared by all threads; update it while
                        # holding the lock.
                        stats["failed"] += 1
                    continue

                resp.raise_for_status()
                content = resp.content

                if len(content) > 100:
                    # A) Store in the DB (BLOB)
                    sql = "UPDATE torrents SET torrent_content = %s WHERE torrent_hash = %s"
                    cursor.execute(sql, (content, t_hash))

                    # B) Store on disk (file)
                    clean_name = sanitize_filename(title)
                    # Append part of the hash so files with the same title
                    # do not overwrite each other.
                    filename = f"{clean_name}_{t_hash[:6]}.torrent"
                    file_path = os.path.join(BACKUP_DIR, filename)

                    with open(file_path, "wb") as f:
                        f.write(content)

                    with print_lock:
                        print(f"✅ [Thread-{thread_id}] OK: {clean_name}")
                        stats["fixed"] += 1
                        stats["saved_to_disk"] += 1
                else:
                    with print_lock:
                        print(f"⚠️ [Thread-{thread_id}] Prázdný soubor: {title}")
                        stats["failed"] += 1

            except Exception as e:
                # Best effort: report the failure and carry on with the
                # next row.
                with print_lock:
                    print(f"❌ [Thread-{thread_id}] Chyba: {title[:20]}... -> {e}")
                    stats["failed"] += 1
    finally:
        # Release the connection and the session on every exit path.
        if db is not None:
            db.close()
        session.close()

    with print_lock:
        print(f"🏁 [Thread-{thread_id}] Dokončil práci.")
|
||||
|
||||
|
||||
# ============================================================
# MAIN LOOP
# ============================================================
|
||||
# Script entry point: load the rows that still lack torrent content, obtain
# a browser identity once, split the rows between the worker threads and
# print a summary when they have all finished.
if __name__ == "__main__":
    ensure_backup_dir()

    # 1. Fetch the work list from the DB
    print("🔍 Načítám seznam chybějících souborů z DB...")
    main_db = pymysql.connect(**DB_CONFIG)
    try:
        with main_db.cursor() as c:
            # Rows that have a URL but no stored content yet
            c.execute(
                "SELECT torrent_hash, download_url, title_visible FROM torrents WHERE torrent_content IS NULL AND download_url IS NOT NULL")
            all_rows = c.fetchall()
    finally:
        # Close the connection even when the query fails.
        main_db.close()

    total = len(all_rows)
    print(f"📋 K opravě: {total} položek.")

    if total == 0:
        print("🎉 Není co opravovat.")
        # SystemExit needs no import and does not depend on the `site`
        # module the way the interactive exit() helper does.
        raise SystemExit(0)

    # 2. Obtain the browser identity through Selenium (only once)
    u_agent, browser_cookies = get_browser_identity()

    # 3. Split the work between the threads.
    # Ceiling division: `total // THREADS + 1` makes the chunks too large
    # and leaves workers idle (10 rows would yield 4 chunks, not 5).
    chunk_size = -(-total // THREADS)
    chunks = [all_rows[i:i + chunk_size] for i in range(0, total, chunk_size)]

    print(f"🚀 Spouštím {THREADS} vláken (ukládání do DB + do složky '{BACKUP_DIR}')...")

    # 4. Run the workers
    with ThreadPoolExecutor(max_workers=THREADS) as executor:
        # Each worker gets its slice of rows plus the browser identity.
        futures = [
            executor.submit(worker_task, chunk, i, u_agent, browser_cookies)
            for i, chunk in enumerate(chunks, start=1)
        ]

        # Wait for completion; result() re-raises any worker exception.
        for f in futures:
            f.result()

    print("\n" + "=" * 40)
    print("🏁 DOKONČENO")
    print(f"✅ Opraveno v DB: {stats['fixed']}")
    print(f"💾 Uloženo na disk: {stats['saved_to_disk']}")
    print(f"❌ Chyby: {stats['failed']}")
    print(f"📁 Soubory najdeš v: {os.path.abspath(BACKUP_DIR)}")
    print("=" * 40)
|
||||
Reference in New Issue
Block a user