Save local IDE settings

2025-12-30 08:42:44 +01:00
8 changed files with 2096 additions and 0 deletions

.gitignore (vendored, new file, 14 additions)

@@ -0,0 +1,14 @@
# Virtual environment
.venv/
# Python
__pycache__/
*.pyc
*.log
# IDE
.idea/
# OS
.DS_Store
Thumbs.db

30 OpenTextLIsting v5.py (new file, 390 additions)

@@ -0,0 +1,390 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import pymysql
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
import re
import urllib.parse as urlparse
from pathlib import Path
import json
import requests
# ============================================================
# 1) MySQL CONNECTION
# ============================================================
db = pymysql.connect(
host="192.168.1.76",
port=3307,
user="root",
password="Vlado9674+",
database="torrents",
charset="utf8mb4",
autocommit=True
)
cursor = db.cursor()
# ============================================================
# 2) Selenium setup
# ============================================================
COOKIE_FILE = Path("sktorrent_cookies.json")
# Start URL for category 24, sorted by date DESC
START_URL = (
"https://sktorrent.eu/torrent/torrents.php"
"?search=&category=24&zaner=&jazyk=&active=0"
)
chrome_options = Options()
chrome_options.add_argument("--start-maximized")
chrome_options.add_argument("--disable-notifications")
chrome_options.add_argument("--disable-popup-blocking")
chrome_options.add_argument("--disable-extensions")
driver = webdriver.Chrome(options=chrome_options)
# Window position and size (so it does not overlap PyCharm)
driver.set_window_position(380, 50)   # roughly 10 cm from the left edge
driver.set_window_size(1350, 1000)    # adjust to your monitor
# Open the main page first so cookies can be added for the correct domain
driver.get("https://sktorrent.eu")
# Load cookies from the JSON file
if COOKIE_FILE.exists():
with open(COOKIE_FILE, "r") as f:
cookies = json.load(f)
for c in cookies:
driver.add_cookie(c)
print("🍪 Cookies loaded.")
else:
print("⚠️ Cookie file not found, you may not be logged in!")
# ============================================================
# 3) Convert Selenium cookies → requests.Session (for downloading .torrent files)
# ============================================================
requests_session = requests.Session()
for ck in driver.get_cookies():
requests_session.cookies.set(ck["name"], ck["value"])
print("🔗 Requests session initialized with Selenium cookies.")
# ============================================================
# 4) Helper to close the interstitial popup
# ============================================================
def close_popup_if_any():
"""Zkusí zavřít interstitial reklamu pomocí JS funkce interstitialBox.closeit()."""
try:
driver.execute_script("try { interstitialBox.closeit(); } catch(e) {}")
        # Short pause so the DOM can settle
time.sleep(0.5)
print("🧹 Popup closed via JS fallback (if present).")
except Exception as e:
print(" Popup JS handler not found:", e)
# ============================================================
# 5) Parse one row (one torrent)
# ============================================================
def parse_row(cells):
"""
cells: list<WebElement> o délce 7
Struktura:
0: kategorie
1: download link (.torrent)
2: název + velikost + datum + 'Obrázok' + žánr
3: -- (ignorujeme)
4: seeders
5: leechers
6: completed
"""
# --------------------------
# 1⃣ CATEGORY
# --------------------------
category = cells[0].text.strip()
# --------------------------
# 2⃣ DOWNLOAD LINK FOR TORRENT FILE (cells[1])
# --------------------------
try:
download_a = cells[1].find_element(By.TAG_NAME, "a")
download_link = download_a.get_attribute("href")
    except Exception:
print("⚠️ No download link in row, skipping.")
return None
parsed_dl = urlparse.urlparse(download_link)
dl_query = urlparse.parse_qs(parsed_dl.query)
torrent_filename = dl_query.get("f", ["unknown.torrent"])[0]
# --------------------------
# 3⃣ TITLE + DETAILS LINK (in cell[2])
# --------------------------
title_links = cells[2].find_elements(By.TAG_NAME, "a")
if not title_links:
print("⚠️ No title link — skipping row")
return None
a_tag = title_links[0]
visible_name = a_tag.text.strip()
full_title = a_tag.get_attribute("title")
details_link = a_tag.get_attribute("href")
if not details_link:
print("⚠️ Row has no details link — skipping")
return None
# --------------------------
# Extract torrent hash from ?id=
# --------------------------
parsed = urlparse.urlparse(details_link)
query = urlparse.parse_qs(parsed.query)
if "id" not in query:
print("⚠️ Skipping row with no torrent ID →", details_link)
return None
torrent_hash = query["id"][0]
# --------------------------
# 4⃣ Size + date parsing
# --------------------------
text_block = cells[2].get_attribute("innerText")
text_block_clean = " ".join(text_block.split())
size_match = re.search(r"Velkost ([0-9\.]+ ?[KMG]B)", text_block_clean, re.IGNORECASE)
added_match = re.search(r"Pridany (.+?)(?:\sObrázok|$)", text_block_clean, re.IGNORECASE)
size_pretty = size_match.group(1) if size_match else None
added_pretty = added_match.group(1) if added_match else None
    # Robust conversion of the date/time into a MySQL datetime
added_mysql = None
if added_pretty:
# "29/11/2025 o 02:29" → "29/11/2025 02:29"
clean = added_pretty.replace(" o ", " ").strip()
parts = clean.split(" ")
date_part = parts[0]
time_part = parts[1] if len(parts) > 1 else "00:00:00"
        # add seconds if they are missing
if len(time_part.split(":")) == 2:
time_part += ":00"
day, month, year = date_part.split("/")
added_mysql = f"{year}-{month}-{day} {time_part}"
# --------------------------
# 5⃣ Image preview
# --------------------------
img_link = None
try:
image_a = cells[2].find_element(
By.XPATH,
".//a[contains(text(),'Obrázok')]"
)
mouseover = image_a.get_attribute("onmouseover")
img_match = re.search(r"src=([^ ]+)", mouseover)
if img_match:
img_link = img_match.group(1).replace("'", "").strip()
if img_link.startswith("//"):
img_link = "https:" + img_link
    except Exception:
        pass
# --------------------------
# 6⃣ SEEDERS / LEECHERS
# --------------------------
seeders_a = cells[4].find_element(By.TAG_NAME, "a")
seeders_number = int(seeders_a.text.strip())
seeders_link = seeders_a.get_attribute("href")
leechers_a = cells[5].find_element(By.TAG_NAME, "a")
leechers_number = int(leechers_a.text.strip())
leechers_link = leechers_a.get_attribute("href")
# --------------------------
    # 7⃣ Check whether torrent_content is already stored in the DB
# --------------------------
cursor.execute(
"SELECT torrent_content FROM torrents WHERE torrent_hash=%s",
(torrent_hash,)
)
row = cursor.fetchone()
already_have_torrent = row is not None and row[0] is not None
# --------------------------
# 8⃣ DOWNLOAD TORRENT CONTENT (.torrent) only if needed
# --------------------------
torrent_content = None
if already_have_torrent:
print(f" ↪️ Torrent file already stored, skipping download ({torrent_filename})")
else:
        time.sleep(3)  # pause between torrents
try:
resp = requests_session.get(download_link)
resp.raise_for_status()
torrent_content = resp.content
except Exception as e:
print(f"⚠️ Could not download torrent file for {torrent_hash}: {e}")
torrent_content = None
# --------------------------
# FINAL DICTIONARY
# --------------------------
return {
"torrent_hash": torrent_hash,
"details_link": details_link,
"category": category,
"title_visible": visible_name,
"title_full": full_title,
"size_pretty": size_pretty,
"added_datetime": added_mysql,
"preview_image": img_link,
"seeders": seeders_number,
"seeders_link": seeders_link,
"leechers": leechers_number,
"leechers_link": leechers_link,
"torrent_filename": torrent_filename,
        # if we already had the torrent, we return None → the UPDATE keeps the stored value (COALESCE)
"torrent_content": torrent_content if not already_have_torrent else None,
}
# ============================================================
# 6) MySQL INSERT
# ============================================================
insert_sql = """
INSERT INTO torrents (
torrent_hash, details_link, category, title_visible, title_full,
size_pretty, added_datetime, preview_image,
seeders, seeders_link, leechers, leechers_link,
torrent_filename, torrent_content
) VALUES (
%(torrent_hash)s, %(details_link)s, %(category)s, %(title_visible)s, %(title_full)s,
%(size_pretty)s, %(added_datetime)s, %(preview_image)s,
%(seeders)s, %(seeders_link)s, %(leechers)s, %(leechers_link)s,
%(torrent_filename)s, %(torrent_content)s
)
ON DUPLICATE KEY UPDATE
details_link = VALUES(details_link),
category = VALUES(category),
title_visible = VALUES(title_visible),
title_full = VALUES(title_full),
size_pretty = VALUES(size_pretty),
added_datetime = VALUES(added_datetime),
preview_image = VALUES(preview_image),
seeders = VALUES(seeders),
seeders_link = VALUES(seeders_link),
leechers = VALUES(leechers),
leechers_link = VALUES(leechers_link),
torrent_filename = VALUES(torrent_filename),
torrent_content = COALESCE(VALUES(torrent_content), torrent_content);
"""
# ============================================================
# 7) Process a single listing page
# ============================================================
def process_current_page(page_index: int):
"""
Zpracuje aktuálně otevřenou stránku:
- najde všechny "REAL TORRENT ROWS" (7 td)
- pro každý torrent:
* parse_row
* insert/update do DB
"""
rows = driver.find_elements(By.CSS_SELECTOR, "table tr")
real_rows = []
for row in rows:
cells = row.find_elements(By.TAG_NAME, "td")
# REAL TORRENT ROWS ALWAYS HAVE EXACTLY 7 TD CELLS
if len(cells) == 7:
real_rows.append(cells)
print(f"📄 Page {page_index}: {len(real_rows)} torrent rows")
for cells in real_rows:
data = parse_row(cells)
if not data:
continue
print(f" 💾 [{page_index}] Saving:", data["title_visible"])
cursor.execute(insert_sql, data)
# ============================================================
# 8) Hlavní stránkovací cyklus
# ============================================================
current_url = START_URL
page_index = 0
while True:
print(f"\n🌐 Loading page {page_index}: {current_url}")
driver.get(current_url)
time.sleep(2)
    # close the popup, if any
    close_popup_if_any()
    # process the current page
    process_current_page(page_index)
    # try to find the "Dalsi >>" (Next) link
try:
next_btn = driver.find_element(
By.XPATH,
"//a[b[contains(text(),'Dalsi')]]"
)
next_url = next_btn.get_attribute("href")
if not next_url:
print("⛔ Next link has no href, stopping.")
break
        # if the URL is relative, prepend the domain
        if next_url.startswith("/"):
            next_url = "https://sktorrent.eu" + next_url
        # if it somehow pointed back to the same URL → break the infinite loop
if next_url == current_url:
print("⛔ Next URL equals current URL, stopping.")
break
print("➡️ Next page:", next_url)
current_url = next_url
page_index += 1
        # small pause between pages
time.sleep(1)
except Exception:
print("✅ No 'Dalsi >>' link found, reached last page. Done.")
break
print("\n🎉 DONE — All pages processed, torrents saved & torrent files downloaded (without re-downloading existing ones).")
driver.quit()
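
All of the listing scripts in this commit expect a sktorrent_cookies.json produced beforehand, but the commit does not include that step. A minimal sketch of such a helper, assuming a one-time manual login in the Selenium window, could look like this:

#!/usr/bin/env python3
# Hypothetical helper (not part of this commit): log in once by hand, then dump the
# Selenium session cookies into the sktorrent_cookies.json file the scrapers load.
import json
from pathlib import Path
from selenium import webdriver

COOKIE_FILE = Path("sktorrent_cookies.json")

driver = webdriver.Chrome()
driver.get("https://sktorrent.eu")
input("Log in manually in the browser window, then press Enter here...")

# driver.get_cookies() returns a list of dicts compatible with driver.add_cookie()
COOKIE_FILE.write_text(json.dumps(driver.get_cookies(), indent=2), encoding="utf-8")
print(f"Saved {len(driver.get_cookies())} cookies to {COOKIE_FILE}")
driver.quit()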

30 OpenTextListing v2.py (new file, 256 additions)

@@ -0,0 +1,256 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import pymysql
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
import re
import urllib.parse as urlparse
from pathlib import Path
import json
# ============================================================
# 1) MySQL CONNECTION
# ============================================================
db = pymysql.connect(
host="192.168.1.76",
port=3307,
user="root",
password="Vlado9674+",
database="torrents",
charset="utf8mb4",
autocommit=True
)
cursor = db.cursor()
# ============================================================
# 2) Selenium setup
# ============================================================
COOKIE_FILE = Path("sktorrent_cookies.json")
URL = "https://sktorrent.eu/torrent/torrents.php?active=0&category=24&order=data&by=DESC&zaner=&jazyk=&page=0"
chrome_options = Options()
chrome_options.add_argument("--start-maximized")
chrome_options.add_argument("--disable-notifications")
chrome_options.add_argument("--disable-popup-blocking")
chrome_options.add_argument("--disable-extensions")
driver = webdriver.Chrome(options=chrome_options)
driver.get("https://sktorrent.eu")
# Load cookies
if COOKIE_FILE.exists():
with open(COOKIE_FILE, "r") as f:
cookies = json.load(f)
for c in cookies:
driver.add_cookie(c)
print("🍪 Cookies loaded.")
driver.get(URL)
time.sleep(2)
# ============================================================
# Close interstitial popup reliably
# ============================================================
time.sleep(1)
try:
# JS close always exists even when HTML structure varies
driver.execute_script("try { interstitialBox.closeit(); } catch(e) {}")
print("🧹 Popup closed via JS fallback.")
time.sleep(1)
except Exception:
print(" Popup JS handler not found (probably no popup).")
# ============================================================
# 3) Extract table rows
# ============================================================
rows = driver.find_elements(By.CSS_SELECTOR, "table tr")
print("Total rows found:", len(rows))
real_rows = []
for row in rows:
cells = row.find_elements(By.TAG_NAME, "td")
# REAL TORRENT ROWS ALWAYS HAVE EXACTLY 7 TD CELLS
if len(cells) == 7:
real_rows.append(cells)
print("Real torrent rows:", len(real_rows))
print("")
# ============================================================
# 4) Function to extract fields from one row
# ============================================================
def parse_row(cells):
# --------------------------
# 1⃣ CATEGORY (cells[0])
# --------------------------
category = cells[0].text.strip()
# --------------------------
# 2⃣ TITLE + DETAILS LINK (always inside cells[2])
# --------------------------
title_links = cells[2].find_elements(By.TAG_NAME, "a")
if not title_links:
print("⚠️ Missing title link — skipping row")
return None
a_tag = title_links[0]
visible_name = a_tag.text.strip()
full_title = a_tag.get_attribute("title")
details_link = a_tag.get_attribute("href")
if not details_link:
print("⚠️ Row has no details link — skipping")
return None
# --------------------------
# 3⃣ TORRENT HASH
# --------------------------
parsed = urlparse.urlparse(details_link)
query = urlparse.parse_qs(parsed.query)
if "id" not in query:
print("⚠️ Skipping row with no torrent ID →", details_link)
return None
torrent_hash = query["id"][0]
# --------------------------
# 4⃣ TEXT BLOCK (size + date)
# --------------------------
text_block = cells[2].get_attribute("innerText")
text_block_clean = " ".join(text_block.split())
size_match = re.search(r"Velkost ([0-9\.]+ ?[KMG]B)", text_block_clean, re.IGNORECASE)
added_match = re.search(r"Pridany (.+?)(?:\sObrázok|$)", text_block_clean, re.IGNORECASE)
size_pretty = size_match.group(1) if size_match else None
added_pretty = added_match.group(1) if added_match else None
# Convert “18/11/2025 o 07:00” → “2025-11-18 07:00:00”
added_mysql = None
if added_pretty:
# Normalize formats like "29/11/2025 o 02:29", "29/11/2025 02:29:18"
clean = added_pretty.replace(" o ", " ").strip()
# Split date and time
date_part, *time_parts = clean.split(" ")
# If seconds are missing, add :00
time_part = time_parts[0] if time_parts else "00:00"
if len(time_part.split(":")) == 2:
time_part += ":00"
day, month, year = date_part.split("/")
added_mysql = f"{year}-{month}-{day} {time_part}"
# --------------------------
# 5⃣ IMAGE PREVIEW
# --------------------------
img_link = None
try:
image_a = cells[2].find_element(By.XPATH, ".//a[contains(text(),'Obrázok')]")
mouseover = image_a.get_attribute("onmouseover")
img_match = re.search(r"src=([^ ]+)", mouseover)
if img_match:
img_link = img_match.group(1).replace("'", "").strip()
if img_link.startswith("//"):
img_link = "https:" + img_link
    except Exception:
pass
# --------------------------
# 6⃣ SEEDERS (cells[4])
# --------------------------
seeders_a = cells[4].find_element(By.TAG_NAME, "a")
seeders_number = int(seeders_a.text.strip())
seeders_link = seeders_a.get_attribute("href")
# --------------------------
# 7⃣ LEECHERS (cells[5])
# --------------------------
leechers_a = cells[5].find_element(By.TAG_NAME, "a")
leechers_number = int(leechers_a.text.strip())
leechers_link = leechers_a.get_attribute("href")
# --------------------------
# Return result
# --------------------------
return {
"torrent_hash": torrent_hash,
"details_link": details_link,
"category": category,
"title_visible": visible_name,
"title_full": full_title,
"size_pretty": size_pretty,
"added_datetime": added_mysql,
"preview_image": img_link,
"seeders": seeders_number,
"seeders_link": seeders_link,
"leechers": leechers_number,
"leechers_link": leechers_link,
}
# ============================================================
# 5) MySQL INSERT
# ============================================================
insert_sql = """
INSERT INTO torrents (
torrent_hash, details_link, category, title_visible, title_full,
size_pretty, added_datetime, preview_image,
seeders, seeders_link, leechers, leechers_link
) VALUES (
%(torrent_hash)s, %(details_link)s, %(category)s, %(title_visible)s, %(title_full)s,
%(size_pretty)s, %(added_datetime)s, %(preview_image)s,
%(seeders)s, %(seeders_link)s, %(leechers)s, %(leechers_link)s
)
ON DUPLICATE KEY UPDATE
details_link = VALUES(details_link),
category = VALUES(category),
title_visible = VALUES(title_visible),
title_full = VALUES(title_full),
size_pretty = VALUES(size_pretty),
added_datetime = VALUES(added_datetime),
preview_image = VALUES(preview_image),
seeders = VALUES(seeders),
seeders_link = VALUES(seeders_link),
leechers = VALUES(leechers),
leechers_link = VALUES(leechers_link);
"""
# ============================================================
# 6) PROCESS ALL ROWS
# ============================================================
for cells in real_rows:
data = parse_row(cells)
if not data:
continue
print("💾 Saving:", data["title_visible"])
cursor.execute(insert_sql, data)
print("\n✅ DONE — All torrents saved to MySQL.")
driver.quit()
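
The INSERT ... ON DUPLICATE KEY UPDATE statements in these scripts only behave as upserts if torrent_hash carries a UNIQUE (or primary) key. The commit does not ship the schema, so the following is only a sketch of a compatible torrents table; the column types are assumptions, not taken from the repository:

# Assumed schema sketch (not part of the commit). The qBittorrent worker at the end
# of this commit additionally expects the qb_* columns sketched after that file.
CREATE_TORRENTS_SQL = """
CREATE TABLE IF NOT EXISTS torrents (
    id               INT AUTO_INCREMENT PRIMARY KEY,
    torrent_hash     VARCHAR(64) NOT NULL UNIQUE,
    details_link     TEXT,
    category         VARCHAR(255),
    title_visible    TEXT,
    title_full       TEXT,
    size_pretty      VARCHAR(32),
    added_datetime   DATETIME,
    preview_image    TEXT,
    seeders          INT,
    seeders_link     TEXT,
    leechers         INT,
    leechers_link    TEXT,
    torrent_filename VARCHAR(255),
    torrent_content  LONGBLOB
) CHARACTER SET utf8mb4;
"""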

30 OpenTextListing v3.py (new file, 291 additions)

@@ -0,0 +1,291 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import pymysql
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
import re
import urllib.parse as urlparse
from pathlib import Path
import json
import requests
# ============================================================
# 1) MySQL CONNECTION
# ============================================================
db = pymysql.connect(
host="192.168.1.76",
port=3307,
user="root",
password="Vlado9674+",
database="torrents",
charset="utf8mb4",
autocommit=True
)
cursor = db.cursor()
# ============================================================
# 2) Selenium setup
# ============================================================
COOKIE_FILE = Path("sktorrent_cookies.json")
URL = "https://sktorrent.eu/torrent/torrents.php?active=0"
chrome_options = Options()
chrome_options.add_argument("--start-maximized")
chrome_options.add_argument("--disable-notifications")
chrome_options.add_argument("--disable-popup-blocking")
chrome_options.add_argument("--disable-extensions")
driver = webdriver.Chrome(options=chrome_options)
driver.get("https://sktorrent.eu")
# Load cookies
session_cookies = []
if COOKIE_FILE.exists():
with open(COOKIE_FILE, "r") as f:
cookies = json.load(f)
for c in cookies:
driver.add_cookie(c)
session_cookies.append({c['name']: c['value']})
print("🍪 Cookies loaded.")
driver.get(URL)
time.sleep(2)
# ============================================================
# 3) Close interstitial popup robustly
# ============================================================
try:
driver.execute_script("try { interstitialBox.closeit(); } catch(e) {}")
print("🧹 Popup closed via JS fallback.")
time.sleep(1)
except Exception:
print(" No popup found.")
# ============================================================
# Convert Selenium cookies → Python requests cookies
# ============================================================
requests_session = requests.Session()
for ck in driver.get_cookies():
requests_session.cookies.set(ck["name"], ck["value"])
# ============================================================
# 4) Extract table rows
# ============================================================
rows = driver.find_elements(By.CSS_SELECTOR, "table tr")
print("Total rows found:", len(rows))
real_rows = []
for row in rows:
cells = row.find_elements(By.TAG_NAME, "td")
# REAL TORRENT ROWS ALWAYS HAVE EXACTLY 7 TD CELLS
if len(cells) == 7:
real_rows.append(cells)
print("Real torrent rows:", len(real_rows))
print("")
# ============================================================
# 5) Function to extract fields from one row
# ============================================================
def parse_row(cells):
# --------------------------
# 1⃣ CATEGORY
# --------------------------
category = cells[0].text.strip()
# --------------------------
# 2⃣ DOWNLOAD LINK FOR TORRENT FILE
# --------------------------
try:
download_a = cells[1].find_element(By.TAG_NAME, "a")
download_link = download_a.get_attribute("href")
    except Exception:
print("⚠️ No download link in row, skipping.")
return None
parsed_dl = urlparse.urlparse(download_link)
dl_query = urlparse.parse_qs(parsed_dl.query)
torrent_filename = dl_query.get("f", ["unknown.torrent"])[0]
# --------------------------
# 3⃣ Title + details link (in cell[2])
# --------------------------
title_links = cells[2].find_elements(By.TAG_NAME, "a")
if not title_links:
print("⚠️ No title link — skipping row")
return None
a_tag = title_links[0]
visible_name = a_tag.text.strip()
full_title = a_tag.get_attribute("title")
details_link = a_tag.get_attribute("href")
if not details_link:
print("⚠️ Row has no details link — skipping")
return None
# --------------------------
# Extract torrent hash from ?id=
# --------------------------
parsed = urlparse.urlparse(details_link)
query = urlparse.parse_qs(parsed.query)
if "id" not in query:
print("⚠️ Skipping row with no torrent ID →", details_link)
return None
torrent_hash = query["id"][0]
# --------------------------
# 4⃣ Size + date parsing
# --------------------------
text_block = cells[2].get_attribute("innerText")
text_block_clean = " ".join(text_block.split())
size_match = re.search(r"Velkost ([0-9\.]+ ?[KMG]B)", text_block_clean, re.IGNORECASE)
added_match = re.search(r"Pridany (.+?)(?:\sObrázok|$)", text_block_clean, re.IGNORECASE)
size_pretty = size_match.group(1) if size_match else None
added_pretty = added_match.group(1) if added_match else None
# Robust time normalization
added_mysql = None
if added_pretty:
clean = added_pretty.replace(" o ", " ").strip()
parts = clean.split(" ")
date_part = parts[0]
time_part = parts[1] if len(parts) > 1 else "00:00:00"
# add seconds if missing
if len(time_part.split(":")) == 2:
time_part += ":00"
day, month, year = date_part.split("/")
added_mysql = f"{year}-{month}-{day} {time_part}"
# --------------------------
# 5⃣ Image preview
# --------------------------
img_link = None
try:
image_a = cells[2].find_element(By.XPATH, ".//a[contains(text(),'Obrázok')]")
mouseover = image_a.get_attribute("onmouseover")
img_match = re.search(r"src=([^ ]+)", mouseover)
if img_match:
img_link = img_match.group(1).replace("'", "").strip()
if img_link.startswith("//"):
img_link = "https:" + img_link
    except Exception:
pass
# --------------------------
# 6⃣ SEEDERS / LEECHERS
# --------------------------
seeders_a = cells[4].find_element(By.TAG_NAME, "a")
seeders_number = int(seeders_a.text.strip())
seeders_link = seeders_a.get_attribute("href")
leechers_a = cells[5].find_element(By.TAG_NAME, "a")
leechers_number = int(leechers_a.text.strip())
leechers_link = leechers_a.get_attribute("href")
# --------------------------
# 7⃣ DOWNLOAD TORRENT CONTENT (.torrent)
# --------------------------
try:
torrent_content = requests_session.get(download_link).content
except Exception as e:
print(f"⚠️ Could not download torrent file for {torrent_hash}: {e}")
torrent_content = None
# --------------------------
# FINAL DICTIONARY
# --------------------------
return {
"torrent_hash": torrent_hash,
"details_link": details_link,
"category": category,
"title_visible": visible_name,
"title_full": full_title,
"size_pretty": size_pretty,
"added_datetime": added_mysql,
"preview_image": img_link,
"seeders": seeders_number,
"seeders_link": seeders_link,
"leechers": leechers_number,
"leechers_link": leechers_link,
"torrent_filename": torrent_filename,
"torrent_content": torrent_content,
}
# ============================================================
# 6) MySQL INSERT
# ============================================================
insert_sql = """
INSERT INTO torrents (
torrent_hash, details_link, category, title_visible, title_full,
size_pretty, added_datetime, preview_image,
seeders, seeders_link, leechers, leechers_link,
torrent_filename, torrent_content
) VALUES (
%(torrent_hash)s, %(details_link)s, %(category)s, %(title_visible)s, %(title_full)s,
%(size_pretty)s, %(added_datetime)s, %(preview_image)s,
%(seeders)s, %(seeders_link)s, %(leechers)s, %(leechers_link)s,
%(torrent_filename)s, %(torrent_content)s
)
ON DUPLICATE KEY UPDATE
details_link = VALUES(details_link),
category = VALUES(category),
title_visible = VALUES(title_visible),
title_full = VALUES(title_full),
size_pretty = VALUES(size_pretty),
added_datetime = VALUES(added_datetime),
preview_image = VALUES(preview_image),
seeders = VALUES(seeders),
seeders_link = VALUES(seeders_link),
leechers = VALUES(leechers),
leechers_link = VALUES(leechers_link),
torrent_filename = VALUES(torrent_filename),
torrent_content = VALUES(torrent_content);
"""
# ============================================================
# 7) PROCESS ALL ROWS
# ============================================================
for cells in real_rows:
data = parse_row(cells)
if not data:
continue
print("💾 Saving:", data["title_visible"])
cursor.execute(insert_sql, data)
print("\n✅ DONE — All torrents saved to MySQL & torrent files downloaded.")
driver.quit()
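
From v3 on, the scripts store whatever bytes the download URL returns, so an expired session can silently save an HTML error page into torrent_content. An optional safeguard, mirroring the is_valid_torrent() check used by the qBittorrent worker later in this commit, would be to bencode-decode the blob before writing it (a sketch, not part of the commit):

import bencodepy

def looks_like_torrent(blob: bytes) -> bool:
    """Return True only if blob bencode-decodes to a dict with an 'info' key."""
    try:
        data = bencodepy.decode(blob)
    except Exception:
        return False
    return isinstance(data, dict) and b"info" in data

# e.g. right after resp = requests_session.get(download_link):
#     if not looks_like_torrent(resp.content):
#         torrent_content = None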

30 OpenTextListing v4.py (new file, 375 additions)

@@ -0,0 +1,375 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import pymysql
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
import re
import urllib.parse as urlparse
from pathlib import Path
import json
import requests
# ============================================================
# 1) MySQL CONNECTION
# ============================================================
db = pymysql.connect(
host="192.168.1.76",
port=3307,
user="root",
password="Vlado9674+",
database="torrents",
charset="utf8mb4",
autocommit=True
)
cursor = db.cursor()
# ============================================================
# 2) Selenium setup
# ============================================================
COOKIE_FILE = Path("sktorrent_cookies.json")
# Start URL for category 24, sorted by date DESC
START_URL = (
"https://sktorrent.eu/torrent/torrents.php"
"?active=0&category=24&order=data&by=DESC&zaner=&jazyk=&page=0"
)
chrome_options = Options()
chrome_options.add_argument("--start-maximized")
chrome_options.add_argument("--disable-notifications")
chrome_options.add_argument("--disable-popup-blocking")
chrome_options.add_argument("--disable-extensions")
driver = webdriver.Chrome(options=chrome_options)
# Window position and size (so it does not overlap PyCharm)
driver.set_window_position(380, 50)   # roughly 10 cm from the left edge
driver.set_window_size(1350, 1000)    # adjust to your monitor
# Open the main page first so cookies can be added for the correct domain
driver.get("https://sktorrent.eu")
# Load cookies from the JSON file
if COOKIE_FILE.exists():
with open(COOKIE_FILE, "r") as f:
cookies = json.load(f)
for c in cookies:
driver.add_cookie(c)
print("🍪 Cookies loaded.")
else:
print("⚠️ Cookie file not found, you may not be logged in!")
# ============================================================
# 3) Convert Selenium cookies → requests.Session (for downloading .torrent files)
# ============================================================
requests_session = requests.Session()
for ck in driver.get_cookies():
requests_session.cookies.set(ck["name"], ck["value"])
print("🔗 Requests session initialized with Selenium cookies.")
# ============================================================
# 4) Helper to close the interstitial popup
# ============================================================
def close_popup_if_any():
"""Zkusí zavřít interstitial reklamu pomocí JS funkce interstitialBox.closeit()."""
try:
driver.execute_script("try { interstitialBox.closeit(); } catch(e) {}")
        # Short pause so the DOM can settle
time.sleep(0.5)
print("🧹 Popup closed via JS fallback (if present).")
except Exception as e:
print(" Popup JS handler not found:", e)
# ============================================================
# 5) Parse one row (one torrent)
# ============================================================
def parse_row(cells):
"""
cells: list<WebElement> o délce 7
Struktura:
0: kategorie
1: download link (.torrent)
2: název + velikost + datum + 'Obrázok' + žánr
3: -- (ignorujeme)
4: seeders
5: leechers
6: completed
"""
# --------------------------
# 1⃣ CATEGORY
# --------------------------
category = cells[0].text.strip()
# --------------------------
# 2⃣ DOWNLOAD LINK FOR TORRENT FILE (cells[1])
# --------------------------
try:
download_a = cells[1].find_element(By.TAG_NAME, "a")
download_link = download_a.get_attribute("href")
    except Exception:
print("⚠️ No download link in row, skipping.")
return None
parsed_dl = urlparse.urlparse(download_link)
dl_query = urlparse.parse_qs(parsed_dl.query)
torrent_filename = dl_query.get("f", ["unknown.torrent"])[0]
# --------------------------
# 3⃣ TITLE + DETAILS LINK (in cell[2])
# --------------------------
title_links = cells[2].find_elements(By.TAG_NAME, "a")
if not title_links:
print("⚠️ No title link — skipping row")
return None
a_tag = title_links[0]
visible_name = a_tag.text.strip()
full_title = a_tag.get_attribute("title")
details_link = a_tag.get_attribute("href")
if not details_link:
print("⚠️ Row has no details link — skipping")
return None
# --------------------------
# Extract torrent hash from ?id=
# --------------------------
parsed = urlparse.urlparse(details_link)
query = urlparse.parse_qs(parsed.query)
if "id" not in query:
print("⚠️ Skipping row with no torrent ID →", details_link)
return None
torrent_hash = query["id"][0]
# --------------------------
# 4⃣ Size + date parsing
# --------------------------
text_block = cells[2].get_attribute("innerText")
text_block_clean = " ".join(text_block.split())
size_match = re.search(r"Velkost ([0-9\.]+ ?[KMG]B)", text_block_clean, re.IGNORECASE)
added_match = re.search(r"Pridany (.+?)(?:\sObrázok|$)", text_block_clean, re.IGNORECASE)
size_pretty = size_match.group(1) if size_match else None
added_pretty = added_match.group(1) if added_match else None
    # Robust conversion of the date/time into a MySQL datetime
added_mysql = None
if added_pretty:
# "29/11/2025 o 02:29" → "29/11/2025 02:29"
clean = added_pretty.replace(" o ", " ").strip()
parts = clean.split(" ")
date_part = parts[0]
time_part = parts[1] if len(parts) > 1 else "00:00:00"
        # add seconds if they are missing
if len(time_part.split(":")) == 2:
time_part += ":00"
day, month, year = date_part.split("/")
added_mysql = f"{year}-{month}-{day} {time_part}"
# --------------------------
# 5⃣ Image preview
# --------------------------
img_link = None
try:
image_a = cells[2].find_element(
By.XPATH,
".//a[contains(text(),'Obrázok')]"
)
mouseover = image_a.get_attribute("onmouseover")
img_match = re.search(r"src=([^ ]+)", mouseover)
if img_match:
img_link = img_match.group(1).replace("'", "").strip()
if img_link.startswith("//"):
img_link = "https:" + img_link
    except Exception:
        pass
# --------------------------
# 6⃣ SEEDERS / LEECHERS
# --------------------------
seeders_a = cells[4].find_element(By.TAG_NAME, "a")
seeders_number = int(seeders_a.text.strip())
seeders_link = seeders_a.get_attribute("href")
leechers_a = cells[5].find_element(By.TAG_NAME, "a")
leechers_number = int(leechers_a.text.strip())
leechers_link = leechers_a.get_attribute("href")
# --------------------------
# 7⃣ DOWNLOAD TORRENT CONTENT (.torrent)
# --------------------------
torrent_content = None
    time.sleep(3)  # pause between torrents
try:
resp = requests_session.get(download_link)
resp.raise_for_status()
torrent_content = resp.content
except Exception as e:
print(f"⚠️ Could not download torrent file for {torrent_hash}: {e}")
torrent_content = None
# --------------------------
# FINAL DICTIONARY
# --------------------------
return {
"torrent_hash": torrent_hash,
"details_link": details_link,
"category": category,
"title_visible": visible_name,
"title_full": full_title,
"size_pretty": size_pretty,
"added_datetime": added_mysql,
"preview_image": img_link,
"seeders": seeders_number,
"seeders_link": seeders_link,
"leechers": leechers_number,
"leechers_link": leechers_link,
"torrent_filename": torrent_filename,
"torrent_content": torrent_content,
}
# ============================================================
# 6) MySQL INSERT
# ============================================================
insert_sql = """
INSERT INTO torrents (
torrent_hash, details_link, category, title_visible, title_full,
size_pretty, added_datetime, preview_image,
seeders, seeders_link, leechers, leechers_link,
torrent_filename, torrent_content
) VALUES (
%(torrent_hash)s, %(details_link)s, %(category)s, %(title_visible)s, %(title_full)s,
%(size_pretty)s, %(added_datetime)s, %(preview_image)s,
%(seeders)s, %(seeders_link)s, %(leechers)s, %(leechers_link)s,
%(torrent_filename)s, %(torrent_content)s
)
ON DUPLICATE KEY UPDATE
details_link = VALUES(details_link),
category = VALUES(category),
title_visible = VALUES(title_visible),
title_full = VALUES(title_full),
size_pretty = VALUES(size_pretty),
added_datetime = VALUES(added_datetime),
preview_image = VALUES(preview_image),
seeders = VALUES(seeders),
seeders_link = VALUES(seeders_link),
leechers = VALUES(leechers),
leechers_link = VALUES(leechers_link),
torrent_filename = VALUES(torrent_filename),
torrent_content = VALUES(torrent_content);
"""
# ============================================================
# 7) Process a single listing page
# ============================================================
def process_current_page(page_index: int):
"""
Zpracuje aktuálně otevřenou stránku:
- najde všechny "REAL TORRENT ROWS" (7 td)
- pro každý torrent:
* parse_row
* insert/update do DB
"""
rows = driver.find_elements(By.CSS_SELECTOR, "table tr")
real_rows = []
for row in rows:
cells = row.find_elements(By.TAG_NAME, "td")
# REAL TORRENT ROWS ALWAYS HAVE EXACTLY 7 TD CELLS
if len(cells) == 7:
real_rows.append(cells)
print(f"📄 Page {page_index}: {len(real_rows)} torrent rows")
for cells in real_rows:
data = parse_row(cells)
if not data:
continue
print(f" 💾 [{page_index}] Saving:", data["title_visible"])
cursor.execute(insert_sql, data)
# ============================================================
# 8) Hlavní stránkovací cyklus
# ============================================================
current_url = START_URL
page_index = 0
while True:
print(f"\n🌐 Loading page {page_index}: {current_url}")
driver.get(current_url)
time.sleep(2)
    # close the popup, if any
    close_popup_if_any()
    # process the current page
    process_current_page(page_index)
    # try to find the "Dalsi >>" (Next) link
try:
next_btn = driver.find_element(
By.XPATH,
"//a[b[contains(text(),'Dalsi')]]"
)
next_url = next_btn.get_attribute("href")
if not next_url:
print("⛔ Next link has no href, stopping.")
break
        # if the URL is relative, prepend the domain
        if next_url.startswith("/"):
            next_url = "https://sktorrent.eu" + next_url
        # if it somehow pointed back to the same URL → break the infinite loop
if next_url == current_url:
print("⛔ Next URL equals current URL, stopping.")
break
print("➡️ Next page:", next_url)
current_url = next_url
page_index += 1
        # small pause between pages
time.sleep(1)
except Exception:
print("✅ No 'Dalsi >>' link found, reached last page. Done.")
break
print("\n🎉 DONE — All pages processed, torrents saved & torrent files downloaded.")
driver.quit()
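
The "Pridany" date normalization is duplicated across v2 through v5 and the reporter. An equivalent helper built on datetime.strptime is sketched below; the format strings are assumptions based on the examples in the comments ("29/11/2025 o 02:29", optionally with seconds):

from datetime import datetime
from typing import Optional

def added_to_mysql(added_pretty: Optional[str]) -> Optional[str]:
    """Normalize strings like '29/11/2025 o 02:29' to 'YYYY-MM-DD HH:MM:SS'."""
    if not added_pretty:
        return None
    clean = added_pretty.replace(" o ", " ").strip()
    for fmt in ("%d/%m/%Y %H:%M:%S", "%d/%m/%Y %H:%M", "%d/%m/%Y"):
        try:
            return datetime.strptime(clean, fmt).strftime("%Y-%m-%d %H:%M:%S")
        except ValueError:
            continue
    return None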

EmailMessagingGraph.py (new file, 91 additions)

@@ -0,0 +1,91 @@
"""
EmailMessagingGraph.py
----------------------
Private Microsoft Graph mail sender
Application permissions, shared mailbox
"""
import msal
import requests
from functools import lru_cache
from typing import Union, List
# =========================
# PRIVATE CONFIG (ONLY YOU)
# =========================
TENANT_ID = "7d269944-37a4-43a1-8140-c7517dc426e9"
CLIENT_ID = "4b222bfd-78c9-4239-a53f-43006b3ed07f"
CLIENT_SECRET = "Txg8Q~MjhocuopxsJyJBhPmDfMxZ2r5WpTFj1dfk"
SENDER = "reports@buzalka.cz"
AUTHORITY = f"https://login.microsoftonline.com/{TENANT_ID}"
SCOPE = ["https://graph.microsoft.com/.default"]
@lru_cache(maxsize=1)
def _get_token() -> str:
app = msal.ConfidentialClientApplication(
CLIENT_ID,
authority=AUTHORITY,
client_credential=CLIENT_SECRET,
)
token = app.acquire_token_for_client(scopes=SCOPE)
if "access_token" not in token:
raise RuntimeError(f"Graph auth failed: {token}")
return token["access_token"]
def send_mail(
to: Union[str, List[str]],
subject: str,
body: str,
*,
html: bool = False,
):
"""
Send email via Microsoft Graph.
:param to: email or list of emails
:param subject: subject
:param body: email body
:param html: True = HTML, False = plain text
"""
if isinstance(to, str):
to = [to]
payload = {
"message": {
"subject": subject,
"body": {
"contentType": "HTML" if html else "Text",
"content": body,
},
"toRecipients": [
{"emailAddress": {"address": addr}} for addr in to
],
},
"saveToSentItems": "true",
}
headers = {
"Authorization": f"Bearer {_get_token()}",
"Content-Type": "application/json",
}
r = requests.post(
f"https://graph.microsoft.com/v1.0/users/{SENDER}/sendMail",
headers=headers,
json=payload,
timeout=30,
)
if r.status_code != 202:
raise RuntimeError(
f"sendMail failed [{r.status_code}]: {r.text}"
)
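
For reference, a call to the module defined above looks like this (the recipient address is a placeholder):

from EmailMessagingGraph import send_mail

send_mail(
    to=["someone@example.com"],     # placeholder recipient
    subject="Graph sendMail smoke test",
    body="<p>Hello from the reports mailbox.</p>",
    html=True,
)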

Reporter_ReadNewTorrents.py (new file, 342 additions)

@@ -0,0 +1,342 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import pymysql
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
import re
import urllib.parse as urlparse
from pathlib import Path
import json
import requests
import datetime
import sys
from EmailMessagingGraph import send_mail
# ============================================================
# RUNTIME INFO
# ============================================================
RUN_START = datetime.datetime.now()
processed_count = 0
new_torrent_count = 0
existing_torrent_count = 0
new_titles = []
print(f"🕒 Run started at {RUN_START:%Y-%m-%d %H:%M:%S}")
sys.stdout.flush()
# ============================================================
# 1) MySQL CONNECTION
# ============================================================
db = pymysql.connect(
host="192.168.1.76",
port=3307,
user="root",
password="Vlado9674+",
database="torrents",
charset="utf8mb4",
autocommit=True,
)
cursor = db.cursor()
# ============================================================
# 2) Selenium setup
# ============================================================
COOKIE_FILE = Path("sktorrent_cookies.json")
START_URL = (
"https://sktorrent.eu/torrent/torrents.php"
"?search=&category=24&zaner=&jazyk=&active=0"
)
chrome_options = Options()
chrome_options.add_argument("--start-maximized")
chrome_options.add_argument("--disable-notifications")
chrome_options.add_argument("--disable-popup-blocking")
chrome_options.add_argument("--disable-extensions")
driver = webdriver.Chrome(options=chrome_options)
driver.set_window_position(380, 50)
driver.set_window_size(1350, 1000)
driver.get("https://sktorrent.eu")
if COOKIE_FILE.exists():
with open(COOKIE_FILE, "r", encoding="utf-8") as f:
cookies = json.load(f)
for c in cookies:
driver.add_cookie(c)
print("🍪 Cookies loaded.")
else:
print("⚠️ Cookie file not found login may be required.")
# ============================================================
# 3) requests.Session from Selenium cookies
# ============================================================
requests_session = requests.Session()
for ck in driver.get_cookies():
requests_session.cookies.set(ck["name"], ck["value"])
print("🔗 Requests session initialized.")
# ============================================================
# 4) Popup handler
# ============================================================
def close_popup_if_any():
try:
driver.execute_script("try { interstitialBox.closeit(); } catch(e) {}")
time.sleep(0.5)
except Exception:
pass
# ============================================================
# 5) Parse one torrent row
# ============================================================
def parse_row(cells):
category = cells[0].text.strip()
try:
download_a = cells[1].find_element(By.TAG_NAME, "a")
download_link = download_a.get_attribute("href")
    except Exception:
return None
parsed_dl = urlparse.urlparse(download_link)
dl_query = urlparse.parse_qs(parsed_dl.query)
torrent_filename = dl_query.get("f", ["unknown.torrent"])[0]
title_links = cells[2].find_elements(By.TAG_NAME, "a")
if not title_links:
return None
a_tag = title_links[0]
visible_name = a_tag.text.strip()
full_title = a_tag.get_attribute("title")
details_link = a_tag.get_attribute("href")
parsed = urlparse.urlparse(details_link)
query = urlparse.parse_qs(parsed.query)
if "id" not in query:
return None
torrent_hash = query["id"][0]
text_block = cells[2].get_attribute("innerText")
text_block_clean = " ".join(text_block.split())
size_match = re.search(r"Velkost ([0-9\.]+ ?[KMG]B)", text_block_clean, re.IGNORECASE)
added_match = re.search(r"Pridany (.+?)(?:\sObrázok|$)", text_block_clean, re.IGNORECASE)
size_pretty = size_match.group(1) if size_match else None
added_pretty = added_match.group(1) if added_match else None
# ======================================================
    # Date processing (same normalization as in the OpenTextListing scripts)
# ======================================================
added_mysql = None
if added_pretty:
# "29/11/2025 o 02:29" → "29/11/2025 02:29"
clean = added_pretty.replace(" o ", " ").strip()
parts = clean.split(" ")
date_part = parts[0]
time_part = parts[1] if len(parts) > 1 else "00:00:00"
        # add seconds if they are missing
if len(time_part.split(":")) == 2:
time_part += ":00"
day, month, year = date_part.split("/")
added_mysql = f"{year}-{month}-{day} {time_part}"
# ======================================================
# Image preview
# ======================================================
img_link = None
try:
image_a = cells[2].find_element(
By.XPATH,
".//a[contains(text(),'Obrázok')]"
)
mouseover = image_a.get_attribute("onmouseover")
img_match = re.search(r"src=([^ ]+)", mouseover)
if img_match:
img_link = img_match.group(1).replace("'", "").strip()
if img_link.startswith("//"):
img_link = "https:" + img_link
    except Exception:
        pass
seeders_a = cells[4].find_element(By.TAG_NAME, "a")
seeders_number = int(seeders_a.text.strip())
seeders_link = seeders_a.get_attribute("href")
leechers_a = cells[5].find_element(By.TAG_NAME, "a")
leechers_number = int(leechers_a.text.strip())
leechers_link = leechers_a.get_attribute("href")
cursor.execute(
"SELECT torrent_content FROM torrents WHERE torrent_hash=%s",
(torrent_hash,),
)
row = cursor.fetchone()
already_have_torrent = row is not None and row[0] is not None
torrent_content = None
if not already_have_torrent:
time.sleep(3)
try:
resp = requests_session.get(download_link)
resp.raise_for_status()
torrent_content = resp.content
        except Exception:
torrent_content = None
return {
"torrent_hash": torrent_hash,
"details_link": details_link,
"category": category,
"title_visible": visible_name,
"title_full": full_title,
"size_pretty": size_pretty,
"added_datetime": added_mysql,
"preview_image": img_link,
"seeders": seeders_number,
"seeders_link": seeders_link,
"leechers": leechers_number,
"leechers_link": leechers_link,
"torrent_filename": torrent_filename,
"torrent_content": torrent_content if not already_have_torrent else None,
"is_new_torrent": not already_have_torrent,
}
# ============================================================
# 6) INSERT SQL
# ============================================================
insert_sql = """
INSERT INTO torrents (
torrent_hash, details_link, category, title_visible, title_full,
size_pretty, added_datetime, preview_image,
seeders, seeders_link, leechers, leechers_link,
torrent_filename, torrent_content
) VALUES (
%(torrent_hash)s, %(details_link)s, %(category)s, %(title_visible)s, %(title_full)s,
%(size_pretty)s, %(added_datetime)s, %(preview_image)s,
%(seeders)s, %(seeders_link)s, %(leechers)s, %(leechers_link)s,
%(torrent_filename)s, %(torrent_content)s
)
ON DUPLICATE KEY UPDATE
details_link = VALUES(details_link),
category = VALUES(category),
title_visible = VALUES(title_visible),
title_full = VALUES(title_full),
size_pretty = VALUES(size_pretty),
added_datetime = VALUES(added_datetime),
preview_image = VALUES(preview_image),
seeders = VALUES(seeders),
seeders_link = VALUES(seeders_link),
leechers = VALUES(leechers),
leechers_link = VALUES(leechers_link),
torrent_filename = VALUES(torrent_filename),
torrent_content = COALESCE(VALUES(torrent_content), torrent_content);
"""
# ============================================================
# 7) PROCESS FIRST PAGE ONLY
# ============================================================
print("\n🌐 Loading FIRST page")
driver.get(START_URL)
time.sleep(2)
close_popup_if_any()
rows = driver.find_elements(By.CSS_SELECTOR, "table tr")
real_rows = [
r.find_elements(By.TAG_NAME, "td")
for r in rows
if len(r.find_elements(By.TAG_NAME, "td")) == 7
]
print(f"📄 Found {len(real_rows)} torrent rows")
for cells in real_rows:
try:
data = parse_row(cells)
except Exception as e:
print(f"⚠️ parse_row failed: {e}")
continue
if not data:
continue
processed_count += 1
if data["is_new_torrent"]:
new_torrent_count += 1
new_titles.append(data["title_visible"])
else:
existing_torrent_count += 1
print("💾 Saving:", data["title_visible"])
cursor.execute(insert_sql, data)
# ============================================================
# 8) SEND EMAIL REPORT
# ============================================================
RUN_END = datetime.datetime.now()
subject = f"SKTorrent hourly run {RUN_START:%Y-%m-%d %H:%M}"
lines = [
f"Run started: {RUN_START:%Y-%m-%d %H:%M:%S}",
f"Run finished: {RUN_END:%Y-%m-%d %H:%M:%S}",
"",
f"Processed torrents: {processed_count}",
f"New torrent files downloaded: {new_torrent_count}",
f"Already known torrents: {existing_torrent_count}",
]
if new_titles:
lines.append("")
lines.append("New torrents:")
for t in new_titles:
lines.append(f"- {t}")
body = "\n".join(lines)
send_mail(
to="vladimir.buzalka@buzalka.cz",
subject=subject,
body=body,
html=False,
)
print("📧 Email report sent.")
driver.quit()
print("🎉 DONE")

(new file, 337 additions)

@@ -0,0 +1,337 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from datetime import datetime, timedelta
import pymysql
import qbittorrentapi
import bencodepy
from EmailMessagingGraph import send_mail
# ==============================
# ⚙ CONFIGURATION
# ==============================
DB_CONFIG = {
"host": "192.168.1.76",
"port": 3307,
"user": "root",
"password": "Vlado9674+",
"database": "torrents",
"charset": "utf8mb4",
"autocommit": True,
}
QBT_CONFIG = {
"host": "192.168.1.76",
"port": 8080,
"username": "admin",
"password": "adminadmin",
}
MAX_ACTIVE_DOWNLOADS = 10
DEAD_TORRENT_MINUTES = 5
DEFAULT_SAVE_PATH = None
MAIL_TO = "vladimir.buzalka@buzalka.cz"
MAX_LIST_ITEMS = 50 # cap lists in email
# ==============================
# 🧮 RUNTIME STATS + LISTS
# ==============================
RUN_START = datetime.now()
stat_synced = 0
stat_completed = 0
stat_dead = 0
stat_enqueued = 0
deleted_completed = [] # list[str]
deleted_dead = [] # list[str]
added_new = [] # list[str]
active_downloading = [] # list[str]
# ==============================
# 🔧 CONNECT
# ==============================
db = pymysql.connect(**DB_CONFIG)
cursor = db.cursor(pymysql.cursors.DictCursor)
qb = qbittorrentapi.Client(**QBT_CONFIG)
try:
qb.auth_log_in()
print("✅ Connected to qBittorrent.")
except Exception as e:
raise SystemExit(f"❌ Could not connect to qBittorrent: {e}")
# ==============================
# 🧪 TORRENT VALIDATION
# ==============================
def is_valid_torrent(blob: bytes) -> bool:
try:
data = bencodepy.decode(blob)
return isinstance(data, dict) and b"info" in data
except Exception:
return False
# ==============================
# 🔄 SYNC FROM QB → DB
# ==============================
def sync_qb_to_db():
global stat_synced
torrents = qb.torrents_info()
stat_synced = len(torrents)
for t in torrents:
completion_dt = None
if getattr(t, "completion_on", 0):
try:
completion_dt = datetime.fromtimestamp(t.completion_on)
except Exception:
pass
cursor.execute("""
UPDATE torrents
SET qb_added = 1,
qb_hash = COALESCE(qb_hash, %s),
qb_state = %s,
qb_progress = %s,
qb_savepath = %s,
qb_completed_datetime =
IF(%s IS NOT NULL AND qb_completed_datetime IS NULL, %s, qb_completed_datetime),
qb_last_update = NOW()
WHERE qb_hash = %s OR torrent_hash = %s
""", (
t.hash,
t.state,
float(t.progress) * 100.0,
getattr(t, "save_path", None),
completion_dt,
completion_dt,
t.hash,
t.hash,
))
# ==============================
# 🧹 HANDLE COMPLETED + DEAD
# ==============================
def handle_completed_and_dead():
global stat_completed, stat_dead
torrents = qb.torrents_info()
for t in torrents:
t_hash = t.hash
state = t.state
progress = float(t.progress)
# ✔ COMPLETED
if progress >= 1.0 or state in {"completed", "uploading", "stalledUP", "queuedUP"}:
stat_completed += 1
deleted_completed.append(t.name)
try:
qb.torrents_delete(torrent_hashes=t_hash, delete_files=False)
except Exception as e:
# keep name in report; just note error in DB state if you want later
print(f"⚠️ delete (keep data) failed for {t.name}: {e}")
cursor.execute("""
UPDATE torrents
SET qb_state='completed',
qb_progress=100,
qb_completed_datetime=NOW(),
qb_last_update=NOW()
WHERE qb_hash=%s OR torrent_hash=%s
""", (t_hash, t_hash))
continue
# ❌ DEAD (never seen_complete)
try:
props = qb.torrents_properties(t_hash)
except Exception:
continue
if getattr(props, "last_seen", 0) == -1:
added_dt = getattr(t, "added_on", 0)
if added_dt:
if datetime.now() - datetime.fromtimestamp(added_dt) > timedelta(minutes=DEAD_TORRENT_MINUTES):
stat_dead += 1
deleted_dead.append(t.name)
try:
qb.torrents_delete(torrent_hashes=t_hash, delete_files=True)
except Exception as e:
print(f"⚠️ delete (files) failed for {t.name}: {e}")
cursor.execute("""
UPDATE torrents
SET qb_state='dead',
qb_last_update=NOW()
WHERE qb_hash=%s OR torrent_hash=%s
""", (t_hash, t_hash))
# ==============================
# 📊 ACTIVE DOWNLOADS
# ==============================
def count_active_downloads():
return sum(1 for t in qb.torrents_info() if float(t.progress) < 1.0)
def snapshot_active_downloading():
"""
Capture current actively downloading torrents (progress < 100%).
"""
active = []
for t in qb.torrents_info():
prog = float(t.progress)
if prog < 1.0:
            active.append(f"{t.name} — {prog*100:.1f}% — {t.state}")
return sorted(active)
# ==============================
# ENQUEUE NEW TORRENTS
# ==============================
def enqueue_new_torrents():
global stat_enqueued
active = count_active_downloads()
if active >= MAX_ACTIVE_DOWNLOADS:
return
slots = MAX_ACTIVE_DOWNLOADS - active
cursor.execute("""
SELECT id, torrent_hash, torrent_content, torrent_filename
FROM torrents
WHERE (qb_added IS NULL OR qb_added = 0)
AND torrent_content IS NOT NULL
ORDER BY added_datetime DESC
LIMIT %s
""", (slots,))
for row in cursor.fetchall():
blob = row["torrent_content"]
if not blob:
continue
if not is_valid_torrent(blob):
cursor.execute("""
UPDATE torrents
SET qb_state='invalid',
torrent_content=NULL,
qb_last_update=NOW()
WHERE id=%s
""", (row["id"],))
continue
# Add torrent
try:
qb.torrents_add(torrent_files=blob, savepath=DEFAULT_SAVE_PATH)
except Exception as e:
print(f"❌ Failed to add {row['torrent_hash']}: {e}")
continue
stat_enqueued += 1
added_new.append(row.get("torrent_filename") or row["torrent_hash"])
cursor.execute("""
UPDATE torrents
SET qb_added=1,
qb_hash=COALESCE(qb_hash, %s),
qb_state='added',
qb_last_update=NOW()
WHERE id=%s
""", (row["torrent_hash"], row["id"]))
# ==============================
# ✉️ EMAIL HELPERS
# ==============================
def format_list(title: str, items: list[str]) -> list[str]:
lines = []
if not items:
return [f"{title}: (none)"]
lines.append(f"{title}: {len(items)}")
shown = items[:MAX_LIST_ITEMS]
for it in shown:
lines.append(f" - {it}")
if len(items) > MAX_LIST_ITEMS:
lines.append(f" ... (+{len(items) - MAX_LIST_ITEMS} more)")
return lines
# ==============================
# 🏁 MAIN (ONE RUN)
# ==============================
print("🚀 QB worker run started")
try:
sync_qb_to_db()
handle_completed_and_dead()
enqueue_new_torrents()
# Snapshot after enqueue/deletions, so email reflects end-state
active_downloading = snapshot_active_downloading()
finally:
db.close()
# ==============================
# 📧 EMAIL REPORT
# ==============================
RUN_END = datetime.now()
body_lines = [
f"Run started : {RUN_START:%Y-%m-%d %H:%M:%S}",
f"Run finished: {RUN_END:%Y-%m-%d %H:%M:%S}",
"",
f"QB torrents synced : {stat_synced}",
f"Completed removed : {stat_completed}",
f"Dead removed : {stat_dead}",
f"New torrents added : {stat_enqueued}",
f"Active downloads : {sum(1 for _ in active_downloading)}",
"",
]
body_lines += format_list("Deleted (completed, kept data)", deleted_completed)
body_lines.append("")
body_lines += format_list("Deleted (dead, deleted files)", deleted_dead)
body_lines.append("")
body_lines += format_list("Newly added to qBittorrent", added_new)
body_lines.append("")
body_lines += format_list("Actively downloading now", active_downloading)
send_mail(
to=MAIL_TO,
subject=f"qBittorrent worker {RUN_START:%Y-%m-%d %H:%M}",
body="\n".join(body_lines),
html=False,
)
print("📧 Email report sent")
print("🎉 DONE")