Save local IDE settings
This commit is contained in:
14
.gitignore
vendored
Normal file
14
.gitignore
vendored
Normal file
@@ -0,0 +1,14 @@
|
||||
# Virtual environment
|
||||
.venv/
|
||||
|
||||
# Python
|
||||
__pycache__/
|
||||
*.pyc
|
||||
*.log
|
||||
|
||||
# IDE
|
||||
.idea/
|
||||
|
||||
# OS
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
390
30 OpenTextLIsting v5.py
Normal file
390
30 OpenTextLIsting v5.py
Normal file
@@ -0,0 +1,390 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import pymysql
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
import time
|
||||
import re
|
||||
import urllib.parse as urlparse
|
||||
from pathlib import Path
|
||||
import json
|
||||
import requests
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 1) MySQL CONNECTION
|
||||
# ============================================================
|
||||
|
||||
import os

# NOTE(review): root credentials are committed to version control here. They
# can now be overridden through TORRENTS_DB_* environment variables; the
# literals remain only as backward-compatible fallbacks and should be removed
# (and the password rotated) once the environment is configured.
db = pymysql.connect(
    host=os.environ.get("TORRENTS_DB_HOST", "192.168.1.76"),
    port=int(os.environ.get("TORRENTS_DB_PORT", "3307")),
    user=os.environ.get("TORRENTS_DB_USER", "root"),
    password=os.environ.get("TORRENTS_DB_PASSWORD", "Vlado9674+"),
    database=os.environ.get("TORRENTS_DB_NAME", "torrents"),
    charset="utf8mb4",
    autocommit=True,  # every statement is committed immediately
)

# Single module-wide cursor shared by all queries in this script.
cursor = db.cursor()
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 2) Selenium setup
|
||||
# ============================================================
|
||||
|
||||
COOKIE_FILE = Path("sktorrent_cookies.json")

# Start URL: listing for category 24.
# NOTE(review): no order/by parameters are passed in this URL, so any
# "newest first" ordering relies on the site's default - confirm.
START_URL = (
    "https://sktorrent.eu/torrent/torrents.php"
    "?search=&category=24&zaner=&jazyk=&active=0"
)

chrome_options = Options()
for chrome_flag in (
    "--start-maximized",
    "--disable-notifications",
    "--disable-popup-blocking",
    "--disable-extensions",
):
    chrome_options.add_argument(chrome_flag)

driver = webdriver.Chrome(options=chrome_options)

# Window position and size (so the browser does not cover PyCharm).
driver.set_window_position(380, 50)   # about 10 cm from the left edge
driver.set_window_size(1350, 1000)    # adjust to your monitor

# Open the main page first: cookies can only be added for the current domain.
driver.get("https://sktorrent.eu")

# Load cookies from the JSON file, if there is one.
if not COOKIE_FILE.exists():
    print("⚠️ Cookie file not found, you may not be logged in!")
else:
    cookies = json.loads(COOKIE_FILE.read_text())
    for cookie in cookies:
        driver.add_cookie(cookie)
    print("🍪 Cookies loaded.")
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 3) Převod cookies → requests.Session (pro stahování .torrent)
|
||||
# ============================================================
|
||||
|
||||
# Copy the browser's cookies (name/value only) into a requests.Session, which
# is what later fetches the .torrent files.
requests_session = requests.Session()
for browser_cookie in driver.get_cookies():
    requests_session.cookies.set(browser_cookie["name"], browser_cookie["value"])

print("🔗 Requests session initialized with Selenium cookies.")
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 4) Funkce pro zavření popupu
|
||||
# ============================================================
|
||||
|
||||
def close_popup_if_any():
    """Try to dismiss the interstitial ad through ``interstitialBox.closeit()``.

    The injected snippet carries its own JavaScript try/catch, so it does
    nothing when the page defines no ``interstitialBox``. Any exception raised
    on the Python side is printed and swallowed.
    """
    dismiss_js = "try { interstitialBox.closeit(); } catch(e) {}"
    try:
        driver.execute_script(dismiss_js)
        time.sleep(0.5)  # short pause so the DOM can settle
        print("🧹 Popup closed via JS fallback (if present).")
    except Exception as err:
        print("ℹ️ Popup JS handler not found:", err)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 5) Funkce pro parsování jednoho řádku (jednoho torrentu)
|
||||
# ============================================================
|
||||
|
||||
def _parse_added_datetime(added_pretty):
    """Convert listing text such as '29/11/2025 o 02:29' to 'YYYY-MM-DD HH:MM:SS'.

    Returns None when the text is empty or does not start with a DD/MM/YYYY
    date. A missing or unrecognised time becomes 00:00:00; missing seconds
    become :00.
    """
    if not added_pretty:
        return None

    # "29/11/2025 o 02:29" -> "29/11/2025 02:29"
    tokens = added_pretty.replace(" o ", " ").strip().split(" ")

    date_match = re.fullmatch(r"(\d{1,2})/(\d{1,2})/(\d{4})", tokens[0])
    if not date_match:
        return None

    time_part = tokens[1] if len(tokens) > 1 else ""
    if not re.fullmatch(r"\d{1,2}:\d{2}(:\d{2})?", time_part):
        time_part = "00:00:00"
    elif time_part.count(":") == 1:
        time_part += ":00"

    day, month, year = date_match.groups()
    return f"{year}-{month.zfill(2)}-{day.zfill(2)} {time_part}"


def _read_peer_cell(cell):
    """Return ``(count, link)`` from a seeders/leechers cell, or None if unreadable."""
    anchors = cell.find_elements(By.TAG_NAME, "a")
    if not anchors:
        return None
    try:
        count = int(anchors[0].text.strip())
    except ValueError:
        return None
    return count, anchors[0].get_attribute("href")


def _extract_preview_image(cell):
    """Return the preview image URL found in the 'Obrázok' link's onmouseover, or None."""
    image_links = cell.find_elements(By.XPATH, ".//a[contains(text(),'Obrázok')]")
    if not image_links:
        return None
    mouseover = image_links[0].get_attribute("onmouseover")
    if not mouseover:
        return None
    img_match = re.search(r"src=([^ ]+)", mouseover)
    if not img_match:
        return None
    img_link = img_match.group(1).replace("'", "").strip()
    if img_link.startswith("//"):
        # protocol-relative URL -> make it absolute
        img_link = "https:" + img_link
    return img_link


def _download_torrent(download_link, torrent_hash, timeout=30):
    """Fetch the .torrent file; return its bytes, or None on any failure."""
    try:
        resp = requests_session.get(download_link, timeout=timeout)
        resp.raise_for_status()
    except requests.RequestException as e:
        print(f"⚠️ Could not download torrent file for {torrent_hash}: {e}")
        return None

    content = resp.content
    # A .torrent file is a bencoded dictionary, so it always starts with b"d".
    # Anything else (e.g. an HTML page) must not be stored, because a stored
    # value stops this row from ever being downloaded again.
    if not content.startswith(b"d"):
        print(f"⚠️ Could not download torrent file for {torrent_hash}: response is not a torrent file")
        return None
    return content


def parse_row(cells):
    """
    Extract one torrent from a listing row; download its .torrent if the DB has none.

    Args:
        cells: the row's seven <td> WebElements:
            0: category
            1: download link (.torrent)
            2: title + size + date added + 'Obrázok' preview link + genre
            3: -- (ignored)
            4: seeders
            5: leechers
            6: completed (not read here)

    Returns:
        A dict whose keys match the named parameters of ``insert_sql``, or
        None (after printing a warning) when the row has no download link,
        title link, details link, ``id`` query parameter, or readable
        seeders/leechers count. ``torrent_content`` is None when the file is
        already stored or could not be downloaded.
    """
    # 1) Category
    category = cells[0].text.strip()

    # 2) Download link for the .torrent file (cells[1])
    download_anchors = cells[1].find_elements(By.TAG_NAME, "a")
    download_link = download_anchors[0].get_attribute("href") if download_anchors else None
    if not download_link:
        print("⚠️ No download link in row, skipping.")
        return None

    dl_query = urlparse.parse_qs(urlparse.urlparse(download_link).query)
    torrent_filename = dl_query.get("f", ["unknown.torrent"])[0]

    # 3) Title + details link (cells[2])
    title_links = cells[2].find_elements(By.TAG_NAME, "a")
    if not title_links:
        print("⚠️ No title link — skipping row")
        return None

    a_tag = title_links[0]
    visible_name = a_tag.text.strip()
    full_title = a_tag.get_attribute("title")
    details_link = a_tag.get_attribute("href")

    if not details_link:
        print("⚠️ Row has no details link — skipping")
        return None

    # The value of ?id= in the details link is used as the torrent hash.
    query = urlparse.parse_qs(urlparse.urlparse(details_link).query)
    if "id" not in query:
        print("⚠️ Skipping row with no torrent ID →", details_link)
        return None
    torrent_hash = query["id"][0]

    # 4) Size + date, read from the cell text with whitespace collapsed
    text_block = cells[2].get_attribute("innerText") or ""
    text_block_clean = " ".join(text_block.split())

    size_match = re.search(r"Velkost ([0-9\.]+ ?[KMG]B)", text_block_clean, re.IGNORECASE)
    added_match = re.search(r"Pridany (.+?)(?:\sObrázok|$)", text_block_clean, re.IGNORECASE)

    size_pretty = size_match.group(1) if size_match else None
    added_pretty = added_match.group(1) if added_match else None

    added_mysql = _parse_added_datetime(added_pretty)
    if added_pretty and added_mysql is None:
        print("⚠️ Unrecognised date format, storing NULL →", added_pretty)

    # 5) Image preview (best effort: failure never blocks the row)
    try:
        img_link = _extract_preview_image(cells[2])
    except Exception as e:
        print("ℹ️ Could not read preview image:", e)
        img_link = None

    # 6) Seeders / leechers
    seeders = _read_peer_cell(cells[4])
    leechers = _read_peer_cell(cells[5])
    if seeders is None or leechers is None:
        print("⚠️ Could not read seeders/leechers — skipping row →", details_link)
        return None
    seeders_number, seeders_link = seeders
    leechers_number, leechers_link = leechers

    # 7) Do we already have torrent_content for this hash in the DB?
    cursor.execute(
        "SELECT torrent_content FROM torrents WHERE torrent_hash=%s",
        (torrent_hash,)
    )
    stored = cursor.fetchone()
    already_have_torrent = stored is not None and stored[0] is not None

    # 8) Download the .torrent only if needed
    if already_have_torrent:
        print(f" ↪️ Torrent file already stored, skipping download ({torrent_filename})")
        torrent_content = None
    else:
        time.sleep(3)  # pause between torrent downloads
        torrent_content = _download_torrent(download_link, torrent_hash)

    return {
        "torrent_hash": torrent_hash,
        "details_link": details_link,
        "category": category,
        "title_visible": visible_name,
        "title_full": full_title,
        "size_pretty": size_pretty,
        "added_datetime": added_mysql,
        "preview_image": img_link,
        "seeders": seeders_number,
        "seeders_link": seeders_link,
        "leechers": leechers_number,
        "leechers_link": leechers_link,
        "torrent_filename": torrent_filename,
        # None here means "keep what is stored": the upsert uses COALESCE.
        "torrent_content": torrent_content,
    }
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 6) MySQL INSERT
|
||||
# ============================================================
|
||||
|
||||
# Upsert for one parsed row. Every column is refreshed on a duplicate key,
# except torrent_content: COALESCE keeps the stored value when the new one
# is NULL, so an existing .torrent blob is never wiped.
insert_sql = """
INSERT INTO torrents (
torrent_hash, details_link, category, title_visible, title_full,
size_pretty, added_datetime, preview_image,
seeders, seeders_link, leechers, leechers_link,
torrent_filename, torrent_content
) VALUES (
%(torrent_hash)s, %(details_link)s, %(category)s, %(title_visible)s, %(title_full)s,
%(size_pretty)s, %(added_datetime)s, %(preview_image)s,
%(seeders)s, %(seeders_link)s, %(leechers)s, %(leechers_link)s,
%(torrent_filename)s, %(torrent_content)s
)
ON DUPLICATE KEY UPDATE
details_link = VALUES(details_link),
category = VALUES(category),
title_visible = VALUES(title_visible),
title_full = VALUES(title_full),
size_pretty = VALUES(size_pretty),
added_datetime = VALUES(added_datetime),
preview_image = VALUES(preview_image),
seeders = VALUES(seeders),
seeders_link = VALUES(seeders_link),
leechers = VALUES(leechers),
leechers_link = VALUES(leechers_link),
torrent_filename = VALUES(torrent_filename),
torrent_content = COALESCE(VALUES(torrent_content), torrent_content);
"""
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 7) Funkce pro zpracování jedné stránky
|
||||
# ============================================================
|
||||
|
||||
def process_current_page(page_index: int):
    """
    Scrape the page currently loaded in the browser and upsert its torrents.

    Every ``table tr`` with exactly seven ``<td>`` cells counts as a torrent
    row. Each one goes through ``parse_row``; rows for which it returns data
    are written with ``insert_sql``.

    Args:
        page_index: page counter, used only in the progress messages.
    """
    table_rows = driver.find_elements(By.CSS_SELECTOR, "table tr")
    cell_lists = [tr.find_elements(By.TAG_NAME, "td") for tr in table_rows]

    # Real torrent rows always have exactly 7 <td> cells.
    real_rows = [tds for tds in cell_lists if len(tds) == 7]

    print(f"📄 Page {page_index}: {len(real_rows)} torrent rows")

    for tds in real_rows:
        data = parse_row(tds)
        if data:
            print(f" 💾 [{page_index}] Saving:", data["title_visible"])
            cursor.execute(insert_sql, data)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 8) Hlavní stránkovací cyklus
|
||||
# ============================================================
|
||||
|
||||
# Main pagination loop: load a page, dismiss the popup, save its rows, then
# follow the "Dalsi >>" (next) link until there is none.
current_url = START_URL
page_index = 0

try:
    while True:
        print(f"\n🌐 Loading page {page_index}: {current_url}")
        driver.get(current_url)
        time.sleep(2)

        # close the popup, if there is one
        close_popup_if_any()

        # process the page that is now loaded
        process_current_page(page_index)

        # Look for the "Dalsi >>" link. find_elements returns an empty list
        # when it is absent, so only a genuinely missing link ends the crawl;
        # real errors are no longer misreported as "last page".
        next_links = driver.find_elements(By.XPATH, "//a[b[contains(text(),'Dalsi')]]")
        if not next_links:
            print("✅ No 'Dalsi >>' link found, reached last page. Done.")
            break

        next_url = next_links[0].get_attribute("href")
        if not next_url:
            print("⛔ Next link has no href, stopping.")
            break

        # if the link is relative, prepend the domain
        if next_url.startswith("/"):
            next_url = "https://sktorrent.eu" + next_url

        # guard against an endless loop on a self-referencing link
        if next_url == current_url:
            print("⛔ Next URL equals current URL, stopping.")
            break

        print("➡️ Next page:", next_url)
        current_url = next_url
        page_index += 1

        # short pause between pages
        time.sleep(1)

    print("\n🎉 DONE — All pages processed, torrents saved & torrent files downloaded (without re-downloading existing ones).")
finally:
    # Always shut the browser down, even when a page or a query raised.
    driver.quit()
|
||||
256
30 OpenTextListing v2.py
Normal file
256
30 OpenTextListing v2.py
Normal file
@@ -0,0 +1,256 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import pymysql
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
import time
|
||||
import re
|
||||
import urllib.parse as urlparse
|
||||
from pathlib import Path
|
||||
import json
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 1) MySQL CONNECTION
|
||||
# ============================================================
|
||||
|
||||
import os

# NOTE(review): root credentials are committed to version control here. They
# can now be overridden through TORRENTS_DB_* environment variables; the
# literals remain only as backward-compatible fallbacks and should be removed
# (and the password rotated) once the environment is configured.
db = pymysql.connect(
    host=os.environ.get("TORRENTS_DB_HOST", "192.168.1.76"),
    port=int(os.environ.get("TORRENTS_DB_PORT", "3307")),
    user=os.environ.get("TORRENTS_DB_USER", "root"),
    password=os.environ.get("TORRENTS_DB_PASSWORD", "Vlado9674+"),
    database=os.environ.get("TORRENTS_DB_NAME", "torrents"),
    charset="utf8mb4",
    autocommit=True,  # every statement is committed immediately
)

# Single module-wide cursor shared by all queries in this script.
cursor = db.cursor()
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 2) Selenium setup
|
||||
# ============================================================
|
||||
|
||||
COOKIE_FILE = Path("sktorrent_cookies.json")
# Listing for category 24, ordered by date descending, first page.
URL = "https://sktorrent.eu/torrent/torrents.php?active=0&category=24&order=data&by=DESC&zaner=&jazyk=&page=0"

chrome_options = Options()
for chrome_flag in (
    "--start-maximized",
    "--disable-notifications",
    "--disable-popup-blocking",
    "--disable-extensions",
):
    chrome_options.add_argument(chrome_flag)

driver = webdriver.Chrome(options=chrome_options)

# Open the site root first: cookies can only be added for the current domain.
driver.get("https://sktorrent.eu")

# Load saved cookies, if the file exists.
if COOKIE_FILE.exists():
    cookies = json.loads(COOKIE_FILE.read_text())
    for cookie in cookies:
        driver.add_cookie(cookie)
    print("🍪 Cookies loaded.")

# Now open the listing itself and give it time to render.
driver.get(URL)
time.sleep(2)
|
||||
|
||||
# ============================================================
|
||||
# Close interstitial popup reliably
|
||||
# ============================================================
|
||||
|
||||
# Give the page a moment before trying to dismiss the interstitial.
time.sleep(1)

try:
    # The snippet has its own JS try/catch, so it is harmless on a page that
    # defines no interstitialBox.
    driver.execute_script("try { interstitialBox.closeit(); } catch(e) {}")
    print("🧹 Popup closed via JS fallback.")
    time.sleep(1)
except Exception as exc:
    # Narrowed from a bare except so Ctrl+C / SystemExit are not swallowed;
    # the actual error is now shown alongside the message.
    print("ℹ️ Popup JS handler not found (probably no popup).", exc)
|
||||
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 3) Extract table rows
|
||||
# ============================================================
|
||||
|
||||
# Collect every table row on the page, then keep only the torrent rows.
rows = driver.find_elements(By.CSS_SELECTOR, "table tr")
print("Total rows found:", len(rows))

# Real torrent rows always have exactly 7 <td> cells.
real_rows = [
    tds
    for tds in (tr.find_elements(By.TAG_NAME, "td") for tr in rows)
    if len(tds) == 7
]

print("Real torrent rows:", len(real_rows))
print("")
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 4) Function to extract fields from one row
|
||||
# ============================================================
|
||||
|
||||
def _parse_added_datetime(added_pretty):
    """Convert listing text such as '18/11/2025 o 07:00' to 'YYYY-MM-DD HH:MM:SS'.

    Returns None when the text is empty or does not start with a DD/MM/YYYY
    date. A missing or unrecognised time becomes 00:00:00; missing seconds
    become :00.
    """
    if not added_pretty:
        return None

    # "29/11/2025 o 02:29" -> "29/11/2025 02:29"
    tokens = added_pretty.replace(" o ", " ").strip().split(" ")

    date_match = re.fullmatch(r"(\d{1,2})/(\d{1,2})/(\d{4})", tokens[0])
    if not date_match:
        return None

    time_part = tokens[1] if len(tokens) > 1 else ""
    if not re.fullmatch(r"\d{1,2}:\d{2}(:\d{2})?", time_part):
        time_part = "00:00:00"
    elif time_part.count(":") == 1:
        time_part += ":00"

    day, month, year = date_match.groups()
    return f"{year}-{month.zfill(2)}-{day.zfill(2)} {time_part}"


def _read_peer_cell(cell):
    """Return ``(count, link)`` from a seeders/leechers cell, or None if unreadable."""
    anchors = cell.find_elements(By.TAG_NAME, "a")
    if not anchors:
        return None
    try:
        count = int(anchors[0].text.strip())
    except ValueError:
        return None
    return count, anchors[0].get_attribute("href")


def _extract_preview_image(cell):
    """Return the preview image URL found in the 'Obrázok' link's onmouseover, or None."""
    image_links = cell.find_elements(By.XPATH, ".//a[contains(text(),'Obrázok')]")
    if not image_links:
        return None
    mouseover = image_links[0].get_attribute("onmouseover")
    if not mouseover:
        return None
    img_match = re.search(r"src=([^ ]+)", mouseover)
    if not img_match:
        return None
    img_link = img_match.group(1).replace("'", "").strip()
    if img_link.startswith("//"):
        # protocol-relative URL -> make it absolute
        img_link = "https:" + img_link
    return img_link


def parse_row(cells):
    """
    Extract one torrent's fields from a listing row.

    Args:
        cells: the row's seven <td> WebElements. This function reads
            cells[0] (category), cells[2] (title, details link, size, date,
            'Obrázok' preview link), cells[4] (seeders) and cells[5]
            (leechers).

    Returns:
        A dict whose keys match the named parameters of ``insert_sql``, or
        None (after printing a warning) when the row has no title link,
        details link, ``id`` query parameter, or readable seeders/leechers
        count.
    """
    # 1) Category (cells[0])
    category = cells[0].text.strip()

    # 2) Title + details link (always inside cells[2])
    title_links = cells[2].find_elements(By.TAG_NAME, "a")
    if not title_links:
        print("⚠️ Missing title link — skipping row")
        return None

    a_tag = title_links[0]
    visible_name = a_tag.text.strip()
    full_title = a_tag.get_attribute("title")
    details_link = a_tag.get_attribute("href")

    if not details_link:
        print("⚠️ Row has no details link — skipping")
        return None

    # 3) Torrent hash: the value of ?id= in the details link
    query = urlparse.parse_qs(urlparse.urlparse(details_link).query)
    if "id" not in query:
        print("⚠️ Skipping row with no torrent ID →", details_link)
        return None
    torrent_hash = query["id"][0]

    # 4) Size + date, read from the cell text with whitespace collapsed
    text_block = cells[2].get_attribute("innerText") or ""
    text_block_clean = " ".join(text_block.split())

    size_match = re.search(r"Velkost ([0-9\.]+ ?[KMG]B)", text_block_clean, re.IGNORECASE)
    added_match = re.search(r"Pridany (.+?)(?:\sObrázok|$)", text_block_clean, re.IGNORECASE)

    size_pretty = size_match.group(1) if size_match else None
    added_pretty = added_match.group(1) if added_match else None

    added_mysql = _parse_added_datetime(added_pretty)
    if added_pretty and added_mysql is None:
        print("⚠️ Unrecognised date format, storing NULL →", added_pretty)

    # 5) Image preview (best effort: failure never blocks the row)
    try:
        img_link = _extract_preview_image(cells[2])
    except Exception as e:
        print("ℹ️ Could not read preview image:", e)
        img_link = None

    # 6) Seeders (cells[4]) and 7) leechers (cells[5])
    seeders = _read_peer_cell(cells[4])
    leechers = _read_peer_cell(cells[5])
    if seeders is None or leechers is None:
        print("⚠️ Could not read seeders/leechers — skipping row →", details_link)
        return None
    seeders_number, seeders_link = seeders
    leechers_number, leechers_link = leechers

    return {
        "torrent_hash": torrent_hash,
        "details_link": details_link,
        "category": category,
        "title_visible": visible_name,
        "title_full": full_title,
        "size_pretty": size_pretty,
        "added_datetime": added_mysql,
        "preview_image": img_link,
        "seeders": seeders_number,
        "seeders_link": seeders_link,
        "leechers": leechers_number,
        "leechers_link": leechers_link,
    }
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 5) MySQL INSERT
|
||||
# ============================================================
|
||||
|
||||
# Upsert for one parsed row: insert it, or refresh every listed column when
# the insert hits a duplicate key.
# NOTE(review): this presumably relies on a unique key on torrent_hash -
# confirm against the table definition.
insert_sql = """
INSERT INTO torrents (
torrent_hash, details_link, category, title_visible, title_full,
size_pretty, added_datetime, preview_image,
seeders, seeders_link, leechers, leechers_link
) VALUES (
%(torrent_hash)s, %(details_link)s, %(category)s, %(title_visible)s, %(title_full)s,
%(size_pretty)s, %(added_datetime)s, %(preview_image)s,
%(seeders)s, %(seeders_link)s, %(leechers)s, %(leechers_link)s
)
ON DUPLICATE KEY UPDATE
details_link = VALUES(details_link),
category = VALUES(category),
title_visible = VALUES(title_visible),
title_full = VALUES(title_full),
size_pretty = VALUES(size_pretty),
added_datetime = VALUES(added_datetime),
preview_image = VALUES(preview_image),
seeders = VALUES(seeders),
seeders_link = VALUES(seeders_link),
leechers = VALUES(leechers),
leechers_link = VALUES(leechers_link);
"""

# Parse and save every torrent row collected from the page.
try:
    for cells in real_rows:
        data = parse_row(cells)
        if not data:
            continue

        print("💾 Saving:", data["title_visible"])
        cursor.execute(insert_sql, data)

    print("\n✅ DONE — All torrents saved to MySQL.")
finally:
    # Always shut the browser down, even when a row or a query raised.
    driver.quit()
|
||||
291
30 OpenTextListing v3.py
Normal file
291
30 OpenTextListing v3.py
Normal file
@@ -0,0 +1,291 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import pymysql
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
import time
|
||||
import re
|
||||
import urllib.parse as urlparse
|
||||
from pathlib import Path
|
||||
import json
|
||||
import requests
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 1) MySQL CONNECTION
|
||||
# ============================================================
|
||||
|
||||
import os

# NOTE(review): root credentials are committed to version control here. They
# can now be overridden through TORRENTS_DB_* environment variables; the
# literals remain only as backward-compatible fallbacks and should be removed
# (and the password rotated) once the environment is configured.
db = pymysql.connect(
    host=os.environ.get("TORRENTS_DB_HOST", "192.168.1.76"),
    port=int(os.environ.get("TORRENTS_DB_PORT", "3307")),
    user=os.environ.get("TORRENTS_DB_USER", "root"),
    password=os.environ.get("TORRENTS_DB_PASSWORD", "Vlado9674+"),
    database=os.environ.get("TORRENTS_DB_NAME", "torrents"),
    charset="utf8mb4",
    autocommit=True,  # every statement is committed immediately
)

# Single module-wide cursor shared by all queries in this script.
cursor = db.cursor()
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 2) Selenium setup
|
||||
# ============================================================
|
||||
|
||||
COOKIE_FILE = Path("sktorrent_cookies.json")
URL = "https://sktorrent.eu/torrent/torrents.php?active=0"

chrome_options = Options()
for chrome_flag in (
    "--start-maximized",
    "--disable-notifications",
    "--disable-popup-blocking",
    "--disable-extensions",
):
    chrome_options.add_argument(chrome_flag)

driver = webdriver.Chrome(options=chrome_options)

# Open the site root first: cookies can only be added for the current domain.
driver.get("https://sktorrent.eu")

# Load saved cookies into the browser, also recording each one as a
# {name: value} dict in session_cookies.
session_cookies = []
if COOKIE_FILE.exists():
    cookies = json.loads(COOKIE_FILE.read_text())
    for cookie in cookies:
        driver.add_cookie(cookie)
        session_cookies.append({cookie['name']: cookie['value']})
    print("🍪 Cookies loaded.")

# Now open the listing itself and give it time to render.
driver.get(URL)
time.sleep(2)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 3) Close interstitial popup robustly
|
||||
# ============================================================
|
||||
|
||||
try:
    # The snippet has its own JS try/catch, so it is harmless on a page that
    # defines no interstitialBox.
    driver.execute_script("try { interstitialBox.closeit(); } catch(e) {}")
    print("🧹 Popup closed via JS fallback.")
    time.sleep(1)
except Exception as exc:
    # Narrowed from a bare except so Ctrl+C / SystemExit are not swallowed;
    # the actual error is now shown alongside the message.
    print("ℹ️ No popup found.", exc)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Convert Selenium cookies → Python requests cookies
|
||||
# ============================================================
|
||||
|
||||
# Copy the browser's cookies (name/value only) into a requests.Session, which
# is what later fetches the .torrent files.
requests_session = requests.Session()
for browser_cookie in driver.get_cookies():
    requests_session.cookies.set(browser_cookie["name"], browser_cookie["value"])
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 4) Extract table rows
|
||||
# ============================================================
|
||||
|
||||
# Collect every table row on the page, then keep only the torrent rows.
rows = driver.find_elements(By.CSS_SELECTOR, "table tr")
print("Total rows found:", len(rows))

# Real torrent rows always have exactly 7 <td> cells.
real_rows = [
    tds
    for tds in (tr.find_elements(By.TAG_NAME, "td") for tr in rows)
    if len(tds) == 7
]

print("Real torrent rows:", len(real_rows))
print("")
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 5) Function to extract fields from one row
|
||||
# ============================================================
|
||||
|
||||
def _parse_added_datetime(added_pretty):
    """Convert listing text such as '29/11/2025 o 02:29' to 'YYYY-MM-DD HH:MM:SS'.

    Returns None when the text is empty or does not start with a DD/MM/YYYY
    date. A missing or unrecognised time becomes 00:00:00; missing seconds
    become :00.
    """
    if not added_pretty:
        return None

    # "29/11/2025 o 02:29" -> "29/11/2025 02:29"
    tokens = added_pretty.replace(" o ", " ").strip().split(" ")

    date_match = re.fullmatch(r"(\d{1,2})/(\d{1,2})/(\d{4})", tokens[0])
    if not date_match:
        return None

    time_part = tokens[1] if len(tokens) > 1 else ""
    if not re.fullmatch(r"\d{1,2}:\d{2}(:\d{2})?", time_part):
        time_part = "00:00:00"
    elif time_part.count(":") == 1:
        time_part += ":00"

    day, month, year = date_match.groups()
    return f"{year}-{month.zfill(2)}-{day.zfill(2)} {time_part}"


def _read_peer_cell(cell):
    """Return ``(count, link)`` from a seeders/leechers cell, or None if unreadable."""
    anchors = cell.find_elements(By.TAG_NAME, "a")
    if not anchors:
        return None
    try:
        count = int(anchors[0].text.strip())
    except ValueError:
        return None
    return count, anchors[0].get_attribute("href")


def _extract_preview_image(cell):
    """Return the preview image URL found in the 'Obrázok' link's onmouseover, or None."""
    image_links = cell.find_elements(By.XPATH, ".//a[contains(text(),'Obrázok')]")
    if not image_links:
        return None
    mouseover = image_links[0].get_attribute("onmouseover")
    if not mouseover:
        return None
    img_match = re.search(r"src=([^ ]+)", mouseover)
    if not img_match:
        return None
    img_link = img_match.group(1).replace("'", "").strip()
    if img_link.startswith("//"):
        # protocol-relative URL -> make it absolute
        img_link = "https:" + img_link
    return img_link


def _download_torrent(download_link, torrent_hash, timeout=30):
    """Fetch the .torrent file; return its bytes, or None on any failure."""
    try:
        resp = requests_session.get(download_link, timeout=timeout)
        # Without this, an HTTP error page would be stored as torrent content.
        resp.raise_for_status()
    except requests.RequestException as e:
        print(f"⚠️ Could not download torrent file for {torrent_hash}: {e}")
        return None

    content = resp.content
    # A .torrent file is a bencoded dictionary, so it always starts with b"d".
    # Anything else (e.g. an HTML page) is rejected rather than stored.
    if not content.startswith(b"d"):
        print(f"⚠️ Could not download torrent file for {torrent_hash}: response is not a torrent file")
        return None
    return content


def parse_row(cells):
    """
    Extract one torrent from a listing row and download its .torrent file.

    Args:
        cells: the row's seven <td> WebElements. This function reads
            cells[0] (category), cells[1] (download link), cells[2] (title,
            details link, size, date, 'Obrázok' preview link), cells[4]
            (seeders) and cells[5] (leechers).

    Returns:
        A dict whose keys match the named parameters of ``insert_sql``, or
        None (after printing a warning) when the row has no download link,
        title link, details link, ``id`` query parameter, or readable
        seeders/leechers count. ``torrent_content`` is None when the download
        failed.
    """
    # 1) Category
    category = cells[0].text.strip()

    # 2) Download link for the .torrent file (cells[1])
    download_anchors = cells[1].find_elements(By.TAG_NAME, "a")
    download_link = download_anchors[0].get_attribute("href") if download_anchors else None
    if not download_link:
        print("⚠️ No download link in row, skipping.")
        return None

    dl_query = urlparse.parse_qs(urlparse.urlparse(download_link).query)
    torrent_filename = dl_query.get("f", ["unknown.torrent"])[0]

    # 3) Title + details link (cells[2])
    title_links = cells[2].find_elements(By.TAG_NAME, "a")
    if not title_links:
        print("⚠️ No title link — skipping row")
        return None

    a_tag = title_links[0]
    visible_name = a_tag.text.strip()
    full_title = a_tag.get_attribute("title")
    details_link = a_tag.get_attribute("href")

    if not details_link:
        print("⚠️ Row has no details link — skipping")
        return None

    # The value of ?id= in the details link is used as the torrent hash.
    query = urlparse.parse_qs(urlparse.urlparse(details_link).query)
    if "id" not in query:
        print("⚠️ Skipping row with no torrent ID →", details_link)
        return None
    torrent_hash = query["id"][0]

    # 4) Size + date, read from the cell text with whitespace collapsed
    text_block = cells[2].get_attribute("innerText") or ""
    text_block_clean = " ".join(text_block.split())

    size_match = re.search(r"Velkost ([0-9\.]+ ?[KMG]B)", text_block_clean, re.IGNORECASE)
    added_match = re.search(r"Pridany (.+?)(?:\sObrázok|$)", text_block_clean, re.IGNORECASE)

    size_pretty = size_match.group(1) if size_match else None
    added_pretty = added_match.group(1) if added_match else None

    added_mysql = _parse_added_datetime(added_pretty)
    if added_pretty and added_mysql is None:
        print("⚠️ Unrecognised date format, storing NULL →", added_pretty)

    # 5) Image preview (best effort: failure never blocks the row)
    try:
        img_link = _extract_preview_image(cells[2])
    except Exception as e:
        print("ℹ️ Could not read preview image:", e)
        img_link = None

    # 6) Seeders / leechers
    seeders = _read_peer_cell(cells[4])
    leechers = _read_peer_cell(cells[5])
    if seeders is None or leechers is None:
        print("⚠️ Could not read seeders/leechers — skipping row →", details_link)
        return None
    seeders_number, seeders_link = seeders
    leechers_number, leechers_link = leechers

    # 7) Download the .torrent content
    torrent_content = _download_torrent(download_link, torrent_hash)

    return {
        "torrent_hash": torrent_hash,
        "details_link": details_link,
        "category": category,
        "title_visible": visible_name,
        "title_full": full_title,
        "size_pretty": size_pretty,
        "added_datetime": added_mysql,
        "preview_image": img_link,
        "seeders": seeders_number,
        "seeders_link": seeders_link,
        "leechers": leechers_number,
        "leechers_link": leechers_link,
        "torrent_filename": torrent_filename,
        "torrent_content": torrent_content,
    }
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 6) MySQL INSERT
|
||||
# ============================================================
|
||||
|
||||
# Upsert for one parsed row. Every column is refreshed on a duplicate key,
# except torrent_content: COALESCE keeps the stored value when the new one is
# NULL, so a failed download no longer erases a .torrent blob saved earlier.
insert_sql = """
INSERT INTO torrents (
torrent_hash, details_link, category, title_visible, title_full,
size_pretty, added_datetime, preview_image,
seeders, seeders_link, leechers, leechers_link,
torrent_filename, torrent_content
) VALUES (
%(torrent_hash)s, %(details_link)s, %(category)s, %(title_visible)s, %(title_full)s,
%(size_pretty)s, %(added_datetime)s, %(preview_image)s,
%(seeders)s, %(seeders_link)s, %(leechers)s, %(leechers_link)s,
%(torrent_filename)s, %(torrent_content)s
)
ON DUPLICATE KEY UPDATE
details_link = VALUES(details_link),
category = VALUES(category),
title_visible = VALUES(title_visible),
title_full = VALUES(title_full),
size_pretty = VALUES(size_pretty),
added_datetime = VALUES(added_datetime),
preview_image = VALUES(preview_image),
seeders = VALUES(seeders),
seeders_link = VALUES(seeders_link),
leechers = VALUES(leechers),
leechers_link = VALUES(leechers_link),
torrent_filename = VALUES(torrent_filename),
torrent_content = COALESCE(VALUES(torrent_content), torrent_content);
"""

# Parse and save every torrent row collected from the page.
try:
    for cells in real_rows:
        data = parse_row(cells)
        if not data:
            continue

        print("💾 Saving:", data["title_visible"])
        cursor.execute(insert_sql, data)

    print("\n✅ DONE — All torrents saved to MySQL & torrent files downloaded.")
finally:
    # Always shut the browser down, even when a row or a query raised.
    driver.quit()
|
||||
375
30 OpenTextListing v4.py
Normal file
375
30 OpenTextListing v4.py
Normal file
@@ -0,0 +1,375 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import pymysql
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
import time
|
||||
import re
|
||||
import urllib.parse as urlparse
|
||||
from pathlib import Path
|
||||
import json
|
||||
import requests
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 1) MySQL CONNECTION
|
||||
# ============================================================
|
||||
|
||||
db = pymysql.connect(
|
||||
host="192.168.1.76",
|
||||
port=3307,
|
||||
user="root",
|
||||
password="Vlado9674+",
|
||||
database="torrents",
|
||||
charset="utf8mb4",
|
||||
autocommit=True
|
||||
)
|
||||
|
||||
cursor = db.cursor()
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 2) Selenium setup
|
||||
# ============================================================
|
||||
|
||||
COOKIE_FILE = Path("sktorrent_cookies.json")
|
||||
|
||||
# Start URL pro kategorii 24, seřazeno podle data DESC
|
||||
START_URL = (
|
||||
"https://sktorrent.eu/torrent/torrents.php"
|
||||
"?active=0&category=24&order=data&by=DESC&zaner=&jazyk=&page=0"
|
||||
)
|
||||
|
||||
chrome_options = Options()
|
||||
chrome_options.add_argument("--start-maximized")
|
||||
chrome_options.add_argument("--disable-notifications")
|
||||
chrome_options.add_argument("--disable-popup-blocking")
|
||||
chrome_options.add_argument("--disable-extensions")
|
||||
|
||||
driver = webdriver.Chrome(options=chrome_options)
|
||||
|
||||
# Pozice a velikost okna (aby nepřekrývalo PyCharm)
|
||||
driver.set_window_position(380, 50) # 10 cm od levého okraje
|
||||
driver.set_window_size(1350, 1000) # můžeš změnit dle monitoru
|
||||
|
||||
|
||||
# Nejprve otevřeme hlavní stránku kvůli doméně pro cookies
|
||||
driver.get("https://sktorrent.eu")
|
||||
|
||||
# Load cookies z JSON
|
||||
if COOKIE_FILE.exists():
|
||||
with open(COOKIE_FILE, "r") as f:
|
||||
cookies = json.load(f)
|
||||
for c in cookies:
|
||||
driver.add_cookie(c)
|
||||
print("🍪 Cookies loaded.")
|
||||
else:
|
||||
print("⚠️ Cookie file not found, you may not be logged in!")
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 3) Převod cookies → requests.Session (pro stahování .torrent)
|
||||
# ============================================================
|
||||
|
||||
requests_session = requests.Session()
|
||||
for ck in driver.get_cookies():
|
||||
requests_session.cookies.set(ck["name"], ck["value"])
|
||||
|
||||
print("🔗 Requests session initialized with Selenium cookies.")
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 4) Funkce pro zavření popupu
|
||||
# ============================================================
|
||||
|
||||
def close_popup_if_any():
    """Try to dismiss the interstitial ad via the page's own JS helper.

    Runs ``interstitialBox.closeit()`` in the browser. The JavaScript wraps
    the call in its own try/catch, so a missing overlay is not an error.
    """
    closer_js = "try { interstitialBox.closeit(); } catch(e) {}"
    try:
        driver.execute_script(closer_js)
        # Short pause so the DOM can settle.
        time.sleep(0.5)
        print("🧹 Popup closed via JS fallback (if present).")
    except Exception as err:
        print("ℹ️ Popup JS handler not found:", err)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 5) Funkce pro parsování jednoho řádku (jednoho torrentu)
|
||||
# ============================================================
|
||||
|
||||
def _added_to_mysql(added_pretty):
    """Convert a "DD/MM/YYYY o HH:MM" stamp to "YYYY-MM-DD HH:MM:SS".

    Returns None when the text is empty or the date part does not have
    exactly three "/"-separated fields, so an unexpected layout cannot
    abort the row.
    """
    if not added_pretty:
        return None

    # "29/11/2025 o 02:29" -> "29/11/2025 02:29"
    parts = added_pretty.replace(" o ", " ").strip().split(" ")
    date_part = parts[0]
    time_part = parts[1] if len(parts) > 1 else "00:00:00"

    # Append seconds when only HH:MM is present.
    if len(time_part.split(":")) == 2:
        time_part += ":00"

    fields = date_part.split("/")
    if len(fields) != 3:
        return None
    day, month, year = fields
    return f"{year}-{month}-{day} {time_part}"


def parse_row(cells):
    """Parse one torrent row and download its .torrent file.

    cells: list of the row's 7 <td> WebElements.
    Structure:
        0: category
        1: download link (.torrent)
        2: title + size + date + 'Obrázok' + genre
        3: -- (ignored)
        4: seeders
        5: leechers
        6: completed

    Returns a dict with the keys used by insert_sql, or None when the row
    has no download link, no title link, no details link or no ?id=
    parameter. Raises if the seeders/leechers cells do not hold an <a>
    with an integer text.
    """
    # 1) Category
    category = cells[0].text.strip()

    # 2) Download link for the .torrent file (cells[1])
    try:
        download_a = cells[1].find_element(By.TAG_NAME, "a")
        download_link = download_a.get_attribute("href")
    except Exception:
        print("⚠️ No download link in row, skipping.")
        return None
    if not download_link:
        # An anchor without href cannot be downloaded either.
        print("⚠️ No download link in row, skipping.")
        return None

    dl_query = urlparse.parse_qs(urlparse.urlparse(download_link).query)
    torrent_filename = dl_query.get("f", ["unknown.torrent"])[0]

    # 3) Title + details link (cells[2])
    title_links = cells[2].find_elements(By.TAG_NAME, "a")
    if not title_links:
        print("⚠️ No title link — skipping row")
        return None

    a_tag = title_links[0]
    visible_name = a_tag.text.strip()
    full_title = a_tag.get_attribute("title")
    details_link = a_tag.get_attribute("href")

    if not details_link:
        print("⚠️ Row has no details link — skipping")
        return None

    # The ?id= parameter of the details link is stored as torrent_hash.
    query = urlparse.parse_qs(urlparse.urlparse(details_link).query)
    if "id" not in query:
        print("⚠️ Skipping row with no torrent ID →", details_link)
        return None
    torrent_hash = query["id"][0]

    # 4) Size + date parsing
    text_block = cells[2].get_attribute("innerText") or ""
    text_block_clean = " ".join(text_block.split())

    size_match = re.search(r"Velkost ([0-9\.]+ ?[KMG]B)", text_block_clean, re.IGNORECASE)
    added_match = re.search(r"Pridany (.+?)(?:\sObrázok|$)", text_block_clean, re.IGNORECASE)

    size_pretty = size_match.group(1) if size_match else None
    added_mysql = _added_to_mysql(added_match.group(1) if added_match else None)

    # 5) Image preview (optional; rows without it keep img_link = None)
    img_link = None
    try:
        image_a = cells[2].find_element(
            By.XPATH,
            ".//a[contains(text(),'Obrázok')]"
        )
        mouseover = image_a.get_attribute("onmouseover") or ""
        img_match = re.search(r"src=([^ ]+)", mouseover)
        if img_match:
            img_link = img_match.group(1).replace("'", "").strip()
            if img_link.startswith("//"):
                img_link = "https:" + img_link
    except Exception:
        # Best effort: a missing preview link is not an error.
        pass

    # 6) Seeders / leechers
    seeders_a = cells[4].find_element(By.TAG_NAME, "a")
    seeders_number = int(seeders_a.text.strip())
    seeders_link = seeders_a.get_attribute("href")

    leechers_a = cells[5].find_element(By.TAG_NAME, "a")
    leechers_number = int(leechers_a.text.strip())
    leechers_link = leechers_a.get_attribute("href")

    # 7) Download the .torrent content
    torrent_content = None
    time.sleep(3)  # pause between torrents
    try:
        # The timeout keeps a stalled connection from hanging the crawl.
        resp = requests_session.get(download_link, timeout=30)
        resp.raise_for_status()
        torrent_content = resp.content
    except Exception as e:
        print(f"⚠️ Could not download torrent file for {torrent_hash}: {e}")
        torrent_content = None

    return {
        "torrent_hash": torrent_hash,
        "details_link": details_link,
        "category": category,
        "title_visible": visible_name,
        "title_full": full_title,
        "size_pretty": size_pretty,
        "added_datetime": added_mysql,
        "preview_image": img_link,
        "seeders": seeders_number,
        "seeders_link": seeders_link,
        "leechers": leechers_number,
        "leechers_link": leechers_link,
        "torrent_filename": torrent_filename,
        "torrent_content": torrent_content,
    }
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 6) MySQL INSERT
|
||||
# ============================================================
|
||||
|
||||
insert_sql = """
|
||||
INSERT INTO torrents (
|
||||
torrent_hash, details_link, category, title_visible, title_full,
|
||||
size_pretty, added_datetime, preview_image,
|
||||
seeders, seeders_link, leechers, leechers_link,
|
||||
torrent_filename, torrent_content
|
||||
) VALUES (
|
||||
%(torrent_hash)s, %(details_link)s, %(category)s, %(title_visible)s, %(title_full)s,
|
||||
%(size_pretty)s, %(added_datetime)s, %(preview_image)s,
|
||||
%(seeders)s, %(seeders_link)s, %(leechers)s, %(leechers_link)s,
|
||||
%(torrent_filename)s, %(torrent_content)s
|
||||
)
|
||||
ON DUPLICATE KEY UPDATE
|
||||
details_link = VALUES(details_link),
|
||||
category = VALUES(category),
|
||||
title_visible = VALUES(title_visible),
|
||||
title_full = VALUES(title_full),
|
||||
size_pretty = VALUES(size_pretty),
|
||||
added_datetime = VALUES(added_datetime),
|
||||
preview_image = VALUES(preview_image),
|
||||
seeders = VALUES(seeders),
|
||||
seeders_link = VALUES(seeders_link),
|
||||
leechers = VALUES(leechers),
|
||||
leechers_link = VALUES(leechers_link),
|
||||
torrent_filename = VALUES(torrent_filename),
|
||||
torrent_content = VALUES(torrent_content);
|
||||
"""
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 7) Funkce pro zpracování jedné stránky
|
||||
# ============================================================
|
||||
|
||||
def process_current_page(page_index: int):
    """Parse and store every torrent row of the currently loaded page.

    Rows are the <tr> elements under a <table> that have exactly 7 <td>
    cells. Each one goes through parse_row and is then inserted/updated
    with insert_sql. A row whose parsing raises is reported and skipped so
    that a single malformed row does not abort the crawl.

    :param page_index: page counter, used only in log output
    """
    real_rows = []
    for row in driver.find_elements(By.CSS_SELECTOR, "table tr"):
        cells = row.find_elements(By.TAG_NAME, "td")
        # Real torrent rows always have exactly 7 td cells.
        if len(cells) == 7:
            real_rows.append(cells)

    print(f"📄 Page {page_index}: {len(real_rows)} torrent rows")

    for cells in real_rows:
        try:
            data = parse_row(cells)
        except Exception as exc:
            print(f"⚠️ [{page_index}] parse_row failed: {exc}")
            continue

        if not data:
            continue

        print(f" 💾 [{page_index}] Saving:", data["title_visible"])
        cursor.execute(insert_sql, data)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 8) Hlavní stránkovací cyklus
|
||||
# ============================================================
|
||||
|
||||
current_url = START_URL
page_index = 0

# Walk the listing page by page until no "Dalsi >>" link is left.
# The browser is closed in `finally`, so a failure mid-crawl does not leave
# a Chrome instance behind.
try:
    while True:
        print(f"\n🌐 Loading page {page_index}: {current_url}")
        driver.get(current_url)
        time.sleep(2)

        # Close the popup, if any.
        close_popup_if_any()

        # Process the current page.
        process_current_page(page_index)

        # find_elements returns an empty list when the link is absent, so
        # "last page" is no longer confused with an unrelated exception.
        next_links = driver.find_elements(
            By.XPATH,
            "//a[b[contains(text(),'Dalsi')]]"
        )
        if not next_links:
            print("✅ No 'Dalsi >>' link found, reached last page. Done.")
            break

        next_url = next_links[0].get_attribute("href")
        if not next_url:
            print("⛔ Next link has no href, stopping.")
            break

        # Prefix the domain when the link is relative.
        if next_url.startswith("/"):
            next_url = "https://sktorrent.eu" + next_url

        # Guard against an endless loop on a self-referencing link.
        if next_url == current_url:
            print("⛔ Next URL equals current URL, stopping.")
            break

        print("➡️ Next page:", next_url)
        current_url = next_url
        page_index += 1

        # Small pause between pages.
        time.sleep(1)

    print("\n🎉 DONE — All pages processed, torrents saved & torrent files downloaded.")
finally:
    driver.quit()
|
||||
91
EmailMessagingGraph.py
Normal file
91
EmailMessagingGraph.py
Normal file
@@ -0,0 +1,91 @@
|
||||
"""
|
||||
EmailMessagingGraph.py
|
||||
----------------------
|
||||
Private Microsoft Graph mail sender
|
||||
Application permissions, shared mailbox
|
||||
"""
|
||||
|
||||
import msal
|
||||
import requests
|
||||
from functools import lru_cache
|
||||
from typing import Union, List
|
||||
|
||||
|
||||
# =========================
|
||||
# PRIVATE CONFIG (ONLY YOU)
|
||||
# =========================
|
||||
import os

# NOTE(review): these credentials are committed in plain text. The client
# secret should be rotated and supplied through the environment; the
# literals remain only as a fallback so existing deployments keep working.
TENANT_ID = os.environ.get("GRAPH_TENANT_ID") or "7d269944-37a4-43a1-8140-c7517dc426e9"
CLIENT_ID = os.environ.get("GRAPH_CLIENT_ID") or "4b222bfd-78c9-4239-a53f-43006b3ed07f"
CLIENT_SECRET = os.environ.get("GRAPH_CLIENT_SECRET") or "Txg8Q~MjhocuopxsJyJBhPmDfMxZ2r5WpTFj1dfk"
SENDER = os.environ.get("GRAPH_SENDER") or "reports@buzalka.cz"

# Token endpoint authority and the scope requested for the token.
AUTHORITY = f"https://login.microsoftonline.com/{TENANT_ID}"
SCOPE = ["https://graph.microsoft.com/.default"]
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
def _get_app():
    """Build the MSAL confidential client once per process."""
    return msal.ConfidentialClientApplication(
        CLIENT_ID,
        authority=AUTHORITY,
        client_credential=CLIENT_SECRET,
    )


def _get_token() -> str:
    """Return an access token for Microsoft Graph.

    Only the MSAL application object is cached, not the token string.
    Caching the token with lru_cache handed out the same token for the
    whole process lifetime, including after it expired. Token reuse is
    left to MSAL (see the MSAL documentation for its cache behaviour).

    :raises RuntimeError: when the response carries no "access_token"
    """
    token = _get_app().acquire_token_for_client(scopes=SCOPE)

    if "access_token" not in token:
        raise RuntimeError(f"Graph auth failed: {token}")

    return token["access_token"]
|
||||
|
||||
|
||||
def send_mail(
    to: Union[str, List[str]],
    subject: str,
    body: str,
    *,
    html: bool = False,
):
    """
    Send email via Microsoft Graph.

    :param to: email or list of emails
    :param subject: subject
    :param body: email body
    :param html: True = HTML, False = plain text
    :raises ValueError: when no non-blank recipient address is given
    :raises RuntimeError: when Graph answers with anything but HTTP 202
    """
    # Normalise to a list and drop blank entries before any network call.
    candidates = [to] if isinstance(to, str) else list(to)
    recipients = [addr.strip() for addr in candidates if addr and addr.strip()]
    if not recipients:
        raise ValueError("send_mail requires at least one recipient address")

    payload = {
        "message": {
            "subject": subject,
            "body": {
                "contentType": "HTML" if html else "Text",
                "content": body,
            },
            "toRecipients": [
                {"emailAddress": {"address": addr}} for addr in recipients
            ],
        },
        # Sent as a JSON boolean, not the string "true".
        "saveToSentItems": True,
    }

    headers = {
        "Authorization": f"Bearer {_get_token()}",
        "Content-Type": "application/json",
    }

    r = requests.post(
        f"https://graph.microsoft.com/v1.0/users/{SENDER}/sendMail",
        headers=headers,
        json=payload,
        timeout=30,
    )

    if r.status_code != 202:
        raise RuntimeError(
            f"sendMail failed [{r.status_code}]: {r.text}"
        )
|
||||
342
Reporter_ReadNewTorrents.py
Normal file
342
Reporter_ReadNewTorrents.py
Normal file
@@ -0,0 +1,342 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import pymysql
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
import time
|
||||
import re
|
||||
import urllib.parse as urlparse
|
||||
from pathlib import Path
|
||||
import json
|
||||
import requests
|
||||
import datetime
|
||||
import sys
|
||||
|
||||
from EmailMessagingGraph import send_mail
|
||||
|
||||
|
||||
# ============================================================
|
||||
# RUNTIME INFO
|
||||
# ============================================================
|
||||
|
||||
RUN_START = datetime.datetime.now()
|
||||
|
||||
processed_count = 0
|
||||
new_torrent_count = 0
|
||||
existing_torrent_count = 0
|
||||
new_titles = []
|
||||
|
||||
print(f"🕒 Run started at {RUN_START:%Y-%m-%d %H:%M:%S}")
|
||||
sys.stdout.flush()
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 1) MySQL CONNECTION
|
||||
# ============================================================
|
||||
|
||||
db = pymysql.connect(
|
||||
host="192.168.1.76",
|
||||
port=3307,
|
||||
user="root",
|
||||
password="Vlado9674+",
|
||||
database="torrents",
|
||||
charset="utf8mb4",
|
||||
autocommit=True,
|
||||
)
|
||||
|
||||
cursor = db.cursor()
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 2) Selenium setup
|
||||
# ============================================================
|
||||
|
||||
COOKIE_FILE = Path("sktorrent_cookies.json")
|
||||
|
||||
START_URL = (
|
||||
"https://sktorrent.eu/torrent/torrents.php"
|
||||
"?search=&category=24&zaner=&jazyk=&active=0"
|
||||
)
|
||||
|
||||
chrome_options = Options()
|
||||
chrome_options.add_argument("--start-maximized")
|
||||
chrome_options.add_argument("--disable-notifications")
|
||||
chrome_options.add_argument("--disable-popup-blocking")
|
||||
chrome_options.add_argument("--disable-extensions")
|
||||
|
||||
driver = webdriver.Chrome(options=chrome_options)
|
||||
|
||||
driver.set_window_position(380, 50)
|
||||
driver.set_window_size(1350, 1000)
|
||||
|
||||
driver.get("https://sktorrent.eu")
|
||||
|
||||
if COOKIE_FILE.exists():
|
||||
with open(COOKIE_FILE, "r", encoding="utf-8") as f:
|
||||
cookies = json.load(f)
|
||||
for c in cookies:
|
||||
driver.add_cookie(c)
|
||||
print("🍪 Cookies loaded.")
|
||||
else:
|
||||
print("⚠️ Cookie file not found – login may be required.")
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 3) requests.Session from Selenium cookies
|
||||
# ============================================================
|
||||
|
||||
requests_session = requests.Session()
|
||||
for ck in driver.get_cookies():
|
||||
requests_session.cookies.set(ck["name"], ck["value"])
|
||||
|
||||
print("🔗 Requests session initialized.")
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 4) Popup handler
|
||||
# ============================================================
|
||||
|
||||
def close_popup_if_any():
    """Best-effort dismissal of the interstitial overlay; errors are ignored."""
    closer_js = "try { interstitialBox.closeit(); } catch(e) {}"
    try:
        driver.execute_script(closer_js)
        time.sleep(0.5)
    except Exception:
        # Nothing to close, or the script could not run: carry on.
        return
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 5) Parse one torrent row
|
||||
# ============================================================
|
||||
|
||||
def _added_to_mysql(added_pretty):
    """Convert a "DD/MM/YYYY o HH:MM" stamp to "YYYY-MM-DD HH:MM:SS".

    Returns None when the text is empty or the date part does not have
    exactly three "/"-separated fields.
    """
    if not added_pretty:
        return None

    # "29/11/2025 o 02:29" -> "29/11/2025 02:29"
    parts = added_pretty.replace(" o ", " ").strip().split(" ")
    date_part = parts[0]
    time_part = parts[1] if len(parts) > 1 else "00:00:00"

    # Append seconds when only HH:MM is present.
    if len(time_part.split(":")) == 2:
        time_part += ":00"

    fields = date_part.split("/")
    if len(fields) != 3:
        return None
    day, month, year = fields
    return f"{year}-{month}-{day} {time_part}"


def parse_row(cells):
    """Parse one torrent row; fetch its .torrent only if the DB lacks it.

    cells is the row's list of <td> WebElements: index 0 is the category,
    1 the download link, 2 the title/size/date block, 4 the seeders and
    5 the leechers.

    Returns a dict with the keys used by insert_sql plus "is_new_torrent"
    (True when the database holds no torrent_content for this
    torrent_hash yet), or None when the row has no download link, no
    title link or no ?id= parameter. Raises if the seeders/leechers cells
    do not hold an <a> with an integer text.
    """
    category = cells[0].text.strip()

    try:
        download_a = cells[1].find_element(By.TAG_NAME, "a")
        download_link = download_a.get_attribute("href")
    except Exception:
        return None
    if not download_link:
        return None

    dl_query = urlparse.parse_qs(urlparse.urlparse(download_link).query)
    torrent_filename = dl_query.get("f", ["unknown.torrent"])[0]

    title_links = cells[2].find_elements(By.TAG_NAME, "a")
    if not title_links:
        return None

    a_tag = title_links[0]
    visible_name = a_tag.text.strip()
    full_title = a_tag.get_attribute("title")
    details_link = a_tag.get_attribute("href")
    if not details_link:
        return None

    # The ?id= parameter of the details link is stored as torrent_hash.
    query = urlparse.parse_qs(urlparse.urlparse(details_link).query)
    if "id" not in query:
        return None
    torrent_hash = query["id"][0]

    text_block = cells[2].get_attribute("innerText") or ""
    text_block_clean = " ".join(text_block.split())

    size_match = re.search(r"Velkost ([0-9\.]+ ?[KMG]B)", text_block_clean, re.IGNORECASE)
    added_match = re.search(r"Pridany (.+?)(?:\sObrázok|$)", text_block_clean, re.IGNORECASE)

    size_pretty = size_match.group(1) if size_match else None
    added_mysql = _added_to_mysql(added_match.group(1) if added_match else None)

    # Image preview (optional; rows without it keep img_link = None).
    img_link = None
    try:
        image_a = cells[2].find_element(
            By.XPATH,
            ".//a[contains(text(),'Obrázok')]"
        )
        mouseover = image_a.get_attribute("onmouseover") or ""
        img_match = re.search(r"src=([^ ]+)", mouseover)
        if img_match:
            img_link = img_match.group(1).replace("'", "").strip()
            if img_link.startswith("//"):
                img_link = "https:" + img_link
    except Exception:
        # Best effort: a missing preview link is not an error.
        pass

    seeders_a = cells[4].find_element(By.TAG_NAME, "a")
    seeders_number = int(seeders_a.text.strip())
    seeders_link = seeders_a.get_attribute("href")

    leechers_a = cells[5].find_element(By.TAG_NAME, "a")
    leechers_number = int(leechers_a.text.strip())
    leechers_link = leechers_a.get_attribute("href")

    # Skip the download when the .torrent payload is already stored.
    cursor.execute(
        "SELECT torrent_content FROM torrents WHERE torrent_hash=%s",
        (torrent_hash,),
    )
    row = cursor.fetchone()
    already_have_torrent = row is not None and row[0] is not None

    torrent_content = None
    if not already_have_torrent:
        time.sleep(3)  # pause between downloads
        try:
            # The timeout keeps a stalled connection from hanging the run.
            resp = requests_session.get(download_link, timeout=30)
            resp.raise_for_status()
            torrent_content = resp.content
        except Exception as e:
            # Report the failure instead of dropping it silently.
            print(f"⚠️ Could not download torrent file for {torrent_hash}: {e}")
            torrent_content = None

    return {
        "torrent_hash": torrent_hash,
        "details_link": details_link,
        "category": category,
        "title_visible": visible_name,
        "title_full": full_title,
        "size_pretty": size_pretty,
        "added_datetime": added_mysql,
        "preview_image": img_link,
        "seeders": seeders_number,
        "seeders_link": seeders_link,
        "leechers": leechers_number,
        "leechers_link": leechers_link,
        "torrent_filename": torrent_filename,
        # Stays None when the payload is already stored.
        "torrent_content": torrent_content,
        "is_new_torrent": not already_have_torrent,
    }
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 6) INSERT SQL
|
||||
# ============================================================
|
||||
|
||||
insert_sql = """
|
||||
INSERT INTO torrents (
|
||||
torrent_hash, details_link, category, title_visible, title_full,
|
||||
size_pretty, added_datetime, preview_image,
|
||||
seeders, seeders_link, leechers, leechers_link,
|
||||
torrent_filename, torrent_content
|
||||
) VALUES (
|
||||
%(torrent_hash)s, %(details_link)s, %(category)s, %(title_visible)s, %(title_full)s,
|
||||
%(size_pretty)s, %(added_datetime)s, %(preview_image)s,
|
||||
%(seeders)s, %(seeders_link)s, %(leechers)s, %(leechers_link)s,
|
||||
%(torrent_filename)s, %(torrent_content)s
|
||||
)
|
||||
ON DUPLICATE KEY UPDATE
|
||||
details_link = VALUES(details_link),
|
||||
category = VALUES(category),
|
||||
title_visible = VALUES(title_visible),
|
||||
title_full = VALUES(title_full),
|
||||
size_pretty = VALUES(size_pretty),
|
||||
added_datetime = VALUES(added_datetime),
|
||||
preview_image = VALUES(preview_image),
|
||||
seeders = VALUES(seeders),
|
||||
seeders_link = VALUES(seeders_link),
|
||||
leechers = VALUES(leechers),
|
||||
leechers_link = VALUES(leechers_link),
|
||||
torrent_filename = VALUES(torrent_filename),
|
||||
torrent_content = COALESCE(VALUES(torrent_content), torrent_content);
|
||||
"""
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 7) PROCESS FIRST PAGE ONLY
|
||||
# ============================================================
|
||||
|
||||
print("\n🌐 Loading FIRST page")
driver.get(START_URL)
time.sleep(2)

close_popup_if_any()

rows = driver.find_elements(By.CSS_SELECTOR, "table tr")

# Real torrent rows are the <tr> elements with exactly 7 <td> cells.
# Each row's cells are fetched once; every find_elements call is a
# separate WebDriver request.
real_rows = []
for r in rows:
    tds = r.find_elements(By.TAG_NAME, "td")
    if len(tds) == 7:
        real_rows.append(tds)

print(f"📄 Found {len(real_rows)} torrent rows")

for cells in real_rows:
    try:
        data = parse_row(cells)
    except Exception as e:
        print(f"⚠️ parse_row failed: {e}")
        continue

    if not data:
        continue

    processed_count += 1

    # Feed the counters that the email report prints.
    if data["is_new_torrent"]:
        new_torrent_count += 1
        new_titles.append(data["title_visible"])
    else:
        existing_torrent_count += 1

    print("💾 Saving:", data["title_visible"])
    cursor.execute(insert_sql, data)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 8) SEND EMAIL REPORT
|
||||
# ============================================================
|
||||
|
||||
RUN_END = datetime.datetime.now()

subject = f"SKTorrent hourly run – {RUN_START:%Y-%m-%d %H:%M}"

# Plain-text report: run window, counters, then the titles of new torrents.
lines = [
    f"Run started: {RUN_START:%Y-%m-%d %H:%M:%S}",
    f"Run finished: {RUN_END:%Y-%m-%d %H:%M:%S}",
    "",
    f"Processed torrents: {processed_count}",
    f"New torrent files downloaded: {new_torrent_count}",
    f"Already known torrents: {existing_torrent_count}",
]

if new_titles:
    lines.append("")
    lines.append("New torrents:")
    lines.extend(f"- {t}" for t in new_titles)

body = "\n".join(lines)

try:
    send_mail(
        to="vladimir.buzalka@buzalka.cz",
        subject=subject,
        body=body,
        html=False,
    )
    print("📧 Email report sent.")
finally:
    # Close Chrome even when the report cannot be delivered.
    driver.quit()

print("🎉 DONE")
|
||||
337
Reporter_TorrentsManipulation.py
Normal file
337
Reporter_TorrentsManipulation.py
Normal file
@@ -0,0 +1,337 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
import pymysql
|
||||
import qbittorrentapi
|
||||
import bencodepy
|
||||
|
||||
from EmailMessagingGraph import send_mail
|
||||
|
||||
|
||||
# ==============================
|
||||
# ⚙ CONFIGURATION
|
||||
# ==============================
|
||||
|
||||
DB_CONFIG = {
|
||||
"host": "192.168.1.76",
|
||||
"port": 3307,
|
||||
"user": "root",
|
||||
"password": "Vlado9674+",
|
||||
"database": "torrents",
|
||||
"charset": "utf8mb4",
|
||||
"autocommit": True,
|
||||
}
|
||||
|
||||
QBT_CONFIG = {
|
||||
"host": "192.168.1.76",
|
||||
"port": 8080,
|
||||
"username": "admin",
|
||||
"password": "adminadmin",
|
||||
}
|
||||
|
||||
MAX_ACTIVE_DOWNLOADS = 10
|
||||
DEAD_TORRENT_MINUTES = 5
|
||||
DEFAULT_SAVE_PATH = None
|
||||
|
||||
MAIL_TO = "vladimir.buzalka@buzalka.cz"
|
||||
|
||||
MAX_LIST_ITEMS = 50 # cap lists in email
|
||||
|
||||
|
||||
# ==============================
|
||||
# 🧮 RUNTIME STATS + LISTS
|
||||
# ==============================
|
||||
|
||||
RUN_START = datetime.now()
|
||||
|
||||
stat_synced = 0
|
||||
stat_completed = 0
|
||||
stat_dead = 0
|
||||
stat_enqueued = 0
|
||||
|
||||
deleted_completed = [] # list[str]
|
||||
deleted_dead = [] # list[str]
|
||||
added_new = [] # list[str]
|
||||
active_downloading = [] # list[str]
|
||||
|
||||
|
||||
# ==============================
|
||||
# 🔧 CONNECT
|
||||
# ==============================
|
||||
|
||||
db = pymysql.connect(**DB_CONFIG)
|
||||
cursor = db.cursor(pymysql.cursors.DictCursor)
|
||||
|
||||
qb = qbittorrentapi.Client(**QBT_CONFIG)
|
||||
|
||||
try:
|
||||
qb.auth_log_in()
|
||||
print("✅ Connected to qBittorrent.")
|
||||
except Exception as e:
|
||||
raise SystemExit(f"❌ Could not connect to qBittorrent: {e}")
|
||||
|
||||
|
||||
# ==============================
|
||||
# 🧪 TORRENT VALIDATION
|
||||
# ==============================
|
||||
|
||||
def is_valid_torrent(blob: bytes) -> bool:
    """Return True when blob bencode-decodes to a dict holding b"info".

    Any decoding error counts as "not a valid torrent".
    """
    try:
        decoded = bencodepy.decode(blob)
    except Exception:
        return False
    if not isinstance(decoded, dict):
        return False
    return b"info" in decoded
|
||||
|
||||
|
||||
# ==============================
|
||||
# 🔄 SYNC FROM QB → DB
|
||||
# ==============================
|
||||
|
||||
def sync_qb_to_db():
    """Copy the state of every torrent in qBittorrent into the torrents table.

    Rows are matched on qb_hash or torrent_hash. qb_hash and
    qb_completed_datetime are only filled in when still NULL; state,
    progress (in percent) and save path are overwritten on every run.
    stat_synced is set to the number of torrents reported by qBittorrent.
    """
    global stat_synced

    torrents = qb.torrents_info()
    stat_synced = len(torrents)

    for t in torrents:
        completion_dt = None
        completion_ts = getattr(t, "completion_on", 0) or 0
        # Only a positive epoch is a real completion time. A negative value
        # is truthy and would otherwise be stored as a pre-1970 datetime.
        # NOTE(review): presumably qBittorrent reports a non-positive value
        # for unfinished torrents — confirm against the running version.
        if completion_ts > 0:
            try:
                completion_dt = datetime.fromtimestamp(completion_ts)
            except (OverflowError, OSError, ValueError):
                completion_dt = None

        cursor.execute("""
            UPDATE torrents
            SET qb_added = 1,
                qb_hash = COALESCE(qb_hash, %s),
                qb_state = %s,
                qb_progress = %s,
                qb_savepath = %s,
                qb_completed_datetime =
                    IF(%s IS NOT NULL AND qb_completed_datetime IS NULL, %s, qb_completed_datetime),
                qb_last_update = NOW()
            WHERE qb_hash = %s OR torrent_hash = %s
        """, (
            t.hash,
            t.state,
            float(t.progress) * 100.0,
            getattr(t, "save_path", None),
            completion_dt,
            completion_dt,
            t.hash,
            t.hash,
        ))
|
||||
|
||||
|
||||
# ==============================
|
||||
# 🧹 HANDLE COMPLETED + DEAD
|
||||
# ==============================
|
||||
|
||||
def handle_completed_and_dead():
    """Remove finished and dead torrents from qBittorrent and record it.

    Finished (progress >= 1.0 or a seeding-type state): removed from
    qBittorrent with its data kept, and marked 'completed' in the DB.
    Dead (properties report last_seen == -1 and the torrent was added more
    than DEAD_TORRENT_MINUTES ago): removed together with its files, and
    marked 'dead' in the DB.
    Names go to deleted_completed / deleted_dead and the stat counters are
    bumped even when the qBittorrent delete call fails.
    """
    global stat_completed, stat_dead

    finished_states = {"completed", "uploading", "stalledUP", "queuedUP"}
    dead_after = timedelta(minutes=DEAD_TORRENT_MINUTES)

    for t in qb.torrents_info():
        t_hash = t.hash
        state = t.state
        progress = float(t.progress)

        # Completed
        if progress >= 1.0 or state in finished_states:
            stat_completed += 1
            deleted_completed.append(t.name)

            try:
                qb.torrents_delete(torrent_hashes=t_hash, delete_files=False)
            except Exception as e:
                # The name stays in the report; the failure is only logged.
                print(f"⚠️ delete (keep data) failed for {t.name}: {e}")

            cursor.execute("""
                UPDATE torrents
                SET qb_state='completed',
                    qb_progress=100,
                    qb_completed_datetime=NOW(),
                    qb_last_update=NOW()
                WHERE qb_hash=%s OR torrent_hash=%s
            """, (t_hash, t_hash))
            continue

        # Dead: properties unavailable means we cannot judge, so skip.
        try:
            props = qb.torrents_properties(t_hash)
        except Exception:
            continue

        if getattr(props, "last_seen", 0) != -1:
            continue

        added_dt = getattr(t, "added_on", 0)
        if not added_dt:
            continue

        if not (datetime.now() - datetime.fromtimestamp(added_dt) > dead_after):
            continue

        stat_dead += 1
        deleted_dead.append(t.name)

        try:
            qb.torrents_delete(torrent_hashes=t_hash, delete_files=True)
        except Exception as e:
            print(f"⚠️ delete (files) failed for {t.name}: {e}")

        cursor.execute("""
            UPDATE torrents
            SET qb_state='dead',
                qb_last_update=NOW()
            WHERE qb_hash=%s OR torrent_hash=%s
        """, (t_hash, t_hash))
|
||||
|
||||
|
||||
# ==============================
|
||||
# 📊 ACTIVE DOWNLOADS
|
||||
# ==============================
|
||||
|
||||
def count_active_downloads():
    """Return how many torrents in qBittorrent have progress below 1.0."""
    unfinished = [
        entry for entry in qb.torrents_info() if float(entry.progress) < 1.0
    ]
    return len(unfinished)
|
||||
|
||||
|
||||
def snapshot_active_downloading():
    """
    Describe every torrent whose progress is still below 100%.

    Returns a sorted list of strings of the form
    ``"<name> — <percent>% — <state>"``.
    """
    summary = [
        f"{entry.name} — {fraction*100:.1f}% — {entry.state}"
        for entry in qb.torrents_info()
        if (fraction := float(entry.progress)) < 1.0
    ]
    summary.sort()
    return summary
|
||||
|
||||
|
||||
# ==============================
|
||||
# ➕ ENQUEUE NEW TORRENTS
|
||||
# ==============================
|
||||
|
||||
def enqueue_new_torrents():
    """
    Fill free download slots with not-yet-added torrents from the DB.

    The number of free slots is ``MAX_ACTIVE_DOWNLOADS`` minus the current
    active-download count; nothing happens when there are none. Up to that
    many rows with ``qb_added`` NULL/0 and a non-NULL ``torrent_content`` are
    selected, newest ``added_datetime`` first. For each row:

    * content that fails ``is_valid_torrent`` marks the row ``invalid`` and
      clears ``torrent_content``;
    * otherwise the content is handed to ``qb.torrents_add``; if that raises,
      the error is printed and the row is left untouched;
    * on success the row is flagged ``qb_added=1`` / ``qb_state='added'``,
      ``stat_enqueued`` is incremented and the torrent is appended to
      ``added_new``.

    Rows are read by key (``row["..."]`` / ``row.get``), so the cursor must
    return mapping-like rows.
    """
    global stat_enqueued

    free_slots = MAX_ACTIVE_DOWNLOADS - count_active_downloads()
    if free_slots <= 0:
        return

    select_pending_sql = """
        SELECT id, torrent_hash, torrent_content, torrent_filename
        FROM torrents
        WHERE (qb_added IS NULL OR qb_added = 0)
          AND torrent_content IS NOT NULL
        ORDER BY added_datetime DESC
        LIMIT %s
    """
    mark_invalid_sql = """
        UPDATE torrents
        SET qb_state='invalid',
            torrent_content=NULL,
            qb_last_update=NOW()
        WHERE id=%s
    """
    mark_added_sql = """
        UPDATE torrents
        SET qb_added=1,
            qb_hash=COALESCE(qb_hash, %s),
            qb_state='added',
            qb_last_update=NOW()
        WHERE id=%s
    """

    cursor.execute(select_pending_sql, (free_slots,))

    # fetchall() materialises the result, so reusing the cursor below is safe.
    for record in cursor.fetchall():
        payload = record["torrent_content"]
        if not payload:
            continue

        if not is_valid_torrent(payload):
            cursor.execute(mark_invalid_sql, (record["id"],))
            continue

        # Hand the torrent content to qBittorrent.
        try:
            qb.torrents_add(torrent_files=payload, savepath=DEFAULT_SAVE_PATH)
        except Exception as err:
            print(f"❌ Failed to add {record['torrent_hash']}: {err}")
            continue

        stat_enqueued += 1
        added_new.append(record.get("torrent_filename") or record["torrent_hash"])

        cursor.execute(mark_added_sql, (record["torrent_hash"], record["id"]))
|
||||
|
||||
|
||||
# ==============================
|
||||
# ✉️ EMAIL HELPERS
|
||||
# ==============================
|
||||
|
||||
def format_list(title: str, items: list[str]) -> list[str]:
    """
    Render a titled bullet list as report lines.

    An empty ``items`` yields the single line ``"<title>: (none)"``.
    Otherwise the result is a ``"<title>: <count>"`` header, one bullet line
    for each of the first ``MAX_LIST_ITEMS`` items, and a trailing
    ``"(+N more)"`` line when items were left out.
    """
    if not items:
        return [f"{title}: (none)"]

    total = len(items)
    rendered = [f"{title}: {total}"]
    rendered.extend(f" - {entry}" for entry in items[:MAX_LIST_ITEMS])

    hidden = total - MAX_LIST_ITEMS
    if hidden > 0:
        rendered.append(f" ... (+{hidden} more)")
    return rendered
|
||||
|
||||
|
||||
# ==============================
|
||||
# 🏁 MAIN (ONE RUN)
|
||||
# ==============================
|
||||
|
||||
print("🚀 QB worker run started")

try:
    # The steps run strictly in this order; an exception in any of them
    # aborts the run (after the connection is closed below).
    for step in (sync_qb_to_db, handle_completed_and_dead, enqueue_new_torrents):
        step()

    # Taken last, after deletions and enqueueing, so the report shows the
    # end state of this run.
    active_downloading = snapshot_active_downloading()
finally:
    # Always release the database connection, whether or not the run succeeded.
    db.close()
|
||||
|
||||
|
||||
# ==============================
|
||||
# 📧 EMAIL REPORT
|
||||
# ==============================
|
||||
|
||||
# Build and send the plain-text summary of this run. This point is reached
# only when the run above finished without raising, so `active_downloading`
# and all stat_* counters are populated.
RUN_END = datetime.now()

body_lines = [
    f"Run started : {RUN_START:%Y-%m-%d %H:%M:%S}",
    f"Run finished: {RUN_END:%Y-%m-%d %H:%M:%S}",
    "",
    f"QB torrents synced : {stat_synced}",
    f"Completed removed : {stat_completed}",
    f"Dead removed : {stat_dead}",
    f"New torrents added : {stat_enqueued}",
    # `active_downloading` is the list built by snapshot_active_downloading(),
    # so len() gives the count directly.
    f"Active downloads : {len(active_downloading)}",
    "",
]

# Detail sections, separated by blank lines; format_list() truncates each one.
body_lines.extend(format_list("Deleted (completed, kept data)", deleted_completed))
body_lines.append("")
body_lines.extend(format_list("Deleted (dead, deleted files)", deleted_dead))
body_lines.append("")
body_lines.extend(format_list("Newly added to qBittorrent", added_new))
body_lines.append("")
body_lines.extend(format_list("Actively downloading now", active_downloading))

send_mail(
    to=MAIL_TO,
    subject=f"qBittorrent worker – {RUN_START:%Y-%m-%d %H:%M}",
    body="\n".join(body_lines),
    html=False,
)

print("📧 Email report sent")
print("🎉 DONE")
|
||||
Reference in New Issue
Block a user