diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..323750b --- /dev/null +++ b/.gitignore @@ -0,0 +1,14 @@ +# Virtual environment +.venv/ + +# Python +__pycache__/ +*.pyc +*.log + +# IDE +.idea/ + +# OS +.DS_Store +Thumbs.db diff --git a/30 OpenTextLIsting v5.py b/30 OpenTextLIsting v5.py new file mode 100644 index 0000000..35ca7b9 --- /dev/null +++ b/30 OpenTextLIsting v5.py @@ -0,0 +1,390 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import pymysql +from selenium import webdriver +from selenium.webdriver.common.by import By +from selenium.webdriver.chrome.options import Options +import time +import re +import urllib.parse as urlparse +from pathlib import Path +import json +import requests + + +# ============================================================ +# 1) MySQL CONNECTION +# ============================================================ + +db = pymysql.connect( + host="192.168.1.76", + port=3307, + user="root", + password="Vlado9674+", + database="torrents", + charset="utf8mb4", + autocommit=True +) + +cursor = db.cursor() + + +# ============================================================ +# 2) Selenium setup +# ============================================================ + +COOKIE_FILE = Path("sktorrent_cookies.json") + +# Start URL pro kategorii 24, seřazeno podle data DESC +START_URL = ( + "https://sktorrent.eu/torrent/torrents.php" + "?search=&category=24&zaner=&jazyk=&active=0" +) + +chrome_options = Options() +chrome_options.add_argument("--start-maximized") +chrome_options.add_argument("--disable-notifications") +chrome_options.add_argument("--disable-popup-blocking") +chrome_options.add_argument("--disable-extensions") + +driver = webdriver.Chrome(options=chrome_options) + +# Pozice a velikost okna (aby nepřekrývalo PyCharm) +driver.set_window_position(380, 50) # 10 cm od levého okraje +driver.set_window_size(1350, 1000) # můžeš změnit dle monitoru + + +# Nejprve otevřeme hlavní stránku kvůli doméně pro cookies 
def _added_to_mysql(added_text):
    """Convert the listing's 'DD/MM/YYYY o HH:MM[:SS]' text to a MySQL datetime.

    Returns a 'YYYY-MM-DD HH:MM:SS' string, or None when the text does not
    start with a recognisable date. Returning None makes an unexpected format
    store NULL instead of raising ValueError or sending MySQL a malformed
    value. Missing time defaults to 00:00:00; missing seconds default to :00.
    """
    cleaned = added_text.replace(" o ", " ").strip()
    match = re.match(
        r"(\d{1,2})/(\d{1,2})/(\d{4})(?:\s+(\d{1,2}):(\d{2})(?::(\d{2}))?)?",
        cleaned,
    )
    if not match:
        return None
    day, month, year, hour, minute, second = match.groups()
    return (
        f"{year}-{int(month):02d}-{int(day):02d} "
        f"{int(hour or 0):02d}:{minute or '00'}:{second or '00'}"
    )


def _preview_image_url(cell):
    """Return the preview image URL from the 'Obrázok' link's onmouseover, or None.

    The preview is optional, so any lookup problem is reported and treated as
    "no image" rather than aborting the row.
    """
    try:
        image_links = cell.find_elements(
            By.XPATH,
            ".//a[contains(text(),'Obrázok')]"
        )
        if not image_links:
            return None
        # get_attribute returns None when the attribute is absent.
        mouseover = image_links[0].get_attribute("onmouseover") or ""
        img_match = re.search(r"src=([^ ]+)", mouseover)
        if not img_match:
            return None
        img_link = img_match.group(1).replace("'", "").strip()
        # Protocol-relative URL: make it absolute.
        if img_link.startswith("//"):
            img_link = "https:" + img_link
        return img_link
    except Exception as exc:
        print(f"⚠️ Could not read preview image: {exc}")
        return None


def parse_row(cells):
    """Parse one torrent row of the listing table into a dict for the DB upsert.

    Args:
        cells: the 7 ``<td>`` WebElements of a row, laid out as
            0: category
            1: download link (.torrent)
            2: title + size + date added + 'Obrázok' preview link + genre
            3: ignored
            4: seeders
            5: leechers
            6: completed

    Returns:
        A dict with keys torrent_hash, details_link, category, title_visible,
        title_full, size_pretty, added_datetime, preview_image, seeders,
        seeders_link, leechers, leechers_link, torrent_filename and
        torrent_content; or None when the row lacks a download link, a title
        link, a details href, or an ``id`` query parameter.

        ``torrent_content`` is None when the file is already stored in the
        database or when the download failed.

    Raises:
        NoSuchElementException / ValueError: if the seeders or leechers cell
        has no link or its text is not an integer (unchanged behaviour).
    """
    # 1) Category
    category = cells[0].text.strip()

    # 2) Download link for the .torrent file.
    # find_elements returns [] instead of raising, so no exception handler is
    # needed to detect a missing link.
    download_anchors = cells[1].find_elements(By.TAG_NAME, "a")
    download_link = (
        download_anchors[0].get_attribute("href") if download_anchors else None
    )
    if not download_link:
        print("⚠️ No download link in row, skipping.")
        return None

    dl_query = urlparse.parse_qs(urlparse.urlparse(download_link).query)
    torrent_filename = dl_query.get("f", ["unknown.torrent"])[0]

    # 3) Title and details link: first anchor in cell 2.
    title_links = cells[2].find_elements(By.TAG_NAME, "a")
    if not title_links:
        print("⚠️ No title link — skipping row")
        return None

    a_tag = title_links[0]
    visible_name = a_tag.text.strip()
    full_title = a_tag.get_attribute("title")
    details_link = a_tag.get_attribute("href")

    if not details_link:
        print("⚠️ Row has no details link — skipping")
        return None

    # The value of the details URL's ?id= parameter is used as the torrent key.
    query = urlparse.parse_qs(urlparse.urlparse(details_link).query)
    if "id" not in query:
        print("⚠️ Skipping row with no torrent ID →", details_link)
        return None
    torrent_hash = query["id"][0]

    # 4) Size and date, read from the cell's whitespace-normalised text.
    text_block = cells[2].get_attribute("innerText") or ""
    text_block_clean = " ".join(text_block.split())

    size_match = re.search(
        r"Velkost ([0-9\.]+ ?[KMGT]B)", text_block_clean, re.IGNORECASE
    )
    added_match = re.search(
        r"Pridany (.+?)(?:\sObrázok|$)", text_block_clean, re.IGNORECASE
    )

    size_pretty = size_match.group(1) if size_match else None
    added_mysql = _added_to_mysql(added_match.group(1)) if added_match else None

    # 5) Optional preview image
    img_link = _preview_image_url(cells[2])

    # 6) Seeders / leechers
    seeders_a = cells[4].find_element(By.TAG_NAME, "a")
    seeders_number = int(seeders_a.text.strip())
    seeders_link = seeders_a.get_attribute("href")

    leechers_a = cells[5].find_element(By.TAG_NAME, "a")
    leechers_number = int(leechers_a.text.strip())
    leechers_link = leechers_a.get_attribute("href")

    # 7) Is the .torrent payload already stored for this key?
    cursor.execute(
        "SELECT torrent_content FROM torrents WHERE torrent_hash=%s",
        (torrent_hash,)
    )
    row = cursor.fetchone()
    already_have_torrent = row is not None and row[0] is not None

    # 8) Download the .torrent only when it is not stored yet.
    torrent_content = None
    if already_have_torrent:
        print(f"   ↪️ Torrent file already stored, skipping download ({torrent_filename})")
    else:
        time.sleep(3)  # pause between torrent downloads
        try:
            # Without a timeout a stalled connection would block the crawl forever.
            resp = requests_session.get(download_link, timeout=30)
            resp.raise_for_status()
            torrent_content = resp.content
        except Exception as e:
            print(f"⚠️ Could not download torrent file for {torrent_hash}: {e}")
            torrent_content = None

    return {
        "torrent_hash": torrent_hash,
        "details_link": details_link,
        "category": category,
        "title_visible": visible_name,
        "title_full": full_title,
        "size_pretty": size_pretty,
        "added_datetime": added_mysql,
        "preview_image": img_link,
        "seeders": seeders_number,
        "seeders_link": seeders_link,
        "leechers": leechers_number,
        "leechers_link": leechers_link,
        "torrent_filename": torrent_filename,
        # None when already stored (or download failed); the upsert's COALESCE
        # then keeps the existing column value.
        "torrent_content": torrent_content,
    }
rows = driver.find_elements(By.CSS_SELECTOR, "table tr") + + real_rows = [] + for row in rows: + cells = row.find_elements(By.TAG_NAME, "td") + + # REAL TORRENT ROWS ALWAYS HAVE EXACTLY 7 TD CELLS + if len(cells) == 7: + real_rows.append(cells) + + print(f"📄 Page {page_index}: {len(real_rows)} torrent rows") + + for cells in real_rows: + data = parse_row(cells) + if not data: + continue + + print(f" 💾 [{page_index}] Saving:", data["title_visible"]) + cursor.execute(insert_sql, data) + + +# ============================================================ +# 8) Hlavní stránkovací cyklus +# ============================================================ + +current_url = START_URL +page_index = 0 + +while True: + print(f"\n🌐 Loading page {page_index}: {current_url}") + driver.get(current_url) + time.sleep(2) + + # zavři popup, pokud je + close_popup_if_any() + + # zpracuj aktuální stránku + process_current_page(page_index) + + # pokus se najít tlačítko "Dalsi >>" + try: + next_btn = driver.find_element( + By.XPATH, + "//a[b[contains(text(),'Dalsi')]]" + ) + next_url = next_btn.get_attribute("href") + + if not next_url: + print("⛔ Next link has no href, stopping.") + break + + # pokud je relativní, doplň doménu + if next_url.startswith("/"): + next_url = "https://sktorrent.eu" + next_url + + # když by náhodou bylo stejné URL → přeruš nekonečnou smyčku + if next_url == current_url: + print("⛔ Next URL equals current URL, stopping.") + break + + print("➡️ Next page:", next_url) + current_url = next_url + page_index += 1 + + # malá pauza mezi stránkami + time.sleep(1) + + except Exception: + print("✅ No 'Dalsi >>' link found, reached last page. 
Done.") + break + + +print("\n🎉 DONE — All pages processed, torrents saved & torrent files downloaded (without re-downloading existing ones).") +driver.quit() diff --git a/30 OpenTextListing v2.py b/30 OpenTextListing v2.py new file mode 100644 index 0000000..ab396fc --- /dev/null +++ b/30 OpenTextListing v2.py @@ -0,0 +1,256 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import pymysql +from selenium import webdriver +from selenium.webdriver.common.by import By +from selenium.webdriver.chrome.options import Options +import time +import re +import urllib.parse as urlparse +from pathlib import Path +import json + + +# ============================================================ +# 1) MySQL CONNECTION +# ============================================================ + +db = pymysql.connect( + host="192.168.1.76", + port=3307, + user="root", + password="Vlado9674+", + database="torrents", + charset="utf8mb4", + autocommit=True +) + +cursor = db.cursor() + + +# ============================================================ +# 2) Selenium setup +# ============================================================ + +COOKIE_FILE = Path("sktorrent_cookies.json") +URL = "https://sktorrent.eu/torrent/torrents.php?active=0&category=24&order=data&by=DESC&zaner=&jazyk=&page=0" + +chrome_options = Options() +chrome_options.add_argument("--start-maximized") +chrome_options.add_argument("--disable-notifications") +chrome_options.add_argument("--disable-popup-blocking") +chrome_options.add_argument("--disable-extensions") + +driver = webdriver.Chrome(options=chrome_options) + +driver.get("https://sktorrent.eu") + +# Load cookies +if COOKIE_FILE.exists(): + with open(COOKIE_FILE, "r") as f: + cookies = json.load(f) + for c in cookies: + driver.add_cookie(c) + print("🍪 Cookies loaded.") + +driver.get(URL) +time.sleep(2) + +# ============================================================ +# Close interstitial popup reliably +# ============================================================ 
def parse_row(cells):
    """Extract one torrent's listing fields from the ``<td>`` cells of a row.

    Reads the category from cell 0, the title/details link, size, date added
    and optional preview image from cell 2, the seeders from cell 4 and the
    leechers from cell 5.

    Args:
        cells: list of the row's ``<td>`` WebElements (callers pass rows with
            exactly 7 cells).

    Returns:
        A dict with keys torrent_hash, details_link, category, title_visible,
        title_full, size_pretty, added_datetime, preview_image, seeders,
        seeders_link, leechers and leechers_link; or None when the row has no
        title link, no details href, or no ``id`` query parameter.

    Raises:
        NoSuchElementException / ValueError: if the seeders or leechers cell
        has no link or its text is not an integer (unchanged behaviour).
    """
    # 1) Category
    category = cells[0].text.strip()

    # 2) Title and details link: first anchor in cell 2.
    title_links = cells[2].find_elements(By.TAG_NAME, "a")
    if not title_links:
        print("⚠️ Missing title link — skipping row")
        return None

    a_tag = title_links[0]
    visible_name = a_tag.text.strip()
    full_title = a_tag.get_attribute("title")
    details_link = a_tag.get_attribute("href")

    if not details_link:
        print("⚠️ Row has no details link — skipping")
        return None

    # 3) The value of the details URL's ?id= parameter is the torrent key.
    query = urlparse.parse_qs(urlparse.urlparse(details_link).query)
    if "id" not in query:
        print("⚠️ Skipping row with no torrent ID →", details_link)
        return None
    torrent_hash = query["id"][0]

    # 4) Size and date, read from the cell's whitespace-normalised text.
    text_block = cells[2].get_attribute("innerText") or ""
    text_block_clean = " ".join(text_block.split())

    size_match = re.search(
        r"Velkost ([0-9\.]+ ?[KMGT]B)", text_block_clean, re.IGNORECASE
    )
    added_match = re.search(
        r"Pridany (.+?)(?:\sObrázok|$)", text_block_clean, re.IGNORECASE
    )
    size_pretty = size_match.group(1) if size_match else None

    # "18/11/2025 o 07:00" -> "2025-11-18 07:00:00". The pattern validates the
    # shape first, so an unexpected date stores NULL instead of raising
    # ValueError or sending MySQL a malformed value.
    added_mysql = None
    if added_match:
        cleaned = added_match.group(1).replace(" o ", " ").strip()
        stamp = re.match(
            r"(\d{1,2})/(\d{1,2})/(\d{4})(?:\s+(\d{1,2}):(\d{2})(?::(\d{2}))?)?",
            cleaned,
        )
        if stamp:
            day, month, year, hour, minute, second = stamp.groups()
            added_mysql = (
                f"{year}-{int(month):02d}-{int(day):02d} "
                f"{int(hour or 0):02d}:{minute or '00'}:{second or '00'}"
            )
        else:
            print(f"⚠️ Unrecognised date format: {cleaned!r}")

    # 5) Optional preview image, taken from the 'Obrázok' link's onmouseover.
    # find_elements returns [] when the link is absent, so the common
    # "no image" case needs no exception handling; anything unexpected is
    # reported instead of being silently swallowed.
    img_link = None
    try:
        image_links = cells[2].find_elements(
            By.XPATH, ".//a[contains(text(),'Obrázok')]"
        )
        if image_links:
            mouseover = image_links[0].get_attribute("onmouseover") or ""
            img_match = re.search(r"src=([^ ]+)", mouseover)
            if img_match:
                img_link = img_match.group(1).replace("'", "").strip()
                # Protocol-relative URL: make it absolute.
                if img_link.startswith("//"):
                    img_link = "https:" + img_link
    except Exception as exc:
        print(f"⚠️ Could not read preview image: {exc}")
        img_link = None

    # 6) Seeders
    seeders_a = cells[4].find_element(By.TAG_NAME, "a")
    seeders_number = int(seeders_a.text.strip())
    seeders_link = seeders_a.get_attribute("href")

    # 7) Leechers
    leechers_a = cells[5].find_element(By.TAG_NAME, "a")
    leechers_number = int(leechers_a.text.strip())
    leechers_link = leechers_a.get_attribute("href")

    return {
        "torrent_hash": torrent_hash,
        "details_link": details_link,
        "category": category,
        "title_visible": visible_name,
        "title_full": full_title,
        "size_pretty": size_pretty,
        "added_datetime": added_mysql,
        "preview_image": img_link,
        "seeders": seeders_number,
        "seeders_link": seeders_link,
        "leechers": leechers_number,
        "leechers_link": leechers_link,
    }
import json
import os
import re
import time
import urllib.parse as urlparse
from pathlib import Path

import pymysql
import requests
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By


# ============================================================
# 1) MySQL CONNECTION
# ============================================================

# Connection settings can be overridden through environment variables.
# NOTE(review): the fallback values keep the script runnable as before, but a
# root password committed to the repository should be rotated and the
# fallback removed once TORRENTS_DB_PASSWORD is configured.
db = pymysql.connect(
    host=os.environ.get("TORRENTS_DB_HOST", "192.168.1.76"),
    port=int(os.environ.get("TORRENTS_DB_PORT", "3307")),
    user=os.environ.get("TORRENTS_DB_USER", "root"),
    password=os.environ.get("TORRENTS_DB_PASSWORD", "Vlado9674+"),
    database=os.environ.get("TORRENTS_DB_NAME", "torrents"),
    charset="utf8mb4",
    autocommit=True
)

cursor = db.cursor()


# ============================================================
# 2) Selenium setup
# ============================================================

COOKIE_FILE = Path("sktorrent_cookies.json")
URL = "https://sktorrent.eu/torrent/torrents.php?active=0"

chrome_options = Options()
chrome_options.add_argument("--start-maximized")
chrome_options.add_argument("--disable-notifications")
chrome_options.add_argument("--disable-popup-blocking")
chrome_options.add_argument("--disable-extensions")

driver = webdriver.Chrome(options=chrome_options)

# Open the site root first: cookies can only be added for the domain of the
# currently loaded page.
driver.get("https://sktorrent.eu")

# Load cookies saved from an earlier session.
if COOKIE_FILE.exists():
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(COOKIE_FILE, "r", encoding="utf-8") as f:
        cookies = json.load(f)
    for c in cookies:
        driver.add_cookie(c)
    print("🍪 Cookies loaded.")

driver.get(URL)
time.sleep(2)


# ============================================================
# 3) Close interstitial popup robustly
# ============================================================

# The JavaScript has its own try/catch, so Python only sees an exception when
# the WebDriver call itself fails. Catch Exception rather than everything so
# Ctrl+C still stops the script.
try:
    driver.execute_script("try { interstitialBox.closeit(); } catch(e) {}")
    print("🧹 Popup closed via JS fallback.")
    time.sleep(1)
except Exception:
    print("ℹ️ No popup found.")


# ============================================================
# Convert Selenium cookies → Python requests cookies
#
def _added_to_mysql(added_text):
    """Convert the listing's 'DD/MM/YYYY o HH:MM[:SS]' text to a MySQL datetime.

    Returns a 'YYYY-MM-DD HH:MM:SS' string, or None when the text does not
    start with a recognisable date. Returning None makes an unexpected format
    store NULL instead of raising ValueError or sending MySQL a malformed
    value. Missing time defaults to 00:00:00; missing seconds default to :00.
    """
    cleaned = added_text.replace(" o ", " ").strip()
    match = re.match(
        r"(\d{1,2})/(\d{1,2})/(\d{4})(?:\s+(\d{1,2}):(\d{2})(?::(\d{2}))?)?",
        cleaned,
    )
    if not match:
        return None
    day, month, year, hour, minute, second = match.groups()
    return (
        f"{year}-{int(month):02d}-{int(day):02d} "
        f"{int(hour or 0):02d}:{minute or '00'}:{second or '00'}"
    )


def _preview_image_url(cell):
    """Return the preview image URL from the 'Obrázok' link's onmouseover, or None.

    The preview is optional, so any lookup problem is reported and treated as
    "no image" rather than aborting the row.
    """
    try:
        image_links = cell.find_elements(
            By.XPATH, ".//a[contains(text(),'Obrázok')]"
        )
        if not image_links:
            return None
        # get_attribute returns None when the attribute is absent.
        mouseover = image_links[0].get_attribute("onmouseover") or ""
        img_match = re.search(r"src=([^ ]+)", mouseover)
        if not img_match:
            return None
        img_link = img_match.group(1).replace("'", "").strip()
        # Protocol-relative URL: make it absolute.
        if img_link.startswith("//"):
            img_link = "https:" + img_link
        return img_link
    except Exception as exc:
        print(f"⚠️ Could not read preview image: {exc}")
        return None


def parse_row(cells):
    """Parse one torrent row and download its .torrent file.

    Reads the category from cell 0, the download link from cell 1, the
    title/details link, size, date added and optional preview image from
    cell 2, the seeders from cell 4 and the leechers from cell 5.

    Args:
        cells: list of the row's ``<td>`` WebElements (callers pass rows with
            exactly 7 cells).

    Returns:
        A dict with keys torrent_hash, details_link, category, title_visible,
        title_full, size_pretty, added_datetime, preview_image, seeders,
        seeders_link, leechers, leechers_link, torrent_filename and
        torrent_content; or None when the row lacks a download link, a title
        link, a details href, or an ``id`` query parameter.

        ``torrent_content`` is None when the download failed or the server
        answered with an HTTP error status.

    Raises:
        NoSuchElementException / ValueError: if the seeders or leechers cell
        has no link or its text is not an integer (unchanged behaviour).
    """
    # 1) Category
    category = cells[0].text.strip()

    # 2) Download link for the .torrent file.
    # find_elements returns [] instead of raising, so no exception handler is
    # needed to detect a missing link.
    download_anchors = cells[1].find_elements(By.TAG_NAME, "a")
    download_link = (
        download_anchors[0].get_attribute("href") if download_anchors else None
    )
    if not download_link:
        print("⚠️ No download link in row, skipping.")
        return None

    dl_query = urlparse.parse_qs(urlparse.urlparse(download_link).query)
    torrent_filename = dl_query.get("f", ["unknown.torrent"])[0]

    # 3) Title and details link: first anchor in cell 2.
    title_links = cells[2].find_elements(By.TAG_NAME, "a")
    if not title_links:
        print("⚠️ No title link — skipping row")
        return None

    a_tag = title_links[0]
    visible_name = a_tag.text.strip()
    full_title = a_tag.get_attribute("title")
    details_link = a_tag.get_attribute("href")

    if not details_link:
        print("⚠️ Row has no details link — skipping")
        return None

    # The value of the details URL's ?id= parameter is used as the torrent key.
    query = urlparse.parse_qs(urlparse.urlparse(details_link).query)
    if "id" not in query:
        print("⚠️ Skipping row with no torrent ID →", details_link)
        return None
    torrent_hash = query["id"][0]

    # 4) Size and date, read from the cell's whitespace-normalised text.
    text_block = cells[2].get_attribute("innerText") or ""
    text_block_clean = " ".join(text_block.split())

    size_match = re.search(
        r"Velkost ([0-9\.]+ ?[KMGT]B)", text_block_clean, re.IGNORECASE
    )
    added_match = re.search(
        r"Pridany (.+?)(?:\sObrázok|$)", text_block_clean, re.IGNORECASE
    )

    size_pretty = size_match.group(1) if size_match else None
    added_mysql = _added_to_mysql(added_match.group(1)) if added_match else None

    # 5) Optional preview image
    img_link = _preview_image_url(cells[2])

    # 6) Seeders / leechers
    seeders_a = cells[4].find_element(By.TAG_NAME, "a")
    seeders_number = int(seeders_a.text.strip())
    seeders_link = seeders_a.get_attribute("href")

    leechers_a = cells[5].find_element(By.TAG_NAME, "a")
    leechers_number = int(leechers_a.text.strip())
    leechers_link = leechers_a.get_attribute("href")

    # 7) Download the .torrent payload.
    torrent_content = None
    try:
        # timeout: a stalled connection must not block the run forever.
        # raise_for_status: without it the body of an HTTP error response
        # would be stored as if it were the torrent file.
        resp = requests_session.get(download_link, timeout=30)
        resp.raise_for_status()
        torrent_content = resp.content
    except Exception as e:
        print(f"⚠️ Could not download torrent file for {torrent_hash}: {e}")
        torrent_content = None

    return {
        "torrent_hash": torrent_hash,
        "details_link": details_link,
        "category": category,
        "title_visible": visible_name,
        "title_full": full_title,
        "size_pretty": size_pretty,
        "added_datetime": added_mysql,
        "preview_image": img_link,
        "seeders": seeders_number,
        "seeders_link": seeders_link,
        "leechers": leechers_number,
        "leechers_link": leechers_link,
        "torrent_filename": torrent_filename,
        "torrent_content": torrent_content,
    }
VALUES(torrent_filename), + torrent_content = VALUES(torrent_content); +""" + + +# ============================================================ +# 7) PROCESS ALL ROWS +# ============================================================ + +for cells in real_rows: + data = parse_row(cells) + if not data: + continue + + print("💾 Saving:", data["title_visible"]) + cursor.execute(insert_sql, data) + +print("\n✅ DONE — All torrents saved to MySQL & torrent files downloaded.") +driver.quit() diff --git a/30 OpenTextListing v4.py b/30 OpenTextListing v4.py new file mode 100644 index 0000000..cc59520 --- /dev/null +++ b/30 OpenTextListing v4.py @@ -0,0 +1,375 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import pymysql +from selenium import webdriver +from selenium.webdriver.common.by import By +from selenium.webdriver.chrome.options import Options +import time +import re +import urllib.parse as urlparse +from pathlib import Path +import json +import requests + + +# ============================================================ +# 1) MySQL CONNECTION +# ============================================================ + +db = pymysql.connect( + host="192.168.1.76", + port=3307, + user="root", + password="Vlado9674+", + database="torrents", + charset="utf8mb4", + autocommit=True +) + +cursor = db.cursor() + + +# ============================================================ +# 2) Selenium setup +# ============================================================ + +COOKIE_FILE = Path("sktorrent_cookies.json") + +# Start URL pro kategorii 24, seřazeno podle data DESC +START_URL = ( + "https://sktorrent.eu/torrent/torrents.php" + "?active=0&category=24&order=data&by=DESC&zaner=&jazyk=&page=0" +) + +chrome_options = Options() +chrome_options.add_argument("--start-maximized") +chrome_options.add_argument("--disable-notifications") +chrome_options.add_argument("--disable-popup-blocking") +chrome_options.add_argument("--disable-extensions") + +driver = 
def close_popup_if_any():
    """Try to dismiss the interstitial ad via the page's interstitialBox.closeit().

    The injected JavaScript wraps the call in its own try/catch, so the
    "not found" message below is printed only when the WebDriver call itself
    raises.
    """
    closer_js = "try { interstitialBox.closeit(); } catch(e) {}"
    settle_seconds = 0.5
    try:
        driver.execute_script(closer_js)
        # Short pause to let the DOM settle after the popup is removed.
        time.sleep(settle_seconds)
        print("🧹 Popup closed via JS fallback (if present).")
    except Exception as err:
        print("ℹ️ Popup JS handler not found:", err)
-------------------------- + category = cells[0].text.strip() + + # -------------------------- + # 2️⃣ DOWNLOAD LINK FOR TORRENT FILE (cells[1]) + # -------------------------- + try: + download_a = cells[1].find_element(By.TAG_NAME, "a") + download_link = download_a.get_attribute("href") + except: + print("⚠️ No download link in row, skipping.") + return None + + parsed_dl = urlparse.urlparse(download_link) + dl_query = urlparse.parse_qs(parsed_dl.query) + + torrent_filename = dl_query.get("f", ["unknown.torrent"])[0] + + # -------------------------- + # 3️⃣ TITLE + DETAILS LINK (in cell[2]) + # -------------------------- + title_links = cells[2].find_elements(By.TAG_NAME, "a") + if not title_links: + print("⚠️ No title link — skipping row") + return None + + a_tag = title_links[0] + + visible_name = a_tag.text.strip() + full_title = a_tag.get_attribute("title") + details_link = a_tag.get_attribute("href") + + if not details_link: + print("⚠️ Row has no details link — skipping") + return None + + # -------------------------- + # Extract torrent hash from ?id= + # -------------------------- + parsed = urlparse.urlparse(details_link) + query = urlparse.parse_qs(parsed.query) + + if "id" not in query: + print("⚠️ Skipping row with no torrent ID →", details_link) + return None + + torrent_hash = query["id"][0] + + # -------------------------- + # 4️⃣ Size + date parsing + # -------------------------- + text_block = cells[2].get_attribute("innerText") + text_block_clean = " ".join(text_block.split()) + + size_match = re.search(r"Velkost ([0-9\.]+ ?[KMG]B)", text_block_clean, re.IGNORECASE) + added_match = re.search(r"Pridany (.+?)(?:\sObrázok|$)", text_block_clean, re.IGNORECASE) + + size_pretty = size_match.group(1) if size_match else None + added_pretty = added_match.group(1) if added_match else None + + # Robustní převod data/času do MySQL datetime + added_mysql = None + if added_pretty: + # "29/11/2025 o 02:29" → "29/11/2025 02:29" + clean = added_pretty.replace(" o 
", " ").strip() + parts = clean.split(" ") + + date_part = parts[0] + time_part = parts[1] if len(parts) > 1 else "00:00:00" + + # pokud chybí sekundy, přidej + if len(time_part.split(":")) == 2: + time_part += ":00" + + day, month, year = date_part.split("/") + added_mysql = f"{year}-{month}-{day} {time_part}" + + # -------------------------- + # 5️⃣ Image preview + # -------------------------- + img_link = None + try: + image_a = cells[2].find_element( + By.XPATH, + ".//a[contains(text(),'Obrázok')]" + ) + mouseover = image_a.get_attribute("onmouseover") + img_match = re.search(r"src=([^ ]+)", mouseover) + if img_match: + img_link = img_match.group(1).replace("'", "").strip() + if img_link.startswith("//"): + img_link = "https:" + img_link + except: + pass + + # -------------------------- + # 6️⃣ SEEDERS / LEECHERS + # -------------------------- + seeders_a = cells[4].find_element(By.TAG_NAME, "a") + seeders_number = int(seeders_a.text.strip()) + seeders_link = seeders_a.get_attribute("href") + + leechers_a = cells[5].find_element(By.TAG_NAME, "a") + leechers_number = int(leechers_a.text.strip()) + leechers_link = leechers_a.get_attribute("href") + + # -------------------------- + # 7️⃣ DOWNLOAD TORRENT CONTENT (.torrent) + # -------------------------- + torrent_content = None + time.sleep(3) #mezera mezi torrenty + try: + resp = requests_session.get(download_link) + resp.raise_for_status() + torrent_content = resp.content + except Exception as e: + print(f"⚠️ Could not download torrent file for {torrent_hash}: {e}") + torrent_content = None + + # -------------------------- + # FINAL DICTIONARY + # -------------------------- + return { + "torrent_hash": torrent_hash, + "details_link": details_link, + "category": category, + "title_visible": visible_name, + "title_full": full_title, + "size_pretty": size_pretty, + "added_datetime": added_mysql, + "preview_image": img_link, + "seeders": seeders_number, + "seeders_link": seeders_link, + "leechers": leechers_number, 
def process_current_page(page_index: int):
    """Parse and store every torrent row of the page currently open in the driver.

    A table row is treated as a torrent row only when it contains exactly
    7 ``<td>`` cells. Each such row is passed to ``parse_row``; the returned
    dict is written with ``insert_sql`` (insert, or update on duplicate key).

    A row that makes ``parse_row`` raise is reported and skipped, so that one
    malformed row cannot abort the whole crawl.

    :param page_index: page counter (starts at 0), used only in log output.
    """
    real_rows = []
    for row in driver.find_elements(By.CSS_SELECTOR, "table tr"):
        cells = row.find_elements(By.TAG_NAME, "td")

        # Real torrent rows always have exactly 7 <td> cells.
        if len(cells) == 7:
            real_rows.append(cells)

    print(f"📄 Page {page_index}: {len(real_rows)} torrent rows")

    for cells in real_rows:
        # parse_row converts the seeders/leechers text with int() and looks up
        # their links with find_element(); either can raise on an unusual row.
        try:
            data = parse_row(cells)
        except Exception as e:
            print(f"⚠️ [{page_index}] parse_row failed, row skipped: {e}")
            continue

        if not data:
            continue

        print(f" 💾 [{page_index}] Saving:", data["title_visible"])
        cursor.execute(insert_sql, data)
@lru_cache(maxsize=1)
def _get_app() -> "msal.ConfidentialClientApplication":
    """Return the process-wide MSAL confidential client (created once)."""
    return msal.ConfidentialClientApplication(
        CLIENT_ID,
        authority=AUTHORITY,
        client_credential=CLIENT_SECRET,
    )


def _get_token() -> str:
    """Return a Microsoft Graph access token for the application.

    The MSAL application object is cached, not the token itself: a token
    memoised with ``lru_cache`` would be reused after it has expired, which
    breaks any process that lives longer than the token.
    NOTE(review): recent MSAL releases serve ``acquire_token_for_client``
    from the application's own token cache; older ones request a new token
    on every call, which is slower but still correct — confirm the installed
    version.

    :returns: the bearer token string.
    :raises RuntimeError: if the response contains no ``access_token``.
    """
    token = _get_app().acquire_token_for_client(scopes=SCOPE)

    if "access_token" not in token:
        raise RuntimeError(f"Graph auth failed: {token}")

    return token["access_token"]
+ + :param to: email or list of emails + :param subject: subject + :param body: email body + :param html: True = HTML, False = plain text + """ + + if isinstance(to, str): + to = [to] + + payload = { + "message": { + "subject": subject, + "body": { + "contentType": "HTML" if html else "Text", + "content": body, + }, + "toRecipients": [ + {"emailAddress": {"address": addr}} for addr in to + ], + }, + "saveToSentItems": "true", + } + + headers = { + "Authorization": f"Bearer {_get_token()}", + "Content-Type": "application/json", + } + + r = requests.post( + f"https://graph.microsoft.com/v1.0/users/{SENDER}/sendMail", + headers=headers, + json=payload, + timeout=30, + ) + + if r.status_code != 202: + raise RuntimeError( + f"sendMail failed [{r.status_code}]: {r.text}" + ) diff --git a/Reporter_ReadNewTorrents.py b/Reporter_ReadNewTorrents.py new file mode 100644 index 0000000..eef20f9 --- /dev/null +++ b/Reporter_ReadNewTorrents.py @@ -0,0 +1,342 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import pymysql +from selenium import webdriver +from selenium.webdriver.common.by import By +from selenium.webdriver.chrome.options import Options +import time +import re +import urllib.parse as urlparse +from pathlib import Path +import json +import requests +import datetime +import sys + +from EmailMessagingGraph import send_mail + + +# ============================================================ +# RUNTIME INFO +# ============================================================ + +RUN_START = datetime.datetime.now() + +processed_count = 0 +new_torrent_count = 0 +existing_torrent_count = 0 +new_titles = [] + +print(f"🕒 Run started at {RUN_START:%Y-%m-%d %H:%M:%S}") +sys.stdout.flush() + + +# ============================================================ +# 1) MySQL CONNECTION +# ============================================================ + +db = pymysql.connect( + host="192.168.1.76", + port=3307, + user="root", + password="Vlado9674+", + database="torrents", + 
def parse_row(cells):
    """Parse one torrent table row into a dict ready for ``insert_sql``.

    Cell layout used here: 0 = category, 1 = download link, 2 = title block
    (title link, size, added date, optional image preview link),
    4 = seeders, 5 = leechers.

    The ``.torrent`` file is downloaded only when the database does not
    already hold content for this torrent hash.

    :param cells: the ``<td>`` WebElements of one table row.
    :returns: dict with the ``insert_sql`` parameters plus ``is_new_torrent``,
        or ``None`` when the row has no download link, no title link or no
        ``id`` in its details URL.
    :raises ValueError: if the seeders/leechers text is not an integer or the
        date is not in ``DD/MM/YYYY`` form.
    """
    category = cells[0].text.strip()

    # find_elements() returns an empty list instead of raising, so a missing
    # link needs no try/except.
    download_anchors = cells[1].find_elements(By.TAG_NAME, "a")
    if not download_anchors:
        return None
    download_link = download_anchors[0].get_attribute("href")
    if not download_link:
        return None

    dl_query = urlparse.parse_qs(urlparse.urlparse(download_link).query)
    torrent_filename = dl_query.get("f", ["unknown.torrent"])[0]

    title_links = cells[2].find_elements(By.TAG_NAME, "a")
    if not title_links:
        return None

    a_tag = title_links[0]
    visible_name = a_tag.text.strip()
    full_title = a_tag.get_attribute("title")
    details_link = a_tag.get_attribute("href")
    if not details_link:
        return None

    query = urlparse.parse_qs(urlparse.urlparse(details_link).query)
    if "id" not in query:
        return None

    torrent_hash = query["id"][0]

    # Collapse all whitespace so the regexes below see a single line.
    text_block = cells[2].get_attribute("innerText") or ""
    text_block_clean = " ".join(text_block.split())

    size_match = re.search(r"Velkost ([0-9\.]+ ?[KMG]B)", text_block_clean, re.IGNORECASE)
    added_match = re.search(r"Pridany (.+?)(?:\sObrázok|$)", text_block_clean, re.IGNORECASE)

    size_pretty = size_match.group(1) if size_match else None
    added_pretty = added_match.group(1) if added_match else None

    # Date conversion to a MySQL datetime string (logic kept exactly as before).
    added_mysql = None
    if added_pretty:
        # "29/11/2025 o 02:29" -> "29/11/2025 02:29"
        clean = added_pretty.replace(" o ", " ").strip()
        parts = clean.split(" ")

        date_part = parts[0]
        time_part = parts[1] if len(parts) > 1 else "00:00:00"

        # Append seconds when only HH:MM is present.
        if len(time_part.split(":")) == 2:
            time_part += ":00"

        day, month, year = date_part.split("/")
        added_mysql = f"{year}-{month}-{day} {time_part}"

    # Image preview: the URL is embedded in the link's onmouseover handler.
    # The preview is optional, so a missing link or attribute leaves it None.
    img_link = None
    image_anchors = cells[2].find_elements(
        By.XPATH,
        ".//a[contains(text(),'Obrázok')]"
    )
    if image_anchors:
        mouseover = image_anchors[0].get_attribute("onmouseover") or ""
        img_match = re.search(r"src=([^ ]+)", mouseover)
        if img_match:
            img_link = img_match.group(1).replace("'", "").strip()
            if img_link.startswith("//"):
                img_link = "https:" + img_link

    seeders_a = cells[4].find_element(By.TAG_NAME, "a")
    seeders_number = int(seeders_a.text.strip())
    seeders_link = seeders_a.get_attribute("href")

    leechers_a = cells[5].find_element(By.TAG_NAME, "a")
    leechers_number = int(leechers_a.text.strip())
    leechers_link = leechers_a.get_attribute("href")

    # Skip the download when the DB already stores this torrent's content.
    cursor.execute(
        "SELECT torrent_content FROM torrents WHERE torrent_hash=%s",
        (torrent_hash,),
    )
    row = cursor.fetchone()
    already_have_torrent = row is not None and row[0] is not None

    torrent_content = None
    if not already_have_torrent:
        time.sleep(3)  # pause between torrent downloads
        try:
            # The timeout keeps one stalled download from hanging the run.
            resp = requests_session.get(download_link, timeout=30)
            resp.raise_for_status()
            torrent_content = resp.content
        except requests.RequestException as e:
            print(f"⚠️ Could not download torrent file for {torrent_hash}: {e}")

    return {
        "torrent_hash": torrent_hash,
        "details_link": details_link,
        "category": category,
        "title_visible": visible_name,
        "title_full": full_title,
        "size_pretty": size_pretty,
        "added_datetime": added_mysql,
        "preview_image": img_link,
        "seeders": seeders_number,
        "seeders_link": seeders_link,
        "leechers": leechers_number,
        "leechers_link": leechers_link,
        "torrent_filename": torrent_filename,
        # Stays None when the content is already stored or the download failed;
        # insert_sql keeps the stored value in that case (COALESCE).
        "torrent_content": torrent_content,
        "is_new_torrent": not already_have_torrent,
    }
# ============================================================
# 7) PROCESS FIRST PAGE ONLY
# ============================================================

print("\n🌐 Loading FIRST page")
driver.get(START_URL)
time.sleep(2)

close_popup_if_any()

rows = driver.find_elements(By.CSS_SELECTOR, "table tr")

# Fetch each row's <td> cells once; every find_elements() call is a separate
# WebDriver round trip. Torrent rows are the ones with exactly 7 cells.
real_rows = []
for r in rows:
    cells = r.find_elements(By.TAG_NAME, "td")
    if len(cells) == 7:
        real_rows.append(cells)

print(f"📄 Found {len(real_rows)} torrent rows")

for cells in real_rows:
    # A row that fails to parse is reported and skipped.
    try:
        data = parse_row(cells)
    except Exception as e:
        print(f"⚠️ parse_row failed: {e}")
        continue

    if not data:
        continue

    processed_count += 1

    if data["is_new_torrent"]:
        new_torrent_count += 1
        new_titles.append(data["title_visible"])
    else:
        existing_torrent_count += 1

    print("💾 Saving:", data["title_visible"])
    cursor.execute(insert_sql, data)
downloaded: {new_torrent_count}", + f"Already known torrents: {existing_torrent_count}", +] + +if new_titles: + lines.append("") + lines.append("New torrents:") + for t in new_titles: + lines.append(f"- {t}") + +body = "\n".join(lines) + +send_mail( + to="vladimir.buzalka@buzalka.cz", + subject=subject, + body=body, + html=False, +) + +print("📧 Email report sent.") + +driver.quit() +print("🎉 DONE") diff --git a/Reporter_TorrentsManipulation.py b/Reporter_TorrentsManipulation.py new file mode 100644 index 0000000..0b1cc05 --- /dev/null +++ b/Reporter_TorrentsManipulation.py @@ -0,0 +1,337 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +from datetime import datetime, timedelta + +import pymysql +import qbittorrentapi +import bencodepy + +from EmailMessagingGraph import send_mail + + +# ============================== +# ⚙ CONFIGURATION +# ============================== + +DB_CONFIG = { + "host": "192.168.1.76", + "port": 3307, + "user": "root", + "password": "Vlado9674+", + "database": "torrents", + "charset": "utf8mb4", + "autocommit": True, +} + +QBT_CONFIG = { + "host": "192.168.1.76", + "port": 8080, + "username": "admin", + "password": "adminadmin", +} + +MAX_ACTIVE_DOWNLOADS = 10 +DEAD_TORRENT_MINUTES = 5 +DEFAULT_SAVE_PATH = None + +MAIL_TO = "vladimir.buzalka@buzalka.cz" + +MAX_LIST_ITEMS = 50 # cap lists in email + + +# ============================== +# 🧮 RUNTIME STATS + LISTS +# ============================== + +RUN_START = datetime.now() + +stat_synced = 0 +stat_completed = 0 +stat_dead = 0 +stat_enqueued = 0 + +deleted_completed = [] # list[str] +deleted_dead = [] # list[str] +added_new = [] # list[str] +active_downloading = [] # list[str] + + +# ============================== +# 🔧 CONNECT +# ============================== + +db = pymysql.connect(**DB_CONFIG) +cursor = db.cursor(pymysql.cursors.DictCursor) + +qb = qbittorrentapi.Client(**QBT_CONFIG) + +try: + qb.auth_log_in() + print("✅ Connected to qBittorrent.") +except Exception as e: + raise 
def handle_completed_and_dead():
    """Remove finished and dead torrents from qBittorrent and record it in the DB.

    Completed (progress >= 100 % or an upload-side state): the torrent is
    removed from qBittorrent with its data kept, and the DB row is marked
    ``completed``.

    Dead (``last_seen`` property is -1 and the torrent was added more than
    ``DEAD_TORRENT_MINUTES`` ago): the torrent is removed together with its
    files, and the DB row is marked ``dead``.

    Updates the module-level counters ``stat_completed`` / ``stat_dead`` and
    the report lists ``deleted_completed`` / ``deleted_dead``. A failed delete
    is only printed; the torrent is still counted and reported.
    """
    global stat_completed, stat_dead

    finished_states = {"completed", "uploading", "stalledUP", "queuedUP"}
    dead_after = timedelta(minutes=DEAD_TORRENT_MINUTES)

    for t in qb.torrents_info():
        t_hash = t.hash

        # ✔ COMPLETED
        if float(t.progress) >= 1.0 or t.state in finished_states:
            stat_completed += 1
            deleted_completed.append(t.name)

            try:
                qb.torrents_delete(torrent_hashes=t_hash, delete_files=False)
            except Exception as e:
                print(f"⚠️ delete (keep data) failed for {t.name}: {e}")

            # COALESCE keeps a completion time already stored by
            # sync_qb_to_db instead of overwriting it with the time of this run.
            cursor.execute("""
                UPDATE torrents
                SET qb_state='completed',
                    qb_progress=100,
                    qb_completed_datetime=COALESCE(qb_completed_datetime, NOW()),
                    qb_last_update=NOW()
                WHERE qb_hash=%s OR torrent_hash=%s
            """, (t_hash, t_hash))
            continue

        # ❌ DEAD (last_seen == -1)
        try:
            props = qb.torrents_properties(t_hash)
        except Exception:
            # Properties unavailable: leave the torrent alone this run.
            continue

        if getattr(props, "last_seen", 0) != -1:
            continue

        added_on = getattr(t, "added_on", 0)
        if not added_on:
            continue

        # Give a fresh torrent DEAD_TORRENT_MINUTES before declaring it dead.
        if datetime.now() - datetime.fromtimestamp(added_on) <= dead_after:
            continue

        stat_dead += 1
        deleted_dead.append(t.name)

        try:
            qb.torrents_delete(torrent_hashes=t_hash, delete_files=True)
        except Exception as e:
            print(f"⚠️ delete (files) failed for {t.name}: {e}")

        cursor.execute("""
            UPDATE torrents
            SET qb_state='dead',
                qb_last_update=NOW()
            WHERE qb_hash=%s OR torrent_hash=%s
        """, (t_hash, t_hash))
def enqueue_new_torrents():
    """Add not-yet-queued torrents from the DB to qBittorrent.

    At most ``MAX_ACTIVE_DOWNLOADS`` minus the current number of unfinished
    torrents are taken, newest ``added_datetime`` first. A stored blob that is
    not a valid torrent is marked ``invalid`` and its content is cleared.
    Each torrent that was added is marked ``qb_added=1`` in the DB, counted
    in ``stat_enqueued`` and listed in ``added_new``.

    A torrent that qBittorrent refuses (exception or a ``"Fails."`` response)
    is left unmarked, so it is tried again on the next run.
    """
    global stat_enqueued

    active = count_active_downloads()
    if active >= MAX_ACTIVE_DOWNLOADS:
        return

    slots = MAX_ACTIVE_DOWNLOADS - active

    cursor.execute("""
        SELECT id, torrent_hash, torrent_content, torrent_filename
        FROM torrents
        WHERE (qb_added IS NULL OR qb_added = 0)
          AND torrent_content IS NOT NULL
        ORDER BY added_datetime DESC
        LIMIT %s
    """, (slots,))

    for row in cursor.fetchall():
        blob = row["torrent_content"]
        if not blob:
            continue

        if not is_valid_torrent(blob):
            cursor.execute("""
                UPDATE torrents
                SET qb_state='invalid',
                    torrent_content=NULL,
                    qb_last_update=NOW()
                WHERE id=%s
            """, (row["id"],))
            continue

        # ➕ Add torrent
        try:
            result = qb.torrents_add(torrent_files=blob, savepath=DEFAULT_SAVE_PATH)
        except Exception as e:
            print(f"❌ Failed to add {row['torrent_hash']}: {e}")
            continue

        # NOTE(review): torrents_add is documented to return "Fails." when
        # qBittorrent rejects the torrent rather than raising — confirm
        # against the installed qbittorrent-api version.
        if result == "Fails.":
            print(f"❌ Failed to add {row['torrent_hash']}: qBittorrent answered 'Fails.'")
            continue

        stat_enqueued += 1
        added_new.append(row.get("torrent_filename") or row["torrent_hash"])

        cursor.execute("""
            UPDATE torrents
            SET qb_added=1,
                qb_hash=COALESCE(qb_hash, %s),
                qb_state='added',
                qb_last_update=NOW()
            WHERE id=%s
        """, (row["torrent_hash"], row["id"]))
# ==============================
# 🏁 MAIN (ONE RUN)
# ==============================

print("🚀 QB worker run started")

try:
    sync_qb_to_db()
    handle_completed_and_dead()
    enqueue_new_torrents()

    # Snapshot after enqueue/deletions, so the email reflects the end state.
    active_downloading = snapshot_active_downloading()

finally:
    # The DB connection is closed whether or not the run succeeded.
    db.close()


# ==============================
# 📧 EMAIL REPORT
# ==============================

RUN_END = datetime.now()

body_lines = [
    f"Run started : {RUN_START:%Y-%m-%d %H:%M:%S}",
    f"Run finished: {RUN_END:%Y-%m-%d %H:%M:%S}",
    "",
    f"QB torrents synced : {stat_synced}",
    f"Completed removed : {stat_completed}",
    f"Dead removed : {stat_dead}",
    f"New torrents added : {stat_enqueued}",
    f"Active downloads : {len(active_downloading)}",
    "",
]

# One section per list; format_list caps each at MAX_LIST_ITEMS entries.
body_lines += format_list("Deleted (completed, kept data)", deleted_completed)
body_lines.append("")
body_lines += format_list("Deleted (dead, deleted files)", deleted_dead)
body_lines.append("")
body_lines += format_list("Newly added to qBittorrent", added_new)
body_lines.append("")
body_lines += format_list("Actively downloading now", active_downloading)

send_mail(
    to=MAIL_TO,
    subject=f"qBittorrent worker – {RUN_START:%Y-%m-%d %H:%M}",
    body="\n".join(body_lines),
    html=False,
)

print("📧 Email report sent")
print("🎉 DONE")