This commit is contained in:
2025-12-15 07:03:22 +01:00
parent 314eb20e6b
commit 5f1c55243e
2 changed files with 433 additions and 0 deletions

342
Reporter_ReadNewTorrents.py Normal file
View File

@@ -0,0 +1,342 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import datetime
import json
import os
import re
import sys
import time
import urllib.parse as urlparse
from pathlib import Path

import pymysql
import requests
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

from EmailMessagingGraph import send_mail
# ============================================================
# RUNTIME INFO
# ============================================================
# Wall-clock start of this run; reused later in the e-mail report.
RUN_START = datetime.datetime.now()

# Counters and titles collected while the torrent rows are processed.
processed_count = new_torrent_count = existing_torrent_count = 0
new_titles = []

print(f"🕒 Run started at {RUN_START:%Y-%m-%d %H:%M:%S}", flush=True)
# ============================================================
# 1) MySQL CONNECTION
# ============================================================
# Connection settings can be overridden through TORRENTS_DB_* environment
# variables so that credentials do not have to live in source control.
# NOTE(review): the fallback values are the historical hard-coded settings
# (including a root password). Rotate that password, switch to a
# least-privilege account and remove the defaults once the environment
# variables are configured on the host that runs this script.
db = pymysql.connect(
    host=os.environ.get("TORRENTS_DB_HOST", "192.168.1.76"),
    port=int(os.environ.get("TORRENTS_DB_PORT", "3307")),
    user=os.environ.get("TORRENTS_DB_USER", "root"),
    password=os.environ.get("TORRENTS_DB_PASSWORD", "Vlado9674+"),
    database=os.environ.get("TORRENTS_DB_NAME", "torrents"),
    charset="utf8mb4",
    autocommit=True,  # no explicit commit() calls are made anywhere below
)
cursor = db.cursor()
# ============================================================
# 2) Selenium setup
# ============================================================
COOKIE_FILE = Path("sktorrent_cookies.json")
START_URL = (
    "https://sktorrent.eu/torrent/torrents.php"
    "?search=&category=24&zaner=&jazyk=&active=0"
)

chrome_options = Options()
for chrome_flag in (
    "--start-maximized",
    "--disable-notifications",
    "--disable-popup-blocking",
    "--disable-extensions",
):
    chrome_options.add_argument(chrome_flag)

driver = webdriver.Chrome(options=chrome_options)
driver.set_window_position(380, 50)
driver.set_window_size(1350, 1000)

# Open the site root first; the saved cookies are added to this session.
driver.get("https://sktorrent.eu")

if not COOKIE_FILE.exists():
    print("⚠️ Cookie file not found login may be required.")
else:
    with open(COOKIE_FILE, "r", encoding="utf-8") as cookie_fh:
        cookies = json.load(cookie_fh)
    for saved_cookie in cookies:
        driver.add_cookie(saved_cookie)
    print("🍪 Cookies loaded.")
# ============================================================
# 3) requests.Session from Selenium cookies
# ============================================================
# Copy every cookie the browser currently holds (name and value only) into
# a requests session, which is later used to download the .torrent files.
requests_session = requests.Session()
for browser_cookie in driver.get_cookies():
    requests_session.cookies.set(
        browser_cookie["name"],
        browser_cookie["value"],
    )
print("🔗 Requests session initialized.")
# ============================================================
# 4) Popup handler
# ============================================================
def close_popup_if_any():
    """Best-effort attempt to close the site's interstitial popup.

    Runs a small JavaScript snippet in the global ``driver`` that calls
    ``interstitialBox.closeit()`` and ignores JavaScript-side errors. Any
    Python-side exception from the driver is ignored as well; on success
    the function pauses for half a second before returning.
    """
    close_script = "try { interstitialBox.closeit(); } catch(e) {}"
    try:
        driver.execute_script(close_script)
    except Exception:
        # Nothing to close or the driver refused the script: carry on.
        return
    time.sleep(0.5)
# ============================================================
# 5) Parse one torrent row
# ============================================================
# Patterns applied to the whitespace-normalised text of the title cell.
_SIZE_RE = re.compile(r"Velkost ([0-9\.]+ ?[KMG]B)", re.IGNORECASE)
_ADDED_RE = re.compile(r"Pridany (.+?)(?:\sObrázok|$)", re.IGNORECASE)
# Pattern applied to the "onmouseover" attribute of the preview link.
_IMG_SRC_RE = re.compile(r"src=([^ ]+)")

# Upper bound (seconds) for one .torrent download so that a stalled
# connection cannot block the whole run forever.
_DOWNLOAD_TIMEOUT = 30


def _to_mysql_datetime(added_pretty):
    """Turn 'DD/MM/YYYY o HH:MM' into 'YYYY-MM-DD HH:MM:SS'; falsy input gives None."""
    if not added_pretty:
        return None
    # "29/11/2025 o 02:29" -> "29/11/2025 02:29"
    clean = added_pretty.replace(" o ", " ").strip()
    parts = clean.split(" ")
    date_part = parts[0]
    time_part = parts[1] if len(parts) > 1 else "00:00:00"
    # Append seconds when the site only shows hours and minutes.
    if len(time_part.split(":")) == 2:
        time_part += ":00"
    day, month, year = date_part.split("/")
    return f"{year}-{month}-{day} {time_part}"


def _extract_preview_image(title_cell):
    """Return the preview image URL found in the 'Obrázok' link, or None."""
    try:
        image_a = title_cell.find_element(
            By.XPATH,
            ".//a[contains(text(),'Obrázok')]",
        )
        mouseover = image_a.get_attribute("onmouseover")
        img_match = _IMG_SRC_RE.search(mouseover)
    except Exception:
        # Best effort: a missing link or missing attribute means "no preview".
        return None
    if not img_match:
        return None
    img_link = img_match.group(1).replace("'", "").strip()
    if img_link.startswith("//"):
        img_link = "https:" + img_link
    return img_link


def _download_torrent(download_link):
    """Download the .torrent file via ``requests_session``; None on failure."""
    time.sleep(3)  # fixed pause before every download request
    try:
        resp = requests_session.get(download_link, timeout=_DOWNLOAD_TIMEOUT)
        resp.raise_for_status()
        return resp.content
    except Exception as exc:
        # Best effort: the row is still saved, just without file content.
        print(f"⚠️ Torrent download failed ({download_link}): {exc}")
        return None


def parse_row(cells):
    """Extract one torrent listing row into a dict ready for ``insert_sql``.

    Args:
        cells: the ``<td>`` WebElements of one table row. Indices 0
            (category), 1 (download link), 2 (title block), 4 (seeders) and
            5 (leechers) are read.

    Returns:
        ``None`` when the row has no download link, no title link, or the
        details link carries no ``id`` query parameter. Otherwise a dict
        with the keys used by ``insert_sql`` plus ``is_new_torrent``, which
        is True when the database holds no ``torrent_content`` for this
        hash yet. In that case a download of the .torrent file is
        attempted; ``torrent_content`` stays ``None`` if it fails.

    Raises:
        Exceptions from Selenium lookups of the seeder/leecher links,
        ``ValueError`` from non-numeric counts or an unexpected date
        layout, and database errors from the lookup query are propagated
        to the caller.
    """
    category = cells[0].text.strip()

    try:
        download_a = cells[1].find_element(By.TAG_NAME, "a")
        download_link = download_a.get_attribute("href")
    except Exception:
        return None

    parsed_dl = urlparse.urlparse(download_link)
    dl_query = urlparse.parse_qs(parsed_dl.query)
    torrent_filename = dl_query.get("f", ["unknown.torrent"])[0]

    title_links = cells[2].find_elements(By.TAG_NAME, "a")
    if not title_links:
        return None
    a_tag = title_links[0]
    visible_name = a_tag.text.strip()
    full_title = a_tag.get_attribute("title")
    details_link = a_tag.get_attribute("href")

    # The "id" query parameter of the details link is stored as the hash.
    query = urlparse.parse_qs(urlparse.urlparse(details_link).query)
    if "id" not in query:
        return None
    torrent_hash = query["id"][0]

    text_block = cells[2].get_attribute("innerText")
    text_block_clean = " ".join(text_block.split())
    size_match = _SIZE_RE.search(text_block_clean)
    added_match = _ADDED_RE.search(text_block_clean)
    size_pretty = size_match.group(1) if size_match else None
    added_pretty = added_match.group(1) if added_match else None
    added_mysql = _to_mysql_datetime(added_pretty)

    img_link = _extract_preview_image(cells[2])

    seeders_a = cells[4].find_element(By.TAG_NAME, "a")
    seeders_number = int(seeders_a.text.strip())
    seeders_link = seeders_a.get_attribute("href")

    leechers_a = cells[5].find_element(By.TAG_NAME, "a")
    leechers_number = int(leechers_a.text.strip())
    leechers_link = leechers_a.get_attribute("href")

    # Only download the file when the database has no content for it yet.
    cursor.execute(
        "SELECT torrent_content FROM torrents WHERE torrent_hash=%s",
        (torrent_hash,),
    )
    row = cursor.fetchone()
    already_have_torrent = row is not None and row[0] is not None

    torrent_content = None
    if not already_have_torrent:
        torrent_content = _download_torrent(download_link)

    return {
        "torrent_hash": torrent_hash,
        "details_link": details_link,
        "category": category,
        "title_visible": visible_name,
        "title_full": full_title,
        "size_pretty": size_pretty,
        "added_datetime": added_mysql,
        "preview_image": img_link,
        "seeders": seeders_number,
        "seeders_link": seeders_link,
        "leechers": leechers_number,
        "leechers_link": leechers_link,
        "torrent_filename": torrent_filename,
        "torrent_content": torrent_content,
        "is_new_torrent": not already_have_torrent,
    }
# ============================================================
# 6) INSERT SQL
# ============================================================
# Upsert statement executed with the dict returned by parse_row (named
# %(key)s placeholders). On a duplicate key every listed column is
# overwritten with the new value, except torrent_content: COALESCE keeps
# the stored content when the new value is NULL.
# NOTE(review): the duplicate key is presumably a unique index on
# torrent_hash - confirm against the table definition.
insert_sql = """
INSERT INTO torrents (
torrent_hash, details_link, category, title_visible, title_full,
size_pretty, added_datetime, preview_image,
seeders, seeders_link, leechers, leechers_link,
torrent_filename, torrent_content
) VALUES (
%(torrent_hash)s, %(details_link)s, %(category)s, %(title_visible)s, %(title_full)s,
%(size_pretty)s, %(added_datetime)s, %(preview_image)s,
%(seeders)s, %(seeders_link)s, %(leechers)s, %(leechers_link)s,
%(torrent_filename)s, %(torrent_content)s
)
ON DUPLICATE KEY UPDATE
details_link = VALUES(details_link),
category = VALUES(category),
title_visible = VALUES(title_visible),
title_full = VALUES(title_full),
size_pretty = VALUES(size_pretty),
added_datetime = VALUES(added_datetime),
preview_image = VALUES(preview_image),
seeders = VALUES(seeders),
seeders_link = VALUES(seeders_link),
leechers = VALUES(leechers),
leechers_link = VALUES(leechers_link),
torrent_filename = VALUES(torrent_filename),
torrent_content = COALESCE(VALUES(torrent_content), torrent_content);
"""
# ============================================================
# 7) PROCESS FIRST PAGE ONLY
# ============================================================
print("\n🌐 Loading FIRST page")
driver.get(START_URL)
time.sleep(2)  # fixed pause after navigation before touching the page
close_popup_if_any()

rows = driver.find_elements(By.CSS_SELECTOR, "table tr")

# Torrent rows are the table rows with exactly 7 cells. Each row's cells
# are fetched once and reused, instead of querying the browser twice per
# row (once for the length check and once for the data).
real_rows = []
for row_element in rows:
    row_cells = row_element.find_elements(By.TAG_NAME, "td")
    if len(row_cells) == 7:
        real_rows.append(row_cells)
print(f"📄 Found {len(real_rows)} torrent rows")

for cells in real_rows:
    try:
        data = parse_row(cells)
    except Exception as e:
        # One malformed row must not abort the whole page.
        print(f"⚠️ parse_row failed: {e}")
        continue
    if not data:
        continue

    processed_count += 1
    if data["is_new_torrent"]:
        new_torrent_count += 1
        new_titles.append(data["title_visible"])
    else:
        existing_torrent_count += 1

    print("💾 Saving:", data["title_visible"])
    cursor.execute(insert_sql, data)
# ============================================================
# 8) SEND EMAIL REPORT
# ============================================================
RUN_END = datetime.datetime.now()
subject = f"SKTorrent hourly run {RUN_START:%Y-%m-%d %H:%M}"
lines = [
    f"Run started: {RUN_START:%Y-%m-%d %H:%M:%S}",
    f"Run finished: {RUN_END:%Y-%m-%d %H:%M:%S}",
    "",
    f"Processed torrents: {processed_count}",
    f"New torrent files downloaded: {new_torrent_count}",
    f"Already known torrents: {existing_torrent_count}",
]
if new_titles:
    lines.append("")
    lines.append("New torrents:")
    lines.extend(f"- {t}" for t in new_titles)
body = "\n".join(lines)

# Release the browser and the database connection even when sending the
# report fails, so a mail error does not leave a Chrome process or an open
# connection behind.
try:
    send_mail(
        to="vladimir.buzalka@buzalka.cz",
        subject=subject,
        body=body,
        html=False,
    )
    print("📧 Email report sent.")
finally:
    try:
        driver.quit()
    finally:
        cursor.close()
        db.close()
print("🎉 DONE")