reporter
This commit is contained in:
342
Reporter_ReadNewTorrents.py
Normal file
342
Reporter_ReadNewTorrents.py
Normal file
@@ -0,0 +1,342 @@
|
||||
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import datetime
import json
import os
import re
import sys
import time
import urllib.parse as urlparse
from pathlib import Path

import pymysql
import requests
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

from EmailMessagingGraph import send_mail


# ============================================================
# RUNTIME INFO
# ============================================================

# Wall-clock start of this run; also used in the e-mail subject and body.
RUN_START = datetime.datetime.now()

# Run statistics, updated by the main loop and reported by e-mail at the end.
processed_count = 0         # rows that parse_row() turned into a record
new_torrent_count = 0       # records flagged "is_new_torrent"
existing_torrent_count = 0  # records whose torrent payload was already stored
new_titles = []             # visible titles of the new records

# flush=True replaces the separate sys.stdout.flush() call: the banner is
# written out immediately.
print(f"🕒 Run started at {RUN_START:%Y-%m-%d %H:%M:%S}", flush=True)


# ============================================================
# 1) MySQL CONNECTION
# ============================================================

# NOTE(review): the connection settings, including the root password, are
# hard-coded in the source. Each one can be overridden through an environment
# variable; the literals are kept only as backward-compatible defaults and
# should be removed (and the password rotated) once the environment is set up.
db = pymysql.connect(
    host=os.environ.get("TORRENTS_DB_HOST", "192.168.1.76"),
    port=int(os.environ.get("TORRENTS_DB_PORT", "3307")),
    user=os.environ.get("TORRENTS_DB_USER", "root"),
    password=os.environ.get("TORRENTS_DB_PASSWORD", "Vlado9674+"),
    database=os.environ.get("TORRENTS_DB_NAME", "torrents"),
    charset="utf8mb4",
    # The script never calls commit(); autocommit makes each statement durable.
    autocommit=True,
)

# One shared cursor, used by parse_row() and by the main loop.
cursor = db.cursor()


# ============================================================
# 2) Selenium setup
# ============================================================

# JSON file with the list of cookie dicts that are injected into the browser.
COOKIE_FILE = Path("sktorrent_cookies.json")

# Listing page that gets scraped (category 24, empty search string).
START_URL = (
    "https://sktorrent.eu/torrent/torrents.php"
    "?search=&category=24&zaner=&jazyk=&active=0"
)

chrome_options = Options()
chrome_options.add_argument("--start-maximized")
chrome_options.add_argument("--disable-notifications")
chrome_options.add_argument("--disable-popup-blocking")
chrome_options.add_argument("--disable-extensions")

driver = webdriver.Chrome(options=chrome_options)

# Explicit window geometry, applied after start-up.
driver.set_window_position(380, 50)
driver.set_window_size(1350, 1000)

# Open the site before adding cookies: WebDriver only accepts cookies for the
# domain of the currently loaded document.
driver.get("https://sktorrent.eu")

if COOKIE_FILE.exists():
    cookies = json.loads(COOKIE_FILE.read_text(encoding="utf-8"))
    for cookie in cookies:
        driver.add_cookie(cookie)
    print("🍪 Cookies loaded.")
else:
    print("⚠️ Cookie file not found – login may be required.")


# ============================================================
# 3) requests.Session from Selenium cookies
# ============================================================

# Plain HTTP session that reuses the browser's cookies; parse_row() uses it
# to download the .torrent files without going through the browser.
requests_session = requests.Session()
for ck in driver.get_cookies():
    # NOTE(review): only name/value are copied; the cookie's domain and path
    # are dropped — confirm that is intended.
    requests_session.cookies.set(ck["name"], ck["value"])

print("🔗 Requests session initialized.")


# ============================================================
# 4) Popup handler
# ============================================================

def close_popup_if_any():
    """Dismiss the site's interstitial popup if one is showing.

    Runs ``interstitialBox.closeit()`` inside the page. The injected
    JavaScript catches its own errors, so a page without that object is
    not a failure. This is best-effort: if the WebDriver call itself
    fails, the problem is printed and the function returns normally.
    """
    try:
        driver.execute_script("try { interstitialBox.closeit(); } catch(e) {}")
        # Short pause after the close call before the caller reads the page.
        time.sleep(0.5)
    except Exception as exc:
        # Deliberately non-fatal, but no longer silent.
        print(f"⚠️ Popup close failed: {exc}")


# ============================================================
# 5) Parse one torrent row
# ============================================================

# Patterns applied to the whitespace-normalised text of the title cell.
_SIZE_RE = re.compile(r"Velkost ([0-9\.]+ ?[KMG]B)", re.IGNORECASE)
_ADDED_RE = re.compile(r"Pridany (.+?)(?:\sObrázok|$)", re.IGNORECASE)
# Extracts the image URL from the preview link's onmouseover handler.
_IMG_SRC_RE = re.compile(r"src=([^ ]+)")

# Pause before each .torrent download, and the network timeout for it.
DOWNLOAD_DELAY_SECONDS = 3
DOWNLOAD_TIMEOUT_SECONDS = 30


def _added_to_mysql(added_pretty):
    """Convert the listing's "added" stamp into a MySQL DATETIME string.

    Example: ``"29/11/2025 o 02:29"`` becomes ``"2025-11-29 02:29:00"``.
    A missing time defaults to midnight; missing seconds default to ``:00``.

    Returns:
        The formatted string, or None when the input is empty or does not
        match the ``day/month/year [hour:minute[:second]]`` layout.
    """
    if not added_pretty:
        return None

    # "29/11/2025 o 02:29" -> ["29/11/2025", "02:29"]
    parts = added_pretty.replace(" o ", " ").split()
    if not parts:
        return None

    date_part = parts[0]
    time_part = parts[1] if len(parts) > 1 else "00:00:00"

    # If the seconds are missing, add them.
    if time_part.count(":") == 1:
        time_part += ":00"

    try:
        stamp = datetime.datetime.strptime(
            f"{date_part} {time_part}", "%d/%m/%Y %H:%M:%S"
        )
    except ValueError:
        # Unexpected layout: keep the row and store no date instead of
        # failing the whole row.
        return None
    return f"{stamp:%Y-%m-%d %H:%M:%S}"


def parse_row(cells):
    """Turn one listing row into a record matching ``insert_sql``.

    Args:
        cells: the ``<td>`` WebElements of one table row (the caller passes
            rows with seven cells). Index 0 is read as the category, 1 as
            the download link, 2 as the title/details block, 4 as seeders
            and 5 as leechers.

    Returns:
        A dict with the keys ``torrent_hash``, ``details_link``,
        ``category``, ``title_visible``, ``title_full``, ``size_pretty``,
        ``added_datetime``, ``preview_image``, ``seeders``,
        ``seeders_link``, ``leechers``, ``leechers_link``,
        ``torrent_filename``, ``torrent_content`` and ``is_new_torrent``;
        or None when the row has no download link, no title link, or no
        ``id`` parameter in its details URL.

    Raises:
        ValueError: the seeders or leechers text is not an integer.
        NoSuchElementException: the seeders or leechers cell has no link.

    Side effects:
        Runs one SELECT through the module-level ``cursor``. When no
        torrent payload is stored for the row yet, sleeps
        ``DOWNLOAD_DELAY_SECONDS`` and downloads the file through
        ``requests_session``; a failed download is printed and leaves
        ``torrent_content`` as None.
    """
    category = cells[0].text.strip()

    # --- download link (cell 1) --------------------------------------
    try:
        download_link = cells[1].find_element(By.TAG_NAME, "a").get_attribute("href")
    except NoSuchElementException:
        return None

    dl_query = urlparse.parse_qs(urlparse.urlparse(download_link).query)
    torrent_filename = dl_query.get("f", ["unknown.torrent"])[0]

    # --- title and details link (cell 2) -----------------------------
    title_links = cells[2].find_elements(By.TAG_NAME, "a")
    if not title_links:
        return None

    a_tag = title_links[0]
    visible_name = a_tag.text.strip()
    full_title = a_tag.get_attribute("title")
    details_link = a_tag.get_attribute("href")

    query = urlparse.parse_qs(urlparse.urlparse(details_link).query)
    if "id" not in query:
        return None
    # The details URL's "id" parameter is what the script stores as
    # torrent_hash.
    torrent_hash = query["id"][0]

    # --- size and added date (free text of cell 2) -------------------
    text_block = cells[2].get_attribute("innerText") or ""
    text_block_clean = " ".join(text_block.split())

    size_match = _SIZE_RE.search(text_block_clean)
    added_match = _ADDED_RE.search(text_block_clean)

    size_pretty = size_match.group(1) if size_match else None
    added_pretty = added_match.group(1) if added_match else None
    added_mysql = _added_to_mysql(added_pretty)

    # --- image preview (optional) ------------------------------------
    img_link = None
    try:
        image_a = cells[2].find_element(
            By.XPATH,
            ".//a[contains(text(),'Obrázok')]",
        )
    except NoSuchElementException:
        image_a = None

    if image_a is not None:
        # The attribute may be absent; search an empty string in that case.
        img_match = _IMG_SRC_RE.search(image_a.get_attribute("onmouseover") or "")
        if img_match:
            img_link = img_match.group(1).replace("'", "").strip()
            if img_link.startswith("//"):
                # Protocol-relative URL: make it absolute.
                img_link = "https:" + img_link

    # --- seeders / leechers (cells 4 and 5) --------------------------
    seeders_a = cells[4].find_element(By.TAG_NAME, "a")
    seeders_number = int(seeders_a.text.strip())
    seeders_link = seeders_a.get_attribute("href")

    leechers_a = cells[5].find_element(By.TAG_NAME, "a")
    leechers_number = int(leechers_a.text.strip())
    leechers_link = leechers_a.get_attribute("href")

    # --- download the .torrent only if no payload is stored yet ------
    cursor.execute(
        "SELECT torrent_content FROM torrents WHERE torrent_hash=%s",
        (torrent_hash,),
    )
    row = cursor.fetchone()
    already_have_torrent = row is not None and row[0] is not None

    torrent_content = None
    if not already_have_torrent:
        time.sleep(DOWNLOAD_DELAY_SECONDS)
        try:
            # The timeout keeps a stalled server from hanging the run.
            resp = requests_session.get(
                download_link, timeout=DOWNLOAD_TIMEOUT_SECONDS
            )
            resp.raise_for_status()
            torrent_content = resp.content
        except requests.RequestException as exc:
            print(f"⚠️ Torrent download failed ({torrent_filename}): {exc}")

    return {
        "torrent_hash": torrent_hash,
        "details_link": details_link,
        "category": category,
        "title_visible": visible_name,
        "title_full": full_title,
        "size_pretty": size_pretty,
        "added_datetime": added_mysql,
        "preview_image": img_link,
        "seeders": seeders_number,
        "seeders_link": seeders_link,
        "leechers": leechers_number,
        "leechers_link": leechers_link,
        "torrent_filename": torrent_filename,
        # Stays None when the payload is already stored or the download failed.
        "torrent_content": torrent_content,
        "is_new_torrent": not already_have_torrent,
    }


# ============================================================
# 6) INSERT SQL
# ============================================================

# Upsert for one record. The named placeholders are filled from the dict
# returned by parse_row(). On a duplicate key every column is refreshed,
# except that COALESCE keeps the stored torrent_content when the new value
# is NULL.
insert_sql = """
INSERT INTO torrents (
    torrent_hash, details_link, category, title_visible, title_full,
    size_pretty, added_datetime, preview_image,
    seeders, seeders_link, leechers, leechers_link,
    torrent_filename, torrent_content
) VALUES (
    %(torrent_hash)s, %(details_link)s, %(category)s, %(title_visible)s, %(title_full)s,
    %(size_pretty)s, %(added_datetime)s, %(preview_image)s,
    %(seeders)s, %(seeders_link)s, %(leechers)s, %(leechers_link)s,
    %(torrent_filename)s, %(torrent_content)s
)
ON DUPLICATE KEY UPDATE
    details_link = VALUES(details_link),
    category = VALUES(category),
    title_visible = VALUES(title_visible),
    title_full = VALUES(title_full),
    size_pretty = VALUES(size_pretty),
    added_datetime = VALUES(added_datetime),
    preview_image = VALUES(preview_image),
    seeders = VALUES(seeders),
    seeders_link = VALUES(seeders_link),
    leechers = VALUES(leechers),
    leechers_link = VALUES(leechers_link),
    torrent_filename = VALUES(torrent_filename),
    torrent_content = COALESCE(VALUES(torrent_content), torrent_content);
"""


# ============================================================
# 7) PROCESS FIRST PAGE ONLY
# ============================================================

print("\n🌐 Loading FIRST page")
driver.get(START_URL)
time.sleep(2)  # fixed wait after navigation before the page is read

close_popup_if_any()

rows = driver.find_elements(By.CSS_SELECTOR, "table tr")
# Only rows with exactly seven cells are treated as torrent rows. Each row's
# cells are fetched once and reused, instead of a second WebDriver lookup.
real_rows = [
    cells
    for cells in (r.find_elements(By.TAG_NAME, "td") for r in rows)
    if len(cells) == 7
]

print(f"📄 Found {len(real_rows)} torrent rows")

for cells in real_rows:
    try:
        data = parse_row(cells)
    except Exception as e:
        # One malformed row must not stop the run.
        print(f"⚠️ parse_row failed: {e}")
        continue

    if not data:
        continue

    print("💾 Saving:", data["title_visible"])
    try:
        cursor.execute(insert_sql, data)
    except pymysql.MySQLError as e:
        # A failed insert is reported and skipped so the remaining rows and
        # the e-mail report are still handled.
        print(f"⚠️ Saving failed: {e}")
        continue

    # Statistics only count records that were actually saved.
    processed_count += 1
    if data["is_new_torrent"]:
        new_torrent_count += 1
        new_titles.append(data["title_visible"])
    else:
        existing_torrent_count += 1


# ============================================================
# 8) SEND EMAIL REPORT
# ============================================================

RUN_END = datetime.datetime.now()

subject = f"SKTorrent hourly run – {RUN_START:%Y-%m-%d %H:%M}"

lines = [
    f"Run started: {RUN_START:%Y-%m-%d %H:%M:%S}",
    f"Run finished: {RUN_END:%Y-%m-%d %H:%M:%S}",
    "",
    f"Processed torrents: {processed_count}",
    f"New torrent files downloaded: {new_torrent_count}",
    f"Already known torrents: {existing_torrent_count}",
]

# The list of titles is appended only when something new was found.
if new_titles:
    lines.append("")
    lines.append("New torrents:")
    lines.extend(f"- {title}" for title in new_titles)

body = "\n".join(lines)

try:
    send_mail(
        to="vladimir.buzalka@buzalka.cz",
        subject=subject,
        body=body,
        html=False,
    )
    print("📧 Email report sent.")
finally:
    # Release the browser and the database handle even when mailing fails;
    # the mail error itself still propagates.
    driver.quit()
    cursor.close()
    db.close()

print("🎉 DONE")
Reference in New Issue
Block a user