# =====================================================================
# File: EmailMessagingGraph.py  (added as a new file by this diff)
# =====================================================================
"""
EmailMessagingGraph.py
----------------------
Private Microsoft Graph mail sender
Application permissions, shared mailbox
"""

import os
from functools import lru_cache
from typing import List, Union

import msal
import requests


# =========================
# PRIVATE CONFIG (ONLY YOU)
# =========================
TENANT_ID = "7d269944-37a4-43a1-8140-c7517dc426e9"
CLIENT_ID = "4b222bfd-78c9-4239-a53f-43006b3ed07f"
# SECURITY: this client secret is committed to version control. It should be
# rotated and supplied only through the GRAPH_CLIENT_SECRET environment
# variable; the literal fallback is kept solely so existing deployments keep
# working until that is done.
CLIENT_SECRET = os.environ.get(
    "GRAPH_CLIENT_SECRET",
    "Txg8Q~MjhocuopxsJyJBhPmDfMxZ2r5WpTFj1dfk",
)
SENDER = "reports@buzalka.cz"


AUTHORITY = f"https://login.microsoftonline.com/{TENANT_ID}"
SCOPE = ["https://graph.microsoft.com/.default"]


@lru_cache(maxsize=1)
def _get_app() -> "msal.ConfidentialClientApplication":
    """Return the process-wide MSAL confidential client (created once)."""
    return msal.ConfidentialClientApplication(
        CLIENT_ID,
        authority=AUTHORITY,
        client_credential=CLIENT_SECRET,
    )


def _get_token() -> str:
    """
    Return a bearer token for Microsoft Graph (client-credentials flow).

    The MSAL application object is cached instead of the token string:
    caching the raw token forever would keep handing out an expired token
    in any process that lives longer than the token lifetime.
    NOTE(review): recent MSAL Python releases serve repeated
    acquire_token_for_client() calls from the app's in-memory token cache
    until the token nears expiry - confirm for the installed version.

    :raises RuntimeError: when the response contains no access token
    """
    token = _get_app().acquire_token_for_client(scopes=SCOPE)

    if "access_token" not in token:
        raise RuntimeError(f"Graph auth failed: {token}")

    return token["access_token"]


def send_mail(
    to: Union[str, List[str]],
    subject: str,
    body: str,
    *,
    html: bool = False,
) -> None:
    """
    Send email via Microsoft Graph.

    The message is sent from the mailbox configured in ``SENDER``.

    :param to: email or list of emails
    :param subject: subject
    :param body: email body
    :param html: True = HTML, False = plain text
    :raises RuntimeError: when authentication fails or Graph answers with
        anything other than HTTP 202
    """
    if isinstance(to, str):
        to = [to]

    payload = {
        "message": {
            "subject": subject,
            "body": {
                "contentType": "HTML" if html else "Text",
                "content": body,
            },
            "toRecipients": [
                {"emailAddress": {"address": addr}} for addr in to
            ],
        },
        "saveToSentItems": "true",
    }

    headers = {
        "Authorization": f"Bearer {_get_token()}",
        "Content-Type": "application/json",
    }

    r = requests.post(
        f"https://graph.microsoft.com/v1.0/users/{SENDER}/sendMail",
        headers=headers,
        json=payload,
        timeout=30,
    )

    # The code treats 202 as the only success status for sendMail.
    if r.status_code != 202:
        raise RuntimeError(
            f"sendMail failed [{r.status_code}]: {r.text}"
        )


# =====================================================================
# File: Reporter_ReadNewTorrents.py  (added as a new file by this diff)
# =====================================================================
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import datetime
import json
import os
import re
import sys
import time
import urllib.parse as urlparse
from pathlib import Path
from typing import Optional

import pymysql
import requests
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

from EmailMessagingGraph import send_mail


# ============================================================
# RUNTIME INFO
# ============================================================

RUN_START = datetime.datetime.now()

processed_count = 0
new_torrent_count = 0
existing_torrent_count = 0
new_titles = []

print(f"🕒 Run started at {RUN_START:%Y-%m-%d %H:%M:%S}")
sys.stdout.flush()


# ============================================================
# CONSTANTS
# ============================================================

COOKIE_FILE = Path("sktorrent_cookies.json")

START_URL = (
    "https://sktorrent.eu/torrent/torrents.php"
    "?search=&category=24&zaner=&jazyk=&active=0"
)

# Upper bound (seconds) for a single .torrent download so that one stalled
# request cannot hang the whole run.
DOWNLOAD_TIMEOUT = 30

# Patterns applied to the whitespace-normalised text of the title cell.
_SIZE_RE = re.compile(r"Velkost ([0-9\.]+ ?[KMG]B)", re.IGNORECASE)
_ADDED_RE = re.compile(r"Pridany (.+?)(?:\sObrázok|$)", re.IGNORECASE)
_IMG_SRC_RE = re.compile(r"src=([^ ]+)")

# Upsert keyed on the table's unique key. COALESCE keeps an already stored
# torrent_content when the current run did not download the file.
insert_sql = """
INSERT INTO torrents (
    torrent_hash, details_link, category, title_visible, title_full,
    size_pretty, added_datetime, preview_image,
    seeders, seeders_link, leechers, leechers_link,
    torrent_filename, torrent_content
) VALUES (
    %(torrent_hash)s, %(details_link)s, %(category)s, %(title_visible)s, %(title_full)s,
    %(size_pretty)s, %(added_datetime)s, %(preview_image)s,
    %(seeders)s, %(seeders_link)s, %(leechers)s, %(leechers_link)s,
    %(torrent_filename)s, %(torrent_content)s
)
ON DUPLICATE KEY UPDATE
    details_link = VALUES(details_link),
    category = VALUES(category),
    title_visible = VALUES(title_visible),
    title_full = VALUES(title_full),
    size_pretty = VALUES(size_pretty),
    added_datetime = VALUES(added_datetime),
    preview_image = VALUES(preview_image),
    seeders = VALUES(seeders),
    seeders_link = VALUES(seeders_link),
    leechers = VALUES(leechers),
    leechers_link = VALUES(leechers_link),
    torrent_filename = VALUES(torrent_filename),
    torrent_content = COALESCE(VALUES(torrent_content), torrent_content);
"""


# ============================================================
# HELPERS
# (they use the module-level `driver`, `cursor` and
#  `requests_session` objects created further below)
# ============================================================

def close_popup_if_any():
    """Best-effort dismissal of the site's interstitial popup."""
    try:
        driver.execute_script("try { interstitialBox.closeit(); } catch(e) {}")
        time.sleep(0.5)
    except Exception:
        # Deliberately ignored: a missing popup must never abort the run.
        pass


def _to_mysql_datetime(added_pretty):
    """
    Convert the site's "DD/MM/YYYY o HH:MM" text to "YYYY-MM-DD HH:MM:SS".

    Returns None for an empty value. Raises ValueError when the date part
    does not consist of exactly three "/"-separated fields.
    """
    if not added_pretty:
        return None

    # "29/11/2025 o 02:29" -> "29/11/2025 02:29"
    clean = added_pretty.replace(" o ", " ").strip()
    parts = clean.split(" ")

    date_part = parts[0]
    time_part = parts[1] if len(parts) > 1 else "00:00:00"

    # if the seconds are missing, append them
    if len(time_part.split(":")) == 2:
        time_part += ":00"

    day, month, year = date_part.split("/")
    return f"{year}-{month}-{day} {time_part}"


def parse_row(cells) -> Optional[dict]:
    """
    Parse one 7-cell torrent table row into a dict ready for `insert_sql`.

    When the database has no stored torrent_content for the row's hash the
    .torrent file is downloaded (after a 3 s pause) through
    `requests_session`.

    :param cells: the <td> WebElements of one table row
    :return: dict of column values plus the `is_new_torrent` flag, or None
        when the row has no download link, no title link or no `id` in the
        details URL
    :raises Exception: other parsing problems (e.g. non-numeric seeders,
        unexpected date shape) propagate; the caller skips such rows
    """
    category = cells[0].text.strip()

    try:
        download_a = cells[1].find_element(By.TAG_NAME, "a")
        download_link = download_a.get_attribute("href")
    except WebDriverException:
        return None

    parsed_dl = urlparse.urlparse(download_link)
    dl_query = urlparse.parse_qs(parsed_dl.query)
    torrent_filename = dl_query.get("f", ["unknown.torrent"])[0]

    title_links = cells[2].find_elements(By.TAG_NAME, "a")
    if not title_links:
        return None

    a_tag = title_links[0]
    visible_name = a_tag.text.strip()
    full_title = a_tag.get_attribute("title")
    details_link = a_tag.get_attribute("href")

    parsed = urlparse.urlparse(details_link)
    query = urlparse.parse_qs(parsed.query)
    if "id" not in query:
        return None

    torrent_hash = query["id"][0]

    text_block = cells[2].get_attribute("innerText")
    text_block_clean = " ".join(text_block.split())

    size_match = _SIZE_RE.search(text_block_clean)
    added_match = _ADDED_RE.search(text_block_clean)

    size_pretty = size_match.group(1) if size_match else None
    added_pretty = added_match.group(1) if added_match else None

    added_mysql = _to_mysql_datetime(added_pretty)

    # ------------------------------------------------------
    # Image preview (optional; its absence must not drop the row)
    # ------------------------------------------------------
    img_link = None
    try:
        image_a = cells[2].find_element(
            By.XPATH,
            ".//a[contains(text(),'Obrázok')]"
        )
        # The attribute may be absent (None); treat that as "no preview".
        mouseover = image_a.get_attribute("onmouseover") or ""
        img_match = _IMG_SRC_RE.search(mouseover)
        if img_match:
            img_link = img_match.group(1).replace("'", "").strip()
            if img_link.startswith("//"):
                img_link = "https:" + img_link
    except WebDriverException:
        pass

    seeders_a = cells[4].find_element(By.TAG_NAME, "a")
    seeders_number = int(seeders_a.text.strip())
    seeders_link = seeders_a.get_attribute("href")

    leechers_a = cells[5].find_element(By.TAG_NAME, "a")
    leechers_number = int(leechers_a.text.strip())
    leechers_link = leechers_a.get_attribute("href")

    cursor.execute(
        "SELECT torrent_content FROM torrents WHERE torrent_hash=%s",
        (torrent_hash,),
    )
    row = cursor.fetchone()
    already_have_torrent = row is not None and row[0] is not None

    torrent_content = None
    if not already_have_torrent:
        time.sleep(3)  # pause before each download
        try:
            resp = requests_session.get(download_link, timeout=DOWNLOAD_TIMEOUT)
            resp.raise_for_status()
            torrent_content = resp.content
        except requests.RequestException as e:
            # Metadata is still stored; the COALESCE in insert_sql lets a
            # later run fill in the file.
            print(f"⚠️ torrent download failed: {e}")
            torrent_content = None

    return {
        "torrent_hash": torrent_hash,
        "details_link": details_link,
        "category": category,
        "title_visible": visible_name,
        "title_full": full_title,
        "size_pretty": size_pretty,
        "added_datetime": added_mysql,
        "preview_image": img_link,
        "seeders": seeders_number,
        "seeders_link": seeders_link,
        "leechers": leechers_number,
        "leechers_link": leechers_link,
        "torrent_filename": torrent_filename,
        "torrent_content": torrent_content,
        "is_new_torrent": not already_have_torrent,
    }


# ============================================================
# 1) MySQL CONNECTION
# ============================================================

db = pymysql.connect(
    host="192.168.1.76",
    port=3307,
    user="root",
    # SECURITY: this password is committed to version control. Prefer the
    # TORRENTS_DB_PASSWORD environment variable (and a non-root account);
    # the literal is only a fallback for existing setups.
    password=os.environ.get("TORRENTS_DB_PASSWORD", "Vlado9674+"),
    database="torrents",
    charset="utf8mb4",
    autocommit=True,
)

cursor = db.cursor()


# ============================================================
# 2) Selenium setup
# ============================================================

chrome_options = Options()
chrome_options.add_argument("--start-maximized")
chrome_options.add_argument("--disable-notifications")
chrome_options.add_argument("--disable-popup-blocking")
chrome_options.add_argument("--disable-extensions")

driver = None
requests_session = requests.Session()

# Everything that can fail after the resources exist runs inside try/finally
# so the Chrome process and the DB connection are always released.
try:
    driver = webdriver.Chrome(options=chrome_options)

    driver.set_window_position(380, 50)
    driver.set_window_size(1350, 1000)

    # Open the site first, then add the saved cookies for it.
    driver.get("https://sktorrent.eu")

    if COOKIE_FILE.exists():
        with open(COOKIE_FILE, "r", encoding="utf-8") as f:
            cookies = json.load(f)
        for c in cookies:
            driver.add_cookie(c)
        print("🍪 Cookies loaded.")
    else:
        print("⚠️ Cookie file not found – login may be required.")

    # ========================================================
    # 3) requests.Session from Selenium cookies
    # ========================================================

    for ck in driver.get_cookies():
        requests_session.cookies.set(ck["name"], ck["value"])

    print("🔗 Requests session initialized.")

    # ========================================================
    # 7) PROCESS FIRST PAGE ONLY
    # ========================================================

    print("\n🌐 Loading FIRST page")
    driver.get(START_URL)
    time.sleep(2)

    close_popup_if_any()

    rows = driver.find_elements(By.CSS_SELECTOR, "table tr")
    # Fetch each row's cells once and keep only rows with exactly 7 cells.
    real_rows = [
        tds
        for tds in (r.find_elements(By.TAG_NAME, "td") for r in rows)
        if len(tds) == 7
    ]

    print(f"📄 Found {len(real_rows)} torrent rows")

    for cells in real_rows:
        try:
            data = parse_row(cells)
        except Exception as e:
            print(f"⚠️ parse_row failed: {e}")
            continue

        if not data:
            continue

        processed_count += 1

        if data["is_new_torrent"]:
            new_torrent_count += 1
            new_titles.append(data["title_visible"])
        else:
            existing_torrent_count += 1

        print("💾 Saving:", data["title_visible"])
        cursor.execute(insert_sql, data)

    # ========================================================
    # 8) SEND EMAIL REPORT
    # ========================================================

    RUN_END = datetime.datetime.now()

    subject = f"SKTorrent hourly run – {RUN_START:%Y-%m-%d %H:%M}"

    lines = [
        f"Run started: {RUN_START:%Y-%m-%d %H:%M:%S}",
        f"Run finished: {RUN_END:%Y-%m-%d %H:%M:%S}",
        "",
        f"Processed torrents: {processed_count}",
        f"New torrent files downloaded: {new_torrent_count}",
        f"Already known torrents: {existing_torrent_count}",
    ]

    if new_titles:
        lines.append("")
        lines.append("New torrents:")
        lines.extend(f"- {t}" for t in new_titles)

    body = "\n".join(lines)

    send_mail(
        to="vladimir.buzalka@buzalka.cz",
        subject=subject,
        body=body,
        html=False,
    )

    print("📧 Email report sent.")
finally:
    try:
        if driver is not None:
            driver.quit()
    finally:
        requests_session.close()
        cursor.close()
        db.close()

print("🎉 DONE")