#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Scrape the SKTorrent listing page with Selenium and store rows in MySQL.

Part 1: database connection, browser setup, cookie-based login and
extraction of the raw table rows from the listing page.
"""

import json
import re
import time
import urllib.parse as urlparse
from pathlib import Path

import pymysql
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# ============================================================
# 1) MySQL CONNECTION
# ============================================================

# SECURITY NOTE(review): credentials are hard-coded in source; move them to
# environment variables or a config file before sharing this script.
db = pymysql.connect(
    host="192.168.1.76",
    port=3307,
    user="root",
    password="Vlado9674+",
    database="torrents",
    charset="utf8mb4",
    autocommit=True,
)

cursor = db.cursor()

# ============================================================
# 2) Selenium setup
# ============================================================

COOKIE_FILE = Path("sktorrent_cookies.json")
URL = "https://sktorrent.eu/torrent/torrents.php?active=0"

chrome_options = Options()
chrome_options.add_argument("--start-maximized")
chrome_options.add_argument("--disable-notifications")
chrome_options.add_argument("--disable-popup-blocking")
chrome_options.add_argument("--disable-extensions")

driver = webdriver.Chrome(options=chrome_options)

# Cookies can only be added for the currently loaded domain, so open the
# site first, then inject the saved session cookies.
driver.get("https://sktorrent.eu")

if COOKIE_FILE.exists():
    with open(COOKIE_FILE, "r") as f:
        cookies = json.load(f)
    for c in cookies:
        driver.add_cookie(c)
    print("🍪 Cookies loaded.")

driver.get(URL)
time.sleep(2)  # give the ad-heavy page a moment to settle

# Try to close the inline popup if present; its absence is not an error.
try:
    close_btn = driver.find_element(By.XPATH, "//a[text()='CLOSE X']")
    close_btn.click()
    print("🧹 Popup closed.")
except Exception:  # fix: was a bare `except:` — never swallow KeyboardInterrupt/SystemExit
    pass

# ============================================================
# 3) Extract table rows
# ============================================================

rows = driver.find_elements(By.CSS_SELECTOR, "table tr")
print("Total rows found:", len(rows))

# Keep only rows that look like real torrent entries (>= 5 cells);
# header/separator rows have fewer <td> elements.
real_rows = []
for row in rows:
    cells = row.find_elements(By.TAG_NAME, "td")
    if len(cells) >= 5:
        real_rows.append(cells)

print("Real data rows:", len(real_rows))
print("")

# ============================================================
# 4) Function to extract all fields from one row
# ============================================================
def parse_row(cells):
    """Extract all torrent fields from one listing row.

    Parameters
    ----------
    cells : list of selenium WebElement
        The ``<td>`` elements of one torrent row (at least 5 cells).

    Returns
    -------
    dict or None
        Column-name -> value mapping ready for the MySQL insert, or
        ``None`` when the row carries no usable torrent id.
    """
    # --------------------------
    # 1️⃣ CATEGORY
    # --------------------------
    category = cells[0].text.strip()

    # --------------------------
    # 2️⃣ TITLES + DETAILS LINK
    # --------------------------
    a_tag = cells[1].find_element(By.TAG_NAME, "a")

    visible_name = a_tag.text.strip()
    full_title = a_tag.get_attribute("title")
    details_link = a_tag.get_attribute("href")

    # --------------------------
    # 3️⃣ TORRENT HASH (the ?id= query parameter of the details link)
    # --------------------------
    parsed = urlparse.urlparse(details_link)
    query = urlparse.parse_qs(parsed.query)

    # skip rows without ?id=
    if "id" not in query:
        print("⚠️ Skipping row with no torrent ID →", details_link)
        return None

    torrent_hash = query["id"][0]

    # --------------------------
    # 4️⃣ TEXT BLOCK (size + date)
    # --------------------------
    text_block = cells[1].get_attribute("innerText")
    text_block_clean = " ".join(text_block.split())

    size_match = re.search(r"Velkost ([0-9\.]+ ?[KMG]B)", text_block_clean, re.IGNORECASE)
    added_match = re.search(r"Pridany (.+?)(?:\sObrázok|$)", text_block_clean, re.IGNORECASE)

    size_pretty = size_match.group(1) if size_match else None
    added_pretty = added_match.group(1) if added_match else None

    # Convert “18/11/2025 o 07:00” → “2025-11-18 07:00:00”.
    # fix: guarded — the regex captures free text, so an unexpected layout
    # must not crash the whole scrape with an unhandled ValueError.
    added_mysql = None
    if added_pretty:
        try:
            cleaned = re.sub(r" o ", " ", added_pretty)
            day, month, year_time = cleaned.split("/")
            year, time_part = year_time.split(" ")
            added_mysql = f"{year}-{month}-{day} {time_part}:00"
        except ValueError:
            print("⚠️ Unrecognised date format →", added_pretty)

    # --------------------------
    # 5️⃣ IMAGE PREVIEW (URL hidden inside the onmouseover attribute)
    # --------------------------
    img_link = None
    try:
        image_a = cells[1].find_element(By.XPATH, ".//a[contains(text(),'Obrázok')]")
        mouseover = image_a.get_attribute("onmouseover")
        img_match = re.search(r"src=([^ ]+)", mouseover)
        if img_match:
            img_link = img_match.group(1).replace("'", "").strip()
            if img_link.startswith("//"):  # protocol-relative URL
                img_link = "https:" + img_link
    except Exception:  # fix: was a bare `except:`; a missing preview is fine
        pass

    # --------------------------
    # 6️⃣ SEEDERS
    # --------------------------
    seeders_a = cells[3].find_element(By.TAG_NAME, "a")
    seeders_number = int(seeders_a.text.strip())
    seeders_link = seeders_a.get_attribute("href")

    # --------------------------
    # 7️⃣ LEECHERS
    # --------------------------
    leechers_a = cells[4].find_element(By.TAG_NAME, "a")
    leechers_number = int(leechers_a.text.strip())
    leechers_link = leechers_a.get_attribute("href")

    # --------------------------
    # Return dictionary keyed to the MySQL column names
    # --------------------------
    return {
        "torrent_hash": torrent_hash,
        "details_link": details_link,
        "category": category,
        "title_visible": visible_name,
        "title_full": full_title,
        "size_pretty": size_pretty,
        "added_datetime": added_mysql,
        "preview_image": img_link,
        "seeders": seeders_number,
        "seeders_link": seeders_link,
        "leechers": leechers_number,
        "leechers_link": leechers_link,
    }


# ============================================================
# 5) MySQL INSERT
# ============================================================
# Upsert keyed on torrent_hash: re-running the scraper refreshes the
# mutable columns (seeders/leechers, links) instead of failing on duplicates.
insert_sql = """
INSERT INTO torrents (
    torrent_hash, details_link, category, title_visible, title_full,
    size_pretty, added_datetime, preview_image,
    seeders, seeders_link, leechers, leechers_link
) VALUES (
    %(torrent_hash)s, %(details_link)s, %(category)s, %(title_visible)s, %(title_full)s,
    %(size_pretty)s, %(added_datetime)s, %(preview_image)s,
    %(seeders)s, %(seeders_link)s, %(leechers)s, %(leechers_link)s
)
ON DUPLICATE KEY UPDATE
    details_link = VALUES(details_link),
    category = VALUES(category),
    title_visible = VALUES(title_visible),
    title_full = VALUES(title_full),
    size_pretty = VALUES(size_pretty),
    added_datetime = VALUES(added_datetime),
    preview_image = VALUES(preview_image),
    seeders = VALUES(seeders),
    seeders_link = VALUES(seeders_link),
    leechers = VALUES(leechers),
    leechers_link = VALUES(leechers_link);
"""


# ============================================================
# 6) PROCESS ALL REAL ROWS
# ============================================================

# fix: guarantee the browser is closed even if a row raises mid-scrape.
try:
    for cells in real_rows:
        data = parse_row(cells)
        if not data:
            continue

        print("💾 Saving:", data["title_visible"])
        cursor.execute(insert_sql, data)

    print("\n✅ DONE — All torrents saved to MySQL.")
finally:
    driver.quit()


# ==================================================================
# File: 40 ParseviaRequests.py — requests + BeautifulSoup variant
# ==================================================================

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import re
import requests
from bs4 import BeautifulSoup
import pymysql
from datetime import datetime

# ==============================
# CONFIG
# ==============================

BASE_URL = "https://sktorrent.eu/torrent/torrents_v2.php?active=0"

# NOTE(review): load_cookies() parses Netscape cookies.txt format, but the
# filename suggests JSON — confirm which format the exported file really uses.
COOKIES_FILE = "sktorrent_cookies.json"
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
)
{"User-Agent": USER_AGENT} + +DB_CFG = { + "host": "192.168.1.76", + "port": 3307, + "user": "root", + "password": "Vlado9674+", + "database": "torrents", + "charset": "utf8mb4", + "cursorclass": pymysql.cursors.DictCursor, +} + + +# ============================== +# COOKIE LOADER +# ============================== + +def load_cookies(path): + cookies = {} + with open(path, "r", encoding="utf-8") as f: + for line in f: + if line.startswith("#") or "\t" not in line: + continue + parts = line.strip().split("\t") + if len(parts) >= 7: + cookies[parts[5]] = parts[6] + print(f"🍪 Loaded {len(cookies)} cookies.") + return cookies + + +# ============================== +# MYSQL INSERT +# ============================== + +def insert_torrent(db, t): + sql = """ + INSERT IGNORE INTO torrents ( + category, + title_visible, + title_full, + size_pretty, + added_datetime, + seeders, + seeders_link, + leechers, + leechers_link, + preview_image, + details_link, + torrent_hash + ) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) + """ + with db.cursor() as cur: + cur.execute(sql, ( + t["category"], + t["title_visible"], + t["title_full"], + t["size_pretty"], + t["added_datetime"], + t["seeders"], + t["seeders_link"], + t["leechers"], + t["leechers_link"], + t["preview_image"], + t["details_link"], + t["torrent_hash"], + )) + db.commit() + + +# ============================== +# PARSER +# ============================== + +def parse_torrent_row(cols): + """Parse a with exactly the structure of a torrent row.""" + + # --- category --- + category = cols[0].get_text(strip=True) + + # --- download link (ignore) --- + # second is download.gif + + # --- main column --- + main_td = cols[2] + + a_title = main_td.find("a", href=re.compile("details.php")) + if not a_title: + return None + + title_visible = a_title.get_text(strip=True) + title_full = a_title.get("title", "").strip() + details_link = "https://sktorrent.eu/torrent/" + a_title.get("href") + + # Extract torrent hash from ?id=..... 
+ m = re.search(r"id=([A-Fa-f0-9]{40})", a_title.get("href")) + if not m: + return None + torrent_hash = m.group(1) + + # Extract size + added date from the text below
+ text = main_td.get_text(" ", strip=True) + # example: "GR ... Velkost 1.7 GB | Pridany 18/11/2025 o 07:00" + size_match = re.search(r"Velkost ([\d\.]+ ?[GMK]B)", text) + date_match = re.search(r"Pridany (\d{2}/\d{2}/\d{4}) o (\d{2}:\d{2})", text) + + size_pretty = size_match.group(1) if size_match else None + + added_datetime = None + if date_match: + d, t = date_match.groups() + added_datetime = datetime.strptime(d + " " + t, "%d/%m/%Y %H:%M") + + # Extract preview img from onmouseover + img = None + img_a = main_td.find("a", onmouseover=True) + if img_a: + html = img_a.get("onmouseover", "") + m2 = re.search(r"img src=//([^ ]+)", html) + if m2: + img = "https://" + m2.group(1) + + # --- seeders --- + seed_a = cols[4].find("a") + seeders = int(seed_a.get_text(strip=True)) if seed_a else 0 + seeders_link = "https://sktorrent.eu/torrent/" + seed_a.get("href") if seed_a else None + + # --- leechers --- + leech_a = cols[5].find("a") + leechers = int(leech_a.get_text(strip=True)) if leech_a else 0 + leechers_link = "https://sktorrent.eu/torrent/" + leech_a.get("href") if leech_a else None + + return { + "category": category, + "title_visible": title_visible, + "title_full": title_full, + "size_pretty": size_pretty, + "added_datetime": added_datetime, + "seeders": seeders, + "seeders_link": seeders_link, + "leechers": leechers, + "leechers_link": leechers_link, + "preview_image": img, + "details_link": details_link, + "torrent_hash": torrent_hash, + } + + +# ============================== +# MAIN +# ============================== + +def main(): + + cookies = load_cookies(COOKIES_FILE) + + session = requests.Session() + session.headers.update(HEADERS) + session.cookies.update(cookies) + + print("🌍 Downloading HTML...") + r = session.get(BASE_URL, timeout=30) + r.raise_for_status() + + soup = BeautifulSoup(r.text, "html.parser") + tbody = soup.find("tbody") + if not tbody: + print("❌ Could not find ") + return + + rows = tbody.find_all("tr") + print(f"Found {len(rows)} 
rows.") + + db = pymysql.connect(**DB_CFG) + + inserted = 0 + skipped = 0 + + for tr in rows: + cols = tr.find_all("td") + if len(cols) != 7: + continue # ignore header & separator rows + + data = parse_torrent_row(cols) + if not data: + skipped += 1 + continue + + insert_torrent(db, data) + inserted += 1 + print(f"✔ Inserted {data['torrent_hash']}") + + print(f"\n===== DONE =====") + print(f"Inserted: {inserted}") + print(f"Skipped: {skipped}") + + +if __name__ == "__main__": + main()