vbnotebook

This commit is contained in:
2025-11-18 07:22:17 +01:00
parent a764c9723e
commit 7bc330beba
2 changed files with 412 additions and 134 deletions

View File

@@ -1,171 +1,230 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import pymysql
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
import re
import urllib.parse as urlparse
from pathlib import Path
from playwright.sync_api import sync_playwright
import json
# =============================================================
# CONFIGURATION
# =============================================================
# ============================================================
# 1) MySQL CONNECTION
# ============================================================
# Open the MySQL connection used for every torrent upsert below.
# NOTE(review): credentials are hardcoded in source — move host/user/password
# to environment variables or a config file before sharing this script.
db = pymysql.connect(
    host="192.168.1.76",
    port=3307,
    user="root",
    password="Vlado9674+",
    database="torrents",
    charset="utf8mb4",   # full Unicode (emoji-safe titles)
    autocommit=True      # each INSERT/UPDATE is committed immediately
)
# Shared cursor reused for all inserts in this run.
cursor = db.cursor()
# ============================================================
# 2) Selenium setup
# ============================================================
# Session cookies captured by a previous manual login run.
COOKIE_FILE = Path("sktorrent_cookies.json")
# Torrent listing to scrape (active=0 — presumably the inactive list; confirm).
URL = "https://sktorrent.eu/torrent/torrents.php?active=0"

# Chrome flags: maximize the window and disable the UI features
# (notifications, popups, extensions) that interfere with scraping.
chrome_options = Options()
for flag in (
    "--start-maximized",
    "--disable-notifications",
    "--disable-popup-blocking",
    "--disable-extensions",
):
    chrome_options.add_argument(flag)
def load_cookies(context):
    """Load previously saved login cookies into a Playwright *context*.

    Returns True when the cookie file existed and its cookies were added
    to the context, False otherwise (caller must then log in manually).
    """
    if not COOKIE_FILE.exists():
        print("❌ Cookie file not found. Run manual login first.")
        return False
    # Cookie file is a JSON list produced by a prior manual-login run;
    # read with an explicit encoding so the result is platform-independent.
    cookies = json.loads(COOKIE_FILE.read_text(encoding="utf-8"))
    context.add_cookies(cookies)
    print("🔄 Loaded login cookies.")
    return True
# Launch Chrome and visit the site root first: Selenium only accepts
# cookies for the domain of the currently loaded page.
driver = webdriver.Chrome(options=chrome_options)
driver.get("https://sktorrent.eu")

# Replay saved login cookies, when a previous session stored them.
if COOKIE_FILE.exists():
    with open(COOKIE_FILE, "r") as cookie_fh:
        saved_cookies = json.load(cookie_fh)
    for cookie in saved_cookies:
        driver.add_cookie(cookie)
    print("🍪 Cookies loaded.")

# Now open the actual listing page and give it a moment to render.
driver.get(URL)
time.sleep(2)
# =============================================================
# MAIN CODE
# =============================================================
# Try to close inline popup if present
# Best-effort: dismiss the site's inline popup when it is present.
try:
    close_btn = driver.find_element(By.XPATH, "//a[text()='CLOSE X']")
    close_btn.click()
    print("🧹 Popup closed.")
except Exception:
    # Popup not shown (or already gone) — ignore. A bare `except:` here
    # would also have swallowed SystemExit/KeyboardInterrupt.
    pass
with sync_playwright() as p:
# 1⃣ Launch browser
browser = p.chromium.launch(
headless=False,
args=[
"--disable-popup-blocking",
"--disable-background-networking",
"--disable-notifications",
"--no-default-browser-check",
"--no-first-run",
"--noerrdialogs",
"--disable-dev-shm-usage",
"--disable-features=IsolateOrigins,site-per-process",
"--no-sandbox",
]
)
# ============================================================
# 3) Extract table rows
# ============================================================
# 2⃣ Create context before any pages exist
context = browser.new_context()
rows = driver.find_elements(By.CSS_SELECTOR, "table tr")
print("Total rows found:", len(rows))
# 3⃣ Block ALL third-party requests (ads, JS, popups, tracking)
def block_third_party(route, request):
    """Playwright route handler: allow only first-party sktorrent.eu requests.

    Requests to sktorrent.eu (or any of its subdomains) continue normally;
    everything else (ads, trackers, popup scripts) is aborted.
    """
    # Compare the parsed hostname, not the raw URL: a plain substring test
    # ("sktorrent.eu" in url) would also pass look-alike or query-string
    # matches such as "https://evil.com/?ref=sktorrent.eu".
    host = (urlparse.urlparse(request.url).hostname or "").lower()
    if host == "sktorrent.eu" or host.endswith(".sktorrent.eu"):
        route.continue_()
    else:
        print(f"🚫 Blocked third-party request: {request.url.lower()}")
        route.abort()
real_rows = []
for row in rows:
cells = row.find_elements(By.TAG_NAME, "td")
if len(cells) >= 5: # real torrent rows
real_rows.append(cells)
context.route("**/*", block_third_party)
print("Real data rows:", len(real_rows))
print("")
# 4⃣ Block ANY popup windows except the first page
# Every page the context ever creates, in creation order; index 0 is the
# real main page, anything after it is an unwanted popup.
pages = []


def on_new_page(new_page):
    """Keep the first page of the context; auto-close any later (popup) page."""
    pages.append(new_page)
    if len(pages) > 1:
        print("⚠️ Popup blocked (auto-closed).")
        new_page.close()
        return
    print("➡️ Main page created.")
# ============================================================
# 4) Function to extract all fields from one row
# ============================================================
context.on("page", on_new_page)
def parse_row(cells):
# 5⃣ Disable all popup JS functions (window.open, window.close, opener.close)
context.add_init_script("""
window.open = () => { console.log("Blocked window.open"); return null; };
window.close = () => { console.log("Blocked window.close"); };
# --------------------------
# 1⃣ CATEGORY
# --------------------------
category = cells[0].text.strip()
try {
if (window.opener) {
window.opener.close = () => { console.log("Blocked opener.close"); };
}
} catch (e) {}
# --------------------------
# 2⃣ TITLES + DETAILS LINK
# --------------------------
a_tag = cells[1].find_element(By.TAG_NAME, "a")
// Block <a target="_blank">
document.addEventListener('click', function(e) {
const el = e.target.closest('a[target="_blank"]');
if (el) {
e.preventDefault();
console.log("Blocked target=_blank");
}
}, true);
visible_name = a_tag.text.strip()
full_title = a_tag.get_attribute("title")
details_link = a_tag.get_attribute("href")
// Block middle-click opening a new tab
document.addEventListener('auxclick', function(e) {
e.preventDefault();
}, true);
""")
# --------------------------
# 3⃣ TORRENT HASH
# --------------------------
parsed = urlparse.urlparse(details_link)
query = urlparse.parse_qs(parsed.query)
# 6⃣ Create the FIRST page (main page)
page = context.new_page()
pages.append(page)
# skip rows without ?id=
if "id" not in query:
print("⚠️ Skipping row with no torrent ID →", details_link)
return None
# 7⃣ Load cookies (login)
load_cookies(context)
torrent_hash = query["id"][0]
# 8⃣ Navigate
print("🌍 Opening page...")
page.goto(URL)
# --------------------------
# 4⃣ TEXT BLOCK (size + date)
# --------------------------
text_block = cells[1].get_attribute("innerText")
text_block_clean = " ".join(text_block.split())
# Do NOT use networkidle on ad-heavy sites
page.wait_for_load_state("domcontentloaded")
page.wait_for_selector("table tr", timeout=15000)
# Remove popup/overlay elements created by SKTorrent
page.evaluate("""
const selectors = [
'#lightbox', '.lightbox', '#popup', '.popup',
'.overlay', '#overlay', '.modal', '#modal',
'div[style*="fixed"]', 'div[style*="position: fixed"]',
'table[style*="position: fixed"]',
'iframe', 'frame'
];
size_match = re.search(r"Velkost ([0-9\.]+ ?[KMG]B)", text_block_clean, re.IGNORECASE)
added_match = re.search(r"Pridany (.+?)(?:\sObrázok|$)", text_block_clean, re.IGNORECASE)
selectors.forEach(sel => {
document.querySelectorAll(sel).forEach(el => {
console.log("Removing popup element:", sel);
el.remove();
});
});
size_pretty = size_match.group(1) if size_match else None
added_pretty = added_match.group(1) if added_match else None
// Remove onclick handlers that trigger popups
document.querySelectorAll('*').forEach(el => {
el.onclick = null;
el.onauxclick = null;
el.oncontextmenu = null;
});
# Convert “18/11/2025 o 07:00” → “2025-11-18 07:00:00”
added_mysql = None
if added_pretty:
added_mysql = re.sub(r" o ", " ", added_pretty)
day, month, year_time = added_mysql.split("/")
year, time_part = year_time.split(" ")
added_mysql = f"{year}-{month}-{day} {time_part}:00"
// Remove timers that trigger delayed popups
window.setTimeout = () => {};
window.setInterval = () => {};
""")
# --------------------------
# 5⃣ IMAGE PREVIEW
# --------------------------
img_link = None
try:
image_a = cells[1].find_element(By.XPATH, ".//a[contains(text(),'Obrázok')]")
mouseover = image_a.get_attribute("onmouseover")
img_match = re.search(r"src=([^ ]+)", mouseover)
if img_match:
img_link = img_match.group(1).replace("'", "").strip()
if img_link.startswith("//"):
img_link = "https:" + img_link
except:
pass
print("✔ Page loaded, extracting table rows...")
# --------------------------
# 6⃣ SEEDERS
# --------------------------
seeders_a = cells[3].find_element(By.TAG_NAME, "a")
seeders_number = int(seeders_a.text.strip())
seeders_link = seeders_a.get_attribute("href")
# 9⃣ Extract all rows
rows = page.locator("table tr").all()
print(f"📄 Total rows found (including header): {len(rows)}")
# --------------------------
# 7⃣ LEECHERS
# --------------------------
leechers_a = cells[4].find_element(By.TAG_NAME, "a")
leechers_number = int(leechers_a.text.strip())
leechers_link = leechers_a.get_attribute("href")
# 🔟 Extract SECOND ROW only (your request)
if len(rows) > 1:
row = rows[1] # 0 = header, 1 = first data row
tds = row.locator("td")
# --------------------------
# Return dictionary for MySQL
# --------------------------
return {
"torrent_hash": torrent_hash,
"details_link": details_link,
"category": category,
"title_visible": visible_name,
"title_full": full_title,
"size_pretty": size_pretty,
"added_datetime": added_mysql,
"preview_image": img_link,
"seeders": seeders_number,
"seeders_link": seeders_link,
"leechers": leechers_number,
"leechers_link": leechers_link,
}
name = tds.nth(1).inner_text().strip()
size = tds.nth(2).inner_text().strip()
seeders = tds.nth(3).inner_text().strip()
leechers = tds.nth(4).inner_text().strip()
print("\n========= SECOND ROW =========")
print(f"Name: {name}")
print(f"Size: {size}")
print(f"Seeders: {seeders}")
print(f"Leechers: {leechers}")
print("==============================\n")
else:
print("❌ No data rows found!")
# ============================================================
# 5) MySQL INSERT
# ============================================================
page.wait_for_timeout(5000)
# ------------------------------------------------------------
# Upsert one row per torrent, keyed on torrent_hash: new rows are
# inserted, existing rows get their mutable fields refreshed.
# ------------------------------------------------------------
insert_sql = """
INSERT INTO torrents (
torrent_hash, details_link, category, title_visible, title_full,
size_pretty, added_datetime, preview_image,
seeders, seeders_link, leechers, leechers_link
) VALUES (
%(torrent_hash)s, %(details_link)s, %(category)s, %(title_visible)s, %(title_full)s,
%(size_pretty)s, %(added_datetime)s, %(preview_image)s,
%(seeders)s, %(seeders_link)s, %(leechers)s, %(leechers_link)s
)
ON DUPLICATE KEY UPDATE
details_link = VALUES(details_link),
category = VALUES(category),
title_visible = VALUES(title_visible),
title_full = VALUES(title_full),
size_pretty = VALUES(size_pretty),
added_datetime = VALUES(added_datetime),
preview_image = VALUES(preview_image),
seeders = VALUES(seeders),
seeders_link = VALUES(seeders_link),
leechers = VALUES(leechers),
leechers_link = VALUES(leechers_link);
"""

# Persist every parsed row; rows parse_row rejected (returned None) are
# skipped. try/finally guarantees the browser AND the DB handles are
# released even if an insert fails — the original leaked the cursor and
# connection, and skipped driver.quit() on any exception.
try:
    for cells in real_rows:
        data = parse_row(cells)
        if not data:
            continue
        print("💾 Saving:", data["title_visible"])
        cursor.execute(insert_sql, data)
    print("\n✅ DONE — All torrents saved to MySQL.")
finally:
    driver.quit()
    cursor.close()
    db.close()