Files
torrents/30 OpenTextListing v2.py
2025-12-15 06:11:53 +01:00

257 lines
7.7 KiB
Python
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import pymysql
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
import re
import urllib.parse as urlparse
from pathlib import Path
import json
# ============================================================
# 1) MySQL CONNECTION
# ============================================================
db = pymysql.connect(
host="192.168.1.76",
port=3307,
user="root",
password="Vlado9674+",
database="torrents",
charset="utf8mb4",
autocommit=True
)
cursor = db.cursor()
# ============================================================
# 2) Selenium setup
# ============================================================
COOKIE_FILE = Path("sktorrent_cookies.json")
URL = "https://sktorrent.eu/torrent/torrents.php?active=0&category=24&order=data&by=DESC&zaner=&jazyk=&page=0"
chrome_options = Options()
chrome_options.add_argument("--start-maximized")
chrome_options.add_argument("--disable-notifications")
chrome_options.add_argument("--disable-popup-blocking")
chrome_options.add_argument("--disable-extensions")
driver = webdriver.Chrome(options=chrome_options)
driver.get("https://sktorrent.eu")
# Load cookies
if COOKIE_FILE.exists():
with open(COOKIE_FILE, "r") as f:
cookies = json.load(f)
for c in cookies:
driver.add_cookie(c)
print("🍪 Cookies loaded.")
driver.get(URL)
time.sleep(2)
# ============================================================
# Close interstitial popup reliably
# ============================================================
time.sleep(1)
try:
# JS close always exists even when HTML structure varies
driver.execute_script("try { interstitialBox.closeit(); } catch(e) {}")
print("🧹 Popup closed via JS fallback.")
time.sleep(1)
except:
print(" Popup JS handler not found (probably no popup).")
# ============================================================
# 3) Extract table rows
# ============================================================
rows = driver.find_elements(By.CSS_SELECTOR, "table tr")
print("Total rows found:", len(rows))
real_rows = []
for row in rows:
cells = row.find_elements(By.TAG_NAME, "td")
# REAL TORRENT ROWS ALWAYS HAVE EXACTLY 7 TD CELLS
if len(cells) == 7:
real_rows.append(cells)
print("Real torrent rows:", len(real_rows))
print("")
# ============================================================
# 4) Function to extract fields from one row
# ============================================================
def parse_row(cells):
# --------------------------
# 1⃣ CATEGORY (cells[0])
# --------------------------
category = cells[0].text.strip()
# --------------------------
# 2⃣ TITLE + DETAILS LINK (always inside cells[2])
# --------------------------
title_links = cells[2].find_elements(By.TAG_NAME, "a")
if not title_links:
print("⚠️ Missing title link — skipping row")
return None
a_tag = title_links[0]
visible_name = a_tag.text.strip()
full_title = a_tag.get_attribute("title")
details_link = a_tag.get_attribute("href")
if not details_link:
print("⚠️ Row has no details link — skipping")
return None
# --------------------------
# 3⃣ TORRENT HASH
# --------------------------
parsed = urlparse.urlparse(details_link)
query = urlparse.parse_qs(parsed.query)
if "id" not in query:
print("⚠️ Skipping row with no torrent ID →", details_link)
return None
torrent_hash = query["id"][0]
# --------------------------
# 4⃣ TEXT BLOCK (size + date)
# --------------------------
text_block = cells[2].get_attribute("innerText")
text_block_clean = " ".join(text_block.split())
size_match = re.search(r"Velkost ([0-9\.]+ ?[KMG]B)", text_block_clean, re.IGNORECASE)
added_match = re.search(r"Pridany (.+?)(?:\sObrázok|$)", text_block_clean, re.IGNORECASE)
size_pretty = size_match.group(1) if size_match else None
added_pretty = added_match.group(1) if added_match else None
# Convert “18/11/2025 o 07:00” → “2025-11-18 07:00:00”
added_mysql = None
if added_pretty:
# Normalize formats like "29/11/2025 o 02:29", "29/11/2025 02:29:18"
clean = added_pretty.replace(" o ", " ").strip()
# Split date and time
date_part, *time_parts = clean.split(" ")
# If seconds are missing, add :00
time_part = time_parts[0] if time_parts else "00:00"
if len(time_part.split(":")) == 2:
time_part += ":00"
day, month, year = date_part.split("/")
added_mysql = f"{year}-{month}-{day} {time_part}"
# --------------------------
# 5⃣ IMAGE PREVIEW
# --------------------------
img_link = None
try:
image_a = cells[2].find_element(By.XPATH, ".//a[contains(text(),'Obrázok')]")
mouseover = image_a.get_attribute("onmouseover")
img_match = re.search(r"src=([^ ]+)", mouseover)
if img_match:
img_link = img_match.group(1).replace("'", "").strip()
if img_link.startswith("//"):
img_link = "https:" + img_link
except:
pass
# --------------------------
# 6⃣ SEEDERS (cells[4])
# --------------------------
seeders_a = cells[4].find_element(By.TAG_NAME, "a")
seeders_number = int(seeders_a.text.strip())
seeders_link = seeders_a.get_attribute("href")
# --------------------------
# 7⃣ LEECHERS (cells[5])
# --------------------------
leechers_a = cells[5].find_element(By.TAG_NAME, "a")
leechers_number = int(leechers_a.text.strip())
leechers_link = leechers_a.get_attribute("href")
# --------------------------
# Return result
# --------------------------
return {
"torrent_hash": torrent_hash,
"details_link": details_link,
"category": category,
"title_visible": visible_name,
"title_full": full_title,
"size_pretty": size_pretty,
"added_datetime": added_mysql,
"preview_image": img_link,
"seeders": seeders_number,
"seeders_link": seeders_link,
"leechers": leechers_number,
"leechers_link": leechers_link,
}
# ============================================================
# 5) MySQL INSERT
# ============================================================
insert_sql = """
INSERT INTO torrents (
torrent_hash, details_link, category, title_visible, title_full,
size_pretty, added_datetime, preview_image,
seeders, seeders_link, leechers, leechers_link
) VALUES (
%(torrent_hash)s, %(details_link)s, %(category)s, %(title_visible)s, %(title_full)s,
%(size_pretty)s, %(added_datetime)s, %(preview_image)s,
%(seeders)s, %(seeders_link)s, %(leechers)s, %(leechers_link)s
)
ON DUPLICATE KEY UPDATE
details_link = VALUES(details_link),
category = VALUES(category),
title_visible = VALUES(title_visible),
title_full = VALUES(title_full),
size_pretty = VALUES(size_pretty),
added_datetime = VALUES(added_datetime),
preview_image = VALUES(preview_image),
seeders = VALUES(seeders),
seeders_link = VALUES(seeders_link),
leechers = VALUES(leechers),
leechers_link = VALUES(leechers_link);
"""
# ============================================================
# 6) PROCESS ALL ROWS
# ============================================================
for cells in real_rows:
data = parse_row(cells)
if not data:
continue
print("💾 Saving:", data["title_visible"])
cursor.execute(insert_sql, data)
print("\n✅ DONE — All torrents saved to MySQL.")
driver.quit()