Files
torrents/30 OpenTextListing.py
2025-11-18 07:22:17 +01:00

231 lines
6.8 KiB
Python
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import pymysql
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
import re
import urllib.parse as urlparse
from pathlib import Path
import json
# ============================================================
# 1) MySQL CONNECTION
# ============================================================
db = pymysql.connect(
host="192.168.1.76",
port=3307,
user="root",
password="Vlado9674+",
database="torrents",
charset="utf8mb4",
autocommit=True
)
cursor = db.cursor()
# ============================================================
# 2) Selenium setup
# ============================================================
COOKIE_FILE = Path("sktorrent_cookies.json")
URL = "https://sktorrent.eu/torrent/torrents.php?active=0"
chrome_options = Options()
chrome_options.add_argument("--start-maximized")
chrome_options.add_argument("--disable-notifications")
chrome_options.add_argument("--disable-popup-blocking")
chrome_options.add_argument("--disable-extensions")
driver = webdriver.Chrome(options=chrome_options)
driver.get("https://sktorrent.eu")
# Load cookies
if COOKIE_FILE.exists():
with open(COOKIE_FILE, "r") as f:
cookies = json.load(f)
for c in cookies:
driver.add_cookie(c)
print("🍪 Cookies loaded.")
driver.get(URL)
time.sleep(2)
# Try to close inline popup if present
try:
close_btn = driver.find_element(By.XPATH, "//a[text()='CLOSE X']")
close_btn.click()
print("🧹 Popup closed.")
except:
pass
# ============================================================
# 3) Extract table rows
# ============================================================
rows = driver.find_elements(By.CSS_SELECTOR, "table tr")
print("Total rows found:", len(rows))
real_rows = []
for row in rows:
cells = row.find_elements(By.TAG_NAME, "td")
if len(cells) >= 5: # real torrent rows
real_rows.append(cells)
print("Real data rows:", len(real_rows))
print("")
# ============================================================
# 4) Function to extract all fields from one row
# ============================================================
def parse_row(cells):
# --------------------------
# 1⃣ CATEGORY
# --------------------------
category = cells[0].text.strip()
# --------------------------
# 2⃣ TITLES + DETAILS LINK
# --------------------------
a_tag = cells[1].find_element(By.TAG_NAME, "a")
visible_name = a_tag.text.strip()
full_title = a_tag.get_attribute("title")
details_link = a_tag.get_attribute("href")
# --------------------------
# 3⃣ TORRENT HASH
# --------------------------
parsed = urlparse.urlparse(details_link)
query = urlparse.parse_qs(parsed.query)
# skip rows without ?id=
if "id" not in query:
print("⚠️ Skipping row with no torrent ID →", details_link)
return None
torrent_hash = query["id"][0]
# --------------------------
# 4⃣ TEXT BLOCK (size + date)
# --------------------------
text_block = cells[1].get_attribute("innerText")
text_block_clean = " ".join(text_block.split())
size_match = re.search(r"Velkost ([0-9\.]+ ?[KMG]B)", text_block_clean, re.IGNORECASE)
added_match = re.search(r"Pridany (.+?)(?:\sObrázok|$)", text_block_clean, re.IGNORECASE)
size_pretty = size_match.group(1) if size_match else None
added_pretty = added_match.group(1) if added_match else None
# Convert “18/11/2025 o 07:00” → “2025-11-18 07:00:00”
added_mysql = None
if added_pretty:
added_mysql = re.sub(r" o ", " ", added_pretty)
day, month, year_time = added_mysql.split("/")
year, time_part = year_time.split(" ")
added_mysql = f"{year}-{month}-{day} {time_part}:00"
# --------------------------
# 5⃣ IMAGE PREVIEW
# --------------------------
img_link = None
try:
image_a = cells[1].find_element(By.XPATH, ".//a[contains(text(),'Obrázok')]")
mouseover = image_a.get_attribute("onmouseover")
img_match = re.search(r"src=([^ ]+)", mouseover)
if img_match:
img_link = img_match.group(1).replace("'", "").strip()
if img_link.startswith("//"):
img_link = "https:" + img_link
except:
pass
# --------------------------
# 6⃣ SEEDERS
# --------------------------
seeders_a = cells[3].find_element(By.TAG_NAME, "a")
seeders_number = int(seeders_a.text.strip())
seeders_link = seeders_a.get_attribute("href")
# --------------------------
# 7⃣ LEECHERS
# --------------------------
leechers_a = cells[4].find_element(By.TAG_NAME, "a")
leechers_number = int(leechers_a.text.strip())
leechers_link = leechers_a.get_attribute("href")
# --------------------------
# Return dictionary for MySQL
# --------------------------
return {
"torrent_hash": torrent_hash,
"details_link": details_link,
"category": category,
"title_visible": visible_name,
"title_full": full_title,
"size_pretty": size_pretty,
"added_datetime": added_mysql,
"preview_image": img_link,
"seeders": seeders_number,
"seeders_link": seeders_link,
"leechers": leechers_number,
"leechers_link": leechers_link,
}
# ============================================================
# 5) MySQL INSERT
# ============================================================
insert_sql = """
INSERT INTO torrents (
torrent_hash, details_link, category, title_visible, title_full,
size_pretty, added_datetime, preview_image,
seeders, seeders_link, leechers, leechers_link
) VALUES (
%(torrent_hash)s, %(details_link)s, %(category)s, %(title_visible)s, %(title_full)s,
%(size_pretty)s, %(added_datetime)s, %(preview_image)s,
%(seeders)s, %(seeders_link)s, %(leechers)s, %(leechers_link)s
)
ON DUPLICATE KEY UPDATE
details_link = VALUES(details_link),
category = VALUES(category),
title_visible = VALUES(title_visible),
title_full = VALUES(title_full),
size_pretty = VALUES(size_pretty),
added_datetime = VALUES(added_datetime),
preview_image = VALUES(preview_image),
seeders = VALUES(seeders),
seeders_link = VALUES(seeders_link),
leechers = VALUES(leechers),
leechers_link = VALUES(leechers_link);
"""
# ============================================================
# 6) PROCESS ALL REAL ROWS
# ============================================================
for cells in real_rows:
data = parse_row(cells)
if not data:
continue
print("💾 Saving:", data["title_visible"])
cursor.execute(insert_sql, data)
print("\n✅ DONE — All torrents saved to MySQL.")
driver.quit()