torrents/30 OpenTextListing v3.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import pymysql
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
import re
import urllib.parse as urlparse
from pathlib import Path
import json
import requests
# ============================================================
# 1) MySQL CONNECTION
# ============================================================
db = pymysql.connect(
    host="192.168.1.76",
    port=3307,
    user="root",
    password="Vlado9674+",
    database="torrents",
    charset="utf8mb4",
    autocommit=True
)
cursor = db.cursor()
# ============================================================
# 2) Selenium setup
# ============================================================
COOKIE_FILE = Path("sktorrent_cookies.json")
URL = "https://sktorrent.eu/torrent/torrents.php?active=0"
chrome_options = Options()
chrome_options.add_argument("--start-maximized")
chrome_options.add_argument("--disable-notifications")
chrome_options.add_argument("--disable-popup-blocking")
chrome_options.add_argument("--disable-extensions")
driver = webdriver.Chrome(options=chrome_options)
driver.get("https://sktorrent.eu")
# Load cookies
session_cookies = []
if COOKIE_FILE.exists():
    with open(COOKIE_FILE, "r") as f:
        cookies = json.load(f)
    for c in cookies:
        driver.add_cookie(c)
        session_cookies.append({c['name']: c['value']})
    print("🍪 Cookies loaded.")
driver.get(URL)
time.sleep(2)
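# ------------------------------------------------------------
# NOTE: sktorrent_cookies.json is assumed to already exist. It is read as a list of
# Selenium cookie dicts (each with at least "name" and "value"), so one way it could
# have been produced is by logging in manually once and dumping the live cookies:
#
#   input("Log in in the browser window, then press Enter...")
#   with open("sktorrent_cookies.json", "w") as f:
#       json.dump(driver.get_cookies(), f, indent=2)
#
# (a sketch only; the actual file may have been created differently)
# ------------------------------------------------------------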
# ============================================================
# 3) Close interstitial popup robustly
# ============================================================
try:
    driver.execute_script("try { interstitialBox.closeit(); } catch(e) {}")
    print("🧹 Popup closed via JS fallback.")
    time.sleep(1)
except Exception:
    print("No popup found.")
# ============================================================
# Convert Selenium cookies → Python requests cookies
# ============================================================
requests_session = requests.Session()
for ck in driver.get_cookies():
    requests_session.cookies.set(ck["name"], ck["value"])
# ============================================================
# 4) Extract table rows
# ============================================================
rows = driver.find_elements(By.CSS_SELECTOR, "table tr")
print("Total rows found:", len(rows))
real_rows = []
for row in rows:
    cells = row.find_elements(By.TAG_NAME, "td")
    # Real torrent rows always have exactly 7 <td> cells
    if len(cells) == 7:
        real_rows.append(cells)
print("Real torrent rows:", len(real_rows))
print("")
# ============================================================
# 5) Function to extract fields from one row
# ============================================================
def parse_row(cells):
    # --------------------------
    # 1) CATEGORY
    # --------------------------
    category = cells[0].text.strip()

    # --------------------------
    # 2) DOWNLOAD LINK FOR TORRENT FILE
    # --------------------------
    try:
        download_a = cells[1].find_element(By.TAG_NAME, "a")
        download_link = download_a.get_attribute("href")
    except Exception:
        print("⚠️ No download link in row, skipping.")
        return None
    parsed_dl = urlparse.urlparse(download_link)
    dl_query = urlparse.parse_qs(parsed_dl.query)
    torrent_filename = dl_query.get("f", ["unknown.torrent"])[0]

    # --------------------------
    # 3) Title + details link (in cells[2])
    # --------------------------
    title_links = cells[2].find_elements(By.TAG_NAME, "a")
    if not title_links:
        print("⚠️ No title link — skipping row")
        return None
    a_tag = title_links[0]
    visible_name = a_tag.text.strip()
    full_title = a_tag.get_attribute("title")
    details_link = a_tag.get_attribute("href")
    if not details_link:
        print("⚠️ Row has no details link — skipping")
        return None

    # --------------------------
    # Extract torrent hash from ?id=
    # --------------------------
    parsed = urlparse.urlparse(details_link)
    query = urlparse.parse_qs(parsed.query)
    if "id" not in query:
        print("⚠️ Skipping row with no torrent ID →", details_link)
        return None
    torrent_hash = query["id"][0]

    # --------------------------
    # 4) Size + date parsing
    # --------------------------
    text_block = cells[2].get_attribute("innerText")
    text_block_clean = " ".join(text_block.split())
    size_match = re.search(r"Velkost ([0-9\.]+ ?[KMG]B)", text_block_clean, re.IGNORECASE)
    added_match = re.search(r"Pridany (.+?)(?:\sObrázok|$)", text_block_clean, re.IGNORECASE)
    size_pretty = size_match.group(1) if size_match else None
    added_pretty = added_match.group(1) if added_match else None

    # Robust time normalization: "DD/MM/YYYY o HH:MM" -> MySQL DATETIME string
    added_mysql = None
    if added_pretty:
        clean = added_pretty.replace(" o ", " ").strip()
        parts = clean.split(" ")
        date_part = parts[0]
        time_part = parts[1] if len(parts) > 1 else "00:00:00"
        # Add seconds if missing
        if len(time_part.split(":")) == 2:
            time_part += ":00"
        day, month, year = date_part.split("/")
        added_mysql = f"{year}-{month}-{day} {time_part}"
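    # A sketch of an equivalent normalization with the standard library (assumes the
    # listing never shows seconds, and would need "from datetime import datetime" at
    # the top of the file):
    #   added_mysql = datetime.strptime(clean, "%d/%m/%Y %H:%M").strftime("%Y-%m-%d %H:%M:%S")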
    # --------------------------
    # 5) Image preview
    # --------------------------
    img_link = None
    try:
        image_a = cells[2].find_element(By.XPATH, ".//a[contains(text(),'Obrázok')]")
        mouseover = image_a.get_attribute("onmouseover")
        img_match = re.search(r"src=([^ ]+)", mouseover)
        if img_match:
            img_link = img_match.group(1).replace("'", "").strip()
            if img_link.startswith("//"):
                img_link = "https:" + img_link
    except Exception:
        pass

    # --------------------------
    # 6) SEEDERS / LEECHERS
    # --------------------------
    seeders_a = cells[4].find_element(By.TAG_NAME, "a")
    seeders_number = int(seeders_a.text.strip())
    seeders_link = seeders_a.get_attribute("href")
    leechers_a = cells[5].find_element(By.TAG_NAME, "a")
    leechers_number = int(leechers_a.text.strip())
    leechers_link = leechers_a.get_attribute("href")

    # --------------------------
    # 7) DOWNLOAD TORRENT CONTENT (.torrent)
    # --------------------------
    try:
        torrent_content = requests_session.get(download_link).content
    except Exception as e:
        print(f"⚠️ Could not download torrent file for {torrent_hash}: {e}")
        torrent_content = None

    # --------------------------
    # FINAL DICTIONARY
    # --------------------------
    return {
        "torrent_hash": torrent_hash,
        "details_link": details_link,
        "category": category,
        "title_visible": visible_name,
        "title_full": full_title,
        "size_pretty": size_pretty,
        "added_datetime": added_mysql,
        "preview_image": img_link,
        "seeders": seeders_number,
        "seeders_link": seeders_link,
        "leechers": leechers_number,
        "leechers_link": leechers_link,
        "torrent_filename": torrent_filename,
        "torrent_content": torrent_content,
    }
# ============================================================
# 6) MySQL INSERT
# ============================================================
insert_sql = """
INSERT INTO torrents (
torrent_hash, details_link, category, title_visible, title_full,
size_pretty, added_datetime, preview_image,
seeders, seeders_link, leechers, leechers_link,
torrent_filename, torrent_content
) VALUES (
%(torrent_hash)s, %(details_link)s, %(category)s, %(title_visible)s, %(title_full)s,
%(size_pretty)s, %(added_datetime)s, %(preview_image)s,
%(seeders)s, %(seeders_link)s, %(leechers)s, %(leechers_link)s,
%(torrent_filename)s, %(torrent_content)s
)
ON DUPLICATE KEY UPDATE
details_link = VALUES(details_link),
category = VALUES(category),
title_visible = VALUES(title_visible),
title_full = VALUES(title_full),
size_pretty = VALUES(size_pretty),
added_datetime = VALUES(added_datetime),
preview_image = VALUES(preview_image),
seeders = VALUES(seeders),
seeders_link = VALUES(seeders_link),
leechers = VALUES(leechers),
leechers_link = VALUES(leechers_link),
torrent_filename = VALUES(torrent_filename),
torrent_content = VALUES(torrent_content);
"""
# ============================================================
# 7) PROCESS ALL ROWS
# ============================================================
for cells in real_rows:
    data = parse_row(cells)
    if not data:
        continue
    print("💾 Saving:", data["title_visible"])
    cursor.execute(insert_sql, data)
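# (Rows are written one by one so each title can be printed as it is saved; if the
# listing grows large, the same dicts could instead be collected into a list and
# written in one batch with cursor.executemany(insert_sql, rows_to_save), where
# rows_to_save is a hypothetical list of the parsed dicts. An untested sketch.)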
print("\n✅ DONE — All torrents saved to MySQL & torrent files downloaded.")
driver.quit()