vbnotebook
This commit is contained in:
@@ -1,171 +1,230 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import json
|
import pymysql
|
||||||
|
from selenium import webdriver
|
||||||
|
from selenium.webdriver.common.by import By
|
||||||
|
from selenium.webdriver.chrome.options import Options
|
||||||
|
import time
|
||||||
|
import re
|
||||||
|
import urllib.parse as urlparse
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from playwright.sync_api import sync_playwright
|
import json
|
||||||
|
|
||||||
# =============================================================
|
|
||||||
# CONFIGURATION
|
# ============================================================
|
||||||
# =============================================================
|
# 1) MySQL CONNECTION
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
db = pymysql.connect(
|
||||||
|
host="192.168.1.76",
|
||||||
|
port=3307,
|
||||||
|
user="root",
|
||||||
|
password="Vlado9674+",
|
||||||
|
database="torrents",
|
||||||
|
charset="utf8mb4",
|
||||||
|
autocommit=True
|
||||||
|
)
|
||||||
|
|
||||||
|
cursor = db.cursor()
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# 2) Selenium setup
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
COOKIE_FILE = Path("sktorrent_cookies.json")
|
COOKIE_FILE = Path("sktorrent_cookies.json")
|
||||||
URL = "https://sktorrent.eu/torrent/torrents.php?active=0"
|
URL = "https://sktorrent.eu/torrent/torrents.php?active=0"
|
||||||
|
|
||||||
|
chrome_options = Options()
|
||||||
|
chrome_options.add_argument("--start-maximized")
|
||||||
|
chrome_options.add_argument("--disable-notifications")
|
||||||
|
chrome_options.add_argument("--disable-popup-blocking")
|
||||||
|
chrome_options.add_argument("--disable-extensions")
|
||||||
|
|
||||||
def load_cookies(context):
|
driver = webdriver.Chrome(options=chrome_options)
|
||||||
"""Load saved cookies if available."""
|
|
||||||
|
driver.get("https://sktorrent.eu")
|
||||||
|
|
||||||
|
# Load cookies
|
||||||
if COOKIE_FILE.exists():
|
if COOKIE_FILE.exists():
|
||||||
with open(COOKIE_FILE, "r") as f:
|
with open(COOKIE_FILE, "r") as f:
|
||||||
cookies = json.load(f)
|
cookies = json.load(f)
|
||||||
context.add_cookies(cookies)
|
for c in cookies:
|
||||||
print("🔄 Loaded login cookies.")
|
driver.add_cookie(c)
|
||||||
return True
|
print("🍪 Cookies loaded.")
|
||||||
print("❌ Cookie file not found. Run manual login first.")
|
|
||||||
return False
|
driver.get(URL)
|
||||||
|
time.sleep(2)
|
||||||
|
|
||||||
|
|
||||||
# =============================================================
|
# Try to close inline popup if present
|
||||||
# MAIN CODE
|
try:
|
||||||
# =============================================================
|
close_btn = driver.find_element(By.XPATH, "//a[text()='CLOSE X']")
|
||||||
|
close_btn.click()
|
||||||
|
print("🧹 Popup closed.")
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
with sync_playwright() as p:
|
|
||||||
|
|
||||||
# 1️⃣ Launch browser
|
# ============================================================
|
||||||
browser = p.chromium.launch(
|
# 3) Extract table rows
|
||||||
headless=False,
|
# ============================================================
|
||||||
args=[
|
|
||||||
"--disable-popup-blocking",
|
rows = driver.find_elements(By.CSS_SELECTOR, "table tr")
|
||||||
"--disable-background-networking",
|
print("Total rows found:", len(rows))
|
||||||
"--disable-notifications",
|
|
||||||
"--no-default-browser-check",
|
real_rows = []
|
||||||
"--no-first-run",
|
for row in rows:
|
||||||
"--noerrdialogs",
|
cells = row.find_elements(By.TAG_NAME, "td")
|
||||||
"--disable-dev-shm-usage",
|
if len(cells) >= 5: # real torrent rows
|
||||||
"--disable-features=IsolateOrigins,site-per-process",
|
real_rows.append(cells)
|
||||||
"--no-sandbox",
|
|
||||||
]
|
print("Real data rows:", len(real_rows))
|
||||||
|
print("")
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# 4) Function to extract all fields from one row
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
def parse_row(cells):
|
||||||
|
|
||||||
|
# --------------------------
|
||||||
|
# 1️⃣ CATEGORY
|
||||||
|
# --------------------------
|
||||||
|
category = cells[0].text.strip()
|
||||||
|
|
||||||
|
# --------------------------
|
||||||
|
# 2️⃣ TITLES + DETAILS LINK
|
||||||
|
# --------------------------
|
||||||
|
a_tag = cells[1].find_element(By.TAG_NAME, "a")
|
||||||
|
|
||||||
|
visible_name = a_tag.text.strip()
|
||||||
|
full_title = a_tag.get_attribute("title")
|
||||||
|
details_link = a_tag.get_attribute("href")
|
||||||
|
|
||||||
|
# --------------------------
|
||||||
|
# 3️⃣ TORRENT HASH
|
||||||
|
# --------------------------
|
||||||
|
parsed = urlparse.urlparse(details_link)
|
||||||
|
query = urlparse.parse_qs(parsed.query)
|
||||||
|
|
||||||
|
# skip rows without ?id=
|
||||||
|
if "id" not in query:
|
||||||
|
print("⚠️ Skipping row with no torrent ID →", details_link)
|
||||||
|
return None
|
||||||
|
|
||||||
|
torrent_hash = query["id"][0]
|
||||||
|
|
||||||
|
# --------------------------
|
||||||
|
# 4️⃣ TEXT BLOCK (size + date)
|
||||||
|
# --------------------------
|
||||||
|
text_block = cells[1].get_attribute("innerText")
|
||||||
|
text_block_clean = " ".join(text_block.split())
|
||||||
|
|
||||||
|
size_match = re.search(r"Velkost ([0-9\.]+ ?[KMG]B)", text_block_clean, re.IGNORECASE)
|
||||||
|
added_match = re.search(r"Pridany (.+?)(?:\sObrázok|$)", text_block_clean, re.IGNORECASE)
|
||||||
|
|
||||||
|
size_pretty = size_match.group(1) if size_match else None
|
||||||
|
added_pretty = added_match.group(1) if added_match else None
|
||||||
|
|
||||||
|
# Convert “18/11/2025 o 07:00” → “2025-11-18 07:00:00”
|
||||||
|
added_mysql = None
|
||||||
|
if added_pretty:
|
||||||
|
added_mysql = re.sub(r" o ", " ", added_pretty)
|
||||||
|
day, month, year_time = added_mysql.split("/")
|
||||||
|
year, time_part = year_time.split(" ")
|
||||||
|
added_mysql = f"{year}-{month}-{day} {time_part}:00"
|
||||||
|
|
||||||
|
# --------------------------
|
||||||
|
# 5️⃣ IMAGE PREVIEW
|
||||||
|
# --------------------------
|
||||||
|
img_link = None
|
||||||
|
try:
|
||||||
|
image_a = cells[1].find_element(By.XPATH, ".//a[contains(text(),'Obrázok')]")
|
||||||
|
mouseover = image_a.get_attribute("onmouseover")
|
||||||
|
img_match = re.search(r"src=([^ ]+)", mouseover)
|
||||||
|
if img_match:
|
||||||
|
img_link = img_match.group(1).replace("'", "").strip()
|
||||||
|
if img_link.startswith("//"):
|
||||||
|
img_link = "https:" + img_link
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# --------------------------
|
||||||
|
# 6️⃣ SEEDERS
|
||||||
|
# --------------------------
|
||||||
|
seeders_a = cells[3].find_element(By.TAG_NAME, "a")
|
||||||
|
seeders_number = int(seeders_a.text.strip())
|
||||||
|
seeders_link = seeders_a.get_attribute("href")
|
||||||
|
|
||||||
|
# --------------------------
|
||||||
|
# 7️⃣ LEECHERS
|
||||||
|
# --------------------------
|
||||||
|
leechers_a = cells[4].find_element(By.TAG_NAME, "a")
|
||||||
|
leechers_number = int(leechers_a.text.strip())
|
||||||
|
leechers_link = leechers_a.get_attribute("href")
|
||||||
|
|
||||||
|
# --------------------------
|
||||||
|
# Return dictionary for MySQL
|
||||||
|
# --------------------------
|
||||||
|
return {
|
||||||
|
"torrent_hash": torrent_hash,
|
||||||
|
"details_link": details_link,
|
||||||
|
"category": category,
|
||||||
|
"title_visible": visible_name,
|
||||||
|
"title_full": full_title,
|
||||||
|
"size_pretty": size_pretty,
|
||||||
|
"added_datetime": added_mysql,
|
||||||
|
"preview_image": img_link,
|
||||||
|
"seeders": seeders_number,
|
||||||
|
"seeders_link": seeders_link,
|
||||||
|
"leechers": leechers_number,
|
||||||
|
"leechers_link": leechers_link,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# 5) MySQL INSERT
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
insert_sql = """
|
||||||
|
INSERT INTO torrents (
|
||||||
|
torrent_hash, details_link, category, title_visible, title_full,
|
||||||
|
size_pretty, added_datetime, preview_image,
|
||||||
|
seeders, seeders_link, leechers, leechers_link
|
||||||
|
) VALUES (
|
||||||
|
%(torrent_hash)s, %(details_link)s, %(category)s, %(title_visible)s, %(title_full)s,
|
||||||
|
%(size_pretty)s, %(added_datetime)s, %(preview_image)s,
|
||||||
|
%(seeders)s, %(seeders_link)s, %(leechers)s, %(leechers_link)s
|
||||||
)
|
)
|
||||||
|
ON DUPLICATE KEY UPDATE
|
||||||
|
details_link = VALUES(details_link),
|
||||||
|
category = VALUES(category),
|
||||||
|
title_visible = VALUES(title_visible),
|
||||||
|
title_full = VALUES(title_full),
|
||||||
|
size_pretty = VALUES(size_pretty),
|
||||||
|
added_datetime = VALUES(added_datetime),
|
||||||
|
preview_image = VALUES(preview_image),
|
||||||
|
seeders = VALUES(seeders),
|
||||||
|
seeders_link = VALUES(seeders_link),
|
||||||
|
leechers = VALUES(leechers),
|
||||||
|
leechers_link = VALUES(leechers_link);
|
||||||
|
"""
|
||||||
|
|
||||||
# 2️⃣ Create context before any pages exist
|
|
||||||
context = browser.new_context()
|
|
||||||
|
|
||||||
# 3️⃣ Block ALL third-party requests (ads, JS, popups, tracking)
|
# ============================================================
|
||||||
def block_third_party(route, request):
|
# 6) PROCESS ALL REAL ROWS
|
||||||
url = request.url.lower()
|
# ============================================================
|
||||||
if "sktorrent.eu" in url:
|
|
||||||
route.continue_()
|
|
||||||
else:
|
|
||||||
print(f"🚫 Blocked third-party request: {url}")
|
|
||||||
route.abort()
|
|
||||||
|
|
||||||
context.route("**/*", block_third_party)
|
for cells in real_rows:
|
||||||
|
data = parse_row(cells)
|
||||||
|
if not data:
|
||||||
|
continue
|
||||||
|
|
||||||
# 4️⃣ Block ANY popup windows except the first page
|
print("💾 Saving:", data["title_visible"])
|
||||||
pages = []
|
cursor.execute(insert_sql, data)
|
||||||
|
|
||||||
def on_new_page(new_page):
|
print("\n✅ DONE — All torrents saved to MySQL.")
|
||||||
pages.append(new_page)
|
driver.quit()
|
||||||
if len(pages) == 1:
|
|
||||||
print("➡️ Main page created.")
|
|
||||||
else:
|
|
||||||
print("⚠️ Popup blocked (auto-closed).")
|
|
||||||
new_page.close()
|
|
||||||
|
|
||||||
context.on("page", on_new_page)
|
|
||||||
|
|
||||||
# 5️⃣ Disable all popup JS functions (window.open, window.close, opener.close)
|
|
||||||
context.add_init_script("""
|
|
||||||
window.open = () => { console.log("Blocked window.open"); return null; };
|
|
||||||
window.close = () => { console.log("Blocked window.close"); };
|
|
||||||
|
|
||||||
try {
|
|
||||||
if (window.opener) {
|
|
||||||
window.opener.close = () => { console.log("Blocked opener.close"); };
|
|
||||||
}
|
|
||||||
} catch (e) {}
|
|
||||||
|
|
||||||
// Block <a target="_blank">
|
|
||||||
document.addEventListener('click', function(e) {
|
|
||||||
const el = e.target.closest('a[target="_blank"]');
|
|
||||||
if (el) {
|
|
||||||
e.preventDefault();
|
|
||||||
console.log("Blocked target=_blank");
|
|
||||||
}
|
|
||||||
}, true);
|
|
||||||
|
|
||||||
// Block middle-click opening a new tab
|
|
||||||
document.addEventListener('auxclick', function(e) {
|
|
||||||
e.preventDefault();
|
|
||||||
}, true);
|
|
||||||
""")
|
|
||||||
|
|
||||||
# 6️⃣ Create the FIRST page (main page)
|
|
||||||
page = context.new_page()
|
|
||||||
pages.append(page)
|
|
||||||
|
|
||||||
# 7️⃣ Load cookies (login)
|
|
||||||
load_cookies(context)
|
|
||||||
|
|
||||||
# 8️⃣ Navigate
|
|
||||||
print("🌍 Opening page...")
|
|
||||||
page.goto(URL)
|
|
||||||
|
|
||||||
# Do NOT use networkidle on ad-heavy sites
|
|
||||||
page.wait_for_load_state("domcontentloaded")
|
|
||||||
page.wait_for_selector("table tr", timeout=15000)
|
|
||||||
# Remove popup/overlay elements created by SKTorrent
|
|
||||||
page.evaluate("""
|
|
||||||
const selectors = [
|
|
||||||
'#lightbox', '.lightbox', '#popup', '.popup',
|
|
||||||
'.overlay', '#overlay', '.modal', '#modal',
|
|
||||||
'div[style*="fixed"]', 'div[style*="position: fixed"]',
|
|
||||||
'table[style*="position: fixed"]',
|
|
||||||
'iframe', 'frame'
|
|
||||||
];
|
|
||||||
|
|
||||||
selectors.forEach(sel => {
|
|
||||||
document.querySelectorAll(sel).forEach(el => {
|
|
||||||
console.log("Removing popup element:", sel);
|
|
||||||
el.remove();
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
// Remove onclick handlers that trigger popups
|
|
||||||
document.querySelectorAll('*').forEach(el => {
|
|
||||||
el.onclick = null;
|
|
||||||
el.onauxclick = null;
|
|
||||||
el.oncontextmenu = null;
|
|
||||||
});
|
|
||||||
|
|
||||||
// Remove timers that trigger delayed popups
|
|
||||||
window.setTimeout = () => {};
|
|
||||||
window.setInterval = () => {};
|
|
||||||
""")
|
|
||||||
|
|
||||||
print("✔ Page loaded, extracting table rows...")
|
|
||||||
|
|
||||||
# 9️⃣ Extract all rows
|
|
||||||
rows = page.locator("table tr").all()
|
|
||||||
print(f"📄 Total rows found (including header): {len(rows)}")
|
|
||||||
|
|
||||||
# 🔟 Extract SECOND ROW only (your request)
|
|
||||||
if len(rows) > 1:
|
|
||||||
row = rows[1] # 0 = header, 1 = first data row
|
|
||||||
tds = row.locator("td")
|
|
||||||
|
|
||||||
name = tds.nth(1).inner_text().strip()
|
|
||||||
size = tds.nth(2).inner_text().strip()
|
|
||||||
seeders = tds.nth(3).inner_text().strip()
|
|
||||||
leechers = tds.nth(4).inner_text().strip()
|
|
||||||
|
|
||||||
print("\n========= SECOND ROW =========")
|
|
||||||
print(f"Name: {name}")
|
|
||||||
print(f"Size: {size}")
|
|
||||||
print(f"Seeders: {seeders}")
|
|
||||||
print(f"Leechers: {leechers}")
|
|
||||||
print("==============================\n")
|
|
||||||
else:
|
|
||||||
print("❌ No data rows found!")
|
|
||||||
|
|
||||||
page.wait_for_timeout(5000)
|
|
||||||
|
|||||||
219
40 ParseviaRequests.py
Normal file
219
40 ParseviaRequests.py
Normal file
@@ -0,0 +1,219 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import re
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import pymysql
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
# ==============================
|
||||||
|
# CONFIG
|
||||||
|
# ==============================
|
||||||
|
|
||||||
|
import os  # needed only for the environment overrides in DB_CFG below

# Listing page that main() downloads and parses.
BASE_URL = "https://sktorrent.eu/torrent/torrents_v2.php?active=0"

# Cookie export handed to load_cookies().
# NOTE(review): this was originally described as a Netscape-format
# "cookies.txt" export, yet the file name ends in ".json" — confirm which
# format the file on disk really uses.
COOKIES_FILE = "sktorrent_cookies.json"

# Browser-like User-Agent sent with every request.
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
)

HEADERS = {"User-Agent": USER_AGENT}

# Keyword arguments for pymysql.connect().
# SECURITY: credentials should not live in source control.  Each value can be
# overridden through an environment variable; the literals are kept only as
# defaults so existing setups keep working unchanged.
DB_CFG = {
    "host": os.environ.get("SKTORRENT_DB_HOST", "192.168.1.76"),
    "port": int(os.environ.get("SKTORRENT_DB_PORT", "3307")),
    "user": os.environ.get("SKTORRENT_DB_USER", "root"),
    "password": os.environ.get("SKTORRENT_DB_PASSWORD", "Vlado9674+"),
    "database": os.environ.get("SKTORRENT_DB_NAME", "torrents"),
    "charset": "utf8mb4",
    "cursorclass": pymysql.cursors.DictCursor,
}
|
||||||
|
|
||||||
|
|
||||||
|
# ==============================
|
||||||
|
# COOKIE LOADER
|
||||||
|
# ==============================
|
||||||
|
|
||||||
|
def load_cookies(path):
    """Load cookies from *path* and return them as a ``{name: value}`` dict.

    Two on-disk formats are accepted:

    * a JSON array of cookie objects carrying ``"name"`` and ``"value"``
      keys, and
    * a Netscape ``cookies.txt`` export: seven tab-separated fields per
      line, with the cookie name in field 6 and its value in field 7.

    Args:
        path: Path of the cookie file (opened as UTF-8 text).

    Returns:
        dict mapping cookie name to cookie value; empty when the file holds
        no recognisable cookie.

    Raises:
        OSError: If the file cannot be opened.
    """
    import json  # local import keeps this function self-contained

    with open(path, "r", encoding="utf-8") as f:
        raw = f.read()

    cookies = {}

    # Try JSON first: a tab-separated cookies.txt never parses as a JSON list.
    try:
        parsed = json.loads(raw)
    except ValueError:
        parsed = None

    if isinstance(parsed, list):
        for entry in parsed:
            if isinstance(entry, dict) and "name" in entry:
                cookies[entry["name"]] = entry.get("value", "")
    else:
        http_only_prefix = "#HttpOnly_"
        for line in raw.splitlines():
            # "#HttpOnly_<domain>" marks a real cookie line, not a comment.
            if line.startswith(http_only_prefix):
                line = line[len(http_only_prefix):]
            elif line.startswith("#"):
                continue
            # Split before stripping so a trailing empty value is preserved.
            parts = line.split("\t")
            if len(parts) >= 7:
                cookies[parts[5].strip()] = parts[6].strip()

    print(f"🍪 Loaded {len(cookies)} cookies.")
    return cookies
|
||||||
|
|
||||||
|
|
||||||
|
# ==============================
|
||||||
|
# MYSQL INSERT
|
||||||
|
# ==============================
|
||||||
|
|
||||||
|
def insert_torrent(db, t):
    """Insert one parsed torrent into the ``torrents`` table and commit.

    The statement is ``INSERT IGNORE``, so a row that collides with an
    existing unique key is silently skipped rather than updated.

    Args:
        db: Open database connection; must provide ``cursor()`` (usable as a
            context manager) and ``commit()``.
        t: Mapping holding one value for every column named in ``columns``
            below (the dict built by ``parse_torrent_row``).

    Returns:
        Whatever ``cursor.execute()`` returns.  With PyMySQL this is the
        number of affected rows, i.e. ``0`` when the row was ignored as a
        duplicate.  Earlier versions returned nothing, so existing callers
        that discard the result are unaffected.

    Raises:
        KeyError: If *t* lacks one of the expected keys.
    """
    # Column order must match the placeholder order in the statement.
    columns = (
        "category",
        "title_visible",
        "title_full",
        "size_pretty",
        "added_datetime",
        "seeders",
        "seeders_link",
        "leechers",
        "leechers_link",
        "preview_image",
        "details_link",
        "torrent_hash",
    )
    sql = """
        INSERT IGNORE INTO torrents (
            category,
            title_visible,
            title_full,
            size_pretty,
            added_datetime,
            seeders,
            seeders_link,
            leechers,
            leechers_link,
            preview_image,
            details_link,
            torrent_hash
        ) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
    """
    params = tuple(t[name] for name in columns)

    with db.cursor() as cur:
        affected = cur.execute(sql, params)
    db.commit()
    return affected
|
||||||
|
|
||||||
|
|
||||||
|
# ==============================
|
||||||
|
# PARSER
|
||||||
|
# ==============================
|
||||||
|
|
||||||
|
def parse_torrent_row(cols):
    """Parse the ``<td>`` cells of one torrent listing row into a dict.

    Cell layout relied upon: ``cols[0]`` category, ``cols[2]`` main cell
    (title link, size/date text, preview link), ``cols[4]`` seeders,
    ``cols[5]`` leechers.  ``cols[1]`` (download icon) is ignored.

    Args:
        cols: Sequence of parsed ``<td>`` elements offering ``find()``,
            ``get()`` and ``get_text()`` (BeautifulSoup tags in this file).

    Returns:
        dict with the keys ``category``, ``title_visible``, ``title_full``,
        ``size_pretty``, ``added_datetime``, ``seeders``, ``seeders_link``,
        ``leechers``, ``leechers_link``, ``preview_image``, ``details_link``
        and ``torrent_hash``; or ``None`` when the row has no
        ``details.php`` link or that link carries no 40-hex-digit ``id``.
    """
    from urllib.parse import urljoin  # local import keeps the block self-contained

    base = "https://sktorrent.eu/torrent/"

    def _count_and_link(cell):
        """Return ``(count, absolute_link)`` from a seeders/leechers cell."""
        anchor = cell.find("a")
        if not anchor:
            return 0, None
        try:
            count = int(anchor.get_text(strip=True))
        except ValueError:
            # Non-numeric cell text must not abort the whole scrape.
            count = 0
        href = anchor.get("href")
        return count, (urljoin(base, href) if href else None)

    # --- category ---
    category = cols[0].get_text(strip=True)

    # --- main column (cols[1] is only the download icon) ---
    main_td = cols[2]

    # Dot escaped so it matches a literal "details.php" only.
    a_title = main_td.find("a", href=re.compile(r"details\.php"))
    if not a_title:
        return None

    href = a_title.get("href")
    title_visible = a_title.get_text(strip=True)
    title_full = a_title.get("title", "").strip()

    # Torrent hash is the 40-hex-digit value of the "id" query parameter.
    m = re.search(r"id=([A-Fa-f0-9]{40})", href)
    if not m:
        return None
    torrent_hash = m.group(1)

    # urljoin also copes with absolute or root-relative hrefs, which plain
    # string concatenation would turn into a broken URL.
    details_link = urljoin(base, href)

    # Size and added date live in the text of the main cell,
    # e.g. "... Velkost 1.7 GB | Pridany 18/11/2025 o 07:00".
    text = main_td.get_text(" ", strip=True)
    size_match = re.search(r"Velkost ([\d\.]+ ?[GMK]B)", text)
    date_match = re.search(r"Pridany (\d{2}/\d{2}/\d{4}) o (\d{2}:\d{2})", text)

    size_pretty = size_match.group(1) if size_match else None

    added_datetime = None
    if date_match:
        d, t = date_match.groups()
        try:
            added_datetime = datetime.strptime(d + " " + t, "%d/%m/%Y %H:%M")
        except ValueError:
            # Digits matched but do not form a real date/time; leave unset.
            added_datetime = None

    # Preview image URL is embedded in the onmouseover attribute.
    img = None
    img_a = main_td.find("a", onmouseover=True)
    if img_a:
        html = img_a.get("onmouseover", "")
        m2 = re.search(r"img src=//([^ ]+)", html)
        if m2:
            img = "https://" + m2.group(1)

    # --- seeders / leechers ---
    seeders, seeders_link = _count_and_link(cols[4])
    leechers, leechers_link = _count_and_link(cols[5])

    return {
        "category": category,
        "title_visible": title_visible,
        "title_full": title_full,
        "size_pretty": size_pretty,
        "added_datetime": added_datetime,
        "seeders": seeders,
        "seeders_link": seeders_link,
        "leechers": leechers,
        "leechers_link": leechers_link,
        "preview_image": img,
        "details_link": details_link,
        "torrent_hash": torrent_hash,
    }
|
||||||
|
|
||||||
|
|
||||||
|
# ==============================
|
||||||
|
# MAIN
|
||||||
|
# ==============================
|
||||||
|
|
||||||
|
def main():
    """Download the listing page, parse its torrent rows and store them.

    Steps: load cookies from COOKIES_FILE, fetch BASE_URL with a
    cookie-carrying session, locate the first ``<tbody>``, parse every
    ``<tr>`` that has exactly seven ``<td>`` cells and pass each parsed row
    to insert_torrent().  Prints a summary when finished.

    Raises:
        requests.HTTPError: If the listing page returns an error status.
    """
    cookies = load_cookies(COOKIES_FILE)

    session = requests.Session()
    session.headers.update(HEADERS)
    session.cookies.update(cookies)

    print("🌍 Downloading HTML...")
    r = session.get(BASE_URL, timeout=30)
    r.raise_for_status()

    soup = BeautifulSoup(r.text, "html.parser")
    tbody = soup.find("tbody")
    if not tbody:
        print("❌ Could not find <tbody>")
        return

    rows = tbody.find_all("tr")
    print(f"Found {len(rows)} <tr> rows.")

    db = pymysql.connect(**DB_CFG)

    inserted = 0
    skipped = 0

    try:
        for tr in rows:
            cols = tr.find_all("td")
            if len(cols) != 7:
                continue  # ignore header & separator rows

            data = parse_torrent_row(cols)
            if not data:
                skipped += 1
                continue

            insert_torrent(db, data)
            inserted += 1
            print(f"✔ Inserted {data['torrent_hash']}")
    finally:
        # Release the connection even when parsing or inserting fails.
        db.close()

    print("\n===== DONE =====")
    print(f"Inserted: {inserted}")
    print(f"Skipped: {skipped}")


if __name__ == "__main__":
    main()
|
||||||
Reference in New Issue
Block a user