# Files
# torrents/94 WhatWehaveAlreadyDownloaded.py
# 2026-02-01 07:18:20 +01:00
#
# 158 lines
# 5.1 KiB
# Python

import pymysql
import bencodepy
import os
from pathlib import Path
# ============================================================
# CONFIGURATION
# ============================================================
# Root directory whose contents are compared against the torrents stored in
# the database. It is a UNC network path, so a raw string (r"...") is used to
# keep the backslashes literal.
# Alternative location, currently disabled:
# PHYSICAL_DIR = Path(r"\\tower\torrents\downloads")
PHYSICAL_DIR = Path(r"\\tower1\#Colddata\Porno")
# Keyword arguments handed to pymysql.connect().
# Every connection detail can be overridden through an environment variable so
# that credentials do not have to be edited into (or committed with) this file;
# the literals are kept only as backward-compatible defaults.
# NOTE(review): a root password is stored here in plain text - rotate it and
# supply the real value through TORRENTS_DB_PASSWORD instead.
DB_CONFIG = {
    "host": os.environ.get("TORRENTS_DB_HOST", "192.168.1.50"),
    "port": int(os.environ.get("TORRENTS_DB_PORT", "3306")),
    "user": os.environ.get("TORRENTS_DB_USER", "root"),
    "password": os.environ.get("TORRENTS_DB_PASSWORD", "Vlado9674+"),
    "database": os.environ.get("TORRENTS_DB_NAME", "torrents"),
    "charset": "utf8mb4",
    # Statements are committed immediately unless a transaction is opened
    # explicitly on the connection.
    "autocommit": True,
}
# ============================================================
# HELPER FUNCTIONS
# ============================================================
def decode_bytes(b):
    """
    Decode a byte string taken from bencoded data into ``str``.

    The encoding is not known in advance, so UTF-8 is tried first and
    Windows-1250 second. Latin-1 assigns a character to every byte value,
    which makes it a final fallback that cannot fail.

    Args:
        b: The ``bytes`` value to decode. A ``str`` is returned unchanged.

    Returns:
        The decoded text.
    """
    if isinstance(b, str):
        return b

    for enc in ('utf-8', 'windows-1250'):
        try:
            return b.decode(enc)
        except UnicodeDecodeError:
            # Only a real decoding failure moves on to the next candidate;
            # anything else (e.g. KeyboardInterrupt) must propagate.
            continue

    return b.decode('latin-1')
def _stays_inside_root(parts):
    """Return True when no component is anchored (absolute/drive) or is '..'."""
    return all(
        not Path(part).anchor and '..' not in Path(part).parts
        for part in parts
    )


def check_torrent_in_filesystem(torrent_blob, root_path, size_tolerance=4096):
    """
    Report whether the payload described by a torrent BLOB exists on disk.

    The BLOB is bencode-decoded and the ``name`` entry of its ``info``
    dictionary is looked up directly below ``root_path``.

    * Single-file torrent (no ``files`` key): the file must exist and its
      size must differ from the declared ``length`` by less than
      ``size_tolerance`` bytes.
    * Multi-file torrent: the folder must exist and the first file listed in
      ``files`` must exist inside it. Only that one file is probed; the rest
      are not verified.

    Args:
        torrent_blob: Raw bencoded torrent content.
        root_path: ``Path`` of the directory that holds the downloads.
        size_tolerance: Exclusive upper bound, in bytes, on the accepted
            difference between real and declared size of a single file.

    Returns:
        True when the payload is considered present, otherwise False. Any
        failure while decoding or touching the filesystem also yields False.
    """
    try:
        data = bencodepy.decode(torrent_blob)
        info = data.get(b'info')
        if not info:
            return False

        # A torrent without a usable name cannot be mapped to a path.
        raw_name = info.get(b'name')
        if not raw_name:
            return False
        name = decode_bytes(raw_name)

        # The name comes from the torrent itself; an absolute path or a '..'
        # component would make the check look outside root_path.
        if not _stays_inside_root([name]):
            return False

        target_path = root_path / name
        if not target_path.exists():
            return False

        # Single-file torrent: compare the size on disk with the declared one.
        if b'files' not in info:
            expected_size = info[b'length']
            real_size = target_path.stat().st_size
            return abs(real_size - expected_size) < size_tolerance

        # Multi-file torrent: the folder exists, so probe one file inside it
        # to make sure it is not just an empty directory.
        files = info[b'files']
        if not files:
            return True  # Torrent describing an empty folder.

        first_parts = [decode_bytes(p) for p in files[0][b'path']]
        if not _stays_inside_root(first_parts):
            return False
        return target_path.joinpath(*first_parts).exists()
    except Exception:
        # Deliberate best effort: malformed bencode, unexpected structure or
        # an unreadable path all count as "not downloaded".
        return False
# ============================================================
# MAIN EXECUTION
# ============================================================
def _mark_physical_exists(db, cursor, hashes, chunk_size=1000):
    """Reset physical_exists for all rows, then set it to 1 for ``hashes``, atomically."""
    # The connection runs with autocommit enabled; an explicit BEGIN groups the
    # reset and the re-marking so a failure cannot leave every row at 0.
    db.begin()
    try:
        # Reset everything to 0 first (in case files were deleted since last run)
        cursor.execute("UPDATE torrents SET physical_exists = 0")
        # Update in chunks to keep each IN (...) list at a bounded size.
        for start in range(0, len(hashes), chunk_size):
            chunk = hashes[start:start + chunk_size]
            format_strings = ','.join(['%s'] * len(chunk))
            cursor.execute(
                f"UPDATE torrents SET physical_exists = 1 WHERE torrent_hash IN ({format_strings})",
                tuple(chunk))
        db.commit()
    except Exception:
        db.rollback()
        raise


def main():
    """
    Check every stored torrent against PHYSICAL_DIR and refresh the
    ``physical_exists`` column of the ``torrents`` table.

    Exits with status 1 when PHYSICAL_DIR is not reachable. Otherwise prints
    progress every 100 rows, writes the flags in one transaction and prints a
    summary.
    """
    if not PHYSICAL_DIR.exists():
        print(f"❌ ERROR: Cannot access path: {PHYSICAL_DIR}")
        print("Make sure the drive is mapped or the network path is accessible.")
        # Non-zero status so callers can tell the scan did not run.
        raise SystemExit(1)

    print(f"📂 Scanning storage: {PHYSICAL_DIR}")
    print("🚀 Connecting to Database...")
    db = pymysql.connect(**DB_CONFIG)
    try:
        with db.cursor() as cursor:
            # 1. Get all torrents that have content (BLOB).
            # NOTE(review): fetchall() holds every BLOB in memory at once;
            # title_visible is selected but not used below.
            cursor.execute(
                "SELECT torrent_hash, title_visible, torrent_content FROM torrents WHERE torrent_content IS NOT NULL")
            rows = cursor.fetchall()
            total = len(rows)
            print(f"📋 Analysing {total} torrents from database against disk files...")

            # 2. Iterate and check; collect the hashes that were found on disk.
            updates = []
            for index, (t_hash, _title, blob) in enumerate(rows):
                if check_torrent_in_filesystem(blob, PHYSICAL_DIR):
                    updates.append(t_hash)
                if index % 100 == 0:
                    print(f" Processed {index}/{total} ... (Found: {len(updates)})")

            found_count = len(updates)
            missing_count = total - found_count

            # 3. Batch update the database.
            print(f"\n💾 Updating Database: Marking {len(updates)} torrents as 'physical_exists = 1'...")
            _mark_physical_exists(db, cursor, updates)
    finally:
        # Release the connection even when the scan or the update fails.
        db.close()

    # Guard against an empty table: there is no rate to compute for 0 rows.
    completion = int((found_count / total) * 100) if total else 0

    print("\n" + "=" * 40)
    print("🏁 SCAN COMPLETE")
    print(f"✅ Physically Available: {found_count}")
    print(f"❌ Missing / Not Downloaded: {missing_count}")
    print(f"📊 Completion Rate: {completion}%")
    print("=" * 40)


if __name__ == "__main__":
    main()