git
This commit is contained in:
158
94 WhatWehaveAlreadyDownloaded.py
Normal file
158
94 WhatWehaveAlreadyDownloaded.py
Normal file
@@ -0,0 +1,158 @@
|
||||
import pymysql
|
||||
import bencodepy
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
# ============================================================
|
||||
# CONFIGURATION
|
||||
# ============================================================
|
||||
# Root folder that holds the downloaded data. The raw string keeps the UNC
# backslashes literal. Set TORRENTS_PHYSICAL_DIR to scan a different location
# without editing this file (a previously used path was \\tower\torrents\downloads).
PHYSICAL_DIR = Path(os.environ.get("TORRENTS_PHYSICAL_DIR", r"\\tower1\#Colddata\Porno"))

# Keyword arguments handed to pymysql.connect().
# SECURITY: credentials should not live in source control. Each connection
# setting can be supplied through the environment; the literals below remain
# only as backward-compatible defaults and should be rotated/removed.
DB_CONFIG = {
    "host": os.environ.get("TORRENTS_DB_HOST", "192.168.1.50"),
    "port": int(os.environ.get("TORRENTS_DB_PORT", "3306")),
    "user": os.environ.get("TORRENTS_DB_USER", "root"),
    "password": os.environ.get("TORRENTS_DB_PASSWORD", "Vlado9674+"),
    "database": os.environ.get("TORRENTS_DB_NAME", "torrents"),
    "charset": "utf8mb4",
    "autocommit": True,
}
|
||||
|
||||
|
||||
# ============================================================
|
||||
# HELPER FUNCTIONS
|
||||
# ============================================================
|
||||
def decode_bytes(b):
    """
    Decode a bencoded byte string into ``str``.

    Tries UTF-8 first, then windows-1250, and finally latin-1.

    Args:
        b: The value to decode. A ``str`` is returned unchanged; anything
            else must provide a ``bytes``-style ``decode`` method.

    Returns:
        The decoded text.

    Raises:
        AttributeError: If ``b`` is neither ``str`` nor bytes-like
            (for example ``None``).
    """
    if isinstance(b, str):
        return b

    # Only a failed decode should trigger the next candidate; any other
    # error (wrong type, KeyboardInterrupt, ...) must propagate untouched.
    for enc in ("utf-8", "windows-1250"):
        try:
            return b.decode(enc)
        except UnicodeDecodeError:
            continue

    # latin-1 maps every possible byte value, so it is the terminal fallback
    # and cannot fail. Candidates listed after it could never be reached.
    return b.decode("latin-1")
|
||||
|
||||
|
||||
def check_torrent_in_filesystem(torrent_blob, root_path):
    """
    Report whether the payload described by a .torrent blob is present on disk.

    The blob is bencode-decoded and its ``info`` dictionary is inspected:

    * Single-file torrent (no ``files`` key): ``root_path / name`` must exist
      and its size must be within 4096 bytes of the declared ``length``.
    * Multi-file torrent: ``root_path / name`` must exist and the first
      entry of ``files`` must exist beneath it. An empty ``files`` list
      counts as present.

    Args:
        torrent_blob: Raw bencoded .torrent content.
        root_path: ``pathlib.Path`` of the directory holding the downloads.

    Returns:
        ``True`` when the checks above pass, otherwise ``False``. Any
        exception (undecodable blob, missing keys, filesystem error) is
        treated as "not present" and also yields ``False``.
    """
    try:
        metadata = bencodepy.decode(torrent_blob)
        info = metadata.get(b'info')
        if not info:
            return False

        # The torrent's top-level file or folder name, resolved under the root.
        name = decode_bytes(info.get(b'name'))
        target_path = root_path / name
        if not target_path.exists():
            return False

        if b'files' in info:
            # Multi-file torrent: the folder exists, so spot-check one member
            # to make sure it is not just an empty shell.
            files = info[b'files']
            if not files:
                return True
            relative_parts = [decode_bytes(part) for part in files[0][b'path']]
            return target_path.joinpath(*relative_parts).exists()

        # Single-file torrent: compare the on-disk size with the declared one,
        # tolerating a difference of fewer than 4096 bytes.
        expected_size = info[b'length']
        real_size = target_path.stat().st_size
        return abs(real_size - expected_size) < 4096
    except Exception:
        # Best effort: a blob that cannot be parsed or a path that cannot be
        # inspected is reported as missing.
        return False
|
||||
|
||||
|
||||
# ============================================================
|
||||
# MAIN EXECUTION
|
||||
# ============================================================
|
||||
def main():
    """
    Scan the storage folder and record in the database which torrents exist.

    Steps:
        1. Abort with exit status 1 when ``PHYSICAL_DIR`` is not reachable.
        2. Load every row of ``torrents`` that has ``torrent_content``.
        3. Check each blob with ``check_torrent_in_filesystem``.
        4. Reset ``physical_exists`` to 0 for all rows, then set it to 1 for
           the hashes that were found (in chunks of 1000).
        5. Print a summary.

    Raises:
        SystemExit: With status 1 when the storage path cannot be accessed.
    """
    if not PHYSICAL_DIR.exists():
        print(f"❌ ERROR: Cannot access path: {PHYSICAL_DIR}")
        print("Make sure the drive is mapped or the network path is accessible.")
        # SystemExit works without the `site` module (unlike exit()) and the
        # non-zero status lets callers detect the failure.
        raise SystemExit(1)

    print(f"📂 Scanning storage: {PHYSICAL_DIR}")
    print("🚀 Connecting to Database...")

    db = pymysql.connect(**DB_CONFIG)
    try:
        cursor = db.cursor()

        # 1. Get all torrents that have content (BLOB).
        # The title is fetched as well, but is only needed for optional
        # per-torrent debug output.
        cursor.execute(
            "SELECT torrent_hash, title_visible, torrent_content FROM torrents WHERE torrent_content IS NOT NULL")

        rows = cursor.fetchall()
        total = len(rows)
        print(f"📋 Analysing {total} torrents from database against disk files...")

        found_count = 0
        missing_count = 0

        # 2. Iterate and check; hashes of found torrents are collected so the
        # database can be updated in batches afterwards.
        updates = []

        for index, (t_hash, _title, blob) in enumerate(rows):
            if check_torrent_in_filesystem(blob, PHYSICAL_DIR):
                found_count += 1
                updates.append(t_hash)
            else:
                missing_count += 1

            # Progress line every 100 rows to keep the output readable.
            if index % 100 == 0:
                print(f"   Processed {index}/{total} ... (Found: {found_count})")

        # 3. Batch update database
        print(f"\n💾 Updating Database: Marking {len(updates)} torrents as 'physical_exists = 1'...")

        # Reset everything to 0 first (in case files were deleted since the last run).
        # NOTE(review): with autocommit enabled in DB_CONFIG this reset and the
        # updates below are presumably not one atomic transaction - confirm.
        cursor.execute("UPDATE torrents SET physical_exists = 0")

        # Update in chunks of 1000 to keep each IN (...) list bounded.
        chunk_size = 1000
        for start in range(0, len(updates), chunk_size):
            chunk = updates[start:start + chunk_size]
            format_strings = ','.join(['%s'] * len(chunk))
            cursor.execute(
                f"UPDATE torrents SET physical_exists = 1 WHERE torrent_hash IN ({format_strings})",
                tuple(chunk))
        if updates:
            db.commit()
    finally:
        # Release the connection even when the scan or an update fails.
        db.close()

    # An empty table must not crash the summary with a division by zero.
    completion = int((found_count / total) * 100) if total else 0

    print("\n" + "=" * 40)
    print("🏁 SCAN COMPLETE")
    print(f"✅ Physically Available: {found_count}")
    print(f"❌ Missing / Not Downloaded: {missing_count}")
    print(f"📊 Completion Rate: {completion}%")
    print("=" * 40)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user