#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Delete files whose content is already recorded in the database.

Walks SCAN_DIR recursively, computes the BLAKE3 digest of every regular file
and looks it up in the ``file_md5_index`` table. A file whose digest is found
is deleted (or only reported when DRY_RUN is true). After at least one real
deletion, empty directories below SCAN_DIR are removed as well.
"""

import os
import sys
from pathlib import Path

import blake3
import pymysql

# ============================================================
# CONFIG
# ============================================================

SCAN_DIR = "//tower/torrents/ultracc"

# NOTE(review): database credentials are hard-coded in source. Consider
# loading them from the environment or from a config file that is kept out
# of version control.
DB_CONFIG = {
    "host": "192.168.1.76",
    "port": 3306,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
}

CHUNK_SIZE = 8 * 1024 * 1024  # 8 MB
DRY_RUN = True  # True = only report what would be deleted, delete nothing


# ============================================================
# HELPERS
# ============================================================

def compute_blake3(path: Path) -> bytes:
    """Return the BLAKE3 digest of the file at *path* as 32 raw bytes.

    The file is read in CHUNK_SIZE pieces, so it is never held in memory
    as a whole.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    hasher = blake3.blake3()
    with open(path, "rb") as f:
        # iter(callable, sentinel) keeps reading until read() returns b"" (EOF).
        for chunk in iter(lambda: f.read(CHUNK_SIZE), b""):
            hasher.update(chunk)
    return hasher.digest()


def hash_in_db(cursor, digest: bytes) -> bool:
    """Return True when *digest* matches a ``blake3`` value in ``file_md5_index``.

    Args:
        cursor: an open DB-API cursor on the configured database.
        digest: raw digest bytes, passed as a bound query parameter.
    """
    cursor.execute(
        "SELECT 1 FROM file_md5_index WHERE blake3 = %s LIMIT 1",
        (digest,),
    )
    return cursor.fetchone() is not None


def remove_empty_dirs(root: str) -> int:
    """Remove empty directories below *root* and return how many were removed.

    The tree is walked bottom-up, so a directory that contained only empty
    sub-directories becomes removable within the same pass. *root* itself is
    never removed.
    """
    removed = 0
    for dirpath, _dirnames, _filenames in os.walk(root, topdown=False):
        if dirpath == root:
            continue
        try:
            os.rmdir(dirpath)
        except OSError:
            # Deliberate best effort: rmdir fails for non-empty directories
            # (and for e.g. permission problems); such directories are kept.
            continue
        print(f" [rmdir] {dirpath}")
        removed += 1
    return removed


# ============================================================
# MAIN
# ============================================================

def main():
    """Scan SCAN_DIR, delete files already known to the database, report totals.

    In dry-run mode nothing is deleted; the files that would be deleted are
    still counted and listed. Exits with status 1 when SCAN_DIR is not an
    existing directory.
    """
    dry_run = DRY_RUN
    if dry_run:
        print("=== DRY RUN — nic se nesmaže ===\n")

    # Validate the scan directory before connecting, so the early exit
    # below cannot leave a database connection open.
    scan_root = Path(SCAN_DIR)
    if not scan_root.is_dir():
        print(f"CHYBA: Adresář neexistuje: {SCAN_DIR}")
        sys.exit(1)

    files_checked = 0
    files_deleted = 0
    files_kept = 0
    bytes_deleted = 0

    conn = pymysql.connect(**DB_CONFIG)
    try:
        cursor = conn.cursor()
        try:
            for file_path in scan_root.rglob("*"):
                if not file_path.is_file():
                    continue

                files_checked += 1

                try:
                    # stat() belongs inside the try as well: the file may
                    # vanish or become unreadable after it was listed.
                    size = file_path.stat().st_size
                    digest = compute_blake3(file_path)
                except OSError as e:
                    print(f" [CHYBA čtení] {file_path}: {e}")
                    continue

                if not hash_in_db(cursor, digest):
                    print(f" [zachovat] {file_path} ({size:,} B)")
                    files_kept += 1
                    continue

                print(f" [SMAZAT] {file_path} ({size:,} B)")
                if not dry_run:
                    try:
                        file_path.unlink()
                    except OSError as e:
                        print(f" [CHYBA smazání] {file_path}: {e}")
                        continue

                # Counted for real deletions and for dry-run candidates alike.
                files_deleted += 1
                bytes_deleted += size
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when hashing or a query raised.
        conn.close()

    print()
    print(f"Zkontrolováno: {files_checked} souborů")
    print(f"Ke smazání: {files_deleted} souborů ({bytes_deleted / 1024**3:.2f} GB)")
    print(f"Zachováno: {files_kept} souborů")

    if not dry_run and files_deleted > 0:
        print("\nOdstraňuji prázdné adresáře...")
        removed = remove_empty_dirs(SCAN_DIR)
        print(f"Odstraněno prázdných adresářů: {removed}")

    if dry_run:
        print("\n(Dry run — žádné změny nebyly provedeny)")


if __name__ == "__main__":
    main()