#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Delete files under SCAN_DIR whose content is already indexed in the database.

Walks SCAN_DIR recursively, computes the BLAKE3 digest of every regular file
and looks it up in the ``blake3`` column of the ``file_md5_index`` table.
When the digest is found the file is deleted (or only reported while DRY_RUN
is enabled). After a real run that deleted something, directories left empty
under SCAN_DIR are removed as well.
"""

import os
import sys
from pathlib import Path

import blake3
import pymysql

# ============================================================
# CONFIG
# ============================================================

SCAN_DIR = "//tower/torrents/ultracc"

# NOTE(review): the database password is committed in source. It can now be
# overridden through the TORRENTS_DB_PASSWORD environment variable; the
# literal fallback is kept only so existing runs behave exactly as before and
# should be rotated and removed.
DB_CONFIG = {
    "host": "192.168.1.76",
    "port": 3306,
    "user": "root",
    "password": os.environ.get("TORRENTS_DB_PASSWORD", "Vlado9674+"),
    "database": "torrents",
    "charset": "utf8mb4",
}

CHUNK_SIZE = 8 * 1024 * 1024  # 8 MB
DRY_RUN = True  # True = only report what would be deleted, delete nothing

# ============================================================
# HELPERS
# ============================================================


def compute_blake3(path: Path) -> bytes:
    """Return the BLAKE3 digest of the file at *path* as raw bytes.

    The file is read in CHUNK_SIZE pieces so it is never loaded into memory
    as a whole.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    hasher = blake3.blake3()
    with open(path, "rb") as f:
        # iter(callable, sentinel) stops at the first empty read (EOF).
        for chunk in iter(lambda: f.read(CHUNK_SIZE), b""):
            hasher.update(chunk)
    return hasher.digest()


def hash_in_db(cursor, digest: bytes) -> bool:
    """Return True if *digest* is present in ``file_md5_index.blake3``.

    *cursor* is an open DB-API cursor; the digest is passed as a bound
    parameter, never interpolated into the SQL text.
    """
    cursor.execute(
        "SELECT 1 FROM file_md5_index WHERE blake3 = %s LIMIT 1",
        (digest,),
    )
    return cursor.fetchone() is not None


def remove_empty_dirs(root: str) -> int:
    """Remove empty directories below *root* and return how many were removed.

    The tree is walked bottom-up, so a directory that only contained empty
    directories becomes removable within the same pass. *root* itself is
    never removed.
    """
    # os.walk() yields the top directory as os.fspath(root); normalise the
    # same way so the "skip the root" comparison also holds for Path inputs.
    root = os.fspath(root)
    removed = 0
    for dirpath, _dirnames, _filenames in os.walk(root, topdown=False):
        if dirpath == root:
            continue
        try:
            os.rmdir(dirpath)
        except OSError:
            # Directory is not empty (or cannot be removed): best effort,
            # leave it in place.
            continue
        print(f" [rmdir] {dirpath}")
        removed += 1
    return removed


# ============================================================
# MAIN
# ============================================================


def main():
    """Scan SCAN_DIR, delete (or report) files known to the DB, print a summary.

    Exits with status 1 when SCAN_DIR does not exist.
    """
    dry_run = DRY_RUN

    if dry_run:
        print("=== DRY RUN — nic se nesmaže ===\n")

    # Validate the directory before connecting so the early exit cannot leak
    # an open database connection.
    scan_root = Path(SCAN_DIR)
    if not scan_root.exists():
        print(f"CHYBA: Adresář neexistuje: {SCAN_DIR}")
        sys.exit(1)

    files_checked = 0
    files_deleted = 0
    files_kept = 0
    files_failed = 0
    bytes_deleted = 0

    conn = pymysql.connect(**DB_CONFIG)
    cursor = conn.cursor()
    try:
        for file_path in scan_root.rglob("*"):
            if not file_path.is_file():
                continue

            files_checked += 1

            # stat() can fail just like the read (file vanished, permission
            # denied), so both are guarded together and the scan continues.
            try:
                size = file_path.stat().st_size
                digest = compute_blake3(file_path)
            except OSError as e:
                print(f" [CHYBA čtení] {file_path}: {e}")
                files_failed += 1
                continue

            if not hash_in_db(cursor, digest):
                print(f" [zachovat] {file_path} ({size:,} B)")
                files_kept += 1
                continue

            print(f" [SMAZAT] {file_path} ({size:,} B)")
            if not dry_run:
                try:
                    file_path.unlink()
                except OSError as e:
                    print(f" [CHYBA smazání] {file_path}: {e}")
                    files_failed += 1
                    continue
            # In dry-run mode this counts files that *would* be deleted.
            files_deleted += 1
            bytes_deleted += size
    finally:
        # Release DB resources even when the scan aborts with an exception.
        cursor.close()
        conn.close()

    print()
    print(f"Zkontrolováno: {files_checked} souborů")
    print(f"Ke smazání: {files_deleted} souborů ({bytes_deleted / 1024**3:.2f} GB)")
    print(f"Zachováno: {files_kept} souborů")
    if files_failed:
        # Only shown when something went wrong, so a clean run prints the
        # same summary as before.
        print(f"Chyby: {files_failed} souborů")

    if not dry_run and files_deleted > 0:
        print("\nOdstraňuji prázdné adresáře...")
        removed = remove_empty_dirs(SCAN_DIR)
        print(f"Odstraněno prázdných adresářů: {removed}")

    if dry_run:
        print("\n(Dry run — žádné změny nebyly provedeny)")


if __name__ == "__main__":
    main()