This commit is contained in:
2026-03-06 07:11:03 +01:00
parent b37db5397e
commit 489b236b9b

View File

@@ -0,0 +1,142 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
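Walk the directory configured in SCAN_DIR (the ultracc torrent share),
compute the blake3 digest of every file and look it up in the
file_md5_index table. A file whose digest is found is deleted, or only
listed when DRY_RUN is True. After real deletions, the empty directories
left behind are removed.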
"""
import os
import sys
import blake3
import pymysql
from pathlib import Path
# ============================================================
# CONFIG
# ============================================================
# Root directory that is scanned recursively for candidate files.
SCAN_DIR = "//tower/torrents/ultracc"

# Connection parameters, passed verbatim as pymysql.connect(**DB_CONFIG).
# NOTE(review): a root password is committed here in plaintext. It can be
# overridden with the TORRENTS_DB_PASSWORD environment variable; the literal
# is kept only as a backward-compatible fallback and should be rotated and
# removed from source control.
DB_CONFIG = {
    "host": "192.168.1.76",
    "port": 3306,
    "user": "root",
    "password": os.environ.get("TORRENTS_DB_PASSWORD", "Vlado9674+"),
    "database": "torrents",
    "charset": "utf8mb4",
}

CHUNK_SIZE = 8 * 1024 * 1024  # bytes read per hashing step (8 MiB)
DRY_RUN = True  # True = only report what would be deleted, delete nothing
# ============================================================
# HELPERS
# ============================================================
def compute_blake3(path: Path) -> bytes:
    """Hash the file at *path* with blake3 and return the raw digest bytes.

    The file is streamed in CHUNK_SIZE pieces, so it is never read into
    memory as a whole. The digest is returned as raw bytes (not hex);
    blake3's default digest length is 32 bytes.
    """
    hasher = blake3.blake3()
    with open(path, "rb") as stream:
        # iter() with a sentinel keeps reading until read() returns b"" (EOF).
        for piece in iter(lambda: stream.read(CHUNK_SIZE), b""):
            hasher.update(piece)
    return hasher.digest()
def hash_in_db(cursor, digest: bytes) -> bool:
    """Report whether *digest* occurs in the blake3 column of file_md5_index.

    *cursor* is any DB-API style cursor offering execute() and fetchone().
    The digest is passed as a bound parameter, never interpolated into SQL.
    """
    query = "SELECT 1 FROM file_md5_index WHERE blake3 = %s LIMIT 1"
    cursor.execute(query, (digest,))
    row = cursor.fetchone()
    return row is not None
def remove_empty_dirs(root: str) -> int:
    """Remove every empty directory below *root* and return how many were removed.

    The tree is walked bottom-up, so a directory that becomes empty once its
    empty children are gone is removed in the same pass. *root* itself is
    never removed. It may be a str or any os.PathLike object.
    """
    # os.walk yields str paths, so compare against the str form of root.
    # Comparing with a Path object would never match, and the root itself
    # would then be deleted once it is empty.
    root = os.fspath(root)
    removed = 0
    for dirpath, _dirnames, _filenames in os.walk(root, topdown=False):
        if dirpath == root:
            continue
        try:
            os.rmdir(dirpath)
        except OSError:
            # Directory is not empty (or cannot be removed): best effort,
            # leave it in place and carry on.
            continue
        print(f" [rmdir] {dirpath}")
        removed += 1
    return removed
# ============================================================
# MAIN
# ============================================================
def main():
    """Delete (or, in dry-run mode, list) files under SCAN_DIR whose hash is indexed.

    Every regular file below SCAN_DIR is hashed with compute_blake3() and
    looked up with hash_in_db(). A file whose digest is found is deleted,
    unless DRY_RUN is set, in which case it is only reported and counted.
    After at least one real deletion, empty directories are removed with
    remove_empty_dirs(). A summary is printed at the end.

    Exits with status 1 when SCAN_DIR does not exist.
    """
    dry_run = DRY_RUN
    if dry_run:
        print("=== DRY RUN — nic se nesmaže ===\n")

    # Validate the scan root before connecting, so a wrong path does not
    # leave a database connection open on exit.
    scan_root = Path(SCAN_DIR)
    if not scan_root.exists():
        print(f"CHYBA: Adresář neexistuje: {SCAN_DIR}")
        sys.exit(1)

    files_checked = 0
    files_deleted = 0
    files_kept = 0
    bytes_deleted = 0

    conn = pymysql.connect(**DB_CONFIG)
    try:
        cursor = conn.cursor()
        try:
            for file_path in scan_root.rglob("*"):
                if not file_path.is_file():
                    continue
                files_checked += 1
                try:
                    # stat() is inside the try as well: a file that vanishes
                    # mid-scan must not abort the run.
                    size = file_path.stat().st_size
                    digest = compute_blake3(file_path)
                except OSError as e:
                    print(f" [CHYBA čtení] {file_path}: {e}")
                    continue

                if not hash_in_db(cursor, digest):
                    print(f" [zachovat] {file_path} ({size:,} B)")
                    files_kept += 1
                    continue

                print(f" [SMAZAT] {file_path} ({size:,} B)")
                if not dry_run:
                    try:
                        file_path.unlink()
                    except OSError as e:
                        print(f" [CHYBA smazání] {file_path}: {e}")
                        continue
                # In dry-run mode the file counts as "would be deleted".
                files_deleted += 1
                bytes_deleted += size
        finally:
            cursor.close()
    finally:
        # Close the connection even when the scan aborts with an exception.
        conn.close()

    print()
    print(f"Zkontrolováno: {files_checked} souborů")
    print(f"Ke smazání: {files_deleted} souborů ({bytes_deleted / 1024**3:.2f} GB)")
    print(f"Zachováno: {files_kept} souborů")

    if not dry_run and files_deleted > 0:
        print("\nOdstraňuji prázdné adresáře...")
        removed = remove_empty_dirs(SCAN_DIR)
        print(f"Odstraněno prázdných adresářů: {removed}")

    if dry_run:
        print("\n(Dry run — žádné změny nebyly provedeny)")


if __name__ == "__main__":
    main()