#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Mark already-processed files and folders with a "▲" marker.

Every file under ``MP_DIR`` whose MD5 digest is listed in one of the
``processed_files.json`` databases gets a "▲" prefix.  Afterwards every
folder whose direct files are all marked gets a "▲" inserted into its own
name.
"""

import hashlib
import json
import time
import traceback
from pathlib import Path

# ======= CONFIG =======
MP_DIR = Path(r"U:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP")
JSON_PATHS = [
    Path(r"U:\Dropbox\Ordinace\LAB-PDF\processed_files.json"),
    Path(r"U:\Dropbox\Ordinace\Dokumentace_zpracovaná\processed_files.json"),
]
CHUNK = 65536  # bytes read per iteration while hashing
PRINT_EVERY = 50  # progress line every N examined files
# ======================

# Marker character used as a file-name prefix and inside folder names.
MARK = "▲"
# 0-based index at which the marker is inserted into a folder name (i.e. it
# becomes the 11th character).
# NOTE(review): presumably folder names start with a 10-character prefix
# such as a date -- confirm against the real data.
FOLDER_MARK_POS = 10


def try_rename(old_path: Path, new_path: Path, retries: int = 5, delay: int = 5):
    """Rename ``old_path`` to ``new_path``, retrying on OS errors.

    Args:
        old_path: Existing file or directory.
        new_path: Desired new path.
        retries: Maximum number of rename attempts.
        delay: Seconds to sleep between failed attempts.

    Returns:
        True when the rename succeeded, False when the target already exists
        or every attempt failed.
    """
    # Refuse up front: an existing target would be silently replaced on POSIX
    # and would fail on every retry on Windows.
    if new_path.exists():
        print(f"⚠ Rename skipped, target already exists: {new_path}")
        return False

    for attempt in range(1, retries + 1):
        try:
            old_path.rename(new_path)
        except OSError as e:
            print(f"⚠ Rename failed ({attempt}/{retries}): {e}")
            if attempt < retries:
                print(f" Waiting {delay}s before retry...")
                time.sleep(delay)
            else:
                print(" ❌ Maximum retries reached. Skipping.")
                traceback.print_exc()
        else:
            return True
    return False


def md5_file(path: Path) -> str:
    """Return the hexadecimal MD5 digest of the file at ``path``.

    The file is read in ``CHUNK``-sized pieces so large files are never
    loaded into memory at once.
    """
    h = hashlib.md5()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(CHUNK), b""):
            h.update(chunk)
    return h.hexdigest()


def load_all_md5(json_paths):
    """Load the MD5 digests from all JSON databases and return them as a set.

    Each database is expected to be a JSON object whose values are objects
    carrying an ``"md5"`` string.  Missing or unreadable databases are
    reported and skipped; records without a usable ``"md5"`` are skipped
    individually so one bad record does not discard the rest of the file.

    Args:
        json_paths: Iterable of ``Path`` objects pointing at the databases.

    Returns:
        Set of every MD5 string found.
    """
    md5_set = set()
    for jp in json_paths:
        if not jp.exists():
            print(f"⚠ JSON nenalezen: {jp}")
            continue

        try:
            with jp.open("r", encoding="utf-8") as f:
                db = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"❌ Chyba při čtení {jp}: {e}")
            continue

        if not isinstance(db, dict):
            print(f"❌ Chyba při čtení {jp}: očekáván JSON objekt")
            continue

        skipped = 0
        for info in db.values():
            digest = info.get("md5") if isinstance(info, dict) else None
            if isinstance(digest, str):
                md5_set.add(digest)
            else:
                skipped += 1

        print(f"Načteno {len(db)} záznamů z {jp}")
        if skipped:
            print(f"⚠ Přeskočeno {skipped} záznamů bez MD5 v {jp}")

    print(f"➡ Celkem MD5 hashů: {len(md5_set)}\n")
    return md5_set


def _marked_folder_name(name: str):
    """Return ``name`` with the marker inserted, or None if already marked."""
    if len(name) > FOLDER_MARK_POS:
        if name[FOLDER_MARK_POS] == MARK:
            return None
        return name[:FOLDER_MARK_POS] + MARK + name[FOLDER_MARK_POS:]
    # Short names get the marker appended, so "already marked" has to be
    # detected at the end of the name; otherwise every run appends another.
    if name.endswith(MARK):
        return None
    return name + MARK


def mark_folders_if_all_marked(root: Path, dryrun: bool):
    """Mark every folder under ``root`` whose direct files are all marked.

    A folder qualifies when it directly contains at least one file and every
    such file name starts with the marker.  Sub-folders are not taken into
    account when deciding about their parent.

    Args:
        root: Directory whose descendants are examined (``root`` itself is
            never renamed).
        dryrun: When True only report what would be renamed.
    """
    print("\n=== KONTROLA ADRESÁŘŮ — OZNAČENÍ PLNĚ HOTOVÝCH ===")

    # Deepest folders first: renaming a parent invalidates the collected
    # paths of everything below it, so children must be handled before it.
    folders = sorted(
        (p for p in root.rglob("*") if p.is_dir()),
        key=lambda p: (-len(p.parts), p),
    )

    for folder in folders:
        files = [f for f in folder.iterdir() if f.is_file()]
        if not files:
            continue

        # Are all files marked?
        if not all(f.name.startswith(MARK) for f in files):
            continue

        # Folder already marked?
        new_name = _marked_folder_name(folder.name)
        if new_name is None:
            continue

        new_path = folder.parent / new_name
        print(f"✔ Adresář označen: {folder.name} → {new_name}")
        if not dryrun:
            try_rename(folder, new_path)


def _rate(count: int, elapsed: float) -> float:
    """Return ``count / elapsed``, or 0.0 when no time has elapsed."""
    return count / elapsed if elapsed > 0 else 0.0


def run_matcher(dryrun: bool = True):
    """Mark processed files under ``MP_DIR``, then fully processed folders.

    Args:
        dryrun: When True nothing is renamed; intended renames are printed.
    """
    print("\n=== MATCHER V3 — SOUBORY + ADRESÁŘE ===")
    print(f"Režim: {'DRYRUN (simulace)' if dryrun else 'OSTRÝ'}\n")

    all_md5 = load_all_md5(JSON_PATHS)

    counter = 0
    renamed = 0
    start = time.time()

    for file in MP_DIR.rglob("*"):
        if not file.is_file():
            continue

        counter += 1
        if counter % PRINT_EVERY == 0:
            speed = _rate(counter, time.time() - start)
            print(f" {counter} soub. ({speed:.1f}/s)")

        # Already marked: nothing to do, so do not waste time hashing it.
        if file.name.startswith(MARK):
            continue

        try:
            md5 = md5_file(file)
        except OSError as e:
            # One unreadable (e.g. locked) file must not abort the whole run.
            print(f"⚠ Nelze přečíst {file}: {e}")
            continue

        if md5 not in all_md5:
            continue

        new_name = MARK + file.name
        new_path = file.parent / new_name

        if dryrun:
            print(f"[DRYRUN] Označil bych: {file.name} → {new_name}")
        elif try_rename(file, new_path):
            renamed += 1
            print(f"✔ {file.name} → {new_name}")

    # Mark the folders.
    mark_folders_if_all_marked(MP_DIR, dryrun)

    total_time = time.time() - start
    speed = _rate(counter, total_time)

    print("\n=== MATCHER HOTOVO ===")
    print(f" Zkontrolováno: {counter} souborů")
    print(f" Označeno: {renamed}")
    print(f" Rychlost: {speed:.1f} soub./s")
    print(f" Režim: {'DRYRUN' if dryrun else 'OSTRÝ'}\n")


if __name__ == "__main__":
    run_matcher(dryrun=False)