z230
This commit is contained in:
158
MedevioPřílohykontroly/10 spočítejMD5AoznačsouboryAadresáře.py
Normal file
158
MedevioPřílohykontroly/10 spočítejMD5AoznačsouboryAadresáře.py
Normal file
@@ -0,0 +1,158 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
from pathlib import Path
|
||||
import time
|
||||
import traceback
|
||||
|
||||
|
||||
# ======= CONFIG =======

# Root directory whose files and sub-folders are scanned and marked.
MP_DIR = Path(r"U:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP")

# JSON databases that are read for the "md5" values of their entries.
JSON_PATHS = [
    Path(r"U:\Dropbox\Ordinace\LAB-PDF\processed_files.json"),
    Path(r"U:\Dropbox\Ordinace\Dokumentace_zpracovaná\processed_files.json"),
]

# Number of bytes read per iteration while hashing a file.
CHUNK = 65536

# A progress line is printed every PRINT_EVERY scanned files.
PRINT_EVERY = 50
# ======================
|
||||
|
||||
|
||||
def try_rename(old_path: Path, new_path: Path, retries: int = 5, delay: float = 5) -> bool:
    """Rename *old_path* to *new_path*, retrying on OS-level failures.

    Args:
        old_path: Existing file or directory to rename.
        new_path: Desired new path.
        retries: Maximum number of attempts. With a value below 1 no attempt
            is made and the function returns False.
        delay: Seconds to sleep between two attempts.

    Returns:
        True if the rename succeeded, False otherwise. Failures are reported
        on stdout; ``OSError`` is never propagated to the caller.
    """
    for attempt in range(1, retries + 1):
        try:
            old_path.rename(new_path)
        except (FileNotFoundError, FileExistsError) as e:
            # A missing source or an already existing target will not fix
            # itself by waiting, so do not burn the whole retry budget on it.
            print(f"⚠ Rename failed ({attempt}/{retries}): {e}")
            print(" ❌ Non-retryable error. Skipping.")
            traceback.print_exc()
            return False
        except OSError as e:
            # Any other OS error (for example a file that is currently held
            # open by another process) is treated as possibly transient.
            print(f"⚠ Rename failed ({attempt}/{retries}): {e}")
            if attempt < retries:
                print(f" Waiting {delay}s before retry...")
                time.sleep(delay)
            else:
                print(" ❌ Maximum retries reached. Skipping.")
                traceback.print_exc()
        else:
            return True
    return False
|
||||
|
||||
|
||||
def md5_file(path: Path, chunk_size: "int | None" = None) -> str:
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is read in binary mode in fixed-size pieces, so it is never
    loaded into memory as a whole.

    Args:
        path: File to hash.
        chunk_size: Number of bytes read per iteration. When omitted, the
            module-level ``CHUNK`` constant is used.

    Returns:
        The digest as a lowercase hex string.

    Raises:
        ValueError: If the effective chunk size is not positive.
        OSError: If the file cannot be opened or read.
    """
    size = CHUNK if chunk_size is None else chunk_size
    if size <= 0:
        # read(0) returns b"" immediately, which would silently produce the
        # digest of empty input for every file.
        raise ValueError(f"chunk_size must be positive, got {size}")

    h = hashlib.md5()
    with path.open("rb") as f:
        # iter() with a sentinel stops at the first empty read, i.e. at EOF.
        for chunk in iter(lambda: f.read(size), b""):
            h.update(chunk)
    return h.hexdigest()
|
||||
|
||||
|
||||
def load_all_md5(json_paths):
    """Load the MD5 digests from every JSON file and return them as one set.

    Each file is read as a JSON object; the ``"md5"`` value of every entry is
    collected. Digests are normalised to lowercase so they compare equal to
    the output of ``hashlib``'s ``hexdigest()``.

    Missing files, unreadable files and malformed JSON are reported on stdout
    and skipped. Entries without a usable ``"md5"`` string are skipped
    individually instead of discarding the rest of their file.

    Args:
        json_paths: Iterable of ``pathlib.Path`` objects pointing at the JSON
            files.

    Returns:
        A set with all collected MD5 hex strings.
    """
    md5_set = set()

    for jp in json_paths:
        if not jp.exists():
            print(f"⚠ JSON nenalezen: {jp}")
            continue

        try:
            # utf-8-sig reads plain UTF-8 and also tolerates a leading BOM,
            # which json.load() would otherwise reject.
            with jp.open("r", encoding="utf-8-sig") as f:
                db = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"❌ Chyba při čtení {jp}: {e}")
            continue

        if not isinstance(db, dict):
            print(
                f"❌ Chyba při čtení {jp}: "
                f"očekáván JSON objekt, nalezen {type(db).__name__}"
            )
            continue

        skipped = 0
        for info in db.values():
            md5 = info.get("md5") if isinstance(info, dict) else None
            if isinstance(md5, str) and md5.strip():
                md5_set.add(md5.strip().lower())
            else:
                skipped += 1

        print(f"Načteno {len(db)} záznamů z {jp}")
        if skipped:
            print(f"⚠ Přeskočeno {skipped} záznamů bez MD5 v {jp}")

    print(f"➡ Celkem MD5 hashů: {len(md5_set)}\n")
    return md5_set
|
||||
|
||||
|
||||
def mark_folders_if_all_marked(root: Path, dryrun: bool):
    """Mark every folder below *root* whose direct files are all marked.

    A file counts as marked when its name starts with "▲". A folder is marked
    by inserting "▲" at index 10 of its name, or by appending it when the name
    has 10 characters or fewer. Folders that directly contain no files and
    folders that already carry the marker are left alone.

    Args:
        root: Directory whose sub-folders (recursively) are examined; *root*
            itself is never renamed.
        dryrun: When True, only print what would be renamed.
    """
    print("\n=== KONTROLA ADRESÁŘŮ — OZNAČENÍ PLNĚ HOTOVÝCH ===")

    marker = "▲"
    insert_pos = 10

    # Deepest folders first: renaming a parent before its children would make
    # the already collected child paths point to nowhere.
    folders = [p for p in root.rglob("*") if p.is_dir()]
    folders.sort(key=lambda p: (-len(p.parts), str(p)))

    for folder in folders:
        files = [f for f in folder.iterdir() if f.is_file()]
        if not files:
            continue

        # Are all direct files marked?
        if not all(f.name.startswith(marker) for f in files):
            continue

        name = folder.name

        # Is the folder already marked? Names with 10 characters or fewer get
        # the marker appended, so it has to be looked for at the end there;
        # otherwise those folders would gain another "▲" on every run.
        if len(name) > insert_pos:
            already_marked = name[insert_pos] == marker
        else:
            already_marked = name.endswith(marker)
        if already_marked:
            continue

        # Insert the marker at the 11th position (or append for short names).
        if len(name) <= insert_pos:
            new_name = name + marker
        else:
            new_name = name[:insert_pos] + marker + name[insert_pos:]

        new_path = folder.parent / new_name

        if new_path.exists():
            # Never rename onto an existing entry.
            print(f"⚠ Cíl už existuje, přeskakuji: {new_path}")
            continue

        if dryrun:
            print(f"✔ Adresář označen: {folder.name} → {new_name}")
        elif try_rename(folder, new_path):
            # Report success only after the rename really happened.
            print(f"✔ Adresář označen: {folder.name} → {new_name}")
|
||||
|
||||
|
||||
def run_matcher(dryrun: bool = True):
    """Mark files below ``MP_DIR`` whose MD5 is known, then mark folders.

    Every file under ``MP_DIR`` is hashed and compared against the digests
    loaded from ``JSON_PATHS``. A matching file gets "▲" prepended to its
    name. Afterwards ``mark_folders_if_all_marked`` is run on ``MP_DIR`` and
    a summary is printed.

    Args:
        dryrun: When True, only print the renames that would be performed.
    """
    print("\n=== MATCHER V3 — SOUBORY + ADRESÁŘE ===")
    print(f"Režim: {'DRYRUN (simulace)' if dryrun else 'OSTRÝ'}\n")

    all_md5 = load_all_md5(JSON_PATHS)

    counter = 0
    renamed = 0
    start = time.time()

    # Take a snapshot of the tree first: files are renamed inside the loop and
    # the listing should not change underneath a running directory walk.
    for file in list(MP_DIR.rglob("*")):
        if not file.is_file():
            continue

        counter += 1

        if counter % PRINT_EVERY == 0:
            elapsed = time.time() - start
            # Guard against a zero interval on coarse clocks.
            speed = counter / elapsed if elapsed > 0 else 0
            print(f" {counter} soub. ({speed:.1f}/s)")

        # Already marked: nothing to do, so do not spend time hashing it.
        if file.name.startswith("▲"):
            continue

        try:
            md5 = md5_file(file)
        except OSError as e:
            # One unreadable file must not abort the whole run.
            print(f"⚠ Nelze přečíst {file}: {e}")
            continue

        if md5 not in all_md5:
            continue

        new_name = "▲" + file.name
        new_path = file.parent / new_name

        if new_path.exists():
            # Never rename onto an existing entry.
            print(f"⚠ Cíl už existuje, přeskakuji: {new_path}")
            continue

        if dryrun:
            print(f"[DRYRUN] Označil bych: {file.name} → {new_name}")
        elif try_rename(file, new_path):
            renamed += 1
            print(f"✔ {file.name} → {new_name}")

    # Mark the folders.
    mark_folders_if_all_marked(MP_DIR, dryrun)

    total_time = time.time() - start
    speed = 0 if counter == 0 or total_time == 0 else counter / total_time

    print("\n=== MATCHER HOTOVO ===")
    print(f" Zkontrolováno: {counter} souborů")
    print(f" Označeno: {renamed}")
    print(f" Rychlost: {speed:.1f} soub./s")
    print(f" Režim: {'DRYRUN' if dryrun else 'OSTRÝ'}\n")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import sys

    # Live mode stays the default; pass --dryrun on the command line to only
    # simulate the renames.
    run_matcher(dryrun="--dryrun" in sys.argv[1:])
|
||||
Reference in New Issue
Block a user