#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""MD5 bookkeeping for processed documents and marking of already-known files.

This module carries the code of the two scripts added by the patch:

* ``MedevioPřílohykontroly/05 SpočítejMD5ZpracoveneALAB.py`` -- for every
  directory in ``DIRECTORIES`` it maintains a ``processed_files.json``
  database holding size, mtime and MD5 of the files found below it
  (``process_directory`` / ``main``).
* ``MedevioPřílohykontroly/10 spočítejMD5AoznačsouboryAadresáře.py`` -- hashes
  the files below ``MP_DIR``, prefixes every file whose MD5 occurs in one of
  the databases with "▲", and then marks folders whose direct files are all
  prefixed (``run_matcher``).

``md5_file`` and ``CHUNK`` were identical in both scripts and are defined once.
"""

import hashlib
import json
from pathlib import Path
import time
import traceback
from typing import Optional


# ======= CONFIG =======

# === JUST ADD DIRECTORIES HERE ===
DIRECTORIES = [
    Path(r"U:\Dropbox\Ordinace\Dokumentace_zpracovaná"),
    Path(r"U:\Dropbox\Ordinace\LAB-PDF"),
]

MP_DIR = Path(r"U:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP")

JSON_PATHS = [
    Path(r"U:\Dropbox\Ordinace\LAB-PDF\processed_files.json"),
    Path(r"U:\Dropbox\Ordinace\Dokumentace_zpracovaná\processed_files.json"),
]

CHUNK = 65536          # bytes read per step while hashing
PRINT_EVERY = 50       # progress line after this many checked files
DB_NAME = "processed_files.json"
MARK = "▲"             # marker put in front of matched files / into folder names
FOLDER_MARK_POS = 10   # 0-based index at which the marker is inserted into folder names
# ======================


def md5_file(path: Path) -> str:
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is read in ``CHUNK``-sized pieces, so it is never held in memory
    as a whole.
    """
    h = hashlib.md5()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(CHUNK), b""):
            h.update(chunk)
    return h.hexdigest()


def load_db(db_path: Path) -> dict:
    """Load the JSON database at *db_path*; return ``{}`` when it does not exist.

    A file that exists but does not contain valid JSON still raises, so a
    damaged database is never silently replaced by an empty one.
    """
    try:
        with db_path.open("r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        return {}


def save_db(db: dict, db_path: Path):
    """Write *db* to *db_path* as indented UTF-8 JSON (non-ASCII kept as-is)."""
    with db_path.open("w", encoding="utf-8") as f:
        json.dump(db, f, ensure_ascii=False, indent=2)


def _scan_files(root: Path) -> dict:
    """Map a database key to ``{"size", "mtime", "path"}`` for each non-JSON file under *root*.

    The key is the bare file name.  When that name was already taken by
    another file in this scan, the POSIX-style path relative to *root* is
    used instead, so same-named files in different subfolders no longer
    overwrite each other.
    """
    candidates = [
        p for p in root.rglob("*")
        if p.is_file() and p.suffix.lower() != ".json"
    ]
    # Shallowest first (then by path) so the assignment of the bare name is
    # deterministic and a file directly in root always keeps its plain name.
    candidates.sort(key=lambda p: (len(p.relative_to(root).parts), p.as_posix()))

    found = {}
    for path in candidates:
        key = path.name
        if key in found:
            key = path.relative_to(root).as_posix()
        stat = path.stat()
        found[key] = {
            "size": stat.st_size,
            "mtime": int(stat.st_mtime),
            "path": str(path),
        }
    return found


def process_directory(root: Path):
    """Bring ``root / "processed_files.json"`` up to date with the files under *root*.

    A file is hashed when it has no database entry yet or when its size or
    (whole-second) mtime differs from the stored entry.  Entries of files
    that are no longer present are left in the database.  The database is
    written even if hashing is interrupted, so finished work is kept.
    A missing *root* is reported and skipped.
    """
    print("\n===========================================")
    print(f"📁 ZPRACOVÁVÁM ADRESÁŘ: {root}")
    print("===========================================\n")

    if not root.is_dir():
        print(f"⚠ Adresář nenalezen: {root}")
        return

    db_path = root / DB_NAME

    # Load the database
    db = load_db(db_path)
    print(f"Načteno z DB: {len(db)} záznamů")

    # Walk the file system
    start_scan = time.time()
    files_in_fs = _scan_files(root)

    print(f"Nalezeno v FS: {len(files_in_fs)} souborů")
    print(f"Čas skenu: {time.time() - start_scan:.2f} s\n")

    new_files = 0
    changed_files = 0

    try:
        for key, info in files_in_fs.items():
            size = info["size"]
            mtime = info["mtime"]
            known = db.get(key)

            if known is None:
                # new file
                print(f"Nový soubor → MD5: {key}")
                new_files += 1
            elif (
                not isinstance(known, dict)
                or known.get("size") != size
                or known.get("mtime") != mtime
            ):
                # changed file (or an entry that is not usable for comparison)
                print(f"Změněný soubor → MD5: {key}")
                changed_files += 1
            else:
                continue

            db[key] = {
                "size": size,
                "mtime": mtime,
                "md5": md5_file(Path(info["path"])),
            }
    finally:
        # Save the database; also reached on Ctrl-C or a read error so that
        # the hashes computed so far are not lost.
        save_db(db, db_path)

    print("\n=== Výsledky ===")
    print(f" Nové soubory: {new_files}")
    print(f" Změněné soubory: {changed_files}")
    print(f" Celkem v DB: {len(db)}")
    print(f" Databáze: {db_path}")
    print("=============================\n")


def main():
    """Update the MD5 database of every directory listed in ``DIRECTORIES``."""
    for directory in DIRECTORIES:
        process_directory(directory)


def try_rename(old_path: Path, new_path: Path, retries: int = 5, delay: int = 5):
    """Rename *old_path* to *new_path*, retrying on ``OSError``.

    Up to *retries* attempts are made with *delay* seconds of sleep between
    them.  An already existing *new_path* is never overwritten: the rename is
    skipped at once, because waiting would not help and ``Path.rename`` would
    silently replace an existing file on POSIX systems.

    Returns ``True`` when the rename succeeded, ``False`` otherwise.
    """
    if new_path.exists():
        print(f"⚠ Rename skipped, target already exists: {new_path}")
        return False

    for attempt in range(1, retries + 1):
        try:
            old_path.rename(new_path)
            return True
        except OSError as e:
            print(f"⚠ Rename failed ({attempt}/{retries}): {e}")
            if attempt < retries:
                print(f" Waiting {delay}s before retry...")
                time.sleep(delay)
            else:
                print(" ❌ Maximum retries reached. Skipping.")
                traceback.print_exc()
    return False


def load_all_md5(json_paths):
    """Collect the ``"md5"`` values of all databases in *json_paths* into a set.

    Missing or unreadable databases are reported and skipped.  Entries
    without a string ``"md5"`` value are skipped individually, so one bad
    entry no longer discards the rest of its database.
    """
    md5_set = set()
    for jp in json_paths:
        if not jp.exists():
            print(f"⚠ JSON nenalezen: {jp}")
            continue

        try:
            with jp.open("r", encoding="utf-8") as f:
                db = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"❌ Chyba při čtení {jp}: {e}")
            continue

        if not isinstance(db, dict):
            print(f"❌ Chyba při čtení {jp}: neočekávaný formát (očekáván objekt JSON)")
            continue

        skipped = 0
        for info in db.values():
            md5 = info.get("md5") if isinstance(info, dict) else None
            if isinstance(md5, str):
                md5_set.add(md5)
            else:
                skipped += 1

        print(f"Načteno {len(db)} záznamů z {jp}")
        if skipped:
            print(f"⚠ Přeskočeno {skipped} záznamů bez platného MD5: {jp}")

    print(f"➡ Celkem MD5 hashů: {len(md5_set)}\n")
    return md5_set


def _folder_is_marked(name: str) -> bool:
    """Tell whether a folder name already carries the marker.

    Names longer than ``FOLDER_MARK_POS`` have it at that index; shorter
    names got it appended, so there it is the last character.
    """
    if len(name) > FOLDER_MARK_POS:
        return name[FOLDER_MARK_POS] == MARK
    return name.endswith(MARK)


def _marked_folder_name(name: str) -> str:
    """Insert the marker at ``FOLDER_MARK_POS`` (appended when the name is shorter)."""
    return name[:FOLDER_MARK_POS] + MARK + name[FOLDER_MARK_POS:]


def mark_folders_if_all_marked(root: Path, dryrun: bool):
    """Mark every folder below *root* whose direct files all start with "▲".

    Folders without direct files and folders that are already marked are
    left alone.  With *dryrun* the new names are only printed.
    """
    print("\n=== KONTROLA ADRESÁŘŮ — OZNAČENÍ PLNĚ HOTOVÝCH ===")

    # Deepest folders first: renaming a folder then never invalidates the
    # path of a folder that is still waiting in the list.
    folders = sorted(
        (p for p in root.rglob("*") if p.is_dir()),
        key=lambda p: (-len(p.parts), p),
    )

    for folder in folders:
        files = [f for f in folder.iterdir() if f.is_file()]
        if not files:
            continue

        # are all files marked?
        if not all(f.name.startswith(MARK) for f in files):
            continue

        # folder already marked?
        if _folder_is_marked(folder.name):
            continue

        new_name = _marked_folder_name(folder.name)
        new_path = folder.parent / new_name

        print(f"✔ Adresář označen: {folder.name} → {new_name}")

        if not dryrun:
            try_rename(folder, new_path)


def run_matcher(
    dryrun: bool = True,
    *,
    mp_dir: Optional[Path] = None,
    json_paths=None,
):
    """Prefix files whose MD5 is already known with "▲", then mark finished folders.

    Args:
        dryrun: when true, only print what would be renamed.
        mp_dir: directory tree to check; defaults to ``MP_DIR``.
        json_paths: databases to read MD5 values from; defaults to ``JSON_PATHS``.
    """
    mp_dir = MP_DIR if mp_dir is None else mp_dir
    json_paths = JSON_PATHS if json_paths is None else json_paths

    print("\n=== MATCHER V3 — SOUBORY + ADRESÁŘE ===")
    print(f"Režim: {'DRYRUN (simulace)' if dryrun else 'OSTRÝ'}\n")

    all_md5 = load_all_md5(json_paths)

    counter = 0
    renamed = 0
    start = time.time()

    for file in mp_dir.rglob("*"):
        if not file.is_file():
            continue

        counter += 1

        if counter % PRINT_EVERY == 0:
            elapsed = time.time() - start
            # The clock may not have advanced yet; avoid dividing by zero.
            speed = counter / elapsed if elapsed > 0 else 0.0
            print(f" {counter} soub. ({speed:.1f}/s)")

        # Already marked files need no rename, so do not hash them at all.
        if file.name.startswith(MARK):
            continue

        try:
            md5 = md5_file(file)
        except OSError as e:
            # One unreadable file must not abort the whole run.
            print(f"⚠ Nelze přečíst {file}: {e}")
            continue

        if md5 not in all_md5:
            continue

        new_name = MARK + file.name
        new_path = file.parent / new_name

        if dryrun:
            print(f"[DRYRUN] Označil bych: {file.name} → {new_name}")
        elif try_rename(file, new_path):
            # === RETRY RENAME (files) ===
            renamed += 1
            print(f"✔ {file.name} → {new_name}")

    # mark folders
    mark_folders_if_all_marked(mp_dir, dryrun)

    total_time = time.time() - start
    speed = 0 if counter == 0 or total_time == 0 else counter / total_time

    print("\n=== MATCHER HOTOVO ===")
    print(f" Zkontrolováno: {counter} souborů")
    print(f" Označeno: {renamed}")
    print(f" Rychlost: {speed:.1f} soub./s")
    print(f" Režim: {'DRYRUN' if dryrun else 'OSTRÝ'}\n")


if __name__ == "__main__":
    # Entry points of the two original scripts, in their numbered order (05, 10).
    main()
    run_matcher(dryrun=False)