#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Maintain a per-directory JSON database of file sizes, mtimes and MD5 sums.

For every directory listed in DIRECTORIES the script walks the tree, compares
each file's size and modification time with the entry stored in
``processed_files.json`` inside that directory, and (re)computes the MD5
digest only for files that are new or whose size/mtime differ.
"""
import hashlib
import json
import time
from pathlib import Path

# === ONLY ADD DIRECTORIES HERE ===
DIRECTORIES = [
    Path(r"U:\Dropbox\Ordinace\Dokumentace_zpracovaná"),
    Path(r"U:\Dropbox\Ordinace\LAB-PDF"),
]

# Number of bytes read per iteration while hashing a file.
CHUNK = 65536


def md5_file(path: Path) -> str:
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is read in CHUNK-sized pieces so that it is never loaded into
    memory as a whole.
    """
    digest = hashlib.md5()
    with path.open("rb") as f:
        # iter() with a sentinel stops as soon as read() returns b"" (EOF).
        for chunk in iter(lambda: f.read(CHUNK), b""):
            digest.update(chunk)
    return digest.hexdigest()


def load_db(db_path: Path) -> dict:
    """Load the JSON database from *db_path*.

    Returns an empty dict when the file does not exist. A file that exists
    but does not contain valid JSON raises ``json.JSONDecodeError``.
    """
    if db_path.exists():
        with db_path.open("r", encoding="utf-8") as f:
            return json.load(f)
    return {}


def save_db(db: dict, db_path: Path) -> None:
    """Write *db* to *db_path* as indented UTF-8 JSON, overwriting the file."""
    with db_path.open("w", encoding="utf-8") as f:
        json.dump(db, f, ensure_ascii=False, indent=2)


def _scan_files(root: Path) -> dict:
    """Collect size, mtime and full path of every non-JSON file under *root*.

    NOTE(review): results are keyed by the bare file name, so files that
    share a name in different subdirectories overwrite each other and only
    the last one visited is tracked. Changing the key would change the
    schema of the stored database, so it is left as is.
    """
    found = {}
    for f in root.rglob("*"):
        # All *.json files are skipped, which also excludes the database.
        if f.is_file() and f.suffix.lower() != ".json":
            stat = f.stat()
            found[f.name] = {
                "size": stat.st_size,
                "mtime": int(stat.st_mtime),
                "path": str(f),
            }
    return found


def _build_entry(info: dict) -> dict:
    """Build the database record (size, mtime, md5) for one scanned file."""
    return {
        "size": info["size"],
        "mtime": info["mtime"],
        "md5": md5_file(Path(info["path"])),
    }


def process_directory(root: Path) -> None:
    """Update ``processed_files.json`` in *root* for new and changed files.

    A file is hashed when its name is not yet in the database, or when its
    size or integer mtime differs from the stored values. Entries for files
    that no longer exist are kept. Progress and a summary are printed to
    stdout. If *root* is not an existing directory, a message is printed and
    nothing is written.
    """
    print("\n===========================================")
    print(f"📁 ZPRACOVÁVÁM ADRESÁŘ: {root}")
    print("===========================================\n")

    # Without this guard the scan finds nothing and save_db() then fails
    # with FileNotFoundError, aborting the remaining directories as well.
    if not root.is_dir():
        print(f"⚠ Adresář neexistuje nebo není dostupný – přeskakuji: {root}\n")
        return

    db_path = root / "processed_files.json"

    # Load the database.
    db = load_db(db_path)
    print(f"Načteno z DB: {len(db)} záznamů")

    # Walk the file system.
    start_scan = time.time()
    files_in_fs = _scan_files(root)
    print(f"Nalezeno v FS: {len(files_in_fs)} souborů")
    print(f"Čas skenu: {time.time() - start_scan:.2f} s\n")

    new_files = 0
    changed_files = 0

    for fname, info in files_in_fs.items():
        entry = db.get(fname)

        # New file?
        if entry is None:
            print(f"Nový soubor → MD5: {fname}")
            new_files += 1
            db[fname] = _build_entry(info)
            continue

        # Changed file? .get() makes an entry with a missing key count as
        # changed instead of raising KeyError.
        if entry.get("size") != info["size"] or entry.get("mtime") != info["mtime"]:
            print(f"Změněný soubor → MD5: {fname}")
            changed_files += 1
            db[fname] = _build_entry(info)

    # Save the database.
    save_db(db, db_path)

    print("\n=== Výsledky ===")
    print(f" Nové soubory: {new_files}")
    print(f" Změněné soubory: {changed_files}")
    print(f" Celkem v DB: {len(db)}")
    print(f" Databáze: {db_path}")
    print("=============================\n")


def main() -> None:
    """Process every directory listed in DIRECTORIES, in order."""
    for directory in DIRECTORIES:
        process_directory(directory)


if __name__ == "__main__":
    main()