Files
projects/MedevioPřílohykontroly/05 SpočítejMD5ZpracoveneALAB.py
2025-12-02 15:28:00 +01:00

112 lines
2.8 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import hashlib
import json
from pathlib import Path
import time
# === JUST ADD DIRECTORIES HERE ===
# Every directory listed here is scanned recursively by process_directory()
# and gets its own "processed_files.json" database stored inside it.
DIRECTORIES = [
Path(r"U:\Dropbox\Ordinace\Dokumentace_zpracovaná"),
Path(r"U:\Dropbox\Ordinace\LAB-PDF"),
]
# Number of bytes md5_file() reads per call while hashing (65536 = 64 KiB).
CHUNK = 65536
def md5_file(path: Path) -> str:
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is opened in binary mode and consumed ``CHUNK`` bytes at a time,
    so its whole content is never held in memory at once.
    """
    digest = hashlib.md5()
    with path.open("rb") as stream:
        while True:
            block = stream.read(CHUNK)
            if not block:
                # An empty read means end of file.
                break
            digest.update(block)
    return digest.hexdigest()
def load_db(db_path: Path) -> dict:
    """Load the JSON database stored at *db_path*.

    Returns the decoded JSON content, or an empty dict when the file does
    not exist. The file is read as UTF-8.
    """
    if not db_path.exists():
        return {}
    raw = db_path.read_text(encoding="utf-8")
    return json.loads(raw)
def save_db(db: dict, db_path: Path) -> None:
    """Write *db* to *db_path* as pretty-printed UTF-8 JSON.

    The data is first written to a temporary file next to *db_path* and then
    renamed over the target. The existing database is therefore never
    truncated before the new content has been serialized completely: if
    serialization or writing fails, the previous file is left untouched.

    Args:
        db: JSON-serializable mapping to store.
        db_path: Destination file; its parent directory must exist.

    Raises:
        TypeError: If *db* contains a value ``json`` cannot serialize.
        OSError: If the temporary file cannot be written or renamed.
    """
    # Same directory (so the rename stays on one filesystem) and same suffix
    # as the target, e.g. "processed_files.tmp.json".
    tmp_path = db_path.with_name(f"{db_path.stem}.tmp{db_path.suffix}")
    try:
        with tmp_path.open("w", encoding="utf-8") as f:
            json.dump(db, f, ensure_ascii=False, indent=2)
        # Path.replace overwrites an existing target in a single rename.
        tmp_path.replace(db_path)
    finally:
        # After a successful replace the temp file is gone; this only
        # removes a partially written leftover when something failed.
        if tmp_path.exists():
            tmp_path.unlink()
def _scan_files(root: Path) -> dict:
    """Collect size, mtime and full path of every non-JSON file below *root*.

    The result maps the bare file name to a dict with the keys ``"size"``,
    ``"mtime"`` (truncated to whole seconds) and ``"path"``.
    """
    found = {}
    for f in root.rglob("*"):
        # JSON files are skipped, which also excludes the database itself.
        if f.is_file() and f.suffix.lower() != ".json":
            stat = f.stat()
            # NOTE(review): the key is the bare file name while the scan is
            # recursive, so two files with the same name in different
            # subfolders overwrite each other here (last one found wins).
            # Keying by relative path would fix it but changes the schema of
            # existing processed_files.json databases - confirm before
            # changing.
            found[f.name] = {
                "size": stat.st_size,
                "mtime": int(stat.st_mtime),
                "path": str(f),
            }
    return found


def process_directory(root: Path):
    """Update the MD5 database of one directory tree.

    Loads ``processed_files.json`` from *root*, scans *root* recursively and
    computes an MD5 for every file that is not in the database yet or whose
    size or mtime differs from the stored record. Unchanged files are not
    re-hashed. Records of files that no longer exist on disk are kept.

    The database is written back even when hashing is interrupted by an
    exception, so hashes already computed in this run are not lost; the
    exception itself still propagates to the caller.

    Args:
        root: Directory to scan; the database file lives directly inside it.
    """
    print("\n===========================================")
    print(f"📁 ZPRACOVÁVÁM ADRESÁŘ: {root}")
    print("===========================================\n")
    db_path = root / "processed_files.json"

    # Load the database.
    db = load_db(db_path)
    print(f"Načteno z DB: {len(db)} záznamů")

    # Walk the file system.
    start_scan = time.time()
    files_in_fs = _scan_files(root)
    print(f"Nalezeno v FS: {len(files_in_fs)} souborů")
    print(f"Čas skenu: {time.time() - start_scan:.2f} s\n")

    new_files = 0
    changed_files = 0
    try:
        for fname, info in files_in_fs.items():
            size = info["size"]
            mtime = info["mtime"]
            if fname not in db:
                # New file.
                print(f"Nový soubor → MD5: {fname}")
                new_files += 1
            elif db[fname]["size"] != size or db[fname]["mtime"] != mtime:
                # Changed file.
                print(f"Změněný soubor → MD5: {fname}")
                changed_files += 1
            else:
                continue
            # md5_file() runs before the assignment, so a failing hash never
            # leaves a half-built record in the database.
            db[fname] = {
                "size": size,
                "mtime": mtime,
                "md5": md5_file(Path(info["path"])),
            }
    finally:
        # Save the database, including after a failure or Ctrl+C part-way
        # through the loop, so completed work survives.
        save_db(db, db_path)

    print("\n=== Výsledky ===")
    print(f" Nové soubory: {new_files}")
    print(f" Změněné soubory: {changed_files}")
    print(f" Celkem v DB: {len(db)}")
    print(f" Databáze: {db_path}")
    print("=============================\n")
def main():
    """Run process_directory() on every entry of DIRECTORIES, in list order."""
    for target_dir in DIRECTORIES:
        process_directory(target_dir)


# Execute only when launched as a script, not when imported as a module.
if __name__ == "__main__":
    main()