z230
This commit is contained in:
111
MedevioPřílohykontroly/05 SpočítejMD5ZpracoveneALAB.py
Normal file
111
MedevioPřílohykontroly/05 SpočítejMD5ZpracoveneALAB.py
Normal file
@@ -0,0 +1,111 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
from pathlib import Path
|
||||
import time
|
||||
|
||||
# === Configuration: to process another directory, add it to this list ===
DIRECTORIES = [
    Path(r"U:\Dropbox\Ordinace\Dokumentace_zpracovaná"),
    Path(r"U:\Dropbox\Ordinace\LAB-PDF"),
]

# Number of bytes read per call while hashing a file (64 KiB).
CHUNK = 64 * 1024
|
||||
|
||||
|
||||
def md5_file(path: Path) -> str:
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is opened in binary mode and fed to the hash in pieces of at
    most ``CHUNK`` bytes, so the whole file is never held in memory at once.
    """
    digest = hashlib.md5()
    with path.open("rb") as stream:
        while True:
            block = stream.read(CHUNK)
            if not block:
                # An empty read means end of file.
                break
            digest.update(block)
    return digest.hexdigest()
|
||||
|
||||
|
||||
def load_db(db_path: Path) -> dict:
    """Load the JSON database stored at *db_path*.

    The file is opened directly instead of being checked with ``exists()``
    first, so a file that disappears between a check and the open cannot
    cause a spurious failure.

    Args:
        db_path: Location of the UTF-8 encoded JSON file.

    Returns:
        The decoded JSON content, or an empty dict when the file does not
        exist.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
        OSError: If the file exists but cannot be opened or read.
    """
    try:
        with db_path.open("r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        # No database yet: start from an empty one.
        return {}
|
||||
|
||||
|
||||
def save_db(db: dict, db_path: Path):
    """Write *db* to *db_path* as indented UTF-8 JSON.

    The JSON is first written to a sibling temporary file which then replaces
    *db_path* in a single step. A failure or interruption while serializing
    therefore leaves the previous database file intact instead of truncated.

    Args:
        db: JSON-serializable mapping to store.
        db_path: Destination file; its parent directory must already exist.

    Raises:
        OSError: If the temporary file cannot be written or moved into place.
        TypeError: If *db* contains a value ``json.dump`` cannot serialize.
    """
    # Keep the original suffix last ("name.tmp.json") so that code filtering
    # files by suffix treats a leftover temp file the same as the database.
    tmp_path = db_path.with_name(f"{db_path.stem}.tmp{db_path.suffix}")
    try:
        with tmp_path.open("w", encoding="utf-8") as f:
            json.dump(db, f, ensure_ascii=False, indent=2)
        tmp_path.replace(db_path)
    finally:
        # After a successful replace the temp file is already gone; after a
        # failure this removes the partial file. Either way, never mask the
        # original exception with a cleanup error.
        try:
            tmp_path.unlink()
        except OSError:
            pass
|
||||
|
||||
|
||||
def _scan_files(root: Path) -> dict:
    """Collect size, mtime and path of every non-JSON file below *root*.

    Returns a dict keyed by the bare file name (not the relative path), which
    is the key format of the stored database. Two files with the same name in
    different subdirectories therefore share one key; the last one found
    wins and a warning is printed.
    """
    found = {}
    for f in root.rglob("*"):
        if not f.is_file() or f.suffix.lower() == ".json":
            continue
        try:
            stat = f.stat()
        except OSError:
            # The file vanished or became inaccessible after being listed.
            continue
        if f.name in found:
            print(f"POZOR: duplicitní název souboru, použije se poslední nalezený: {f}")
        found[f.name] = {
            "size": stat.st_size,
            "mtime": int(stat.st_mtime),
            "path": str(f),
        }
    return found


def process_directory(root: Path):
    """Bring ``processed_files.json`` in *root* up to date with the files on disk.

    Every non-JSON file found recursively below *root* is compared, by file
    name, with its database entry. Files that are missing from the database,
    or whose size or whole-second mtime differ from the stored values, are
    hashed with ``md5_file`` and their entry is (re)written. Entries for
    files no longer on disk are left untouched. Progress and a summary are
    printed to stdout.

    A file that cannot be read while hashing is reported and skipped; its
    entry is not updated, so it is picked up again on the next run. The
    database is saved even when the loop is interrupted, so hashes that were
    already computed are not lost.

    Args:
        root: Directory to scan; the database file lives directly inside it.

    Raises:
        OSError: If the database cannot be read or written.
        json.JSONDecodeError: If the existing database is not valid JSON.
    """
    print("\n===========================================")
    print(f"📁 ZPRACOVÁVÁM ADRESÁŘ: {root}")
    print("===========================================\n")

    db_path = root / "processed_files.json"

    # Load the existing database.
    db = load_db(db_path)
    print(f"Načteno z DB: {len(db)} záznamů")

    # Walk the file system.
    start_scan = time.time()
    files_in_fs = _scan_files(root)
    print(f"Nalezeno v FS: {len(files_in_fs)} souborů")
    print(f"Čas skenu: {time.time() - start_scan:.2f} s\n")

    new_files = 0
    changed_files = 0
    skipped_files = 0

    try:
        for fname, info in files_in_fs.items():
            size = info["size"]
            mtime = info["mtime"]
            path = Path(info["path"])

            is_new = fname not in db
            if is_new:
                print(f"Nový soubor → MD5: {fname}")
            else:
                known = db[fname]
                # An entry with a missing key or unexpected shape counts as
                # changed and is rebuilt instead of raising.
                if (
                    isinstance(known, dict)
                    and known.get("size") == size
                    and known.get("mtime") == mtime
                ):
                    continue
                print(f"Změněný soubor → MD5: {fname}")

            try:
                digest = md5_file(path)
            except OSError as err:
                # Deleted or locked since the scan: leave the database entry
                # as it is so the file is retried on the next run.
                print(f"POZOR: soubor nelze přečíst, přeskakuji: {path} ({err})")
                skipped_files += 1
                continue

            db[fname] = {"size": size, "mtime": mtime, "md5": digest}
            if is_new:
                new_files += 1
            else:
                changed_files += 1
    finally:
        # Persist whatever was hashed so far, even on Ctrl+C or an error.
        save_db(db, db_path)

    print("\n=== Výsledky ===")
    print(f" Nové soubory: {new_files}")
    print(f" Změněné soubory: {changed_files}")
    if skipped_files:
        print(f" Přeskočené soubory: {skipped_files}")
    print(f" Celkem v DB: {len(db)}")
    print(f" Databáze: {db_path}")
    print("=============================\n")
|
||||
|
||||
|
||||
def main():
    """Run ``process_directory`` once for each entry of ``DIRECTORIES``, in order.

    Nothing is caught here, so an exception raised while processing one
    directory stops the run before the remaining directories are handled.
    """
    for target in DIRECTORIES:
        process_directory(target)


# Script entry point: only run the scan when executed directly.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user