112 lines
2.8 KiB
Python
112 lines
2.8 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
|
|
import hashlib
|
|
import json
|
|
from pathlib import Path
|
|
import time
|
|
|
|
# === ADD NEW DIRECTORIES ONLY HERE ===
DIRECTORIES = [
    Path(r"U:\Dropbox\Ordinace\Dokumentace_zpracovaná"),
    Path(r"U:\Dropbox\Ordinace\LAB-PDF"),
]

# Number of bytes read per call while hashing a file (64 KiB).
CHUNK = 64 * 1024
|
|
|
|
|
|
def md5_file(path: Path) -> str:
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is opened in binary mode and consumed in CHUNK-sized reads, so
    its content is never held in memory as a whole.
    """
    digest = hashlib.md5()
    with path.open("rb") as stream:
        while True:
            block = stream.read(CHUNK)
            if not block:
                # read() returns b"" only at end of file.
                break
            digest.update(block)
    return digest.hexdigest()
|
|
|
|
|
|
def load_db(db_path: Path) -> dict:
    """Load the JSON database stored at *db_path*.

    Args:
        db_path: Location of the JSON database file.

    Returns:
        The parsed JSON content, or an empty dict when the file does not
        exist or contains nothing but whitespace (for example a zero-byte
        file left behind by an interrupted write).

    Raises:
        json.JSONDecodeError: If the file has content that is not valid JSON.
    """
    # EAFP: read directly instead of exists() followed by open(), so a file
    # that disappears between the two calls cannot crash the run.
    try:
        text = db_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        return {}

    # An empty file holds no records; treat it as an empty database rather
    # than failing with a JSON decode error.
    if not text.strip():
        return {}

    return json.loads(text)
|
|
|
|
|
|
def save_db(db: dict, db_path: Path) -> None:
    """Write *db* to *db_path* as indented UTF-8 JSON.

    Non-ASCII characters are written as-is (``ensure_ascii=False``).

    Args:
        db: The database mapping to persist.
        db_path: Destination file; it is overwritten.

    Raises:
        TypeError: If *db* contains a value that JSON cannot represent. The
            existing file is left untouched in that case.
    """
    # Serialize completely before opening the file: opening with "w"
    # truncates it immediately, so an error raised part-way through the dump
    # would otherwise leave a truncated, unreadable database on disk.
    payload = json.dumps(db, ensure_ascii=False, indent=2)
    with db_path.open("w", encoding="utf-8") as f:
        f.write(payload)
|
|
|
|
|
|
def _scan_files(root: Path) -> dict:
    """Return size, mtime and path of every non-JSON file below *root*, keyed by file name."""
    found = {}
    for entry in root.rglob("*"):
        try:
            if not entry.is_file() or entry.suffix.lower() == ".json":
                continue
            stat = entry.stat()
        except OSError as err:
            # The file may have been removed or locked between listing and stat.
            print(f"Soubor nelze přečíst, přeskakuji: {entry} ({err})")
            continue
        # NOTE(review): entries are keyed by the bare file name although the
        # scan is recursive, so two files with the same name in different
        # subfolders overwrite each other here. The key is kept as-is because
        # existing processed_files.json databases use it; confirm whether a
        # root-relative path key would be acceptable.
        found[entry.name] = {
            "size": stat.st_size,
            "mtime": int(stat.st_mtime),
            "path": str(entry),
        }
    return found


def process_directory(root: Path) -> None:
    """Refresh the MD5 database ``processed_files.json`` inside *root*.

    Every non-JSON file below *root* (recursively) is compared with its
    database record by size and whole-second modification time. Files that
    are missing from the database or whose size/mtime differ are hashed
    again and their record is replaced. Records of files no longer present
    on disk are kept. Progress and a summary are printed to stdout.

    Args:
        root: Directory to scan; the database file is stored directly in it.

    If *root* is not an existing directory, a message is printed and nothing
    else happens. Files that cannot be read are reported and skipped.
    """
    print("\n===========================================")
    print(f"📁 ZPRACOVÁVÁM ADRESÁŘ: {root}")
    print("===========================================\n")

    # Without this guard a missing root (e.g. an unmounted drive) would only
    # fail at the very end, when the database file cannot be created.
    if not root.is_dir():
        print(f"Adresář neexistuje, přeskakuji: {root}\n")
        return

    db_path = root / "processed_files.json"

    # Load the database.
    db = load_db(db_path)
    print(f"Načteno z DB: {len(db)} záznamů")

    # Walk the file system.
    start_scan = time.perf_counter()
    files_in_fs = _scan_files(root)
    print(f"Nalezeno v FS: {len(files_in_fs)} souborů")
    print(f"Čas skenu: {time.perf_counter() - start_scan:.2f} s\n")

    new_files = 0
    changed_files = 0

    try:
        for fname, info in files_in_fs.items():
            size = info["size"]
            mtime = info["mtime"]

            known = db.get(fname)
            if known is None:
                is_new = True
                print(f"Nový soubor → MD5: {fname}")
            elif known.get("size") != size or known.get("mtime") != mtime:
                is_new = False
                print(f"Změněný soubor → MD5: {fname}")
            else:
                # Size and mtime match the record: hash is assumed current.
                continue

            try:
                digest = md5_file(Path(info["path"]))
            except OSError as err:
                # The file vanished or became unreadable after the scan.
                print(f"Soubor nelze přečíst, přeskakuji: {info['path']} ({err})")
                continue

            db[fname] = {"size": size, "mtime": mtime, "md5": digest}
            if is_new:
                new_files += 1
            else:
                changed_files += 1
    finally:
        # Persist whatever has been hashed so far, even when the loop is
        # interrupted, so completed work is not lost.
        save_db(db, db_path)

    print("\n=== Výsledky ===")
    print(f" Nové soubory: {new_files}")
    print(f" Změněné soubory: {changed_files}")
    print(f" Celkem v DB: {len(db)}")
    print(f" Databáze: {db_path}")
    print("=============================\n")
|
|
|
|
|
|
def main():
    """Process each directory listed in DIRECTORIES, in order."""
    for target in DIRECTORIES:
        process_directory(target)
|
|
|
|
|
|
# Run the scan only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|