z230
This commit is contained in:
111
MedevioPřílohykontroly/05 SpočítejMD5ZpracoveneALAB.py
Normal file
111
MedevioPřílohykontroly/05 SpočítejMD5ZpracoveneALAB.py
Normal file
@@ -0,0 +1,111 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
from pathlib import Path
|
||||
import time
|
||||
|
||||
# === ADD DIRECTORIES HERE — this list is the only thing that needs editing ===
# main() calls process_directory() once per entry; each directory keeps its own
# "processed_files.json" database inside itself.
DIRECTORIES = [
    Path(r"U:\Dropbox\Ordinace\Dokumentace_zpracovaná"),
    Path(r"U:\Dropbox\Ordinace\LAB-PDF"),
]

# Number of bytes read per call while hashing a file (64 KiB).
CHUNK = 65536
|
||||
|
||||
|
||||
def md5_file(path: Path) -> str:
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is opened in binary mode and consumed in pieces of ``CHUNK``
    bytes, so its whole content is never held in memory at once.
    """
    digest = hashlib.md5()
    with path.open("rb") as stream:
        while True:
            block = stream.read(CHUNK)
            if not block:
                # An empty read means end of file
                break
            digest.update(block)
    return digest.hexdigest()
|
||||
|
||||
|
||||
def load_db(db_path: Path) -> dict:
    """Read the JSON database stored at *db_path*.

    Returns the decoded JSON content, or an empty dict when the file does
    not exist yet. The file is read as UTF-8.
    """
    if not db_path.exists():
        return {}
    return json.loads(db_path.read_text(encoding="utf-8"))
|
||||
|
||||
|
||||
def save_db(db: dict, db_path: Path) -> None:
    """Write *db* to *db_path* as pretty-printed UTF-8 JSON, atomically.

    The data is first dumped to a temporary file next to *db_path* and then
    moved over the target with ``Path.replace``. If serialisation or writing
    fails (or the process is interrupted), the previous database file is
    left untouched instead of being truncated, and the error propagates to
    the caller.

    Args:
        db: JSON-serialisable mapping to store.
        db_path: Destination file; its parent directory must exist.
    """
    # The temp file keeps the original suffix (e.g. "processed_files.tmp.json")
    # so that a leftover copy is skipped by scans that ignore "*.json" files.
    tmp_path = db_path.with_name(f"{db_path.stem}.tmp{db_path.suffix}")
    try:
        with tmp_path.open("w", encoding="utf-8") as f:
            json.dump(db, f, ensure_ascii=False, indent=2)
        tmp_path.replace(db_path)
    finally:
        # After a successful replace the temp file is gone; after a failure
        # remove the partial file so it does not pile up in the directory.
        if tmp_path.exists():
            tmp_path.unlink()
|
||||
|
||||
|
||||
def process_directory(root: Path):
    """Bring the MD5 database of *root* up to date with the files on disk.

    The database is the JSON file ``root / "processed_files.json"`` (read with
    ``load_db``, written with ``save_db``). Each entry maps a file key to
    ``{"size", "mtime", "md5"}``. Every regular file below *root*
    (recursively, ``*.json`` files excluded) is hashed when it has no entry
    yet or when its size or modification time (whole seconds) differs from
    the stored entry. Entries of files that no longer exist are kept.

    Keys are paths relative to *root* in POSIX form ("sub/file.pdf"). For
    files directly in *root* this equals the bare file name. Keying nested
    files by bare name made same-named files in different subfolders
    overwrite each other, so only one of them ever got its MD5 recorded.
    Old name-keyed entries of nested files are migrated on the fly: when
    size and mtime still match, the stored MD5 is reused without re-hashing.

    A file that cannot be read is reported and skipped, so one locked file
    does not abort the run before the database is saved.

    Args:
        root: Directory to scan; also the location of the database file.
    """
    print("\n===========================================")
    print(f"📁 ZPRACOVÁVÁM ADRESÁŘ: {root}")
    print("===========================================\n")

    db_path = root / "processed_files.json"

    # Load the database
    db = load_db(db_path)
    print(f"Načteno z DB: {len(db)} záznamů")

    # Walk the file system
    files_in_fs = {}
    start_scan = time.time()

    for f in root.rglob("*"):
        if not f.is_file() or f.suffix.lower() == ".json":
            continue
        try:
            stat = f.stat()
        except OSError:
            # The file vanished or became inaccessible between listing and stat
            continue
        files_in_fs[f.relative_to(root).as_posix()] = {
            "size": stat.st_size,
            "mtime": int(stat.st_mtime),
            "path": f,
        }

    print(f"Nalezeno v FS: {len(files_in_fs)} souborů")
    print(f"Čas skenu: {time.time() - start_scan:.2f} s\n")

    new_files = 0
    changed_files = 0

    for key, info in files_in_fs.items():
        size = info["size"]
        mtime = info["mtime"]
        path = info["path"]

        entry = db.get(key)
        legacy_key = None
        if entry is None and path.name != key and path.name not in files_in_fs:
            # Nested file without an entry: fall back to an entry stored under
            # the bare file name by earlier versions. Skipped when a file in
            # the root directory legitimately owns that key.
            legacy_key = path.name
            entry = db.get(legacy_key)

        if (
            entry is not None
            and entry.get("size") == size
            and entry.get("mtime") == mtime
        ):
            # Unchanged; re-key a migrated legacy entry under the relative path
            db[key] = entry
            if legacy_key is not None:
                db.pop(legacy_key, None)
            continue

        is_new = entry is None
        if is_new:
            print(f"Nový soubor → MD5: {key}")
        else:
            print(f"Změněný soubor → MD5: {key}")

        try:
            digest = md5_file(path)
        except OSError as e:
            print(f"⚠ Soubor nelze přečíst, přeskakuji: {path} ({e})")
            continue

        db[key] = {
            "size": size,
            "mtime": mtime,
            "md5": digest,
        }
        if legacy_key is not None:
            # The legacy entry is superseded by the relative-path entry
            db.pop(legacy_key, None)

        if is_new:
            new_files += 1
        else:
            changed_files += 1

    # Save the database
    save_db(db, db_path)

    print("\n=== Výsledky ===")
    print(f" Nové soubory: {new_files}")
    print(f" Změněné soubory: {changed_files}")
    print(f" Celkem v DB: {len(db)}")
    print(f" Databáze: {db_path}")
    print("=============================\n")
|
||||
|
||||
|
||||
def main():
    """Run ``process_directory`` for every entry of ``DIRECTORIES``, in order."""
    for root in DIRECTORIES:
        process_directory(root)
|
||||
|
||||
|
||||
# Run the database update only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
158
MedevioPřílohykontroly/10 spočítejMD5AoznačsouboryAadresáře.py
Normal file
158
MedevioPřílohykontroly/10 spočítejMD5AoznačsouboryAadresáře.py
Normal file
@@ -0,0 +1,158 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
from pathlib import Path
|
||||
import time
|
||||
import traceback
|
||||
|
||||
|
||||
# ======= CONFIG =======

# Directory tree that run_matcher() scans; matching files and fully processed
# folders inside it get the "▲" marker.
MP_DIR = Path(r"U:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP")

# MD5 databases whose hashes the files in MP_DIR are compared against.
JSON_PATHS = [
    Path(r"U:\Dropbox\Ordinace\LAB-PDF\processed_files.json"),
    Path(r"U:\Dropbox\Ordinace\Dokumentace_zpracovaná\processed_files.json"),
]

# Number of bytes read per call while hashing a file (64 KiB).
CHUNK = 65536
# A progress line is printed after every PRINT_EVERY scanned files.
PRINT_EVERY = 50
# ======================
|
||||
|
||||
|
||||
def try_rename(old_path: Path, new_path: Path, retries: int = 5, delay: int = 5):
    """Rename *old_path* to *new_path*, retrying on transient failures.

    An existing *new_path* is never overwritten: ``Path.rename`` silently
    replaces an existing file on POSIX, so the target is checked first.
    Errors that cannot be cured by waiting (source missing, target already
    present) end the attempt immediately; any other ``OSError`` (for example
    a file temporarily locked by another process) is retried.

    Args:
        old_path: Existing file or directory.
        new_path: Desired new path.
        retries: Maximum number of rename attempts.
        delay: Seconds to sleep between attempts.

    Returns:
        True when the rename succeeded, False otherwise.
    """
    if new_path.exists():
        print(f"⚠ Rename skipped, target already exists: {new_path}")
        return False

    for attempt in range(1, retries + 1):
        try:
            old_path.rename(new_path)
            return True
        except (FileNotFoundError, FileExistsError) as e:
            # Permanent conditions: waiting and retrying cannot help
            print(f"⚠ Rename failed ({attempt}/{retries}): {e}")
            print(" ❌ Not retryable. Skipping.")
            return False
        except OSError as e:
            print(f"⚠ Rename failed ({attempt}/{retries}): {e}")
            if attempt < retries:
                print(f" Waiting {delay}s before retry...")
                time.sleep(delay)
            else:
                print(" ❌ Maximum retries reached. Skipping.")
                traceback.print_exc()

    # All attempts failed (or retries was not positive)
    return False
|
||||
|
||||
|
||||
def md5_file(path: Path) -> str:
    """Compute the MD5 checksum of the file at *path* as a hex string.

    Reads the file in binary mode, ``CHUNK`` bytes at a time, feeding each
    piece into the hash so large files are not loaded into memory at once.
    """
    hasher = hashlib.md5()
    with path.open("rb") as source:
        piece = source.read(CHUNK)
        while piece:
            hasher.update(piece)
            piece = source.read(CHUNK)
    return hasher.hexdigest()
|
||||
|
||||
|
||||
def load_all_md5(json_paths):
    """Collect the MD5 hashes stored in all given JSON databases.

    Each database is expected to be a JSON object whose values are objects
    carrying an ``"md5"`` string. Missing files, unreadable or malformed
    files and files whose top level is not an object are reported and
    skipped. Individual entries without a usable ``"md5"`` are skipped (and
    counted in a warning) instead of discarding the rest of the file.

    Args:
        json_paths: Iterable of ``Path`` objects pointing to the databases.

    Returns:
        Set of all MD5 hex strings found.
    """
    md5_set = set()
    for jp in json_paths:
        if not jp.exists():
            print(f"⚠ JSON nenalezen: {jp}")
            continue

        # Only reading and decoding can fail here; keep the try block minimal
        try:
            with jp.open("r", encoding="utf-8") as f:
                db = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError
            print(f"❌ Chyba při čtení {jp}: {e}")
            continue

        if not isinstance(db, dict):
            print(
                f"❌ Chyba při čtení {jp}: "
                f"očekáván JSON objekt, nalezen {type(db).__name__}"
            )
            continue

        found = [
            info["md5"]
            for info in db.values()
            if isinstance(info, dict) and isinstance(info.get("md5"), str)
        ]
        md5_set.update(found)
        print(f"Načteno {len(db)} záznamů z {jp}")

        skipped = len(db) - len(found)
        if skipped:
            print(f"⚠ Přeskočeno {skipped} záznamů bez MD5 v {jp}")

    print(f"➡ Celkem MD5 hashů: {len(md5_set)}\n")
    return md5_set
|
||||
|
||||
|
||||
def mark_folders_if_all_marked(root: Path, dryrun: bool):
    """Mark every folder below *root* whose direct files are all marked.

    A folder qualifies when it directly contains at least one file and every
    such file name starts with "▲" (sub-folders are not considered). The
    marker is inserted into the folder name at index 10, or appended when
    the name is shorter.
    NOTE(review): index 10 presumably follows a date-like prefix such as
    "2024-01-15" — confirm against the real folder naming.

    Fixes over the previous version:
      * folders are handled deepest-first, so renaming a parent no longer
        invalidates the already collected paths of its sub-folders;
      * short names (10 characters or fewer) that already end with the
        marker are recognised as marked, instead of getting one more "▲"
        appended on every run;
      * the "✔" line is printed only after a successful rename; a dry run
        prints a "[DRYRUN]" line instead.

    Args:
        root: Top directory; *root* itself is never renamed.
        dryrun: When True, only report what would be renamed.
    """
    print("\n=== KONTROLA ADRESÁŘŮ — OZNAČENÍ PLNĚ HOTOVÝCH ===")

    marker = "▲"
    insert_pos = 10

    # Snapshot the tree and sort in reverse: a parent path always sorts before
    # its children, so reversing yields children before their parents.
    for folder in sorted(root.rglob("*"), reverse=True):
        if not folder.is_dir():
            continue

        files = [f for f in folder.iterdir() if f.is_file()]
        if not files:
            continue

        # Are all files marked?
        if not all(f.name.startswith(marker) for f in files):
            continue

        name = folder.name

        # Folder already marked? Either the marker sits at the insert
        # position, or the name was short and the marker was appended.
        if name[insert_pos:insert_pos + 1] == marker:
            continue
        if len(name) <= insert_pos and name.endswith(marker):
            continue

        # Slicing appends the marker automatically when the name is short
        new_name = name[:insert_pos] + marker + name[insert_pos:]
        new_path = folder.parent / new_name

        if dryrun:
            print(f"[DRYRUN] Označil bych adresář: {name} → {new_name}")
        elif try_rename(folder, new_path):
            print(f"✔ Adresář označen: {name} → {new_name}")
|
||||
|
||||
|
||||
def run_matcher(dryrun: bool = True):
    """Mark files in ``MP_DIR`` whose MD5 is listed in the JSON databases.

    All hashes from ``JSON_PATHS`` are loaded with ``load_all_md5``. Every
    file below ``MP_DIR`` that is not marked yet is hashed; when its MD5 is
    known, the file is renamed to "▲" + name (or only reported in a dry
    run). Afterwards ``mark_folders_if_all_marked`` marks fully processed
    folders, and a summary is printed.

    Changes over the previous version:
      * files already starting with "▲" are skipped before hashing (they
        were hashed and then skipped anyway);
      * the progress speed no longer divides by zero when the clock has not
        advanced, matching the guard used for the final speed;
      * an unreadable file is reported and skipped instead of aborting;
      * the directory listing is snapshotted before files are renamed
        inside the directories being walked.

    Args:
        dryrun: When True, nothing is renamed; intended renames are printed.
    """
    print("\n=== MATCHER V3 — SOUBORY + ADRESÁŘE ===")
    print(f"Režim: {'DRYRUN (simulace)' if dryrun else 'OSTRÝ'}\n")

    all_md5 = load_all_md5(JSON_PATHS)

    counter = 0
    renamed = 0
    start = time.time()

    # Materialise the listing first: renaming entries while the generator is
    # still walking the same directories could yield renamed files again.
    for file in list(MP_DIR.rglob("*")):
        if not file.is_file():
            continue

        counter += 1

        if counter % PRINT_EVERY == 0:
            elapsed = time.time() - start
            speed = counter / elapsed if elapsed > 0 else 0
            print(f" {counter} soub. ({speed:.1f}/s)")

        # Already marked: no need to read and hash the file at all
        if file.name.startswith("▲"):
            continue

        try:
            md5 = md5_file(file)
        except OSError as e:
            print(f"⚠ Soubor nelze přečíst, přeskakuji: {file} ({e})")
            continue

        if md5 not in all_md5:
            continue

        new_name = "▲" + file.name
        new_path = file.parent / new_name

        if dryrun:
            print(f"[DRYRUN] Označil bych: {file.name} → {new_name}")
        elif try_rename(file, new_path):
            # Rename with retry; count and report only real successes
            renamed += 1
            print(f"✔ {file.name} → {new_name}")

    # Mark the folders
    mark_folders_if_all_marked(MP_DIR, dryrun)

    total_time = time.time() - start
    speed = counter / total_time if counter and total_time > 0 else 0

    print("\n=== MATCHER HOTOVO ===")
    print(f" Zkontrolováno: {counter} souborů")
    print(f" Označeno: {renamed}")
    print(f" Rychlost: {speed:.1f} soub./s")
    print(f" Režim: {'DRYRUN' if dryrun else 'OSTRÝ'}\n")
|
||||
|
||||
|
||||
# Script entry point. Note that the matcher is started with dryrun=False here,
# i.e. in live mode where files and folders are really renamed, although the
# function itself defaults to a dry run.
if __name__ == "__main__":
    run_matcher(dryrun=False)
|
||||
Reference in New Issue
Block a user