#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Export Medevio request attachments from MySQL BLOBs into per-request folders.

A single query loads every attachment row (metadata + BLOB).  Rows are grouped
per request; for each request, any previously created folders are merged into
the canonical one, files no longer present in MySQL are removed, and missing
attachments are written to disk.
"""

import os
import re
import shutil
from collections import defaultdict
from datetime import datetime
from pathlib import Path

import pymysql

# ==============================
# ⚙️ CONFIGURATION
# ==============================
# NOTE(review): DB credentials are hard-coded in source — prefer loading them
# from environment variables or an untracked config file.
DB_CONFIG = {
    "host": "192.168.1.76",
    "port": 3307,
    "user": "root",
    "password": "Vlado9674+",
    "database": "medevio",
    "charset": "utf8mb4",
}

BASE_DIR = Path(r"u:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP")
BASE_DIR.mkdir(parents=True, exist_ok=True)


def sanitize_name(name: str) -> str:
    """Return *name* with characters illegal in Windows filenames replaced by '_'."""
    return re.sub(r'[<>:"/\\|?*\x00-\x1F]', "_", name).strip()


def clean_folder(folder: Path, valid_files: set) -> None:
    """Remove files that do NOT exist in MySQL for this request.

    *valid_files* holds the sanitized filenames that are allowed to remain.
    """
    if not folder.exists():
        return
    for f in folder.iterdir():
        # Compare sanitized names so on-disk files match the DB-derived set.
        if f.is_file() and sanitize_name(f.name) not in valid_files:
            print(f"🗑️ Removing unexpected file: {f.name}")
            try:
                f.unlink()
            except Exception as e:
                # Best effort: a locked/undeletable file must not abort the export.
                print(f"⚠️ Cannot delete {f}: {e}")


def load_rows() -> list:
    """Fetch all attachment rows (metadata + BLOBs) with one MySQL query.

    Returns dict rows ordered newest request first, oldest file first within
    each request.  The connection is closed even if the query fails (the
    original leaked it on error).
    """
    conn = pymysql.connect(**DB_CONFIG)
    try:
        with conn.cursor(pymysql.cursors.DictCursor) as cur:
            print("📥 Loading ALL metadata + BLOBs with ONE MySQL query…")
            cur.execute("""
                SELECT
                    d.id AS download_id,
                    d.request_id,
                    d.filename,
                    d.file_content,
                    p.updatedAt AS req_updated_at,
                    p.pacient_jmeno AS jmeno,
                    p.pacient_prijmeni AS prijmeni
                FROM medevio_downloads d
                JOIN pozadavky p ON d.request_id = p.id
                ORDER BY p.updatedAt DESC, d.created_at ASC
            """)
            return cur.fetchall()
    finally:
        conn.close()


def export_request(req_id, filelist) -> None:
    """Merge/clean the folders of one request and write its missing files.

    *filelist* is the non-empty list of DB rows belonging to *req_id*.
    """
    any_row = filelist[0]
    # Fall back to "now" when the request carries no updatedAt timestamp.
    updated_at = any_row["req_updated_at"] or datetime.now()
    date_str = updated_at.strftime("%Y-%m-%d")

    prijmeni = sanitize_name(any_row["prijmeni"] or "Unknown")
    jmeno = sanitize_name(any_row["jmeno"] or "")
    folder_name = sanitize_name(f"{date_str} {prijmeni}, {jmeno} {req_id}")
    main_folder = BASE_DIR / folder_name

    # Sanitized filenames that are allowed to exist on disk for this request.
    valid_files = {sanitize_name(r["filename"]) for r in filelist}

    # Older folders for the same request (created under a previous
    # updatedAt/name).  str() guards against a numeric id column — the
    # original `req_id in f.name` raises TypeError for a non-str id.
    req_marker = str(req_id)
    possible_dups = [
        f for f in BASE_DIR.iterdir()
        if f.is_dir() and req_marker in f.name and f != main_folder
    ]

    # Merge each old folder into the canonical one, then remove it.
    for dup in possible_dups:
        print(f"♻️ Merging folder: {dup.name}")
        clean_folder(dup, valid_files)
        main_folder.mkdir(parents=True, exist_ok=True)
        for f in dup.iterdir():
            if f.is_file():
                target = main_folder / f.name
                if not target.exists():
                    f.rename(target)
        shutil.rmtree(dup, ignore_errors=True)

    main_folder.mkdir(parents=True, exist_ok=True)
    clean_folder(main_folder, valid_files)

    # Write files that are missing on disk; skip rows with an empty BLOB.
    for r in filelist:
        filename = sanitize_name(r["filename"])
        dest = main_folder / filename
        if dest.exists():
            continue
        content = r["file_content"]
        if not content:
            continue
        with open(dest, "wb") as fh:
            fh.write(content)
        print(f"💾 Saved: {dest.relative_to(BASE_DIR)}")


def main() -> None:
    """Run the full export: load, group per request, write each request."""
    rows = load_rows()
    print(f"📦 Loaded {len(rows)} total file rows.\n")

    # Group rows per request; dict insertion order preserves the query's
    # newest-request-first ordering.
    requests = defaultdict(list)
    for r in rows:
        requests[r["request_id"]].append(r)
    print(f"📌 Unique requests: {len(requests)}\n")

    for req_id, filelist in requests.items():
        export_request(req_id, filelist)

    print("\n🎯 Export complete.\n")


if __name__ == "__main__":
    main()