#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Export attachments stored in the ``medevio_downloads`` table to disk.

For every request that has at least one attachment the script:

1. derives the target folder name from the request's ``updatedAt`` date,
   the patient's name and the request id,
2. merges older folders that belong to the same request into that folder,
3. deletes files that are not attachments of the request, and
4. writes every attachment that is still missing on disk.
"""

import os
import re
import shutil
import time
from datetime import datetime
from pathlib import Path

# ==============================
# ⚙️ CONFIGURATION
# ==============================
# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment or from a config file kept out of version control.
DB_CONFIG = {
    "host": "192.168.1.76",
    "port": 3307,
    "user": "root",
    "password": "Vlado9674+",
    "database": "medevio",
    "charset": "utf8mb4",
}

BASE_DIR = Path(r"u:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP1")

# One row per attachment, newest request first; blobs are deliberately not
# selected here so that the metadata load stays small.
METADATA_QUERY = """
    SELECT d.id AS download_id,
           d.request_id,
           d.filename,
           d.created_at,
           p.updatedAt AS req_updated_at,
           p.pacient_jmeno AS jmeno,
           p.pacient_prijmeni AS prijmeni
    FROM medevio_downloads d
    JOIN pozadavky p ON d.request_id = p.id
    ORDER BY p.updatedAt DESC
"""

BLOB_QUERY = (
    "SELECT file_content FROM medevio_downloads "
    "WHERE request_id=%s AND filename=%s"
)


def sanitize_name(name: str) -> str:
    """Replace invalid filename characters with underscore."""
    return re.sub(r'[<>:"/\\|?*\x00-\x1F]', "_", name).strip()


# ==============================
# 🧹 DELETE UNEXPECTED FILES
# ==============================
def clean_folder(folder: Path, valid_files: set):
    """Remove all files in folder that are NOT present in valid_files.

    ``valid_files`` holds sanitized file names. Sub-directories are left
    untouched, and a missing ``folder`` is a no-op. Files that cannot be
    deleted are reported and skipped.
    """
    if not folder.exists():
        return
    # Snapshot the listing: entries are unlinked while we walk it.
    for entry in list(folder.iterdir()):
        if not entry.is_file() or sanitize_name(entry.name) in valid_files:
            continue
        print(f"🗑️ Removing unexpected file: {entry.name}")
        try:
            entry.unlink()
        except OSError as e:
            print(f"⚠️ Could not delete {entry}: {e}")


# ==============================
# 🧩 PURE HELPERS
# ==============================
def group_rows_by_request(rows) -> dict:
    """Group metadata rows by request id, keeping first-seen order.

    Returns ``{request_id: (first_row, [filename, ...])}``. The first row of a
    request supplies its folder metadata; the list holds the filenames of all
    its attachments, which replaces one extra ``SELECT filename`` per request.
    """
    grouped = {}
    for row in rows:
        _, filenames = grouped.setdefault(row["request_id"], (row, []))
        filenames.append(row["filename"])
    return grouped


def build_file_map(filenames) -> dict:
    """Map each sanitized on-disk name to the original DB filename.

    The original name is needed to look the blob up again; querying with the
    sanitized name would miss every attachment whose name had to be changed.
    Empty/NULL filenames are skipped, and the first filename wins when two
    names sanitize to the same on-disk name.
    """
    file_map = {}
    for original in filenames:
        if not original:
            continue
        file_map.setdefault(sanitize_name(original), original)
    return file_map


def request_folder_name(row) -> str:
    """Build the ``"<date> <surname>, <first name> <request id>"`` folder name.

    Falls back to the current date when ``req_updated_at`` is empty and to
    ``"Unknown"`` when the surname is empty.
    """
    updated_at = row["req_updated_at"] or datetime.now()
    date_str = updated_at.strftime("%Y-%m-%d")
    prijmeni = sanitize_name(row["prijmeni"] or "Unknown")
    jmeno = sanitize_name(row["jmeno"] or "")
    return sanitize_name(f"{date_str} {prijmeni}, {jmeno} {row['request_id']}")


def find_duplicate_folders(base_dir: Path, req_id, main_folder: Path) -> list:
    """Return folders in base_dir whose name contains req_id, except main_folder.

    An empty id yields no matches; otherwise it would match every folder and
    all of them would be merged and deleted.
    """
    token = str(req_id)
    if not token:
        return []
    # NOTE(review): this is a substring match. It is safe for long unique ids
    # but a short numeric id could match unrelated folders - confirm the id
    # format before relying on it.
    return [
        entry
        for entry in base_dir.iterdir()
        if entry.is_dir() and token in entry.name and entry != main_folder
    ]


# ==============================
# ♻️ FOLDER MAINTENANCE
# ==============================
def merge_duplicate_folder(dup: Path, main_folder: Path, valid_files: set):
    """Move the valid files of dup into main_folder, then delete dup.

    Files already present in main_folder are not overwritten; whatever remains
    in dup is removed together with the folder.
    """
    print(f"♻️ Merging duplicate folder: {dup.name}")

    # 1) Clean unexpected files in dup
    clean_folder(dup, valid_files)

    # 2) Move files from dup to main folder
    main_folder.mkdir(parents=True, exist_ok=True)
    # Snapshot the listing: entries are renamed away while we walk it.
    for entry in list(dup.iterdir()):
        if entry.is_file():
            target = main_folder / entry.name
            if not target.exists():
                entry.rename(target)

    # 3) Remove the duplicate folder (errors are reported, not hidden)
    try:
        shutil.rmtree(dup)
    except OSError as e:
        print(f"⚠️ Could not delete duplicate folder {dup}: {e}")


def export_missing_files(cur_blob, req_id, main_folder: Path, file_map: dict):
    """Write every attachment from file_map that is not yet in main_folder.

    ``file_map`` maps the on-disk name to the DB filename. The blob is fetched
    only for files that are actually missing; rows without content are skipped.
    """
    for disk_name, db_name in file_map.items():
        dest = main_folder / disk_name
        if dest.exists():
            continue

        # fetch blob only now
        start = time.perf_counter()
        cur_blob.execute(BLOB_QUERY, (req_id, db_name))
        row = cur_blob.fetchone()
        if not row:
            continue
        print(f"⏱ Took {time.perf_counter() - start:.4f} seconds")

        content = row[0]
        if not content:
            continue

        dest.write_bytes(content)
        print(f"💾 Wrote: {dest.relative_to(main_folder.parent)}")


def process_request(cur_blob, meta, filenames):
    """Bring the folder of a single request in sync with the database."""
    req_id = meta["request_id"]
    file_map = build_file_map(filenames)
    valid_files = set(file_map)
    main_folder = BASE_DIR / request_folder_name(meta)

    for dup in find_duplicate_folders(BASE_DIR, req_id, main_folder):
        merge_duplicate_folder(dup, main_folder, valid_files)

    clean_folder(main_folder, valid_files)

    main_folder.mkdir(parents=True, exist_ok=True)
    export_missing_files(cur_blob, req_id, main_folder, file_map)


# ==============================
# 🧠 MAIN
# ==============================
def main() -> None:
    """Connect to the database and export the attachments of every request."""
    # Imported here so the helpers above stay importable without the driver.
    import pymysql

    BASE_DIR.mkdir(parents=True, exist_ok=True)

    conn = pymysql.connect(**DB_CONFIG)
    try:
        # Cursors are closed on exit from the with-block, even on error.
        with conn.cursor(pymysql.cursors.DictCursor) as cur_meta, \
                conn.cursor() as cur_blob:
            print("🔍 Loading metadata from DB (FAST)…")
            cur_meta.execute(METADATA_QUERY)
            rows = cur_meta.fetchall()
            print(f"📋 Found {len(rows)} attachment records.\n")

            for meta, filenames in group_rows_by_request(rows).values():
                process_request(cur_blob, meta, filenames)

        print("\n🎯 Export complete.\n")
    finally:
        conn.close()


if __name__ == "__main__":
    main()