#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Export recent attachments from the ``medevio`` database into folders.

For every request updated in the last 14 days the script makes sure that one
folder under ``BASE_DIR`` holds exactly the attachments the database lists:
duplicate folders are merged, unexpected files are removed and missing files
are written from the stored blobs.  Files and folders carrying the ``▲``
marker are left alone, except that the marker is removed from a folder name
once new files have been added to it.
"""

import os
import re
import shutil
import sys
import time
from collections import defaultdict
from datetime import datetime
from pathlib import Path


def force_utf8_stdio() -> None:
    """Force UTF-8 output even under Windows Task Scheduler."""
    try:
        sys.stdout.reconfigure(encoding='utf-8')
        sys.stderr.reconfigure(encoding='utf-8')
    except AttributeError:
        # Python < 3.7 has no reconfigure(); re-wrap the raw buffers instead.
        import io
        sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
        sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')


# ==============================
# 🛡 SAFE PRINT FOR CP1250 / EMOJI
# ==============================
def safe_print(text: str = "") -> None:
    """Print *text*, degrading gracefully when stdout cannot encode it.

    On a non-UTF stdout, characters outside the Basic Multilingual Plane
    (most emoji) are stripped first.  If printing still fails with
    ``UnicodeEncodeError`` the text is reduced to plain ASCII.
    """
    enc = sys.stdout.encoding or ""
    if not enc.lower().startswith("utf"):
        # Strip emoji and characters outside BMP for Task Scheduler
        text = ''.join(ch for ch in text if ord(ch) < 65536)
    try:
        print(text)
    except UnicodeEncodeError:
        # ASCII fallback
        text = ''.join(ch for ch in text if ord(ch) < 128)
        print(text)


# ==============================
# ⚙️ CONFIGURATION
# ==============================
# NOTE(review): credentials are hard-coded in the source file; consider
# loading them from the environment or a config file kept out of version
# control.
DB_CONFIG = {
    "host": "192.168.1.76",
    "port": 3306,
    "user": "root",
    "password": "Vlado9674+",
    "database": "medevio",
    "charset": "utf8mb4",
}

BASE_DIR = Path(r"u:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP")

# Metadata for every attachment of a request updated in the last 14 days.
METADATA_SQL = """
    SELECT d.id AS download_id,
           d.request_id,
           d.filename,
           d.created_at,
           p.updatedAt AS req_updated_at,
           p.pacient_jmeno AS jmeno,
           p.pacient_prijmeni AS prijmeni,
           p.displayTitle
    FROM medevio_downloads d
    JOIN pozadavky p ON d.request_id = p.id
    WHERE p.updatedAt >= DATE_SUB(NOW(), INTERVAL 14 DAY)
    ORDER BY p.updatedAt DESC
"""

# Compiled once; these helpers run for every row and every file on disk.
_INVALID_NAME_CHARS = re.compile(r'[<>:"/\\|?*\x00-\x1F]')
_ABBREV_WORDS = re.compile(r"[A-Za-zÁ-Žá-ž0-9]+")


def sanitize_name(name: str) -> str:
    """Replace invalid filename characters with underscore."""
    return _INVALID_NAME_CHARS.sub("_", name).strip()


def make_abbrev(title: str) -> str:
    """Return an upper-case abbreviation of *title*.

    Each word contributes its first character; words made only of digits are
    kept whole.  An empty or ``None`` title yields ``""``.
    """
    if not title:
        return ""
    words = _ABBREV_WORDS.findall(title)
    return "".join(w if w.isdigit() else w[0] for w in words).upper()


def folder_matches_request(folder_name: str, req_id_str: str) -> bool:
    """Return True when *folder_name* contains *req_id_str* as a whole token.

    A plain substring test would let id ``12`` match a folder created for id
    ``123`` or the date ``2024-12-01``, and that folder's files would then be
    deleted as "unexpected".  The id must therefore not be adjacent to
    another ASCII letter, digit or hyphen.
    """
    pattern = rf"(?<![0-9A-Za-z-]){re.escape(req_id_str)}(?![0-9A-Za-z-])"
    return re.search(pattern, folder_name) is not None


def build_folder_name(row: dict, req_id) -> str:
    """Build the folder name ``<date> <surname>, <name> [<abbr>] <req_id>``.

    *row* is one metadata row of the request; the current time is used when
    the row has no ``req_updated_at`` and ``"Unknown"`` when it has no
    surname.
    """
    updated_at = row["req_updated_at"] or datetime.now()
    date_str = updated_at.strftime("%Y-%m-%d")
    prijmeni = sanitize_name(row["prijmeni"] or "Unknown")
    jmeno = sanitize_name(row["jmeno"] or "")
    abbr = make_abbrev(row.get("displayTitle") or "")
    return sanitize_name(f"{date_str} {prijmeni}, {jmeno} [{abbr}] {req_id}")


# ==============================
# 🧹 DELETE UNEXPECTED FILES
# ==============================
def clean_folder(folder: Path, valid_files: set) -> None:
    """Delete files in *folder* whose sanitized name is not in *valid_files*.

    Files whose name starts with ``▲`` are never touched, and sub-directories
    are ignored.  A file that cannot be deleted is reported and skipped.
    """
    if not folder.exists():
        return

    for f in folder.iterdir():
        if not f.is_file() or f.name.startswith("▲"):
            continue
        if sanitize_name(f.name) in valid_files:
            continue
        safe_print(f"🗑️ Removing unexpected file: {f.name}")
        try:
            f.unlink()
        except OSError as e:
            safe_print(f"⚠️ Could not delete {f}: {e}")


def merge_duplicate_folder(dup: Path, main_folder: Path, valid_files: set) -> None:
    """Move the valid files of *dup* into *main_folder*, then remove *dup*.

    Files already present in *main_folder* are not overwritten; whatever is
    left in *dup* afterwards is deleted together with the folder.
    """
    safe_print(f"♻️ Merging duplicate folder: {dup.name}")
    clean_folder(dup, valid_files)
    main_folder.mkdir(parents=True, exist_ok=True)

    for f in dup.iterdir():
        if f.is_file():
            target = main_folder / f.name
            if not target.exists():
                f.rename(target)

    shutil.rmtree(dup, ignore_errors=True)


def download_missing_files(cur_blob, req_id, main_folder: Path, file_map: dict) -> bool:
    """Write attachments that are not yet present in *main_folder*.

    *file_map* maps sanitized file names to the original DB file names.  A
    file counts as present when it exists either under its plain name or
    with a ``▲`` prefix.  Returns True when at least one file was written.
    """
    missing_san = [
        fn for fn in file_map
        if not (main_folder / fn).exists()
        and not (main_folder / ("▲" + fn)).exists()
    ]
    if not missing_san:
        return False

    # Fetch all missing blobs in a single query instead of one per file
    missing_orig = [file_map[fn] for fn in missing_san]
    placeholders = ",".join(["%s"] * len(missing_orig))
    cur_blob.execute(
        f"SELECT filename, file_content FROM medevio_downloads "
        f"WHERE request_id=%s AND filename IN ({placeholders})",
        [req_id] + missing_orig,
    )

    added_new_file = False
    for blob_filename, content in cur_blob.fetchall():
        if not content:
            continue
        dest_plain = main_folder / sanitize_name(blob_filename)
        with open(dest_plain, "wb") as fh:
            fh.write(content)
        safe_print(f"💾 Wrote: {dest_plain.relative_to(BASE_DIR)}")
        added_new_file = True
    return added_new_file


def remove_folder_flag(main_folder: Path) -> Path:
    """Rename *main_folder* without the ``▲`` marker and return its path.

    The original path is returned when the name carries no marker or when
    the rename fails (the failure is reported, not raised).
    """
    if "▲" not in main_folder.name:
        return main_folder

    new_name = main_folder.name.replace("▲", "").strip()
    new_path = main_folder.parent / new_name
    if new_path == main_folder:
        return main_folder

    try:
        main_folder.rename(new_path)
    except OSError as e:
        safe_print(f"⚠️ Could not rename folder: {e}")
        return main_folder

    safe_print(f"🔄 Folder flag ▲ removed → {new_name}")
    return new_path


def process_request(cur_blob, req_id, req_rows: list, folder_list: list) -> None:
    """Synchronise the folder of one request with its attachment rows.

    *req_rows* are the metadata rows of the request and *folder_list* is the
    pre-built ``(path, name)`` index of the directories in ``BASE_DIR``.
    """
    # ========== VALID FILENAMES from already-loaded rows ==========
    # sanitized name → original filename (needed for the blob query later).
    # Rows without a filename cannot be stored on disk, so they are skipped
    # instead of crashing sanitize_name().
    file_map = {
        sanitize_name(r["filename"]): r["filename"]
        for r in req_rows
        if r["filename"] is not None
    }
    if not file_map:
        safe_print(f"⚠️ Request {req_id} has no named attachments, skipping.")
        return
    valid_files = set(file_map)

    # ========== DETECT EXISTING FOLDER from pre-built index ==========
    req_id_str = str(req_id)
    matching = [
        f for f, name in folder_list
        if folder_matches_request(name, req_id_str)
    ]
    if matching:
        main_folder = matching[0]
    else:
        main_folder = BASE_DIR / build_folder_name(req_rows[0], req_id)

    # ========== MERGE DUPLICATES ==========
    for dup in matching[1:]:
        merge_duplicate_folder(dup, main_folder, valid_files)

    # ========== CLEAN MAIN FOLDER ==========
    clean_folder(main_folder, valid_files)

    # ========== DOWNLOAD MISSING FILES (batch blob fetch per request) ==========
    main_folder.mkdir(parents=True, exist_ok=True)
    added_new_file = download_missing_files(cur_blob, req_id, main_folder, file_map)

    # ========== REMOVE ▲ FLAG IF NEW FILES ADDED ==========
    if added_new_file:
        remove_folder_flag(main_folder)


def export_attachments(cur_meta, cur_blob) -> None:
    """Load the attachment metadata and process every request it mentions.

    *cur_meta* must return rows as dicts; *cur_blob* returns plain tuples.
    """
    safe_print("🔍 Loading metadata from DB (FAST)…")
    cur_meta.execute(METADATA_SQL)
    rows = cur_meta.fetchall()
    safe_print(f"📋 Found {len(rows)} attachment records.\n")

    # Group rows by request_id in Python — avoids N extra SELECT filename queries
    rows_by_request = defaultdict(list)
    for r in rows:
        rows_by_request[r["request_id"]].append(r)

    total_requests = len(rows_by_request)
    safe_print(f"🔄 Processing {total_requests} unique requests...\n")

    # Pre-index BASE_DIR once — avoids iterdir() called twice per request
    folder_list = [(f, f.name) for f in BASE_DIR.iterdir() if f.is_dir()]

    for current_index, (req_id, req_rows) in enumerate(rows_by_request.items(), 1):
        percent = (current_index / total_requests) * 100
        safe_print(
            f"\n[ {percent:5.1f}% ] Processing request "
            f"{current_index} / {total_requests} → {req_id}"
        )
        process_request(cur_blob, req_id, req_rows, folder_list)

    safe_print("\n🎯 Export complete.\n")


def main() -> None:
    """Connect to the database, run the export and always release the connection."""
    # Imported here rather than at module level so the pure helpers above can
    # be imported on a machine that has no database driver installed.
    import pymysql

    force_utf8_stdio()
    BASE_DIR.mkdir(parents=True, exist_ok=True)

    # ==============================
    # 📦 DB CONNECTION
    # ==============================
    conn = pymysql.connect(**DB_CONFIG)
    try:
        cur_meta = conn.cursor(pymysql.cursors.DictCursor)
        cur_blob = conn.cursor()
        try:
            export_attachments(cur_meta, cur_blob)
        finally:
            cur_blob.close()
            cur_meta.close()
    finally:
        # Close the connection even when the export raised part-way through.
        conn.close()


if __name__ == "__main__":
    main()