From 7c08ad8e35ef8dff87de1f41bcaba5719579cff4 Mon Sep 17 00:00:00 2001
From: "vladimir.buzalka"
Date: Thu, 29 Jan 2026 11:34:52 +0100
Subject: [PATCH] Z230

---
 12 Tower1/50 SaveToFileSystem incremental.py | 248 +++++++++++++++++++
 12 Tower1/50 SaveToFileSystem single step.py | 171 +++++++++++++
 2 files changed, 419 insertions(+)
 create mode 100644 12 Tower1/50 SaveToFileSystem incremental.py
 create mode 100644 12 Tower1/50 SaveToFileSystem single step.py

diff --git a/12 Tower1/50 SaveToFileSystem incremental.py b/12 Tower1/50 SaveToFileSystem incremental.py
new file mode 100644
index 0000000..831ceb9
--- /dev/null
+++ b/12 Tower1/50 SaveToFileSystem incremental.py
@@ -0,0 +1,248 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""Incrementally export Medevio attachments from MySQL to per-request folders.
+
+For every request that has rows in ``medevio_downloads`` the script finds or
+creates one folder under ``BASE_DIR``, merges duplicate folders of the same
+request into it, deletes files that are not listed in the database and writes
+the attachments that are still missing on disk. Files whose name starts with
+"▲" are never deleted, and the "▲" flag in a folder name is removed as soon
+as a new file has been written into that folder.
+"""
+
+import os
+import shutil
+import pymysql
+import re
+from pathlib import Path
+from datetime import datetime
+import time
+import sys
+
+# Force UTF-8 output even under Windows Task Scheduler
+try:
+    sys.stdout.reconfigure(encoding='utf-8')
+    sys.stderr.reconfigure(encoding='utf-8')
+except AttributeError:
+    # Python < 3.7 fallback (not needed for you, but safe)
+    import io
+    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
+    sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
+
+# ==============================
+# 🛡 SAFE PRINT FOR CP1250 / EMOJI
+# ==============================
+def safe_print(text: str = ""):
+    """Print *text*, dropping characters the console encoding cannot show."""
+    enc = sys.stdout.encoding or ""
+    if not enc.lower().startswith("utf"):
+        # Strip emoji and characters outside BMP for Task Scheduler
+        text = ''.join(ch for ch in text if ord(ch) < 65536)
+    try:
+        print(text)
+    except UnicodeEncodeError:
+        # ASCII fallback
+        text = ''.join(ch for ch in text if ord(ch) < 128)
+        print(text)
+
+
+# ==============================
+# ⚙️ CONFIGURATION
+# ==============================
+DB_CONFIG = {
+    "host": "192.168.1.50",
+    "port": 3306,
+    "user": "root",
+    # NOTE(review): the password is committed in plain text. Set the
+    # MEDEVIO_DB_PASSWORD environment variable to override it, then rotate
+    # the secret and remove the literal fallback.
+    "password": os.environ.get("MEDEVIO_DB_PASSWORD", "Vlado9674+"),
+    "database": "medevio",
+    "charset": "utf8mb4",
+}
+
+BASE_DIR = Path(r"u:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP")
+BASE_DIR.mkdir(parents=True, exist_ok=True)
+
+
+def sanitize_name(name: str) -> str:
+    """Replace invalid filename characters with underscore."""
+    return re.sub(r'[<>:"/\\|?*\x00-\x1F]', "_", name).strip()
+
+
+def make_abbrev(title: str) -> str:
+    """Abbreviate *title*: whole numbers plus word initials, upper-cased."""
+    if not title:
+        return ""
+    words = re.findall(r"[A-Za-zÁ-Žá-ž0-9]+", title)
+    return "".join(w if w.isdigit() else w[0] for w in words).upper()
+
+
+# ==============================
+# 🧹 DELETE UNEXPECTED FILES
+# ==============================
+def clean_folder(folder: Path, valid_files: set):
+    """Delete files in *folder* whose sanitized name is not in *valid_files*.
+
+    Files starting with "▲" are always kept; subfolders are not touched.
+    """
+    if not folder.exists():
+        return
+
+    for f in folder.iterdir():
+        if f.is_file():
+            if f.name.startswith("▲"):
+                continue
+            sanitized = sanitize_name(f.name)
+            if sanitized not in valid_files:
+                safe_print(f"🗑️ Removing unexpected file: {f.name}")
+                try:
+                    f.unlink()
+                except Exception as e:
+                    safe_print(f"⚠️ Could not delete {f}: {e}")
+
+
+# ==============================
+# 📦 DB CONNECTION
+# ==============================
+conn = pymysql.connect(**DB_CONFIG)
+
+try:
+    cur_meta = conn.cursor(pymysql.cursors.DictCursor)
+    cur_blob = conn.cursor()
+
+    safe_print("🔍 Loading metadata from DB (FAST)…")
+
+    cur_meta.execute("""
+        SELECT d.id AS download_id,
+               d.request_id,
+               d.filename,
+               d.created_at,
+               p.updatedAt AS req_updated_at,
+               p.pacient_jmeno AS jmeno,
+               p.pacient_prijmeni AS prijmeni,
+               p.displayTitle
+        FROM medevio_downloads d
+        JOIN pozadavky p ON d.request_id = p.id
+        ORDER BY p.updatedAt DESC
+    """)
+
+    rows = cur_meta.fetchall()
+    safe_print(f"📋 Found {len(rows)} attachment records.\n")
+
+    # ==============================
+    # 🧠 MAIN LOOP WITH PROGRESS
+    # ==============================
+
+    # Group the rows per request; dict order keeps the query order
+    requests = {}
+    for r in rows:
+        requests.setdefault(r["request_id"], []).append(r)
+
+    total_requests = len(requests)
+    safe_print(f"🔄 Processing {total_requests} unique requests...\n")
+
+    for current_index, (req_id, req_rows) in enumerate(requests.items(), 1):
+        r = req_rows[0]
+        percent = (current_index / total_requests) * 100
+
+        safe_print(f"\n[ {percent:5.1f}% ] Processing request {current_index} / {total_requests} → {req_id}")
+
+        # ========== VALID FILENAMES ==========
+        # Map the sanitized on-disk name to the download id so the BLOB is
+        # later fetched by its id; looking it up by the sanitized name
+        # never matched rows whose stored filename had to be sanitized.
+        downloads = {}
+        for dl in req_rows:
+            name = sanitize_name(dl["filename"])
+            downloads.setdefault(name, dl["download_id"])
+        valid_files = set(downloads)
+
+        # ========== BUILD FOLDER NAME ==========
+        updated_at = r["req_updated_at"] or datetime.now()
+        date_str = updated_at.strftime("%Y-%m-%d")
+
+        prijmeni = sanitize_name(r["prijmeni"] or "Unknown")
+        jmeno = sanitize_name(r["jmeno"] or "")
+        title = r.get("displayTitle") or ""
+        abbr = make_abbrev(title)
+
+        clean_folder_name = sanitize_name(
+            f"{date_str} {prijmeni}, {jmeno} [{abbr}] {req_id}"
+        )
+
+        # ========== DETECT EXISTING FOLDER + DUPLICATES ==========
+        # One directory scan: the first match is kept, the rest are merged
+        matching = [
+            f for f in BASE_DIR.iterdir()
+            if f.is_dir() and req_id in f.name
+        ]
+        main_folder = matching[0] if matching else BASE_DIR / clean_folder_name
+        possible_dups = matching[1:]
+
+        # ========== MERGE DUPLICATES ==========
+        for dup in possible_dups:
+            safe_print(f"♻️ Merging duplicate folder: {dup.name}")
+
+            clean_folder(dup, valid_files)
+            main_folder.mkdir(parents=True, exist_ok=True)
+
+            for f in dup.iterdir():
+                if f.is_file():
+                    target = main_folder / f.name
+                    if not target.exists():
+                        f.rename(target)
+
+            shutil.rmtree(dup, ignore_errors=True)
+
+        # ========== CLEAN MAIN FOLDER ==========
+        clean_folder(main_folder, valid_files)
+
+        # ========== DOWNLOAD MISSING FILES ==========
+        added_new_file = False
+        main_folder.mkdir(parents=True, exist_ok=True)
+
+        for filename, download_id in downloads.items():
+            dest_plain = main_folder / filename
+            dest_marked = main_folder / ("▲" + filename)
+
+            if dest_plain.exists() or dest_marked.exists():
+                continue
+
+            cur_blob.execute(
+                "SELECT file_content FROM medevio_downloads WHERE id=%s",
+                (download_id,)
+            )
+            row = cur_blob.fetchone()
+            if not row:
+                continue
+
+            content = row[0]
+            if not content:
+                continue
+
+            with open(dest_plain, "wb") as f:
+                f.write(content)
+
+            # Only a file that was really written counts as new
+            added_new_file = True
+            safe_print(f"💾 Wrote: {dest_plain.relative_to(BASE_DIR)}")
+
+        # ========== REMOVE ▲ FLAG IF NEW FILES ADDED ==========
+        if added_new_file and "▲" in main_folder.name:
+            new_name = main_folder.name.replace("▲", "").strip()
+            new_path = main_folder.parent / new_name
+
+            if new_path != main_folder:
+                try:
+                    main_folder.rename(new_path)
+                    safe_print(f"🔄 Folder flag ▲ removed → {new_name}")
+                    main_folder = new_path
+                except Exception as e:
+                    safe_print(f"⚠️ Could not rename folder: {e}")
+
+    safe_print("\n🎯 Export complete.\n")
+finally:
+    # Release the connection even when the export aborts half-way
+    if conn.open:
+        conn.close()
diff --git a/12 Tower1/50 SaveToFileSystem single step.py b/12 Tower1/50 SaveToFileSystem single step.py
new file mode 100644
index 0000000..65bfd4f
--- /dev/null
+++ b/12 Tower1/50 SaveToFileSystem single step.py
@@ -0,0 +1,171 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""Export all Medevio attachments from MySQL to per-request folders at once.
+
+Metadata and BLOBs are loaded with a single query, grouped per request and
+written under ``BASE_DIR``; files that are not listed in the database are
+deleted and differently named folders of the same request are merged.
+
+NOTE(review): this variant gives no special treatment to the "▲" prefix that
+the incremental exporter preserves - "▲" files are deleted like any other
+file whose name is not a database filename, and "▲" folders are merged away.
+"""
+
+import os
+import shutil
+import pymysql
+import re
+import sys
+from pathlib import Path
+from datetime import datetime
+
+# Force UTF-8 output: the log messages contain emoji that a cp1250 console
+# (e.g. under Windows Task Scheduler) cannot encode
+try:
+    sys.stdout.reconfigure(encoding='utf-8')
+    sys.stderr.reconfigure(encoding='utf-8')
+except AttributeError:
+    # Stream without reconfigure() (Python < 3.7 or a replaced stdout)
+    pass
+
+# ==============================
+# ⚙️ CONFIGURATION
+# ==============================
+DB_CONFIG = {
+    "host": "192.168.1.50",
+    "port": 3306,
+    "user": "root",
+    # NOTE(review): the password is committed in plain text. Set the
+    # MEDEVIO_DB_PASSWORD environment variable to override it, then rotate
+    # the secret and remove the literal fallback.
+    "password": os.environ.get("MEDEVIO_DB_PASSWORD", "Vlado9674+"),
+    "database": "medevio",
+    "charset": "utf8mb4",
+}
+
+BASE_DIR = Path(r"u:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP")
+BASE_DIR.mkdir(parents=True, exist_ok=True)
+
+
+def sanitize_name(name: str) -> str:
+    """Replace invalid filename characters with underscore."""
+    return re.sub(r'[<>:"/\\|?*\x00-\x1F]', "_", name).strip()
+
+
+def clean_folder(folder: Path, valid_files: set):
+    """Remove files that do NOT exist in MySQL for this request."""
+    if not folder.exists():
+        return
+
+    for f in folder.iterdir():
+        if f.is_file() and sanitize_name(f.name) not in valid_files:
+            print(f"🗑️ Removing unexpected file: {f.name}")
+            try:
+                f.unlink()
+            except Exception as e:
+                print(f"⚠️ Cannot delete {f}: {e}")
+
+
+# ==============================
+# 📥 LOAD EVERYTHING IN ONE QUERY
+# ==============================
+conn = pymysql.connect(**DB_CONFIG)
+try:
+    cur = conn.cursor(pymysql.cursors.DictCursor)
+
+    print("📥 Loading ALL metadata + BLOBs with ONE MySQL query…")
+
+    cur.execute("""
+        SELECT
+            d.id AS download_id,
+            d.request_id,
+            d.filename,
+            d.file_content,
+            p.updatedAt AS req_updated_at,
+            p.pacient_jmeno AS jmeno,
+            p.pacient_prijmeni AS prijmeni
+        FROM medevio_downloads d
+        JOIN pozadavky p ON d.request_id = p.id
+        ORDER BY p.updatedAt DESC, d.created_at ASC
+    """)
+
+    rows = cur.fetchall()
+finally:
+    # Close the connection even when the query fails
+    if conn.open:
+        conn.close()
+
+print(f"📦 Loaded {len(rows)} total file rows.\n")
+
+# ==============================
+# 🔄 ORGANIZE ROWS PER REQUEST
+# ==============================
+requests = {}  # req_id → list of file dicts
+
+for r in rows:
+    requests.setdefault(r["request_id"], []).append(r)
+
+print(f"📌 Unique requests: {len(requests)}\n")
+
+# ==============================
+# 🧠 MAIN LOOP – SAME LOGIC AS BEFORE
+# ==============================
+for req_id, filelist in requests.items():
+
+    # ========== GET UPDATEDAT (same logic) ==========
+    any_row = filelist[0]
+    updated_at = any_row["req_updated_at"] or datetime.now()
+    date_str = updated_at.strftime("%Y-%m-%d")
+
+    prijmeni = sanitize_name(any_row["prijmeni"] or "Unknown")
+    jmeno = sanitize_name(any_row["jmeno"] or "")
+
+    folder_name = sanitize_name(f"{date_str} {prijmeni}, {jmeno} {req_id}")
+    main_folder = BASE_DIR / folder_name
+
+    # ========== VALID FILES ==========
+    valid_files = {sanitize_name(r["filename"]) for r in filelist}
+
+    # ========== FIND OLD FOLDERS ==========
+    possible_dups = [
+        f for f in BASE_DIR.iterdir()
+        if f.is_dir() and req_id in f.name and f != main_folder
+    ]
+
+    # ========== MERGE OLD FOLDERS ==========
+    for dup in possible_dups:
+        print(f"♻️ Merging folder: {dup.name}")
+
+        clean_folder(dup, valid_files)
+        main_folder.mkdir(parents=True, exist_ok=True)
+
+        for f in dup.iterdir():
+            if f.is_file():
+                target = main_folder / f.name
+                if not target.exists():
+                    f.rename(target)
+
+        shutil.rmtree(dup, ignore_errors=True)
+
+    # ========== CLEAN MAIN FOLDER ==========
+    main_folder.mkdir(parents=True, exist_ok=True)
+    clean_folder(main_folder, valid_files)
+
+    # ========== SAVE FILES (fast now) ==========
+    for r in filelist:
+        filename = sanitize_name(r["filename"])
+        dest = main_folder / filename
+
+        if dest.exists():
+            continue
+
+        content = r["file_content"]
+        if not content:
+            continue
+
+        with open(dest, "wb") as f:
+            f.write(content)
+
+        print(f"💾 Saved: {dest.relative_to(BASE_DIR)}")
+
+print("\n🎯 Export complete.\n")