From c0a16de8904561e462dc01c3a368168abaaf579d Mon Sep 17 00:00:00 2001 From: vlado Date: Sun, 16 Nov 2025 12:29:44 +0100 Subject: [PATCH] tw22 --- .idea/Medevio.iml | 2 +- .idea/misc.xml | 2 +- ...AVIDELNE_5_SaveToFileSystem incremental.py | 173 ++++++++++++++++++ ...AVIDELNE_5_SaveToFileSystem single step.py | 146 +++++++++++++++ .../PRAVIDELNE_5_SaveToFileSystem.py | 113 ------------ Testy/19 Test 2.py | 2 +- Testy/19 Test.py | 2 +- 7 files changed, 323 insertions(+), 117 deletions(-) create mode 100644 10ReadPozadavky/PRAVIDELNE_5_SaveToFileSystem incremental.py create mode 100644 10ReadPozadavky/PRAVIDELNE_5_SaveToFileSystem single step.py delete mode 100644 10ReadPozadavky/PRAVIDELNE_5_SaveToFileSystem.py diff --git a/.idea/Medevio.iml b/.idea/Medevio.iml index 3cd7809..6cb8b9a 100644 --- a/.idea/Medevio.iml +++ b/.idea/Medevio.iml @@ -4,7 +4,7 @@ - + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index 7a3c570..3c48b1e 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,5 +3,5 @@ - + \ No newline at end of file diff --git a/10ReadPozadavky/PRAVIDELNE_5_SaveToFileSystem incremental.py b/10ReadPozadavky/PRAVIDELNE_5_SaveToFileSystem incremental.py new file mode 100644 index 0000000..2d4d5eb --- /dev/null +++ b/10ReadPozadavky/PRAVIDELNE_5_SaveToFileSystem incremental.py @@ -0,0 +1,173 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import os +import shutil +import pymysql +import re +from pathlib import Path +from datetime import datetime +import time + +# ============================== +# ⚙️ CONFIGURATION +# ============================== +DB_CONFIG = { + "host": "192.168.1.76", + "port": 3307, + "user": "root", + "password": "Vlado9674+", + "database": "medevio", + "charset": "utf8mb4", +} + +BASE_DIR = Path(r"d:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP") +BASE_DIR.mkdir(parents=True, exist_ok=True) + + +def sanitize_name(name: str) -> str: + """Replace invalid filename characters with underscore.""" + return re.sub(r'[<>:"/\\|?*\x00-\x1F]', "_", name).strip() + + +# ============================== +# 🧹 DELETE UNEXPECTED FILES +# ============================== +def clean_folder(folder: Path, valid_files: set): + """Remove all files in folder that are NOT present in valid_files.""" + if not folder.exists(): + return + + for f in folder.iterdir(): + if f.is_file(): + if sanitize_name(f.name) not in valid_files: + print(f"🗑️ Removing unexpected file: {f.name}") + try: + f.unlink() + except Exception as e: + print(f"⚠️ Could not delete {f}: {e}") + + +# ============================== +# 📦 DB CONNECTION +# ============================== +conn = pymysql.connect(**DB_CONFIG) + +cur_meta = conn.cursor(pymysql.cursors.DictCursor) +cur_blob = conn.cursor() + +print("🔍 Loading metadata from DB (FAST)…") + +cur_meta.execute(""" + SELECT d.id AS download_id, + d.request_id, + d.filename, + d.created_at, + p.updatedAt AS req_updated_at, + p.pacient_jmeno AS jmeno, + p.pacient_prijmeni AS prijmeni + FROM medevio_downloads d + JOIN pozadavky p ON d.request_id = p.id + ORDER BY p.updatedAt DESC +""") + +rows = cur_meta.fetchall() +print(f"📋 Found {len(rows)} attachment records.\n") + +# ============================== +# 🧠 MAIN LOOP +# ============================== +processed_requests = set() + +for r in rows: + req_id = r["request_id"] + + if req_id in processed_requests: + continue + processed_requests.add(req_id) + + # ========== FETCH ALL VALID FILES FOR THIS REQUEST ========== + cur_meta.execute( + "SELECT filename FROM medevio_downloads WHERE request_id=%s", + (req_id,) + ) + valid_files = {sanitize_name(row["filename"]) for row in cur_meta.fetchall()} + + # ========== FOLDER NAME BASED ON UPDATEDAT ========== + updated_at = r["req_updated_at"] or datetime.now() + date_str = updated_at.strftime("%Y-%m-%d") + + prijmeni = sanitize_name(r["prijmeni"] or "Unknown") + jmeno = sanitize_name(r["jmeno"] or "") + + folder_name = f"{date_str} {prijmeni}, {jmeno} {req_id}" + folder_name = sanitize_name(folder_name) + main_folder = BASE_DIR / folder_name + + # ========== FIND OLD FOLDER (DUPLICATE) ========== + # Any folder that contains "_" and is not main_folder is duplicate + possible_dups = [ + f for f in BASE_DIR.iterdir() + if f.is_dir() and req_id in f.name and f != main_folder + ] + + # ========== MERGE DUPLICATES ========== + for dup in possible_dups: + print(f"♻️ Merging duplicate folder: {dup.name}") + + # 1) Clean unexpected files in dup + clean_folder(dup, valid_files) + + # 2) Move files from dup to main folder + main_folder.mkdir(parents=True, exist_ok=True) + + for f in dup.iterdir(): + if f.is_file(): + target = main_folder / f.name + if not target.exists(): + f.rename(target) + + # 3) Remove the duplicate folder + try: + shutil.rmtree(dup, ignore_errors=True) + except Exception as e: + print(f"⚠️ Could not delete duplicate folder {dup}: {e}") + + # ========== CLEAN MAIN FOLDER ========== + clean_folder(main_folder, valid_files) + + # ========== DOWNLOAD MISSING FILES ========== + main_folder.mkdir(parents=True, exist_ok=True) + + for filename in valid_files: + dest = main_folder / filename + if dest.exists(): + continue + + # fetch blob only now + start = time.perf_counter() + cur_blob.execute( + "SELECT file_content FROM medevio_downloads " + "WHERE request_id=%s AND filename=%s", + (req_id, filename) + ) + row = cur_blob.fetchone() + if not row: + continue + end = time.perf_counter() + print(f"⏱ Took {end - start:.4f} seconds") + + content = row[0] + if not content: + continue + + with open(dest, "wb") as f: + f.write(content) + + print(f"💾 Wrote: {dest.relative_to(BASE_DIR)}") + +print("\n🎯 Export complete.\n") + +cur_blob.close() +cur_meta.close() +conn.close() diff --git a/10ReadPozadavky/PRAVIDELNE_5_SaveToFileSystem single step.py b/10ReadPozadavky/PRAVIDELNE_5_SaveToFileSystem single step.py new file mode 100644 index 0000000..54d8af6 --- /dev/null +++ b/10ReadPozadavky/PRAVIDELNE_5_SaveToFileSystem single step.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import os +import shutil +import pymysql +import re +from pathlib import Path +from datetime import datetime + +# ============================== +# ⚙️ CONFIGURATION +# ============================== +DB_CONFIG = { + "host": "192.168.1.76", + "port": 3307, + "user": "root", + "password": "Vlado9674+", + "database": "medevio", + "charset": "utf8mb4", +} + +BASE_DIR = Path(r"d:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP") +BASE_DIR.mkdir(parents=True, exist_ok=True) + + +def sanitize_name(name: str) -> str: + return re.sub(r'[<>:"/\\|?*\x00-\x1F]', "_", name).strip() + + +def clean_folder(folder: Path, valid_files: set): + """Remove files that do NOT exist in MySQL for this request.""" + if not folder.exists(): + return + + for f in folder.iterdir(): + if f.is_file() and sanitize_name(f.name) not in valid_files: + print(f"🗑️ Removing unexpected file: {f.name}") + try: + f.unlink() + except Exception as e: + print(f"⚠️ Cannot delete {f}: {e}") + + +# ============================== +# 📥 LOAD EVERYTHING IN ONE QUERY +# ============================== +conn = pymysql.connect(**DB_CONFIG) +cur = conn.cursor(pymysql.cursors.DictCursor) + +print("📥 Loading ALL metadata + BLOBs with ONE MySQL query…") + +cur.execute(""" + SELECT + d.id AS download_id, + d.request_id, + d.filename, + d.file_content, + p.updatedAt AS req_updated_at, + p.pacient_jmeno AS jmeno, + p.pacient_prijmeni AS prijmeni + FROM medevio_downloads d + JOIN pozadavky p ON d.request_id = p.id + ORDER BY p.updatedAt DESC, d.created_at ASC +""") + +rows = cur.fetchall() +print(f"📦 Loaded {len(rows)} total file rows.\n") + +conn.close() + +# ============================== +# 🔄 ORGANIZE ROWS PER REQUEST +# ============================== +requests = {} # req_id → list of file dicts + +for r in rows: + req_id = r["request_id"] + if req_id not in requests: + requests[req_id] = [] + requests[req_id].append(r) + +print(f"📌 Unique requests: {len(requests)}\n") + +# ============================== +# 🧠 MAIN LOOP – SAME LOGIC AS BEFORE +# ============================== +for req_id, filelist in requests.items(): + + # ========== GET UPDATEDAT (same logic) ========== + any_row = filelist[0] + updated_at = any_row["req_updated_at"] or datetime.now() + date_str = updated_at.strftime("%Y-%m-%d") + + prijmeni = sanitize_name(any_row["prijmeni"] or "Unknown") + jmeno = sanitize_name(any_row["jmeno"] or "") + + folder_name = sanitize_name(f"{date_str} {prijmeni}, {jmeno} {req_id}") + main_folder = BASE_DIR / folder_name + + # ========== VALID FILES ========== + valid_files = {sanitize_name(r["filename"]) for r in filelist} + + # ========== FIND OLD FOLDERS ========== + possible_dups = [ + f for f in BASE_DIR.iterdir() + if f.is_dir() and req_id in f.name and f != main_folder + ] + + # ========== MERGE OLD FOLDERS ========== + for dup in possible_dups: + print(f"♻️ Merging folder: {dup.name}") + + clean_folder(dup, valid_files) + main_folder.mkdir(parents=True, exist_ok=True) + + for f in dup.iterdir(): + if f.is_file(): + target = main_folder / f.name + if not target.exists(): + f.rename(target) + + shutil.rmtree(dup, ignore_errors=True) + + # ========== CLEAN MAIN FOLDER ========== + main_folder.mkdir(parents=True, exist_ok=True) + clean_folder(main_folder, valid_files) + + # ========== SAVE FILES (fast now) ========== + for r in filelist: + filename = sanitize_name(r["filename"]) + dest = main_folder / filename + + if dest.exists(): + continue + + content = r["file_content"] + if not content: + continue + + with open(dest, "wb") as f: + f.write(content) + + print(f"💾 Saved: {dest.relative_to(BASE_DIR)}") + +print("\n🎯 Export complete.\n") diff --git a/10ReadPozadavky/PRAVIDELNE_5_SaveToFileSystem.py b/10ReadPozadavky/PRAVIDELNE_5_SaveToFileSystem.py deleted file mode 100644 index fd636c4..0000000 --- a/10ReadPozadavky/PRAVIDELNE_5_SaveToFileSystem.py +++ /dev/null @@ -1,113 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -import os -import zlib -import pymysql -import re -from pathlib import Path -from datetime import datetime - -# ============================== -# ⚙️ CONFIGURATION -# ============================== -DB_CONFIG = { - "host": "192.168.1.76", - "port": 3307, - "user": "root", - "password": "Vlado9674+", - "database": "medevio", - "charset": "utf8mb4", -} - -BASE_DIR = Path(r"u:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP") -BASE_DIR.mkdir(parents=True, exist_ok=True) - - -def sanitize_name(name: str) -> str: - """Replace invalid filename characters with underscore.""" - return re.sub(r'[<>:"/\\|?*\x00-\x1F]', "_", name).strip() - - -# ============================== -# 📦 EXPORT WITH JOIN TO POZADAVKY -# ============================== -conn = pymysql.connect(**DB_CONFIG) -cur_meta = conn.cursor(pymysql.cursors.DictCursor) -cur_blob = conn.cursor() - -# 🎯 JOIN medevio_downloads → pozadavky -cur_meta.execute(""" - SELECT d.id, d.request_id, d.attachment_id, d.filename, - d.created_at, d.downloaded_at, - p.pacient_jmeno AS jmeno, - p.pacient_prijmeni AS prijmeni - FROM medevio_downloads d - JOIN pozadavky p ON d.request_id = p.id - WHERE d.file_content IS NOT NULL; -""") - -rows = cur_meta.fetchall() -print(f"📋 Found {len(rows)} records to check/export") - -skipped, exported = 0, 0 - -for r in rows: - try: - created = r["created_at"] or r["downloaded_at"] or datetime.now() - date_str = created.strftime("%Y-%m-%d") - - # 👍 Now always correct from pozadavky - prijmeni = sanitize_name(r["prijmeni"] or "Unknown") - jmeno = sanitize_name(r["jmeno"] or "") - - # 🔥 Full request_id for folder identification - full_req_id = sanitize_name(r["request_id"]) - - # Folder names (normal and triangle) - base_folder = f"{date_str} {prijmeni}, {jmeno} {full_req_id}" - tri_folder = f"{date_str}▲ {prijmeni}, {jmeno} {full_req_id}" - - base_folder = sanitize_name(base_folder) - tri_folder = sanitize_name(tri_folder) - - base_path = BASE_DIR / base_folder - tri_path = BASE_DIR / tri_folder - - filename = sanitize_name(r["filename"] or f"unknown_{r['id']}.bin") - file_path_base = base_path / filename - file_path_tri = tri_path / filename - - # 🟡 Skip if file already exists - if file_path_base.exists() or file_path_tri.exists(): - skipped += 1 - found_in = "▲" if file_path_tri.exists() else "" - print(f"⏭️ Skipping existing{found_in}: {filename}") - continue - - # Ensure directory exists - base_path.mkdir(parents=True, exist_ok=True) - - # 2️⃣ Fetch blob content - cur_blob.execute( - "SELECT file_content FROM medevio_downloads WHERE id = %s", - (r["id"],) - ) - blob = cur_blob.fetchone()[0] - - if blob: - with open(file_path_base, "wb") as f: - f.write(blob) - exported += 1 - print(f"✅ Saved: {file_path_base.relative_to(BASE_DIR)}") - else: - print(f"⚠️ No content for id={r['id']}") - - except Exception as e: - print(f"❌ Error for id={r['id']}: {e}") - -cur_blob.close() -cur_meta.close() -conn.close() - -print(f"\n🎯 Export complete — {exported} new files saved, {skipped} skipped.\n") diff --git a/Testy/19 Test 2.py b/Testy/19 Test 2.py index ee25308..54d8af6 100644 --- a/Testy/19 Test 2.py +++ b/Testy/19 Test 2.py @@ -20,7 +20,7 @@ DB_CONFIG = { "charset": "utf8mb4", } -BASE_DIR = Path(r"u:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP1") +BASE_DIR = Path(r"d:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP") BASE_DIR.mkdir(parents=True, exist_ok=True) diff --git a/Testy/19 Test.py b/Testy/19 Test.py index 0464f41..2d4d5eb 100644 --- a/Testy/19 Test.py +++ b/Testy/19 Test.py @@ -21,7 +21,7 @@ DB_CONFIG = { "charset": "utf8mb4", } -BASE_DIR = Path(r"u:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP1") +BASE_DIR = Path(r"d:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP") BASE_DIR.mkdir(parents=True, exist_ok=True)