This commit is contained in:
michaela.buzalkova
2025-11-17 11:28:31 +01:00
parent a210f801d3
commit ea32ea0bc1
5 changed files with 261 additions and 5 deletions

2
.idea/Medevio.iml generated
View File

@@ -4,7 +4,7 @@
<content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/.venv" />
</content>
<orderEntry type="jdk" jdkName="Python 3.12" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="Python 3.13" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

2
.idea/misc.xml generated
View File

@@ -3,5 +3,5 @@
<component name="Black">
<option name="sdkName" value="Python 3.12 (Medevio)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.13" project-jdk-type="Python SDK" />
</project>

View File

@@ -107,11 +107,24 @@ cur_meta.execute("""
rows = cur_meta.fetchall()
print(f"📋 Found {len(rows)} attachment records.\n")
# ==============================
# 🧠 MAIN LOOP WITH PROGRESS
# ==============================
# list of unique request_ids in order
unique_request_ids = []
seen = set()
for r in rows:
req_id = r["request_id"]
if req_id not in seen:
unique_request_ids.append(req_id)
seen.add(req_id)
total_requests = len(unique_request_ids)
print(f"🔄 Processing {total_requests} unique requests...\n")
# ==============================
# 🧠 MAIN LOOP
# ==============================
processed_requests = set()
current_index = 0
for r in rows:
req_id = r["request_id"]
@@ -120,11 +133,17 @@ for r in rows:
continue
processed_requests.add(req_id)
current_index += 1
percent = (current_index / total_requests) * 100
print(f"\n[ {percent:5.1f}% ] Processing request {current_index} / {total_requests}{req_id}")
# ========== FETCH ALL VALID FILES FOR THIS REQUEST ==========
cur_meta.execute(
"SELECT filename FROM medevio_downloads WHERE request_id=%s",
(req_id,)
)
valid_files = {sanitize_name(row["filename"]) for row in cur_meta.fetchall()}
# ========== FOLDER NAME BASED ON UPDATEDAT ==========

View File

@@ -12,6 +12,14 @@ Spustí všechny PRAVIDELNÉ skripty v daném pořadí:
5) PRAVIDELNE_5_SaveToFileSystem incremental.py
"""
import time, socket
for _ in range(30):
try:
socket.create_connection(("127.0.0.1", 3307), timeout=3).close()
break
except OSError:
time.sleep(10)
import sys
import subprocess
from pathlib import Path

229
Testy/000 Testy.py Normal file
View File

@@ -0,0 +1,229 @@
import os
import shutil
import pymysql
import re
from pathlib import Path
from datetime import datetime
import time
# ==============================
# ⚙️ CONFIGURATION
# ==============================
# Connection parameters for the local MariaDB/MySQL instance on port 3307
# (presumably a tunnel or container port-forward — confirm).
# NOTE(review): credentials are hard-coded in plain text; move them to
# environment variables or a config file kept out of version control.
DB_CONFIG = {
    "host": "127.0.0.1",
    "port": 3307,
    "user": "root",
    "password": "Vlado9674+",
    "database": "medevio",
    "charset": "utf8mb4",
}
# Export target on a mapped Dropbox drive; created at import time if missing.
BASE_DIR = Path(r"z:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP")
BASE_DIR.mkdir(parents=True, exist_ok=True)
# ---- helper function for timing ----
def log_section(name):
    """Print a section banner for *name* and return the start timestamp."""
    banner = f"\n=== ⏱ {name} ==="
    print(banner)
    return time.time()
def log_done(start):
    """Print the elapsed seconds since *start* (a time.time() timestamp)."""
    elapsed = time.time() - start
    print(f" -> done in {elapsed:0.2f} sec")
def sanitize_name(name: str) -> str:
    """Replace characters illegal in Windows filenames with '_' and strip whitespace."""
    cleaned = re.sub(r'[<>:"/\\|?*\x00-\x1F]', "_", name)
    return cleaned.strip()
def make_abbrev(title: str) -> str:
    """Abbreviate *title*: first letter per word, numbers kept whole, uppercased.

    Words are runs of Czech/Latin letters or digits; an empty/None title
    yields "".
    """
    if not title:
        return ""
    tokens = re.findall(r"[A-Za-zÁ-Žá-ž0-9]+", title)
    pieces = []
    for token in tokens:
        pieces.append(token if token.isdigit() else token[0])
    return "".join(pieces).upper()
# ==============================
# 🧹 DELETE UNEXPECTED FILES
# ==============================
def clean_folder(folder: Path, valid_files: set):
    """Delete files in *folder* whose sanitized name is not in *valid_files*.

    Missing folders are a no-op. Deletion failures are logged, not raised.
    """
    start = log_section(f"Cleaning folder: {folder.name}")
    if not folder.exists():
        log_done(start)
        return
    for f in folder.iterdir():
        if f.is_file():
            # NOTE(review): startswith("") is ALWAYS True, so this `continue`
            # fires for every file and the deletion code below is dead. A
            # marker prefix character (flag emoji?) appears to have been lost
            # from this string literal — confirm the intended prefix.
            if f.name.startswith(""):
                continue
            sanitized = sanitize_name(f.name)
            if sanitized not in valid_files:
                print(f"🗑 Removing unexpected: {f.name}")
                try:
                    f.unlink()
                except Exception as e:
                    # Best-effort: locked/ACL-protected files are reported only.
                    print(f"⚠ Could not delete {f}: {e}")
    log_done(start)
# ==============================
# 📦 DB CONNECTION
# ==============================
print("\n🔌 Connecting to DB…")
start_db = time.time()
conn = pymysql.connect(**DB_CONFIG)
cur_meta = conn.cursor(pymysql.cursors.DictCursor)
cur_blob = conn.cursor()
print(f" -> connected in {time.time() - start_db:0.2f} sec")
print("\n🔍 Loading metadata from DB…")
start_sql = time.time()
cur_meta.execute("""
SELECT d.id AS download_id,
d.request_id,
d.filename,
d.created_at,
p.updatedAt AS req_updated_at,
p.pacient_jmeno AS jmeno,
p.pacient_prijmeni AS prijmeni,
p.displayTitle
FROM medevio_downloads d
JOIN pozadavky p ON d.request_id = p.id
ORDER BY p.updatedAt DESC
""")
rows = cur_meta.fetchall()
print(f"📋 Loaded {len(rows)} attachment rows in {time.time() - start_sql:0.2f} sec.\n")
# ==============================
# 🧠 MAIN LOOP
# ==============================
# Rows arrive newest-first; each request_id is handled exactly once, using
# its first (most recently updated) row for the folder metadata.
processed_requests = set()
for r in rows:
    req_id = r["request_id"]
    if req_id in processed_requests:
        continue
    processed_requests.add(req_id)
    section = f"Processing request {req_id}"
    sec_start = log_section(section)
    # ========== FETCH ALL VALID FILES ==========
    # Sanitized filenames known to the DB for this request; anything else on
    # disk is considered stale (see clean_folder).
    start_valid = log_section("Loading valid filenames")
    cur_meta.execute(
        "SELECT filename FROM medevio_downloads WHERE request_id=%s",
        (req_id,)
    )
    valid_files = {sanitize_name(row["filename"]) for row in cur_meta.fetchall()}
    log_done(start_valid)
    # ========== PREPARE FOLDER NAME ==========
    # Pattern: "<YYYY-MM-DD> <surname>, <firstname> [<title abbrev>] <request_id>".
    updated_at = r["req_updated_at"] or datetime.now()
    date_str = updated_at.strftime("%Y-%m-%d")
    prijmeni = sanitize_name(r["prijmeni"] or "Unknown")
    jmeno = sanitize_name(r["jmeno"] or "")
    title = r.get("displayTitle") or ""
    abbr = make_abbrev(title)
    clean_folder_name = sanitize_name(
        f"{date_str} {prijmeni}, {jmeno} [{abbr}] {req_id}"
    )
    # ========== DETECT EXISTING FOLDER ==========
    # Reuse any folder already containing the request id in its name, so a
    # rename of patient/title does not create a second folder.
    start_detect = log_section("Detecting existing folder(s)")
    existing_folder = None
    folder_has_flag = False
    for f in BASE_DIR.iterdir():
        if f.is_dir() and req_id in f.name:
            existing_folder = f
            # NOTE(review): '"" in f.name' is always True — a marker character
            # appears lost from this literal (confirm); folder_has_flag is
            # also never read afterwards.
            folder_has_flag = ("" in f.name)
            break
    log_done(start_detect)
    main_folder = existing_folder if existing_folder else BASE_DIR / clean_folder_name
    # ========== MERGE DUPLICATES ==========
    start_merge = log_section("Scanning for duplicate folders")
    possible_dups = [
        f for f in BASE_DIR.iterdir()
        if f.is_dir() and req_id in f.name and f != main_folder
    ]
    for dup in possible_dups:
        print(f"♻ Merging duplicate folder: {dup.name}")
        clean_folder(dup, valid_files)
        main_folder.mkdir(parents=True, exist_ok=True)
        for f in dup.iterdir():
            if f.is_file():
                target = main_folder / f.name
                # Keep the copy already in main_folder; move only new files.
                if not target.exists():
                    f.rename(target)
        # Remove the duplicate folder and any files left behind.
        shutil.rmtree(dup, ignore_errors=True)
    log_done(start_merge)
    # ========== CLEAN MAIN FOLDER ==========
    clean_folder(main_folder, valid_files)
    # ========== DOWNLOAD MISSING FILES ==========
    start_dl = log_section("Downloading missing files")
    added_new_file = False
    main_folder.mkdir(parents=True, exist_ok=True)
    for filename in valid_files:
        dest_plain = main_folder / filename
        # NOTE(review): '"" + filename' is a no-op — the marker prefix seems
        # to have been lost from this literal as well; confirm.
        dest_marked = main_folder / ("" + filename)
        if dest_plain.exists() or dest_marked.exists():
            continue
        added_new_file = True
        # NOTE(review): `filename` here is the SANITIZED name, but the DB
        # stores raw filenames — any file whose name contained illegal
        # characters will never match this WHERE clause and is silently
        # skipped below; verify against medevio_downloads contents.
        cur_blob.execute(
            "SELECT file_content FROM medevio_downloads "
            "WHERE request_id=%s AND filename=%s",
            (req_id, filename)
        )
        row = cur_blob.fetchone()
        if not row or not row[0]:
            # No BLOB stored (or lookup missed) — skip without error.
            continue
        with open(dest_plain, "wb") as f:
            f.write(row[0])
        print(f"💾 wrote: {dest_plain.name}")
    log_done(start_dl)
    # ========== REMOVE FOLDER FLAG ==========
    # NOTE(review): '"" in main_folder.name' is always True and
    # .replace("", "") returns the name unchanged, so this renames the folder
    # to its own name whenever a file was added (a likely FileExistsError on
    # Windows, caught below). The flag character was presumably stripped from
    # these literals — confirm the intended marker.
    if added_new_file and "" in main_folder.name:
        try:
            new_name = main_folder.name.replace("", "").strip()
            new_path = main_folder.parent / new_name
            main_folder.rename(new_path)
            print(f"🔄 Folder flag removed → {new_name}")
            main_folder = new_path
        except Exception as e:
            print(f"⚠ Could not rename folder: {e}")
    log_done(sec_start)
# Teardown: close both cursors, then the connection.
print("\n🎯 Export complete.\n")
cur_blob.close()
cur_meta.close()
conn.close()