This commit is contained in:
2026-04-27 11:00:40 +02:00
parent d4825553a9
commit 90bd0ecdf5
11 changed files with 1002 additions and 130 deletions
+40 -60
View File
@@ -7,6 +7,7 @@ import pymysql
import re
from pathlib import Path
from datetime import datetime
from collections import defaultdict
import time
import sys
@@ -112,6 +113,7 @@ cur_meta.execute("""
p.displayTitle
FROM medevio_downloads d
JOIN pozadavky p ON d.request_id = p.id
WHERE p.updatedAt >= DATE_SUB(NOW(), INTERVAL 14 DAY)
ORDER BY p.updatedAt DESC
""")
@@ -122,40 +124,28 @@ safe_print(f"📋 Found {len(rows)} attachment records.\n")
# 🧠 MAIN LOOP WITH PROGRESS
# ==============================
unique_request_ids = []
seen = set()
# Group rows by request_id in Python — avoids N extra SELECT filename queries
rows_by_request = defaultdict(list)
for r in rows:
req_id = r["request_id"]
if req_id not in seen:
unique_request_ids.append(req_id)
seen.add(req_id)
rows_by_request[r["request_id"]].append(r)
total_requests = len(unique_request_ids)
total_requests = len(rows_by_request)
safe_print(f"🔄 Processing {total_requests} unique requests...\n")
processed_requests = set()
current_index = 0
# Pre-index BASE_DIR once — avoids iterdir() called twice per request
folder_list = [(f, f.name) for f in BASE_DIR.iterdir() if f.is_dir()]
for r in rows:
req_id = r["request_id"]
if req_id in processed_requests:
continue
processed_requests.add(req_id)
current_index += 1
for current_index, (req_id, req_rows) in enumerate(rows_by_request.items(), 1):
percent = (current_index / total_requests) * 100
safe_print(f"\n[ {percent:5.1f}% ] Processing request {current_index} / {total_requests} — {req_id}")
# ========== FETCH VALID FILENAMES ==========
cur_meta.execute(
"SELECT filename FROM medevio_downloads WHERE request_id=%s",
(req_id,)
)
valid_files = {sanitize_name(row["filename"]) for row in cur_meta.fetchall()}
# ========== VALID FILENAMES from already-loaded rows ==========
# original filename → sanitized name (needed for DB query later)
file_map = {sanitize_name(r["filename"]): r["filename"] for r in req_rows}
valid_files = set(file_map.keys())
# ========== BUILD FOLDER NAME ==========
r = req_rows[0]
updated_at = r["req_updated_at"] or datetime.now()
date_str = updated_at.strftime("%Y-%m-%d")
@@ -168,21 +158,15 @@ for r in rows:
f"{date_str} {prijmeni}, {jmeno} [{abbr}] {req_id}"
)
# ========== DETECT EXISTING FOLDER ==========
existing_folder = None
for f in BASE_DIR.iterdir():
if f.is_dir() and req_id in f.name:
existing_folder = f
break
# ========== DETECT EXISTING FOLDER from pre-built index ==========
req_id_str = str(req_id)
matching = [f for f, name in folder_list if req_id_str in name]
existing_folder = matching[0] if matching else None
main_folder = existing_folder if existing_folder else BASE_DIR / clean_folder_name
# ========== MERGE DUPLICATES ==========
possible_dups = [
f for f in BASE_DIR.iterdir()
if f.is_dir() and req_id in f.name and f != main_folder
]
possible_dups = [f for f, name in folder_list if req_id_str in name and f != main_folder]
for dup in possible_dups:
safe_print(f"♻️ Merging duplicate folder: {dup.name}")
@@ -201,36 +185,32 @@ for r in rows:
# ========== CLEAN MAIN FOLDER ==========
clean_folder(main_folder, valid_files)
# ========== DOWNLOAD MISSING FILES ==========
added_new_file = False
# ========== DOWNLOAD MISSING FILES (batch blob fetch per request) ==========
main_folder.mkdir(parents=True, exist_ok=True)
added_new_file = False
for filename in valid_files:
dest_plain = main_folder / filename
dest_marked = main_folder / ("▲" + filename)
if dest_plain.exists() or dest_marked.exists():
continue
added_new_file = True
missing_san = [
fn for fn in valid_files
if not (main_folder / fn).exists() and not (main_folder / ("▲" + fn)).exists()
]
if missing_san:
# Fetch all missing blobs in a single query instead of one per file
missing_orig = [file_map[fn] for fn in missing_san]
placeholders = ",".join(["%s"] * len(missing_orig))
cur_blob.execute(
"SELECT file_content FROM medevio_downloads "
"WHERE request_id=%s AND filename=%s",
(req_id, filename)
f"SELECT filename, file_content FROM medevio_downloads "
f"WHERE request_id=%s AND filename IN ({placeholders})",
[req_id] + missing_orig,
)
row = cur_blob.fetchone()
if not row:
continue
content = row[0]
if not content:
continue
with open(dest_plain, "wb") as f:
f.write(content)
safe_print(f"💾 Wrote: {dest_plain.relative_to(BASE_DIR)}")
for blob_filename, content in cur_blob.fetchall():
if not content:
continue
dest_plain = main_folder / sanitize_name(blob_filename)
with open(dest_plain, "wb") as fh:
fh.write(content)
safe_print(f"💾 Wrote: {dest_plain.relative_to(BASE_DIR)}")
added_new_file = True
# ========== REMOVE ▲ FLAG IF NEW FILES ADDED ==========
if added_new_file and "▲" in main_folder.name: