Z230
@@ -7,6 +7,7 @@ import pymysql
 import re
 from pathlib import Path
 from datetime import datetime
+from collections import defaultdict
 import time
 import sys
 
@@ -112,6 +113,7 @@ cur_meta.execute("""
         p.displayTitle
     FROM medevio_downloads d
     JOIN pozadavky p ON d.request_id = p.id
+    WHERE p.updatedAt >= DATE_SUB(NOW(), INTERVAL 14 DAY)
     ORDER BY p.updatedAt DESC
 """)
 
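The added WHERE clause narrows each run to requests updated in the last 14 days, so the script no longer rescans the whole table. Below is a minimal sketch of the same query with the window bound as a parameter; the connection settings and the exact SELECT list are assumptions (reconstructed from how rows are indexed later: r["request_id"], r["filename"], r["req_updated_at"]), not part of this commit.

import pymysql

DAYS = 14  # the commit hard-codes INTERVAL 14 DAY; a constant makes it tunable

conn = pymysql.connect(
    host="localhost",
    user="user",
    password="secret",           # hypothetical credentials
    database="medevio",          # hypothetical schema name
    cursorclass=pymysql.cursors.DictCursor,  # rows usable as r["request_id"]
)
with conn.cursor() as cur:
    cur.execute(
        """
        SELECT d.request_id,
               d.filename,
               p.updatedAt AS req_updated_at,
               p.displayTitle
        FROM medevio_downloads d
        JOIN pozadavky p ON d.request_id = p.id
        WHERE p.updatedAt >= DATE_SUB(NOW(), INTERVAL %s DAY)
        ORDER BY p.updatedAt DESC
        """,
        (DAYS,),
    )
    rows = cur.fetchall()
conn.close()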
@@ -122,40 +124,28 @@ safe_print(f"📋 Found {len(rows)} attachment records.\n")
 # 🧠 MAIN LOOP WITH PROGRESS
 # ==============================
 
-unique_request_ids = []
-seen = set()
+# Group rows by request_id in Python — avoids N extra SELECT filename queries
+rows_by_request = defaultdict(list)
 for r in rows:
-    req_id = r["request_id"]
-    if req_id not in seen:
-        unique_request_ids.append(req_id)
-        seen.add(req_id)
+    rows_by_request[r["request_id"]].append(r)
 
-total_requests = len(unique_request_ids)
+total_requests = len(rows_by_request)
 safe_print(f"🔄 Processing {total_requests} unique requests...\n")
 
-processed_requests = set()
-current_index = 0
+# Pre-index BASE_DIR once — avoids iterdir() called twice per request
+folder_list = [(f, f.name) for f in BASE_DIR.iterdir() if f.is_dir()]
 
-for r in rows:
-    req_id = r["request_id"]
-
-    if req_id in processed_requests:
-        continue
-    processed_requests.add(req_id)
-
-    current_index += 1
+for current_index, (req_id, req_rows) in enumerate(rows_by_request.items(), 1):
     percent = (current_index / total_requests) * 100
 
     safe_print(f"\n[ {percent:5.1f}% ] Processing request {current_index} / {total_requests} → {req_id}")
 
-    # ========== FETCH VALID FILENAMES ==========
-    cur_meta.execute(
-        "SELECT filename FROM medevio_downloads WHERE request_id=%s",
-        (req_id,)
-    )
-    valid_files = {sanitize_name(row["filename"]) for row in cur_meta.fetchall()}
+    # ========== VALID FILENAMES from already-loaded rows ==========
+    # original filename → sanitized name (needed for DB query later)
+    file_map = {sanitize_name(r["filename"]): r["filename"] for r in req_rows}
+    valid_files = set(file_map.keys())
 
     # ========== BUILD FOLDER NAME ==========
+    r = req_rows[0]
     updated_at = r["req_updated_at"] or datetime.now()
     date_str = updated_at.strftime("%Y-%m-%d")
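A note on the hunk above: the defaultdict grouping does double duty, replacing both the old seen-set deduplication and the per-request SELECT filename round trip (the classic N+1 query pattern). A self-contained sketch with toy data follows; since dicts preserve insertion order on Python 3.7+, requests are still visited in the query's ORDER BY p.updatedAt DESC order.

from collections import defaultdict

# Toy stand-ins for the DictCursor rows returned by the metadata query
rows = [
    {"request_id": "A1", "filename": "scan.pdf"},
    {"request_id": "B2", "filename": "photo.jpg"},
    {"request_id": "A1", "filename": "note.txt"},
]

# One pass groups every attachment row under its request id
rows_by_request = defaultdict(list)
for r in rows:
    rows_by_request[r["request_id"]].append(r)

# Each request is visited exactly once, with its rows already in hand
for current_index, (req_id, req_rows) in enumerate(rows_by_request.items(), 1):
    percent = (current_index / len(rows_by_request)) * 100
    print(f"[ {percent:5.1f}% ] {req_id}: {len(req_rows)} file(s)")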
@@ -168,21 +158,15 @@ for r in rows:
         f"{date_str} {prijmeni}, {jmeno} [{abbr}] {req_id}"
     )
 
-    # ========== DETECT EXISTING FOLDER ==========
-    existing_folder = None
-
-    for f in BASE_DIR.iterdir():
-        if f.is_dir() and req_id in f.name:
-            existing_folder = f
-            break
+    # ========== DETECT EXISTING FOLDER from pre-built index ==========
+    req_id_str = str(req_id)
+    matching = [f for f, name in folder_list if req_id_str in name]
+    existing_folder = matching[0] if matching else None
 
     main_folder = existing_folder if existing_folder else BASE_DIR / clean_folder_name
 
     # ========== MERGE DUPLICATES ==========
-    possible_dups = [
-        f for f in BASE_DIR.iterdir()
-        if f.is_dir() and req_id in f.name and f != main_folder
-    ]
+    possible_dups = [f for f, name in folder_list if req_id_str in name and f != main_folder]
 
     for dup in possible_dups:
         safe_print(f"♻️ Merging duplicate folder: {dup.name}")
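The pre-built index pays off twice per request, once for existing-folder detection and once for duplicate merging. The explicit str(req_id) also sidesteps the TypeError that the old req_id in f.name test would raise if the id were numeric. A minimal sketch of the lookup, assuming BASE_DIR is a pathlib.Path (the path and helper name here are hypothetical):

from pathlib import Path

BASE_DIR = Path("/data/medevio")  # hypothetical download root

# Scan the directory once; reuse the in-memory list for every request
folder_list = [(f, f.name) for f in BASE_DIR.iterdir() if f.is_dir()]

def folders_for_request(req_id, exclude=None):
    """All indexed folders whose name contains the request id."""
    req_id_str = str(req_id)  # folder names are strings, req_id may not be
    return [f for f, name in folder_list if req_id_str in name and f != exclude]

# Both lookups in the hunk above reduce to one call each:
#   matching = folders_for_request(req_id)
#   possible_dups = folders_for_request(req_id, exclude=main_folder)

One trade-off: the list is a snapshot, so folders created or merged during the run are not reflected in it. That matches the commit, where each request id is visited exactly once.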
@@ -201,36 +185,32 @@ for r in rows:
     # ========== CLEAN MAIN FOLDER ==========
     clean_folder(main_folder, valid_files)
 
-    # ========== DOWNLOAD MISSING FILES ==========
-    added_new_file = False
+    # ========== DOWNLOAD MISSING FILES (batch blob fetch per request) ==========
+    main_folder.mkdir(parents=True, exist_ok=True)
+    added_new_file = False
 
-    for filename in valid_files:
-        dest_plain = main_folder / filename
-        dest_marked = main_folder / ("▲" + filename)
-
-        if dest_plain.exists() or dest_marked.exists():
-            continue
-
-        added_new_file = True
+    missing_san = [
+        fn for fn in valid_files
+        if not (main_folder / fn).exists() and not (main_folder / ("▲" + fn)).exists()
+    ]
 
+    if missing_san:
+        # Fetch all missing blobs in a single query instead of one per file
+        missing_orig = [file_map[fn] for fn in missing_san]
+        placeholders = ",".join(["%s"] * len(missing_orig))
         cur_blob.execute(
-            "SELECT file_content FROM medevio_downloads "
-            "WHERE request_id=%s AND filename=%s",
-            (req_id, filename)
+            f"SELECT filename, file_content FROM medevio_downloads "
+            f"WHERE request_id=%s AND filename IN ({placeholders})",
+            [req_id] + missing_orig,
        )
-        row = cur_blob.fetchone()
-        if not row:
-            continue
-
-        content = row[0]
-        if not content:
-            continue
-
-        with open(dest_plain, "wb") as f:
-            f.write(content)
-
-        safe_print(f"💾 Wrote: {dest_plain.relative_to(BASE_DIR)}")
+        for blob_filename, content in cur_blob.fetchall():
+            if not content:
+                continue
+            dest_plain = main_folder / sanitize_name(blob_filename)
+            with open(dest_plain, "wb") as fh:
+                fh.write(content)
+            safe_print(f"💾 Wrote: {dest_plain.relative_to(BASE_DIR)}")
+            added_new_file = True
 
     # ========== REMOVE ▲ FLAG IF NEW FILES ADDED ==========
     if added_new_file and "▲" in main_folder.name:
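The batched fetch above issues one IN (...) query per request instead of one SELECT per file. Only the placeholder count is interpolated into the SQL; the values themselves remain bound parameters. The same pattern in isolation, assuming cur_blob is a plain tuple cursor and file_map maps sanitized names back to the original DB filenames (the function name is hypothetical):

def fetch_missing_blobs(cur_blob, req_id, file_map, missing_san):
    """One round trip for all missing files of a request."""
    missing_orig = [file_map[fn] for fn in missing_san]
    # One %s per filename: only the placeholder count is interpolated,
    # never the values themselves, so parameterization is preserved.
    placeholders = ",".join(["%s"] * len(missing_orig))
    cur_blob.execute(
        f"SELECT filename, file_content FROM medevio_downloads "
        f"WHERE request_id=%s AND filename IN ({placeholders})",
        [req_id] + missing_orig,
    )
    return cur_blob.fetchall()  # [(filename, blob_bytes), ...]

Each batch covers a single request's files, so the IN list stays small; a very large batch would eventually run into MySQL's max_allowed_packet limit, at which point chunking missing_orig would be the usual fix.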