notebookvb

This commit is contained in:
Vladimir Buzalka
2026-04-29 06:55:23 +02:00
parent a9c143ba24
commit daad4adeab
113 changed files with 16563 additions and 0 deletions
@@ -0,0 +1,232 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import shutil
import pymysql
import re
from pathlib import Path
from datetime import datetime
from collections import defaultdict
import time
import sys
# Force UTF-8 output even under Windows Task Scheduler
import sys


def ensure_utf8_stream(stream):
    """Return *stream* switched to UTF-8, or unchanged when that is impossible.

    Args:
        stream: A text stream such as ``sys.stdout``; may be ``None``.

    Returns:
        The same stream after ``reconfigure(encoding='utf-8')``, a new
        ``io.TextIOWrapper`` around ``stream.buffer`` when the stream has no
        ``reconfigure`` method, or *stream* itself when neither is possible.
    """
    if stream is None:
        # The interpreter can start without console streams (sys.stdout and
        # sys.stderr are then None); there is nothing to re-encode.
        return stream
    try:
        stream.reconfigure(encoding='utf-8')
    except AttributeError:
        # Python < 3.7 fallback, or a replacement stream without reconfigure()
        import io
        raw = getattr(stream, "buffer", None)
        if raw is None:
            return stream
        return io.TextIOWrapper(raw, encoding='utf-8')
    return stream


# Each stream is handled on its own so a problem with one does not affect the other.
sys.stdout = ensure_utf8_stream(sys.stdout)
sys.stderr = ensure_utf8_stream(sys.stderr)
# ==============================
# 🛡 SAFE PRINT FOR CP1250 / EMOJI
# ==============================
def safe_print(text: str = ""):
    """Print *text* without raising on a console that cannot encode it.

    When the console encoding is not a UTF variant, characters outside the
    Basic Multilingual Plane (most emoji) are removed up front. If printing
    still fails, every character the console encoding cannot represent is
    dropped and the text is printed again; plain ASCII is the last resort
    when the encoding name itself is unusable.

    Args:
        text: The line to print.
    """
    # sys.stdout may be None when the process has no console attached;
    # print() is then a no-op, so only the encoding lookup needs guarding.
    enc = getattr(sys.stdout, "encoding", None) or ""
    if not enc.lower().startswith("utf"):
        # Strip emoji and characters outside BMP for Task Scheduler
        text = ''.join(ch for ch in text if ord(ch) < 65536)
    try:
        print(text)
    except UnicodeEncodeError:
        try:
            # Keep whatever the console can show (e.g. Czech diacritics under
            # cp1250) instead of flattening the whole line to ASCII.
            text = text.encode(enc, errors="ignore").decode(enc)
        except LookupError:
            # Unknown or empty encoding name: ASCII fallback
            text = ''.join(ch for ch in text if ord(ch) < 128)
        print(text)
# ==============================
# ⚙️ CONFIGURATION
# ==============================
# Keyword arguments passed to pymysql.connect().
# Every connection value can be overridden through an environment variable so
# that credentials do not have to live in the source file; the literals are
# kept only as backward-compatible defaults.
# NOTE(review): a root password is committed here in plain text — rotate it
# and supply MEDEVIO_DB_PASSWORD from the environment instead.
DB_CONFIG = {
    "host": os.environ.get("MEDEVIO_DB_HOST", "192.168.1.76"),
    "port": int(os.environ.get("MEDEVIO_DB_PORT", "3306")),
    "user": os.environ.get("MEDEVIO_DB_USER", "root"),
    "password": os.environ.get("MEDEVIO_DB_PASSWORD", "Vlado9674+"),
    "database": os.environ.get("MEDEVIO_DB_NAME", "medevio"),
    "charset": "utf8mb4",
}
# Root directory that receives one sub-folder per request; created on import
# if it does not exist yet.
BASE_DIR = Path(r"u:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP")
BASE_DIR.mkdir(parents=True, exist_ok=True)
def sanitize_name(name: str) -> str:
    r"""Make *name* usable as a single file or folder name on Windows.

    Each character from the set ``< > : " / \ | ? *`` and each control
    character 0x00-0x1F is replaced with an underscore, and surrounding
    whitespace is removed. Trailing dots and spaces are removed as well:
    Windows drops them when the name is created, so keeping them would make
    the sanitized name differ from the name that ends up on disk.

    Args:
        name: Raw name, e.g. a filename stored in the database.

    Returns:
        The cleaned name; an empty string stays empty.
    """
    cleaned = re.sub(r'[<>:"/\\|?*\x00-\x1F]', "_", name).strip()
    return cleaned.rstrip(". ")
def make_abbrev(title: str) -> str:
    """Build an upper-case abbreviation from *title*.

    The title is split into runs of letters and digits. A run made only of
    digits is kept whole; any other run contributes its first character.

    Args:
        title: Free-text title; ``None`` or an empty string is accepted.

    Returns:
        The concatenated pieces in upper case, or ``""`` when *title* is
        empty or contains no letters or digits.
    """
    if not title:
        return ""
    words = re.findall(r"[A-Za-zÁ-Žá-ž0-9]+", title)
    # One join instead of repeated string concatenation in a loop.
    return "".join(w if w.isdigit() else w[0] for w in words).upper()
# ==============================
# 🧹 DELETE UNEXPECTED FILES
# ==============================
def clean_folder(folder: Path, valid_files: set, flag: str = "▲"):
    """Delete files in *folder* that are neither expected nor flagged.

    Only regular files directly inside *folder* are considered; nothing
    happens when the folder does not exist. Deletion is best effort: a file
    that cannot be removed is reported and skipped.

    Args:
        folder: Directory to clean.
        valid_files: Sanitized names of the files that are allowed to stay.
        flag: Name prefix that protects a file from deletion. The default is
            the "▲" marker the rest of this script refers to as the flag;
            presumably it is put there by hand — confirm with the author.
    """
    if not folder.exists():
        return
    for f in folder.iterdir():
        if not f.is_file():
            continue
        # The emptiness guard matters: str.startswith("") is True for every
        # name, which would protect every file and turn this function into a
        # no-op.
        if flag and f.name.startswith(flag):
            continue
        if sanitize_name(f.name) in valid_files:
            continue
        safe_print(f"🗑️ Removing unexpected file: {f.name}")
        try:
            f.unlink()
        except OSError as e:
            safe_print(f"⚠️ Could not delete {f}: {e}")
# ==============================
# 📦 DB CONNECTION
# ==============================
# One connection with two cursors: a DictCursor for the metadata rows (fields
# are read by column name) and a default cursor for the later blob fetches.
conn = pymysql.connect(**DB_CONFIG)
cur_meta = conn.cursor(pymysql.cursors.DictCursor)
cur_blob = conn.cursor()

# Metadata only — file_content is not selected here. Restricted to requests
# updated within the last 14 days, most recently updated first.
METADATA_QUERY = """
SELECT d.id AS download_id,
d.request_id,
d.filename,
d.created_at,
p.updatedAt AS req_updated_at,
p.pacient_jmeno AS jmeno,
p.pacient_prijmeni AS prijmeni,
p.displayTitle
FROM medevio_downloads d
JOIN pozadavky p ON d.request_id = p.id
WHERE p.updatedAt >= DATE_SUB(NOW(), INTERVAL 14 DAY)
ORDER BY p.updatedAt DESC
"""

safe_print("🔍 Loading metadata from DB (FAST)…")
cur_meta.execute(METADATA_QUERY)
rows = cur_meta.fetchall()
safe_print(f"📋 Found {len(rows)} attachment records.\n")
# ==============================
# 🧠 MAIN LOOP WITH PROGRESS
# ==============================
# Bucket the attachment rows per request in Python, so the expected filenames
# of a request are known without issuing another SELECT for each one.
rows_by_request = defaultdict(list)
for row in rows:
    rows_by_request[row["request_id"]].append(row)

total_requests = len(rows_by_request)
safe_print(f"🔄 Processing {total_requests} unique requests...\n")

# Snapshot the sub-folders of BASE_DIR once as (path, name) pairs; the main
# loop searches this list instead of listing the directory for every request.
folder_list = []
for entry in BASE_DIR.iterdir():
    if entry.is_dir():
        folder_list.append((entry, entry.name))
# Marker character used in folder and file names (the "▲ flag" removed at the
# end of each iteration). The literal had degraded to an empty string, which
# made every `in` test true and turned the flag-removal rename into a no-op.
FLAG = "▲"

# Per request: resolve its folder, merge duplicate folders, remove unexpected
# files, write the attachments that are missing on disk, and finally drop the
# flag from the folder name when something new was written.
for current_index, (req_id, req_rows) in enumerate(rows_by_request.items(), 1):
    percent = (current_index / total_requests) * 100
    safe_print(
        f"\n[ {percent:5.1f}% ] Processing request "
        f"{current_index} / {total_requests} → {req_id}"
    )

    # ========== VALID FILENAMES from already-loaded rows ==========
    # sanitized name → original filename (the original is needed for the DB query later)
    file_map = {sanitize_name(r["filename"]): r["filename"] for r in req_rows}
    valid_files = set(file_map.keys())

    # ========== BUILD FOLDER NAME ==========
    # Request-level fields are taken from the first row of the group.
    r = req_rows[0]
    updated_at = r["req_updated_at"] or datetime.now()
    date_str = updated_at.strftime("%Y-%m-%d")
    prijmeni = sanitize_name(r["prijmeni"] or "Unknown")
    jmeno = sanitize_name(r["jmeno"] or "")
    title = r.get("displayTitle") or ""
    abbr = make_abbrev(title)
    clean_folder_name = sanitize_name(
        f"{date_str} {prijmeni}, {jmeno} [{abbr}] {req_id}"
    )

    # ========== DETECT EXISTING FOLDER from pre-built index ==========
    # NOTE(review): this is a substring match on the folder name; it is only
    # unambiguous if request ids cannot occur inside other ids, dates or
    # patient names — confirm the id format.
    req_id_str = str(req_id)
    matching = [f for f, name in folder_list if req_id_str in name]
    existing_folder = matching[0] if matching else None
    main_folder = existing_folder if existing_folder else BASE_DIR / clean_folder_name

    # ========== MERGE DUPLICATES ==========
    possible_dups = [f for f, name in folder_list if req_id_str in name and f != main_folder]
    for dup in possible_dups:
        safe_print(f"♻️ Merging duplicate folder: {dup.name}")
        clean_folder(dup, valid_files)
        main_folder.mkdir(parents=True, exist_ok=True)
        for f in dup.iterdir():
            if f.is_file():
                target = main_folder / f.name
                # Never overwrite a file that is already in the main folder.
                if not target.exists():
                    f.rename(target)
        # Whatever is still left in the duplicate is discarded with it.
        shutil.rmtree(dup, ignore_errors=True)

    # ========== CLEAN MAIN FOLDER ==========
    clean_folder(main_folder, valid_files)

    # ========== DOWNLOAD MISSING FILES (batch blob fetch per request) ==========
    main_folder.mkdir(parents=True, exist_ok=True)
    added_new_file = False
    # A file counts as present under its plain name or its flagged name.
    missing_san = [
        fn for fn in valid_files
        if not (main_folder / fn).exists()
        and not (main_folder / (FLAG + fn)).exists()
    ]
    if missing_san:
        # Fetch all missing blobs in a single query instead of one per file
        missing_orig = [file_map[fn] for fn in missing_san]
        placeholders = ",".join(["%s"] * len(missing_orig))
        cur_blob.execute(
            f"SELECT filename, file_content FROM medevio_downloads "
            f"WHERE request_id=%s AND filename IN ({placeholders})",
            [req_id] + missing_orig,
        )
        for blob_filename, content in cur_blob.fetchall():
            if not content:
                continue
            dest_plain = main_folder / sanitize_name(blob_filename)
            with open(dest_plain, "wb") as fh:
                fh.write(content)
            safe_print(f"💾 Wrote: {dest_plain.relative_to(BASE_DIR)}")
            added_new_file = True

    # ========== REMOVE ▲ FLAG IF NEW FILES ADDED ==========
    if added_new_file and FLAG in main_folder.name:
        new_name = main_folder.name.replace(FLAG, "").strip()
        new_path = main_folder.parent / new_name
        if new_path != main_folder:
            try:
                main_folder.rename(new_path)
                safe_print(f"🔄 Folder flag ▲ removed → {new_name}")
                main_folder = new_path
            except Exception as e:
                safe_print(f"⚠️ Could not rename folder: {e}")
# All requests handled: announce completion, then release the database
# resources — both cursors first, the connection last.
safe_print("\n🎯 Export complete.\n")
for handle in (cur_blob, cur_meta, conn):
    handle.close()