Files
medevio/Testy/19 Test.py
2025-11-16 10:59:38 +01:00

170 lines
4.7 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import shutil
import pymysql
import re
from pathlib import Path
from datetime import datetime
# ==============================
# ⚙️ CONFIGURATION
# ==============================
# Keyword arguments for pymysql.connect(); unpacked as **DB_CONFIG when the
# connection is opened.
# NOTE(review): the root password is stored in source control here -- consider
# moving it to an environment variable or an untracked config file.
DB_CONFIG = {
    "host": "192.168.1.76",
    "port": 3307,
    "user": "root",
    "password": "Vlado9674+",
    "database": "medevio",
    "charset": "utf8mb4",
}

# Export root: every request gets its own sub-folder below this directory.
# It is created at import time (missing parents included) and an already
# existing directory is not an error.
BASE_DIR = Path(r"u:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP1")
BASE_DIR.mkdir(exist_ok=True, parents=True)
def sanitize_name(name: str) -> str:
    r"""Return *name* with characters unsafe in file names replaced by "_".

    Each of ``< > : " / \ | ? *`` and every control character in the range
    0x00-0x1F becomes a single underscore. Leading and trailing whitespace
    is stripped from the result afterwards.
    """
    forbidden = re.compile(r'[<>:"/\\|?*\x00-\x1F]')
    underscored = forbidden.sub("_", name)
    return underscored.strip()
# ==============================
# 🧹 DELETE UNEXPECTED FILES
# ==============================
def clean_folder(folder: Path, valid_files: set):
    """Delete the regular files in *folder* that are not listed in *valid_files*.

    Args:
        folder: Directory to tidy up. Nothing happens if it does not exist.
        valid_files: Expected file names, already passed through
            sanitize_name(); each on-disk name is sanitized the same way
            before the membership test.

    Only direct children that are regular files are considered; other
    entries are left alone. A file that cannot be deleted is reported on
    stdout and skipped, so one failure does not stop the clean-up.
    """
    if not folder.exists():
        return

    for entry in folder.iterdir():
        if not entry.is_file():
            continue
        if sanitize_name(entry.name) in valid_files:
            continue

        print(f"🗑️ Removing unexpected file: {entry.name}")
        try:
            entry.unlink()
        except Exception as e:
            print(f"⚠️ Could not delete {entry}: {e}")
# ==============================
# 📦 DB CONNECTION
# ==============================
# One connection for the whole run with two cursors on it: a DictCursor for
# metadata rows (accessed by column name) and a default cursor for the blob
# look-ups (accessed by position).
conn = pymysql.connect(**DB_CONFIG)
cur_blob = conn.cursor()
cur_meta = conn.cursor(pymysql.cursors.DictCursor)

# Attachment metadata joined with the owning request. The file_content column
# is not selected here, so this result set carries no file bytes.
# Rows are ordered by the request's updatedAt, newest first.
METADATA_QUERY = """
SELECT d.id AS download_id,
d.request_id,
d.filename,
d.created_at,
p.updatedAt AS req_updated_at,
p.pacient_jmeno AS jmeno,
p.pacient_prijmeni AS prijmeni
FROM medevio_downloads d
JOIN pozadavky p ON d.request_id = p.id
ORDER BY p.updatedAt DESC
"""

print("🔍 Loading metadata from DB (FAST)…")
cur_meta.execute(METADATA_QUERY)
rows = cur_meta.fetchall()
print(f"📋 Found {len(rows)} attachment records.\n")
# ==============================
# 🧠 MAIN LOOP
# ==============================
# `rows` holds one record per attachment, so a request with several files
# appears several times; this set ensures each request is synchronised once.
processed_requests = set()

# try/finally guarantees that both cursors and the connection are closed even
# when a query or a file operation raises part-way through the export.
try:
    for r in rows:
        req_id = r["request_id"]
        if req_id in processed_requests:
            continue
        processed_requests.add(req_id)

        # ========== FETCH ALL VALID FILES FOR THIS REQUEST ==========
        cur_meta.execute(
            "SELECT filename FROM medevio_downloads WHERE request_id=%s",
            (req_id,)
        )
        # Map on-disk (sanitized) name -> file name exactly as stored in the DB.
        # The blob query below must use the stored name: looking it up by the
        # sanitized name finds nothing whenever sanitizing changed a character,
        # and the attachment would silently never be exported.
        db_names = {}
        for row in cur_meta.fetchall():
            stored_name = row["filename"]
            # A NULL/empty name cannot be sanitized or used as a file name.
            safe_name = sanitize_name(stored_name) if stored_name else ""
            if safe_name:
                # If two stored names sanitize identically, the first one wins.
                db_names.setdefault(safe_name, stored_name)
        valid_files = set(db_names)

        # ========== FOLDER NAME BASED ON UPDATEDAT ==========
        # Falls back to the current time when the request has no updatedAt.
        updated_at = r["req_updated_at"] or datetime.now()
        date_str = updated_at.strftime("%Y-%m-%d")
        prijmeni = sanitize_name(r["prijmeni"] or "Unknown")
        jmeno = sanitize_name(r["jmeno"] or "")
        folder_name = sanitize_name(f"{date_str} {prijmeni}, {jmeno} {req_id}")
        main_folder = BASE_DIR / folder_name

        # ========== FIND OLD FOLDER (DUPLICATE) ==========
        # Any other directory under BASE_DIR whose name contains the request id
        # is treated as an older folder of the same request.
        # NOTE(review): this is a plain substring test; it presumably relies on
        # request ids being long unique strings (e.g. UUIDs) -- confirm.
        possible_dups = [
            f for f in BASE_DIR.iterdir()
            if f.is_dir() and req_id in f.name and f != main_folder
        ]

        # ========== MERGE DUPLICATES ==========
        for dup in possible_dups:
            print(f"♻️ Merging duplicate folder: {dup.name}")

            # 1) Clean unexpected files in dup
            clean_folder(dup, valid_files)

            # 2) Move files from dup to main folder (existing targets are kept)
            main_folder.mkdir(parents=True, exist_ok=True)
            for f in dup.iterdir():
                if f.is_file():
                    target = main_folder / f.name
                    if not target.exists():
                        f.rename(target)

            # 3) Remove the duplicate folder. Deletion stays best-effort, but
            # because ignore_errors=True never raises, success is verified
            # explicitly so that a failure is actually reported.
            shutil.rmtree(dup, ignore_errors=True)
            if dup.exists():
                print(f"⚠️ Could not delete duplicate folder {dup}")

        # ========== CLEAN MAIN FOLDER ==========
        clean_folder(main_folder, valid_files)

        # ========== DOWNLOAD MISSING FILES ==========
        main_folder.mkdir(parents=True, exist_ok=True)
        for safe_name, stored_name in db_names.items():
            dest = main_folder / safe_name
            if dest.exists():
                continue

            # Fetch the blob only now, i.e. only for files missing on disk.
            cur_blob.execute(
                "SELECT file_content FROM medevio_downloads "
                "WHERE request_id=%s AND filename=%s",
                (req_id, stored_name)
            )
            row = cur_blob.fetchone()
            if not row:
                continue
            content = row[0]
            if not content:
                # Nothing stored for this attachment; do not create an empty file.
                continue

            with open(dest, "wb") as f:
                f.write(content)
            print(f"💾 Wrote: {dest.relative_to(BASE_DIR)}")

    print("\n🎯 Export complete.\n")
finally:
    cur_blob.close()
    cur_meta.close()
    conn.close()