This commit is contained in:
michaela.buzalkova
2025-11-17 11:28:31 +01:00
parent a210f801d3
commit ea32ea0bc1
5 changed files with 261 additions and 5 deletions

2
.idea/Medevio.iml generated
View File

@@ -4,7 +4,7 @@
<content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/.venv" />
</content>
<orderEntry type="jdk" jdkName="Python 3.12" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="Python 3.13" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

2
.idea/misc.xml generated
View File

@@ -3,5 +3,5 @@
<component name="Black">
<option name="sdkName" value="Python 3.12 (Medevio)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.13" project-jdk-type="Python SDK" />
</project>

View File

@@ -107,11 +107,24 @@ cur_meta.execute("""
rows = cur_meta.fetchall()
print(f"📋 Found {len(rows)} attachment records.\n")
# ==============================
# 🧠 MAIN LOOP WITH PROGRESS
# ==============================
# list of unique request_ids in order
unique_request_ids = []
seen = set()
for r in rows:
req_id = r["request_id"]
if req_id not in seen:
unique_request_ids.append(req_id)
seen.add(req_id)
total_requests = len(unique_request_ids)
print(f"🔄 Processing {total_requests} unique requests...\n")
# ==============================
# 🧠 MAIN LOOP
# ==============================
processed_requests = set()
current_index = 0
for r in rows:
req_id = r["request_id"]
@@ -120,11 +133,17 @@ for r in rows:
continue
processed_requests.add(req_id)
current_index += 1
percent = (current_index / total_requests) * 100
print(f"\n[ {percent:5.1f}% ] Processing request {current_index} / {total_requests}{req_id}")
# ========== FETCH ALL VALID FILES FOR THIS REQUEST ==========
cur_meta.execute(
"SELECT filename FROM medevio_downloads WHERE request_id=%s",
(req_id,)
)
valid_files = {sanitize_name(row["filename"]) for row in cur_meta.fetchall()}
# ========== FOLDER NAME BASED ON UPDATEDAT ==========

View File

@@ -12,6 +12,14 @@ Spustí všechny PRAVIDELNÉ skripty v daném pořadí:
5) PRAVIDELNE_5_SaveToFileSystem incremental.py
"""
import time, socket
for _ in range(30):
try:
socket.create_connection(("127.0.0.1", 3307), timeout=3).close()
break
except OSError:
time.sleep(10)
import sys
import subprocess
from pathlib import Path

229
Testy/000 Testy.py Normal file
View File

@@ -0,0 +1,229 @@
import os
import shutil
import pymysql
import re
from pathlib import Path
from datetime import datetime
import time
# ==============================
# ⚙️ CONFIGURATION
# ==============================
# Connection parameters for the local MariaDB/MySQL instance on port 3307
# (presumably a tunnel or container port-forward — confirm).
# NOTE(review): credentials are hard-coded in plain text; move them to
# environment variables or a config file kept out of version control.
DB_CONFIG = {
    "host": "127.0.0.1",
    "port": 3307,
    "user": "root",
    "password": "Vlado9674+",
    "database": "medevio",
    "charset": "utf8mb4",
}
# Export target on a mapped Dropbox drive; created at import time if missing.
BASE_DIR = Path(r"z:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP")
BASE_DIR.mkdir(parents=True, exist_ok=True)
# ---- helper function for timing ----
def log_section(name):
    """Print a section banner for *name* and return the start timestamp."""
    banner = f"\n=== ⏱ {name} ==="
    print(banner)
    return time.time()
def log_done(start):
    """Print the elapsed seconds since *start* (a time.time() timestamp)."""
    elapsed = time.time() - start
    print(f" -> done in {elapsed:0.2f} sec")
def sanitize_name(name: str) -> str:
    """Replace characters illegal in Windows filenames with '_' and strip whitespace."""
    cleaned = re.sub(r'[<>:"/\\|?*\x00-\x1F]', "_", name)
    return cleaned.strip()
def make_abbrev(title: str) -> str:
    """Abbreviate *title*: first letter per word, numbers kept whole, uppercased.

    Words are runs of Czech/Latin letters or digits; an empty/None title
    yields "".
    """
    if not title:
        return ""
    tokens = re.findall(r"[A-Za-zÁ-Žá-ž0-9]+", title)
    pieces = []
    for token in tokens:
        pieces.append(token if token.isdigit() else token[0])
    return "".join(pieces).upper()
# ==============================
# 🧹 DELETE UNEXPECTED FILES
# ==============================
def clean_folder(folder: Path, valid_files: set):
    """Delete files in *folder* whose sanitized name is not in *valid_files*.

    Missing folders are a no-op. Deletion failures are logged, not raised.
    """
    start = log_section(f"Cleaning folder: {folder.name}")
    if not folder.exists():
        log_done(start)
        return
    for f in folder.iterdir():
        if f.is_file():
            # NOTE(review): startswith("") is ALWAYS True, so this `continue`
            # fires for every file and the deletion code below is dead. A
            # marker prefix character (flag emoji?) appears to have been lost
            # from this string literal — confirm the intended prefix.
            if f.name.startswith(""):
                continue
            sanitized = sanitize_name(f.name)
            if sanitized not in valid_files:
                print(f"🗑 Removing unexpected: {f.name}")
                try:
                    f.unlink()
                except Exception as e:
                    # Best-effort: locked/ACL-protected files are reported only.
                    print(f"⚠ Could not delete {f}: {e}")
    log_done(start)
# ==============================
# 📦 DB CONNECTION
# ==============================
print("\n🔌 Connecting to DB…")
start_db = time.time()
conn = pymysql.connect(**DB_CONFIG)
cur_meta = conn.cursor(pymysql.cursors.DictCursor)
cur_blob = conn.cursor()
print(f" -> connected in {time.time() - start_db:0.2f} sec")
print("\n🔍 Loading metadata from DB…")
start_sql = time.time()
cur_meta.execute("""
SELECT d.id AS download_id,
d.request_id,
d.filename,
d.created_at,
p.updatedAt AS req_updated_at,
p.pacient_jmeno AS jmeno,
p.pacient_prijmeni AS prijmeni,
p.displayTitle
FROM medevio_downloads d
JOIN pozadavky p ON d.request_id = p.id
ORDER BY p.updatedAt DESC
""")
rows = cur_meta.fetchall()
print(f"📋 Loaded {len(rows)} attachment rows in {time.time() - start_sql:0.2f} sec.\n")
# ==============================
# 🧠 MAIN LOOP
# ==============================
# Rows arrive newest-first; each request_id is handled exactly once, using
# its first (most recently updated) row for the folder metadata.
processed_requests = set()
for r in rows:
    req_id = r["request_id"]
    if req_id in processed_requests:
        continue
    processed_requests.add(req_id)
    section = f"Processing request {req_id}"
    sec_start = log_section(section)
    # ========== FETCH ALL VALID FILES ==========
    # Sanitized filenames known to the DB for this request; anything else on
    # disk is considered stale (see clean_folder).
    start_valid = log_section("Loading valid filenames")
    cur_meta.execute(
        "SELECT filename FROM medevio_downloads WHERE request_id=%s",
        (req_id,)
    )
    valid_files = {sanitize_name(row["filename"]) for row in cur_meta.fetchall()}
    log_done(start_valid)
    # ========== PREPARE FOLDER NAME ==========
    # Pattern: "<YYYY-MM-DD> <surname>, <firstname> [<title abbrev>] <request_id>".
    updated_at = r["req_updated_at"] or datetime.now()
    date_str = updated_at.strftime("%Y-%m-%d")
    prijmeni = sanitize_name(r["prijmeni"] or "Unknown")
    jmeno = sanitize_name(r["jmeno"] or "")
    title = r.get("displayTitle") or ""
    abbr = make_abbrev(title)
    clean_folder_name = sanitize_name(
        f"{date_str} {prijmeni}, {jmeno} [{abbr}] {req_id}"
    )
    # ========== DETECT EXISTING FOLDER ==========
    # Reuse any folder already containing the request id in its name, so a
    # rename of patient/title does not create a second folder.
    start_detect = log_section("Detecting existing folder(s)")
    existing_folder = None
    folder_has_flag = False
    for f in BASE_DIR.iterdir():
        if f.is_dir() and req_id in f.name:
            existing_folder = f
            # NOTE(review): '"" in f.name' is always True — a marker character
            # appears lost from this literal (confirm); folder_has_flag is
            # also never read afterwards.
            folder_has_flag = ("" in f.name)
            break
    log_done(start_detect)
    main_folder = existing_folder if existing_folder else BASE_DIR / clean_folder_name
    # ========== MERGE DUPLICATES ==========
    start_merge = log_section("Scanning for duplicate folders")
    possible_dups = [
        f for f in BASE_DIR.iterdir()
        if f.is_dir() and req_id in f.name and f != main_folder
    ]
    for dup in possible_dups:
        print(f"♻ Merging duplicate folder: {dup.name}")
        clean_folder(dup, valid_files)
        main_folder.mkdir(parents=True, exist_ok=True)
        for f in dup.iterdir():
            if f.is_file():
                target = main_folder / f.name
                # Keep the copy already in main_folder; move only new files.
                if not target.exists():
                    f.rename(target)
        # Remove the duplicate folder and any files left behind.
        shutil.rmtree(dup, ignore_errors=True)
    log_done(start_merge)
    # ========== CLEAN MAIN FOLDER ==========
    clean_folder(main_folder, valid_files)
    # ========== DOWNLOAD MISSING FILES ==========
    start_dl = log_section("Downloading missing files")
    added_new_file = False
    main_folder.mkdir(parents=True, exist_ok=True)
    for filename in valid_files:
        dest_plain = main_folder / filename
        # NOTE(review): '"" + filename' is a no-op — the marker prefix seems
        # to have been lost from this literal as well; confirm.
        dest_marked = main_folder / ("" + filename)
        if dest_plain.exists() or dest_marked.exists():
            continue
        added_new_file = True
        # NOTE(review): `filename` here is the SANITIZED name, but the DB
        # stores raw filenames — any file whose name contained illegal
        # characters will never match this WHERE clause and is silently
        # skipped below; verify against medevio_downloads contents.
        cur_blob.execute(
            "SELECT file_content FROM medevio_downloads "
            "WHERE request_id=%s AND filename=%s",
            (req_id, filename)
        )
        row = cur_blob.fetchone()
        if not row or not row[0]:
            # No BLOB stored (or lookup missed) — skip without error.
            continue
        with open(dest_plain, "wb") as f:
            f.write(row[0])
        print(f"💾 wrote: {dest_plain.name}")
    log_done(start_dl)
    # ========== REMOVE FOLDER FLAG ==========
    # NOTE(review): '"" in main_folder.name' is always True and
    # .replace("", "") returns the name unchanged, so this renames the folder
    # to its own name whenever a file was added (a likely FileExistsError on
    # Windows, caught below). The flag character was presumably stripped from
    # these literals — confirm the intended marker.
    if added_new_file and "" in main_folder.name:
        try:
            new_name = main_folder.name.replace("", "").strip()
            new_path = main_folder.parent / new_name
            main_folder.rename(new_path)
            print(f"🔄 Folder flag removed → {new_name}")
            main_folder = new_path
        except Exception as e:
            print(f"⚠ Could not rename folder: {e}")
    log_done(sec_start)
# Teardown: close both cursors, then the connection.
print("\n🎯 Export complete.\n")
cur_blob.close()
cur_meta.close()
conn.close()