This commit is contained in:
2025-11-30 20:21:01 +01:00
parent ac16eedde9
commit f8ada463a2
6 changed files with 57 additions and 73 deletions

View File

@@ -16,7 +16,7 @@ CLINIC_SLUG = "mudr-buzalkova"
BATCH_SIZE = 100 BATCH_SIZE = 100
DB_CONFIG = { DB_CONFIG = {
"host": "127.0.0.1", "host": "192.168.1.76",
"port": 3307, "port": 3307,
"user": "root", "user": "root",
"password": "Vlado9674+", "password": "Vlado9674+",

View File

@@ -15,7 +15,7 @@ CLINIC_SLUG = "mudr-buzalkova"
LIMIT = 300 # stáhneme posledních 300 ukončených požadavků LIMIT = 300 # stáhneme posledních 300 ukončených požadavků
DB_CONFIG = { DB_CONFIG = {
"host": "127.0.0.1", "host": "192.168.1.76",
"port": 3307, "port": 3307,
"user": "root", "user": "root",
"password": "Vlado9674+", "password": "Vlado9674+",

View File

@@ -21,7 +21,7 @@ CLINIC_SLUG = "mudr-buzalkova"
GRAPHQL_URL = "https://api.medevio.cz/graphql" GRAPHQL_URL = "https://api.medevio.cz/graphql"
DB_CONFIG = { DB_CONFIG = {
"host": "127.0.0.1", "host": "192.168.1.76",
"port": 3307, "port": 3307,
"user": "root", "user": "root",
"password": "Vlado9674+", "password": "Vlado9674+",

View File

@@ -22,7 +22,7 @@ import time
TOKEN_PATH = Path("token.txt") TOKEN_PATH = Path("token.txt")
DB_CONFIG = { DB_CONFIG = {
"host": "127.0.0.1", "host": "192.168.1.76",
"port": 3307, "port": 3307,
"user": "root", "user": "root",
"password": "Vlado9674+", "password": "Vlado9674+",

View File

@@ -22,7 +22,7 @@ TOKEN_PATH = Path("token.txt")
CLINIC_SLUG = "mudr-buzalkova" CLINIC_SLUG = "mudr-buzalkova"
DB_CONFIG = { DB_CONFIG = {
"host": "127.0.0.1", "host": "192.168.1.76",
"port": 3307, "port": 3307,
"user": "root", "user": "root",
"password": "Vlado9674+", "password": "Vlado9674+",

View File

@@ -8,12 +8,29 @@ import re
from pathlib import Path from pathlib import Path
from datetime import datetime from datetime import datetime
import time import time
import sys
# ==============================
# 🛡 SAFE PRINT FOR CP1250 / EMOJI
# ==============================
def safe_print(text: str = "") -> None:
    """Print *text* without crashing on consoles that cannot encode it.

    Intended for runs under Windows Task Scheduler / CP1250 consoles where
    emoji in status messages would otherwise raise ``UnicodeEncodeError``.

    Args:
        text: Message to print. Defaults to an empty line.

    Behaviour:
        * On non-UTF streams, characters outside the Basic Multilingual
          Plane (most emoji) are stripped up front.
        * If printing still fails, only the characters the stream encoding
          cannot represent are dropped, so accented letters that the code
          page supports are preserved. ASCII is the last-resort fallback.
    """
    # sys.stdout can be None (e.g. pythonw or some scheduled-task launches);
    # print() is then a silent no-op, so only the encoding lookup needs care.
    enc = getattr(sys.stdout, "encoding", None) or ""

    if not enc.lower().startswith("utf"):
        # Strip emoji and characters outside BMP for Task Scheduler
        text = "".join(ch for ch in text if ord(ch) < 0x10000)

    try:
        print(text)
    except UnicodeEncodeError:
        # Drop only what the target encoding cannot represent instead of
        # discarding every non-ASCII character.
        try:
            codec = enc or "ascii"
            text = text.encode(codec, errors="ignore").decode(codec)
        except LookupError:
            # Unknown/odd encoding name reported by the stream: ASCII fallback
            text = text.encode("ascii", errors="ignore").decode("ascii")
        print(text)
# ============================== # ==============================
# ⚙️ CONFIGURATION # ⚙️ CONFIGURATION
# ============================== # ==============================
DB_CONFIG = { DB_CONFIG = {
"host": "127.0.0.1", "host": "192.168.1.76",
"port": 3307, "port": 3307,
"user": "root", "user": "root",
"password": "Vlado9674+", "password": "Vlado9674+",
@@ -31,24 +48,15 @@ def sanitize_name(name: str) -> str:
def make_abbrev(title: str) -> str:
    """Create an upper-case abbreviation from a request's display title.

    The title is split into runs of ASCII/Czech letters and digits. Each
    purely numeric run is kept whole; every other run contributes only its
    first character.

    Args:
        title: Display title; may be empty or ``None``.

    Returns:
        The abbreviation in upper case, or ``""`` when *title* is falsy or
        contains no matching characters.
    """
    if not title:
        return ""

    words = re.findall(r"[A-Za-zÁ-Žá-ž0-9]+", title)
    # Each match is non-empty (the pattern uses "+"), so w[0] is always safe.
    return "".join(w if w.isdigit() else w[0] for w in words).upper()
@@ -56,28 +64,20 @@ def make_abbrev(title: str) -> str:
# 🧹 DELETE UNEXPECTED FILES # 🧹 DELETE UNEXPECTED FILES
# ============================== # ==============================
def clean_folder(folder: Path, valid_files: set):
    """Remove files from *folder* that are not expected for the request.

    Rule: files whose name starts with the processed marker ``▲`` are
    ALWAYS kept, regardless of *valid_files*.

    Args:
        folder: Directory to clean. A missing directory is ignored.
        valid_files: Sanitized file names that are allowed to stay.

    Deletion is best-effort: a file that cannot be removed is reported and
    the loop continues with the next one.
    """
    # NOTE(review): the marker literal was empty in the reviewed text, which
    # makes startswith() always true and disables cleaning entirely. It is
    # restored here from the documented rule — confirm the exact glyph.
    processed_marker = "▲"

    if not folder.exists():
        return

    for f in folder.iterdir():
        if not f.is_file():
            continue

        # Processed files (▲filename.pdf) are never deleted.
        if f.name.startswith(processed_marker):
            continue

        sanitized = sanitize_name(f.name)
        if sanitized not in valid_files:
            safe_print(f"🗑️ Removing unexpected file: {f.name}")
            try:
                f.unlink()
            except OSError as e:
                safe_print(f"⚠️ Could not delete {f}: {e}")
# ============================== # ==============================
@@ -88,30 +88,30 @@ conn = pymysql.connect(**DB_CONFIG)
cur_meta = conn.cursor(pymysql.cursors.DictCursor) cur_meta = conn.cursor(pymysql.cursors.DictCursor)
cur_blob = conn.cursor() cur_blob = conn.cursor()
print("🔍 Loading metadata from DB (FAST)…") safe_print("🔍 Loading metadata from DB (FAST)…")
cur_meta.execute(""" cur_meta.execute("""
SELECT d.id AS download_id, SELECT d.id AS download_id,
d.request_id, d.request_id,
d.filename, d.filename,
d.created_at, d.created_at,
p.updatedAt AS req_updated_at, p.updatedAt AS req_updated_at,
p.pacient_jmeno AS jmeno, p.pacient_jmeno AS jmeno,
p.pacient_prijmeni AS prijmeni, p.pacient_prijmeni AS prijmeni,
p.displayTitle p.displayTitle
FROM medevio_downloads d FROM medevio_downloads d
JOIN pozadavky p ON d.request_id = p.id JOIN pozadavky p ON d.request_id = p.id
ORDER BY p.updatedAt DESC WHERE d.downloaded_at IS NULL
ORDER BY p.updatedAt DESC
""") """)
rows = cur_meta.fetchall() rows = cur_meta.fetchall()
print(f"📋 Found {len(rows)} attachment records.\n") safe_print(f"📋 Found {len(rows)} attachment records.\n")
# ============================== # ==============================
# 🧠 MAIN LOOP WITH PROGRESS # 🧠 MAIN LOOP WITH PROGRESS
# ============================== # ==============================
# list of unique request_ids in order
unique_request_ids = [] unique_request_ids = []
seen = set() seen = set()
for r in rows: for r in rows:
@@ -121,7 +121,7 @@ for r in rows:
seen.add(req_id) seen.add(req_id)
total_requests = len(unique_request_ids) total_requests = len(unique_request_ids)
print(f"🔄 Processing {total_requests} unique requests...\n") safe_print(f"🔄 Processing {total_requests} unique requests...\n")
processed_requests = set() processed_requests = set()
current_index = 0 current_index = 0
@@ -136,17 +136,16 @@ for r in rows:
current_index += 1 current_index += 1
percent = (current_index / total_requests) * 100 percent = (current_index / total_requests) * 100
print(f"\n[ {percent:5.1f}% ] Processing request {current_index} / {total_requests}{req_id}") safe_print(f"\n[ {percent:5.1f}% ] Processing request {current_index} / {total_requests}{req_id}")
# ========== FETCH ALL VALID FILES FOR THIS REQUEST ========== # ========== FETCH VALID FILENAMES ==========
cur_meta.execute( cur_meta.execute(
"SELECT filename FROM medevio_downloads WHERE request_id=%s", "SELECT filename FROM medevio_downloads WHERE request_id=%s",
(req_id,) (req_id,)
) )
valid_files = {sanitize_name(row["filename"]) for row in cur_meta.fetchall()} valid_files = {sanitize_name(row["filename"]) for row in cur_meta.fetchall()}
# ========== FOLDER NAME BASED ON UPDATEDAT ========== # ========== BUILD FOLDER NAME ==========
updated_at = r["req_updated_at"] or datetime.now() updated_at = r["req_updated_at"] or datetime.now()
date_str = updated_at.strftime("%Y-%m-%d") date_str = updated_at.strftime("%Y-%m-%d")
@@ -159,17 +158,14 @@ for r in rows:
f"{date_str} {prijmeni}, {jmeno} [{abbr}] {req_id}" f"{date_str} {prijmeni}, {jmeno} [{abbr}] {req_id}"
) )
# ========== DETECT EXISTING FOLDER (WITH OR WITHOUT ▲) ========== # ========== DETECT EXISTING FOLDER ==========
existing_folder = None existing_folder = None
folder_has_flag = False
for f in BASE_DIR.iterdir(): for f in BASE_DIR.iterdir():
if f.is_dir() and req_id in f.name: if f.is_dir() and req_id in f.name:
existing_folder = f existing_folder = f
folder_has_flag = ("" in f.name)
break break
# pokud složka existuje → pracujeme v ní
main_folder = existing_folder if existing_folder else BASE_DIR / clean_folder_name main_folder = existing_folder if existing_folder else BASE_DIR / clean_folder_name
# ========== MERGE DUPLICATES ========== # ========== MERGE DUPLICATES ==========
@@ -179,14 +175,13 @@ for r in rows:
] ]
for dup in possible_dups: for dup in possible_dups:
print(f"♻️ Merging duplicate folder: {dup.name}") safe_print(f"♻️ Merging duplicate folder: {dup.name}")
clean_folder(dup, valid_files) clean_folder(dup, valid_files)
main_folder.mkdir(parents=True, exist_ok=True) main_folder.mkdir(parents=True, exist_ok=True)
for f in dup.iterdir(): for f in dup.iterdir():
if f.is_file(): if f.is_file():
# prostě přesuneme, ▲ případně zůstane v názvu
target = main_folder / f.name target = main_folder / f.name
if not target.exists(): if not target.exists():
f.rename(target) f.rename(target)
@@ -204,11 +199,9 @@ for r in rows:
dest_plain = main_folder / filename dest_plain = main_folder / filename
dest_marked = main_folder / ("" + filename) dest_marked = main_folder / ("" + filename)
# soubor už existuje (buď filename, nebo ▲filename)
if dest_plain.exists() or dest_marked.exists(): if dest_plain.exists() or dest_marked.exists():
continue continue
# stáhneme nový soubor → znamená že se má odstranit ▲ složky
added_new_file = True added_new_file = True
cur_blob.execute( cur_blob.execute(
@@ -227,31 +220,22 @@ for r in rows:
with open(dest_plain, "wb") as f: with open(dest_plain, "wb") as f:
f.write(content) f.write(content)
print(f"💾 Wrote: {dest_plain.relative_to(BASE_DIR)}") safe_print(f"💾 Wrote: {dest_plain.relative_to(BASE_DIR)}")
# ============================== # ========== REMOVE ▲ FLAG IF NEW FILES ADDED ==========
# 🔵 REMOVE FOLDER-LEVEL ▲ ONLY IF NEW FILE ADDED if added_new_file and "" in main_folder.name:
# ============================== new_name = main_folder.name.replace("", "").strip()
if added_new_file: new_path = main_folder.parent / new_name
# složka se má přejmenovat bez ▲
if "" in main_folder.name:
new_name = main_folder.name.replace("", "")
new_name = new_name.strip() # pro jistotu
new_path = main_folder.parent / new_name
if new_path != main_folder: if new_path != main_folder:
try: try:
main_folder.rename(new_path) main_folder.rename(new_path)
print(f"🔄 Folder flag ▲ removed → {new_name}") safe_print(f"🔄 Folder flag ▲ removed → {new_name}")
main_folder = new_path main_folder = new_path
except Exception as e: except Exception as e:
print(f"⚠️ Could not rename folder: {e}") safe_print(f"⚠️ Could not rename folder: {e}")
else:
# žádné nové soubory → NIKDY nesahat na název složky
pass
safe_print("\n🎯 Export complete.\n")
print("\n🎯 Export complete.\n")
cur_blob.close() cur_blob.close()
cur_meta.close() cur_meta.close()