From bc44a65806cea1d2712475a5d0cf258ccf20ef9c Mon Sep 17 00:00:00 2001 From: Vladimir Buzalka Date: Fri, 5 Dec 2025 08:53:11 +0100 Subject: [PATCH] notebook --- .../PRAVIDELNE_1_ReadLast300DonePozadavku.py | 96 ++++---- .../PRAVIDELNE_4_StahniPrilohyUlozDoMySQL.py | 3 +- ...AVIDELNE_5_SaveToFileSystem incremental.py | 2 +- ...VIDELNE_5_SaveToFileSystem incremental1.py | 224 ++++++++++++++++++ ...VIDELNE_5_SaveToFileSystem incremental2.py | 193 +++++++++++++++ 5 files changed, 463 insertions(+), 55 deletions(-) create mode 100644 10ReadPozadavky/PRAVIDELNE_5_SaveToFileSystem incremental1.py create mode 100644 10ReadPozadavky/PRAVIDELNE_5_SaveToFileSystem incremental2.py diff --git a/10ReadPozadavky/PRAVIDELNE_1_ReadLast300DonePozadavku.py b/10ReadPozadavky/PRAVIDELNE_1_ReadLast300DonePozadavku.py index aefb18b..00b3a52 100644 --- a/10ReadPozadavky/PRAVIDELNE_1_ReadLast300DonePozadavku.py +++ b/10ReadPozadavky/PRAVIDELNE_1_ReadLast300DonePozadavku.py @@ -6,41 +6,15 @@ import requests from pathlib import Path from datetime import datetime from dateutil import parser -import sys - -# Force UTF-8 output even under Windows Task Scheduler -import sys -try: - sys.stdout.reconfigure(encoding='utf-8') - sys.stderr.reconfigure(encoding='utf-8') -except AttributeError: - # Python < 3.7 fallback (not needed for you, but safe) - import io - sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') - sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8') - -# ================================ -# 🛡 SAFE PRINT FOR CP1250 / Emoji -# ================================ -def safe_print(text: str): - enc = sys.stdout.encoding or "" - if not enc.lower().startswith("utf"): - # strip emoji + characters outside BMP - text = ''.join(ch for ch in text if ord(ch) < 65536) - try: - print(text) - except UnicodeEncodeError: - # final fallback to ASCII only - text = ''.join(ch for ch in text if ord(ch) < 128) - print(text) - # ================================ # 🔧 CONFIGURATION # ================================ TOKEN_PATH = Path("token.txt") CLINIC_SLUG = "mudr-buzalkova" -LIMIT = 300 + +LIMIT = 500 # batch size / number of records +FULL_DOWNLOAD = False # 🔥 TOGGLE: False = last X, True = ALL batches DB_CONFIG = { "host": "192.168.1.76", @@ -52,7 +26,7 @@ DB_CONFIG = { "cursorclass": pymysql.cursors.DictCursor, } -# ⭐ GraphQL query +# ⭐ Query with lastMessage GRAPHQL_QUERY = r""" query ClinicRequestList2( $clinicSlug: String!, @@ -95,27 +69,23 @@ query ClinicRequestList2( # ================================ def read_token(path: Path) -> str: tok = path.read_text(encoding="utf-8").strip() - if tok.startswith("Bearer "): - return tok.split(" ", 1)[1] - return tok - + return tok.split(" ", 1)[1] if tok.startswith("Bearer ") else tok # ================================ -# DATETIME PARSER +# DATETIME PARSER (UTC → MySQL) # ================================ def to_mysql_dt(iso_str): if not iso_str: return None try: - dt = parser.isoparse(iso_str) - dt = dt.astimezone() + dt = parser.isoparse(iso_str) # ISO8601 → aware datetime (UTC) + dt = dt.astimezone() # convert to local timezone return dt.strftime("%Y-%m-%d %H:%M:%S") except: return None - # ================================ -# UPSERT +# UPSERT REQUEST # ================================ def upsert(conn, r): p = r.get("extendedPatient") or {} @@ -147,7 +117,7 @@ def upsert(conn, r): """ vals = ( - r.get("id"), + r["id"], r.get("displayTitle"), to_mysql_dt(r.get("createdAt")), final_updated, @@ -163,16 +133,15 @@ def upsert(conn, r): conn.commit() - # ================================ -# FETCH LAST 300 DONE REQUESTS +# FETCH DONE REQUESTS (one batch) # ================================ -def fetch_done(headers): +def fetch_done(headers, offset): vars = { "clinicSlug": CLINIC_SLUG, "queueId": None, "queueAssignment": "ANY", - "pageInfo": {"first": LIMIT, "offset": 0}, + "pageInfo": {"first": LIMIT, "offset": offset}, "locale": "cs", "state": "DONE", } @@ -187,8 +156,7 @@ def fetch_done(headers): r.raise_for_status() data = r.json()["data"]["requestsResponse"] - return data.get("patientRequests", []) - + return data.get("patientRequests", []), data.get("count", 0) # ================================ # MAIN @@ -203,18 +171,40 @@ def main(): conn = pymysql.connect(**DB_CONFIG) - safe_print(f"\n=== Downloading last {LIMIT} DONE requests @ {datetime.now():%Y-%m-%d %H:%M:%S} ===") + print(f"\n=== Sync CLOSED requests @ {datetime.now():%Y-%m-%d %H:%M:%S} ===") - requests_list = fetch_done(headers) - safe_print(f"📌 Requests returned: {len(requests_list)}") + offset = 0 + total_count = None + total_processed = 0 - for r in requests_list: - upsert(conn, r) + while True: + batch, count = fetch_done(headers, offset) + + if total_count is None: + total_count = count + print(f"📡 Total DONE in Medevio: {count}") + + if not batch: + break + + print(f" • Processing batch offset={offset} size={len(batch)}") + + for r in batch: + upsert(conn, r) + total_processed += len(batch) + + if not FULL_DOWNLOAD: + # process only last LIMIT records + break + + # FULL DOWNLOAD → fetch next batch + offset += LIMIT + if offset >= count: + break conn.close() - safe_print("\n\u2705 DONE - latest closed requests synced.\n") + print(f"\n✅ DONE — {total_processed} requests synced.\n") -# ================================ if __name__ == "__main__": main() diff --git a/10ReadPozadavky/PRAVIDELNE_4_StahniPrilohyUlozDoMySQL.py b/10ReadPozadavky/PRAVIDELNE_4_StahniPrilohyUlozDoMySQL.py index 22f5275..64a3afd 100644 --- a/10ReadPozadavky/PRAVIDELNE_4_StahniPrilohyUlozDoMySQL.py +++ b/10ReadPozadavky/PRAVIDELNE_4_StahniPrilohyUlozDoMySQL.py @@ -190,9 +190,10 @@ def main(): # Build query for pozadavky sql = """ - SELECT id, pacient_prijmeni, pacient_jmeno, createdAt + SELECT id, pacient_prijmeni, pacient_jmeno, createdAt, updatedAt, attachmentsProcessed FROM pozadavky WHERE attachmentsProcessed IS NULL + OR updatedAt > attachmentsProcessed """ params = [] if CREATED_AFTER: diff --git a/10ReadPozadavky/PRAVIDELNE_5_SaveToFileSystem incremental.py b/10ReadPozadavky/PRAVIDELNE_5_SaveToFileSystem incremental.py index 2492756..f4225cb 100644 --- a/10ReadPozadavky/PRAVIDELNE_5_SaveToFileSystem incremental.py +++ b/10ReadPozadavky/PRAVIDELNE_5_SaveToFileSystem incremental.py @@ -49,7 +49,7 @@ DB_CONFIG = { "charset": "utf8mb4", } -BASE_DIR = Path(r"z:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP") +BASE_DIR = Path(r"u:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP") BASE_DIR.mkdir(parents=True, exist_ok=True) diff --git a/10ReadPozadavky/PRAVIDELNE_5_SaveToFileSystem incremental1.py b/10ReadPozadavky/PRAVIDELNE_5_SaveToFileSystem incremental1.py new file mode 100644 index 0000000..ca07716 --- /dev/null +++ b/10ReadPozadavky/PRAVIDELNE_5_SaveToFileSystem incremental1.py @@ -0,0 +1,224 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import os +import shutil +import pymysql +import re +from pathlib import Path +from datetime import datetime +from collections import defaultdict + +# ============================== +# ⚙️ CONFIGURATION +# ============================== + +DB_CONFIG = { + "host": "192.168.1.76", + "port": 3307, + "user": "root", + "password": "Vlado9674+", + "database": "medevio", + "charset": "utf8mb4", +} + +BASE_DIR = Path(r"u:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP") +BASE_DIR.mkdir(parents=True, exist_ok=True) + + +# ============================== +# 🔧 HELPERS +# ============================== + +def sanitize_name(name: str) -> str: + """Replace invalid Windows filename characters.""" + return re.sub(r'[<>:"/\\|?*\x00-\x1F]', "_", name).strip() + + +def make_abbrev(title: str) -> str: + """Create abbreviation from title.""" + if not title: + return "" + words = re.findall(r"[A-Za-zÁ-Žá-ž0-9]+", title) + abbr = "" + for w in words: + if w.isdigit(): + abbr += w + else: + abbr += w[0] + return abbr.upper() + + +def clean_folder(folder: Path, valid_files: set): + """Remove unexpected files except ▲ files.""" + if not folder.exists(): + return + + for f in folder.iterdir(): + if f.is_file(): + if f.name.startswith("▲"): + continue + sanitized = sanitize_name(f.name) + if sanitized not in valid_files: + print(f"🗑️ Removing unexpected file: {f.name}") + try: + f.unlink() + except Exception as e: + print(f"⚠️ Could not delete {f}: {e}") + + +# ============================== +# 📦 DB CONNECTION +# ============================== + +conn = pymysql.connect(**DB_CONFIG) +cur_meta = conn.cursor(pymysql.cursors.DictCursor) +cur_blob = conn.cursor() + +print("🔍 Loading only requests with NEW attachments…") + +cur_meta.execute(""" + SELECT + p.id AS request_id, + p.displayTitle, + p.pacient_jmeno, + p.pacient_prijmeni, + p.updatedAt, + p.attachmentsProcessed, + d.filename, + d.created_at + FROM pozadavky p + JOIN medevio_downloads d ON d.request_id = p.id + LEFT JOIN ( + SELECT request_id, MAX(created_at) AS last_attachment_ts + FROM medevio_downloads + GROUP BY request_id + ) x ON x.request_id = p.id + WHERE p.attachmentsProcessed IS NULL + OR p.attachmentsProcessed < x.last_attachment_ts + ORDER BY p.updatedAt DESC; +""") + +rows = cur_meta.fetchall() +print(f"📋 Found {len(rows)} attachment rows belonging to requests needing processing.\n") + +# ============================== +# 🧠 PREPARE REQUEST GROUPING +# ============================== + +grouped = defaultdict(list) +for r in rows: + grouped[r["request_id"]].append(r) + +unique_request_ids = list(grouped.keys()) +total_requests = len(unique_request_ids) + +print(f"🔄 Processing {total_requests} requests needing updates…\n") + +# ============================== +# 🧠 MAIN LOOP +# ============================== + +index = 0 + +for req_id in unique_request_ids: + index += 1 + pct = (index / total_requests) * 100 + + print(f"\n[ {pct:5.1f}% ] Processing request {index}/{total_requests} → {req_id}") + + req_rows = grouped[req_id] + first = req_rows[0] + + # Build folder name + updated_at = first["updatedAt"] or datetime.now() + date_str = updated_at.strftime("%Y-%m-%d") + + prijmeni = sanitize_name(first["pacient_prijmeni"] or "Unknown") + jmeno = sanitize_name(first["pacient_jmeno"] or "") + abbr = make_abbrev(first["displayTitle"]) + + desired_folder_name = sanitize_name(f"{date_str} {prijmeni}, {jmeno} [{abbr}] {req_id}") + + # Detect existing folder for request + main_folder = None + for f in BASE_DIR.iterdir(): + if f.is_dir() and req_id in f.name: + main_folder = f + break + + if not main_folder: + main_folder = BASE_DIR / desired_folder_name + + main_folder.mkdir(parents=True, exist_ok=True) + + # Build valid filename set + valid_files = {sanitize_name(r["filename"]) for r in req_rows} + + # Clean unexpected non-▲ files + clean_folder(main_folder, valid_files) + + # Track if ANY new files were downloaded + added_new_file = False + + # DOWNLOAD MISSING FILES + for r in req_rows: + filename = sanitize_name(r["filename"]) + dest_plain = main_folder / filename + dest_flag = main_folder / ("▲" + filename) + + # Skip if file already exists (plain or ▲) + if dest_plain.exists() or dest_flag.exists(): + continue + + # Fetch content + cur_blob.execute(""" + SELECT file_content + FROM medevio_downloads + WHERE request_id=%s AND filename=%s + """, (req_id, r["filename"])) + + row = cur_blob.fetchone() + if not row or not row[0]: + continue + + with open(dest_plain, "wb") as f: + f.write(row[0]) + + print(f"💾 Wrote: {dest_plain.relative_to(BASE_DIR)}") + added_new_file = True + + # ------------------------------------ + # 🟦 FOLDER ▲ LOGIC (IMPORTANT) + # ------------------------------------ + if added_new_file: + # If folder contains ▲ in its name → remove it + if "▲" in main_folder.name: + new_name = main_folder.name.replace("▲", "").strip() + new_path = main_folder.parent / new_name + + try: + main_folder.rename(new_path) + print(f"🔄 Folder flag ▲ removed → {new_name}") + main_folder = new_path + except Exception as e: + print(f"⚠️ Could not rename folder: {e}") + else: + # NO new files → NEVER rename folder + pass + + # Mark request as processed + cur_meta.execute( + "UPDATE pozadavky SET attachmentsProcessed = NOW() WHERE id=%s", + (req_id,) + ) + conn.commit() + +# ============================== +# 🏁 DONE +# ============================== + +print("\n🎯 Export complete.\n") +cur_blob.close() +cur_meta.close() +conn.close() diff --git a/10ReadPozadavky/PRAVIDELNE_5_SaveToFileSystem incremental2.py b/10ReadPozadavky/PRAVIDELNE_5_SaveToFileSystem incremental2.py new file mode 100644 index 0000000..5d2b313 --- /dev/null +++ b/10ReadPozadavky/PRAVIDELNE_5_SaveToFileSystem incremental2.py @@ -0,0 +1,193 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import os +import shutil +import pymysql +import re +from pathlib import Path +from datetime import datetime + +# ============================== +# ⚙️ CONFIGURATION +# ============================== +DB_CONFIG = { + "host": "192.168.1.76", + "port": 3307, + "user": "root", + "password": "Vlado9674+", + "database": "medevio", + "charset": "utf8mb4", +} + +BASE_DIR = Path(r"u:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP") +BASE_DIR.mkdir(parents=True, exist_ok=True) + + +def sanitize_name(name: str) -> str: + """Replace invalid filename characters with underscore.""" + return re.sub(r'[<>:"/\\|?*\x00-\x1F]', "_", name).strip() + + +def make_abbrev(title: str) -> str: + """Create abbreviation from displayTitle.""" + if not title: + return "" + words = re.findall(r"[A-Za-zÁ-Žá-ž0-9]+", title) + abbr = "" + for w in words: + abbr += w if w.isdigit() else w[0] + return abbr.upper() + + +# ============================== +# 🧹 DELETE UNEXPECTED FILES +# ============================== +def clean_folder(folder: Path, valid_files: set): + if not folder.exists(): + return + + for f in folder.iterdir(): + if f.is_file(): + if f.name.startswith("▲"): + continue + sanitized = sanitize_name(f.name) + if sanitized not in valid_files: + print(f"🗑️ Removing unexpected file: {f.name}") + try: + f.unlink() + except Exception as e: + print(f"⚠️ Could not delete {f}: {e}") + + +# ============================== +# 📦 DB CONNECTION +# ============================== +conn = pymysql.connect(**DB_CONFIG) +cur_meta = conn.cursor(pymysql.cursors.DictCursor) +cur_blob = conn.cursor() + +print("🔍 Loading ALL metadata without file_content…") + +# ⭐ Load ALL metadata once (NO BLOBs) +cur_meta.execute(""" + SELECT + d.request_id, + d.filename, + d.created_at, + p.updatedAt AS req_updated_at, + p.pacient_jmeno AS jmeno, + p.pacient_prijmeni AS prijmeni, + p.displayTitle + FROM medevio_downloads d + JOIN pozadavky p ON d.request_id = p.id + ORDER BY p.updatedAt DESC; +""") + +rows = cur_meta.fetchall() +print(f"📋 Found {len(rows)} metadata rows.\n") + +# ============================== +# 🧠 PRE-GROUP METADATA +# ============================== + +# Build dictionary: request_id → all metadata rows for that request +grouped = {} +for row in rows: + grouped.setdefault(row["request_id"], []).append(row) + +unique_request_ids = list(grouped.keys()) +total_requests = len(unique_request_ids) + +print(f"🔄 Processing {total_requests} unique requests…\n") + +# ============================== +# 🧠 MAIN LOOP +# ============================== + +for idx, req_id in enumerate(unique_request_ids, start=1): + pct = (idx / total_requests) * 100 + req_rows = grouped[req_id] + first = req_rows[0] + + print(f"\n[ {pct:5.1f}% ] Processing request {idx}/{total_requests} → {req_id}") + + # ====================== + # Build folder name + # ====================== + updated_at = first["req_updated_at"] or datetime.now() + date_str = updated_at.strftime("%Y-%m-%d") + prijmeni = sanitize_name(first["prijmeni"] or "Unknown") + jmeno = sanitize_name(first["jmeno"] or "") + abbr = make_abbrev(first["displayTitle"] or "") + + clean_folder_name = sanitize_name(f"{date_str} {prijmeni}, {jmeno} [{abbr}] {req_id}") + + # Detect existing folder + existing_folder = None + for f in BASE_DIR.iterdir(): + if f.is_dir() and req_id in f.name: + existing_folder = f + break + + main_folder = existing_folder if existing_folder else BASE_DIR / clean_folder_name + main_folder.mkdir(parents=True, exist_ok=True) + + # ====================== + # Valid files for this request + # ====================== + valid_files = {sanitize_name(r["filename"]) for r in req_rows} + + # Clean unexpected files + clean_folder(main_folder, valid_files) + + # ====================== + # DOWNLOAD MISSING FILES → only now load BLOBs + # ====================== + added_new_file = False + + for r in req_rows: + filename = sanitize_name(r["filename"]) + dest_plain = main_folder / filename + dest_marked = main_folder / ("▲" + filename) + + if dest_plain.exists() or dest_marked.exists(): + continue + + added_new_file = True + + # ⭐ Load BLOB only when needed + cur_blob.execute(""" + SELECT file_content + FROM medevio_downloads + WHERE request_id=%s AND filename=%s + """, (req_id, r["filename"])) + + row = cur_blob.fetchone() + if not row or not row[0]: + continue + + with open(dest_plain, "wb") as f: + f.write(row[0]) + + print(f"💾 Wrote: {dest_plain.relative_to(BASE_DIR)}") + + # ====================== + # Folder-level ▲ logic + # ====================== + if added_new_file and "▲" in main_folder.name: + new_name = main_folder.name.replace("▲", "").strip() + new_path = main_folder.parent / new_name + + try: + main_folder.rename(new_path) + main_folder = new_path + print(f"🔄 Folder flag ▲ removed → {new_name}") + except Exception as e: + print(f"⚠️ Could not rename folder: {e}") + +cur_blob.close() +cur_meta.close() +conn.close() + +print("\n🎯 Export complete.\n")