diff --git a/10ReadPozadavky/0702 Plně funkční uložení přílohy.py b/10ReadPozadavky/0702 Plně funkční uložení přílohyfilesystem.py similarity index 100% rename from 10ReadPozadavky/0702 Plně funkční uložení přílohy.py rename to 10ReadPozadavky/0702 Plně funkční uložení přílohyfilesystem.py diff --git a/10ReadPozadavky/0704 Plne funkční uložení do mysql.py b/10ReadPozadavky/0704 Plne funkční uložení do mysql.py index e69de29..1b448de 100644 --- a/10ReadPozadavky/0704 Plne funkční uložení do mysql.py +++ b/10ReadPozadavky/0704 Plne funkční uložení do mysql.py @@ -0,0 +1,227 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" +Download all attachments for pozadavky where attachmentsProcessed IS NULL +and (optionally) createdAt is newer than a configurable cutoff date. +Store them in MySQL table `medevio_downloads`, and update pozadavky.attachmentsProcessed = NOW(). +""" + +import zlib +import json +import requests +import pymysql +from pathlib import Path +from datetime import datetime +import time + +# ============================== +# 🔧 CONFIGURATION +# ============================== +TOKEN_PATH = Path("token.txt") +CLINIC_SLUG = "mudr-buzalkova" + +DB_CONFIG = { + "host": "192.168.1.76", + "port": 3307, + "user": "root", + "password": "Vlado9674+", + "database": "medevio", + "charset": "utf8mb4", + "cursorclass": pymysql.cursors.DictCursor, +} + +# ✅ Optional: Only process requests created after this date +# Leave empty ("") to process all +CREATED_AFTER = "2024-12-01" # 🕓 Adjust freely, or set to "" for no limit + +GRAPHQL_QUERY = r""" +query ClinicRequestDetail_GetPatientRequest2($requestId: UUID!) { + patientRequestMedicalRecords: listMedicalRecordsForPatientRequest( + attachmentTypes: [ECRF_FILL_ATTACHMENT, MESSAGE_ATTACHMENT, PATIENT_REQUEST_ATTACHMENT] + patientRequestId: $requestId + pageInfo: {first: 100, offset: 0} + ) { + attachmentType + id + medicalRecord { + contentType + description + downloadUrl + id + url + visibleToPatient + } + } +} +""" + +# ============================== +# 🧮 HELPERS +# ============================== +def short_crc8(uuid_str: str) -> str: + """Return deterministic 8-char hex string from any input string (CRC32).""" + return f"{zlib.crc32(uuid_str.encode('utf-8')) & 0xffffffff:08x}" + +def extract_filename_from_url(url: str) -> str: + """Extracts filename from S3-style URL (between last '/' and first '?').""" + try: + return url.split("/")[-1].split("?")[0] + except Exception: + return "unknown_filename" + +def read_token(p: Path) -> str: + """Read Bearer token from file.""" + tok = p.read_text(encoding="utf-8").strip() + if tok.startswith("Bearer "): + tok = tok.split(" ", 1)[1] + return tok + +# ============================== +# 📡 FETCH ATTACHMENTS +# ============================== +def fetch_attachments(headers, request_id): + variables = {"requestId": request_id} + payload = { + "operationName": "ClinicRequestDetail_GetPatientRequest2", + "query": GRAPHQL_QUERY, + "variables": variables, + } + r = requests.post("https://api.medevio.cz/graphql", json=payload, headers=headers, timeout=30) + if r.status_code != 200: + print(f"❌ HTTP {r.status_code} for request {request_id}") + return [] + data = r.json().get("data", {}).get("patientRequestMedicalRecords", []) + return data + +# ============================== +# 💾 SAVE TO MYSQL (with skip) +# ============================== +def insert_download(cur, req_id, a, m, jmeno, prijmeni, created_date, existing_ids): + attachment_id = a.get("id") + if attachment_id in existing_ids: + print(f" ⏭️ Skipping already downloaded attachment {attachment_id}") + return False + + url = m.get("downloadUrl") + if not url: + print(" ⚠️ No download URL") + return False + + filename = extract_filename_from_url(url) + try: + r = requests.get(url, timeout=30) + r.raise_for_status() + content = r.content + except Exception as e: + print(f" ⚠️ Failed to download {url}: {e}") + return False + + file_size = len(content) + attachment_type = a.get("attachmentType") + content_type = m.get("contentType") + + cur.execute(""" + INSERT INTO medevio_downloads ( + request_id, attachment_id, attachment_type, filename, + content_type, file_size, pacient_jmeno, pacient_prijmeni, + created_at, file_content + ) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) + ON DUPLICATE KEY UPDATE + file_content = VALUES(file_content), + file_size = VALUES(file_size), + downloaded_at = NOW() + """, ( + req_id, + attachment_id, + attachment_type, + filename, + content_type, + file_size, + jmeno, + prijmeni, + created_date, + content + )) + existing_ids.add(attachment_id) + print(f" 💾 Saved {filename} ({file_size/1024:.1f} kB)") + return True + +# ============================== +# 🧠 MAIN +# ============================== +def main(): + token = read_token(TOKEN_PATH) + headers = { + "Authorization": f"Bearer {token}", + "Content-Type": "application/json", + "Accept": "application/json", + } + + conn = pymysql.connect(**DB_CONFIG) + + print("📦 Loading list of already downloaded attachments...") + with conn.cursor() as cur: + cur.execute("SELECT attachment_id FROM medevio_downloads") + existing_ids = {row["attachment_id"] for row in cur.fetchall()} + print(f"✅ Found {len(existing_ids)} attachments already saved.") + + # ✅ Dynamic SQL with optional createdAt filter + sql = """ + SELECT id, displayTitle, pacient_prijmeni, pacient_jmeno, createdAt + FROM pozadavky + WHERE attachmentsProcessed IS NULL + """ + params = [] + if CREATED_AFTER: + sql += " AND createdAt >= %s" + params.append(CREATED_AFTER) + + with conn.cursor() as cur: + cur.execute(sql, params) + rows = cur.fetchall() + + print(f"📋 Found {len(rows)} pozadavky to process (attachmentsProcessed IS NULL" + + (f", created >= {CREATED_AFTER}" if CREATED_AFTER else "") + ")") + + for i, row in enumerate(rows, 1): + req_id = row["id"] + prijmeni = row.get("pacient_prijmeni") or "Neznamy" + jmeno = row.get("pacient_jmeno") or "" + created = row.get("createdAt") + + try: + created_date = datetime.strptime(str(created), "%Y-%m-%d %H:%M:%S") + except Exception: + created_date = None + + print(f"\n[{i}/{len(rows)}] 🧾 {prijmeni}, {jmeno} ({req_id})") + + attachments = fetch_attachments(headers, req_id) + if not attachments: + print(" ⚠️ No attachments found") + with conn.cursor() as cur: + cur.execute("UPDATE pozadavky SET attachmentsProcessed = NOW() WHERE id = %s", (req_id,)) + conn.commit() + continue + + with conn.cursor() as cur: + for a in attachments: + m = a.get("medicalRecord") or {} + insert_download(cur, req_id, a, m, jmeno, prijmeni, created_date, existing_ids) + conn.commit() + + # ✅ mark processed + with conn.cursor() as cur: + cur.execute("UPDATE pozadavky SET attachmentsProcessed = NOW() WHERE id = %s", (req_id,)) + conn.commit() + + print(f" ✅ {len(attachments)} attachments processed for {prijmeni}, {jmeno}") + time.sleep(0.3) # polite API delay + + conn.close() + print("\n✅ Done! All new attachments processed and pozadavky updated.") + +# ============================== +if __name__ == "__main__": + main() diff --git a/10ReadPozadavky/ReadPozadavkySaveMySql.py b/10ReadPozadavky/ReadPozadavkySaveMySql.py index 42f4775..ed314c5 100644 --- a/10ReadPozadavky/ReadPozadavkySaveMySql.py +++ b/10ReadPozadavky/ReadPozadavkySaveMySql.py @@ -8,12 +8,12 @@ from datetime import datetime import time import time, socket -for _ in range(30): - try: - socket.create_connection(("127.0.0.1", 3307), timeout=3).close() - break - except OSError: - time.sleep(10) +# for _ in range(30): +# try: +# socket.create_connection(("127.0.0.1", 3307), timeout=3).close() +# break +# except OSError: +# time.sleep(10) # ================================ # 🔧 CONFIGURATION # ================================ @@ -23,7 +23,7 @@ BATCH_SIZE = 100 DONE_LIMIT = 200 # only last 200 DONE DB_CONFIG = { - "host": "127.0.0.1", + "host": "192.168.1.76", "port": 3307, "user": "root", "password": "Vlado9674+", diff --git a/20SaveDownloads/10 SaveToFilesystem.py b/20SaveDownloads/10 SaveToFilesystem.py new file mode 100644 index 0000000..000c9c1 --- /dev/null +++ b/20SaveDownloads/10 SaveToFilesystem.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import os +import zlib +import pymysql +import re +from pathlib import Path +from datetime import datetime + +# ============================== +# ⚙️ CONFIGURATION +# ============================== +DB_CONFIG = { + "host": "192.168.1.76", + "port": 3307, + "user": "root", + "password": "Vlado9674+", + "database": "medevio", + "charset": "utf8mb4", +} + +BASE_DIR = Path(r"U:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP") +BASE_DIR.mkdir(parents=True, exist_ok=True) + + +def sanitize_name(name: str) -> str: + """Replace invalid filename characters with underscore.""" + return re.sub(r'[<>:"/\\|?*\x00-\x1F]', "_", name).strip() + + +# ============================== +# 📦 STREAMING EXPORT WITH TRIANGLE CHECK +# ============================== +conn = pymysql.connect(**DB_CONFIG) +cur_meta = conn.cursor(pymysql.cursors.DictCursor) +cur_blob = conn.cursor() + +cur_meta.execute(""" + SELECT id, request_id, attachment_id, filename, pacient_jmeno, + pacient_prijmeni, created_at, downloaded_at + FROM medevio_downloads + WHERE file_content IS NOT NULL; +""") + +rows = cur_meta.fetchall() +print(f"📋 Found {len(rows)} records to check/export") + +skipped, exported = 0, 0 + +for r in rows: + try: + created = r["created_at"] or r["downloaded_at"] or datetime.now() + date_str = created.strftime("%Y-%m-%d") + + prijmeni = sanitize_name(r["pacient_prijmeni"] or "Unknown") + jmeno = sanitize_name(r["pacient_jmeno"] or "") + + crc = f"{zlib.crc32(r['request_id'].encode('utf-8')) & 0xFFFFFFFF:08X}" + + # Base (non-triangle) and processed (triangle) folder variants + base_folder = sanitize_name(f"{date_str} {prijmeni}, {jmeno} {crc}") + tri_folder = sanitize_name(f"{date_str}▲ {prijmeni}, {jmeno} {crc}") + + base_path = BASE_DIR / base_folder + tri_path = BASE_DIR / tri_folder + + filename = sanitize_name(r["filename"] or f"unknown_{r['id']}.bin") + file_path_base = base_path / filename + file_path_tri = tri_path / filename + + # 🟡 Skip if exists in either version + if file_path_base.exists() or file_path_tri.exists(): + skipped += 1 + found_in = "▲" if file_path_tri.exists() else "" + print(f"⏭️ Skipping existing{found_in}: {filename}") + continue + + # Make sure base folder exists before saving + base_path.mkdir(parents=True, exist_ok=True) + + # 2️⃣ Fetch blob + cur_blob.execute("SELECT file_content FROM medevio_downloads WHERE id = %s", (r["id"],)) + blob = cur_blob.fetchone()[0] + + if blob: + with open(file_path_base, "wb") as f: + f.write(blob) + exported += 1 + print(f"✅ Saved: {file_path_base.relative_to(BASE_DIR)}") + else: + print(f"⚠️ No content for id={r['id']}") + + except Exception as e: + print(f"❌ Error for id={r['id']}: {e}") + +cur_blob.close() +cur_meta.close() +conn.close() + +print(f"\n🎯 Export complete — {exported} new files saved, {skipped} skipped.\n")