Files
medevio/10ReadPozadavky/0704 Plne funkční uložení do mysql.py
2025-11-10 21:21:51 +01:00

210 lines
6.2 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Download all 'Odeslat lékařskou zprávu' attachments from Medevio API
and store them (including binary content) directly into MySQL table `medevio_downloads`.
Each attachment (PDF, image, etc.) is fetched once and saved as LONGBLOB.
Duplicate protection is ensured via UNIQUE KEY on `attachment_id`.
"""
import json
import os
import time
import zlib
from datetime import datetime
from pathlib import Path

import pymysql
import requests
# ==============================
# 🔧 CONFIGURATION
# ==============================
# File holding the API bearer token (read by read_token()).
TOKEN_PATH = Path("token.txt")
# NOTE(review): not referenced anywhere in the visible part of this file.
CLINIC_SLUG = "mudr-buzalkova"
# MySQL connection settings passed verbatim to pymysql.connect().
# Each value can be overridden through an environment variable; the literals
# are only fallbacks that keep existing deployments working unchanged.
DB_CONFIG = {
    "host": os.environ.get("MEDEVIO_DB_HOST", "192.168.1.76"),
    "port": int(os.environ.get("MEDEVIO_DB_PORT", "3307")),
    "user": os.environ.get("MEDEVIO_DB_USER", "root"),
    # SECURITY: a password committed to source control should be treated as
    # leaked. Set MEDEVIO_DB_PASSWORD, rotate the credential, and then drop
    # this fallback literal.
    "password": os.environ.get("MEDEVIO_DB_PASSWORD", "Vlado9674+"),
    "database": os.environ.get("MEDEVIO_DB_NAME", "medevio"),
    "charset": "utf8mb4",
    # Rows come back as dicts, which main() relies on (row["id"], row.get(...)).
    "cursorclass": pymysql.cursors.DictCursor,
}
# GraphQL document sent by fetch_attachments(). For one patient request
# ($requestId) it lists the medical-record attachments of the three listed
# attachment types and returns, per attachment, its type and id plus the
# record's content type, description, download URL, id, url and patient
# visibility flag.
# NOTE: pageInfo is fixed to {first: 100, offset: 0}, so only the first page of
# results is requested; no further pages are fetched anywhere in this file.
GRAPHQL_QUERY = r"""
query ClinicRequestDetail_GetPatientRequest2($requestId: UUID!) {
patientRequestMedicalRecords: listMedicalRecordsForPatientRequest(
attachmentTypes: [ECRF_FILL_ATTACHMENT, MESSAGE_ATTACHMENT, PATIENT_REQUEST_ATTACHMENT]
patientRequestId: $requestId
pageInfo: {first: 100, offset: 0}
) {
attachmentType
id
medicalRecord {
contentType
description
downloadUrl
id
url
visibleToPatient
}
}
}
"""
# ==============================
# 🧮 HELPERS
# ==============================
def short_crc8(uuid_str: str) -> str:
    """Hash *uuid_str* with CRC32 and return it as 8 lowercase hex digits.

    The input is UTF-8 encoded before hashing; the same input always produces
    the same output.
    """
    raw = uuid_str.encode("utf-8")
    checksum = zlib.crc32(raw) & 0xFFFFFFFF
    return format(checksum, "08x")
def extract_filename_from_url(url: str) -> str:
    """Return the last path segment of *url*, ignoring any query string.

    The query string is removed first, so a '/' that appears inside it cannot
    be mistaken for a path separator.

    Args:
        url: Download URL, e.g. ``https://host/dir/report.pdf?sig=...``.

    Returns:
        The file name, or ``"unknown_filename"`` when *url* is not a string or
        its path has no final segment (for example it ends with '/').
    """
    if not isinstance(url, str):
        return "unknown_filename"
    path = url.split("?", 1)[0]
    return path.rsplit("/", 1)[-1] or "unknown_filename"
def read_token(p: Path) -> str:
    """Load the API token stored in file *p*.

    Surrounding whitespace is removed, and a leading ``"Bearer "`` prefix is
    dropped so that only the bare token is returned.
    """
    prefix = "Bearer "
    raw = p.read_text(encoding="utf-8").strip()
    return raw[len(prefix):] if raw.startswith(prefix) else raw
# ==============================
# 📡 FETCH ATTACHMENTS
# ==============================
def fetch_attachments(headers, request_id):
    """Fetch the attachment records of one patient request from the GraphQL API.

    Posts GRAPHQL_QUERY to https://api.medevio.cz/graphql with *request_id* as
    the ``requestId`` variable.

    Args:
        headers: HTTP headers for the POST (main() supplies the bearer
            Authorization header and JSON content types).
        request_id: Id of the patient request whose attachments are listed.

    Returns:
        The ``patientRequestMedicalRecords`` list from the response. An empty
        list is returned on a network error, a non-200 status, a non-JSON
        body, or a response whose ``data`` is missing or null, so one failing
        request does not abort the whole run.
    """
    payload = {
        "operationName": "ClinicRequestDetail_GetPatientRequest2",
        "query": GRAPHQL_QUERY,
        "variables": {"requestId": request_id},
    }
    try:
        r = requests.post(
            "https://api.medevio.cz/graphql",
            json=payload,
            headers=headers,
            timeout=30,
        )
    except requests.RequestException as e:
        print(f"❌ Request failed for request {request_id}: {e}")
        return []
    if r.status_code != 200:
        print(f"❌ HTTP {r.status_code} for request {request_id}")
        return []
    try:
        body = r.json()
    except ValueError as e:
        print(f"❌ Invalid JSON for request {request_id}: {e}")
        return []
    if not isinstance(body, dict):
        return []
    if body.get("errors"):
        # GraphQL reports failures in the body, typically with HTTP 200.
        print(f"⚠️ GraphQL errors for request {request_id}: {body['errors']}")
    # `data` may be present but null, so `.get("data", {})` is not enough.
    data = body.get("data") or {}
    # NOTE: the query asks for the first 100 records only (see GRAPHQL_QUERY).
    return data.get("patientRequestMedicalRecords") or []
# ==============================
# 💾 SAVE TO MYSQL (with skip)
# ==============================
def insert_download(cur, req_id, a, m, jmeno, prijmeni, created_date, existing_ids):
    """Download one attachment and upsert it into `medevio_downloads`.

    Nothing is downloaded or written when the attachment has no id, is already
    listed in *existing_ids*, has no download URL, or the download fails.

    Args:
        cur: Open DB cursor used for the INSERT; committing is left to the caller.
        req_id: Id of the patient request the attachment belongs to.
        a: Attachment record; ``"id"`` and ``"attachmentType"`` are read.
        m: The attachment's medical-record dict; ``"downloadUrl"`` and
            ``"contentType"`` are read.
        jmeno: Stored in column ``pacient_jmeno``.
        prijmeni: Stored in column ``pacient_prijmeni``.
        created_date: Stored in column ``created_at`` (may be None).
        existing_ids: Set of attachment ids already stored; the id is added to
            it after a successful insert.

    Returns:
        None.
    """
    attachment_id = a.get("id")
    if not attachment_id:
        # Without an id the UNIQUE key cannot protect against duplicates.
        print(" ⚠️ Attachment without id, skipping")
        return
    if attachment_id in existing_ids:
        print(f" ⏭️ Skipping already downloaded attachment {attachment_id}")
        return
    url = m.get("downloadUrl")
    if not url:
        print(" ⚠️ No download URL")
        return
    filename = extract_filename_from_url(url)
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        content = r.content
    except requests.RequestException as e:
        # Only transport/HTTP failures are best-effort; other errors propagate.
        print(f" ⚠️ Failed to download {url}: {e}")
        return
    file_size = len(content)
    attachment_type = a.get("attachmentType")
    content_type = m.get("contentType")
    # Upsert: on a duplicate key the stored file, its size and the download
    # timestamp are refreshed instead of raising.
    cur.execute("""
INSERT INTO medevio_downloads (
request_id, attachment_id, attachment_type, filename,
content_type, file_size, pacient_jmeno, pacient_prijmeni,
created_at, file_content
) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
ON DUPLICATE KEY UPDATE
file_content = VALUES(file_content),
file_size = VALUES(file_size),
downloaded_at = NOW()
""", (
        req_id,
        attachment_id,
        attachment_type,
        filename,
        content_type,
        file_size,
        jmeno,
        prijmeni,
        created_date,
        content,
    ))
    print(f" 💾 Saved {filename} ({file_size/1024:.1f} kB)")
    existing_ids.add(attachment_id)  # add to skip list
# ==============================
# 🧠 MAIN
# ==============================
def _parse_created(created):
    """Convert a `createdAt` DB value to a datetime, or None if it cannot be parsed."""
    if isinstance(created, datetime):
        # Already a datetime: a str/strptime round trip would fail as soon as
        # the value carries microseconds.
        return created
    try:
        return datetime.strptime(str(created), "%Y-%m-%d %H:%M:%S")
    except ValueError:
        return None


def main():
    """Store the attachments of every 'Odeslat lékařskou zprávu' request in MySQL.

    Steps:
      1. Read the API token and open the database connection.
      2. Load the attachment ids already present in `medevio_downloads`.
      3. Select the matching requests from table `pozadavky`.
      4. For each request, fetch its attachments, store the new ones and
         commit, pausing briefly between requests.

    The database connection is closed even when an error aborts the run.
    """
    token = read_token(TOKEN_PATH)
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
        "Accept": "application/json",
    }
    conn = pymysql.connect(**DB_CONFIG)
    try:
        print("📦 Loading list of already downloaded attachments...")
        with conn.cursor() as cur:
            cur.execute("SELECT attachment_id FROM medevio_downloads")
            existing_ids = {row["attachment_id"] for row in cur.fetchall()}
        print(f"✅ Found {len(existing_ids)} attachments already saved.")
        with conn.cursor() as cur:
            cur.execute("""
SELECT id, displayTitle, pacient_prijmeni, pacient_jmeno, createdAt
FROM pozadavky
WHERE displayTitle = 'Odeslat lékařskou zprávu'
""")
            rows = cur.fetchall()
        print(f"📋 Found {len(rows)} 'Odeslat lékařskou zprávu' requests")
        for i, row in enumerate(rows, 1):
            req_id = row["id"]
            prijmeni = row.get("pacient_prijmeni") or "Neznamy"
            jmeno = row.get("pacient_jmeno") or ""
            created_date = _parse_created(row.get("createdAt"))
            print(f"\n[{i}/{len(rows)}] 🧾 {prijmeni}, {jmeno} ({req_id})")
            attachments = fetch_attachments(headers, req_id)
            if not attachments:
                print(" ⚠️ No attachments")
                continue
            with conn.cursor() as cur:
                for a in attachments:
                    m = a.get("medicalRecord") or {}
                    insert_download(cur, req_id, a, m, jmeno, prijmeni, created_date, existing_ids)
            # Commit per request so an interrupted run keeps finished requests.
            conn.commit()
            print(f"{len(attachments)} attachments saved for {prijmeni}, {jmeno}")
            time.sleep(0.5)  # be nice to the API
    finally:
        conn.close()
    print("\n✅ Done! All attachments stored in MySQL table `medevio_downloads`.")
# ==============================
# Run the download only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()