notebook
This commit is contained in:
@@ -0,0 +1,227 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
Download all attachments for pozadavky where attachmentsProcessed IS NULL
|
||||
and (optionally) createdAt is newer than a configurable cutoff date.
|
||||
Store them in MySQL table `medevio_downloads`, and update pozadavky.attachmentsProcessed = NOW().
|
||||
"""
|
||||
|
||||
import zlib
|
||||
import json
|
||||
import requests
|
||||
import pymysql
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import time
|
||||
|
||||
# ==============================
|
||||
# 🔧 CONFIGURATION
|
||||
# ==============================
|
||||
TOKEN_PATH = Path("token.txt")  # file holding the API token; read by read_token() in main()
CLINIC_SLUG = "mudr-buzalkova"  # NOTE(review): not referenced in the visible code — confirm it is still needed

# MySQL connection settings, expanded as keyword arguments into pymysql.connect().
# NOTE(review): the root password is hard-coded in a committed file; consider
# loading it from an environment variable or an untracked config file instead.
DB_CONFIG = {
    "host": "192.168.1.76",
    "port": 3307,
    "user": "root",
    "password": "Vlado9674+",
    "database": "medevio",
    "charset": "utf8mb4",
    "cursorclass": pymysql.cursors.DictCursor,  # rows are returned as dicts keyed by column name
}

# ✅ Optional: Only process requests created after this date
# Leave empty ("") to process all
CREATED_AFTER = "2024-12-01" # 🕓 Adjust freely, or set to "" for no limit
|
||||
|
||||
GRAPHQL_QUERY = r"""
|
||||
query ClinicRequestDetail_GetPatientRequest2($requestId: UUID!) {
|
||||
patientRequestMedicalRecords: listMedicalRecordsForPatientRequest(
|
||||
attachmentTypes: [ECRF_FILL_ATTACHMENT, MESSAGE_ATTACHMENT, PATIENT_REQUEST_ATTACHMENT]
|
||||
patientRequestId: $requestId
|
||||
pageInfo: {first: 100, offset: 0}
|
||||
) {
|
||||
attachmentType
|
||||
id
|
||||
medicalRecord {
|
||||
contentType
|
||||
description
|
||||
downloadUrl
|
||||
id
|
||||
url
|
||||
visibleToPatient
|
||||
}
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
# ==============================
|
||||
# 🧮 HELPERS
|
||||
# ==============================
|
||||
def short_crc8(uuid_str: str) -> str:
    """Map any string to a stable, zero-padded 8-digit lowercase hex checksum.

    Despite the name, the checksum is a CRC32 of the UTF-8 encoded input
    (8 hex characters), not an 8-bit CRC.
    """
    checksum = zlib.crc32(uuid_str.encode("utf-8")) & 0xFFFFFFFF
    return format(checksum, "08x")
|
||||
|
||||
def extract_filename_from_url(url: str) -> str:
    """Return the last path segment of *url* with any query string removed.

    The segment is everything after the final '/' and before the first '?'
    that follows it. If *url* cannot be processed as a string (for example
    it is ``None``), the placeholder ``"unknown_filename"`` is returned.
    """
    try:
        last_segment = url.rsplit("/", 1)[-1]
        name, _, _query = last_segment.partition("?")
        return name
    except Exception:
        # Best-effort helper: never let a malformed value abort the caller.
        return "unknown_filename"
|
||||
|
||||
def read_token(p: Path) -> str:
    """Load the API token stored in file *p*.

    The file is read as UTF-8 and surrounding whitespace is stripped. When the
    content starts with the literal ``"Bearer "`` scheme prefix, that prefix is
    dropped so only the bare token is returned.
    """
    prefix = "Bearer "
    raw = p.read_text(encoding="utf-8").strip()
    return raw[len(prefix):] if raw.startswith(prefix) else raw
|
||||
|
||||
# ==============================
|
||||
# 📡 FETCH ATTACHMENTS
|
||||
# ==============================
|
||||
def fetch_attachments(headers, request_id):
    """POST the attachment query for one request and return its record list.

    Args:
        headers: HTTP headers sent with the request (main() supplies the
            Authorization / Content-Type / Accept headers).
        request_id: Value bound to the query's ``$requestId`` variable.

    Returns:
        The ``patientRequestMedicalRecords`` value found under ``data`` in the
        JSON response (an empty list when either key is absent), or ``[]``
        after printing a message when the HTTP status is not 200.

    NOTE(review): an HTTP failure and "no attachments" both yield ``[]``, so
    the caller cannot tell them apart.
    """
    response = requests.post(
        "https://api.medevio.cz/graphql",
        json={
            "operationName": "ClinicRequestDetail_GetPatientRequest2",
            "query": GRAPHQL_QUERY,
            "variables": {"requestId": request_id},
        },
        headers=headers,
        timeout=30,
    )

    if response.status_code == 200:
        return response.json().get("data", {}).get("patientRequestMedicalRecords", [])

    print(f"❌ HTTP {response.status_code} for request {request_id}")
    return []
|
||||
|
||||
# ==============================
|
||||
# 💾 SAVE TO MYSQL (with skip)
|
||||
# ==============================
|
||||
def insert_download(cur, req_id, a, m, jmeno, prijmeni, created_date, existing_ids):
    """Download one attachment and upsert it into ``medevio_downloads``.

    Args:
        cur: Open DB cursor used for the INSERT (the caller commits).
        req_id: Identifier of the owning request, stored as ``request_id``.
        a: Attachment dict from the API; ``id`` and ``attachmentType`` are read.
        m: The attachment's ``medicalRecord`` dict; ``downloadUrl`` and
            ``contentType`` are read.
        jmeno: Patient first name, stored as ``pacient_jmeno``.
        prijmeni: Patient surname, stored as ``pacient_prijmeni``.
        created_date: Value stored in the ``created_at`` column.
        existing_ids: Mutable set of attachment ids already stored; the new id
            is added to it after a successful insert.

    Returns:
        True when the file was downloaded and written to the database; False
        when it was skipped (already stored), has no download URL, or the
        download failed.
    """
    att_id = a.get("id")
    if att_id in existing_ids:
        print(f" ⏭️ Skipping already downloaded attachment {att_id}")
        return False

    download_url = m.get("downloadUrl")
    if not download_url:
        print(" ⚠️ No download URL")
        return False

    filename = extract_filename_from_url(download_url)

    try:
        response = requests.get(download_url, timeout=30)
        response.raise_for_status()
        payload = response.content
    except Exception as exc:
        # Any network/HTTP problem is reported and treated as "not saved".
        print(f" ⚠️ Failed to download {download_url}: {exc}")
        return False

    size_bytes = len(payload)

    # Upsert: re-inserting an existing key refreshes content, size and timestamp.
    upsert_sql = """
        INSERT INTO medevio_downloads (
            request_id, attachment_id, attachment_type, filename,
            content_type, file_size, pacient_jmeno, pacient_prijmeni,
            created_at, file_content
        ) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
        ON DUPLICATE KEY UPDATE
            file_content = VALUES(file_content),
            file_size = VALUES(file_size),
            downloaded_at = NOW()
    """
    row_values = (
        req_id,
        att_id,
        a.get("attachmentType"),
        filename,
        m.get("contentType"),
        size_bytes,
        jmeno,
        prijmeni,
        created_date,
        payload,
    )
    cur.execute(upsert_sql, row_values)

    existing_ids.add(att_id)
    print(f" 💾 Saved {filename} ({size_bytes/1024:.1f} kB)")
    return True
|
||||
|
||||
# ==============================
|
||||
# 🧠 MAIN
|
||||
# ==============================
|
||||
def _mark_processed(conn, req_id):
    """Stamp ``pozadavky.attachmentsProcessed = NOW()`` for one request and commit."""
    with conn.cursor() as cur:
        cur.execute("UPDATE pozadavky SET attachmentsProcessed = NOW() WHERE id = %s", (req_id,))
    conn.commit()


def _as_datetime(value):
    """Return *value* as a datetime, or None when it cannot be interpreted.

    datetime objects are passed through unchanged (so fractional seconds and
    tzinfo survive); other values are parsed from their string form as
    ``YYYY-MM-DD HH:MM:SS`` with optional fractional seconds.
    """
    if isinstance(value, datetime):
        return value
    text = str(value)
    for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M:%S.%f"):
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            continue
    return None


def main():
    """Download attachments for every unprocessed request and record the result.

    Steps:
      1. Read the API token and build the request headers.
      2. Load the ids of attachments already stored in ``medevio_downloads``.
      3. Select ``pozadavky`` rows with ``attachmentsProcessed IS NULL``
         (optionally limited by ``CREATED_AFTER``).
      4. For each request, fetch its attachments and store every new one.
      5. Stamp ``attachmentsProcessed`` only when no download failed, so a
         request with a failed download is picked up again on the next run.

    The database connection is always closed, even when an error aborts the run.
    """
    token = read_token(TOKEN_PATH)
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
        "Accept": "application/json",
    }

    conn = pymysql.connect(**DB_CONFIG)
    try:
        print("📦 Loading list of already downloaded attachments...")
        with conn.cursor() as cur:
            cur.execute("SELECT attachment_id FROM medevio_downloads")
            existing_ids = {row["attachment_id"] for row in cur.fetchall()}
        print(f"✅ Found {len(existing_ids)} attachments already saved.")

        # ✅ Dynamic SQL with optional createdAt filter
        sql = """
            SELECT id, displayTitle, pacient_prijmeni, pacient_jmeno, createdAt
            FROM pozadavky
            WHERE attachmentsProcessed IS NULL
        """
        params = []
        if CREATED_AFTER:
            sql += " AND createdAt >= %s"
            params.append(CREATED_AFTER)

        with conn.cursor() as cur:
            cur.execute(sql, params)
            rows = cur.fetchall()

        print(f"📋 Found {len(rows)} pozadavky to process (attachmentsProcessed IS NULL"
              + (f", created >= {CREATED_AFTER}" if CREATED_AFTER else "") + ")")

        for i, row in enumerate(rows, 1):
            req_id = row["id"]
            prijmeni = row.get("pacient_prijmeni") or "Neznamy"
            jmeno = row.get("pacient_jmeno") or ""
            created_date = _as_datetime(row.get("createdAt"))

            print(f"\n[{i}/{len(rows)}] 🧾 {prijmeni}, {jmeno} ({req_id})")

            # NOTE(review): fetch_attachments() returns [] for a non-200 HTTP
            # status as well, so such requests are also marked processed here.
            attachments = fetch_attachments(headers, req_id)
            if not attachments:
                print(" ⚠️ No attachments found")
                _mark_processed(conn, req_id)
                continue

            failed = 0
            with conn.cursor() as cur:
                for a in attachments:
                    m = a.get("medicalRecord") or {}
                    saved = insert_download(cur, req_id, a, m, jmeno, prijmeni, created_date, existing_ids)
                    # insert_download() returns False for "already stored" and
                    # "no URL" too; only a URL-bearing attachment that is still
                    # missing from existing_ids is a real download failure.
                    if not saved and m.get("downloadUrl") and a.get("id") not in existing_ids:
                        failed += 1
            conn.commit()

            if failed:
                # Leave attachmentsProcessed NULL so the next run retries;
                # attachments saved above are skipped via existing_ids.
                print(f" ⚠️ {failed} attachment(s) failed to download; request left unprocessed for retry")
            else:
                # ✅ mark processed
                _mark_processed(conn, req_id)
                print(f" ✅ {len(attachments)} attachments processed for {prijmeni}, {jmeno}")

            time.sleep(0.3)  # polite API delay
    finally:
        conn.close()

    print("\n✅ Done! All new attachments processed and pozadavky updated.")


# ==============================
if __name__ == "__main__":
    main()
|
||||
|
||||
@@ -8,12 +8,12 @@ from datetime import datetime
|
||||
import time
|
||||
|
||||
import time, socket
|
||||
for _ in range(30):
|
||||
try:
|
||||
socket.create_connection(("127.0.0.1", 3307), timeout=3).close()
|
||||
break
|
||||
except OSError:
|
||||
time.sleep(10)
|
||||
# for _ in range(30):
|
||||
# try:
|
||||
# socket.create_connection(("127.0.0.1", 3307), timeout=3).close()
|
||||
# break
|
||||
# except OSError:
|
||||
# time.sleep(10)
|
||||
# ================================
|
||||
# 🔧 CONFIGURATION
|
||||
# ================================
|
||||
@@ -23,7 +23,7 @@ BATCH_SIZE = 100
|
||||
DONE_LIMIT = 200 # only last 200 DONE
|
||||
|
||||
DB_CONFIG = {
|
||||
"host": "127.0.0.1",
|
||||
"host": "192.168.1.76",
|
||||
"port": 3307,
|
||||
"user": "root",
|
||||
"password": "Vlado9674+",
|
||||
|
||||
101
20SaveDownloads/10 SaveToFilesystem.py
Normal file
101
20SaveDownloads/10 SaveToFilesystem.py
Normal file
@@ -0,0 +1,101 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import os
|
||||
import zlib
|
||||
import pymysql
|
||||
import re
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
||||
# ==============================
|
||||
# ⚙️ CONFIGURATION
|
||||
# ==============================
|
||||
# MySQL connection settings, expanded as keyword arguments into pymysql.connect().
# NOTE(review): the root password is hard-coded in a committed file; consider
# loading it from an environment variable or an untracked config file instead.
DB_CONFIG = {
    "host": "192.168.1.76",
    "port": 3307,
    "user": "root",
    "password": "Vlado9674+",
    "database": "medevio",
    "charset": "utf8mb4",
}

# Root folder under which the export loop creates its sub-folders.
# It is created (with parents) as soon as this module runs.
BASE_DIR = Path(r"U:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP")
BASE_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
def sanitize_name(name: str) -> str:
    """Make *name* safe to use as a file or folder name.

    Each of the characters ``< > : " / \\ | ? *`` and every control character
    in the range 0x00-0x1F is replaced by an underscore; leading and trailing
    whitespace is then stripped from the result.
    """
    invalid_chars = re.compile(r'[<>:"/\\|?*\x00-\x1F]')
    cleaned = invalid_chars.sub("_", name)
    return cleaned.strip()
|
||||
|
||||
|
||||
# ==============================
|
||||
# 📦 STREAMING EXPORT WITH TRIANGLE CHECK
|
||||
# ==============================
|
||||
# Export every stored attachment to BASE_DIR unless it already exists there,
# either in the plain folder or in the "▲" (processed) variant of that folder.
conn = pymysql.connect(**DB_CONFIG)
skipped, exported = 0, 0

try:
    # Metadata only: blobs are fetched one at a time further down so the whole
    # table content is never held in memory at once.
    with conn.cursor(pymysql.cursors.DictCursor) as cur_meta:
        cur_meta.execute("""
            SELECT id, request_id, attachment_id, filename, pacient_jmeno,
                   pacient_prijmeni, created_at, downloaded_at
            FROM medevio_downloads
            WHERE file_content IS NOT NULL;
        """)
        rows = cur_meta.fetchall()

    print(f"📋 Found {len(rows)} records to check/export")

    with conn.cursor() as cur_blob:
        for r in rows:
            # A failure on one record is reported and must not stop the export.
            try:
                created = r["created_at"] or r["downloaded_at"] or datetime.now()
                date_str = created.strftime("%Y-%m-%d")

                prijmeni = sanitize_name(r["pacient_prijmeni"] or "Unknown")
                jmeno = sanitize_name(r["pacient_jmeno"] or "")

                # Short checksum of the request id keeps folder names unique per request.
                crc = f"{zlib.crc32(r['request_id'].encode('utf-8')) & 0xFFFFFFFF:08X}"

                # Base (non-triangle) and processed (triangle) folder variants
                base_folder = sanitize_name(f"{date_str} {prijmeni}, {jmeno} {crc}")
                tri_folder = sanitize_name(f"{date_str}▲ {prijmeni}, {jmeno} {crc}")

                base_path = BASE_DIR / base_folder
                tri_path = BASE_DIR / tri_folder

                filename = sanitize_name(r["filename"] or f"unknown_{r['id']}.bin")
                file_path_base = base_path / filename
                file_path_tri = tri_path / filename

                # 🟡 Skip if exists in either version
                if file_path_base.exists() or file_path_tri.exists():
                    skipped += 1
                    found_in = "▲" if file_path_tri.exists() else ""
                    print(f"⏭️ Skipping existing{found_in}: {filename}")
                    continue

                # 2️⃣ Fetch blob
                cur_blob.execute("SELECT file_content FROM medevio_downloads WHERE id = %s", (r["id"],))
                blob = cur_blob.fetchone()[0]

                if not blob:
                    print(f"⚠️ No content for id={r['id']}")
                    continue

                # Create the folder only when there is something to write,
                # so empty blobs do not leave empty folders behind.
                base_path.mkdir(parents=True, exist_ok=True)

                # Write to a temporary sibling and rename it into place: a
                # failed write must not leave a truncated file that later runs
                # would skip as "existing".
                tmp_path = file_path_base.with_name(file_path_base.name + ".part")
                try:
                    with open(tmp_path, "wb") as f:
                        f.write(blob)
                    tmp_path.replace(file_path_base)
                except Exception:
                    if tmp_path.exists():
                        tmp_path.unlink()
                    raise

                exported += 1
                print(f"✅ Saved: {file_path_base.relative_to(BASE_DIR)}")

            except Exception as e:
                print(f"❌ Error for id={r['id']}: {e}")
finally:
    # Always release the connection, even if the metadata query itself fails.
    conn.close()

print(f"\n🎯 Export complete — {exported} new files saved, {skipped} skipped.\n")
|
||||
Reference in New Issue
Block a user