This commit is contained in:
2025-11-13 12:03:31 +01:00
parent c349acf253
commit 11840507c1
11 changed files with 779 additions and 245 deletions

View File

@@ -0,0 +1,296 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Read conversation messages for pozadavky where messagesProcessed IS NULL
(Optionally filtered by createdAt), insert them into `medevio_conversation`,
and if a message has an attachment (medicalRecord), download it and save into
`medevio_downloads` (same logic as your attachments script).
Finally, mark pozadavky.messagesProcessed = NOW().
"""
import json
import os
import time
import zlib
from datetime import datetime
from pathlib import Path

import pymysql
import requests
# ==============================
# 🔧 CONFIGURATION
# ==============================
# File holding the API bearer token (with or without a leading "Bearer ").
TOKEN_PATH = Path("token.txt")

# SECURITY(review): the database credentials used to be hard-coded here and are
# therefore present in version control. Every connection setting can now be
# overridden through an environment variable; the literals below remain only as
# backward-compatible defaults. Rotate the password and remove the default once
# the environment is configured.
DB_CONFIG = {
    "host": os.environ.get("MEDEVIO_DB_HOST", "192.168.1.76"),
    "port": int(os.environ.get("MEDEVIO_DB_PORT", "3307")),
    "user": os.environ.get("MEDEVIO_DB_USER", "root"),
    "password": os.environ.get("MEDEVIO_DB_PASSWORD", "Vlado9674+"),
    "database": os.environ.get("MEDEVIO_DB_NAME", "medevio"),
    "charset": "utf8mb4",
    # Rows come back as dicts, which the rest of the script relies on
    # (e.g. row["id"], row["attachment_id"]).
    "cursorclass": pymysql.cursors.DictCursor,
}

# ✅ Optional: only process requests created on or after this date ("" = no limit)
CREATED_AFTER = "2024-01-01"
# GraphQL document posted by fetch_messages(). For one patient request it
# selects every message's core fields, the sender, and (when present) the
# attached medicalRecord including its download URLs.
# The operation name must match the "operationName" sent in the request payload.
GRAPHQL_QUERY_MESSAGES = r"""
query UseMessages_ListMessages($requestId: String!, $updatedSince: DateTime) {
messages: listMessages(patientRequestId: $requestId, updatedSince: $updatedSince) {
id
createdAt
updatedAt
readAt
text
type
sender {
id
name
surname
clinicId
}
medicalRecord {
id
description
contentType
url
downloadUrl
token
createdAt
updatedAt
}
}
}
"""
# ==============================
# 🧮 HELPERS
# ==============================
def short_crc8(uuid_str: str) -> str:
    """Return a short, stable 8-hex-digit fingerprint of ``uuid_str``.

    Despite the name, this is the CRC-32 of the UTF-8 encoded input, rendered
    as eight zero-padded lowercase hexadecimal characters.
    """
    checksum = zlib.crc32(uuid_str.encode("utf-8")) & 0xFFFFFFFF
    return format(checksum, "08x")
def extract_filename_from_url(url: str) -> str:
    """Return the last path segment of ``url``, without query or fragment.

    Args:
        url: The download URL. Non-string values (e.g. ``None``) are tolerated.

    Returns:
        The file name taken from the URL path, or ``"unknown_filename"`` when
        no name can be determined (non-string input, empty string, or a URL
        that ends with ``/``).
    """
    if not isinstance(url, str):
        return "unknown_filename"
    # Drop the query and fragment *before* looking for the last "/": signed
    # URLs may carry slashes inside the query string, which would otherwise be
    # mistaken for path separators.
    path = url.split("?", 1)[0].split("#", 1)[0]
    name = path.rsplit("/", 1)[-1]
    # An empty name would be useless as a stored file name.
    return name or "unknown_filename"
def read_token(p: Path) -> str:
    """Load the API token stored in file ``p``.

    The file content is read as UTF-8 and stripped of surrounding whitespace.
    If it starts with the ``"Bearer "`` scheme prefix, that prefix is removed
    so the caller always receives the bare token.
    """
    prefix = "Bearer "
    raw = p.read_text(encoding="utf-8").strip()
    return raw[len(prefix):] if raw.startswith(prefix) else raw
def parse_dt(s):
    """Parse an API timestamp into a naive ``datetime``.

    Accepted inputs (``T`` or a space may separate date and time):
      * ``YYYY-mm-dd HH:MM:SS`` (anything after the seconds is ignored)
      * ``YYYY-mm-dd HH:MM``
      * ``YYYY-mm-dd`` (interpreted as midnight)
      * an existing ``datetime`` instance, returned unchanged

    Args:
        s: The value to parse; may be ``None`` or empty.

    Returns:
        A ``datetime``, or ``None`` when ``s`` is empty, is not a string, or
        matches none of the supported formats.
    """
    if isinstance(s, datetime):
        return s
    if not s or not isinstance(s, str):
        return None
    # Keep only the first 19 characters ("YYYY-mm-dd HH:MM:SS") so fractional
    # seconds do not break strptime.
    # NOTE(review): this also discards any "Z"/UTC-offset suffix, so the result
    # is naive in whatever zone the API sent — presumably UTC; confirm.
    text = s.strip().replace("T", " ")[:19]
    for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M", "%Y-%m-%d"):
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            # Not this format; try the next one.
            continue
    return None
# ==============================
# 📡 FETCH MESSAGES
# ==============================
def fetch_messages(headers, request_id):
    """Fetch all conversation messages of one patient request.

    Args:
        headers: HTTP headers for the API call (including authorization).
        request_id: Id of the patient request whose messages are wanted.

    Returns:
        A list of message dicts as selected by ``GRAPHQL_QUERY_MESSAGES``.
        An empty list is returned when the request has no messages and also,
        as best-effort behaviour, when the server answers with a non-200
        status, a non-JSON body, or a response without message data.
        NOTE(review): callers therefore cannot tell "no messages" from
        "fetch failed"; consider signalling failures explicitly.

    Raises:
        requests.RequestException: On network-level failures (timeout,
            connection error); these are not swallowed here.
    """
    payload = {
        "operationName": "UseMessages_ListMessages",
        "query": GRAPHQL_QUERY_MESSAGES,
        # updatedSince is always sent as null — presumably "no delta filter,
        # return everything"; confirm against the API.
        "variables": {"requestId": request_id, "updatedSince": None},
    }
    r = requests.post(
        "https://api.medevio.cz/graphql",
        json=payload,
        headers=headers,
        timeout=30,
    )
    if r.status_code != 200:
        print(f"❌ HTTP {r.status_code} for messages of request {request_id}")
        return []

    try:
        body = r.json()
    except ValueError as e:
        # requests raises a ValueError subclass when the body is not JSON.
        print(f"❌ Invalid JSON in messages response for request {request_id}: {e}")
        return []
    if not isinstance(body, dict):
        print(f"❌ Unexpected messages response shape for request {request_id}")
        return []

    # GraphQL reports failures inside a 200 response; surface them instead of
    # silently returning nothing.
    if body.get("errors"):
        print(f"⚠️ GraphQL errors for request {request_id}: {body['errors']}")

    # "data" is null (not missing) when the operation failed, so a plain
    # .get("data", {}) default would still hand back None here.
    data = body.get("data")
    messages = data.get("messages") if isinstance(data, dict) else None
    return messages or []
# ==============================
# 💾 SAVE: conversation row
# ==============================
def insert_message(cur, req_id, msg):
    """Upsert one conversation message into ``medevio_conversation``.

    Args:
        cur: Open database cursor; the caller is responsible for committing.
        req_id: Id of the patient request the message belongs to.
        msg: Message dict as returned by the messages GraphQL query.

    The row is keyed by the message id; when it already exists, every column
    except ``id`` and ``request_id`` is overwritten with the new values.
    """
    author = msg.get("sender") or {}
    record = msg.get("medicalRecord") or {}

    # "name surname", skipping whichever part is missing; NULL if both are.
    name_parts = [part for part in (author.get("name"), author.get("surname")) if part]
    full_name = " ".join(name_parts).strip() or None

    params = (
        msg.get("id"),
        req_id,
        full_name,
        author.get("id"),
        author.get("clinicId"),
        msg.get("text"),
        parse_dt(msg.get("createdAt")),
        parse_dt(msg.get("readAt")),
        parse_dt(msg.get("updatedAt")),
        # Prefer the direct download link, fall back to the plain URL.
        record.get("downloadUrl") or record.get("url"),
        record.get("description"),
        record.get("contentType"),
    )

    statement = """
INSERT INTO medevio_conversation (
id, request_id, sender_name, sender_id, sender_clinic_id,
text, created_at, read_at, updated_at,
attachment_url, attachment_description, attachment_content_type
) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
ON DUPLICATE KEY UPDATE
sender_name = VALUES(sender_name),
sender_id = VALUES(sender_id),
sender_clinic_id = VALUES(sender_clinic_id),
text = VALUES(text),
created_at = VALUES(created_at),
read_at = VALUES(read_at),
updated_at = VALUES(updated_at),
attachment_url = VALUES(attachment_url),
attachment_description = VALUES(attachment_description),
attachment_content_type = VALUES(attachment_content_type)
"""
    cur.execute(statement, params)
# ==============================
# 💾 SAVE: download attachment (from message)
# ==============================
def insert_download_from_message(cur, req_id, msg, existing_ids):
    """Download a message's attachment and upsert it into ``medevio_downloads``.

    Args:
        cur: Open database cursor; the caller is responsible for committing.
        req_id: Id of the patient request the message belongs to.
        msg: Message dict; its ``medicalRecord`` entry describes the attachment.
        existing_ids: Set of attachment ids already stored. It is updated in
            place when a new attachment is saved.

    Returns:
        ``True`` if the attachment was downloaded and written, ``False`` if the
        message has no attachment id or URL, the attachment is already known,
        or the download failed.
    """
    record = msg.get("medicalRecord") or {}

    attachment_id = record.get("id")
    if not attachment_id:
        return False
    if attachment_id in existing_ids:
        print(f" ⏭️ Skipping already downloaded message-attachment {attachment_id}")
        return False

    # Prefer the direct download link, fall back to the plain URL.
    download_url = record.get("downloadUrl") or record.get("url")
    if not download_url:
        return False

    try:
        response = requests.get(download_url, timeout=30)
        response.raise_for_status()
        blob = response.content
    except Exception as e:
        # Best effort: a failed download must not abort the whole run.
        print(f" ⚠️ Failed to download message attachment {attachment_id}: {e}")
        return False

    filename = extract_filename_from_url(download_url)
    file_size = len(blob)

    # Patient names are not available at message level, so both name columns
    # are stored as NULL.
    row = (
        req_id,
        attachment_id,
        "MESSAGE_ATTACHMENT",
        filename,
        record.get("contentType"),
        file_size,
        None,
        None,
        parse_dt(msg.get("createdAt")),
        blob,
    )
    statement = """
INSERT INTO medevio_downloads (
request_id, attachment_id, attachment_type, filename,
content_type, file_size, pacient_jmeno, pacient_prijmeni,
created_at, file_content
) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
ON DUPLICATE KEY UPDATE
file_content = VALUES(file_content),
file_size = VALUES(file_size),
downloaded_at = NOW()
"""
    cur.execute(statement, row)

    existing_ids.add(attachment_id)
    print(f" 💾 Saved msg attachment {filename} ({file_size/1024:.1f} kB)")
    return True
# ==============================
# 🧠 MAIN
# ==============================
def _load_existing_attachment_ids(conn):
    """Return the set of attachment ids already stored in ``medevio_downloads``."""
    with conn.cursor() as cur:
        cur.execute("SELECT attachment_id FROM medevio_downloads")
        return {row["attachment_id"] for row in cur.fetchall()}


def _select_pending_requests(conn):
    """Return ``pozadavky`` rows whose messages have not been processed yet."""
    sql = """
        SELECT id, displayTitle, pacient_prijmeni, pacient_jmeno, createdAt
        FROM pozadavky
        WHERE messagesProcessed IS NULL
    """
    params = []
    if CREATED_AFTER:
        sql += " AND createdAt >= %s"
        params.append(CREATED_AFTER)
    with conn.cursor() as cur:
        cur.execute(sql, params)
        return cur.fetchall()


def _process_request(conn, headers, req_id, existing_ids):
    """Store all messages of one request and flag it processed; return the count."""
    messages = fetch_messages(headers, req_id)
    if not messages:
        print(" ⚠️ No messages found")
    try:
        with conn.cursor() as cur:
            for msg in messages:
                insert_message(cur, req_id, msg)
                # Also pull any message attachment into the downloads table.
                insert_download_from_message(cur, req_id, msg, existing_ids)
            cur.execute(
                "UPDATE pozadavky SET messagesProcessed = NOW() WHERE id = %s",
                (req_id,),
            )
        # One commit for rows + flag, so a request is never flagged as
        # processed without its messages having been stored (or vice versa).
        conn.commit()
    except Exception:
        conn.rollback()
        raise
    return len(messages)


def main():
    """Process every pending patient request.

    For each ``pozadavky`` row with ``messagesProcessed IS NULL`` (optionally
    limited by ``CREATED_AFTER``) the conversation is fetched from the API,
    stored together with its attachments, and the row is flagged as processed.

    A network failure while fetching one request is reported and that request
    is left unflagged, so it is retried on the next run. Database errors roll
    back the current request and propagate. The connection is always closed.

    NOTE(review): fetch_messages returns an empty list for HTTP errors as well
    as for genuinely empty conversations, so such requests are still flagged
    as processed here.
    """
    token = read_token(TOKEN_PATH)
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
        "Accept": "application/json",
    }
    conn = pymysql.connect(**DB_CONFIG)
    try:
        # Known attachment ids let us skip downloads that were already saved.
        print("📦 Loading list of already downloaded attachments...")
        existing_ids = _load_existing_attachment_ids(conn)
        print(f"✅ Found {len(existing_ids)} attachments already saved.")

        rows = _select_pending_requests(conn)
        print(f"📋 Found {len(rows)} pozadavky to process (messagesProcessed IS NULL"
              + (f", created >= {CREATED_AFTER}" if CREATED_AFTER else "") + ")")

        for i, row in enumerate(rows, 1):
            req_id = row["id"]
            prijmeni = row.get("pacient_prijmeni") or "Neznamy"
            jmeno = row.get("pacient_jmeno") or ""
            print(f"\n[{i}/{len(rows)}] 💬 {prijmeni}, {jmeno} ({req_id})")

            try:
                inserted = _process_request(conn, headers, req_id, existing_ids)
            except requests.RequestException as e:
                # Transient network problem: keep the request pending and
                # carry on with the rest of the batch.
                print(f" ⚠️ Could not fetch messages for {req_id}: {e}")
                continue

            if inserted:
                print(f"{inserted} messages processed for {prijmeni}, {jmeno}")
            time.sleep(0.3)  # polite API delay after every API call
    finally:
        conn.close()
    print("\n✅ Done! All new conversations processed and pozadavky updated.")
# ==============================
# Run the import only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()