Files
medevio/Medevio2.py
2025-09-21 21:35:39 +02:00

250 lines
9.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# extract_patient_detail.py
# Usage:
# 1) Put your medevio_storage.json path into STATE_FILE.
# 2) Set PATIENT_ID to a real UUID from your list.
# 3) Run: python extract_patient_detail.py
#
# Output: prints a dict to console and saves patient_<ID>.json in the current working directory.
from pathlib import Path
import json, sys, time, re
from playwright.sync_api import sync_playwright, TimeoutError as PWTimeout
# Path to the saved Playwright storage state that is loaded into the browser context.
STATE_FILE = r"medevio_storage.json"
# URL of the patient list; the detail is requested by appending ?pacient=<id> to it.
BASE_URL = "https://my.medevio.cz/mudr-buzalkova/klinika/pacienti"
# Identifier of the patient whose detail is extracted (also used in the output file name).
PATIENT_ID = "fcb2414b-067b-4ca2-91b2-6c36a86d4cbb" # <-- put target ID here
# ---------- helpers ----------
def wait_for_grid(page, timeout=15000):
    """Wait until the grid exposes at least one row carrying a ``data-id``.

    The row-group container is awaited first on a best-effort basis: a timeout
    there is ignored. The subsequent wait for a data row is mandatory, so a
    timeout on it propagates to the caller.

    Args:
        page: Playwright page to wait on.
        timeout: Limit for each individual wait, in milliseconds.

    Raises:
        PWTimeout: If no ``div[role='row'][data-id]`` shows up in time.
    """
    # (selector, must_succeed) pairs, processed in order.
    waits = (
        ("div[role='rowgroup']", False),
        ("div[role='row'][data-id]", True),
    )
    for selector, must_succeed in waits:
        try:
            page.wait_for_selector(selector, timeout=timeout)
        except PWTimeout:
            if must_succeed:
                raise
def open_detail_via_query(page, patient_id):
    """Try to open the patient detail by navigating to ``BASE_URL?pacient=<id>``.

    Args:
        page: Playwright page used for navigation.
        patient_id: Identifier placed into the ``pacient`` query parameter.

    Returns:
        The result of ``is_detail_open(page)`` after navigation (and, when the
        quick wait fails, after a short extra pause).
    """
    # Function-scope import: the file's import header does not provide it.
    from urllib.parse import urlencode

    # Encode the id so characters such as '&' or '#' cannot alter the URL.
    query = urlencode({"pacient": patient_id})
    target = f"{BASE_URL}?{query}"
    page.goto(target, wait_until="domcontentloaded")
    if not wait_for_detail_open(page, quick=True):
        # Some apps need a tiny delay to mount the panel. Use Playwright's own
        # timer rather than time.sleep, which the Playwright docs advise
        # against inside the sync API.
        page.wait_for_timeout(800)
    return is_detail_open(page)
def is_detail_open(page):
    """Report whether a dialog/drawer-like container is currently visible.

    The candidate selectors cover typical MUI patterns (``role="dialog"``,
    drawer paper, ``aria-modal``). For each selector only the first matching
    element is inspected.

    Args:
        page: Playwright page to inspect.

    Returns:
        True as soon as one selector's first match is visible, otherwise False.
    """
    candidate_selectors = (
        "[role='dialog']",
        "div.MuiDrawer-paper",
        "div.MuiModal-root [role='dialog']",
        "div[aria-modal='true']",
    )
    for css in candidate_selectors:
        matches = page.locator(css)
        if matches.count() == 0:
            continue
        if matches.first.is_visible():
            return True
    return False
def wait_for_detail_open(page, quick=False):
    """Wait until a dialog/drawer-like container becomes visible.

    All candidate selectors are awaited together in a single call, so the
    function blocks for at most one timeout rather than one timeout per
    selector.

    Args:
        page: Playwright page to wait on.
        quick: When true use a 4000 ms limit, otherwise 15000 ms.

    Returns:
        True if a matching visible element appeared within the limit,
        False if the wait timed out.
    """
    timeout = 4000 if quick else 15000
    selectors = [
        "[role='dialog']",
        "div.MuiDrawer-paper",
        "div.MuiModal-root [role='dialog']",
        "div[aria-modal='true']",
    ]
    # ":visible" limits every alternative to visible elements, so a hidden
    # match that comes first in the DOM cannot mask a visible one further down.
    combined = ", ".join(f"{sel}:visible" for sel in selectors)
    try:
        page.wait_for_selector(combined, timeout=timeout, state="visible")
    except PWTimeout:
        return False
    return True
def open_detail_by_click(page, patient_id):
    """Open the patient detail by clicking the grid row with a matching ``data-id``.

    This is the fallback used when opening via the query parameter fails.
    Timeouts while waiting for the grid or while clicking are reported as a
    False result instead of being raised, so the caller can handle every
    failure through the boolean return value.

    Args:
        page: Playwright page showing the grid.
        patient_id: Value expected in the row's ``data-id`` attribute.

    Returns:
        True if the row was clicked and a detail container became visible,
        otherwise False.
    """
    try:
        wait_for_grid(page, timeout=15000)
    except PWTimeout:
        # No data row ever rendered; nothing to click.
        return False

    row = page.locator(f"div[role='row'][data-id='{patient_id}']").first
    if not row.count():
        return False

    try:
        row.click()
    except PWTimeout:
        # The row exists but could not be clicked within the default timeout.
        return False
    return wait_for_detail_open(page)
def find_detail_root(page):
    """Return a locator for the container that holds the open detail.

    The preferred selectors are tried in order and the first match of the
    first selector whose first match is visible wins. If none qualifies, the
    last element matching the modal/drawer selector is returned; visibility is
    not checked for that fallback and the locator may match nothing.

    Args:
        page: Playwright page to search.

    Returns:
        A Playwright locator (never ``None``).
    """
    preferred = ("[role='dialog']", "div.MuiDrawer-paper", "div[aria-modal='true']")
    for css in preferred:
        matches = page.locator(css)
        if matches.count() == 0:
            continue
        head = matches.first
        if head.is_visible():
            return head
    # Nothing visible among the preferred selectors: take the last modal-ish node.
    fallback = page.locator("div.MuiModal-root, div.MuiDrawer-paper")
    return fallback.last
def extract_text(el):
    """Return ``el.inner_text()`` with surrounding whitespace removed.

    This is a best-effort helper: any exception raised while reading or
    stripping the text results in an empty string.

    Args:
        el: Object exposing an ``inner_text()`` method.

    Returns:
        The stripped text, or ``""`` on any failure.
    """
    try:
        raw = el.inner_text()
        cleaned = raw.strip()
    except Exception:
        return ""
    return cleaned
def extract_field_by_label(root, label_texts):
    """
    Try to find a field value by its label text (CZ/EN variants).

    An element whose normalized text equals one of the labels is looked up
    first; if none exists, elements merely containing a label are used. The
    value is then read from, in order: the parent's next sibling, the label's
    next sibling, the first non-empty span/div under the parent, and finally
    the parent's whole text with the labels removed.

    Args:
        root: Playwright locator of the detail container to search in.
        label_texts: Iterable of label strings; blank entries are ignored.

    Returns:
        The cleaned value text, or None when no label or no non-empty value
        was found.
    """
    # A blank label would match every empty element (exact test) or every
    # element at all (contains test), so such entries are dropped up front.
    labels = [t for t in label_texts if t and t.strip()]
    if not labels:
        return None

    exact_xpath = " | ".join(f".//*[normalize-space()='{t}']" for t in labels)
    loc = root.locator(f"xpath=({exact_xpath})")
    if not loc.count():
        # Try contains(label)
        partial_xpath = " | ".join(
            f".//*[contains(normalize-space(), '{t}')]" for t in labels
        )
        loc = root.locator(f"xpath=({partial_xpath})")
        if not loc.count():
            return None

    candidate = loc.first
    # Value might be in parent/next sibling
    parent = candidate.locator("xpath=..")
    value_candidates = [
        parent.locator("xpath=following-sibling::*[1]"),
        candidate.locator("xpath=following-sibling::*[1]"),
        # Explicit "xpath=" prefix: only selectors starting with "//" or ".."
        # are auto-detected as XPath, so a bare ".//..." would be read as CSS.
        parent.locator(
            "xpath=.//*[(self::span or self::div) and string-length(normalize-space())>0]"
        ),
    ]
    for value_loc in value_candidates:
        if not value_loc.count():
            continue
        text = extract_text(value_loc.first)
        if not text:
            continue
        # Clean common label-value formatting like "E-mail\nx@y.cz":
        # if the label text leads the value, strip it.
        for t in labels:
            text = re.sub(rf"^{re.escape(t)}\s*[:]?\s*", "", text, flags=re.I)
        # Collapse whitespace around line breaks to one space so neighbouring
        # tokens are not glued together.
        text = re.sub(r"\s+\n\s+", " ", text).strip()
        if text:
            return text
        # Only the label itself was found here; try the next candidate.

    # As a last fallback, read the parent block's text minus the labels.
    block_text = extract_text(parent)
    if block_text:
        for t in labels:
            block_text = re.sub(rf"{re.escape(t)}\s*[:]?\s*", "", block_text, flags=re.I)
        return block_text.strip() or None
    return None
def extract_all_text_pairs(root):
    """
    Sweep grid/list style containers for guessed label -> value pairs.

    At most the first 20 containers matching the MUI grid, ``dl`` or MUI list
    selectors are read. Each container's text is split into non-blank lines
    and every pair of adjacent lines is treated as a (label, value) candidate.
    The first value seen for a label is kept.

    Args:
        root: Playwright locator of the detail container.

    Returns:
        Dict mapping guessed labels to guessed values.
    """
    pairs = {}
    containers = root.locator("div.MuiGrid-container, dl, ul.MuiList-root")
    limit = min(20, containers.count())
    for index in range(limit):
        raw = extract_text(containers.nth(index))
        if not raw:
            continue
        lines = [ln.strip() for ln in raw.splitlines() if ln.strip()]
        # Naive pairing of neighbours: "Label\nValue".
        for label, value in zip(lines, lines[1:]):
            # Heuristic filter: labels are short, differ from the value, and
            # values containing ":" are treated as noise.
            if len(label) > 32 or label == value or ":" in value:
                continue
            pairs.setdefault(label, value)
    return pairs
def extract_patient_detail(page, patient_id):
    """Collect the patient's name and labelled fields from the open detail.

    Args:
        page: Playwright page with the detail container open.
        patient_id: Identifier stored under the ``"id"`` key of the result.

    Returns:
        A dict that always contains ``"id"``. On success it also holds
        ``"name"`` (if a heading was found), every targeted field with a
        non-blank value, and any extra label/value pairs from the generic
        sweep whose key is not already present. If no detail container
        matches, ``{"id": ..., "error": "detail_not_found"}`` is returned.
    """
    root = find_detail_root(page)
    # A locator object is truthy even when it matches nothing, so check the
    # match count to detect a missing container.
    if root is None or not root.count():
        return {"id": patient_id, "error": "detail_not_found"}

    # Try to get a headline with the name
    name = None
    for sel in ["h1", "h2", "h3", "header h2", "[data-testid='PatientName']"]:
        loc = root.locator(sel)
        if loc.count():
            nm = extract_text(loc.first)
            if nm and len(nm) > 1:
                name = nm
                break

    # Targeted fields (CZ + EN aliases)
    fields = {
        "Datum narození / Born": extract_field_by_label(root, ["Datum narození", "Datum nar.", "Date of birth", "Born"]),
        # NOTE(review): an empty-string alias was removed here because it would
        # match arbitrary elements; it may have been an abbreviation lost in
        # transcription -- confirm.
        "Rodné číslo": extract_field_by_label(root, ["Rodné číslo", "Personal ID"]),
        "Telefon": extract_field_by_label(root, ["Telefon", "Tel.", "Phone", "Mobile"]),
        # NOTE(review): the third alias looks identical to the first; possibly
        # meant to be a different hyphen character -- confirm.
        "E-mail": extract_field_by_label(root, ["E-mail", "Email", "E-mail"]),
        "Zdravotní pojišťovna": extract_field_by_label(root, ["Pojišťovna", "Zdravotní pojišťovna", "Insurer", "Insurance"]),
        "Adresa": extract_field_by_label(root, ["Adresa", "Address"]),
        "Poznámka": extract_field_by_label(root, ["Poznámka", "Note", "Notes"]),
        "Pohlaví": extract_field_by_label(root, ["Pohlaví", "Gender", "Sex"]),
        "Praktický lékař": extract_field_by_label(root, ["Praktický lékař", "GP", "General practitioner"]),
    }

    # Sweep for any extra key -> value pairs that were not explicitly targeted
    extras = extract_all_text_pairs(root)

    # Merge non-empty fields
    data = {"id": patient_id}
    if name:
        data["name"] = name
    for key, value in fields.items():
        if value and value.strip():
            data[key] = value.strip()
    # Add extras that aren't already present
    for key, value in extras.items():
        if key not in data and value and value.strip():
            data[key] = value.strip()
    return data
# ---------- main ----------
def main():
    """Open the configured patient's detail, print it and save it as JSON.

    Exits with status 1 when ``PATIENT_ID`` looks unset or the storage-state
    file is missing. Otherwise a Chromium window is launched with the stored
    session, the detail is opened (query parameter first, row click as the
    fallback), and the extracted dict is printed and written to
    ``patient_<PATIENT_ID>.json`` in the current working directory. The
    function returns early, after printing a message, if the session was
    redirected to login or the detail could not be opened.
    """
    if not PATIENT_ID or len(PATIENT_ID) < 8:
        print("Set PATIENT_ID to a valid patient UUID.")
        sys.exit(1)
    sf = Path(STATE_FILE)
    if not sf.exists():
        print(f"Storage file not found: {sf}")
        sys.exit(1)

    with sync_playwright() as p:
        # Headed mode so the run can be watched; set headless=True to hide the window.
        browser = p.chromium.launch(headless=False)
        try:
            context = browser.new_context(storage_state=str(sf))
            context.set_default_navigation_timeout(30000)
            context.set_default_timeout(15000)
            page = context.new_page()

            # Try via query param first
            opened = open_detail_via_query(page, PATIENT_ID)

            # If not opened, go to base list and click the row
            if not opened:
                page.goto(BASE_URL, wait_until="domcontentloaded")
                if "/prihlaseni" in page.url.lower():
                    print("Redirected to login — refresh your medevio_storage.json.")
                    return
                if not open_detail_by_click(page, PATIENT_ID):
                    print("Could not open detail panel (neither via query nor by clicking).")
                    return

            # At this point, detail should be open
            data = extract_patient_detail(page, PATIENT_ID)
            # Serialise once and reuse for both console and file output.
            payload = json.dumps(data, ensure_ascii=False, indent=2)
            print("\n=== Patient detail ===")
            print(payload)
            out = Path(f"patient_{PATIENT_ID}.json")
            out.write_text(payload, encoding="utf-8")
            print(f"\nSaved → {out.resolve()}")
        finally:
            # Single cleanup point: runs on success, early return and error alike.
            browser.close()
# Run the extraction only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()