250 lines
9.2 KiB
Python
250 lines
9.2 KiB
Python
# extract_patient_detail.py
|
||
# Usage:
|
||
# 1) Put your medevio_storage.json path into STATE_FILE.
|
||
# 2) Set PATIENT_ID to a real UUID from your list.
|
||
# 3) Run: python extract_patient_detail.py
|
||
#
|
||
# Output: prints a dict to console and saves patient_<ID>.json in the current working directory.
|
||
|
||
from pathlib import Path
|
||
import json, sys, time, re
|
||
from playwright.sync_api import sync_playwright, TimeoutError as PWTimeout
|
||
|
||
# Storage-state file handed to the browser context in main().
STATE_FILE = "medevio_storage.json"

# Patient list page; a single detail is requested as "<BASE_URL>?pacient=<id>".
BASE_URL = "https://my.medevio.cz/mudr-buzalkova/klinika/pacienti"

# Identifier of the patient to extract  <-- put target ID here
PATIENT_ID = "fcb2414b-067b-4ca2-91b2-6c36a86d4cbb"
|
||
|
||
# ---------- helpers ----------
|
||
|
||
def wait_for_grid(page, timeout=15000):
    """Block until the grid shows at least one row carrying a ``data-id``.

    The row-group container is awaited first on a best-effort basis: a
    timeout there is ignored. The subsequent wait for a data row is
    mandatory, so its timeout error propagates to the caller.

    ``timeout`` is passed unchanged to each of the two waits.
    """
    container_selector = "div[role='rowgroup']"
    data_row_selector = "div[role='row'][data-id]"

    try:
        page.wait_for_selector(container_selector, timeout=timeout)
    except PWTimeout:
        # Not fatal: the row wait below is the check that actually matters.
        pass

    page.wait_for_selector(data_row_selector, timeout=timeout)
|
||
|
||
def open_detail_via_query(page, patient_id):
    """Try to open the patient detail by navigating to ``?pacient=<id>``.

    Navigates to ``BASE_URL`` with the ``pacient`` query parameter, gives the
    detail container a short time to appear, and reports whether one is
    visible afterwards.

    Args:
        page: Playwright page to navigate.
        patient_id: Identifier placed into the ``pacient`` query parameter.

    Returns:
        The result of ``is_detail_open(page)`` after the wait.
    """
    target = f"{BASE_URL}?pacient={patient_id}"
    page.goto(target, wait_until="domcontentloaded")

    if not wait_for_detail_open(page, quick=True):
        # Short extra grace period for the panel to mount. Uses the page's own
        # timer instead of time.sleep(): the Playwright docs advise against
        # time.sleep() with the sync API because it stalls Playwright's
        # internal event processing.
        page.wait_for_timeout(800)

    return is_detail_open(page)
|
||
|
||
def is_detail_open(page):
    """Report whether a dialog/drawer-like container is currently visible.

    Each candidate selector is checked in order; the answer is True as soon
    as one of them has at least one match whose first element is visible.
    Nothing is awaited: this is a snapshot of the page's current state.
    """
    candidate_selectors = (
        "[role='dialog']",
        "div.MuiDrawer-paper",
        "div.MuiModal-root [role='dialog']",
        "div[aria-modal='true']",
    )

    def first_match_visible(selector):
        matches = page.locator(selector)
        return bool(matches.count()) and matches.first.is_visible()

    return any(first_match_visible(selector) for selector in candidate_selectors)
|
||
|
||
def wait_for_detail_open(page, quick=False):
    """Wait until a dialog/drawer-like container becomes visible.

    All candidate selectors are polled together inside one time budget
    (4000 ms when ``quick`` is true, 15000 ms otherwise). Waiting on the
    selectors one after another would multiply the worst-case wait by the
    number of selectors and would not notice a later selector that is
    already visible until the earlier ones had timed out.

    Args:
        page: Playwright page to inspect.
        quick: Use the short budget instead of the long one.

    Returns:
        True as soon as any candidate's first match is visible, False if the
        budget is used up first. Never raises on timeout.
    """
    timeout = 4000 if quick else 15000
    poll_interval = 100  # ms between visibility checks
    selectors = (
        "[role='dialog']",
        "div.MuiDrawer-paper",
        "div.MuiModal-root [role='dialog']",
        "div[aria-modal='true']",
    )

    def any_visible():
        for sel in selectors:
            loc = page.locator(sel)
            if loc.count() and loc.first.is_visible():
                return True
        return False

    for _ in range(timeout // poll_interval):
        if any_visible():
            return True
        page.wait_for_timeout(poll_interval)

    # One last look after the final pause so the whole budget is honoured.
    return any_visible()
|
||
|
||
def open_detail_by_click(page, patient_id):
    """Open the patient detail by clicking the grid row with a matching id.

    Fallback for when the query-parameter route did not open the detail.

    Args:
        page: Playwright page already showing the patient list.
        patient_id: Value expected in the row's ``data-id`` attribute.

    Returns:
        True if a detail container became visible after the click. False if
        the grid never rendered, the row is not present, the click timed out,
        or no detail container appeared.
    """
    try:
        wait_for_grid(page, timeout=15000)
        row = page.locator(f"div[role='row'][data-id='{patient_id}']").first
        if not row.count():
            return False
        row.click()
    except PWTimeout:
        # This function reports failure through its return value; letting the
        # timeout escape would bypass the caller's "could not open" handling.
        return False

    return wait_for_detail_open(page)
|
||
|
||
def find_detail_root(page):
    """Return a locator for the container that holds the open detail.

    The preferred selectors are tried in order and the first match of the
    first selector whose leading element is visible is returned. When none
    qualifies, the last modal/drawer-like element on the page is returned
    instead, without any visibility check.
    """
    preferred_selectors = (
        "[role='dialog']",
        "div.MuiDrawer-paper",
        "div[aria-modal='true']",
    )

    for selector in preferred_selectors:
        matches = page.locator(selector)
        if not matches.count():
            continue
        leading = matches.first
        if leading.is_visible():
            return leading

    # Nothing visible among the preferred selectors: fall back to the last
    # modal-ish container.
    fallback_matches = page.locator("div.MuiModal-root, div.MuiDrawer-paper")
    return fallback_matches.last
|
||
|
||
def extract_text(el):
    """Return the element's inner text with surrounding whitespace removed.

    Best-effort: any exception raised while reading or trimming the text
    yields an empty string instead of propagating.
    """
    trimmed = ""
    try:
        raw = el.inner_text()
        trimmed = raw.strip()
    except Exception:
        trimmed = ""
    return trimmed
|
||
|
||
def _xpath_literal(text):
    """Return ``text`` as an XPath 1.0 string literal, whatever quotes it holds."""
    if "'" not in text:
        return f"'{text}'"
    if '"' not in text:
        return f'"{text}"'
    # XPath 1.0 has no escape syntax: a string holding both quote kinds must
    # be assembled with concat() from single-quoted pieces.
    pieces = ", \"'\", ".join(f"'{part}'" for part in text.split("'"))
    return f"concat({pieces})"


def extract_field_by_label(root, label_texts):
    """Find a field value by its label text (CZ/EN variants).

    Looks under ``root`` for an element whose normalized text equals one of
    ``label_texts`` (falling back to "contains"), then reads the value from
    nearby elements: the parent's next sibling, the label's own next sibling,
    then the first non-empty span/div inside the parent. The label itself and
    an optional colon are stripped from the front of the result. If none of
    those yields text, the parent's whole text minus the labels is used.

    Args:
        root: Locator to search within.
        label_texts: Alternative label strings to look for.

    Returns:
        The cleaned value text, or None when no label matches or no non-empty
        value can be found.
    """
    if not label_texts:
        # An empty union would produce the invalid selector "xpath=()".
        return None

    literals = [_xpath_literal(t) for t in label_texts]

    labels_xpath = " | ".join(f".//*[normalize-space()={lit}]" for lit in literals)
    loc = root.locator(f"xpath=({labels_xpath})")
    if not loc.count():
        # No exact match: accept elements that merely contain a label.
        labels_xpath2 = " | ".join(
            f".//*[contains(normalize-space(), {lit})]" for lit in literals
        )
        loc = root.locator(f"xpath=({labels_xpath2})")
        if not loc.count():
            return None

    candidate = loc.first
    parent = candidate.locator("xpath=..")

    # Places where the value may live, in order of preference.
    value_candidates = [
        parent.locator("xpath=following-sibling::*[1]"),
        candidate.locator("xpath=following-sibling::*[1]"),
        # Explicit "xpath=" prefix: Playwright only auto-detects XPath for
        # selectors starting with "//" or "..", so a bare ".//*[...]" would
        # be parsed as CSS and fail.
        parent.locator(
            "xpath=.//*[(self::span or self::div) and string-length(normalize-space())>0]"
        ),
    ]

    for value_loc in value_candidates:
        if not value_loc.count():
            continue
        text = extract_text(value_loc.first)
        if not text:
            continue
        # Clean label-value formatting like "E-mail\nx@y.cz": drop a leading
        # label (with optional colon) when the element's text includes it.
        for t in label_texts:
            text = re.sub(rf"^{re.escape(t)}\s*:?\s*", "", text, flags=re.I)
        # NOTE(review): this only joins line breaks that have whitespace on
        # both sides; a bare "a\nb" is left as is -- confirm that is intended.
        text = re.sub(r"\s+\n\s+", " — ", text).strip()
        if text:
            return text
        # The element held nothing but the label itself: try the next place.

    # Last resort: the parent block's text with every label occurrence removed.
    block_text = extract_text(parent)
    if block_text:
        for t in label_texts:
            block_text = re.sub(rf"{re.escape(t)}\s*:?\s*", "", block_text, flags=re.I)
        return block_text.strip() or None
    return None
|
||
|
||
def extract_all_text_pairs(root):
    """Sweep grid/list containers under ``root`` for guessed label->value pairs.

    At most the first 20 matching containers are read. Each container's text
    is split into non-empty lines and every pair of adjacent lines is treated
    as a possible (label, value). A pair is kept when the label is at most 32
    characters, differs from the value, and the value has no colon. The first
    value seen for a label wins.

    Returns:
        Dict mapping each accepted label to its value.
    """
    pairs = {}
    containers = root.locator("div.MuiGrid-container, dl, ul.MuiList-root")
    inspected = min(20, containers.count())

    for index in range(inspected):
        raw = extract_text(containers.nth(index))
        if not raw:
            continue

        # Naive pairing of neighbouring lines: "Label\nValue".
        lines = [line.strip() for line in raw.splitlines() if line.strip()]
        for label, value in zip(lines, lines[1:]):
            if len(label) > 32 or label == value or ":" in value:
                continue
            pairs.setdefault(label, value)

    return pairs
|
||
|
||
def extract_patient_detail(page, patient_id):
    """Collect the open patient detail into a flat dict.

    Locates the detail container, reads a headline as the patient name,
    looks up a fixed set of labelled fields (CZ + EN aliases), then adds any
    further label->value pairs found by the generic sweep.

    Args:
        page: Playwright page with the detail expected to be open.
        patient_id: Identifier stored under the ``"id"`` key of the result.

    Returns:
        ``{"id": ..., "error": "detail_not_found"}`` when no detail container
        exists; otherwise a dict with ``"id"``, optionally ``"name"``, and
        every non-empty field that was found.
    """
    root = find_detail_root(page)
    # A locator object is always truthy, so the presence check has to ask the
    # page whether the locator matches anything.
    if root is None or not root.count():
        return {"id": patient_id, "error": "detail_not_found"}

    # Try to get a headline with the name
    name = None
    for sel in ["h1", "h2", "h3", "header h2", "[data-testid='PatientName']"]:
        loc = root.locator(sel)
        if loc.count():
            nm = extract_text(loc.first)
            if nm and len(nm) > 1:
                name = nm
                break

    # Targeted fields (CZ + EN aliases)
    fields = {
        "Datum narození / Born": extract_field_by_label(root, ["Datum narození", "Datum nar.", "Date of birth", "Born"]),
        "Rodné číslo": extract_field_by_label(root, ["Rodné číslo", "RČ", "Personal ID"]),
        "Telefon": extract_field_by_label(root, ["Telefon", "Tel.", "Phone", "Mobile"]),
        "E-mail": extract_field_by_label(root, ["E-mail", "Email", "E-mail"]),
        "Zdravotní pojišťovna": extract_field_by_label(root, ["Pojišťovna", "Zdravotní pojišťovna", "Insurer", "Insurance"]),
        "Adresa": extract_field_by_label(root, ["Adresa", "Address"]),
        "Poznámka": extract_field_by_label(root, ["Poznámka", "Note", "Notes"]),
        "Pohlaví": extract_field_by_label(root, ["Pohlaví", "Gender", "Sex"]),
        "Praktický lékař": extract_field_by_label(root, ["Praktický lékař", "GP", "General practitioner"]),
    }

    # Sweep for any extra key->value pairs that were not explicitly targeted
    extras = extract_all_text_pairs(root)

    # Merge non-empty fields
    data = {"id": patient_id}
    if name:
        data["name"] = name
    for k, v in fields.items():
        if v and v.strip():
            data[k] = v.strip()

    # Add extras that aren't already present; targeted fields take precedence
    for k, v in extras.items():
        if k not in data and v and v.strip():
            data[k] = v.strip()

    return data
|
||
|
||
# ---------- main ----------
|
||
|
||
def main():
    """Open the configured patient's detail, print it and save it as JSON.

    Exits with status 1 when ``PATIENT_ID`` is empty or shorter than 8
    characters, or when ``STATE_FILE`` does not exist. Otherwise launches
    Chromium with the stored session, opens the detail (query parameter
    first, row click as fallback), prints the extracted dict and writes it to
    ``patient_<PATIENT_ID>.json`` in the current working directory.

    Returns early, after printing a message, when the site redirects to the
    login page or the detail cannot be opened.
    """
    if not PATIENT_ID or len(PATIENT_ID) < 8:
        print("Set PATIENT_ID to a valid patient UUID.")
        sys.exit(1)

    sf = Path(STATE_FILE)
    if not sf.exists():
        print(f"Storage file not found: {sf}")
        sys.exit(1)

    with sync_playwright() as p:
        # headless=False opens a visible browser window so the run can be watched.
        browser = p.chromium.launch(headless=False)
        # try/finally so the browser is closed on every exit path, including
        # the early returns below and any exception raised while scraping.
        try:
            context = browser.new_context(storage_state=str(sf))
            context.set_default_navigation_timeout(30000)
            context.set_default_timeout(15000)
            page = context.new_page()

            # Try via query param first
            opened = open_detail_via_query(page, PATIENT_ID)

            # If not opened, go to base list and click the row
            if not opened:
                page.goto(BASE_URL, wait_until="domcontentloaded")
                if "/prihlaseni" in page.url.lower():
                    print("Redirected to login — refresh your medevio_storage.json.")
                    return
                if not open_detail_by_click(page, PATIENT_ID):
                    print("Could not open detail panel (neither via query nor by clicking).")
                    return

            # At this point, detail should be open
            data = extract_patient_detail(page, PATIENT_ID)

            # Serialize once; the same text is printed and written to disk.
            payload = json.dumps(data, ensure_ascii=False, indent=2)

            print("\n=== Patient detail ===")
            print(payload)

            out = Path(f"patient_{PATIENT_ID}.json")
            out.write_text(payload, encoding="utf-8")
            print(f"\nSaved → {out.resolve()}")
        finally:
            browser.close()


if __name__ == "__main__":
    main()
|