# medevio/Medevio2.py

# extract_patient_detail.py
# Usage:
#   1) Put your medevio_storage.json path into STATE_FILE.
#   2) Set PATIENT_ID to a real UUID from your list.
#   3) Run: python extract_patient_detail.py
#
# Output: prints the extracted data as JSON to the console and saves patient_<ID>.json in the current working directory.

from pathlib import Path
import json, sys, time, re
from playwright.sync_api import sync_playwright, TimeoutError as PWTimeout

# Path to the saved browser storage state; main() checks that it exists and
# passes it as `storage_state` when creating the browser context.
STATE_FILE   = r"medevio_storage.json"
# URL of the patient list; also the base for the `?pacient=<id>` deep link.
BASE_URL     = "https://my.medevio.cz/mudr-buzalkova/klinika/pacienti"
# ID of the patient to extract; matched against the grid row's `data-id`
# attribute and embedded in the output file name.
PATIENT_ID   = "fcb2414b-067b-4ca2-91b2-6c36a86d4cbb"  # <-- put target ID here

# ---------- helpers ----------

def wait_for_grid(page, timeout=15000):
    """Block until the patient grid shows at least one data row.

    The row-group container is awaited first on a best-effort basis: a
    timeout there is ignored. The wait for a row carrying a ``data-id``
    attribute is mandatory, so its timeout error propagates to the caller.

    Args:
        page: Playwright page expected to display the patient list.
        timeout: Limit in milliseconds applied to each of the two waits.
    """
    container_selector = "div[role='rowgroup']"
    data_row_selector = "div[role='row'][data-id]"

    try:
        page.wait_for_selector(container_selector, timeout=timeout)
    except PWTimeout:
        # A missing row group is tolerated; the row wait below is decisive.
        pass

    page.wait_for_selector(data_row_selector, timeout=timeout)

def open_detail_via_query(page, patient_id):
    """Open the patient list with ``?pacient=<id>`` and report whether a detail panel shows.

    Args:
        page: Playwright page to navigate.
        patient_id: Value placed in the ``pacient`` query parameter.

    Returns:
        The result of ``is_detail_open(page)`` after the wait described below.
    """
    deep_link = f"{BASE_URL}?pacient={patient_id}"
    page.goto(deep_link, wait_until="domcontentloaded")

    appeared = wait_for_detail_open(page, quick=True)
    if not appeared:
        # Short grace period before the final check, in case the panel
        # mounts slightly after the quick wait gave up.
        time.sleep(0.8)

    return is_detail_open(page)

def is_detail_open(page):
    """Return True if a dialog/drawer-like container is currently visible.

    Each candidate selector is probed in order without waiting; only the
    first match of a selector is tested for visibility.

    Args:
        page: Playwright page to inspect.

    Returns:
        True as soon as one selector's first match is visible, else False.
    """
    candidate_selectors = (
        "[role='dialog']",
        "div.MuiDrawer-paper",
        "div.MuiModal-root [role='dialog']",
        "div[aria-modal='true']",
    )

    def first_match_visible(selector):
        matches = page.locator(selector)
        # Visibility is only queried when the selector matched something.
        return bool(matches.count() and matches.first.is_visible())

    return any(first_match_visible(selector) for selector in candidate_selectors)

def wait_for_detail_open(page, quick=False):
    """Wait until a dialog/drawer-like detail container becomes visible.

    All candidate selectors are combined into one CSS selector list and
    awaited in a single call, so the total wait is bounded by one timeout.
    Awaiting them one after another would multiply the timeout by the number
    of selectors and delay detection of a panel that only matches a later
    selector.

    Args:
        page: Playwright page to watch.
        quick: Use a 4 s limit instead of the default 15 s.

    Returns:
        True if a matching visible element appeared in time, False on timeout.
    """
    timeout = 4000 if quick else 15000
    selectors = (
        "[role='dialog']",
        "div.MuiDrawer-paper",
        "div.MuiModal-root [role='dialog']",
        "div[aria-modal='true']",
    )
    # `>> visible=true` keeps only visible matches, so a hidden element that
    # happens to come first in the DOM cannot mask a visible one.
    combined = ", ".join(selectors) + " >> visible=true"
    try:
        page.wait_for_selector(combined, timeout=timeout, state="visible")
    except PWTimeout:
        return False
    return True

def open_detail_by_click(page, patient_id):
    """Fallback: open the detail by clicking the grid row of the patient.

    Args:
        page: Playwright page showing the patient list.
        patient_id: Value expected in the row's ``data-id`` attribute.

    Returns:
        False when no row with that ``data-id`` is found; otherwise the
        result of ``wait_for_detail_open(page)`` after the click.
    """
    wait_for_grid(page, timeout=15000)

    row_selector = f"div[role='row'][data-id='{patient_id}']"
    target_row = page.locator(row_selector).first
    if target_row.count():
        target_row.click()
        return wait_for_detail_open(page)
    return False

def find_detail_root(page):
    """Return a locator for the container assumed to hold the open detail.

    The first match of each candidate selector is checked in order and
    returned if it is visible. When none qualifies, the last match of a
    combined modal/drawer selector is returned without any visibility check,
    so the result is always a locator (which may match nothing).

    Args:
        page: Playwright page to search.
    """
    for selector in ("[role='dialog']", "div.MuiDrawer-paper", "div[aria-modal='true']"):
        matches = page.locator(selector)
        if not matches.count():
            continue
        if matches.first.is_visible():
            return matches.first

    # Nothing visible among the candidates: fall back to the last
    # modal/drawer element in the document.
    return page.locator("div.MuiModal-root, div.MuiDrawer-paper").last

def extract_text(el):
    """Return the element's inner text with surrounding whitespace removed.

    Best-effort: if reading or stripping the text raises any exception, an
    empty string is returned instead.
    """
    text = ""
    try:
        text = el.inner_text().strip()
    except Exception:
        # Deliberately broad: a failed read is treated as "no text".
        pass
    return text

def extract_field_by_label(root, label_texts):
    """
    Try to find a field value by its label text (CZ/EN variants).

    The label element is located first by exact text, then by substring.
    The value is then looked up, in order, in: the element following the
    label's parent, the element following the label itself, and the first
    non-empty span/div below the parent. If none yields text, the parent's
    text with the labels removed is used.

    Args:
        root: Locator of the detail container to search in.
        label_texts: Alternative label strings. They are embedded in XPath
            string literals delimited by single quotes, so they must not
            contain a single quote.

    Returns:
        The cleaned value text, or None when no label or no value is found.
    """
    if not label_texts:
        # An empty union would produce the invalid XPath "()".
        return None

    exact_xpath = " | ".join(f".//*[normalize-space()='{t}']" for t in label_texts)
    loc = root.locator(f"xpath=({exact_xpath})")
    if not loc.count():
        # No exact match: accept elements whose text merely contains a label.
        contains_xpath = " | ".join(
            f".//*[contains(normalize-space(), '{t}')]" for t in label_texts
        )
        loc = root.locator(f"xpath=({contains_xpath})")
        if not loc.count():
            return None

    candidate = loc.first
    parent = candidate.locator("xpath=..")
    value_locators = [
        parent.locator("xpath=following-sibling::*[1]"),
        candidate.locator("xpath=following-sibling::*[1]"),
        # The explicit "xpath=" prefix is required: Playwright only
        # auto-detects XPath for selectors starting with "//" or "..", so a
        # bare ".//*[...]" would be parsed as CSS and fail.
        parent.locator("xpath=.//*[(self::span or self::div) and string-length(normalize-space())>0]"),
    ]

    for s in value_locators:
        if not s.count():
            continue
        text = extract_text(s.first)
        if not text:
            continue
        # Drop a leading label (optionally followed by a colon) that was
        # captured together with the value, e.g. "E-mail\nx@y.cz".
        for t in label_texts:
            text = re.sub(rf"^{re.escape(t)}\s*[:：]?\s*", "", text, flags=re.I)
        # NOTE(review): this pattern needs whitespace on both sides of the
        # newline, so a plain "a\nb" is left untouched — confirm intent.
        text = re.sub(r"\s+\n\s+", " — ", text).strip()
        if text:
            return text
        # Nothing left after removing the label: this candidate was the
        # label itself, so keep looking instead of returning "".

    # As a last fallback, read the parent block's text minus the label(s).
    block_text = extract_text(parent)
    if block_text:
        for t in label_texts:
            block_text = re.sub(rf"{re.escape(t)}\s*[:：]?\s*", "", block_text, flags=re.I)
        return block_text.strip() or None
    return None

def extract_all_text_pairs(root):
    """
    Heuristic label->value sweep over grid/list-like containers.

    At most the first 20 matching containers are read. Inside each, every
    pair of adjacent non-empty text lines is treated as a potential
    (label, value) pair; the first value seen for a label wins.

    Args:
        root: Locator of the detail container to scan.

    Returns:
        Dict of guessed label -> value strings (may contain noise).
    """
    pairs = {}
    containers = root.locator("div.MuiGrid-container, dl, ul.MuiList-root")
    limit = min(20, containers.count())

    for index in range(limit):
        raw = extract_text(containers.nth(index))
        if not raw:
            continue

        lines = [line.strip() for line in raw.splitlines() if line.strip()]
        # Slide over neighbouring lines: (line0, line1), (line1, line2), ...
        for label, value in zip(lines, lines[1:]):
            # Skip long "labels", identical neighbours, and values that
            # contain a colon.
            if len(label) > 32 or label == value or ":" in value:
                continue
            pairs.setdefault(label, value)

    return pairs

def extract_patient_detail(page, patient_id):
    """Collect the patient's data from the open detail container.

    Combines a headline (used as the name), a fixed set of labelled fields
    and a generic label/value sweep into one dict.

    Args:
        page: Playwright page with the detail panel open.
        patient_id: ID stored under the ``"id"`` key of the result.

    Returns:
        Dict with ``"id"`` plus every non-empty value found, or
        ``{"id": ..., "error": "detail_not_found"}`` when the detail
        container matches no element.
    """
    root = find_detail_root(page)
    # A locator object is truthy even when it matches nothing, so the match
    # count must be checked explicitly to detect a missing container.
    if root is None or not root.count():
        return {"id": patient_id, "error": "detail_not_found"}

    # Try to get a headline with the name
    name = None
    for sel in ["h1", "h2", "h3", "header h2", "[data-testid='PatientName']"]:
        loc = root.locator(sel)
        if loc.count():
            nm = extract_text(loc.first)
            if nm and len(nm) > 1:
                name = nm
                break

    # Targeted fields (CZ + EN aliases)
    fields = {
        "Datum narození / Born": extract_field_by_label(root, ["Datum narození", "Datum nar.", "Date of birth", "Born"]),
        "Rodné číslo":           extract_field_by_label(root, ["Rodné číslo", "RČ", "Personal ID"]),
        "Telefon":               extract_field_by_label(root, ["Telefon", "Tel.", "Phone", "Mobile"]),
        "E-mail":                extract_field_by_label(root, ["E-mail", "Email"]),
        "Zdravotní pojišťovna":  extract_field_by_label(root, ["Pojišťovna", "Zdravotní pojišťovna", "Insurer", "Insurance"]),
        "Adresa":                extract_field_by_label(root, ["Adresa", "Address"]),
        "Poznámka":              extract_field_by_label(root, ["Poznámka", "Note", "Notes"]),
        "Pohlaví":               extract_field_by_label(root, ["Pohlaví", "Gender", "Sex"]),
        "Praktický lékař":       extract_field_by_label(root, ["Praktický lékař", "GP", "General practitioner"]),
    }

    # Sweep for any extra key->value pairs that were not explicitly targeted
    extras = extract_all_text_pairs(root)

    # Merge non-empty fields
    data = {"id": patient_id}
    if name:
        data["name"] = name
    for k, v in fields.items():
        if v and v.strip():
            data[k] = v.strip()

    # Add extras that aren't already present; targeted fields take precedence
    for k, v in extras.items():
        if k not in data and v and v.strip():
            data[k] = v.strip()

    return data

# ---------- main ----------

def main():
    """Open the configured patient's detail, print it and save it as JSON.

    Exits with status 1 when PATIENT_ID looks unset or the storage-state
    file is missing. Returns without writing a file when the session was
    redirected to the login page or the detail panel could not be opened.
    The result is written to ``patient_<PATIENT_ID>.json`` as a relative
    path, i.e. in the current working directory.
    """
    if not PATIENT_ID or len(PATIENT_ID) < 8:
        print("Set PATIENT_ID to a valid patient UUID.")
        sys.exit(1)

    sf = Path(STATE_FILE)
    if not sf.exists():
        print(f"Storage file not found: {sf}")
        sys.exit(1)

    with sync_playwright() as p:
        # headless=False shows the browser window; set True to run without UI
        browser = p.chromium.launch(headless=False)
        try:
            context = browser.new_context(storage_state=str(sf))
            context.set_default_navigation_timeout(30000)
            context.set_default_timeout(15000)
            page = context.new_page()

            # Try via query param first
            opened = open_detail_via_query(page, PATIENT_ID)

            # If not opened, go to base list and click the row
            if not opened:
                page.goto(BASE_URL, wait_until="domcontentloaded")
                if "/prihlaseni" in page.url.lower():
                    print("Redirected to login — refresh your medevio_storage.json.")
                    return
                if not open_detail_by_click(page, PATIENT_ID):
                    print("Could not open detail panel (neither via query nor by clicking).")
                    return

            # At this point, detail should be open
            data = extract_patient_detail(page, PATIENT_ID)

            print("\n=== Patient detail ===")
            print(json.dumps(data, ensure_ascii=False, indent=2))

            out = Path(f"patient_{PATIENT_ID}.json")
            out.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")
            print(f"\nSaved → {out.resolve()}")
        finally:
            # Close the browser on every exit path, including exceptions
            # raised while navigating, extracting or writing the file.
            browser.close()

# Run the extraction only when executed as a script, not when imported.
if __name__ == "__main__":
    main()