d

2025-09-20 09:23:29 +02:00
parent 4fed311575
commit b3e410be28
7 changed files with 702 additions and 0 deletions
--- a/Medevio2.py
+++ b/Medevio2.py
@@ -0,0 +1,249 @@
+# extract_patient_detail.py
+# Usage:
+#   1) Put your medevio_storage.json path into STATE_FILE.
+#   2) Set PATIENT_ID to a real UUID from your list.
+#   3) Run: python extract_patient_detail.py
+#
+# Output: prints the extracted data as JSON to the console and saves patient_<ID>.json in the current working directory.
+
+from pathlib import Path
+import json, sys, time, re
+from playwright.sync_api import sync_playwright, TimeoutError as PWTimeout
+
+# Path to the saved browser storage state; main() passes it as storage_state when creating the browser context.
+STATE_FILE   = r"/medevio_storage.json"
+# Patient list URL; the detail view is requested by appending ?pacient=<id> to it.
+BASE_URL     = "https://my.medevio.cz/mudr-buzalkova/klinika/pacienti"
+PATIENT_ID   = "236b3759-4c2b-4fa8-ab52-ce4ddb2e9064"  # <-- put target ID here
+
+# ---------- helpers ----------
+
+def wait_for_grid(page, timeout=15000):
+    """Block until the patient grid shows at least one data row.
+
+    The ``rowgroup`` wrapper is awaited first on a best-effort basis (a timeout
+    there is ignored). The wait for a ``row`` element carrying ``data-id`` is
+    mandatory, so its timeout propagates to the caller.
+
+    Args:
+        page: Playwright page showing the patient list.
+        timeout: Wait limit in milliseconds, applied to each selector.
+
+    Raises:
+        PWTimeout: If no ``div[role='row'][data-id]`` element appears in time.
+    """
+    # (selector, may a timeout on this selector be ignored?)
+    waits = (
+        ("div[role='rowgroup']", True),
+        ("div[role='row'][data-id]", False),
+    )
+    for selector, optional in waits:
+        try:
+            page.wait_for_selector(selector, timeout=timeout)
+        except PWTimeout:
+            if not optional:
+                raise
+
+def open_detail_via_query(page, patient_id):
+    """Navigate to the list URL with ``?pacient=<id>`` and report whether a detail panel is showing.
+
+    Args:
+        page: Playwright page to navigate.
+        patient_id: Identifier appended as the ``pacient`` query parameter.
+
+    Returns:
+        The result of ``is_detail_open(page)`` after the navigation and wait.
+    """
+    page.goto(f"{BASE_URL}?pacient={patient_id}", wait_until="domcontentloaded")
+    appeared = wait_for_detail_open(page, quick=True)
+    if not appeared:
+        # Short grace period in case the panel mounts just after the quick wait gave up.
+        time.sleep(0.8)
+    # The final answer always comes from a fresh visibility check.
+    return is_detail_open(page)
+
+def is_detail_open(page):
+    """Report whether a dialog/drawer-like container is currently visible.
+
+    Each candidate selector is probed in order. The check succeeds as soon as
+    one selector has at least one match and its first match is visible.
+
+    Args:
+        page: Playwright page to inspect.
+
+    Returns:
+        True if any candidate container is visible, otherwise False.
+    """
+    candidate_selectors = (
+        "[role='dialog']",
+        "div.MuiDrawer-paper",
+        "div.MuiModal-root [role='dialog']",
+        "div[aria-modal='true']",
+    )
+    # map() is lazy, so later selectors are only queried when earlier ones fail.
+    return any(
+        matches.count() and matches.first.is_visible()
+        for matches in map(page.locator, candidate_selectors)
+    )
+
+def wait_for_detail_open(page, quick=False):
+    """Wait until a dialog/drawer-like container becomes visible.
+
+    All candidate selectors are awaited together in a single call, so the
+    total wait never exceeds one timeout and a panel matching any of the
+    selectors is detected as soon as it is visible. Waiting for the selectors
+    one after another would multiply the timeout by the number of selectors.
+
+    Args:
+        page: Playwright page to watch.
+        quick: Use a 4000 ms limit instead of the default 15000 ms.
+
+    Returns:
+        True if a matching visible element appeared in time, False on timeout.
+    """
+    timeout = 4000 if quick else 15000
+    selectors = [
+        "[role='dialog']",
+        "div.MuiDrawer-paper",
+        "div.MuiModal-root [role='dialog']",
+        "div[aria-modal='true']",
+    ]
+    # ":visible" (Playwright CSS extension) keeps a hidden match that comes
+    # earlier in the DOM from masking a visible one further down.
+    combined = ", ".join(f"{sel}:visible" for sel in selectors)
+    try:
+        page.wait_for_selector(combined, timeout=timeout, state="visible")
+    except PWTimeout:
+        return False
+    return True
+
+def open_detail_by_click(page, patient_id):
+    """Open the detail panel by clicking the grid row whose ``data-id`` matches.
+
+    Used as a fallback when the query-parameter route did not open the panel.
+    Every failure mode is reported as ``False`` so the caller can print one
+    uniform "could not open" message instead of receiving a timeout traceback.
+
+    Args:
+        page: Playwright page already showing the patient list.
+        patient_id: Value expected in the row's ``data-id`` attribute.
+
+    Returns:
+        True if a detail container became visible after the click; False if
+        the grid never rendered rows, the row is not present, the click timed
+        out, or no panel appeared.
+    """
+    try:
+        wait_for_grid(page, timeout=15000)
+    except PWTimeout:
+        # No data rows were rendered at all, so there is nothing to click.
+        return False
+
+    row = page.locator(f"div[role='row'][data-id='{patient_id}']").first
+    if not row.count():
+        return False
+
+    try:
+        row.click()
+    except PWTimeout:
+        # The row exists but never became clickable within the default timeout.
+        return False
+    return wait_for_detail_open(page)
+
+def find_detail_root(page):
+    """Return a locator for the container that holds the open detail view.
+
+    The first selector whose first match is visible wins. If none qualifies,
+    the last element matching a modal root or drawer paper is returned
+    without any visibility check.
+
+    Args:
+        page: Playwright page to search.
+
+    Returns:
+        A locator; in the fallback case it may match nothing.
+    """
+    for selector in ("[role='dialog']", "div.MuiDrawer-paper", "div[aria-modal='true']"):
+        found = page.locator(selector)
+        if not found.count():
+            continue
+        if found.first.is_visible():
+            return found.first
+    # Nothing visible: fall back to the last modal-ish container on the page.
+    fallback = page.locator("div.MuiModal-root, div.MuiDrawer-paper")
+    return fallback.last
+
+def extract_text(el):
+    """Return the element's inner text with surrounding whitespace removed.
+
+    Best-effort helper: any exception raised while reading or trimming the
+    text is swallowed and an empty string is returned instead.
+
+    Args:
+        el: Object exposing ``inner_text()`` (a Playwright locator here).
+
+    Returns:
+        The trimmed text, or ``""`` on any failure.
+    """
+    text = ""
+    try:
+        text = el.inner_text().strip()
+    except Exception:
+        # Deliberately broad: a missing/detached element must not abort extraction.
+        pass
+    return text
+
+def _xpath_literal(text):
+    """Quote *text* as an XPath 1.0 string literal, using concat() when it holds both quote kinds."""
+    if "'" not in text:
+        return f"'{text}'"
+    if '"' not in text:
+        return f'"{text}"'
+    pieces = ", \"'\", ".join(f"'{part}'" for part in text.split("'"))
+    return f"concat({pieces})"
+
+
+def _strip_labels(text, label_texts, anchored):
+    """Remove label text (and an optional colon) from *text*, longest label first."""
+    prefix = "^" if anchored else ""
+    # Longest first so "Zdravotní pojišťovna" is removed whole rather than
+    # having only its "pojišťovna" suffix cut out by the shorter alias.
+    for label in sorted(label_texts, key=len, reverse=True):
+        text = re.sub(rf"{prefix}{re.escape(label)}\s*[:：]?\s*", "", text, flags=re.I)
+    return text.strip()
+
+
+def extract_field_by_label(root, label_texts):
+    """
+    Try to find a field value by its label text (CZ/EN variants).
+
+    The label element is located by exact text first, then by substring
+    (restricted to the innermost element containing the label). The value is
+    then looked for, in order, in: the element following the label's parent,
+    the element following the label itself, and the first non-empty span/div
+    inside the label's parent. If none yields text, the parent's whole text
+    with the labels removed is used.
+
+    Args:
+        root: Locator of the detail container to search within.
+        label_texts: Alternative label strings for the same field.
+
+    Returns:
+        The cleaned value text, or None when no label or no value was found.
+    """
+    if not label_texts:
+        # An empty alias list would otherwise build the invalid selector "xpath=()".
+        return None
+
+    literals = [_xpath_literal(t) for t in label_texts]
+
+    exact = " | ".join(f".//*[normalize-space()={lit}]" for lit in literals)
+    loc = root.locator(f"xpath=({exact})")
+    if not loc.count():
+        # Substring match. Every ancestor of the label also "contains" its
+        # text, so keep only elements with no descendant that still matches;
+        # otherwise .first would be the outermost wrapper, not the label.
+        partial = " | ".join(
+            f".//*[contains(normalize-space(), {lit})"
+            f" and not(.//*[contains(normalize-space(), {lit})])]"
+            for lit in literals
+        )
+        loc = root.locator(f"xpath=({partial})")
+        if not loc.count():
+            return None
+
+    candidate = loc.first
+    # Value might be in parent/next sibling
+    parent = candidate.locator("xpath=..")
+    value_locators = [
+        parent.locator("xpath=following-sibling::*[1]"),
+        candidate.locator("xpath=following-sibling::*[1]"),
+        # Needs the explicit "xpath=" prefix: only selectors starting with
+        # "//" or ".." are auto-detected as XPath, ".//" is parsed as CSS.
+        parent.locator("xpath=.//*[(self::span or self::div) and string-length(normalize-space())>0]"),
+    ]
+
+    for value_loc in value_locators:
+        if not value_loc.count():
+            continue
+        # Clean common label-value formatting like "E-mail\nx@y.cz"
+        text = _strip_labels(extract_text(value_loc.first), label_texts, anchored=True)
+        # Join the lines of a multi-line value; whitespace around the newline is optional.
+        text = re.sub(r"\s*\n\s*", " — ", text).strip()
+        if text:
+            return text
+        # Empty after cleaning (e.g. the match was the label itself): try the next candidate.
+
+    # As a last fallback, try reading the parent block's text minus the label
+    block_text = _strip_labels(extract_text(parent), label_texts, anchored=False)
+    return block_text or None
+
+def extract_all_text_pairs(root):
+    """
+    Heuristic sweep that guesses label->value pairs from grid/list containers.
+
+    At most the first 20 matching containers are read. Each container's text
+    is split into non-empty lines and every pair of adjacent lines is treated
+    as a possible (label, value). The first value seen for a label is kept.
+
+    Args:
+        root: Locator of the detail container to scan.
+
+    Returns:
+        Dict mapping guessed labels to guessed values.
+    """
+    pairs = {}
+    # Common MUI grid/list patterns
+    containers = root.locator("div.MuiGrid-container, dl, ul.MuiList-root")
+    limit = min(20, containers.count())
+    for index in range(limit):
+        raw = extract_text(containers.nth(index))
+        if not raw:
+            continue
+        lines = [ln.strip() for ln in raw.splitlines() if ln.strip()]
+        # Sliding window over neighbours: line k is a label candidate for line k+1.
+        for label, value in zip(lines, lines[1:]):
+            # Skip long "labels", self-pairs, and values that look like another "key: value".
+            if len(label) > 32 or label == value or ":" in value:
+                continue
+            pairs.setdefault(label, value)
+    return pairs
+
+def extract_patient_detail(page, patient_id):
+    """Collect the patient's name and known fields from the open detail container.
+
+    Args:
+        page: Playwright page with the detail panel open.
+        patient_id: Identifier stored under the ``"id"`` key of the result.
+
+    Returns:
+        A dict with ``"id"``, optionally ``"name"``, every targeted field that
+        yielded non-empty text, and any extra label/value pairs from the
+        generic sweep whose label is not already present. When no detail
+        container exists, ``{"id": ..., "error": "detail_not_found"}``.
+    """
+    root = find_detail_root(page)
+    # find_detail_root always hands back a locator object, even when it matches
+    # nothing, so a plain truthiness test can never detect a missing container.
+    if root is None or not root.count():
+        return {"id": patient_id, "error": "detail_not_found"}
+
+    # Try to get a headline with the name
+    name = None
+    for sel in ["h1", "h2", "h3", "header h2", "[data-testid='PatientName']"]:
+        loc = root.locator(sel)
+        if loc.count():
+            nm = extract_text(loc.first)
+            if nm and len(nm) > 1:
+                name = nm
+                break
+
+    # Targeted fields (CZ + EN aliases)
+    fields = {
+        "Datum narození / Born": extract_field_by_label(root, ["Datum narození", "Datum nar.", "Date of birth", "Born"]),
+        "Rodné číslo":           extract_field_by_label(root, ["Rodné číslo", "RČ", "Personal ID"]),
+        "Telefon":               extract_field_by_label(root, ["Telefon", "Tel.", "Phone", "Mobile"]),
+        "E-mail":                extract_field_by_label(root, ["E-mail", "Email"]),
+        "Zdravotní pojišťovna":  extract_field_by_label(root, ["Pojišťovna", "Zdravotní pojišťovna", "Insurer", "Insurance"]),
+        "Adresa":                extract_field_by_label(root, ["Adresa", "Address"]),
+        "Poznámka":              extract_field_by_label(root, ["Poznámka", "Note", "Notes"]),
+        "Pohlaví":               extract_field_by_label(root, ["Pohlaví", "Gender", "Sex"]),
+        "Praktický lékař":       extract_field_by_label(root, ["Praktický lékař", "GP", "General practitioner"]),
+    }
+
+    # Sweep for any extra key→value pairs we didn’t explicitly target
+    extras = extract_all_text_pairs(root)
+
+    # Merge non-empty fields
+    data = {"id": patient_id}
+    if name:
+        data["name"] = name
+    for k, v in fields.items():
+        if v and v.strip():
+            data[k] = v.strip()
+
+    # Add extras that aren't already present (targeted fields take precedence)
+    for k, v in extras.items():
+        if k not in data and v and v.strip():
+            data[k] = v.strip()
+
+    return data
+
+# ---------- main ----------
+
+def main():
+    """Open the configured patient's detail view, extract it, print it and save it as JSON.
+
+    Reads the module constants PATIENT_ID, STATE_FILE and BASE_URL. The
+    result is printed and written to ``patient_<PATIENT_ID>.json`` relative
+    to the current working directory.
+
+    Exits with status 1 when the configuration is invalid, the session was
+    redirected to the login page, or the detail panel could not be opened.
+    The browser is closed on every path, including errors.
+    """
+    if not PATIENT_ID or len(PATIENT_ID) < 8:
+        print("Set PATIENT_ID to a valid patient UUID.")
+        sys.exit(1)
+
+    sf = Path(STATE_FILE)
+    if not sf.exists():
+        print(f"Storage file not found: {sf}")
+        sys.exit(1)
+
+    def redirected_to_login(page):
+        # The login route is recognised by its path fragment in the current URL.
+        return "/prihlaseni" in page.url.lower()
+
+    with sync_playwright() as p:
+        browser = p.chromium.launch(headless=True)  # set False to watch
+        try:
+            context = browser.new_context(storage_state=str(sf))
+            context.set_default_navigation_timeout(30000)
+            context.set_default_timeout(15000)
+            page = context.new_page()
+
+            # Try via query param first
+            opened = open_detail_via_query(page, PATIENT_ID)
+
+            # Check for an expired session before trusting "opened": a dialog
+            # on the login page would otherwise be scraped as patient detail.
+            if redirected_to_login(page):
+                print("Redirected to login — refresh your medevio_storage.json.")
+                sys.exit(1)
+
+            # If not opened, go to base list and click the row
+            if not opened:
+                page.goto(BASE_URL, wait_until="domcontentloaded")
+                if redirected_to_login(page):
+                    print("Redirected to login — refresh your medevio_storage.json.")
+                    sys.exit(1)
+                if not open_detail_by_click(page, PATIENT_ID):
+                    print("Could not open detail panel (neither via query nor by clicking).")
+                    sys.exit(1)
+
+            # At this point, detail should be open
+            data = extract_patient_detail(page, PATIENT_ID)
+            serialized = json.dumps(data, ensure_ascii=False, indent=2)
+
+            print("\n=== Patient detail ===")
+            print(serialized)
+
+            out = Path(f"patient_{PATIENT_ID}.json")
+            out.write_text(serialized, encoding="utf-8")
+            print(f"\nSaved → {out.resolve()}")
+        finally:
+            # Runs for normal completion, sys.exit() and unexpected errors alike.
+            browser.close()
+
+# Run the extraction only when this file is executed as a script, not when it is imported.
+if __name__ == "__main__":
+    main()