# extract_patient_detail.py
# Usage:
#   1) Put the path to your medevio_storage.json into STATE_FILE.
#   2) Set PATIENT_ID to a real UUID from your patient list.
#   3) Run: python extract_patient_detail.py
#
# Output: prints the extracted dict to the console and saves it as
# patient_<PATIENT_ID>.json in the current working directory.
import json
import re
import sys
import time
from pathlib import Path

from playwright.sync_api import TimeoutError as PWTimeout
from playwright.sync_api import sync_playwright

STATE_FILE = r"/medevio_storage.json"
BASE_URL = "https://my.medevio.cz/mudr-buzalkova/klinika/pacienti"
PATIENT_ID = "236b3759-4c2b-4fa8-ab52-ce4ddb2e9064"  # <-- put target ID here

# Selectors that may match the patient detail container (dialog or drawer).
# Shared by every helper that detects or locates the detail panel so the
# lists cannot drift apart.
DETAIL_SELECTORS = (
    "[role='dialog']",
    "div.MuiDrawer-paper",
    "div.MuiModal-root [role='dialog']",
    "div[aria-modal='true']",
)

# How often wait_for_detail_open() re-checks the page, in milliseconds.
_POLL_INTERVAL_MS = 100


# ---------- helpers ----------
def wait_for_grid(page, timeout=15000):
    """Wait until the patient grid has rendered at least one data row.

    Args:
        page: Playwright page showing the patient list.
        timeout: Per-wait timeout in milliseconds.

    Raises:
        PWTimeout: If no ``div[role='row'][data-id]`` appears in time.
    """
    try:
        page.wait_for_selector("div[role='rowgroup']", timeout=timeout)
    except PWTimeout:
        # A missing rowgroup wrapper is tolerated; the row wait below is
        # the one that decides whether the grid is usable.
        pass
    page.wait_for_selector("div[role='row'][data-id]", timeout=timeout)


def _first_visible_detail(page):
    """Return the first visible detail container locator, or None."""
    for selector in DETAIL_SELECTORS:
        matches = page.locator(selector)
        if matches.count() and matches.first.is_visible():
            return matches.first
    return None


def is_detail_open(page):
    """Return True if any of DETAIL_SELECTORS currently matches a visible element."""
    return _first_visible_detail(page) is not None


def wait_for_detail_open(page, quick=False):
    """Poll until a detail container is visible or the timeout elapses.

    The timeout (4 s when ``quick`` is true, otherwise 15 s) is a total
    budget shared by all selectors, so a container matching a later selector
    is detected as soon as it appears instead of only after the earlier
    selectors have each timed out.

    Returns:
        True if a detail container became visible, False on timeout.
    """
    timeout_ms = 4000 if quick else 15000
    deadline = time.monotonic() + timeout_ms / 1000
    while True:
        if is_detail_open(page):
            return True
        if time.monotonic() >= deadline:
            return False
        # wait_for_timeout keeps Playwright's event dispatch running,
        # unlike time.sleep().
        page.wait_for_timeout(_POLL_INTERVAL_MS)


def open_detail_via_query(page, patient_id):
    """Navigate to ``BASE_URL?pacient=<patient_id>`` and report whether a detail opened.

    Returns:
        True if a detail container is visible after the navigation.
    """
    target = f"{BASE_URL}?pacient={patient_id}"
    page.goto(target, wait_until="domcontentloaded")
    if wait_for_detail_open(page, quick=True):
        return True
    # Give the panel a short extra grace period to mount, then re-check once.
    page.wait_for_timeout(800)
    return is_detail_open(page)


def open_detail_by_click(page, patient_id):
    """Fallback: click the grid row whose ``data-id`` equals ``patient_id``.

    Returns:
        True if the row was found, clicked, and a detail container became
        visible; False if the grid did not render, the row is absent, or
        the detail did not open in time.
    """
    try:
        wait_for_grid(page, timeout=15000)
    except PWTimeout:
        # No grid rows at all: report failure instead of crashing the caller.
        return False
    row = page.locator(f"div[role='row'][data-id='{patient_id}']").first
    if not row.count():
        return False
    row.click()
    return wait_for_detail_open(page)


def find_detail_root(page):
    """Return the locator of the open detail container, or None if none exists.

    Prefers the first visible match of DETAIL_SELECTORS; otherwise falls back
    to the last modal/drawer element present in the DOM.
    """
    visible = _first_visible_detail(page)
    if visible is not None:
        return visible
    fallback = page.locator("div.MuiModal-root, div.MuiDrawer-paper")
    if fallback.count():
        return fallback.last
    return None


def extract_text(el):
    """Return the stripped inner text of ``el``, or "" if it cannot be read."""
    try:
        return el.inner_text().strip()
    except Exception:
        # Deliberate best effort: an unreadable element counts as "no text".
        return ""


def _xpath_literal(value):
    """Quote ``value`` as an XPath string literal, handling embedded quotes."""
    if "'" not in value:
        return f"'{value}'"
    if '"' not in value:
        return f'"{value}"'
    # Contains both quote kinds: glue the pieces together with concat().
    pieces = value.split("'")
    return "concat(" + ", \"'\", ".join(f"'{p}'" for p in pieces) + ")"


def _strip_labels(text, label_texts, *, anchored):
    """Remove label text (plus an optional colon) from ``text``.

    Labels are removed longest-first so a short alias (e.g. "Note") cannot
    eat the beginning of a longer one (e.g. "Notes"). When ``anchored`` is
    true only a label at the start of the text is removed.
    """
    prefix = "^" if anchored else ""
    for label in sorted(label_texts, key=len, reverse=True):
        pattern = rf"{prefix}{re.escape(label)}\s*:?\s*"
        text = re.sub(pattern, "", text, flags=re.I)
    return text


def extract_field_by_label(root, label_texts):
    """Find a field value by its label text (CZ/EN variants).

    Looks for an element whose text equals (or, failing that, contains) one
    of ``label_texts`` and then reads the value from a nearby element.

    Args:
        root: Locator of the detail container to search in.
        label_texts: Alternative label strings for the same field.

    Returns:
        The cleaned value text, or None if no label or no value was found.
    """
    exact = " | ".join(
        f".//*[normalize-space()={_xpath_literal(t)}]" for t in label_texts
    )
    loc = root.locator(f"xpath=({exact})")
    if not loc.count():
        # No exact match: retry with a substring match on the label.
        partial = " | ".join(
            f".//*[contains(normalize-space(), {_xpath_literal(t)})]"
            for t in label_texts
        )
        loc = root.locator(f"xpath=({partial})")
        if not loc.count():
            return None

    candidate = loc.first
    parent = candidate.locator("xpath=..")
    # Places where the value may live, in order of preference.
    value_candidates = [
        parent.locator("xpath=following-sibling::*[1]"),
        candidate.locator("xpath=following-sibling::*[1]"),
        # The explicit "xpath=" prefix is required: Playwright only
        # auto-detects XPath for selectors starting with "//" or "..".
        parent.locator(
            "xpath=.//*[(self::span or self::div)"
            " and string-length(normalize-space())>0]"
        ),
    ]
    for value_loc in value_candidates:
        if not value_loc.count():
            continue
        text = extract_text(value_loc.first)
        # Clean label-value formatting such as "E-mail\nx@y.cz".
        text = _strip_labels(text, label_texts, anchored=True)
        text = re.sub(r"\s*\n\s*", " — ", text).strip()
        # Check emptiness only after cleaning, so an element holding just
        # the label does not short-circuit the remaining candidates.
        if text:
            return text

    # Last resort: the parent block's text with every label removed.
    block_text = extract_text(parent)
    if block_text:
        return _strip_labels(block_text, label_texts, anchored=False).strip()
    return None


def _pair_adjacent_lines(text, result):
    """Add plausible "label -> next line" pairs from ``text`` into ``result``.

    Every non-empty line is paired with the line that follows it (the pairs
    overlap). A pair is kept when the label is at most 32 characters, differs
    from the value, the value contains no colon, and the label is not
    already present in ``result``.
    """
    lines = [part.strip() for part in text.splitlines() if part.strip()]
    for label, value in zip(lines, lines[1:]):
        if len(label) <= 32 and label != value and ":" not in value:
            result.setdefault(label, value)


def extract_all_text_pairs(root):
    """Generic key-value sweep over grid/list blocks inside ``root``.

    Inspects at most the first 20 elements matching
    ``div.MuiGrid-container, dl, ul.MuiList-root``.

    Returns:
        A dict of guessed label -> value pairs (a naive heuristic).
    """
    result = {}
    blocks = root.locator("div.MuiGrid-container, dl, ul.MuiList-root")
    for i in range(min(20, blocks.count())):
        text = extract_text(blocks.nth(i))
        if text:
            _pair_adjacent_lines(text, result)
    return result


def extract_patient_detail(page, patient_id):
    """Collect the patient's name and detail fields from the open detail panel.

    Returns:
        A dict with at least ``"id"``. When no detail container can be
        located it is ``{"id": patient_id, "error": "detail_not_found"}``.
    """
    root = find_detail_root(page)
    if root is None:
        return {"id": patient_id, "error": "detail_not_found"}

    # The first heading-like element with more than one character is the name.
    name = None
    for sel in ["h1", "h2", "h3", "header h2", "[data-testid='PatientName']"]:
        loc = root.locator(sel)
        if loc.count():
            heading = extract_text(loc.first)
            if heading and len(heading) > 1:
                name = heading
                break

    # Targeted fields (CZ + EN aliases).
    fields = {
        "Datum narození / Born": extract_field_by_label(
            root, ["Datum narození", "Datum nar.", "Date of birth", "Born"]
        ),
        "Rodné číslo": extract_field_by_label(
            root, ["Rodné číslo", "RČ", "Personal ID"]
        ),
        "Telefon": extract_field_by_label(
            root, ["Telefon", "Tel.", "Phone", "Mobile"]
        ),
        "E-mail": extract_field_by_label(root, ["E-mail", "Email"]),
        "Zdravotní pojišťovna": extract_field_by_label(
            root, ["Pojišťovna", "Zdravotní pojišťovna", "Insurer", "Insurance"]
        ),
        "Adresa": extract_field_by_label(root, ["Adresa", "Address"]),
        "Poznámka": extract_field_by_label(root, ["Poznámka", "Note", "Notes"]),
        "Pohlaví": extract_field_by_label(root, ["Pohlaví", "Gender", "Sex"]),
        "Praktický lékař": extract_field_by_label(
            root, ["Praktický lékař", "GP", "General practitioner"]
        ),
    }

    # Sweep for any extra key -> value pairs that were not explicitly targeted.
    extras = extract_all_text_pairs(root)

    data = {"id": patient_id}
    if name:
        data["name"] = name
    # Targeted fields win over the generic sweep; empty values are dropped.
    for key, value in fields.items():
        if value and value.strip():
            data[key] = value.strip()
    for key, value in extras.items():
        if key not in data and value and value.strip():
            data[key] = value.strip()
    return data


# ---------- main ----------
def main():
    """Open the detail of PATIENT_ID, print it, and save it as JSON.

    Exits with status 1 when the configuration is invalid, the session was
    redirected to the login page, or the detail panel could not be opened.
    """
    if not PATIENT_ID or len(PATIENT_ID) < 8:
        print("Set PATIENT_ID to a valid patient UUID.")
        sys.exit(1)

    sf = Path(STATE_FILE)
    if not sf.exists():
        print(f"Storage file not found: {sf}")
        sys.exit(1)

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)  # set False to watch
        try:
            context = browser.new_context(storage_state=str(sf))
            context.set_default_navigation_timeout(30000)
            context.set_default_timeout(15000)
            page = context.new_page()

            # Try via query param first; otherwise go to the list and click.
            if not open_detail_via_query(page, PATIENT_ID):
                page.goto(BASE_URL, wait_until="domcontentloaded")
                if "/prihlaseni" in page.url.lower():
                    print("Redirected to login — refresh your medevio_storage.json.")
                    sys.exit(1)
                if not open_detail_by_click(page, PATIENT_ID):
                    print("Could not open detail panel (neither via query nor by clicking).")
                    sys.exit(1)

            # At this point, the detail should be open.
            data = extract_patient_detail(page, PATIENT_ID)
            payload = json.dumps(data, ensure_ascii=False, indent=2)
            print("\n=== Patient detail ===")
            print(payload)

            out = Path(f"patient_{PATIENT_ID}.json")
            out.write_text(payload, encoding="utf-8")
            print(f"\nSaved → {out.resolve()}")
        finally:
            # Close the browser on every path, including errors and exits.
            browser.close()


if __name__ == "__main__":
    main()