# print_patients_first_page_ids.py
"""Print and save the patient IDs shown on the first page of the patient grid.

The script opens ``PATIENTS_URL`` in Chromium using the saved Playwright
storage state in ``STATE_FILE``, tries to switch the grid to 100 rows per
page, collects the ``data-id`` attribute of every rendered row, prints the
IDs, and writes them to ``patient_ids_first_page.json`` and
``patient_ids_first_page.csv`` in the current working directory.
"""
from pathlib import Path
import json
import sys
import time

from playwright.sync_api import sync_playwright, TimeoutError as PWTimeout

STATE_FILE = r"medevio_storage.json"
PATIENTS_URL = "https://my.medevio.cz/mudr-buzalkova/klinika/pacienti"


def harvest_ids_on_page(page):
    """Return the set of ``data-id`` values of the grid rows currently in the DOM.

    Two selectors are tried and their results merged, so a row matched by
    both is still reported once. Rows whose ``data-id`` is missing or empty
    are ignored.
    """
    ids = set()
    for sel in ["div[role='row'][data-id]", "div.MuiDataGrid-row[data-id]"]:
        for row in page.locator(sel).all():
            pid = row.get_attribute("data-id")
            if pid:
                ids.add(pid)
    return ids


def set_page_size(page, value="100"):
    """Choose *value* in the grid's "rows per page" combobox.

    Args:
        page: Playwright page showing the patient grid.
        value: Label of the option to select (e.g. ``"100"``).

    Raises:
        playwright TimeoutError: if the option does not become visible within
            5 seconds (for example because no combobox could be opened).
    """
    value = str(value)  # tolerate callers passing an int

    # Open the page-size combobox; try the Czech label, the English label,
    # then a structural fallback. Only the first one that exists is clicked.
    for loc in [
        page.get_by_role("combobox", name="Řádků na stránce:"),
        page.get_by_role("combobox", name="Rows per page:"),
        page.locator("div.MuiTablePagination-root [role='combobox']"),
    ]:
        if loc.count():
            loc.first.click()
            break

    # Select the option. exact=True keeps e.g. "10" from also matching "100",
    # mirroring the exact-text XPath fallback below.
    opt = page.get_by_role("option", name=value, exact=True)
    if not opt.count():
        opt = page.locator(f"//li[normalize-space(.)='{value}']")
    opt.first.wait_for(state="visible", timeout=5000)
    opt.first.click()

    # Wait a moment for the grid to refresh; fall back to a short sleep if no
    # row shows up in time.
    try:
        page.wait_for_selector("div[role='row'][data-id]", timeout=10000)
    except PWTimeout:
        time.sleep(0.8)


def _print_displayed_rows(page, label):
    """Print the pagination "displayed rows" text prefixed with *label*.

    Purely informational: any failure to read the label is deliberately
    ignored so it can never abort the run.
    """
    try:
        print(label, page.locator("p.MuiTablePagination-displayedRows").first.inner_text())
    except Exception:
        pass


def main():
    """Collect, print and save the patient IDs on the first grid page.

    Exits with status 1 when the storage-state file is missing. Returns
    early (after printing a hint) when the saved session has expired and the
    site redirects to the login page.
    """
    sf = Path(STATE_FILE)
    if not sf.exists():
        print(f"ERROR: storage not found: {sf}")
        sys.exit(1)

    with sync_playwright() as p:
        # headless=False shows the browser window; set True to run unattended.
        browser = p.chromium.launch(headless=False)
        try:
            context = browser.new_context(storage_state=str(sf))
            context.set_default_navigation_timeout(30000)
            context.set_default_timeout(15000)
            page = context.new_page()

            try:
                page.goto(PATIENTS_URL, wait_until="domcontentloaded")
            except PWTimeout:
                print("Warning: goto timeout; continuing…")

            # Detect redirect to login
            if "/prihlaseni" in page.url.lower():
                print("You were redirected to the login page → saved session is expired. Re-run the login-save step.")
                return

            # (Optional) print pagination label before/after
            _print_displayed_rows(page, "Before:")
            try:
                set_page_size(page, "100")
            except Exception as e:
                # Best effort: continue with whatever page size is active.
                print(f"Could not set page size to 100: {e!r}")
            _print_displayed_rows(page, "After :")

            page.wait_for_selector("div[role='row'][data-id]", timeout=15000)
            ids = sorted(harvest_ids_on_page(page))

            print(f"\nCollected {len(ids)} IDs on first page:")
            for pid in ids:
                print(pid)

            # Also save the IDs to disk
            out_json = Path("patient_ids_first_page.json")
            out_csv = Path("patient_ids_first_page.csv")
            out_json.write_text(json.dumps(ids, ensure_ascii=False, indent=2), encoding="utf-8")
            out_csv.write_text("patient_id\n" + "\n".join(ids), encoding="utf-8")
            print(f"\nSaved → {out_json.resolve()}")
            print(f"Saved → {out_csv.resolve()}")
        finally:
            # Close the browser on every exit path, including early return
            # and unexpected exceptions.
            browser.close()


if __name__ == "__main__":
    main()