From 4a7e1039a94b25895f368ca97725aeccebc86578 Mon Sep 17 00:00:00 2001 From: Vladimir Buzalka Date: Sun, 21 Sep 2025 21:35:39 +0200 Subject: [PATCH] d --- Medevio1_newrid.py | 98 +++++++++ Medevio2.py | 6 +- Medevio4.py | 9 +- Medevio4_newrid.py | 258 +++++++++++++++++++++++ Merevio05ReadWhetherRegisteredMedevio.py | 188 +++++++++++++++++ 5 files changed, 553 insertions(+), 6 deletions(-) create mode 100644 Medevio1_newrid.py create mode 100644 Medevio4_newrid.py create mode 100644 Merevio05ReadWhetherRegisteredMedevio.py diff --git a/Medevio1_newrid.py b/Medevio1_newrid.py new file mode 100644 index 0000000..0291411 --- /dev/null +++ b/Medevio1_newrid.py @@ -0,0 +1,98 @@ +# print_patients_first_page_ids.py +from pathlib import Path +import json, time, sys +from playwright.sync_api import sync_playwright, TimeoutError as PWTimeout + +STATE_FILE = r"medevio_storage.json" +PATIENTS_URL = "https://my.medevio.cz/mudr-buzalkova/klinika/pacienti" + +def harvest_ids_on_page(page): + ids = set() + for sel in ["div[role='row'][data-id]", "div.MuiDataGrid-row[data-id]"]: + for row in page.locator(sel).all(): + pid = row.get_attribute("data-id") + if pid: + ids.add(pid) + return ids + +def set_page_size(page, value="100"): + # Open the page-size combobox + for loc in [ + page.get_by_role("combobox", name="Řádků na stránce:"), + page.get_by_role("combobox", name="Rows per page:"), + page.locator("div.MuiTablePagination-root [role='combobox']"), + ]: + if loc.count(): + loc.first.click() + break + # Select option "100" (portal-safe) + opt = page.get_by_role("option", name=value) + if not opt.count(): + opt = page.locator(f"//li[normalize-space(.)='{value}']") + opt.first.wait_for(state="visible", timeout=5000) + opt.first.click() + # Wait a moment for refresh + try: + page.wait_for_selector("div[role='row'][data-id]", timeout=10000) + except PWTimeout: + time.sleep(0.8) + +def main(): + sf = Path(STATE_FILE) + if not sf.exists(): + print(f"ERROR: storage not found: {sf}") + sys.exit(1) + + with sync_playwright() as p: + browser = p.chromium.launch(headless=False) # set False to watch + context = browser.new_context(storage_state=str(sf)) + context.set_default_navigation_timeout(30000) + context.set_default_timeout(15000) + + page = context.new_page() + try: + page.goto(PATIENTS_URL, wait_until="domcontentloaded") + except PWTimeout: + print("Warning: goto timeout; continuing…") + + # Detect redirect to login + if "/prihlaseni" in page.url.lower(): + print("You were redirected to the login page → saved session is expired. Re-run the login-save step.") + browser.close() + return + + # (Optional) print pagination label before/after + try: + print("Before:", page.locator("p.MuiTablePagination-displayedRows").first.inner_text()) + except Exception: + pass + + try: + set_page_size(page, "100") + except Exception as e: + print(f"Could not set page size to 100: {e!r}") + + try: + print("After :", page.locator("p.MuiTablePagination-displayedRows").first.inner_text()) + except Exception: + pass + + page.wait_for_selector("div[role='row'][data-id]", timeout=15000) + + ids = sorted(harvest_ids_on_page(page)) + print(f"\nCollected {len(ids)} IDs on first page:") + for pid in ids: + print(pid) + + # Also save if you want + out_json = Path("patient_ids_first_page.json") + out_csv = Path("patient_ids_first_page.csv") + out_json.write_text(json.dumps(ids, ensure_ascii=False, indent=2), encoding="utf-8") + out_csv.write_text("patient_id\n" + "\n".join(ids), encoding="utf-8") + print(f"\nSaved → {out_json.resolve()}") + print(f"Saved → {out_csv.resolve()}") + + browser.close() + +if __name__ == "__main__": + main() diff --git a/Medevio2.py b/Medevio2.py index af7e90e..e2538d0 100644 --- a/Medevio2.py +++ b/Medevio2.py @@ -10,9 +10,9 @@ from pathlib import Path import json, sys, time, re from playwright.sync_api import sync_playwright, TimeoutError as PWTimeout -STATE_FILE = r"/medevio_storage.json" +STATE_FILE = r"medevio_storage.json" BASE_URL = "https://my.medevio.cz/mudr-buzalkova/klinika/pacienti" -PATIENT_ID = "236b3759-4c2b-4fa8-ab52-ce4ddb2e9064" # <-- put target ID here +PATIENT_ID = "fcb2414b-067b-4ca2-91b2-6c36a86d4cbb" # <-- put target ID here # ---------- helpers ---------- @@ -211,7 +211,7 @@ def main(): sys.exit(1) with sync_playwright() as p: - browser = p.chromium.launch(headless=True) # set False to watch + browser = p.chromium.launch(headless=False) # set False to watch context = browser.new_context(storage_state=str(sf)) context.set_default_navigation_timeout(30000) context.set_default_timeout(15000) diff --git a/Medevio4.py b/Medevio4.py index 41d439b..38b14aa 100644 --- a/Medevio4.py +++ b/Medevio4.py @@ -1,3 +1,6 @@ +#Tento kod se pripoji do kartoteky Medevio, zmeni na 100 pacientu na stranu, nactene + + # medevio_dump_patients_html_to_mysql.py import time import json @@ -10,13 +13,13 @@ from mysql.connector import errorcode from playwright.sync_api import sync_playwright, TimeoutError as PWTimeout # ---------- CONFIG ---------- -STATE_FILE = r"/medevio_storage.json" +STATE_FILE = r"medevio_storage.json" BASE_LIST_URL = "https://my.medevio.cz/mudr-buzalkova/klinika/pacienti" SAVE_DELAY_SECONDS = 10 # throttle: 10 sec per patient # MySQL connection settings (fill in) MYSQL_CFG = dict( - host="192.168.1.74", + host="192.168.1.76", port=3307, user="root", password="Vlado9674+", @@ -234,7 +237,7 @@ def main(): print(f"Already in DB: {len(already)} ids") with sync_playwright() as p: - browser = p.chromium.launch(headless=True) # set False to watch + browser = p.chromium.launch(headless=False) # set False to watch context = browser.new_context(storage_state=STATE_FILE) # 1) Collect all IDs from the listing (all pages) diff --git a/Medevio4_newrid.py b/Medevio4_newrid.py new file mode 100644 index 0000000..a48d16a --- /dev/null +++ b/Medevio4_newrid.py @@ -0,0 +1,258 @@ +# medevio_dump_patients_html_to_mysql.py +import time +import json +from pathlib import Path +from datetime import datetime +from typing import Set + +import mysql.connector +from mysql.connector import errorcode +from playwright.sync_api import sync_playwright, TimeoutError as PWTimeout + +# ---------- CONFIG ---------- +STATE_FILE = r"medevio_storage.json" +BASE_LIST_URL = "https://my.medevio.cz/mudr-buzalkova/klinika/pacienti" +SAVE_DELAY_SECONDS = 10 # throttle: 10 sec per patient + +# MySQL connection settings (fill in) +MYSQL_CFG = dict( + host="192.168.1.74", + port=3307, + user="root", + password="Vlado9674+", + database="medevio", +) + +TABLE_NAME = "patients_html" # schema created automatically + + +# ---------- DB helpers ---------- +def db_connect(): + try: + conn = mysql.connector.connect(**MYSQL_CFG) + return conn + except mysql.connector.Error as e: + raise SystemExit(f"MySQL connection failed: {e}") + +def db_ensure_table(conn): + ddl = f""" + CREATE TABLE IF NOT EXISTS `{TABLE_NAME}` ( + patient_id VARCHAR(64) PRIMARY KEY, + html LONGTEXT NOT NULL, + fetched_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP + ) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci; + """ + cur = conn.cursor() + cur.execute(ddl) + conn.commit() + cur.close() + +def db_existing_ids(conn) -> Set[str]: + ids = set() + cur = conn.cursor() + cur.execute(f"SELECT patient_id FROM `{TABLE_NAME}`") + for (pid,) in cur.fetchall(): + ids.add(pid) + cur.close() + return ids + +def db_upsert_html(conn, patient_id: str, html: str): + cur = conn.cursor() + cur.execute( + f"""INSERT INTO `{TABLE_NAME}` (patient_id, html, fetched_at) + VALUES (%s, %s, NOW()) + ON DUPLICATE KEY UPDATE html = VALUES(html), fetched_at = VALUES(fetched_at)""", + (patient_id, html), + ) + conn.commit() + cur.close() + + +# ---------- Playwright helpers ---------- +def wait_for_grid_ready(page): + # grid present & at least one row (be generous on timeout) + page.wait_for_selector("div[role='rowgroup']", timeout=20000) + page.wait_for_selector("div[role='row'][data-id]", timeout=20000) + +def set_page_size_100(page): + # Click the page-size combobox (CZ/EN + generic) + for loc in [ + page.get_by_role("combobox", name="Řádků na stránce:"), + page.get_by_role("combobox", name="Rows per page:"), + page.locator("div.MuiTablePagination-root [role='combobox']"), + ]: + if loc.count(): + loc.first.click() + break + # Select 100 (MUI menu often renders in a portal) + opt = page.get_by_role("option", name="100") + if not opt.count(): + opt = page.locator("//li[normalize-space(.)='100']") + opt.first.wait_for(state="visible", timeout=5000) + opt.first.click() + # Wait for rows to refresh + try: + page.wait_for_selector("div[role='row'][data-id]", timeout=10000) + except PWTimeout: + time.sleep(0.8) + +def harvest_ids_on_current_page(page) -> Set[str]: + ids = set() + for sel in ["div[role='row'][data-id]", "div.MuiDataGrid-row[data-id]"]: + for row in page.locator(sel).all(): + pid = row.get_attribute("data-id") + if pid: + ids.add(pid) + return ids + +def click_next_page(page) -> bool: + # Prefer ARIA label + nxt = page.get_by_role("button", name="Go to next page") + if nxt.count(): + try: + if nxt.first.is_enabled(): + nxt.first.click() + return True + except Exception: + pass + # Fallback (CZ) + nxt2 = page.get_by_role("button", name="Další") + if nxt2.count(): + try: + if nxt2.first.is_enabled(): + nxt2.first.click() + return True + except Exception: + pass + return False + +def ensure_detail_open(page) -> bool: + # Detail drawer/dialog visible? + for sel in ["[role='dialog']", "div.MuiDrawer-paper", "div[aria-modal='true']"]: + loc = page.locator(sel) + if loc.count() and loc.first.is_visible(): + return True + return False + + +# ---------- Main workflow ---------- +def collect_all_patient_ids(context) -> Set[str]: + page = context.new_page() + page.set_default_timeout(15000) + page.set_default_navigation_timeout(30000) + + # Use domcontentloaded (SPAs often keep network busy) + page.goto(BASE_LIST_URL, wait_until="domcontentloaded") + if "/prihlaseni" in page.url.lower(): + raise SystemExit("Session expired → refresh medevio_storage.json via the login script.") + + wait_for_grid_ready(page) + + # optional: print label like "1–25 z 1856" + try: + label = page.locator("p.MuiTablePagination-displayedRows").first.inner_text() + print("Pagination label BEFORE:", label) + except Exception: + pass + + # Set 100/page + try: + set_page_size_100(page) + try: + label = page.locator("p.MuiTablePagination-displayedRows").first.inner_text() + print("Pagination label AFTER :", label) + except Exception: + pass + except Exception as e: + print(f"Warning: could not set page size to 100: {e!r}") + + all_ids: Set[str] = set() + page_index = 1 + + while True: + wait_for_grid_ready(page) + ids_now = harvest_ids_on_current_page(page) + print(f"Page {page_index}: harvested {len(ids_now)} ids") + all_ids |= ids_now + + # Try to go next; if cannot, break + if not click_next_page(page): + break + + # Wait for DOM to actually update (new rows) + try: + page.wait_for_load_state("domcontentloaded", timeout=10000) + except PWTimeout: + pass + time.sleep(0.5) + page_index += 1 + + page.close() + print(f"Total unique IDs collected: {len(all_ids)}") + return all_ids + +def fetch_and_store_patient_html(context, conn, patient_id: str): + page = context.new_page() + page.set_default_timeout(15000) + page.set_default_navigation_timeout(30000) + + url = f"{BASE_LIST_URL}?pacient={patient_id}" + page.goto(url, wait_until="domcontentloaded") + + # If detail didn’t open, fallback: go to list, click row + if not ensure_detail_open(page): + page.goto(BASE_LIST_URL, wait_until="domcontentloaded") + try: + page.wait_for_selector(f"div[role='row'][data-id='{patient_id}']", timeout=15000) + page.locator(f"div[role='row'][data-id='{patient_id}']").first.click() + # wait for drawer/dialog + page.wait_for_selector("[role='dialog'], div.MuiDrawer-paper, div[aria-modal='true']", timeout=12000) + except PWTimeout: + print(f"[{patient_id}] detail panel did not open — skipping") + page.close() + return + + # Save full HTML of the page (includes the open detail drawer) + html = page.content() + db_upsert_html(conn, patient_id, html) + print(f"[{patient_id}] saved HTML ({len(html)} bytes) at {datetime.now().isoformat(timespec='seconds')}") + + page.close() + # Throttle per your requirement + time.sleep(SAVE_DELAY_SECONDS) + + +def main(): + # Check storage exists + if not Path(STATE_FILE).exists(): + raise SystemExit(f"Storage not found: {STATE_FILE}") + + # DB ready + conn = db_connect() + db_ensure_table(conn) + already = db_existing_ids(conn) + print(f"Already in DB: {len(already)} ids") + + with sync_playwright() as p: + browser = p.chromium.launch(headless=True) # set False to watch + context = browser.new_context(storage_state=STATE_FILE) + + # 1) Collect all IDs from the listing (all pages) + all_ids = collect_all_patient_ids(context) + + # 2) Iterate and store HTML (skip existing) + todo = [pid for pid in sorted(all_ids) if pid not in already] + print(f"To fetch now: {len(todo)} ids (skipping {len(all_ids)-len(todo)} already saved)") + + for i, pid in enumerate(todo, 1): + try: + fetch_and_store_patient_html(context, conn, pid) + except Exception as e: + print(f"[{pid}] ERROR: {e!r} — continuing with next") + + browser.close() + conn.close() + print("Done.") + +if __name__ == "__main__": + main() diff --git a/Merevio05ReadWhetherRegisteredMedevio.py b/Merevio05ReadWhetherRegisteredMedevio.py new file mode 100644 index 0000000..a1c111d --- /dev/null +++ b/Merevio05ReadWhetherRegisteredMedevio.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import time +from pathlib import Path +from datetime import datetime +import pymysql +from pymysql.cursors import DictCursor +from playwright.sync_api import sync_playwright, TimeoutError as PWTimeout + +# ========= CONFIG ========= +MYSQL_CFG = dict( + host="192.168.1.76", + port=3307, + user="root", + password="Vlado9674+", + database="medevio", + cursorclass=DictCursor, + autocommit=False, # we commit in batches +) + +# Column in patients_extracted that stores Medevio UUID used in the URL: +UUID_COLUMN = "rid" # <-- change if your column name differs + +# Output columns (will be created if missing; MySQL 8.0+ supports IF NOT EXISTS): +REGISTERED_COL = "medevio_registered" # TINYINT(1) NULL/0/1 +CHECKED_AT_COL = "medevio_checked_at" # DATETIME NULL +ERROR_COL = "medevio_check_error" # TEXT NULL (optional) + +# Medevio routing +PATIENT_URL_TMPL = "https://my.medevio.cz/mudr-buzalkova/klinika/pacienti?pacient={uuid}" + +# Login session (created earlier with your script) +STATE_FILE = Path("medevio_storage.json") + +# Batch/pace +BATCH_LIMIT = 5 # how many patients per run +SLEEP_SECONDS = 3 # wait between patients (requested) +NAV_TIMEOUT = 20_000 # ms +TEXT_TIMEOUT = 15_000 # ms (for main area/heading) + +# Texts indicating NOT registered: +NOT_REGISTERED_STRINGS = [ + "Pacientka zatím nemá Medevio účet.", + "Pacient zatím nemá Medevio účet.", +] +# ========================== + + + + +SELECT_SQL = f""" +SELECT {UUID_COLUMN} AS uuid, jmeno, prijmeni, rc +FROM patients_extracted +WHERE {UUID_COLUMN} IS NOT NULL + AND {UUID_COLUMN} <> '' + AND {REGISTERED_COL} IS NULL +LIMIT {BATCH_LIMIT}; +""" + +UPDATE_OK_SQL = f""" +UPDATE patients_extracted +SET {REGISTERED_COL}=%s, {CHECKED_AT_COL}=NOW(), {ERROR_COL}=NULL +WHERE {UUID_COLUMN}=%s +""" + +UPDATE_ERR_SQL = f""" +UPDATE patients_extracted +SET {REGISTERED_COL}=NULL, {CHECKED_AT_COL}=NOW(), {ERROR_COL}=%s +WHERE {UUID_COLUMN}=%s +""" + +DDL_SQLS = [ + f"ALTER TABLE patients_extracted ADD COLUMN {REGISTERED_COL} TINYINT(1) NULL", + f"ALTER TABLE patients_extracted ADD COLUMN {CHECKED_AT_COL} DATETIME NULL", + f"ALTER TABLE patients_extracted ADD COLUMN {ERROR_COL} TEXT NULL", +] + +CHECKS_FOR_DDL_SQLS=[ + f"SELECT COUNT(*) AS cnt FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME = 'patients_extracted' AND COLUMN_NAME = '{REGISTERED_COL}'", + f"SELECT COUNT(*) AS cnt FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME = 'patients_extracted' AND COLUMN_NAME = '{CHECKED_AT_COL}'", + f"SELECT COUNT(*) AS cnt FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME = 'patients_extracted' AND COLUMN_NAME = '{ERROR_COL}'", +] +def ensure_columns(conn): + with conn.cursor() as cur: + for ddl,ddlcheck in zip(DDL_SQLS,CHECKS_FOR_DDL_SQLS): + cur.execute(ddlcheck) + row = cur.fetchone() + if row["cnt"] == 0: + print("Column missing") + cur.execute(ddl) + print(f"✓ Executed: {ddl}") + else: + print("Column exists") + conn.commit() + + +def pick_registered_flag(page_text: str) -> int: + text = page_text or "" + # If any NOT-registered phrase is present → 0; otherwise assume registered → 1 + for marker in NOT_REGISTERED_STRINGS: + if marker in text: + return 0 + return 1 + + +def main(): + # --- DB: fetch a batch to process --- + conn = pymysql.connect(**MYSQL_CFG) + try: + ensure_columns(conn) + with conn.cursor() as cur: + cur.execute("SET NAMES utf8mb4 COLLATE utf8mb4_czech_ci") + cur.execute("SET collation_connection = 'utf8mb4_czech_ci'") + + cur.execute(SELECT_SQL) + rows = cur.fetchall() + + if not rows: + print("No patients to check (all have medevio_registered filled).") + return + + print(f"Will process {len(rows)} patients…") + + # --- Playwright session --- + with sync_playwright() as p: + browser = p.chromium.launch(headless=False, slow_mo=0) + context = browser.new_context(storage_state=str(STATE_FILE)) + page = context.new_page() + page.set_default_timeout(NAV_TIMEOUT) + + processed = ok = errs = 0 + + for r in rows: + processed += 1 + # pid = r["id"] + uuid = r["uuid"] + name = f"{r.get('prijmeni','')}, {r.get('jmeno','')}" + rc = r.get("rc","") + + url = PATIENT_URL_TMPL.format(uuid=uuid) + print(f"URL pro otevření pacienta je: {url}0") + print(f"[{processed:>3}] {name} | RC {rc} | {uuid} → {url}") + + try: + page.goto(url, wait_until="domcontentloaded") + # Optionally wait for a stable anchor; fallback to sleep + try: + # A stable bit we saw earlier + page.get_by_text("Historie požadavků").wait_for(timeout=TEXT_TIMEOUT) + except PWTimeout: + pass + + # Wait the requested 3 seconds for the UI to settle + time.sleep(SLEEP_SECONDS) + + # Get full text and detect + full_text = page.content() # HTML; safer to check visible text too: + vis_text = page.inner_text("body") + + registered = pick_registered_flag(full_text) if full_text else pick_registered_flag(vis_text) + + with conn.cursor() as cur: + cur.execute(UPDATE_OK_SQL, (registered, pid)) + conn.commit() + ok += 1 + state = "REGISTERED" if registered == 1 else "NOT REGISTERED" + print(f" → {state}") + + except Exception as e: + conn.rollback() + errs += 1 + msg = f"{type(e).__name__}: {e}" + with conn.cursor() as cur: + cur.execute(UPDATE_ERR_SQL, (msg[:1000], pid)) + conn.commit() + print(f" ! ERROR → {msg}") + + browser.close() + + print(f"Done. processed={processed}, ok={ok}, errors={errs}") + + finally: + conn.close() + + +if __name__ == "__main__": + main()