d
This commit is contained in:
98
Medevio1.py
Normal file
98
Medevio1.py
Normal file
@@ -0,0 +1,98 @@
|
|||||||
|
# print_patients_first_page_ids.py
|
||||||
|
from pathlib import Path
|
||||||
|
import json, time, sys
|
||||||
|
from playwright.sync_api import sync_playwright, TimeoutError as PWTimeout
|
||||||
|
|
||||||
|
STATE_FILE = r"/medevio_storage.json"
|
||||||
|
PATIENTS_URL = "https://my.medevio.cz/mudr-buzalkova/klinika/pacienti"
|
||||||
|
|
||||||
|
def harvest_ids_on_page(page):
    """Collect the ``data-id`` values of the grid rows currently on the page.

    Two selectors are queried (the ARIA ``row`` role and the MUI DataGrid row
    class) and their results are merged into one set, so a row matched by
    both selectors is reported once.

    Args:
        page: Page object exposing ``locator(selector).all()``.

    Returns:
        set: Every non-empty ``data-id`` attribute value that was found.
    """
    row_selectors = ("div[role='row'][data-id]", "div.MuiDataGrid-row[data-id]")
    attribute_values = (
        element.get_attribute("data-id")
        for selector in row_selectors
        for element in page.locator(selector).all()
    )
    # Rows whose attribute is missing or empty are dropped.
    return {value for value in attribute_values if value}
|
||||||
|
|
||||||
|
def set_page_size(page, value="100"):
    """Choose *value* in the grid's rows-per-page dropdown.

    The dropdown is looked up by its Czech label, then its English label,
    then a generic pagination selector; the first lookup that matches
    anything is clicked. If none matches, nothing is clicked and the
    function still goes on to look for the option.

    Args:
        page: Playwright page showing the grid.
        value: Text of the option to select.

    Raises:
        Whatever Playwright raises when the option does not become visible
        within 5 seconds or a click fails.
    """
    dropdown_candidates = (
        page.get_by_role("combobox", name="Řádků na stránce:"),
        page.get_by_role("combobox", name="Rows per page:"),
        page.locator("div.MuiTablePagination-root [role='combobox']"),
    )
    dropdown = next((cand for cand in dropdown_candidates if cand.count()), None)
    if dropdown is not None:
        dropdown.first.click()

    # Look the option up by role first, then fall back to a plain <li> match.
    option = page.get_by_role("option", name=value)
    if option.count() == 0:
        option = page.locator(f"//li[normalize-space(.)='{value}']")
    option.first.wait_for(state="visible", timeout=5000)
    option.first.click()

    # Wait for data rows; if that times out, pause briefly instead of failing.
    try:
        page.wait_for_selector("div[role='row'][data-id]", timeout=10000)
    except PWTimeout:
        time.sleep(0.8)
|
||||||
|
|
||||||
|
def _print_pagination_label(page, prefix):
    """Print the grid's "displayed rows" label after *prefix*; ignore failures."""
    try:
        print(prefix, page.locator("p.MuiTablePagination-displayedRows").first.inner_text())
    except Exception:
        # Purely informational output: a missing label must not stop the run.
        pass


def main():
    """Print and save the patient IDs shown on the first page of the list.

    Loads the saved session from ``STATE_FILE``, opens ``PATIENTS_URL``,
    tries to switch the grid to 100 rows per page, then prints the harvested
    IDs and writes them to ``patient_ids_first_page.json`` and
    ``patient_ids_first_page.csv`` in the current working directory.

    Exits with status 1 when the storage file does not exist. Returns early
    (after printing a hint) when the site redirects to the login page.
    """
    sf = Path(STATE_FILE)
    if not sf.exists():
        print(f"ERROR: storage not found: {sf}")
        sys.exit(1)

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)  # set False to watch
        # try/finally: the browser is closed on every path, including when a
        # later step (e.g. the final row wait) raises.
        try:
            context = browser.new_context(storage_state=str(sf))
            context.set_default_navigation_timeout(30000)
            context.set_default_timeout(15000)

            page = context.new_page()
            try:
                page.goto(PATIENTS_URL, wait_until="domcontentloaded")
            except PWTimeout:
                print("Warning: goto timeout; continuing…")

            # Detect redirect to login
            if "/prihlaseni" in page.url.lower():
                print("You were redirected to the login page → saved session is expired. Re-run the login-save step.")
                return

            _print_pagination_label(page, "Before:")
            try:
                set_page_size(page, "100")
            except Exception as e:
                # Best effort: continue with whatever page size is active.
                print(f"Could not set page size to 100: {e!r}")
            _print_pagination_label(page, "After :")

            page.wait_for_selector("div[role='row'][data-id]", timeout=15000)

            ids = sorted(harvest_ids_on_page(page))
            print(f"\nCollected {len(ids)} IDs on first page:")
            for pid in ids:
                print(pid)

            out_json = Path("patient_ids_first_page.json")
            out_csv = Path("patient_ids_first_page.csv")
            out_json.write_text(json.dumps(ids, ensure_ascii=False, indent=2), encoding="utf-8")
            out_csv.write_text("patient_id\n" + "\n".join(ids), encoding="utf-8")
            print(f"\nSaved → {out_json.resolve()}")
            print(f"Saved → {out_csv.resolve()}")
        finally:
            browser.close()
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
249
Medevio2.py
Normal file
249
Medevio2.py
Normal file
@@ -0,0 +1,249 @@
|
|||||||
|
# extract_patient_detail.py
|
||||||
|
# Usage:
|
||||||
|
# 1) Put your medevio_storage.json path into STATE_FILE.
|
||||||
|
# 2) Set PATIENT_ID to a real UUID from your list.
|
||||||
|
# 3) Run: python extract_patient_detail.py
|
||||||
|
#
|
||||||
|
# Output: prints a dict to console and saves patient_<ID>.json next to the script.
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
import json, sys, time, re
|
||||||
|
from playwright.sync_api import sync_playwright, TimeoutError as PWTimeout
|
||||||
|
|
||||||
|
STATE_FILE = r"/medevio_storage.json"
|
||||||
|
BASE_URL = "https://my.medevio.cz/mudr-buzalkova/klinika/pacienti"
|
||||||
|
PATIENT_ID = "236b3759-4c2b-4fa8-ab52-ce4ddb2e9064" # <-- put target ID here
|
||||||
|
|
||||||
|
# ---------- helpers ----------
|
||||||
|
|
||||||
|
def wait_for_grid(page, timeout=15000):
    """Wait until the grid has rendered at least one row carrying ``data-id``.

    The ``rowgroup`` container is awaited first; a timeout there is ignored.
    The wait for a data row is mandatory and its timeout propagates.

    Args:
        page: Playwright page showing the grid.
        timeout: Per-wait timeout, passed to ``wait_for_selector``.
    """
    stages = (
        ("div[role='rowgroup']", True),       # a timeout here is tolerated
        ("div[role='row'][data-id]", False),  # a timeout here reaches the caller
    )
    for selector, optional in stages:
        try:
            page.wait_for_selector(selector, timeout=timeout)
        except PWTimeout:
            if not optional:
                raise
|
||||||
|
|
||||||
|
def open_detail_via_query(page, patient_id):
    """Navigate to the list URL with ``?pacient=<id>`` and report whether a detail panel is open.

    Args:
        page: Playwright page to navigate.
        patient_id: Value placed in the ``pacient`` query parameter.

    Returns:
        The result of ``is_detail_open(page)`` after the waits below.
    """
    page.goto(f"{BASE_URL}?pacient={patient_id}", wait_until="domcontentloaded")

    # Short wait for the panel; if it has not shown up, pause a little longer
    # before the final check.
    panel_seen = wait_for_detail_open(page, quick=True)
    if not panel_seen:
        time.sleep(0.8)
    return is_detail_open(page)
|
||||||
|
|
||||||
|
def is_detail_open(page):
    """Return True when a dialog- or drawer-like element is currently visible.

    Each selector is checked in turn; for a selector that matches at least
    one element, only the first match is tested for visibility.
    """
    panel_selectors = (
        "[role='dialog']",
        "div.MuiDrawer-paper",
        "div.MuiModal-root [role='dialog']",
        "div[aria-modal='true']",
    )

    def _first_match_visible(selector):
        matches = page.locator(selector)
        return bool(matches.count()) and matches.first.is_visible()

    return any(_first_match_visible(selector) for selector in panel_selectors)
|
||||||
|
|
||||||
|
def wait_for_detail_open(page, quick=False):
    """Wait until a dialog- or drawer-like detail panel becomes visible.

    All candidate selectors are polled together against one shared deadline.
    Waiting on them one after another would multiply the total wait by the
    number of selectors and would notice a panel matching a later selector
    only after every earlier selector had timed out.

    Args:
        page: Playwright page to inspect.
        quick: Use a 4 second budget instead of 15 seconds.

    Returns:
        bool: True as soon as the first match of any selector is visible,
        False once the time budget is used up.
    """
    timeout = 4000 if quick else 15000
    selectors = [
        "[role='dialog']",
        "div.MuiDrawer-paper",
        "div.MuiModal-root [role='dialog']",
        "div[aria-modal='true']",
    ]
    poll_interval_ms = 100
    deadline = time.monotonic() + timeout / 1000
    while True:
        for sel in selectors:
            loc = page.locator(sel)
            if loc.count() and loc.first.is_visible():
                return True
        if time.monotonic() >= deadline:
            return False
        # Pause through Playwright rather than time.sleep() between polls.
        page.wait_for_timeout(poll_interval_ms)
|
||||||
|
|
||||||
|
def open_detail_by_click(page, patient_id):
    """Open a patient's detail by clicking that patient's row in the grid.

    Args:
        page: Playwright page showing the patient list.
        patient_id: ``data-id`` value of the row to click.

    Returns:
        False when no row with that ``data-id`` is present; otherwise the
        result of ``wait_for_detail_open(page)`` after the click.
    """
    wait_for_grid(page, timeout=15000)
    target_row = page.locator(f"div[role='row'][data-id='{patient_id}']").first
    if target_row.count():
        target_row.click()
        return wait_for_detail_open(page)
    return False
|
||||||
|
|
||||||
|
def find_detail_root(page):
    """Return the locator of the open detail container, or None.

    The first selector whose first match is visible wins. Otherwise the last
    element matching a modal/drawer selector is used as a fallback, whether
    or not it is visible.

    Returns:
        A locator for the container, or ``None`` when nothing matches at all.
        Returning ``None`` lets callers' ``if not root`` checks work; a
        locator object is presumably always truthy, even with zero matches —
        TODO confirm.
    """
    for sel in ["[role='dialog']", "div.MuiDrawer-paper", "div[aria-modal='true']"]:
        loc = page.locator(sel)
        if loc.count() and loc.first.is_visible():
            return loc.first

    # Fallback: last modal-ish container present in the DOM.
    fallback = page.locator("div.MuiModal-root, div.MuiDrawer-paper")
    if fallback.count():
        return fallback.last
    return None
|
||||||
|
|
||||||
|
def extract_text(el):
    """Return the stripped ``inner_text()`` of *el*, or ``""`` if reading it fails."""
    try:
        raw = el.inner_text()
        return raw.strip()
    except Exception:
        # Any failure to read the element is reported as "no text".
        return ""
|
||||||
|
|
||||||
|
def extract_field_by_label(root, label_texts):
    """Find a field value by its label text (CZ/EN variants).

    The label element is located by exact text match, then by substring
    match. The value is then looked for, in order, in: the element after the
    label's parent, the element after the label itself, and the first
    non-empty ``span``/``div`` inside the parent. If none of those yields
    text, the parent's own text with the labels removed is used.

    Args:
        root: Locator of the container to search in.
        label_texts: Candidate label strings. They are embedded in XPath
            string literals with single quotes, so they must not contain a
            single quote.

    Returns:
        The cleaned value text, or ``None`` when no label or value is found.
    """
    labels_xpath = " | ".join(f".//*[normalize-space()='{t}']" for t in label_texts)
    loc = root.locator(f"xpath=({labels_xpath})")
    if not loc.count():
        # No exact match: try elements whose text merely contains a label.
        labels_xpath2 = " | ".join(
            f".//*[contains(normalize-space(), '{t}')]" for t in label_texts
        )
        loc = root.locator(f"xpath=({labels_xpath2})")
        if not loc.count():
            return None

    candidate = loc.first
    parent = candidate.locator("xpath=..")
    siblings = [
        parent.locator("xpath=following-sibling::*[1]"),
        candidate.locator("xpath=following-sibling::*[1]"),
        # The "xpath=" prefix is required: only selectors starting with "//"
        # or ".." are auto-detected as XPath, so a bare ".//*[...]" would not
        # be parsed as one.
        parent.locator("xpath=.//*[(self::span or self::div) and string-length(normalize-space())>0]"),
    ]

    for s in siblings:
        if not s.count():
            continue
        text = extract_text(s.first)
        if not text:
            continue
        # Clean label-value formatting like "E-mail\nx@y.cz": strip a leading label.
        for t in label_texts:
            text = re.sub(rf"^{re.escape(t)}\s*[::]?\s*", "", text, flags=re.I)
        text = re.sub(r"\s+\n\s+", " — ", text).strip()
        # A candidate holding only the label leaves nothing behind; move on
        # to the next candidate instead of reporting an empty value.
        if text:
            return text

    # Last fallback: the parent block's text minus every label.
    block_text = extract_text(parent)
    if block_text:
        for t in label_texts:
            block_text = re.sub(rf"{re.escape(t)}\s*[::]?\s*", "", block_text, flags=re.I)
        return block_text.strip()
    return None
|
||||||
|
|
||||||
|
def extract_all_text_pairs(root):
    """Guess label→value pairs from grid/list containers inside *root*.

    At most the first 20 matching containers are read. Each container's text
    is split into non-empty lines and every pair of adjacent lines is treated
    as a possible ``label, value`` pair.

    Returns:
        dict: label → value; the first value seen for a label is kept.
    """
    pairs = {}
    containers = root.locator("div.MuiGrid-container, dl, ul.MuiList-root")
    limit = min(20, containers.count())
    for index in range(limit):
        content = extract_text(containers.nth(index))
        if not content:
            continue
        lines = [stripped for stripped in (ln.strip() for ln in content.splitlines()) if stripped]
        for label, value in zip(lines, lines[1:]):
            # Heuristic filter: skip long labels, label == value, and values
            # that themselves contain a colon.
            if len(label) > 32 or label == value or ":" in value:
                continue
            pairs.setdefault(label, value)
    return pairs
|
||||||
|
|
||||||
|
def extract_patient_detail(page, patient_id):
    """Build a dict describing the patient whose detail panel is open on *page*.

    The result always contains ``"id"``. A ``"name"`` taken from the first
    heading-like element with more than one character is added when found,
    followed by the targeted fields that produced a non-blank value, followed
    by any extra label/value pairs from the generic sweep whose key is not
    already present.

    Returns:
        dict: The collected data, or ``{"id": ..., "error": "detail_not_found"}``
        when ``find_detail_root`` returns a falsy value.
    """
    root = find_detail_root(page)
    if not root:
        return {"id": patient_id, "error": "detail_not_found"}

    # Headline: first heading-like element carrying more than one character.
    name = None
    for heading_selector in ("h1", "h2", "h3", "header h2", "[data-testid='PatientName']"):
        headings = root.locator(heading_selector)
        if not headings.count():
            continue
        heading_text = extract_text(headings.first)
        if heading_text and len(heading_text) > 1:
            name = heading_text
            break

    # Output key -> label variants (CZ + EN) to search for.
    targeted = (
        ("Datum narození / Born", ["Datum narození", "Datum nar.", "Date of birth", "Born"]),
        ("Rodné číslo", ["Rodné číslo", "RČ", "Personal ID"]),
        ("Telefon", ["Telefon", "Tel.", "Phone", "Mobile"]),
        ("E-mail", ["E-mail", "Email", "E-mail"]),
        ("Zdravotní pojišťovna", ["Pojišťovna", "Zdravotní pojišťovna", "Insurer", "Insurance"]),
        ("Adresa", ["Adresa", "Address"]),
        ("Poznámka", ["Poznámka", "Note", "Notes"]),
        ("Pohlaví", ["Pohlaví", "Gender", "Sex"]),
        ("Praktický lékař", ["Praktický lékař", "GP", "General practitioner"]),
    )
    fields = {key: extract_field_by_label(root, labels) for key, labels in targeted}

    # Generic sweep for pairs that were not targeted explicitly.
    extras = extract_all_text_pairs(root)

    data = {"id": patient_id}
    if name:
        data["name"] = name
    for key, value in fields.items():
        if value and value.strip():
            data[key] = value.strip()
    for key, value in extras.items():
        if key in data:
            continue
        if value and value.strip():
            data[key] = value.strip()
    return data
|
||||||
|
|
||||||
|
# ---------- main ----------
|
||||||
|
|
||||||
|
def main():
    """Extract one patient's detail and save it as ``patient_<ID>.json``.

    The detail is opened through the ``?pacient=`` query parameter first; if
    that fails, the list page is loaded and the patient's row is clicked.
    The extracted dict is printed and written to the current working
    directory.

    Exits with status 1 when ``PATIENT_ID`` is unset or shorter than eight
    characters, or when the storage file is missing. Returns early (after
    printing a message) when redirected to the login page or when the detail
    panel cannot be opened.
    """
    if not PATIENT_ID or len(PATIENT_ID) < 8:
        print("Set PATIENT_ID to a valid patient UUID.")
        sys.exit(1)

    sf = Path(STATE_FILE)
    if not sf.exists():
        print(f"Storage file not found: {sf}")
        sys.exit(1)

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)  # set False to watch
        # try/finally: the browser is closed on every path, including when
        # navigation or extraction raises.
        try:
            context = browser.new_context(storage_state=str(sf))
            context.set_default_navigation_timeout(30000)
            context.set_default_timeout(15000)
            page = context.new_page()

            # Try via query param first
            opened = open_detail_via_query(page, PATIENT_ID)

            # If not opened, go to base list and click the row
            if not opened:
                page.goto(BASE_URL, wait_until="domcontentloaded")
                if "/prihlaseni" in page.url.lower():
                    print("Redirected to login — refresh your medevio_storage.json.")
                    return
                if not open_detail_by_click(page, PATIENT_ID):
                    print("Could not open detail panel (neither via query nor by clicking).")
                    return

            # At this point, detail should be open
            data = extract_patient_detail(page, PATIENT_ID)

            print("\n=== Patient detail ===")
            print(json.dumps(data, ensure_ascii=False, indent=2))

            out = Path(f"patient_{PATIENT_ID}.json")
            out.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")
            print(f"\nSaved → {out.resolve()}")
        finally:
            browser.close()
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
42
Medevio3.py
Normal file
42
Medevio3.py
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
# save_patient_detail_page.py
|
||||||
|
from pathlib import Path
|
||||||
|
from playwright.sync_api import sync_playwright
|
||||||
|
|
||||||
|
STATE_FILE = r"/medevio_storage.json"
|
||||||
|
BASE_URL = "https://my.medevio.cz/mudr-buzalkova/klinika/pacienti"
|
||||||
|
PATIENT_ID = "fcb2414b-067b-4ca2-91b2-6c36a86d4cbb" # <-- any valid patient UUID
|
||||||
|
|
||||||
|
def main():
    """Save the HTML and a full-page screenshot of one patient's detail view.

    Output goes to ``capture_patient_<PATIENT_ID>/detail.html`` and
    ``detail.png`` under the current working directory. The page is saved
    even when no detail panel is detected within 10 seconds; a warning is
    printed in that case.
    """
    out_dir = Path(f"capture_patient_{PATIENT_ID}")
    out_dir.mkdir(exist_ok=True)

    html_path = out_dir / "detail.html"
    screenshot_path = out_dir / "detail.png"

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)  # set False if you want to watch
        # try/finally: the browser is closed even if navigation or saving fails.
        try:
            context = browser.new_context(storage_state=STATE_FILE)
            page = context.new_page()

            # Open the detail directly
            target_url = f"{BASE_URL}?pacient={PATIENT_ID}"
            page.goto(target_url, wait_until="domcontentloaded")

            # Wait a bit for the detail drawer/dialog to render
            try:
                page.wait_for_selector("[role='dialog'], div.MuiDrawer-paper, div[aria-modal='true']", timeout=10000)
            except Exception:
                # ``except Exception`` rather than a bare ``except`` so that
                # Ctrl+C and SystemExit are not swallowed here.
                print("Warning: did not detect a detail panel quickly")

            # Save raw HTML and screenshot
            html_path.write_text(page.content(), encoding="utf-8")
            page.screenshot(path=str(screenshot_path), full_page=True)
        finally:
            browser.close()

    print("Saved:")
    print(" -", html_path.resolve())
    print(" -", screenshot_path.resolve())
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
258
Medevio4.py
Normal file
258
Medevio4.py
Normal file
@@ -0,0 +1,258 @@
|
|||||||
|
# medevio_dump_patients_html_to_mysql.py
|
||||||
|
import json
import os
import time
from datetime import datetime
from pathlib import Path
from typing import Set

import mysql.connector
from mysql.connector import errorcode
from playwright.sync_api import sync_playwright, TimeoutError as PWTimeout
|
||||||
|
|
||||||
|
# ---------- CONFIG ----------
|
||||||
|
STATE_FILE = r"/medevio_storage.json"
|
||||||
|
BASE_LIST_URL = "https://my.medevio.cz/mudr-buzalkova/klinika/pacienti"
|
||||||
|
SAVE_DELAY_SECONDS = 10 # throttle: 10 sec per patient
|
||||||
|
|
||||||
|
# MySQL connection settings (fill in)
|
||||||
|
# Connection settings are read from the environment so that no credential is
# stored in the repository. Host, port, user and database fall back to the
# previous values; the password has no fallback and must be supplied through
# MEDEVIO_DB_PASSWORD.
# NOTE(review): the password that used to be hard-coded here is in the
# repository history and should be rotated.
MYSQL_CFG = dict(
    host=os.environ.get("MEDEVIO_DB_HOST", "192.168.1.74"),
    port=int(os.environ.get("MEDEVIO_DB_PORT", "3307")),
    user=os.environ.get("MEDEVIO_DB_USER", "root"),
    password=os.environ.get("MEDEVIO_DB_PASSWORD", ""),
    database=os.environ.get("MEDEVIO_DB_NAME", "medevio"),
)
|
||||||
|
|
||||||
|
TABLE_NAME = "patients_html" # schema created automatically
|
||||||
|
|
||||||
|
|
||||||
|
# ---------- DB helpers ----------
|
||||||
|
def db_connect():
    """Open a MySQL connection using ``MYSQL_CFG``.

    Returns:
        The connection object returned by ``mysql.connector.connect``.

    Raises:
        SystemExit: With a readable message when the connection attempt
            raises ``mysql.connector.Error``.
    """
    try:
        return mysql.connector.connect(**MYSQL_CFG)
    except mysql.connector.Error as err:
        raise SystemExit(f"MySQL connection failed: {err}")
|
||||||
|
|
||||||
|
def db_ensure_table(conn):
    """Create the ``TABLE_NAME`` table if it does not exist yet, then commit.

    Args:
        conn: Open database connection providing ``cursor()`` and ``commit()``.
    """
    ddl = f"""
    CREATE TABLE IF NOT EXISTS `{TABLE_NAME}` (
        patient_id VARCHAR(64) PRIMARY KEY,
        html LONGTEXT NOT NULL,
        fetched_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
    ) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
    """
    cur = conn.cursor()
    # try/finally: the cursor is released even when the statement fails.
    try:
        cur.execute(ddl)
        conn.commit()
    finally:
        cur.close()
|
||||||
|
|
||||||
|
def db_existing_ids(conn) -> Set[str]:
    """Return the ``patient_id`` of every row stored in ``TABLE_NAME``.

    Args:
        conn: Open database connection providing ``cursor()``.
    """
    cursor = conn.cursor()
    cursor.execute(f"SELECT patient_id FROM `{TABLE_NAME}`")
    # Each fetched row is a one-element tuple holding the id.
    stored = {patient_id for (patient_id,) in cursor.fetchall()}
    cursor.close()
    return stored
|
||||||
|
|
||||||
|
def db_upsert_html(conn, patient_id: str, html: str):
    """Insert or refresh the stored HTML for one patient, then commit.

    An existing row with the same ``patient_id`` has its ``html`` and
    ``fetched_at`` columns overwritten.

    Args:
        conn: Open database connection providing ``cursor()`` and ``commit()``.
        patient_id: Primary-key value of the row.
        html: Page HTML to store.
    """
    cur = conn.cursor()
    # try/finally: the cursor is released even when the statement or the
    # commit fails.
    try:
        # Values are bound as parameters; only the table name is interpolated.
        cur.execute(
            f"""INSERT INTO `{TABLE_NAME}` (patient_id, html, fetched_at)
            VALUES (%s, %s, NOW())
            ON DUPLICATE KEY UPDATE html = VALUES(html), fetched_at = VALUES(fetched_at)""",
            (patient_id, html),
        )
        conn.commit()
    finally:
        cur.close()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------- Playwright helpers ----------
|
||||||
|
def wait_for_grid_ready(page):
    """Wait (up to 20 s each) for the grid's row group and then for a data row.

    A timeout on either wait propagates to the caller.
    """
    required_selectors = ("div[role='rowgroup']", "div[role='row'][data-id]")
    for selector in required_selectors:
        page.wait_for_selector(selector, timeout=20000)
|
||||||
|
|
||||||
|
def set_page_size_100(page):
    """Choose "100" in the grid's rows-per-page dropdown.

    The dropdown is looked up by its Czech label, then its English label,
    then a generic pagination selector; the first lookup that matches
    anything is clicked. If none matches, nothing is clicked and the
    function still goes on to look for the option.

    Raises:
        Whatever Playwright raises when the option does not become visible
        within 5 seconds or a click fails.
    """
    dropdown_candidates = (
        page.get_by_role("combobox", name="Řádků na stránce:"),
        page.get_by_role("combobox", name="Rows per page:"),
        page.locator("div.MuiTablePagination-root [role='combobox']"),
    )
    dropdown = next((cand for cand in dropdown_candidates if cand.count()), None)
    if dropdown is not None:
        dropdown.first.click()

    # Look the option up by role first, then fall back to a plain <li> match.
    option = page.get_by_role("option", name="100")
    if option.count() == 0:
        option = page.locator("//li[normalize-space(.)='100']")
    option.first.wait_for(state="visible", timeout=5000)
    option.first.click()

    # Wait for data rows; if that times out, pause briefly instead of failing.
    try:
        page.wait_for_selector("div[role='row'][data-id]", timeout=10000)
    except PWTimeout:
        time.sleep(0.8)
|
||||||
|
|
||||||
|
def harvest_ids_on_current_page(page) -> Set[str]:
    """Collect the ``data-id`` values of the grid rows currently on the page.

    Two selectors are queried (the ARIA ``row`` role and the MUI DataGrid row
    class) and their results are merged into one set, so a row matched by
    both selectors is reported once.
    """
    row_selectors = ("div[role='row'][data-id]", "div.MuiDataGrid-row[data-id]")
    attribute_values = (
        element.get_attribute("data-id")
        for selector in row_selectors
        for element in page.locator(selector).all()
    )
    # Rows whose attribute is missing or empty are dropped.
    return {value for value in attribute_values if value}
|
||||||
|
|
||||||
|
def click_next_page(page) -> bool:
    """Click the grid's "next page" button if one is present and enabled.

    The English ARIA label is tried first, then the Czech one. An error while
    checking or clicking a button counts as "not clickable" for that button.

    Returns:
        True when a button was clicked, False otherwise.
    """
    for label in ("Go to next page", "Další"):
        button = page.get_by_role("button", name=label)
        if not button.count():
            continue
        try:
            if button.first.is_enabled():
                button.first.click()
                return True
        except Exception:
            # Fall through to the next label.
            continue
    return False
|
||||||
|
|
||||||
|
def ensure_detail_open(page) -> bool:
    """Return True when a dialog- or drawer-like element is currently visible.

    This only inspects the page; it does not try to open anything. For a
    selector that matches at least one element, only the first match is
    tested for visibility.
    """
    panel_selectors = ("[role='dialog']", "div.MuiDrawer-paper", "div[aria-modal='true']")

    def _first_match_visible(selector):
        matches = page.locator(selector)
        return bool(matches.count()) and matches.first.is_visible()

    return any(_first_match_visible(selector) for selector in panel_selectors)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------- Main workflow ----------
|
||||||
|
def _wait_for_rows_to_change(page, previous_ids, timeout_ms=10000, poll_ms=200) -> bool:
    """Poll until the grid shows a non-empty set of row ids different from *previous_ids*."""
    deadline = time.monotonic() + timeout_ms / 1000
    while time.monotonic() < deadline:
        current = harvest_ids_on_current_page(page)
        if current and current != previous_ids:
            return True
        page.wait_for_timeout(poll_ms)
    return False


def collect_all_patient_ids(context) -> Set[str]:
    """Walk every page of the patient list and return all row ids found.

    Opens a new page in *context*, tries to switch the grid to 100 rows per
    page, then harvests ids and clicks "next" until no enabled next-page
    button is left. The page is closed before returning, also on error.

    Args:
        context: Playwright browser context carrying the logged-in session.

    Returns:
        The set of unique ids collected.

    Raises:
        SystemExit: When the site redirects to the login page.
    """
    page = context.new_page()
    try:
        page.set_default_timeout(15000)
        page.set_default_navigation_timeout(30000)

        # Use domcontentloaded (SPAs often keep network busy)
        page.goto(BASE_LIST_URL, wait_until="domcontentloaded")
        if "/prihlaseni" in page.url.lower():
            raise SystemExit("Session expired → refresh medevio_storage.json via the login script.")

        wait_for_grid_ready(page)

        # optional: print label like "1–25 z 1856"
        try:
            label = page.locator("p.MuiTablePagination-displayedRows").first.inner_text()
            print("Pagination label BEFORE:", label)
        except Exception:
            pass

        # Set 100/page (best effort: on failure the current size is kept)
        try:
            set_page_size_100(page)
            try:
                label = page.locator("p.MuiTablePagination-displayedRows").first.inner_text()
                print("Pagination label AFTER :", label)
            except Exception:
                pass
        except Exception as e:
            print(f"Warning: could not set page size to 100: {e!r}")

        all_ids: Set[str] = set()
        page_index = 1

        while True:
            wait_for_grid_ready(page)
            ids_now = harvest_ids_on_current_page(page)
            print(f"Page {page_index}: harvested {len(ids_now)} ids")
            all_ids |= ids_now

            # Try to go next; if cannot, break
            if not click_next_page(page):
                break

            # Wait until the rows actually differ from the page just read.
            # A fixed short sleep can re-harvest stale rows, and the
            # following click then skips a page.
            if not _wait_for_rows_to_change(page, ids_now):
                print(f"Warning: rows did not change after leaving page {page_index}; continuing")
            page_index += 1
    finally:
        page.close()

    print(f"Total unique IDs collected: {len(all_ids)}")
    return all_ids
|
||||||
|
|
||||||
|
def fetch_and_store_patient_html(context, conn, patient_id: str):
    """Open one patient's detail view and store the page HTML in the database.

    The detail is opened through the ``?pacient=`` query parameter; if no
    panel appears, the list page is loaded and the patient's row is clicked.
    When the panel still does not open, the patient is skipped. After a
    successful save the function sleeps ``SAVE_DELAY_SECONDS`` as a throttle.

    Args:
        context: Playwright browser context carrying the logged-in session.
        conn: Open database connection passed on to ``db_upsert_html``.
        patient_id: Id of the patient to fetch.

    Raises:
        SystemExit: When the site redirects to the login page, so that the
            login page is not stored as patient HTML.
    """
    panel_selector = "[role='dialog'], div.MuiDrawer-paper, div[aria-modal='true']"
    page = context.new_page()
    # try/finally: the page is closed on every path, so a failing patient
    # does not leave an open tab behind.
    try:
        page.set_default_timeout(15000)
        page.set_default_navigation_timeout(30000)

        url = f"{BASE_LIST_URL}?pacient={patient_id}"
        page.goto(url, wait_until="domcontentloaded")
        if "/prihlaseni" in page.url.lower():
            raise SystemExit("Session expired → refresh medevio_storage.json via the login script.")

        # Give the panel time to render before deciding that the direct URL
        # did not work; right after domcontentloaded it may not exist yet.
        try:
            page.wait_for_selector(panel_selector, timeout=10000)
        except PWTimeout:
            pass

        # If detail didn't open, fallback: go to list, click row
        if not ensure_detail_open(page):
            page.goto(BASE_LIST_URL, wait_until="domcontentloaded")
            row_selector = f"div[role='row'][data-id='{patient_id}']"
            try:
                page.wait_for_selector(row_selector, timeout=15000)
                page.locator(row_selector).first.click()
                # wait for drawer/dialog
                page.wait_for_selector(panel_selector, timeout=12000)
            except PWTimeout:
                print(f"[{patient_id}] detail panel did not open — skipping")
                return

        # Save full HTML of the page (includes the open detail drawer)
        html = page.content()
        db_upsert_html(conn, patient_id, html)
        print(f"[{patient_id}] saved HTML ({len(html)} bytes) at {datetime.now().isoformat(timespec='seconds')}")
    finally:
        page.close()

    # Throttle after a successful save
    time.sleep(SAVE_DELAY_SECONDS)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Collect all patient ids and store each patient's detail HTML in MySQL.

    Patients whose id is already in the table are skipped. An error while
    fetching one patient is printed and the run continues with the next one.
    The database connection and the browser are closed on every path.

    Raises:
        SystemExit: When the storage file is missing (and wherever the called
            helpers raise it).
    """
    # Check storage exists
    if not Path(STATE_FILE).exists():
        raise SystemExit(f"Storage not found: {STATE_FILE}")

    conn = db_connect()
    try:
        db_ensure_table(conn)
        already = db_existing_ids(conn)
        print(f"Already in DB: {len(already)} ids")

        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)  # set False to watch
            try:
                context = browser.new_context(storage_state=STATE_FILE)

                # 1) Collect all IDs from the listing (all pages)
                all_ids = collect_all_patient_ids(context)

                # 2) Iterate and store HTML (skip existing)
                todo = [pid for pid in sorted(all_ids) if pid not in already]
                print(f"To fetch now: {len(todo)} ids (skipping {len(all_ids)-len(todo)} already saved)")

                for pid in todo:
                    try:
                        fetch_and_store_patient_html(context, conn, pid)
                    except Exception as e:
                        # One failing patient must not abort the whole run.
                        print(f"[{pid}] ERROR: {e!r} — continuing with next")
            finally:
                browser.close()
    finally:
        conn.close()
    print("Done.")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
36
activate_this.py
Normal file
36
activate_this.py
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
"""
|
||||||
|
Activate virtualenv for current interpreter:
|
||||||
|
|
||||||
|
Use exec(open(this_file).read(), {'__file__': this_file}).
|
||||||
|
|
||||||
|
This can be used when you must use an existing Python interpreter, not the virtualenv bin/python.
|
||||||
|
""" # noqa: D415
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import site
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# ``__file__`` only exists when the caller supplies it in the globals passed
# to exec(); without it this script cannot locate itself.
try:
    abs_file = os.path.abspath(__file__)
except NameError as exc:
    msg = "You must use exec(open(this_file).read(), {'__file__': this_file}))"
    raise AssertionError(msg) from exc

bin_dir = os.path.dirname(abs_file)
# Cut the trailing "Scripts" component plus one path-separator character.
base = bin_dir[: -(len("Scripts") + 1)]

# Put this script's directory at the front of PATH.
os.environ["PATH"] = bin_dir + os.pathsep + os.environ.get("PATH", "")
os.environ["VIRTUAL_ENV"] = base
os.environ["VIRTUAL_ENV_PROMPT"] = os.path.basename(base)

# Register the site-packages directory with the running interpreter.
prev_length = len(sys.path)
for lib in "..\\Lib\\site-packages".split(os.pathsep):
    path = os.path.realpath(os.path.join(bin_dir, lib))
    site.addsitedir(path)
# Move the entries added above in front of the ones that were already there.
sys.path[:] = sys.path[prev_length:] + sys.path[:prev_length]

sys.real_prefix = sys.prefix
sys.prefix = base
|
||||||
18
medevio.py
Normal file
18
medevio.py
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
from playwright.sync_api import sync_playwright
|
||||||
|
|
||||||
|
LOGIN_URL = "https://my.medevio.cz/prihlaseni"
|
||||||
|
STATE_FILE = "medevio_storage.json"
|
||||||
|
|
||||||
|
# Open a visible browser on the login page, wait for the user to log in by
# hand, then save the session state to STATE_FILE.
with sync_playwright() as playwright:
    # headless=False shows the browser window; slow_mo slows each action down.
    browser = playwright.chromium.launch(headless=False, slow_mo=150)
    context = browser.new_context()
    login_page = context.new_page()
    login_page.goto(LOGIN_URL, wait_until="load")

    instructions = (
        ">>> Přihlas se v otevřeném okně (Medevio).",
        ">>> Jakmile jsi na hlavní stránce po přihlášení, vrať se do PyCharm konzole a stiskni Enter.",
    )
    for line in instructions:
        print(line)
    # Block until the user presses Enter.
    input()

    context.storage_state(path=STATE_FILE)
    print(f"Session uložena do: {STATE_FILE}")
    browser.close()
|
||||||
1
medevio_storage.json
Normal file
1
medevio_storage.json
Normal file
@@ -0,0 +1 @@
|
|||||||
|
{"cookies": [{"name": "gateway-access-token", "value": "IiUnIp7pjWdBQmI8rO+HfhUPwO0Cj7TknqcagWe1+pQS5tDjFJUAK5gKdYJCJBH+2blsCJEQQulOrm/z", "domain": "my.medevio.cz", "path": "/", "expires": 1759832768, "httpOnly": false, "secure": false, "sameSite": "Lax"}], "origins": [{"origin": "https://my.medevio.cz", "localStorage": [{"name": "Application.Intl.locale", "value": "cs"}, {"name": "Password.prefill", "value": "{\"username\":\"vladimir.buzalka@buzalka.cz\",\"type\":\"email\"}"}]}]}
|
||||||
Reference in New Issue
Block a user