Files
medevio/ScrapePozadavkyJustManualScrolling.py
2025-09-24 16:40:34 +02:00

119 lines
4.5 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import re
from urllib.parse import urlparse, parse_qs
from playwright.sync_api import sync_playwright, TimeoutError as PWTimeout, Page
# File handed to browser.new_context(storage_state=...) in main(); when the
# login check there fails, the user is told to refresh this file.
STATE_FILE = "medevio_storage.json"
# Request list opened at startup. The part before "?" is reused as the base
# for request-detail links (?pozadavek=<uuid>).
# NOTE(review): "neprirazene=1" presumably filters to unassigned requests - confirm.
POZADAVKY_URL = "https://my.medevio.cz/mudr-buzalkova/klinika/pozadavky?neprirazene=1"
# ---------- helpers ----------
def get_uuid_from_href(href: str) -> str | None:
try:
q = parse_qs(urlparse(href).query)
val = q.get("pozadavek", [None])[0]
return val if val else None
except Exception:
return None
# Stem of the Czech word for flu, with or without the hacek on "r".
# The final [kc] covers both "chřipka/chřipku/chřipky/chřipkový" and the
# dative/locative "chřipce" (as in "očkování proti chřipce").
_FLU_PATTERN = re.compile(r"ch[rř]ip[kc]", re.IGNORECASE)


def is_flu_request(text: str) -> bool:
    """Tell whether *text* mentions the flu ("chřipka" in any common form).

    Matching is case-insensitive and accepts the spelling without
    diacritics ("chripka"). The text is NFC-normalized first so that an
    "ř" written as ``r`` + combining caron is recognized as well.

    Returns ``False`` for empty (or ``None``) input.
    """
    if not text:
        return False
    import unicodedata  # local import keeps this block self-contained

    normalized = unicodedata.normalize("NFC", text)
    return _FLU_PATTERN.search(normalized) is not None
def _first_text(locator) -> str:
    """Return the stripped text of the first element matched by *locator*.

    Returns "" when nothing matches, instead of waiting for an element to
    appear.
    """
    if not locator.count():
        return ""
    return (locator.first.text_content() or "").strip()


def _first_attr(locator, attr_name: str) -> str:
    """Return the stripped *attr_name* attribute of the first match, or ""."""
    if not locator.count():
        return ""
    return (locator.first.get_attribute(attr_name) or "").strip()


def scrape_visible_rows(page: Page, seen: set) -> list[dict]:
    """Collect all *new* request rows currently present on the page.

    A row is new when the request id taken from its detail link is not in
    *seen*; rows without such a link are skipped. Every id that is returned
    is also added to *seen* (the set is mutated in place), so a later call
    does not report the same request again.

    Args:
        page: Page showing the request table.
        seen: Request ids that were already collected.

    Returns:
        One dict per new row with the keys ``id``, ``name``, ``rc``,
        ``text``, ``assigned_to`` and ``initials``. Fields whose element is
        missing in the row are empty strings.
    """
    bucket: list[dict] = []
    rows = page.locator('tr[data-testid="patient-request-row"]')
    for i in range(rows.count()):
        row = rows.nth(i)

        href_el = row.locator('a[href*="pozadavky?pozadavek="]').first
        href = href_el.get_attribute("href") if href_el.count() else None
        req_id = get_uuid_from_href(href) if href else None
        if not req_id or req_id in seen:
            continue

        # Every lookup is guarded by count(): text_content(timeout=0) on a
        # locator that matches nothing would wait without any time limit.
        name = _first_text(row.locator('td:nth-child(2) a span'))
        rc = _first_text(row.locator('a.MuiTypography-overline2'))
        text_req = _first_text(
            row.locator('td:nth-child(3) p.MuiTypography-body1, td:nth-child(4) p.MuiTypography-body1')
        )
        if not text_req:
            # Fall back to the aria-label when there is no visible text.
            text_req = _first_attr(
                row.locator('td:nth-child(3) [aria-label], td:nth-child(4) [aria-label]'),
                "aria-label",
            )

        avatar = row.locator('[data-testid="queue-avatar"]')
        assigned_to = _first_attr(avatar, "aria-label")
        initials = _first_text(avatar)

        seen.add(req_id)
        bucket.append({
            "id": req_id,
            "name": name,
            "rc": rc,
            "text": text_req,
            "assigned_to": assigned_to,
            "initials": initials,
        })
    return bucket
def assign_request_to_buzalka(page: Page, request_uuid: str) -> None:
    """Open the detail of request *request_uuid* and assign it to MUDr. Buzalka (me).

    Navigates to the request-detail URL, picks the option matching
    "MUDr. Buzalka" in the queue combobox, waits for the network to go idle,
    closes the dialog and prints a confirmation line.
    """
    # Detail links use the list URL without its query string.
    base_url, _, _ = POZADAVKY_URL.partition("?")
    detail_url = f"{base_url}?pozadavek={request_uuid}"
    page.goto(detail_url, wait_until="domcontentloaded", timeout=60_000)

    queue_select = page.locator('div[role="combobox"][aria-labelledby="queue-select-label"]')
    queue_select.wait_for(state="visible")
    queue_select.click()

    doctor_name = re.compile(r"MUDr\.?\s*Buzalka", re.I)
    page.get_by_role("option", name=doctor_name).click()

    page.wait_for_load_state("networkidle")
    page.locator("button.MuiDialog-close").click()
    print(f"✔ Assigned to MUDr. Buzalka: {request_uuid}")
# ---------- main ----------
def main():
    """Run the interactive session that assigns flu requests to MUDr. Buzalka.

    Opens a visible Chromium window using the stored session, loads the
    request list and then repeatedly waits for Enter. On each Enter the rows
    currently on the page are scanned, and every new flu request that is not
    already assigned to Buzalka (by aria-label or the initials "VB") is
    assigned. Ctrl+C (or end of input) ends the session and prints the
    final count.

    Raises:
        SystemExit: when the page looks like a login page or no request
            rows show up within 20 seconds.
    """
    with sync_playwright() as pw:
        browser = pw.chromium.launch(headless=False)  # we want to see the page
        context = browser.new_context(storage_state=STATE_FILE)
        page = context.new_page()
        page.goto(POZADAVKY_URL, wait_until="domcontentloaded", timeout=60_000)

        # check login
        body = (page.text_content("body") or "").lower()
        if any(x in body for x in ["přihlášení", "přihlásit", "sign in", "login"]):
            raise SystemExit("Not logged in refresh medevio_storage.json.")

        try:
            page.wait_for_selector('tr[data-testid="patient-request-row"]', timeout=20_000)
        except PWTimeout:
            raise SystemExit("Rows not found: tr[data-testid=patient-request-row].")

        seen: set[str] = set()
        assigned_count = 0
        print("\n>>> Scroll the page manually. Press Enter here any time to scrape current view.")
        print("    Press Ctrl+C to finish.\n")

        try:
            while True:
                input("Press Enter to scan visible rows...")
                for item in scrape_visible_rows(page, seen):
                    text = item["text"]
                    initials = (item["initials"] or "").upper()
                    assigned_to = (item["assigned_to"] or "").lower()
                    already_assigned = "buzalka" in assigned_to or initials == "VB"
                    if is_flu_request(text) and not already_assigned:
                        assign_request_to_buzalka(page, item["id"])
                        assigned_count += 1
                print(f"Total newly assigned so far: {assigned_count}")
        except (KeyboardInterrupt, EOFError):
            # Ctrl+C is the advertised way to finish, so end with a summary
            # instead of a traceback; a closed stdin is treated the same way.
            print(f"\nFinished. Total newly assigned: {assigned_count}")
# Start the interactive session only when this file is run as a script.
if __name__ == "__main__":
    main()