Files
medevio/Medevio4-readandsavekartoteka.py
2025-09-22 07:19:26 +02:00

178 lines
5.5 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# This script connects to the Medevio patient registry (kartoteka), switches the list to 100 patients per page, and saves the HTML of each loaded page to MySQL.
# medevio_dump_patients_html_to_mysql.py
import json
import os
import time
from datetime import datetime
from pathlib import Path
from typing import Set

import mysql.connector
from mysql.connector import errorcode
from playwright.sync_api import sync_playwright, TimeoutError as PWTimeout
# ---------- CONFIG ----------
# Playwright storage-state file; main() refuses to start when it is missing.
STATE_FILE = r"medevio_storage.json"
# URL of the patient list that the scraper opens.
BASE_LIST_URL = "https://my.medevio.cz/mudr-buzalkova/klinika/pacienti"
# throttle: 10 sec per patient (NOTE(review): not referenced by the code visible here)
SAVE_DELAY_SECONDS = 10

# MySQL connection settings.
# Every value can be overridden through an environment variable so that
# credentials do not have to live in the source file. The literals below are
# only fallbacks that keep the previous behaviour when nothing is set.
# NOTE(review): a real password is still committed here as the default --
# rotate it and drop the fallback once MEDEVIO_DB_PASSWORD is provisioned.
MYSQL_CFG = dict(
    host=os.environ.get("MEDEVIO_DB_HOST", "192.168.1.76"),
    port=int(os.environ.get("MEDEVIO_DB_PORT", "3307")),
    user=os.environ.get("MEDEVIO_DB_USER", "root"),
    password=os.environ.get("MEDEVIO_DB_PASSWORD", "Vlado9674+"),
    database=os.environ.get("MEDEVIO_DB_NAME", "medevio"),
)
# ---------- DB helpers ----------
def db_connect():
    """Open a MySQL connection using the settings in ``MYSQL_CFG``.

    Returns:
        The connection object produced by ``mysql.connector.connect``.

    Raises:
        SystemExit: When the connector reports a ``mysql.connector.Error``;
            the original error text is included in the exit message.
    """
    try:
        return mysql.connector.connect(**MYSQL_CFG)
    except mysql.connector.Error as exc:
        raise SystemExit(f"MySQL connection failed: {exc}")
# ---------- Playwright helpers ----------
def wait_for_grid_ready(page):
    """Wait until the grid container and at least one data row exist.

    Each selector is given a generous 20 s; a Playwright timeout error
    propagates to the caller when either one never shows up.
    """
    grid_timeout_ms = 20000
    # Order matters: first the row group, then a row carrying a data-id.
    for selector in ("div[role='rowgroup']", "div[role='row'][data-id]"):
        page.wait_for_selector(selector, timeout=grid_timeout_ms)
def set_page_size_100(page):
    """Set the grid's rows-per-page selector to 100 (one UI interaction).

    The combobox is looked up by its Czech label, then its English label,
    then a generic pagination selector; the first candidate that matches
    anything is clicked. Afterwards the "100" option is chosen and the
    function waits for data rows to be present.
    """
    candidates = (
        page.get_by_role("combobox", name="Řádků na stránce:"),
        page.get_by_role("combobox", name="Rows per page:"),
        page.locator("div.MuiTablePagination-root [role='combobox']"),
    )
    combobox = next((cand for cand in candidates if cand.count()), None)
    if combobox is not None:
        combobox.first.click()

    # The option list may not be found by role, so fall back to a plain
    # <li> whose text is exactly "100".
    option = page.get_by_role("option", name="100")
    if option.count() == 0:
        option = page.locator("//li[normalize-space(.)='100']")
    choice = option.first
    choice.wait_for(state="visible", timeout=5000)
    choice.click()

    # Give the grid a chance to show rows again; on timeout just pause briefly.
    try:
        page.wait_for_selector("div[role='row'][data-id]", timeout=10000)
    except PWTimeout:
        time.sleep(0.8)
def click_next_page(page) -> bool:
    """Click the pagination "next" button to load the following page.

    The English ARIA label is tried first, then the Czech one.

    Returns:
        True if a matching, enabled button was clicked; False when no
        candidate exists, is enabled, or could be clicked.
    """
    for label in ("Go to next page", "Další"):
        button = page.get_by_role("button", name=label)
        if not button.count():
            continue
        try:
            if button.first.is_enabled():
                button.first.click()
                return True
        except Exception:
            # Best effort: a failure on this candidate just moves on to the
            # next label (or to the final "False").
            continue
    return False
# ---------- Main workflow ----------
def _print_pagination_label(page, tag):
    """Print the pagination label (e.g. "125 z 1856"); purely informational."""
    try:
        label = page.locator("p.MuiTablePagination-displayedRows").first.inner_text()
        print(f"Pagination label {tag}:", label)
    except Exception:
        # Optional diagnostics only -- never let it abort the scrape.
        pass


def _store_page_html(conn, html, next_round):
    """Insert one page of HTML into ``kartoteka_html`` and commit."""
    cur = conn.cursor()
    try:
        cur.execute(
            "INSERT INTO kartoteka_html (html,round) VALUES (%s,%s)",
            (html, next_round),
        )
        conn.commit()
    finally:
        # Close the cursor even when the INSERT or the commit fails.
        cur.close()


def _first_row_id(page):
    """Return the ``data-id`` of the first grid row, or None if unreadable."""
    try:
        return page.locator("div[role='row'][data-id]").first.get_attribute("data-id")
    except PWTimeout:
        return None


def _wait_for_rows_to_change(page, previous_first_id):
    """Wait until the grid shows different rows than before the "next" click.

    The list is a single-page app: the ``domcontentloaded`` state is already
    reached after the first navigation, so waiting for it again returns
    immediately and the old rows would still satisfy the grid-ready check.
    Instead, wait for the first row's ``data-id`` to differ from the one
    recorded before the click.
    """
    if previous_first_id is not None:
        try:
            page.wait_for_function(
                """prev => {
                    const row = document.querySelector("div[role='row'][data-id]");
                    return row !== null && row.getAttribute("data-id") !== prev;
                }""",
                arg=previous_first_id,
                timeout=10000,
            )
        except PWTimeout:
            # Best effort: fall through to the fixed pause below.
            pass
    # Short settle time, same as before, so the remaining rows can render.
    time.sleep(0.5)


def save_all_patient_htmls(conn, context, next_round):
    """Store the HTML of every page of the patient list in ``kartoteka_html``.

    Opens a new page in ``context``, loads ``BASE_LIST_URL``, tries to switch
    the grid to 100 rows per page and then follows the pagination, inserting
    ``page.content()`` together with ``next_round`` for each page.

    Args:
        conn: Open MySQL connection; one INSERT is committed per page.
        context: Playwright browser context used to open the page.
        next_round: Value written to the ``round`` column of every row.

    Raises:
        SystemExit: If the site redirects to the login page (``/prihlaseni``).
    """
    page = context.new_page()
    try:
        page.set_default_timeout(15000)
        page.set_default_navigation_timeout(30000)

        # Use domcontentloaded (SPAs often keep network busy).
        page.goto(BASE_LIST_URL, wait_until="domcontentloaded")
        if "/prihlaseni" in page.url.lower():
            raise SystemExit("Session expired → refresh medevio_storage.json via the login script.")
        wait_for_grid_ready(page)

        _print_pagination_label(page, "BEFORE")

        # Setting 100 rows per page is an optimisation; carry on without it.
        try:
            set_page_size_100(page)
            _print_pagination_label(page, "AFTER ")
        except Exception as e:
            print(f"Warning: could not set page size to 100: {e!r}")

        page_index = 1
        while True:
            wait_for_grid_ready(page)
            _store_page_html(conn, page.content(), next_round)
            print(f"DB saved page index {page_index}")

            # Remember what is on screen so the page change can be detected.
            seen_first_id = _first_row_id(page)
            if not click_next_page(page):
                break
            _wait_for_rows_to_change(page, seen_first_id)
            page_index += 1
    finally:
        # Release the tab even when scraping aborts part-way through.
        page.close()
    print(f"Total pages collected: {page_index}")
def main():
    """Run one scraping round: prepare the DB, then dump every list page.

    Steps:
      1. Abort if the Playwright storage-state file is missing.
      2. Delete rows with ``round=0`` from ``kartoteka_html``.
      3. Compute the next round number as ``MAX(round) + 1`` (1 for an
         empty table).
      4. Launch Chromium with the stored session and save all pages.

    Raises:
        SystemExit: If ``STATE_FILE`` does not exist (or from the helpers
            this function calls).
    """
    # Check storage exists
    if not Path(STATE_FILE).exists():
        raise SystemExit(f"Storage not found: {STATE_FILE}")

    conn = db_connect()
    try:
        with conn.cursor() as cur:
            # Remove rows whose round is 0.
            # NOTE(review): the original comment says "rows without a round
            # value" -- confirm that 0 (and not NULL) is what marks them.
            cur.execute("delete from kartoteka_html where round=0")
            conn.commit()

            cur.execute("SELECT MAX(`round`) AS max_round FROM kartoteka_html")
            result = cur.fetchone()

        # MAX() yields NULL for an empty table; fall back to 0 in that case.
        next_round = (result[0] or 0) + 1
        print("Next round will be:", next_round)

        with sync_playwright() as p:
            # headless=False keeps the browser window visible while it runs.
            browser = p.chromium.launch(headless=False)
            try:
                context = browser.new_context(storage_state=STATE_FILE)
                save_all_patient_htmls(conn, context, next_round)
            finally:
                browser.close()
    finally:
        # Always release the DB connection, even when scraping fails.
        conn.close()
    print("Done.")


if __name__ == "__main__":
    main()