W22
This commit is contained in:
1
.idea/misc.xml
generated
1
.idea/misc.xml
generated
@@ -3,4 +3,5 @@
|
|||||||
<component name="Black">
|
<component name="Black">
|
||||||
<option name="sdkName" value="Python 3.12 (Medevio)" />
|
<option name="sdkName" value="Python 3.12 (Medevio)" />
|
||||||
</component>
|
</component>
|
||||||
|
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12 (Medevio)" project-jdk-type="Python SDK" />
|
||||||
</project>
|
</project>
|
||||||
177
Medevio4-readandsavekartoteka.py
Normal file
177
Medevio4-readandsavekartoteka.py
Normal file
@@ -0,0 +1,177 @@
|
|||||||
|
# This script connects to the Medevio patient registry (kartoteka), switches the list to 100 patients per page, and saves the HTML of each loaded page into MySQL.
|
||||||
|
|
||||||
|
|
||||||
|
# medevio_dump_patients_html_to_mysql.py
|
||||||
|
import json
import os
import time
from datetime import datetime
from pathlib import Path
from typing import Set

import mysql.connector
from mysql.connector import errorcode
from playwright.sync_api import sync_playwright, TimeoutError as PWTimeout
|
||||||
|
|
||||||
|
# ---------- CONFIG ----------
# Playwright storage-state file holding the logged-in Medevio session.
STATE_FILE = r"medevio_storage.json"
# URL of the patient list that gets paged through.
BASE_LIST_URL = "https://my.medevio.cz/mudr-buzalkova/klinika/pacienti"
SAVE_DELAY_SECONDS = 10  # throttle: 10 sec per patient

# MySQL connection settings.
# Every value can be overridden through an environment variable so that
# credentials do not have to live in the repository.
# SECURITY: the fallback values below are the ones that were previously
# hard-coded; the password has been committed to version control and should
# be rotated, after which the password fallback should be removed.
MYSQL_CFG = dict(
    host=os.environ.get("MEDEVIO_DB_HOST", "192.168.1.76"),
    port=int(os.environ.get("MEDEVIO_DB_PORT", "3307")),
    user=os.environ.get("MEDEVIO_DB_USER", "root"),
    password=os.environ.get("MEDEVIO_DB_PASSWORD", "Vlado9674+"),
    database=os.environ.get("MEDEVIO_DB_NAME", "medevio"),
)
|
||||||
|
|
||||||
|
# ---------- DB helpers ----------
|
||||||
|
def db_connect():
    """Open a MySQL connection using the module-level ``MYSQL_CFG`` settings.

    Returns:
        The connection object produced by ``mysql.connector.connect``.

    Raises:
        SystemExit: if the driver reports any ``mysql.connector.Error``
            while connecting; the driver's message is included.
    """
    try:
        connection = mysql.connector.connect(**MYSQL_CFG)
    except mysql.connector.Error as exc:
        raise SystemExit(f"MySQL connection failed: {exc}")
    return connection
|
||||||
|
|
||||||
|
# ---------- Playwright helpers ----------
|
||||||
|
def wait_for_grid_ready(page):
    """Block until the patient grid and at least one data row are available.

    Waits first for the row-group container and then for a row carrying a
    ``data-id`` attribute. Each wait is given a generous 20-second timeout.

    Args:
        page: the Playwright page showing the patient list.
    """
    required_selectors = (
        "div[role='rowgroup']",
        "div[role='row'][data-id]",
    )
    for selector in required_selectors:
        page.wait_for_selector(selector, timeout=20000)
|
||||||
|
|
||||||
|
def set_page_size_100(page):
    """Switch the patient list to 100 rows per page (one UI interaction).

    Opens the page-size combobox, picks the "100" option and then waits for
    grid rows to be present again.

    Args:
        page: the Playwright page showing the patient list.

    Raises:
        RuntimeError: if none of the known page-size combobox locators
            matches anything on the page.
        playwright TimeoutError: if the "100" option does not become visible
            within 5 seconds.
    """
    # Candidate locators for the page-size combobox: Czech label, English
    # label, and a generic MUI pagination fallback.
    combobox_candidates = [
        page.get_by_role("combobox", name="Řádků na stránce:"),
        page.get_by_role("combobox", name="Rows per page:"),
        page.locator("div.MuiTablePagination-root [role='combobox']"),
    ]
    for loc in combobox_candidates:
        if loc.count():
            loc.first.click()
            break
    else:
        # Fail fast with a clear message instead of waiting for an option
        # menu that was never opened.
        raise RuntimeError("Page-size combobox not found on the patient list page")

    # Select 100 (MUI menu often renders in a portal)
    opt = page.get_by_role("option", name="100")
    if not opt.count():
        opt = page.locator("//li[normalize-space(.)='100']")
    opt.first.wait_for(state="visible", timeout=5000)
    opt.first.click()

    # Wait for rows to be present after the page-size change; on timeout
    # just pause briefly and let the caller carry on.
    try:
        page.wait_for_selector("div[role='row'][data-id]", timeout=10000)
    except PWTimeout:
        time.sleep(0.8)
|
||||||
|
|
||||||
|
def click_next_page(page) -> bool:
    """Click the pagination "next page" button so the next page of rows loads.

    The English ARIA label is tried first, then the Czech one.

    Args:
        page: the Playwright page showing the patient list.

    Returns:
        True as soon as an enabled "next" button has been clicked; False when
        no such button exists, it is disabled, or interacting with it failed.
    """
    for button_label in ("Go to next page", "Další"):
        button = page.get_by_role("button", name=button_label)
        if not button.count():
            continue
        try:
            if button.first.is_enabled():
                button.first.click()
                return True
        except Exception:
            # Best effort: fall through to the next label / final False.
            pass
    return False
|
||||||
|
|
||||||
|
# ---------- Main workflow ----------
|
||||||
|
def _print_pagination_label(page, prefix):
    """Print the pagination label (e.g. "1–25 z 1856") behind ``prefix``; best effort."""
    try:
        label = page.locator("p.MuiTablePagination-displayedRows").first.inner_text()
        print(prefix, label)
    except Exception:
        # Purely informational output; never let it stop the run.
        pass


def _wait_for_new_rows(page, previous_first_id):
    """Wait until the first grid row's ``data-id`` differs from ``previous_first_id``."""
    first_row_changed_js = (
        "(previousId) => {"
        " const row = document.querySelector(\"div[role='row'][data-id]\");"
        " return row !== null && row.getAttribute('data-id') !== previousId;"
        " }"
    )
    try:
        page.wait_for_function(first_row_changed_js, arg=previous_first_id, timeout=10000)
    except PWTimeout:
        print("Warning: grid rows did not change after clicking next; continuing anyway.")


def save_all_patient_htmls(conn, context, next_round):
    """Store the HTML of every page of the patient list in MySQL.

    Opens a new page in ``context``, loads ``BASE_LIST_URL``, tries to switch
    the list to 100 rows per page, and then inserts ``page.content()`` of each
    listing page into ``kartoteka_html`` together with ``next_round``. Paging
    continues until ``click_next_page`` reports that no further page can be
    reached (about 19 pages according to the original author's note).

    Args:
        conn: open MySQL connection; each page is committed individually.
        context: Playwright browser context with a logged-in session.
        next_round: value written to the ``round`` column for every row.

    Raises:
        SystemExit: if the site redirects to the login page (session expired).
    """
    page = context.new_page()
    try:
        page.set_default_timeout(15000)
        page.set_default_navigation_timeout(30000)

        # Use domcontentloaded (SPAs often keep network busy)
        page.goto(BASE_LIST_URL, wait_until="domcontentloaded")
        if "/prihlaseni" in page.url.lower():
            raise SystemExit("Session expired → refresh medevio_storage.json via the login script.")

        wait_for_grid_ready(page)
        _print_pagination_label(page, "Pagination label BEFORE:")

        # Set 100/page; a failure here is not fatal, the default page size
        # is then used.
        try:
            set_page_size_100(page)
            _print_pagination_label(page, "Pagination label AFTER :")
        except Exception as e:
            print(f"Warning: could not set page size to 100: {e!r}")

        page_index = 1
        while True:
            wait_for_grid_ready(page)

            # Persist the current listing page; the cursor is closed even if
            # the insert fails.
            with conn.cursor() as cur:
                cur.execute(
                    "INSERT INTO kartoteka_html (html,round) VALUES (%s,%s)",
                    (page.content(), next_round),
                )
                conn.commit()
            print(f"DB saved page index {page_index}")

            # Remember which row is first so the page change can be detected:
            # the old rows already satisfy the grid-ready selectors.
            previous_first_id = page.locator("div[role='row'][data-id]").first.get_attribute("data-id")

            # Try to go next; if cannot, break
            if not click_next_page(page):
                break

            try:
                page.wait_for_load_state("domcontentloaded", timeout=10000)
            except PWTimeout:
                pass
            # Wait for DOM to actually update (new rows)
            _wait_for_new_rows(page, previous_first_id)
            time.sleep(0.5)
            page_index += 1
    finally:
        page.close()

    print(f"Total pages collected: {page_index}")
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run one scraping round of the Medevio patient list into MySQL.

    Steps: verify the Playwright storage-state file exists, connect to MySQL,
    delete ``kartoteka_html`` rows with ``round=0``, compute the next round
    number as ``MAX(round) + 1`` (1 for an empty table), then launch Chromium
    with the stored session and save every listing page.

    Raises:
        SystemExit: if the storage-state file is missing (or from the helpers
            on DB connection failure / expired session).
    """
    # Check storage exists
    if not Path(STATE_FILE).exists():
        raise SystemExit(f"Storage not found: {STATE_FILE}")

    # DB ready
    conn = db_connect()
    try:
        with conn.cursor() as cur:
            # Delete all kartoteka_html rows that have no round assigned.
            # NOTE(review): the statement matches round=0 only; presumably 0
            # is the column default for "no round" — rows with NULL would not
            # be removed. Confirm against the table definition.
            cur.execute("delete from kartoteka_html where round=0")
            conn.commit()

            cur.execute("SELECT MAX(`round`) AS max_round FROM kartoteka_html")
            result = cur.fetchone()
            # If table empty, use 0 as fallback
            next_round = (result[0] or 0) + 1
        print("Next round will be:", next_round)

        with sync_playwright() as p:
            # headless=False shows the browser window so the run can be watched.
            browser = p.chromium.launch(headless=False)
            try:
                context = browser.new_context(storage_state=STATE_FILE)
                save_all_patient_htmls(conn, context, next_round)
            finally:
                browser.close()
    finally:
        # Release the connection even when scraping fails part-way.
        conn.close()
    print("Done.")


if __name__ == "__main__":
    main()
|
||||||
@@ -241,7 +241,8 @@ def main():
|
|||||||
context = browser.new_context(storage_state=STATE_FILE)
|
context = browser.new_context(storage_state=STATE_FILE)
|
||||||
|
|
||||||
# 1) Collect all IDs from the listing (all pages)
|
# 1) Collect all IDs from the listing (all pages)
|
||||||
all_ids = collect_all_patient_ids(context)
|
# all_ids = collect_all_patient_ids(context)
|
||||||
|
all_ids=db_existing_ids(conn)
|
||||||
|
|
||||||
# 2) Iterate and store HTML (skip existing)
|
# 2) Iterate and store HTML (skip existing)
|
||||||
todo = [pid for pid in sorted(all_ids) if pid not in already]
|
todo = [pid for pid in sorted(all_ids) if pid not in already]
|
||||||
|
|||||||
42
Medevio5_ReadNamesFromKartoteka_html.py
Normal file
42
Medevio5_ReadNamesFromKartoteka_html.py
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
# Reads the most recently fetched patient-list page from kartoteka_html and
# prints the patient names found in it.
import os
import re

import mysql.connector
from bs4 import BeautifulSoup


# ---------- CONFIG ----------
# MySQL connection settings.
# Every value can be overridden through an environment variable so that
# credentials do not have to live in the repository.
# SECURITY: the fallback values below are the ones that were previously
# hard-coded; the password has been committed to version control and should
# be rotated, after which the password fallback should be removed.
MYSQL_CFG = dict(
    host=os.environ.get("MEDEVIO_DB_HOST", "192.168.1.76"),
    port=int(os.environ.get("MEDEVIO_DB_PORT", "3307")),
    user=os.environ.get("MEDEVIO_DB_USER", "root"),
    password=os.environ.get("MEDEVIO_DB_PASSWORD", "Vlado9674+"),
    database=os.environ.get("MEDEVIO_DB_NAME", "medevio"),
)

conn = mysql.connector.connect(**MYSQL_CFG)
try:
    cur = conn.cursor()
    try:
        # The column name must be quoted with backticks: in single quotes,
        # 'fetched-at' is a string literal, which made the old WHERE clause
        # always true and returned an arbitrary row.
        # NOTE(review): column name taken verbatim from the previous query —
        # confirm it is really `fetched-at` (and not e.g. fetched_at).
        cur.execute(
            "SELECT html FROM kartoteka_html ORDER BY `fetched-at` DESC LIMIT 1"
        )
        row = cur.fetchone()
    finally:
        cur.close()
finally:
    conn.close()

if row is None:
    raise SystemExit("kartoteka_html is empty - nothing to parse.")

# html is the string containing the entire web page
html = row[0]
soup = BeautifulSoup(html, "html.parser")

# Find every <button> whose class attribute is exactly this string.
# A multi-class string passed as class_ is matched against the literal
# attribute value, so class order and completeness matter.
buttons = soup.find_all(
    "button",
    class_="MuiTypography-root MuiTypography-body2 "
           "MuiLink-root MuiLink-underlineAlways "
           "MuiLink-button css-xf7pf8"
)

names = []
for btn in buttons:
    text = btn.get_text(strip=True)
    print(text)
    names.append(text)

print(names)
|
||||||
1
medevio_storage.json
Normal file
1
medevio_storage.json
Normal file
@@ -0,0 +1 @@
|
|||||||
|
{"cookies": [{"name": "gateway-access-token", "value": "siiQdoYzEwMy3QUPQtHkUoJr6KGkdMJWX2xP47Bwr1SH8Tin4sROlJV/KpBlKl/bVViG9aktQOXmcmcY", "domain": "my.medevio.cz", "path": "/", "expires": 1761076304, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "aws-waf-token", "value": "d1c38c75-3a6a-458d-b446-67df055372e9:CgoAliiKsQdKAAAA:9TJuIJZ01Qb0+kOyO8Ts9H7zh4kHnIldQcWNkJRmwzQJNDwQ/J/Jpw67r76cKM9jDfFXRYYbJtBPb/QJ7kbMcWckSZKB0fL2TKJhib9xLarYjvm2Eu+wTll08nisMUOa7LaroeMJ7nnE0m8jCRmoCz3c2+EWMjRHRJsJ9e+fDE4uXfYrALyBZryvX+7048AY17JW73UuNK21bus3ODXjOviZCFBh3OCHwC9IX0ClW4xrBCzN5uqtXT9OGm2aJKSWGs3IlZQ=", "domain": ".my.medevio.cz", "path": "/", "expires": 1758829907, "httpOnly": false, "secure": true, "sameSite": "Lax"}], "origins": [{"origin": "https://my.medevio.cz", "localStorage": [{"name": "awswaf_token_refresh_timestamp", "value": "1758484300098"}, {"name": "awswaf_session_storage", "value": "d1c38c75-3a6a-458d-b446-67df055372e9:CgoAliiKsQhKAAAA:FW9mIZy/Za1OyAHnRHgnpQBZWQz3rrOPnh2MOZ/83oVVADSY5xWChgKzF9ZZAg0Vd19PnOaE8AlAsw1KHD6xlmrBvlAlL/qzqxgWO8fmJmtrO3ZZbUrMyaqlQRLab0G85japL6jJKHQWcMtdj1lKnE17RqnoeVHX6FJvK6kvhIeVLp88j1TNTBkMcMJShnXMLn4F/l/GC91TcgnE1k4G6VWAbLzOqcRcxjWLe/boFkAWRiF5EhqO6By635TryNmgGiXQT5kXiGsHzxhUg17xewy+IHe6prGaq8UIpMXD7LMRFjlc8+RbQJAUwieztpmc1urc+GCqb3O9fGBLWiqekHPS+95bmr9x0lLAJOVNma11waGApkTusHADPWJ4c+eBfw+5OgGBQesBCtW/HG3Obj6Ou0dfTiUKG2XmXHpEsN6Xc2w3nTmyDzioSSMhfFmBCv9AJ8G9iVOyRIfknsLH8Wl8zgFkaJI7d5awiC3W/a1BV0HPONLQywrVq5l5TG3QpZs3GbQXUs1gHZjdI8hCoLeFJPSrxis3CwezKCs4qexfkyDRvzuqGS8Qy7s6hL/JqU3RiprpzzXOWNLVcDC+TpYArmH35yET1neb7k/iinozEjjW7Kp3cB4XXJpvR0dK+5NTva7+lw5HovGPs9UNj08a1LrfvdtzisI09vaSTVnYPiaWi3j4bUc+7FXEU2qfKoA51ZabIDkFwdbLkX6156KQls4fDEnj/1+/hWkCkBa/LfRg/KU9iDex96HxZi95pDqdVSe5Rgyt6PGDqCXXw8/tgw7v8qYMOr9zAQRqQc3zMHZvC4PqVVAfuXwy3mCJ9KY1gH1ZkofuR77TDbtn//vL2XAbSm1WhVOP8MF12O6C6q9sCx6nuypwCOY7v/Ix1JqgeZIWfSi0KkQ99EzRyFyTbcJO4A=="}, {"name": "Application.Intl.locale", "value": "cs"}, {"name": "Password.prefill", "value": 
"{\"username\":\"vladimir.buzalka@buzalka.cz\",\"type\":\"email\"}"}]}]}
|
||||||
Reference in New Issue
Block a user