Files
medevio/Medevio5_ReadNamesFromKartoteka_html.py
2025-09-22 10:45:44 +02:00

111 lines
3.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import os
import re
import time

import mysql.connector
from bs4 import BeautifulSoup
# Connection settings passed to mysql.connector.connect().
# Each value can be overridden through an environment variable; the literals
# below are only fallbacks so existing invocations keep working unchanged.
# NOTE(review): a root password is committed here in plain text — set
# MEDEVIO_DB_PASSWORD in the environment, then drop the fallback and rotate it.
MYSQL_CFG = dict(
    host=os.environ.get("MEDEVIO_DB_HOST", "192.168.1.76"),
    port=int(os.environ.get("MEDEVIO_DB_PORT", "3307")),  # connector expects an int
    user=os.environ.get("MEDEVIO_DB_USER", "root"),
    password=os.environ.get("MEDEVIO_DB_PASSWORD", "Vlado9674+"),
    database=os.environ.get("MEDEVIO_DB_NAME", "medevio"),
)
#Helper functions
def is_valid_rc(rc: str) -> bool:
    """Return True if *rc* passes a very basic RC format check.

    Every "/" is removed first; what remains must consist of exactly
    9 or 10 ASCII digits. No date or checksum validation is performed.

    Args:
        rc: RC string, with or without a slash.

    Returns:
        True when the slash-less value is 9 or 10 ASCII digits, else False.
    """
    rc_clean = rc.replace("/", "")
    # re.ASCII: without it \d also accepts non-ASCII Unicode digits
    # (e.g. Arabic-Indic), which would let non-numeric-looking values through.
    return bool(re.fullmatch(r"\d{9,10}", rc_clean, flags=re.ASCII))
def _cell_title(row_div, field):
    """Return the ``title`` attribute of the row's ``data-field`` cell, or ""."""
    cell = row_div.find("div", attrs={"data-field": field})
    return cell.get("title", "") if cell else ""


def _parse_patient_row(row_div):
    """Extract one patient's fields from a kartoteka grid row element.

    Args:
        row_div: a ``<div role="row" data-id=...>`` element from the parsed HTML.

    Returns:
        Tuple ``(data_id, fullname, surname, name, rc, phone, poj)``. Fields
        whose element is missing from the row come back as empty strings.
    """
    data_id = row_div["data-id"]

    # Full name -> first whitespace-separated token is taken as the surname,
    # everything after it as the name.
    name_btn = row_div.find("button", class_="MuiTypography-root")
    fullname = name_btn.get_text(strip=True) if name_btn else ""
    parts = fullname.split()
    surname = parts[0] if parts else ""
    name = " ".join(parts[1:]) if len(parts) > 1 else ""

    # RC, stored without slashes/backslashes.
    rc = _cell_title(row_div, "IdentificationNumber")
    rc = rc.replace("/", "").replace("\\", "")

    # Phone: keep only "+" and digits.
    raw_phone = _cell_title(row_div, "Phone")
    raw_phone = raw_phone.replace("\u00A0", " ")  # NBSP -> space
    phone = re.sub(r"[^\d+]", "", raw_phone)

    # Insurance company
    poj = _cell_title(row_div, "InsuranceCompany")

    return data_id, fullname, surname, name, rc, phone, poj


# NOTE(review): the loop nesting below was reconstructed from a listing whose
# indentation had been stripped — confirm it against the original file.
conn = mysql.connector.connect(**MYSQL_CFG)
try:
    # --- get every stored HTML snapshot for round 3, newest first ---
    # (the query has no LIMIT, so all matching rows are processed)
    with conn.cursor() as cur:
        cur.execute("""
SELECT html
FROM kartoteka_html
where round=3
ORDER BY `fetched-at` DESC
""")
        snapshots = cur.fetchall()
    if not snapshots:
        raise RuntimeError("No HTML found in kartoteka_html")

    for snapshot in snapshots:
        html = snapshot[0]
        soup = BeautifulSoup(html, "html.parser")
        records = []
        for row_div in soup.find_all("div", attrs={"role": "row", "data-id": True}):
            data_id, fullname, surname, name, rc, phone, poj = _parse_patient_row(row_div)

            # Skip rows with no name, no RC, or an RC that fails validation.
            if not fullname or not rc or not is_valid_rc(rc):
                continue
            records.append((data_id, fullname, rc, phone, poj))

            # --- per-patient lookup: buffered cursor, fresh for each patient ---
            with conn.cursor(buffered=True) as cur2:
                cur2.execute(
                    """
SELECT *
FROM patients_extracted
WHERE rc=%s
""",
                    (rc,),
                )
                matches = cur2.fetchall()

                # Zero or several matches: report the count and leave the DB alone.
                if len(matches) != 1:
                    print(f"Pacient {surname} {name} {rc} je v medeviu {len(matches)}x")
                    time.sleep(1)
                    continue

                # Exactly one match.
                # NOTE(review): matches[0][0] depends on SELECT * column order —
                # presumably the first column holds the stored id; confirm, and
                # confirm it has the same type as data_id (a str from the HTML).
                if matches[0][0] != data_id:
                    print(f"Pacient {surname} {name} {rc} má v medeviu jiný id, v db je {matches[0][0]} and nyní je {data_id}")
                    time.sleep(.1)
                cur2.execute("""
Update patients_extracted set rid=%s where rc=%s""", (data_id, rc))
                conn.commit()
finally:
    # Release the connection even when a query or the parsing raises.
    conn.close()
# preview
# for r in records[:10]:
# print(f"ID: {r[0]} Name: {r[1]} RC: {r[2]} Phone: {r[3]} Pojistovna: {r[4]}")
#
# print("Total patients:", len(records))