111 lines
3.4 KiB
Python
111 lines
3.4 KiB
Python
import mysql.connector
|
||
from bs4 import BeautifulSoup
|
||
import re
|
||
import time
|
||
|
||
# Keyword arguments passed verbatim to mysql.connector.connect().
# NOTE(review): host and credentials are hard-coded in source; consider
# loading them from the environment or a config file kept out of VCS.
MYSQL_CFG = {
    "host": "192.168.1.76",
    "port": 3307,
    "user": "root",
    "password": "Vlado9674+",
    "database": "medevio",
}
|
||
|
||
# Helper functions
|
||
def is_valid_rc(rc: str) -> bool:
    """
    Very basic RC format check.

    Every "/" is removed first; what remains must be exactly 9 or 10
    ASCII digits. Only the shape is checked, nothing else.

    Args:
        rc: RC string, with or without a slash.

    Returns:
        True if the slash-free value consists of 9 or 10 ASCII digits,
        False otherwise (including the empty string).
    """
    rc_clean = rc.replace("/", "")
    # Use [0-9] instead of \d: in a str pattern \d also matches non-ASCII
    # Unicode decimal digits (e.g. Arabic-Indic or full-width digits),
    # which must not be accepted here.
    return re.fullmatch(r"[0-9]{9,10}", rc_clean) is not None
|
||
|
||
def _cell_title(row_div, field: str) -> str:
    """Return the ``title`` attribute of the row's ``data-field`` cell, or ""."""
    cell = row_div.find("div", attrs={"data-field": field})
    return cell.get("title", "") if cell else ""


conn = mysql.connector.connect(**MYSQL_CFG)
try:
    # --- load the stored HTML snapshots for round 3, newest first ---
    # There is no LIMIT, so every matching snapshot is processed below.
    with conn.cursor() as cur:
        cur.execute("""
            SELECT html
            FROM kartoteka_html
            where round=3
            ORDER BY `fetched-at` DESC
        """)
        html_rows = cur.fetchall()

    if not html_rows:
        raise RuntimeError("No HTML found in kartoteka_html")

    for html_row in html_rows:
        html = html_row[0]
        soup = BeautifulSoup(html, "html.parser")

        # Parsed patients of the current snapshot (reset for each snapshot).
        records = []
        for row in soup.find_all("div", attrs={"role": "row", "data-id": True}):
            data_id = row["data-id"]

            # Full name -> first token is the surname, the rest is the name.
            name_btn = row.find("button", class_="MuiTypography-root")
            fullname = name_btn.get_text(strip=True) if name_btn else ""
            parts = fullname.split()
            surname = parts[0] if parts else ""
            name = " ".join(parts[1:]) if len(parts) > 1 else ""

            # RC, stored without slash/backslash separators.
            rc = _cell_title(row, "IdentificationNumber")
            rc = rc.replace("/", "").replace("\\", "")

            # Skip rows with no name, no RC, or an invalid RC.
            if not fullname or not rc or not is_valid_rc(rc):
                continue

            # Phone: NBSP -> space, then keep only "+" and digits.
            raw_phone = _cell_title(row, "Phone").replace("\u00A0", " ")
            phone = re.sub(r"[^\d+]", "", raw_phone)

            # Insurance company
            poj = _cell_title(row, "InsuranceCompany")

            records.append((data_id, fullname, rc, phone, poj))

            # --- per-patient lookup on a fresh buffered cursor ---
            with conn.cursor(buffered=True) as lookup_cur:
                lookup_cur.execute(
                    """
                    SELECT *
                    FROM patients_extracted
                    WHERE rc=%s
                    """,
                    (rc,),
                )
                matches = lookup_cur.fetchall()

                if len(matches) != 1:
                    # Zero or several rows for this RC: report only, no update.
                    print(f"Pacient {surname} {name} {rc} je v medeviu {len(matches)}x")
                    time.sleep(1)
                else:
                    # NOTE(review): relies on the id being the first column of
                    # SELECT * and on its DB type comparing equal to the str
                    # taken from the HTML attribute -- confirm against schema.
                    stored_id = matches[0][0]
                    if stored_id != data_id:
                        print(f"Pacient {surname} {name} {rc} má v medeviu jiný id, v db je {stored_id} and nyní je {data_id}")
                        time.sleep(.1)

                    # Exactly one match: store the current data-id as rid.
                    lookup_cur.execute(
                        """
                        Update patients_extracted set rid=%s where rc=%s""",
                        (data_id, rc),
                    )
                    conn.commit()
finally:
    # Always release the server connection, including on errors.
    conn.close()
|
||
# preview
|
||
# for r in records[:10]:
|
||
# print(f"ID: {r[0]} Name: {r[1]} RC: {r[2]} Phone: {r[3]} Pojistovna: {r[4]}")
|
||
#
|
||
# print("Total patients:", len(records))
|