import re
import time

import mysql.connector
from bs4 import BeautifulSoup

# NOTE(review): connection credentials (root user + password) are hard-coded
# in source. Values are left untouched so the script keeps working, but they
# should be moved to environment variables or an untracked config file.
MYSQL_CFG = dict(
    host="192.168.1.76",
    port=3307,
    user="root",
    password="Vlado9674+",
    database="medevio",
)

# ASCII-only on purpose: on str patterns ``\d`` also matches non-ASCII
# Unicode digits, which must not pass as a valid RC.
_RC_PATTERN = re.compile(r"[0-9]{9,10}")


# Helper functions
def is_valid_rc(rc: str) -> bool:
    """Return True if *rc* passes a very basic RC format check.

    Any slash is removed first; what remains must consist of exactly 9 or
    10 ASCII digits. No checksum or date validation is performed.
    (RC is presumably the Czech birth number -- confirm with the data owner.)
    """
    return _RC_PATTERN.fullmatch(rc.replace("/", "")) is not None


def _cell_title(grid_row, field: str) -> str:
    """Return the ``title`` attribute of the ``data-field`` cell, or ""."""
    cell = grid_row.find("div", attrs={"data-field": field})
    return cell.get("title", "") if cell else ""


conn = mysql.connector.connect(**MYSQL_CFG)
try:
    # --- fetch the stored HTML snapshots of round 3, newest first ---
    # There is no LIMIT, so every matching snapshot is returned and processed.
    with conn.cursor() as cur:
        cur.execute(
            """
            SELECT html
            FROM kartoteka_html
            where round=3
            ORDER BY `fetched-at` DESC
            """
        )
        html_rows = cur.fetchall()
    if not html_rows:
        raise RuntimeError("No HTML found in kartoteka_html")

    for html_row in html_rows:
        html = html_row[0]
        soup = BeautifulSoup(html, "html.parser")

        # Patients parsed from the current snapshot (used by the preview below).
        records = []
        for grid_row in soup.find_all(
            "div", attrs={"role": "row", "data-id": True}
        ):
            data_id = grid_row["data-id"]

            # full name -> surname (first token) + rest
            name_btn = grid_row.find("button", class_="MuiTypography-root")
            fullname = name_btn.get_text(strip=True) if name_btn else ""
            parts = fullname.split()
            surname = parts[0] if parts else ""
            name = " ".join(parts[1:])

            # RC, normalised by dropping slashes and backslashes
            rc = _cell_title(grid_row, "IdentificationNumber")
            rc = rc.replace("/", "").replace("\\", "")

            # Phone: keep only "+" and digits (this also drops spaces/NBSP)
            phone = re.sub(r"[^\d+]", "", _cell_title(grid_row, "Phone"))

            # Insurance
            poj = _cell_title(grid_row, "InsuranceCompany")

            # Skip rows with no name, no RC, or an RC that fails validation
            if not fullname or not rc or not is_valid_rc(rc):
                continue

            records.append((data_id, fullname, rc, phone, poj))

            # --- per-patient lookup: fresh buffered cursor each time ---
            with conn.cursor(buffered=True) as lookup_cur:
                lookup_cur.execute(
                    """
                    SELECT *
                    FROM patients_extracted
                    WHERE rc=%s
                    """,
                    (rc,),
                )
                matches = lookup_cur.fetchall()

                # Zero or several DB rows for this RC: report and leave the
                # database untouched.
                if len(matches) != 1:
                    print(f"Pacient {surname} {name} {rc} je v medeviu {len(matches)}x")
                    time.sleep(1)
                    continue

                # NOTE(review): assumes the first column returned by SELECT *
                # is the stored id to compare with data-id -- confirm against
                # the patients_extracted schema.
                if matches[0][0] != data_id:
                    print(f"Pacient {surname} {name} {rc} má v medeviu jiný id, v db je {matches[0][0]} and nyní je {data_id}")
                    time.sleep(.1)

                # Exactly one match: store the current data-id as rid.
                lookup_cur.execute(
                    """
                    Update patients_extracted set rid=%s where rc=%s""",
                    (data_id, rc),
                )
                conn.commit()
finally:
    # Release the connection on success and on any error path alike.
    conn.close()

# preview
# for r in records[:10]:
#     print(f"ID: {r[0]} Name: {r[1]} RC: {r[2]} Phone: {r[3]} Pojistovna: {r[4]}")
#
# print("Total patients:", len(records))