"""Compare patients listed in a FAK `kapdetail` record with an Excel report.

Steps performed by this script:

1. Read the `fak` rows for POJ '211' whose `datkapod` falls in 09/2025.
2. Parse ``patientid;age|price;`` triplets out of the first row's `kapdetail`.
3. Left-join those patients to `kar` (`idpac` -> `rodcis`).
4. Load the second column of the Excel workbook as the reference `rodcis` list.
5. Export the parsed patients whose `rodcis` is not in that list.
"""
import re
import pandas as pd
import firebirdsql as fb
from datetime import datetime, timedelta
from pathlib import Path
from striprtf.striprtf import rtf_to_text
from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
from openpyxl.utils import get_column_letter
import textwrap

# ================== CONFIGURATION ==================
FDB_PATH = r"z:\Medicus 3\data\MEDICUS.FDB"
EXPORT_DIR = Path(r"u:\Dropbox\!!!Days\Downloads Z230")

# ================== FIREBIRD CONNECTION ==================
# NOTE(review): credentials are hard-coded (default SYSDBA password); consider
# loading them from the environment or a config file kept out of version control.
conn = fb.connect(
    host="192.168.1.4",
    port=3050,
    database=FDB_PATH,
    user="SYSDBA",
    password="masterkey",
    charset="WIN1250",
)


def query_df(sql, params=None):
    """Run *sql* on the module-level connection and return the rows as a DataFrame.

    Args:
        sql: SQL statement to execute.
        params: Optional sequence of bind parameters; an empty tuple is used
            when omitted.

    Returns:
        A DataFrame with one row per fetched record. Column names are taken
        from the cursor description, stripped and lower-cased.
    """
    cur = conn.cursor()
    try:
        cur.execute(sql, params or ())
        rows = cur.fetchall()
        # Normalize column names so later code can rely on lower-case keys.
        cols = [d[0].strip().lower() for d in cur.description]
    finally:
        # Release the cursor even when execute/fetch raises.
        cur.close()
    return pd.DataFrame(rows, columns=cols)


# ================== QUERY ==================
try:
    sql = """
SELECT id, cisfak, poj, datkapod, datkapdo, kapdetail
FROM fak
WHERE EXTRACT(YEAR FROM datkapod) = 2025
  AND EXTRACT(MONTH FROM datkapod) = 9
  AND poj = '211'
"""
    df = query_df(sql)

    # ========== LOAD KAR ==========
    sql = "SELECT idpac, rodcis FROM kar"
    dfkar = query_df(sql)
finally:
    # Both database reads are done (or failed); nothing below needs the connection.
    conn.close()

if df.empty:
    # Without this guard the lookup below fails with an opaque KeyError.
    raise SystemExit("No FAK rows matched POJ '211' for 09/2025; nothing to compare.")

# Only the first matching row's KAPDETAIL text is processed.
kap_text = df.loc[0, 'kapdetail']

# ========== SOURCE STRING ==========
text = kap_text

# Extract triplets: patientid;age|price;
# NOTE(review): the pattern requires exactly four digits before the first ';'
# — confirm that patient ids never have a different length.
pattern = r"(\d{4});(\d{1,3})\|([\d.]+);"
matches = re.findall(pattern, text)

# Create dataframe and convert the captured strings to numeric types.
dfdavka = pd.DataFrame(matches, columns=["patientid", "age", "price"])
dfdavka["patientid"] = dfdavka["patientid"].astype(int)
dfdavka["age"] = dfdavka["age"].astype(int)
dfdavka["price"] = dfdavka["price"].astype(float)

# ========== MERGE ==========
dfmerged = pd.merge(dfdavka, dfkar, left_on="patientid", right_on="idpac", how="left")

# Reorder columns; .copy() makes the result an independent frame so the
# column assignment further down does not raise SettingWithCopyWarning.
dfmerged = dfmerged[["patientid", "rodcis", "age", "price"]].copy()

print(dfmerged.head())
# info() prints its report itself and returns None, so it is not wrapped in print().
dfmerged.info()

# ========== OPTIONAL EXPORT ==========
# outfile = EXPORT_DIR / "kapdetail_merged.xlsx"
# dfmerged.to_excel(outfile, index=False)
# print(f"✅ Exported to {outfile}")

# ========== 1️⃣ Load Excel and prepare dfpoj ==========
xlsx_path = Path(r"u:\Dropbox\!!!Days\Downloads Z230\prehled 09_2025 zpmvcr.xlsx")
df = pd.read_excel(xlsx_path)

print("Columns in Excel:", df.columns.tolist())

# Select 2nd column (index 1) and give it a proper name.
dfpoj = df.iloc[:, [1]].copy()
dfpoj.columns = ["rodcis"]
# Drop empty cells first: astype(str) would otherwise turn them into the
# literal text "nan", which could then "match" patients lacking a rodcis.
dfpoj = dfpoj.dropna(subset=["rodcis"]).copy()
# NOTE(review): if Excel stores these values as numbers, their text form may
# differ from the database value (e.g. a trailing ".0") — confirm on real data.
dfpoj["rodcis"] = dfpoj["rodcis"].astype(str).str.strip()

print(dfpoj.head())

# ========== 2️⃣ Compare dfmerged vs dfpoj ==========
# Remember which patients actually have a rodcis before stringifying the column.
has_rodcis = dfmerged["rodcis"].notna()
dfmerged["rodcis"] = dfmerged["rodcis"].astype(str).str.strip()

# A patient counts as present only with a real rodcis that appears in the report.
in_report = has_rodcis & dfmerged["rodcis"].isin(dfpoj["rodcis"])

# Find those in dfmerged but not in dfpoj
df_missing = dfmerged[~in_report].copy()

print(f"❌ Počet pacientů v dfmerged, kteří NEJSOU v dfpoj: {len(df_missing)}")
print(df_missing.head())

# ========== 3️⃣ (Optional) Export differences ==========
timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
xlsx_out = EXPORT_DIR / f"chybějící_pacienti_{timestamp}.xlsx"
df_missing.to_excel(xlsx_out, index=False)
print(f"✅ Výsledek uložen do {xlsx_out}")