# Provenance: payload of git patch 45ecfe96e89c2daa7dcd2badab130038fe3233e7
# ("[PATCH] z230", vladimir.buzalka, Thu, 23 Oct 2025 08:13:20 +0200),
# which creates the file "80 CtiKapitaceZTabulkyFAK.py".
"""Cross-check patients from a FAK capitation detail against an Excel overview.

Steps performed when run as a script:

1. Read the FAK row(s) for the configured year, month and ``poj`` from the
   Firebird database and take the ``kapdetail`` text of the first row.
2. Extract ``patientid;age|price;`` triplets from that text.
3. Left-join the triplets with table ``kar`` (``idpac`` -> ``rodcis``).
4. Load the second column of the Excel overview as ``rodcis``.
5. Export the patients whose ``rodcis`` is not present in the Excel overview
   to a timestamped ``.xlsx`` file in ``EXPORT_DIR``.
"""
import os
import re
import pandas as pd
import firebirdsql as fb
from datetime import datetime, timedelta
from pathlib import Path
from striprtf.striprtf import rtf_to_text
from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
from openpyxl.utils import get_column_letter
import textwrap

# ================== CONFIGURATION ==================
FDB_PATH = r"z:\Medicus 3\data\MEDICUS.FDB"
EXPORT_DIR = Path(r"u:\Dropbox\!!!Days\Downloads Z230")

# Period and "poj" value that select the FAK row. The same period is used to
# build the Excel overview filename, so the two can no longer drift apart.
YEAR = 2025
MONTH = 9
POJ = "211"

# SECURITY NOTE(review): these are the credentials the script has always used.
# They can now be overridden through the environment so they do not have to
# live in the source; the defaults keep existing setups working unchanged.
DB_USER = os.environ.get("MEDICUS_DB_USER", "SYSDBA")
DB_PASSWORD = os.environ.get("MEDICUS_DB_PASSWORD", "masterkey")

FAK_SQL = """
SELECT id, cisfak, poj, datkapod, datkapdo, kapdetail
FROM fak
WHERE EXTRACT(YEAR FROM datkapod) = ?
  AND EXTRACT(MONTH FROM datkapod) = ?
  AND poj = ?
"""
KAR_SQL = "SELECT idpac, rodcis FROM kar"

# Triplets of the form: patientid;age|price;
# NOTE(review): the patient id is matched as exactly four digits and the
# pattern is not anchored, so a longer id would be matched by its last four
# digits only - confirm against real KAPDETAIL data before relaxing this.
KAPDETAIL_PATTERN = re.compile(r"(\d{4});(\d{1,3})\|([\d.]+);")

# Optional module-level connection used by query_df() when no explicit
# connection is passed (kept so that ``query_df(sql)`` still works after
# ``conn = connect()`` in an interactive session).
conn = None


# ================== FIREBIRD CONNECTION ==================
def connect():
    """Open and return a new connection to the Medicus Firebird database."""
    return fb.connect(
        host="192.168.1.4",
        port=3050,
        database=FDB_PATH,
        user=DB_USER,
        password=DB_PASSWORD,
        charset="WIN1250",
    )


def query_df(sql, params=None, connection=None):
    """Run *sql* and return the result set as a DataFrame.

    Column names are stripped and lower-cased so callers can rely on a
    uniform spelling.

    Args:
        sql: SQL text, optionally with ``?`` placeholders.
        params: Sequence of values for the placeholders, or None.
        connection: Connection to use; falls back to the module-level
            ``conn`` when omitted.

    Raises:
        RuntimeError: If neither *connection* nor ``conn`` is available.
    """
    db = connection if connection is not None else conn
    if db is None:
        raise RuntimeError(
            "No database connection: pass connection= or set the module-level conn."
        )
    cur = db.cursor()
    try:
        cur.execute(sql, params or ())
        rows = cur.fetchall()
        cols = [d[0].strip().lower() for d in cur.description]
    finally:
        # Always release the cursor, even when execute()/fetchall() fails.
        cur.close()
    return pd.DataFrame(rows, columns=cols)


# ================== PARSING / TRANSFORMATION ==================
def parse_kapdetail(text):
    """Extract (patientid, age, price) triplets from a KAPDETAIL string.

    Returns a DataFrame with integer ``patientid`` and ``age`` columns and a
    float ``price`` column; it is empty when nothing matches.
    """
    if text is None:
        text = ""
    elif isinstance(text, (bytes, bytearray)):
        # NOTE(review): assumes a byte value uses the connection charset
        # (WIN1250) - confirm if KAPDETAIL is ever returned as bytes.
        text = bytes(text).decode("cp1250", errors="replace")

    dfdavka = pd.DataFrame(
        KAPDETAIL_PATTERN.findall(text),
        columns=["patientid", "age", "price"],
    )
    dfdavka["patientid"] = dfdavka["patientid"].astype(int)
    dfdavka["age"] = dfdavka["age"].astype(int)
    dfdavka["price"] = dfdavka["price"].astype(float)
    return dfdavka


def load_overview_rodcis(xlsx_path):
    """Return the second column of the Excel overview as a ``rodcis`` frame."""
    # dtype=str keeps the identifiers as text; with inferred dtypes a column
    # containing blanks becomes float and stringifies as "...0", which would
    # never match the database value.
    df = pd.read_excel(xlsx_path, dtype=str)
    print("Columns in Excel:", df.columns.tolist())

    dfpoj = df.iloc[:, [1]].copy()
    dfpoj.columns = ["rodcis"]
    dfpoj["rodcis"] = dfpoj["rodcis"].astype(str).str.strip()
    return dfpoj


# ================== SCRIPT ENTRY POINT ==================
def main():
    """Run the whole comparison and write the list of missing patients."""
    connection = connect()
    try:
        dffak = query_df(FAK_SQL, (YEAR, MONTH, POJ), connection=connection)
        if dffak.empty:
            raise SystemExit(
                f"No FAK row found for {MONTH:02d}/{YEAR}, poj {POJ}."
            )
        if len(dffak) > 1:
            print(
                f"Warning: {len(dffak)} FAK rows match; "
                "only the first one is processed."
            )
        kap_text = dffak.loc[0, "kapdetail"]
        dfkar = query_df(KAR_SQL, connection=connection)
    finally:
        # The database is not needed past this point.
        connection.close()

    # ========== PARSE + MERGE ==========
    dfdavka = parse_kapdetail(kap_text)
    dfmerged = pd.merge(
        dfdavka, dfkar, left_on="patientid", right_on="idpac", how="left"
    )
    dfmerged = dfmerged[["patientid", "rodcis", "age", "price"]].copy()

    print(dfmerged.head())
    dfmerged.info()  # info() prints by itself and returns None

    # ========== 1. Load Excel and prepare dfpoj ==========
    xlsx_path = EXPORT_DIR / f"prehled {MONTH:02d}_{YEAR} zpmvcr.xlsx"
    dfpoj = load_overview_rodcis(xlsx_path)
    print(dfpoj.head())

    # ========== 2. Compare dfmerged vs dfpoj ==========
    dfmerged["rodcis"] = dfmerged["rodcis"].astype(str).str.strip()
    df_missing = dfmerged[~dfmerged["rodcis"].isin(dfpoj["rodcis"])].copy()

    print(f"❌ Počet pacientů v dfmerged, kteří NEJSOU v dfpoj: {len(df_missing)}")
    print(df_missing.head())

    # ========== 3. Export differences ==========
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    xlsx_out = EXPORT_DIR / f"chybějící_pacienti_{timestamp}.xlsx"

    df_missing.to_excel(xlsx_out, index=False)
    print(f"✅ Výsledek uložen do {xlsx_out}")


if __name__ == "__main__":
    main()