# This commit is contained in:
# 2025-10-23 08:13:20 +02:00
# parent d8d3de7949
# commit 45ecfe96e8
#
# View File
#
# @@ -0,0 +1,111 @@
import os
import re
import textwrap
from datetime import datetime, timedelta
from pathlib import Path

import firebirdsql as fb
import pandas as pd
from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
from openpyxl.utils import get_column_letter
from striprtf.striprtf import rtf_to_text
# ================== CONFIGURATION ==================
# Database file handed to fb.connect() as the `database` argument.
FDB_PATH = r"z:\Medicus 3\data\MEDICUS.FDB"
# Folder that receives the exported reports.
EXPORT_DIR = Path(r"u:\Dropbox\!!!Days\Downloads Z230")

# ================== FIREBIRD CONNECTION ==================
# Host and credentials may be overridden through environment variables so the
# SYSDBA password does not have to live in the source. The fallbacks are the
# previously hard-coded values, so an unconfigured environment behaves exactly
# as before.
conn = fb.connect(
    host=os.environ.get("MEDICUS_DB_HOST", "192.168.1.4"),
    port=3050,
    database=FDB_PATH,
    user=os.environ.get("MEDICUS_DB_USER", "SYSDBA"),
    password=os.environ.get("MEDICUS_DB_PASSWORD", "masterkey"),
    charset="WIN1250",
)
def query_df(sql, params=None):
    """Run *sql* on the module-level connection and return a DataFrame.

    Args:
        sql: SQL statement to execute.
        params: Optional bind parameters; an empty tuple is passed to the
            driver when omitted.

    Returns:
        A pandas DataFrame holding every fetched row. Column names are taken
        from the cursor description, stripped of whitespace and lower-cased.
    """
    cur = conn.cursor()
    try:
        cur.execute(sql, params or ())
        rows = cur.fetchall()
        # Read the description before the cursor is closed.
        cols = [d[0].strip().lower() for d in cur.description]
    finally:
        # Release the cursor even when execute()/fetchall() raises; the
        # original never closed it, leaking one cursor per call.
        cur.close()
    return pd.DataFrame(rows, columns=cols)
# ================== QUERY ==================
# FAK rows whose DATKAPOD lies in September 2025 and whose POJ is '211'.
sql = """
SELECT id, cisfak, poj, datkapod, datkapdo, kapdetail
FROM fak
WHERE EXTRACT(YEAR FROM datkapod) = 2025
AND EXTRACT(MONTH FROM datkapod) = 9
AND poj = '211'
"""
df = query_df(sql)

# Stop with a readable message instead of an opaque ``KeyError: 0`` when the
# filter matches no row at all.
if df.empty:
    raise SystemExit("No FAK rows found for 09/2025 and poj '211'; nothing to process.")

# Only the KAPDETAIL of the first returned row is processed.
# NOTE(review): if more than one FAK row can match this filter, confirm that
# ignoring the remaining rows is intended.
kap_text = df.loc[0, "kapdetail"]
# ========== SOURCE STRING ==========
text = kap_text

# Every record is captured as a (patientid, age, price) triplet from the shape
# "<4 digits>;<1-3 digits>|<digits and dots>;".
# NOTE(review): the id group matches exactly four digits, so for a longer id
# only its last four digits would be captured - confirm ids are always 4 digits.
pattern = r"(\d{4});(\d{1,3})\|([\d.]+);"
matches = re.compile(pattern).findall(text)

# Build the frame and convert the captured strings to numbers in one step.
dfdavka = pd.DataFrame(matches, columns=["patientid", "age", "price"]).astype(
    {"patientid": int, "age": int, "price": float}
)
# ========== LOAD KAR ==========
# idpac -> rodcis lookup read from the KAR table.
sql = "SELECT idpac, rodcis FROM kar"
dfkar = query_df(sql)

# ========== MERGE ==========
# Left join: every parsed KAPDETAIL entry is kept even when KAR has no row for
# its patientid; such entries end up with a missing rodcis.
dfmerged = pd.merge(dfdavka, dfkar, left_on="patientid", right_on="idpac", how="left")

# Keep only the reporting columns, in this order. The explicit copy makes
# dfmerged an independent frame, so later column assignments do not trigger
# pandas' SettingWithCopyWarning.
dfmerged = dfmerged[["patientid", "rodcis", "age", "price"]].copy()

print(dfmerged.head())
# DataFrame.info() prints its report itself and returns None; wrapping it in
# print() only added a stray "None" line to the output.
dfmerged.info()
# ========== OPTIONAL EXPORT ==========
# outfile = EXPORT_DIR / "kapdetail_merged.xlsx"
# dfmerged.to_excel(outfile, index=False)
# print(f"✅ Exported to {outfile}")
# ========== 1) Load Excel and prepare dfpoj ==========
# Note that `df` is rebound here to the contents of the workbook.
xlsx_path = Path(r"u:\Dropbox\!!!Days\Downloads Z230\prehled 09_2025 zpmvcr.xlsx")
df = pd.read_excel(xlsx_path)
print("Columns in Excel:", df.columns.tolist())

# The second column (position 1) is used as rodcis: converted to text and
# trimmed, then wrapped in a one-column frame.
# NOTE(review): astype(str) turns empty cells into the text "nan" and numeric
# cells into their numeric representation - confirm the sheet stores rodcis
# as text.
dfpoj = pd.DataFrame({"rodcis": df.iloc[:, 1].astype(str).str.strip()})
print(dfpoj.head())
# ========== 2) Compare dfmerged vs dfpoj ==========
# Remember which rows really carry a rodcis BEFORE stringifying: astype(str)
# turns a missing value into the text "nan"/"None", which could coincide with
# an equally stringified empty cell in dfpoj and make a patient without any
# rodcis look as if it were present there.
has_rodcis = dfmerged["rodcis"].notna()
dfmerged["rodcis"] = dfmerged["rodcis"].astype(str).str.strip()

# A row counts as present only if it has a real rodcis AND that value occurs
# in dfpoj.
in_dfpoj = has_rodcis & dfmerged["rodcis"].isin(dfpoj["rodcis"])

# Everything else is reported as missing from dfpoj.
df_missing = dfmerged[~in_dfpoj].copy()
print(f"❌ Počet pacientů v dfmerged, kteří NEJSOU v dfpoj: {len(df_missing)}")
print(df_missing.head())
# ========== 3) (Optional) Export differences ==========
EXPORT_DIR = Path(r"u:\Dropbox\!!!Days\Downloads Z230")

# The output file name carries a second-resolution timestamp of this run.
timestamp = format(datetime.now(), "%Y-%m-%d_%H-%M-%S")
xlsx_out = EXPORT_DIR.joinpath(f"chybějící_pacienti_{timestamp}.xlsx")

# Write the rows missing from dfpoj, without the index column.
df_missing.to_excel(xlsx_out, index=False)
print(f"✅ Výsledek uložen do {xlsx_out}")