Files
reporty/80 CtiKapitaceZTabulkyFAK.py
2025-10-23 08:13:20 +02:00

111 lines
3.3 KiB
Python
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import re
import pandas as pd
import firebirdsql as fb
from datetime import datetime, timedelta
from pathlib import Path
from striprtf.striprtf import rtf_to_text
from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
from openpyxl.utils import get_column_letter
import textwrap
# ================== CONFIGURATION ==================
# Path of the Medicus Firebird database file handed to the connection below.
FDB_PATH = r"z:\Medicus 3\data\MEDICUS.FDB"
# Folder the report output is written to.
EXPORT_DIR = Path(r"u:\Dropbox\!!!Days\Downloads Z230")

# ================== FIREBIRD CONNECTION ==================
# One module-level connection; query_df() obtains its cursors from it.
# NOTE(review): server address and credentials are hard-coded in the source —
# consider moving them to configuration outside the repository.
_connect_args = {
    "host": "192.168.1.4",
    "port": 3050,
    "database": FDB_PATH,
    "user": "SYSDBA",
    "password": "masterkey",
    "charset": "WIN1250",
}
conn = fb.connect(**_connect_args)
def query_df(sql, params=None):
    """Run *sql* on the shared connection and return the result as a DataFrame.

    Args:
        sql: SQL statement to execute.
        params: Optional sequence of values for the statement's placeholders;
            an empty tuple is passed to the cursor when it is omitted.

    Returns:
        A pandas DataFrame with one row per fetched record. Column names come
        from the cursor description, stripped of whitespace and lower-cased.
    """
    cur = conn.cursor()
    try:
        cur.execute(sql, params or ())
        rows = cur.fetchall()
        # Normalize the column names so callers can rely on lowercase keys.
        cols = [d[0].strip().lower() for d in cur.description]
    finally:
        # Release the cursor even when execute/fetch raises.
        cur.close()
    return pd.DataFrame(rows, columns=cols)
# ================== QUERY ==================
# FAK rows for POJ 211 whose DATKAPOD falls in September 2025.
sql = """
SELECT id, cisfak, poj, datkapod, datkapdo, kapdetail
FROM fak
WHERE EXTRACT(YEAR FROM datkapod) = 2025
AND EXTRACT(MONTH FROM datkapod) = 9
AND poj = '211'
"""
df = query_df(sql)
# print(df.columns.tolist())
# print(df.head())

# Stop with a readable message instead of a bare KeyError when nothing matched.
if df.empty:
    raise SystemExit(
        "No FAK row matches the query (POJ 211, DATKAPOD in 09/2025); nothing to parse."
    )

# Only the KAPDETAIL text of the first returned row is parsed.
# NOTE(review): any further rows returned by the query are ignored — confirm
# that a single FAK row per period is what is expected.
kap_text = df.loc[0, "kapdetail"]
# print("\n".join(textwrap.wrap(kap_text, width=100)))

# A NULL KAPDETAIL would otherwise surface as a TypeError inside re.findall.
if kap_text is None:
    raise SystemExit("KAPDETAIL of the first FAK row is NULL; nothing to parse.")

# ========== SOURCE STRING ==========
text = kap_text

# Extract triplets: patientid;age|price;
# NOTE(review): the id group accepts exactly four digits; on a longer id the
# pattern would capture only its last four digits — confirm ids are always
# four digits long.
pattern = r"(\d{4});(\d{1,3})\|([\d.]+);"
matches = re.findall(pattern, text)

# One row per triplet; the regex captures are strings, so convert them to
# numeric types before merging on patientid.
dfdavka = pd.DataFrame(matches, columns=["patientid", "age", "price"])
dfdavka = dfdavka.astype({"patientid": int, "age": int, "price": float})
# ========== LOAD KAR ==========
# idpac -> rodcis pairs from the KAR table.
sql = "SELECT idpac, rodcis FROM kar"
dfkar = query_df(sql)

# ========== MERGE ==========
# Left join: every parsed patient is kept, with rodcis left missing when no
# KAR row has a matching idpac.
dfmerged = pd.merge(dfdavka, dfkar, left_on="patientid", right_on="idpac", how="left")

# Keep only the columns used further on, in a fixed order.
dfmerged = dfmerged[["patientid", "rodcis", "age", "price"]]
print(dfmerged.head())
# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print() only appended a stray "None" line.
dfmerged.info()
# ========== OPTIONAL EXPORT ==========
# outfile = EXPORT_DIR / "kapdetail_merged.xlsx"
# dfmerged.to_excel(outfile, index=False)
# print(f"✅ Exported to {outfile}")
# ========== 1) Load Excel and prepare dfpoj ==========
xlsx_path = Path(r"u:\Dropbox\!!!Days\Downloads Z230\prehled 09_2025 zpmvcr.xlsx")
# Read every cell as text. With type inference a numeric column that contains
# blank cells is loaded as float, and its values would then stringify with a
# trailing ".0" and never match the rodcis values coming from the database.
df = pd.read_excel(xlsx_path, dtype=str)
print("Columns in Excel:", df.columns.tolist())

# The second column (position 1) is taken as the rodcis list.
# NOTE(review): the column is picked by position, not by header — confirm the
# sheet layout is stable.
dfpoj = df.iloc[:, [1]].copy()
dfpoj.columns = ["rodcis"]  # give it the same name as in dfmerged
dfpoj["rodcis"] = dfpoj["rodcis"].astype(str).str.strip()
print(dfpoj.head())
# ========== 2) Compare dfmerged vs dfpoj ==========
# Remember which patients actually have a rodcis before the string conversion
# below turns a missing value into the text "nan".
has_rodcis = dfmerged["rodcis"].notna()
dfmerged["rodcis"] = dfmerged["rodcis"].astype(str).str.strip()

# A patient counts as present only when a real rodcis was found in dfpoj.
# Without the has_rodcis guard, a "nan" text in dfpoj (e.g. from a blank cell)
# would match every patient lacking a rodcis and hide them from the report.
in_dfpoj = has_rodcis & dfmerged["rodcis"].isin(dfpoj["rodcis"])

# Patients from dfmerged that are not present in dfpoj.
df_missing = dfmerged[~in_dfpoj].copy()
print(f"❌ Počet pacientů v dfmerged, kteří NEJSOU v dfpoj: {len(df_missing)}")
print(df_missing.head())
# ========== 3) Export differences ==========
# EXPORT_DIR is the folder defined in the configuration section at the top of
# the script; it is deliberately not redefined here so the output location is
# configured in one place only.
timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
# Timestamped file name, so repeated runs do not overwrite earlier results.
xlsx_out = EXPORT_DIR / f"chybějící_pacienti_{timestamp}.xlsx"
df_missing.to_excel(xlsx_out, index=False)
print(f"✅ Výsledek uložen do {xlsx_out}")

# No query runs after this point in the script, so the Firebird connection
# opened at the top is released explicitly.
conn.close()