111 lines
3.3 KiB
Python
111 lines
3.3 KiB
Python
import re
|
||
import pandas as pd
|
||
import firebirdsql as fb
|
||
from datetime import datetime, timedelta
|
||
from pathlib import Path
|
||
from striprtf.striprtf import rtf_to_text
|
||
from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
|
||
from openpyxl.utils import get_column_letter
|
||
import textwrap
|
||
|
||
# ================== CONFIGURATION ==================
# Database file path handed to the Firebird server as the `database` argument.
FDB_PATH = r"z:\Medicus 3\data\MEDICUS.FDB"
# Working folder used by this script for its Excel files.
EXPORT_DIR = Path(r"u:\Dropbox\!!!Days\Downloads Z230")

# ================== FIREBIRD CONNECTION ==================
# NOTE(review): host and SYSDBA credentials are hard-coded in source; consider
# loading them from the environment or a config file kept out of version control.
_connect_kwargs = {
    "host": "192.168.1.4",
    "port": 3050,
    "database": FDB_PATH,
    "user": "SYSDBA",
    "password": "masterkey",
    "charset": "WIN1250",
}

# Single module-level connection shared by every query in this script.
conn = fb.connect(**_connect_kwargs)
|
||
|
||
def query_df(sql, params=None, connection=None):
    """Execute *sql* and return the full result set as a DataFrame.

    Args:
        sql: SQL statement to execute.
        params: Optional sequence of positional parameters; an empty tuple is
            passed to the driver when omitted.
        connection: Optional DB-API connection to run the query on. Defaults
            to the module-level ``conn``.

    Returns:
        A ``pandas.DataFrame`` with one row per fetched record. Column names
        are taken from the cursor description, stripped of surrounding
        whitespace and lower-cased.
    """
    db = conn if connection is None else connection
    cur = db.cursor()
    try:
        cur.execute(sql, params or ())
        rows = cur.fetchall()
        # Normalize column names so callers can rely on lower-case keys.
        cols = [d[0].strip().lower() for d in cur.description]
    finally:
        # Release the cursor even when execute/fetch raises.
        cur.close()
    return pd.DataFrame(rows, columns=cols)
|
||
# ================== QUERY ==================
# Rows of `fak` whose `datkapod` falls in September 2025 and whose `poj` is '211'.
sql = """
SELECT id, cisfak, poj, datkapod, datkapdo, kapdetail
FROM fak
WHERE EXTRACT(YEAR FROM datkapod) = 2025
AND EXTRACT(MONTH FROM datkapod) = 9
AND poj = '211'
"""
df = query_df(sql)

# Fail with a readable message instead of an opaque KeyError on df.loc[0, ...].
if df.empty:
    raise SystemExit("No rows in fak for 09/2025 with poj '211'; nothing to process.")

# Only the first row's KAPDETAIL is used below; make that visible when the
# query returns several rows.
if len(df) > 1:
    print(f"⚠️ Query returned {len(df)} fak rows; only the first KAPDETAIL is processed.")

kap_text = df.loc[0, "kapdetail"]

# A NULL kapdetail would otherwise surface later as a TypeError in re.findall.
if pd.isna(kap_text):
    raise SystemExit("KAPDETAIL of the first fak row is NULL; nothing to parse.")
|
||
|
||
# ========== SOURCE STRING ==========
text = kap_text

# Each record has the shape "<4 digits>;<1-3 digits>|<digits and dots>;" and is
# captured as the triplet (patientid, age, price).
# NOTE(review): the pattern is not anchored on the left, so an id longer than
# four digits would be captured as its last four digits only — confirm that
# ids are always exactly four digits.
pattern = r"(\d{4});(\d{1,3})\|([\d.]+);"
matches = re.findall(pattern, text)

# One row per match; the captured strings are cast to numeric dtypes in one go.
dfdavka = pd.DataFrame(matches, columns=["patientid", "age", "price"]).astype(
    {"patientid": int, "age": int, "price": float}
)
|
||
|
||
# ========== LOAD KAR ==========
# Lookup table mapping idpac -> rodcis, loaded in full.
sql = "SELECT idpac, rodcis FROM kar"
dfkar = query_df(sql)

# ========== MERGE ==========
# Left join keeps every parsed record; records without a kar match get a
# missing rodcis.
dfmerged = pd.merge(dfdavka, dfkar, left_on="patientid", right_on="idpac", how="left")

# Reorder columns. The explicit copy makes dfmerged an independent frame so
# later column assignments do not raise SettingWithCopyWarning.
dfmerged = dfmerged[["patientid", "rodcis", "age", "price"]].copy()

print(dfmerged.head())
# DataFrame.info() prints its report itself and returns None, so it must not
# be wrapped in print() (that emitted a stray "None" line).
dfmerged.info()
|
||
|
||
# ========== OPTIONAL EXPORT ==========
|
||
# outfile = EXPORT_DIR / "kapdetail_merged.xlsx"
|
||
# dfmerged.to_excel(outfile, index=False)
|
||
# print(f"✅ Exported to {outfile}")
|
||
|
||
|
||
|
||
# ========== 1️⃣ Load Excel and prepare dfpoj ==========
xlsx_path = Path(r"u:\Dropbox\!!!Days\Downloads Z230\prehled 09_2025 zpmvcr.xlsx")

# dtype=str keeps identifier cells as text; without it a numeric column that
# contains blanks is read as float and stringifies with a trailing ".0".
df = pd.read_excel(xlsx_path, dtype=str)

print("Columns in Excel:", df.columns.tolist())

if df.shape[1] < 2:
    raise SystemExit(f"Expected at least 2 columns in {xlsx_path}, found {df.shape[1]}.")

# The 2nd column (index 1) is used as rodcis.
raw_rodcis = df.iloc[:, 1]
clean_rodcis = raw_rodcis.astype(str).str.strip()

# Drop blank cells instead of keeping them as the literal string "nan", which
# could falsely match a patient whose rodcis is unknown.
keep = raw_rodcis.notna() & (clean_rodcis != "")
dfpoj = pd.DataFrame({"rodcis": clean_rodcis[keep]}).reset_index(drop=True)

print(dfpoj.head())
|
||
|
||
# ========== 2️⃣ Compare dfmerged vs dfpoj ==========
# Normalize rodcis to stripped text but keep missing values missing: a plain
# astype(str) would turn them into the string "nan", which can collide with a
# "nan" produced the same way on the dfpoj side.
has_rodcis = dfmerged["rodcis"].notna()
dfmerged = dfmerged.assign(
    rodcis=dfmerged["rodcis"].astype(str).str.strip().where(has_rodcis)
)

# Build the lookup once; missing values are excluded so they can never match.
known_rodcis = set(dfpoj["rodcis"].dropna())

# Rows present in dfmerged but not in dfpoj. Rows with no rodcis at all are
# never in known_rodcis and are therefore always reported.
df_missing = dfmerged[~dfmerged["rodcis"].isin(known_rodcis)].copy()

print(f"❌ Počet pacientů v dfmerged, kteří NEJSOU v dfpoj: {len(df_missing)}")
print(df_missing.head())
|
||
|
||
# ========== 3️⃣ (Optional) Export differences ==========
# Reuses EXPORT_DIR from the configuration section at the top of the file;
# redefining it here silently overrode any change made there.
EXPORT_DIR.mkdir(parents=True, exist_ok=True)

# Timestamped file name so repeated runs never overwrite each other.
timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
xlsx_out = EXPORT_DIR / f"chybějící_pacienti_{timestamp}.xlsx"

df_missing.to_excel(xlsx_out, index=False)
print(f"✅ Výsledek uložen do {xlsx_out}")