diff --git a/50 Dekurs.py b/50 Dekurs.py index 9955e71..b8cf489 100644 --- a/50 Dekurs.py +++ b/50 Dekurs.py @@ -3,130 +3,149 @@ """ Export DEKURS records from Medicus Firebird DB into Excel. -- RTF text decoded to plain ASCII (no diacritics, first 100 chars) -- Příjmení + Jméno merged into one 'Pacient' column -- Proper date formatting (DD.MM.YYYY) -- Thin black borders, gold header, wide text column +- Includes only the last 365 days (one year) from the current date +- RTF text decoded and cleaned (Excel-safe, preserved newlines) +- Výkony of the same day concatenated into 'VYKONY DNE' +- Příjmení + Jméno merged into 'PACIENT' +- Proper column order and naming +- Bright yellow header, thin black borders, auto column widths +- Timestamped Excel filename """ -import time import re -import unicodedata -import fdb import pandas as pd +import firebirdsql as fb +from datetime import datetime, timedelta from pathlib import Path from striprtf.striprtf import rtf_to_text from openpyxl.styles import Font, Alignment, PatternFill, Border, Side from openpyxl.utils import get_column_letter # ================== CONFIGURATION ================== -FDB_PATH = r"z:\Medicus 3\data\medicus.fdb" -EXPORT_DIR = Path(r"D:\Dropbox\!!!Days\Downloads Z230") -timestamp = time.strftime("%Y-%m-%d %H-%M-%S") -xlsx_path = EXPORT_DIR / f"Dekurz export ASCII {timestamp}.xlsx" -DATE_FROM = "2024-01-01" +FDB_PATH = r"z:\Medicus 3\data\MEDICUS.FDB" +EXPORT_DIR = Path(r"u:\Dropbox\!!!Days\Downloads Z230") + +# calculate the start date dynamically: 365 days (one year) before now +DATE_FROM = (datetime.now() - timedelta(days=365)).strftime("%Y-%m-%d") + +timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") +xlsx_path = EXPORT_DIR / f"{timestamp}_Dekurz (poslední rok).xlsx" # ================== FIREBIRD CONNECTION ================== -con = fdb.connect( - dsn=f"localhost:{FDB_PATH}", - user="sysdba", +conn = fb.connect( + host="192.168.1.4", + port=3050, + database=FDB_PATH, + user="SYSDBA", password="masterkey", - charset="WIN1250" + charset="WIN1250", 
) +def query_df(sql, params=None): + cur = conn.cursor() + cur.execute(sql, params or ()) + rows = cur.fetchall() + cols = [d[0].strip() for d in cur.description] + return pd.DataFrame(rows, columns=cols) + # ================== QUERY ================== sql = f""" SELECT - dekurs.id, - kar.prijmeni, - kar.jmeno, - kar.rodcis, - uzivatel.zkratka, - dekurs.datum, - "DEKURS" -FROM dekurs -JOIN kar ON dekurs.idpac = kar.idpac -JOIN uzivatel ON dekurs.iduzi = uzivatel.iduzi -WHERE dekurs.datum >= DATE '{DATE_FROM}' -ORDER BY dekurs.datum DESC + d.id, + k.prijmeni, + k.jmeno, + k.rodcis, + u.zkratka, + d.datum, + d.dekurs, + ( + SELECT LIST(dd.kod, ', ') + FROM dokladd dd + WHERE dd.rodcis = k.rodcis + AND CAST(dd.datose AS DATE) = CAST(d.datum AS DATE) + ) AS vykony_dne +FROM dekurs d +JOIN kar k ON d.idpac = k.idpac +JOIN uzivatel u ON d.iduzi = u.iduzi +WHERE d.datum >= DATE '{DATE_FROM}' +ORDER BY d.datum DESC """ -df = pd.read_sql(sql, con) -con.close() +df = query_df(sql) +conn.close() -# ================== DATA PREPARATION ================== +# ================== DATA CLEANING ================== +def safe_rtf_to_text(x): + if isinstance(x, str) and x.strip(): + try: + return rtf_to_text(x) + except Exception: + return x + return "" + +df["DEKURS"] = df["DEKURS"].apply(safe_rtf_to_text) + +df.replace({r'(\r\n|\r|\n)': r'\r\n'}, regex=True, inplace=True) +df.replace({r'[\ud800-\udfff\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]+': ''}, + regex=True, inplace=True) +df.replace({r'(\r\n){2,}': r'\r\n', r'(\r\n)+$': ''}, + regex=True, inplace=True) -# Merge Příjmení + Jméno df["PACIENT"] = df["PRIJMENI"].fillna("") + ", " + df["JMENO"].fillna("") df.drop(columns=["PRIJMENI", "JMENO"], inplace=True) -# Ensure DATUM is datetime type -df["DATUM"] = pd.to_datetime(df["DATUM"], errors="coerce") +df.rename(columns={"ZKRATKA": "LEKAR", "VYKONY_DNE": "VYKONY DNE"}, inplace=True) +df["DATUM"] = pd.to_datetime(df["DATUM"], errors="coerce").dt.date +df.drop(columns=[c for c in df.columns if 
"ASCII" in c.upper()], inplace=True, errors="ignore") -# Decode RTF → ASCII (first 100 chars) -def decode_rtf_ascii(text): - """Decode RTF, clean control chars, convert to ASCII (no diacritics), limit to 100 chars.""" - if not text: - return "" +desired_order = ["DATUM", "RODCIS", "PACIENT", "LEKAR", "VYKONY DNE", "DEKURS"] +df = df[[c for c in desired_order if c in df.columns]] +# ================== CLEANUP OLD FILES ================== +for old_file in EXPORT_DIR.glob("*Dekurz (poslední rok)*.xlsx"): try: - plain = rtf_to_text(text) - except Exception: - plain = str(text) - plain = re.sub(r"[\x00-\x08\x0B-\x0C\x0E-\x1F]", "", plain) - plain = re.sub(r"\s{3,}", " ", plain) - plain = unicodedata.normalize("NFKD", plain).encode("ascii", "ignore").decode("ascii") - return plain.strip()[:100] - -df["TEXT_ASCII"] = df["DEKURS"].apply(decode_rtf_ascii) -df.drop(columns=["DEKURS"], inplace=True) - -# Rename columns for Excel -df.rename(columns={ - "ID": "ID zaznamu", - "PACIENT": "Pacient", - "RODCIS": "Rodne cislo", - "ZKRATKA": "Lekar", - "DATUM": "Datum", - "TEXT_ASCII": "Text ASCII (RTF->plain)" -}, inplace=True) - + old_file.unlink() + print(f"🧹 Deleted old file: {old_file.name}") + except Exception as e: + print(f"⚠️ Could not delete {old_file.name}: {e}") # ================== EXPORT TO EXCEL ================== with pd.ExcelWriter(xlsx_path, engine="openpyxl") as writer: df.to_excel(writer, index=False, sheet_name="Dekurz") ws = writer.sheets["Dekurz"] - # ----- Header formatting ----- - header_fill = PatternFill(start_color="FFD966", end_color="FFD966", fill_type="solid") + # ----- Bright yellow header ----- + header_fill = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid") for cell in ws[1]: - cell.font = Font(bold=True) + cell.font = Font(bold=True, color="000000") cell.alignment = Alignment(horizontal="center", vertical="center") cell.fill = header_fill # ----- Format Datum column ----- - for cell in ws["F"][1:]: # column F = Datum (adjust 
if structure changes) - if isinstance(cell.value, pd.Timestamp): - cell.value = cell.value.date() # remove time part + for cell in ws["A"][1:]: + if hasattr(cell.value, "date"): + cell.value = cell.value.date() cell.number_format = "DD.MM.YYYY" - # ----- Force DEKURS column as Text ----- - text_col_name = "Text ASCII (RTF->plain)" - text_col_index = None - for i, col in enumerate(df.columns, start=1): - if col == text_col_name: - text_col_index = i - break - if text_col_index: - for row in ws.iter_rows(min_row=2, max_row=ws.max_row, - min_col=text_col_index, max_col=text_col_index): - for cell in row: - cell.number_format = "@" + # ----- Vertical centering for key columns ----- + vertically_centered = {"DATUM", "RODCIS", "PACIENT", "LEKAR", "VYKONY DNE"} + for row in ws.iter_rows(min_row=2, max_row=ws.max_row): + for cell in row: + header = ws.cell(row=1, column=cell.column).value + if header in vertically_centered: + align = cell.alignment + cell.alignment = Alignment( + horizontal=align.horizontal or "left", + vertical="center", + wrap_text=align.wrap_text + ) + elif isinstance(cell.value, str) and "\n" in cell.value: + cell.alignment = Alignment(wrap_text=True, vertical="top") # ----- Column widths ----- for col in ws.columns: header = col[0].value col_letter = get_column_letter(col[0].column) - if header == text_col_name: - ws.column_dimensions[col_letter].width = 110 # fixed width for DEKURS + if header and "DEKURS" in str(header).upper(): + ws.column_dimensions[col_letter].width = 110 else: max_len = max(len(str(cell.value)) if cell.value else 0 for cell in col) ws.column_dimensions[col_letter].width = min(max_len + 2, 80) @@ -140,3 +159,4 @@ with pd.ExcelWriter(xlsx_path, engine="openpyxl") as writer: cell.border = border print(f"✅ Export hotov: {xlsx_path}") +print(f"📅 Dotaz zahrnuje data od: {DATE_FROM}")