# Script "50 Dekurs.py": export DEKURS records from the Medicus Firebird
# database into a formatted Excel workbook.
#
# - RTF notes are decoded to plain ASCII (diacritics stripped, first 100 chars)
# - surname and first name are merged into one "Pacient" column
# - dates are shown as DD.MM.YYYY
# - thin black borders, gold header row, wide text column

# ================== CONFIGURATION ==================
FDB_PATH = r"z:\Medicus 3\data\medicus.fdb"
EXPORT_DIR = Path(r"D:\Dropbox\!!!Days\Downloads Z230")
timestamp = time.strftime("%Y-%m-%d %H-%M-%S")
xlsx_path = EXPORT_DIR / f"Dekurz export ASCII {timestamp}.xlsx"
DATE_FROM = "2024-01-01"

SHEET_NAME = "Dekurz"
DATE_COLUMN = "Datum"
TEXT_COLUMN = "Text ASCII (RTF->plain)"

# Encoding used when a note arrives as raw bytes instead of str.
# NOTE(review): chosen to match the WIN1250 connection charset below;
# confirm against the actual BLOB sub-type of the DEKURS column.
BLOB_ENCODING = "cp1250"

# Control characters other than TAB, LF and CR.
_CONTROL_CHARS_RE = re.compile(r"[\x00-\x08\x0B-\x0C\x0E-\x1F]")
# Runs of three or more whitespace characters.
_WHITESPACE_RUN_RE = re.compile(r"\s{3,}")

# ================== QUERY ==================
# DATE_FROM is a constant defined above, not external input, so it is
# interpolated directly into the DATE literal.
sql = f"""
SELECT
    dekurs.id,
    kar.prijmeni,
    kar.jmeno,
    kar.rodcis,
    uzivatel.zkratka,
    dekurs.datum,
    "DEKURS"
FROM dekurs
JOIN kar ON dekurs.idpac = kar.idpac
JOIN uzivatel ON dekurs.iduzi = uzivatel.iduzi
WHERE dekurs.datum >= DATE '{DATE_FROM}'
ORDER BY dekurs.datum DESC
"""


def decode_rtf_ascii(text):
    """Decode an RTF note to a short plain-ASCII preview.

    Args:
        text: The RTF note as ``str`` or ``bytes``; may be ``None`` or empty.

    Returns:
        The decoded text with control characters removed, runs of three or
        more whitespace characters collapsed to one space, diacritics
        stripped, surrounding whitespace trimmed, and the result cut to at
        most 100 characters. An empty string is returned for empty input.
    """
    if not text:
        return ""
    if isinstance(text, (bytes, bytearray)):
        # Without this, the fallback below would produce the "b'...'" repr
        # of the bytes object instead of its content.
        text = bytes(text).decode(BLOB_ENCODING, errors="replace")
    try:
        plain = rtf_to_text(text)
    except Exception:
        # Deliberate best effort: one undecodable note must not abort the
        # whole export, so fall back to the raw text.
        plain = str(text)
    plain = _CONTROL_CHARS_RE.sub("", plain)
    plain = _WHITESPACE_RUN_RE.sub(" ", plain)
    # NFKD splits accented letters into base letter + combining mark; the
    # ASCII encode with "ignore" then drops the marks.
    plain = unicodedata.normalize("NFKD", plain).encode("ascii", "ignore").decode("ascii")
    return plain.strip()[:100]


def _load_records():
    """Run the query against the Firebird database and return a DataFrame."""
    # NOTE(review): the Firebird default SYSDBA credentials are hard-coded
    # here; consider moving them out of the source.
    con = fdb.connect(
        dsn=f"localhost:{FDB_PATH}",
        user="sysdba",
        password="masterkey",
        charset="WIN1250",
    )
    try:
        return pd.read_sql(sql, con)
    finally:
        # Close the connection even when the query fails.
        con.close()


def _prepare_frame(df):
    """Reshape the raw query result (in place) into the exported layout."""
    # Merge surname + first name into a single column.
    df["PACIENT"] = df["PRIJMENI"].fillna("") + ", " + df["JMENO"].fillna("")
    df.drop(columns=["PRIJMENI", "JMENO"], inplace=True)

    # Ensure DATUM is a datetime type; unparseable values become NaT.
    df["DATUM"] = pd.to_datetime(df["DATUM"], errors="coerce")

    # Replace the RTF column with its short ASCII preview.
    df["TEXT_ASCII"] = df["DEKURS"].apply(decode_rtf_ascii)
    df.drop(columns=["DEKURS"], inplace=True)

    df.rename(columns={
        "ID": "ID zaznamu",
        "PACIENT": "Pacient",
        "RODCIS": "Rodne cislo",
        "ZKRATKA": "Lekar",
        "DATUM": DATE_COLUMN,
        "TEXT_ASCII": TEXT_COLUMN,
    }, inplace=True)
    return df


def _column_index(df, name):
    """Return the 1-based worksheet column of *name*, or None if absent."""
    columns = list(df.columns)
    return columns.index(name) + 1 if name in columns else None


def _data_cells(ws, column_index):
    """Yield the cells of one worksheet column, skipping the header row."""
    for (cell,) in ws.iter_rows(min_row=2, max_row=ws.max_row,
                                min_col=column_index, max_col=column_index):
        yield cell


def _export_to_excel(df, path):
    """Write *df* to *path* as a formatted single-sheet workbook."""
    # The frame is written with index=False, so worksheet column N is
    # df.columns[N - 1]. Looking the columns up by name keeps the formatting
    # correct regardless of column order (a fixed column letter is not).
    date_idx = _column_index(df, DATE_COLUMN)
    text_idx = _column_index(df, TEXT_COLUMN)

    with pd.ExcelWriter(path, engine="openpyxl") as writer:
        df.to_excel(writer, index=False, sheet_name=SHEET_NAME)
        ws = writer.sheets[SHEET_NAME]

        # ----- Header formatting -----
        header_fill = PatternFill(start_color="FFD966", end_color="FFD966", fill_type="solid")
        for cell in ws[1]:
            cell.font = Font(bold=True)
            cell.alignment = Alignment(horizontal="center", vertical="center")
            cell.fill = header_fill

        # ----- Format Datum column -----
        if date_idx is not None:
            for cell in _data_cells(ws, date_idx):
                if isinstance(cell.value, pd.Timestamp):
                    cell.value = cell.value.date()  # remove time part
                cell.number_format = "DD.MM.YYYY"

        # ----- Force the note column to Text format -----
        if text_idx is not None:
            for cell in _data_cells(ws, text_idx):
                cell.number_format = "@"

        # ----- Column widths -----
        for col in ws.columns:
            col_letter = get_column_letter(col[0].column)
            if col[0].value == TEXT_COLUMN:
                # Fixed width for the note preview column.
                ws.column_dimensions[col_letter].width = 110
            else:
                max_len = max(
                    len(str(cell.value)) if cell.value is not None else 0
                    for cell in col
                )
                ws.column_dimensions[col_letter].width = min(max_len + 2, 80)

        # ----- Thin black borders -----
        thin = Side(border_style="thin", color="000000")
        border = Border(top=thin, left=thin, right=thin, bottom=thin)
        for row in ws.iter_rows(min_row=1, max_row=ws.max_row,
                                min_col=1, max_col=ws.max_column):
            for cell in row:
                cell.border = border


def main():
    """Load the records, prepare them and write the Excel export."""
    df = _prepare_frame(_load_records())
    _export_to_excel(df, xlsx_path)
    print(f"✅ Export hotov: {xlsx_path}")


if __name__ == "__main__":
    main()
# Script "54 Dekurz export noRTF.py": test export of the DEKURS table
# without the RTF column, used to verify the source of Excel corruption.

# ================== CONFIGURATION ==================
FDB_PATH = r"z:\Medicus 3\data\medicus.fdb"
EXPORT_DIR = Path(r"D:\Dropbox\!!!Days\Downloads Z230")
timestamp = time.strftime("%Y-%m-%d %H-%M-%S")
xlsx_path = EXPORT_DIR / f"Dekurz export noRTF {timestamp}.xlsx"

DATE_FROM = "2024-01-01"

SHEET_NAME = "Dekurz"

# ================== QUERY (without "DEKURS" column) ==================
# DATE_FROM is a constant defined above, not external input, so it is
# interpolated directly into the DATE literal.
sql = f"""
SELECT
    dekurs.id,
    kar.prijmeni,
    kar.jmeno,
    kar.rodcis,
    uzivatel.zkratka,
    dekurs.datum
FROM dekurs
JOIN kar ON dekurs.idpac = kar.idpac
JOIN uzivatel ON dekurs.iduzi = uzivatel.iduzi
WHERE dekurs.datum >= DATE '{DATE_FROM}'
ORDER BY dekurs.datum DESC
"""


def _load_records():
    """Run the query against the Firebird database and return a DataFrame."""
    # NOTE(review): the Firebird default SYSDBA credentials are hard-coded
    # here; consider moving them out of the source.
    con = fdb.connect(
        dsn=f"localhost:{FDB_PATH}",
        user="sysdba",
        password="masterkey",
        charset="WIN1250",
    )
    try:
        return pd.read_sql(sql, con)
    finally:
        # Close the connection even when the query fails.
        con.close()


def _export_to_excel(df, path):
    """Write *df* to *path* with a styled header and auto-sized columns."""
    with pd.ExcelWriter(path, engine="openpyxl") as writer:
        df.to_excel(writer, index=False, sheet_name=SHEET_NAME)
        ws = writer.sheets[SHEET_NAME]

        # Header styling
        header_fill = PatternFill(start_color="FFD966", end_color="FFD966", fill_type="solid")
        for cell in ws[1]:
            cell.font = Font(bold=True)
            cell.alignment = Alignment(horizontal="center", vertical="center")
            cell.fill = header_fill

        # Auto column widths: longest rendered value plus padding, capped at 60.
        for col in ws.columns:
            max_len = max(
                len(str(cell.value)) if cell.value is not None else 0
                for cell in col
            )
            ws.column_dimensions[get_column_letter(col[0].column)].width = min(max_len + 2, 60)


def main():
    """Load the records and write the test export."""
    df = _load_records()

    # Rename for nicer Excel output
    df.rename(columns={
        "ID": "ID záznamu",
        "PRIJMENI": "Příjmení",
        "JMENO": "Jméno",
        "RODCIS": "Rodné číslo",
        "ZKRATKA": "Lékař",
        "DATUM": "Datum",
    }, inplace=True)

    _export_to_excel(df, xlsx_path)
    print(f"✅ Hotovo: {xlsx_path}")


if __name__ == "__main__":
    main()
"ID": "ID záznamu", + "PRIJMENI": "Příjmení", + "JMENO": "Jméno", + "RODCIS": "Rodné číslo", + "ZKRATKA": "Lékař", + "DATUM": "Datum" +}, inplace=True) + +# ================== EXPORT TO EXCEL ================== +with pd.ExcelWriter(xlsx_path, engine="openpyxl") as writer: + df.to_excel(writer, index=False, sheet_name="Dekurz") + ws = writer.sheets["Dekurz"] + + # Header styling + header_fill = PatternFill(start_color="FFD966", end_color="FFD966", fill_type="solid") + for cell in ws[1]: + cell.font = Font(bold=True) + cell.alignment = Alignment(horizontal="center", vertical="center") + cell.fill = header_fill + + # Auto column widths + for col in ws.columns: + max_len = max(len(str(cell.value)) if cell.value else 0 for cell in col) + ws.column_dimensions[get_column_letter(col[0].column)].width = min(max_len + 2, 60) + +print(f"✅ Hotovo: {xlsx_path}") diff --git a/55 Dekurz show decoded.py b/55 Dekurz show decoded.py new file mode 100644 index 0000000..2e49255 --- /dev/null +++ b/55 Dekurz show decoded.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" +Show decoded physician notes (RTF → plain text) directly in console. +""" + +import fdb +from striprtf.striprtf import rtf_to_text + +# ===== connection ===== +con = fdb.connect( + dsn='localhost:z:\\Medicus 3\\data\\medicus.fdb', + user='sysdba', + password='masterkey', + charset='WIN1250' +) +cur = con.cursor() + +# ===== pick a few recent records ===== +cur.execute('SELECT ID, DATUM, "DEKURS" FROM DEKURS ORDER BY DATUM DESC ROWS 5') + +for id_, datum, rtf in cur.fetchall(): + print("=" * 80) + print(f"ID: {id_} | Datum: {datum}") + + if not rtf: + print("(empty)") + continue + + try: + plain = rtf_to_text(rtf) + except Exception as e: + plain = f"[decode error: {e}]" + + print(plain.strip()[:1500]) # show first 1500 chars of decoded text + print() + +cur.close() +con.close()