Tw22

2025-10-21 12:43:32 +02:00
parent 1f0b3a5d31
commit ea43e53949
3 changed files with 259 additions and 0 deletions
@@ -0,0 +1,142 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+Export DEKURS records from Medicus Firebird DB into Excel.
+- RTF text decoded to plain ASCII (no diacritics, first 100 chars)
+- Příjmení + Jméno merged into one 'Pacient' column
+- Proper date formatting (DD.MM.YYYY)
+- Thin black borders, gold header, wide text column
+"""
+
+import time
+import re
+import unicodedata
+import fdb
+import pandas as pd
+from pathlib import Path
+from striprtf.striprtf import rtf_to_text
+from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
+from openpyxl.utils import get_column_letter
+
+# ================== CONFIGURATION ==================
+# Firebird database file that the export reads from.
+FDB_PATH = r"z:\Medicus 3\data\medicus.fdb"
+# Directory that receives the generated workbook.
+EXPORT_DIR = Path(r"D:\Dropbox\!!!Days\Downloads Z230")
+# Run timestamp (second resolution) embedded in the output file name.
+timestamp = time.strftime("%Y-%m-%d %H-%M-%S")
+xlsx_path = EXPORT_DIR / f"Dekurz export ASCII {timestamp}.xlsx"
+# Lower bound (inclusive, ISO YYYY-MM-DD) for dekurs.datum.
+DATE_FROM = "2024-01-01"
+
+# ================== QUERY ==================
+# Built before connecting so that nothing can fail between opening the
+# connection and entering the try/finally that closes it.
+# DATE_FROM is the constant defined above, not external input, so it is
+# interpolated directly into the DATE literal.
+# NOTE(review): "DEKURS" is double-quoted, which Firebird treats as a
+# delimited identifier -- presumably the RTF column of table dekurs; confirm.
+sql = f"""
+SELECT
+    dekurs.id,
+    kar.prijmeni,
+    kar.jmeno,
+    kar.rodcis,
+    uzivatel.zkratka,
+    dekurs.datum,
+    "DEKURS"
+FROM dekurs
+JOIN kar ON dekurs.idpac = kar.idpac
+JOIN uzivatel ON dekurs.iduzi = uzivatel.iduzi
+WHERE dekurs.datum >= DATE '{DATE_FROM}'
+ORDER BY dekurs.datum DESC
+"""
+
+# ================== FIREBIRD CONNECTION ==================
+# NOTE(review): the SYSDBA account and its password are hard-coded here;
+# consider moving them out of the source file.
+con = fdb.connect(
+    dsn=f"localhost:{FDB_PATH}",
+    user="sysdba",
+    password="masterkey",
+    charset="WIN1250"
+)
+
+# Release the connection even when the query raises; previously a failing
+# read_sql() skipped con.close() and left the connection open.
+try:
+    df = pd.read_sql(sql, con)
+finally:
+    con.close()
+
+# ================== DATA PREPARATION ==================
+
+# DATUM has to be a real datetime column; values that cannot be parsed
+# are coerced to NaT instead of raising.
+df["DATUM"] = pd.to_datetime(df["DATUM"], errors="coerce")
+
+# Combine surname and given name into a single PACIENT column
+# ("surname, given name"); missing parts are treated as empty strings.
+# pop() removes the two source columns while handing back their values.
+surname = df.pop("PRIJMENI").fillna("")
+given_name = df.pop("JMENO").fillna("")
+df["PACIENT"] = surname + ", " + given_name
+
+# Decode RTF → ASCII (first 100 chars)
+def decode_rtf_ascii(text, encoding="cp1250"):
+    """Convert one RTF value into a short, plain-ASCII preview.
+
+    Args:
+        text: Raw cell value. ``None`` or an empty value yields ``""``.
+            ``bytes``/``bytearray`` input is decoded with *encoding* first;
+            otherwise the RTF parser would reject it and the fallback
+            ``str()`` would write the ``b'...'`` repr into the export.
+        encoding: Codec used only for bytes input. Defaults to ``cp1250``,
+            Python's name for the WIN1250 charset the connection is
+            opened with.
+
+    Returns:
+        The stripped text with control characters removed, long whitespace
+        runs collapsed, diacritics dropped, cut to at most 100 characters.
+    """
+    if not text:
+        return ""
+    if isinstance(text, (bytes, bytearray)):
+        # errors="replace" keeps undecodable bytes from aborting the export;
+        # the replacement characters are dropped by the ASCII step below.
+        text = text.decode(encoding, errors="replace")
+    try:
+        plain = rtf_to_text(text)
+    except Exception:
+        # Deliberate best effort: a value the RTF parser cannot handle is
+        # exported as its plain string form instead of failing the run.
+        plain = str(text)
+    # Remove control characters but keep tab, line feed and carriage return.
+    plain = re.sub(r"[\x00-\x08\x0B-\x0C\x0E-\x1F]", "", plain)
+    # Only runs of three or more whitespace characters are collapsed.
+    plain = re.sub(r"\s{3,}", " ", plain)
+    # NFKD splits accented letters into base letter + combining mark; the
+    # ASCII encode with "ignore" then discards the marks.
+    plain = unicodedata.normalize("NFKD", plain).encode("ascii", "ignore").decode("ascii")
+    return plain.strip()[:100]
+
+# Swap the raw RTF column for its ASCII preview: pop() removes DEKURS and
+# the decoded result is appended as TEXT_ASCII.
+df["TEXT_ASCII"] = df.pop("DEKURS").apply(decode_rtf_ascii)
+
+# Captions used for the header row of the Excel sheet.
+excel_headers = {
+    "ID": "ID zaznamu",
+    "PACIENT": "Pacient",
+    "RODCIS": "Rodne cislo",
+    "ZKRATKA": "Lekar",
+    "DATUM": "Datum",
+    "TEXT_ASCII": "Text ASCII (RTF->plain)",
+}
+df.rename(columns=excel_headers, inplace=True)
+
+# ================== EXPORT TO EXCEL ==================
+# Header captions of the two columns that need a specific cell format.
+date_col_name = "Datum"
+text_col_name = "Text ASCII (RTF->plain)"
+
+with pd.ExcelWriter(xlsx_path, engine="openpyxl") as writer:
+    df.to_excel(writer, index=False, sheet_name="Dekurz")
+    ws = writer.sheets["Dekurz"]
+
+    # Map every header caption to its 1-based sheet column. Columns are
+    # looked up by caption because the earlier drop/append steps reorder
+    # the frame: a hard-coded "F" pointed at the text column, so the date
+    # column never received its format.
+    col_index_by_header = {cell.value: cell.column for cell in ws[1]}
+
+    # ----- Header formatting -----
+    header_fill = PatternFill(start_color="FFD966", end_color="FFD966", fill_type="solid")
+    header_font = Font(bold=True)
+    header_alignment = Alignment(horizontal="center", vertical="center")
+    for cell in ws[1]:
+        cell.font = header_font
+        cell.alignment = header_alignment
+        cell.fill = header_fill
+
+    # ----- Format Datum column -----
+    date_col_index = col_index_by_header.get(date_col_name)
+    if date_col_index:
+        for row in ws.iter_rows(min_row=2, max_row=ws.max_row,
+                                min_col=date_col_index, max_col=date_col_index):
+            for cell in row:
+                if isinstance(cell.value, pd.Timestamp):
+                    cell.value = cell.value.date()  # remove time part
+                cell.number_format = "DD.MM.YYYY"
+
+    # ----- Force text column as Text -----
+    text_col_index = col_index_by_header.get(text_col_name)
+    if text_col_index:
+        for row in ws.iter_rows(min_row=2, max_row=ws.max_row,
+                                min_col=text_col_index, max_col=text_col_index):
+            for cell in row:
+                cell.number_format = "@"
+
+    # ----- Column widths -----
+    for col in ws.columns:
+        col_letter = get_column_letter(col[0].column)
+        if col[0].value == text_col_name:
+            width = 110  # fixed width for the text preview
+        else:
+            # "is not None" so that a legitimate 0 still counts as one
+            # character; only empty cells contribute length 0.
+            longest = max(
+                len(str(cell.value)) if cell.value is not None else 0
+                for cell in col
+            )
+            width = min(longest + 2, 80)  # padded, capped at 80
+        ws.column_dimensions[col_letter].width = width
+
+    # ----- Thin black borders -----
+    thin = Side(border_style="thin", color="000000")
+    border = Border(top=thin, left=thin, right=thin, bottom=thin)
+    for row in ws.iter_rows(min_row=1, max_row=ws.max_row,
+                            min_col=1, max_col=ws.max_column):
+        for cell in row:
+            cell.border = border
+
+print(f"✅ Export hotov: {xlsx_path}")