Tw22

2025-10-21 12:43:32 +02:00
parent 1f0b3a5d31
commit ea43e53949
3 changed files with 259 additions and 0 deletions
--- a/Dekurs.py
+++ b/Dekurs.py
@@ -0,0 +1,142 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
 Export DEKURS records from Medicus Firebird DB into Excel.
 - RTF text decoded to plain ASCII (no diacritics, first 100 chars)
 - Příjmení + Jméno merged into one 'Pacient' column
 - Proper date formatting (DD.MM.YYYY)
 - Thin black borders, gold header, wide text column
 """
 import time
 import re
 import unicodedata
 import fdb
 import pandas as pd
 from pathlib import Path
 from striprtf.striprtf import rtf_to_text
 from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
 from openpyxl.utils import get_column_letter
 # ================== CONFIGURATION ==================
 # Firebird database file, as addressed in the DSN below.
 FDB_PATH = r"z:\Medicus 3\data\medicus.fdb"
 # Folder that receives the generated workbook.
 EXPORT_DIR = Path(r"D:\Dropbox\!!!Days\Downloads Z230")
 # Run timestamp embedded in the file name; the time part uses hyphens,
 # so the resulting name contains no colons.
 timestamp = time.strftime("%Y-%m-%d %H-%M-%S")
 xlsx_path = EXPORT_DIR / f"Dekurz export ASCII {timestamp}.xlsx"
 # Earliest DEKURS date (ISO format, inclusive) to export.
 DATE_FROM = "2024-01-01"
 # ================== QUERY ==================
 # DATE_FROM is the constant defined above (not user input), so it is
 # interpolated directly into the DATE literal.
 sql = f"""
 SELECT
    dekurs.id,
    kar.prijmeni,
    kar.jmeno,
    kar.rodcis,
    uzivatel.zkratka,
    dekurs.datum,
    "DEKURS"
 FROM dekurs
 JOIN kar ON dekurs.idpac = kar.idpac
 JOIN uzivatel ON dekurs.iduzi = uzivatel.iduzi
 WHERE dekurs.datum >= DATE '{DATE_FROM}'
 ORDER BY dekurs.datum DESC
 """
 # ================== FIREBIRD CONNECTION ==================
 con = fdb.connect(
    dsn=f"localhost:{FDB_PATH}",
    user="sysdba",
    password="masterkey",
    charset="WIN1250"
 )
 # Close the connection even when the query fails, so an aborted run does
 # not leave the database attachment open.
 try:
    df = pd.read_sql(sql, con)
 finally:
    con.close()
 # ================== DATA PREPARATION ==================
 # Build one "surname, first name" column; missing parts become "".
 surname_part = df["PRIJMENI"].fillna("")
 first_name_part = df["JMENO"].fillna("")
 df["PACIENT"] = surname_part + ", " + first_name_part
 # The two source columns are no longer needed once PACIENT exists.
 for obsolete in ("PRIJMENI", "JMENO"):
    del df[obsolete]
 # Coerce DATUM to datetime; values that cannot be parsed become NaT.
 df["DATUM"] = pd.to_datetime(df["DATUM"], errors="coerce")
 # Decode RTF → ASCII (first 100 chars)
 def decode_rtf_ascii(text):
    """Convert one RTF note into a short, plain-ASCII preview.

    The RTF markup is stripped, control characters below 0x20 (except tab,
    LF and CR) are removed, runs of three or more whitespace characters are
    collapsed to a single space, diacritics are dropped via NFKD
    decomposition, and the stripped result is cut to 100 characters.

    Args:
        text: RTF source as ``str`` or ``bytes``. ``None``, NaN and empty
            values are accepted.

    Returns:
        ASCII-only string of at most 100 characters; "" for missing input.
    """
    # NaN is the only float that differs from itself. pandas may use it for
    # NULL; it is truthy and would otherwise be exported as the text "nan".
    if text is None or (isinstance(text, float) and text != text):
        return ""
    if isinstance(text, (bytes, bytearray)):
        # Undecoded BLOB content: assumed to use the WIN1250 charset the
        # connection is opened with; undecodable bytes are replaced.
        text = bytes(text).decode("cp1250", errors="replace")
    if not text:
        return ""
    try:
        plain = rtf_to_text(text)
    except Exception:
        # Deliberate best effort: a note that cannot be parsed as RTF is
        # exported as its raw text instead of aborting the whole export.
        plain = str(text)
    plain = re.sub(r"[\x00-\x08\x0B-\x0C\x0E-\x1F]", "", plain)
    plain = re.sub(r"\s{3,}", " ", plain)
    # NFKD splits accented letters into base letter + combining mark; the
    # ASCII encode with "ignore" then drops the marks.
    plain = unicodedata.normalize("NFKD", plain).encode("ascii", "ignore").decode("ascii")
    return plain.strip()[:100]
 # Replace the raw RTF column with its decoded ASCII preview.
 df["TEXT_ASCII"] = df["DEKURS"].apply(decode_rtf_ascii)
 del df["DEKURS"]
 # Column headers as they should appear in the workbook.
 excel_headers = {
    "ID": "ID zaznamu",
    "PACIENT": "Pacient",
    "RODCIS": "Rodne cislo",
    "ZKRATKA": "Lekar",
    "DATUM": "Datum",
    "TEXT_ASCII": "Text ASCII (RTF->plain)",
 }
 df.rename(columns=excel_headers, inplace=True)
 # ================== EXPORT TO EXCEL ==================
 with pd.ExcelWriter(xlsx_path, engine="openpyxl") as writer:
    df.to_excel(writer, index=False, sheet_name="Dekurz")
    ws = writer.sheets["Dekurz"]
    # Header text -> 1-based column number, read back from the sheet so the
    # formatting below follows the actual layout instead of fixed letters.
    col_by_header = {cell.value: cell.column for cell in ws[1]}
    # ----- Header formatting -----
    header_fill = PatternFill(start_color="FFD966", end_color="FFD966", fill_type="solid")
    for cell in ws[1]:
        cell.font = Font(bold=True)
        cell.alignment = Alignment(horizontal="center", vertical="center")
        cell.fill = header_fill
    # ----- Format Datum column -----
    # Located by its header rather than a fixed letter, so the date format
    # lands on the date column wherever it sits in the sheet.
    date_col_index = col_by_header.get("Datum")
    if date_col_index is not None:
        for row in ws.iter_rows(min_row=2, max_row=ws.max_row,
                                min_col=date_col_index, max_col=date_col_index):
            for cell in row:
                if isinstance(cell.value, pd.Timestamp):
                    cell.value = cell.value.date()  # remove time part
                cell.number_format = "DD.MM.YYYY"
    # ----- Force DEKURS column as Text -----
    text_col_name = "Text ASCII (RTF->plain)"
    text_col_index = col_by_header.get(text_col_name)
    if text_col_index is not None:
        for row in ws.iter_rows(min_row=2, max_row=ws.max_row,
                                min_col=text_col_index, max_col=text_col_index):
            for cell in row:
                cell.number_format = "@"  # "@" is the Excel text format
    # ----- Column widths -----
    for col in ws.columns:
        header = col[0].value
        col_letter = get_column_letter(col[0].column)
        if header == text_col_name:
            ws.column_dimensions[col_letter].width = 110  # fixed width for DEKURS
        else:
            # Fit the longest cell (header included), capped at 80.
            max_len = max(len(str(cell.value)) if cell.value else 0 for cell in col)
            ws.column_dimensions[col_letter].width = min(max_len + 2, 80)
    # ----- Thin black borders -----
    thin = Side(border_style="thin", color="000000")
    border = Border(top=thin, left=thin, right=thin, bottom=thin)
    for row in ws.iter_rows(min_row=1, max_row=ws.max_row,
                            min_col=1, max_col=ws.max_column):
        for cell in row:
            cell.border = border
 print(f"✅ Export hotov: {xlsx_path}")
--- a/noRTF.py
+++ b/noRTF.py
@@ -0,0 +1,77 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
 Test export of DEKURS table without RTF column, to verify Excel corruption source.
 """
 import time
 import fdb
 import pandas as pd
 from pathlib import Path
 from openpyxl.styles import Font, Alignment, PatternFill
 from openpyxl.utils import get_column_letter
 # ================== CONFIGURATION ==================
 # Firebird database file, as addressed in the DSN below.
 FDB_PATH = r"z:\Medicus 3\data\medicus.fdb"
 # Folder that receives the generated workbook.
 EXPORT_DIR = Path(r"D:\Dropbox\!!!Days\Downloads Z230")
 # Run timestamp embedded in the file name; the time part uses hyphens,
 # so the resulting name contains no colons.
 timestamp = time.strftime("%Y-%m-%d %H-%M-%S")
 xlsx_path = EXPORT_DIR / f"Dekurz export noRTF {timestamp}.xlsx"
 # Earliest DEKURS date (ISO format, inclusive) to export.
 DATE_FROM = "2024-01-01"
 # ================== QUERY (without "DEKURS" column) ==================
 # DATE_FROM is the constant defined above (not user input), so it is
 # interpolated directly into the DATE literal.
 sql = f"""
 SELECT
    dekurs.id,
    kar.prijmeni,
    kar.jmeno,
    kar.rodcis,
    uzivatel.zkratka,
    dekurs.datum
 FROM dekurs
 JOIN kar ON dekurs.idpac = kar.idpac
 JOIN uzivatel ON dekurs.iduzi = uzivatel.iduzi
 WHERE dekurs.datum >= DATE '{DATE_FROM}'
 ORDER BY dekurs.datum DESC
 """
 # ================== FIREBIRD CONNECTION ==================
 con = fdb.connect(
    dsn=f"localhost:{FDB_PATH}",
    user="sysdba",
    password="masterkey",
    charset="WIN1250"
 )
 # Close the connection even when the query fails, so an aborted run does
 # not leave the database attachment open.
 try:
    df = pd.read_sql(sql, con)
 finally:
    con.close()
 # Column headers as they should appear in the workbook.
 excel_headers = {
    "ID": "ID záznamu",
    "PRIJMENI": "Příjmení",
    "JMENO": "Jméno",
    "RODCIS": "Rodné číslo",
    "ZKRATKA": "Lékař",
    "DATUM": "Datum",
 }
 df.rename(columns=excel_headers, inplace=True)
 # ================== EXPORT TO EXCEL ==================
 with pd.ExcelWriter(xlsx_path, engine="openpyxl") as writer:
    df.to_excel(writer, index=False, sheet_name="Dekurz")
    ws = writer.sheets["Dekurz"]
    # Header row: bold, centered, gold background.
    header_fill = PatternFill(start_color="FFD966", end_color="FFD966", fill_type="solid")
    for header_cell in ws[1]:
        header_cell.fill = header_fill
        header_cell.alignment = Alignment(horizontal="center", vertical="center")
        header_cell.font = Font(bold=True)
    # Size each column to its longest cell (header included), capped at 60.
    for column_cells in ws.columns:
        lengths = [len(str(c.value)) if c.value else 0 for c in column_cells]
        letter = get_column_letter(column_cells[0].column)
        ws.column_dimensions[letter].width = min(max(lengths) + 2, 60)
 print(f"✅ Hotovo: {xlsx_path}")
--- a/decoded.py
+++ b/decoded.py
@@ -0,0 +1,40 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
 Show decoded physician notes (RTF → plain text) directly in console.
 """
 import fdb
 from striprtf.striprtf import rtf_to_text
 # ===== connection =====
 con = fdb.connect(
    dsn='localhost:z:\\Medicus 3\\data\\medicus.fdb',
    user='sysdba',
    password='masterkey',
    charset='WIN1250'
 )
 # try/finally releases the cursor and the connection even when the query
 # or the console output raises.
 try:
    cur = con.cursor()
    try:
        # ===== pick a few recent records =====
        # The five most recent DEKURS rows, newest first.
        cur.execute('SELECT ID, DATUM, "DEKURS" FROM DEKURS ORDER BY DATUM DESC ROWS 5')
        for id_, datum, rtf in cur.fetchall():
            print("=" * 80)
            print(f"ID: {id_} | Datum: {datum}")
            if not rtf:
                print("(empty)")
                continue
            if isinstance(rtf, (bytes, bytearray)):
                # Undecoded BLOB content: assumed to use the WIN1250
                # charset the connection is opened with.
                rtf = bytes(rtf).decode("cp1250", errors="replace")
            try:
                plain = rtf_to_text(rtf)
            except Exception as e:
                # Keep going: show the failure in place of the note text.
                plain = f"[decode error: {e}]"
            print(plain.strip()[:1500])   # show first 1500 chars of decoded text
            print()
    finally:
        cur.close()
 finally:
    con.close()