Files
reporty/50 Dekurs.py
2025-10-21 12:43:32 +02:00

143 lines
4.6 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Export DEKURS records from Medicus Firebird DB into Excel.
- RTF text decoded to plain ASCII (no diacritics, first 100 chars)
- Příjmení + Jméno merged into one 'Pacient' column
- Proper date formatting (DD.MM.YYYY)
- Thin black borders, gold header, wide text column
"""
import time
import re
import unicodedata
import fdb
import pandas as pd
from pathlib import Path
from striprtf.striprtf import rtf_to_text
from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
from openpyxl.utils import get_column_letter
# ================== CONFIGURATION ==================
# Only DEKURS rows dated on or after this ISO date (YYYY-MM-DD) are exported.
DATE_FROM = "2024-01-01"
# Firebird database file that is read, and the folder the workbook is written to.
FDB_PATH = r"z:\Medicus 3\data\medicus.fdb"
EXPORT_DIR = Path(r"D:\Dropbox\!!!Days\Downloads Z230")
# Local-time run stamp embedded in the file name so repeated runs do not collide.
timestamp = time.strftime("%Y-%m-%d %H-%M-%S", time.localtime())
xlsx_path = EXPORT_DIR.joinpath(f"Dekurz export ASCII {timestamp}.xlsx")
# ================== FIREBIRD CONNECTION ==================
# Connect to the database file at FDB_PATH through the local Firebird server.
con = fdb.connect(
    dsn=f"localhost:{FDB_PATH}",
    user="sysdba",
    password="masterkey",
    charset="WIN1250",
)
# ================== QUERY ==================
# One row per DEKURS record dated on or after DATE_FROM, newest first, joined
# with the matching KAR row (surname, first name, RODCIS) and the UZIVATEL
# row (ZKRATKA). DATE_FROM is a module constant, not user input.
sql = f"""
SELECT
dekurs.id,
kar.prijmeni,
kar.jmeno,
kar.rodcis,
uzivatel.zkratka,
dekurs.datum,
"DEKURS"
FROM dekurs
JOIN kar ON dekurs.idpac = kar.idpac
JOIN uzivatel ON dekurs.iduzi = uzivatel.iduzi
WHERE dekurs.datum >= DATE '{DATE_FROM}'
ORDER BY dekurs.datum DESC
"""
try:
    df = pd.read_sql(sql, con)
finally:
    # Always release the connection, even when the query or fetch fails.
    con.close()
# ================== DATA PREPARATION ==================
# Take the two name columns out of the frame (missing values become empty
# strings) and append them back as a single "surname, first name" column.
surname = df.pop("PRIJMENI").fillna("")
first_name = df.pop("JMENO").fillna("")
df["PACIENT"] = surname + ", " + first_name
# Coerce DATUM to datetime; values that cannot be parsed become NaT.
parsed_dates = pd.to_datetime(df["DATUM"], errors="coerce")
df["DATUM"] = parsed_dates
# Decode RTF → ASCII (first 100 chars)
def decode_rtf_ascii(text):
    """Turn an RTF value into a short plain-ASCII preview.

    The value is passed through ``rtf_to_text``; if that raises, the raw
    text is used unchanged (best effort). Control characters are then
    removed, runs of three or more whitespace characters collapse to one
    space, diacritics are dropped via NFKD normalisation, and the stripped
    result is cut to 100 characters.

    Args:
        text: RTF source as ``str`` or ``bytes``. Any falsy value
            (``None``, ``""``, ``b""``) yields an empty string.

    Returns:
        An ASCII-only string of at most 100 characters.
    """
    if not text:
        return ""
    if isinstance(text, (bytes, bytearray)):
        # Undecoded bytes would make the str() fallback below produce the
        # "b'...'" repr. cp1250 mirrors the WIN1250 connection charset.
        text = bytes(text).decode("cp1250", errors="replace")
    try:
        plain = rtf_to_text(text)
    except Exception:
        # Deliberate best effort: keep the raw content rather than fail the export.
        plain = str(text)
    # Drop control characters but keep tab, line feed and carriage return.
    plain = re.sub(r"[\x00-\x08\x0B-\x0C\x0E-\x1F]", "", plain)
    plain = re.sub(r"\s{3,}", " ", plain)
    # NFKD splits accented letters into base + combining mark; the ASCII
    # encode with "ignore" then discards the marks.
    plain = unicodedata.normalize("NFKD", plain).encode("ascii", "ignore").decode("ascii")
    return plain.strip()[:100]
# Swap the raw RTF column for its decoded ASCII preview, appended as the last column.
raw_rtf = df.pop("DEKURS")
df["TEXT_ASCII"] = raw_rtf.apply(decode_rtf_ascii)
# Header labels used in the Excel sheet.
excel_headers = {
    "ID": "ID zaznamu",
    "PACIENT": "Pacient",
    "RODCIS": "Rodne cislo",
    "ZKRATKA": "Lekar",
    "DATUM": "Datum",
    "TEXT_ASCII": "Text ASCII (RTF->plain)",
}
df.rename(columns=excel_headers, inplace=True)
# ================== EXPORT TO EXCEL ==================
# Write the frame to one sheet, then style it in place before the writer saves.
text_col_name = "Text ASCII (RTF->plain)"
date_col_name = "Datum"
with pd.ExcelWriter(xlsx_path, engine="openpyxl") as writer:
    df.to_excel(writer, index=False, sheet_name="Dekurz")
    ws = writer.sheets["Dekurz"]

    # Header text -> 1-based column number. Columns are found by name because
    # derived columns are appended to the frame, so fixed letters go stale
    # (the previously hard-coded "F" is the text column, not Datum).
    header_to_col = {cell.value: cell.column for cell in ws[1]}

    # ----- Header formatting -----
    header_fill = PatternFill(start_color="FFD966", end_color="FFD966", fill_type="solid")
    for cell in ws[1]:
        cell.font = Font(bold=True)
        cell.alignment = Alignment(horizontal="center", vertical="center")
        cell.fill = header_fill

    # ----- Format Datum column -----
    date_col_index = header_to_col.get(date_col_name)
    if date_col_index:
        for row in ws.iter_rows(min_row=2, max_row=ws.max_row,
                                min_col=date_col_index, max_col=date_col_index):
            for cell in row:
                if isinstance(cell.value, pd.Timestamp):
                    cell.value = cell.value.date()  # remove time part
                cell.number_format = "DD.MM.YYYY"

    # ----- Force DEKURS column as Text -----
    text_col_index = header_to_col.get(text_col_name)
    if text_col_index:
        for row in ws.iter_rows(min_row=2, max_row=ws.max_row,
                                min_col=text_col_index, max_col=text_col_index):
            for cell in row:
                cell.number_format = "@"

    # ----- Column widths -----
    for col in ws.columns:
        header = col[0].value
        col_letter = get_column_letter(col[0].column)
        if header == text_col_name:
            ws.column_dimensions[col_letter].width = 110  # fixed width for DEKURS
        else:
            # Fit to the longest cell (header included), capped at 80.
            max_len = max(len(str(cell.value)) if cell.value else 0 for cell in col)
            ws.column_dimensions[col_letter].width = min(max_len + 2, 80)

    # ----- Thin black borders -----
    thin = Side(border_style="thin", color="000000")
    border = Border(top=thin, left=thin, right=thin, bottom=thin)
    for row in ws.iter_rows(min_row=1, max_row=ws.max_row,
                            min_col=1, max_col=ws.max_column):
        for cell in row:
            cell.border = border

print(f"✅ Export hotov: {xlsx_path}")