This commit is contained in:
2025-10-21 12:43:32 +02:00
parent 1f0b3a5d31
commit ea43e53949
3 changed files with 259 additions and 0 deletions

142
50 Dekurs.py Normal file
View File

@@ -0,0 +1,142 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Export DEKURS records from Medicus Firebird DB into Excel.
- RTF text decoded to plain ASCII (no diacritics, first 100 chars)
- Příjmení + Jméno merged into one 'Pacient' column
- Proper date formatting (DD.MM.YYYY)
- Thin black borders, gold header, wide text column
"""
import time
import re
import unicodedata
import fdb
import pandas as pd
from pathlib import Path
from striprtf.striprtf import rtf_to_text
from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
from openpyxl.utils import get_column_letter
# ================== CONFIGURATION ==================
FDB_PATH = r"z:\Medicus 3\data\medicus.fdb"
EXPORT_DIR = Path(r"D:\Dropbox\!!!Days\Downloads Z230")
# The timestamp in the file name keeps repeated exports from overwriting each other.
timestamp = time.strftime("%Y-%m-%d %H-%M-%S")
xlsx_path = EXPORT_DIR / f"Dekurz export ASCII {timestamp}.xlsx"
# Only records dated on or after this day (ISO date) are exported.
DATE_FROM = "2024-01-01"

# ================== FIREBIRD CONNECTION ==================
# NOTE(review): credentials are hard-coded here; consider moving them to
# environment variables or a local config file.
con = fdb.connect(
    dsn=f"localhost:{FDB_PATH}",
    user="sysdba",
    password="masterkey",
    charset="WIN1250"
)

# ================== QUERY ==================
# DATE_FROM is the constant defined above (not external input), so it is
# interpolated directly into the DATE literal.
sql = f"""
SELECT
dekurs.id,
kar.prijmeni,
kar.jmeno,
kar.rodcis,
uzivatel.zkratka,
dekurs.datum,
"DEKURS"
FROM dekurs
JOIN kar ON dekurs.idpac = kar.idpac
JOIN uzivatel ON dekurs.iduzi = uzivatel.iduzi
WHERE dekurs.datum >= DATE '{DATE_FROM}'
ORDER BY dekurs.datum DESC
"""

# Close the connection even when the query fails, so a bad query or a dropped
# link does not leave the database handle open.
try:
    df = pd.read_sql(sql, con)
finally:
    con.close()
# ================== DATA PREPARATION ==================
# Build one "surname, first name" column; missing name parts become empty strings.
surname = df["PRIJMENI"].fillna("")
first_name = df["JMENO"].fillna("")
df["PACIENT"] = surname + ", " + first_name
# The separate name columns are redundant once merged.
for name_col in ("PRIJMENI", "JMENO"):
    del df[name_col]
# Make DATUM a datetime column; values that cannot be parsed become NaT.
df["DATUM"] = pd.to_datetime(df["DATUM"], errors="coerce")
# Decode RTF → ASCII (first 100 chars)
def decode_rtf_ascii(text, max_len=100):
    """Decode an RTF note to plain ASCII text, truncated to ``max_len`` characters.

    Args:
        text: RTF source as ``str`` or ``bytes``. ``None`` or empty input
            yields ``""``. Bytes are decoded as cp1250, matching the WIN1250
            charset used for the database connection.
        max_len: Maximum length of the returned string. ``None`` disables
            truncation. Defaults to 100.

    Returns:
        The stripped plain text with control characters removed (tab, LF and
        CR are kept until whitespace collapsing), runs of three or more
        whitespace characters collapsed to a single space, diacritics reduced
        to their base letters and any remaining non-ASCII characters dropped.
    """
    if not text:
        return ""
    if isinstance(text, (bytes, bytearray)):
        # A BLOB may arrive as raw bytes (presumably depending on the driver
        # and column subtype); without decoding, the fallback below would
        # emit the "b'...'" repr instead of the note text.
        text = bytes(text).decode("cp1250", errors="replace")
    try:
        plain = rtf_to_text(text)
    except Exception:
        # Deliberate best effort: if the RTF parser cannot handle the value,
        # keep the raw content rather than losing the record.
        plain = str(text)
    plain = re.sub(r"[\x00-\x08\x0B-\x0C\x0E-\x1F]", "", plain)
    plain = re.sub(r"\s{3,}", " ", plain)
    # NFKD splits accented letters into base letter + combining mark; the
    # ASCII encode with "ignore" then drops the marks.
    plain = unicodedata.normalize("NFKD", plain).encode("ascii", "ignore").decode("ascii")
    plain = plain.strip()
    return plain if max_len is None else plain[:max_len]
# Add the decoded plain-text column, then discard the raw RTF source column.
decoded_notes = df["DEKURS"].apply(decode_rtf_ascii)
df["TEXT_ASCII"] = decoded_notes
del df["DEKURS"]
# Column headers as they should appear in the Excel sheet.
excel_headers = {
    "ID": "ID zaznamu",
    "PACIENT": "Pacient",
    "RODCIS": "Rodne cislo",
    "ZKRATKA": "Lekar",
    "DATUM": "Datum",
    "TEXT_ASCII": "Text ASCII (RTF->plain)",
}
df.rename(columns=excel_headers, inplace=True)
# ================== EXPORT TO EXCEL ==================
# All worksheet formatting happens inside the writer context; the workbook is
# saved when the context exits.
with pd.ExcelWriter(xlsx_path, engine="openpyxl") as writer:
    df.to_excel(writer, index=False, sheet_name="Dekurz")
    ws = writer.sheets["Dekurz"]

    # Resolve 1-based worksheet column numbers from the header names instead
    # of hard-coding letters: after the earlier drop/add steps "Datum" is not
    # in column F, so a fixed letter would format the wrong column.
    headers = list(df.columns)
    text_col_name = "Text ASCII (RTF->plain)"
    date_col_name = "Datum"
    text_col_index = headers.index(text_col_name) + 1 if text_col_name in headers else None
    date_col_index = headers.index(date_col_name) + 1 if date_col_name in headers else None

    # ----- Header formatting -----
    header_fill = PatternFill(start_color="FFD966", end_color="FFD966", fill_type="solid")
    for cell in ws[1]:
        cell.font = Font(bold=True)
        cell.alignment = Alignment(horizontal="center", vertical="center")
        cell.fill = header_fill

    # ----- Format Datum column -----
    if date_col_index:
        for row in ws.iter_rows(min_row=2, max_row=ws.max_row,
                                min_col=date_col_index, max_col=date_col_index):
            for cell in row:
                if isinstance(cell.value, pd.Timestamp):
                    cell.value = cell.value.date()  # remove time part
                cell.number_format = "DD.MM.YYYY"

    # ----- Force DEKURS column as Text -----
    if text_col_index:
        for row in ws.iter_rows(min_row=2, max_row=ws.max_row,
                                min_col=text_col_index, max_col=text_col_index):
            for cell in row:
                cell.number_format = "@"

    # ----- Column widths -----
    for col in ws.columns:
        header = col[0].value
        col_letter = get_column_letter(col[0].column)
        if header == text_col_name:
            ws.column_dimensions[col_letter].width = 110  # fixed width for DEKURS
        else:
            # Fit to the longest rendered value, capped so one long cell
            # cannot blow up the layout.
            max_len = max(len(str(cell.value)) if cell.value else 0 for cell in col)
            ws.column_dimensions[col_letter].width = min(max_len + 2, 80)

    # ----- Thin black borders -----
    thin = Side(border_style="thin", color="000000")
    border = Border(top=thin, left=thin, right=thin, bottom=thin)
    for row in ws.iter_rows(min_row=1, max_row=ws.max_row,
                            min_col=1, max_col=ws.max_column):
        for cell in row:
            cell.border = border

print(f"✅ Export hotov: {xlsx_path}")

77
54 Dekurz export noRTF.py Normal file
View File

@@ -0,0 +1,77 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Test export of DEKURS table without RTF column, to verify Excel corruption source.
"""
import time
import fdb
import pandas as pd
from pathlib import Path
from openpyxl.styles import Font, Alignment, PatternFill
from openpyxl.utils import get_column_letter
# ================== CONFIGURATION ==================
FDB_PATH = r"z:\Medicus 3\data\medicus.fdb"
EXPORT_DIR = Path(r"D:\Dropbox\!!!Days\Downloads Z230")
# The timestamp in the file name keeps repeated exports from overwriting each other.
timestamp = time.strftime("%Y-%m-%d %H-%M-%S")
xlsx_path = EXPORT_DIR / f"Dekurz export noRTF {timestamp}.xlsx"
# Only records dated on or after this day (ISO date) are exported.
DATE_FROM = "2024-01-01"

# ================== FIREBIRD CONNECTION ==================
# NOTE(review): credentials are hard-coded here; consider moving them to
# environment variables or a local config file.
con = fdb.connect(
    dsn=f"localhost:{FDB_PATH}",
    user="sysdba",
    password="masterkey",
    charset="WIN1250"
)

# ================== QUERY (without "DEKURS" column) ==================
# DATE_FROM is the constant defined above (not external input), so it is
# interpolated directly into the DATE literal.
sql = f"""
SELECT
dekurs.id,
kar.prijmeni,
kar.jmeno,
kar.rodcis,
uzivatel.zkratka,
dekurs.datum
FROM dekurs
JOIN kar ON dekurs.idpac = kar.idpac
JOIN uzivatel ON dekurs.iduzi = uzivatel.iduzi
WHERE dekurs.datum >= DATE '{DATE_FROM}'
ORDER BY dekurs.datum DESC
"""

# Close the connection even when the query fails, so a bad query or a dropped
# link does not leave the database handle open.
try:
    df = pd.read_sql(sql, con)
finally:
    con.close()
# Replace the raw database column names with human-readable Excel headers;
# any column not listed keeps its original name.
display_names = {
    "ID": "ID záznamu",
    "PRIJMENI": "Příjmení",
    "JMENO": "Jméno",
    "RODCIS": "Rodné číslo",
    "ZKRATKA": "Lékař",
    "DATUM": "Datum",
}
df.columns = [display_names.get(col_name, col_name) for col_name in df.columns]
# ================== EXPORT TO EXCEL ==================
# The workbook is saved when the writer context exits.
with pd.ExcelWriter(xlsx_path, engine="openpyxl") as writer:
    df.to_excel(writer, index=False, sheet_name="Dekurz")
    ws = writer.sheets["Dekurz"]

    # Header row: bold, centered, gold background.
    header_fill = PatternFill(start_color="FFD966", end_color="FFD966", fill_type="solid")
    for header_cell in ws[1]:
        header_cell.fill = header_fill
        header_cell.font = Font(bold=True)
        header_cell.alignment = Alignment(horizontal="center", vertical="center")

    # Column widths: longest rendered value plus padding, capped at 60.
    for column_cells in ws.iter_cols():
        lengths = [len(str(c.value)) for c in column_cells if c.value]
        widest = max(lengths, default=0)
        letter = get_column_letter(column_cells[0].column)
        ws.column_dimensions[letter].width = min(widest + 2, 60)

print(f"✅ Hotovo: {xlsx_path}")

40
55 Dekurz show decoded.py Normal file
View File

@@ -0,0 +1,40 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Show decoded physician notes (RTF → plain text) directly in console.
"""
import fdb
from striprtf.striprtf import rtf_to_text
# ===== connection =====
con = fdb.connect(
    dsn='localhost:z:\\Medicus 3\\data\\medicus.fdb',
    user='sysdba',
    password='masterkey',
    charset='WIN1250'
)
# Nested try/finally blocks release the cursor and the connection even when
# the query, the fetch or the printing raises.
try:
    cur = con.cursor()
    try:
        # ===== pick a few recent records =====
        cur.execute('SELECT ID, DATUM, "DEKURS" FROM DEKURS ORDER BY DATUM DESC ROWS 5')
        for id_, datum, rtf in cur.fetchall():
            print("=" * 80)
            print(f"ID: {id_} | Datum: {datum}")
            if not rtf:
                print("(empty)")
                continue
            try:
                plain = rtf_to_text(rtf)
            except Exception as e:
                # Show the decode failure in place of the note, so one bad
                # record does not abort the listing.
                plain = f"[decode error: {e}]"
            print(plain.strip()[:1500])  # show first 1500 chars of decoded text
            print()
    finally:
        cur.close()
finally:
    con.close()