This commit is contained in:
2025-10-21 14:02:42 +02:00
parent 1d2bbec61b
commit 348b393da6

View File

@@ -3,130 +3,149 @@
""" """
Export DEKURS records from Medicus Firebird DB into Excel. Export DEKURS records from Medicus Firebird DB into Excel.
- RTF text decoded to plain ASCII (no diacritics, first 100 chars) - Includes only the last 365 days (one year) from the current date
- Příjmení + Jméno merged into one 'Pacient' column - RTF text decoded and cleaned (Excel-safe, preserved newlines)
- Proper date formatting (DD.MM.YYYY) - Výkony of the same day concatenated into 'VYKONY DNE'
- Thin black borders, gold header, wide text column - Příjmení + Jméno merged into 'PACIENT'
- Proper column order and naming
- Bright yellow header, thin black borders, auto column widths
- Timestamped Excel filename
""" """
import time
import re import re
import unicodedata
import fdb
import pandas as pd import pandas as pd
import firebirdsql as fb
from datetime import datetime, timedelta
from pathlib import Path from pathlib import Path
from striprtf.striprtf import rtf_to_text from striprtf.striprtf import rtf_to_text
from openpyxl.styles import Font, Alignment, PatternFill, Border, Side from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
from openpyxl.utils import get_column_letter from openpyxl.utils import get_column_letter
# ================== CONFIGURATION ================== # ================== CONFIGURATION ==================
FDB_PATH = r"z:\Medicus 3\data\medicus.fdb" FDB_PATH = r"z:\Medicus 3\data\MEDICUS.FDB"
EXPORT_DIR = Path(r"D:\Dropbox\!!!Days\Downloads Z230") EXPORT_DIR = Path(r"u:\Dropbox\!!!Days\Downloads Z230")
timestamp = time.strftime("%Y-%m-%d %H-%M-%S")
xlsx_path = EXPORT_DIR / f"Dekurz export ASCII {timestamp}.xlsx" # calculate the start of the last year (365 days) dynamically
DATE_FROM = "2024-01-01" DATE_FROM = (datetime.now() - timedelta(days=365)).strftime("%Y-%m-%d")
timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
xlsx_path = EXPORT_DIR / f"{timestamp}_Dekurz (poslední rok).xlsx"
# ================== FIREBIRD CONNECTION ================== # ================== FIREBIRD CONNECTION ==================
con = fdb.connect( conn = fb.connect(
dsn=f"localhost:{FDB_PATH}", host="192.168.1.4",
user="sysdba", port=3050,
database=FDB_PATH,
user="SYSDBA",
password="masterkey", password="masterkey",
charset="WIN1250" charset="WIN1250",
) )
def query_df(sql, params=None, connection=None):
    """Run a SQL statement and return the whole result set as a DataFrame.

    Args:
        sql: SQL text to execute.
        params: Optional sequence of bind parameters. An empty tuple is
            handed to the driver when this is omitted or empty.
        connection: Optional DB-API connection to run the statement on.
            Defaults to the module-level ``conn``.

    Returns:
        A ``pandas.DataFrame`` with one row per fetched record. Column
        names are taken from ``cursor.description`` with surrounding
        whitespace stripped.
    """
    db = conn if connection is None else connection
    cur = db.cursor()
    try:
        cur.execute(sql, params or ())
        rows = cur.fetchall()
        cols = [d[0].strip() for d in cur.description]
    finally:
        # Release the cursor even when execute/fetch fails; it was
        # previously left open for the lifetime of the connection.
        cur.close()
    return pd.DataFrame(rows, columns=cols)
# ================== QUERY ================== # ================== QUERY ==================
sql = f""" sql = f"""
SELECT SELECT
dekurs.id, d.id,
kar.prijmeni, k.prijmeni,
kar.jmeno, k.jmeno,
kar.rodcis, k.rodcis,
uzivatel.zkratka, u.zkratka,
dekurs.datum, d.datum,
"DEKURS" d.dekurs,
FROM dekurs (
JOIN kar ON dekurs.idpac = kar.idpac SELECT LIST(dd.kod, ', ')
JOIN uzivatel ON dekurs.iduzi = uzivatel.iduzi FROM dokladd dd
WHERE dekurs.datum >= DATE '{DATE_FROM}' WHERE dd.rodcis = k.rodcis
ORDER BY dekurs.datum DESC AND CAST(dd.datose AS DATE) = CAST(d.datum AS DATE)
) AS vykony_dne
FROM dekurs d
JOIN kar k ON d.idpac = k.idpac
JOIN uzivatel u ON d.iduzi = u.iduzi
WHERE d.datum >= DATE '{DATE_FROM}'
ORDER BY d.datum DESC
""" """
df = pd.read_sql(sql, con) df = query_df(sql)
con.close() conn.close()
# ================== DATA PREPARATION ================== # ================== DATA CLEANING ==================
def safe_rtf_to_text(x, encoding="cp1250"):
    """Convert an RTF value to plain text without ever raising.

    Args:
        x: Cell value to convert. May be ``str``, ``bytes``/``bytearray``,
            or any other object (e.g. ``None`` or NaN for a missing value).
        encoding: Codec used to decode byte input before conversion.
            Defaults to cp1250 to match the WIN1250 connection charset.
            NOTE(review): confirm this is how the DEKURS column is stored.

    Returns:
        The plain text produced by ``rtf_to_text``; the (decoded) input
        unchanged when conversion fails; ``""`` for blank text or for any
        value that is neither text nor bytes.
    """
    if isinstance(x, (bytes, bytearray)):
        # Undecoded BLOB content used to fall through to "" and was lost.
        x = bytes(x).decode(encoding, errors="replace")
    if not isinstance(x, str) or not x.strip():
        return ""
    try:
        return rtf_to_text(x)
    except Exception:
        # Deliberate best effort: keep the raw text rather than abort the
        # whole export because one record fails to parse.
        return x
df["DEKURS"] = df["DEKURS"].apply(safe_rtf_to_text)
df.replace({r'(\r\n|\r|\n)': r'\r\n'}, regex=True, inplace=True)
df.replace({r'[\ud800-\udfff\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]+': ''},
regex=True, inplace=True)
df.replace({r'(\r\n){2,}': r'\r\n', r'(\r\n)+$': ''},
regex=True, inplace=True)
# Merge Příjmení + Jméno
df["PACIENT"] = df["PRIJMENI"].fillna("") + ", " + df["JMENO"].fillna("") df["PACIENT"] = df["PRIJMENI"].fillna("") + ", " + df["JMENO"].fillna("")
df.drop(columns=["PRIJMENI", "JMENO"], inplace=True) df.drop(columns=["PRIJMENI", "JMENO"], inplace=True)
# Ensure DATUM is datetime type df.rename(columns={"ZKRATKA": "LEKAR", "VYKONY_DNE": "VYKONY DNE"}, inplace=True)
df["DATUM"] = pd.to_datetime(df["DATUM"], errors="coerce") df["DATUM"] = pd.to_datetime(df["DATUM"], errors="coerce").dt.date
df.drop(columns=[c for c in df.columns if "ASCII" in c.upper()], inplace=True, errors="ignore")
# Decode RTF → ASCII (first 100 chars) desired_order = ["DATUM", "RODCIS", "PACIENT", "LEKAR", "VYKONY DNE", "DEKURS"]
def decode_rtf_ascii(text): df = df[[c for c in desired_order if c in df.columns]]
"""Decode RTF, clean control chars, convert to ASCII (no diacritics), limit to 100 chars.""" # ================== CLEANUP OLD FILES ==================
if not text: for old_file in EXPORT_DIR.glob("*Dekurz (poslední rok)*.xlsx"):
return ""
try: try:
plain = rtf_to_text(text) old_file.unlink()
except Exception: print(f"🧹 Deleted old file: {old_file.name}")
plain = str(text) except Exception as e:
plain = re.sub(r"[\x00-\x08\x0B-\x0C\x0E-\x1F]", "", plain) print(f"⚠️ Could not delete {old_file.name}: {e}")
plain = re.sub(r"\s{3,}", " ", plain)
plain = unicodedata.normalize("NFKD", plain).encode("ascii", "ignore").decode("ascii")
return plain.strip()[:100]
df["TEXT_ASCII"] = df["DEKURS"].apply(decode_rtf_ascii)
df.drop(columns=["DEKURS"], inplace=True)
# Rename columns for Excel
df.rename(columns={
"ID": "ID zaznamu",
"PACIENT": "Pacient",
"RODCIS": "Rodne cislo",
"ZKRATKA": "Lekar",
"DATUM": "Datum",
"TEXT_ASCII": "Text ASCII (RTF->plain)"
}, inplace=True)
# ================== EXPORT TO EXCEL ================== # ================== EXPORT TO EXCEL ==================
with pd.ExcelWriter(xlsx_path, engine="openpyxl") as writer: with pd.ExcelWriter(xlsx_path, engine="openpyxl") as writer:
df.to_excel(writer, index=False, sheet_name="Dekurz") df.to_excel(writer, index=False, sheet_name="Dekurz")
ws = writer.sheets["Dekurz"] ws = writer.sheets["Dekurz"]
# ----- Header formatting ----- # ----- Bright yellow header -----
header_fill = PatternFill(start_color="FFD966", end_color="FFD966", fill_type="solid") header_fill = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid")
for cell in ws[1]: for cell in ws[1]:
cell.font = Font(bold=True) cell.font = Font(bold=True, color="000000")
cell.alignment = Alignment(horizontal="center", vertical="center") cell.alignment = Alignment(horizontal="center", vertical="center")
cell.fill = header_fill cell.fill = header_fill
# ----- Format Datum column ----- # ----- Format Datum column -----
for cell in ws["F"][1:]: # column F = Datum (adjust if structure changes) for cell in ws["A"][1:]:
if isinstance(cell.value, pd.Timestamp): if hasattr(cell.value, "date"):
cell.value = cell.value.date() # remove time part cell.value = cell.value.date()
cell.number_format = "DD.MM.YYYY" cell.number_format = "DD.MM.YYYY"
# ----- Force DEKURS column as Text ----- # ----- Vertical centering for key columns -----
text_col_name = "Text ASCII (RTF->plain)" vertically_centered = {"DATUM", "RODCIS", "PACIENT", "LEKAR", "VYKONY DNE"}
text_col_index = None for row in ws.iter_rows(min_row=2, max_row=ws.max_row):
for i, col in enumerate(df.columns, start=1):
if col == text_col_name:
text_col_index = i
break
if text_col_index:
for row in ws.iter_rows(min_row=2, max_row=ws.max_row,
min_col=text_col_index, max_col=text_col_index):
for cell in row: for cell in row:
cell.number_format = "@" header = ws.cell(row=1, column=cell.column).value
if header in vertically_centered:
align = cell.alignment
cell.alignment = Alignment(
horizontal=align.horizontal or "left",
vertical="center",
wrap_text=align.wrap_text
)
elif isinstance(cell.value, str) and "\n" in cell.value:
cell.alignment = Alignment(wrap_text=True, vertical="top")
# ----- Column widths ----- # ----- Column widths -----
for col in ws.columns: for col in ws.columns:
header = col[0].value header = col[0].value
col_letter = get_column_letter(col[0].column) col_letter = get_column_letter(col[0].column)
if header == text_col_name: if header and "DEKURS" in str(header).upper():
ws.column_dimensions[col_letter].width = 110 # fixed width for DEKURS ws.column_dimensions[col_letter].width = 110
else: else:
max_len = max(len(str(cell.value)) if cell.value else 0 for cell in col) max_len = max(len(str(cell.value)) if cell.value else 0 for cell in col)
ws.column_dimensions[col_letter].width = min(max_len + 2, 80) ws.column_dimensions[col_letter].width = min(max_len + 2, 80)
@@ -140,3 +159,4 @@ with pd.ExcelWriter(xlsx_path, engine="openpyxl") as writer:
cell.border = border cell.border = border
print(f"✅ Export hotov: {xlsx_path}") print(f"✅ Export hotov: {xlsx_path}")
print(f"📅 Dotaz zahrnuje data od: {DATE_FROM}")