This commit is contained in:
2025-10-21 14:02:42 +02:00
parent 1d2bbec61b
commit 348b393da6

View File

@@ -3,130 +3,149 @@
"""
Export DEKURS records from Medicus Firebird DB into Excel.
- RTF text decoded to plain ASCII (no diacritics, first 100 chars)
- Příjmení + Jméno merged into one 'Pacient' column
- Proper date formatting (DD.MM.YYYY)
- Thin black borders, gold header, wide text column
- Includes only the last 365 days (one year) counted back from the current date
- RTF text decoded and cleaned (Excel-safe, preserved newlines)
- Výkony of the same day concatenated into 'VYKONY DNE'
- Příjmení + Jméno merged into 'PACIENT'
- Proper column order and naming
- Bright yellow header, thin black borders, auto column widths
- Timestamped Excel filename
"""
import time
import re
import unicodedata
import fdb
import pandas as pd
import firebirdsql as fb
from datetime import datetime, timedelta
from pathlib import Path
from striprtf.striprtf import rtf_to_text
from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
from openpyxl.utils import get_column_letter
# ================== CONFIGURATION ==================
FDB_PATH = r"z:\Medicus 3\data\medicus.fdb"
EXPORT_DIR = Path(r"D:\Dropbox\!!!Days\Downloads Z230")
timestamp = time.strftime("%Y-%m-%d %H-%M-%S")
xlsx_path = EXPORT_DIR / f"Dekurz export ASCII {timestamp}.xlsx"
DATE_FROM = "2024-01-01"
FDB_PATH = r"z:\Medicus 3\data\MEDICUS.FDB"
EXPORT_DIR = Path(r"u:\Dropbox\!!!Days\Downloads Z230")
# calculate the start date dynamically (365 days back from today)
DATE_FROM = (datetime.now() - timedelta(days=365)).strftime("%Y-%m-%d")
timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
xlsx_path = EXPORT_DIR / f"{timestamp}_Dekurz (poslední rok).xlsx"
# ================== FIREBIRD CONNECTION ==================
con = fdb.connect(
dsn=f"localhost:{FDB_PATH}",
user="sysdba",
conn = fb.connect(
host="192.168.1.4",
port=3050,
database=FDB_PATH,
user="SYSDBA",
password="masterkey",
charset="WIN1250"
charset="WIN1250",
)
def query_df(sql, params=None):
    """Execute *sql* on the module-level connection and return a DataFrame.

    Args:
        sql: SQL statement to execute.
        params: Optional sequence of positional parameters bound to the
            statement; an empty tuple is used when omitted or falsy.

    Returns:
        A pandas DataFrame with one row per fetched record. Column names
        come from the cursor description with surrounding whitespace
        stripped.
    """
    cur = conn.cursor()
    try:
        cur.execute(sql, params or ())
        rows = cur.fetchall()
        cols = [d[0].strip() for d in cur.description]
    finally:
        # Release the cursor even when execute/fetch raises, instead of
        # leaving it open until garbage collection.
        cur.close()
    return pd.DataFrame(rows, columns=cols)
# ================== QUERY ==================
sql = f"""
SELECT
dekurs.id,
kar.prijmeni,
kar.jmeno,
kar.rodcis,
uzivatel.zkratka,
dekurs.datum,
"DEKURS"
FROM dekurs
JOIN kar ON dekurs.idpac = kar.idpac
JOIN uzivatel ON dekurs.iduzi = uzivatel.iduzi
WHERE dekurs.datum >= DATE '{DATE_FROM}'
ORDER BY dekurs.datum DESC
d.id,
k.prijmeni,
k.jmeno,
k.rodcis,
u.zkratka,
d.datum,
d.dekurs,
(
SELECT LIST(dd.kod, ', ')
FROM dokladd dd
WHERE dd.rodcis = k.rodcis
AND CAST(dd.datose AS DATE) = CAST(d.datum AS DATE)
) AS vykony_dne
FROM dekurs d
JOIN kar k ON d.idpac = k.idpac
JOIN uzivatel u ON d.iduzi = u.iduzi
WHERE d.datum >= DATE '{DATE_FROM}'
ORDER BY d.datum DESC
"""
df = pd.read_sql(sql, con)
con.close()
df = query_df(sql)
conn.close()
# ================== DATA PREPARATION ==================
# ================== DATA CLEANING ==================
def safe_rtf_to_text(x):
    """Convert an RTF string to plain text without ever raising.

    Non-string values and blank/whitespace-only strings yield an empty
    string. If the RTF conversion fails, the input is returned unchanged.
    """
    if not isinstance(x, str):
        return ""
    if not x.strip():
        return ""
    try:
        plain = rtf_to_text(x)
    except Exception:  # best effort: keep the raw value when parsing fails
        return x
    return plain
df["DEKURS"] = df["DEKURS"].apply(safe_rtf_to_text)
df.replace({r'(\r\n|\r|\n)': r'\r\n'}, regex=True, inplace=True)
df.replace({r'[\ud800-\udfff\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]+': ''},
regex=True, inplace=True)
df.replace({r'(\r\n){2,}': r'\r\n', r'(\r\n)+$': ''},
regex=True, inplace=True)
# Merge Příjmení + Jméno
df["PACIENT"] = df["PRIJMENI"].fillna("") + ", " + df["JMENO"].fillna("")
df.drop(columns=["PRIJMENI", "JMENO"], inplace=True)
# Ensure DATUM is datetime type
df["DATUM"] = pd.to_datetime(df["DATUM"], errors="coerce")
df.rename(columns={"ZKRATKA": "LEKAR", "VYKONY_DNE": "VYKONY DNE"}, inplace=True)
df["DATUM"] = pd.to_datetime(df["DATUM"], errors="coerce").dt.date
df.drop(columns=[c for c in df.columns if "ASCII" in c.upper()], inplace=True, errors="ignore")
# Decode RTF → ASCII (first 100 chars)
def decode_rtf_ascii(text):
"""Decode RTF, clean control chars, convert to ASCII (no diacritics), limit to 100 chars."""
if not text:
return ""
desired_order = ["DATUM", "RODCIS", "PACIENT", "LEKAR", "VYKONY DNE", "DEKURS"]
df = df[[c for c in desired_order if c in df.columns]]
# ================== CLEANUP OLD FILES ==================
for old_file in EXPORT_DIR.glob("*Dekurz (poslední rok)*.xlsx"):
try:
plain = rtf_to_text(text)
except Exception:
plain = str(text)
plain = re.sub(r"[\x00-\x08\x0B-\x0C\x0E-\x1F]", "", plain)
plain = re.sub(r"\s{3,}", " ", plain)
plain = unicodedata.normalize("NFKD", plain).encode("ascii", "ignore").decode("ascii")
return plain.strip()[:100]
df["TEXT_ASCII"] = df["DEKURS"].apply(decode_rtf_ascii)
df.drop(columns=["DEKURS"], inplace=True)
# Rename columns for Excel
df.rename(columns={
"ID": "ID zaznamu",
"PACIENT": "Pacient",
"RODCIS": "Rodne cislo",
"ZKRATKA": "Lekar",
"DATUM": "Datum",
"TEXT_ASCII": "Text ASCII (RTF->plain)"
}, inplace=True)
old_file.unlink()
print(f"🧹 Deleted old file: {old_file.name}")
except Exception as e:
print(f"⚠️ Could not delete {old_file.name}: {e}")
# ================== EXPORT TO EXCEL ==================
with pd.ExcelWriter(xlsx_path, engine="openpyxl") as writer:
df.to_excel(writer, index=False, sheet_name="Dekurz")
ws = writer.sheets["Dekurz"]
# ----- Header formatting -----
header_fill = PatternFill(start_color="FFD966", end_color="FFD966", fill_type="solid")
# ----- Bright yellow header -----
header_fill = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid")
for cell in ws[1]:
cell.font = Font(bold=True)
cell.font = Font(bold=True, color="000000")
cell.alignment = Alignment(horizontal="center", vertical="center")
cell.fill = header_fill
# ----- Format Datum column -----
for cell in ws["F"][1:]: # column F = Datum (adjust if structure changes)
if isinstance(cell.value, pd.Timestamp):
cell.value = cell.value.date() # remove time part
for cell in ws["A"][1:]:
if hasattr(cell.value, "date"):
cell.value = cell.value.date()
cell.number_format = "DD.MM.YYYY"
# ----- Force DEKURS column as Text -----
text_col_name = "Text ASCII (RTF->plain)"
text_col_index = None
for i, col in enumerate(df.columns, start=1):
if col == text_col_name:
text_col_index = i
break
if text_col_index:
for row in ws.iter_rows(min_row=2, max_row=ws.max_row,
min_col=text_col_index, max_col=text_col_index):
for cell in row:
cell.number_format = "@"
# ----- Vertical centering for key columns -----
vertically_centered = {"DATUM", "RODCIS", "PACIENT", "LEKAR", "VYKONY DNE"}
for row in ws.iter_rows(min_row=2, max_row=ws.max_row):
for cell in row:
header = ws.cell(row=1, column=cell.column).value
if header in vertically_centered:
align = cell.alignment
cell.alignment = Alignment(
horizontal=align.horizontal or "left",
vertical="center",
wrap_text=align.wrap_text
)
elif isinstance(cell.value, str) and "\n" in cell.value:
cell.alignment = Alignment(wrap_text=True, vertical="top")
# ----- Column widths -----
for col in ws.columns:
header = col[0].value
col_letter = get_column_letter(col[0].column)
if header == text_col_name:
ws.column_dimensions[col_letter].width = 110 # fixed width for DEKURS
if header and "DEKURS" in str(header).upper():
ws.column_dimensions[col_letter].width = 110
else:
max_len = max(len(str(cell.value)) if cell.value else 0 for cell in col)
ws.column_dimensions[col_letter].width = min(max_len + 2, 80)
@@ -140,3 +159,4 @@ with pd.ExcelWriter(xlsx_path, engine="openpyxl") as writer:
cell.border = border
print(f"✅ Export hotov: {xlsx_path}")
print(f"📅 Dotaz zahrnuje data od: {DATE_FROM}")