#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Report: DXA requisitions (Medicus → Dropbox)
--------------------------------------------
- Selects all histdoc records for year 2025 whose text contains "dxa"
- Keeps only the newest record per patient (RODCIS)
- Finds the matching PDF in Dropbox (file name starts with the patient's
  rodcis and contains "[dxa]")
- Writes an Excel report with the query columns plus FILE, then removes
  older "* DXA report.xlsx" files from the export folder
"""

import functools
import os
import firebirdsql as fb
import pandas as pd
from pathlib import Path
from datetime import datetime
from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
from openpyxl.utils import get_column_letter

# ================== CONFIGURATION ==================
FDB_PATH = r"M:\Medicus\Data\Medicus.fdb"
EXPORT_DIR = Path(r"z:\Dropbox\Ordinace\Reporty")
DOC_DIR = Path(r"z:\Dropbox\Ordinace\Dokumentace_zpracovaná")

timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
xlsx_path = EXPORT_DIR / f"{timestamp} DXA report.xlsx"

# Glob pattern matching every report this script has ever produced.
REPORT_GLOB = "* DXA report.xlsx"

# ================== DATABASE QUERY ==================
SQL = """
SELECT
    h.datum,
    h.idpaci,
    k.rodcis,
    k.prijmeni,
    k.jmeno,
    (
        SELECT LIST(
            CAST(dd.datose AS DATE) || ':' || dd.kod,
            ', '
        )
        FROM dokladd dd
        WHERE dd.rodcis = k.rodcis
          AND EXTRACT(YEAR FROM dd.datose) = 2025
          AND dd.kod STARTING WITH '1132'
    ) AS vykony_1132_2025
FROM histdoc h
JOIN kar k ON h.idpaci = k.idpac
WHERE EXTRACT(YEAR FROM h.datum) = 2025
  AND h.data CONTAINING 'dxa'
ORDER BY h.datum DESC
"""


def load_dxa_records() -> pd.DataFrame:
    """Run the DXA query against the Medicus database and return the rows.

    The connection is closed even when the query raises, so a failed run
    does not leave a dangling connection on the server.
    """
    # NOTE(review): credentials are hard-coded here; consider moving them
    # to environment variables or a config file kept outside the repository.
    conn = fb.connect(
        host="192.168.1.10",
        port=3050,
        database=FDB_PATH,
        user="SYSDBA",
        password="masterkey",
        charset="WIN1250",
    )
    try:
        return pd.read_sql(SQL, conn)
    finally:
        conn.close()


# ================== REMOVE DUPLICATES AND SORT ==================
def keep_newest_per_patient(df: pd.DataFrame) -> pd.DataFrame:
    """Return one row per RODCIS (the newest by DATUM), sorted newest first.

    DATUM is parsed to datetime for sorting (unparseable values become NaT)
    and then reduced to a plain date, without the time portion.
    """
    df = df.copy()
    df["DATUM"] = pd.to_datetime(df["DATUM"], errors="coerce")
    # drop_duplicates keeps row order, so a single stable sort is enough to
    # both pick the newest row per patient and leave the result sorted.
    # NOTE(review): rows with a missing RODCIS are treated as duplicates of
    # each other and collapse into a single row - confirm this is intended.
    df = (
        df.sort_values("DATUM", ascending=False, kind="stable")
        .drop_duplicates(subset="RODCIS", keep="first")
        .reset_index(drop=True)
    )
    df["DATUM"] = df["DATUM"].dt.date
    return df


# ================== FIND MATCHING PDF FILES ==================
@functools.lru_cache(maxsize=1)
def _dxa_pdf_names() -> tuple:
    """Return (lowercased name, original name) for every "[dxa]" PDF in DOC_DIR.

    The directory is listed once and cached, so looking up many patients
    does not rescan the folder for each of them. Entries keep the order in
    which the directory listing returned them.
    """
    found = []
    for file in DOC_DIR.iterdir():
        lowered = file.name.lower()
        if file.suffix.lower() == ".pdf" and "[dxa]" in lowered:
            found.append((lowered, file.name))
    return tuple(found)


def find_dxa_file(rod_cis: str) -> str:
    """Return the first matching DXA file name for the given rod_cis, or ''.

    A file matches when it is a PDF whose name starts with ``rod_cis`` and
    contains "[dxa]", compared case-insensitively, e.g.
    "1234567890 [DXA] something.pdf".

    Missing, non-string, empty or whitespace-only values return '' instead
    of matching every file (an empty prefix would match any name).
    """
    if not isinstance(rod_cis, str):
        return ""
    prefix = rod_cis.strip().lower()
    if not prefix:
        return ""
    for lowered, original in _dxa_pdf_names():
        if lowered.startswith(prefix):
            return original
    return ""


# ================== EXPORT TO EXCEL ==================
def _format_sheet(ws) -> None:
    """Apply header styling, column widths and borders to the worksheet."""
    # Header: bold black text, centered, on a yellow background.
    header_fill = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid")
    for cell in ws[1]:
        cell.font = Font(bold=True, color="000000")
        cell.alignment = Alignment(horizontal="center", vertical="center")
        cell.fill = header_fill

    # Auto column width (capped at 80); the FILE column is fixed at 120.
    for col in ws.columns:
        col_letter = get_column_letter(col[0].column)
        if col[0].value == "FILE":
            ws.column_dimensions[col_letter].width = 120
        else:
            max_len = max(
                len(str(cell.value)) if cell.value is not None else 0
                for cell in col
            )
            ws.column_dimensions[col_letter].width = min(max_len + 2, 80)

    # Thin black border around every cell, header included.
    thin = Side(border_style="thin", color="000000")
    border = Border(top=thin, left=thin, right=thin, bottom=thin)
    for row in ws.iter_rows(min_row=1, max_row=ws.max_row, min_col=1, max_col=ws.max_column):
        for cell in row:
            cell.border = border


def write_report(df: pd.DataFrame, path: Path) -> None:
    """Write ``df`` to ``path`` as a formatted single-sheet ("DXA") workbook."""
    with pd.ExcelWriter(path, engine="openpyxl") as writer:
        df.to_excel(writer, index=False, sheet_name="DXA")
        _format_sheet(writer.sheets["DXA"])


# ================== CLEAN OLD REPORTS ==================
def remove_old_reports(keep: Path) -> None:
    """Delete previous DXA reports from EXPORT_DIR, leaving ``keep`` in place.

    Deletion is best-effort: a file that cannot be removed (for example
    because it is open in Excel) is reported and skipped.
    """
    for f in EXPORT_DIR.glob(REPORT_GLOB):
        if f == keep:
            continue
        try:
            f.unlink()
            print(f"🗑️ Deleted old report: {f.name}")
        except Exception as e:
            print(f"⚠️ Could not delete {f.name}: {e}")


def main() -> None:
    """Build the DXA report and replace any older reports with it."""
    df = keep_newest_per_patient(load_dxa_records())
    df["FILE"] = df["RODCIS"].apply(find_dxa_file)

    # Write the new report first; old ones are removed only after it exists,
    # so a failed export never leaves the folder without any report.
    write_report(df, xlsx_path)
    remove_old_reports(keep=xlsx_path)

    print(f"✅ Report created: {xlsx_path}")
    print(f"📂 Source folder scanned: {DOC_DIR}")
    print(f"🩻 {df['FILE'].astype(bool).sum()} matching DXA PDFs found")


if __name__ == "__main__":
    main()