# janssen/create_accountability_report.py

import pandas as pd
from pathlib import Path
from openpyxl import load_workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter

# Locations and names used by the script: the folder globbed for inventory
# workbooks, the folder globbed for destruction workbooks, the workbook that
# is written, and the sheet inside it.
INVENTORY_DIR = Path("xls_reports")
DESTRUCTION_DIR = Path("xls_ip_destruction")
OUTPUT_FILE = "accountability_combined.xlsx"
SHEET_NAME = "CountryMedicationOverview"

# Maps the column headers found in the source data to the short headers used
# in the combined sheet. "Site", "DestroyedOn" and "Basket number" are not
# read from the inventory table itself; main() adds them before renaming.
COLUMN_RENAMES = {
    "Site": "Site",
    # Kit identification and receipt
    "Medication ID": "Med ID",
    "Packaged Lot number": "Lot No.",
    "Original Expiration Date when Packaged Lot was Added": "Orig Exp Date",
    "Expiration date": "Exp Date",
    "Received Date": "Rcv Date",
    "Shipment Receipt User": "Rcpt User",
    # Assignment
    "Subject Identifier": "Subject ID",
    "Quantity Assigned": "Qty Asgn",
    "IRT Transaction": "IRT Tx",
    "Date Assigned": "Date Asgn",
    "Assignment User": "Asgn User",
    # Dispensing
    "Dispensation Status": "Disp Status",
    "Dispensing Date": "Disp Date",
    "Quantity Dispensed": "Qty Disp",
    "Dispensing User": "Disp User",
    # Returns
    "Quantity Returned": "Qty Ret",
    "Date Returned": "Date Ret",
    "Return User": "Ret User",
    # Destruction (columns added by main() from the destruction lookup)
    "DestroyedOn": "Destroyed",
    "Basket number": "Basket No.",
}

# Short (post-rename) column names that main() converts with pd.to_datetime
# and displays with the "DD-MMM-YYYY" number format.
DATE_COLUMNS = {
    "Orig Exp Date",
    "Exp Date",
    "Rcv Date",
    "Date Asgn",
    "Disp Date",
    "Date Ret",
    "Destroyed",
}

# Column width assigned to each short header when the sheet is formatted.
# Headers that are not listed here get a width of 14 in main().
COLUMN_WIDTHS = {
    "Site": 14,
    "Med ID": 10,
    "Lot No.": 12,
    "Orig Exp Date": 16,
    "Exp Date": 14,
    "Rcv Date": 14,
    "Rcpt User": 22,
    "Subject ID": 14,
    "Qty Asgn": 9,
    "IRT Tx": 8,
    "Date Asgn": 14,
    "Asgn User": 20,
    "Disp Status": 16,
    "Disp Date": 14,
    "Qty Disp": 9,
    "Disp User": 20,
    "Qty Ret": 10,
    "Date Ret": 14,
    "Ret User": 18,
    "Destroyed": 14,
    "Basket No.": 12,
}


def read_inventory(path):
    """Read one inventory workbook and the site name from its preamble.

    The first sheet is scanned for the row whose first cell is exactly
    ``"Medication ID"``; that row is used as the table header. Rows above it
    are treated as free-text metadata, and a first-column cell starting with
    ``"Site:"`` supplies the site name (the last such cell wins).

    Args:
        path: Location of the ``.xlsx`` file to read.

    Returns:
        A ``(data, meta)`` tuple. ``data`` is the DataFrame read with the
        detected row as header. ``meta`` is a dict that holds ``"site"`` only
        when a ``Site:`` line was found above the header.

    Raises:
        ValueError: If no row has ``"Medication ID"`` in its first column
            (including a completely empty sheet).
    """
    raw = pd.read_excel(path, header=None)

    # An empty sheet has no column 0 at all, so guard before indexing it.
    header_hits = [] if raw.empty else raw.index[raw[0] == "Medication ID"]
    if len(header_hits) == 0:
        raise ValueError(
            f"{path}: no row with 'Medication ID' in the first column was found"
        )
    header_row = header_hits[0]

    meta = {}
    for cell in raw.iloc[:header_row, 0]:
        text = str(cell) if pd.notna(cell) else ""
        if text.startswith("Site:"):
            # Drop only the leading marker; a later "Site:" inside the value
            # is part of the site name and must be kept.
            meta["site"] = text[len("Site:"):].strip()

    # Second read so pandas infers column dtypes from the table rows only.
    data = pd.read_excel(path, header=header_row)
    return data, meta


def read_destruction_lookup():
    """Build a ``Medication ID -> (basket ID, destruction date)`` lookup.

    Every ``*.xlsx`` file in ``DESTRUCTION_DIR`` is read. The first column of
    the first 15 rows (or fewer, for short sheets) is searched for cells
    starting with ``"Basket ID:"`` and ``"Drug Destruction Created Date:"``.
    The table header is the row whose first cell is exactly
    ``"Medication ID Description"``; each non-null value of its
    ``"Medication ID"`` column becomes a key of the lookup.

    Returns:
        dict mapping ``int`` medication IDs to ``(basket_id, destroyed_on)``.
        Both tuple items are the text following the marker, or ``None`` when
        the marker line was not found in that file.

    Raises:
        ValueError: If a file has no ``"Medication ID Description"`` header
            row (including a completely empty sheet).
    """
    lookup = {}
    # Sorted so the outcome is reproducible: if a kit is listed in several
    # reports, the last file in sorted order wins regardless of the order in
    # which the filesystem happens to return the entries.
    for path in sorted(DESTRUCTION_DIR.glob("*.xlsx")):
        raw = pd.read_excel(path, header=None)

        # An empty sheet has no column 0 at all, so guard before indexing it.
        header_hits = (
            [] if raw.empty else raw.index[raw[0] == "Medication ID Description"]
        )
        if len(header_hits) == 0:
            raise ValueError(
                f"{path}: no row with 'Medication ID Description' in the "
                "first column was found"
            )
        header_row = header_hits[0]

        basket_id = None
        destroyed_on = None
        # Slicing (rather than indexing rows 0..14) keeps sheets with fewer
        # than 15 rows from raising IndexError.
        for cell in raw[0].iloc[:15]:
            text = str(cell) if pd.notna(cell) else ""
            # Strip only the leading marker, not repeats inside the value.
            if text.startswith("Basket ID:"):
                basket_id = text[len("Basket ID:"):].strip()
            if text.startswith("Drug Destruction Created Date:"):
                destroyed_on = text[len("Drug Destruction Created Date:"):].strip()

        data = pd.read_excel(path, header=header_row)
        for med_id in data["Medication ID"].dropna():
            lookup[int(med_id)] = (basket_id, destroyed_on)
    return lookup


def _attach_destruction_info(df, lookup):
    """Add ``DestroyedOn`` and ``Basket number`` columns to *df* in place.

    Each row's ``Medication ID`` is looked up once in *lookup*; rows with a
    null ID or an ID that is not in the lookup get ``None`` in both columns.
    """
    missing = (None, None)
    pairs = [
        lookup.get(int(med_id), missing) if pd.notna(med_id) else missing
        for med_id in df["Medication ID"]
    ]
    df["DestroyedOn"] = [destroyed_on for _, destroyed_on in pairs]
    df["Basket number"] = [basket_id for basket_id, _ in pairs]


def _format_sheet(ws):
    """Style the header and data cells of *ws* and set widths/filter/freeze."""
    header_fill = PatternFill("solid", start_color="1F4E79")
    header_font = Font(bold=True, color="FFFFFF", name="Arial", size=10)
    new_col_fill = PatternFill("solid", start_color="E2EFDA")
    row_font = Font(name="Arial", size=10)

    thin = Side(style="thin", color="000000")
    border = Border(left=thin, right=thin, top=thin, bottom=thin)

    # Built once and shared, instead of one new Alignment object per cell.
    header_alignment = Alignment(
        horizontal="center", vertical="center", wrap_text=False
    )
    body_alignment = Alignment(horizontal="center")

    headers = [cell.value for cell in ws[1]]
    new_cols = {"Destroyed", "Basket No."}

    for cell in ws[1]:
        cell.fill = header_fill
        cell.font = header_font
        cell.alignment = header_alignment
        cell.border = border
        # Headers without an explicit width fall back to 14.
        width = COLUMN_WIDTHS.get(cell.value, 14)
        ws.column_dimensions[get_column_letter(cell.column)].width = width

    for row in ws.iter_rows(min_row=2, max_row=ws.max_row):
        for cell in row:
            col_name = headers[cell.column - 1] if cell.column <= len(headers) else None
            cell.font = row_font
            cell.border = border
            cell.alignment = body_alignment
            if col_name in DATE_COLUMNS:
                cell.number_format = "DD-MMM-YYYY"
            if col_name in new_cols:
                # Highlight the columns that come from the destruction reports.
                cell.fill = new_col_fill

    ws.auto_filter.ref = ws.dimensions
    ws.freeze_panes = "A2"


def main():
    """Combine all inventory reports with destruction data into one workbook.

    Steps: load the destruction lookup, read every
    ``onsite_inventory_detail_*.xlsx`` file in ``INVENTORY_DIR``, add the
    destruction date, basket number and site to each table, concatenate,
    rename columns, parse date columns, sort, write ``OUTPUT_FILE`` and then
    re-open it to apply formatting.

    Raises:
        ValueError: If no inventory file matches the pattern.
    """
    lookup = read_destruction_lookup()
    print(f"Loaded {len(lookup)} kits from destruction reports")

    all_rows = []
    for path in sorted(INVENTORY_DIR.glob("onsite_inventory_detail_*.xlsx")):
        df, meta = read_inventory(path)
        _attach_destruction_info(df, lookup)
        # Fall back to the file name when the report has no "Site:" line.
        df.insert(0, "Site", meta.get("site", path.stem))
        all_rows.append(df)
        print(f"  {path.name}: {len(df)} kits")

    if not all_rows:
        # pd.concat would fail with a generic "No objects to concatenate".
        raise ValueError(
            f"No 'onsite_inventory_detail_*.xlsx' files found in {INVENTORY_DIR}"
        )

    combined = pd.concat(all_rows, ignore_index=True)

    # Rename columns
    combined.rename(columns=COLUMN_RENAMES, inplace=True)

    # Convert date columns; values that cannot be parsed become NaT
    for col in DATE_COLUMNS:
        if col in combined.columns:
            combined[col] = pd.to_datetime(combined[col], dayfirst=True, errors="coerce")

    # Sort
    combined.sort_values(["Site", "Rcv Date", "Med ID"], inplace=True, ignore_index=True)

    combined.to_excel(OUTPUT_FILE, index=False, sheet_name=SHEET_NAME)

    # ── Formatting ────────────────────────────────────────────────────────────
    wb = load_workbook(OUTPUT_FILE)
    _format_sheet(wb[SHEET_NAME])
    wb.save(OUTPUT_FILE)
    print(f"\nSaved: {OUTPUT_FILE} ({len(combined)} rows, sheet '{SHEET_NAME}')")


# Run the report only when executed as a script, so that importing this
# module (e.g. to reuse the reader functions) does not read or write files.
if __name__ == "__main__":
    main()