# janssen/IWRS/Drugs/Working/create_studie_report.py

import sys
import os
import mysql.connector
import pandas as pd
from datetime import date
from pathlib import Path
from openpyxl import load_workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter

sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), ".."))
import db_config

# Studies to report on, as (study identifier, short label) pairs.
# main() iterates over this list and uses only the identifier.
STUDIES = [
    ("77242113UCO3001", "UCO"),
    ("42847922MDD3003", "MDD"),
]

# Directory containing this script; generated workbooks go to its "output"
# subdirectory.
BASE_DIR   = Path(os.path.dirname(os.path.abspath(__file__)))
OUTPUT_DIR = BASE_DIR / "output"

# Column headers that hold dates: load_inventory() converts them to pandas
# datetimes and format_sheet() renders them with the DD-MMM-YYYY number format.
DATE_COLUMNS = {
    "Orig Exp Date", "Exp Date", "Rcv Date",
    "Date Asgn", "Disp Date", "Date Ret", "Destroyed", "Max Visit Date",
}

# Excel column widths keyed by header text; format_sheet() falls back to a
# width of 14 for any header that is not listed here.
COLUMN_WIDTHS = {
    "Site":           14,
    "Med ID":         10,
    "Lot No.":        12,
    "Orig Exp Date":  16,
    "Exp Date":       14,
    "Rcv Date":       14,
    "Rcpt User":      22,
    "Subject ID":     14,
    "Qty Asgn":        9,
    "IRT Tx":          8,
    "Date Asgn":      14,
    "Asgn User":      20,
    "Disp Status":    16,
    "Disp Date":      14,
    "Qty Disp":        9,
    "Disp User":      20,
    "Qty Ret":        10,
    "Date Ret":       14,
    "Ret User":       18,
    "Destroyed":      14,
    "Basket No.":     12,
    "Max Visit Date": 16,
}

N_SHIP_COLS = 9  # number of shipment-level columns (blue header in the Shipments sheet)


# ── DB ────────────────────────────────────────────────────────────────────────

def get_conn():
    """Open a new MySQL connection using the settings in ``db_config``.

    Returns:
        The connection object returned by ``mysql.connector.connect``.
        The caller is responsible for closing it.
    """
    settings = {
        "host": db_config.DB_HOST,
        "port": db_config.DB_PORT,
        "user": db_config.DB_USER,
        "password": db_config.DB_PASSWORD,
        "database": db_config.DB_NAME,
    }
    return mysql.connector.connect(**settings)


def get_latest_import_id(cursor, study):
    """Return the highest ``import_id`` of a 'drugs' import for *study*.

    Args:
        cursor: DB cursor whose ``fetchone()`` returns a mapping keyed by
            column name (the row is read via ``row["mid"]``).
        study: Study identifier bound to the ``study=%s`` placeholder.

    Returns:
        The maximum ``import_id`` found in ``iwrs_import``.

    Raises:
        RuntimeError: If the query yields NULL, i.e. there is no matching
            import for the study.
    """
    query = (
        "SELECT MAX(import_id) AS mid FROM iwrs_import "
        "WHERE study=%s AND report_type='drugs'"
    )
    cursor.execute(query, (study,))
    latest = cursor.fetchone()["mid"]
    if latest is not None:
        return latest
    raise RuntimeError(f"Žádná data v MySQL pro studii {study}")


# ── Data loading ──────────────────────────────────────────────────────────────

def load_inventory(cursor, study, import_id):
    """Load the kit inventory of one import, joined with destruction data.

    Each ``iwrs_inventory`` row of the given import and study is left-joined
    with one (basket_id, destruction_date) pair per medication_id taken from
    ``iwrs_destruction`` for the same study. Columns are aliased to the
    report header names and every column listed in ``DATE_COLUMNS`` is
    converted to pandas datetimes (unparseable values become NaT).

    Args:
        cursor: DB cursor; after ``execute`` it should expose the DB-API
            ``description`` attribute so column names are known even when
            no rows are returned.
        study: Study identifier.
        import_id: Import to read from ``iwrs_inventory``.

    Returns:
        pandas.DataFrame with one row per inventory record, ordered by
        site, received date and medication id. When the query returns no
        rows the frame is empty but still carries all report columns, so
        the downstream ``build_*`` filters do not fail with KeyError.
    """
    # NOTE(review): ANY_VALUE() is evaluated separately for basket_id and
    # destruction_date; if a medication_id can have several destruction rows
    # the two values are not guaranteed to come from the same row — confirm
    # that iwrs_destruction holds at most one row per kit and study.
    sql = """
        SELECT
            i.site                      AS Site,
            i.medication_id             AS `Med ID`,
            i.packaged_lot_no           AS `Lot No.`,
            i.original_expiration_date  AS `Orig Exp Date`,
            i.expiration_date           AS `Exp Date`,
            i.received_date             AS `Rcv Date`,
            i.receipt_user              AS `Rcpt User`,
            i.subject_identifier        AS `Subject ID`,
            i.quantity_assigned         AS `Qty Asgn`,
            i.irt_transaction           AS `IRT Tx`,
            i.date_assigned             AS `Date Asgn`,
            i.assignment_user           AS `Asgn User`,
            i.dispensation_status       AS `Disp Status`,
            i.dispensing_date           AS `Disp Date`,
            i.quantity_dispensed        AS `Qty Disp`,
            i.dispensing_user           AS `Disp User`,
            i.quantity_returned         AS `Qty Ret`,
            i.date_returned             AS `Date Ret`,
            i.return_user               AS `Ret User`,
            d.destruction_date          AS Destroyed,
            d.basket_id                 AS `Basket No.`
        FROM iwrs_inventory i
        LEFT JOIN (
            SELECT medication_id,
                   ANY_VALUE(basket_id)        AS basket_id,
                   ANY_VALUE(destruction_date) AS destruction_date
            FROM iwrs_destruction
            WHERE study = %s
            GROUP BY medication_id
        ) d ON d.medication_id = i.medication_id
        WHERE i.import_id = %s
          AND i.study     = %s
        ORDER BY i.site, i.received_date, i.medication_id
    """
    cursor.execute(sql, (study, import_id, study))
    # Read the column names from the result metadata: pd.DataFrame([]) would
    # otherwise have no columns at all when the query returns zero rows.
    description = getattr(cursor, "description", None) or ()
    columns = [meta[0] for meta in description]
    rows = cursor.fetchall()
    df = pd.DataFrame(rows, columns=columns or None)
    for col in DATE_COLUMNS:
        if col in df.columns:
            df[col] = pd.to_datetime(df[col], errors="coerce")
    print(f"  Inventory: {len(df)} kitu")
    return df


def load_shipments(cursor, study, import_id):
    """Load shipments of one import together with their shipped items.

    ``iwrs_shipments`` is inner-joined with ``iwrs_shipment_items`` on study
    and shipment id, both restricted to *import_id*. The first nine columns
    are shipment-level, the remaining ones describe the individual item.
    The four date columns are converted to pandas datetimes (unparseable
    values become NaT).

    Args:
        cursor: DB cursor; after ``execute`` it should expose the DB-API
            ``description`` attribute so column names are known even when
            no rows are returned.
        study: Study identifier.
        import_id: Import to read from both tables.

    Returns:
        pandas.DataFrame with one row per shipped item, ordered by
        destination site, shipment id and medication id. When the query
        returns no rows the frame is empty but still carries all columns,
        so ``build_site_summary`` does not fail with KeyError.
    """
    sql = """
        SELECT
            s.shipment_id               AS `Shipment ID`,
            s.status                    AS `IRT Shipment Status`,
            s.type                      AS Type,
            s.ship_from                 AS `Shipment From`,
            s.ship_to_site              AS `Ship To:`,
            s.request_date              AS `Request Date`,
            s.received_date             AS `Received Date`,
            s.received_by               AS `Received by`,
            s.expected_arrival          AS `Expected Arrival`,
            i.investigator              AS Investigator,
            i.medication_description    AS `Medication Description`,
            i.medication_id             AS `Medication ID`,
            i.packaged_lot_no           AS `Packaged Lot number`,
            i.expiration_date           AS `Expiration Date`,
            i.item_status               AS Status
        FROM iwrs_shipments s
        JOIN iwrs_shipment_items i
            ON  i.study       = s.study
            AND i.shipment_id = s.shipment_id
            AND i.import_id   = %s
        WHERE s.import_id = %s
          AND s.study     = %s
        ORDER BY s.ship_to_site, s.shipment_id, i.medication_id
    """
    cursor.execute(sql, (import_id, import_id, study))
    # Read the column names from the result metadata: pd.DataFrame([]) would
    # otherwise have no columns at all when the query returns zero rows.
    description = getattr(cursor, "description", None) or ()
    columns = [meta[0] for meta in description]
    rows = cursor.fetchall()
    df = pd.DataFrame(rows, columns=columns or None)
    for col in ("Request Date", "Received Date", "Expiration Date", "Expected Arrival"):
        if col in df.columns:
            df[col] = pd.to_datetime(df[col], errors="coerce")
    n_ship = df["Shipment ID"].nunique() if len(df) else 0
    print(f"  Shipments: {n_ship} zásilek, {len(df)} kitu")
    return df


# ── Derived sheets ────────────────────────────────────────────────────────────

def build_site_summary(shipments_df):
    """Count shipped kits per destination site and item status.

    Only the statuses "Available", "Assigned", "Dispensed" and
    "Returned by Subject" are kept (the last one is renamed to "Returned");
    kits in any other status are not counted, not even in "Total".

    Args:
        shipments_df: Frame with at least the "Ship To:" and "Status" columns.

    Returns:
        DataFrame sorted by "Site" with the columns Site, Available,
        Assigned, Dispensed, Returned and Total.
    """
    wanted = ["Available", "Assigned", "Dispensed", "Returned by Subject"]

    counts = shipments_df.groupby("Ship To:")["Status"].value_counts()
    per_site = counts.unstack(fill_value=0)
    # Statuses that never occur still need a zero-filled column.
    per_site = per_site.reindex(columns=wanted, fill_value=0)

    summary = per_site.reset_index()
    summary = summary.rename(
        columns={"Ship To:": "Site", "Returned by Subject": "Returned"}
    )
    summary = summary.sort_values("Site").reset_index(drop=True)

    count_cols = ["Available", "Assigned", "Dispensed", "Returned"]
    summary["Total"] = summary[count_cols].sum(axis=1)
    print(f"  Site Summary: {len(summary)} center")
    return summary


def build_expired(df):
    """Select kits that are past their expiry date and still unused.

    A row qualifies when it has no "Basket No.", no "Subject ID" and its
    "Exp Date" lies strictly before today's date (midnight).

    Args:
        df: Inventory frame with "Basket No.", "Subject ID" and "Exp Date".

    Returns:
        A new DataFrame with the matching rows and a fresh 0..n-1 index.
    """
    cutoff = pd.Timestamp(date.today())
    no_basket = df["Basket No."].isna()
    no_subject = df["Subject ID"].isna()
    past_expiry = df["Exp Date"] < cutoff

    expired = df.loc[no_basket & no_subject & past_expiry].copy()
    expired = expired.reset_index(drop=True)
    print(f"  Expired: {len(expired)}")
    return expired


def build_assigned_not_dispensed(df):
    """Select kits that have a subject but no dispensing date.

    Args:
        df: Inventory frame with "Subject ID" and "Disp Date".

    Returns:
        A new DataFrame with the matching rows and a fresh 0..n-1 index.
    """
    has_subject = df["Subject ID"].notna()
    not_dispensed = df["Disp Date"].isna()
    pending = df.loc[has_subject & not_dispensed].copy().reset_index(drop=True)
    print(f"  Assigned not dispensed: {len(pending)}")
    return pending


def build_not_returned(df):
    """Select kits a subject still holds although a later assignment exists.

    Candidate rows have a "Subject ID", no "Date Ret" and a "Disp Status"
    other than "NOT DISPENSED" (case-insensitive; a missing status counts
    as other). Each candidate gets "Max Visit Date", the latest "Date Asgn"
    of its subject over the whole frame, and is kept only when its own
    "Date Asgn" is strictly earlier than that.

    Args:
        df: Inventory frame with the full set of report columns.

    Returns:
        A new DataFrame without the return and destruction columns, with
        the added "Max Visit Date" column and a fresh 0..n-1 index.
    """
    status = df["Disp Status"].fillna("").str.upper()
    is_candidate = (
        df["Date Ret"].isna()
        & df["Subject ID"].notna()
        & status.ne("NOT DISPENSED")
    )
    candidates = df[is_candidate].copy()

    # Latest assignment date per subject, computed over all rows of df.
    latest_asgn = df.groupby("Subject ID")["Date Asgn"].max()
    latest_asgn = latest_asgn.rename("Max Visit Date")
    candidates = candidates.join(latest_asgn, on="Subject ID")

    superseded = candidates["Date Asgn"] < candidates["Max Visit Date"]
    result = candidates[superseded].copy()
    result = result.drop(
        columns=["Qty Ret", "Date Ret", "Ret User", "Destroyed", "Basket No."]
    )
    result = result.reset_index(drop=True)
    print(f"  Not returned: {len(result)}")
    return result


def build_kits_for_destruction(df):
    """Select kits that are ready for destruction but not yet in a basket.

    A row qualifies when it has no "Basket No." and either has a "Date Ret"
    or its "Disp Status" equals "NOT DISPENSED" (case-insensitive).

    Args:
        df: Inventory frame with the full set of report columns.

    Returns:
        A new DataFrame sorted by "Site" and "Date Ret" (ascending), without
        the "Destroyed" and "Basket No." columns, with a fresh 0..n-1 index.
    """
    never_dispensed = df["Disp Status"].fillna("").str.upper().eq("NOT DISPENSED")
    came_back = df["Date Ret"].notna()
    unbasketed = df["Basket No."].isna()

    selected = df[unbasketed & (came_back | never_dispensed)].copy()
    selected = selected.sort_values(["Site", "Date Ret"], ascending=[True, True])
    selected = selected.drop(columns=["Destroyed", "Basket No."])
    selected = selected.reset_index(drop=True)
    print(f"  Kits for destruction: {len(selected)}")
    return selected


# ── Formatting ────────────────────────────────────────────────────────────────

def format_sheet(ws, header_color, highlight_col=None, highlight_color=None):
    """Apply the common table styling to a worksheet whose row 1 is the header.

    Header cells get a solid ``header_color`` fill with bold white Arial
    text; data cells get Arial 10, thin black borders and centered text.
    Columns whose header is in ``DATE_COLUMNS`` use the DD-MMM-YYYY number
    format, column widths come from ``COLUMN_WIDTHS`` (default 14), an
    auto-filter spans the used range and the header row is frozen.

    Args:
        ws: openpyxl worksheet to modify in place.
        header_color: Hex colour of the header fill.
        highlight_col: Header text of the column whose data cells are filled
            with ``highlight_color``.
        highlight_color: Hex colour of the highlight fill; no highlight is
            applied when it is empty or None.
    """
    edge = Side(style="thin", color="000000")
    box = Border(left=edge, right=edge, top=edge, bottom=edge)
    head_fill = PatternFill("solid", start_color=header_color)
    head_font = Font(bold=True, color="FFFFFF", name="Arial", size=10)
    head_align = Alignment(horizontal="center", vertical="center", wrap_text=False)
    body_font = Font(name="Arial", size=10)
    body_align = Alignment(horizontal="center")
    accent = None
    if highlight_color:
        accent = PatternFill("solid", start_color=highlight_color)

    header_cells = ws[1]
    titles = [head.value for head in header_cells]
    n_titles = len(titles)

    # Header row: styling plus the width of each column.
    for head in header_cells:
        head.fill = head_fill
        head.font = head_font
        head.alignment = head_align
        head.border = box
        letter = get_column_letter(head.column)
        ws.column_dimensions[letter].width = COLUMN_WIDTHS.get(head.value, 14)

    # Data rows: the header text of a cell's column decides its extras.
    for data_row in ws.iter_rows(min_row=2, max_row=ws.max_row):
        for cell in data_row:
            title = titles[cell.column - 1] if cell.column <= n_titles else None
            cell.font = body_font
            cell.border = box
            cell.alignment = body_align
            if title in DATE_COLUMNS:
                cell.number_format = "DD-MMM-YYYY"
            if accent and title == highlight_col:
                cell.fill = accent

    ws.auto_filter.ref = ws.dimensions
    ws.freeze_panes = "A2"


def format_overview_sheet(ws):
    """Style the overview sheet and tint its destruction columns.

    Applies ``format_sheet`` with the dark-blue header and then fills every
    data cell of the "Destroyed" and "Basket No." columns with light green.

    Args:
        ws: openpyxl worksheet to modify in place.
    """
    format_sheet(ws, header_color="1F4E79")

    tint = PatternFill("solid", start_color="E2EFDA")
    # 1-based indexes of the columns whose header marks destruction data.
    tinted_columns = {
        index
        for index, head in enumerate(ws[1], start=1)
        if head.value in ("Destroyed", "Basket No.")
    }
    for data_row in ws.iter_rows(min_row=2, max_row=ws.max_row):
        for cell in data_row:
            if cell.column in tinted_columns:
                cell.fill = tint


def format_shipment_sheet(ws):
    """Style the Shipments sheet with a two-colour header.

    The first ``N_SHIP_COLS`` header cells get the dark-blue fill and the
    remaining ones the dark-green fill. Each column is sized to its header
    length plus 4, capped at 35. Data cells are centered with thin borders,
    and cells holding a date or datetime value use the DD-MMM-YYYY number
    format. An auto-filter spans the used range and the header row is frozen.

    Args:
        ws: openpyxl worksheet to modify in place.
    """
    thin        = Side(style="thin", color="000000")
    border      = Border(left=thin, right=thin, top=thin, bottom=thin)
    hfont       = Font(bold=True, color="FFFFFF", name="Arial", size=10)
    dfont       = Font(name="Arial", size=10)
    fill_ship   = PatternFill("solid", start_color="1F4E79")
    fill_detail = PatternFill("solid", start_color="375623")
    # Style objects are reusable, so build the alignments once instead of
    # once per cell.
    halign      = Alignment(horizontal="center", vertical="center", wrap_text=True)
    dalign      = Alignment(horizontal="center", vertical="center")

    for cell in ws[1]:
        cell.fill      = fill_ship if cell.column <= N_SHIP_COLS else fill_detail
        cell.font      = hfont
        cell.alignment = halign
        cell.border    = border
        ws.column_dimensions[get_column_letter(cell.column)].width = min(
            len(str(cell.value or "")) + 4, 35
        )
    ws.row_dimensions[1].height = 30

    for row in ws.iter_rows(min_row=2, max_row=ws.max_row):
        for cell in row:
            cell.font      = dfont
            cell.border    = border
            cell.alignment = dalign
            # datetime (and pandas Timestamp) are subclasses of date, so one
            # isinstance check covers all date-like values.
            if isinstance(cell.value, date):
                cell.number_format = "DD-MMM-YYYY"

    ws.auto_filter.ref = ws.dimensions
    ws.freeze_panes    = "A2"


# ── Main ──────────────────────────────────────────────────────────────────────

# Workbook layout as (sheet name, formatter key) pairs. The order matters:
# save_study_report() zips this list with the frames returned by
# process_study(), so both must list the sheets in the same sequence.
SHEETS_DEF = [
    ("CountryMedicationOverview", "overview"),
    ("Expired",                   "expired"),
    ("Assigned not dispensed",    "assigned"),
    ("Not returned",              "not_returned"),
    ("Kits for destruction",      "destruction"),
    ("Shipments",                 "shipments"),
    ("Site Summary",              "site_summary"),
]

# Formatter per key from SHEETS_DEF; each callable takes the worksheet and
# styles it in place. The format_sheet() arguments are, in order: header
# colour, highlighted column header, highlight colour.
FORMAT_MAP = {
    "overview":     lambda ws: format_overview_sheet(ws),
    "expired":      lambda ws: format_sheet(ws, "C00000", "Exp Date",       "FFE0E0"),
    "assigned":     lambda ws: format_sheet(ws, "833C00", "Subject ID",     "FFF2CC"),
    "not_returned": lambda ws: format_sheet(ws, "375623", "Max Visit Date", "E2EFDA"),
    "destruction":  lambda ws: format_sheet(ws, "595959"),
    "shipments":    lambda ws: format_shipment_sheet(ws),
    "site_summary": lambda ws: format_sheet(ws, "1F4E79"),
}


def process_study(cursor, study):
    """Load the latest import of *study* and derive all report sheets.

    Args:
        cursor: DB cursor passed through to the loader functions.
        study: Study identifier.

    Returns:
        List of seven DataFrames in the order of ``SHEETS_DEF``: full
        inventory, expired, assigned not dispensed, not returned, kits for
        destruction, shipments and site summary.

    Raises:
        RuntimeError: Propagated from ``get_latest_import_id`` when the
            study has no import.
    """
    import_id = get_latest_import_id(cursor, study)
    print(f"  import_id = {import_id}")

    df           = load_inventory(cursor, study, import_id)
    shipments_df = load_shipments(cursor, study, import_id)

    expired_df      = build_expired(df)
    assigned_df     = build_assigned_not_dispensed(df)
    not_returned_df = build_not_returned(df)
    destruction_df  = build_kits_for_destruction(df)
    site_summ_df    = build_site_summary(shipments_df)

    # Keep this order in sync with SHEETS_DEF; save_study_report() pairs the
    # frames with the sheet names by position.
    return [
        df, expired_df, assigned_df, not_returned_df,
        destruction_df, shipments_df, site_summ_df,
    ]


def save_study_report(study, data_frames):
    """Write the frames of one study to a dated, formatted Excel workbook.

    The file is named "<YYYY-MM-DD> <study> report.xlsx" inside
    ``OUTPUT_DIR``. The frames are first written with pandas, then the
    workbook is reopened with openpyxl, every sheet is styled by its
    formatter from ``FORMAT_MAP`` and the file is saved again.

    Args:
        study: Study identifier used in the file name.
        data_frames: DataFrames in the order of ``SHEETS_DEF``.
    """
    stamp = date.today().strftime('%Y-%m-%d')
    target = OUTPUT_DIR / f"{stamp} {study} report.xlsx"

    # Pass 1: raw data, one sheet per frame.
    sheet_names = [name for name, _ in SHEETS_DEF]
    with pd.ExcelWriter(target, engine="openpyxl") as writer:
        for name, frame in zip(sheet_names, data_frames):
            frame.to_excel(writer, index=False, sheet_name=name)

    # Pass 2: styling on the saved workbook.
    workbook = load_workbook(target)
    for name, fmt_key in SHEETS_DEF:
        apply_format = FORMAT_MAP[fmt_key]
        apply_format(workbook[name])
    workbook.save(target)
    print(f"  Uloženo: {target}")


def main():
    """Generate the Excel report for every study in ``STUDIES``.

    A failure in one study is printed together with its traceback and the
    remaining studies are still processed. The cursor and the connection
    are closed in ``finally`` blocks so they are released even when the run
    is interrupted or the cursor cannot be created.
    """
    import traceback

    OUTPUT_DIR.mkdir(exist_ok=True)

    conn = get_conn()
    try:
        cursor = conn.cursor(dictionary=True)
        try:
            for study, _ in STUDIES:
                print(f"\n{'='*55}")
                print(f"[{study}]")
                print(f"{'='*55}")
                try:
                    data_frames = process_study(cursor, study)
                    save_study_report(study, data_frames)
                except Exception as e:
                    # Best effort: report the failure and continue with the
                    # next study.
                    print(f"  CHYBA: {e}")
                    traceback.print_exc()
        finally:
            cursor.close()
    finally:
        conn.close()
    print("\nHotovo.")


if __name__ == "__main__":
    main()