z230
This commit is contained in:
@@ -0,0 +1,368 @@
|
||||
import pandas as pd
|
||||
from datetime import date
|
||||
from pathlib import Path
|
||||
from openpyxl import load_workbook
|
||||
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
|
||||
from openpyxl.utils import get_column_letter
|
||||
|
||||
# Study whose exported IWRS reports are processed. Every input directory and
# the output file name below are derived from this identifier.
# Alternative study ID that can be swapped in: 77242113UCO3001
STUDY = "42847922MDD3003"

# Input locations, all relative to the current working directory.
INVENTORY_DIR = Path("xls_reports_" + STUDY)
DESTRUCTION_DIR = Path("xls_ip_destruction_" + STUDY)
SHIPMENTS_FILE = Path("xls_shipments_" + STUDY) / f"shipments_report_{STUDY}.xlsx"
DETAILS_DIR = Path("xls_shipment_details_" + STUDY)

# Output workbook; its name is prefixed with today's date (YYYY-MM-DD),
# evaluated once when the module is loaded.
OUTPUT_DIR = Path("output")
OUTPUT_FILE = OUTPUT_DIR / f"{date.today():%Y-%m-%d} {STUDY} CZ IWRS overview.xlsx"
|
||||
|
||||
# Column headers that build_shipments() removes from the shipments report and
# never copies over from the per-shipment detail files.
SHIPMENT_DROP_COLS = {
    "Location",
    "Shipped Date",
    "Delivered Date [UTC]",
    "Delivery Recipient",
    "Delivery Details",
    "Cancelled Date",
    "Tracking #",
    "Total Medication IDs",
    "Shipping Category",
    "Study",
    "Destination Location",
    "Destination Site",
    "Medication type",
    "Container ID",
    "Quantity of Medication IDs",
    "Packaged Lot description",
}
|
||||
|
||||
# ── Shared constants ──────────────────────────────────────────────────────────
|
||||
|
||||
# Maps column headers found in the source reports to the short headers used
# in the output workbook (applied by build_main()).
COLUMN_RENAMES = {
    # Kit identification and receipt
    "Site": "Site",
    "Medication ID": "Med ID",
    "Packaged Lot number": "Lot No.",
    "Original Expiration Date when Packaged Lot was Added": "Orig Exp Date",
    "Expiration date": "Exp Date",
    "Received Date": "Rcv Date",
    "Shipment Receipt User": "Rcpt User",
    # Assignment
    "Subject Identifier": "Subject ID",
    "Quantity Assigned": "Qty Asgn",
    "IRT Transaction": "IRT Tx",
    "Date Assigned": "Date Asgn",
    "Assignment User": "Asgn User",
    # Dispensing (both capitalisations of the date header are mapped;
    # presumably the exports differ in spelling, to be confirmed)
    "Dispensation Status": "Disp Status",
    "Dispensing Date": "Disp Date",
    "Dispensing date": "Disp Date",
    "Quantity Dispensed": "Qty Disp",
    "Dispensing User": "Disp User",
    # Return
    "Quantity Returned": "Qty Ret",
    "Date Returned": "Date Ret",
    "Return User": "Ret User",
    # Columns added by build_main() from the destruction lookup
    "DestroyedOn": "Destroyed",
    "Basket number": "Basket No.",
}
|
||||
|
||||
# Output-workbook columns holding dates: build_main() parses them with
# pd.to_datetime and format_sheet() gives them the DD-MMM-YYYY number format.
DATE_COLUMNS = {
    "Orig Exp Date",
    "Exp Date",
    "Rcv Date",
    "Date Asgn",
    "Disp Date",
    "Date Ret",
    "Destroyed",
    "Max Visit Date",
}
|
||||
|
||||
# Excel column widths keyed by output header; format_sheet() falls back to a
# width of 14 for any header that is not listed here.
COLUMN_WIDTHS = {
    # Kit identification and receipt
    "Site": 14,
    "Med ID": 10,
    "Lot No.": 12,
    "Orig Exp Date": 16,
    "Exp Date": 14,
    "Rcv Date": 14,
    "Rcpt User": 22,
    # Assignment
    "Subject ID": 14,
    "Qty Asgn": 9,
    "IRT Tx": 8,
    "Date Asgn": 14,
    "Asgn User": 20,
    # Dispensing
    "Disp Status": 16,
    "Disp Date": 14,
    "Qty Disp": 9,
    "Disp User": 20,
    # Return
    "Qty Ret": 10,
    "Date Ret": 14,
    "Ret User": 18,
    # Destruction and derived columns
    "Destroyed": 14,
    "Basket No.": 12,
    "Max Visit Date": 16,
}
|
||||
|
||||
# ── Helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
def read_inventory(path):
    """Read one on-site inventory report.

    The workbook is first read without a header so that the ``Site:`` line
    and the row holding the table header can be located in column 0.

    Args:
        path: ``pathlib.Path`` of the ``.xlsx`` report.

    Returns:
        ``(data, meta)``. ``data`` is the table re-read with the located
        header row, with a ``"Medication"`` column renamed to
        ``"Medication ID"``; it is ``None`` when the sheet is empty or has no
        header row. ``meta`` holds ``"site"`` when a ``Site:`` line was found.
    """
    raw = pd.read_excel(path, header=None)
    meta = {}

    # A completely empty sheet has no column 0, so indexing it would raise;
    # treat it like any other report without data.
    if raw.empty:
        print(f" {path.name}: no data (skipping)")
        return None, meta

    first_col = raw[0]

    # When several "Site:" lines exist the last one wins.
    for cell in first_col:
        text = str(cell) if pd.notna(cell) else ""
        if text.startswith("Site:"):
            meta["site"] = text.replace("Site:", "").strip()

    # Support both "Medication ID" (MDD3003) and "Medication" (UCO3001)
    is_header = first_col.isin(["Medication ID", "Medication"])
    if not is_header.any():
        print(f" {path.name}: no data (skipping)")
        return None, meta

    header_row = raw[is_header].index[0]
    # Re-read so pandas infers column dtypes from the data rows only.
    data = pd.read_excel(path, header=header_row)
    data = data.rename(columns={"Medication": "Medication ID"})
    return data, meta
|
||||
|
||||
|
||||
def read_destruction_lookup():
    """Build a lookup of destroyed kits from the reports in DESTRUCTION_DIR.

    For every ``*.xlsx`` file, the first rows of column 0 (at most 15) are
    scanned for the ``Basket ID:`` and ``Drug Destruction Created Date:``
    lines, then the table whose header row starts with
    ``"Medication ID Description"`` is read.

    Returns:
        dict mapping ``int`` Medication ID to ``(basket_id, destroyed_on)``,
        both taken verbatim (as stripped strings) from the report header, or
        ``None`` when the corresponding line was not found.
    """
    lookup = {}
    # Sorted so that, if a Medication ID occurs in several reports, the
    # report that wins is deterministic (last file in name order).
    for path in sorted(DESTRUCTION_DIR.glob("*.xlsx")):
        raw = pd.read_excel(path, header=None)
        if raw.empty:
            print(f" {path.name}: no data (skipping)")
            continue

        first_col = raw[0]
        basket_id = None
        destroyed_on = None
        # head(15) keeps the original scan window but tolerates shorter files.
        for cell in first_col.head(15):
            text = str(cell) if pd.notna(cell) else ""
            if text.startswith("Basket ID:"):
                basket_id = text.replace("Basket ID:", "").strip()
            if text.startswith("Drug Destruction Created Date:"):
                destroyed_on = text.replace("Drug Destruction Created Date:", "").strip()

        header_rows = raw.index[first_col == "Medication ID Description"]
        if len(header_rows) == 0:
            # No kit table in this report; nothing to add to the lookup.
            print(f" {path.name}: no data (skipping)")
            continue

        data = pd.read_excel(path, header=int(header_rows[0]))
        for med_id in data["Medication ID"].dropna():
            lookup[int(med_id)] = (basket_id, destroyed_on)
    return lookup
|
||||
|
||||
|
||||
def format_sheet(ws, header_color, highlight_col=None, highlight_color=None):
    """Apply the common table styling to a worksheet in place.

    Row 1 gets a bold white Arial font on a solid ``header_color`` fill; all
    later rows get Arial 10, centred text and thin black borders. Columns
    whose header is in DATE_COLUMNS use the ``DD-MMM-YYYY`` number format.
    Column widths come from COLUMN_WIDTHS (default 14). Finally an
    auto-filter over the used range is set and the header row is frozen.

    Args:
        ws: openpyxl worksheet to format.
        header_color: RGB hex string for the header fill.
        highlight_col: header of a column whose data cells are filled.
        highlight_color: RGB hex string for that fill; no highlighting is
            applied when it is falsy.
    """
    edge = Side(style="thin", color="000000")
    box = Border(left=edge, right=edge, top=edge, bottom=edge)
    body_font = Font(name="Arial", size=10)
    highlight_fill = None
    if highlight_color:
        highlight_fill = PatternFill("solid", start_color=highlight_color)

    header_cells = ws[1]
    # Header text by 1-based column number; columns without a header cell
    # resolve to None below.
    header_by_column = {cell.column: cell.value for cell in header_cells}

    # Header row: styling and column width in a single pass.
    head_fill = PatternFill("solid", start_color=header_color)
    head_font = Font(bold=True, color="FFFFFF", name="Arial", size=10)
    for head in header_cells:
        head.fill = head_fill
        head.font = head_font
        head.alignment = Alignment(horizontal="center", vertical="center", wrap_text=False)
        head.border = box
        letter = get_column_letter(head.column)
        ws.column_dimensions[letter].width = COLUMN_WIDTHS.get(head.value, 14)

    # Data rows.
    for data_row in ws.iter_rows(min_row=2, max_row=ws.max_row):
        for cell in data_row:
            col_name = header_by_column.get(cell.column)
            cell.font = body_font
            cell.border = box
            cell.alignment = Alignment(horizontal="center")
            if col_name in DATE_COLUMNS:
                cell.number_format = "DD-MMM-YYYY"
            if highlight_fill and col_name == highlight_col:
                cell.fill = highlight_fill

    ws.auto_filter.ref = ws.dimensions
    ws.freeze_panes = "A2"
|
||||
|
||||
|
||||
# ── Shipment helpers ─────────────────────────────────────────────────────────
|
||||
|
||||
def build_shipments():
    """Combine the shipments report with the per-shipment detail files.

    SHIPMENTS_FILE is read (header on the sixth row), blank rows and the
    SHIPMENT_DROP_COLS columns are removed. For every shipment that has a
    ``shipment_details_<Shipment ID>.xlsx`` file in DETAILS_DIR, each detail
    row yields one output row: the shipment's values followed by the detail
    columns that are neither shipment columns, ``"Shipment"`` nor dropped
    columns. Shipments without a detail file contribute no rows.

    Returns:
        DataFrame with the shipment columns first, then the detail columns in
        the order they were first seen. ``Request Date``, ``Received Date``
        and ``Expiration Date`` are converted to datetimes when present. The
        frame is empty (shipment columns only) when no detail rows exist.
    """
    sh = pd.read_excel(SHIPMENTS_FILE, sheet_name=0, header=5)
    sh.columns = sh.columns.str.strip()
    sh = sh.dropna(how="all")
    sh["Shipment ID"] = sh["Shipment ID"].astype(str).str.strip()
    sh = sh.drop(columns=[c for c in SHIPMENT_DROP_COLS if c in sh.columns])
    shipment_cols = list(sh.columns)

    all_rows = []
    # Detail columns collected across ALL detail files, so the result does
    # not depend on which file happened to be read last, and the name is
    # defined even when no detail file exists.
    detail_cols = []
    for _, s_row in sh.iterrows():
        sid = s_row["Shipment ID"]
        path = DETAILS_DIR / f"shipment_details_{sid}.xlsx"
        if not path.exists():
            continue
        det = pd.read_excel(path, sheet_name=0, header=5)
        det.columns = det.columns.str.strip()
        det = det.dropna(how="all")
        det["Shipment"] = det["Shipment"].astype(str).str.strip()
        extra_cols = [
            c for c in det.columns
            if c not in shipment_cols and c != "Shipment" and c not in SHIPMENT_DROP_COLS
        ]
        for c in extra_cols:
            if c not in detail_cols:
                detail_cols.append(c)

        shipment_values = s_row.to_dict()  # invariant for all rows of this shipment
        for _, d_row in det.iterrows():
            all_rows.append({**shipment_values, **{c: d_row[c] for c in extra_cols}})

    result = pd.DataFrame(all_rows)
    all_cols = shipment_cols + [c for c in detail_cols if c in result.columns]
    # reindex (rather than result[all_cols]) also works when there are no
    # rows at all: it yields an empty frame that still has the columns.
    result = result.reindex(columns=all_cols)

    for col in ["Request Date", "Received Date", "Expiration Date"]:
        if col in result.columns:
            result[col] = pd.to_datetime(result[col], errors="coerce")

    print(f" Shipments: {result['Shipment ID'].nunique()} shipments, {len(result)} kitu")
    return result
|
||||
|
||||
|
||||
def build_site_summary(result):
    """Count kits per site and status.

    Args:
        result: DataFrame with at least the ``"Ship To:"`` and ``"Status"``
            columns (as produced by build_shipments()).

    Returns:
        DataFrame sorted by ``Site`` with the columns ``Site``, ``Available``,
        ``Assigned``, ``Dispensed``, ``Returned`` and ``Total``. Statuses
        other than those four are not counted; ``Total`` is the sum of the
        four count columns.
    """
    wanted_statuses = ["Available", "Assigned", "Dispensed", "Returned by Subject"]

    counts = result.groupby("Ship To:")["Status"].value_counts()
    table = counts.unstack(fill_value=0)
    # Keep exactly the wanted statuses, adding an all-zero column for any
    # status that does not occur in the data.
    table = table.reindex(columns=wanted_statuses, fill_value=0)

    summary = table.reset_index()
    summary = summary.rename(columns={"Ship To:": "Site", "Returned by Subject": "Returned"})
    summary = summary.sort_values("Site").reset_index(drop=True)

    count_cols = ["Available", "Assigned", "Dispensed", "Returned"]
    summary["Total"] = summary[count_cols].sum(axis=1)

    print(f" Site Summary: {len(summary)} center")
    return summary
|
||||
|
||||
|
||||
def format_shipment_sheet(ws, header_color_ship, header_color_detail, n_ship_cols):
    """Apply styling to the shipments worksheet in place.

    The header row uses two fills: the first ``n_ship_cols`` columns get
    ``header_color_ship``, the remaining ones ``header_color_detail``. Each
    column is sized to its header length plus 4, capped at 35. Data cells are
    centred with thin borders, and cells holding a date or datetime value
    get the ``DD-MMM-YYYY`` number format. An auto-filter is set over the
    used range and the header row is frozen.

    Args:
        ws: openpyxl worksheet to format.
        header_color_ship: RGB hex string for the shipment-column headers.
        header_color_detail: RGB hex string for the detail-column headers.
        n_ship_cols: number of leading columns styled as shipment columns
            (compared against the 1-based column number).
    """
    thin = Side(style="thin", color="000000")
    border = Border(left=thin, right=thin, top=thin, bottom=thin)
    hfont = Font(bold=True, color="FFFFFF", name="Arial", size=10)
    dfont = Font(name="Arial", size=10)
    fill_ship = PatternFill("solid", start_color=header_color_ship)
    fill_detail = PatternFill("solid", start_color=header_color_detail)
    # Loop-invariant style objects, created once instead of once per cell.
    header_alignment = Alignment(horizontal="center", vertical="center", wrap_text=True)
    data_alignment = Alignment(horizontal="center", vertical="center")

    for cell in ws[1]:
        cell.fill = fill_ship if cell.column <= n_ship_cols else fill_detail
        cell.font = hfont
        cell.alignment = header_alignment
        cell.border = border
        width = min(len(str(cell.value or "")) + 4, 35)
        ws.column_dimensions[get_column_letter(cell.column)].width = width
    ws.row_dimensions[1].height = 30

    for row in ws.iter_rows(min_row=2, max_row=ws.max_row):
        for cell in row:
            cell.font = dfont
            cell.border = border
            cell.alignment = data_alignment
            # datetime (and pandas Timestamp) are subclasses of date, so a
            # single isinstance check replaces matching on class names.
            if isinstance(cell.value, date):
                cell.number_format = "DD-MMM-YYYY"

    ws.auto_filter.ref = ws.dimensions
    ws.freeze_panes = "A2"
|
||||
|
||||
|
||||
# ── Build DataFrames ──────────────────────────────────────────────────────────
|
||||
|
||||
def build_main(lookup):
    """Build the combined kit overview from all on-site inventory reports.

    Every ``onsite_inventory_detail_*.xlsx`` file in INVENTORY_DIR is read
    with read_inventory(); reports without data are skipped. Each kit gets
    the ``DestroyedOn`` and ``Basket number`` values from ``lookup`` and a
    leading ``Site`` column (the report's ``Site:`` value, or the file stem
    when the report has none). The frames are concatenated, renamed with
    COLUMN_RENAMES, the DATE_COLUMNS are parsed day-first (unparseable values
    become NaT) and the result is sorted by Site, Rcv Date and Med ID.

    Args:
        lookup: dict mapping ``int`` Medication ID to
            ``(basket_id, destroyed_on)``.

    Returns:
        The combined DataFrame.

    Raises:
        ValueError: when no inventory report contains data.
    """
    frames = []
    for path in sorted(INVENTORY_DIR.glob("onsite_inventory_detail_*.xlsx")):
        df, meta = read_inventory(path)
        if df is None:
            continue
        # One lookup per kit; both new columns are derived from its result.
        destruction = [
            lookup.get(int(med_id), (None, None)) if pd.notna(med_id) else (None, None)
            for med_id in df["Medication ID"]
        ]
        df["DestroyedOn"] = [destroyed_on for _, destroyed_on in destruction]
        df["Basket number"] = [basket_id for basket_id, _ in destruction]
        df.insert(0, "Site", meta.get("site", path.stem))
        frames.append(df)
        print(f" {path.name}: {len(df)} kits")

    if not frames:
        # pd.concat would raise the same exception type with the opaque
        # message "No objects to concatenate"; say what is actually missing.
        raise ValueError(f"No inventory data found in {INVENTORY_DIR}")

    combined = pd.concat(frames, ignore_index=True)
    combined.rename(columns=COLUMN_RENAMES, inplace=True)
    for col in DATE_COLUMNS:
        if col in combined.columns:
            combined[col] = pd.to_datetime(combined[col], dayfirst=True, errors="coerce")
    combined.sort_values(["Site", "Rcv Date", "Med ID"], inplace=True, ignore_index=True)
    return combined
|
||||
|
||||
|
||||
def build_expired(df):
    """Select kits whose expiry date has passed and that are still unused.

    A kit is selected when it has no ``Basket No.``, no ``Subject ID`` and
    its ``Exp Date`` is before today's date (midnight).

    Args:
        df: overview DataFrame with ``Basket No.``, ``Subject ID`` and a
            datetime ``Exp Date`` column.

    Returns:
        ``(filtered, sheet_name)``: a re-indexed copy of the selected rows
        and the sheet title ``"Expired as of DD-Mon-YYYY"`` for today.
    """
    today = date.today()
    cutoff = pd.Timestamp(today)

    not_in_basket = df["Basket No."].isna()
    unassigned = df["Subject ID"].isna()
    past_expiry = df["Exp Date"] < cutoff

    selected = df.loc[not_in_basket & unassigned & past_expiry]
    filtered = selected.copy().reset_index(drop=True)

    print(f" Expired: {len(filtered)}")
    sheet_name = f"Expired as of {today.strftime('%d-%b-%Y')}"
    return filtered, sheet_name
|
||||
|
||||
|
||||
def build_assigned_not_dispensed(df):
    """Select kits that have a ``Subject ID`` but no ``Disp Date``.

    Args:
        df: overview DataFrame with ``Subject ID`` and ``Disp Date`` columns.

    Returns:
        A re-indexed copy of the matching rows.
    """
    has_subject = df["Subject ID"].notna()
    no_dispense_date = df["Disp Date"].isna()
    selected = df.loc[has_subject & no_dispense_date]
    filtered = selected.copy().reset_index(drop=True)
    print(f" Assigned not dispensed: {len(filtered)}")
    return filtered
|
||||
|
||||
|
||||
def build_not_returned(df):
    """Select assigned kits that were not returned although the subject has a later assignment.

    Candidates have no ``Date Ret``, a ``Subject ID`` and a ``Disp Status``
    other than "NOT DISPENSED" (case-insensitive). Each candidate is given
    the subject's latest ``Date Asgn`` (over all rows of ``df``) as
    ``Max Visit Date``; only candidates assigned before that date are kept.

    Args:
        df: overview DataFrame.

    Returns:
        A re-indexed frame of the kept rows, without the return and
        destruction columns and with the added ``Max Visit Date`` column.
    """
    never_returned = df["Date Ret"].isna()
    has_subject = df["Subject ID"].notna()
    was_dispensed = df["Disp Status"].str.upper() != "NOT DISPENSED"
    candidates = df.loc[never_returned & has_subject & was_dispensed].copy()

    # Latest assignment date per subject, looked up for every candidate.
    latest_assignment = df.groupby("Subject ID")["Date Asgn"].max().rename("Max Visit Date")
    candidates = candidates.join(latest_assignment, on="Subject ID")

    has_later_visit = candidates["Date Asgn"] < candidates["Max Visit Date"]
    unused_columns = ["Qty Ret", "Date Ret", "Ret User", "Destroyed", "Basket No."]
    filtered = (
        candidates.loc[has_later_visit]
        .copy()
        .drop(columns=unused_columns)
        .reset_index(drop=True)
    )
    print(f" Not returned: {len(filtered)}")
    return filtered
|
||||
|
||||
|
||||
def build_kits_for_destruction(df):
    """Select kits that are ready for destruction but not yet in a basket.

    A kit qualifies when it has no ``Basket No.`` and either has a
    ``Date Ret`` or its ``Disp Status`` is "NOT DISPENSED" (case-insensitive).

    Args:
        df: overview DataFrame.

    Returns:
        The qualifying rows sorted by ``Site`` then ``Date Ret`` (ascending),
        re-indexed, without the ``Destroyed`` and ``Basket No.`` columns.
    """
    not_in_basket = df["Basket No."].isna()
    returned = df["Date Ret"].notna()
    never_dispensed = df["Disp Status"].str.upper() == "NOT DISPENSED"

    selected = df.loc[not_in_basket & (returned | never_dispensed)].copy()
    ordered = selected.sort_values(["Site", "Date Ret"], ascending=[True, True])
    filtered = ordered.drop(columns=["Destroyed", "Basket No."])
    filtered = filtered.reset_index(drop=True)

    print(f" Kits for destruction: {len(filtered)}")
    return filtered
|
||||
|
||||
|
||||
# ── Main ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
def main():
    """Generate the IWRS overview workbook for STUDY.

    Steps:
      1. Load the destruction lookup and build the main overview frame, its
         four derived views, the shipments frame and the site summary.
      2. Create OUTPUT_DIR and delete any earlier overview file for STUDY.
      3. Write the seven sheets to OUTPUT_FILE.
      4. Reopen the workbook with openpyxl, apply the formatting and save.
    """
    # All inputs are read and all frames built BEFORE the previous overview
    # is deleted, so a failure while processing the inputs does not leave the
    # output directory without any overview file.
    lookup = read_destruction_lookup()
    print(f"Loaded {len(lookup)} kits from destruction reports")

    df = build_main(lookup)

    expired_df, expired_sheet = build_expired(df)
    assigned_df = build_assigned_not_dispensed(df)
    not_returned_df = build_not_returned(df)
    destruction_df = build_kits_for_destruction(df)

    shipments_df = build_shipments()
    site_summary_df = build_site_summary(shipments_df)

    # "Investigator" is the first detail column, so its 0-based position is
    # the number of shipment columns. Without it, every column is treated as
    # a shipment column instead of failing with ValueError.
    ship_columns = shipments_df.columns.tolist()
    if "Investigator" in ship_columns:
        n_ship_cols = ship_columns.index("Investigator")
    else:
        n_ship_cols = len(ship_columns)

    # Prepare output dir, remove any previous overview file
    OUTPUT_DIR.mkdir(exist_ok=True)
    for old in OUTPUT_DIR.glob(f"*{STUDY} CZ IWRS overview.xlsx"):
        old.unlink()
        print(f"Removed old file: {old.name}")

    # Write all sheets (order defines the tab order in the workbook)
    sheets = [
        ("CountryMedicationOverview", df),
        (expired_sheet, expired_df),
        ("Assigned not dispensed", assigned_df),
        ("Not returned", not_returned_df),
        ("Kits for destruction", destruction_df),
        ("Shipments", shipments_df),
        ("Site Summary", site_summary_df),
    ]
    with pd.ExcelWriter(OUTPUT_FILE, engine="openpyxl") as writer:
        for sheet_name, frame in sheets:
            frame.to_excel(writer, index=False, sheet_name=sheet_name)

    # Format all sheets
    wb = load_workbook(OUTPUT_FILE)

    # Main sheet: dark blue header, green fill for Destroyed / Basket No.
    ws_main = wb["CountryMedicationOverview"]
    format_sheet(ws_main, header_color="1F4E79")
    new_col_fill = PatternFill("solid", start_color="E2EFDA")
    for header_cell in ws_main[1]:
        if header_cell.value not in ("Destroyed", "Basket No."):
            continue
        # Only the data cells of this one column are visited.
        column_cells = ws_main.iter_rows(
            min_row=2,
            max_row=ws_main.max_row,
            min_col=header_cell.column,
            max_col=header_cell.column,
        )
        for (cell,) in column_cells:
            cell.fill = new_col_fill

    format_sheet(wb[expired_sheet], header_color="C00000", highlight_col="Exp Date", highlight_color="FFE0E0")
    format_sheet(wb["Assigned not dispensed"], header_color="833C00", highlight_col="Subject ID", highlight_color="FFF2CC")
    format_sheet(wb["Not returned"], header_color="375623", highlight_col="Max Visit Date", highlight_color="E2EFDA")
    format_sheet(wb["Kits for destruction"], header_color="595959")
    format_shipment_sheet(wb["Shipments"], "1F4E79", "375623", n_ship_cols)
    format_sheet(wb["Site Summary"], header_color="1F4E79")

    wb.save(OUTPUT_FILE)
    print(f"\nSaved: {OUTPUT_FILE} ({len(df)} rows on main sheet, {wb.sheetnames})")
|
||||
|
||||
|
||||
# Build the workbook only when this file is run as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user