"""Build the "Not returned" report workbook from the combined accountability file.

The script reads ``SOURCE_FILE`` and keeps rows that

* have no ``Date Ret`` value,
* have a ``Subject ID``,
* do not have ``Disp Status`` equal to "NOT DISPENSED" (case-insensitive), and
* have a ``Date Asgn`` earlier than the latest ``Date Asgn`` recorded for the
  same ``Subject ID`` in the full dataset.

The result is written to ``OUTPUT_FILE`` (sheet ``SHEET_NAME``) and then
re-opened with openpyxl to apply fonts, borders, date formats, column widths,
an auto-filter and a frozen header row.
"""

import pandas as pd
from openpyxl import load_workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter

SOURCE_FILE = "accountability_combined.xlsx"
OUTPUT_FILE = "sheet_not_returned.xlsx"
SHEET_NAME = "Not returned"

# Columns parsed as datetimes on load and shown as DD-MMM-YYYY in the output.
DATE_COLUMNS = {
    "Orig Exp Date",
    "Exp Date",
    "Rcv Date",
    "Date Asgn",
    "Disp Date",
    "Max Visit Date",
}

# Output column widths keyed by header text.
COLUMN_WIDTHS = {
    "Site": 14,
    "Med ID": 10,
    "Lot No.": 12,
    "Orig Exp Date": 16,
    "Exp Date": 14,
    "Rcv Date": 14,
    "Rcpt User": 22,
    "Subject ID": 14,
    "Qty Asgn": 9,
    "IRT Tx": 8,
    "Date Asgn": 14,
    "Asgn User": 20,
    "Disp Status": 16,
    "Disp Date": 14,
    "Qty Disp": 9,
    "Disp User": 20,
    "Max Visit Date": 16,
}

# Width used for any header that has no entry in COLUMN_WIDTHS.
DEFAULT_COLUMN_WIDTH = 14

# ---------------------------------------------------------------------------
# Load and filter
# ---------------------------------------------------------------------------
df = pd.read_excel(SOURCE_FILE)

# Unparseable values become NaT instead of raising.
for col in DATE_COLUMNS:
    if col in df.columns:
        df[col] = pd.to_datetime(df[col], errors="coerce")

# `.str` only exists on string-like columns: a "Disp Status" column that is
# completely empty is loaded as float and `.str.upper()` would raise
# AttributeError. Casting to str first avoids that; missing values still do not
# equal "NOT DISPENSED", so those rows are kept exactly as before.
status_upper = df["Disp Status"].astype(str).str.upper()

# Kits with no return date, assigned to a patient, and not "NOT DISPENSED".
no_ret = df[
    df["Date Ret"].isna()
    & df["Subject ID"].notna()
    & (status_upper != "NOT DISPENSED")
].copy()

# Latest Date Asgn per patient, computed from the full (unfiltered) dataset.
max_asgn = df.groupby("Subject ID")["Date Asgn"].max().rename("Max Visit Date")
no_ret = no_ret.join(max_asgn, on="Subject ID")

# Keep only kits whose Date Asgn is NOT the latest one for that patient.
filtered = no_ret[no_ret["Date Asgn"] < no_ret["Max Visit Date"]].copy()

# Drop the columns that are not wanted in the report; the joined
# "Max Visit Date" column stays in the output.
filtered = filtered.drop(
    columns=["Qty Ret", "Date Ret", "Ret User", "Destroyed", "Basket No."]
)
filtered = filtered.reset_index(drop=True)

print(f"Not returned kits: {len(filtered)}")

filtered.to_excel(OUTPUT_FILE, index=False, sheet_name=SHEET_NAME)

# ---------------------------------------------------------------------------
# Formatting
# ---------------------------------------------------------------------------
wb = load_workbook(OUTPUT_FILE)
ws = wb[SHEET_NAME]

header_fill = PatternFill("solid", start_color="375623")  # dark green
header_font = Font(bold=True, color="FFFFFF", name="Arial", size=10)
row_font = Font(name="Arial", size=10)
# Light green highlight applied to the "Max Visit Date" column.
max_visit_fill = PatternFill("solid", start_color="E2EFDA")
thin = Side(style="thin", color="000000")
border = Border(left=thin, right=thin, top=thin, bottom=thin)

# Alignment objects are identical for every cell, so build them once.
header_alignment = Alignment(horizontal="center", vertical="center", wrap_text=False)
body_alignment = Alignment(horizontal="center")

headers = [cell.value for cell in ws[1]]
# Column index -> header text; .get() returns None for a column with no header.
header_by_column = {cell.column: cell.value for cell in ws[1]}

# Header row: styling and column widths.
for cell in ws[1]:
    cell.fill = header_fill
    cell.font = header_font
    cell.alignment = header_alignment
    cell.border = border
    width = COLUMN_WIDTHS.get(cell.value, DEFAULT_COLUMN_WIDTH)
    ws.column_dimensions[get_column_letter(cell.column)].width = width

# Data rows (everything below the header).
for row in ws.iter_rows(min_row=2, max_row=ws.max_row):
    for cell in row:
        col_name = header_by_column.get(cell.column)
        cell.font = row_font
        cell.border = border
        cell.alignment = body_alignment
        if col_name in DATE_COLUMNS:
            cell.number_format = "DD-MMM-YYYY"
        if col_name == "Max Visit Date":
            cell.fill = max_visit_fill

# Filter over the whole used range and keep the header visible while scrolling.
ws.auto_filter.ref = ws.dimensions
ws.freeze_panes = "A2"

wb.save(OUTPUT_FILE)
print(f"Saved: {OUTPUT_FILE} (sheet: '{SHEET_NAME}')")