Initial commit: Janssen 4G Clinical report automation
- download_reports.py: Playwright script for site inventory XLS downloads
- download_ip_destruction.py: Playwright script for IP destruction basket downloads
- create_accountability_report.py: combines both sources into a formatted accountability Excel workbook
- list_reports.py: discovers the reports available on the portal
- reports.json: 21 available report URLs
- .gitignore: excludes downloaded XLS files and the output Excel workbook

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,172 @@
|
||||
import pandas as pd
|
||||
from pathlib import Path
|
||||
from openpyxl import load_workbook
|
||||
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
|
||||
from openpyxl.utils import get_column_letter
|
||||
|
||||
# --- Input locations -------------------------------------------------------
# Folder scanned for "onsite_inventory_detail_*.xlsx" workbooks.
INVENTORY_DIR = Path("xls_reports")
# Folder scanned for destruction-report "*.xlsx" workbooks.
DESTRUCTION_DIR = Path("xls_ip_destruction")

# --- Output ----------------------------------------------------------------
# Workbook written by main(), and the name of its single worksheet.
OUTPUT_FILE = "accountability_combined.xlsx"
SHEET_NAME = "CountryMedicationOverview"
|
||||
|
||||
# Source column header -> short header used in the combined sheet.
# Passed to DataFrame.rename(), so headers not listed here are left untouched.
COLUMN_RENAMES = {
    # Kit identity and receipt
    "Site": "Site",
    "Medication ID": "Med ID",
    "Packaged Lot number": "Lot No.",
    "Original Expiration Date when Packaged Lot was Added": "Orig Exp Date",
    "Expiration date": "Exp Date",
    "Received Date": "Rcv Date",
    "Shipment Receipt User": "Rcpt User",
    # Assignment
    "Subject Identifier": "Subject ID",
    "Quantity Assigned": "Qty Asgn",
    "IRT Transaction": "IRT Tx",
    "Date Assigned": "Date Asgn",
    "Assignment User": "Asgn User",
    # Dispensation
    "Dispensation Status": "Disp Status",
    "Dispensing Date": "Disp Date",
    "Quantity Dispensed": "Qty Disp",
    "Dispensing User": "Disp User",
    # Return
    "Quantity Returned": "Qty Ret",
    "Date Returned": "Date Ret",
    "Return User": "Ret User",
    # Columns added by this script from the destruction lookup
    "DestroyedOn": "Destroyed",
    "Basket number": "Basket No.",
}
|
||||
|
||||
# Renamed headers whose values are parsed as dates and shown as DD-MMM-YYYY.
DATE_COLUMNS = {
    "Orig Exp Date",
    "Exp Date",
    "Rcv Date",
    "Date Asgn",
    "Disp Date",
    "Date Ret",
    "Destroyed",
}
|
||||
|
||||
COLUMN_WIDTHS = {
|
||||
"Site": 14,
|
||||
"Med ID": 10,
|
||||
"Lot No.": 12,
|
||||
"Orig Exp Date": 16,
|
||||
"Exp Date": 14,
|
||||
"Rcv Date": 14,
|
||||
"Rcpt User": 22,
|
||||
"Subject ID": 14,
|
||||
"Qty Asgn": 9,
|
||||
"IRT Tx": 8,
|
||||
"Date Asgn": 14,
|
||||
"Asgn User": 20,
|
||||
"Disp Status": 16,
|
||||
"Disp Date": 14,
|
||||
"Qty Disp": 9,
|
||||
"Disp User": 20,
|
||||
"Qty Ret": 10,
|
||||
"Date Ret": 14,
|
||||
"Ret User": 18,
|
||||
"Destroyed": 14,
|
||||
"Basket No.": 12,
|
||||
}
|
||||
|
||||
|
||||
def read_inventory(path):
    """Load one inventory workbook and return ``(data, meta)``.

    The workbook is first read without a header so the table header can be
    located: it is the first row whose first cell equals "Medication ID".
    The rows above it are scanned for a first-column cell starting with
    "Site:"; the text after that marker is stored under ``meta["site"]``
    (the last such row wins). ``data`` is the workbook re-read with the
    located row as its header.

    Raises IndexError when no row starts with "Medication ID".
    """
    raw = pd.read_excel(path, header=None)
    header_row = raw.index[raw[0] == "Medication ID"][0]

    meta = {}
    for cell in raw.iloc[:header_row, 0]:
        if pd.isna(cell):
            continue
        text = str(cell)
        if text.startswith("Site:"):
            meta["site"] = text.replace("Site:", "").strip()

    return pd.read_excel(path, header=header_row), meta
|
||||
|
||||
|
||||
def read_destruction_lookup():
    """Build a ``{medication_id: (basket_id, destroyed_on)}`` lookup.

    Every ``*.xlsx`` file in DESTRUCTION_DIR is read twice: once without a
    header to pick up the report metadata and locate the table header, and
    once with that header to get the kit rows.

    Metadata is taken from the first column of the first 15 rows (fewer if
    the report is shorter):
      * "Basket ID: <value>"                      -> basket_id
      * "Drug Destruction Created Date: <value>"  -> destroyed_on
    Both are kept as the stripped text found in the cell (not parsed), and
    stay ``None`` when the marker is absent.

    The table header is the first row whose first cell equals
    "Medication ID Description"; each non-empty value in its
    "Medication ID" column becomes an ``int`` key of the lookup.

    Returns:
        dict mapping int medication ID to a ``(basket_id, destroyed_on)`` tuple.

    Raises:
        IndexError: a report has no "Medication ID Description" header row.
        KeyError: the table has no "Medication ID" column.
    """
    lookup = {}
    # Sorted so that when a kit appears in more than one report the winning
    # entry (the last file by name) does not depend on filesystem order.
    for path in sorted(DESTRUCTION_DIR.glob("*.xlsx")):
        df = pd.read_excel(path, header=None)
        basket_id = None
        destroyed_on = None
        # A slice, unlike indexing rows 0..14 one by one, tolerates reports
        # that contain fewer than 15 rows.
        for cell in df.iloc[:15, 0]:
            val = str(cell) if pd.notna(cell) else ""
            if val.startswith("Basket ID:"):
                basket_id = val.replace("Basket ID:", "").strip()
            if val.startswith("Drug Destruction Created Date:"):
                destroyed_on = val.replace("Drug Destruction Created Date:", "").strip()
        header_row = df[df[0] == "Medication ID Description"].index[0]
        data = pd.read_excel(path, header=header_row)
        for med_id in data["Medication ID"].dropna():
            lookup[int(med_id)] = (basket_id, destroyed_on)
    return lookup
|
||||
|
||||
|
||||
def _style_sheet(ws):
    """Apply the report styling to worksheet ``ws`` in place.

    Header row: dark fill, bold white Arial 10, centred, thin border.
    Data rows: Arial 10, centred, thin border; cells under a DATE_COLUMNS
    header get the "DD-MMM-YYYY" number format and cells under the two
    destruction columns get a light green fill.
    Column widths come from COLUMN_WIDTHS (14 when a header is not listed).
    Finally the auto-filter is set over the used range and row 1 is frozen.
    """
    header_fill = PatternFill("solid", start_color="1F4E79")
    header_font = Font(bold=True, color="FFFFFF", name="Arial", size=10)
    new_col_fill = PatternFill("solid", start_color="E2EFDA")
    row_font = Font(name="Arial", size=10)

    thin = Side(style="thin", color="000000")
    border = Border(left=thin, right=thin, top=thin, bottom=thin)

    # Built once and shared by every cell rather than re-created per cell.
    header_alignment = Alignment(horizontal="center", vertical="center", wrap_text=False)
    body_alignment = Alignment(horizontal="center")

    headers = [cell.value for cell in ws[1]]
    new_cols = {"Destroyed", "Basket No."}

    for cell in ws[1]:
        cell.fill = header_fill
        cell.font = header_font
        cell.alignment = header_alignment
        cell.border = border
        width = COLUMN_WIDTHS.get(cell.value, 14)
        ws.column_dimensions[get_column_letter(cell.column)].width = width

    for row in ws.iter_rows(min_row=2, max_row=ws.max_row):
        for cell in row:
            col_name = headers[cell.column - 1] if cell.column <= len(headers) else None
            cell.font = row_font
            cell.border = border
            cell.alignment = body_alignment
            if col_name in DATE_COLUMNS:
                cell.number_format = "DD-MMM-YYYY"
            if col_name in new_cols:
                cell.fill = new_col_fill

    ws.auto_filter.ref = ws.dimensions
    ws.freeze_panes = "A2"


def main():
    """Combine all inventory workbooks into one formatted accountability sheet.

    Steps:
      1. Load the destruction lookup via read_destruction_lookup().
      2. Read every "onsite_inventory_detail_*.xlsx" in INVENTORY_DIR (sorted
         by name), add "DestroyedOn" / "Basket number" columns from the
         lookup and a leading "Site" column (the site from the workbook
         metadata, or the file stem when none was found).
      3. Concatenate, rename columns per COLUMN_RENAMES, parse DATE_COLUMNS
         as day-first dates (unparseable values become NaT) and sort by
         Site, Rcv Date, Med ID.
      4. Write the result to OUTPUT_FILE / SHEET_NAME, then reopen the
         workbook to apply styling and save it again.

    Raises:
        FileNotFoundError: no inventory workbook matches the pattern.
    """
    lookup = read_destruction_lookup()
    print(f"Loaded {len(lookup)} kits from destruction reports")

    inventory_paths = sorted(INVENTORY_DIR.glob("onsite_inventory_detail_*.xlsx"))
    if not inventory_paths:
        # pd.concat([]) would otherwise fail with "No objects to concatenate",
        # which says nothing about the missing input files.
        raise FileNotFoundError(
            f"No 'onsite_inventory_detail_*.xlsx' files found in {INVENTORY_DIR}"
        )

    all_rows = []
    for path in inventory_paths:
        df, meta = read_inventory(path)
        # Lookup values are (basket_id, destroyed_on); kits that are not in
        # any destruction report get None in both new columns.
        df["DestroyedOn"] = df["Medication ID"].apply(
            lambda x: lookup.get(int(x), (None, None))[1] if pd.notna(x) else None
        )
        df["Basket number"] = df["Medication ID"].apply(
            lambda x: lookup.get(int(x), (None, None))[0] if pd.notna(x) else None
        )
        df.insert(0, "Site", meta.get("site", path.stem))
        all_rows.append(df)
        print(f" {path.name}: {len(df)} kits")

    combined = pd.concat(all_rows, ignore_index=True)

    # Rename columns
    combined.rename(columns=COLUMN_RENAMES, inplace=True)

    # Convert date columns
    for col in DATE_COLUMNS:
        if col in combined.columns:
            combined[col] = pd.to_datetime(combined[col], dayfirst=True, errors="coerce")

    # Sort
    combined.sort_values(["Site", "Rcv Date", "Med ID"], inplace=True, ignore_index=True)

    combined.to_excel(OUTPUT_FILE, index=False, sheet_name=SHEET_NAME)

    # ── Formatting ────────────────────────────────────────────────────────────
    wb = load_workbook(OUTPUT_FILE)
    _style_sheet(wb[SHEET_NAME])
    wb.save(OUTPUT_FILE)

    print(f"\nSaved: {OUTPUT_FILE} ({len(combined)} rows, sheet '{SHEET_NAME}')")
|
||||
|
||||
|
||||
# Run the report only when executed as a script, so importing this module
# (for reuse or testing) does not read and write workbooks as a side effect.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user