"""Generate the UCO3001 CZ CTMS visits workbook.

Reads the CTMS visit export (CSV) and the Site Visit Report (XLSX), joins
them per site and visit reference, and writes a four-sheet Excel report:
an overview per site, a detail list of all visits, the upcoming visits,
and a list of data problems to be corrected in the source system.
"""

import os
import re
from datetime import date

import openpyxl
import pandas as pd
from openpyxl.styles import Alignment, Border, Font, PatternFill, Side, numbers  # noqa: F401
from openpyxl.utils import get_column_letter

CSV_FILE = "filename.csv"
SVR_FILE = "Site Visit Report (2).xlsx"
OUTPUT_DIR = os.path.join("..", "..", "CTMS", "output")
today_str = date.today().strftime("%Y-%m-%d")
OUTPUT_FILE = os.path.join(OUTPUT_DIR, f"{today_str} UCO3001 CZ CTMS Visits.xlsx")

date_cols = ["Original Due Date", "Due Date", "Window Start Date", "Cutoff Date", "Completed Date"]

SITES = [
    "DD5-CZ10001", "DD5-CZ10003", "DD5-CZ10006", "DD5-CZ10009",
    "DD5-CZ10010", "DD5-CZ10012", "DD5-CZ10013", "DD5-CZ10015",
    "DD5-CZ10016", "DD5-CZ10020", "DD5-CZ10021", "DD5-CZ10022",
]

# Statuses shown on the overview / detail sheets, and the "not yet done" subset.
REPORT_STATUSES = ["Completed", "Scheduled", "Planned"]
UPCOMING_STATUSES = ["Scheduled", "Planned"]
MERGE_KEYS = ["Study Site Number", "Reference"]

# --- Styles ---
FONT_NAME = "Arial"
COL_HEADER = "1F5C99"   # dark blue
COL_COMPL = "E2EFDA"    # light green
COL_SCHED = "FFF2CC"    # light yellow
COL_PLAN = "FCE4D6"     # light orange
COL_NA = "F2F2F2"       # grey
COL_PROBLEM = "FFC7CE"  # light red
WHITE = "FFFFFF"
DARK_TEXT = "000000"
TITLE_FILL = "2E4057"
PROBLEM_TITLE_FILL = "C00000"
ZEBRA_FILL = "F7F9FC"
DATE_FMT = "DD-MMM-YY"
COUNT_FMT = "#,##0"

STATUS_COLORS = {
    "Completed": COL_COMPL,
    "Scheduled": COL_SCHED,
    "Planned": COL_PLAN,
    "Not applicable": COL_NA,
}

thin = Side(style="thin", color="BFBFBF")
med = Side(style="medium", color="808080")

# Site Visit Report visit names that map to a fixed CTMS reference.
_SVID_REFS = {
    "Qualification Visit": "SQV",
    "Site Initiation": "SIV",
    "Closure Visit": "COV",
}
_MONITORING_RE = re.compile(r"Monitoring Visit (\d+)")
_IMV_RE = re.compile(r"IMV(\d+)$")

# Sort order: SQV -> SIV -> IMV1 -> IMV2 ... -> COV
ref_order = {"SQV": 0, "SIV": 1, "COV": 9999}


# ---------------------------------------------------------
# Pure helpers
# ---------------------------------------------------------
def _svid_to_ref(svid):
    """Translate a Site Visit Report visit ID into a CTMS reference code.

    The ``MCTMS|`` marker is removed first. Known visit names map to
    SQV/SIV/COV, ``Monitoring Visit <n>`` maps to ``IMV<n>``, and anything
    else is returned as the stripped text.
    """
    svid = str(svid).replace("MCTMS|", "")
    if svid in _SVID_REFS:
        return _SVID_REFS[svid]
    m = _MONITORING_RE.match(svid)
    return f"IMV{m.group(1)}" if m else svid


def ref_sort_key(ref):
    """Return the sort rank of a visit reference (SQV, SIV, IMV<n>, COV).

    IMV<n> ranks as ``n + 1`` so it falls between SIV and COV; unknown
    references rank 5000, i.e. after the usual IMV numbers and before COV.
    """
    if ref in ref_order:
        return ref_order[ref]
    m = _IMV_RE.match(str(ref))
    return int(m.group(1)) + 1 if m else 5000


# Kept as an alias: the problems sheet historically used this name.
_ref_key = ref_sort_key


def _is_missing(value):
    """Return True for None and for pandas/NumPy missing scalars (NaN, NaT, NA)."""
    return value is None or (pd.api.types.is_scalar(value) and pd.isna(value))


def _excel_value(value):
    """Convert a pandas scalar into a value openpyxl stores cleanly.

    Timestamps become plain ``datetime`` objects and missing values become
    ``None`` so the cell is left empty instead of holding a float NaN.
    """
    if isinstance(value, pd.Timestamp):
        return value.to_pydatetime()
    if _is_missing(value):
        return None
    return value


def _text(value):
    """Return *value*, or an empty string when it is missing."""
    return "" if _is_missing(value) else value


def _investigator(row):
    """Return the investigator's "first last" name, skipping missing parts."""
    parts = (row["INV_FIRST_NAME"], row["INV_LAST_NAME"])
    return " ".join(str(p) for p in parts if not _is_missing(p))


def visit_status(sub):
    """Return ``(status, fill colour)`` of the first visit in *sub*.

    An empty selection yields a dash placeholder with the grey fill.
    """
    if sub.empty:
        return ("—", COL_NA)
    status = sub.iloc[0]["Status"]
    return (status, STATUS_COLORS.get(status, WHITE))


# ---------------------------------------------------------
# Cell / sheet helpers
# ---------------------------------------------------------
def border(left=thin, right=thin, top=thin, bottom=thin):
    """Build a cell border; every side defaults to the thin grey line."""
    return Border(left=left, right=right, top=top, bottom=bottom)


def header_cell(ws, row, col, value, width=None):
    """Write a styled header cell and, when *width* is given, size its column."""
    c = ws.cell(row=row, column=col, value=value)
    c.font = Font(name=FONT_NAME, bold=True, color=WHITE, size=10)
    c.fill = PatternFill("solid", fgColor=COL_HEADER)
    c.alignment = Alignment(horizontal="center", vertical="center", wrap_text=True)
    white_sep = Side(style="medium", color=WHITE)
    c.border = Border(left=white_sep, right=white_sep, top=thin, bottom=thin)
    if width:
        ws.column_dimensions[get_column_letter(col)].width = width
    return c


def data_cell(ws, row, col, value, fill_color=WHITE, align="left", bold=False,
              num_fmt=None, date_val=False):
    """Write a styled data cell and return it.

    Missing values (None/NaN/NaT) leave the cell empty and timestamps are
    stored as plain datetimes. *num_fmt* sets the number format explicitly;
    otherwise *date_val* applies ``DD-MMM-YYYY`` to timestamp or None values.
    """
    c = ws.cell(row=row, column=col, value=_excel_value(value))
    c.font = Font(name=FONT_NAME, size=9, bold=bold, color=DARK_TEXT)
    if fill_color != WHITE:
        c.fill = PatternFill("solid", fgColor=fill_color)
    c.alignment = Alignment(horizontal=align, vertical="center")
    c.border = border()
    if num_fmt:
        c.number_format = num_fmt
    elif date_val and isinstance(value, (pd.Timestamp, type(None))):
        c.number_format = "DD-MMM-YYYY"
    return c


def _cell(value, align="left", bold=False, fmt=None, fill=None):
    """Bundle one cell's value and presentation for ``_write_row``."""
    return (value, align, bold, fmt, fill)


def _write_row(ws, row_idx, cells, row_fill, height):
    """Write one data row; timestamp values get the short date format."""
    for ci, (value, align, bold, fmt, fill) in enumerate(cells, 1):
        if fmt is None and isinstance(value, pd.Timestamp):
            fmt = DATE_FMT
        data_cell(ws, row_idx, ci, value, fill_color=fill or row_fill,
                  align=align, bold=bold, num_fmt=fmt)
    ws.row_dimensions[row_idx].height = height


def _setup_sheet(ws, title_text, headers, title_fill=TITLE_FILL, header_height=26):
    """Freeze panes and write the title (row 1) and header (row 2) of a sheet.

    The title is merged across exactly as many columns as there are headers.
    """
    ws.freeze_panes = "A3"
    ws.merge_cells(f"A1:{get_column_letter(len(headers))}1")
    title = ws["A1"]
    title.value = title_text
    title.font = Font(name=FONT_NAME, bold=True, size=12, color=WHITE)
    title.fill = PatternFill("solid", fgColor=title_fill)
    title.alignment = Alignment(horizontal="center", vertical="center")
    ws.row_dimensions[1].height = 22
    for ci, (text, width) in enumerate(headers, 1):
        header_cell(ws, 2, ci, text, width=width)
    ws.row_dimensions[2].height = header_height


def _set_autofilter(ws, n_cols, n_rows):
    """Put the autofilter on the header row, spanning all *n_rows* data rows."""
    ws.auto_filter.ref = f"A2:{get_column_letter(n_cols)}{2 + n_rows}"


# ---------------------------------------------------------
# Data loading
# ---------------------------------------------------------
def load_visits(csv_path=CSV_FILE, svr_path=SVR_FILE):
    """Load all visits of the listed sites, enriched with Site Visit Report data.

    No status filter is applied here; the result carries a ``Country``
    column extracted from the site number, parsed date columns, a ``CRA``
    column and the report's visit type / submitter / approver columns.
    """
    visits = pd.read_csv(csv_path, sep=";", encoding="utf-8-sig")
    visits["Country"] = visits["Study Site Number"].str.extract(
        r"DD5-([A-Z]+)\d+", expand=False
    )
    visits = visits[visits["Study Site Number"].isin(SITES)].copy()
    for col in date_cols:
        visits[col] = pd.to_datetime(visits[col], errors="coerce")
    visits["CRA"] = visits["Assigned To Last Name"].fillna("")

    svr = pd.read_excel(svr_path, header=5)
    svr = svr[svr["Site ID"].isin(SITES)].copy()
    svr["Reference"] = svr["Site Visit ID"].apply(_svid_to_ref)
    svr = svr[
        ["Site ID", "Reference", "Site Visit Type", "Submitter Name", "Approver Name"]
    ].rename(columns={"Site ID": "Study Site Number"})
    # A repeated (site, reference) pair in the report would multiply visit
    # rows in the left merge and inflate every count; keep the first one.
    svr = svr.drop_duplicates(subset=MERGE_KEYS)

    return visits.merge(svr, on=MERGE_KEYS, how="left")


# =========================================================
# SHEET 1: Overview per site
# =========================================================
def write_overview_sheet(ws, cz):
    """Write one summary row per site; return the sorted list of sites."""
    headers = [
        ("Site", 14), ("Investigátor", 22), ("SQV", 11), ("SIV", 11),
        ("IMV\nCompleted", 11), ("IMV\nScheduled", 11), ("IMV\nPlanned", 11),
        ("COV", 11), ("Poslední vizita\nDatum", 14), ("Poslední vizita\nTyp", 16),
        ("Příští vizita\nDatum", 14), ("Příští vizita\nTyp", 16),
        ("Celkem\nvizit", 10),
    ]
    _setup_sheet(ws, f"UCO3001 — CZ CTMS Visits Overview | {today_str}",
                 headers, header_height=30)

    sites = []
    for ri, (site, s) in enumerate(cz.groupby("Study Site Number", sort=True), 3):
        sites.append(site)
        inv = _investigator(s.iloc[0])

        sqv_st, sqv_c = visit_status(s[s["Reference"] == "SQV"])
        siv_st, siv_c = visit_status(s[s["Reference"] == "SIV"])
        cov_st, cov_c = visit_status(s[s["Reference"] == "COV"])

        imv_status = s.loc[s["Category"] == "Monitoring Visit", "Status"]
        imv_comp = int((imv_status == "Completed").sum())
        imv_sch = int((imv_status == "Scheduled").sum())
        imv_plan = int((imv_status == "Planned").sum())

        # Last completed visit
        completed = s[s["Status"] == "Completed"].dropna(subset=["Completed Date"])
        if completed.empty:
            last_date, last_type = None, "—"
        else:
            last = completed.sort_values("Completed Date").iloc[-1]
            last_date, last_type = last["Completed Date"], last["Reference"]

        # Next upcoming: only visits whose Due Date is after the last Completed one
        pending = s[s["Status"].isin(UPCOMING_STATUSES)].dropna(subset=["Due Date"])
        if last_date is not None:
            pending = pending[pending["Due Date"] > last_date]
        if pending.empty:
            next_date, next_type = None, "—"
        else:
            nxt = pending.sort_values("Due Date").iloc[0]
            next_date, next_type = nxt["Due Date"], nxt["Reference"]

        cells = [
            _cell(site, "left", True),
            _cell(inv, "left"),
            _cell(sqv_st, "center", fill=sqv_c),
            _cell(siv_st, "center", fill=siv_c),
            _cell(imv_comp, "center", fmt=COUNT_FMT),
            _cell(imv_sch, "center", fmt=COUNT_FMT),
            _cell(imv_plan, "center", fmt=COUNT_FMT),
            _cell(cov_st, "center", fill=cov_c),
            _cell(last_date, "center", fmt=DATE_FMT),
            _cell(last_type, "center"),
            _cell(next_date, "center", fmt=DATE_FMT),
            _cell(next_type, "center"),
            _cell(len(s), "center", True, fmt=COUNT_FMT),
        ]
        # Zebra striping: odd sheet rows get the light tint.
        row_fill = WHITE if ri % 2 == 0 else ZEBRA_FILL
        _write_row(ws, ri, cells, row_fill, height=16)

    _set_autofilter(ws, len(headers), len(sites))
    return sites


# =========================================================
# SHEET 2: Detail of all CZ visits
# =========================================================
def write_detail_sheet(ws, cz):
    """Write every visit, ordered by site and visit sequence; return that frame."""
    headers = [
        ("Site", 14), ("Investigátor", 22), ("CRA (Submitter)", 24), ("Ref", 9),
        ("Název vizity", 24), ("Category", 20), ("Sub Category", 16),
        ("Status", 14), ("Due Date", 13), ("Window Start", 13),
        ("Cutoff Date", 13), ("Completed Date", 13), ("Typ vizity", 12),
    ]
    _setup_sheet(ws, f"UCO3001 — CZ CTMS Visits — Detail | {today_str}", headers)

    # Sort: site -> SQV -> SIV -> IMV1 -> IMV2 ... -> COV
    detail = (
        cz.assign(_ref_ord=cz["Reference"].apply(ref_sort_key))
        .sort_values(["Study Site Number", "_ref_ord"])
        .reset_index(drop=True)
    )
    for r, (_, row) in enumerate(detail.iterrows(), 3):
        status = row["Status"]
        cells = [
            _cell(row["Study Site Number"], "left", True),
            _cell(_investigator(row), "left"),
            _cell(_text(row.get("Submitter Name")), "left"),
            _cell(row["Reference"], "center", True),
            _cell(row["Visit Name"], "left"),
            _cell(row["Category"], "left"),
            _cell(row["Sub Category"], "left"),
            _cell(status, "center"),
            _cell(row["Due Date"], "center"),
            _cell(row["Window Start Date"], "center"),
            _cell(row["Cutoff Date"], "center"),
            _cell(row["Completed Date"], "center"),
            _cell(_text(row.get("Site Visit Type")), "center"),
        ]
        _write_row(ws, r, cells, STATUS_COLORS.get(status, WHITE), height=14)

    _set_autofilter(ws, len(headers), len(detail))
    return detail


# =========================================================
# SHEET 3: Upcoming / Scheduled + Planned
# =========================================================
def write_upcoming_sheet(ws, cz):
    """Write Scheduled and Planned visits ordered by due date; return that frame."""
    headers = [
        ("Due Date", 13), ("Site", 14), ("Investigátor", 22), ("CRA", 14),
        ("Ref", 9), ("Název vizity", 24), ("Category", 20), ("Status", 12),
        ("Window Start", 13), ("Cutoff Date", 13),
    ]
    _setup_sheet(
        ws,
        f"UCO3001 — CZ — Nadcházející vizity (Scheduled + Planned) | {today_str}",
        headers,
    )

    upcoming = (
        cz[cz["Status"].isin(UPCOMING_STATUSES)]
        .sort_values(["Due Date", "Study Site Number"])
        .reset_index(drop=True)
    )
    for r, (_, row) in enumerate(upcoming.iterrows(), 3):
        cells = [
            _cell(row["Due Date"], "center", True),
            _cell(row["Study Site Number"], "left"),
            _cell(_investigator(row), "left"),
            _cell(row["CRA"], "center"),
            _cell(row["Reference"], "center", True),
            _cell(row["Visit Name"], "left"),
            _cell(row["Category"], "left"),
            _cell(row["Status"], "center"),
            _cell(row["Window Start Date"], "center"),
            _cell(row["Cutoff Date"], "center"),
        ]
        _write_row(ws, r, cells, STATUS_COLORS.get(row["Status"], WHITE), height=14)

    _set_autofilter(ws, len(headers), len(upcoming))
    return upcoming


# =========================================================
# SHEET 4: Problems — data inconsistencies
# =========================================================
def find_problems(cz_raw):
    """Return ``(row, reason)`` pairs for inconsistent visits, sorted by site and reference.

    Works on the visits without the status filter so that rows with an
    unexpected status are still checked.
    """
    rules = [
        # Rule 1: Completed Date is filled in but Status is not Completed
        (
            cz_raw["Completed Date"].notna() & (cz_raw["Status"] != "Completed"),
            "Completed Date je vyplněno, ale Status není Completed",
        ),
    ]
    problems = [
        (row, reason)
        for mask, reason in rules
        for _, row in cz_raw[mask].iterrows()
    ]
    problems.sort(
        key=lambda p: (p[0]["Study Site Number"], ref_sort_key(p[0]["Reference"]))
    )
    return problems


def write_problems_sheet(ws, cz_raw):
    """Write the detected data problems; return the list of ``(row, reason)``."""
    headers = [
        ("Site", 14), ("Investigátor", 22), ("CRA (Submitter)", 24), ("Ref", 9),
        ("Název vizity", 24), ("Category", 18), ("Status", 14), ("Due Date", 13),
        ("Completed Date", 13), ("", 2),
        ("Důvod — co je potřeba opravit v OneCTMS", 50),
    ]
    _setup_sheet(
        ws,
        f"UCO3001 — CZ — Datové problémy k opravě v OneCTMS | {today_str}",
        headers,
        title_fill=PROBLEM_TITLE_FILL,
    )

    problems = find_problems(cz_raw)
    for ri, (row, reason) in enumerate(problems, 3):
        cells = [
            _cell(row["Study Site Number"], "left", True),
            _cell(_investigator(row), "left"),
            _cell(_text(row["Submitter Name"]), "left"),
            _cell(row["Reference"], "center", True),
            _cell(row["Visit Name"], "left"),
            _cell(row["Category"], "left"),
            _cell(row["Status"], "center"),
            _cell(row["Due Date"], "center"),
            _cell(row["Completed Date"], "center"),
            _cell("", "center"),  # narrow spacer column
            _cell(reason, "left", True),
        ]
        _write_row(ws, ri, cells, COL_PROBLEM, height=16)

    _set_autofilter(ws, len(headers), len(problems))
    return problems


def main():
    """Load the inputs, build the four sheets and save the workbook."""
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # The CSV is read once: the unfiltered frame feeds the problems sheet,
    # the status-filtered CZ subset feeds the other three sheets.
    cz_raw = load_visits(CSV_FILE, SVR_FILE)
    wanted = (cz_raw["Country"] == "CZ") & cz_raw["Status"].isin(REPORT_STATUSES)
    cz = cz_raw[wanted].reset_index(drop=True)

    wb = openpyxl.Workbook()
    ws1 = wb.active
    ws1.title = "Přehled CZ"
    sites = write_overview_sheet(ws1, cz)
    detail = write_detail_sheet(wb.create_sheet("Detail CZ"), cz)
    upcoming = write_upcoming_sheet(wb.create_sheet("Nadcházející vizity"), cz)
    problems = write_problems_sheet(wb.create_sheet("Problémy"), cz_raw)

    wb.save(OUTPUT_FILE)
    print(f"Report uložen: {OUTPUT_FILE}")
    print(f" Sheet 'Přehled CZ' : {len(sites)} sites")
    print(f" Sheet 'Detail CZ' : {len(detail)} řádků")
    print(f" Sheet 'Nadcházející vizity': {len(upcoming)} vizit")
    print(f" Sheet 'Problémy' : {len(problems)} záznamů")


if __name__ == "__main__":
    main()