janssen/CTMS/VisitsFromCTMSExport/20_report_CZ.py

import pandas as pd
import openpyxl
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side, numbers
from openpyxl.utils import get_column_letter
from datetime import date
import os

CSV_FILE = "filename.csv"
SVR_FILE = "Site Visit Report (2).xlsx"
OUTPUT_DIR = os.path.join("..", "..", "CTMS", "output")
os.makedirs(OUTPUT_DIR, exist_ok=True)
today_str = date.today().strftime("%Y-%m-%d")
OUTPUT_FILE = os.path.join(OUTPUT_DIR, f"{today_str} UCO3001 CZ CTMS Visits.xlsx")

# --- Load & filter ---
df = pd.read_csv(CSV_FILE, sep=";", encoding="utf-8-sig")
df["Country"] = df["Study Site Number"].str.extract(r"DD5-([A-Z]+)\d+")
cz = df[df["Country"] == "CZ"].copy()

date_cols = ["Original Due Date", "Due Date", "Window Start Date", "Cutoff Date", "Completed Date"]
for col in date_cols:
    cz[col] = pd.to_datetime(cz[col], errors="coerce")

SITES = [
    "DD5-CZ10001", "DD5-CZ10003", "DD5-CZ10006", "DD5-CZ10009",
    "DD5-CZ10010", "DD5-CZ10012", "DD5-CZ10013", "DD5-CZ10015",
    "DD5-CZ10016", "DD5-CZ10020", "DD5-CZ10021", "DD5-CZ10022",
]
cz = cz[cz["Study Site Number"].isin(SITES) & cz["Status"].isin(["Completed", "Scheduled", "Planned"])].copy()

cz["CRA"] = cz["Assigned To Last Name"].fillna("")

# --- Merge Site Visit Report (2) ---
import re as _re
def _svid_to_ref(svid):
    svid = str(svid).replace("MCTMS|", "")
    if svid == "Qualification Visit": return "SQV"
    if svid == "Site Initiation":     return "SIV"
    if svid == "Closure Visit":       return "COV"
    m = _re.match(r"Monitoring Visit (\d+)", svid)
    return f"IMV{m.group(1)}" if m else svid

svr = pd.read_excel(SVR_FILE, header=5)
svr = svr[svr["Site ID"].isin(SITES)].copy()
svr["Reference"] = svr["Site Visit ID"].apply(_svid_to_ref)
svr = svr[["Site ID", "Reference", "Site Visit Type", "Submitter Name", "Approver Name"]].rename(columns={"Site ID": "Study Site Number"})

cz = cz.merge(svr, on=["Study Site Number", "Reference"], how="left")

# --- Styles ---
FONT_NAME = "Arial"
COL_HEADER = "1F5C99"   # dark blue
COL_COMPL  = "E2EFDA"   # light green
COL_SCHED  = "FFF2CC"   # light yellow
COL_PLAN   = "FCE4D6"   # light orange
COL_NA     = "F2F2F2"   # grey
WHITE      = "FFFFFF"
DARK_TEXT  = "000000"

STATUS_COLORS = {
    "Completed":      COL_COMPL,
    "Scheduled":      COL_SCHED,
    "Planned":        COL_PLAN,
    "Not applicable": COL_NA,
}

thin = Side(style="thin", color="BFBFBF")
med  = Side(style="medium", color="808080")

def border(left=thin, right=thin, top=thin, bottom=thin):
    return Border(left=left, right=right, top=top, bottom=bottom)

def header_cell(ws, row, col, value, width=None):
    c = ws.cell(row=row, column=col, value=value)
    c.font = Font(name=FONT_NAME, bold=True, color=WHITE, size=10)
    c.fill = PatternFill("solid", fgColor=COL_HEADER)
    c.alignment = Alignment(horizontal="center", vertical="center", wrap_text=True)
    c.border = Border(left=Side(style="medium", color=WHITE),
                      right=Side(style="medium", color=WHITE),
                      top=thin, bottom=thin)
    if width and col <= ws.max_column or width:
        ws.column_dimensions[get_column_letter(col)].width = width
    return c

def data_cell(ws, row, col, value, fill_color=WHITE, align="left", bold=False, num_fmt=None, date_val=False):
    c = ws.cell(row=row, column=col, value=value)
    c.font = Font(name=FONT_NAME, size=9, bold=bold, color=DARK_TEXT)
    if fill_color != WHITE:
        c.fill = PatternFill("solid", fgColor=fill_color)
    c.alignment = Alignment(horizontal=align, vertical="center")
    c.border = border()
    if num_fmt:
        c.number_format = num_fmt
    elif date_val and isinstance(value, (pd.Timestamp, type(None))):
        c.number_format = "DD-MMM-YYYY"
    return c

# =========================================================
# SHEET 1: Přehled per site
# =========================================================
wb = openpyxl.Workbook()
ws1 = wb.active
ws1.title = "Přehled CZ"
ws1.freeze_panes = "A3"

# Title
ws1.merge_cells("A1:M1")
title = ws1["A1"]
title.value = f"UCO3001 — CZ CTMS Visits Overview  |  {today_str}"
title.font = Font(name=FONT_NAME, bold=True, size=12, color=WHITE)
title.fill = PatternFill("solid", fgColor="2E4057")
title.alignment = Alignment(horizontal="center", vertical="center")
ws1.row_dimensions[1].height = 22

# Headers
headers = [
    ("Site", 14), ("Investigátor", 22),
    ("SQV", 11), ("SIV", 11),
    ("IMV\nCompleted", 11), ("IMV\nScheduled", 11), ("IMV\nPlanned", 11),
    ("COV", 11),
    ("Poslední vizita\nDatum", 14), ("Poslední vizita\nTyp", 16),
    ("Příští vizita\nDatum", 14), ("Příští vizita\nTyp", 16),
    ("Celkem\nvizit", 10),
]
for ci, (h, w) in enumerate(headers, 1):
    header_cell(ws1, 2, ci, h, width=w)
ws1.row_dimensions[2].height = 30

# Data per site
sites = sorted(cz["Study Site Number"].unique())
for ri, site in enumerate(sites, 3):
    s = cz[cz["Study Site Number"] == site]
    inv_row = s.iloc[0]
    inv = f"{inv_row['INV_FIRST_NAME']} {inv_row['INV_LAST_NAME']}"
    cra = s["CRA"].replace("", pd.NA).dropna().iloc[0] if not s["CRA"].replace("", pd.NA).dropna().empty else ""

    sqv = s[s["Reference"] == "SQV"]
    siv = s[s["Reference"] == "SIV"]
    cov = s[s["Reference"] == "COV"]
    imv = s[s["Category"] == "Monitoring Visit"]

    def visit_status(sub):
        if sub.empty:
            return ("—", COL_NA)
        st = sub.iloc[0]["Status"]
        return (st, STATUS_COLORS.get(st, WHITE))

    sqv_st, sqv_c = visit_status(sqv)
    siv_st, siv_c = visit_status(siv)
    cov_st, cov_c = visit_status(cov)

    imv_comp = int((imv["Status"] == "Completed").sum())
    imv_sch  = int((imv["Status"] == "Scheduled").sum())
    imv_plan = int((imv["Status"] == "Planned").sum())

    # Last completed
    comp = s[s["Status"] == "Completed"].dropna(subset=["Completed Date"])
    last_comp = comp.sort_values("Completed Date").iloc[-1] if not comp.empty else None
    last_date = last_comp["Completed Date"] if last_comp is not None else None
    last_type = last_comp["Reference"] if last_comp is not None else "—"

    # Next upcoming — pouze vizity s Due Date po poslední Completed
    upcoming = s[s["Status"].isin(["Scheduled", "Planned"])].dropna(subset=["Due Date"])
    if last_date is not None:
        upcoming = upcoming[upcoming["Due Date"] > last_date]
    next_vis  = upcoming.sort_values("Due Date").iloc[0] if not upcoming.empty else None
    next_date = next_vis["Due Date"] if next_vis is not None else None
    next_type = next_vis["Reference"] if next_vis is not None else "—"

    total = len(s)
    bg = WHITE if ri % 2 == 0 else "F7F9FC"

    row_data = [
        (site,      "left",   True,  None,       None),
        (inv,       "left",   False, None,       None),
        (sqv_st,    "center", False, None,       sqv_c),
        (siv_st,    "center", False, None,       siv_c),
        (imv_comp,  "center", False, "#,##0",    None),
        (imv_sch,   "center", False, "#,##0",    None),
        (imv_plan,  "center", False, "#,##0",    None),
        (cov_st,    "center", False, None,       cov_c),
        (last_date, "center", False, "DD-MMM-YY",None),
        (last_type, "center", False, None,       None),
        (next_date, "center", False, "DD-MMM-YY",None),
        (next_type, "center", False, None,       None),
        (total,     "center", True,  "#,##0",    None),
    ]
    for ci, (val, align, bold, fmt, fill) in enumerate(row_data, 1):
        fc = fill if fill else bg
        c = data_cell(ws1, ri, ci, val, fill_color=fc, align=align, bold=bold)
        if fmt:
            c.number_format = fmt
    ws1.row_dimensions[ri].height = 16

# Autofilter
ws1.auto_filter.ref = f"A2:{get_column_letter(len(headers))}2"

# =========================================================
# SHEET 2: Detail všech CZ vizit
# =========================================================
ws2 = wb.create_sheet("Detail CZ")
ws2.freeze_panes = "A3"

ws2.merge_cells("A1:N1")
t2 = ws2["A1"]
t2.value = f"UCO3001 — CZ CTMS Visits — Detail  |  {today_str}"
t2.font = Font(name=FONT_NAME, bold=True, size=12, color=WHITE)
t2.fill = PatternFill("solid", fgColor="2E4057")
t2.alignment = Alignment(horizontal="center", vertical="center")
ws2.row_dimensions[1].height = 22

det_headers = [
    ("Site", 14), ("Investigátor", 22), ("CRA (Submitter)", 24),
    ("Ref", 9), ("Název vizity", 24), ("Category", 20), ("Sub Category", 16),
    ("Status", 14),
    ("Due Date", 13), ("Window Start", 13), ("Cutoff Date", 13), ("Completed Date", 13),
    ("Typ vizity", 12),
]
for ci, (h, w) in enumerate(det_headers, 1):
    header_cell(ws2, 2, ci, h, width=w)
ws2.row_dimensions[2].height = 26

# Sort: site → SQV → SIV → IMV1 → IMV2 … → COV
ref_order = {"SQV": 0, "SIV": 1, "COV": 9999}
def ref_sort_key(ref):
    if ref in ref_order:
        return ref_order[ref]
    import re
    m = re.match(r"IMV(\d+)$", str(ref))
    return int(m.group(1)) + 1 if m else 5000
cz["_ref_ord"] = cz["Reference"].apply(ref_sort_key)
detail = cz.sort_values(["Study Site Number", "_ref_ord"]).reset_index(drop=True)

for ri, row in detail.iterrows():
    r = ri + 3
    st = row["Status"]
    bg = STATUS_COLORS.get(st, WHITE)

    inv = f"{row['INV_FIRST_NAME']} {row['INV_LAST_NAME']}"
    submitter  = row["Submitter Name"]   if pd.notna(row.get("Submitter Name"))   else ""
    visit_type = row["Site Visit Type"]  if pd.notna(row.get("Site Visit Type"))  else ""
    vals = [
        (row["Study Site Number"], "left",   True),
        (inv,                      "left",   False),
        (submitter,                "left",   False),
        (row["Reference"],         "center", True),
        (row["Visit Name"],        "left",   False),
        (row["Category"],          "left",   False),
        (row["Sub Category"],      "left",   False),
        (st,                       "center", False),
        (row["Due Date"],          "center", False),
        (row["Window Start Date"], "center", False),
        (row["Cutoff Date"],       "center", False),
        (row["Completed Date"],    "center", False),
        (visit_type,               "center", False),
    ]
    for ci, (val, align, bold) in enumerate(vals, 1):
        c = data_cell(ws2, r, ci, val, fill_color=bg, align=align, bold=bold)
        if isinstance(val, pd.Timestamp) and not pd.isna(val):
            c.value = val.to_pydatetime()
            c.number_format = "DD-MMM-YY"
    ws2.row_dimensions[r].height = 14

ws2.auto_filter.ref = f"A2:{get_column_letter(len(det_headers))}2"

# =========================================================
# SHEET 3: Nadcházející / Scheduled+Planned
# =========================================================
ws3 = wb.create_sheet("Nadcházející vizity")
ws3.freeze_panes = "A3"

ws3.merge_cells("A1:J1")
t3 = ws3["A1"]
t3.value = f"UCO3001 — CZ — Nadcházející vizity (Scheduled + Planned)  |  {today_str}"
t3.font = Font(name=FONT_NAME, bold=True, size=12, color=WHITE)
t3.fill = PatternFill("solid", fgColor="2E4057")
t3.alignment = Alignment(horizontal="center", vertical="center")
ws3.row_dimensions[1].height = 22

upc_headers = [
    ("Due Date", 13), ("Site", 14), ("Investigátor", 22), ("CRA", 14),
    ("Ref", 9), ("Název vizity", 24), ("Category", 20),
    ("Status", 12), ("Window Start", 13), ("Cutoff Date", 13),
]
for ci, (h, w) in enumerate(upc_headers, 1):
    header_cell(ws3, 2, ci, h, width=w)
ws3.row_dimensions[2].height = 26

upcoming = cz[cz["Status"].isin(["Scheduled", "Planned"])].sort_values(["Due Date", "Study Site Number"]).reset_index(drop=True)

for ri, row in upcoming.iterrows():
    r = ri + 3
    bg = STATUS_COLORS.get(row["Status"], WHITE)
    inv = f"{row['INV_FIRST_NAME']} {row['INV_LAST_NAME']}"
    vals = [
        (row["Due Date"],          "center", True),
        (row["Study Site Number"], "left",   False),
        (inv,                      "left",   False),
        (row["CRA"],               "center", False),
        (row["Reference"],         "center", True),
        (row["Visit Name"],        "left",   False),
        (row["Category"],          "left",   False),
        (row["Status"],            "center", False),
        (row["Window Start Date"], "center", False),
        (row["Cutoff Date"],       "center", False),
    ]
    for ci, (val, align, bold) in enumerate(vals, 1):
        c = data_cell(ws3, r, ci, val, fill_color=bg, align=align, bold=bold)
        if isinstance(val, pd.Timestamp) and not pd.isna(val):
            c.value = val.to_pydatetime()
            c.number_format = "DD-MMM-YY"
    ws3.row_dimensions[r].height = 14

ws3.auto_filter.ref = f"A2:{get_column_letter(len(upc_headers))}2"

# =========================================================
# SHEET 4: Problémy — datové nesoulady
# =========================================================
ws4 = wb.create_sheet("Problémy")
ws4.freeze_panes = "A3"

# Načteme původní data bez statusového filtru pro detekci problémů
df_raw = pd.read_csv(CSV_FILE, sep=";", encoding="utf-8-sig")
df_raw["Country"] = df_raw["Study Site Number"].str.extract(r"DD5-([A-Z]+)\d+")
cz_raw = df_raw[df_raw["Study Site Number"].isin(SITES)].copy()
for col in date_cols:
    cz_raw[col] = pd.to_datetime(cz_raw[col], errors="coerce")
cz_raw["CRA"] = cz_raw["Assigned To Last Name"].fillna("")
cz_raw = cz_raw.merge(svr, on=["Study Site Number", "Reference"], how="left")
cz_raw["Submitter Name"] = cz_raw["Submitter Name"].fillna("")

problems = []

# Pravidlo 1: Completed Date vyplněno ale Status ≠ Completed
mask1 = cz_raw["Completed Date"].notna() & (cz_raw["Status"] != "Completed")
for _, row in cz_raw[mask1].iterrows():
    problems.append((row, "Completed Date je vyplněno, ale Status není Completed"))

# Seřadit podle site a reference
import re as _re
def _ref_key(ref):
    if ref == "SQV": return 0
    if ref == "SIV": return 1
    if ref == "COV": return 9999
    m = _re.match(r"IMV(\d+)$", str(ref))
    return int(m.group(1)) + 1 if m else 5000

problems.sort(key=lambda x: (x[0]["Study Site Number"], _ref_key(x[0]["Reference"])))

COL_PROBLEM = "FFC7CE"  # světle červená

ws4.merge_cells("A1:M1")
t4 = ws4["A1"]
t4.value = f"UCO3001 — CZ — Datové problémy k opravě v OneCTMS  |  {today_str}"
t4.font = Font(name=FONT_NAME, bold=True, size=12, color=WHITE)
t4.fill = PatternFill("solid", fgColor="C00000")
t4.alignment = Alignment(horizontal="center", vertical="center")
ws4.row_dimensions[1].height = 22

prob_headers = [
    ("Site", 14), ("Investigátor", 22), ("CRA (Submitter)", 24),
    ("Ref", 9), ("Název vizity", 24), ("Category", 18),
    ("Status", 14),
    ("Due Date", 13), ("Completed Date", 13),
    ("", 2),
    ("Důvod — co je potřeba opravit v OneCTMS", 50),
]
for ci, (h, w) in enumerate(prob_headers, 1):
    header_cell(ws4, 2, ci, h, width=w)
ws4.row_dimensions[2].height = 26

for ri, (row, reason) in enumerate(problems, 3):
    inv = f"{row['INV_FIRST_NAME']} {row['INV_LAST_NAME']}"
    vals = [
        (row["Study Site Number"], "left",   True,  None),
        (inv,                      "left",   False, None),
        (row["Submitter Name"],    "left",   False, None),
        (row["Reference"],         "center", True,  None),
        (row["Visit Name"],        "left",   False, None),
        (row["Category"],          "left",   False, None),
        (row["Status"],            "center", False, None),
        (row["Due Date"],          "center", False, "DD-MMM-YY"),
        (row["Completed Date"],    "center", False, "DD-MMM-YY"),
        ("",                       "center", False, None),
        (reason,                   "left",   True,  None),
    ]
    for ci, (val, align, bold, fmt) in enumerate(vals, 1):
        c = data_cell(ws4, ri, ci, val, fill_color=COL_PROBLEM, align=align, bold=bold)
        if fmt and isinstance(val, pd.Timestamp) and not pd.isna(val):
            c.value = val.to_pydatetime()
            c.number_format = fmt
    ws4.row_dimensions[ri].height = 16

ws4.auto_filter.ref = f"A2:{get_column_letter(len(prob_headers))}2"

wb.save(OUTPUT_FILE)
print(f"Report uložen: {OUTPUT_FILE}")
print(f"  Sheet 'Přehled CZ'         : {len(sites)} sites")
print(f"  Sheet 'Detail CZ'          : {len(detail)} řádků")
print(f"  Sheet 'Nadcházející vizity': {len(upcoming)} vizit")
print(f"  Sheet 'Problémy'           : {len(problems)} záznamů")