z230

2026-05-04 16:14:47 +02:00
parent 2ec8884e82
commit eaea634b2b
65 changed files with 500 additions and 0 deletions
@@ -0,0 +1,310 @@
+import os
+import glob
+import datetime
+import pandas as pd
+from openpyxl import Workbook
+from openpyxl.styles import (
+    Font, PatternFill, Alignment, Border, Side, GradientFill
+)
+from openpyxl.utils import get_column_letter
+
+# Directory layout, resolved relative to the location of this script.
+BASE_DIR     = os.path.dirname(os.path.abspath(__file__))
+INCOMING_DIR = os.path.join(BASE_DIR, "IncomingSourceReports")  # searched for source reports
+CREATED_DIR  = os.path.join(BASE_DIR, "CreatedReports")  # generated workbooks are saved here
+
+# Study identifiers; main() produces one output workbook per entry.
+STUDIES = ["77242113UCO3001", "42847922MDD3003"]
+
+# Columns of the source report that the generated sheets read.
+SOURCE_COLS = [
+    "Subject",
+    "Investigator",
+    "Subject's age collection",
+    "Cohort per IRT",
+    "IRT Subject Status",
+    "Last Recorded IRT Transaction",
+    "Next Expected IRT Transaction",
+    "Next Expected IRT Transaction Date [Local]",
+]
+
+# Header labels of the "Přehled" sheet, in the same order as SOURCE_COLS.
+DISPLAY_HEADERS = [
+    "Subject",
+    "Investigator",
+    "Věk",
+    "Cohort",
+    "Status",
+    "Last IRT",
+    "Next Visit",
+    "Next Date",
+]
+
+# Column widths of the "Přehled" sheet, paired positionally with DISPLAY_HEADERS.
+COL_WIDTHS = [14, 22, 6, 12, 14, 12, 12, 13]
+
+# ── Styles ───────────────────────────────────────────────────────────────────
+# Header row of the "Přehled" and "Next Visits" sheets: white bold text on dark blue.
+HEADER_FILL  = PatternFill("solid", fgColor="1F4E79")
+HEADER_FONT  = Font(name="Arial", bold=True, color="FFFFFF", size=10)
+NORMAL_FONT  = Font(name="Arial", size=10)
+BOLD_FONT    = Font(name="Arial", bold=True, size=10)  # status cell of randomized subjects
+STRIKE_FONT  = Font(name="Arial", size=10, strike=True, color="999999")  # screen-failed / discontinued rows
+ADOLESC_FONT = Font(name="Arial", bold=True, size=10)  # cohort cell of adolescents; same settings as BOLD_FONT
+
+# Thin light-grey border applied to all four sides of a cell.
+THIN = Side(style="thin", color="CCCCCC")
+BORDER = Border(left=THIN, right=THIN, top=THIN, bottom=THIN)
+
+# Alternating row backgrounds for the data rows.
+EVEN_FILL = PatternFill("solid", fgColor="EBF3FB")
+ODD_FILL  = PatternFill("solid", fgColor="FFFFFF")
+
+# Shared cell alignments; text wrapping is disabled in both.
+CENTER = Alignment(horizontal="center", vertical="center", wrap_text=False)
+LEFT   = Alignment(horizontal="left",   vertical="center", wrap_text=False)
+
+
+def unique_path(directory, stem):
+    """Return a path for an ``.xlsx`` file in *directory* that does not exist yet.
+
+    Candidates are tried in this order: ``<stem>.xlsx``, ``<stem> HHMM.xlsx``
+    (current local time), then ``<stem> HHMM (2).xlsx``, ``<stem> HHMM (3).xlsx``
+    and so on, so an earlier report is never overwritten, even when several
+    reports are created within the same minute.
+
+    Args:
+        directory: Folder in which the file is going to be created.
+        stem: File name without the ``.xlsx`` extension.
+
+    Returns:
+        The first candidate path for which ``os.path.exists`` is false.
+    """
+    path = os.path.join(directory, f"{stem}.xlsx")
+    if not os.path.exists(path):
+        return path
+    time_tag = datetime.datetime.now().strftime("%H%M")
+    path = os.path.join(directory, f"{stem} {time_tag}.xlsx")
+    counter = 2
+    # The time-tagged name can be taken as well (second run in the same
+    # minute); keep numbering until a free name is found.
+    while os.path.exists(path):
+        path = os.path.join(directory, f"{stem} {time_tag} ({counter}).xlsx")
+        counter += 1
+    return path
+
+
+def find_latest_source(study):
+    """Return the most recently modified source report for *study*.
+
+    Files in INCOMING_DIR matching ``"* <study> Subject Summary Report.xlsx"``
+    are considered; the one with the newest modification time wins.
+
+    Raises:
+        FileNotFoundError: No matching file exists.
+    """
+    pattern = os.path.join(INCOMING_DIR, f"* {study} Subject Summary Report.xlsx")
+    candidates = []
+    for found in glob.glob(pattern):
+        # Names starting with "~$" are skipped (presumably the temporary
+        # lock files Excel creates next to an open workbook).
+        if os.path.basename(found).startswith("~$"):
+            continue
+        candidates.append(found)
+    if not candidates:
+        raise FileNotFoundError(f"Nenalezen zdrojový soubor pro {study} v {INCOMING_DIR}")
+    return max(candidates, key=os.path.getmtime)
+
+
+def load_source(path):
+    """Load a source report workbook into a DataFrame.
+
+    The first sheet is scanned for the first row in which any cell equals
+    "Subject" (ignoring surrounding whitespace); that row becomes the header
+    and everything below it the data. The workbook is opened only once.
+    Surrounding whitespace is removed from the column names so that they
+    match the names used for the header detection.
+
+    Args:
+        path: Path of the ``.xlsx`` source report.
+
+    Returns:
+        DataFrame with all columns of the source sheet.
+
+    Raises:
+        ValueError: No header row was found, or one of SOURCE_COLS is
+            missing from the header.
+    """
+    with pd.ExcelFile(path) as workbook:
+        raw = workbook.parse(header=None)
+        # find header row (first row with a "Subject" cell in any column)
+        header_row = None
+        for i, row in raw.iterrows():
+            if any(str(v).strip() == "Subject" for v in row):
+                header_row = i
+                break
+        if header_row is None:
+            raise ValueError("Hlavičkový řádek nenalezen")
+        df = workbook.parse(header=header_row)
+
+    df.columns = [c.strip() if isinstance(c, str) else c for c in df.columns]
+
+    # Fail early with a readable message instead of a KeyError deep inside
+    # the sheet writers.
+    missing = [c for c in SOURCE_COLS if c not in df.columns]
+    if missing:
+        raise ValueError(f"Ve zdrojovém souboru chybí sloupce: {', '.join(missing)}")
+    return df
+
+
+def simplify_cohort(val):
+    """Reduce a cohort value to a short label.
+
+    Missing values become ``""``. Text containing "dolescent" becomes
+    "Adolescent", text starting with "Adult" becomes "Adult"; anything else
+    (e.g. "Part 1", "Part 2") is returned unchanged as a string.
+    """
+    if pd.isna(val):
+        return ""
+    label = str(val)
+    # Matching on "dolescent" covers both "Adolescent" and "adolescent".
+    if "dolescent" in label:
+        return "Adolescent"
+    return "Adult" if label.startswith("Adult") else label
+
+
+def format_date(val):
+    """Render a date-like value as ``YYYY-MM-DD`` text.
+
+    Missing values give ``""``. Objects offering ``strftime`` are formatted
+    with it; everything else is converted with ``str`` and cut to its first
+    ten characters.
+    """
+    if pd.isna(val):
+        return ""
+    try:
+        to_text = val.strftime
+    except AttributeError:
+        # Not a date/time object: fall back to the leading part of its text.
+        return str(val)[:10]
+    return to_text("%Y-%m-%d")
+
+
+def write_zdroj(wb, df_raw, source_path):
+    """Append a "ZDROJ (<date>)" sheet holding the source table as a plain grid.
+
+    The sheet name carries the modification time of *source_path*, formatted
+    with ``%d%b%Y`` and upper-cased. Row 1 holds the column names of
+    *df_raw*; the data rows follow with alternating fills. Missing values
+    are written as empty strings and values offering ``strftime`` as
+    ``YYYY-MM-DD`` text. The first row is frozen and an auto-filter is set
+    over the header and all data rows, matching the other sheets written by
+    this script.
+
+    Args:
+        wb: openpyxl workbook that receives the new sheet.
+        df_raw: DataFrame as loaded from the source report.
+        source_path: Path of the source file; only its mtime is read.
+    """
+    mtime = datetime.datetime.fromtimestamp(os.path.getmtime(source_path))
+    sheet_name = f"ZDROJ ({mtime.strftime('%d%b%Y').upper()})"
+    ws = wb.create_sheet(sheet_name)
+    ws.sheet_view.showGridLines = True
+
+    # Build each style object once instead of once per cell.
+    header_font = Font(name="Arial", bold=True, size=9, color="FFFFFF")
+    header_fill = PatternFill("solid", fgColor="404040")
+    body_font = Font(name="Arial", size=9)
+
+    # write raw headers + data as plain table
+    headers = list(df_raw.columns)
+    for c, h in enumerate(headers, 1):
+        cell = ws.cell(row=1, column=c, value=h)
+        cell.font = header_font
+        cell.fill = header_fill
+        cell.alignment = LEFT
+        cell.border = BORDER
+        ws.column_dimensions[get_column_letter(c)].width = 20
+
+    for r, (_, row) in enumerate(df_raw.iterrows(), 2):
+        fill = EVEN_FILL if r % 2 == 0 else ODD_FILL
+        for c, col in enumerate(headers, 1):
+            val = row[col]
+            if pd.isna(val):
+                val = ""
+            elif hasattr(val, "strftime"):
+                val = val.strftime("%Y-%m-%d")
+            cell = ws.cell(row=r, column=c, value=val)
+            cell.font = body_font
+            cell.fill = fill
+            cell.border = BORDER
+            cell.alignment = LEFT
+
+    ws.freeze_panes = "A2"
+    # get_column_letter(0) raises, so a frame without columns gets no filter.
+    if headers:
+        last_row = len(df_raw) + 1  # header row + data rows
+        ws.auto_filter.ref = f"A1:{get_column_letter(len(headers))}{last_row}"
+
+
+def _age_to_cell(v):
+    """Convert a raw age value into the value shown in the "Věk" column."""
+    if pd.isna(v):
+        return ""
+    try:
+        return int(v)
+    except (TypeError, ValueError):
+        # A non-numeric age is shown verbatim instead of aborting the report.
+        return str(v).strip()
+
+
+def _build_prehled_frame(df_raw):
+    """Return the display columns of the "Přehled" sheet, sorted by Subject."""
+    display = pd.DataFrame()
+    display["Subject"] = df_raw["Subject"].fillna("")
+    display["Investigator"] = df_raw["Investigator"].fillna("")
+    display["Věk"] = df_raw["Subject's age collection"].apply(_age_to_cell)
+    display["Cohort"] = df_raw["Cohort per IRT"].apply(simplify_cohort)
+    display["Status"] = df_raw["IRT Subject Status"].fillna("")
+    display["Last IRT"] = df_raw["Last Recorded IRT Transaction"].fillna("—")
+    display["Next Visit"] = df_raw["Next Expected IRT Transaction"].fillna("—")
+    display["Next Date"] = df_raw["Next Expected IRT Transaction Date [Local]"].apply(format_date)
+    return display.sort_values("Subject").reset_index(drop=True)
+
+
+def write_prehled(wb, df_raw, study):
+    """Append the "Přehled" sheet: title, header row and one row per subject.
+
+    Rows are sorted by Subject and filled alternately. Font rules per row:
+    if the status contains "Screen Failed" or "Discontinued" the whole row is
+    struck through; otherwise the status cell is bold when the status
+    contains "Randomized" and the cohort cell is bold for "Adolescent".
+    Empty strings are written as empty cells. The first two rows are frozen
+    and an auto-filter spans the header and the data rows.
+
+    Args:
+        wb: openpyxl workbook that receives the new sheet.
+        df_raw: DataFrame as loaded from the source report.
+        study: Study identifier shown in the title row.
+    """
+    ws = wb.create_sheet("Přehled")
+    ws.sheet_view.showGridLines = False
+    ws.sheet_view.showRowColHeaders = True
+
+    last_col = get_column_letter(len(DISPLAY_HEADERS))
+
+    # ── title row ────────────────────────────────────────────────────────────
+    ws.merge_cells(f"A1:{last_col}1")
+    title = ws["A1"]
+    title.value = f"Subject Summary — {study}   ({datetime.date.today().strftime('%d-%b-%Y')})"
+    title.font = Font(name="Arial", bold=True, size=12, color="1F4E79")
+    title.alignment = Alignment(horizontal="left", vertical="center")
+    ws.row_dimensions[1].height = 22
+
+    # ── header row ───────────────────────────────────────────────────────────
+    for c, (h, w) in enumerate(zip(DISPLAY_HEADERS, COL_WIDTHS), 1):
+        cell = ws.cell(row=2, column=c, value=h)
+        cell.font = HEADER_FONT
+        cell.fill = HEADER_FILL
+        cell.alignment = CENTER
+        cell.border = BORDER
+        ws.column_dimensions[get_column_letter(c)].width = w
+    ws.row_dimensions[2].height = 18
+
+    display = _build_prehled_frame(df_raw)
+
+    # ── data rows ────────────────────────────────────────────────────────────
+    for r_idx, row in display.iterrows():
+        excel_row = r_idx + 3  # row 1=title, row 2=header
+        status = str(row["Status"])
+        is_failed = "Screen Failed" in status or "Discontinued" in status
+        is_randomized = "Randomized" in status
+        is_adolescent = row["Cohort"] == "Adolescent"
+        fill = EVEN_FILL if r_idx % 2 == 0 else ODD_FILL
+
+        # The frame's column names are exactly DISPLAY_HEADERS, in order.
+        values = [row[h] for h in DISPLAY_HEADERS]
+
+        for c_idx, val in enumerate(values, 1):
+            cell = ws.cell(row=excel_row, column=c_idx, value=val if val != "" else None)
+            cell.fill = fill
+            cell.border = BORDER
+
+            # only the age column (3) is centered
+            cell.alignment = CENTER if c_idx == 3 else LEFT
+
+            # font logic: column 5 = Status, column 4 = Cohort
+            if is_failed:
+                cell.font = STRIKE_FONT
+            elif c_idx == 5 and is_randomized:
+                cell.font = BOLD_FONT
+            elif c_idx == 4 and is_adolescent:
+                cell.font = ADOLESC_FONT
+            else:
+                cell.font = NORMAL_FONT
+
+        ws.row_dimensions[excel_row].height = 16
+
+    ws.freeze_panes = "A3"
+    last_data_row = len(display) + 2
+    ws.auto_filter.ref = f"A2:{last_col}{last_data_row}"
+
+
+def write_next_visits(wb, df_raw, study):
+    """Append the "Next Visits" sheet listing upcoming expected visits.
+
+    Only subjects with a next expected transaction date are listed, and
+    subjects whose status contains "Screen Failed" or "Discontinued" are
+    left out. Rows are sorted by that date and filled alternately; the date
+    is written as text via format_date. The first two rows are frozen and an
+    auto-filter spans the header and the data rows.
+
+    Args:
+        wb: openpyxl workbook that receives the new sheet.
+        df_raw: DataFrame as loaded from the source report.
+        study: Study identifier shown in the title row.
+    """
+    ws = wb.create_sheet("Next Visits")
+    ws.sheet_view.showGridLines = False
+
+    nv_headers = ["Subject", "Investigator", "Next Visit", "Datum"]
+    nv_widths = [14, 22, 26, 13]
+    last_col = get_column_letter(len(nv_headers))
+
+    # title
+    ws.merge_cells(f"A1:{last_col}1")
+    title = ws["A1"]
+    title.value = f"Next Expected Visits — {study}   ({datetime.date.today().strftime('%d-%b-%Y')})"
+    title.font = Font(name="Arial", bold=True, size=12, color="1F4E79")
+    title.alignment = Alignment(horizontal="left", vertical="center")
+    ws.row_dimensions[1].height = 22
+
+    # headers
+    for c, (h, w) in enumerate(zip(nv_headers, nv_widths), 1):
+        cell = ws.cell(row=2, column=c, value=h)
+        cell.font = HEADER_FONT
+        cell.fill = HEADER_FILL
+        cell.alignment = CENTER
+        cell.border = BORDER
+        ws.column_dimensions[get_column_letter(c)].width = w
+    ws.row_dimensions[2].height = 18
+
+    # data — only rows with a Next Date, exclude Screen Failed / Discontinued
+    df = pd.DataFrame()
+    df["Subject"] = df_raw["Subject"].fillna("")
+    df["Investigator"] = df_raw["Investigator"].fillna("")
+    df["Next Visit"] = df_raw["Next Expected IRT Transaction"].fillna("")
+    df["Datum"] = df_raw["Next Expected IRT Transaction Date [Local]"]
+    # astype(str) keeps the .str accessor usable whatever dtype the column has.
+    df["Status"] = df_raw["IRT Subject Status"].fillna("").astype(str)
+
+    df = df[df["Datum"].notna()]
+    df = df[~df["Status"].str.contains("Screen Failed|Discontinued", na=False)]
+    df = df.sort_values("Datum").reset_index(drop=True)
+
+    for r_idx, row in df.iterrows():
+        excel_row = r_idx + 3  # row 1=title, row 2=header
+        fill = EVEN_FILL if r_idx % 2 == 0 else ODD_FILL
+
+        values = [
+            row["Subject"], row["Investigator"], row["Next Visit"],
+            format_date(row["Datum"]),
+        ]
+        for c_idx, val in enumerate(values, 1):
+            cell = ws.cell(row=excel_row, column=c_idx, value=val if val != "" else None)
+            cell.fill = fill
+            cell.border = BORDER
+            cell.font = NORMAL_FONT
+            cell.alignment = LEFT
+        ws.row_dimensions[excel_row].height = 16
+
+    ws.freeze_panes = "A3"
+    last_data_row = len(df) + 2
+    ws.auto_filter.ref = f"A2:{last_col}{last_data_row}"
+
+
+def create_report(study):
+    """Build and save the summary workbook for one study.
+
+    The latest source report is located and loaded, then the sheets
+    "Přehled", "Next Visits" and the raw-data sheet are written in that
+    order. The workbook is saved in CREATED_DIR under a name made of today's
+    date and the study identifier.
+
+    Returns:
+        Path of the saved workbook.
+    """
+    source_path = find_latest_source(study)
+    print(f"[{study}] Čtu: {os.path.basename(source_path)}")
+    df_raw = load_source(source_path)
+
+    wb = Workbook()
+    # Drop the sheet openpyxl creates by default; every sheet is added explicitly.
+    wb.remove(wb.active)
+
+    for writer in (write_prehled, write_next_visits):
+        writer(wb, df_raw, study)
+    write_zdroj(wb, df_raw, source_path)
+
+    date_tag = datetime.date.today().strftime("%Y-%m-%d")
+    stem = f"{date_tag} {study} Subject Summary"
+    out_path = unique_path(CREATED_DIR, stem)
+    wb.save(out_path)
+    print(f"[{study}] Uloženo: {out_path}")
+    return out_path
+
+
+def main():
+    """Create the report for every study in STUDIES.
+
+    The output directory is created if needed. A study whose source file is
+    missing (FileNotFoundError) or whose workbook cannot be interpreted
+    (ValueError, e.g. no header row found) is reported and skipped, so one
+    bad input does not prevent the remaining studies from being processed.
+    """
+    os.makedirs(CREATED_DIR, exist_ok=True)
+    for study in STUDIES:
+        try:
+            create_report(study)
+        except (FileNotFoundError, ValueError) as e:
+            print(f"[{study}] PŘESKOČENO: {e}")
+    print("\nHotovo.")
+
+
+# NOTE(review): runs on import as well as on direct execution; left unguarded
+# to preserve the module's existing import-time behaviour.
+main()