"""Build formatted Subject Summary workbooks from IRT source reports.

For every study in ``STUDIES`` the newest
``* <study> Subject Summary Report.xlsx`` file in ``IncomingSourceReports`` is
read and a workbook with three sheets is written to ``CreatedReports``:

* ``Přehled``      - condensed, styled overview of all subjects
* ``Next Visits``  - upcoming visits sorted by date
* ``ZDROJ (...)``  - plain copy of the source table
"""
import datetime
import glob
import os

import pandas as pd
from openpyxl import Workbook
from openpyxl.styles import (
    Font, PatternFill, Alignment, Border, Side, GradientFill
)
from openpyxl.utils import get_column_letter

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
INCOMING_DIR = os.path.join(BASE_DIR, "IncomingSourceReports")
CREATED_DIR = os.path.join(BASE_DIR, "CreatedReports")

STUDIES = ["77242113UCO3001", "42847922MDD3003"]

# Columns of the source report that the sheet writers read; load_source()
# verifies that all of them are present.
SOURCE_COLS = [
    "Subject",
    "Investigator",
    "Subject's age collection",
    "Cohort per IRT",
    "IRT Subject Status",
    "Last Recorded IRT Transaction",
    "Next Expected IRT Transaction",
    "Next Expected IRT Transaction Date [Local]",
]

# Column captions and widths of the "Přehled" sheet (same order as SOURCE_COLS).
DISPLAY_HEADERS = [
    "Subject",
    "Investigator",
    "Věk",
    "Cohort",
    "Status",
    "Last IRT",
    "Next Visit",
    "Next Date",
]

COL_WIDTHS = [14, 22, 6, 12, 14, 12, 12, 13]

# ── Styles ───────────────────────────────────────────────────────────────────
HEADER_FILL = PatternFill("solid", fgColor="1F4E79")
HEADER_FONT = Font(name="Arial", bold=True, color="FFFFFF", size=10)
NORMAL_FONT = Font(name="Arial", size=10)
BOLD_FONT = Font(name="Arial", bold=True, size=10)
STRIKE_FONT = Font(name="Arial", size=10, strike=True, color="999999")
ADOLESC_FONT = Font(name="Arial", bold=True, size=10)
TITLE_FONT = Font(name="Arial", bold=True, size=12, color="1F4E79")
THIN = Side(style="thin", color="CCCCCC")
BORDER = Border(left=THIN, right=THIN, top=THIN, bottom=THIN)
EVEN_FILL = PatternFill("solid", fgColor="EBF3FB")
ODD_FILL = PatternFill("solid", fgColor="FFFFFF")
CENTER = Alignment(horizontal="center", vertical="center", wrap_text=False)
LEFT = Alignment(horizontal="left", vertical="center", wrap_text=False)
TITLE_ALIGNMENT = Alignment(horizontal="left", vertical="center")


def unique_path(directory, stem):
    """Return a path ``<directory>/<stem>*.xlsx`` that does not exist yet.

    The plain ``<stem>.xlsx`` is preferred. If it is taken, the current time
    (``HHMM``) is appended; if that is taken as well (several runs within the
    same minute), a running number ``(2)``, ``(3)``, ... is added so that an
    existing report is never overwritten.
    """
    path = os.path.join(directory, f"{stem}.xlsx")
    if not os.path.exists(path):
        return path

    time_tag = datetime.datetime.now().strftime("%H%M")
    tagged_stem = f"{stem} {time_tag}"
    path = os.path.join(directory, f"{tagged_stem}.xlsx")
    counter = 2
    while os.path.exists(path):
        path = os.path.join(directory, f"{tagged_stem} ({counter}).xlsx")
        counter += 1
    return path


def find_latest_source(study):
    """Return the most recently modified source report for *study*.

    Files whose name starts with ``~$`` are ignored (presumably Excel's
    temporary lock files).

    Raises:
        FileNotFoundError: no matching file exists in ``INCOMING_DIR``.
    """
    pattern = os.path.join(INCOMING_DIR, f"* {study} Subject Summary Report.xlsx")
    candidates = [
        f for f in glob.glob(pattern)
        if not os.path.basename(f).startswith("~$")
    ]
    if not candidates:
        raise FileNotFoundError(f"Nenalezen zdrojový soubor pro {study} v {INCOMING_DIR}")
    return max(candidates, key=os.path.getmtime)


def load_source(path):
    """Read the source workbook at *path* into a DataFrame.

    The table header is not assumed to be on the first row: the first row
    containing a cell equal to ``"Subject"`` (ignoring surrounding whitespace)
    is used as the header row.

    Raises:
        ValueError: no header row was found, or one of ``SOURCE_COLS`` is
            missing from the table.
    """
    raw = pd.read_excel(path, header=None)

    # find header row (first row with a "Subject" cell in any column)
    header_row = None
    for i, row in raw.iterrows():
        if "Subject" in [str(v).strip() for v in row]:
            header_row = i
            break
    if header_row is None:
        raise ValueError("Hlavičkový řádek nenalezen")

    df = pd.read_excel(path, header=header_row)

    # The header row is detected on stripped cell text, so the column names
    # must be stripped too, otherwise "Subject " is found above but
    # df["Subject"] fails later.
    df.columns = [c.strip() if isinstance(c, str) else c for c in df.columns]

    missing = [c for c in SOURCE_COLS if c not in df.columns]
    if missing:
        raise ValueError(f"Ve zdrojovém souboru chybí sloupce: {', '.join(missing)}")
    return df


def simplify_cohort(val):
    """Shorten a cohort description to ``"Adolescent"`` / ``"Adult"``.

    Missing values become ``""``; any other text is returned unchanged.
    """
    if pd.isna(val):
        return ""
    val = str(val)
    if "dolescent" in val:
        return "Adolescent"
    if val.startswith("Adult"):
        return "Adult"
    # MDD3003: "Part 1", "Part 2" — keep as-is
    return val


def format_date(val):
    """Format *val* as ``YYYY-MM-DD``; missing values become ``""``.

    Values without ``strftime`` are converted with ``str`` and cut to the
    first 10 characters.
    """
    if pd.isna(val):
        return ""
    if hasattr(val, "strftime"):
        return val.strftime("%Y-%m-%d")
    return str(val)[:10]


def format_age(val):
    """Return *val* as an ``int``; missing values become ``""``.

    A value that cannot be converted to an integer is returned unchanged so
    that a single malformed age cell does not abort the whole report.
    """
    if pd.isna(val):
        return ""
    try:
        return int(val)
    except (TypeError, ValueError, OverflowError):
        return val


def _write_title(ws, merge_range, text):
    """Merge *merge_range* on row 1 and put the styled title *text* in it."""
    ws.merge_cells(merge_range)
    title = ws[merge_range.split(":")[0]]
    title.value = text
    title.font = TITLE_FONT
    title.alignment = TITLE_ALIGNMENT
    ws.row_dimensions[1].height = 22


def _write_header_row(ws, headers, widths):
    """Write styled column captions to row 2 and set the column widths."""
    for c, (h, w) in enumerate(zip(headers, widths), 1):
        cell = ws.cell(row=2, column=c, value=h)
        cell.font = HEADER_FONT
        cell.fill = HEADER_FILL
        cell.alignment = CENTER
        cell.border = BORDER
        ws.column_dimensions[get_column_letter(c)].width = w
    ws.row_dimensions[2].height = 18


def write_zdroj(wb, df_raw, source_path):
    """Add a ``ZDROJ (<DDMONYYYY>)`` sheet holding *df_raw* as a plain table.

    The date in the sheet name is the modification time of *source_path*.
    Missing values are written as empty strings and date-like values as
    ``YYYY-MM-DD`` text.
    """
    mtime = datetime.datetime.fromtimestamp(os.path.getmtime(source_path))
    sheet_name = f"ZDROJ ({mtime.strftime('%d%b%Y').upper()})"
    ws = wb.create_sheet(sheet_name)
    ws.sheet_view.showGridLines = True

    # Style objects are loop-invariant; build them once instead of per cell.
    header_font = Font(name="Arial", bold=True, size=9, color="FFFFFF")
    header_fill = PatternFill("solid", fgColor="404040")
    body_font = Font(name="Arial", size=9)

    # write raw headers + data as plain table
    headers = list(df_raw.columns)
    for c, h in enumerate(headers, 1):
        cell = ws.cell(row=1, column=c, value=h)
        cell.font = header_font
        cell.fill = header_fill
        cell.alignment = LEFT
        cell.border = BORDER
        ws.column_dimensions[get_column_letter(c)].width = 20

    for r, (_, row) in enumerate(df_raw.iterrows(), 2):
        fill = EVEN_FILL if r % 2 == 0 else ODD_FILL
        for c, col in enumerate(headers, 1):
            val = row[col]
            if pd.isna(val):
                val = ""
            elif hasattr(val, "strftime"):
                val = val.strftime("%Y-%m-%d")
            cell = ws.cell(row=r, column=c, value=val)
            cell.font = body_font
            cell.fill = fill
            cell.border = BORDER
            cell.alignment = LEFT

    ws.freeze_panes = "A2"
    ws.auto_filter.ref = f"A1:{get_column_letter(len(headers))}1"


def write_prehled(wb, df_raw, study):
    """Add the ``Přehled`` overview sheet for *study* to *wb*.

    Rows are sorted by subject. Rows whose status contains "Screen Failed" or
    "Discontinued" are struck through; the status cell of randomized subjects
    and the cohort cell of adolescents are bold.
    """
    ws = wb.create_sheet("Přehled")
    ws.sheet_view.showGridLines = False
    ws.sheet_view.showRowColHeaders = True

    # ── title row ────────────────────────────────────────────────────────────
    _write_title(
        ws,
        "A1:H1",
        f"Subject Summary — {study} ({datetime.date.today().strftime('%d-%b-%Y')})",
    )

    # ── header row ───────────────────────────────────────────────────────────
    _write_header_row(ws, DISPLAY_HEADERS, COL_WIDTHS)

    # ── build display dataframe ──────────────────────────────────────────────
    display = pd.DataFrame()
    display["Subject"] = df_raw["Subject"].fillna("")
    display["Investigator"] = df_raw["Investigator"].fillna("")
    display["Věk"] = df_raw["Subject's age collection"].apply(format_age)
    display["Cohort"] = df_raw["Cohort per IRT"].apply(simplify_cohort)
    display["Status"] = df_raw["IRT Subject Status"].fillna("")
    display["Last IRT"] = df_raw["Last Recorded IRT Transaction"].fillna("—")
    display["Next Visit"] = df_raw["Next Expected IRT Transaction"].fillna("—")
    display["Next Date"] = df_raw["Next Expected IRT Transaction Date [Local]"].apply(format_date)
    display = display.sort_values("Subject").reset_index(drop=True)

    # 1-based sheet columns that receive special formatting
    age_col, cohort_col, status_col = 3, 4, 5

    # ── data rows ────────────────────────────────────────────────────────────
    for r_idx, row in display.iterrows():
        excel_row = r_idx + 3  # row 1=title, row 2=header
        status = str(row["Status"])
        is_failed = "Screen Failed" in status or "Discontinued" in status
        is_randomized = "Randomized" in status
        is_adolescent = row["Cohort"] == "Adolescent"
        fill = EVEN_FILL if r_idx % 2 == 0 else ODD_FILL

        values = [row[h] for h in DISPLAY_HEADERS]
        for c_idx, val in enumerate(values, 1):
            cell = ws.cell(row=excel_row, column=c_idx,
                           value=val if val != "" else None)
            cell.fill = fill
            cell.border = BORDER
            cell.alignment = CENTER if c_idx == age_col else LEFT

            # font logic: a failed/discontinued row overrides everything else
            if is_failed:
                cell.font = STRIKE_FONT
            elif c_idx == status_col and is_randomized:
                cell.font = BOLD_FONT
            elif c_idx == cohort_col and is_adolescent:
                cell.font = ADOLESC_FONT
            else:
                cell.font = NORMAL_FONT

        ws.row_dimensions[excel_row].height = 16

    ws.freeze_panes = "A3"
    last_data_row = len(display) + 2
    ws.auto_filter.ref = f"A2:H{last_data_row}"


def write_next_visits(wb, df_raw, study):
    """Add the ``Next Visits`` sheet for *study* to *wb*.

    Only subjects with a next-transaction date are listed, excluding those
    whose status contains "Screen Failed" or "Discontinued"; rows are sorted
    by that date.
    """
    ws = wb.create_sheet("Next Visits")
    ws.sheet_view.showGridLines = False

    # title
    _write_title(
        ws,
        "A1:D1",
        f"Next Expected Visits — {study} ({datetime.date.today().strftime('%d-%b-%Y')})",
    )

    # headers
    nv_headers = ["Subject", "Investigator", "Next Visit", "Datum"]
    nv_widths = [14, 22, 26, 13]
    _write_header_row(ws, nv_headers, nv_widths)

    # data — only rows with a Next Date, exclude Screen Failed / Discontinued
    df = pd.DataFrame()
    df["Subject"] = df_raw["Subject"].fillna("")
    df["Investigator"] = df_raw["Investigator"].fillna("")
    df["Next Visit"] = df_raw["Next Expected IRT Transaction"].fillna("")
    df["Datum"] = df_raw["Next Expected IRT Transaction Date [Local]"]
    df["Status"] = df_raw["IRT Subject Status"].fillna("")
    df = df[df["Datum"].notna()]
    df = df[~df["Status"].str.contains("Screen Failed|Discontinued", na=False)]
    df = df.sort_values("Datum").reset_index(drop=True)

    for r_idx, row in df.iterrows():
        excel_row = r_idx + 3  # row 1=title, row 2=header
        fill = EVEN_FILL if r_idx % 2 == 0 else ODD_FILL
        values = [
            row["Subject"],
            row["Investigator"],
            row["Next Visit"],
            format_date(row["Datum"]),
        ]
        for c_idx, val in enumerate(values, 1):
            cell = ws.cell(row=excel_row, column=c_idx,
                           value=val if val != "" else None)
            cell.fill = fill
            cell.border = BORDER
            cell.font = NORMAL_FONT
            cell.alignment = LEFT
        ws.row_dimensions[excel_row].height = 16

    ws.freeze_panes = "A3"
    last_data_row = len(df) + 2
    ws.auto_filter.ref = f"A2:D{last_data_row}"


def create_report(study):
    """Create the summary workbook for *study* and return its path.

    Raises:
        FileNotFoundError: no source report exists for *study*.
        ValueError: the source report has no recognisable header row or lacks
            a required column.
    """
    source_path = find_latest_source(study)
    print(f"[{study}] Čtu: {os.path.basename(source_path)}")

    df_raw = load_source(source_path)

    wb = Workbook()
    wb.remove(wb.active)  # remove default sheet

    write_prehled(wb, df_raw, study)
    write_next_visits(wb, df_raw, study)
    write_zdroj(wb, df_raw, source_path)

    today = datetime.date.today().strftime("%Y-%m-%d")
    out_path = unique_path(CREATED_DIR, f"{today} {study} Subject Summary")
    wb.save(out_path)
    print(f"[{study}] Uloženo: {out_path}")
    return out_path


def main():
    """Generate a report for every study; studies without a source are skipped."""
    os.makedirs(CREATED_DIR, exist_ok=True)
    for study in STUDIES:
        try:
            create_report(study)
        except FileNotFoundError as e:
            print(f"[{study}] PŘESKOČENO: {e}")
    print("\nHotovo.")


# Run only when executed as a script, so importing the module has no side
# effects.
if __name__ == "__main__":
    main()