This commit is contained in:
2026-05-04 16:14:47 +02:00
parent 2ec8884e82
commit eaea634b2b
65 changed files with 500 additions and 0 deletions
@@ -0,0 +1,310 @@
import os
import glob
import datetime
import pandas as pd
from openpyxl import Workbook
from openpyxl.styles import (
Font, PatternFill, Alignment, Border, Side, GradientFill
)
from openpyxl.utils import get_column_letter
# ── Paths & report layout ────────────────────────────────────────────────────
# Input and output folders sit next to this script.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
INCOMING_DIR, CREATED_DIR = (
    os.path.join(BASE_DIR, folder)
    for folder in ("IncomingSourceReports", "CreatedReports")
)

# Study identifiers for which a report is generated.
STUDIES = ["77242113UCO3001", "42847922MDD3003"]

# One entry per overview column:
# (column name in the source report, header shown in the overview, width).
_OVERVIEW_COLUMNS = (
    ("Subject", "Subject", 14),
    ("Investigator", "Investigator", 22),
    ("Subject's age collection", "Věk", 6),
    ("Cohort per IRT", "Cohort", 12),
    ("IRT Subject Status", "Status", 14),
    ("Last Recorded IRT Transaction", "Last IRT", 12),
    ("Next Expected IRT Transaction", "Next Visit", 12),
    ("Next Expected IRT Transaction Date [Local]", "Next Date", 13),
)

SOURCE_COLS = [source for source, _, _ in _OVERVIEW_COLUMNS]
DISPLAY_HEADERS = [shown for _, shown, _ in _OVERVIEW_COLUMNS]
COL_WIDTHS = [width for _, _, width in _OVERVIEW_COLUMNS]
# ── Styles ───────────────────────────────────────────────────────────────────
def _arial(**options):
    """Return an Arial ``Font`` with the given extra keyword options."""
    return Font(name="Arial", **options)


def _solid(rgb):
    """Return a solid ``PatternFill`` whose foreground is the hex colour *rgb*."""
    return PatternFill("solid", fgColor=rgb)


# Header row of the overview sheets: white bold text on dark blue.
HEADER_FILL = _solid("1F4E79")
HEADER_FONT = _arial(bold=True, color="FFFFFF", size=10)

# Fonts for data cells.
NORMAL_FONT = _arial(size=10)
BOLD_FONT = _arial(bold=True, size=10)
STRIKE_FONT = _arial(size=10, strike=True, color="999999")  # grey, struck through
ADOLESC_FONT = _arial(bold=True, size=10)

# Light grey hairline border on all four sides of a cell.
THIN = Side(style="thin", color="CCCCCC")
BORDER = Border(left=THIN, right=THIN, top=THIN, bottom=THIN)

# Alternating row fills.
EVEN_FILL = _solid("EBF3FB")
ODD_FILL = _solid("FFFFFF")

# Both alignments are vertically centred and never wrap text.
CENTER, LEFT = (
    Alignment(horizontal=side, vertical="center", wrap_text=False)
    for side in ("center", "left")
)
def unique_path(directory, stem):
    """Return an ``.xlsx`` path inside *directory* that does not exist yet.

    Candidates are tried in this order and the first free one is returned:

    1. ``<stem>.xlsx``
    2. ``<stem> HHMM.xlsx`` (current local time)
    3. ``<stem> HHMM (2).xlsx``, ``<stem> HHMM (3).xlsx``, ...

    Step 3 keeps a second run within the same minute from returning a path
    that already exists (which would overwrite the earlier report on save).

    Args:
        directory: Folder the file is going to be written to.
        stem: File name without the ``.xlsx`` extension.

    Returns:
        A path string; the file itself is not created here.
    """
    path = os.path.join(directory, f"{stem}.xlsx")
    if not os.path.exists(path):
        return path

    time_tag = datetime.datetime.now().strftime("%H%M")
    tagged_stem = f"{stem} {time_tag}"
    path = os.path.join(directory, f"{tagged_stem}.xlsx")
    counter = 2
    while os.path.exists(path):
        path = os.path.join(directory, f"{tagged_stem} ({counter}).xlsx")
        counter += 1
    return path
def find_latest_source(study, incoming_dir=None):
    """Return the most recently modified source report for *study*.

    Files are matched by the name pattern
    ``* <study> Subject Summary Report.xlsx``. Names starting with ``~$``
    are ignored.

    The directory and the study id are escaped before being placed into the
    glob pattern, so characters such as ``[`` or ``*`` in the folder path are
    matched literally instead of being treated as wildcards.

    Args:
        study: Study identifier that appears in the file name.
        incoming_dir: Folder to search; defaults to ``INCOMING_DIR``.

    Returns:
        Path of the matching file with the newest modification time.

    Raises:
        FileNotFoundError: If no matching file exists.
    """
    directory = INCOMING_DIR if incoming_dir is None else incoming_dir
    pattern = os.path.join(
        glob.escape(directory),
        f"* {glob.escape(study)} Subject Summary Report.xlsx",
    )
    candidates = [
        f for f in glob.glob(pattern)
        if not os.path.basename(f).startswith("~$")
    ]
    if not candidates:
        raise FileNotFoundError(f"Nenalezen zdrojový soubor pro {study} v {directory}")
    # Only the newest file is needed, so a full sort is unnecessary.
    return max(candidates, key=os.path.getmtime)
def load_source(path):
    """Read the Excel report at *path* into a DataFrame with proper headers.

    The sheet is first read without a header so the header row can be
    located: it is the first row in which any cell, converted to ``str`` and
    stripped, equals ``"Subject"``. The file is then read a second time with
    that row as the header.

    Args:
        path: Path of the Excel file to read.

    Returns:
        The DataFrame produced by the second read.

    Raises:
        ValueError: If no row contains a ``"Subject"`` cell.
    """
    headerless = pd.read_excel(path, header=None)
    header_row = next(
        (
            idx
            for idx, cells in headerless.iterrows()
            if any(str(cell).strip() == "Subject" for cell in cells)
        ),
        None,
    )
    if header_row is None:
        raise ValueError("Hlavičkový řádek nenalezen")
    return pd.read_excel(path, header=header_row)
def simplify_cohort(val):
    """Collapse a raw cohort value into a short label.

    Args:
        val: Raw cohort cell value; may be missing (NaN/None).

    Returns:
        ``""`` for a missing value, ``"Adolescent"`` when the text contains
        ``"dolescent"``, ``"Adult"`` when it starts with ``"Adult"``, and
        otherwise the text unchanged (e.g. "Part 1" / "Part 2" in MDD3003).
    """
    if pd.isna(val):
        return ""
    text = str(val)
    # The adolescent check comes first, so it wins over the "Adult" prefix.
    if "dolescent" in text:
        label = "Adolescent"
    elif text.startswith("Adult"):
        label = "Adult"
    else:
        label = text
    return label
def format_date(val):
    """Render a date-like cell value as ``YYYY-MM-DD`` text.

    Args:
        val: Cell value; may be missing, an object with ``strftime``, or
            anything else.

    Returns:
        ``""`` for a missing value, ``val.strftime("%Y-%m-%d")`` when the
        value has a ``strftime`` attribute, otherwise the first ten
        characters of ``str(val)``.
    """
    if pd.isna(val):
        return ""
    can_format = hasattr(val, "strftime")
    return val.strftime("%Y-%m-%d") if can_format else str(val)[:10]
def write_zdroj(wb, df_raw, source_path):
    """Add a "ZDROJ (<DDMONYYYY>)" sheet holding the source data as a table.

    The date in the sheet name is the modification time of *source_path*.
    Row 1 holds the column names of *df_raw*; the data rows follow with
    alternating fills. Missing values are written as empty strings and
    values with a ``strftime`` attribute as ``YYYY-MM-DD`` text.

    Args:
        wb: Workbook the sheet is added to.
        df_raw: DataFrame whose columns and rows are written out.
        source_path: Path of the source file; only its mtime is read.
    """
    mtime = datetime.datetime.fromtimestamp(os.path.getmtime(source_path))
    sheet_name = f"ZDROJ ({mtime.strftime('%d%b%Y').upper()})"
    ws = wb.create_sheet(sheet_name)
    ws.sheet_view.showGridLines = True

    # Build each style object once and share it between cells instead of
    # constructing a new Font/PatternFill for every single cell.
    header_font = Font(name="Arial", bold=True, size=9, color="FFFFFF")
    header_fill = PatternFill("solid", fgColor="404040")
    body_font = Font(name="Arial", size=9)

    # Header row.
    headers = list(df_raw.columns)
    for c, h in enumerate(headers, 1):
        cell = ws.cell(row=1, column=c, value=h)
        cell.font = header_font
        cell.fill = header_fill
        cell.alignment = LEFT
        cell.border = BORDER
        ws.column_dimensions[get_column_letter(c)].width = 20

    # Data rows start at sheet row 2.
    last_row = 1
    for r, (_, row) in enumerate(df_raw.iterrows(), 2):
        fill = EVEN_FILL if r % 2 == 0 else ODD_FILL
        for c, col in enumerate(headers, 1):
            val = row[col]
            if pd.isna(val):
                val = ""
            elif hasattr(val, "strftime"):
                val = val.strftime("%Y-%m-%d")
            cell = ws.cell(row=r, column=c, value=val)
            cell.font = body_font
            cell.fill = fill
            cell.border = BORDER
            cell.alignment = LEFT
        last_row = r

    ws.freeze_panes = "A2"
    # The filter spans header and data, like the other sheets of the report.
    # With no columns there is nothing to filter, and get_column_letter(0)
    # would raise.
    if headers:
        ws.auto_filter.ref = f"A1:{get_column_letter(len(headers))}{last_row}"
def write_prehled(wb, df_raw, study):
    """Add the "Přehled" overview sheet to *wb*.

    Layout: a merged title in row 1, a styled header in row 2, then one row
    per row of *df_raw*, sorted by "Subject", with alternating fills.

    Font rules for a data row:
      * status contains "Screen Failed" or "Discontinued": every cell uses
        STRIKE_FONT;
      * otherwise the Status cell uses BOLD_FONT when the status contains
        "Randomized", the Cohort cell uses ADOLESC_FONT when the cohort is
        "Adolescent", and all remaining cells use NORMAL_FONT.

    Args:
        wb: Workbook the sheet is added to.
        df_raw: Source DataFrame; the columns read are "Subject",
            "Investigator", "Subject's age collection", "Cohort per IRT",
            "IRT Subject Status", "Last Recorded IRT Transaction",
            "Next Expected IRT Transaction" and
            "Next Expected IRT Transaction Date [Local]".
        study: Study identifier shown in the title.
    """
    ws = wb.create_sheet("Přehled")
    view = ws.sheet_view
    view.showGridLines = False
    view.showRowColHeaders = True

    # ── title row ────────────────────────────────────────────────────────────
    ws.merge_cells("A1:H1")
    banner = ws["A1"]
    banner.value = f"Subject Summary — {study} ({datetime.date.today().strftime('%d-%b-%Y')})"
    banner.font = Font(name="Arial", bold=True, size=12, color="1F4E79")
    banner.alignment = Alignment(horizontal="left", vertical="center")
    ws.row_dimensions[1].height = 22

    # ── header row ───────────────────────────────────────────────────────────
    for col_no, (caption, width) in enumerate(zip(DISPLAY_HEADERS, COL_WIDTHS), 1):
        head = ws.cell(row=2, column=col_no, value=caption)
        head.font = HEADER_FONT
        head.fill = HEADER_FILL
        head.alignment = CENTER
        head.border = BORDER
        ws.column_dimensions[get_column_letter(col_no)].width = width
    ws.row_dimensions[2].height = 18

    # ── build display dataframe ──────────────────────────────────────────────
    display = pd.DataFrame({
        "Subject": df_raw["Subject"].fillna(""),
        "Investigator": df_raw["Investigator"].fillna(""),
        "Věk": df_raw["Subject's age collection"].apply(
            lambda v: "" if pd.isna(v) else int(v)),
        "Cohort": df_raw["Cohort per IRT"].apply(simplify_cohort),
        "Status": df_raw["IRT Subject Status"].fillna(""),
        "Last IRT": df_raw["Last Recorded IRT Transaction"].fillna(""),
        "Next Visit": df_raw["Next Expected IRT Transaction"].fillna(""),
        "Next Date": df_raw["Next Expected IRT Transaction Date [Local]"].apply(format_date),
    })
    display = display.sort_values("Subject").reset_index(drop=True)

    # ── data rows ────────────────────────────────────────────────────────────
    for position, (_, record) in enumerate(display.iterrows()):
        sheet_row = position + 3  # rows 1 and 2 hold the title and the header
        status = str(record["Status"])

        # Pick the row's default font plus per-column overrides (1-based).
        if "Screen Failed" in status or "Discontinued" in status:
            base_font, special = STRIKE_FONT, {}
        else:
            base_font, special = NORMAL_FONT, {}
            if record["Cohort"] == "Adolescent":
                special[4] = ADOLESC_FONT
            if "Randomized" in status:
                special[5] = BOLD_FONT

        band = ODD_FILL if position % 2 else EVEN_FILL
        for col_no, name in enumerate(display.columns, 1):
            val = record[name]
            # Empty strings are written as truly empty cells.
            cell = ws.cell(row=sheet_row, column=col_no, value=val if val != "" else None)
            cell.fill = band
            cell.border = BORDER
            # Only the age column (3) is centred.
            cell.alignment = CENTER if col_no == 3 else LEFT
            cell.font = special.get(col_no, base_font)
        ws.row_dimensions[sheet_row].height = 16

    ws.freeze_panes = "A3"
    ws.auto_filter.ref = f"A2:H{len(display) + 2}"
def write_next_visits(wb, df_raw, study):
    """Add the "Next Visits" sheet listing upcoming visits, sorted by date.

    Only rows of *df_raw* that have a value in
    "Next Expected IRT Transaction Date [Local]" are listed, and rows whose
    status contains "Screen Failed" or "Discontinued" are left out.

    Args:
        wb: Workbook the sheet is added to.
        df_raw: Source DataFrame; the columns read are "Subject",
            "Investigator", "Next Expected IRT Transaction",
            "Next Expected IRT Transaction Date [Local]" and
            "IRT Subject Status".
        study: Study identifier shown in the title.
    """
    ws = wb.create_sheet("Next Visits")
    ws.sheet_view.showGridLines = False

    # title
    ws.merge_cells("A1:D1")
    banner = ws["A1"]
    banner.value = f"Next Expected Visits — {study} ({datetime.date.today().strftime('%d-%b-%Y')})"
    banner.font = Font(name="Arial", bold=True, size=12, color="1F4E79")
    banner.alignment = Alignment(horizontal="left", vertical="center")
    ws.row_dimensions[1].height = 22

    # headers: (caption, column width)
    columns = (("Subject", 14), ("Investigator", 22), ("Next Visit", 26), ("Datum", 13))
    for col_no, (caption, width) in enumerate(columns, 1):
        head = ws.cell(row=2, column=col_no, value=caption)
        head.font = HEADER_FONT
        head.fill = HEADER_FILL
        head.alignment = CENTER
        head.border = BORDER
        ws.column_dimensions[get_column_letter(col_no)].width = width
    ws.row_dimensions[2].height = 18

    # data: keep dated rows only, drop Screen Failed / Discontinued, sort by date
    visits = pd.DataFrame({
        "Subject": df_raw["Subject"].fillna(""),
        "Investigator": df_raw["Investigator"].fillna(""),
        "Next Visit": df_raw["Next Expected IRT Transaction"].fillna(""),
        "Datum": df_raw["Next Expected IRT Transaction Date [Local]"],
        "Status": df_raw["IRT Subject Status"].fillna(""),
    })
    visits = visits[visits["Datum"].notna()]
    excluded = visits["Status"].str.contains("Screen Failed|Discontinued", na=False)
    visits = visits[~excluded].sort_values("Datum").reset_index(drop=True)

    for position, (_, record) in enumerate(visits.iterrows()):
        sheet_row = position + 3  # rows 1 and 2 hold the title and the header
        band = ODD_FILL if position % 2 else EVEN_FILL
        # Missing dates were filtered out above, so this always yields text.
        date_text = format_date(record["Datum"])
        row_values = (
            record["Subject"], record["Investigator"], record["Next Visit"], date_text,
        )
        for col_no, val in enumerate(row_values, 1):
            cell = ws.cell(row=sheet_row, column=col_no, value=val if val != "" else None)
            cell.fill = band
            cell.border = BORDER
            cell.font = NORMAL_FONT
            cell.alignment = LEFT
        ws.row_dimensions[sheet_row].height = 16

    ws.freeze_panes = "A3"
    ws.auto_filter.ref = f"A2:D{len(visits) + 2}"
def create_report(study):
    """Build and save the summary workbook for one study.

    Steps: locate the newest source report, load it, write the "Přehled",
    "Next Visits" and "ZDROJ" sheets (in that order) into a fresh workbook,
    and save it in CREATED_DIR under a name that is not taken yet.

    Args:
        study: Study identifier.

    Returns:
        Path of the saved workbook.
    """
    source_path = find_latest_source(study)
    print(f"[{study}] Čtu: {os.path.basename(source_path)}")
    df_raw = load_source(source_path)

    wb = Workbook()
    wb.remove(wb.active)  # a new workbook starts with one default sheet
    for add_sheet in (write_prehled, write_next_visits):
        add_sheet(wb, df_raw, study)
    write_zdroj(wb, df_raw, source_path)

    today = f"{datetime.date.today():%Y-%m-%d}"
    out_path = unique_path(CREATED_DIR, f"{today} {study} Subject Summary")
    wb.save(out_path)
    print(f"[{study}] Uloženo: {out_path}")
    return out_path
def main():
    """Generate a report for every study listed in STUDIES.

    The output folder CREATED_DIR is created if needed. A study whose
    report raises ``FileNotFoundError`` is reported on stdout and skipped;
    any other exception propagates.
    """
    os.makedirs(CREATED_DIR, exist_ok=True)
    for study in STUDIES:
        try:
            create_report(study)
        except FileNotFoundError as e:
            print(f"[{study}] PŘESKOČENO: {e}")
    print("\nHotovo.")


# Run only when executed as a script, so importing this module (e.g. to reuse
# a helper) does not start report generation.
if __name__ == "__main__":
    main()