"""Covance samples report for study 42847922MDD3003.

Reads the most recent import from MySQL and generates an Excel workbook
with five sheets:

1. Přehled      - aggregate per patient + visit (total / Received / Not Received)
2. Chybějící    - detail rows of the Not Received samples
3. Kity         - kit inventory per site, split by expiration window
4. ZDROJ Vzorky - raw sample data
5. ZDROJ Kity   - raw kit inventory data
"""

import os
import datetime

import mysql.connector
import pandas as pd
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter

import db_config

STUDY = "42847922MDD3003"
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
CREATED_DIR = os.path.join(BASE_DIR, "CreatedReports")

# ── styles ───────────────────────────────────────────────────────────────────
HEADER_FILL = PatternFill("solid", fgColor="1F4E79")
HEADER_FONT = Font(name="Arial", bold=True, color="FFFFFF", size=10)
NORMAL_FONT = Font(name="Arial", size=10)
BOLD_FONT = Font(name="Arial", bold=True, size=10)
RED_FONT = Font(name="Arial", bold=True, size=10, color="C00000")
THIN = Side(style="thin", color="CCCCCC")
BORDER = Border(left=THIN, right=THIN, top=THIN, bottom=THIN)
EVEN_FILL = PatternFill("solid", fgColor="EBF3FB")
ODD_FILL = PatternFill("solid", fgColor="FFFFFF")
NOTRCV_FILL = PatternFill("solid", fgColor="FCE4D6")
CANCELLED_FILL = PatternFill("solid", fgColor="F2F2F2")
CENTER = Alignment(horizontal="center", vertical="center")
LEFT = Alignment(horizontal="left", vertical="center")


def unique_path(stem):
    """Return a path in CREATED_DIR for ``stem`` that does not exist yet.

    The plain ``<stem>.xlsx`` is preferred.  If it is taken, the current
    time (HHMM) is appended; if that is taken as well (several runs within
    the same minute), a numeric suffix is added so that an existing report
    is never overwritten.
    """
    path = os.path.join(CREATED_DIR, f"{stem}.xlsx")
    if not os.path.exists(path):
        return path
    tag = datetime.datetime.now().strftime("%H%M")
    candidate = os.path.join(CREATED_DIR, f"{stem} {tag}.xlsx")
    counter = 2
    while os.path.exists(candidate):
        candidate = os.path.join(CREATED_DIR, f"{stem} {tag} ({counter}).xlsx")
        counter += 1
    return candidate


# ── data load ────────────────────────────────────────────────────────────────
def _fetch_dataframe(sql, params):
    """Run ``sql`` with ``params`` and return the full result as a DataFrame.

    Column names are taken from the cursor description.  The cursor and the
    connection are closed even when the query raises.
    """
    conn = mysql.connector.connect(
        host=db_config.DB_HOST,
        port=db_config.DB_PORT,
        user=db_config.DB_USER,
        password=db_config.DB_PASSWORD,
        database=db_config.DB_NAME,
    )
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(sql, params)
            cols = [d[0] for d in cursor.description]
            rows = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        conn.close()
    return pd.DataFrame(rows, columns=cols)


def load_data():
    """Load the sample rows of the latest 'covance_samples' import for STUDY."""
    sql = """
        SELECT investigator_no, investigator_name, patient_no, collection_date,
               protocol_visit_code, accession, container_no, container_barcode,
               specimen_type, sample_status, label_line1, label_line2
        FROM covance_samples
        WHERE import_id = (
            SELECT MAX(import_id) FROM iwrs_import
            WHERE study = %s AND report_type = 'covance_samples'
        )
        ORDER BY investigator_no, patient_no, protocol_visit_code, container_no
    """
    return _fetch_dataframe(sql, (STUDY,))


def load_kit_data():
    """Load the kit rows of the latest 'covance_kit_inventory' import for STUDY."""
    sql = """
        SELECT site_code, investigator_name, kit_type, description, accession,
               shipped_date, expiration_date, days_to_expiration
        FROM covance_kit_inventory
        WHERE import_id = (
            SELECT MAX(import_id) FROM iwrs_import
            WHERE study = %s AND report_type = 'covance_kit_inventory'
        )
        ORDER BY site_code, kit_type+0, kit_type, accession
    """
    return _fetch_dataframe(sql, (STUDY,))


# ── helpers ──────────────────────────────────────────────────────────────────
def _blank_if_null(value):
    """Return ``""`` for None/NaN/NaT so a null never reaches a cell as 'nan'."""
    return "" if pd.isna(value) else value


def _format_date(value, fmt="%d-%b-%Y"):
    """Format a date-like value with ``fmt``.

    Null values (None, NaN, NaT) give an empty string; values without a
    ``strftime`` method are passed through ``str``.
    """
    if pd.isna(value):
        return ""
    if hasattr(value, "strftime"):
        return value.strftime(fmt)
    return str(value)


def test_name(row):
    """Build the test label from ``label_line1`` and ``label_line2`` of ``row``.

    Both parts are stripped; a null part counts as empty.  When the second
    line is empty only the first one is returned.
    """
    l1 = str(row["label_line1"]).strip() if pd.notna(row["label_line1"]) else ""
    l2 = str(row["label_line2"]).strip() if pd.notna(row["label_line2"]) else ""
    return f"{l1} {l2}".strip() if l2 else l1


def write_headers(ws, headers, widths, row=2):
    """Write a styled header row and set the width of each column.

    ``headers`` and ``widths`` are paired positionally, starting at column A.
    """
    for c, (h, w) in enumerate(zip(headers, widths), 1):
        cell = ws.cell(row=row, column=c, value=h)
        cell.font = HEADER_FONT
        cell.fill = HEADER_FILL
        cell.alignment = CENTER
        cell.border = BORDER
        ws.column_dimensions[get_column_letter(c)].width = w
    ws.row_dimensions[row].height = 18


def write_title(ws, text, ncols):
    """Write ``text`` as the sheet title in row 1, merged across ``ncols`` columns."""
    ws.merge_cells(f"A1:{get_column_letter(ncols)}1")
    cell = ws["A1"]
    cell.value = text
    cell.font = Font(name="Arial", bold=True, size=12, color="1F4E79")
    cell.alignment = Alignment(horizontal="left", vertical="center")
    ws.row_dimensions[1].height = 22


# ── sheet 1: Přehled ─────────────────────────────────────────────────────────
def write_prehled(wb, df):
    """Create the "Přehled" sheet: sample counts per site/patient/visit/accession.

    Each row shows the total number of samples plus the Received and
    Not Received counts.  Rows with at least one Not Received sample are
    highlighted.  Other statuses are included only in the total.
    """
    ws = wb.create_sheet("Přehled")
    ws.sheet_view.showGridLines = False
    today = datetime.date.today().strftime("%d-%b-%Y")
    write_title(ws, f"Covance Samples — {STUDY} ({today})", 9)
    headers = ["Site", "Investigátor", "Pacient", "Visit", "Accession",
               "Datum odběru", "Celkem", "Received", "Not Received"]
    widths = [9, 22, 14, 12, 13, 14, 8, 10, 13]
    write_headers(ws, headers, widths)

    group_keys = ["investigator_no", "investigator_name", "patient_no",
                  "protocol_visit_code", "accession", "collection_date"]
    agg = (
        # dropna=False: keep groups whose key contains a null (for example a
        # missing collection date); by default they would silently vanish
        # from the summary.
        df.groupby(group_keys, dropna=False)
        .agg(
            celkem=("sample_status", "count"),
            received=("sample_status", lambda x: (x == "Received").sum()),
            not_received=("sample_status", lambda x: (x == "Not Received").sum()),
        )
        .reset_index()
        .sort_values(["investigator_no", "patient_no", "protocol_visit_code"])
        .reset_index(drop=True)
    )

    centered_cols = (1, 4, 5, 6, 7, 8, 9)
    for r_idx, row in agg.iterrows():
        excel_row = r_idx + 3  # row 1 = title, row 2 = header
        has_missing = row["not_received"] > 0
        if has_missing:
            fill = NOTRCV_FILL
        else:
            fill = EVEN_FILL if r_idx % 2 == 0 else ODD_FILL
        values = [
            _blank_if_null(row["investigator_no"]),
            _blank_if_null(row["investigator_name"]),
            _blank_if_null(row["patient_no"]),
            _blank_if_null(row["protocol_visit_code"]),
            _blank_if_null(row["accession"]),
            _format_date(row["collection_date"]),
            int(row["celkem"]),
            int(row["received"]),
            int(row["not_received"]),
        ]
        for c_idx, val in enumerate(values, 1):
            cell = ws.cell(row=excel_row, column=c_idx, value=val)
            cell.fill = fill
            cell.border = BORDER
            cell.alignment = CENTER if c_idx in centered_cols else LEFT
            # the Not Received count is emphasised when it is non-zero
            cell.font = RED_FONT if (c_idx == 9 and has_missing) else NORMAL_FONT
        ws.row_dimensions[excel_row].height = 16

    ws.freeze_panes = "A3"
    ws.auto_filter.ref = f"A2:I{len(agg) + 2}"


# ── sheet 2: Chybějící ───────────────────────────────────────────────────────
def write_chybejici(wb, df):
    """Create the "Chybějící" sheet: one row per sample with status Not Received."""
    ws = wb.create_sheet("Chybějící")
    ws.sheet_view.showGridLines = False
    today = datetime.date.today().strftime("%d-%b-%Y")
    write_title(ws, f"Not Received vzorky — {STUDY} ({today})", 8)
    headers = ["Site", "Pacient", "Visit", "Datum odběru", "Accession",
               "Container", "Typ vzorku", "Test"]
    widths = [9, 14, 12, 14, 13, 10, 22, 30]
    write_headers(ws, headers, widths)

    missing = df[df["sample_status"] == "Not Received"].copy()
    # A list comprehension instead of DataFrame.apply(axis=1): on an empty
    # frame apply() returns a DataFrame, which cannot be assigned to a single
    # column - the report would fail exactly when nothing is missing.
    missing["test"] = [test_name(r) for _, r in missing.iterrows()]
    missing = missing.sort_values(
        ["investigator_no", "patient_no", "protocol_visit_code", "container_no"]
    ).reset_index(drop=True)

    centered_cols = (1, 3, 4, 5, 6)
    for r_idx, row in missing.iterrows():
        excel_row = r_idx + 3  # row 1 = title, row 2 = header
        fill = EVEN_FILL if r_idx % 2 == 0 else ODD_FILL
        values = [
            _blank_if_null(row["investigator_no"]),
            _blank_if_null(row["patient_no"]),
            _blank_if_null(row["protocol_visit_code"]),
            _format_date(row["collection_date"]),
            _blank_if_null(row["accession"]),
            int(row["container_no"]) if pd.notna(row["container_no"]) else "",
            _blank_if_null(row["specimen_type"]),
            row["test"],
        ]
        for c_idx, val in enumerate(values, 1):
            cell = ws.cell(row=excel_row, column=c_idx, value=val)
            cell.fill = fill
            cell.border = BORDER
            cell.alignment = CENTER if c_idx in centered_cols else LEFT
            cell.font = NORMAL_FONT
        ws.row_dimensions[excel_row].height = 16

    ws.freeze_panes = "A3"
    ws.auto_filter.ref = f"A2:H{len(missing) + 2}"


# ── sheet 3: Kity (per site) ─────────────────────────────────────────────────
def kit_sort_key(kt):
    """Sort key for kit types.

    Order: purely numeric types by value, then ``T-<number>`` types by
    number, then everything else alphabetically.
    """
    try:
        return (0, int(kt), "")
    except (TypeError, ValueError):
        pass
    text = str(kt)
    if text.upper().startswith("T-"):
        try:
            return (1, int(text[2:]), "")
        except ValueError:
            pass
    return (2, 0, text)


SITE_HDR_FILL = PatternFill("solid", fgColor="2E75B6")
SITE_HDR_FONT = Font(name="Arial", bold=True, color="FFFFFF", size=10)
TOTAL_FILL = PatternFill("solid", fgColor="D6E4F0")
SOON_FILL = PatternFill("solid", fgColor="FCE4D6")


def _cell(ws, row, col, value, font, fill, alignment, border):
    """Write ``value`` to a cell, apply the given styles and return the cell."""
    c = ws.cell(row=row, column=col, value=value)
    c.font = font
    c.fill = fill
    c.alignment = alignment
    c.border = border
    return c


def write_kity(wb, df_kits):
    """Create the "Kity" sheet: kit counts per site and kit type.

    For every site a block is written with one row per kit type known in
    the study, giving the number of kits expiring within 30 days from
    today, the number expiring later, and their sum, followed by a site
    total row.  Kits without an expiration date and kits whose expiration
    date is before today fall into neither bucket and are not counted.
    """
    ws = wb.create_sheet("Kity")
    ws.sheet_view.showGridLines = False
    today = datetime.date.today()
    cutoff = today + datetime.timedelta(days=30)
    today_str = today.strftime("%d-%b-%Y")

    # kit types across the whole study (sorted)
    kit_types = sorted(df_kits["kit_type"].dropna().unique(), key=kit_sort_key)
    kt_desc = (df_kits.drop_duplicates("kit_type")
               .set_index("kit_type")["description"].to_dict())

    # sites, sorted
    sites = (df_kits[["site_code", "investigator_name"]]
             .drop_duplicates()
             .sort_values("site_code")
             .values.tolist())

    # Convert expiration dates once for all sites; a missing date becomes NaT.
    kits = df_kits.copy()
    kits["exp_date"] = pd.to_datetime(kits["expiration_date"]).dt.date

    # columns: A=Kit Type, B=description, C=<=30 days, D=>30 days, E=total
    ws.column_dimensions["A"].width = 9
    ws.column_dimensions["B"].width = 28
    ws.column_dimensions["C"].width = 14
    ws.column_dimensions["D"].width = 14
    ws.column_dimensions["E"].width = 10
    write_title(ws, f"Kit Inventory — {STUDY} ({today_str})", 5)

    # sub-header (row 2) - no fixed height, Excel adjusts it by itself
    sub_headers = [
        (1, "Kit Type"),
        (2, "Popis"),
        (3, f"Expiruje ≤30 dní\n({cutoff.strftime('%d-%b-%Y')})"),
        (4, "Expiruje >30 dní"),
        (5, "Celkem"),
    ]
    wrapped_center = Alignment(horizontal="center", vertical="center",
                               wrap_text=True)
    for col, txt in sub_headers:
        _cell(ws, 2, col, txt, HEADER_FONT, HEADER_FILL, wrapped_center, BORDER)

    cur_row = 3
    for site_code, investigator in sites:
        # ── site header ──────────────────────────────────────────────────────
        ws.merge_cells(f"A{cur_row}:E{cur_row}")
        _cell(ws, cur_row, 1, f"{site_code} — {investigator}",
              SITE_HDR_FONT, SITE_HDR_FILL, LEFT, BORDER)
        # style the remaining cells of the merged range as well
        for col in range(2, 6):
            merged = ws.cell(row=cur_row, column=col)
            merged.fill = SITE_HDR_FILL
            merged.border = BORDER
        ws.row_dimensions[cur_row].height = 17
        cur_row += 1

        # kits of this site
        site_df = kits[kits["site_code"] == site_code]
        site_soon = 0
        site_later = 0
        for kt_idx, kt in enumerate(kit_types):
            # NaT must be filtered out explicitly: it is not None, and
            # comparing it with a datetime.date can raise TypeError.
            dates = [d for d in site_df.loc[site_df["kit_type"] == kt, "exp_date"]
                     if pd.notna(d)]
            soon = sum(1 for d in dates if today <= d <= cutoff)
            later = sum(1 for d in dates if d > cutoff)
            site_soon += soon
            site_later += later
            total = soon + later

            fill = EVEN_FILL if kt_idx % 2 == 0 else ODD_FILL
            _cell(ws, cur_row, 1, kt, BOLD_FONT, fill, CENTER, BORDER)
            _cell(ws, cur_row, 2, kt_desc.get(kt, ""), NORMAL_FONT, fill, LEFT,
                  BORDER)
            # zero counts are left blank to keep the table readable
            _cell(ws, cur_row, 3, soon if soon else None,
                  RED_FONT if soon else NORMAL_FONT,
                  SOON_FILL if soon else fill, CENTER, BORDER)
            _cell(ws, cur_row, 4, later if later else None, NORMAL_FONT, fill,
                  CENTER, BORDER)
            _cell(ws, cur_row, 5, total if total else None, BOLD_FONT, fill,
                  CENTER, BORDER)
            ws.row_dimensions[cur_row].height = 16
            cur_row += 1

        # ── site total ───────────────────────────────────────────────────────
        site_total = site_soon + site_later
        _cell(ws, cur_row, 1, "Celkem", BOLD_FONT, TOTAL_FILL, CENTER, BORDER)
        _cell(ws, cur_row, 2, "", BOLD_FONT, TOTAL_FILL, LEFT, BORDER)
        _cell(ws, cur_row, 3, site_soon if site_soon else None, BOLD_FONT,
              TOTAL_FILL, CENTER, BORDER)
        _cell(ws, cur_row, 4, site_later if site_later else None, BOLD_FONT,
              TOTAL_FILL, CENTER, BORDER)
        _cell(ws, cur_row, 5, site_total if site_total else None, BOLD_FONT,
              TOTAL_FILL, CENTER, BORDER)
        ws.row_dimensions[cur_row].height = 16
        cur_row += 2  # empty row between sites

    ws.freeze_panes = "A3"


# ── sheets 4 + 5: raw data ───────────────────────────────────────────────────
def _write_raw_sheet(wb, title, df, col_width):
    """Dump ``df`` unchanged into a new sheet ``title`` with a header row.

    Null values are written as empty strings and date-like values as
    ``YYYY-MM-DD`` text.  Every column gets the width ``col_width``.
    """
    ws = wb.create_sheet(title)
    ws.sheet_view.showGridLines = True
    headers = list(df.columns)

    header_font = Font(name="Arial", bold=True, size=9, color="FFFFFF")
    header_fill = PatternFill("solid", fgColor="404040")
    body_font = Font(name="Arial", size=9)

    for c, h in enumerate(headers, 1):
        cell = ws.cell(row=1, column=c, value=h)
        cell.font = header_font
        cell.fill = header_fill
        cell.alignment = LEFT
        cell.border = BORDER
        ws.column_dimensions[get_column_letter(c)].width = col_width

    for r_idx, (_, row) in enumerate(df.iterrows(), 2):
        fill = EVEN_FILL if r_idx % 2 == 0 else ODD_FILL
        for c_idx, col in enumerate(headers, 1):
            val = row[col]
            if pd.isna(val):
                val = ""
            elif hasattr(val, "strftime"):
                val = val.strftime("%Y-%m-%d")
            cell = ws.cell(row=r_idx, column=c_idx, value=val)
            cell.font = body_font
            cell.fill = fill
            cell.border = BORDER
            cell.alignment = LEFT

    ws.freeze_panes = "A2"
    if headers:
        # The filter range spans the data rows too, like on the other sheets.
        last_col = get_column_letter(len(headers))
        ws.auto_filter.ref = f"A1:{last_col}{len(df) + 1}"


def write_zdroj_kity(wb, df_kits):
    """Create the "ZDROJ Kity" sheet with the raw kit inventory rows."""
    _write_raw_sheet(wb, "ZDROJ Kity", df_kits, 20)


def write_zdroj(wb, df):
    """Create the "ZDROJ Vzorky" sheet with the raw sample rows."""
    _write_raw_sheet(wb, "ZDROJ Vzorky", df, 18)


# ── main ─────────────────────────────────────────────────────────────────────
def main():
    """Load the data, build all five sheets and save the workbook."""
    os.makedirs(CREATED_DIR, exist_ok=True)
    print("Načítám data z MySQL...")
    df = load_data()
    df_kits = load_kit_data()
    print(f" Vzorky: {len(df)} řádků, {df['patient_no'].nunique()} pacientů")
    print(f" Kity: {len(df_kits)} kitů, {df_kits['site_code'].nunique()} center")

    wb = Workbook()
    wb.remove(wb.active)  # drop the default empty sheet
    write_prehled(wb, df)
    write_chybejici(wb, df)
    write_kity(wb, df_kits)
    write_zdroj(wb, df)
    write_zdroj_kity(wb, df_kits)

    today = datetime.date.today().strftime("%Y-%m-%d")
    out_path = unique_path(f"{today} {STUDY} Covance Samples")
    wb.save(out_path)
    print(f"Uloženo: {out_path}")


if __name__ == "__main__":
    main()