z230

2026-04-21 13:39:36 +02:00
parent ac21a7c84a
commit 8638f98748
2 changed files with 111 additions and 0 deletions
@@ -0,0 +1,111 @@
+import re
+import copy
+import datetime
+from collections import Counter
+from pathlib import Path
+from openpyxl import load_workbook, Workbook
+from openpyxl.utils import get_column_letter
+
+TO_PROCESS = Path(__file__).parent / "ToProcess"
+PROCESSED = Path(__file__).parent / "Processed"
+SITE_PATTERN = re.compile(r'DD5-CZ\d+')
+
+ACTIVE_SITES = {
+    "DD5-CZ10001", "DD5-CZ10003", "DD5-CZ10006", "DD5-CZ10009",
+    "DD5-CZ10010", "DD5-CZ10012", "DD5-CZ10013", "DD5-CZ10015",
+    "DD5-CZ10016", "DD5-CZ10020", "DD5-CZ10021", "DD5-CZ10022",
+}
+TAB_GREEN = "00B050"
+TAB_RED   = "FF0000"
+
+
+def copy_row(ws_dst, dst_row_num, src_row):
+    for src_cell in src_row:
+        dst_cell = ws_dst.cell(row=dst_row_num, column=src_cell.column)
+        dst_cell.value = src_cell.value
+        if src_cell.has_style:
+            dst_cell.font = copy.copy(src_cell.font)
+            dst_cell.fill = copy.copy(src_cell.fill)
+            dst_cell.border = copy.copy(src_cell.border)
+            dst_cell.alignment = copy.copy(src_cell.alignment)
+            dst_cell.number_format = src_cell.number_format
+
+
+def process_file(src_path: Path):
+    print(f"Processing: {src_path.name}")
+    wb_src = load_workbook(src_path)
+    ws_src = wb_src["Sheet0"]
+
+    headers = [ws_src.cell(1, c).value for c in range(1, ws_src.max_column + 1)]
+    site_col_idx = headers.index("Site") + 1
+    date_col_idx = headers.index("Approval Complete Date") + 1
+
+    all_rows = list(ws_src.iter_rows(min_row=2))
+
+    all_sites = set(
+        m for row in all_rows
+        for m in SITE_PATTERN.findall(str(row[site_col_idx - 1].value or ""))
+    )
+    active = sorted(s for s in all_sites if s in ACTIVE_SITES)
+    inactive = sorted(s for s in all_sites if s not in ACTIVE_SITES)
+    ordered_sites = active + inactive
+    print(f"  Found {len(active)} active, {len(inactive)} inactive DD5-CZ sites")
+
+    wb_out = Workbook()
+    wb_out.remove(wb_out.active)
+
+    header_row = list(ws_src.iter_rows(min_row=1, max_row=1))[0]
+
+    for site in ordered_sites:
+        site_rows = [
+            row for row in all_rows
+            if site in (row[site_col_idx - 1].value or "")
+        ]
+        site_rows.sort(
+            key=lambda r: r[date_col_idx - 1].value or datetime.datetime.min,
+            reverse=True,
+        )
+
+        ws = wb_out.create_sheet(title=site)
+        ws.sheet_properties.tabColor = TAB_GREEN if site in ACTIVE_SITES else TAB_RED
+        copy_row(ws, 1, header_row)
+
+        for i, row in enumerate(site_rows, start=2):
+            copy_row(ws, i, row)
+
+        for col_idx in range(1, ws_src.max_column + 1):
+            col_letter = get_column_letter(col_idx)
+            if col_letter in ws_src.column_dimensions:
+                ws.column_dimensions[col_letter].width = ws_src.column_dimensions[col_letter].width
+
+        ws.auto_filter.ref = ws.dimensions
+        print(f"    {site}: {len(site_rows)} rows")
+
+    study_col_idx = headers.index("Study") + 1
+    study_values = [
+        s.strip()
+        for row in all_rows
+        for s in str(row[study_col_idx - 1].value or "").split(",")
+        if s.strip()
+    ]
+    study_number = Counter(study_values).most_common(1)[0][0]
+
+    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H_%M_%S")
+    filename = f"{timestamp} {study_number} {src_path.name}"
+    out_path = PROCESSED / filename
+    wb_out.save(out_path)
+    print(f"  Saved: {out_path}")
+
+
+def main():
+    xlsx_files = list(TO_PROCESS.glob("*.xlsx"))
+    if not xlsx_files:
+        print("No .xlsx files found in ToProcess/")
+        return
+
+    for f in xlsx_files:
+        process_file(f)
+
+
+# Run the batch only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()