112 lines
3.5 KiB
Python
112 lines
3.5 KiB
Python
import re
|
|
import copy
|
|
import datetime
|
|
from collections import Counter
|
|
from pathlib import Path
|
|
from openpyxl import load_workbook, Workbook
|
|
from openpyxl.utils import get_column_letter
|
|
|
|
# Input and output folders live next to this script.
_SCRIPT_DIR = Path(__file__).parent
TO_PROCESS = _SCRIPT_DIR / "ToProcess"
PROCESSED = _SCRIPT_DIR / "Processed"

# Matches a site identifier: the literal prefix "DD5-CZ" followed by digits.
SITE_PATTERN = re.compile(r'DD5-CZ\d+')

# Sites treated as active; any other matched site id counts as inactive.
ACTIVE_SITES = {
    "DD5-CZ10001",
    "DD5-CZ10003",
    "DD5-CZ10006",
    "DD5-CZ10009",
    "DD5-CZ10010",
    "DD5-CZ10012",
    "DD5-CZ10013",
    "DD5-CZ10015",
    "DD5-CZ10016",
    "DD5-CZ10020",
    "DD5-CZ10021",
    "DD5-CZ10022",
}

# Worksheet tab colours (hex RGB): green for active sites, red for the rest.
TAB_GREEN = "00B050"
TAB_RED = "FF0000"
|
|
|
|
|
|
def copy_row(ws_dst, dst_row_num, src_row):
    """Copy one row of cells into row ``dst_row_num`` of ``ws_dst``.

    Each source cell is written to the same column it came from. The value
    is always copied; font, fill, border, alignment and number format are
    copied only when the source cell reports ``has_style``.

    Args:
        ws_dst: Destination worksheet; must provide ``cell(row=, column=)``.
        dst_row_num: 1-based row number to write into.
        src_row: Iterable of source cells.
    """
    copied_style_attrs = ("font", "fill", "border", "alignment")

    for source in src_row:
        target = ws_dst.cell(row=dst_row_num, column=source.column)
        target.value = source.value

        if not source.has_style:
            continue

        # Style objects are shallow-copied rather than shared with the source.
        for attr in copied_style_attrs:
            setattr(target, attr, copy.copy(getattr(source, attr)))
        target.number_format = source.number_format
|
|
|
|
|
|
def _group_rows_by_site(rows, site_col_idx):
    """Map each DD5-CZ site id to the rows whose "Site" cell names it.

    Ids are extracted with SITE_PATTERN, so a row is attached only to the
    exact ids it contains (never to an id that is merely a prefix of one).
    Rows keep their original relative order within each group.
    """
    grouped = {}
    for row in rows:
        cell_text = str(row[site_col_idx - 1].value or "")
        # dict.fromkeys drops ids repeated inside one cell so the row is
        # not added to the same site twice.
        for site in dict.fromkeys(SITE_PATTERN.findall(cell_text)):
            grouped.setdefault(site, []).append(row)
    return grouped


def _dominant_study(rows, study_col_idx):
    """Return the most frequent comma-separated "Study" value, or None."""
    counts = Counter(
        part.strip()
        for row in rows
        for part in str(row[study_col_idx - 1].value or "").split(",")
        if part.strip()
    )
    return counts.most_common(1)[0][0] if counts else None


def process_file(src_path: Path):
    """Split the "Sheet0" sheet of a workbook into one sheet per DD5-CZ site.

    Every data row is copied to the sheet of each site id found in its
    "Site" cell. Rows on a sheet are ordered by "Approval Complete Date",
    newest first, with empty dates last. Active sites come first and get a
    green tab; the remaining sites follow with a red tab. The result is
    saved in PROCESSED as "<timestamp> <most common study> <source name>".

    Args:
        src_path: Path of the .xlsx workbook to process.

    Raises:
        ValueError: If the "Site", "Approval Complete Date" or "Study"
            header is missing from the first row.
    """
    print(f"Processing: {src_path.name}")
    wb_src = load_workbook(src_path)
    ws_src = wb_src["Sheet0"]

    header_row = next(ws_src.iter_rows(min_row=1, max_row=1))
    headers = [cell.value for cell in header_row]
    # Resolve every required column up front so a missing header fails
    # before any output is built.
    site_col_idx = headers.index("Site") + 1
    date_col_idx = headers.index("Approval Complete Date") + 1
    study_col_idx = headers.index("Study") + 1

    all_rows = list(ws_src.iter_rows(min_row=2))

    rows_by_site = _group_rows_by_site(all_rows, site_col_idx)
    active = sorted(s for s in rows_by_site if s in ACTIVE_SITES)
    inactive = sorted(s for s in rows_by_site if s not in ACTIVE_SITES)
    print(f" Found {len(active)} active, {len(inactive)} inactive DD5-CZ sites")

    if not rows_by_site:
        # openpyxl refuses to save a workbook that has no worksheets, so
        # there is nothing useful to write for this file.
        print(" No DD5-CZ sites found; nothing to save")
        return

    # Source column widths are the same for every output sheet; read once.
    source_dims = ws_src.column_dimensions
    column_widths = {}
    for col_idx in range(1, ws_src.max_column + 1):
        col_letter = get_column_letter(col_idx)
        if col_letter in source_dims:
            column_widths[col_letter] = source_dims[col_letter].width

    wb_out = Workbook()
    wb_out.remove(wb_out.active)  # drop the default empty sheet

    for site in active + inactive:
        site_rows = sorted(
            rows_by_site[site],
            # Rows without a date sort as the earliest possible datetime,
            # i.e. last when newest-first.
            key=lambda r: r[date_col_idx - 1].value or datetime.datetime.min,
            reverse=True,
        )

        ws = wb_out.create_sheet(title=site)
        ws.sheet_properties.tabColor = TAB_GREEN if site in ACTIVE_SITES else TAB_RED
        copy_row(ws, 1, header_row)
        for row_num, row in enumerate(site_rows, start=2):
            copy_row(ws, row_num, row)

        for col_letter, width in column_widths.items():
            ws.column_dimensions[col_letter].width = width

        ws.auto_filter.ref = ws.dimensions
        print(f" {site}: {len(site_rows)} rows")

    study_number = _dominant_study(all_rows, study_col_idx) or "UnknownStudy"
    # Keep characters that are illegal in file names out of the output name.
    study_number = re.sub(r'[\\/:*?"<>|]', "_", study_number)

    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H_%M_%S")
    filename = f"{timestamp} {study_number} {src_path.name}"
    PROCESSED.mkdir(parents=True, exist_ok=True)
    out_path = PROCESSED / filename
    wb_out.save(out_path)
    print(f" Saved: {out_path}")
|
|
|
|
|
|
def main():
    """Run process_file on every .xlsx workbook directly inside TO_PROCESS.

    Files are handled in sorted path order so runs are reproducible. Names
    starting with "~$" are skipped: Excel writes such lock files next to a
    workbook that is open, and they match "*.xlsx" without being readable
    workbooks.
    """
    xlsx_files = sorted(
        path
        for path in TO_PROCESS.glob("*.xlsx")
        if not path.name.startswith("~$")
    )
    if not xlsx_files:
        print("No .xlsx files found in ToProcess/")
        return

    for workbook_path in xlsx_files:
        process_file(workbook_path)


if __name__ == "__main__":
    main()
|