"""Build a formatted Excel visit report from an EDC data-listing CSV export.

The script:

1. Looks up the source CSV under ``src_dir``.
2. Keeps five columns (site, subject, instance name, Field4Value,
   Field5Value), renames them to the report headings and parses the
   Field4Value column as ``'%d %b %Y'`` dates.
3. Sorts the rows by site, subject and visit date.
4. Writes them to a single styled worksheet and saves it under ``out_dir``
   with today's date in the file name, after removing any earlier report
   files that match the same naming pattern.
"""
import glob
import os
from datetime import date, datetime

import pandas as pd
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Border, Side, Alignment
from openpyxl.utils import get_column_letter

src_dir = "/Covance_UCO3001/"
out_dir = "U:/Dropbox/!!!Days/Downloads Z230/"

# Find source CSV.
# NOTE(review): the pattern contains no wildcard, so it only matches a file
# with exactly this name -- confirm that a leading "*" was not lost.
csv_files = glob.glob(src_dir + "_EDCStdRpt-DataListing.csv")
if not csv_files:
    # Explicit raise instead of `assert`, which is stripped under `python -O`.
    raise FileNotFoundError("CSV file not found!")
csv_file = csv_files[0]
print(f"Source: {csv_file}")

today = date.today().strftime("%Y-%m-%d")
out_path = out_dir + f"{today} 77242113UCO3001 Visit report.xlsx"

# Load CSV
df = pd.read_csv(csv_file, encoding='utf-8')

# Select and rename columns
result = df[['SiteNumber', 'Subject', 'InstanceName', 'Field4Value', 'Field5Value']].copy()
result.columns = ['Číslo centra', 'Číslo pacienta', 'Kód návštěvy', 'Datum návštěvy', 'Typ návštěvy']


def parse_date(val):
    """Parse a ``'%d %b %Y'`` date string (e.g. ``'05 Jan 2024'``).

    Args:
        val: Raw cell value from the CSV; may be a string or a missing value.

    Returns:
        A ``datetime`` for a well-formed date string, or ``None`` when the
        value is missing, empty, or does not match the expected format.
    """
    if pd.isna(val) or val == '':
        return None
    try:
        return datetime.strptime(str(val).strip(), '%d %b %Y')
    except ValueError:
        # Unparseable text is treated as "no date" (best effort).
        return None


# Parse date to real datetime
result['Datum návštěvy'] = result['Datum návštěvy'].apply(parse_date)

# Sort ascending by site number, then subject number, then visit date
result = result.sort_values(['Číslo centra', 'Číslo pacienta', 'Datum návštěvy']).reset_index(drop=True)

# Build workbook
wb = Workbook()
ws = wb.active
ws.title = "Přehled návštěv"

thin = Side(style='thin')
border = Border(left=thin, right=thin, top=thin, bottom=thin)
header_fill = PatternFill("solid", fgColor="4472C4")
header_font = Font(name='Calibri', bold=True, size=11, color="FFFFFF")
data_font = Font(name='Calibri', size=11)
# Built once and shared by all cells instead of being re-created per cell.
header_alignment = Alignment(horizontal='center', vertical='center', wrap_text=True)
data_alignment = Alignment(horizontal='center', vertical='center')
col_widths = [20, 20, 20, 16, 16]

# Header row
for col_idx, (col_name, width) in enumerate(zip(result.columns, col_widths), 1):
    cell = ws.cell(row=1, column=col_idx, value=col_name)
    cell.font = header_font
    cell.fill = header_fill
    cell.border = border
    cell.alignment = header_alignment
    ws.column_dimensions[get_column_letter(col_idx)].width = width
ws.row_dimensions[1].height = 30
ws.freeze_panes = "A2"

# Data rows (worksheet row 1 is the header, so data starts at row 2)
for row_idx, row in enumerate(result.itertuples(index=False), 2):
    for col_idx, value in enumerate(row, 1):
        # Missing values can come back from pandas as NaN/NaT rather than
        # None; normalise them so the cell is simply left empty.
        if pd.isna(value):
            value = None
        cell = ws.cell(row=row_idx, column=col_idx, value=value)
        cell.font = data_font
        cell.border = border
        cell.alignment = data_alignment
        if col_idx == 4 and value is not None:  # visit-date column
            cell.number_format = 'DD-MMM-YYYY'

# Delete old reports only now that the new workbook has been built, so a
# failure while reading or processing the CSV does not leave the user
# without any report. Today's file also matches the pattern and is removed
# here before being rewritten below.
for old in glob.glob(out_dir + "*77242113UCO3001 Visit report*.xlsx"):
    os.remove(old)
    print(f"Deleted old: {old}")

wb.save(out_path)
print(f"Saved: {out_path}")
print(f"Rows: {len(result)}")