z230
This commit is contained in:
Binary file not shown.
@@ -0,0 +1,83 @@
|
||||
import glob
|
||||
import os
|
||||
import pandas as pd
|
||||
from openpyxl import Workbook
|
||||
from openpyxl.styles import Font, PatternFill, Border, Side, Alignment
|
||||
from openpyxl.utils import get_column_letter
|
||||
from datetime import date, datetime
|
||||
|
||||
# Input and output locations (both end with a slash because file names are
# appended by plain string concatenation below).
src_dir = "/Covance_UCO3001/"
out_dir = "U:/Dropbox/!!!Days/Downloads Z230/"

# Find source CSV
csv_files = glob.glob(src_dir + "_EDCStdRpt-DataListing.csv")
# Explicit raise instead of `assert`: assertions are removed when Python runs
# with -O, which would turn a missing file into an obscure IndexError below.
if not csv_files:
    raise FileNotFoundError("CSV file not found!")
csv_file = csv_files[0]
print(f"Source: {csv_file}")
|
||||
|
||||
# Today's report path; every earlier visit report in the output folder
# (any date prefix) is removed before the new one is written.
today = date.today().strftime("%Y-%m-%d")
out_path = out_dir + f"{today} 77242113UCO3001 Visit report.xlsx"

stale_reports = glob.glob(out_dir + "*77242113UCO3001 Visit report*.xlsx")
for stale_report in stale_reports:
    os.remove(stale_report)
    print(f"Deleted old: {stale_report}")
|
||||
|
||||
# Load the CSV export
df = pd.read_csv(csv_file, encoding='utf-8')

# Keep only the report columns, mapping each source column to its sheet header
column_labels = {
    'SiteNumber': 'Číslo centra',
    'Subject': 'Číslo pacienta',
    'InstanceName': 'Kód návštěvy',
    'Field4Value': 'Datum návštěvy',
    'Field5Value': 'Typ návštěvy',
}
result = df[list(column_labels)].rename(columns=column_labels)
|
||||
|
||||
# Parse date to real datetime
|
||||
def parse_date(val):
    """Convert a 'DD Mon YYYY' text value (e.g. '05 Jan 2024') to a datetime.

    Surrounding whitespace is ignored. Returns None when the value is
    missing (None / NaN), blank, or not in the expected format.
    """
    if pd.isna(val):
        return None
    text = str(val).strip()
    if not text:
        return None
    try:
        return datetime.strptime(text, '%d %b %Y')
    except ValueError:
        # Only a format mismatch is treated as "no date"; anything else
        # (e.g. KeyboardInterrupt) must not be swallowed.
        return None
|
||||
|
||||
# Replace the text dates with real datetime values
visit_dates = result['Datum návštěvy'].apply(parse_date)
result['Datum návštěvy'] = visit_dates

# Order rows by site, then patient, then visit date (all ascending) and
# renumber the index from 0.
sort_keys = ['Číslo centra', 'Číslo pacienta', 'Datum návštěvy']
result = result.sort_values(by=sort_keys)
result = result.reset_index(drop=True)
|
||||
|
||||
# Build workbook with a single sheet
wb = Workbook()
ws = wb.active
ws.title = "Přehled návštěv"

# Styles shared by the header and data cells
thin = Side(style='thin')
border = Border(left=thin, right=thin, top=thin, bottom=thin)
header_fill = PatternFill("solid", fgColor="4472C4")
header_font = Font(name='Calibri', bold=True, size=11, color="FFFFFF")
data_font = Font(name='Calibri', size=11)

# One width per report column, in column order
col_widths = [20, 20, 20, 16, 16]

# Header row: write each column title into row 1 and size its column
header_alignment = Alignment(horizontal='center', vertical='center', wrap_text=True)
for col_idx, (col_name, width) in enumerate(zip(result.columns, col_widths), start=1):
    ws.column_dimensions[get_column_letter(col_idx)].width = width
    cell = ws.cell(row=1, column=col_idx)
    cell.value = col_name
    cell.font = header_font
    cell.fill = header_fill
    cell.border = border
    cell.alignment = header_alignment

ws.row_dimensions[1].height = 30
# Keep the header row visible while scrolling
ws.freeze_panes = "A2"
|
||||
|
||||
# Data rows: one sheet row per record, starting below the header (row 2)
for row_idx, row in enumerate(result.itertuples(index=False), 2):
    for col_idx, value in enumerate(row, 1):
        # pandas represents missing values as NaN / NaT rather than None
        # (an unparsable date becomes NaT once the column is datetime-typed).
        # Write those as blank cells instead of passing them to openpyxl.
        if pd.isna(value):
            value = None
        cell = ws.cell(row=row_idx, column=col_idx, value=value)
        cell.font = data_font
        cell.border = border
        cell.alignment = Alignment(horizontal='center', vertical='center')
        if col_idx == 4 and value is not None:  # Datum návštěvy
            cell.number_format = 'DD-MMM-YYYY'

wb.save(out_path)
print(f"Saved: {out_path}")
print(f"Rows: {len(result)}")
|
||||
File diff suppressed because one or more lines are too long
@@ -0,0 +1,282 @@
|
||||
import glob
|
||||
import os
|
||||
import shutil
|
||||
import pandas as pd
|
||||
from openpyxl import load_workbook
|
||||
from openpyxl.styles import Font, PatternFill, Border, Side, Alignment
|
||||
from openpyxl.utils import get_column_letter
|
||||
from datetime import date, datetime
|
||||
|
||||
# Paths (both end with a slash because file names are appended by plain
# string concatenation below)
src_dir = "U:/PythonProject/Janssen/Covance_UCO3001/"
out_dir = "U:/Dropbox/!!!Days/Downloads Z230/"

# Find source files.
# Explicit raises instead of `assert`: assertions are removed when Python runs
# with -O, which would turn a missing file into an obscure IndexError.
src_files = glob.glob(src_dir + "3001Specimeninventoryreport*.xlsx")
if not src_files:
    raise FileNotFoundError("Source file not found!")
# NOTE(review): if several inventory reports match the pattern, the first
# glob hit is used and glob does not guarantee any particular order.
src_file = src_files[0]
print(f"Source xlsx: {src_file}")

csv_files = glob.glob(src_dir + "_EDCStdRpt-DataListing.csv")
if not csv_files:
    raise FileNotFoundError("CSV file not found!")
csv_file = csv_files[0]
print(f"Source csv: {csv_file}")
|
||||
|
||||
# Name of today's output report
today = date.today().strftime("%Y-%m-%d")
out_filename = f"{today} 77242113UCO3001 Speciment Inventory report.xlsx"
out_path = out_dir + out_filename

# Remove every earlier inventory report (any date prefix) from the output folder
previous_reports = glob.glob(out_dir + "*77242113UCO3001 Speciment Inventory report*.xlsx")
for previous_report in previous_reports:
    os.remove(previous_report)
    print(f"Deleted old: {previous_report}")

# The output starts as a file-level copy of the source workbook, so the
# original sheet keeps its formatting.
shutil.copy2(src_file, out_path)

# Read the first sheet for analysis; the column names are on the third sheet row
df = pd.read_excel(src_file, sheet_name=0, header=2)
|
||||
|
||||
# Sorted list of every distinct, non-missing patient number
patient_numbers = df['Patient Number']
all_patients = sorted(patient_numbers.dropna().unique())

# Row subsets for the BXSCR and DNA visit codes
visit_codes = df['Protocol Visit Code']
bxscr = df[visit_codes == 'BXSCR']
dna = df[visit_codes == 'DNA']
|
||||
|
||||
# Parse date value to datetime object
|
||||
def fmt_date(val):
    """Convert a date cell value to a plain ``datetime``.

    Text values must be in 'DD-Mon-YYYY' form (e.g. '05-Jan-2024');
    surrounding whitespace is ignored. Any other non-missing value is
    converted through ``pd.to_datetime``.

    Returns None for missing values (None / NaN / NaT) and for blank text.
    Raises ValueError for non-blank text that is not in the expected format.
    """
    if pd.isna(val):
        return None
    if isinstance(val, str):
        text = val.strip()
        if not text:
            # A blank text cell carries no date; treat it like a missing value
            # rather than letting strptime fail on it.
            return None
        return datetime.strptime(text, '%d-%b-%Y')
    return pd.to_datetime(val).to_pydatetime()
|
||||
|
||||
# Get Container Receipt Date + Excel row for patient+specimen from given visit dataframe
|
||||
# Excel row = pandas df index + 4 (rows 1-2 title, row 3 header, data from row 4)
|
||||
def get_specimen_info(visit_df, patient, specimen_type=None):
    """Look up the first matching specimen row for a patient.

    Rows of ``visit_df`` are matched on 'Patient Number' and, when
    ``specimen_type`` is given (non-empty), also on 'Specimen Type'.

    Returns a ``(receipt_date, excel_row)`` pair for the first match, where
    ``receipt_date`` is the 'Container Receipt Date' passed through
    ``fmt_date`` and ``excel_row`` is the row's index label plus 4.
    Returns ``('', None)`` when nothing matches.
    """
    selected = visit_df['Patient Number'] == patient
    if specimen_type:
        selected = selected & (visit_df['Specimen Type'] == specimen_type)
    matches = visit_df[selected]

    if matches.empty:
        return '', None

    receipt_date = matches['Container Receipt Date'].iloc[0]
    excel_row = matches.index[0] + 4
    return fmt_date(receipt_date), excel_row
|
||||
|
||||
# Get Container Receipt Date + Excel row by Container Label Line 1 code and visit code
|
||||
def get_label_info(patient, label_code, visit_code):
    """Look up the first row of the module-level ``df`` for a patient, visit and label.

    Rows are matched on 'Patient Number', 'Protocol Visit Code' and
    'Container Label Line 1'.

    Returns a ``(receipt_date, excel_row)`` pair for the first match, where
    ``receipt_date`` is the 'Container Receipt Date' passed through
    ``fmt_date`` and ``excel_row`` is the row's index label plus 4.
    Returns ``('', None)`` when nothing matches.
    """
    wanted = {
        'Patient Number': patient,
        'Protocol Visit Code': visit_code,
        'Container Label Line 1': label_code,
    }
    # Narrow the frame one criterion at a time
    matches = df
    for column, expected in wanted.items():
        matches = matches[matches[column] == expected]

    if matches.empty:
        return '', None

    receipt_date = matches['Container Receipt Date'].iloc[0]
    excel_row = matches.index[0] + 4
    return fmt_date(receipt_date), excel_row
|
||||
|
||||
# Re-open the copied workbook; the new sheets are added to it
out_wb = load_workbook(out_path)

# Sheet that will hold the per-patient specimen overview
analysis_ws = out_wb.create_sheet(title="Přehled vzorků")

# Thin line on all four sides, used for every table cell
thin = Side(style='thin')
border = Border(**{edge: thin for edge in ('left', 'right', 'top', 'bottom')})
|
||||
|
||||
# Column definitions for the analysis sheet as (header, width) pairs, listed
# per header group in left-to-right sheet order. There is no "Visit code" column.
_identity_columns = [
    ("Investigator Name", 24),
    ("Číslo pacienta", 20),
]
_screening_columns = [  # cols 3-5 → SCREENING C:E
    ("Máme biopsii SM11", 20),
    ("Máme RNA", 16),
    ("Máme Cryostor", 16),
]
_dna_columns = [  # col 6 → no group
    ("DNA", 14),
]
_i0_columns = [  # cols 7-12 → RANDOMIZACE I-0 G:L
    ("PLASMPK I-0 TROUGH", 18),
    ("PLASMA PK I-0 PEAK", 18),
    ("SERUM ADA I-0 PRE", 18),
    ("SM06/SERUM BIOM", 16),
    ("SM07/WB RNA", 14),
    ("SM10/FECAL", 14),
]
_i2_columns = [  # cols 13-16 → I-2 M:P
    ("PLASMPK I-2 TROUGH", 18),
    ("PLASMA PK I-2 PEAK", 18),
    ("SERUM ADA I-2 PRE", 18),
    ("STOOL I-2", 12),
]
_i4_columns = [  # cols 17-22 → I-4 Q:V
    ("PLASMPK I-4 TROUGH", 18),
    ("PLASMA PK I-4 PEAK", 18),
    ("SERUM ADA I-4 PRE", 18),
    ("SM06/SERUM BIOM", 16),
    ("SM07/WB RNA", 14),
    ("STOOL I-4", 12),
]

columns = (
    _identity_columns
    + _screening_columns
    + _dna_columns
    + _i0_columns
    + _i2_columns
    + _i4_columns
)
|
||||
|
||||
# Row 1 — group headers spanning the columns that belong together
group_font = Font(name='Calibri', bold=True, size=11)
group_fill = PatternFill("solid", fgColor="FFFFFF")  # white, same as user's theme=0
group_border = Border(left=thin, right=thin, top=thin, bottom=thin)

# (first column, last column, caption) — 1-based, inclusive
groups = [
    (3, 5, "SCREENING"),
    (7, 12, "RANDOMIZACE I-0"),
    (13, 16, "I-2"),
    (17, 22, "I-4"),
]

group_alignment = Alignment(horizontal='center', vertical='center')
for first_col, last_col, caption in groups:
    analysis_ws.merge_cells(
        start_row=1, start_column=first_col,
        end_row=1, end_column=last_col,
    )
    # The caption and its styling live on the top-left cell of the merged range
    anchor = analysis_ws.cell(row=1, column=first_col, value=caption)
    anchor.font = group_font
    anchor.fill = group_fill
    anchor.alignment = group_alignment
    # The border is set on every cell of the merged range, anchor included
    for merged_col in range(first_col, last_col + 1):
        analysis_ws.cell(row=1, column=merged_col).border = group_border

analysis_ws.row_dimensions[1].height = 20
|
||||
|
||||
# Row 2 — column headers
header_fill = PatternFill("solid", fgColor="4472C4")
header_font = Font(name='Calibri', bold=True, size=11, color="FFFFFF")

header_alignment = Alignment(horizontal='center', vertical='center', wrap_text=True)
for col_idx, (header, width) in enumerate(columns, start=1):
    analysis_ws.column_dimensions[get_column_letter(col_idx)].width = width
    cell = analysis_ws.cell(row=2, column=col_idx)
    cell.value = header
    cell.font = header_font
    cell.fill = header_fill
    cell.border = border
    cell.alignment = header_alignment

analysis_ws.row_dimensions[2].height = 30

# Keep the first 2 columns and the first 2 rows visible while scrolling
analysis_ws.freeze_panes = "C3"
|
||||
|
||||
# Styles for the data rows (data starts at row 3)
data_font = Font(name='Calibri', size=11)
date_font_link = Font(name='Calibri', size=11, color="000000", underline='single')
yes_fill = PatternFill(fill_type="solid", fgColor="E2EFDA")
no_fill = PatternFill(fill_type="solid", fgColor="FFE7E7")

# Sheet names used as hyperlink targets: the first sheet of the copied
# workbook and the patient list sheet.
src_sheet_name = out_wb.worksheets[0].title
pat_sheet_name = "Seznam pacientů"
|
||||
|
||||
# Build patient → first Excel row in "Seznam pacientů" (header = row 1, data from row 2).
# That sheet is written further down; its row order is reproduced here (same
# sort keys, same tolerant date parsing) so the row numbers computed now
# match the rows written then.
def _parse_visit_date(val):
    """Parse a 'DD Mon YYYY' visit date; return None for missing, blank or malformed values."""
    if pd.isna(val):
        return None
    text = str(val).strip()
    if not text:
        return None
    try:
        return datetime.strptime(text, '%d %b %Y')
    except ValueError:
        # A malformed date must not abort the report: the patient sheet
        # treats it as "no date", so the pre-sort has to do the same.
        return None


_csv_df_pre = pd.read_csv(csv_file, encoding='utf-8')
_pat_pre = _csv_df_pre[['SiteNumber', 'Subject', 'Field4Value']].copy()
_pat_pre['Field4Value'] = _pat_pre['Field4Value'].apply(_parse_visit_date)
_pat_pre = _pat_pre.sort_values(['SiteNumber', 'Subject', 'Field4Value']).reset_index(drop=True)

patient_row_map = {}
# Sheet rows are 1-based and row 1 is the header, so the first record is row 2.
for sheet_row, pat in enumerate(_pat_pre['Subject'], 2):
    # Keep only the first (topmost) row of each patient
    patient_row_map.setdefault(pat, sheet_row)
|
||||
|
||||
# Only patients with any BXSCR record get a row in the overview sheet
bxscr_patients = sorted(bxscr['Patient Number'].dropna().unique())

# Columns 3-5: specimen types looked up within the BXSCR rows
screening_specimen_types = (
    'Tissue , Paraffin Block',
    'Biopsy RNA Later',
    'Biopsy, Frozen Tissue',
)

# Columns 7-22: (Container Label Line 1, Protocol Visit Code), in sheet column order
label_lookups = (
    ('PLASMPK I-0 TROUGH', 'I-0'),
    ('PLASMA PK I-0 PEAK', 'I-0'),
    ('SERUM ADA I-0 PRE', 'I-0'),
    ('SM06/SERUM BIOM', 'I-0'),
    ('SM07/WB RNA', 'I-0'),
    ('SM10/FECAL', 'I-0'),
    ('PLASMPK I-2 TROUGH', 'I-2'),
    ('PLASMA PK I-2 PEAK', 'I-2'),
    ('SERUM ADA I-2 PRE', 'I-2'),
    ('STOOL I-2', 'I-2'),
    ('PLASMPK I-4 TROUGH', 'I-4'),
    ('PLASMA PK I-4 PEAK', 'I-4'),
    ('SERUM ADA I-4 PRE', 'I-4'),
    ('SM06/SERUM BIOM', 'I-4'),
    ('SM07/WB RNA', 'I-4'),
    ('STOOL I-4', 'I-4'),
)

patient_link_font = Font(name='Calibri', size=11, underline='single')
missing_font = Font(name='Calibri', size=11, color="C00000")
centered = Alignment(horizontal='center', vertical='center')

for row_idx, patient in enumerate(bxscr_patients, 3):
    patient_bxscr = bxscr[bxscr['Patient Number'] == patient]
    investigator = patient_bxscr.iloc[0]['Investigator Name']

    # One (date, source Excel row) pair per column from column 3 onward:
    # three screening specimens, the DNA visit, then the labelled containers.
    lookups = [get_specimen_info(bxscr, patient, kind) for kind in screening_specimen_types]
    lookups.append(get_specimen_info(dna, patient))
    lookups += [get_label_info(patient, code, visit) for code, visit in label_lookups]

    # col 1-2: plain text, col 3+: (date, excel_row) tuples
    row_data = [investigator, patient, *lookups]

    for col_idx, value in enumerate(row_data, 1):
        if col_idx > 2:
            dt, excel_row = value
            cell = analysis_ws.cell(row=row_idx, column=col_idx, value=dt)
            if dt and excel_row is not None:
                # Sample present: link the date to its row in the source sheet
                cell.hyperlink = f"#'{src_sheet_name}'!A{excel_row}"
                cell.font = date_font_link
                cell.fill = yes_fill
                cell.number_format = 'DD-MMM-YYYY'
            else:
                # No usable receipt date: mark the cell as missing
                cell.font = missing_font
                cell.fill = no_fill
        else:
            cell = analysis_ws.cell(row=row_idx, column=col_idx, value=value)
            if col_idx == 2 and patient in patient_row_map:
                # Link the patient number to the patient's first row in the patient list
                cell.hyperlink = f"#'{pat_sheet_name}'!B{patient_row_map[patient]}"
                cell.font = patient_link_font
            else:
                cell.font = data_font
        cell.border = border
        cell.alignment = centered
|
||||
|
||||
# ── Patient list sheet ("Seznam pacientů") ───────────────────────────────────
csv_df = pd.read_csv(csv_file, encoding='utf-8')

patients_ws = out_wb.create_sheet(title="Seznam pacientů")

# (header, width) for each column, in sheet order
pat_columns = [
    ("Číslo centra", 20),
    ("Číslo pacienta", 20),
    ("Kód návštěvy", 20),
    ("Datum návštěvy", 16),
    ("Typ návštěvy", 16),
]

# Header row: write each title into row 1 and size its column
pat_header_alignment = Alignment(horizontal='center', vertical='center', wrap_text=True)
for col_idx, (col_name, width) in enumerate(pat_columns, start=1):
    patients_ws.column_dimensions[get_column_letter(col_idx)].width = width
    cell = patients_ws.cell(row=1, column=col_idx)
    cell.value = col_name
    cell.font = header_font
    cell.fill = header_fill
    cell.border = border
    cell.alignment = pat_header_alignment

patients_ws.row_dimensions[1].height = 30
# Keep the header row visible while scrolling
patients_ws.freeze_panes = "A2"
|
||||
|
||||
# Prepare and sort data
|
||||
def parse_date(val):
    """Convert a 'DD Mon YYYY' text value (e.g. '05 Jan 2024') to a datetime.

    Surrounding whitespace is ignored. Returns None when the value is
    missing (None / NaN), blank, or not in the expected format.
    """
    if pd.isna(val):
        return None
    text = str(val).strip()
    if not text:
        return None
    try:
        return datetime.strptime(text, '%d %b %Y')
    except ValueError:
        # Only a format mismatch is treated as "no date"; anything else
        # (e.g. KeyboardInterrupt) must not be swallowed.
        return None
|
||||
|
||||
# Patient list data: selected CSV columns with the visit date parsed to a
# datetime, ordered by site, patient and visit date, index renumbered from 0.
pat_source_columns = ['SiteNumber', 'Subject', 'InstanceName', 'Field4Value', 'Field5Value']
pat_df = (
    csv_df[pat_source_columns]
    .assign(Field4Value=lambda frame: frame['Field4Value'].apply(parse_date))
    .sort_values(by=['SiteNumber', 'Subject', 'Field4Value'])
    .reset_index(drop=True)
)
|
||||
|
||||
# Data rows: one sheet row per record, starting below the header (row 2)
for row_idx, row in enumerate(pat_df.itertuples(index=False), 2):
    for col_idx, value in enumerate(row, 1):
        # pandas represents missing values as NaN / NaT rather than None
        # (an unparsable date becomes NaT once the column is datetime-typed).
        # Write those as blank cells instead of passing them to openpyxl.
        if pd.isna(value):
            value = None
        cell = patients_ws.cell(row=row_idx, column=col_idx, value=value)
        cell.font = data_font
        cell.border = border
        cell.alignment = Alignment(horizontal='center', vertical='center')
        if col_idx == 4 and value is not None:  # visit date column
            cell.number_format = 'DD-MMM-YYYY'

out_wb.save(out_path)
print(f"Saved: {out_path}")
print(f"Patients with BXSCR: {len(bxscr_patients)}")
print(f"All unique patients: {len(all_patients)}")
|
||||
Reference in New Issue
Block a user