diff --git a/Covance_UCO3001/Protocol 77242113UCO3001 - All Samples.xlsx b/Covance_UCO3001/Protocol 77242113UCO3001 - All Samples.xlsx new file mode 100644 index 0000000..cae825b Binary files /dev/null and b/Covance_UCO3001/Protocol 77242113UCO3001 - All Samples.xlsx differ diff --git a/Covance_UCO3001/create_report.py b/Covance_UCO3001/create_report.py index aaf8009..8a2ed55 100644 --- a/Covance_UCO3001/create_report.py +++ b/Covance_UCO3001/create_report.py @@ -8,11 +8,11 @@ from openpyxl.utils import get_column_letter from datetime import date, datetime # Paths -src_dir = "U:/PythonProject/Janssen/Covance_UCO3001/" +src_dir = "U:/janssen/Covance_UCO3001/" out_dir = "U:/Dropbox/!!!Days/Downloads Z230/" # Find source files -src_files = glob.glob(src_dir + "3001Specimeninventoryreport*.xlsx") +src_files = glob.glob(src_dir + "Protocol 77242113UCO3001 - All Samples*.xlsx") assert src_files, "Source file not found!" src_file = src_files[0] print(f"Source xlsx: {src_file}") @@ -35,10 +35,10 @@ for old in glob.glob(out_dir + "*77242113UCO3001 Speciment Inventory report*.xls shutil.copy2(src_file, out_path) # Load data with pandas for analysis -df = pd.read_excel(src_file, sheet_name=0, header=2) +df = pd.read_excel(src_file, sheet_name=0, header=0) # All unique patients -all_patients = sorted(df['Patient Number'].dropna().unique()) +all_patients = sorted(df['Patient No.'].dropna().unique()) # BXSCR and DNA rows bxscr = df[df['Protocol Visit Code'] == 'BXSCR'] @@ -53,29 +53,40 @@ def fmt_date(val): return pd.to_datetime(val).to_pydatetime() # Get Container Receipt Date + Excel row for patient+specimen from given visit dataframe -# Excel row = pandas df index + 4 (rows 1-2 title, row 3 header, data from row 4) +OK_STATUSES = {'Received', 'In Inventory', 'Shipped'} + +# Excel row = pandas df index + 2 (row 1 header, data from row 2) def get_specimen_info(visit_df, patient, specimen_type=None): - rows = visit_df[visit_df['Patient Number'] == patient] + rows = visit_df[visit_df['Patient No.'] == patient] if specimen_type: rows = rows[rows['Specimen Type'] == specimen_type] + rows = rows[rows['Sample Status'].isin(OK_STATUSES)] if rows.empty: return '', None row = rows.iloc[0] - return fmt_date(row['Container Receipt Date']), rows.index[0] + 4 + return fmt_date(row['Container Receipt Date']), rows.index[0] + 2 # Get Container Receipt Date + Excel row by Container Label Line 1 code and visit code def get_label_info(patient, label_code, visit_code): - rows = df[(df['Patient Number'] == patient) & + rows = df[(df['Patient No.'] == patient) & (df['Protocol Visit Code'] == visit_code) & (df['Container Label Line 1'] == label_code)] + rows = rows[rows['Sample Status'].isin(OK_STATUSES)] if rows.empty: return '', None row = rows.iloc[0] - return fmt_date(row['Container Receipt Date']), rows.index[0] + 4 + return fmt_date(row['Container Receipt Date']), rows.index[0] + 2 # Open copied workbook and add analysis sheet out_wb = load_workbook(out_path) +# Rename and autofit first sheet +src_ws = out_wb.active +src_ws.title = "Zdroj" +for col in src_ws.columns: + max_len = max((len(str(cell.value)) if cell.value is not None else 0) for cell in col) + src_ws.column_dimensions[get_column_letter(col[0].column)].width = min(max_len + 2, 50) + # Create analysis sheet analysis_ws = out_wb.create_sheet("Přehled vzorků") @@ -172,10 +183,10 @@ for i, row in _pat_pre.iterrows(): patient_row_map[pat] = i + 2 # +1 for 1-based, +1 for header row # Only patients with any BXSCR record -bxscr_patients = sorted(bxscr['Patient Number'].dropna().unique()) +bxscr_patients = sorted(bxscr['Patient No.'].dropna().unique()) for row_idx, patient in enumerate(bxscr_patients, 3): - investigator = bxscr[bxscr['Patient Number'] == patient].iloc[0]['Investigator Name'] + investigator = bxscr[bxscr['Patient No.'] == patient].iloc[0]['Investigator Name'] sm11, sm11_row = get_specimen_info(bxscr, patient, 'Tissue , Paraffin Block') rna, rna_row = get_specimen_info(bxscr, patient, 'Biopsy RNA Later') cryo, cryo_row = get_specimen_info(bxscr, patient, 'Biopsy, Frozen Tissue')