This commit is contained in:
2026-04-11 07:56:10 +02:00
parent c4912744ea
commit 4ff7277153
2 changed files with 22 additions and 11 deletions
+22 -11
View File
@@ -8,11 +8,11 @@ from openpyxl.utils import get_column_letter
from datetime import date, datetime from datetime import date, datetime
# Paths
src_dir = "U:/janssen/Covance_UCO3001/"
out_dir = "U:/Dropbox/!!!Days/Downloads Z230/"

# Find source files
src_files = glob.glob(src_dir + "Protocol 77242113UCO3001 - All Samples*.xlsx")
# Explicit raise instead of `assert`: asserts are stripped under `python -O`,
# which would turn a missing export into an opaque IndexError on the next line.
if not src_files:
    raise FileNotFoundError("Source file not found!")
# glob does not guarantee any ordering; the first match is used as-is.
src_file = src_files[0]
print(f"Source xlsx: {src_file}")
@@ -35,10 +35,10 @@ for old in glob.glob(out_dir + "*77242113UCO3001 Speciment Inventory report*.xls
shutil.copy2(src_file, out_path) shutil.copy2(src_file, out_path)
# Load data with pandas for analysis.
# header=0: the first sheet row holds the column names, data starts on row 2.
df = pd.read_excel(src_file, sheet_name=0, header=0)

# All unique patients
all_patients = sorted(df['Patient No.'].dropna().unique())

# BXSCR and DNA rows
bxscr = df[df['Protocol Visit Code'] == 'BXSCR']
@@ -53,29 +53,40 @@ def fmt_date(val):
return pd.to_datetime(val).to_pydatetime() return pd.to_datetime(val).to_pydatetime()
# Sample statuses that count as a usable specimen; rows with any other
# status are ignored by the lookup helpers.
OK_STATUSES = {'Received', 'In Inventory', 'Shipped'}


# Get Container Receipt Date + Excel row for patient+specimen from given visit dataframe
# Excel row = pandas df index + 2 (row 1 header, data from row 2)
def get_specimen_info(visit_df, patient, specimen_type=None):
    """Return the receipt date and Excel row of a patient's first matching specimen.

    Args:
        visit_df: DataFrame with 'Patient No.', 'Specimen Type',
            'Sample Status' and 'Container Receipt Date' columns.
        patient: Value compared against the 'Patient No.' column.
        specimen_type: Optional value compared against 'Specimen Type';
            a falsy value disables this filter.

    Returns:
        ``(fmt_date(receipt_date), excel_row)`` for the first row whose
        'Sample Status' is in ``OK_STATUSES``, or ``('', None)`` when no
        row matches.
    """
    rows = visit_df[visit_df['Patient No.'] == patient]
    if specimen_type:
        rows = rows[rows['Specimen Type'] == specimen_type]
    rows = rows[rows['Sample Status'].isin(OK_STATUSES)]
    if rows.empty:
        return '', None
    row = rows.iloc[0]
    # NOTE(review): the +2 offset assumes visit_df still carries the default
    # 0-based index of the frame it was filtered from -- confirm at callers.
    return fmt_date(row['Container Receipt Date']), rows.index[0] + 2
# Get Container Receipt Date + Excel row by Container Label Line 1 code and visit code
def get_label_info(patient, label_code, visit_code):
    """Return the receipt date and Excel row of the first matching labelled container.

    Looks in the module-level ``df`` for rows matching all of
    'Patient No.', 'Protocol Visit Code' and 'Container Label Line 1',
    then keeps only rows whose 'Sample Status' is in ``OK_STATUSES``.

    Args:
        patient: Value compared against the 'Patient No.' column.
        label_code: Value compared against 'Container Label Line 1'.
        visit_code: Value compared against 'Protocol Visit Code'.

    Returns:
        ``(fmt_date(receipt_date), excel_row)`` for the first match, or
        ``('', None)`` when no row matches.
    """
    rows = df[(df['Patient No.'] == patient) &
              (df['Protocol Visit Code'] == visit_code) &
              (df['Container Label Line 1'] == label_code)]
    rows = rows[rows['Sample Status'].isin(OK_STATUSES)]
    if rows.empty:
        return '', None
    row = rows.iloc[0]
    # Excel row = pandas df index + 2 (row 1 header, data from row 2)
    return fmt_date(row['Container Receipt Date']), rows.index[0] + 2
# Open copied workbook and add analysis sheet
out_wb = load_workbook(out_path)

# Rename and autofit first sheet
src_ws = out_wb.active
src_ws.title = "Zdroj"
for col in src_ws.columns:
    # Longest rendered cell text in the column; empty cells count as 0.
    # default=0 keeps max() from raising if a column yields no cells.
    max_len = max(
        (len(str(cell.value)) if cell.value is not None else 0 for cell in col),
        default=0,
    )
    # Pad by 2 characters and cap the width at 50.
    src_ws.column_dimensions[get_column_letter(col[0].column)].width = min(max_len + 2, 50)

# Create analysis sheet
analysis_ws = out_wb.create_sheet("Přehled vzorků")
@@ -172,10 +183,10 @@ for i, row in _pat_pre.iterrows():
patient_row_map[pat] = i + 2 # +1 for 1-based, +1 for header row patient_row_map[pat] = i + 2 # +1 for 1-based, +1 for header row
# Only patients with any BXSCR record # Only patients with any BXSCR record
bxscr_patients = sorted(bxscr['Patient Number'].dropna().unique()) bxscr_patients = sorted(bxscr['Patient No.'].dropna().unique())
for row_idx, patient in enumerate(bxscr_patients, 3): for row_idx, patient in enumerate(bxscr_patients, 3):
investigator = bxscr[bxscr['Patient Number'] == patient].iloc[0]['Investigator Name'] investigator = bxscr[bxscr['Patient No.'] == patient].iloc[0]['Investigator Name']
sm11, sm11_row = get_specimen_info(bxscr, patient, 'Tissue , Paraffin Block') sm11, sm11_row = get_specimen_info(bxscr, patient, 'Tissue , Paraffin Block')
rna, rna_row = get_specimen_info(bxscr, patient, 'Biopsy RNA Later') rna, rna_row = get_specimen_info(bxscr, patient, 'Biopsy RNA Later')
cryo, cryo_row = get_specimen_info(bxscr, patient, 'Biopsy, Frozen Tissue') cryo, cryo_row = get_specimen_info(bxscr, patient, 'Biopsy, Frozen Tissue')