This commit is contained in:
2026-04-11 07:56:10 +02:00
parent c4912744ea
commit 4ff7277153
2 changed files with 22 additions and 11 deletions
+22 -11
View File
@@ -8,11 +8,11 @@ from openpyxl.utils import get_column_letter
from datetime import date, datetime
# Paths
# src_dir is where the exported inventory workbook is looked up;
# out_dir is where the annotated copy is written.
src_dir = "U:/janssen/Covance_UCO3001/"
out_dir = "U:/Dropbox/!!!Days/Downloads Z230/"

# Find source files
src_files = glob.glob(src_dir + "Protocol 77242113UCO3001 - All Samples*.xlsx")
if not src_files:
    # Explicit raise instead of `assert`: asserts are removed under
    # `python -O`, which would turn a missing file into a bare IndexError.
    raise FileNotFoundError("Source file not found!")
# Only the first match is used; glob does not guarantee any ordering.
src_file = src_files[0]
print(f"Source xlsx: {src_file}")
@@ -35,10 +35,10 @@ for old in glob.glob(out_dir + "*77242113UCO3001 Speciment Inventory report*.xls
shutil.copy2(src_file, out_path)
# Load data with pandas for analysis.
# header=0: the column names are in the first row of the first sheet.
df = pd.read_excel(src_file, sheet_name=0, header=0)

# All unique patients (missing patient numbers are dropped before sorting)
all_patients = sorted(df['Patient No.'].dropna().unique())

# BXSCR and DNA rows
bxscr = df[df['Protocol Visit Code'] == 'BXSCR']
@@ -53,29 +53,40 @@ def fmt_date(val):
return pd.to_datetime(val).to_pydatetime()
# Sample statuses accepted by the lookups below; rows whose 'Sample Status'
# is anything else are ignored.
OK_STATUSES = {'Received', 'In Inventory', 'Shipped'}


# Get Container Receipt Date + Excel row for patient+specimen from given visit dataframe
# Excel row = pandas df index + 2 (row 1 header, data from row 2)
def get_specimen_info(visit_df, patient, specimen_type=None):
    """Look up the first accepted specimen row for a patient.

    Args:
        visit_df: DataFrame with at least the columns 'Patient No.',
            'Sample Status' and 'Container Receipt Date' (plus
            'Specimen Type' when ``specimen_type`` is given).
        patient: Value compared for equality against 'Patient No.'.
        specimen_type: Optional value compared against 'Specimen Type';
            a falsy value (None, '') disables this filter.

    Returns:
        A ``(receipt_date, excel_row)`` tuple. ``receipt_date`` is the
        'Container Receipt Date' of the first matching row passed through
        ``fmt_date``; ``excel_row`` is that row's index label + 2.
        ``('', None)`` when no row matches.
    """
    rows = visit_df[visit_df['Patient No.'] == patient]
    if specimen_type:
        rows = rows[rows['Specimen Type'] == specimen_type]
    # Keep only rows whose status is one of the accepted ones.
    rows = rows[rows['Sample Status'].isin(OK_STATUSES)]
    if rows.empty:
        return '', None
    first = rows.iloc[0]
    # Assumes visit_df still carries the default 0-based index of the frame
    # read with header=0, so label + 2 is the worksheet row (row 1 = header).
    return fmt_date(first['Container Receipt Date']), rows.index[0] + 2
# Get Container Receipt Date + Excel row by Container Label Line 1 code and visit code
def get_label_info(patient, label_code, visit_code):
    """Look up the first accepted row for a patient, label code and visit.

    Searches the module-level ``df``.

    Args:
        patient: Value compared for equality against 'Patient No.'.
        label_code: Value compared against 'Container Label Line 1'.
        visit_code: Value compared against 'Protocol Visit Code'.

    Returns:
        A ``(receipt_date, excel_row)`` tuple. ``receipt_date`` is the
        'Container Receipt Date' of the first matching row passed through
        ``fmt_date``; ``excel_row`` is that row's index label + 2.
        ``('', None)`` when no row matches.
    """
    matches = (
        (df['Patient No.'] == patient)
        & (df['Protocol Visit Code'] == visit_code)
        & (df['Container Label Line 1'] == label_code)
    )
    rows = df[matches]
    # Keep only rows whose status is one of the accepted ones.
    rows = rows[rows['Sample Status'].isin(OK_STATUSES)]
    if rows.empty:
        return '', None
    first = rows.iloc[0]
    # Excel row = pandas df index + 2 (row 1 header, data from row 2);
    # assumes df keeps the default 0-based index from read_excel.
    return fmt_date(first['Container Receipt Date']), rows.index[0] + 2
# Open the copied workbook; the analysis sheet is added to it below
out_wb = load_workbook(out_path)

# Rename the active sheet and fit each column to its longest cell text
src_ws = out_wb.active
src_ws.title = "Zdroj"
for column_cells in src_ws.columns:
    widest = 0
    for cell in column_cells:
        # Empty cells count as zero-length text.
        if cell.value is not None:
            widest = max(widest, len(str(cell.value)))
    letter = get_column_letter(column_cells[0].column)
    # Two characters of padding, capped at a width of 50.
    src_ws.column_dimensions[letter].width = min(widest + 2, 50)

# Create analysis sheet
analysis_ws = out_wb.create_sheet("Přehled vzorků")
@@ -172,10 +183,10 @@ for i, row in _pat_pre.iterrows():
patient_row_map[pat] = i + 2 # +1 for 1-based, +1 for header row
# Only patients with any BXSCR record (missing patient numbers are dropped)
bxscr_patients = sorted(bxscr['Patient No.'].dropna().unique())
for row_idx, patient in enumerate(bxscr_patients, 3):
investigator = bxscr[bxscr['Patient Number'] == patient].iloc[0]['Investigator Name']
investigator = bxscr[bxscr['Patient No.'] == patient].iloc[0]['Investigator Name']
sm11, sm11_row = get_specimen_info(bxscr, patient, 'Tissue , Paraffin Block')
rna, rna_row = get_specimen_info(bxscr, patient, 'Biopsy RNA Later')
cryo, cryo_row = get_specimen_info(bxscr, patient, 'Biopsy, Frozen Tissue')