notebook
This commit is contained in:
@@ -8,11 +8,11 @@ from openpyxl.utils import get_column_letter
|
||||
from datetime import date, datetime
|
||||
|
||||
# Paths
|
||||
src_dir = "U:/PythonProject/Janssen/Covance_UCO3001/"
|
||||
src_dir = "U:/janssen/Covance_UCO3001/"
|
||||
out_dir = "U:/Dropbox/!!!Days/Downloads Z230/"
|
||||
|
||||
# Find source files
|
||||
src_files = glob.glob(src_dir + "3001Specimeninventoryreport*.xlsx")
|
||||
src_files = glob.glob(src_dir + "Protocol 77242113UCO3001 - All Samples*.xlsx")
|
||||
assert src_files, "Source file not found!"
|
||||
src_file = src_files[0]
|
||||
print(f"Source xlsx: {src_file}")
|
||||
@@ -35,10 +35,10 @@ for old in glob.glob(out_dir + "*77242113UCO3001 Speciment Inventory report*.xls
|
||||
shutil.copy2(src_file, out_path)
|
||||
|
||||
# Load data with pandas for analysis
|
||||
df = pd.read_excel(src_file, sheet_name=0, header=2)
|
||||
df = pd.read_excel(src_file, sheet_name=0, header=0)
|
||||
|
||||
# All unique patients
|
||||
all_patients = sorted(df['Patient Number'].dropna().unique())
|
||||
all_patients = sorted(df['Patient No.'].dropna().unique())
|
||||
|
||||
# BXSCR and DNA rows
|
||||
bxscr = df[df['Protocol Visit Code'] == 'BXSCR']
|
||||
@@ -53,29 +53,40 @@ def fmt_date(val):
|
||||
return pd.to_datetime(val).to_pydatetime()
|
||||
|
||||
# Return (Container Receipt Date, Excel row) of the first OK-status row for the patient in the given visit dataframe, optionally filtered by specimen type; ('', None) if no row matches
|
||||
# Excel row = pandas df index + 4 (rows 1-2 title, row 3 header, data from row 4)
|
||||
OK_STATUSES = {'Received', 'In Inventory', 'Shipped'}
|
||||
|
||||
# Excel row = pandas df index + 2 (df index is 0-based; Excel row 1 is the header, data starts at row 2)
|
||||
def get_specimen_info(visit_df, patient, specimen_type=None):
|
||||
rows = visit_df[visit_df['Patient Number'] == patient]
|
||||
rows = visit_df[visit_df['Patient No.'] == patient]
|
||||
if specimen_type:
|
||||
rows = rows[rows['Specimen Type'] == specimen_type]
|
||||
rows = rows[rows['Sample Status'].isin(OK_STATUSES)]
|
||||
if rows.empty:
|
||||
return '', None
|
||||
row = rows.iloc[0]
|
||||
return fmt_date(row['Container Receipt Date']), rows.index[0] + 4
|
||||
return fmt_date(row['Container Receipt Date']), rows.index[0] + 2
|
||||
|
||||
# Return (Container Receipt Date, Excel row) of the first OK-status row matching the patient, visit code and Container Label Line 1 code; ('', None) if no row matches
|
||||
def get_label_info(patient, label_code, visit_code):
|
||||
rows = df[(df['Patient Number'] == patient) &
|
||||
rows = df[(df['Patient No.'] == patient) &
|
||||
(df['Protocol Visit Code'] == visit_code) &
|
||||
(df['Container Label Line 1'] == label_code)]
|
||||
rows = rows[rows['Sample Status'].isin(OK_STATUSES)]
|
||||
if rows.empty:
|
||||
return '', None
|
||||
row = rows.iloc[0]
|
||||
return fmt_date(row['Container Receipt Date']), rows.index[0] + 4
|
||||
return fmt_date(row['Container Receipt Date']), rows.index[0] + 2
|
||||
|
||||
# Open copied workbook and add analysis sheet
|
||||
out_wb = load_workbook(out_path)
|
||||
|
||||
# Rename and autofit first sheet
|
||||
src_ws = out_wb.active
|
||||
src_ws.title = "Zdroj"
|
||||
for col in src_ws.columns:
|
||||
max_len = max((len(str(cell.value)) if cell.value is not None else 0) for cell in col)
|
||||
src_ws.column_dimensions[get_column_letter(col[0].column)].width = min(max_len + 2, 50)
|
||||
|
||||
# Create analysis sheet
|
||||
analysis_ws = out_wb.create_sheet("Přehled vzorků")
|
||||
|
||||
@@ -172,10 +183,10 @@ for i, row in _pat_pre.iterrows():
|
||||
patient_row_map[pat] = i + 2 # +1 for 1-based, +1 for header row
|
||||
|
||||
# Only patients with any BXSCR record
|
||||
bxscr_patients = sorted(bxscr['Patient Number'].dropna().unique())
|
||||
bxscr_patients = sorted(bxscr['Patient No.'].dropna().unique())
|
||||
|
||||
for row_idx, patient in enumerate(bxscr_patients, 3):
|
||||
investigator = bxscr[bxscr['Patient Number'] == patient].iloc[0]['Investigator Name']
|
||||
investigator = bxscr[bxscr['Patient No.'] == patient].iloc[0]['Investigator Name']
|
||||
sm11, sm11_row = get_specimen_info(bxscr, patient, 'Tissue , Paraffin Block')
|
||||
rna, rna_row = get_specimen_info(bxscr, patient, 'Biopsy RNA Later')
|
||||
cryo, cryo_row = get_specimen_info(bxscr, patient, 'Biopsy, Frozen Tissue')
|
||||
|
||||
Reference in New Issue
Block a user