notebook
This commit is contained in:
Binary file not shown.
@@ -8,11 +8,11 @@ from openpyxl.utils import get_column_letter
|
|||||||
from datetime import date, datetime
|
from datetime import date, datetime
|
||||||
|
|
||||||
# Paths
|
# Paths
|
||||||
src_dir = "U:/PythonProject/Janssen/Covance_UCO3001/"
|
src_dir = "U:/janssen/Covance_UCO3001/"
|
||||||
out_dir = "U:/Dropbox/!!!Days/Downloads Z230/"
|
out_dir = "U:/Dropbox/!!!Days/Downloads Z230/"
|
||||||
|
|
||||||
# Find source files
|
# Find source files
|
||||||
src_files = glob.glob(src_dir + "3001Specimeninventoryreport*.xlsx")
|
src_files = glob.glob(src_dir + "Protocol 77242113UCO3001 - All Samples*.xlsx")
|
||||||
assert src_files, "Source file not found!"
|
assert src_files, "Source file not found!"
|
||||||
src_file = src_files[0]
|
src_file = src_files[0]
|
||||||
print(f"Source xlsx: {src_file}")
|
print(f"Source xlsx: {src_file}")
|
||||||
@@ -35,10 +35,10 @@ for old in glob.glob(out_dir + "*77242113UCO3001 Speciment Inventory report*.xls
|
|||||||
shutil.copy2(src_file, out_path)
|
shutil.copy2(src_file, out_path)
|
||||||
|
|
||||||
# Load data with pandas for analysis
|
# Load data with pandas for analysis
|
||||||
df = pd.read_excel(src_file, sheet_name=0, header=2)
|
df = pd.read_excel(src_file, sheet_name=0, header=0)
|
||||||
|
|
||||||
# All unique patients
|
# All unique patients
|
||||||
all_patients = sorted(df['Patient Number'].dropna().unique())
|
all_patients = sorted(df['Patient No.'].dropna().unique())
|
||||||
|
|
||||||
# BXSCR and DNA rows
|
# BXSCR and DNA rows
|
||||||
bxscr = df[df['Protocol Visit Code'] == 'BXSCR']
|
bxscr = df[df['Protocol Visit Code'] == 'BXSCR']
|
||||||
@@ -53,29 +53,40 @@ def fmt_date(val):
|
|||||||
return pd.to_datetime(val).to_pydatetime()
|
return pd.to_datetime(val).to_pydatetime()
|
||||||
|
|
||||||
# Get Container Receipt Date + Excel row for patient+specimen from given visit dataframe
|
# Get Container Receipt Date + Excel row for patient+specimen from given visit dataframe
|
||||||
# Excel row = pandas df index + 4 (rows 1-2 title, row 3 header, data from row 4)
|
OK_STATUSES = {'Received', 'In Inventory', 'Shipped'}
|
||||||
|
|
||||||
|
# Excel row = pandas df index + 2 (row 1 header, data from row 2)
|
||||||
def get_specimen_info(visit_df, patient, specimen_type=None):
|
def get_specimen_info(visit_df, patient, specimen_type=None):
|
||||||
rows = visit_df[visit_df['Patient Number'] == patient]
|
rows = visit_df[visit_df['Patient No.'] == patient]
|
||||||
if specimen_type:
|
if specimen_type:
|
||||||
rows = rows[rows['Specimen Type'] == specimen_type]
|
rows = rows[rows['Specimen Type'] == specimen_type]
|
||||||
|
rows = rows[rows['Sample Status'].isin(OK_STATUSES)]
|
||||||
if rows.empty:
|
if rows.empty:
|
||||||
return '', None
|
return '', None
|
||||||
row = rows.iloc[0]
|
row = rows.iloc[0]
|
||||||
return fmt_date(row['Container Receipt Date']), rows.index[0] + 4
|
return fmt_date(row['Container Receipt Date']), rows.index[0] + 2
|
||||||
|
|
||||||
# Get Container Receipt Date + Excel row by Container Label Line 1 code and visit code
|
# Get Container Receipt Date + Excel row by Container Label Line 1 code and visit code
|
||||||
def get_label_info(patient, label_code, visit_code):
|
def get_label_info(patient, label_code, visit_code):
|
||||||
rows = df[(df['Patient Number'] == patient) &
|
rows = df[(df['Patient No.'] == patient) &
|
||||||
(df['Protocol Visit Code'] == visit_code) &
|
(df['Protocol Visit Code'] == visit_code) &
|
||||||
(df['Container Label Line 1'] == label_code)]
|
(df['Container Label Line 1'] == label_code)]
|
||||||
|
rows = rows[rows['Sample Status'].isin(OK_STATUSES)]
|
||||||
if rows.empty:
|
if rows.empty:
|
||||||
return '', None
|
return '', None
|
||||||
row = rows.iloc[0]
|
row = rows.iloc[0]
|
||||||
return fmt_date(row['Container Receipt Date']), rows.index[0] + 4
|
return fmt_date(row['Container Receipt Date']), rows.index[0] + 2
|
||||||
|
|
||||||
# Open copied workbook and add analysis sheet
|
# Open copied workbook and add analysis sheet
|
||||||
out_wb = load_workbook(out_path)
|
out_wb = load_workbook(out_path)
|
||||||
|
|
||||||
|
# Rename and autofit first sheet
|
||||||
|
src_ws = out_wb.active
|
||||||
|
src_ws.title = "Zdroj"
|
||||||
|
for col in src_ws.columns:
|
||||||
|
max_len = max((len(str(cell.value)) if cell.value is not None else 0) for cell in col)
|
||||||
|
src_ws.column_dimensions[get_column_letter(col[0].column)].width = min(max_len + 2, 50)
|
||||||
|
|
||||||
# Create analysis sheet
|
# Create analysis sheet
|
||||||
analysis_ws = out_wb.create_sheet("Přehled vzorků")
|
analysis_ws = out_wb.create_sheet("Přehled vzorků")
|
||||||
|
|
||||||
@@ -172,10 +183,10 @@ for i, row in _pat_pre.iterrows():
|
|||||||
patient_row_map[pat] = i + 2 # +1 for 1-based, +1 for header row
|
patient_row_map[pat] = i + 2 # +1 for 1-based, +1 for header row
|
||||||
|
|
||||||
# Only patients with any BXSCR record
|
# Only patients with any BXSCR record
|
||||||
bxscr_patients = sorted(bxscr['Patient Number'].dropna().unique())
|
bxscr_patients = sorted(bxscr['Patient No.'].dropna().unique())
|
||||||
|
|
||||||
for row_idx, patient in enumerate(bxscr_patients, 3):
|
for row_idx, patient in enumerate(bxscr_patients, 3):
|
||||||
investigator = bxscr[bxscr['Patient Number'] == patient].iloc[0]['Investigator Name']
|
investigator = bxscr[bxscr['Patient No.'] == patient].iloc[0]['Investigator Name']
|
||||||
sm11, sm11_row = get_specimen_info(bxscr, patient, 'Tissue , Paraffin Block')
|
sm11, sm11_row = get_specimen_info(bxscr, patient, 'Tissue , Paraffin Block')
|
||||||
rna, rna_row = get_specimen_info(bxscr, patient, 'Biopsy RNA Later')
|
rna, rna_row = get_specimen_info(bxscr, patient, 'Biopsy RNA Later')
|
||||||
cryo, cryo_row = get_specimen_info(bxscr, patient, 'Biopsy, Frozen Tissue')
|
cryo, cryo_row = get_specimen_info(bxscr, patient, 'Biopsy, Frozen Tissue')
|
||||||
|
|||||||
Reference in New Issue
Block a user