197 lines
9.2 KiB
Python
197 lines
9.2 KiB
Python
# Název: 02 PřejmenujSouboryReportu.py
|
|
# Verze: 1.2
|
|
# Datum: 2026-05-27
|
|
# Popis: Prochází zadaný adresář a přejmenuje známé typy reportů na standardizovaný
|
|
# formát "datum čas studie typ.přípona". Podporuje: Panorama Dashboard (xlsx),
|
|
# Site Visit Report (xlsx), Follow-Up Letter (xlsx),
|
|
# Clario MayoScore (csv), Clario MayoDiary (csv).
|
|
# Loguje průběh do prejmenuj.log vedle skriptu.
|
|
|
|
import os
|
|
import pandas as pd
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
LOG_FILE = Path(__file__).parent / "prejmenuj.log"
|
|
|
|
|
|
def log(msg: str):
|
|
ts = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
|
line = f"[{ts}] {msg}"
|
|
print(line)
|
|
with LOG_FILE.open("a", encoding="utf-8") as lf:
|
|
lf.write(line + "\n")
|
|
|
|
|
|
def zpracuj_reporty(directory_path):
|
|
mayo_diary_columns = [
|
|
'Protocol', 'Country', 'Site', 'PI Name', 'Subject ID',
|
|
'Report Date', 'Report Start Date/Time', 'Report End Date/Time',
|
|
'Stool Frequency', 'Form Number', 'Role', 'Original Source',
|
|
]
|
|
|
|
mayo_columns = [
|
|
'Protocol', 'Study Population', 'Country', 'Site', 'Principal Investigator',
|
|
'Participant ID', 'Baseline Stool Frequency', 'Visit', 'Visit Date',
|
|
'Endoscopy Completed?', 'Central Endoscopy Score', 'Local Endoscopy Score',
|
|
'Partial Mayo Score', 'Full Mayo Score',
|
|
]
|
|
|
|
panorama_columns = [
|
|
'Part', 'Source', 'Sector', 'TA', 'Protocol ID', 'Interventional',
|
|
'Region', 'Country Name', 'Institution Name', 'Site City',
|
|
'Site Zip/Postal Code', 'Site Address', 'MSID', 'Site ID',
|
|
'Site Status', 'SM Full Name', 'PI Name', 'St F Subj Enr Act',
|
|
'ID', 'Category', 'Type', 'Priority', 'Severity', 'Description',
|
|
'Brief Description - Subject ID', 'Comments', 'Created By',
|
|
'Create Date', 'Last Modified Date', 'Start Date', 'Due Date',
|
|
'End Date', 'Status', 'Days Outstanding', 'Action Taken',
|
|
'Escalated To', 'Visit Report Status', 'Visit Report Approved',
|
|
'Visit Report Type', 'Visit Report Status End Date', 'Active',
|
|
'Association', 'Deviation', 'Deviation Closed Date', 'Reason For Exclusion'
|
|
]
|
|
|
|
log(f"=== Spuštění přejmenování, adresář: {directory_path} ===")
|
|
|
|
if not os.path.exists(directory_path):
|
|
log(f"CHYBA: Adresář '{directory_path}' neexistuje.")
|
|
return
|
|
|
|
all_files = [f for f in os.listdir(directory_path) if os.path.isfile(os.path.join(directory_path, f))]
|
|
log(f"Nalezeno souborů: {len(all_files)} — {all_files}")
|
|
|
|
for filename in all_files:
|
|
file_path = os.path.join(directory_path, filename)
|
|
|
|
# ---------------------------------------------------------
|
|
# 0a. CLARIO MAYO DIARY (CSV)
|
|
# ---------------------------------------------------------
|
|
if 'MAYO-DIARY' in filename and filename.endswith('.csv'):
|
|
log(f"Detekován MayoDiary: {filename}")
|
|
try:
|
|
df = pd.read_csv(file_path)
|
|
actual_columns = set(df.columns)
|
|
missing = set(mayo_diary_columns) - actual_columns
|
|
|
|
if not missing:
|
|
protocols = df['Protocol'].dropna().unique()
|
|
log(f" Protocol hodnoty: {list(protocols)}")
|
|
if len(protocols) > 0:
|
|
study_name = str(protocols[0]).strip()
|
|
file_time = datetime.fromtimestamp(os.path.getmtime(file_path))
|
|
timestamp = file_time.strftime('%Y-%m-%d_%H-%M-%S')
|
|
new_filename = f"{timestamp} {study_name} Clario MayoDiary.csv"
|
|
os.rename(file_path, os.path.join(directory_path, new_filename))
|
|
log(f" ÚSPĚCH: -> '{new_filename}'")
|
|
else:
|
|
log(f" VAROVÁNÍ: Sloupec Protocol je prázdný.")
|
|
else:
|
|
log(f" PŘESKOČENO: Chybí sloupce: {missing}")
|
|
except Exception as e:
|
|
log(f" CHYBA: {e}")
|
|
continue
|
|
|
|
# ---------------------------------------------------------
|
|
# 0b. CLARIO MAYO SCORE (CSV)
|
|
# ---------------------------------------------------------
|
|
if 'Custom.MayoScoreReport' in filename and filename.endswith('.csv'):
|
|
log(f"Detekován MayoScore: {filename}")
|
|
try:
|
|
df = pd.read_csv(file_path)
|
|
actual_columns = set(df.columns)
|
|
missing = set(mayo_columns) - actual_columns
|
|
|
|
if not missing:
|
|
protocols = df['Protocol'].dropna().unique()
|
|
log(f" Protocol hodnoty: {list(protocols)}")
|
|
if len(protocols) > 0:
|
|
study_name = str(protocols[0]).strip()
|
|
file_time = datetime.fromtimestamp(os.path.getmtime(file_path))
|
|
timestamp = file_time.strftime('%Y-%m-%d_%H-%M-%S')
|
|
new_filename = f"{timestamp} {study_name} Clario MayoScore.csv"
|
|
os.rename(file_path, os.path.join(directory_path, new_filename))
|
|
log(f" ÚSPĚCH: -> '{new_filename}'")
|
|
else:
|
|
log(f" VAROVÁNÍ: Sloupec Protocol je prázdný.")
|
|
else:
|
|
log(f" PŘESKOČENO: Chybí sloupce: {missing}")
|
|
except Exception as e:
|
|
log(f" CHYBA: {e}")
|
|
continue
|
|
|
|
# Ostatní typy — jen xlsx
|
|
if not filename.endswith('.xlsx'):
|
|
log(f"Přeskočeno (neznámý typ): {filename}")
|
|
continue
|
|
|
|
# ---------------------------------------------------------
|
|
# 1. PANORAMA DASHBOARD (XLSX)
|
|
# ---------------------------------------------------------
|
|
if 'Panorama Dashboard' in filename:
|
|
log(f"Detekován Panorama: {filename}")
|
|
try:
|
|
df = pd.read_excel(file_path, skiprows=5)
|
|
actual_columns = set(df.columns)
|
|
missing = set(panorama_columns) - actual_columns
|
|
|
|
if not missing:
|
|
protocol_ids = df['Protocol ID'].dropna().unique()
|
|
log(f" Protocol ID hodnoty: {list(protocol_ids)}")
|
|
if len(protocol_ids) > 0:
|
|
study_name = str(protocol_ids[0]).strip()
|
|
file_time = datetime.fromtimestamp(os.path.getmtime(file_path))
|
|
timestamp = file_time.strftime('%Y-%m-%d_%H-%M-%S')
|
|
new_filename = f"{timestamp} {study_name} Panorama Deviations and Issues.xlsx"
|
|
os.rename(file_path, os.path.join(directory_path, new_filename))
|
|
log(f" ÚSPĚCH: -> '{new_filename}'")
|
|
else:
|
|
log(f" VAROVÁNÍ: Sloupec Protocol ID je prázdný.")
|
|
else:
|
|
log(f" PŘESKOČENO: Chybí sloupce: {missing}")
|
|
except Exception as e:
|
|
log(f" CHYBA: {e}")
|
|
|
|
# ---------------------------------------------------------
|
|
# 2. SITE VISIT REPORT A FOLLOW-UP LETTER (XLSX)
|
|
# ---------------------------------------------------------
|
|
else:
|
|
try:
|
|
df_a1 = pd.read_excel(file_path, nrows=1, header=None)
|
|
if not df_a1.empty:
|
|
a1_text = str(df_a1.iloc[0, 0])
|
|
log(f" A1 obsah: {a1_text[:80]}")
|
|
|
|
is_site_visit = "Title: Site Visit Report Details" in a1_text
|
|
is_follow_up = "Title: Follow-Up Letter Details" in a1_text
|
|
|
|
if is_site_visit or is_follow_up:
|
|
suffix = "Site Visit Details.xlsx" if is_site_visit else "FUL details.xlsx"
|
|
log(f"Detekován {'Site Visit' if is_site_visit else 'Follow-Up Letter'}: {filename}")
|
|
|
|
df = pd.read_excel(file_path, skiprows=5)
|
|
if 'Protocol ID' in df.columns:
|
|
protocol_ids = df['Protocol ID'].dropna().unique()
|
|
log(f" Protocol ID hodnoty: {list(protocol_ids)}")
|
|
if len(protocol_ids) > 0:
|
|
study_name = str(protocol_ids[0]).strip()
|
|
file_time = datetime.fromtimestamp(os.path.getmtime(file_path))
|
|
timestamp = file_time.strftime('%Y-%m-%d_%H-%M-%S')
|
|
new_filename = f"{timestamp} {study_name} {suffix}"
|
|
os.rename(file_path, os.path.join(directory_path, new_filename))
|
|
log(f" ÚSPĚCH: -> '{new_filename}'")
|
|
else:
|
|
log(f" VAROVÁNÍ: Sloupec Protocol ID je prázdný.")
|
|
else:
|
|
log(f" PŘESKOČENO: Soubor neobsahuje sloupec 'Protocol ID'.")
|
|
else:
|
|
log(f"Přeskočeno (neznámý xlsx obsah): {filename}")
|
|
except Exception as e:
|
|
log(f" CHYBA: {e}")
|
|
|
|
log("=== Přejmenování dokončeno ===")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
cesta_k_adresari = r"c:\Users\vbuzalka\OneDrive - JNJ\##JNJPrenos"
|
|
zpracuj_reporty(cesta_k_adresari)
|