39e578af2d
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
294 lines
12 KiB
Python
294 lines
12 KiB
Python
# Název: janssenpc_file_send.py
# Verze: 2.2
# Datum: 2026-06-02
# Popis: Přejmenuje soubory ve složce ##JNJPrenos, odešle je na msgs.buzalka.cz
#        a přesune do podsložky Trash. Loguje průběh do file_send.log vedle skriptu.
#        Podporuje: PANORAMA Site Contacts (xlsx), Panorama Dashboard (xlsx),
#        Site Visit Report (xlsx), Follow-Up Letter (xlsx),
#        Clario MayoScore (csv), Clario MayoDiary (csv),
#        Clario Data Corrections / DCRs (csv).
|
|
|
|
import os
|
|
import time
|
|
import shutil
|
|
import requests
|
|
import pandas as pd
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
|
|
# --- Upload configuration -------------------------------------------------
# NOTE(review): the bearer token is stored in source in plain text; consider
# loading it from the environment or a secrets store instead.
TOKEN = "13e1bb01-9fd5-44a8-8ce9-4ee27133d340"

# Endpoint that receives each file as a multipart upload.
UPLOAD_URL = "https://msgs.buzalka.cz/upload-dropbox"

# --- Filesystem locations -------------------------------------------------
# Folder scanned for files to rename and upload.
SOURCE_DIR = Path(r"C:\Users\vbuzalka\OneDrive - JNJ\##JNJPrenos")

# Sub-folder of SOURCE_DIR that receives files after a successful upload.
TRASH_DIR = SOURCE_DIR.joinpath("Trash")

# Log file kept in the same directory as this script.
LOG_FILE = Path(__file__).parent.joinpath("file_send.log")
|
|
|
|
# Column sets used to recognise each export type. A file is accepted as a
# given type only when every column listed here is present in its header.

# Clario Mayo diary export (csv).
MAYO_DIARY_COLUMNS = [
    "Protocol",
    "Country",
    "Site",
    "PI Name",
    "Subject ID",
    "Report Date",
    "Report Start Date/Time",
    "Report End Date/Time",
    "Stool Frequency",
    "Form Number",
    "Role",
    "Original Source",
]

# Clario Mayo score report (csv).
MAYO_SCORE_COLUMNS = [
    "Protocol",
    "Study Population",
    "Country",
    "Site",
    "Principal Investigator",
    "Participant ID",
    "Baseline Stool Frequency",
    "Visit",
    "Visit Date",
    "Endoscopy Completed?",
    "Central Endoscopy Score",
    "Local Endoscopy Score",
    "Partial Mayo Score",
    "Full Mayo Score",
]

# Clario data corrections, eCOA flavour (csv).
DCR_ECOA_COLUMNS = [
    "Protocol",
    "Data Correction ID",
    "Description",
    "Query History",
]

# Clario data corrections, ECG flavour (csv).
DCR_ECG_COLUMNS = [
    "Protocol",
    "Data Correction ID",
    "Site ID",
    "PI_NAME",
    "Subject Number",
    "Query History",
]

# Panorama dashboard export (xlsx).
PANORAMA_COLUMNS = [
    "Part",
    "Source",
    "Sector",
    "TA",
    "Protocol ID",
    "Interventional",
    "Region",
    "Country Name",
    "Institution Name",
    "Site City",
    "Site Zip/Postal Code",
    "Site Address",
    "MSID",
    "Site ID",
    "Site Status",
    "SM Full Name",
    "PI Name",
    "St F Subj Enr Act",
    "ID",
    "Category",
    "Type",
    "Priority",
    "Severity",
    "Description",
    "Brief Description - Subject ID",
    "Comments",
    "Created By",
    "Create Date",
    "Last Modified Date",
    "Start Date",
    "Due Date",
    "End Date",
    "Status",
    "Days Outstanding",
    "Action Taken",
    "Escalated To",
    "Visit Report Status",
    "Visit Report Approved",
    "Visit Report Type",
    "Visit Report Status End Date",
    "Active",
    "Association",
    "Deviation",
    "Deviation Closed Date",
    "Reason For Exclusion",
]
|
|
|
|
|
|
def log(msg: str):
    """Print *msg* prefixed with the current local time and append it to LOG_FILE.

    The same line goes to stdout and, UTF-8 encoded with a trailing newline,
    to the end of the log file.
    """
    stamp = f"{datetime.now():%Y-%m-%d %H:%M:%S}"
    entry = f"[{stamp}] {msg}"
    print(entry)
    with LOG_FILE.open("a", encoding="utf-8") as handle:
        handle.write(f"{entry}\n")
|
|
|
|
|
|
def move_to_trash(f: Path):
    """Move file *f* into TRASH_DIR, creating that folder when needed.

    If a file of the same name is already in the trash folder, the moved
    file gets a ``_YYYYmmdd_HHMMSS`` suffix (current local time) inserted
    before its extension.
    """
    TRASH_DIR.mkdir(exist_ok=True)
    target = TRASH_DIR / f.name
    if target.exists():
        stamp = f"{datetime.now():%Y%m%d_%H%M%S}"
        target = TRASH_DIR / (f.stem + "_" + stamp + f.suffix)
    shutil.move(str(f), target)
|
|
|
|
|
|
def get_timestamp(file_path: str) -> str:
    """Return the file's modification time as ``YYYY-mm-dd_HH-MM-SS`` (local time)."""
    modified_at = datetime.fromtimestamp(os.stat(file_path).st_mtime)
    return modified_at.strftime('%Y-%m-%d_%H-%M-%S')
|
|
|
|
|
|
def _read_report_table(path: str) -> pd.DataFrame:
    """Read an xlsx export, skipping its first five rows."""
    return pd.read_excel(path, skiprows=5)


def _rename_with_study(
    f: Path,
    directory: Path,
    table: pd.DataFrame,
    id_column: str,
    tail: str,
    success_tag: str,
    empty_warning: str,
) -> None:
    """Rename *f* to ``"<mtime> <study> <tail>"`` using the first value of *id_column*.

    The distinct non-null values of *id_column* are logged; the first one
    (stripped) becomes the study identifier. When the column holds no
    values, *empty_warning* is logged and the file keeps its name.
    Exceptions are not handled here; the caller's ``try`` logs them.
    """
    values = table[id_column].dropna().unique()
    log(f" {id_column}: {list(values)}")
    if len(values) == 0:
        log(empty_warning)
        return
    study = str(values[0]).strip()
    new_name = f"{get_timestamp(str(f))} {study} {tail}"
    f.rename(directory / new_name)
    log(f"{success_tag} -> '{new_name}'")


def _rename_by_columns(
    f: Path,
    directory: Path,
    load,
    required_columns: list,
    id_column: str,
    tail: str,
    empty_warning: str,
) -> None:
    """Rename *f* when the table read from it has every required column.

    *load* is a callable taking the file path as ``str`` and returning a
    DataFrame (``pd.read_csv`` or ``_read_report_table``). Any exception
    raised while reading or renaming is logged and swallowed so the
    remaining files are still processed.
    """
    try:
        table = load(str(f))
        absent = set(required_columns) - set(table.columns)
        if absent:
            log(f" PŘESKOČENO: Chybí sloupce: {absent}")
        else:
            _rename_with_study(
                f, directory, table, id_column, tail, " ÚSPĚCH:", empty_warning
            )
    except Exception as exc:
        log(f" CHYBA: {exc}")


def _rename_dcr_csv(f: Path, directory: Path) -> None:
    """Classify a generic csv as Clario ECG or eCOA data corrections and rename it.

    Only the header and the first two data rows are read. ECG is checked
    first. A csv matching neither column set is left under its original
    name (it is still uploaded later by the main script).
    """
    filename = f.name
    try:
        sample = pd.read_csv(str(f), nrows=2)
        cols = set(sample.columns)
        log(f" CSV sloupce ({filename}): {sorted(cols)}")

        missing_ecg = set(DCR_ECG_COLUMNS) - cols
        missing_ecoa = set(DCR_ECOA_COLUMNS) - cols
        log(f" Chybí pro ECG: {missing_ecg or '—'}")
        log(f" Chybí pro eCOA: {missing_ecoa or '—'}")

        if not missing_ecg:
            label = "Clario ECG DCRs"
        elif not missing_ecoa:
            label = "Clario eCOA DCRs"
        else:
            # Unknown csv layout: keep the name, the file is sent as-is.
            log(f" Neznámý CSV typ — bude odeslán bez přejmenování: {filename}")
            return

        log(f" Detekován {label}: {filename}")
        _rename_with_study(
            f,
            directory,
            sample,
            'Protocol',
            f"{label}.csv",
            " ÚSPĚCH přejmenování:",
            " VAROVÁNÍ: Sloupec Protocol je prázdný — odesílám pod původním názvem.",
        )
    except Exception as exc:
        log(f" CHYBA při zpracování CSV {filename}: {exc}")


def _rename_site_contacts(f: Path, directory: Path) -> None:
    """Rename a 'PANORAMA Dashboard' workbook that holds a 'Site Contacts' sheet.

    The workbook is renamed only when cell A1 of that sheet contains
    'Title: Site Contacts'.
    """
    path = str(f)
    try:
        with pd.ExcelFile(path) as workbook:
            if 'Site Contacts' in workbook.sheet_names:
                first_row = workbook.parse('Site Contacts', nrows=1, header=None)
                a1 = str(first_row.iloc[0, 0]) if not first_row.empty else ''
            else:
                a1 = None
        # The workbook is closed at this point, so the rename below does not
        # run against an open file handle.
        if a1 is None:
            log(" PŘESKOČENO: List 'Site Contacts' nenalezen.")
        elif 'Title: Site Contacts' in a1:
            new_name = f"{get_timestamp(path)} PANORAMA Site Contacts.xlsx"
            f.rename(directory / new_name)
            log(f" ÚSPĚCH: -> '{new_name}'")
        else:
            log(f" PŘESKOČENO: A1 neodpovídá vzoru ({a1[:50]})")
    except Exception as exc:
        log(f" CHYBA: {exc}")


def _rename_visit_or_follow_up(f: Path, directory: Path) -> None:
    """Rename a Site Visit Report or Follow-Up Letter workbook based on cell A1.

    A workbook whose first sheet is empty is skipped without a log entry;
    one whose A1 matches neither title is logged as unknown xlsx content.
    """
    path = str(f)
    filename = f.name
    try:
        first_row = pd.read_excel(path, nrows=1, header=None)
        if first_row.empty:
            return

        a1 = str(first_row.iloc[0, 0])
        log(f" A1: {a1[:80]}")
        is_site_visit = "Title: Site Visit Report Details" in a1
        is_follow_up = "Title: Follow-Up Letter Details" in a1
        if not (is_site_visit or is_follow_up):
            log(f" Přeskočeno (neznámý xlsx obsah): {filename}")
            return

        # Site Visit wins when both titles would match.
        if is_site_visit:
            tail, kind = "Site Visit Details.xlsx", "Site Visit"
        else:
            tail, kind = "FUL details.xlsx", "Follow-Up Letter"
        log(f" Detekován {kind}: {filename}")

        table = _read_report_table(path)
        if 'Protocol ID' not in table.columns:
            log(" PŘESKOČENO: Chybí sloupec Protocol ID.")
            return
        _rename_with_study(
            f,
            directory,
            table,
            'Protocol ID',
            tail,
            " ÚSPĚCH:",
            " VAROVÁNÍ: Protocol ID je prázdný.",
        )
    except Exception as exc:
        log(f" CHYBA: {exc}")


def prejmenuj(directory: Path) -> None:
    """Rename every recognised export file directly inside *directory*.

    Files are classified by name and content, in this order:

    * csv with 'MAYO-DIARY' in the name      -> ``<ts> <study> Clario MayoDiary.csv``
    * csv with 'Custom.MayoScoreReport'      -> ``<ts> <study> Clario MayoScore.csv``
    * any other csv (ECG / eCOA corrections) -> ``<ts> <study> Clario ... DCRs.csv``
    * xlsx with 'PANORAMA Dashboard'         -> ``<ts> PANORAMA Site Contacts.xlsx``
    * xlsx with 'Panorama Dashboard'         -> ``<ts> <study> Panorama Deviations and Issues.xlsx``
    * any other xlsx (Site Visit / FUL)      -> ``<ts> <study> Site Visit Details.xlsx``
      or ``<ts> <study> FUL details.xlsx``

    ``<ts>`` is the file's modification time and ``<study>`` the first
    protocol value found in the file. Files that are neither csv nor xlsx
    are only logged. Every failure is logged and never raised, so one bad
    file does not stop the others. The directory listing is taken once,
    before any rename happens.
    """
    log(f"--- Přejmenování, adresář: {directory} ---")
    files = [entry for entry in directory.iterdir() if entry.is_file()]
    log(f" Nalezeno souborů: {len(files)} — {[entry.name for entry in files]}")

    for f in files:
        filename = f.name
        is_csv = filename.endswith('.csv')

        if is_csv and 'MAYO-DIARY' in filename:
            # 0a. Clario Mayo diary (csv)
            log(f" Detekován MayoDiary: {filename}")
            _rename_by_columns(
                f,
                directory,
                pd.read_csv,
                MAYO_DIARY_COLUMNS,
                'Protocol',
                "Clario MayoDiary.csv",
                " VAROVÁNÍ: Sloupec Protocol je prázdný.",
            )
        elif is_csv and 'Custom.MayoScoreReport' in filename:
            # 0b. Clario Mayo score (csv)
            log(f" Detekován MayoScore: {filename}")
            _rename_by_columns(
                f,
                directory,
                pd.read_csv,
                MAYO_SCORE_COLUMNS,
                'Protocol',
                "Clario MayoScore.csv",
                " VAROVÁNÍ: Sloupec Protocol je prázdný.",
            )
        elif is_csv:
            # 0c. Clario data corrections (csv): ECG or eCOA
            _rename_dcr_csv(f, directory)
        elif not filename.endswith('.xlsx'):
            # Everything below handles xlsx only.
            log(f" Přeskočeno (neznámý typ): {filename}")
        elif 'PANORAMA Dashboard' in filename:
            # 1a. Panorama site contacts (xlsx); the export is named
            # "PANORAMA Dashboard" in upper case.
            log(f" Detekován PANORAMA Site Contacts: {filename}")
            _rename_site_contacts(f, directory)
        elif 'Panorama Dashboard' in filename:
            # 1. Panorama dashboard (xlsx)
            log(f" Detekován Panorama: {filename}")
            _rename_by_columns(
                f,
                directory,
                _read_report_table,
                PANORAMA_COLUMNS,
                'Protocol ID',
                "Panorama Deviations and Issues.xlsx",
                " VAROVÁNÍ: Protocol ID je prázdný.",
            )
        else:
            # 2. Site Visit Report and Follow-Up Letter (xlsx)
            _rename_visit_or_follow_up(f, directory)

    log("--- Přejmenování dokončeno ---")
|
|
|
|
|
|
# === MAIN LOGIC ===
# Runs at import time: rename, wait, then upload each file and move it to Trash.

log("=== Spuštění ===")
log(f"Zdrojový adresář: {SOURCE_DIR} (existuje: {SOURCE_DIR.exists()})")

# Step 1: give recognised exports their canonical names.
prejmenuj(SOURCE_DIR)

# Step 2: pause for 10 seconds before uploading.
log("Čekám 10 vteřin...")
time.sleep(10)

# Step 3: list the files again (names may have changed) and send them.
files = [entry for entry in SOURCE_DIR.iterdir() if entry.is_file()]
log(f"Souborů k odeslání: {len(files)}")
for path in files:
    log(f" Nalezen: {path.name}")

if not files:
    log("Žádné soubory k odeslání.")

# With an empty list this loop simply does not run.
for path in files:
    try:
        with path.open("rb") as stream:
            response = requests.post(
                UPLOAD_URL,
                headers={"Authorization": f"Bearer {TOKEN}"},
                files={"file": (path.name, stream, "application/octet-stream")},
                timeout=120,
            )
        # The upload handle is closed here, before the file is moved.
        response.raise_for_status()
        status = response.json().get('status', '?').upper()
        log(f" {status:10} | {path.name}")
        # The file is moved whatever status value the server reported;
        # only an exception above keeps it in place for the next run.
        move_to_trash(path)
        log(f" PŘESUNUTO | {path.name} -> Trash")
    except Exception as exc:
        log(f" CHYBA | {path.name} | {exc}")

log("=== Hotovo ===")
|