"""Download per-subject notification PDFs from the 4G Clinical portal.

For every study in ``STUDIES`` the script logs in through a Playwright
browser, reads the subject list from today's "Subject Summary Report"
workbook, queries the report API for each subject's notifications and saves
every notification as a PDF under ``DETAILS_DIR/<study>/``.
"""

from playwright.sync_api import sync_playwright
import os
import glob
import datetime
import re
import requests
import pandas as pd

# ── CONFIG ──────────────────────────────────────────────────────────────────
BASE_URL = "https://janssen.4gclinical.com"
# NOTE(review): credentials are hard-coded in source. They can be overridden
# through the environment variables below; the literals should be removed from
# version control and the password rotated.
EMAIL = os.environ.get("FOURG_CLINICAL_EMAIL", "vbuzalka@its.jnj.com")
PASSWORD = os.environ.get("FOURG_CLINICAL_PASSWORD", "Vlado123++-+")
STUDIES = ["77242113UCO3001", "42847922MDD3003"]
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
INCOMING_DIR = os.path.join(BASE_DIR, "IncomingSourceReports")
DETAILS_DIR = os.path.join(BASE_DIR, "IncomingSourceReportsDetails")
# Seconds to wait for the HTTP API before giving up; without a timeout a
# stalled connection would block the whole run forever.
REQUEST_TIMEOUT = 120
# ────────────────────────────────────────────────────────────────────────────

# Characters that are invalid (or path separators) in file names on common
# file systems; used to sanitise API-supplied notification titles.
_UNSAFE_FILENAME_CHARS = re.compile(r'[<>:"/\\|?*\x00-\x1f]')


def _safe_filename_part(text):
    """Return *text* as a string with file-name-unsafe characters replaced by ``_``."""
    return _UNSAFE_FILENAME_CHARS.sub("_", str(text)).strip()


def get_subjects(study):
    """Return the subject identifiers listed in today's Subject Summary Report.

    The newest (by modification time) workbook in ``INCOMING_DIR`` matching
    ``"* <study> Subject Summary Report.xlsx"`` is used; Excel lock files
    (``~$...``) are ignored. The header row is located by searching for a cell
    equal to ``"Subject"``.

    Args:
        study: Study identifier used in the report file name.

    Returns:
        List of stripped, non-null values of the ``Subject`` column as strings.

    Raises:
        FileNotFoundError: No report exists, or the newest report's file name
            does not start with today's date (``YYYY-MM-DD``).
        ValueError: No row containing a ``"Subject"`` cell was found.
    """
    pattern = os.path.join(INCOMING_DIR, f"* {study} Subject Summary Report.xlsx")
    files = sorted(
        (f for f in glob.glob(pattern) if not os.path.basename(f).startswith("~$")),
        key=os.path.getmtime,
        reverse=True,
    )
    if not files:
        raise FileNotFoundError(f"Nenalezen Subject Summary Report pro {study}")

    path = files[0]
    today = datetime.date.today().strftime("%Y-%m-%d")
    if not os.path.basename(path).startswith(today):
        raise FileNotFoundError(
            f"Dnešní Subject Summary Report pro {study} neexistuje — spusť nejdříve download_subject_summary.py"
        )

    print(f" Čtu subjekty z: {os.path.basename(path)}")

    # First pass without a header to find which row holds the column names.
    raw = pd.read_excel(path, header=None)
    header_row = None
    for i, row in raw.iterrows():
        if "Subject" in [str(v).strip() for v in row]:
            header_row = i
            break
    if header_row is None:
        raise ValueError("Hlavičkový řádek nenalezen")

    # Second pass using the detected header row.
    df = pd.read_excel(path, header=header_row)
    return df["Subject"].dropna().astype(str).str.strip().tolist()


def get_jwt_and_api_base(page, study):
    """Get the JWT token and ``api_base_url`` for the given study.

    The token is read from the page's ``localStorage`` (key ``JWT.access``)
    and the app instances are fetched inside the browser so the request runs
    in the logged-in page context.

    Args:
        page: Playwright page that is already logged in.
        study: Study identifier; matched as a substring of an instance label.

    Returns:
        Tuple ``(jwt, api_base_url)``.

    Raises:
        ValueError: The token is missing, or no app instance label contains
            *study*.
    """
    jwt = page.evaluate("localStorage.getItem('JWT.access')")
    if not jwt:
        raise ValueError("JWT token nenalezen v localStorage")

    # Adjacent literals concatenate to exactly the original one-line script.
    fetch_instances_js = (
        "async (jwt) => { "
        "const res = await fetch('/_/api/dispatch/app_instances/', { "
        "headers: { 'Authorization': `Bearer ${jwt}` } }); "
        "return res.json(); }"
    )
    instances = page.evaluate(fetch_instances_js, jwt)

    # `or ""` guards against a JSON null label, which .get()'s default misses.
    instance = next(
        (i for i in instances if study in (i.get("label") or "")),
        None,
    )
    if not instance:
        raise ValueError(f"app_instance pro studii {study} nenalezena")
    return jwt, instance["api_base_url"]


def get_notifications(jwt, api_base, study, subject):
    """Load the list of notifications for a subject via the report_data API.

    Args:
        jwt: Bearer token for the ``Authorization`` header.
        api_base: Study-specific API base path appended to ``BASE_URL``.
        study: Study identifier sent in the request payload.
        subject: Subject identifier whose notifications are requested.

    Returns:
        List of dicts with keys ``pk``, ``title`` and ``event``. Entries
        lacking a truthy ``pk`` or ``et_title`` are skipped.

    Raises:
        requests.HTTPError: The API answered with an error status.
        requests.RequestException: Network failure or timeout.
    """
    url = f"{BASE_URL}{api_base}/api/v1/reports_api/report_data"
    params = {
        "path": "patient_detail_report",
        "id": subject,
        "key": "table_1",
        "unblinded": "false",
    }
    payload = {
        "path": "patient_detail_report",
        "study": study,
        "id": subject,
        "key": "table_1",
        "fields": {},
        "filters": [{"tableId": "table_1", "tableFilters": {}}],
        # NOTE(review): only the first page of up to 500 rows is requested;
        # subjects with more rows would be truncated — confirm this is enough.
        "pagination_details": {"order": "type", "reverseOrder": False, "page": 1, "limit": 500},
        # Timestamp makes the key unique per call.
        "cache_key": f"py_{subject}_{datetime.datetime.now().timestamp()}",
    }
    headers = {
        "Authorization": f"Bearer {jwt}",
        "Content-Type": "application/json",
        "lang": "en",
    }
    resp = requests.post(
        url, params=params, json=payload, headers=headers, timeout=REQUEST_TIMEOUT
    )
    resp.raise_for_status()
    data = resp.json()

    notifications = []
    # `or` fallbacks tolerate JSON nulls in place of lists/objects.
    for row in data.get("data") or []:
        for notif in row.get("notification") or []:
            item = notif.get("item") or {}
            pk = item.get("pk")
            title = item.get("et_title")
            if pk and title:
                notifications.append(
                    {"pk": pk, "title": title, "event": row.get("event_event_id", "")}
                )
    return notifications


def download_pdf(jwt, api_base, pk, title, out_path):
    """Download a notification PDF and save it to *out_path*.

    Args:
        jwt: Bearer token for the ``Authorization`` header.
        api_base: Study-specific API base path appended to ``BASE_URL``.
        pk: Notification primary key.
        title: Notification title, passed to the API as-is.
        out_path: Destination file path; overwritten if it exists.

    Raises:
        requests.HTTPError: The API answered with an error status.
        requests.RequestException: Network failure or timeout.
        OSError: The file could not be written.
    """
    url = f"{BASE_URL}{api_base}/api/v1/meta_api/pdfnotification"
    params = {"pk": pk, "title": title, "html": "true"}
    headers = {
        "Authorization": f"Bearer {jwt}",
        "lang": "en",
        "Accept": "*/*",
    }
    resp = requests.get(url, params=params, headers=headers, timeout=REQUEST_TIMEOUT)
    resp.raise_for_status()
    # The body is fully received before the file is opened, so a failed
    # request never leaves an empty file behind.
    with open(out_path, "wb") as f:
        f.write(resp.content)


def run(page, study):
    """Download all notification PDFs for every subject of *study*.

    Files are written to ``DETAILS_DIR/<study>/`` and named
    ``"<today> <study> <subject> Notification <title> pk<pk>.pdf"``; files
    that already exist are skipped. An error while processing one subject is
    printed and the loop continues with the next subject.

    Args:
        page: Playwright page that is logged in with *study* selected.
        study: Study identifier.

    Raises:
        FileNotFoundError, ValueError: Propagated from ``get_subjects`` and
            ``get_jwt_and_api_base``.
    """
    out_dir = os.path.join(DETAILS_DIR, study)
    os.makedirs(out_dir, exist_ok=True)

    subjects = get_subjects(study)
    print(f" Nalezeno {len(subjects)} subjektů")

    today = datetime.date.today().strftime("%Y-%m-%d")

    # Load the page so that the session context is valid.
    page.goto(f"{BASE_URL}/report/patient_detail_report")
    page.wait_for_load_state("networkidle", timeout=120000)

    jwt, api_base = get_jwt_and_api_base(page, study)
    print(f" API base: {api_base}")

    for subject in subjects:
        print(f" [{subject}] Stahuji notifikace...")
        try:
            notifications = get_notifications(jwt, api_base, study, subject)
            if not notifications:
                print(f" [{subject}] Žádné notifikace")
                continue
            for notif in notifications:
                pk = notif["pk"]
                title = notif["title"]
                # The title comes from the API and may contain characters that
                # are illegal in file names or act as path separators, so only
                # the file-name copy is sanitised; the API still gets the
                # original title.
                safe_subject = _safe_filename_part(subject)
                safe_title = _safe_filename_part(title)
                filename = os.path.join(
                    out_dir,
                    f"{today} {study} {safe_subject} Notification {safe_title} pk{pk}.pdf",
                )
                if os.path.exists(filename):
                    print(f" [{subject}] {title} (pk={pk}) — již existuje, přeskakuji")
                    continue
                download_pdf(jwt, api_base, pk, title, filename)
                print(f" [{subject}] {title} (pk={pk}) OK")
        except Exception as e:
            # Best effort: report the failure and move on to the next subject.
            print(f" [{subject}] CHYBA při notifikacích: {e}")

    print(f" [{study}] Notifikace hotovo.")


def main():
    """Log in once per study and download its notifications.

    A fresh browser is launched for each study in ``STUDIES``. Errors raised
    by ``run`` are printed and the next study is processed; errors during
    login or study selection propagate to the caller. The browser is closed
    in either case.
    """
    os.makedirs(DETAILS_DIR, exist_ok=True)

    with sync_playwright() as p:
        for study in STUDIES:
            print(f"\n[{study}] Přihlášení...")
            browser = p.chromium.launch(headless=False)
            try:
                context = browser.new_context(accept_downloads=True)
                page = context.new_page()

                page.goto(BASE_URL)
                page.wait_for_load_state("networkidle")
                page.get_by_label("Email *").fill(EMAIL)
                page.get_by_label("Password *").fill(PASSWORD)
                page.locator("#login__submit").click()
                page.wait_for_load_state("networkidle")

                page.get_by_label("Study *").click()
                page.get_by_role("option", name=study).click()
                page.get_by_role("button", name="SELECT").click()
                page.wait_for_load_state("networkidle")

                try:
                    run(page, study)
                except Exception as e:
                    print(f" [{study}] CHYBA: {e}")
            finally:
                # Always release the browser, even if login/selection failed.
                browser.close()

    print("\nVše hotovo.")


if __name__ == "__main__":
    main()