# =============================================================================
# Name:        download_test_results_v1.1.py
# Version:     1.1
# Date:        2026-05-29
# Description: Downloads Test Results from xsp.labcorp.com for 2 studies
#              (36940, 35472), both report types (Standard + Microbiology),
#              across all sites. Waits for the AG Grid rows (.ag-row) to load;
#              an empty site ('No Data') is skipped.
#              Output: timestamped CSV files in the Source/ directory.
# Changes v1.1: + study 35472, + report type microbiology (previously only
#               36940/standard).
# Credentials: read from the environment variables XSP_EMAIL (optional) and
#              XSP_PASSWORD (required whenever the login form is shown).
# =============================================================================
import os
from datetime import datetime

from playwright.sync_api import sync_playwright


def log(msg):
    """Print *msg* to stdout, prefixed with the current local time (HH:MM:SS)."""
    print(f"[{datetime.now().strftime('%H:%M:%S')}] {msg}", flush=True)


# Credentials come from the environment so that no secret lives in the source.
# NOTE(review): a plaintext password used to be hard-coded here; treat that
# password as exposed and rotate it.
EMAIL = os.environ.get("XSP_EMAIL", "vbuzalka@its.jnj.com")
PASSWORD = os.environ.get("XSP_PASSWORD", "")

LOGIN_URL = "https://xsp.covance.com/"
OUT_DIR = r"U:\PythonProject\Janssen\Covance_UCO3001\Source"
PROFILE_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "browser_profile")

# Studies and their internal site numbers.
# 36940 = 77242113UCO3001 (UC) — site list taken from download_equeries_report SITES
# 35472 = second study (MDD)
STUDIES = [
    {
        "study": "36940",
        "sites": [
            "930551", "930556", "930525", "930549", "930543", "930547",
            "930555", "930557", "930539", "930536", "930553", "930531",
        ],
    },
    {
        "study": "35472",
        "sites": [
            "898745", "898739", "898733", "898744", "898727",
        ],
    },
]

# Report types: URL tab slug + suffix used in the output file name.
REPORT_TYPES = [
    {"slug": "standard-test-results", "suffix": "standard"},
    {"slug": "microbiology", "suffix": "microbiology"},
]

# One entry per (study, site, report type) combination.
REPORTS = [
    {
        "site": sid,
        "study": st["study"],
        "type": rt["suffix"],
        "url": f"https://xsp.labcorp.com/sponsor/study/{st['study']}/test-results/{sid}/{rt['slug']}",
        "filename": f"sponsor-study-{st['study']}-test-results-{sid}-{rt['suffix']}.csv",
    }
    for st in STUDIES
    for sid in st["sites"]
    for rt in REPORT_TYPES
]

# Browser-side predicate: true when the grid has no rows AND a *visible*
# no-rows overlay is present. The overlay wrapper class is
# .ag-overlay-no-rows-wrapper (NOT -no-rows-center), and the page contains two
# such overlays, one of them hidden, hence the offsetParent visibility check.
_EMPTY_GRID_JS = """() => {
    if (document.querySelectorAll('div.ag-row').length > 0) return false;
    return [...document.querySelectorAll('.ag-overlay-no-rows-wrapper')]
        .some(e => e.offsetParent !== null);
}"""

# Row-count stabilisation: up to 20 checks, 2 s apart (~40 s in total).
_STABILIZE_CHECKS = 20
_STABILIZE_INTERVAL_MS = 2000


def login(page):
    """Open the login page and sign in with EMAIL / PASSWORD if a form is shown.

    If the "Email" field is not visible after the page settles, the script
    treats the session stored in the persistent profile as still active and
    returns without entering credentials.

    Args:
        page: Playwright sync ``Page`` to drive.

    Raises:
        RuntimeError: the login form is shown but PASSWORD is empty
            (XSP_PASSWORD is not set).
    """
    log("LOGIN: otviram login stranku...")
    page.goto(LOGIN_URL)
    page.wait_for_load_state("networkidle")

    if not page.get_by_label("Email").is_visible():
        log(f"LOGIN: session uz aktivni, prihlaseni preskoceno ({page.url})")
        return

    # Fail early with a clear message instead of submitting an empty password
    # and timing out on the redirect.
    if not PASSWORD:
        raise RuntimeError(
            "Heslo neni nastaveno: nastavte promennou prostredi XSP_PASSWORD."
        )

    log("LOGIN: zadavam email...")
    page.get_by_label("Email").fill(EMAIL)
    page.get_by_role("button", name="Next").click()
    page.wait_for_load_state("networkidle")

    log("LOGIN: zadavam heslo...")
    page.get_by_label("Password").fill(PASSWORD)
    page.get_by_role("button", name="Verify").click()

    log("LOGIN: cekam na presmerovani po prihlaseni...")
    # Wait until the URL no longer carries a "code=" parameter
    # (presumably the auth-code redirect step — confirm against the portal).
    page.wait_for_url(lambda url: "code=" not in url, timeout=60000)
    page.wait_for_load_state("networkidle", timeout=60000)
    page.wait_for_timeout(2000)
    log(f"LOGIN: prihlaseni OK ({page.url})")


def download_report(page, report):
    """Open one report page and export its grid to a timestamped CSV in OUT_DIR.

    Args:
        page: Playwright sync ``Page`` that is already logged in.
        report: one entry of REPORTS (keys: site, study, type, url, filename).

    Returns:
        True if a CSV was downloaded and saved, False if the site showed the
        'No Data' overlay and the export was skipped.

    Raises:
        Playwright errors (e.g. timeouts) propagate to the caller.
    """
    log(f"=== Centrum {report['site']} / {report['type']} (studie {report['study']}) ===")
    log("KROK 1/5: navigace na report URL...")
    page.goto(report["url"])
    log(f"KROK 1/5: stranka nactena ({page.url})")

    # The grid is an AG Grid. Data counts as loaded once rows show up in the
    # grid (.ag-row count goes 0 -> N). Wait for the first row, then for the
    # row count to stabilise (guards against a partial render).
    log("KROK 2/5: cekam na radky gridu (.ag-row) nebo prazdny grid ('No Data')...")
    # AG Grid rows are position-absolute (virtual rendering), so Playwright
    # does not consider them "visible" -> wait for presence in the DOM instead.
    # An empty site renders the no-rows overlay with the text "No Data";
    # detecting it avoids waiting the full 120 s on a site without data.
    page.wait_for_function(
        f"""() => document.querySelectorAll('div.ag-row').length > 0
            || ({_EMPTY_GRID_JS})()""",
        timeout=120000,
    )
    if page.evaluate(_EMPTY_GRID_JS):
        log("KROK 2/5: centrum bez dat ('No Data' overlay) — preskakuji export.")
        return False

    log("KROK 2/5: radky se objevily, cekam na stabilizaci poctu...")
    prev = -1
    stable = False
    for i in range(_STABILIZE_CHECKS):
        cnt = page.locator("div.ag-row").count()
        log(f" ...kontrola #{i+1}: {cnt} radku")
        # Stable = two consecutive checks report the same non-zero count.
        if cnt == prev and cnt > 0:
            stable = True
            break
        prev = cnt
        page.wait_for_timeout(_STABILIZE_INTERVAL_MS)
    page.wait_for_timeout(2000)  # buffer
    if stable:
        log(f"KROK 2/5: data stabilni ({prev} radku v gridu).")
    else:
        # Do not claim stability when the checks simply ran out.
        log(f"KROK 2/5: VAROVANI: pocet radku se neustalil (posledni: {prev}) — pokracuji exportem.")

    # Three-dots menu: it exists twice on the page (one hidden), so click the
    # VISIBLE more_horiz button.
    log("KROK 3/5: klikam na viditelne tri tecky (more_horiz)...")
    page.locator("ag-export button:visible", has_text="more_horiz").first.click()
    log("KROK 3/5: menu otevreno.")

    timestamp = datetime.now().strftime("%Y-%m-%d_%H%M%S")
    dest = os.path.join(OUT_DIR, f"{timestamp} {report['filename']}")

    log("KROK 4/5: klikam na 'Export to CSV' a cekam na stahovani...")
    with page.expect_download(timeout=60000) as dl:
        # "Export to CSV" is in the DOM twice (one hidden) -> click the VISIBLE one.
        page.locator("mdl-menu-item:visible", has_text="Export to CSV").first.click()
    log("KROK 4/5: stahovani zachyceno, ukladam soubor...")
    dl.value.save_as(dest)
    log(f"KROK 5/5: HOTOVO -> {dest}")
    return True


def main():
    """Launch the browser, log in and download every report listed in REPORTS.

    A failure on one report is logged and the run continues with the next one;
    a summary of downloaded, skipped (no data) and failed reports is logged at
    the end.
    """
    with sync_playwright() as p:
        context = p.chromium.launch_persistent_context(
            user_data_dir=PROFILE_DIR,
            headless=False,
            args=[
                "--disable-blink-features=AutomationControlled",
                "--start-maximized",
                "--disable-restore-session-state",
                "--disable-session-crashed-bubble",
            ],
            no_viewport=True,
            user_agent=(
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                "(KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"
            ),
            accept_downloads=True,
        )
        try:
            # Make navigator.webdriver report undefined in every page.
            context.add_init_script(
                "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
            )
            page = context.new_page()
            log("START: prohlizec spusten.")

            login(page)

            ok, skipped, failed = 0, 0, []
            for idx, report in enumerate(REPORTS, 1):
                log(f">>> Report {idx}/{len(REPORTS)}")
                try:
                    # Only count a report as done when a CSV was actually saved.
                    if download_report(page, report):
                        ok += 1
                    else:
                        skipped += 1
                except Exception as e:
                    # Top-level boundary: log the failure and carry on.
                    failed.append(f"{report['site']}/{report['type']}")
                    log(f"CHYBA u centra {report['site']}/{report['type']}: {e!r} — pokracuji dalsim.")

            log(f"KONEC: hotovo {ok}/{len(REPORTS)} reportu.")
            if skipped:
                log(f"KONEC: bez dat (preskoceno): {skipped}")
            if failed:
                log(f"KONEC: SELHALA centra: {', '.join(failed)}")
        finally:
            # Close the persistent context even when login or the loop raises.
            context.close()


if __name__ == "__main__":
    main()