Files
janssen/Covance_UCO3001/Trash/download_test_results_v1.1.py
T

176 lines
7.1 KiB
Python

# =============================================================================
# Name:        download_test_results_v1.1.py
# Version:     1.1
# Date:        2026-05-29
# Description: Downloads Test Results from xsp.labcorp.com for 2 studies
#              (36940, 35472) and both report types (Standard + Microbiology),
#              across all sites (centers).
#              Waits for the AG Grid rows (.ag-row) to load; a site whose grid
#              is empty ('No Data') is skipped.
#              Output: timestamped CSV files in the Source/ directory.
# Changes in v1.1: + study 35472, + report type microbiology
#                  (previously only 36940/standard).
# =============================================================================
from playwright.sync_api import sync_playwright
from datetime import datetime
import os
def log(msg):
    """Print *msg* to stdout prefixed with the current local time as ``[HH:MM:SS]``.

    The stream is flushed on every call so progress shows up immediately.
    """
    print(f"[{datetime.now():%H:%M:%S}] {msg}", flush=True)
# Credentials, endpoints and local paths.
#
# EMAIL, PASSWORD and OUT_DIR can be overridden through the XSP_EMAIL,
# XSP_PASSWORD and XSP_OUT_DIR environment variables; an unset or empty
# variable falls back to the built-in default below.
#
# SECURITY NOTE(review): the account e-mail and password are still embedded in
# the source as fallback defaults. Supply them through the environment, then
# remove the literals from the repository and rotate the password.
EMAIL = os.environ.get("XSP_EMAIL") or "vbuzalka@its.jnj.com"
PASSWORD = os.environ.get("XSP_PASSWORD") or "%zT3Wqfc9)cWua5"

# Page opened first by the login step.
LOGIN_URL = "https://xsp.covance.com/"

# Directory that receives the downloaded CSV files.
OUT_DIR = os.environ.get("XSP_OUT_DIR") or r"U:\PythonProject\Janssen\Covance_UCO3001\Source"

# Persistent browser profile stored next to this script.
PROFILE_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "browser_profile")
# Studies and the internal site (center) numbers belonging to each of them.
#   36940 = 77242113UCO3001 (UC) — site list taken from SITES in download_equeries_report
#   35472 = second study (MDD)
STUDIES = [
    {
        "study": "36940",
        "sites": [
            "930551", "930556", "930525", "930549", "930543", "930547",
            "930555", "930557", "930539", "930536", "930553", "930531",
        ],
    },
    {
        "study": "35472",
        "sites": ["898745", "898739", "898733", "898744", "898727"],
    },
]

# Report types: "slug" is the tab segment of the URL, "suffix" goes into the
# output file name.
REPORT_TYPES = [
    {"slug": "standard-test-results", "suffix": "standard"},
    {"slug": "microbiology", "suffix": "microbiology"},
]


def _iter_reports():
    """Yield one report descriptor per (study, site, report type) combination.

    Iteration order is study -> site -> report type, so both report types of a
    site are adjacent in the resulting sequence.
    """
    for study_cfg in STUDIES:
        study_id = study_cfg["study"]
        for site_id in study_cfg["sites"]:
            for kind in REPORT_TYPES:
                suffix = kind["suffix"]
                yield {
                    "site": site_id,
                    "study": study_id,
                    "type": suffix,
                    "url": f"https://xsp.labcorp.com/sponsor/study/{study_id}/test-results/{site_id}/{kind['slug']}",
                    "filename": f"sponsor-study-{study_id}-test-results-{site_id}-{suffix}.csv",
                }


# Flat work list consumed by the download loop.
REPORTS = list(_iter_reports())
def login(page):
    """Sign in through the two-step form (e-mail, then password).

    Opens LOGIN_URL and waits for the network to go idle. If no field labelled
    "Email" is visible at that point, the session is treated as already active
    and the function returns without entering credentials. Otherwise it submits
    EMAIL, then PASSWORD, and waits until the URL no longer contains ``code=``
    and the page has settled.

    Args:
        page: Playwright page used for the whole run.
    """
    redirect_timeout_ms = 60000

    def callback_finished(url):
        # The post-login redirect is considered done once "code=" has left the URL.
        return "code=" not in url

    log("LOGIN: otviram login stranku...")
    page.goto(LOGIN_URL)
    page.wait_for_load_state("networkidle")

    email_field = page.get_by_label("Email")
    if not email_field.is_visible():
        log(f"LOGIN: session uz aktivni, prihlaseni preskoceno ({page.url})")
        return

    # Step 1: e-mail.
    log("LOGIN: zadavam email...")
    email_field.fill(EMAIL)
    page.get_by_role("button", name="Next").click()
    page.wait_for_load_state("networkidle")

    # Step 2: password.
    log("LOGIN: zadavam heslo...")
    page.get_by_label("Password").fill(PASSWORD)
    page.get_by_role("button", name="Verify").click()

    log("LOGIN: cekam na presmerovani po prihlaseni...")
    page.wait_for_url(callback_finished, timeout=redirect_timeout_ms)
    page.wait_for_load_state("networkidle", timeout=redirect_timeout_ms)
    page.wait_for_timeout(2000)  # short settle time after the redirect
    log(f"LOGIN: prihlaseni OK ({page.url})")
# JS predicate: true when the grid has no rows AND a *visible* "No Data" overlay.
# The overlay wrapper class is .ag-overlay-no-rows-wrapper (NOT -no-rows-center),
# and the page contains two overlays, one of them hidden, so only an overlay
# with offsetParent !== null counts.
_EMPTY_GRID_JS = """() => {
    if (document.querySelectorAll('div.ag-row').length > 0) return false;
    return [...document.querySelectorAll('.ag-overlay-no-rows-wrapper')]
        .some(e => e.offsetParent !== null);
}"""

# JS predicate: true as soon as the grid has at least one row or is visibly empty.
_GRID_READY_JS = f"""() => document.querySelectorAll('div.ag-row').length > 0
    || ({_EMPTY_GRID_JS})()"""


def _wait_for_grid(page):
    """Wait (up to 120 s) for grid rows or the 'No Data' overlay; return True if rows exist."""
    # AG Grid rows are absolutely positioned (virtual rendering), so Playwright
    # does not regard them as "visible" -> wait for DOM presence instead.
    # Detecting the overlay avoids waiting the full timeout on an empty site.
    page.wait_for_function(_GRID_READY_JS, timeout=120000)
    return not page.evaluate(_EMPTY_GRID_JS)


def _wait_for_stable_row_count(page, checks=20, interval_ms=2000):
    """Poll the .ag-row count until two consecutive polls agree.

    Returns:
        (count, stabilized): the last observed row count and whether two
        consecutive polls returned the same non-zero value within *checks* polls.
    """
    previous = -1
    for attempt in range(1, checks + 1):
        current = page.locator("div.ag-row").count()
        log(f" ...kontrola #{attempt}: {current} radku")
        if current == previous and current > 0:
            return current, True
        previous = current
        page.wait_for_timeout(interval_ms)
    return previous, False


def _export_csv(page, report):
    """Open the export menu, trigger 'Export to CSV' and save the download; return the path."""
    # The page holds two <ag-export> elements (one hidden): click the VISIBLE
    # more_horiz button.
    log("KROK 3/5: klikam na viditelne tri tecky (more_horiz)...")
    page.locator("ag-export button:visible", has_text="more_horiz").first.click()
    log("KROK 3/5: menu otevreno.")

    timestamp = datetime.now().strftime("%Y-%m-%d_%H%M%S")
    dest = os.path.join(OUT_DIR, f"{timestamp} {report['filename']}")

    log("KROK 4/5: klikam na 'Export to CSV' a cekam na stahovani...")
    with page.expect_download(timeout=60000) as dl:
        # "Export to CSV" exists twice in the DOM (one hidden) -> click the VISIBLE one.
        page.locator("mdl-menu-item:visible", has_text="Export to CSV").first.click()
    log("KROK 4/5: stahovani zachyceno, ukladam soubor...")
    dl.value.save_as(dest)
    return dest


def download_report(page, report):
    """Download one report as a timestamped CSV file into OUT_DIR.

    Navigates to ``report["url"]`` and waits for the AG Grid (inside
    <covance-ag-grid>) to show rows. A site whose grid shows the 'No Data'
    overlay is skipped without exporting. Otherwise the function waits for the
    row count to stop changing (guarding against a partial render), then
    exports the grid through the "more_horiz" menu.

    Args:
        page: Playwright page with an authenticated session.
        report: dict with the keys "site", "study", "type", "url" and
            "filename", as built in REPORTS.

    Returns:
        None, both after a successful export and after skipping an empty site.

    Raises:
        Whatever Playwright raises when a wait, click or download times out.
    """
    log(f"=== Centrum {report['site']} / {report['type']} (studie {report['study']}) ===")
    log("KROK 1/5: navigace na report URL...")
    page.goto(report["url"])
    log(f"KROK 1/5: stranka nactena ({page.url})")

    log("KROK 2/5: cekam na radky gridu (.ag-row) nebo prazdny grid ('No Data')...")
    if not _wait_for_grid(page):
        log("KROK 2/5: centrum bez dat ('No Data' overlay) — preskakuji export.")
        return

    log("KROK 2/5: radky se objevily, cekam na stabilizaci poctu...")
    row_count, stabilized = _wait_for_stable_row_count(page)  # max ~40 s
    page.wait_for_timeout(2000)  # buffer
    if stabilized:
        log(f"KROK 2/5: data stabilni ({row_count} radku v gridu).")
    else:
        # Previously this case was reported as "stable" as well; be explicit
        # that the export proceeds on a count that was still changing.
        log(f"KROK 2/5: POZOR — pocet radku se neustalil, pokracuji s {row_count} radky.")

    dest = _export_csv(page, report)
    log(f"KROK 5/5: HOTOVO -> {dest}")
def main():
    """Launch the browser, log in and download every report listed in REPORTS.

    A failure on one report is logged and recorded, and the loop moves on to
    the next report. The browser context is closed even if login or anything
    else outside the per-report handler raises.
    """
    with sync_playwright() as playwright:
        context = playwright.chromium.launch_persistent_context(
            user_data_dir=PROFILE_DIR,
            headless=False,
            args=[
                "--disable-blink-features=AutomationControlled",
                "--start-maximized",
                "--disable-restore-session-state",
                "--disable-session-crashed-bubble",
            ],
            no_viewport=True,
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
            accept_downloads=True,
        )
        try:
            # Make navigator.webdriver read as undefined on every page.
            context.add_init_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
            page = context.new_page()
            log("START: prohlizec spusten.")
            login(page)

            # NOTE: `ok` counts every report for which download_report returned
            # normally, which includes sites skipped because they had no data.
            ok, failed = 0, []
            total = len(REPORTS)
            for idx, report in enumerate(REPORTS, 1):
                log(f">>> Report {idx}/{total}")
                try:
                    download_report(page, report)
                    ok += 1
                except Exception as e:  # top-level boundary: log and continue with the next report
                    failed.append(f"{report['site']}/{report['type']}")
                    log(f"CHYBA u centra {report['site']}/{report['type']}: {e!r} — pokracuji dalsim.")

            log(f"KONEC: hotovo {ok}/{total} reportu.")
            if failed:
                log(f"KONEC: SELHALA centra: {', '.join(failed)}")
        finally:
            # Always release the persistent context, even when login() raised.
            context.close()


if __name__ == "__main__":
    main()