Initial commit — clean history (removed large test files, browser profiles, Medidata/Clario downloads)
This commit is contained in:
@@ -0,0 +1,173 @@
|
||||
# =============================================================================
# Name:    download_test_results_v1.0.py
# Version: 1.0
# Date:    2026-05-29
# Purpose: Downloads test-result reports (Standard Test Results and
#          Microbiology) from xsp.labcorp.com for every study and site
#          listed in STUDIES.
#          Waits for the AG Grid rows (.ag-row) to load before exporting.
# Output:  Timestamped CSV files in the Source/ directory.
# =============================================================================
|
||||
from playwright.sync_api import sync_playwright
|
||||
from datetime import datetime
|
||||
import os
|
||||
|
||||
|
||||
def log(msg: str) -> None:
    """Print ``msg`` to stdout prefixed with the current wall-clock time.

    The line has the form ``[HH:MM:SS] msg`` and stdout is flushed after
    every call, so progress stays visible when the output is piped.

    Args:
        msg: Text to print after the time prefix.
    """
    stamp = datetime.now().strftime("%H:%M:%S")
    print(f"[{stamp}] {msg}", flush=True)
|
||||
|
||||
# Credentials are read from the environment so that no secret lives in the
# repository. Set XSP_EMAIL and XSP_PASSWORD before running the script; both
# default to an empty string when unset.
# NOTE(security): a password was previously hard-coded on this line and has
# been committed to version control -- treat it as compromised and rotate it.
EMAIL = os.environ.get("XSP_EMAIL", "")
PASSWORD = os.environ.get("XSP_PASSWORD", "")

# Page that login() opens first.
LOGIN_URL = "https://xsp.covance.com/"

# Directory that receives the timestamped CSV exports.
OUT_DIR = r"U:\PythonProject\Janssen\Covance_UCO3001\Source"

# "browser_profile" folder next to this script; passed as the user data dir
# of the persistent browser context.
PROFILE_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "browser_profile")
|
||||
|
||||
# Studies and the internal site numbers that belong to each of them.
#   36940 = 77242113UCO3001 (site list taken from download_equeries_report SITES)
#   35472 = second study
STUDIES = [
    {
        "study": "36940",
        "sites": [
            "930551", "930556", "930525", "930549", "930543", "930547",
            "930555", "930557", "930539", "930536", "930553", "930531",
        ],
    },
    {
        "study": "35472",
        "sites": [
            "898745", "898739", "898733", "898744", "898727",
        ],
    },
]

# Report types: the tab slug used in the URL plus the suffix used in the
# output file name.
REPORT_TYPES = [
    {"slug": "standard-test-results", "suffix": "standard"},
    {"slug": "microbiology", "suffix": "microbiology"},
]

# One entry per (study, site, report type) combination, ordered
# study -> site -> report type. Each entry carries the page URL to open and
# the base file name of the CSV to write.
REPORTS = [
    {
        "site": sid,
        "study": st["study"],
        "type": rt["suffix"],
        "url": f"https://xsp.labcorp.com/sponsor/study/{st['study']}/test-results/{sid}/{rt['slug']}",
        "filename": f"sponsor-study-{st['study']}-test-results-{sid}-{rt['suffix']}.csv",
    }
    for st in STUDIES
    for sid in st["sites"]
    for rt in REPORT_TYPES
]
|
||||
|
||||
|
||||
def login(page):
    """Sign in to the portal unless a session is already active.

    Opens ``LOGIN_URL`` and waits for the network to go idle. If no field
    labelled "Email" is visible, the session is taken to be active and the
    function returns without touching the form. Otherwise it fills the
    two-step form (email -> "Next", password -> "Verify") and waits for the
    post-login redirect to finish.

    Args:
        page: Playwright sync ``Page`` to drive.

    Raises:
        RuntimeError: If the login form is shown but ``EMAIL`` or
            ``PASSWORD`` is empty.
    """
    log("LOGIN: otviram login stranku...")
    page.goto(LOGIN_URL)
    page.wait_for_load_state("networkidle")

    email_field = page.get_by_label("Email")
    if not email_field.is_visible():
        log(f"LOGIN: session uz aktivni, prihlaseni preskoceno ({page.url})")
        return

    # Fail fast with a clear message instead of submitting empty fields and
    # running into the 60 s redirect timeout below.
    if not EMAIL or not PASSWORD:
        raise RuntimeError("Login form is shown but EMAIL or PASSWORD is empty.")

    log("LOGIN: zadavam email...")
    email_field.fill(EMAIL)
    page.get_by_role("button", name="Next").click()
    page.wait_for_load_state("networkidle")

    log("LOGIN: zadavam heslo...")
    page.get_by_label("Password").fill(PASSWORD)
    page.get_by_role("button", name="Verify").click()

    log("LOGIN: cekam na presmerovani po prihlaseni...")
    # Wait until the URL no longer contains "code=".
    # NOTE(review): presumably the authorization-code callback of the
    # identity provider -- confirm.
    page.wait_for_url(lambda url: "code=" not in url, timeout=60000)
    page.wait_for_load_state("networkidle", timeout=60000)
    page.wait_for_timeout(2000)  # fixed extra pause after network idle
    log(f"LOGIN: prihlaseni OK ({page.url})")
|
||||
|
||||
|
||||
def download_report(page, report):
    """Export one report grid to a timestamped CSV file in ``OUT_DIR``.

    Steps: open the report URL, wait until the AG Grid either contains rows
    or shows its "No Data" overlay, poll until the DOM row count stops
    changing, open the export menu and save the "Export to CSV" download.

    Args:
        page: Playwright sync ``Page``; ``login()`` is expected to have run.
        report: Entry of ``REPORTS``; the keys ``site``, ``type``, ``study``,
            ``url`` and ``filename`` are read.

    Returns:
        Path of the saved CSV file, or ``None`` when the grid reported no
        data and the export was skipped.
    """
    log(f"=== Centrum {report['site']} / {report['type']} (studie {report['study']}) ===")

    log("KROK 1/5: navigace na report URL...")
    page.goto(report["url"])
    log(f"KROK 1/5: stranka nactena ({page.url})")

    # The grid is an AG Grid inside <covance-ag-grid>. Data counts as loaded
    # once rows appear in it (.ag-row goes from 0 to N). Wait for the first
    # row, then for the count to settle (guards against a partial render).
    log("KROK 2/5: cekam na radky gridu (.ag-row) nebo prazdny grid ('No Data')...")
    # AG Grid rows are position-absolute (virtual rendering), so Playwright
    # does not consider them "visible" -> wait for DOM presence instead.
    # Empty site: AG Grid renders a no-rows overlay with the text "No Data"
    # in .ag-overlay-no-rows-wrapper. Careful: the class is NOT
    # -no-rows-center, and the page holds two overlays (one hidden), so only
    # a visible one (offsetParent != null) counts. Detecting this avoids
    # waiting the full 120 s on a site without data.
    empty_grid_js = """() => {
        if (document.querySelectorAll('div.ag-row').length > 0) return false;
        return [...document.querySelectorAll('.ag-overlay-no-rows-wrapper')]
            .some(e => e.offsetParent !== null);
    }"""
    page.wait_for_function(
        f"""() => document.querySelectorAll('div.ag-row').length > 0
            || ({empty_grid_js})()""",
        timeout=120000,
    )
    if page.evaluate(empty_grid_js):
        log("KROK 2/5: centrum bez dat ('No Data' overlay) — preskakuji export.")
        return None

    log("KROK 2/5: radky se objevily, cekam na stabilizaci poctu...")
    previous = -1
    stable = False
    for attempt in range(1, 21):  # at most ~40 s of polling
        current = page.locator("div.ag-row").count()
        log(f" ...kontrola #{attempt}: {current} radku")
        if current == previous and current > 0:
            stable = True
            break
        previous = current
        page.wait_for_timeout(2000)
    page.wait_for_timeout(2000)  # buffer
    if stable:
        log(f"KROK 2/5: data stabilni ({previous} radku v gridu).")
    else:
        # All polls were used up without two equal consecutive counts; the
        # export still goes ahead, but the log must not claim stability.
        log(f"KROK 2/5: VAROVANI: pocet radku se neustalil ({previous} radku v gridu), pokracuji s exportem.")

    # Three-dots menu: the page holds two <ag-export> elements (one hidden),
    # so click the VISIBLE more_horiz button.
    log("KROK 3/5: klikam na viditelne tri tecky (more_horiz)...")
    page.locator("ag-export button:visible", has_text="more_horiz").first.click()
    log("KROK 3/5: menu otevreno.")

    timestamp = datetime.now().strftime("%Y-%m-%d_%H%M%S")
    dest = os.path.join(OUT_DIR, f"{timestamp} {report['filename']}")
    log("KROK 4/5: klikam na 'Export to CSV' a cekam na stahovani...")
    with page.expect_download(timeout=60000) as dl:
        # "Export to CSV" exists twice in the DOM (one hidden) -> click the
        # VISIBLE one.
        page.locator("mdl-menu-item:visible", has_text="Export to CSV").first.click()
    log("KROK 4/5: stahovani zachyceno, ukladam soubor...")
    dl.value.save_as(dest)
    log(f"KROK 5/5: HOTOVO -> {dest}")
    return dest
|
||||
|
||||
|
||||
def main():
    """Launch the persistent browser, log in and download every report.

    Iterates over ``REPORTS`` in order. A failure of a single report is
    logged and recorded, and the loop continues with the next one. The
    browser context is closed even when set-up, login or the loop raises.
    """
    with sync_playwright() as p:
        context = p.chromium.launch_persistent_context(
            user_data_dir=PROFILE_DIR,
            headless=False,
            args=[
                "--disable-blink-features=AutomationControlled",
                "--start-maximized",
                "--disable-restore-session-state",
                "--disable-session-crashed-bubble",
            ],
            no_viewport=True,
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
            accept_downloads=True,
        )
        try:
            # Make navigator.webdriver read as undefined on every page.
            context.add_init_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
            page = context.new_page()
            log("START: prohlizec spusten.")
            login(page)

            ok, failed = 0, []
            for idx, report in enumerate(REPORTS, 1):
                log(f">>> Report {idx}/{len(REPORTS)}")
                try:
                    download_report(page, report)
                    ok += 1
                except Exception as e:
                    # Deliberate best-effort: one broken report must not stop
                    # the remaining downloads.
                    failed.append(f"{report['site']}/{report['type']}")
                    log(f"CHYBA u centra {report['site']}/{report['type']}: {e!r} — pokracuji dalsim.")

            log(f"KONEC: hotovo {ok}/{len(REPORTS)} reportu.")
            if failed:
                log(f"KONEC: SELHALA centra: {', '.join(failed)}")
        finally:
            context.close()


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user