This commit is contained in:
2026-05-29 13:37:26 +02:00
parent d290be0f6b
commit f02d6dcb8a
80 changed files with 107 additions and 1090 deletions
+20 -17
View File
@@ -1,11 +1,11 @@
# =============================================================================
# Název: download_samples_report_v1.1.py
# Verze: 1.1
# Datum: 2026-05-28
# Datum: 2026-05-29
# Popis: Automatické stažení CSV reportu All Samples ze xsp.labcorp.com pro
# studie 77242113UCO3001 (study 36940) a 42847922MDD3003 (study 35472).
# Aplikuje country filtr CZ, date range od FROM_DATE, čeká na stabilní
# Record Count. Výstup do příslušné složky Source/ každé studie.
# Aplikuje country filtr CZ, date range od FROM_DATE, čeká na zmizení
# "Fetching Data" před exportem. Výstup do Source/ každé studie.
# =============================================================================
from playwright.sync_api import sync_playwright
from datetime import datetime
@@ -31,6 +31,9 @@ TILE_SUFFIX = "allSamples"
def login(page):
page.goto(LOGIN_URL)
page.wait_for_load_state("networkidle", timeout=120000)
if not page.get_by_label("Email").is_visible():
print(f"Session aktivni, prihlasen: {page.url}")
return
page.get_by_label("Email").fill(EMAIL)
page.get_by_role("button", name="Next").click()
page.wait_for_load_state("networkidle", timeout=120000)
@@ -83,19 +86,14 @@ def export_tile(page, tile_label, file_suffix, timestamp, study_id, out_dir):
page.wait_for_load_state("networkidle", timeout=120000)
page.wait_for_timeout(3000)
# Čekej dokud:
# 1. se neobjeví "No Data Available" (= record count je 0), nebo
# 2. record count není nenulový
page.wait_for_function("""() => {
const noData = document.querySelector('div.table-row.no-data');
if (noData) return true;
const countEl = document.querySelector('div.grid-count span');
if (countEl) {
const n = parseInt(countEl.innerText.trim().replace(/,/g, ''), 10);
return !isNaN(n) && n > 0;
}
return false;
}""", timeout=30000)
# Čekej až zmizí "Fetching Data": po filtru 5s, pak opakuj kontrolu každých 5s
page.wait_for_timeout(5000)
for _ in range(24): # max 2 minuty
if not page.get_by_text("Fetching Data").is_visible():
break
print(" Fetching Data... cekam 5s")
page.wait_for_timeout(5000)
page.wait_for_timeout(5000) # extra buffer po zmizení
if page.locator("div.table-row.no-data").is_visible():
print(f" Record Count: 0 — preskakuji.")
@@ -128,7 +126,12 @@ if __name__ == "__main__":
context = p.chromium.launch_persistent_context(
user_data_dir=PROFILE_DIR,
headless=False,
args=["--disable-blink-features=AutomationControlled", "--start-maximized"],
args=[
"--disable-blink-features=AutomationControlled",
"--start-maximized",
"--disable-restore-session-state",
"--disable-session-crashed-bubble",
],
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
accept_downloads=True,
no_viewport=True,