Files
janssen/Covance/download_samples_report_v1.1.py
2026-06-09 08:22:49 +02:00

144 lines
5.8 KiB
Python

# =============================================================================
# Name: download_samples_report_v1.1.py
# Version: 1.1
# Date: 2026-05-29
# Description: Automatically downloads the "All Samples" CSV report from
# xsp.labcorp.com for studies 77242113UCO3001 (study 36940) and
# 42847922MDD3003 (study 35472). Applies the CZ country filter and a date
# range starting at FROM_DATE, and waits for "Fetching Data" to disappear
# before exporting. Output goes to each study's Source/ directory.
# =============================================================================
from playwright.sync_api import sync_playwright
from datetime import datetime
import os
# Portal credentials are read from the environment so that no secret lives in
# the source tree. Set XSP_PASSWORD (and optionally XSP_EMAIL) before running.
# NOTE(review): the password that used to be committed here should be treated
# as compromised and rotated.
EMAIL = os.environ.get("XSP_EMAIL", "vbuzalka@its.jnj.com")
PASSWORD = os.environ.get("XSP_PASSWORD", "")

# Entry page used for login and the landing page opened before picking a study.
LOGIN_URL = "https://xsp.covance.com/"
HOME_URL = "https://xsp.labcorp.com/sampletracking/home"

# Start of the date range typed into the date picker before exporting.
FROM_DATE = "01-Jan-2025"

# Persistent browser profile directory, kept next to this script.
PROFILE_DIR = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "browser_profile"
)

# (protocol, Covance study ID, output directory)
STUDIES = [
    ("77242113UCO3001", "36940", r"U:\PythonProject\Janssen\Covance\Source"),
    ("42847922MDD3003", "35472", r"U:\PythonProject\Janssen\Covance\Source"),
]

# Label of the dashboard tile to open and the suffix used in the CSV file name.
TILE_LABEL = "All Samples"
TILE_SUFFIX = "allSamples"
def login(page):
    """Log in to the portal unless a session is already active.

    Opens LOGIN_URL and waits for the network to go idle. If no "Email" field
    is visible, the function reports the current URL and returns without
    touching the credentials. Otherwise it submits EMAIL, then PASSWORD, and
    waits for the post-login page to settle.

    Args:
        page: Playwright sync page used for the whole run.

    Raises:
        RuntimeError: If the login form is shown but EMAIL or PASSWORD is
            empty, so the failure is reported immediately instead of after
            the long post-login waits.
    """
    page.goto(LOGIN_URL)
    page.wait_for_load_state("networkidle", timeout=120000)

    email_field = page.get_by_label("Email")
    if not email_field.is_visible():
        # No login form on the page: treat it as an already active session.
        print(f"Session aktivni, prihlasen: {page.url}")
        return

    # Only validate credentials when they are actually needed.
    if not EMAIL or not PASSWORD:
        raise RuntimeError(
            "Login form is displayed but EMAIL/PASSWORD is not configured."
        )

    email_field.fill(EMAIL)
    page.get_by_role("button", name="Next").click()
    page.wait_for_load_state("networkidle", timeout=120000)

    page.get_by_label("Password").fill(PASSWORD)
    page.get_by_role("button", name="Verify").click()
    # Fixed pause after "Verify"; presumably covers redirects that happen
    # before the network goes idle again -- TODO confirm.
    page.wait_for_timeout(15000)
    page.wait_for_load_state("networkidle", timeout=120000)
    print(f"Prihlaseni OK: {page.url}")
def select_protocol(page, protocol):
    """Open the home page and click the first entry matching *protocol*.

    Args:
        page: Playwright sync page with an authenticated session.
        protocol: Protocol text to look for inside a ``span`` element.
    """
    idle_timeout_ms = 120000
    settle_ms = 3000

    page.goto(HOME_URL)
    page.wait_for_load_state("networkidle", timeout=idle_timeout_ms)
    page.wait_for_timeout(settle_ms)

    # First span whose text contains the protocol identifier.
    protocol_entry = page.locator("span").filter(has_text=protocol).first
    protocol_entry.click()

    page.wait_for_load_state("networkidle", timeout=idle_timeout_ms)
    page.wait_for_timeout(settle_ms)
    print(f"Protokol vybran: {protocol} ({page.url})")
def apply_country_filter(page):
    """Tick the "Czech Republic" checkbox in the country dropdown.

    Args:
        page: Playwright sync page showing the selected study.
    """
    short_pause_ms = 2000

    # The dropdown is addressed by position: the fourth element whose text is
    # "keyboard_arrow_down". NOTE(review): presumably the country selector --
    # confirm if the page layout changes.
    dropdown_arrow = page.get_by_text("keyboard_arrow_down").nth(3)
    dropdown_arrow.click()
    page.wait_for_timeout(short_pause_ms)

    czech_checkbox = page.get_by_role("checkbox", name="Czech Republic")
    czech_checkbox.check()
    page.wait_for_timeout(short_pause_ms)

    # Presumably moves the pointer away so the dropdown closes -- TODO confirm.
    page.mouse.move(0, -50, steps=5)
    page.wait_for_load_state("networkidle", timeout=120000)
    page.wait_for_timeout(3000)
    print("Country filter CZ aplikovan.")
def _wait_for_fetch_to_finish(page):
    """Poll until the "Fetching Data" indicator is no longer visible.

    Checks every 5 seconds, at most 24 times (about 2 minutes).

    Returns:
        True if the indicator is gone, False if it is still visible after
        the last wait.
    """
    for _ in range(24):  # max 2 minutes
        if not page.get_by_text("Fetching Data").is_visible():
            return True
        print(" Fetching Data... cekam 5s")
        page.wait_for_timeout(5000)
    # One last look so a late disappearance is not reported as a timeout.
    return not page.get_by_text("Fetching Data").is_visible()


def export_tile(page, tile_label, file_suffix, timestamp, study_id, out_dir):
    """Open a dashboard tile, set the start date and export the grid as CSV.

    Args:
        page: Playwright sync page showing the study dashboard.
        tile_label: Text of the tile label whose "view sample" button is clicked.
        file_suffix: Suffix placed at the end of the output file name.
        timestamp: Timestamp string placed at the start of the output file name.
        study_id: Study ID embedded in the output file name.
        out_dir: Directory the CSV is saved into.

    Returns:
        None. Returns early without exporting when the grid shows its
        "no data" row.
    """
    # Click "View Samples" on the matching tile.
    page.locator("div.study-group-card").filter(
        has=page.locator("span.label", has_text=tile_label)
    ).locator("button.view-sample").click()
    page.wait_for_load_state("networkidle", timeout=120000)
    page.wait_for_timeout(3000)
    print(f" Otevreno: {tile_label} ({page.url})")

    # Date picker: select the existing text in the first date input, then
    # replace it with FROM_DATE.
    page.get_by_role("button", name="DD/MM - DD/MM").click()
    page.wait_for_timeout(2000)
    from_input = page.get_by_role("textbox", name="Date input field").first
    from_input.click()
    from_input.press("End")
    from_input.press("Shift+Home")
    from_input.fill(FROM_DATE)
    page.wait_for_timeout(500)
    page.get_by_role("button", name="Apply").click()
    page.wait_for_load_state("networkidle", timeout=120000)
    page.wait_for_timeout(3000)

    # Wait for "Fetching Data" to disappear: 5 s after the filter, then poll.
    page.wait_for_timeout(5000)
    if not _wait_for_fetch_to_finish(page):
        # Stay best-effort, but make it visible that the export may be partial.
        print(" VAROVANI: Fetching Data stale zobrazeno, export muze byt neuplny.")
    page.wait_for_timeout(5000)  # extra buffer after the indicator is gone

    if page.locator("div.table-row.no-data").is_visible():
        print(" Record Count: 0 — preskakuji.")
        return

    count_str = (
        page.locator("div.grid-count span").first.inner_text().strip().replace(",", "")
    )
    # -1 marks a count that could not be parsed; it is only used for logging.
    count = int(count_str) if count_str.isdigit() else -1
    print(f" Record Count: {count}")

    with page.expect_download(timeout=120000) as dl:
        page.get_by_role("button", name="Export arrow_drop_down").click()
        page.wait_for_timeout(1000)
        page.get_by_text("Export As CSV").click()

    dest = os.path.join(
        out_dir, f"{timestamp} sponsor-study-{study_id}-samples-{file_suffix}.csv"
    )
    dl.value.save_as(dest)
    print(f" Stazeno: {dest}")
def download(page):
    """Export the configured tile for every entry in STUDIES.

    A single timestamp is taken once at the start and shared by all output
    file names of the run.

    Args:
        page: Playwright sync page with an authenticated session.
    """
    run_stamp = datetime.now().strftime("%Y-%m-%d_%H%M%S")

    for study in STUDIES:
        protocol, study_id, out_dir = study
        print(f"\n--- Studie: {protocol} ---")
        select_protocol(page, protocol)
        apply_country_filter(page)
        export_tile(
            page,
            TILE_LABEL,
            TILE_SUFFIX,
            run_stamp,
            study_id,
            out_dir,
        )

    print("\nHotovo.")
def main():
    """Launch Chromium with the persistent profile, log in and run the download.

    The browser context is closed in a ``finally`` block so that it is shut
    down even when login or the download raises.
    """
    with sync_playwright() as p:
        context = p.chromium.launch_persistent_context(
            user_data_dir=PROFILE_DIR,
            headless=False,
            args=[
                "--disable-blink-features=AutomationControlled",
                "--start-maximized",
                "--disable-restore-session-state",
                "--disable-session-crashed-bubble",
            ],
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
            accept_downloads=True,
            no_viewport=True,
        )
        try:
            # Make navigator.webdriver read as undefined on every page.
            context.add_init_script(
                "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
            )
            page = context.new_page()
            login(page)
            download(page)
        finally:
            context.close()


if __name__ == "__main__":
    main()