from playwright.sync_api import sync_playwright import os import pandas as pd # ── CONFIG ────────────────────────────────────────────────────────────────── BASE_URL = "https://janssen.4gclinical.com" EMAIL = "vbuzalka@its.jnj.com" PASSWORD = "Vlado123++-+" STUDY = "42847922MDD3003" #STUDY = "77242113UCO3001" OUTPUT_DIR = f"xls_shipment_details_{STUDY}" # ──────────────────────────────────────────────────────────────────────────── def get_cz_shipment_ids(study): path = f"xls_shipments_{study}/shipments_report_{study}.xlsx" if not os.path.exists(path): return None df = pd.read_excel(path, header=5) df.columns = df.columns.str.strip() df = df.dropna(how="all") df["Shipment ID"] = df["Shipment ID"].astype(str).str.strip() cz = df[df["Location"].str.contains("Czech", na=False, case=False)] return cz["Shipment ID"].tolist() def run(page, study): output_dir = f"xls_shipment_details_{study}" os.makedirs(output_dir, exist_ok=True) page.goto(f"{BASE_URL}/report/shipment_details_report") page.wait_for_load_state("networkidle", timeout=120000) cz_ids = get_cz_shipment_ids(study) if cz_ids is not None: shipments = cz_ids print(f" Filtrovano ze shipments reportu: {len(shipments)} CZ shipmentu") else: page.locator('input[placeholder="search"], input[type="text"]').first.click() page.wait_for_timeout(1000) shipments = [s.strip() for s in page.locator('mat-option').all_inner_texts() if s.strip() and s.strip() != "No results found"] print(f" Nalezeno {len(shipments)} shipmentu z dropdownu") page.keyboard.press("Escape") page.wait_for_timeout(500) if not shipments: print(" Zadne shipments — preskakuji.") return for shipment in shipments: filename = os.path.join(output_dir, f"shipment_details_{shipment}.xlsx") if os.path.exists(filename): print(f" [{shipment}] Preskakuji — existuje.") continue print(f" [{shipment}] Stahuji...") input_field = page.locator('input[placeholder="search"], input[type="text"]').first input_field.click() input_field.fill(shipment) page.wait_for_timeout(500) page.locator('mat-option').first.dispatch_event('click') page.wait_for_load_state("networkidle", timeout=120000) with page.expect_download(timeout=120000) as dl: page.get_by_role("button", name="Download XLS").click() dl.value.save_as(filename) print(f" [{shipment}] OK") page.get_by_role("button", name="Clear").click() page.wait_for_load_state("networkidle", timeout=120000) print(" Shipment details hotovo.") if __name__ == "__main__": from playwright.sync_api import sync_playwright with sync_playwright() as p: browser = p.chromium.launch(headless=False) context = browser.new_context(accept_downloads=True) page = context.new_page() page.goto(BASE_URL) page.wait_for_load_state("networkidle") page.get_by_label("Email *").fill(EMAIL) page.get_by_label("Password *").fill(PASSWORD) page.locator('#login__submit').click() page.wait_for_load_state("networkidle") page.get_by_label("Study *").click() page.get_by_role("option", name=STUDY).click() page.get_by_role("button", name="SELECT").click() page.wait_for_load_state("networkidle") run(page, STUDY) browser.close()