"""download_drugs.py — download the Drugs reports for one study into IWRS/Incoming/.

Version: 1.1 | Date: 2026-06-10
v1.1: moved to the IWRS/ level

Called from IWRS/run_all_v1.1.py with an already logged-in Playwright page
(login + study selection is handled by common.iwrs_portal.login).

  1. Onsite inventory detail (per site, always downloaded)
  2. IP destruction (per basket; skips baskets already imported into Mongo
     iwrs_destruction — a destruction record does not change)
  3. Shipments report (one file per study, always downloaded)
  4. Shipment details (per CZ shipment; skips shipments whose items are in
     Mongo iwrs_shipment_items with status RECEIVED — the final state)

File names (dated, so that they fit the Incoming/ flow):
  YYYY-MM-DD {study} Onsite Inventory {site}.xlsx
  YYYY-MM-DD {study} IP Destruction {basket}.xlsx
  YYYY-MM-DD {study} Shipments Report.xlsx
  YYYY-MM-DD {study} Shipment Details {shipment_id}.xlsx
"""

import os
import sys
import datetime

import pandas as pd

# Make the sibling ``common`` package importable before importing from it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
if BASE_DIR not in sys.path:
    sys.path.insert(0, BASE_DIR)

from common.iwrs_portal import BASE_URL
from common.paths import INCOMING_DIR, unique_path
from common.mongo_writer import get_db

# Site IDs to download the onsite inventory for, keyed by study code.
SITES = {
    "77242113UCO3001": [
        "DD5-CZ10001", "DD5-CZ10003", "DD5-CZ10006", "DD5-CZ10009",
        "DD5-CZ10010", "DD5-CZ10012", "DD5-CZ10013", "DD5-CZ10015",
        "DD5-CZ10016", "DD5-CZ10020", "DD5-CZ10021", "DD5-CZ10022",
    ],
    "42847922MDD3003": [
        "S10-CZ10002", "S10-CZ10004", "S10-CZ10005",
        "S10-CZ10008", "S10-CZ10011", "S10-CZ10012",
    ],
}

# Timeout (ms) used for every network-idle wait and download wait.
_TIMEOUT_MS = 120000
# Selector of the search box shared by all report pages.
_SEARCH_INPUT = 'input[placeholder="search"], input[type="text"]'


def _today():
    """Return today's local date as ``YYYY-MM-DD`` (the file-name prefix)."""
    return datetime.date.today().strftime("%Y-%m-%d")


# ── page helpers shared by the download functions ────────────────────────────

def _wait_idle(page):
    """Block until the page reports the ``networkidle`` load state."""
    page.wait_for_load_state("networkidle", timeout=_TIMEOUT_MS)


def _download_xls(page, filename):
    """Click "Download XLS" and save the resulting download as *filename*."""
    with page.expect_download(timeout=_TIMEOUT_MS) as dl:
        page.get_by_role("button", name="Download XLS").click()
    dl.value.save_as(filename)


def _clear_filter(page):
    """Click "Clear" and wait for the page to settle again."""
    page.get_by_role("button", name="Clear").click()
    _wait_idle(page)


def _pick_first_match(page, text):
    """Type *text* into the search box and select the first offered option."""
    input_field = page.locator(_SEARCH_INPUT).first
    input_field.click()
    input_field.fill(text)
    page.wait_for_timeout(500)  # give the option list time to re-filter
    page.locator("mat-option").first.dispatch_event("click")
    _wait_idle(page)


# ── skip logic via Mongo (replaces the earlier "file exists" check) ──────────

def get_existing_baskets(study):
    """Return the set of basket IDs already imported into ``iwrs_destruction``.

    Destruction records are treated as immutable, so these baskets need not be
    downloaded again. If Mongo cannot be queried, a warning is printed and an
    empty set is returned so that everything gets downloaded.
    """
    try:
        db = get_db()
        return set(db.iwrs_destruction.distinct("basket_id", {"study": study}))
    except Exception as e:  # deliberate best-effort: fall back to "download all"
        print(f" UPOZORNĚNÍ: nelze načíst košíky z Mongo ({e}), stahuji vše")
        return set()


def get_received_shipments(study):
    """Return the set of shipment IDs with items stored as RECEIVED in Mongo.

    The status is matched case-insensitively against ``received`` in
    ``iwrs_shipment_items``. RECEIVED is treated as the final state. If Mongo
    cannot be queried, a warning is printed and an empty set is returned so
    that everything gets downloaded.
    """
    try:
        db = get_db()
        return set(db.iwrs_shipment_items.distinct(
            "shipment_id",
            {"study": study,
             "shipment_status": {"$regex": "^received$", "$options": "i"}},
        ))
    except Exception as e:  # deliberate best-effort: fall back to "download all"
        print(f" UPOZORNĚNÍ: nelze načíst zásilky z Mongo ({e}), stahuji vše")
        return set()


# ── download functions ───────────────────────────────────────────────────────

def download_inventory(page, study):
    """Download the onsite inventory detail for every site of *study*.

    Args:
        page: Playwright page, already logged in with the study selected.
        study: Study code; must be a key of ``SITES``.

    Raises:
        KeyError: If *study* is not listed in ``SITES``.
    """
    today = _today()
    page.goto(f"{BASE_URL}/report/onsite_inventory_detail")
    _wait_idle(page)

    for site_id in SITES[study]:
        print(f" [{site_id}] inventory...")
        page.locator(_SEARCH_INPUT).first.click()
        page.get_by_role("option", name=site_id).click()
        _wait_idle(page)

        filename = unique_path(
            INCOMING_DIR, f"{today} {study} Onsite Inventory {site_id}")
        _download_xls(page, filename)
        _clear_filter(page)

    print(f" Inventory OK ({len(SITES[study])} center)")


def download_destruction(page, study):
    """Download the IP destruction form for every basket not yet in Mongo.

    The basket list is read from the options offered by the report's search
    dropdown; baskets returned by ``get_existing_baskets`` are skipped.

    Args:
        page: Playwright page, already logged in with the study selected.
        study: Study code used for the Mongo lookup and the file names.
    """
    today = _today()
    page.goto(f"{BASE_URL}/report/ip_destruction_form")
    _wait_idle(page)

    # Open the dropdown once just to read the list of available baskets.
    page.locator(_SEARCH_INPUT).first.click()
    page.wait_for_timeout(1000)
    option_texts = (
        text.strip() for text in page.locator("mat-option").all_inner_texts())
    baskets = [b for b in option_texts if b and b != "No results found"]
    page.keyboard.press("Escape")
    page.wait_for_timeout(500)

    if not baskets:
        print(" Žádné destruction košíky")
        return

    existing = get_existing_baskets(study)
    new_count = 0
    for basket in baskets:
        if basket in existing:
            continue  # a destruction record does not change — skip it
        print(f" [košík {basket}] stahování...")
        _pick_first_match(page, basket)

        filename = unique_path(
            INCOMING_DIR, f"{today} {study} IP Destruction {basket}")
        _download_xls(page, filename)
        new_count += 1
        _clear_filter(page)

    print(f" Destruction OK ({new_count} nových, {len(baskets) - new_count} přeskočeno)")


def download_shipments_report(page, study):
    """Download the shipments report of *study* and return the saved path.

    Args:
        page: Playwright page, already logged in with the study selected.
        study: Study code used in the file name.

    Returns:
        The path produced by ``unique_path`` that the report was saved to.
    """
    today = _today()
    page.goto(f"{BASE_URL}/report/shipments_report")
    _wait_idle(page)

    filename = unique_path(INCOMING_DIR, f"{today} {study} Shipments Report")
    _download_xls(page, filename)
    print(f" Shipments report OK -> {os.path.basename(filename)}")
    return filename


def download_shipment_details(page, study, shipments_report_path):
    """Download the shipment details report for each Czech shipment.

    Shipment IDs are taken from the shipments report at
    *shipments_report_path* (rows whose ``Location`` contains "Czech",
    case-insensitively). Shipments returned by ``get_received_shipments`` are
    skipped.

    Args:
        page: Playwright page, already logged in with the study selected.
        study: Study code used for the Mongo lookup and the file names.
        shipments_report_path: Path of the just-downloaded shipments report.

    Raises:
        KeyError: If the report has no header row containing "Shipment ID",
            or lacks the ``Location`` column.
    """
    today = _today()

    # The header is not necessarily the first row of the sheet: find the row
    # that contains the "Shipment ID" cell and re-read with it as the header.
    raw = pd.read_excel(shipments_report_path, header=None)
    header_row = next(
        (i for i, row in raw.iterrows()
         if "Shipment ID" in [str(v).strip() for v in row]),
        None,
    )
    if header_row is None:
        # Without this guard the file would be re-read with header=None and
        # fail later with an unhelpful KeyError: 'Location'.
        raise KeyError(
            f"'Shipment ID' header row not found in {shipments_report_path}")

    df = pd.read_excel(shipments_report_path, header=header_row)
    df = df.dropna(how="all")
    df = df[df["Location"].astype(str).str.contains("Czech", na=False, case=False)]
    # A row without a Shipment ID would turn into the literal string "nan"
    # below and be typed into the search box — drop such rows.
    df = df.dropna(subset=["Shipment ID"])

    shipment_ids = df["Shipment ID"].astype(str).str.strip()
    if "IRT Shipment Status" in df.columns:
        statuses = df["IRT Shipment Status"].astype(str).str.strip()
    else:
        statuses = [""] * len(df)
    cz_shipments = list(zip(shipment_ids, statuses))
    print(f" CZ zásilek celkem: {len(cz_shipments)}")

    received = get_received_shipments(study)

    page.goto(f"{BASE_URL}/report/shipment_details_report")
    _wait_idle(page)

    skipped = 0
    for shipment, status in cz_shipments:
        if shipment in received:
            skipped += 1
            continue  # items in Mongo already have the final state RECEIVED
        _pick_first_match(page, shipment)

        filename = unique_path(
            INCOMING_DIR, f"{today} {study} Shipment Details {shipment}")
        _download_xls(page, filename)
        print(f" [{shipment}] ({status}) OK")
        _clear_filter(page)

    print(f" Přeskočeno (RECEIVED v Mongo): {skipped}")


def run(page, study):
    """Download all four kinds of Drugs reports for *study* into IWRS/Incoming/.

    Args:
        page: Playwright page, already logged in with the study selected.
        study: Study code; must be a key of ``SITES``.
    """
    os.makedirs(INCOMING_DIR, exist_ok=True)

    print("\n [1/4] Onsite inventory...")
    download_inventory(page, study)

    print("\n [2/4] IP destruction...")
    download_destruction(page, study)

    print("\n [3/4] Shipments report...")
    report_path = download_shipments_report(page, study)

    print("\n [4/4] Shipment details (CZ)...")
    download_shipment_details(page, study, report_path)