# janssen/IWRS/Trash/Drugs/Trash/run_all.py

"""
Complete pipeline for Drugs:
  1. Onsite inventory detail    (per site, always overwritten)
  2. IP destruction             (per basket, skips files that already exist)
  3. Shipments report           (one file per study, overwritten)
  4. Shipment details           (per CZ shipment; overwritten unless the file exists and the shipment is RECEIVED)
  5. Import into MongoDB (studie.iwrs_shipments / iwrs_shipment_items / iwrs_inventory / iwrs_destruction)

Run this script — it processes both studies automatically.
"""

import os
import glob
import re
import datetime

import sys
import pandas as pd
from playwright.sync_api import sync_playwright

import import_to_mongo as drugs_mongo

# Root URL of the IWRS portal; every report URL below is built from it.
BASE_URL = "https://janssen.4gclinical.com"

# Portal credentials. Prefer supplying them through the IWRS_EMAIL and
# IWRS_PASSWORD environment variables; the literals are kept only as a
# backward-compatible fallback so existing runs keep working.
# SECURITY(review): a real password is committed to source here. Rotate it and
# drop the fallback once every runner provides the environment variables.
EMAIL    = os.environ.get("IWRS_EMAIL", "vbuzalka@its.jnj.com")
PASSWORD = os.environ.get("IWRS_PASSWORD", "Vlado123++-+")

# Studies processed by main(), in this order.
STUDIES = ["77242113UCO3001", "42847922MDD3003"]

# Site identifiers per study, used to select each site in the onsite
# inventory report.
SITES = {
    "77242113UCO3001": [
        "DD5-CZ10001", "DD5-CZ10003", "DD5-CZ10006", "DD5-CZ10009",
        "DD5-CZ10010", "DD5-CZ10012", "DD5-CZ10013", "DD5-CZ10015",
        "DD5-CZ10016", "DD5-CZ10020", "DD5-CZ10021", "DD5-CZ10022",
    ],
    "42847922MDD3003": [
        "S10-CZ10002", "S10-CZ10004", "S10-CZ10005",
        "S10-CZ10008", "S10-CZ10011", "S10-CZ10012",
    ],
}

# Directory containing this script; all download folders are created under it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))


# ── login ────────────────────────────────────────────────────────────────────

def login(page, study):
    """Sign in to the portal and select *study* as the active study.

    Args:
        page: Playwright page that is reused for the rest of the session.
        study: Study identifier to pick from the "Study *" dropdown.
    """
    def settle():
        # Wait until the page has no pending network traffic.
        page.wait_for_load_state("networkidle")

    page.goto(BASE_URL)
    settle()

    # Fill in and submit the credentials form.
    for label, value in (("Email *", EMAIL), ("Password *", PASSWORD)):
        page.get_by_label(label).fill(value)
    page.locator("#login__submit").click()
    settle()

    # Choose the study and confirm the selection.
    page.get_by_label("Study *").click()
    page.get_by_role("option", name=study).click()
    page.get_by_role("button", name="SELECT").click()
    settle()


# ── download funkce ──────────────────────────────────────────────────────────

def download_inventory(page, study):
    """Download the onsite inventory detail XLS for every site of *study*.

    Files are saved as ``xls_reports_<study>/onsite_inventory_detail_<site>.xlsx``
    under BASE_DIR; an existing file for a site is overwritten.

    Args:
        page: Logged-in Playwright page.
        study: Study identifier; must be a key of SITES.
    """
    long_wait = 120000  # milliseconds, used for every wait on this report
    search_box = 'input[placeholder="search"], input[type="text"]'

    target_dir = os.path.join(BASE_DIR, f"xls_reports_{study}")
    os.makedirs(target_dir, exist_ok=True)

    page.goto(f"{BASE_URL}/report/onsite_inventory_detail")
    page.wait_for_load_state("networkidle", timeout=long_wait)

    sites = SITES[study]
    for site_id in sites:
        print(f"    [{site_id}] inventory...")
        xlsx_path = os.path.join(target_dir, f"onsite_inventory_detail_{site_id}.xlsx")

        # Open the site filter and pick this site from the option list.
        page.locator(search_box).first.click()
        page.get_by_role("option", name=site_id).click()
        page.wait_for_load_state("networkidle", timeout=long_wait)

        with page.expect_download(timeout=long_wait) as pending:
            page.get_by_role("button", name="Download XLS").click()
        download = pending.value
        download.save_as(xlsx_path)

        # Reset the filter so the next site starts from a clean form.
        page.get_by_role("button", name="Clear").click()
        page.wait_for_load_state("networkidle", timeout=long_wait)

    print(f"    Inventory OK ({len(sites)} center)")


def download_destruction(page, study):
    """Download the IP destruction form XLS for each basket not yet on disk.

    Basket names are read from the options of the report's search field.
    Files are saved as
    ``xls_ip_destruction_<study>/ip_destruction_basket_<basket>.xlsx`` under
    BASE_DIR; a basket whose file already exists is skipped.

    Args:
        page: Logged-in Playwright page.
        study: Study identifier, used only to name the output directory.
    """
    long_wait = 120000  # milliseconds
    search_box = 'input[placeholder="search"], input[type="text"]'

    target_dir = os.path.join(BASE_DIR, f"xls_ip_destruction_{study}")
    os.makedirs(target_dir, exist_ok=True)

    page.goto(f"{BASE_URL}/report/ip_destruction_form")
    page.wait_for_load_state("networkidle", timeout=long_wait)

    # Open the dropdown once to collect the available basket names.
    page.locator(search_box).first.click()
    page.wait_for_timeout(1000)
    option_texts = [text.strip() for text in page.locator("mat-option").all_inner_texts()]
    baskets = [text for text in option_texts if text not in ("", "No results found")]
    page.keyboard.press("Escape")
    page.wait_for_timeout(500)

    if len(baskets) == 0:
        print("    Žádné destruction košíky")
        return

    downloaded = 0
    for basket in baskets:
        xlsx_path = os.path.join(target_dir, f"ip_destruction_basket_{basket}.xlsx")
        if os.path.exists(xlsx_path):
            # Existing destruction forms are treated as final, so skip them.
            continue

        print(f"    [košík {basket}] stahování...")
        field = page.locator(search_box).first
        field.click()
        field.fill(basket)
        page.wait_for_timeout(500)
        # NOTE(review): this takes the first filtered option, which may not be
        # an exact match when one basket name is a prefix of another — confirm.
        page.locator("mat-option").first.dispatch_event("click")
        page.wait_for_load_state("networkidle", timeout=long_wait)

        with page.expect_download(timeout=long_wait) as pending:
            page.get_by_role("button", name="Download XLS").click()
        pending.value.save_as(xlsx_path)
        downloaded += 1

        # Reset the form before handling the next basket.
        page.get_by_role("button", name="Clear").click()
        page.wait_for_load_state("networkidle", timeout=long_wait)

    print(f"    Destruction OK ({downloaded} nových, {len(baskets) - downloaded} přeskočeno)")


def download_shipments_report(page, study):
    """Download the shipments report XLS for *study*.

    The file is saved as ``xls_shipments_<study>/shipments_report_<study>.xlsx``
    under BASE_DIR, replacing any previous copy.

    Args:
        page: Logged-in Playwright page.
        study: Study identifier, used to name the output directory and file.
    """
    long_wait = 120000  # milliseconds

    target_dir = os.path.join(BASE_DIR, f"xls_shipments_{study}")
    os.makedirs(target_dir, exist_ok=True)
    xlsx_path = os.path.join(target_dir, f"shipments_report_{study}.xlsx")

    page.goto(f"{BASE_URL}/report/shipments_report")
    page.wait_for_load_state("networkidle", timeout=long_wait)

    with page.expect_download(timeout=long_wait) as pending:
        page.get_by_role("button", name="Download XLS").click()
    download = pending.value
    download.save_as(xlsx_path)

    print("    Shipments report OK")


def _load_cz_shipments(report_path):
    """Return ``(shipment_id, status)`` string pairs for Czech rows of the shipments report.

    Raises:
        ValueError: If the sheet has no row containing a "Shipment ID" cell, or
            the table lacks the "Shipment ID" or "Location" column.
    """
    # The table header is located by content rather than assumed to be the
    # first row: scan for the row that holds the "Shipment ID" title.
    raw = pd.read_excel(report_path, header=None)
    header_row = None
    for i, row in raw.iterrows():
        if "Shipment ID" in [str(v).strip() for v in row]:
            header_row = i
            break
    if header_row is None:
        # Without this guard the failure surfaces later as an opaque KeyError.
        raise ValueError(f'No "Shipment ID" header row found in {report_path}')

    df = pd.read_excel(report_path, header=header_row)
    # Header detection ignores surrounding whitespace, so column access must too.
    df.columns = [str(c).strip() for c in df.columns]
    missing = [c for c in ("Shipment ID", "Location") if c not in df.columns]
    if missing:
        raise ValueError(f"Missing column(s) {missing} in {report_path}")

    df = df.dropna(how="all")
    df = df[df["Location"].astype(str).str.contains("Czech", na=False, case=False)]

    shipment_ids = df["Shipment ID"].astype(str).str.strip()
    if "IRT Shipment Status" in df.columns:
        statuses = df["IRT Shipment Status"].astype(str).str.strip()
    else:
        # No status column: use empty statuses so nothing is treated as RECEIVED.
        statuses = [""] * len(df)
    return list(zip(shipment_ids, statuses))


def download_shipment_details(page, study):
    """Download the shipment details XLS for every Czech shipment of *study*.

    Shipment IDs and statuses are read from the shipments report previously
    saved by download_shipments_report(). Files are saved as
    ``xls_shipment_details_<study>/shipment_details_<shipment>.xlsx`` under
    BASE_DIR. A shipment is skipped when its file already exists and its
    status is RECEIVED; every other shipment is downloaded again.

    Args:
        page: Logged-in Playwright page.
        study: Study identifier, used to locate the report and name the output.

    Raises:
        ValueError: If the shipments report has no recognisable header row or
            lacks a required column.
    """
    out_dir = os.path.join(BASE_DIR, f"xls_shipment_details_{study}")
    os.makedirs(out_dir, exist_ok=True)

    # Load the CZ shipment IDs from the shipments report downloaded just before.
    report_path = os.path.join(BASE_DIR, f"xls_shipments_{study}", f"shipments_report_{study}.xlsx")
    cz_shipments = _load_cz_shipments(report_path)
    print(f"    CZ zásilek ke stažení: {len(cz_shipments)}")

    page.goto(f"{BASE_URL}/report/shipment_details_report")
    page.wait_for_load_state("networkidle", timeout=120000)

    skipped = 0
    for shipment, status in cz_shipments:
        filename = os.path.join(out_dir, f"shipment_details_{shipment}.xlsx")
        if os.path.exists(filename) and status.upper() == "RECEIVED":
            skipped += 1
            continue  # final state, the file will not change any more
        input_field = page.locator('input[placeholder="search"], input[type="text"]').first
        input_field.click()
        input_field.fill(shipment)
        page.wait_for_timeout(500)
        # NOTE(review): this takes the first filtered option, which may not be
        # an exact match when one shipment ID is a prefix of another — confirm.
        page.locator("mat-option").first.dispatch_event("click")
        page.wait_for_load_state("networkidle", timeout=120000)

        with page.expect_download(timeout=120000) as dl:
            page.get_by_role("button", name="Download XLS").click()
        dl.value.save_as(filename)
        print(f"    [{shipment}] ({status}) OK")

        # Reset the form before searching for the next shipment.
        page.get_by_role("button", name="Clear").click()
        page.wait_for_load_state("networkidle", timeout=120000)

    print(f"    Přeskočeno (RECEIVED): {skipped}")


# ── main ─────────────────────────────────────────────────────────────────────

def _download_study(playwright, study):
    """Run login and the four download steps for one study in its own browser.

    An exception raised by login or a download step is printed with its
    traceback and not re-raised; the browser is closed in either case.
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print(f"[{study}] STAHOVÁNÍ")
    print(f"{rule}")

    browser = playwright.chromium.launch(headless=False)
    context = browser.new_context(accept_downloads=True)
    page = context.new_page()

    try:
        print("  Přihlášení...")
        login(page, study)

        print("\n  [1/4] Onsite inventory...")
        download_inventory(page, study)

        print("\n  [2/4] IP destruction...")
        download_destruction(page, study)

        print("\n  [3/4] Shipments report...")
        download_shipments_report(page, study)

        print("\n  [4/4] Shipment details (CZ)...")
        download_shipment_details(page, study)
    except Exception as e:
        import traceback
        print(f"  CHYBA při stahování: {e}")
        traceback.print_exc()
    finally:
        browser.close()


def main():
    """Download all reports for every study, then import them into MongoDB.

    The working directory is switched to BASE_DIR first. The import step runs
    even when a download failed, and an import failure is printed with its
    traceback rather than raised.
    """
    rule = "=" * 60
    os.chdir(BASE_DIR)

    # ── Downloads ────────────────────────────────────────────────────────────
    with sync_playwright() as p:
        for study in STUDIES:
            _download_study(p, study)

    # ── Import into MongoDB ──────────────────────────────────────────────────
    print(f"\n{rule}")
    print("IMPORT DO MongoDB")
    print(f"{rule}")

    try:
        drugs_mongo.run(STUDIES)
    except Exception as e:
        import traceback
        print(f"  CHYBA při importu: {e}")
        traceback.print_exc()

    print(f"\n{rule}")
    print("Vše hotovo.")
    print(f"{rule}")


# Run the pipeline only when executed as a script, so that importing this
# module does not start browser downloads and a database import.
if __name__ == "__main__":
    main()