246 lines
9.3 KiB
Python
246 lines
9.3 KiB
Python
"""
Complete pipeline for Drugs:

  1. Onsite inventory detail (per site, always overwritten)
  2. IP destruction (per basket, already existing files are skipped)
  3. Shipments report (one file per study, overwritten)
  4. Shipment details (per CZ shipment, overwritten; an existing file of a
     shipment in status RECEIVED is skipped)
  5. Import into MongoDB (studie.iwrs_shipments / iwrs_shipment_items /
     iwrs_inventory / iwrs_destruction)

Run this script — it processes both studies automatically.
"""
|
|
|
|
import os
|
|
import glob
|
|
import re
|
|
import datetime
|
|
|
|
import sys
|
|
import pandas as pd
|
|
from playwright.sync_api import sync_playwright
|
|
|
|
import import_to_mongo as drugs_mongo
|
|
|
|
# Root URL of the IWRS portal; report pages are addressed relative to it.
BASE_URL = "https://janssen.4gclinical.com"

# SECURITY: login credentials are committed in this file. Supply them through
# the IWRS_EMAIL / IWRS_PASSWORD environment variables instead; the literals
# below are kept only as backward-compatible fallbacks and should be rotated
# and removed from source control.
EMAIL = os.environ.get("IWRS_EMAIL", "vbuzalka@its.jnj.com")
PASSWORD = os.environ.get("IWRS_PASSWORD", "Vlado123++-+")

# Studies processed by the pipeline, in this order.
STUDIES = ["77242113UCO3001", "42847922MDD3003"]

# Site identifiers per study, used for the per-site inventory downloads.
SITES = {
    "77242113UCO3001": [
        "DD5-CZ10001", "DD5-CZ10003", "DD5-CZ10006", "DD5-CZ10009",
        "DD5-CZ10010", "DD5-CZ10012", "DD5-CZ10013", "DD5-CZ10015",
        "DD5-CZ10016", "DD5-CZ10020", "DD5-CZ10021", "DD5-CZ10022",
    ],
    "42847922MDD3003": [
        "S10-CZ10002", "S10-CZ10004", "S10-CZ10005",
        "S10-CZ10008", "S10-CZ10011", "S10-CZ10012",
    ],
}

# Directory containing this script; all output folders are created below it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
|
|
|
|
|
|
|
# ── login ────────────────────────────────────────────────────────────────────
|
|
|
|
def login(page, study):
    """Sign in to the portal at BASE_URL and select *study*.

    Args:
        page: Playwright page used for the whole session.
        study: Name of the option to pick in the "Study *" selector.
    """

    def settle():
        # Every navigation step is followed by a wait for network idle.
        page.wait_for_load_state("networkidle")

    page.goto(BASE_URL)
    settle()

    # Fill the login form fields in the order they are submitted.
    form_values = (("Email *", EMAIL), ("Password *", PASSWORD))
    for label, value in form_values:
        page.get_by_label(label).fill(value)
    page.locator("#login__submit").click()
    settle()

    # Study selection: open the selector, choose the option, confirm.
    page.get_by_label("Study *").click()
    page.get_by_role("option", name=study).click()
    page.get_by_role("button", name="SELECT").click()
    settle()
|
|
|
|
|
|
# ── download functions ───────────────────────────────────────────────────────
|
|
|
|
def download_inventory(page, study):
    """Download the onsite inventory detail XLS for every site of *study*.

    One file per site is saved as
    ``xls_reports_<study>/onsite_inventory_detail_<site>.xlsx`` under BASE_DIR.

    Args:
        page: Logged-in Playwright page.
        study: Study identifier; must be a key of SITES.
    """
    wait_ms = 120000
    search_selector = 'input[placeholder="search"], input[type="text"]'

    target_dir = os.path.join(BASE_DIR, f"xls_reports_{study}")
    os.makedirs(target_dir, exist_ok=True)

    page.goto(f"{BASE_URL}/report/onsite_inventory_detail")
    page.wait_for_load_state("networkidle", timeout=wait_ms)

    site_ids = SITES[study]
    for site_id in site_ids:
        print(f" [{site_id}] inventory...")

        # Open the site selector and pick the current site.
        page.locator(search_selector).first.click()
        page.get_by_role("option", name=site_id).click()
        page.wait_for_load_state("networkidle", timeout=wait_ms)

        target = os.path.join(target_dir, f"onsite_inventory_detail_{site_id}.xlsx")
        with page.expect_download(timeout=wait_ms) as download_info:
            page.get_by_role("button", name="Download XLS").click()
        download_info.value.save_as(target)

        # Reset the filter before the next site is selected.
        page.get_by_role("button", name="Clear").click()
        page.wait_for_load_state("networkidle", timeout=wait_ms)

    print(f" Inventory OK ({len(site_ids)} center)")
|
|
|
|
|
|
def download_destruction(page, study):
    """Download the IP destruction form XLS for each basket offered by the report.

    Basket names are read from the options of the report's search field.
    Files are saved as ``xls_ip_destruction_<study>/ip_destruction_basket_<basket>.xlsx``
    under BASE_DIR; a basket whose file already exists is skipped.

    Args:
        page: Logged-in Playwright page.
        study: Study identifier used in the output directory name.
    """
    wait_ms = 120000
    search_selector = 'input[placeholder="search"], input[type="text"]'

    target_dir = os.path.join(BASE_DIR, f"xls_ip_destruction_{study}")
    os.makedirs(target_dir, exist_ok=True)

    page.goto(f"{BASE_URL}/report/ip_destruction_form")
    page.wait_for_load_state("networkidle", timeout=wait_ms)

    # Open the dropdown once to collect the available basket names.
    page.locator(search_selector).first.click()
    page.wait_for_timeout(1000)
    baskets = []
    for option_text in page.locator("mat-option").all_inner_texts():
        label = option_text.strip()
        if label and label != "No results found":
            baskets.append(label)
    page.keyboard.press("Escape")
    page.wait_for_timeout(500)

    if not baskets:
        print(" Žádné destruction košíky")
        return

    downloaded = 0
    for basket in baskets:
        target = os.path.join(target_dir, f"ip_destruction_basket_{basket}.xlsx")
        if not os.path.exists(target):
            print(f" [košík {basket}] stahování...")

            # Type the basket name and take the first offered option.
            search_box = page.locator(search_selector).first
            search_box.click()
            search_box.fill(basket)
            page.wait_for_timeout(500)
            page.locator("mat-option").first.dispatch_event("click")
            page.wait_for_load_state("networkidle", timeout=wait_ms)

            with page.expect_download(timeout=wait_ms) as download_info:
                page.get_by_role("button", name="Download XLS").click()
            download_info.value.save_as(target)
            downloaded += 1

            page.get_by_role("button", name="Clear").click()
            page.wait_for_load_state("networkidle", timeout=wait_ms)
        # else: destruction files do not change, so an existing one is kept.

    skipped = len(baskets) - downloaded
    print(f" Destruction OK ({downloaded} nových, {skipped} přeskočeno)")
|
|
|
|
|
|
def download_shipments_report(page, study):
    """Download the shipments report XLS for *study*.

    The file is saved as ``xls_shipments_<study>/shipments_report_<study>.xlsx``
    under BASE_DIR.

    Args:
        page: Logged-in Playwright page.
        study: Study identifier used in the directory and file names.
    """
    wait_ms = 120000

    target_dir = os.path.join(BASE_DIR, f"xls_shipments_{study}")
    os.makedirs(target_dir, exist_ok=True)

    page.goto(f"{BASE_URL}/report/shipments_report")
    page.wait_for_load_state("networkidle", timeout=wait_ms)

    target = os.path.join(target_dir, f"shipments_report_{study}.xlsx")
    with page.expect_download(timeout=wait_ms) as download_info:
        page.get_by_role("button", name="Download XLS").click()
    download = download_info.value
    download.save_as(target)

    print(" Shipments report OK")
|
|
|
|
|
|
def _load_cz_shipments(report_path):
    """Return ``(shipment_id, status)`` pairs for the Czech rows of a shipments report.

    The header row is located by scanning for a cell equal to "Shipment ID"
    (after stripping whitespace), then the sheet is re-read with that row as
    the header. Rows are kept when their "Location" contains "Czech"
    (case-insensitive). The status is an empty string when the report has no
    "IRT Shipment Status" column.

    Raises:
        ValueError: If no row contains a "Shipment ID" cell.
    """
    raw = pd.read_excel(report_path, header=None)
    header_row = next(
        (
            idx
            for idx, row in raw.iterrows()
            if "Shipment ID" in [str(value).strip() for value in row]
        ),
        None,
    )
    if header_row is None:
        # Without this check the sheet would be re-read headerless and fail
        # later with an unrelated-looking KeyError on "Location".
        raise ValueError(f"'Shipment ID' header row not found in {report_path}")

    df = pd.read_excel(report_path, header=header_row)
    # The header was detected on stripped cell text, so look columns up the
    # same way; otherwise padded header cells would not be found by name.
    df.columns = [str(column).strip() for column in df.columns]
    df = df.dropna(how="all")
    df = df[df["Location"].astype(str).str.contains("Czech", na=False, case=False)]

    if "IRT Shipment Status" in df.columns:
        statuses = df["IRT Shipment Status"].astype(str).str.strip()
    else:
        statuses = [""] * len(df)
    return list(zip(df["Shipment ID"].astype(str).str.strip(), statuses))


def download_shipment_details(page, study):
    """Download the shipment details XLS for each Czech shipment of *study*.

    Shipment IDs are read from the shipments report file
    ``xls_shipments_<study>/shipments_report_<study>.xlsx`` under BASE_DIR, so
    that report must already have been downloaded. Files are saved as
    ``xls_shipment_details_<study>/shipment_details_<shipment>.xlsx``. A
    shipment whose status is RECEIVED and whose file already exists is
    skipped; every other shipment is downloaded again.

    Args:
        page: Logged-in Playwright page.
        study: Study identifier used in the directory and file names.

    Raises:
        ValueError: If the shipments report has no "Shipment ID" header row.
    """
    wait_ms = 120000
    search_selector = 'input[placeholder="search"], input[type="text"]'

    out_dir = os.path.join(BASE_DIR, f"xls_shipment_details_{study}")
    os.makedirs(out_dir, exist_ok=True)

    # Load the CZ shipment IDs from the shipments report downloaded earlier.
    report_path = os.path.join(
        BASE_DIR, f"xls_shipments_{study}", f"shipments_report_{study}.xlsx"
    )
    cz_shipments = _load_cz_shipments(report_path)
    print(f" CZ zásilek ke stažení: {len(cz_shipments)}")

    page.goto(f"{BASE_URL}/report/shipment_details_report")
    page.wait_for_load_state("networkidle", timeout=wait_ms)

    skipped = 0
    for shipment, status in cz_shipments:
        filename = os.path.join(out_dir, f"shipment_details_{shipment}.xlsx")
        if os.path.exists(filename) and status.upper() == "RECEIVED":
            # Final state: the already downloaded file does not change.
            skipped += 1
            continue

        # Type the shipment ID and take the first offered option.
        input_field = page.locator(search_selector).first
        input_field.click()
        input_field.fill(shipment)
        page.wait_for_timeout(500)
        page.locator("mat-option").first.dispatch_event("click")
        page.wait_for_load_state("networkidle", timeout=wait_ms)

        with page.expect_download(timeout=wait_ms) as dl:
            page.get_by_role("button", name="Download XLS").click()
        dl.value.save_as(filename)
        print(f" [{shipment}] ({status}) OK")

        # Reset the filter before the next shipment is searched.
        page.get_by_role("button", name="Clear").click()
        page.wait_for_load_state("networkidle", timeout=wait_ms)

    print(f" Přeskočeno (RECEIVED): {skipped}")
|
|
|
|
|
|
# ── main ─────────────────────────────────────────────────────────────────────
|
|
|
|
def main():
    """Run the download steps for every study, then the MongoDB import.

    For each study in STUDIES a fresh Chromium browser is launched, the four
    download steps run in order, and the browser is closed afterwards. An
    exception during a study's downloads is printed with its traceback and
    the pipeline continues; the same applies to the MongoDB import.
    """
    import traceback

    banner = "=" * 60
    os.chdir(BASE_DIR)

    # Download steps in execution order; the label is printed before each.
    steps = (
        ("Onsite inventory...", download_inventory),
        ("IP destruction...", download_destruction),
        ("Shipments report...", download_shipments_report),
        ("Shipment details (CZ)...", download_shipment_details),
    )

    # ── Downloads ────────────────────────────────────────────────────────────
    with sync_playwright() as p:
        for study in STUDIES:
            print(f"\n{banner}")
            print(f"[{study}] STAHOVÁNÍ")
            print(banner)

            browser = p.chromium.launch(headless=False)
            context = browser.new_context(accept_downloads=True)
            page = context.new_page()

            try:
                print(" Přihlášení...")
                login(page, study)

                for number, (label, step) in enumerate(steps, start=1):
                    print(f"\n [{number}/{len(steps)}] {label}")
                    step(page, study)
            except Exception as exc:
                # Best effort: report the failure and go on to the next study.
                print(f" CHYBA při stahování: {exc}")
                traceback.print_exc()
            finally:
                browser.close()

    # ── Import into MongoDB ──────────────────────────────────────────────────
    print(f"\n{banner}")
    print("IMPORT DO MongoDB")
    print(banner)

    try:
        drugs_mongo.run(STUDIES)
    except Exception as exc:
        print(f" CHYBA při importu: {exc}")
        traceback.print_exc()

    print(f"\n{banner}")
    print("Vše hotovo.")
    print(banner)
|
|
|
|
|
|
# Run the pipeline only when this file is executed as a script, so importing
# the module does not launch a browser.
if __name__ == "__main__":
    main()
|