Files
janssen/IWRS/download_drugs.py
2026-06-10 11:59:19 +02:00

220 lines
8.3 KiB
Python

"""
download_drugs.py — download the Drugs reports for one study into IWRS/Incoming/.

Version: 1.1 | Date: 2026-06-10
v1.1: moved to the IWRS/ level

Called from IWRS/run_all_v1.1.py with an already logged-in Playwright page
(login + study selection are handled by common.iwrs_portal.login).

  1. Onsite inventory detail  (per site, always downloaded)
  2. IP destruction           (per basket; skips baskets already imported into
                               Mongo iwrs_destruction — a destruction never changes)
  3. Shipments report         (one file per study, always downloaded)
  4. Shipment details         (per CZ shipment; skips shipments whose items are
                               in Mongo iwrs_shipment_items with status
                               RECEIVED — the final state)

File names (dated so that they fit the Incoming/ flow):
  YYYY-MM-DD {study} Onsite Inventory {site}.xlsx
  YYYY-MM-DD {study} IP Destruction {basket}.xlsx
  YYYY-MM-DD {study} Shipments Report.xlsx
  YYYY-MM-DD {study} Shipment Details {shipment_id}.xlsx
"""
import os
import sys
import datetime
import pandas as pd
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
if BASE_DIR not in sys.path:
sys.path.insert(0, BASE_DIR)
from common.iwrs_portal import BASE_URL
from common.paths import INCOMING_DIR, unique_path
from common.mongo_writer import get_db
# Site IDs whose onsite inventory is downloaded, keyed by study code.
# download_inventory() indexes this dict directly (SITES[study]), so a study
# that is not listed here raises KeyError there.
SITES = {
    "77242113UCO3001": [
        "DD5-CZ10001", "DD5-CZ10003", "DD5-CZ10006", "DD5-CZ10009",
        "DD5-CZ10010", "DD5-CZ10012", "DD5-CZ10013", "DD5-CZ10015",
        "DD5-CZ10016", "DD5-CZ10020", "DD5-CZ10021", "DD5-CZ10022",
    ],
    "42847922MDD3003": [
        "S10-CZ10002", "S10-CZ10004", "S10-CZ10005",
        "S10-CZ10008", "S10-CZ10011", "S10-CZ10012",
    ],
}
def _today():
    """Return today's local date as a ``YYYY-MM-DD`` string.

    The value is used as the date prefix of every downloaded file name.
    """
    # date.isoformat() yields the same YYYY-MM-DD text as strftime("%Y-%m-%d").
    return datetime.date.today().isoformat()
# ── skip logic via Mongo (replaces the earlier "file exists" check) ──────────
def get_existing_baskets(study):
    """Return the basket IDs already imported into ``iwrs_destruction``.

    Destruction records are treated as immutable, so a basket that is already
    in Mongo does not need to be downloaded again.

    Args:
        study: Study code used to filter the ``study`` field.

    Returns:
        A set of distinct ``basket_id`` values for the study. If anything goes
        wrong while talking to Mongo, a warning is printed and an empty set is
        returned, which makes the caller download every basket.
    """
    query = {"study": study}
    try:
        basket_ids = get_db().iwrs_destruction.distinct("basket_id", query)
        known = set(basket_ids)
    except Exception as err:
        # Deliberate best-effort: a Mongo failure must not stop the download run.
        print(f" UPOZORNĚNÍ: nelze načíst košíky z Mongo ({err}), stahuji vše")
        known = set()
    return known
def get_received_shipments(study):
    """Return the shipment IDs whose items are stored in Mongo as RECEIVED.

    RECEIVED is the final shipment state, so details of these shipments do not
    need to be downloaded again.

    Args:
        study: Study code used to filter the ``study`` field.

    Returns:
        A set of distinct ``shipment_id`` values from ``iwrs_shipment_items``
        whose ``shipment_status`` equals "received" (case-insensitive). If
        anything goes wrong while talking to Mongo, a warning is printed and an
        empty set is returned, which makes the caller download every shipment.
    """
    # Anchored, case-insensitive match: only the exact word "received" counts.
    status_is_received = {"$regex": "^received$", "$options": "i"}
    query = {"study": study, "shipment_status": status_is_received}
    try:
        shipment_ids = get_db().iwrs_shipment_items.distinct("shipment_id", query)
        done = set(shipment_ids)
    except Exception as err:
        # Deliberate best-effort: a Mongo failure must not stop the download run.
        print(f" UPOZORNĚNÍ: nelze načíst zásilky z Mongo ({err}), stahuji vše")
        done = set()
    return done
# ── download functions ───────────────────────────────────────────────────────
def download_inventory(page, study):
    """Download the onsite inventory detail report for every site of *study*.

    Opens the onsite-inventory report page once, then for each site listed in
    ``SITES[study]`` selects the site, downloads the XLS export into
    ``INCOMING_DIR`` and clears the filter again.

    Args:
        page: Playwright page that is already logged in to the portal.
        study: Study code; must be a key of ``SITES``.

    Raises:
        KeyError: If *study* has no entry in ``SITES``.
    """
    timeout_ms = 120000
    date_prefix = _today()

    page.goto(f"{BASE_URL}/report/onsite_inventory_detail")
    page.wait_for_load_state("networkidle", timeout=timeout_ms)

    site_ids = SITES[study]
    for site_id in site_ids:
        print(f" [{site_id}] inventory...")

        # Open the site picker and choose the option labelled with this site.
        page.locator('input[placeholder="search"], input[type="text"]').first.click()
        page.get_by_role("option", name=site_id).click()
        page.wait_for_load_state("networkidle", timeout=timeout_ms)

        # NOTE(review): unique_path presumably returns a non-clashing target
        # path (including the extension) — confirm in common.paths.
        target = unique_path(
            INCOMING_DIR, f"{date_prefix} {study} Onsite Inventory {site_id}"
        )
        with page.expect_download(timeout=timeout_ms) as download_info:
            page.get_by_role("button", name="Download XLS").click()
        # The download object is read only after the with-block has finished.
        download_info.value.save_as(target)

        # Reset the filter so the next site starts from a clean form.
        page.get_by_role("button", name="Clear").click()
        page.wait_for_load_state("networkidle", timeout=timeout_ms)

    print(f" Inventory OK ({len(site_ids)} center)")
def download_destruction(page, study):
    """Download the IP destruction form for every basket not yet in Mongo.

    The list of baskets is read from the options the search box offers on the
    IP-destruction report page. Baskets returned by ``get_existing_baskets``
    are skipped; every other basket is selected, exported as XLS into
    ``INCOMING_DIR`` and the filter is cleared again.

    Args:
        page: Playwright page that is already logged in to the portal.
        study: Study code, used for the Mongo lookup and in the file names.
    """
    timeout_ms = 120000
    search_selector = 'input[placeholder="search"], input[type="text"]'
    date_prefix = _today()

    page.goto(f"{BASE_URL}/report/ip_destruction_form")
    page.wait_for_load_state("networkidle", timeout=timeout_ms)

    # Open the dropdown once just to read which baskets the portal offers.
    page.locator(search_selector).first.click()
    page.wait_for_timeout(1000)
    baskets = []
    for option_text in page.locator("mat-option").all_inner_texts():
        label = option_text.strip()
        if label and label != "No results found":
            baskets.append(label)
    page.keyboard.press("Escape")
    page.wait_for_timeout(500)

    if not baskets:
        print(" Žádné destruction košíky")
        return

    already_imported = get_existing_baskets(study)
    # Destruction records never change, so imported baskets are not re-downloaded.
    pending = [basket for basket in baskets if basket not in already_imported]

    for basket in pending:
        print(f" [košík {basket}] stahování...")

        search_box = page.locator(search_selector).first
        search_box.click()
        search_box.fill(basket)
        page.wait_for_timeout(500)
        # Pick the first suggestion shown for the typed basket ID.
        page.locator("mat-option").first.dispatch_event("click")
        page.wait_for_load_state("networkidle", timeout=timeout_ms)

        target = unique_path(
            INCOMING_DIR, f"{date_prefix} {study} IP Destruction {basket}"
        )
        with page.expect_download(timeout=timeout_ms) as download_info:
            page.get_by_role("button", name="Download XLS").click()
        download_info.value.save_as(target)

        page.get_by_role("button", name="Clear").click()
        page.wait_for_load_state("networkidle", timeout=timeout_ms)

    downloaded = len(pending)
    skipped = len(baskets) - downloaded
    print(f" Destruction OK ({downloaded} nových, {skipped} přeskočeno)")
def download_shipments_report(page, study):
    """Download the shipments report for *study* and return the saved path.

    Args:
        page: Playwright page that is already logged in to the portal.
        study: Study code, used in the file name.

    Returns:
        The path (as returned by ``unique_path``) the report was saved to.
    """
    timeout_ms = 120000

    page.goto(f"{BASE_URL}/report/shipments_report")
    page.wait_for_load_state("networkidle", timeout=timeout_ms)

    target = unique_path(INCOMING_DIR, f"{_today()} {study} Shipments Report")
    with page.expect_download(timeout=timeout_ms) as download_info:
        page.get_by_role("button", name="Download XLS").click()
    download_info.value.save_as(target)

    saved_name = os.path.basename(target)
    print(f" Shipments report OK -> {saved_name}")
    return target
def _find_column(df, wanted):
    """Return the first column label of *df* whose stripped text equals *wanted*, else None."""
    return next((col for col in df.columns if str(col).strip() == wanted), None)


def _read_cz_shipments(shipments_report_path):
    """Return ``[(shipment_id, status), ...]`` for the Czech rows of the shipments report.

    Raises:
        KeyError: If the "Shipment ID" header row or the "Location" column
            cannot be found in the report.
    """
    # The table header is not on a fixed row: scan for the row that contains
    # a "Shipment ID" cell and use that row as the header.
    raw = pd.read_excel(shipments_report_path, header=None)
    header_row = next(
        (
            idx
            for idx, row in raw.iterrows()
            if "Shipment ID" in [str(cell).strip() for cell in row]
        ),
        None,
    )
    if header_row is None:
        # Passing header=None on would only fail later with an unhelpful
        # KeyError('Location'); fail here with the real cause instead.
        raise KeyError(
            f"'Shipment ID' header row not found in {shipments_report_path}"
        )

    df = pd.read_excel(shipments_report_path, header=header_row)
    df = df.dropna(how="all")

    # The header row was detected on stripped cell text, so resolve the
    # columns the same way (a padded header cell must not break the lookup).
    id_col = _find_column(df, "Shipment ID")
    location_col = _find_column(df, "Location")
    status_col = _find_column(df, "IRT Shipment Status")
    if id_col is None or location_col is None:
        raise KeyError(
            "Shipments report is missing the 'Shipment ID' or 'Location' "
            f"column: {shipments_report_path}"
        )

    df = df[df[location_col].astype(str).str.contains("Czech", na=False, case=False)]
    # A row without a Shipment ID would otherwise be searched for as "nan".
    df = df.dropna(subset=[id_col])

    shipment_ids = df[id_col].astype(str).str.strip()
    if status_col is not None:
        statuses = df[status_col].astype(str).str.strip()
    else:
        statuses = [""] * len(df)

    # One details file per shipment: keep the first row of each shipment ID.
    shipments = []
    seen = set()
    for shipment_id, status in zip(shipment_ids, statuses):
        if not shipment_id or shipment_id in seen:
            continue
        seen.add(shipment_id)
        shipments.append((shipment_id, status))
    return shipments


def download_shipment_details(page, study, shipments_report_path):
    """Download the shipment details report for each Czech shipment of *study*.

    The shipment IDs are read from the freshly downloaded shipments report
    (rows whose "Location" contains "Czech"). Shipments returned by
    ``get_received_shipments`` are skipped; every other shipment is selected
    on the shipment-details report page, exported as XLS into
    ``INCOMING_DIR`` and the filter is cleared again.

    Args:
        page: Playwright page that is already logged in to the portal.
        study: Study code, used for the Mongo lookup and in the file names.
        shipments_report_path: Path of the shipments report XLSX to read the
            shipment IDs from.

    Raises:
        KeyError: If the report has no "Shipment ID" header row or no
            "Location" column.
    """
    timeout_ms = 120000
    today = _today()

    # Load the CZ shipment IDs from the shipments report downloaded just before.
    cz_shipments = _read_cz_shipments(shipments_report_path)
    print(f" CZ zásilek celkem: {len(cz_shipments)}")

    received = get_received_shipments(study)
    page.goto(f"{BASE_URL}/report/shipment_details_report")
    page.wait_for_load_state("networkidle", timeout=timeout_ms)

    skipped = 0
    for shipment, status in cz_shipments:
        if shipment in received:
            # Items in Mongo already carry the final RECEIVED state.
            skipped += 1
            continue

        input_field = page.locator('input[placeholder="search"], input[type="text"]').first
        input_field.click()
        input_field.fill(shipment)
        page.wait_for_timeout(500)
        # Pick the first suggestion shown for the typed shipment ID.
        page.locator("mat-option").first.dispatch_event("click")
        page.wait_for_load_state("networkidle", timeout=timeout_ms)

        filename = unique_path(INCOMING_DIR, f"{today} {study} Shipment Details {shipment}")
        with page.expect_download(timeout=timeout_ms) as dl:
            page.get_by_role("button", name="Download XLS").click()
        dl.value.save_as(filename)
        print(f" [{shipment}] ({status}) OK")

        page.get_by_role("button", name="Clear").click()
        page.wait_for_load_state("networkidle", timeout=timeout_ms)

    print(f" Přeskočeno (RECEIVED v Mongo): {skipped}")
def run(page, study):
    """Download all four kinds of Drugs reports for *study* into IWRS/Incoming/.

    The steps run in a fixed order because the last one (shipment details)
    reads the shipments report saved by the third one.

    Args:
        page: Playwright page that is already logged in to the portal.
        study: Study code to download the reports for.
    """
    os.makedirs(INCOMING_DIR, exist_ok=True)

    # Steps 1 and 2 are independent of each other and produce no return value.
    independent_steps = (
        ("\n [1/4] Onsite inventory...", download_inventory),
        ("\n [2/4] IP destruction...", download_destruction),
    )
    for banner, step in independent_steps:
        print(banner)
        step(page, study)

    print("\n [3/4] Shipments report...")
    shipments_report = download_shipments_report(page, study)

    print("\n [4/4] Shipment details (CZ)...")
    download_shipment_details(page, study, shipments_report)