z230
This commit is contained in:
@@ -0,0 +1,253 @@
|
||||
"""
|
||||
Import Drugs dat (shipments, shipment_items, inventory, destruction) z XLSX do MongoDB.
|
||||
|
||||
Volá se z IWRS/Drugs/run_all.py po stažení reportů.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import re
|
||||
import glob
|
||||
|
||||
import pandas as pd
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from common.mongo_writer import (
|
||||
to_str, to_int, to_date,
|
||||
ensure_indexes, log_import,
|
||||
bulk_upsert_with_snapshot, bulk_upsert_only,
|
||||
)
|
||||
|
||||
# Directory containing this script; the per-study xls_* folders are resolved relative to it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
# ── XLSX parsery (převzaté z run_all.py + úprava na Mongo dokumenty) ─────────
|
||||
|
||||
def parse_shipments_report(study):
    """Parse the study's shipments report into shipment documents.

    Reads ``xls_shipments_<study>/shipments_report_<study>.xlsx`` under
    ``BASE_DIR``, uses the first row containing a "Shipment ID" cell as the
    header, keeps only rows whose "Location" contains "Czech"
    (case-insensitive) and builds one dict per row with a non-empty
    shipment ID.

    Returns:
        A list of dicts keyed by ``_id`` (the shipment ID). The list is empty
        when the file does not exist or no header row is found.
    """
    report_path = os.path.join(
        BASE_DIR, f"xls_shipments_{study}", f"shipments_report_{study}.xlsx"
    )
    if not os.path.exists(report_path):
        print(f" CHYBI: {report_path}")
        return []

    # First pass without a header: locate the row that holds the column names.
    preview = pd.read_excel(report_path, header=None)
    header_idx = next(
        (
            idx
            for idx, cells in preview.iterrows()
            if any(str(cell).strip() == "Shipment ID" for cell in cells)
        ),
        None,
    )
    if header_idx is None:
        return []

    # Second pass with the detected header row.
    table = pd.read_excel(report_path, header=header_idx).dropna(how="all")
    czech_only = table["Location"].astype(str).str.contains("Czech", na=False, case=False)
    table = table[czech_only]
    available = table.columns.tolist()

    def optional(record, convert, column):
        # Columns that are absent from the sheet are stored as None.
        return convert(record[column]) if column in available else None

    documents = []
    for _, record in table.iterrows():
        shipment_id = to_str(record["Shipment ID"])
        if not shipment_id:
            continue
        documents.append({
            "_id": shipment_id,
            "shipment_id": shipment_id,
            "study": study,
            "status": to_str(record["IRT Shipment Status"]),
            "type": to_str(record["Type"]),
            "ship_from": to_str(record["Shipment From"]),
            "ship_to_site": to_str(record["Ship To:"]),
            "location": to_str(record["Location"]),
            "request_date": to_date(record["Request Date"]),
            "shipped_date": to_date(record["Shipped Date"]),
            "received_date": optional(record, to_date, "Received Date"),
            "received_by": optional(record, to_str, "Received by"),
            "delivered_date_utc": optional(record, to_date, "Delivered Date [UTC]"),
            "delivery_recipient": optional(record, to_str, "Delivery Recipient"),
            "delivery_details": optional(record, to_str, "Delivery Details"),
            "cancelled_date": optional(record, to_date, "Cancelled Date"),
            "total_medication_ids": optional(record, to_int, "Total Medication IDs"),
            "tracking_no": optional(record, to_str, "Tracking #"),
            "shipping_category": optional(record, to_str, "Shipping Category"),
            "expected_arrival": optional(record, to_date, "Expected Arrival"),
        })
    return documents
|
||||
|
||||
|
||||
def parse_shipment_details(study):
    """Parse every per-shipment detail workbook of a study into item documents.

    Scans ``xls_shipment_details_<study>/shipment_details_*.xlsx`` under
    ``BASE_DIR`` in sorted order. The shipment ID is taken from the file name;
    the header is the first row containing a "Medication ID" cell. Workbooks
    without such a row and rows without a medication ID are skipped.

    Returns:
        A list of dicts with ``_id`` = ``"<shipment_id>:<medication_id>"``.
        When an ``_id`` occurs more than once, the last occurrence wins.
    """
    pattern = os.path.join(
        BASE_DIR, f"xls_shipment_details_{study}", "shipment_details_*.xlsx"
    )
    # Keyed by _id so that a later duplicate overwrites the earlier one.
    documents = {}
    for workbook in sorted(glob.glob(pattern)):
        name_match = re.search(r"shipment_details_(.+)\.xlsx", os.path.basename(workbook))
        shipment_id = name_match.group(1) if name_match else "UNKNOWN"

        preview = pd.read_excel(workbook, header=None)
        header_idx = next(
            (
                idx
                for idx, cells in preview.iterrows()
                if any(str(cell).strip() == "Medication ID" for cell in cells)
            ),
            None,
        )
        if header_idx is None:
            continue

        table = pd.read_excel(workbook, header=header_idx).dropna(how="all")
        for _, record in table.iterrows():
            # Two column-name variants are accepted for description and type.
            description = to_str(record.get("Medication Description"))
            if not description:
                description = to_str(record.get("Medication ID Description"))
            kind = to_str(record.get("Medication type"))
            if not kind:
                kind = to_str(record.get("Medication ID type"))
            med_id = to_str(record.get("Medication ID"))
            if not med_id:
                continue
            doc_id = f"{shipment_id}:{med_id}"
            documents[doc_id] = {
                "_id": doc_id,
                "study": study,
                "shipment_id": shipment_id,
                "destination_location": to_str(record.get("Destination Location")),
                "shipment_status": to_str(record.get("IRT Shipment Status")),
                "shipment_type": to_str(record.get("Type")),
                "destination_site": to_str(record.get("Destination Site")),
                "investigator": to_str(record.get("Investigator")),
                "medication_description": description,
                "medication_type": kind,
                "medication_id": med_id,
                "packaged_lot_no": to_str(record.get("Packaged Lot number")),
                "packaged_lot_description": to_str(record.get("Packaged Lot description")),
                "container_id": to_str(record.get("Container ID")),
                "quantity": to_int(record.get("Quantity of Medication IDs")),
                "expiration_date": to_date(record.get("Expiration Date")),
                "item_status": to_str(record.get("Status")),
            }
    return list(documents.values())
|
||||
|
||||
|
||||
def _first_present_cell(record, *columns):
    """Return the first non-missing, non-blank cell among *columns*, else None."""
    for column in columns:
        cell = record.get(column)
        if cell is None or pd.isna(cell):
            continue
        if isinstance(cell, str) and not cell.strip():
            continue
        return cell
    return None


def parse_inventory(study):
    """Parse the per-site onsite inventory workbooks of a study.

    Scans ``xls_reports_<study>/onsite_inventory_detail_*.xlsx`` under
    ``BASE_DIR`` in sorted order. In each workbook the first column of the
    preamble rows supplies ``Site:``, ``Investigator:`` and ``Location:``
    values; the header is the first row whose first cell is "Medication" or
    "Medication ID". Workbooks without a header row are skipped, as are rows
    without a medication ID or without a site.

    Returns:
        A list of dicts with ``_id`` = ``"<site>:<medication_id>"``. When an
        ``_id`` occurs more than once, the last occurrence wins.
    """
    inv_dir = os.path.join(BASE_DIR, f"xls_reports_{study}")
    files = sorted(glob.glob(os.path.join(inv_dir, "onsite_inventory_detail_*.xlsx")))
    rows = []
    for path in files:
        raw = pd.read_excel(path, header=None)
        site = investigator = location = None
        header_row = None
        for i, row in raw.iterrows():
            first = str(row.iloc[0]).strip() if pd.notna(row.iloc[0]) else ""
            if first.startswith("Site:"):
                site = first.replace("Site:", "").strip()
            elif first.startswith("Investigator:"):
                investigator = first.replace("Investigator:", "").strip()
            elif first.startswith("Location:"):
                location = first.replace("Location:", "").strip()
            # Only the first matching row is the header.
            if first in ("Medication", "Medication ID") and header_row is None:
                header_row = i
        if header_row is None:
            continue

        df = pd.read_excel(path, header=header_row).dropna(how="all")
        # The first column is named "Medication" or "Medication ID"; normalise it.
        df = df.rename(columns={df.columns[0]: "medication_id"})
        for _, r in df.iterrows():
            med_id = to_str(r["medication_id"])
            if not med_id or not site:
                continue
            # Choose the dispensing date by cell content, not by truthiness of
            # the raw cell: an empty pandas cell (NaN/NaT) is truthy, so a plain
            # `a or b` would never fall back to the second column spelling.
            dispensing_cell = _first_present_cell(r, "Dispensing date", "Dispensing Date")
            rows.append({
                "_id": f"{site}:{med_id}",
                "study": study,
                "site": site,
                "investigator": investigator,
                "location": location,
                "medication_id": med_id,
                "packaged_lot_no": to_str(r.get("Packaged Lot number")),
                "original_expiration_date": to_date(r.get("Original Expiration Date when Packaged Lot was Added")),
                "expiration_date": to_date(r.get("Expiration date")),
                "received_date": to_date(r.get("Received Date")),
                "receipt_user": to_str(r.get("Shipment Receipt User")),
                "subject_identifier": to_str(r.get("Subject Identifier")),
                "quantity_assigned": to_int(r.get("Quantity Assigned")),
                "irt_transaction": to_str(r.get("IRT Transaction")),
                "date_assigned": to_date(r.get("Date Assigned")),
                "assignment_user": to_str(r.get("Assignment User")),
                "dispensation_status": to_str(r.get("Dispensation Status")),
                "dispensing_date": to_date(dispensing_cell),
                "quantity_dispensed": to_int(r.get("Quantity Dispensed")),
                "dispensing_user": to_str(r.get("Dispensing User")),
                "quantity_returned": to_int(r.get("Quantity Returned")),
                "date_returned": to_date(r.get("Date Returned")),
                "return_user": to_str(r.get("Return User")),
            })
    # Dedupe by _id; the last occurrence wins.
    by_id = {r["_id"]: r for r in rows}
    return list(by_id.values())
|
||||
|
||||
|
||||
def parse_destruction_files(study):
    """Parse the IP destruction basket workbooks of a study.

    Scans ``xls_ip_destruction_<study>/ip_destruction_basket_*.xlsx`` under
    ``BASE_DIR`` in sorted order. Labelled values in the first column of the
    preamble rows are collected as basket metadata; the header is the first
    row whose first cell equals "Medication ID Description". Workbooks without
    a header row are skipped, as are rows without a medication ID or when the
    workbook has no basket ID.

    Returns:
        A list of dicts with ``_id`` = ``"<basket_id>:<medication_id>"``.
        When an ``_id`` occurs more than once, the last occurrence wins.
    """
    # (label in the first column, metadata field it fills)
    labels = (
        ("Investigator Name:", "investigator"),
        ("Site ID:", "site_id"),
        ("Location:", "location"),
        ("Basket ID:", "basket_id"),
        ("Drug Destruction Created Date:", "destruction_date"),
    )
    pattern = os.path.join(
        BASE_DIR, f"xls_ip_destruction_{study}", "ip_destruction_basket_*.xlsx"
    )
    # Keyed by _id so that a later duplicate overwrites the earlier one.
    documents = {}
    for workbook in sorted(glob.glob(pattern)):
        preview = pd.read_excel(workbook, header=None)
        meta = {}
        header_idx = None
        for idx, cells in preview.iterrows():
            lead = cells.iloc[0]
            first = str(lead).strip() if pd.notna(lead) else ""
            for label, field in labels:
                if first.startswith(label):
                    meta[field] = first.replace(label, "").strip()
            if header_idx is None and first == "Medication ID Description":
                header_idx = idx
        if header_idx is None:
            continue

        table = pd.read_excel(workbook, header=header_idx).dropna(how="all")
        basket_id = meta.get("basket_id")
        for _, record in table.iterrows():
            med_id = to_str(record.get("Medication ID"))
            if not (med_id and basket_id):
                continue
            doc_id = f"{basket_id}:{med_id}"
            documents[doc_id] = {
                "_id": doc_id,
                "study": study,
                "site_id": meta.get("site_id"),
                "investigator": meta.get("investigator"),
                "location": meta.get("location"),
                "basket_id": basket_id,
                "destruction_date": to_date(meta.get("destruction_date")),
                "medication_description": to_str(record.get("Medication ID Description")),
                "medication_id": med_id,
                "packaged_lot_description": to_str(record.get("Packaged Lot description")),
                "comments": to_str(record.get("Comments")),
            }
    return list(documents.values())
|
||||
|
||||
|
||||
# ── hlavní import ────────────────────────────────────────────────────────────
|
||||
|
||||
def import_study(study):
    """Parse all four XLSX sources of *study* and upsert them into MongoDB.

    Prints the document counts, registers the run through ``log_import`` and
    passes the returned import ID to every bulk upsert. Shipments, shipment
    items and inventory are written with a snapshot collection; destruction
    records are upserted without one.
    """
    print(f"\n [{study}] parsovani XLSX...")
    parsed = {
        "shipments": parse_shipments_report(study),
        "shipment_items": parse_shipment_details(study),
        "inventory": parse_inventory(study),
        "destruction": parse_destruction_files(study),
    }
    counts = {name: len(docs) for name, docs in parsed.items()}
    print(
        f" Zasilky: {counts['shipments']} | Polozky: {counts['shipment_items']}"
        f" | Sklad: {counts['inventory']} | Destrukce: {counts['destruction']}"
    )

    import_id = log_import(study, f"drugs_{study}", "drugs", counts)
    print(f" import_id = {import_id}")

    # (target collection, snapshot collection, key into `parsed`)
    snapshot_targets = (
        ("iwrs_shipments", "iwrs_shipments_snapshots", "shipments"),
        ("iwrs_shipment_items", "iwrs_shipment_items_snapshots", "shipment_items"),
        ("iwrs_inventory", "iwrs_inventory_snapshots", "inventory"),
    )
    for collection, snapshots, key in snapshot_targets:
        bulk_upsert_with_snapshot(collection, snapshots, parsed[key], import_id)
    bulk_upsert_only("iwrs_destruction", parsed["destruction"], import_id)
|
||||
|
||||
|
||||
def run(studies):
    """Call ``ensure_indexes()`` once, then ``import_study()`` for each study in order."""
    ensure_indexes()
    for study in studies:
        import_study(study)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Studies come from the command line; with no arguments use the two defaults.
    studies = sys.argv[1:] or ["77242113UCO3001", "42847922MDD3003"]
    run(studies)
|
||||
@@ -0,0 +1,245 @@
|
||||
"""
|
||||
Kompletní pipeline pro Drugs:
|
||||
1. Onsite inventory detail (per site, vždy přepisuje)
|
||||
2. IP destruction (per košík, přeskočí již existující soubory)
|
||||
3. Shipments report (jeden soubor na studii, přepisuje)
|
||||
4. Shipment details (per zásilka CZ; přeskočí již stažené zásilky ve stavu RECEIVED, ostatní přepisuje)
|
||||
5. Import do MongoDB (studie.iwrs_shipments / iwrs_shipment_items / iwrs_inventory / iwrs_destruction)
|
||||
|
||||
Spusť tento skript — zpracuje obě studie automaticky.
|
||||
"""
|
||||
|
||||
import os
|
||||
import glob
|
||||
import re
|
||||
import datetime
|
||||
|
||||
import sys
|
||||
import pandas as pd
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
import import_to_mongo as drugs_mongo
|
||||
|
||||
# Root URL of the IRT portal the reports are downloaded from.
BASE_URL = "https://janssen.4gclinical.com"

# SECURITY: these credentials were committed to the repository in plain text.
# They can now be supplied through the IWRS_EMAIL / IWRS_PASSWORD environment
# variables; the literals remain only as a backward-compatible fallback.
# TODO: rotate the password and remove the hard-coded fallback values.
EMAIL = os.environ.get("IWRS_EMAIL", "vbuzalka@its.jnj.com")
PASSWORD = os.environ.get("IWRS_PASSWORD", "Vlado123++-+")

# Studies processed by the pipeline, in processing order.
STUDIES = ["77242113UCO3001", "42847922MDD3003"]

# Site IDs per study whose onsite inventory is downloaded.
SITES = {
    "77242113UCO3001": [
        "DD5-CZ10001", "DD5-CZ10003", "DD5-CZ10006", "DD5-CZ10009",
        "DD5-CZ10010", "DD5-CZ10012", "DD5-CZ10013", "DD5-CZ10015",
        "DD5-CZ10016", "DD5-CZ10020", "DD5-CZ10021", "DD5-CZ10022",
    ],
    "42847922MDD3003": [
        "S10-CZ10002", "S10-CZ10004", "S10-CZ10005",
        "S10-CZ10008", "S10-CZ10011", "S10-CZ10012",
    ],
}

# Directory containing this script; the download folders are created under it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
|
||||
# ── login ────────────────────────────────────────────────────────────────────
|
||||
|
||||
def login(page, study):
    """Sign in at ``BASE_URL`` and select *study* in the study picker.

    Fills the "Email *" and "Password *" fields from ``EMAIL`` / ``PASSWORD``,
    submits the form, then chooses the option named *study* and confirms with
    the SELECT button. Waits for network idle after each navigation step.
    """
    def settle():
        page.wait_for_load_state("networkidle")

    page.goto(BASE_URL)
    settle()

    for label, value in (("Email *", EMAIL), ("Password *", PASSWORD)):
        page.get_by_label(label).fill(value)
    page.locator("#login__submit").click()
    settle()

    page.get_by_label("Study *").click()
    page.get_by_role("option", name=study).click()
    page.get_by_role("button", name="SELECT").click()
    settle()
|
||||
|
||||
|
||||
# ── download funkce ──────────────────────────────────────────────────────────
|
||||
|
||||
def download_inventory(page, study):
    """Download the onsite inventory detail XLS for every site of *study*.

    Files are saved as ``xls_reports_<study>/onsite_inventory_detail_<site>.xlsx``
    under ``BASE_DIR``. The site list comes from ``SITES[study]``; the filter
    is cleared after each download.
    """
    target_dir = os.path.join(BASE_DIR, f"xls_reports_{study}")
    os.makedirs(target_dir, exist_ok=True)

    def wait_idle():
        page.wait_for_load_state("networkidle", timeout=120000)

    page.goto(f"{BASE_URL}/report/onsite_inventory_detail")
    wait_idle()

    site_ids = SITES[study]
    for site_id in site_ids:
        print(f" [{site_id}] inventory...")
        # Open the site filter and pick this site.
        page.locator('input[placeholder="search"], input[type="text"]').first.click()
        page.get_by_role("option", name=site_id).click()
        wait_idle()

        with page.expect_download(timeout=120000) as download_info:
            page.get_by_role("button", name="Download XLS").click()
        download_info.value.save_as(
            os.path.join(target_dir, f"onsite_inventory_detail_{site_id}.xlsx")
        )

        # Reset the filter before the next site.
        page.get_by_role("button", name="Clear").click()
        wait_idle()
    print(f" Inventory OK ({len(site_ids)} center)")
|
||||
|
||||
|
||||
def download_destruction(page, study):
    """Download the IP destruction form XLS for every basket not yet on disk.

    The basket list is read from the options offered by the report's search
    field. Files are saved as
    ``xls_ip_destruction_<study>/ip_destruction_basket_<basket>.xlsx`` under
    ``BASE_DIR``; a basket whose file already exists is skipped.
    """
    out_dir = os.path.join(BASE_DIR, f"xls_ip_destruction_{study}")
    os.makedirs(out_dir, exist_ok=True)

    page.goto(f"{BASE_URL}/report/ip_destruction_form")
    page.wait_for_load_state("networkidle", timeout=120000)

    # Open the search field once to read all offered baskets, then close it.
    page.locator('input[placeholder="search"], input[type="text"]').first.click()
    page.wait_for_timeout(1000)
    baskets = [
        text.strip()
        for text in page.locator("mat-option").all_inner_texts()
        if text.strip() and text.strip() != "No results found"
    ]
    page.keyboard.press("Escape")
    page.wait_for_timeout(500)

    if not baskets:
        print(" Žádné destruction košíky")
        return

    new_count = 0
    for basket in baskets:
        filename = os.path.join(out_dir, f"ip_destruction_basket_{basket}.xlsx")
        if os.path.exists(filename):
            continue  # destruction records do not change - skip
        print(f" [košík {basket}] stahování...")
        input_field = page.locator('input[placeholder="search"], input[type="text"]').first
        input_field.click()
        input_field.fill(basket)
        page.wait_for_timeout(500)

        # Typing a basket ID can also list baskets that merely contain it
        # (e.g. "1" vs "10"), so prefer the option whose text is exactly this
        # basket. Existing files are never re-downloaded, so a wrong pick would
        # persist. Fall back to the first option when no exact match is
        # rendered, which is the previous behaviour.
        options = page.locator("mat-option")
        exact = options.filter(has_text=re.compile(rf"^\s*{re.escape(basket)}\s*$"))
        chosen = exact if exact.count() else options
        chosen.first.dispatch_event("click")
        page.wait_for_load_state("networkidle", timeout=120000)

        with page.expect_download(timeout=120000) as dl:
            page.get_by_role("button", name="Download XLS").click()
        dl.value.save_as(filename)
        new_count += 1

        page.get_by_role("button", name="Clear").click()
        page.wait_for_load_state("networkidle", timeout=120000)

    print(f" Destruction OK ({new_count} nových, {len(baskets) - new_count} přeskočeno)")
|
||||
|
||||
|
||||
def download_shipments_report(page, study):
    """Download the shipments report XLS of *study*.

    The file is saved as ``xls_shipments_<study>/shipments_report_<study>.xlsx``
    under ``BASE_DIR``.
    """
    target_dir = os.path.join(BASE_DIR, f"xls_shipments_{study}")
    os.makedirs(target_dir, exist_ok=True)
    target = os.path.join(target_dir, f"shipments_report_{study}.xlsx")

    page.goto(f"{BASE_URL}/report/shipments_report")
    page.wait_for_load_state("networkidle", timeout=120000)

    with page.expect_download(timeout=120000) as download_info:
        page.get_by_role("button", name="Download XLS").click()
    download_info.value.save_as(target)
    print(" Shipments report OK")
|
||||
|
||||
|
||||
def download_shipment_details(page, study):
    """Download the shipment details XLS for every Czech shipment of *study*.

    Shipment IDs and statuses are read from the shipments report saved at
    ``xls_shipments_<study>/shipments_report_<study>.xlsx`` (rows whose
    "Location" contains "Czech"). Files are saved as
    ``xls_shipment_details_<study>/shipment_details_<id>.xlsx`` under
    ``BASE_DIR``. A shipment is skipped when its file already exists and its
    status is RECEIVED.

    Raises:
        ValueError: the shipments report has no row containing "Shipment ID".
    """
    out_dir = os.path.join(BASE_DIR, f"xls_shipment_details_{study}")
    os.makedirs(out_dir, exist_ok=True)

    # Load the CZ shipment IDs from the shipments report on disk.
    report_path = os.path.join(BASE_DIR, f"xls_shipments_{study}", f"shipments_report_{study}.xlsx")
    raw = pd.read_excel(report_path, header=None)
    header_row = next(
        (i for i, row in raw.iterrows() if "Shipment ID" in {str(v).strip() for v in row}),
        None,
    )
    if header_row is None:
        # Previously this fell through to an opaque KeyError on "Location".
        raise ValueError(f"'Shipment ID' header row not found in {report_path}")

    df = pd.read_excel(report_path, header=header_row).dropna(how="all")
    df = df[df["Location"].astype(str).str.contains("Czech", na=False, case=False)]
    # Rows without an ID would otherwise become the literal string "nan".
    df = df[df["Shipment ID"].notna()]
    shipment_ids = df["Shipment ID"].astype(str).str.strip()
    if "IRT Shipment Status" in df.columns:
        statuses = df["IRT Shipment Status"].fillna("").astype(str).str.strip()
    else:
        statuses = [""] * len(df)
    cz_shipments = [(sid, status) for sid, status in zip(shipment_ids, statuses) if sid]
    print(f" CZ zásilek ke stažení: {len(cz_shipments)}")

    page.goto(f"{BASE_URL}/report/shipment_details_report")
    page.wait_for_load_state("networkidle", timeout=120000)

    skipped = 0
    for shipment, status in cz_shipments:
        filename = os.path.join(out_dir, f"shipment_details_{shipment}.xlsx")
        if os.path.exists(filename) and status.upper() == "RECEIVED":
            skipped += 1
            continue  # final state, the file no longer changes
        input_field = page.locator('input[placeholder="search"], input[type="text"]').first
        input_field.click()
        input_field.fill(shipment)
        page.wait_for_timeout(500)

        # Typing an ID can also list shipments that merely contain it, so
        # prefer the option whose text is exactly this ID. Fall back to the
        # first option when no exact match is rendered, which is the previous
        # behaviour.
        options = page.locator("mat-option")
        exact = options.filter(has_text=re.compile(rf"^\s*{re.escape(shipment)}\s*$"))
        chosen = exact if exact.count() else options
        chosen.first.dispatch_event("click")
        page.wait_for_load_state("networkidle", timeout=120000)

        with page.expect_download(timeout=120000) as dl:
            page.get_by_role("button", name="Download XLS").click()
        dl.value.save_as(filename)
        print(f" [{shipment}] ({status}) OK")

        page.get_by_role("button", name="Clear").click()
        page.wait_for_load_state("networkidle", timeout=120000)

    print(f" Přeskočeno (RECEIVED): {skipped}")
|
||||
|
||||
|
||||
# ── main ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
def main():
    """Download all reports for every study, then import them into MongoDB.

    A fresh browser is launched per study. A download error for one study is
    printed with its traceback and does not stop the remaining studies or the
    import; an import error is reported the same way.
    """
    import traceback

    rule = "=" * 60
    # (progress line, download function) in execution order
    steps = (
        ("\n [1/4] Onsite inventory...", download_inventory),
        ("\n [2/4] IP destruction...", download_destruction),
        ("\n [3/4] Shipments report...", download_shipments_report),
        ("\n [4/4] Shipment details (CZ)...", download_shipment_details),
    )

    os.chdir(BASE_DIR)

    # -- Downloading --
    with sync_playwright() as p:
        for study in STUDIES:
            print(f"\n{rule}")
            print(f"[{study}] STAHOVÁNÍ")
            print(rule)

            browser = p.chromium.launch(headless=False)
            context = browser.new_context(accept_downloads=True)
            page = context.new_page()

            try:
                print(" Přihlášení...")
                login(page, study)
                for progress_line, download_step in steps:
                    print(progress_line)
                    download_step(page, study)
            except Exception as e:
                print(f" CHYBA při stahování: {e}")
                traceback.print_exc()
            finally:
                browser.close()

    # -- Import into MongoDB --
    print(f"\n{rule}")
    print("IMPORT DO MongoDB")
    print(rule)

    try:
        drugs_mongo.run(STUDIES)
    except Exception as e:
        print(f" CHYBA při importu: {e}")
        traceback.print_exc()

    print(f"\n{rule}")
    print("Vše hotovo.")
    print(rule)
|
||||
|
||||
|
||||
# Run the pipeline only when executed as a script, so that importing this
# module does not start browser downloads and a database import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user