From 033c3e5046d575dcc6d28da0bada990bca7a17a2 Mon Sep 17 00:00:00 2001 From: "vladimir.buzalka" Date: Wed, 10 Jun 2026 09:26:06 +0200 Subject: [PATCH] z230 --- IWRS/Drugs/download_drugs.py | 220 +++++++++++++ IWRS/Drugs/import_drugs.py | 306 ++++++++++++++++++ .../shipment_details_102839.xlsx | Bin 0 -> 5350 bytes .../shipment_details_102840.xlsx | Bin 0 -> 6266 bytes IWRS/Patients/download_patients.py | 48 +++ IWRS/Patients/import_patients.py | 90 ++++++ IWRS/common/iwrs_portal.py | 24 ++ IWRS/common/paths.py | 54 ++++ IWRS/run_all_v1.0.md | 110 +++++++ IWRS/run_all_v1.0.py | 147 +++++++++ 10 files changed, 999 insertions(+) create mode 100644 IWRS/Drugs/download_drugs.py create mode 100644 IWRS/Drugs/import_drugs.py create mode 100644 IWRS/Drugs/xls_shipment_details_42847922MDD3003/shipment_details_102839.xlsx create mode 100644 IWRS/Drugs/xls_shipment_details_42847922MDD3003/shipment_details_102840.xlsx create mode 100644 IWRS/Patients/download_patients.py create mode 100644 IWRS/Patients/import_patients.py create mode 100644 IWRS/common/iwrs_portal.py create mode 100644 IWRS/common/paths.py create mode 100644 IWRS/run_all_v1.0.md create mode 100644 IWRS/run_all_v1.0.py diff --git a/IWRS/Drugs/download_drugs.py b/IWRS/Drugs/download_drugs.py new file mode 100644 index 0000000..c43cbf0 --- /dev/null +++ b/IWRS/Drugs/download_drugs.py @@ -0,0 +1,220 @@ +""" +download_drugs.py — stažení Drugs reportů pro jednu studii do IWRS/Incoming/. +Verze: 1.0 | Datum: 2026-06-10 + +Volá se z IWRS/run_all_v1.0.py s již přihlášenou Playwright page (login + +výběr studie zajišťuje common.iwrs_portal.login). + + 1. Onsite inventory detail (per site, stahuje se vždy) + 2. IP destruction (per košík; přeskočí košíky už importované + v Mongo iwrs_destruction — destrukce se nemění) + 3. Shipments report (jeden soubor na studii, stahuje se vždy) + 4. Shipment details (per CZ zásilka; přeskočí zásilky, jejichž + položky jsou v Mongo iwrs_shipment_items se + statusem RECEIVED — finální stav) + +Názvy souborů (datumované, aby zapadly do Incoming/ flow): + YYYY-MM-DD {study} Onsite Inventory {site}.xlsx + YYYY-MM-DD {study} IP Destruction {basket}.xlsx + YYYY-MM-DD {study} Shipments Report.xlsx + YYYY-MM-DD {study} Shipment Details {shipment_id}.xlsx +""" + +import os +import sys +import datetime + +import pandas as pd + +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) +IWRS_DIR = os.path.dirname(BASE_DIR) +for _p in (IWRS_DIR, BASE_DIR): + if _p not in sys.path: + sys.path.insert(0, _p) + +from common.iwrs_portal import BASE_URL +from common.paths import INCOMING_DIR, unique_path +from common.mongo_writer import get_db + +SITES = { + "77242113UCO3001": [ + "DD5-CZ10001", "DD5-CZ10003", "DD5-CZ10006", "DD5-CZ10009", + "DD5-CZ10010", "DD5-CZ10012", "DD5-CZ10013", "DD5-CZ10015", + "DD5-CZ10016", "DD5-CZ10020", "DD5-CZ10021", "DD5-CZ10022", + ], + "42847922MDD3003": [ + "S10-CZ10002", "S10-CZ10004", "S10-CZ10005", + "S10-CZ10008", "S10-CZ10011", "S10-CZ10012", + ], +} + + +def _today(): + return datetime.date.today().strftime("%Y-%m-%d") + + +# ── skip-logika přes Mongo (náhrada za dřívější "soubor existuje") ─────────── + +def get_existing_baskets(study): + """Košíky už importované v iwrs_destruction — destrukce je immutable.""" + try: + db = get_db() + return set(db.iwrs_destruction.distinct("basket_id", {"study": study})) + except Exception as e: + print(f" UPOZORNĚNÍ: nelze načíst košíky z Mongo ({e}), stahuji vše") + return set() + + +def get_received_shipments(study): + """Zásilky, jejichž položky už jsou v Mongo se statusem RECEIVED (finální stav).""" + try: + db = get_db() + return set(db.iwrs_shipment_items.distinct( + "shipment_id", + {"study": study, "shipment_status": {"$regex": "^received$", "$options": "i"}}, + )) + except Exception as e: + print(f" UPOZORNĚNÍ: nelze načíst zásilky z Mongo ({e}), stahuji vše") + return set() + + +# ── download funkce ────────────────────────────────────────────────────────── + +def download_inventory(page, study): + today = _today() + page.goto(f"{BASE_URL}/report/onsite_inventory_detail") + page.wait_for_load_state("networkidle", timeout=120000) + + for site_id in SITES[study]: + print(f" [{site_id}] inventory...") + page.locator('input[placeholder="search"], input[type="text"]').first.click() + page.get_by_role("option", name=site_id).click() + page.wait_for_load_state("networkidle", timeout=120000) + + filename = unique_path(INCOMING_DIR, f"{today} {study} Onsite Inventory {site_id}") + with page.expect_download(timeout=120000) as dl: + page.get_by_role("button", name="Download XLS").click() + dl.value.save_as(filename) + + page.get_by_role("button", name="Clear").click() + page.wait_for_load_state("networkidle", timeout=120000) + print(f" Inventory OK ({len(SITES[study])} center)") + + +def download_destruction(page, study): + today = _today() + page.goto(f"{BASE_URL}/report/ip_destruction_form") + page.wait_for_load_state("networkidle", timeout=120000) + + page.locator('input[placeholder="search"], input[type="text"]').first.click() + page.wait_for_timeout(1000) + baskets = [b.strip() for b in page.locator("mat-option").all_inner_texts() + if b.strip() and b.strip() != "No results found"] + page.keyboard.press("Escape") + page.wait_for_timeout(500) + + if not baskets: + print(" Žádné destruction košíky") + return + + existing = get_existing_baskets(study) + new_count = 0 + for basket in baskets: + if basket in existing: + continue # destrukce se nemění — přeskočit + print(f" [košík {basket}] stahování...") + input_field = page.locator('input[placeholder="search"], input[type="text"]').first + input_field.click() + input_field.fill(basket) + page.wait_for_timeout(500) + page.locator("mat-option").first.dispatch_event("click") + page.wait_for_load_state("networkidle", timeout=120000) + + filename = unique_path(INCOMING_DIR, f"{today} {study} IP Destruction {basket}") + with page.expect_download(timeout=120000) as dl: + page.get_by_role("button", name="Download XLS").click() + dl.value.save_as(filename) + new_count += 1 + + page.get_by_role("button", name="Clear").click() + page.wait_for_load_state("networkidle", timeout=120000) + + print(f" Destruction OK ({new_count} nových, {len(baskets) - new_count} přeskočeno)") + + +def download_shipments_report(page, study): + today = _today() + page.goto(f"{BASE_URL}/report/shipments_report") + page.wait_for_load_state("networkidle", timeout=120000) + + filename = unique_path(INCOMING_DIR, f"{today} {study} Shipments Report") + with page.expect_download(timeout=120000) as dl: + page.get_by_role("button", name="Download XLS").click() + dl.value.save_as(filename) + print(f" Shipments report OK -> {os.path.basename(filename)}") + return filename + + +def download_shipment_details(page, study, shipments_report_path): + today = _today() + + # načti CZ shipment IDs z právě staženého shipments reportu + raw = pd.read_excel(shipments_report_path, header=None) + header_row = None + for i, row in raw.iterrows(): + if "Shipment ID" in [str(v).strip() for v in row]: + header_row = i + break + df = pd.read_excel(shipments_report_path, header=header_row) + df = df.dropna(how="all") + df = df[df["Location"].astype(str).str.contains("Czech", na=False, case=False)] + cz_shipments = list(zip( + df["Shipment ID"].astype(str).str.strip(), + df["IRT Shipment Status"].astype(str).str.strip() if "IRT Shipment Status" in df.columns else [""] * len(df), + )) + print(f" CZ zásilek celkem: {len(cz_shipments)}") + + received = get_received_shipments(study) + + page.goto(f"{BASE_URL}/report/shipment_details_report") + page.wait_for_load_state("networkidle", timeout=120000) + + skipped = 0 + for shipment, status in cz_shipments: + if shipment in received: + skipped += 1 + continue # položky v Mongo už mají finální stav RECEIVED + input_field = page.locator('input[placeholder="search"], input[type="text"]').first + input_field.click() + input_field.fill(shipment) + page.wait_for_timeout(500) + page.locator("mat-option").first.dispatch_event("click") + page.wait_for_load_state("networkidle", timeout=120000) + + filename = unique_path(INCOMING_DIR, f"{today} {study} Shipment Details {shipment}") + with page.expect_download(timeout=120000) as dl: + page.get_by_role("button", name="Download XLS").click() + dl.value.save_as(filename) + print(f" [{shipment}] ({status}) OK") + + page.get_by_role("button", name="Clear").click() + page.wait_for_load_state("networkidle", timeout=120000) + + print(f" Přeskočeno (RECEIVED v Mongo): {skipped}") + + +def run(page, study): + """Stáhne všechny 4 typy Drugs reportů pro studii do IWRS/Incoming/.""" + os.makedirs(INCOMING_DIR, exist_ok=True) + + print("\n [1/4] Onsite inventory...") + download_inventory(page, study) + + print("\n [2/4] IP destruction...") + download_destruction(page, study) + + print("\n [3/4] Shipments report...") + report_path = download_shipments_report(page, study) + + print("\n [4/4] Shipment details (CZ)...") + download_shipment_details(page, study, report_path) diff --git a/IWRS/Drugs/import_drugs.py b/IWRS/Drugs/import_drugs.py new file mode 100644 index 0000000..53474a7 --- /dev/null +++ b/IWRS/Drugs/import_drugs.py @@ -0,0 +1,306 @@ +""" +import_drugs.py — import Drugs reportů z IWRS/Incoming/ do MongoDB. +Verze: 1.0 | Datum: 2026-06-10 + +Nahrazuje Drugs/import_to_mongo.py (ten parsoval pevné adresáře xls_*; +nyní se parsují datumované soubory z IWRS/Incoming/). + +Per studie a běh: jeden import_id. Soubory se zpracují nejstarší napřed, +při více souborech stejného záznamu vyhrává poslední (poslední stav). +Po úspěšném zápisu do Monga se zparsované soubory přesunou do +IWRS/Incoming/Processed/; soubor s chybou parsování zůstává v Incoming/. + +Cílové kolekce (db `studie`): + iwrs_shipments / iwrs_shipment_items / iwrs_inventory (upsert + snapshot) + iwrs_destruction (upsert only, immutable) + +Volá se z IWRS/run_all_v1.0.py (ensure_indexes volá orchestrátor); +lze spustit i samostatně: python import_drugs.py +""" + +import os +import re +import sys +import glob + +import pandas as pd + +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) +IWRS_DIR = os.path.dirname(BASE_DIR) +for _p in (IWRS_DIR, BASE_DIR): + if _p not in sys.path: + sys.path.insert(0, _p) + +from common.paths import INCOMING_DIR, STUDIES, move_done, sorted_by_mtime +from common.mongo_writer import ( + to_str, to_int, to_date, + ensure_indexes, log_import, + bulk_upsert_with_snapshot, bulk_upsert_only, +) + + +def _pending(pattern): + return sorted_by_mtime(glob.glob(os.path.join(INCOMING_DIR, pattern))) + + +def _find_header_row(raw, marker): + for i, row in raw.iterrows(): + if marker in [str(v).strip() for v in row]: + return i + return None + + +# ── XLSX parsery (per soubor) ──────────────────────────────────────────────── + +def parse_shipments_file(path, study): + raw = pd.read_excel(path, header=None) + header_row = _find_header_row(raw, "Shipment ID") + if header_row is None: + raise ValueError("hlavičkový řádek 'Shipment ID' nenalezen") + df = pd.read_excel(path, header=header_row).dropna(how="all") + df = df[df["Location"].astype(str).str.contains("Czech", na=False, case=False)] + col = df.columns.tolist() + rows = [] + for _, r in df.iterrows(): + sid = to_str(r["Shipment ID"]) + if not sid: + continue + rows.append({ + "_id": sid, + "shipment_id": sid, + "study": study, + "status": to_str(r["IRT Shipment Status"]), + "type": to_str(r["Type"]), + "ship_from": to_str(r["Shipment From"]), + "ship_to_site": to_str(r["Ship To:"]), + "location": to_str(r["Location"]), + "request_date": to_date(r["Request Date"]), + "shipped_date": to_date(r["Shipped Date"]), + "received_date": to_date(r["Received Date"]) if "Received Date" in col else None, + "received_by": to_str(r["Received by"]) if "Received by" in col else None, + "delivered_date_utc": to_date(r["Delivered Date [UTC]"]) if "Delivered Date [UTC]" in col else None, + "delivery_recipient": to_str(r["Delivery Recipient"]) if "Delivery Recipient" in col else None, + "delivery_details": to_str(r["Delivery Details"]) if "Delivery Details" in col else None, + "cancelled_date": to_date(r["Cancelled Date"]) if "Cancelled Date" in col else None, + "total_medication_ids": to_int(r["Total Medication IDs"]) if "Total Medication IDs" in col else None, + "tracking_no": to_str(r["Tracking #"]) if "Tracking #" in col else None, + "shipping_category": to_str(r["Shipping Category"]) if "Shipping Category" in col else None, + "expected_arrival": to_date(r["Expected Arrival"]) if "Expected Arrival" in col else None, + }) + return rows + + +def parse_shipment_details_file(path, study): + # shipment_id z názvu: "... Shipment Details {id}[ HHMM].xlsx" + m = re.search(r"Shipment Details (\S+?)(?: \d{4})?\.xlsx$", os.path.basename(path)) + shipment_id = m.group(1) if m else "UNKNOWN" + raw = pd.read_excel(path, header=None) + header_row = _find_header_row(raw, "Medication ID") + if header_row is None: + raise ValueError("hlavičkový řádek 'Medication ID' nenalezen") + df = pd.read_excel(path, header=header_row).dropna(how="all") + rows = [] + for _, r in df.iterrows(): + med_desc = (to_str(r.get("Medication Description")) + or to_str(r.get("Medication ID Description"))) + med_type = (to_str(r.get("Medication type")) + or to_str(r.get("Medication ID type"))) + med_id = to_str(r.get("Medication ID")) + if not med_id: + continue + rows.append({ + "_id": f"{shipment_id}:{med_id}", + "study": study, + "shipment_id": shipment_id, + "destination_location": to_str(r.get("Destination Location")), + "shipment_status": to_str(r.get("IRT Shipment Status")), + "shipment_type": to_str(r.get("Type")), + "destination_site": to_str(r.get("Destination Site")), + "investigator": to_str(r.get("Investigator")), + "medication_description": med_desc, + "medication_type": med_type, + "medication_id": med_id, + "packaged_lot_no": to_str(r.get("Packaged Lot number")), + "packaged_lot_description": to_str(r.get("Packaged Lot description")), + "container_id": to_str(r.get("Container ID")), + "quantity": to_int(r.get("Quantity of Medication IDs")), + "expiration_date": to_date(r.get("Expiration Date")), + "item_status": to_str(r.get("Status")), + }) + return rows + + +def parse_inventory_file(path, study): + raw = pd.read_excel(path, header=None) + site = investigator = location = None + header_row = None + for i, row in raw.iterrows(): + first = str(row.iloc[0]).strip() if pd.notna(row.iloc[0]) else "" + if first.startswith("Site:"): + site = first.replace("Site:", "").strip() + elif first.startswith("Investigator:"): + investigator = first.replace("Investigator:", "").strip() + elif first.startswith("Location:"): + location = first.replace("Location:", "").strip() + if first in ("Medication", "Medication ID") and header_row is None: + header_row = i + if header_row is None: + raise ValueError("hlavičkový řádek 'Medication' nenalezen") + df = pd.read_excel(path, header=header_row).dropna(how="all") + df = df.rename(columns={df.columns[0]: "medication_id"}) + rows = [] + for _, r in df.iterrows(): + med_id = to_str(r["medication_id"]) + if not med_id or not site: + continue + rows.append({ + "_id": f"{site}:{med_id}", + "study": study, + "site": site, + "investigator": investigator, + "location": location, + "medication_id": med_id, + "packaged_lot_no": to_str(r.get("Packaged Lot number")), + "original_expiration_date": to_date(r.get("Original Expiration Date when Packaged Lot was Added")), + "expiration_date": to_date(r.get("Expiration date")), + "received_date": to_date(r.get("Received Date")), + "receipt_user": to_str(r.get("Shipment Receipt User")), + "subject_identifier": to_str(r.get("Subject Identifier")), + "quantity_assigned": to_int(r.get("Quantity Assigned")), + "irt_transaction": to_str(r.get("IRT Transaction")), + "date_assigned": to_date(r.get("Date Assigned")), + "assignment_user": to_str(r.get("Assignment User")), + "dispensation_status": to_str(r.get("Dispensation Status")), + "dispensing_date": to_date(r.get("Dispensing date") or r.get("Dispensing Date")), + "quantity_dispensed": to_int(r.get("Quantity Dispensed")), + "dispensing_user": to_str(r.get("Dispensing User")), + "quantity_returned": to_int(r.get("Quantity Returned")), + "date_returned": to_date(r.get("Date Returned")), + "return_user": to_str(r.get("Return User")), + }) + return rows + + +def parse_destruction_file(path, study): + raw = pd.read_excel(path, header=None) + meta = {} + header_row = None + for i, row in raw.iterrows(): + first = str(row.iloc[0]).strip() if pd.notna(row.iloc[0]) else "" + for key, attr in [ + ("Investigator Name:", "investigator"), + ("Site ID:", "site_id"), + ("Location:", "location"), + ("Basket ID:", "basket_id"), + ("Drug Destruction Created Date:", "destruction_date"), + ]: + if first.startswith(key): + meta[attr] = first.replace(key, "").strip() + if first == "Medication ID Description" and header_row is None: + header_row = i + if header_row is None: + raise ValueError("hlavičkový řádek 'Medication ID Description' nenalezen") + df = pd.read_excel(path, header=header_row).dropna(how="all") + basket_id = meta.get("basket_id") + rows = [] + for _, r in df.iterrows(): + med_id = to_str(r.get("Medication ID")) + if not med_id or not basket_id: + continue + rows.append({ + "_id": f"{basket_id}:{med_id}", + "study": study, + "site_id": meta.get("site_id"), + "investigator": meta.get("investigator"), + "location": meta.get("location"), + "basket_id": basket_id, + "destruction_date": to_date(meta.get("destruction_date")), + "medication_description": to_str(r.get("Medication ID Description")), + "medication_id": med_id, + "packaged_lot_description": to_str(r.get("Packaged Lot description")), + "comments": to_str(r.get("Comments")), + }) + return rows + + +# ── zpracování souborů ─────────────────────────────────────────────────────── + +def _parse_files(files, parser, study, label): + """Zparsuje soubory (nejstarší napřed, poslední vyhrává per _id). + + Vrací (docs, ok_paths, failed_paths). + """ + docs, ok, failed = {}, [], [] + for path in files: + try: + for d in parser(path, study): + docs[d["_id"]] = d + ok.append(path) + except Exception as e: + failed.append(path) + print(f" [{study}] CHYBA parsování {label} {os.path.basename(path)}: {e}") + return list(docs.values()), ok, failed + + +def import_study(study): + ship_files = _pending(f"* {study} Shipments Report*.xlsx") + item_files = _pending(f"* {study} Shipment Details *.xlsx") + inv_files = _pending(f"* {study} Onsite Inventory *.xlsx") + dest_files = _pending(f"* {study} IP Destruction *.xlsx") + + if not (ship_files or item_files or inv_files or dest_files): + print(f" [{study}] drugs: nic ke zpracování") + return + + shipments, ok_ship, _ = _parse_files(ship_files, parse_shipments_file, study, "shipments") + items, ok_item, _ = _parse_files(item_files, parse_shipment_details_file, study, "details") + inventory, ok_inv, _ = _parse_files(inv_files, parse_inventory_file, study, "inventory") + destruct, ok_dest, _ = _parse_files(dest_files, parse_destruction_file, study, "destruction") + + ok_files = ok_ship + ok_item + ok_inv + ok_dest + if not ok_files: + print(f" [{study}] drugs: žádný soubor se nepodařilo zparsovat") + return + + print(f" [{study}] Zásilky: {len(shipments)} | Položky: {len(items)} | " + f"Sklad: {len(inventory)} | Destrukce: {len(destruct)}") + + import_id = log_import(study, f"drugs_{study}", "drugs", { + "shipments": len(shipments), + "shipment_items": len(items), + "inventory": len(inventory), + "destruction": len(destruct), + }) + print(f" [{study}] import_id = {import_id}") + + bulk_upsert_with_snapshot("iwrs_shipments", "iwrs_shipments_snapshots", shipments, import_id) + bulk_upsert_with_snapshot("iwrs_shipment_items", "iwrs_shipment_items_snapshots", items, import_id) + bulk_upsert_with_snapshot("iwrs_inventory", "iwrs_inventory_snapshots", inventory, import_id) + bulk_upsert_only("iwrs_destruction", destruct, import_id) + + # zápis do Monga prošel → archivovat zdrojové soubory + for path in ok_files: + move_done(path) + print(f" [{study}] drugs: {len(ok_files)} soubor(ů) přesunuto do Processed") + + +def run(studies=None): + studies = studies or STUDIES + if not os.path.isdir(INCOMING_DIR): + print(f"Adresář neexistuje: {INCOMING_DIR}") + return + print("=" * 60) + print("Import Drugs (shipments / items / inventory / destruction)") + print("=" * 60) + for study in studies: + try: + import_study(study) + except Exception as e: + import traceback + print(f" [{study}] CHYBA importu drugs: {e}") + traceback.print_exc() + + +if __name__ == "__main__": + ensure_indexes() + run(sys.argv[1:] or None) diff --git a/IWRS/Drugs/xls_shipment_details_42847922MDD3003/shipment_details_102839.xlsx b/IWRS/Drugs/xls_shipment_details_42847922MDD3003/shipment_details_102839.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..914560d56f4cb740f5ef6d9c3bee27e1145e1c96 GIT binary patch literal 5350 zcmZ`-1yodP*B-jNV?Y53X$GW`kQ4@xA&0tzFi3-ROGytY4bt5$F?6?xARq$vH5 z*Y$t*%K!am);VXLwa&B8exJ4XexG?Xo}!_X0{{SQ0K34jA(o7qSs?PM26+-APYVZg z4QB^O7v8@d9eF(LY}H~U@lg4RpWF^RwKvHiWX+@}pHjHC6fcQ3iJZeHo*|cnyp%|wibxZAr zFh(KUnUJBv1HAR6#?g|kr{F#Y!L761a>=aF0qgf<#<@drn6btm=>nuvoxUN{QL^EDU zYdrlJz&xi=7mA_xGl{)*E~i^MZNRag&hY34bV~D_xR^~5O|f_w_eqoLX;F*AXIwo~ zRYczbYHHG(GJi7{tX@%3#kRsi*ux^V@dDsO4{tA@p*>2s@N%sDQ%-|EHg%{|>Q!q? z+lpj(W%5g{!NTGV@S{>2i0@@b^UYx!Zrjv+`}IIS6{0f4#C^&a4WKRyBIJGtRg)f8XLfk~j{+O($y{J=i}k6&`(VZp z61`|!PYZakf2ZOj;*n&6{dl$3bP05>-OF)##qAvt&y#OFW;>zbiN_wMA9)w&+M0Nm zS9JwgNM!hEimDE~a0^t88w_B89ueOmGSZ$;L6O)`J;+Dq3|FDFyv%c7>KB)Ef?>9~ z)QT3SzQ$bE(vuKP0|%3P^_z^KpA;jIXv;hlGF=(gmhk%HbKAbFg=HzriCrGC1{lut|&gvxQ1{VQI9M`pggCmA!Q9 z4BTj_`pgUWrJ0!5{S}iQ1RY1}VLg?v`^zR>_h77S*Of2wcPXu971sB>NGTdNrDyH| z^c>yhMyJt|jX5q}CenTcwk^pR9_oD)eNA%H--kX2o2E$?RB+CW!yp6*_fDy5_*BU> zMyz(nPaDR~O>i`5&|;?z27Xrzy5IMy>iT<1+fY8pE7MnO{6(TatPG`Szb$#qf2J!Utw(a;$+TZ^j5My6K7}ef(HT<2j{qZUGj&CK<_{%Zbxa=;6H5OqtLW7FN;{Q9m^$vK!8Vg`12S>BB@`mJ1blEUb>X z$dz?$2SkfdsG@rrW<4$8b%Sx9A%sg0rvsu|(bXy34~DfJGE}j|q*Zj2aLP)V(LADc zZlDZobSeopH6ZPu@&HfQlU(VS3nfUI4t^bjzZ{=~6OzOm3nSkhkBlSfWC#^eN!x$x zq=$Z;W!$5T*a;eWe%$SC1*3tgON0#8KG<|Mn%DAV2buui?SCPYq z4|{QTET9i?Mj$hm!V#odvUK*lTHjTYK5SOCus2IUH+HLXU8T4t&LvE?>&Ngwa!t-p zzceOxG_Z=OoYsswf*D5e#jPabfsO7tK%vVsBexCa0zR!2xn^3~4m7*g%uiw>S#RJi zxHHe<9D2@o`#fmZL_eap_`cD#+n%4Ay4*%+R_7f^Bm)P+kKf+^mK*wJqDL;w)7#{Y zRZ_o(Ai14wISZFUlWFsZBQEk)3sdeKtXEZJA&pdJ6Ewjef;oHa-cM5|32L zJdvtUwA{a<8^Xr#z^UG==)z`qqFGSgV=Tt&s>-pruW;#LJT3s_=M)8y|-Y z8m>>ere?JNemgy*(u>LMBE=nHdVK9Tq)4pCGQJn&H_;g5oNV z%A7=xNl7KJq46y~4wDRJEOF?`A!?9CA$`D`3%vGY{1y(1@d_zXe|fG~pboNP?W@<{=MGDi>JsLKfE`V{Nn@tfo(U zyzDc%*=`q&XC>>YBo$8bQdb%3!d%k5n!Dx8Z}sLF&IMLP9T&xx(Tumqxa9Kv5WW!+ z0VHK+9j#UB=MOT(>%ETx@1XL4mKhXSaFRTJisi!?k7*tSJry*GB{!=m ztf}Z<;K%n2&^y7;eJ-*RCZ9$NdLLOX8jb59^Im%M@!~9j^%;R6JsiWBKeh2u-Ub*~ z;?vCmL3MX;%;){c&N~|jy2;*!CK>?Xj1K@%|K32{9h_gdSX)}Uy72ye`+FD3OmT3Z z<|FU#Q~{gp(TZV)pwTCZEjAR6I(TBL|t0xw5Urdr^Sygu{4O=`}1LYqi_Esvw2Gpq4 zGpBNV9>Mbh*&(A^ZfW2nIt_I`luQ*I9k~~*qFlnF&{Mi&+rk4{H5T-2Y zG#5B2&PeV_lL9^X?o!G2ERVxN_F-)hIg1uCyf4d^s*jh_VV%>s>@w8Pj1L_VX86yBNAGfIg3m1UV&w zw0)JjXjm!J!Vl%elf|X1?Im;sM?n!me5Ok642!-ld!Re}VO&ZGjkzkIWsQTg!iBrL z_OH)qQJrtbl^e1^eGoYG@CUWjij^sizn^L|sM7W;mf)=GiyaCrS%c$7#`*MbVHU`tPB~He3$1b4=Ahhwkyk%%$gqd@i0y4&4s;{$l2Pg1~wFLXP;qy zJA+}EEv%amSBlnA07F$jTjf^Z!z*ST*?DWZ8Z+6qsX;SXhd;d|pM6<;NYM9$zK8Qz za@y*uU&n?WyM_nc{2`{rTRdyUQ&4{iBVlZ5zoBBDFE!vrQPBGbDGk2I)@6Gy_vl`R zl#DMXy0wF#*Aj|1v+{P`7m)1gl6Pn8-V=@Anh2oy&SfLoD`w%>b{e1#6PlG<|Mg2= z8pauWKB-VaTE;NPb@NfYe#M%UsMw&}=RT^1LqCzhz1oepzw)k|mlE{*$6WDuteE=P zmQqEnRQCzqO-h3CCdR%9Ba7I0!-si zcJhX3!~$Iql{e3${Luc?aXu$`pEQvI_Y4OBAo$&eF0P)omM*^(+>*{>#3CQ@HN|J5 z;)FLsPRhp`a;CZ5cx^zv!vu)B;E47t&PcG}M* z#TO7i?HUJ_T_Z# z{N4cO;%N13IyO$UnJXE^>1|Qd&SmyeEx5zr)-u3Gb<&@3E|QJSz!T@9pgs0+2;u8b z^4aD1Ddy?E1DRTNcy84B$J0>@`j4Xd4s6hVWHFkS99;FBE7bdF+*qPEedO{di>7xK z%Jk~4?boWiqOfY_DjEycn-*MSO7ns&?Giu1M12C}WqBum_F09QoXre|&i{}w38n8f z*TeB!$V6H>n;FD@kENDX^hG?GK1V*GFa_IK$*PL3fwsJ6GDLAtELBGBdj;1~%9+Au zDE>o2g^M=hnVCZ2jdjVK=ND6<8cEaXO4=$vXyL9aahlq@FfFGz8MVL#U{rR@EAgZ) zatv_NaJ1JeSbixxx;uY+o~5#DQLnfp;}>p@h*A~i@B$D!E-s=8u2786hlR@#aR~G| zksNkdDe<`}YPux&v-VOV+UUEGHAjZ5KRo2!#M#o;g_q~oH8Z}&zK0KA+P`lyiw|q^ zemKbTh!XuAMdvNiJSj$md}e3WLV;5p9*w7@1nDORnDo9#eq&?_^)>M`n8EvI6s5)o zv9NKtY~?H$gHYJC^piz%xoQJ0DQO;>=S}SvltnK2y5WW&G7BUZdDUec0;nH|CT1gD5An`WvC3A)>E-fIk;Xe2h0Mi2?u!A{R>ZyUBj-b!H9@FMhRIw5ANW zhmZJ5Wdkd&K1c;ZK=m+(G$`V&F{kjdVA;a5bvrD#Y5+{G`5KSS`?>CV>d~62&nGfq z9j}!yyEgOm_=yCBKm=%CjKRnzR5L?*pYeE7z?zH};Pfb)vX9H1w%toF%HYL#6$%a{ zXoER5oV#1}YMg8oOzjv;x!DG}UJ4qf_j2_>(JGa-%*{Q)<2N3RY)p?BT6m@wilEf1 zzL0Z>A5w28l4SO}&CP>98;fAqW5(JQ_VNWvX5Hf$+$^O4X>Thbt^cfoO{xH*9dAzf{O-cjskclY7GDysFi{`Nr}!CkzzI@c8Rz ztUsA%VMu>lfn@k6l5N`GO#iN`fAjr!g*Ct>319Kyhg6Yn0)#Y52A4s?Fl|KNtdMTa&xn=QN(MD7?TNwix+(0LX{dNAQfV^QqfVm9OcC zV!oShw;_&aZYC zh6E__kCDw+8VE9vQg-jez4xGcXNMAxT~9i-L$%Z{GkuBZ?ok^Rnmi#D(hq%zJ;uZ@ zPl{VyCQYrFc=R+P%7P8gfsS9i#idmgD`~B;s z3>f;V13tEXjSmyv2sG2%n)vD6PjyQ-enis!t@ClsyE!ne|5oJZ$Hx9hCQ(qy(f)r= z3Q6bRM*x!F|LaiQMc=iEeq#ZE0FDyi>AxvK&G69pv^IhK)W{G$l&Lhp|7f1t#Ke+=}yz`G;yAK*C>i2UZi8}=^v yZkPW9Hb?r3NbvvD^LOLiZKQwVC?O5M|I$_(Pce|57XW~R{7E5)c|7T_r~d(@8azJ$ literal 0 HcmV?d00001 diff --git a/IWRS/Drugs/xls_shipment_details_42847922MDD3003/shipment_details_102840.xlsx b/IWRS/Drugs/xls_shipment_details_42847922MDD3003/shipment_details_102840.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..77b74a7f8e3e29cc7e19825062f1bcc1240883b2 GIT binary patch literal 6266 zcmZ`-1yqym`yS<_I~^e{3`uE5hX?~Cr3D5I1x6?#qePGtknRp?M39t_ZbnEWAt2HS z(*LRF3;BP)?Y!@M-g9={_w`)6<9Tk)dsx^M007`RfJ>mq7$~c5;fH>!Kwl*2%gWJG z)7cT>!ut?`xb1HLSUo}tACsR%;j+i6u|~E<&O&DBet9MFvm3mTR`Qi{-c4dHuN)X0 zX_V&iY`7K+iFg8aUM(?469XuzL#m-B`3|6X+gwWzfW&IlK-i$lUGt(*h63EqX> zL=j5q&O{6)?$9q^D-bY_w%nUIgqP0tbA_XKw%NbOF^=y@z)dy(pqui;;LR4ACO0Mk zK>BaGmX6M_pM3kHTU0;tkp!A4Iy-XMPvT`Jhc4cgMab?_MNiI}qXMmRW@|*k>rE6G zn=P#`y!Hvz@*9UNn^*Zf39R_%`brtIhP-sv}nwuv@MC|3Km3p@yKE{(x#ukHmsbD^x|31vyf>rS3}HE;W^ zEjMDd@dgL{rdxDPOEvzE*pBnm6O7*a4@KzeYc4hbJHld6! zNDaatdssyU_BPQOP0h2ub&PmDzC1IdhBoAwpa^%8MEhZKFK>hUSGpI zx1cY;N-E1on_srmjF+os`pyszXcI+tlasa0_=jG<=T6Z(ZoFVb$ICMQwQ71+F97~H zlSbLf9BB%+l^N2}GITU!s#;+b{6X2P6AsJ9AlH{=uYFmycl@~hY-$by8(bHs7w~i7 zyJ5iOGvJuN3^jcDS*P`5vz?8!-lNXp5fxwmZe$Ej-QB}%6y5i1 zfC1v^c;5(CoawETCoy!B^!2l{#ybXEV!@;r9qriT@DbWLAtmS3NE{-7$j4zdO|LT9 z>X3!j=n><{@xfc~H0iDw0&@&Y&KJX~U2VR7czgeF=`1;Ic zZ8<&v^@O_I0HOhmyL-xF^e~9SL4w>_9AqQsHF$U!mCGhDxP6Sb0hP32)Qc-O85~?r z3v*_vpFK1{9%?8%uuK#-zh$0?QDRk0@8YRjCKY{mmr#IQ`g&?$f}JJ^GY6MHE;H~) zRYx)D0y(m#>3Y|E_{*ar$0RBJOoO{q0Y6BT=oV94zPJoOJ_Y6~4Ik<@%w)$KP-4W> z%nQA_18yI8k8R#9Ns*){C+m;Fza|?P&&p0#DCVuMLVm%WJ9UwQlG%yjWj#@idIf*W zlJCmiza~i6qEvR{Ns#S`q~`_BampP$P}B{2Oq&~Tm2L*W>vtH+SR;~3T1mO(AQrU3 zbk6Un{HmP_1I!J{I)>e$BUPkl4~p-+hM0GK>yLUeFdjuj8f_|q{_Q|$BLxh*7Zck^VDHJ~i%0S^mRY!jEA!KQCHyqZrzpKVhtd!MqzLzvwUN6gZg@6Z-CCyf=`tb$j zYn<cw80tLLtbJPTrd95{A9Y1PC<<1S#nUt_ed%<9YQHCYT1|{8P+Kon=n3Pu$=jRf zA;1M2y?j8nsBW@3wwjz$>dE}{B;KvXeD565txUWkv1)c;B46r#IJ

@LEDeEybgq z6vI^!lO+~(jfz@eS#v8o@)|ite@wT+4yM0V9>_Q01ix{Qp!OE!KnX<5M-luy5NbNY zMn(}*PJnZh+IeYv`?Z~0*u)9g&ptp$z-o-TM?SbkO3+#UlMdT{=@dKui|Iw3{(8D5 zcJq5}o(`!z9GBCkqf%AWQcC;Ti3<#GgItn5-?u6jUmA=v91F~gA*RLWuuQ*@gXMF) zTaY0kzNAGKO?71&$80GQRnK<)n3B)Nr*jTx7YMvYiwtwExJjk&0lgTbuUUo}9SE7d zqOd5*t0?K1;wSL%HP|P}v=*HYQcR)~91ATL3&(Sm9g|s+o*pH%JtPzYMd6t8Csqq* zFGKMpXD+r0%UeH2eBKQGfbnMo3C%lwyMk^Y!@>Xn&F=={=IH#$#TEv0b>aQ>_|-*9 zk{`MxsnWc-#9LaO#>3~d4s$WGcSNz7ysamy_#;tJ}71$#N$H^+^?4oLaK>WDF;I@JEQo64+EO%O|DW+c3yK6h8 zA1Yz(tsuFq9cA=mZ-Rw!pRX@<+3C!SfAg`;m)5;a{i9bK?Y-?Y$Fz){pFgyyfuRzn zoe8_od?-VTeyo%GJcz89Y7XW-8|Dj74-+z5vlZ=kTz6m@H>t1uR8AN=M?k9O9oyF*wzdI5cTG1zxu0MRkx-#uyLL+&Rj|E}a zw#d>${VvMoz}dpHsJ0sA=4$2UW>RBbe}y!shidP4p7RL`2PwFV8)*PNLdL!we?%m# z9st8k)Yz-xQvoo?N0Zs$BBDnv&p6#5#LEdPCE*S#kQ{HUrF5dc6Ta~FKd{jk={WQ< z#WyP!cyWdmJuk^&ynFWQ6twk(NoVTALZ;HITFN;n)>of(+bs9+Mt06~OOyGz3oky) zt|ZdcY+{Zl{t(SH%ek@<{&gAI1{%|TAY1W4v4R+^QY@nBkaJw$w$vrCvfuyks6vft zkKytQN^zkmLq3}O!i6on&xUKXoYRQyZh@!b@k^nJJskJ5bXZbeHZx&2#OYXx?EImZ z%5xwk904L`OFoMy=-aH{?dK7U-=FSYY1^!Ep?uV3P1cp+3{!#VR|hA?^%^`$Z(^Fb z(J6G=r~6qup29%qnw~OJF*{-Zqc^d>zonILT&rGl>Zb>m8*~YLReIyd; zIX6!}l|7vycCA&OQ`m|M=#!%QJi`*sex#DuD+QVWA9V%xAYsV~_7YGZj-#&FE~Lr* zWp9~fhVYT)v`?RvYHRuy+Y%=s$GnEjCqt=SkkQD`96F-snmNJJqw9OGfWlaS6HRQL zn$Srs-uZDn%%OO5nao>E1p_mg04T~_X7h<4kt+4AA>9z9Xn7Tg$t?uSYc^mJP;{Lr zQl%4P1TN-_b;H@$ONEGQx*c>Z9a+=!D!Gd_zYOD0yX;)ipfA&fyF(3{dSBv8NBVXCfbX>TA&QLIMK^S zhYmjzggIlgU&#ufRvW}b*pHz{3Sx?iz{){61AxNIfa6HeCtV_A>rGQU|0W9H6ZK%M z>ZG zAL|p?PE*$^Of(7>5BP2fwcTweulD>Z{E5n(_tMgP*L_V{Sb-K zR0vI-HmHg#7?-j|wl|69YZVAo$Qg)MZtTr_4!3t5vZ190nYWx|-nAo=BA#1*Z`aeJ zF#-y8#8nFfqN$gtTFNORhAM>v(TS4lI84_YU%*HLmMpPh73q?60iXn-VNCAlq4eKcgZ5=sChOiqW zvlS#06^K=Ua`FWVFQEetgbp|l(_Z2|?!!uf^udjdU1fl(< zlg2NdARf(N>rLmOEDb$dRgfl9ASG{UVF6NWbOO=2fR*7(rvkOC1I|q8k{wSSI#opG z)2C)gohmOXKbe3P8jRRO1At}XWLMIUTP zu!ucX4yPw_=Tx&d(l9+1wVPD;~H=Fm)x*Gh});HT2|FCCFxTrQ7 z$kFzoG`CJ^ACFMEsMwO>KB>yJdzaj;y#){BvKe{8pV;uVSvkQisn>@7Sf!P+=m;Ey~j|gW_tx6h>|NE1vA_U8j(?$wv;b71VdzQ(^}Uw zT2bY@xTCnD$`Wft=Z;#rc)D$l|`THL*IEAiCQnSWUlfwT03^K)xnFHv1AK#vZ%VaJUGs`-m6k5Uf9M7!vmlAgu)x2iN>EH6&AUSneMh5mJbqGc29SU$5Iqp=T$Zj(3 z4GGwi(*c}>)2Z6QZgh>F24RMe2FfsSb^MoE5?}JPik%IR_d#h8uc%h)bC8=)dO3o49ZXB9izK78i}P?JTEh|qwe>Ia2l`x*F`*$ z^ipX|w}}hLJ>|HY@A2&PjlNqWzRRB1Rc3J@6}GCmm~yGjGImF=Jm-ze0&Fd+h5b21 z=+m`WkIf7n+??1{jA7fhh)lM)7(Pg?fV^R_a9Q1wSO-Q~pSim$JVS~23lCmUS7*nY zJ2vZUPx5NqUY~wav)QyixD2$T6x4xjnGNr(s83y!asSTEj@A9Z582o$Mz;tF*1etQ5%$dDkbi za4*sGO3@u+z6O)ELPe`vs+4rgYOS`_!Y%g~d)4By3Iu3FxZ)6oOpD!2_HaF9xdyaq z)q`5(NU^kk3Ac2f0O9U>apgDzi}Z-5MK5p=>-Rj&NZ!%G73Sb-rsIKtxtRP6sgl>~ zKSK(=I+zjLZLmQ`-VJkn69k5(23^s`Ya7FnX+h|eY6cnQ3-x06xrV{C_~N-&q~&X= z-#^9BYk5&%>H?t_^vv$1RX4@%o+VW?tG;_}L~_)-@?5ip-2DybvzU!B!769RLicr# z8|pjiiOm)dB*R<#>{My9@2HYZw6|<9#(Y&MsC5}Kw_(0*pczsWV0=uE`1(vjMpa3^ zA}ZK4*D`jkL`K}O*>h^KhgN=?4oAG8aVd_@60@)Zi^*nV*p*+MJ&iAHuzvEa#Tu`x zM;(t;tODDoGnJ)#R@aI}#VW7N{buF&Z>O?QqvuUg{Y&QraFJy{3xh9%Kc00^U)~tl zC2igMAYIWg4yE(?68c%%)Ca931||j8|DF@0W9HY#7p?FApBi69Ume^2#sUDo7-j!P z|24$D3coto{0H8LcK;t^&Z`El9&Gwt|rnyc4p8K a@*ioXc@GCYMgRbC(O(F {os.path.basename(filename)}") + return filename + + +def run(page, study): + """Stáhne summary + detaily + notifikace pro studii do IWRS/Incoming/.""" + os.makedirs(INCOMING_DIR, exist_ok=True) + today = datetime.date.today().strftime("%Y-%m-%d") + download_summary(page, study, today) + # detail XLSX + notifikace přímo do Incoming/ (flat názvy se study+subject) + dsd.run(page, study, out_dir=INCOMING_DIR, subjects_source_dir=INCOMING_DIR) diff --git a/IWRS/Patients/import_patients.py b/IWRS/Patients/import_patients.py new file mode 100644 index 0000000..cd814c5 --- /dev/null +++ b/IWRS/Patients/import_patients.py @@ -0,0 +1,90 @@ +""" +import_patients.py — import pacientských reportů z IWRS/Incoming/ do MongoDB. +Verze: 1.0 | Datum: 2026-06-10 + +Pořadí zpracování per typ + studie: nejstarší soubor podle mtime první +(důležité pro chronologickou správnost snapshotů). + +Po úspěšném importu se soubor přesune do IWRS/Incoming/Processed/. +Při chybě zůstane soubor v Incoming/. + +Volá se z IWRS/run_all_v1.0.py (ensure_indexes volá orchestrátor); +lze spustit i samostatně: python import_patients.py +""" + +import os +import sys +import glob + +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) +IWRS_DIR = os.path.dirname(BASE_DIR) +for _p in (IWRS_DIR, BASE_DIR): + if _p not in sys.path: + sys.path.insert(0, _p) + +from common.paths import INCOMING_DIR, PROCESSED_DIR, STUDIES, move_done, sorted_by_mtime +from common.mongo_writer import ensure_indexes + +import import_to_mongo +import import_notifications_to_mongo + + +def import_summaries(study): + pattern = os.path.join(INCOMING_DIR, f"* {study} Subject Summary Report*.xlsx") + files = sorted_by_mtime(glob.glob(pattern)) + if not files: + print(f" [{study}] summary: nic ke zpracování") + return + print(f" [{study}] summary: {len(files)} soubor(ů) (oldest first)") + for path in files: + try: + import_to_mongo.import_subject_summary(study, path) + move_done(path) + except Exception as e: + print(f" [{study}] CHYBA summary {os.path.basename(path)}: {e}") + + +def import_details(study): + pattern = os.path.join(INCOMING_DIR, f"* {study} * Subject Detail.xlsx") + files = sorted_by_mtime(glob.glob(pattern)) + if not files: + print(f" [{study}] detail: nic ke zpracování") + return + print(f" [{study}] detail: {len(files)} soubor(ů) (oldest first)") + for path in files: + parsed = import_to_mongo.parse_detail_filename(path) + if not parsed: + print(f" [{study}] PŘESKAKUJI (nelze parsovat název): {os.path.basename(path)}") + continue + _, parsed_study, subject = parsed + if parsed_study != study: + continue # patří jiné studii + try: + import_to_mongo.import_visits_single_file(study, subject, path) + move_done(path) + except Exception as e: + print(f" [{study}] CHYBA detail {os.path.basename(path)}: {e}") + + +def run(studies=None): + studies = studies or STUDIES + if not os.path.isdir(INCOMING_DIR): + print(f"Adresář neexistuje: {INCOMING_DIR}") + return + + print("=" * 60) + print("Import Subject Summary + Visits") + print("=" * 60) + for study in studies: + import_summaries(study) + import_details(study) + + print("\n" + "=" * 60) + print("Import notifikací") + print("=" * 60) + import_notifications_to_mongo.import_from_dir(INCOMING_DIR, PROCESSED_DIR, studies) + + +if __name__ == "__main__": + ensure_indexes() + run() diff --git a/IWRS/common/iwrs_portal.py b/IWRS/common/iwrs_portal.py new file mode 100644 index 0000000..fef5da4 --- /dev/null +++ b/IWRS/common/iwrs_portal.py @@ -0,0 +1,24 @@ +""" +iwrs_portal.py — přihlášení k IWRS portálu janssen.4gclinical.com (Playwright). +Verze: 1.0 | Datum: 2026-06-10 + +Studie se vybírá až po přihlášení, proto login(page, study) udělá obojí. +""" + +BASE_URL = "https://janssen.4gclinical.com" +EMAIL = "vbuzalka@its.jnj.com" +PASSWORD = "Vlado123++-+" + + +def login(page, study): + """Přihlásí se a vybere studii. Page musí být čerstvá (nepřihlášená).""" + page.goto(BASE_URL) + page.wait_for_load_state("networkidle") + page.get_by_label("Email *").fill(EMAIL) + page.get_by_label("Password *").fill(PASSWORD) + page.locator("#login__submit").click() + page.wait_for_load_state("networkidle") + page.get_by_label("Study *").click() + page.get_by_role("option", name=study).click() + page.get_by_role("button", name="SELECT").click() + page.wait_for_load_state("networkidle") diff --git a/IWRS/common/paths.py b/IWRS/common/paths.py new file mode 100644 index 0000000..dafa951 --- /dev/null +++ b/IWRS/common/paths.py @@ -0,0 +1,54 @@ +""" +paths.py — sdílené cesty a souborové utility pro IWRS pipeline. +Verze: 1.0 | Datum: 2026-06-10 + +Tok souborů: + IWRS/Incoming/ — sem padají všechny stažené reporty (pacienti i léky) + IWRS/Incoming/Processed/ — sem se přesouvají po úspěšném importu do MongoDB +Při chybě importu soubor zůstává v Incoming/ a zpracuje se při příštím běhu. +""" + +import os +import shutil +import datetime + +IWRS_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +INCOMING_DIR = os.path.join(IWRS_DIR, "Incoming") +PROCESSED_DIR = os.path.join(INCOMING_DIR, "Processed") + +STUDIES = ["77242113UCO3001", "42847922MDD3003"] + + +def ensure_dirs(): + os.makedirs(INCOMING_DIR, exist_ok=True) + os.makedirs(PROCESSED_DIR, exist_ok=True) + + +def unique_path(directory, stem, ext=".xlsx"): + """Cesta bez kolize — při kolizi přidá ' HHMM' před příponu.""" + path = os.path.join(directory, f"{stem}{ext}") + if not os.path.exists(path): + return path + time_tag = datetime.datetime.now().strftime("%H%M") + return os.path.join(directory, f"{stem} {time_tag}{ext}") + + +def move_done(path, done_dir=None): + """Přesune zpracovaný soubor do Processed/. + + Kolize → přepíše (Mongo už má aktuální data, soubor je jen archiv). + """ + dst_dir = done_dir or PROCESSED_DIR + os.makedirs(dst_dir, exist_ok=True) + dst = os.path.join(dst_dir, os.path.basename(path)) + if os.path.exists(dst): + os.remove(dst) + shutil.move(path, dst) + + +def sorted_by_mtime(paths): + """Nejstarší první, bez Excel lock souborů (~$...).""" + return sorted( + (p for p in paths if not os.path.basename(p).startswith("~$")), + key=os.path.getmtime, + ) diff --git a/IWRS/run_all_v1.0.md b/IWRS/run_all_v1.0.md new file mode 100644 index 0000000..da699b4 --- /dev/null +++ b/IWRS/run_all_v1.0.md @@ -0,0 +1,110 @@ +# run_all_v1.0.py — IWRS: kompletní pipeline Pacienti + Léky + +**Verze:** 1.0 | **Datum:** 2026-06-10 + +Jeden vstupní skript na úrovni `IWRS/`, který stáhne z janssen.4gclinical.com +a naimportuje do MongoDB (db `studie`) data pacientů i léků pro obě studie +(77242113UCO3001, 42847922MDD3003). Nahrazuje dřívější `Drugs/run_all.py` +a `Patients/download_all.py` + `Patients/import_all.py` (přesunuty do `Trash/`). + +## Tok souborů + +``` +IWRS/Incoming/ ← sem padá vše stažené (pacienti i léky, datumované názvy) +IWRS/Incoming/Processed/ ← sem se přesouvá po úspěšném importu +``` + +- Při chybě importu soubor **zůstává v Incoming/** a zpracuje se při příštím běhu. +- Import jde vždy **nejstarší soubor napřed** (mtime) — chronologická správnost snapshotů. +- Kolize jména v Processed/ → přepíše se (Mongo už data má, soubor je jen archiv). +- Adresář `IWRS/Incoming/` je v `.gitignore` (stejně jako dříve `Patients/Incoming/`). +- Původní adresáře `Drugs/xls_*` zůstávají zmrazené na místě jako archiv — nový kód je nepoužívá. + +## Názvy souborů v Incoming/ + +| Typ | Vzor | +|---|---| +| Subject Summary | `YYYY-MM-DD {study} Subject Summary Report.xlsx` | +| Subject Detail | `YYYY-MM-DD {study} {subject} Subject Detail.xlsx` | +| Notifikace | `{datum}_{study}_{subject}_{label}.pdf` + `.json` | +| Onsite Inventory | `YYYY-MM-DD {study} Onsite Inventory {site}.xlsx` | +| IP Destruction | `YYYY-MM-DD {study} IP Destruction {basket}.xlsx` | +| Shipments Report | `YYYY-MM-DD {study} Shipments Report.xlsx` | +| Shipment Details | `YYYY-MM-DD {study} Shipment Details {shipment_id}.xlsx` | + +Při kolizi (druhý běh ve stejný den) se před příponu přidá ` HHMM`. +Metadata (site, basket, study) se při importu čtou primárně z **obsahu** souboru; +z názvu se bere jen `shipment_id` u Shipment Details. + +## Průběh + +### Fáze 1 — stahování (2 přihlášení, per studie jedna browser session) + +1. Login + výběr studie (`common/iwrs_portal.py`) +2. **Pacienti** (`Patients/download_patients.py`): + - Subject Summary Report + - per subjekt: Subject Detail XLSX + notifikace PDF+JSON (stahují se jen + notifikace, jejichž `pk` ještě není v Mongo `iwrs_notifications`) +3. **Léky** (`Drugs/download_drugs.py`): + - Onsite Inventory — všechna centra, vždy znovu + - IP Destruction — přeskočí košíky už importované v `iwrs_destruction` + (destrukce je immutable); dříve se přeskakovalo podle existence souboru + - Shipments Report — vždy znovu + - Shipment Details — jen CZ zásilky; přeskočí zásilky, jejichž položky + jsou v `iwrs_shipment_items` se statusem RECEIVED (finální stav); + dříve „soubor existuje a status RECEIVED“. CANCELLED zásilky se stahují + při každém běhu (záměrně zachováno z původní verze). + +### Fáze 2 — import (po stažení obou studií) + +1. `ensure_indexes()` (jednou) +2. **Pacienti** (`Patients/import_patients.py`): summary → detaily → notifikace; + per soubor, po úspěchu přesun do Processed/ +3. **Léky** (`Drugs/import_drugs.py`): jeden `import_id` per studie a běh; + parsuje všechny čekající soubory (nejstarší napřed, poslední vyhrává per `_id`), + pak hromadný zápis: + - `iwrs_shipments`, `iwrs_shipment_items`, `iwrs_inventory` — upsert + snapshot + - `iwrs_destruction` — upsert bez snapshotu + Po úspěšném zápisu se zparsované soubory přesunou do Processed/; + soubor s chybou parsování zůstává v Incoming/. + +## Použití + +``` +python run_all_v1.0.py # vše (download + import, obě studie) +python run_all_v1.0.py --download-only # jen stažení do Incoming/ +python run_all_v1.0.py --import-only # jen import čekajících souborů +python run_all_v1.0.py --only-patients # jen pacientská část +python run_all_v1.0.py --only-drugs # jen léková část +python run_all_v1.0.py --study 42847922MDD3003 # jen jedna studie +``` + +Prohlížeč běží s `headless=False` (viditelné okno) jako dosud. +Moduly `import_patients.py` a `import_drugs.py` lze spustit i samostatně. + +## Mapa modulů + +``` +IWRS/ + run_all_v1.0.py ← vstupní skript (CLI, orchestrace) + common/ + iwrs_portal.py ← BASE_URL, credentials, login(page, study) + paths.py ← INCOMING/PROCESSED, unique_path, move_done, sorted_by_mtime + mongo_writer.py ← beze změny (konvertory, upserty, snapshoty, import log) + Patients/ + download_patients.py ← summary + delegace na download_subject_details.run() + import_patients.py ← logika z bývalého import_all.py, nové cesty + download_subject_details.py, import_to_mongo.py, + import_notifications_to_mongo.py, parse_notifications_to_mongo.py ← beze změny + Trash/download_all.py, Trash/import_all.py ← nahrazeno + Drugs/ + download_drugs.py ← 4 typy reportů → Incoming/, skip-logika přes Mongo + import_drugs.py ← parsery z bývalého import_to_mongo.py, čte Incoming/ + Trash/run_all.py, Trash/import_to_mongo.py ← nahrazeno +``` + +## Jednorázová migrace (provedeno 2026-06-10) + +- `Patients/Incoming/Zpracováno/` (1343 souborů) → `IWRS/Incoming/Processed/` +- `.gitignore`: `IWRS/Patients/Incoming/` → `IWRS/Incoming/` +- Staré vstupní skripty → `Trash/` (viz mapa výše) diff --git a/IWRS/run_all_v1.0.py b/IWRS/run_all_v1.0.py new file mode 100644 index 0000000..c17a3ce --- /dev/null +++ b/IWRS/run_all_v1.0.py @@ -0,0 +1,147 @@ +""" +================================================================================ + run_all_v1.0.py — IWRS: kompletní pipeline Pacienti + Léky (obě studie) + Verze: 1.0 + Datum: 2026-06-10 +================================================================================ + +Stáhne z janssen.4gclinical.com a naimportuje do MongoDB (db `studie`): + + Pacienti: Subject Summary, Subject Details, notifikace (PDF+JSON) + Léky: Onsite Inventory, IP Destruction, Shipments Report, Shipment Details + +Tok souborů: vše se stahuje do IWRS/Incoming/, po úspěšném importu se přesouvá +do IWRS/Incoming/Processed/. Při chybě soubor zůstává v Incoming/ a zpracuje +se při příštím běhu. + +Přihlášení: 2× (jednou per studie) — studie se vybírá až po přihlášení, takže +jedna browser session stáhne pacienty i léky pro jednu studii. + +Použití: + python run_all_v1.0.py # vše (download + import, obě studie) + python run_all_v1.0.py --download-only # jen stažení do Incoming/ + python run_all_v1.0.py --import-only # jen import čekajících souborů + python run_all_v1.0.py --only-patients # jen pacientská část + python run_all_v1.0.py --only-drugs # jen léková část + python run_all_v1.0.py --study 42847922MDD3003 # jen jedna studie + +Detaily v run_all_v1.0.md. +""" + +import os +import sys +import argparse +import traceback + +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) +for _p in (os.path.join(BASE_DIR, "Drugs"), os.path.join(BASE_DIR, "Patients"), BASE_DIR): + if _p not in sys.path: + sys.path.insert(0, _p) + +from playwright.sync_api import sync_playwright + +from common.iwrs_portal import login +from common.paths import STUDIES, INCOMING_DIR, PROCESSED_DIR, ensure_dirs +from common.mongo_writer import ensure_indexes + +import download_patients +import import_patients +import download_drugs +import import_drugs + + +def download_phase(studies, do_patients, do_drugs): + with sync_playwright() as p: + for study in studies: + print(f"\n{'='*60}") + print(f"[{study}] STAHOVÁNÍ") + print(f"{'='*60}") + + browser = p.chromium.launch(headless=False) + context = browser.new_context(accept_downloads=True) + page = context.new_page() + + try: + print(" Přihlášení...") + login(page, study) + + if do_patients: + print(f"\n ── PACIENTI [{study}] ──") + try: + download_patients.run(page, study) + except Exception as e: + print(f" CHYBA při stahování pacientů: {e}") + traceback.print_exc() + + if do_drugs: + print(f"\n ── LÉKY [{study}] ──") + try: + download_drugs.run(page, study) + except Exception as e: + print(f" CHYBA při stahování léků: {e}") + traceback.print_exc() + + except Exception as e: + print(f" CHYBA (login/session): {e}") + traceback.print_exc() + finally: + browser.close() + + +def import_phase(studies, do_patients, do_drugs): + print(f"\n{'='*60}") + print("IMPORT DO MongoDB") + print(f"{'='*60}") + ensure_indexes() + + if do_patients: + try: + import_patients.run(studies) + except Exception as e: + print(f" CHYBA při importu pacientů: {e}") + traceback.print_exc() + + if do_drugs: + try: + import_drugs.run(studies) + except Exception as e: + print(f" CHYBA při importu léků: {e}") + traceback.print_exc() + + +def main(): + ap = argparse.ArgumentParser( + description="IWRS pipeline: stažení + import pacientů a léků (obě studie)") + ap.add_argument("--download-only", action="store_true", help="jen stažení do Incoming/") + ap.add_argument("--import-only", action="store_true", help="jen import čekajících souborů") + ap.add_argument("--only-patients", action="store_true", help="jen pacientská část") + ap.add_argument("--only-drugs", action="store_true", help="jen léková část") + ap.add_argument("--study", choices=STUDIES, help="jen jedna studie") + args = ap.parse_args() + + if args.download_only and args.import_only: + ap.error("--download-only a --import-only nelze kombinovat") + if args.only_patients and args.only_drugs: + ap.error("--only-patients a --only-drugs nelze kombinovat") + + studies = [args.study] if args.study else STUDIES + do_patients = not args.only_drugs + do_drugs = not args.only_patients + + ensure_dirs() + + if not args.import_only: + download_phase(studies, do_patients, do_drugs) + + if not args.download_only: + import_phase(studies, do_patients, do_drugs) + + print(f"\n{'='*60}") + print("Vše hotovo.") + print(f" Incoming: {INCOMING_DIR}") + print(f" Processed: {PROCESSED_DIR}") + print(f"{'='*60}") + + +if __name__ == "__main__": + main()