Migrate IWRS from MySQL to MongoDB

- Add IWRS/common/mongo_writer.py with shared connection, indexes,
  upsert+snapshot helpers
- Add IWRS/Patients/import_to_mongo.py (subject_summary + visits)
- Add IWRS/Patients/import_notifications_to_mongo.py: parse PDF/JSON
  directly to Mongo (incl. PDF as BinData), replaces 2-step MySQL flow
- Add IWRS/Drugs/import_to_mongo.py (shipments, items, inventory,
  destruction)
- Add IWRS/backfill_mysql_to_mongo.py: one-shot history backfill
- Switch IWRS/Patients/run_all.py and IWRS/Drugs/run_all.py to Mongo
- Rewrite IWRS/Drugs/create_report.py data loaders to read from Mongo
- 8 main collections (upsert = latest state) + 5 snapshot collections
  (append-only with import_id) under studie database; notifications and
  destruction are immutable and need no snapshots

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-06-03 07:24:36 +02:00
parent 681095d557
commit ea9d611719
2080 changed files with 9465 additions and 172 deletions
+169 -172
View File
@@ -1,5 +1,5 @@
import os
import mysql.connector
import sys
import pandas as pd
from datetime import date
from pathlib import Path
@@ -7,7 +7,8 @@ from openpyxl import load_workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter
import db_config
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from common.mongo_writer import get_db
STUDIES = ["77242113UCO3001", "42847922MDD3003"]
@@ -23,70 +24,56 @@ DATE_COLUMNS = {
N_SHIP_COLS = 9 # počet shipment sloupců před detail sloupci
# ── DB ────────────────────────────────────────────────────────────────────────
# ── Načítání dat z MongoDB ────────────────────────────────────────────────────
def get_conn():
return mysql.connector.connect(
host=db_config.DB_HOST, port=db_config.DB_PORT,
user=db_config.DB_USER, password=db_config.DB_PASSWORD,
database=db_config.DB_NAME,
)
INVENTORY_COLS = [
("site", "Site"),
("medication_id", "Med ID"),
("packaged_lot_no", "Lot No."),
("original_expiration_date", "Orig Exp Date"),
("expiration_date", "Exp Date"),
("received_date", "Rcv Date"),
("receipt_user", "Rcpt User"),
("subject_identifier", "Subject ID"),
("quantity_assigned", "Qty Asgn"),
("irt_transaction", "IRT Tx"),
("date_assigned", "Date Asgn"),
("assignment_user", "Asgn User"),
("dispensation_status", "Disp Status"),
("dispensing_date", "Disp Date"),
("quantity_dispensed", "Qty Disp"),
("dispensing_user", "Disp User"),
("quantity_returned", "Qty Ret"),
("date_returned", "Date Ret"),
("return_user", "Ret User"),
]
def get_latest_import_id(cursor, study):
cursor.execute(
"SELECT MAX(import_id) AS mid FROM iwrs_import WHERE study=%s AND report_type='drugs'",
(study,),
)
row = cursor.fetchone()
mid = row["mid"]
if mid is None:
raise RuntimeError(f"Žádná data v MySQL pro studii {study}")
return mid
def load_inventory(study):
db = get_db()
inv = list(db.iwrs_inventory.find({"study": study}))
destr = list(db.iwrs_destruction.find({"study": study}))
# map medication_id -> first basket+date
destr_map = {}
for d in destr:
mid = d.get("medication_id")
if mid and mid not in destr_map:
destr_map[mid] = (d.get("basket_id"), d.get("destruction_date"))
records = []
for doc in inv:
row = {label: doc.get(key) for key, label in INVENTORY_COLS}
b, dt = destr_map.get(doc.get("medication_id"), (None, None))
row["Destroyed"] = dt
row["Basket No."] = b
records.append(row)
# ── Načítání dat ──────────────────────────────────────────────────────────────
df = pd.DataFrame(records)
if df.empty:
print(" Inventory: 0 kitu")
return df
def load_inventory(cursor, study, import_id):
sql = """
SELECT
i.site AS Site,
i.medication_id AS `Med ID`,
i.packaged_lot_no AS `Lot No.`,
i.original_expiration_date AS `Orig Exp Date`,
i.expiration_date AS `Exp Date`,
i.received_date AS `Rcv Date`,
i.receipt_user AS `Rcpt User`,
i.subject_identifier AS `Subject ID`,
i.quantity_assigned AS `Qty Asgn`,
i.irt_transaction AS `IRT Tx`,
i.date_assigned AS `Date Asgn`,
i.assignment_user AS `Asgn User`,
i.dispensation_status AS `Disp Status`,
i.dispensing_date AS `Disp Date`,
i.quantity_dispensed AS `Qty Disp`,
i.dispensing_user AS `Disp User`,
i.quantity_returned AS `Qty Ret`,
i.date_returned AS `Date Ret`,
i.return_user AS `Ret User`,
d.destruction_date AS Destroyed,
d.basket_id AS `Basket No.`
FROM iwrs_inventory i
LEFT JOIN (
SELECT medication_id,
ANY_VALUE(basket_id) AS basket_id,
ANY_VALUE(destruction_date) AS destruction_date
FROM iwrs_destruction
WHERE study = %s
GROUP BY medication_id
) d ON d.medication_id = i.medication_id
WHERE i.import_id = %s
AND i.study = %s
ORDER BY i.site, i.received_date, i.medication_id
"""
cursor.execute(sql, (study, import_id, study))
rows = cursor.fetchall()
df = pd.DataFrame(rows)
df = df.sort_values(["Site", "Rcv Date", "Med ID"], na_position="last").reset_index(drop=True)
for col in DATE_COLUMNS:
if col in df.columns:
df[col] = pd.to_datetime(df[col], errors="coerce")
@@ -94,78 +81,102 @@ def load_inventory(cursor, study, import_id):
return df
def load_shipments(cursor, study, import_id):
sql = """
SELECT
s.shipment_id AS `Shipment ID`,
s.status AS `IRT Shipment Status`,
s.type AS Type,
s.ship_from AS `Shipment From`,
s.ship_to_site AS `Ship To:`,
s.request_date AS `Request Date`,
s.received_date AS `Received Date`,
s.received_by AS `Received by`,
s.expected_arrival AS `Expected Arrival`,
i.investigator AS Investigator,
i.medication_description AS `Medication Description`,
i.medication_id AS `Medication ID`,
i.packaged_lot_no AS `Packaged Lot number`,
i.expiration_date AS `Expiration Date`,
i.item_status AS Status
FROM iwrs_shipments s
JOIN iwrs_shipment_items i
ON i.study = s.study
AND i.shipment_id = s.shipment_id
AND i.import_id = %s
WHERE s.import_id = %s
AND s.study = %s
ORDER BY s.ship_to_site, s.shipment_id, i.medication_id
"""
cursor.execute(sql, (import_id, import_id, study))
rows = cursor.fetchall()
df = pd.DataFrame(rows)
SHIP_COLS = [
("shipment_id", "Shipment ID"),
("status", "IRT Shipment Status"),
("type", "Type"),
("ship_from", "Shipment From"),
("ship_to_site", "Ship To:"),
("request_date", "Request Date"),
("received_date", "Received Date"),
("received_by", "Received by"),
("expected_arrival", "Expected Arrival"),
]
ITEM_COLS = [
("investigator", "Investigator"),
("medication_description", "Medication Description"),
("medication_id", "Medication ID"),
("packaged_lot_no", "Packaged Lot number"),
("expiration_date", "Expiration Date"),
("item_status", "Status"),
]
def load_shipments(study):
    """Load shipments joined with their items for one study from MongoDB.

    Every ``iwrs_shipments`` document of ``study`` is paired with the
    ``iwrs_shipment_items`` documents that share its ``shipment_id``. One
    output row is produced per item, so a shipment without items yields no
    rows.

    Args:
        study: Study identifier used to filter both collections.

    Returns:
        DataFrame holding the ``SHIP_COLS`` labels followed by the
        ``ITEM_COLS`` labels, sorted by destination site, shipment ID and
        medication ID. An empty DataFrame is returned when nothing matches.
    """
    db = get_db()
    ships = list(db.iwrs_shipments.find({"study": study}))
    items = list(db.iwrs_shipment_items.find({"study": study}))

    # Index the items once so the join below is a dictionary lookup.
    items_by_ship = {}
    for item in items:
        items_by_ship.setdefault(item.get("shipment_id"), []).append(item)

    records = []
    for ship in ships:
        ship_part = {label: ship.get(key) for key, label in SHIP_COLS}
        for item in items_by_ship.get(ship.get("shipment_id"), []):
            item_part = {label: item.get(key) for key, label in ITEM_COLS}
            records.append({**ship_part, **item_part})

    df = pd.DataFrame(records)
    if df.empty:
        print(" Shipments: 0 zásilek, 0 kitu")
        return df

    df = df.sort_values(
        ["Ship To:", "Shipment ID", "Medication ID"], na_position="last"
    ).reset_index(drop=True)

    for col in ("Request Date", "Received Date", "Expiration Date", "Expected Arrival"):
        if col in df.columns:
            df[col] = pd.to_datetime(df[col], errors="coerce")

    # The frame is known to be non-empty here, so no length guard is needed.
    n_ship = df["Shipment ID"].nunique()
    print(f" Shipments: {n_ship} zásilek, {len(df)} kitu")
    return df
def load_visits(cursor, study, import_id):
cursor.execute(
"SELECT MAX(import_id) AS mid FROM iwrs_import WHERE study=%s AND report_type='patients'",
(study,),
)
patients_import_id = cursor.fetchone()["mid"] or import_id
import_id = patients_import_id
sql = """
SELECT
v.subject AS Subject,
COALESCE(v.actual_date, v.scheduled_date) AS `Visit Date`,
v.scheduled_date AS `Scheduled Date`,
v.irt_transaction_no AS `IRT Tx No`,
v.irt_transaction_description AS `Visit`,
v.medication_assignment AS `Medication`,
GROUP_CONCAT(v.medication_id ORDER BY v.medication_id SEPARATOR ', ') AS `Med IDs`,
SUM(v.quantity_assigned) AS `Qty`
FROM iwrs_subject_visits v
WHERE v.import_id = %s AND v.study = %s AND v.visit_type = 'Past'
AND v.irt_transaction_no IS NOT NULL
GROUP BY v.subject, v.actual_date, v.scheduled_date,
v.irt_transaction_no, v.irt_transaction_description, v.medication_assignment
ORDER BY v.subject, COALESCE(v.actual_date, v.scheduled_date)
"""
cursor.execute(sql, (import_id, study))
rows = cursor.fetchall()
def _join_med_ids(values):
    """Return the non-missing medication IDs as a sorted, comma-separated string."""
    return ", ".join(sorted(str(v) for v in values if pd.notna(v)))


def load_visits(study):
    """Load past visits of one study from MongoDB, aggregated per transaction.

    Reads ``iwrs_visits`` documents with ``visit_type == "Past"`` and a
    non-null ``irt_transaction_no``, then collapses rows that share subject,
    visit date, scheduled date, transaction number, description and
    medication assignment into one row.

    Args:
        study: Study identifier used to filter the collection.

    Returns:
        DataFrame with one row per group, carrying the joined medication IDs
        in ``Med IDs`` and the summed assigned quantity in ``Qty``, sorted by
        subject and visit date. An empty DataFrame is returned when no visit
        matches.
    """
    db = get_db()
    cursor = db.iwrs_visits.find({
        "study": study,
        "visit_type": "Past",
        "irt_transaction_no": {"$ne": None},
    })
    rows = [
        {
            "Subject": v.get("subject"),
            # Fall back to the scheduled date when no actual date is recorded.
            "Visit Date": v.get("actual_date") or v.get("scheduled_date"),
            "Scheduled Date": v.get("scheduled_date"),
            "IRT Tx No": v.get("irt_transaction_no"),
            "Visit": v.get("irt_transaction_description"),
            "Medication": v.get("medication_assignment"),
            "medication_id": v.get("medication_id"),
            "quantity_assigned": v.get("quantity_assigned"),
        }
        for v in cursor
    ]

    df = pd.DataFrame(rows)
    if df.empty:
        print(" Visits: 0 radku")
        return df

    group_keys = ["Subject", "Visit Date", "Scheduled Date", "IRT Tx No", "Visit", "Medication"]
    # dropna=False keeps groups whose key columns contain missing values.
    grouped = (
        df.groupby(group_keys, dropna=False, as_index=False)
        .agg(**{
            "Med IDs": ("medication_id", _join_med_ids),
            "Qty": ("quantity_assigned", "sum"),
        })
    )
    grouped = grouped.sort_values(["Subject", "Visit Date"]).reset_index(drop=True)

    # All post-processing targets the aggregated frame, which is what is returned.
    for col in ("Visit Date", "Scheduled Date"):
        if col in grouped.columns:
            grouped[col] = pd.to_datetime(grouped[col], errors="coerce")

    if study == "77242113UCO3001":
        grouped["Visit"] = grouped["Visit"].replace("Subject Number Creation", "Screening")

    print(f" Visits: {len(grouped)} řádků")
    return grouped
# ── Odvozené sheety ───────────────────────────────────────────────────────────
@@ -343,49 +354,42 @@ def format_shipment_sheet(ws, header_color_ship, header_color_detail, n_ship_col
# ── Pacienti ─────────────────────────────────────────────────────────────────
PATIENT_TABLE = {
"77242113UCO3001": "iwrs_uco3001_subject_summary",
"42847922MDD3003": "iwrs_mdd3003_subject_summary",
}
def load_patients(study):
    """Load the subject summary of one study from MongoDB.

    Args:
        study: Study identifier used to filter ``iwrs_subject_summary``.

    Returns:
        DataFrame with one row per subject, sorted by ``Subject``. For study
        ``77242113UCO3001`` six additional stratification columns are
        included between ``Cohort per IRT`` and ``IRT Subject Status``.

    Raises:
        RuntimeError: If the collection holds no document for ``study``.
    """
    db = get_db()
    docs = list(db.iwrs_subject_summary.find({"study": study}))
    if not docs:
        raise RuntimeError(f"Žádná data v Mongo pro pacienty {study}")

    leading_cols = [
        ("subject", "Subject"),
        ("investigator", "Investigator"),
        ("age", "Subject's age collection"),
        ("cohort_per_irt", "Cohort per IRT"),
    ]
    uco_extra = [
        ("rescreened_subject", "Rescreened Subject"),
        ("adt_ir", "ADT-IR"),
        ("three_or_more_advanced_therapies", "3+ Adv. Therapies"),
        ("only_oral_5asa_compounds", "Only 5-ASA"),
        ("ustekinumab", "Ustekinumab"),
        ("isolated_proctitis", "Isolated Proctitis"),
    ]
    trailing_cols = [
        ("irt_subject_status", "IRT Subject Status"),
        ("last_irt_transaction", "Last Recorded IRT Transaction"),
        ("next_irt_transaction", "Next Expected IRT Transaction"),
        ("next_irt_transaction_date_local", "Next Expected IRT Transaction Date [Local]"),
    ]

    # The study-specific columns go between the cohort and the status columns
    # so the sheet keeps the column order the report had before the migration.
    cols = list(leading_cols)
    if study == "77242113UCO3001":
        cols += uco_extra
    cols += trailing_cols

    rows = [{label: doc.get(key) for key, label in cols} for doc in docs]
    df = pd.DataFrame(rows).sort_values("Subject").reset_index(drop=True)

    date_col = "Next Expected IRT Transaction Date [Local]"
    if date_col in df.columns:
        df[date_col] = pd.to_datetime(df[date_col], errors="coerce")

    print(f" Pacienti: {len(df)} subjektů")
    return df
@@ -574,18 +578,11 @@ def create_study_report(study):
output_file = OUTPUT_DIR / f"{today} {study} CZ IWRS overview v{version}.xlsx"
print(f"\n[{study}] Načítám z MySQL...")
conn = get_conn()
cursor = conn.cursor(dictionary=True)
import_id = get_latest_import_id(cursor, study)
print(f" import_id = {import_id}")
df = load_inventory(cursor, study, import_id)
shipments_df = load_shipments(cursor, study, import_id)
df_patients = load_patients(cursor, study)
visits_df = load_visits(cursor, study, import_id)
cursor.close()
conn.close()
print(f"\n[{study}] Nacitam z MongoDB...")
df = load_inventory(study)
shipments_df = load_shipments(study)
df_patients = load_patients(study)
visits_df = load_visits(study)
expired_df, expired_sheet = build_expired(df)
assigned_df = build_assigned_not_dispensed(df)
+253
View File
@@ -0,0 +1,253 @@
"""
Import Drugs dat (shipments, shipment_items, inventory, destruction) z XLSX do MongoDB.
Volá se z IWRS/Drugs/run_all.py po stažení reportů.
"""
import os
import sys
import re
import glob
import pandas as pd
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from common.mongo_writer import (
to_str, to_int, to_date,
ensure_indexes, log_import,
bulk_upsert_with_snapshot, bulk_upsert_only,
)
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# ── XLSX parsery (převzaté z run_all.py + úprava na Mongo dokumenty) ─────────
def parse_shipments_report(study):
    """Parse the shipments report workbook of one study into Mongo documents.

    The workbook is expected at
    ``<BASE_DIR>/xls_shipments_<study>/shipments_report_<study>.xlsx``. The
    header row is the first row containing a ``Shipment ID`` cell; only rows
    whose ``Location`` contains "Czech" (case-insensitive) are kept.

    Args:
        study: Study identifier used to build the path and stored on each
            document.

    Returns:
        List of shipment documents keyed by ``_id`` = shipment ID. An empty
        list is returned when the file or its header row is missing.
    """
    path = os.path.join(BASE_DIR, f"xls_shipments_{study}", f"shipments_report_{study}.xlsx")
    if not os.path.exists(path):
        print(f" CHYBI: {path}")
        return []

    raw = pd.read_excel(path, header=None)
    header_row = next(
        (i for i, row in raw.iterrows() if "Shipment ID" in [str(v).strip() for v in row]),
        None,
    )
    if header_row is None:
        return []

    df = pd.read_excel(path, header=header_row).dropna(how="all")
    # The header row was located on stripped cell text, so strip the column
    # labels too; otherwise a padded header would be found but not addressable.
    df.columns = [c.strip() if isinstance(c, str) else c for c in df.columns]
    df = df[df["Location"].astype(str).str.contains("Czech", na=False, case=False)]
    present = set(df.columns)

    def optional(row, column, convert):
        """Convert ``row[column]``, or return None when the report lacks that column."""
        return convert(row[column]) if column in present else None

    rows = []
    for _, r in df.iterrows():
        sid = to_str(r["Shipment ID"])
        if not sid:
            continue
        rows.append({
            "_id": sid,
            "shipment_id": sid,
            "study": study,
            "status": to_str(r["IRT Shipment Status"]),
            "type": to_str(r["Type"]),
            "ship_from": to_str(r["Shipment From"]),
            "ship_to_site": to_str(r["Ship To:"]),
            "location": to_str(r["Location"]),
            "request_date": to_date(r["Request Date"]),
            "shipped_date": to_date(r["Shipped Date"]),
            "received_date": optional(r, "Received Date", to_date),
            "received_by": optional(r, "Received by", to_str),
            "delivered_date_utc": optional(r, "Delivered Date [UTC]", to_date),
            "delivery_recipient": optional(r, "Delivery Recipient", to_str),
            "delivery_details": optional(r, "Delivery Details", to_str),
            "cancelled_date": optional(r, "Cancelled Date", to_date),
            "total_medication_ids": optional(r, "Total Medication IDs", to_int),
            "tracking_no": optional(r, "Tracking #", to_str),
            "shipping_category": optional(r, "Shipping Category", to_str),
            "expected_arrival": optional(r, "Expected Arrival", to_date),
        })
    return rows
def parse_shipment_details(study):
    """Parse all per-shipment detail workbooks of one study into item documents.

    Reads every ``shipment_details_*.xlsx`` file in
    ``<BASE_DIR>/xls_shipment_details_<study>``. The shipment ID is taken from
    the file name and the header row is the first row containing a
    ``Medication ID`` cell; files without such a row are skipped.

    Args:
        study: Study identifier used to build the directory name and stored
            on each document.

    Returns:
        List of item documents with ``_id`` = ``"<shipment_id>:<medication_id>"``.
        When the same ``_id`` occurs more than once, the last occurrence wins.
    """
    detail_dir = os.path.join(BASE_DIR, f"xls_shipment_details_{study}")
    pattern = os.path.join(detail_dir, "shipment_details_*.xlsx")

    # Keyed by _id so a later duplicate overwrites an earlier one.
    docs_by_id = {}
    for path in sorted(glob.glob(pattern)):
        name_match = re.search(r"shipment_details_(.+)\.xlsx", os.path.basename(path))
        shipment_id = name_match.group(1) if name_match else "UNKNOWN"

        raw = pd.read_excel(path, header=None)
        header_row = next(
            (i for i, row in raw.iterrows() if "Medication ID" in [str(v).strip() for v in row]),
            None,
        )
        if header_row is None:
            continue

        sheet = pd.read_excel(path, header=header_row).dropna(how="all")
        for _, r in sheet.iterrows():
            med_id = to_str(r.get("Medication ID"))
            if not med_id:
                continue
            # Two header spellings are accepted for description and type.
            med_desc = (to_str(r.get("Medication Description"))
                        or to_str(r.get("Medication ID Description")))
            med_type = (to_str(r.get("Medication type"))
                        or to_str(r.get("Medication ID type")))
            doc_id = f"{shipment_id}:{med_id}"
            docs_by_id[doc_id] = {
                "_id": doc_id,
                "study": study,
                "shipment_id": shipment_id,
                "destination_location": to_str(r.get("Destination Location")),
                "shipment_status": to_str(r.get("IRT Shipment Status")),
                "shipment_type": to_str(r.get("Type")),
                "destination_site": to_str(r.get("Destination Site")),
                "investigator": to_str(r.get("Investigator")),
                "medication_description": med_desc,
                "medication_type": med_type,
                "medication_id": med_id,
                "packaged_lot_no": to_str(r.get("Packaged Lot number")),
                "packaged_lot_description": to_str(r.get("Packaged Lot description")),
                "container_id": to_str(r.get("Container ID")),
                "quantity": to_int(r.get("Quantity of Medication IDs")),
                "expiration_date": to_date(r.get("Expiration Date")),
                "item_status": to_str(r.get("Status")),
            }
    return list(docs_by_id.values())
def parse_inventory(study):
    """Parse the on-site inventory detail workbooks of one study.

    Reads every ``onsite_inventory_detail_*.xlsx`` file in
    ``<BASE_DIR>/xls_reports_<study>``. Site, investigator and location come
    from ``Site:`` / ``Investigator:`` / ``Location:`` lines in the first
    column; the header row is the first row whose first cell is
    ``Medication`` or ``Medication ID``. Files without a header row or
    without a site are skipped.

    Args:
        study: Study identifier used to build the directory name and stored
            on each document.

    Returns:
        List of kit documents with ``_id`` = ``"<site>:<medication_id>"``.
        When the same ``_id`` occurs more than once, the last occurrence wins.
    """
    inv_dir = os.path.join(BASE_DIR, f"xls_reports_{study}")
    files = sorted(glob.glob(os.path.join(inv_dir, "onsite_inventory_detail_*.xlsx")))
    meta_prefixes = (
        ("Site:", "site"),
        ("Investigator:", "investigator"),
        ("Location:", "location"),
    )

    rows = []
    for path in files:
        raw = pd.read_excel(path, header=None)
        meta = {"site": None, "investigator": None, "location": None}
        header_row = None
        for i, row in raw.iterrows():
            first = str(row.iloc[0]).strip() if pd.notna(row.iloc[0]) else ""
            for prefix, attr in meta_prefixes:
                if first.startswith(prefix):
                    # Cut off only the leading label; str.replace would also
                    # remove the same text if it reappeared inside the value.
                    meta[attr] = first[len(prefix):].strip()
                    break
            if first in ("Medication", "Medication ID") and header_row is None:
                header_row = i

        site = meta["site"]
        # Without a site no row can be keyed, so skip the second read entirely.
        if header_row is None or not site:
            continue

        df = pd.read_excel(path, header=header_row).dropna(how="all")
        df = df.rename(columns={df.columns[0]: "medication_id"})

        # The report spells this header with either capitalisation. Choose by
        # column presence instead of `a or b`, which depends on the truthiness
        # of missing-value scalars (NaN is truthy, pd.NA raises).
        if "Dispensing date" in df.columns:
            dispensing_col = "Dispensing date"
        else:
            dispensing_col = "Dispensing Date"

        for _, r in df.iterrows():
            med_id = to_str(r["medication_id"])
            if not med_id:
                continue
            rows.append({
                "_id": f"{site}:{med_id}",
                "study": study,
                "site": site,
                "investigator": meta["investigator"],
                "location": meta["location"],
                "medication_id": med_id,
                "packaged_lot_no": to_str(r.get("Packaged Lot number")),
                "original_expiration_date": to_date(r.get("Original Expiration Date when Packaged Lot was Added")),
                "expiration_date": to_date(r.get("Expiration date")),
                "received_date": to_date(r.get("Received Date")),
                "receipt_user": to_str(r.get("Shipment Receipt User")),
                "subject_identifier": to_str(r.get("Subject Identifier")),
                "quantity_assigned": to_int(r.get("Quantity Assigned")),
                "irt_transaction": to_str(r.get("IRT Transaction")),
                "date_assigned": to_date(r.get("Date Assigned")),
                "assignment_user": to_str(r.get("Assignment User")),
                "dispensation_status": to_str(r.get("Dispensation Status")),
                "dispensing_date": to_date(r.get(dispensing_col)),
                "quantity_dispensed": to_int(r.get("Quantity Dispensed")),
                "dispensing_user": to_str(r.get("Dispensing User")),
                "quantity_returned": to_int(r.get("Quantity Returned")),
                "date_returned": to_date(r.get("Date Returned")),
                "return_user": to_str(r.get("Return User")),
            })

    # De-duplicate on _id; the last occurrence wins.
    by_id = {r["_id"]: r for r in rows}
    return list(by_id.values())
def parse_destruction_files(study):
    """Parse the IP destruction basket workbooks of one study.

    Reads every ``ip_destruction_basket_*.xlsx`` file in
    ``<BASE_DIR>/xls_ip_destruction_<study>``. Basket metadata comes from
    labelled lines in the first column; the header row is the first row whose
    first cell equals ``Medication ID Description``. Files without a header
    row are skipped, as are rows without a medication ID or basket ID.

    Args:
        study: Study identifier used to build the directory name and stored
            on each document.

    Returns:
        List of destruction documents with ``_id`` =
        ``"<basket_id>:<medication_id>"``. When the same ``_id`` occurs more
        than once, the last occurrence wins.
    """
    meta_labels = (
        ("Investigator Name:", "investigator"),
        ("Site ID:", "site_id"),
        ("Location:", "location"),
        ("Basket ID:", "basket_id"),
        ("Drug Destruction Created Date:", "destruction_date"),
    )
    dest_dir = os.path.join(BASE_DIR, f"xls_ip_destruction_{study}")
    pattern = os.path.join(dest_dir, "ip_destruction_basket_*.xlsx")

    # Keyed by _id so a later duplicate overwrites an earlier one.
    docs_by_id = {}
    for path in sorted(glob.glob(pattern)):
        raw = pd.read_excel(path, header=None)
        meta = {}
        header_row = None
        for i, row in raw.iterrows():
            cell = row.iloc[0]
            first = str(cell).strip() if pd.notna(cell) else ""
            for label, attr in meta_labels:
                if first.startswith(label):
                    meta[attr] = first.replace(label, "").strip()
            if header_row is None and first == "Medication ID Description":
                header_row = i
        if header_row is None:
            continue

        sheet = pd.read_excel(path, header=header_row).dropna(how="all")
        basket_id = meta.get("basket_id")
        for _, r in sheet.iterrows():
            med_id = to_str(r.get("Medication ID"))
            if not (med_id and basket_id):
                continue
            doc_id = f"{basket_id}:{med_id}"
            docs_by_id[doc_id] = {
                "_id": doc_id,
                "study": study,
                "site_id": meta.get("site_id"),
                "investigator": meta.get("investigator"),
                "location": meta.get("location"),
                "basket_id": basket_id,
                "destruction_date": to_date(meta.get("destruction_date")),
                "medication_description": to_str(r.get("Medication ID Description")),
                "medication_id": med_id,
                "packaged_lot_description": to_str(r.get("Packaged Lot description")),
                "comments": to_str(r.get("Comments")),
            }
    return list(docs_by_id.values())
# ── hlavní import ────────────────────────────────────────────────────────────
def import_study(study):
    """Parse all drug workbooks of one study and write them to MongoDB.

    The import is logged first to obtain an ``import_id``. Shipments, shipment
    items and inventory are then upserted together with a snapshot, while
    destruction records are upserted without one.

    Args:
        study: Study identifier passed to every parser and to the import log.
    """
    print(f"\n [{study}] parsovani XLSX...")
    shipments = parse_shipments_report(study)
    items = parse_shipment_details(study)
    inventory = parse_inventory(study)
    destruct = parse_destruction_files(study)
    print(f" Zasilky: {len(shipments)} | Polozky: {len(items)} | Sklad: {len(inventory)} | Destrukce: {len(destruct)}")

    counts = {
        "shipments": len(shipments),
        "shipment_items": len(items),
        "inventory": len(inventory),
        "destruction": len(destruct),
    }
    import_id = log_import(study, f"drugs_{study}", "drugs", counts)
    print(f" import_id = {import_id}")

    # (main collection, snapshot collection, documents) in write order.
    snapshot_targets = (
        ("iwrs_shipments", "iwrs_shipments_snapshots", shipments),
        ("iwrs_shipment_items", "iwrs_shipment_items_snapshots", items),
        ("iwrs_inventory", "iwrs_inventory_snapshots", inventory),
    )
    for collection, snapshot_collection, docs in snapshot_targets:
        bulk_upsert_with_snapshot(collection, snapshot_collection, docs, import_id)
    bulk_upsert_only("iwrs_destruction", destruct, import_id)
def run(studies):
    """Ensure the indexes exist, then import each given study in order.

    Args:
        studies: Iterable of study identifiers to import.
    """
    ensure_indexes()
    for study in studies:
        import_study(study)
if __name__ == "__main__":
    # Studies come from the command line; without arguments both default
    # studies are imported.
    studies = sys.argv[1:] or ["77242113UCO3001", "42847922MDD3003"]
    run(studies)
@@ -0,0 +1,10 @@
{
"pk": 6545,
"title": "Screening",
"label": "Janssen 42847922MDD3003 Subject CZ100080002 has been screened at site S10-CZ10008",
"event": "Screen",
"actual_date": "2025-08-05",
"subject": "CZ100080002",
"study": "42847922MDD3003",
"text": "42847922MDD3003\nJanssen Pharmaceuticalshttps://janssen.4gclinical.com\nSubject CZ100080002 has been screened.\n \nSite Details \nLocation: CZE \nSite: S10-CZ10008 \nInvestigator: Solle, Zdenek \n\n \nSubject Details \nSubject: CZ100080002 \nIRT Subject Status: Screened \nCohort: Part 1\nInformed Consent Date at Screening: 04-Aug-2025 \n\nDate of Screening in IRT: 05-Aug-2025 \nTransaction Date/Time (site local): 05-Aug-2025 09:24:09\nTransaction Date/Time (system local): 05-Aug-2025 07:24:09\n\nTransaction performed by: v.smidkova@clintrial.cz\n \nIf you have questions about this notification, please contact 4G Clinical Support at \nhttps://support.4gclinical.com"
}
@@ -0,0 +1,10 @@
{
"pk": 6760,
"title": "Screening",
"label": "Janssen 42847922MDD3003 Subject CZ100040001 has been screened at site S10-CZ10004",
"event": "Screen",
"actual_date": "2025-08-08",
"subject": "CZ100040001",
"study": "42847922MDD3003",
"text": "42847922MDD3003\nJanssen Pharmaceuticalshttps://janssen.4gclinical.com\nSubject CZ100040001 has been screened.\n \nSite Details \nLocation: CZE \nSite: S10-CZ10004 \nInvestigator: Herman, Erik \n\n \nSubject Details \nSubject: CZ100040001 \nIRT Subject Status: Screened \nCohort: Part 1\nInformed Consent Date at Screening: 08-Aug-2025 \n\nDate of Screening in IRT: 08-Aug-2025 \nTransaction Date/Time (site local): 08-Aug-2025 09:39:48\nTransaction Date/Time (system local): 08-Aug-2025 07:39:48\n\nTransaction performed by: gnovotna@email.cz\n \nIf you have questions about this notification, please contact 4G Clinical Support at \nhttps://support.4gclinical.com"
}
@@ -0,0 +1,10 @@
{
"pk": 7427,
"title": "Dispensation",
"label": "Janssen 42847922MDD3003 Subject CZ100080002 dispensing confirmation has occurred at site S10-CZ10008",
"event": "uv_disp_conf",
"actual_date": "2025-08-26",
"subject": "CZ100080002",
"study": "42847922MDD3003",
"text": "42847922MDD3003\nJanssen Pharmaceuticals\nhttps://janssen.4gclinical.com\n\n Confirmation of dispensing medication(s) for Subject CZ100080002 has\nbeen performed for the following medication IDs:\n\nMedication No: 1019782\n\nProduct Label Type: Seltorexant 20mg or placebo\nMedication status:\n Dispensed\nDispensation date: 26-Aug-2025\nNote (only collected if Not Dispensed):\n \n\nMedication No: 1020699\n\nProduct Label Type: Seltorexant 20mg or placebo\nMedication status:\n Dispensed\nDispensation date: 26-Aug-2025\nNote (only collected if Not Dispensed):\n \n\nSite Details \nLocation: CZE \nSite:\nS10-CZ10008 \nInvestigator: Solle, Zdenek \n\nSubject Details \nSubject:\nCZ100080002 \nIRT Subject Status: Randomized Part 1 \n\nTransaction Date/Time (site local): 26-Aug-2025 13:17:53\n\nTransaction Date/Time (system local): 26-Aug-2025 11:17:53 \nTransaction performed by: v.smidkova@clintrial.cz\n\nIf you have\nquestions about this notification, please contact 4G Clinical Support at \nhttps://support.4gclinical.com"
}
@@ -0,0 +1,10 @@
{
"pk": 7426,
"title": "Randomized_Part_1",
"label": "Janssen 42847922MDD3003 Subject CZ100080002 randomized into Part 1 at site S10-CZ10008",
"event": "Rand",
"actual_date": "2025-08-26",
"subject": "CZ100080002",
"study": "42847922MDD3003",
"text": "42847922MDD3003\nJanssen Pharmaceuticalshttps://janssen.4gclinical.com\nSubject CZ100080002 has been randomized into Part 1.\n \nThe following medication(s) has been assigned to the subject:\n\nMedication NoMedication TypePackaged Lot NoExpiration Date1019782Seltorexant 20mg or placeboT38028505-Apr-20261020699Seltorexant 20mg or placeboT38028505-Apr-2026\n\n \nSite Details \nLocation: CZE \nSite: S10-CZ10008 \nInvestigator: Solle, Zdenek \n\n \nSubject Details \nSubject: CZ100080002 \nIRT Subject Status: Randomized Part 1 \n\nTransaction Date/Time (site local): 26-Aug-2025 13:03:37\nTransaction Date/Time (system local): 26-Aug-2025 11:03:37\nTransaction performed by: v.smidkova@clintrial.cz\n \nIf you have questions about this notification, please contact 4G Clinical Support at \nhttps://support.4gclinical.com"
}
@@ -0,0 +1,10 @@
{
"pk": 7414,
"title": "Screening",
"label": "Janssen 42847922MDD3003 Subject CZ100080004 has been screened at site S10-CZ10008",
"event": "Screen",
"actual_date": "2025-08-26",
"subject": "CZ100080004",
"study": "42847922MDD3003",
"text": "42847922MDD3003\nJanssen Pharmaceuticalshttps://janssen.4gclinical.com\nSubject CZ100080004 has been screened.\n \nSite Details \nLocation: CZE \nSite: S10-CZ10008 \nInvestigator: Solle, Zdenek \n\n \nSubject Details \nSubject: CZ100080004 \nIRT Subject Status: Screened \nCohort: Part 1\nInformed Consent Date at Screening: 25-Aug-2025 \n\nDate of Screening in IRT: 26-Aug-2025 \nTransaction Date/Time (site local): 26-Aug-2025 09:33:38\nTransaction Date/Time (system local): 26-Aug-2025 07:33:38\n\nTransaction performed by: m.deif@clintrial.cz\n \nIf you have questions about this notification, please contact 4G Clinical Support at \nhttps://support.4gclinical.com"
}
@@ -0,0 +1,10 @@
{
"pk": 7772,
"title": "Screening",
"label": "Janssen 42847922MDD3003 Subject CZ100120002 has been screened at site S10-CZ10012",
"event": "Screen",
"actual_date": "2025-09-03",
"subject": "CZ100120002",
"study": "42847922MDD3003",
"text": "42847922MDD3003\nJanssen Pharmaceuticalshttps://janssen.4gclinical.com\nSubject CZ100120002 has been screened.\n \nSite Details \nLocation: CZE \nSite: S10-CZ10012 \nInvestigator: Urban, Ales \n\n \nSubject Details \nSubject: CZ100120002 \nIRT Subject Status: Screened \nCohort: Part 1\nInformed Consent Date at Screening: 03-Sep-2025 \n\nDate of Screening in IRT: 03-Sep-2025 \nTransaction Date/Time (site local): 03-Sep-2025 13:44:09\nTransaction Date/Time (system local): 03-Sep-2025 11:44:09\n\nTransaction performed by: marcelasedlackova@seznam.cz\n \nIf you have questions about this notification, please contact 4G Clinical Support at \nhttps://support.4gclinical.com"
}
@@ -0,0 +1,10 @@
{
"pk": 7946,
"title": "Randomized_Part_1",
"label": "Janssen 42847922MDD3003 Subject CZ100040001 randomized into Part 1 at site S10-CZ10004",
"event": "Rand",
"actual_date": "2025-09-05",
"subject": "CZ100040001",
"study": "42847922MDD3003",
"text": "42847922MDD3003\nJanssen Pharmaceuticalshttps://janssen.4gclinical.com\nSubject CZ100040001 has been randomized into Part 1.\n \nThe following medication(s) has been assigned to the subject:\n\nMedication NoMedication TypePackaged Lot NoExpiration Date1109895Seltorexant 20mg or placeboT38028505-Apr-20261125335Seltorexant 20mg or placeboT38028505-Apr-2026\n\n \nSite Details \nLocation: CZE \nSite: S10-CZ10004 \nInvestigator: Herman, Erik \n\n \nSubject Details \nSubject: CZ100040001 \nIRT Subject Status: Randomized Part 1 \n\nTransaction Date/Time (site local): 05-Sep-2025 10:23:26\nTransaction Date/Time (system local): 05-Sep-2025 08:23:26\nTransaction performed by: gnovotna@email.cz\n \nIf you have questions about this notification, please contact 4G Clinical Support at \nhttps://support.4gclinical.com"
}
@@ -0,0 +1,10 @@
{
"pk": 8056,
"title": "Dispensation",
"label": "Janssen 42847922MDD3003 Subject CZ100080002 dispensing confirmation has occurred at site S10-CZ10008",
"event": "uv_disp_conf",
"actual_date": "2025-09-09",
"subject": "CZ100080002",
"study": "42847922MDD3003",
"text": "42847922MDD3003\nJanssen Pharmaceuticals\nhttps://janssen.4gclinical.com\n\n Confirmation of dispensing medication(s) for Subject CZ100080002 has\nbeen performed for the following medication IDs:\n\nMedication No: 1043808\n\nProduct Label Type: Seltorexant 20mg or placebo\nMedication status:\n Dispensed\nDispensation date: 09-Sep-2025\nNote (only collected if Not Dispensed):\n \n\nMedication No: 1056387\n\nProduct Label Type: Seltorexant 20mg or placebo\nMedication status:\n Dispensed\nDispensation date: 09-Sep-2025\nNote (only collected if Not Dispensed):\n \n\nSite Details \nLocation: CZE \nSite:\nS10-CZ10008 \nInvestigator: Solle, Zdenek \n\nSubject Details \nSubject:\nCZ100080002 \nIRT Subject Status: Randomized Part 1 \n\nTransaction Date/Time (site local): 09-Sep-2025 12:10:27\n\nTransaction Date/Time (system local): 09-Sep-2025 10:10:27 \nTransaction performed by: m.deif@clintrial.cz\n\nIf you have\nquestions about this notification, please contact 4G Clinical Support at \nhttps://support.4gclinical.com"
}
@@ -0,0 +1,10 @@
{
"pk": 8055,
"title": "Assignment",
"label": "Janssen 42847922MDD3003 Subject CZ100080002 has been assigned medication for visit Double Blind Part 1 Visit 6 at site S10-CZ10008",
"event": "DB_P1_V6",
"actual_date": "2025-09-09",
"subject": "CZ100080002",
"study": "42847922MDD3003",
"text": "42847922MDD3003\nJanssen Pharmaceuticalshttps://janssen.4gclinical.com\nSubject CZ100080002 has been assigned the following medication(s) for visit Double Blind Part 1 Visit 6:\n\nMedication NoMedication TypePackaged Lot NoExpiration Date1043808Seltorexant 20mg or placeboT38028505-Apr-20261056387Seltorexant 20mg or placeboT38028505-Apr-2026\n\n \nSite Details \nLocation: CZE \nSite: S10-CZ10008 \nInvestigator: Solle, Zdenek \n\n \nSubject Details \nSubject: CZ100080002 \nIRT Subject Status: Randomized Part 1 \n\nTransaction Date/Time (site local): 09-Sep-2025 12:09:34 \nTransaction Date/Time (system local): 09-Sep-2025 10:09:34 \nTransaction performed by: m.deif@clintrial.cz\n \nIf you have questions about this notification, please contact 4G Clinical Support at \nhttps://support.4gclinical.com"
}
@@ -0,0 +1,10 @@
{
"pk": 8124,
"title": "Screening",
"label": "Janssen 42847922MDD3003 Subject CZ100080005 has been screened at site S10-CZ10008",
"event": "Screen",
"actual_date": "2025-09-10",
"subject": "CZ100080005",
"study": "42847922MDD3003",
"text": "42847922MDD3003\nJanssen Pharmaceuticalshttps://janssen.4gclinical.com\nSubject CZ100080005 has been screened.\n \nSite Details \nLocation: CZE \nSite: S10-CZ10008 \nInvestigator: Solle, Zdenek \n\n \nSubject Details \nSubject: CZ100080005 \nIRT Subject Status: Screened \nCohort: Part 1\nInformed Consent Date at Screening: 09-Sep-2025 \n\nDate of Screening in IRT: 10-Sep-2025 \nTransaction Date/Time (site local): 10-Sep-2025 10:44:50\nTransaction Date/Time (system local): 10-Sep-2025 08:44:50\n\nTransaction performed by: v.smidkova@clintrial.cz\n \nIf you have questions about this notification, please contact 4G Clinical Support at \nhttps://support.4gclinical.com"
}
@@ -0,0 +1,10 @@
{
"pk": 8351,
"title": "Dispensation",
"label": "Janssen 42847922MDD3003 Subject CZ100080004 dispensing confirmation has occurred at site S10-CZ10008",
"event": "uv_disp_conf",
"actual_date": "2025-09-16",
"subject": "CZ100080004",
"study": "42847922MDD3003",
"text": "42847922MDD3003\nJanssen Pharmaceuticals\nhttps://janssen.4gclinical.com\n\n Confirmation of dispensing medication(s) for Subject CZ100080004 has\nbeen performed for the following medication IDs:\n\nMedication No: 1076269\n\nProduct Label Type: Seltorexant 20mg or placebo\nMedication status:\n Dispensed\nDispensation date: 16-Sep-2025\nNote (only collected if Not Dispensed):\n \n\nMedication No: 1094935\n\nProduct Label Type: Seltorexant 20mg or placebo\nMedication status:\n Dispensed\nDispensation date: 16-Sep-2025\nNote (only collected if Not Dispensed):\n \n\nSite Details \nLocation: CZE \nSite:\nS10-CZ10008 \nInvestigator: Solle, Zdenek \n\nSubject Details \nSubject:\nCZ100080004 \nIRT Subject Status: Randomized Part 1 \n\nTransaction Date/Time (site local): 16-Sep-2025 12:54:42\n\nTransaction Date/Time (system local): 16-Sep-2025 10:54:42 \nTransaction performed by: m.deif@clintrial.cz\n\nIf you have\nquestions about this notification, please contact 4G Clinical Support at \nhttps://support.4gclinical.com"
}
@@ -0,0 +1,10 @@
{
"pk": 8350,
"title": "Randomized_Part_1",
"label": "Janssen 42847922MDD3003 Subject CZ100080004 randomized into Part 1 at site S10-CZ10008",
"event": "Rand",
"actual_date": "2025-09-16",
"subject": "CZ100080004",
"study": "42847922MDD3003",
"text": "42847922MDD3003\nJanssen Pharmaceuticalshttps://janssen.4gclinical.com\nSubject CZ100080004 has been randomized into Part 1.\n \nThe following medication(s) has been assigned to the subject:\n\nMedication NoMedication TypePackaged Lot NoExpiration Date1076269Seltorexant 20mg or placeboT38028505-Apr-20261094935Seltorexant 20mg or placeboT38028505-Apr-2026\n\n \nSite Details \nLocation: CZE \nSite: S10-CZ10008 \nInvestigator: Solle, Zdenek \n\n \nSubject Details \nSubject: CZ100080004 \nIRT Subject Status: Randomized Part 1 \n\nTransaction Date/Time (site local): 16-Sep-2025 12:53:29\nTransaction Date/Time (system local): 16-Sep-2025 10:53:29\nTransaction performed by: m.deif@clintrial.cz\n \nIf you have questions about this notification, please contact 4G Clinical Support at \nhttps://support.4gclinical.com"
}
@@ -0,0 +1,10 @@
{
"pk": 8335,
"title": "Screening",
"label": "Janssen 42847922MDD3003 Subject CZ100080006 has been screened at site S10-CZ10008",
"event": "Screen",
"actual_date": "2025-09-16",
"subject": "CZ100080006",
"study": "42847922MDD3003",
"text": "42847922MDD3003\nJanssen Pharmaceuticalshttps://janssen.4gclinical.com\nSubject CZ100080006 has been screened.\n \nSite Details \nLocation: CZE \nSite: S10-CZ10008 \nInvestigator: Solle, Zdenek \n\n \nSubject Details \nSubject: CZ100080006 \nIRT Subject Status: Screened \nCohort: Part 1\nInformed Consent Date at Screening: 15-Sep-2025 \n\nDate of Screening in IRT: 16-Sep-2025 \nTransaction Date/Time (site local): 16-Sep-2025 09:14:53\nTransaction Date/Time (system local): 16-Sep-2025 07:14:53\n\nTransaction performed by: v.smidkova@clintrial.cz\n \nIf you have questions about this notification, please contact 4G Clinical Support at \nhttps://support.4gclinical.com"
}

Some files were not shown because too many files have changed in this diff Show More