This commit is contained in:
2026-06-10 11:59:03 +02:00
parent 033c3e5046
commit a41f97b86b
257 changed files with 133 additions and 7009 deletions
@@ -51,7 +51,7 @@ OVERRIDES = {
} }
# Ocekavane pocty (dle odsouhlasene klasifikace 09JUN2026) pro kontrolu # Ocekavane pocty (dle odsouhlasene klasifikace 09JUN2026) pro kontrolu
EXPECTED = {K0: 4, K0N: 36, K1: 25, K2: 25, K31: 3, K32: 12, K4: 11, K5: 0, K6: 14, K7: 0} EXPECTED = {K0: 4, K0N: 36, K1: 17, K2: 30, K31: 3, K32: 14, K4: 9, K5: 3, K6: 13, K7: 1}
def classify(status: str) -> str: def classify(status: str) -> str:
@@ -33,26 +33,28 @@ TOWER_USER = "root"
TOWER_PASS = "7309208104" TOWER_PASS = "7309208104"
REMOTE_DIR = "/mnt/user/JNJEMAILS" REMOTE_DIR = "/mnt/user/JNJEMAILS"
TMPDIR = r"u:\Dropbox\!!!Days\Downloads Z230\_cda_tmp" TMPDIR = r"u:\Dropbox\!!!Days\Downloads Z230\_cda_tmp"
STORED_AT = "2026-06-09" STORED_AT = "2026-06-10"
# investigator_id -> (msg_filename, attachment_filename, label) # investigator_id -> (msg_filename, attachment_filename, label)
# DAVKA 2 (09JUN2026): institucionalni CDA, soubor jmenovan primo v STATUS lekare # DAVKA 3 (10JUN2026): nove CDA z 10.6. (krok 4 -> 5)
MAPPING = [ MAPPING = [
("6a19832b5fc2213518257950", "FC130007D8A1F0E30000.msg", ("6a19832b5fc221351825796c", "FC130007DE92C2040000.msg",
"CZ_CDA Template Master institution_Axon Clinical, s.r.o._fully signed 08Jun2026.pdf", "CZ_CDA institution_MUDr. GREGAR s.r.o_Jan Gregar_fully signed_09Jun2026.pdf",
"Matous Jan (AXON Clinical, master)"), "Gregar Jan (MUDr. GREGAR s.r.o.)"),
("6a19832b5fc2213518257958", "FC130007D8A1F0E60000.msg", ("6a19832b5fc2213518257969", "FC130007DE92C2030000.msg",
"SK_CDA institution_Gastro LM_fully signed_08Jun2026.pdf", "SK_CDA PI_Durina_FN Nove Zamky_fully signed 09Jun2026.pdf",
"Mihalkanin Lubomir (Gastro LM)"), "Durina Juraj (FN Nove Zamky)"),
("6a198b661218c31ab0f5ba4e", "FC130007C1643CA10000.msg", ("6a19832b5fc2213518257973", "FC130007DE92C1FE0000.msg",
"06_CDA-Janssen a FN v Motole_fully executed.pdf", "SK_CDA_Institution_Accout Center s.r.o_09Jun2026.pdf",
"Krizova Viera (FN Motol master)"), "Horvath Frantisek (Accout Center)"),
] ]
# DAVKA 1 (09JUN2026) - jiz ulozeno, ponechano pro historii: # DAVKA 1+2 (09JUN2026) - jiz ulozeno, ponechano pro historii:
# Hlavaty/Cliniq FC1300053049739C, Fedurco/ENDOMED FC1300053049739B, # Hlavaty/Cliniq FC1300053049739C, Fedurco/ENDOMED FC1300053049739B,
# Tichy FC13000530495B95, Falc FC130007D8A1F0E6, Pesta FC130007D8A1F0E1, # Tichy FC13000530495B95, Falc FC130007D8A1F0E6, Pesta FC130007D8A1F0E1,
# Jungwirthova FC130007D8A1F0E2, Lukac FC130007C9E971FF (store_cda_to_mongo_v1.0) # Jungwirthova FC130007D8A1F0E2, Lukac FC130007C9E971FF (store_cda_to_mongo_v1.0),
# Matous/Axon FC130007D8A1F0E3, Mihalkanin/GastroLM FC130007D8A1F0E6,
# Krizova/Motol FC130007C1643CA1
def norm(s): def norm(s):
-253
View File
@@ -1,253 +0,0 @@
"""
Import Drugs dat (shipments, shipment_items, inventory, destruction) z XLSX do MongoDB.
Volá se z IWRS/Drugs/run_all.py po stažení reportů.
"""
import os
import sys
import re
import glob
import pandas as pd
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from common.mongo_writer import (
to_str, to_int, to_date,
ensure_indexes, log_import,
bulk_upsert_with_snapshot, bulk_upsert_only,
)
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# ── XLSX parsery (převzaté z run_all.py + úprava na Mongo dokumenty) ─────────
def parse_shipments_report(study):
    """Parse the per-study shipments report workbook into shipment documents.

    Reads ``xls_shipments_<study>/shipments_report_<study>.xlsx`` under
    ``BASE_DIR``, locates the header row (the first row containing a cell
    equal to ``"Shipment ID"`` after stripping), keeps only rows whose
    ``Location`` contains "Czech" (case-insensitive) and returns one dict per
    shipment, keyed by ``_id`` = shipment id.

    Args:
        study: Study identifier used to build the directory and file name.

    Returns:
        A list of dicts; empty when the file is missing or no header row is
        found.
    """
    path = os.path.join(BASE_DIR, f"xls_shipments_{study}", f"shipments_report_{study}.xlsx")
    if not os.path.exists(path):
        print(f" CHYBI: {path}")
        return []

    raw = pd.read_excel(path, header=None)
    header_row = None
    for i, row in raw.iterrows():
        if "Shipment ID" in [str(v).strip() for v in row]:
            header_row = i
            break
    if header_row is None:
        return []

    df = pd.read_excel(path, header=header_row).dropna(how="all")
    # The header row is detected on stripped cell text, so the column labels
    # must be stripped as well; otherwise a padded header such as
    # "Shipment ID " is found above but raises KeyError on access below.
    df.columns = [c.strip() if isinstance(c, str) else c for c in df.columns]
    df = df[df["Location"].astype(str).str.contains("Czech", na=False, case=False)]
    available = df.columns.tolist()

    def optional(record, column, convert):
        # Columns that are absent from this export yield None.
        return convert(record[column]) if column in available else None

    rows = []
    for _, r in df.iterrows():
        sid = to_str(r["Shipment ID"])
        if not sid:
            continue
        rows.append({
            "_id": sid,
            "shipment_id": sid,
            "study": study,
            "status": to_str(r["IRT Shipment Status"]),
            "type": to_str(r["Type"]),
            "ship_from": to_str(r["Shipment From"]),
            "ship_to_site": to_str(r["Ship To:"]),
            "location": to_str(r["Location"]),
            "request_date": to_date(r["Request Date"]),
            "shipped_date": to_date(r["Shipped Date"]),
            "received_date": optional(r, "Received Date", to_date),
            "received_by": optional(r, "Received by", to_str),
            "delivered_date_utc": optional(r, "Delivered Date [UTC]", to_date),
            "delivery_recipient": optional(r, "Delivery Recipient", to_str),
            "delivery_details": optional(r, "Delivery Details", to_str),
            "cancelled_date": optional(r, "Cancelled Date", to_date),
            "total_medication_ids": optional(r, "Total Medication IDs", to_int),
            "tracking_no": optional(r, "Tracking #", to_str),
            "shipping_category": optional(r, "Shipping Category", to_str),
            "expected_arrival": optional(r, "Expected Arrival", to_date),
        })
    return rows
def parse_shipment_details(study):
    """Collect shipment line items from every ``shipment_details_*.xlsx`` file.

    The shipment id is taken from the file name. Each workbook's header row is
    the first row containing a cell equal to ``"Medication ID"`` (stripped);
    workbooks without such a row are skipped, as are rows without a
    medication id.

    Returns:
        A list of item dicts with ``_id`` = ``"<shipment_id>:<medication_id>"``.
        When the same ``_id`` occurs more than once, the last occurrence wins.
    """
    folder = os.path.join(BASE_DIR, f"xls_shipment_details_{study}")
    workbooks = sorted(glob.glob(os.path.join(folder, "shipment_details_*.xlsx")))

    collected = []
    for workbook in workbooks:
        match = re.search(r"shipment_details_(.+)\.xlsx", os.path.basename(workbook))
        shipment_id = "UNKNOWN" if match is None else match.group(1)

        grid = pd.read_excel(workbook, header=None)
        header_at = next(
            (
                idx
                for idx, cells in grid.iterrows()
                if "Medication ID" in [str(value).strip() for value in cells]
            ),
            None,
        )
        if header_at is None:
            continue

        table = pd.read_excel(workbook, header=header_at).dropna(how="all")
        for _, rec in table.iterrows():
            # Two header spellings are accepted for description and type.
            description = (to_str(rec.get("Medication Description"))
                           or to_str(rec.get("Medication ID Description")))
            kind = (to_str(rec.get("Medication type"))
                    or to_str(rec.get("Medication ID type")))
            medication_id = to_str(rec.get("Medication ID"))
            if not medication_id:
                continue
            collected.append({
                "_id": f"{shipment_id}:{medication_id}",
                "study": study,
                "shipment_id": shipment_id,
                "destination_location": to_str(rec.get("Destination Location")),
                "shipment_status": to_str(rec.get("IRT Shipment Status")),
                "shipment_type": to_str(rec.get("Type")),
                "destination_site": to_str(rec.get("Destination Site")),
                "investigator": to_str(rec.get("Investigator")),
                "medication_description": description,
                "medication_type": kind,
                "medication_id": medication_id,
                "packaged_lot_no": to_str(rec.get("Packaged Lot number")),
                "packaged_lot_description": to_str(rec.get("Packaged Lot description")),
                "container_id": to_str(rec.get("Container ID")),
                "quantity": to_int(rec.get("Quantity of Medication IDs")),
                "expiration_date": to_date(rec.get("Expiration Date")),
                "item_status": to_str(rec.get("Status")),
            })

    # De-duplicate on _id; later entries overwrite earlier ones.
    unique = {}
    for item in collected:
        unique[item["_id"]] = item
    return list(unique.values())
def parse_inventory(study):
    """Parse all ``onsite_inventory_detail_*.xlsx`` workbooks for a study.

    Each workbook carries ``Site:``, ``Investigator:`` and ``Location:`` lines
    in its first column, followed by a table whose header row starts with
    ``"Medication"`` or ``"Medication ID"``. The first table column is treated
    as the medication id. Rows without a medication id, and all rows of a
    workbook without a ``Site:`` value, are skipped.

    Returns:
        A list of dicts with ``_id`` = ``"<site>:<medication_id>"``; for
        duplicate ids the last occurrence wins.
    """
    folder = os.path.join(BASE_DIR, f"xls_reports_{study}")
    workbooks = sorted(glob.glob(os.path.join(folder, "onsite_inventory_detail_*.xlsx")))
    prefixes = (("Site:", "site"), ("Investigator:", "investigator"), ("Location:", "location"))

    collected = []
    for workbook in workbooks:
        grid = pd.read_excel(workbook, header=None)
        info = {"site": None, "investigator": None, "location": None}
        header_at = None
        for idx, cells in grid.iterrows():
            lead = cells.iloc[0]
            text = str(lead).strip() if pd.notna(lead) else ""
            # At most one metadata prefix is applied per row.
            for prefix, field in prefixes:
                if text.startswith(prefix):
                    info[field] = text.replace(prefix, "").strip()
                    break
            # Only the first matching row is used as the table header.
            if header_at is None and text in ("Medication", "Medication ID"):
                header_at = idx
        if header_at is None:
            continue

        table = pd.read_excel(workbook, header=header_at).dropna(how="all")
        table = table.rename(columns={table.columns[0]: "medication_id"})
        site = info["site"]
        for _, rec in table.iterrows():
            medication_id = to_str(rec["medication_id"])
            if not (medication_id and site):
                continue
            collected.append({
                "_id": f"{site}:{medication_id}",
                "study": study,
                "site": site,
                "investigator": info["investigator"],
                "location": info["location"],
                "medication_id": medication_id,
                "packaged_lot_no": to_str(rec.get("Packaged Lot number")),
                "original_expiration_date": to_date(rec.get("Original Expiration Date when Packaged Lot was Added")),
                "expiration_date": to_date(rec.get("Expiration date")),
                "received_date": to_date(rec.get("Received Date")),
                "receipt_user": to_str(rec.get("Shipment Receipt User")),
                "subject_identifier": to_str(rec.get("Subject Identifier")),
                "quantity_assigned": to_int(rec.get("Quantity Assigned")),
                "irt_transaction": to_str(rec.get("IRT Transaction")),
                "date_assigned": to_date(rec.get("Date Assigned")),
                "assignment_user": to_str(rec.get("Assignment User")),
                "dispensation_status": to_str(rec.get("Dispensation Status")),
                "dispensing_date": to_date(rec.get("Dispensing date") or rec.get("Dispensing Date")),
                "quantity_dispensed": to_int(rec.get("Quantity Dispensed")),
                "dispensing_user": to_str(rec.get("Dispensing User")),
                "quantity_returned": to_int(rec.get("Quantity Returned")),
                "date_returned": to_date(rec.get("Date Returned")),
                "return_user": to_str(rec.get("Return User")),
            })

    unique = {}
    for item in collected:
        unique[item["_id"]] = item
    return list(unique.values())
def parse_destruction_files(study):
    """Parse all ``ip_destruction_basket_*.xlsx`` workbooks for a study.

    Basket metadata (investigator, site id, location, basket id, destruction
    date) is read from prefixed lines in the first column. The item table
    starts at the first row whose first cell equals
    ``"Medication ID Description"``. Rows without a medication id, and all
    rows of a workbook without a basket id, are skipped.

    Returns:
        A list of dicts with ``_id`` = ``"<basket_id>:<medication_id>"``; for
        duplicate ids the last occurrence wins.
    """
    folder = os.path.join(BASE_DIR, f"xls_ip_destruction_{study}")
    workbooks = sorted(glob.glob(os.path.join(folder, "ip_destruction_basket_*.xlsx")))
    meta_prefixes = {
        "Investigator Name:": "investigator",
        "Site ID:": "site_id",
        "Location:": "location",
        "Basket ID:": "basket_id",
        "Drug Destruction Created Date:": "destruction_date",
    }

    collected = []
    for workbook in workbooks:
        grid = pd.read_excel(workbook, header=None)
        meta = {}
        header_at = None
        for idx, cells in grid.iterrows():
            lead = cells.iloc[0]
            text = str(lead).strip() if pd.notna(lead) else ""
            for prefix, field in meta_prefixes.items():
                if text.startswith(prefix):
                    meta[field] = text.replace(prefix, "").strip()
            if header_at is None and text == "Medication ID Description":
                header_at = idx
        if header_at is None:
            continue

        table = pd.read_excel(workbook, header=header_at).dropna(how="all")
        basket_id = meta.get("basket_id")
        for _, rec in table.iterrows():
            medication_id = to_str(rec.get("Medication ID"))
            if not (medication_id and basket_id):
                continue
            collected.append({
                "_id": f"{basket_id}:{medication_id}",
                "study": study,
                "site_id": meta.get("site_id"),
                "investigator": meta.get("investigator"),
                "location": meta.get("location"),
                "basket_id": basket_id,
                "destruction_date": to_date(meta.get("destruction_date")),
                "medication_description": to_str(rec.get("Medication ID Description")),
                "medication_id": medication_id,
                "packaged_lot_description": to_str(rec.get("Packaged Lot description")),
                "comments": to_str(rec.get("Comments")),
            })

    unique = {}
    for item in collected:
        unique[item["_id"]] = item
    return list(unique.values())
# ── hlavní import ────────────────────────────────────────────────────────────
def import_study(study):
    """Parse all four XLSX sources for one study and write them to MongoDB.

    Registers the run via ``log_import`` with the per-collection row counts,
    then upserts shipments, shipment items and inventory with snapshots, and
    destruction records without a snapshot.
    """
    print(f"\n [{study}] parsovani XLSX...")
    shipments = parse_shipments_report(study)
    items = parse_shipment_details(study)
    inventory = parse_inventory(study)
    destruct = parse_destruction_files(study)

    counts = {
        "shipments": len(shipments),
        "shipment_items": len(items),
        "inventory": len(inventory),
        "destruction": len(destruct),
    }
    print(
        f" Zasilky: {counts['shipments']} | Polozky: {counts['shipment_items']}"
        f" | Sklad: {counts['inventory']} | Destrukce: {counts['destruction']}"
    )

    import_id = log_import(study, f"drugs_{study}", "drugs", counts)
    print(f" import_id = {import_id}")

    snapshot_targets = (
        ("iwrs_shipments", "iwrs_shipments_snapshots", shipments),
        ("iwrs_shipment_items", "iwrs_shipment_items_snapshots", items),
        ("iwrs_inventory", "iwrs_inventory_snapshots", inventory),
    )
    for collection, snapshot_collection, documents in snapshot_targets:
        bulk_upsert_with_snapshot(collection, snapshot_collection, documents, import_id)
    bulk_upsert_only("iwrs_destruction", destruct, import_id)
def run(studies):
    """Ensure indexes exist, then import each study in the given order."""
    ensure_indexes()
    for study_code in studies:
        import_study(study_code)
if __name__ == "__main__":
    # Studies come from the command line; with no arguments both default
    # studies are imported.
    cli_studies = sys.argv[1:]
    studies = cli_studies if cli_studies else ["77242113UCO3001", "42847922MDD3003"]
    run(studies)
-245
View File
@@ -1,245 +0,0 @@
"""
Kompletní pipeline pro Drugs:
1. Onsite inventory detail (per site, vždy přepisuje)
2. IP destruction (per košík, přeskočí již existující soubory)
3. Shipments report (jeden soubor na studii, přepisuje)
4. Shipment details (per zásilka CZ, vždy přepisuje)
5. Import do MongoDB (studie.iwrs_shipments / iwrs_shipment_items / iwrs_inventory / iwrs_destruction)
Spusť tento skript — zpracuje obě studie automaticky.
"""
import os
import glob
import re
import datetime
import sys
import pandas as pd
from playwright.sync_api import sync_playwright
import import_to_mongo as drugs_mongo
# Base URL of the IRT portal that the reports are downloaded from.
BASE_URL = "https://janssen.4gclinical.com"

# Login credentials. The IWRS_EMAIL / IWRS_PASSWORD environment variables take
# precedence; the literals are kept only as a backward-compatible fallback.
# NOTE(review): plaintext credentials are committed to source control — rotate
# the password and remove the fallback values once the environment variables
# are configured wherever this script runs.
EMAIL = os.environ.get("IWRS_EMAIL", "vbuzalka@its.jnj.com")
PASSWORD = os.environ.get("IWRS_PASSWORD", "Vlado123++-+")

# Studies processed by the pipeline, in this order.
STUDIES = ["77242113UCO3001", "42847922MDD3003"]

# Site identifiers per study for which the onsite inventory report is downloaded.
SITES = {
    "77242113UCO3001": [
        "DD5-CZ10001", "DD5-CZ10003", "DD5-CZ10006", "DD5-CZ10009",
        "DD5-CZ10010", "DD5-CZ10012", "DD5-CZ10013", "DD5-CZ10015",
        "DD5-CZ10016", "DD5-CZ10020", "DD5-CZ10021", "DD5-CZ10022",
    ],
    "42847922MDD3003": [
        "S10-CZ10002", "S10-CZ10004", "S10-CZ10005",
        "S10-CZ10008", "S10-CZ10011", "S10-CZ10012",
    ],
}

# Directory of this script; all download folders are created beneath it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# ── login ────────────────────────────────────────────────────────────────────
def login(page, study):
    """Log in with the module-level credentials and select ``study``.

    Args:
        page: Playwright page used for the whole session.
        study: Name of the option to pick in the "Study *" selector.
    """
    page.goto(BASE_URL)
    page.wait_for_load_state("networkidle")

    # Credentials form.
    for field_label, field_value in (("Email *", EMAIL), ("Password *", PASSWORD)):
        page.get_by_label(field_label).fill(field_value)
    page.locator("#login__submit").click()
    page.wait_for_load_state("networkidle")

    # Study selection.
    study_picker = page.get_by_label("Study *")
    study_picker.click()
    page.get_by_role("option", name=study).click()
    page.get_by_role("button", name="SELECT").click()
    page.wait_for_load_state("networkidle")
# ── download funkce ──────────────────────────────────────────────────────────
def download_inventory(page, study):
    """Download the onsite inventory detail XLSX for every site of ``study``.

    Files are saved as ``xls_reports_<study>/onsite_inventory_detail_<site>.xlsx``
    and overwrite any previous download.
    """
    target_dir = os.path.join(BASE_DIR, f"xls_reports_{study}")
    os.makedirs(target_dir, exist_ok=True)

    wait_ms = 120000
    page.goto(f"{BASE_URL}/report/onsite_inventory_detail")
    page.wait_for_load_state("networkidle", timeout=wait_ms)

    site_ids = SITES[study]
    for site_id in site_ids:
        print(f" [{site_id}] inventory...")
        # Open the site selector and choose the site.
        selector = page.locator('input[placeholder="search"], input[type="text"]').first
        selector.click()
        page.get_by_role("option", name=site_id).click()
        page.wait_for_load_state("networkidle", timeout=wait_ms)

        destination = os.path.join(target_dir, f"onsite_inventory_detail_{site_id}.xlsx")
        with page.expect_download(timeout=wait_ms) as pending:
            page.get_by_role("button", name="Download XLS").click()
        pending.value.save_as(destination)

        # Reset the filter before the next site.
        page.get_by_role("button", name="Clear").click()
        page.wait_for_load_state("networkidle", timeout=wait_ms)

    print(f" Inventory OK ({len(site_ids)} center)")
def download_destruction(page, study):
    """Download IP destruction forms for baskets that are not yet on disk.

    The basket list is read from the options shown by the report's selector.
    A basket whose ``ip_destruction_basket_<basket>.xlsx`` file already exists
    is skipped.
    """
    target_dir = os.path.join(BASE_DIR, f"xls_ip_destruction_{study}")
    os.makedirs(target_dir, exist_ok=True)

    wait_ms = 120000
    selector_css = 'input[placeholder="search"], input[type="text"]'
    page.goto(f"{BASE_URL}/report/ip_destruction_form")
    page.wait_for_load_state("networkidle", timeout=wait_ms)

    # Open the selector once just to read the available basket names.
    page.locator(selector_css).first.click()
    page.wait_for_timeout(1000)
    baskets = []
    for option_text in page.locator("mat-option").all_inner_texts():
        label = option_text.strip()
        if label and label != "No results found":
            baskets.append(label)
    page.keyboard.press("Escape")
    page.wait_for_timeout(500)

    if not baskets:
        print(" Žádné destruction košíky")
        return

    new_count = 0
    for basket in baskets:
        destination = os.path.join(target_dir, f"ip_destruction_basket_{basket}.xlsx")
        if os.path.exists(destination):
            # Already downloaded; existing destruction files are not refreshed.
            continue

        print(f" [košík {basket}] stahování...")
        search_box = page.locator(selector_css).first
        search_box.click()
        search_box.fill(basket)
        page.wait_for_timeout(500)
        page.locator("mat-option").first.dispatch_event("click")
        page.wait_for_load_state("networkidle", timeout=wait_ms)

        with page.expect_download(timeout=wait_ms) as pending:
            page.get_by_role("button", name="Download XLS").click()
        pending.value.save_as(destination)
        new_count += 1

        page.get_by_role("button", name="Clear").click()
        page.wait_for_load_state("networkidle", timeout=wait_ms)

    skipped = len(baskets) - new_count
    print(f" Destruction OK ({new_count} nových, {skipped} přeskočeno)")
def download_shipments_report(page, study):
    """Download the shipments report for ``study``, overwriting the old file.

    The workbook is saved as
    ``xls_shipments_<study>/shipments_report_<study>.xlsx`` under ``BASE_DIR``.
    """
    target_dir = os.path.join(BASE_DIR, f"xls_shipments_{study}")
    os.makedirs(target_dir, exist_ok=True)

    wait_ms = 120000
    page.goto(f"{BASE_URL}/report/shipments_report")
    page.wait_for_load_state("networkidle", timeout=wait_ms)

    destination = os.path.join(target_dir, f"shipments_report_{study}.xlsx")
    with page.expect_download(timeout=wait_ms) as pending:
        page.get_by_role("button", name="Download XLS").click()
    pending.value.save_as(destination)
    print(" Shipments report OK")
def download_shipment_details(page, study):
    """Download the shipment details XLSX for every Czech shipment of ``study``.

    Shipment ids and statuses are read from the previously downloaded
    shipments report. A shipment whose detail file already exists and whose
    status is RECEIVED is skipped; all others are downloaded again.

    Args:
        page: Logged-in Playwright page.
        study: Study identifier used to build directory and file names.
    """
    out_dir = os.path.join(BASE_DIR, f"xls_shipment_details_{study}")
    os.makedirs(out_dir, exist_ok=True)

    # Load the CZ shipment ids from the freshly downloaded shipments report.
    report_path = os.path.join(BASE_DIR, f"xls_shipments_{study}", f"shipments_report_{study}.xlsx")
    raw = pd.read_excel(report_path, header=None)
    header_row = None
    for i, row in raw.iterrows():
        if "Shipment ID" in [str(v).strip() for v in row]:
            header_row = i
            break
    if header_row is None:
        # Without a header row, read_excel(header=None) yields integer column
        # labels and the "Location" lookup below fails with an opaque
        # KeyError. Report the real cause and stop, as parse_shipments_report
        # does for the same condition.
        print(f" CHYBA: hlavička 'Shipment ID' nenalezena v {report_path} — shipment details přeskočeny")
        return

    df = pd.read_excel(report_path, header=header_row)
    df = df.dropna(how="all")
    df = df[df["Location"].astype(str).str.contains("Czech", na=False, case=False)]
    if "IRT Shipment Status" in df.columns:
        statuses = df["IRT Shipment Status"].astype(str).str.strip()
    else:
        # No status column: every shipment is treated as not yet final.
        statuses = [""] * len(df)
    cz_shipments = list(zip(df["Shipment ID"].astype(str).str.strip(), statuses))
    print(f" CZ zásilek ke stažení: {len(cz_shipments)}")

    page.goto(f"{BASE_URL}/report/shipment_details_report")
    page.wait_for_load_state("networkidle", timeout=120000)

    skipped = 0
    for shipment, status in cz_shipments:
        filename = os.path.join(out_dir, f"shipment_details_{shipment}.xlsx")
        if os.path.exists(filename) and status.upper() == "RECEIVED":
            skipped += 1
            continue  # final state, the file does not change any more

        input_field = page.locator('input[placeholder="search"], input[type="text"]').first
        input_field.click()
        input_field.fill(shipment)
        page.wait_for_timeout(500)
        page.locator("mat-option").first.dispatch_event("click")
        page.wait_for_load_state("networkidle", timeout=120000)

        with page.expect_download(timeout=120000) as dl:
            page.get_by_role("button", name="Download XLS").click()
        dl.value.save_as(filename)
        print(f" [{shipment}] ({status}) OK")

        page.get_by_role("button", name="Clear").click()
        page.wait_for_load_state("networkidle", timeout=120000)

    print(f" Přeskočeno (RECEIVED): {skipped}")
# ── main ─────────────────────────────────────────────────────────────────────
def main():
    """Run the download steps for every study, then import into MongoDB.

    A fresh browser is launched per study. Any exception raised during the
    download steps of a study is printed with its traceback and the pipeline
    continues; the same applies to the MongoDB import.
    """
    import traceback

    os.chdir(BASE_DIR)
    banner = "=" * 60
    steps = (
        ("\n [1/4] Onsite inventory...", download_inventory),
        ("\n [2/4] IP destruction...", download_destruction),
        ("\n [3/4] Shipments report...", download_shipments_report),
        ("\n [4/4] Shipment details (CZ)...", download_shipment_details),
    )

    # ── Downloads ────────────────────────────────────────────────────────────
    with sync_playwright() as p:
        for study in STUDIES:
            print(f"\n{banner}")
            print(f"[{study}] STAHOVÁNÍ")
            print(f"{banner}")

            browser = p.chromium.launch(headless=False)
            context = browser.new_context(accept_downloads=True)
            page = context.new_page()
            try:
                print(" Přihlášení...")
                login(page, study)
                for announcement, step in steps:
                    print(announcement)
                    step(page, study)
            except Exception as e:
                print(f" CHYBA při stahování: {e}")
                traceback.print_exc()
            finally:
                browser.close()

    # ── Import into MongoDB ──────────────────────────────────────────────────
    print(f"\n{banner}")
    print("IMPORT DO MongoDB")
    print(f"{banner}")
    try:
        drugs_mongo.run(STUDIES)
    except Exception as e:
        print(f" CHYBA při importu: {e}")
        traceback.print_exc()

    print(f"\n{banner}")
    print("Vše hotovo.")
    print(f"{banner}")


# NOTE(review): the call is not behind an ``if __name__ == "__main__"`` guard,
# so importing this module runs the whole pipeline.
main()
-139
View File
@@ -1,139 +0,0 @@
import mysql.connector
import db_config
# One-off schema setup: adds iwrs_import.report_type and creates the four
# IWRS drug tables if they do not exist yet.
conn = mysql.connector.connect(
    host=db_config.DB_HOST, port=db_config.DB_PORT,
    user=db_config.DB_USER, password=db_config.DB_PASSWORD,
    database=db_config.DB_NAME
)
# try/finally guarantees the cursor and connection are released even when a
# statement fails; previously an exception left both open.
try:
    c = conn.cursor()
    try:
        # Add report_type to iwrs_import (if it does not exist yet).
        try:
            c.execute("""ALTER TABLE iwrs_import
                ADD COLUMN report_type VARCHAR(20) NOT NULL DEFAULT 'patients'
                AFTER source_file""")
            print("ALTER TABLE iwrs_import OK — report_type přidán")
        except mysql.connector.errors.DatabaseError as e:
            # 1060 is MySQL's ER_DUP_FIELDNAME; the message check is kept as a
            # fallback for drivers that do not populate errno.
            if getattr(e, "errno", None) == 1060 or "Duplicate column" in str(e):
                print("report_type již existuje — přeskočeno")
            else:
                raise

        stmts = [
            (
                "iwrs_shipments",
                """CREATE TABLE IF NOT EXISTS iwrs_shipments (
                    id INT AUTO_INCREMENT PRIMARY KEY,
                    import_id INT NOT NULL,
                    study VARCHAR(20) NOT NULL,
                    shipment_id VARCHAR(20) NOT NULL,
                    status VARCHAR(50),
                    type VARCHAR(30),
                    ship_from VARCHAR(50),
                    ship_to_site VARCHAR(50),
                    location VARCHAR(50),
                    request_date DATE,
                    shipped_date DATE,
                    received_date DATE,
                    received_by VARCHAR(100),
                    delivered_date_utc DATE,
                    delivery_recipient VARCHAR(100),
                    delivery_details VARCHAR(200),
                    cancelled_date DATE,
                    total_medication_ids SMALLINT,
                    tracking_no VARCHAR(100),
                    shipping_category VARCHAR(50),
                    expected_arrival DATE,
                    FOREIGN KEY (import_id) REFERENCES iwrs_import(import_id),
                    INDEX idx_import (import_id),
                    INDEX idx_study_shipment (study, shipment_id)
                )"""
            ),
            (
                "iwrs_shipment_items",
                """CREATE TABLE IF NOT EXISTS iwrs_shipment_items (
                    id INT AUTO_INCREMENT PRIMARY KEY,
                    import_id INT NOT NULL,
                    study VARCHAR(20) NOT NULL,
                    shipment_id VARCHAR(20) NOT NULL,
                    destination_location VARCHAR(50),
                    shipment_status VARCHAR(50),
                    shipment_type VARCHAR(30),
                    destination_site VARCHAR(50),
                    investigator VARCHAR(100),
                    medication_description VARCHAR(200),
                    medication_type VARCHAR(50),
                    medication_id VARCHAR(20),
                    packaged_lot_no VARCHAR(50),
                    packaged_lot_description VARCHAR(100),
                    container_id VARCHAR(50),
                    quantity SMALLINT,
                    expiration_date DATE,
                    item_status VARCHAR(50),
                    FOREIGN KEY (import_id) REFERENCES iwrs_import(import_id),
                    INDEX idx_import (import_id),
                    INDEX idx_med_id (medication_id)
                )"""
            ),
            (
                "iwrs_inventory",
                """CREATE TABLE IF NOT EXISTS iwrs_inventory (
                    id INT AUTO_INCREMENT PRIMARY KEY,
                    import_id INT NOT NULL,
                    study VARCHAR(20) NOT NULL,
                    site VARCHAR(50),
                    investigator VARCHAR(100),
                    location VARCHAR(50),
                    medication_id VARCHAR(20),
                    packaged_lot_no VARCHAR(50),
                    original_expiration_date DATE,
                    expiration_date DATE,
                    received_date DATE,
                    receipt_user VARCHAR(100),
                    subject_identifier VARCHAR(20),
                    quantity_assigned SMALLINT,
                    irt_transaction VARCHAR(100),
                    date_assigned DATE,
                    assignment_user VARCHAR(100),
                    dispensation_status VARCHAR(50),
                    dispensing_date DATE,
                    quantity_dispensed SMALLINT,
                    dispensing_user VARCHAR(100),
                    quantity_returned SMALLINT,
                    date_returned DATE,
                    return_user VARCHAR(100),
                    FOREIGN KEY (import_id) REFERENCES iwrs_import(import_id),
                    INDEX idx_import (import_id),
                    INDEX idx_site (study, site)
                )"""
            ),
            (
                "iwrs_destruction",
                """CREATE TABLE IF NOT EXISTS iwrs_destruction (
                    id INT AUTO_INCREMENT PRIMARY KEY,
                    study VARCHAR(20) NOT NULL,
                    site_id VARCHAR(50),
                    investigator VARCHAR(100),
                    location VARCHAR(50),
                    basket_id VARCHAR(20) NOT NULL,
                    destruction_date DATE,
                    medication_description VARCHAR(200),
                    medication_id VARCHAR(20),
                    packaged_lot_description VARCHAR(100),
                    comments VARCHAR(500),
                    imported_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
                    UNIQUE KEY uq_destruction (study, basket_id, medication_id),
                    INDEX idx_study_basket (study, basket_id)
                )"""
            ),
        ]

        for name, sql in stmts:
            c.execute(sql)
            print(f"OK: {name}")

        conn.commit()
    finally:
        c.close()
finally:
    conn.close()
print("\nVšechny tabulky připraveny.")
@@ -1,364 +0,0 @@
import sys
import os
import mysql.connector
import pandas as pd
from datetime import date
from pathlib import Path
from openpyxl import load_workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), ".."))
import db_config
# Study exported by this script; switch by swapping the commented line.
STUDY = "42847922MDD3003"
# STUDY = "77242113UCO3001"

# The workbook is written to an "output" folder next to this script and is
# named after today's date and the study.
BASE_DIR = Path(os.path.dirname(os.path.abspath(__file__)))
OUTPUT_DIR = BASE_DIR / "output"
_TODAY_STAMP = date.today().strftime("%Y-%m-%d")
OUTPUT_FILE = OUTPUT_DIR / f"{_TODAY_STAMP} {STUDY} CZ IWRS overview.xlsx"

# Columns that are parsed as dates and formatted as DD-MMM-YYYY.
DATE_COLUMNS = {
    "Orig Exp Date",
    "Exp Date",
    "Rcv Date",
    "Date Asgn",
    "Disp Date",
    "Date Ret",
    "Destroyed",
    "Max Visit Date",
}

# Column widths per header; headers not listed here fall back to a width of
# 14 in format_sheet.
COLUMN_WIDTHS = dict([
    ("Site", 14),
    ("Med ID", 10),
    ("Lot No.", 12),
    ("Orig Exp Date", 16),
    ("Exp Date", 14),
    ("Rcv Date", 14),
    ("Rcpt User", 22),
    ("Subject ID", 14),
    ("Qty Asgn", 9),
    ("IRT Tx", 8),
    ("Date Asgn", 14),
    ("Asgn User", 20),
    ("Disp Status", 16),
    ("Disp Date", 14),
    ("Qty Disp", 9),
    ("Disp User", 20),
    ("Qty Ret", 10),
    ("Date Ret", 14),
    ("Ret User", 18),
    ("Destroyed", 14),
    ("Basket No.", 12),
    ("Max Visit Date", 16),
])

# Shipments sheet: number of leading shipment-level columns; the item-detail
# columns start after them (1-based, consumed by format_shipment_sheet).
N_SHIP_COLS = 9
# ── DB ────────────────────────────────────────────────────────────────────────
def get_conn():
    """Open a new MySQL connection using the settings in ``db_config``."""
    settings = {
        "host": db_config.DB_HOST,
        "port": db_config.DB_PORT,
        "user": db_config.DB_USER,
        "password": db_config.DB_PASSWORD,
        "database": db_config.DB_NAME,
    }
    return mysql.connector.connect(**settings)
def get_latest_import_id(cursor, study):
    """Return the highest ``import_id`` of type 'drugs' recorded for ``study``.

    Args:
        cursor: DB cursor whose ``fetchone`` returns a mapping (the result is
            read by column name).
        study: Study identifier to look up.

    Raises:
        RuntimeError: If no drugs import exists for the study.
    """
    query = "SELECT MAX(import_id) AS mid FROM iwrs_import WHERE study=%s AND report_type='drugs'"
    cursor.execute(query, (study,))
    latest = cursor.fetchone()["mid"]
    if latest is not None:
        return latest
    raise RuntimeError(f"Žádná data v MySQL pro studii {study}")
# ── Načítání dat z MySQL ──────────────────────────────────────────────────────
def load_inventory(cursor, study, import_id):
    """Load one import's inventory rows, left-joined with destruction data.

    Column aliases in the query are already the display names used by the
    downstream sheet builders. Every column listed in ``DATE_COLUMNS`` that is
    present in the result is converted to datetime (unparseable values become
    NaT).

    Returns:
        A DataFrame with one row per inventory record.
    """
    query = """
        SELECT
            i.site AS Site,
            i.medication_id AS `Med ID`,
            i.packaged_lot_no AS `Lot No.`,
            i.original_expiration_date AS `Orig Exp Date`,
            i.expiration_date AS `Exp Date`,
            i.received_date AS `Rcv Date`,
            i.receipt_user AS `Rcpt User`,
            i.subject_identifier AS `Subject ID`,
            i.quantity_assigned AS `Qty Asgn`,
            i.irt_transaction AS `IRT Tx`,
            i.date_assigned AS `Date Asgn`,
            i.assignment_user AS `Asgn User`,
            i.dispensation_status AS `Disp Status`,
            i.dispensing_date AS `Disp Date`,
            i.quantity_dispensed AS `Qty Disp`,
            i.dispensing_user AS `Disp User`,
            i.quantity_returned AS `Qty Ret`,
            i.date_returned AS `Date Ret`,
            i.return_user AS `Ret User`,
            d.destruction_date AS Destroyed,
            d.basket_id AS `Basket No.`
        FROM iwrs_inventory i
        LEFT JOIN (
            SELECT medication_id,
                ANY_VALUE(basket_id) AS basket_id,
                ANY_VALUE(destruction_date) AS destruction_date
            FROM iwrs_destruction
            WHERE study = %s
            GROUP BY medication_id
        ) d ON d.medication_id = i.medication_id
        WHERE i.import_id = %s
            AND i.study = %s
        ORDER BY i.site, i.received_date, i.medication_id
    """
    # Placeholder order: destruction subquery study, import id, inventory study.
    cursor.execute(query, (study, import_id, study))
    frame = pd.DataFrame(cursor.fetchall())

    for column in frame.columns:
        if column in DATE_COLUMNS:
            frame[column] = pd.to_datetime(frame[column], errors="coerce")

    print(f" Inventory: {len(frame)} kitu")
    return frame
def load_shipments(cursor, study, import_id):
    """Load one import's shipments joined with their items, one row per item.

    The four date columns are converted to datetime when present (unparseable
    values become NaT).

    Returns:
        A DataFrame; it has no columns at all when the query returns no rows.
    """
    query = """
        SELECT
            s.shipment_id AS `Shipment ID`,
            s.status AS `IRT Shipment Status`,
            s.type AS Type,
            s.ship_from AS `Shipment From`,
            s.ship_to_site AS `Ship To:`,
            s.request_date AS `Request Date`,
            s.received_date AS `Received Date`,
            s.received_by AS `Received by`,
            s.expected_arrival AS `Expected Arrival`,
            i.investigator AS Investigator,
            i.medication_description AS `Medication Description`,
            i.medication_id AS `Medication ID`,
            i.packaged_lot_no AS `Packaged Lot number`,
            i.expiration_date AS `Expiration Date`,
            i.item_status AS Status
        FROM iwrs_shipments s
        JOIN iwrs_shipment_items i
            ON i.study = s.study
            AND i.shipment_id = s.shipment_id
            AND i.import_id = %s
        WHERE s.import_id = %s
            AND s.study = %s
        ORDER BY s.ship_to_site, s.shipment_id, i.medication_id
    """
    cursor.execute(query, (import_id, import_id, study))
    frame = pd.DataFrame(cursor.fetchall())

    date_fields = ("Request Date", "Received Date", "Expiration Date", "Expected Arrival")
    for field in date_fields:
        if field in frame.columns:
            frame[field] = pd.to_datetime(frame[field], errors="coerce")

    # An empty frame has no "Shipment ID" column, so the count is guarded.
    shipment_count = frame["Shipment ID"].nunique() if len(frame) else 0
    print(f" Shipments: {shipment_count} zásilek, {len(frame)} kitu")
    return frame
# ── Odvozené sheety ───────────────────────────────────────────────────────────
def build_site_summary(shipments_df):
    """Count kits per site and item status for the "Site Summary" sheet.

    Args:
        shipments_df: Frame with at least the ``"Ship To:"`` and ``"Status"``
            columns.

    Returns:
        A DataFrame with columns ``Site, Available, Assigned, Dispensed,
        Returned, Total`` sorted by site. ``Total`` sums only these four
        statuses. An empty frame with the same columns is returned when the
        input has no rows or lacks the required columns.
    """
    STATUS_COLS = ["Available", "Assigned", "Dispensed", "Returned by Subject"]
    OUTPUT_COLS = ["Site", "Available", "Assigned", "Dispensed", "Returned", "Total"]

    # A result set without rows produces a DataFrame without columns, which
    # previously raised KeyError in groupby("Ship To:").
    if shipments_df.empty or not {"Ship To:", "Status"}.issubset(shipments_df.columns):
        print(" Site Summary: 0 center")
        return pd.DataFrame(columns=OUTPUT_COLS)

    pivot = shipments_df.groupby("Ship To:")["Status"].value_counts().unstack(fill_value=0)
    # Statuses that do not occur at all still need a zero-filled column.
    for s in STATUS_COLS:
        if s not in pivot.columns:
            pivot[s] = 0
    pivot = (
        pivot[STATUS_COLS]
        .reset_index()
        .rename(columns={"Ship To:": "Site", "Returned by Subject": "Returned"})
        .sort_values("Site")
        .reset_index(drop=True)
    )
    pivot["Total"] = pivot[["Available", "Assigned", "Dispensed", "Returned"]].sum(axis=1)
    print(f" Site Summary: {len(pivot)} center")
    return pivot
def build_expired(df):
    """Select kits past their expiry date with no basket and no subject.

    Returns:
        A tuple ``(filtered_frame, sheet_name)``; the sheet name embeds
        today's date as ``DD-Mon-YYYY``.
    """
    today = date.today()
    cutoff = pd.Timestamp(today)

    no_basket = df["Basket No."].isna()
    unassigned = df["Subject ID"].isna()
    past_expiry = df["Exp Date"] < cutoff

    expired = df[no_basket & unassigned & past_expiry].copy().reset_index(drop=True)
    sheet_name = "Expired as of " + today.strftime("%d-%b-%Y")
    print(f" Expired: {len(expired)}")
    return expired, sheet_name
def build_assigned_not_dispensed(df):
    """Return kits that have a subject but no dispensing date."""
    has_subject = df["Subject ID"].notna()
    not_dispensed = df["Disp Date"].isna()
    pending = df[has_subject & not_dispensed].copy().reset_index(drop=True)
    print(f" Assigned not dispensed: {len(pending)}")
    return pending
def build_not_returned(df):
    """List kits not yet returned although the subject was assigned a later kit.

    A kit qualifies when it has a subject, no return date, a dispensation
    status other than "NOT DISPENSED" (case-insensitive), and an assignment
    date earlier than the subject's latest assignment date. The latter is
    added as ``"Max Visit Date"``; the return and destruction columns are
    dropped.
    """
    has_subject = df["Subject ID"].notna()
    never_returned = df["Date Ret"].isna()
    was_dispensed = df["Disp Status"].fillna("").str.upper() != "NOT DISPENSED"
    candidates = df[never_returned & has_subject & was_dispensed].copy()

    # Latest assignment date per subject, computed over the full frame.
    latest_visit = df.groupby("Subject ID")["Date Asgn"].max().rename("Max Visit Date")
    candidates = candidates.join(latest_visit, on="Subject ID")

    superseded = candidates["Date Asgn"] < candidates["Max Visit Date"]
    obsolete_columns = ["Qty Ret", "Date Ret", "Ret User", "Destroyed", "Basket No."]
    result = candidates[superseded].copy()
    result = result.drop(columns=obsolete_columns)
    result = result.reset_index(drop=True)
    print(f" Not returned: {len(result)}")
    return result
def build_kits_for_destruction(df):
    """List kits without a basket that were returned or never dispensed.

    The result is sorted by site and return date (both ascending) and the
    destruction columns are dropped.
    """
    no_basket = df["Basket No."].isna()
    returned = df["Date Ret"].notna()
    never_dispensed = df["Disp Status"].fillna("").str.upper() == "NOT DISPENSED"

    pending = df[no_basket & (returned | never_dispensed)].copy()
    pending = pending.sort_values(["Site", "Date Ret"], ascending=[True, True])
    pending = pending.drop(columns=["Destroyed", "Basket No."])
    pending = pending.reset_index(drop=True)
    print(f" Kits for destruction: {len(pending)}")
    return pending
# ── Formátování ───────────────────────────────────────────────────────────────
def format_sheet(ws, header_color, highlight_col=None, highlight_color=None):
    """Apply the common header and body styling to a worksheet.

    Args:
        ws: Worksheet whose first row holds the column headers.
        header_color: Fill colour for the header row.
        highlight_col: Header name of the column whose body cells get a fill.
        highlight_color: Fill colour for that column; no highlight is applied
            when this is falsy.
    """
    edge = Side(style="thin", color="000000")
    box = Border(left=edge, right=edge, top=edge, bottom=edge)
    head_fill = PatternFill("solid", start_color=header_color)
    head_font = Font(bold=True, color="FFFFFF", name="Arial", size=10)
    body_font = Font(name="Arial", size=10)
    accent_fill = None
    if highlight_color:
        accent_fill = PatternFill("solid", start_color=highlight_color)

    header_cells = ws[1]
    titles = [head.value for head in header_cells]

    # Header row: styling plus the column width for each header.
    for head in header_cells:
        head.fill = head_fill
        head.font = head_font
        head.alignment = Alignment(horizontal="center", vertical="center", wrap_text=False)
        head.border = box
        column_letter = get_column_letter(head.column)
        ws.column_dimensions[column_letter].width = COLUMN_WIDTHS.get(head.value, 14)

    # Body rows.
    for body_row in ws.iter_rows(min_row=2, max_row=ws.max_row):
        for cell in body_row:
            title = titles[cell.column - 1] if cell.column <= len(titles) else None
            cell.font = body_font
            cell.border = box
            cell.alignment = Alignment(horizontal="center")
            if title in DATE_COLUMNS:
                cell.number_format = "DD-MMM-YYYY"
            if accent_fill and title == highlight_col:
                cell.fill = accent_fill

    ws.auto_filter.ref = ws.dimensions
    ws.freeze_panes = "A2"
def format_shipment_sheet(ws, header_color_ship, header_color_detail, n_ship_cols):
    """Style the Shipments sheet with two header colours.

    Args:
        ws: Worksheet whose first row holds the column headers.
        header_color_ship: Header fill for the first ``n_ship_cols`` columns.
        header_color_detail: Header fill for the remaining columns.
        n_ship_cols: Number of leading columns that get the shipment colour
            (1-based column positions are compared against it).
    """
    thin = Side(style="thin", color="000000")
    border = Border(left=thin, right=thin, top=thin, bottom=thin)
    hfont = Font(bold=True, color="FFFFFF", name="Arial", size=10)
    dfont = Font(name="Arial", size=10)
    fill_ship = PatternFill("solid", start_color=header_color_ship)
    fill_detail = PatternFill("solid", start_color=header_color_detail)

    for cell in ws[1]:
        cell.fill = fill_ship if cell.column <= n_ship_cols else fill_detail
        cell.font = hfont
        cell.alignment = Alignment(horizontal="center", vertical="center", wrap_text=True)
        cell.border = border
        # Width follows the header text length, capped at 35.
        ws.column_dimensions[get_column_letter(cell.column)].width = min(
            len(str(cell.value or "")) + 4, 35
        )
    ws.row_dimensions[1].height = 30

    for row in ws.iter_rows(min_row=2, max_row=ws.max_row):
        for cell in row:
            cell.font = dfont
            cell.border = border
            cell.alignment = Alignment(horizontal="center", vertical="center")
            # isinstance replaces the comparison of class names as strings:
            # datetime and pandas Timestamp are both subclasses of date, and
            # other date subclasses are now recognised as well.
            if isinstance(cell.value, date):
                cell.number_format = "DD-MMM-YYYY"

    ws.auto_filter.ref = ws.dimensions
    ws.freeze_panes = "A2"
# ── Main ──────────────────────────────────────────────────────────────────────
def main():
    """Build the formatted multi-sheet Excel report for ``STUDY``.

    Loads inventory and shipment rows of the latest import from MySQL, derives
    the filtered sheets, writes everything to ``OUTPUT_FILE`` with pandas and
    then reopens the workbook with openpyxl to apply the styling.
    """
    OUTPUT_DIR.mkdir(exist_ok=True)
    print(f"\nNačítám data z MySQL pro {STUDY}...")

    conn = get_conn()
    try:
        cursor = conn.cursor(dictionary=True)
        try:
            import_id = get_latest_import_id(cursor, STUDY)
            print(f" import_id = {import_id}")
            df = load_inventory(cursor, STUDY, import_id)
            shipments_df = load_shipments(cursor, STUDY, import_id)
        finally:
            # Release the cursor even when a loader raises.
            cursor.close()
    finally:
        conn.close()

    expired_df, expired_sheet = build_expired(df)
    assigned_df = build_assigned_not_dispensed(df)
    not_returned_df = build_not_returned(df)
    destruction_df = build_kits_for_destruction(df)
    site_summary_df = build_site_summary(shipments_df)

    # Sheet order in the workbook follows this list.
    sheets = [
        ("CountryMedicationOverview", df),
        (expired_sheet, expired_df),
        ("Assigned not dispensed", assigned_df),
        ("Not returned", not_returned_df),
        ("Kits for destruction", destruction_df),
        ("Shipments", shipments_df),
        ("Site Summary", site_summary_df),
    ]
    with pd.ExcelWriter(OUTPUT_FILE, engine="openpyxl") as writer:
        for sheet_name, frame in sheets:
            frame.to_excel(writer, index=False, sheet_name=sheet_name)

    wb = load_workbook(OUTPUT_FILE)
    ws_main = wb["CountryMedicationOverview"]
    format_sheet(ws_main, header_color="1F4E79")

    # Tint the two destruction-related columns of the overview sheet.
    new_col_fill = PatternFill("solid", start_color="E2EFDA")
    headers_main = [c.value for c in ws_main[1]]
    for row in ws_main.iter_rows(min_row=2, max_row=ws_main.max_row):
        for cell in row:
            col_name = headers_main[cell.column - 1] if cell.column <= len(headers_main) else None
            if col_name in ("Destroyed", "Basket No."):
                cell.fill = new_col_fill

    format_sheet(wb[expired_sheet], header_color="C00000", highlight_col="Exp Date", highlight_color="FFE0E0")
    format_sheet(wb["Assigned not dispensed"], header_color="833C00", highlight_col="Subject ID", highlight_color="FFF2CC")
    format_sheet(wb["Not returned"], header_color="375623", highlight_col="Max Visit Date", highlight_color="E2EFDA")
    format_sheet(wb["Kits for destruction"], header_color="595959")
    format_shipment_sheet(wb["Shipments"], "1F4E79", "375623", N_SHIP_COLS)
    format_sheet(wb["Site Summary"], header_color="1F4E79")
    wb.save(OUTPUT_FILE)
    print(f"\nUloženo: {OUTPUT_FILE} ({len(df)} řádků, sheety: {wb.sheetnames})")


if __name__ == "__main__":
    main()
@@ -1,205 +0,0 @@
import sys
import os
import mysql.connector
import openpyxl
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter
from datetime import date
import pandas as pd
# db_config.py je v nadřazeném adresáři (Drugs/)
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), ".."))
import db_config
# Study identifier whose shipments are reported.
STUDY = "77242113UCO3001"
# Reports are written to an "output" folder next to this script.
OUTPUT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "output")
# Created at import time so later writes cannot fail on a missing folder.
os.makedirs(OUTPUT_DIR, exist_ok=True)
def get_conn():
    """Open and return a new MySQL connection configured from ``db_config``."""
    settings = {
        "host": db_config.DB_HOST,
        "port": db_config.DB_PORT,
        "user": db_config.DB_USER,
        "password": db_config.DB_PASSWORD,
        "database": db_config.DB_NAME,
    }
    return mysql.connector.connect(**settings)
def load_data(study):
    """Fetch shipment rows joined with their items for the latest import.

    Args:
        study: study identifier used to filter ``iwrs_import``,
            ``iwrs_shipments`` and ``iwrs_shipment_items``.

    Returns:
        List of dict rows (one per shipment item) as returned by the
        dictionary cursor.

    Raises:
        RuntimeError: when no 'drugs' import exists for ``study``.
    """
    sql = """
        SELECT
        s.shipment_id,
        s.status AS irt_shipment_status,
        s.type,
        s.ship_from AS shipment_from,
        s.ship_to_site AS ship_to,
        s.request_date,
        s.received_date,
        s.received_by,
        s.expected_arrival,
        i.investigator,
        i.medication_description,
        i.medication_id,
        i.packaged_lot_no,
        i.expiration_date,
        i.item_status AS status
        FROM iwrs_shipments s
        JOIN iwrs_shipment_items i
        ON i.study = s.study
        AND i.shipment_id = s.shipment_id
        AND i.import_id = %s
        WHERE s.import_id = %s
        AND s.study = %s
        ORDER BY s.ship_to_site, s.shipment_id, i.medication_id
        """
    conn = get_conn()
    try:
        cursor = conn.cursor(dictionary=True)
        try:
            # Newest import_id for the given study.
            cursor.execute(
                "SELECT MAX(import_id) AS mid FROM iwrs_import WHERE study=%s AND report_type='drugs'",
                (study,),
            )
            row = cursor.fetchone()
            import_id = row["mid"]
            if import_id is None:
                raise RuntimeError(f"Žádná data v MySQL pro studii {study}")
            print(f" import_id = {import_id}")
            cursor.execute(sql, (import_id, import_id, study))
            rows = cursor.fetchall()
        finally:
            # Close on every path, including the RuntimeError above.
            cursor.close()
    finally:
        conn.close()
    print(f" Načteno řádků: {len(rows)}")
    return rows
# Shipment columns (blue header) / detail columns (green header).
# Each entry is (key in the row dict, header label written to the sheet).
SHIP_COLS = [
    ("shipment_id", "Shipment ID"),
    ("irt_shipment_status","IRT Shipment Status"),
    ("type", "Type"),
    ("shipment_from", "Shipment From"),
    ("ship_to", "Ship To:"),
    ("request_date", "Request Date"),
    ("received_date", "Received Date"),
    ("received_by", "Received by"),
    ("expected_arrival", "Expected Arrival"),
]
DETAIL_COLS = [
    ("investigator", "Investigator"),
    ("medication_description", "Medication Description"),
    ("medication_id", "Medication ID"),
    ("packaged_lot_no", "Packaged Lot number"),
    ("expiration_date", "Expiration Date"),
    ("status", "Status"),
]
# Full column order of the Shipments sheet: shipment part first, then details.
ALL_COLS = SHIP_COLS + DETAIL_COLS
# Number of leading columns that receive the shipment (blue) header fill.
N_SHIP_COLS = len(SHIP_COLS)
HEADER_FILL_SHIP = PatternFill("solid", fgColor="1F4E79")
HEADER_FILL_DETAIL = PatternFill("solid", fgColor="375623")
HEADER_FONT = Font(name="Arial", bold=True, color="FFFFFF", size=10)
DATA_FONT = Font(name="Arial", size=10)
# NOTE(review): no `top` side is defined here, only left/right/bottom;
# confirm whether the missing top edge is intentional.
THIN_BORDER = Border(
    left=Side(style="thin", color="BFBFBF"),
    right=Side(style="thin", color="BFBFBF"),
    bottom=Side(style="thin", color="BFBFBF"),
)
def write_shipments_sheet(wb, rows):
    """Fill the workbook's active sheet ("Shipments") with one row per item.

    ``rows`` is a sequence of dicts keyed by the first element of each
    ``ALL_COLS`` entry; the second element is used as the header label.
    """
    ws = wb.active
    ws.title = "Shipments"
    head_align = Alignment(horizontal="center", vertical="center", wrap_text=True)
    body_align = Alignment(horizontal="center", vertical="center")

    # Header row: shipment columns and detail columns use different fills.
    for col_idx, (_, label) in enumerate(ALL_COLS, start=1):
        header = ws.cell(row=1, column=col_idx, value=label)
        header.font = HEADER_FONT
        header.fill = HEADER_FILL_DETAIL if col_idx > N_SHIP_COLS else HEADER_FILL_SHIP
        header.alignment = head_align
        header.border = THIN_BORDER
    ws.row_dimensions[1].height = 30

    # Data rows start directly below the header.
    for row_idx, record in enumerate(rows, start=2):
        for col_idx, (key, _) in enumerate(ALL_COLS, start=1):
            value = record[key]
            target = ws.cell(row=row_idx, column=col_idx, value=value)
            target.font = DATA_FONT
            target.border = THIN_BORDER
            target.alignment = body_align
            if isinstance(value, date):
                target.number_format = "DD-MMM-YYYY"

    ws.auto_filter.ref = ws.dimensions
    ws.freeze_panes = "A2"

    # Column widths: longest of header and non-null values, plus padding, capped at 35.
    for col_idx, (key, label) in enumerate(ALL_COLS, start=1):
        lengths = [len(label)]
        lengths.extend(len(str(r[key])) for r in rows if r[key] is not None)
        ws.column_dimensions[get_column_letter(col_idx)].width = min(max(lengths) + 2, 35)
def write_summary_sheet(wb, rows):
    """Add a "Site Summary" sheet with per-site counts of item statuses.

    Args:
        wb: openpyxl workbook that receives the new sheet.
        rows: sequence of dict rows containing at least ``ship_to`` and
            ``status``.  An empty sequence produces a header-only sheet
            instead of failing on the missing columns.
    """
    status_cols = ["Available", "Assigned", "Dispensed", "Returned by Subject"]
    s_cols = ["Site", "Available", "Assigned", "Dispensed", "Returned", "Total"]

    if rows:
        df = pd.DataFrame(rows)
        pivot = df.groupby("ship_to")["status"].value_counts().unstack(fill_value=0)
        # Keep only the reported statuses; absent ones become zero columns.
        pivot = (
            pivot.reindex(columns=status_cols, fill_value=0)
            .reset_index()
            .rename(columns={"ship_to": "Site", "Returned by Subject": "Returned"})
            .sort_values("Site")
            .reset_index(drop=True)
        )
        pivot["Total"] = pivot[["Available", "Assigned", "Dispensed", "Returned"]].sum(axis=1)
    else:
        pivot = pd.DataFrame(columns=s_cols)

    ws = wb.create_sheet("Site Summary")
    header_align = Alignment(horizontal="center", vertical="center")
    for ci, col in enumerate(s_cols, 1):
        cell = ws.cell(row=1, column=ci, value=col)
        cell.font = HEADER_FONT
        cell.fill = PatternFill("solid", fgColor="1F4E79")
        cell.alignment = header_align
        cell.border = THIN_BORDER
    ws.row_dimensions[1].height = 25

    for ri, (_, row) in enumerate(pivot.iterrows(), 2):
        for ci, col in enumerate(s_cols, 1):
            cell = ws.cell(row=ri, column=ci, value=row[col])
            cell.font = DATA_FONT
            cell.border = THIN_BORDER
            cell.alignment = header_align

    # Width from the longest rendered value; read column-wise, not via iloc per cell.
    for ci, col in enumerate(s_cols, 1):
        vals = [col] + [str(v) for v in pivot[col]]
        ws.column_dimensions[get_column_letter(ci)].width = min(
            max(len(v) for v in vals) + 4, 35
        )
    ws.freeze_panes = "A2"
def build_report():
    """Load the shipment rows for ``STUDY`` and save them as a two-sheet workbook."""
    print(f"\nNačítám data z MySQL pro {STUDY}...")
    records = load_data(STUDY)

    workbook = openpyxl.Workbook()
    for write_sheet in (write_shipments_sheet, write_summary_sheet):
        write_sheet(workbook, records)

    # File name is prefixed with today's ISO date.
    target = os.path.join(OUTPUT_DIR, f"{date.today()} {STUDY} CZ Shipments.xlsx")
    workbook.save(target)
    print(f"\nUloženo -> {target}")


# The report is generated as soon as the module is executed or imported.
build_report()
-393
View File
@@ -1,393 +0,0 @@
import sys
import os
import mysql.connector
import pandas as pd
from datetime import date
from pathlib import Path
from openpyxl import load_workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), ".."))
import db_config
# (study identifier, short label) pairs processed by main(); only the
# identifier is read there.
STUDIES = [
    ("77242113UCO3001", "UCO"),
    ("42847922MDD3003", "MDD"),
]
BASE_DIR = Path(os.path.dirname(os.path.abspath(__file__)))
# Reports are written to an "output" folder next to this script.
OUTPUT_DIR = BASE_DIR / "output"
# Columns converted to datetimes on load and shown as DD-MMM-YYYY in Excel.
DATE_COLUMNS = {
    "Orig Exp Date", "Exp Date", "Rcv Date",
    "Date Asgn", "Disp Date", "Date Ret", "Destroyed", "Max Visit Date",
}
# Excel column widths keyed by header text; format_sheet falls back to 14.
COLUMN_WIDTHS = {
    "Site": 14,
    "Med ID": 10,
    "Lot No.": 12,
    "Orig Exp Date": 16,
    "Exp Date": 14,
    "Rcv Date": 14,
    "Rcpt User": 22,
    "Subject ID": 14,
    "Qty Asgn": 9,
    "IRT Tx": 8,
    "Date Asgn": 14,
    "Asgn User": 20,
    "Disp Status": 16,
    "Disp Date": 14,
    "Qty Disp": 9,
    "Disp User": 20,
    "Qty Ret": 10,
    "Date Ret": 14,
    "Ret User": 18,
    "Destroyed": 14,
    "Basket No.": 12,
    "Max Visit Date": 16,
}
N_SHIP_COLS = 9 # number of shipment columns (blue header in the Shipments sheet)
# ── DB ────────────────────────────────────────────────────────────────────────
def get_conn():
    """Open and return a new MySQL connection configured from ``db_config``."""
    settings = {
        "host": db_config.DB_HOST,
        "port": db_config.DB_PORT,
        "user": db_config.DB_USER,
        "password": db_config.DB_PASSWORD,
        "database": db_config.DB_NAME,
    }
    return mysql.connector.connect(**settings)
def get_latest_import_id(cursor, study):
    """Return the highest 'drugs' ``import_id`` recorded for ``study``.

    ``cursor`` must yield dict rows.  Raises ``RuntimeError`` when the study
    has no import at all.
    """
    query = "SELECT MAX(import_id) AS mid FROM iwrs_import WHERE study=%s AND report_type='drugs'"
    cursor.execute(query, (study,))
    latest = cursor.fetchone()["mid"]
    if latest is not None:
        return latest
    raise RuntimeError(f"Žádná data v MySQL pro studii {study}")
# ── Načítání dat ──────────────────────────────────────────────────────────────
def load_inventory(cursor, study, import_id):
    """Load the site inventory of one import as a DataFrame.

    Each inventory row is left-joined with one destruction record per
    medication id (basket and destruction date).  Columns listed in
    ``DATE_COLUMNS`` are converted to datetimes; unparsable values become NaT.
    """
    query = """
        SELECT
        i.site AS Site,
        i.medication_id AS `Med ID`,
        i.packaged_lot_no AS `Lot No.`,
        i.original_expiration_date AS `Orig Exp Date`,
        i.expiration_date AS `Exp Date`,
        i.received_date AS `Rcv Date`,
        i.receipt_user AS `Rcpt User`,
        i.subject_identifier AS `Subject ID`,
        i.quantity_assigned AS `Qty Asgn`,
        i.irt_transaction AS `IRT Tx`,
        i.date_assigned AS `Date Asgn`,
        i.assignment_user AS `Asgn User`,
        i.dispensation_status AS `Disp Status`,
        i.dispensing_date AS `Disp Date`,
        i.quantity_dispensed AS `Qty Disp`,
        i.dispensing_user AS `Disp User`,
        i.quantity_returned AS `Qty Ret`,
        i.date_returned AS `Date Ret`,
        i.return_user AS `Ret User`,
        d.destruction_date AS Destroyed,
        d.basket_id AS `Basket No.`
        FROM iwrs_inventory i
        LEFT JOIN (
        SELECT medication_id,
        ANY_VALUE(basket_id) AS basket_id,
        ANY_VALUE(destruction_date) AS destruction_date
        FROM iwrs_destruction
        WHERE study = %s
        GROUP BY medication_id
        ) d ON d.medication_id = i.medication_id
        WHERE i.import_id = %s
        AND i.study = %s
        ORDER BY i.site, i.received_date, i.medication_id
        """
    # Parameter order follows the placeholders: sub-query study, import, outer study.
    params = (study, import_id, study)
    cursor.execute(query, params)
    inventory = pd.DataFrame(cursor.fetchall())

    present_dates = [name for name in DATE_COLUMNS if name in inventory.columns]
    for name in present_dates:
        inventory[name] = pd.to_datetime(inventory[name], errors="coerce")

    print(f" Inventory: {len(inventory)} kitu")
    return inventory
def load_shipments(cursor, study, import_id):
    """Load shipments joined with their items for one import as a DataFrame.

    Column names are already the Excel header labels.  The four date columns
    are converted to datetimes; unparsable values become NaT.
    """
    query = """
        SELECT
        s.shipment_id AS `Shipment ID`,
        s.status AS `IRT Shipment Status`,
        s.type AS Type,
        s.ship_from AS `Shipment From`,
        s.ship_to_site AS `Ship To:`,
        s.request_date AS `Request Date`,
        s.received_date AS `Received Date`,
        s.received_by AS `Received by`,
        s.expected_arrival AS `Expected Arrival`,
        i.investigator AS Investigator,
        i.medication_description AS `Medication Description`,
        i.medication_id AS `Medication ID`,
        i.packaged_lot_no AS `Packaged Lot number`,
        i.expiration_date AS `Expiration Date`,
        i.item_status AS Status
        FROM iwrs_shipments s
        JOIN iwrs_shipment_items i
        ON i.study = s.study
        AND i.shipment_id = s.shipment_id
        AND i.import_id = %s
        WHERE s.import_id = %s
        AND s.study = %s
        ORDER BY s.ship_to_site, s.shipment_id, i.medication_id
        """
    cursor.execute(query, (import_id, import_id, study))
    shipments = pd.DataFrame(cursor.fetchall())

    date_names = ("Request Date", "Received Date", "Expiration Date", "Expected Arrival")
    for name in [n for n in date_names if n in shipments.columns]:
        shipments[name] = pd.to_datetime(shipments[name], errors="coerce")

    # An empty result has no columns at all, so guard the lookup.
    if len(shipments):
        n_ship = shipments["Shipment ID"].nunique()
    else:
        n_ship = 0
    print(f" Shipments: {n_ship} zásilek, {len(shipments)} kitu")
    return shipments
# ── Odvozené sheety ───────────────────────────────────────────────────────────
def build_site_summary(shipments_df):
    """Count shipment items per site and status.

    Args:
        shipments_df: frame with at least ``Ship To:`` and ``Status`` columns.
            An empty frame (including one without any columns) is accepted.

    Returns:
        DataFrame sorted by site with the columns ``Site``, ``Available``,
        ``Assigned``, ``Dispensed``, ``Returned`` and ``Total``.  ``Total``
        sums only those four statuses; other statuses are not counted.
    """
    status_cols = ["Available", "Assigned", "Dispensed", "Returned by Subject"]
    summary_cols = ["Site", "Available", "Assigned", "Dispensed", "Returned", "Total"]

    if shipments_df.empty:
        # No rows means no columns to group by; return a header-only summary.
        pivot = pd.DataFrame(columns=summary_cols)
        print(f" Site Summary: {len(pivot)} center")
        return pivot

    pivot = shipments_df.groupby("Ship To:")["Status"].value_counts().unstack(fill_value=0)
    pivot = (
        # Keep the reported statuses only; statuses that never occur become 0.
        pivot.reindex(columns=status_cols, fill_value=0)
        .reset_index()
        .rename(columns={"Ship To:": "Site", "Returned by Subject": "Returned"})
        .sort_values("Site")
        .reset_index(drop=True)
    )
    pivot["Total"] = pivot[["Available", "Assigned", "Dispensed", "Returned"]].sum(axis=1)
    print(f" Site Summary: {len(pivot)} center")
    return pivot
def build_expired(df):
today = date.today()
mask = (
df["Basket No."].isna() &
df["Subject ID"].isna() &
(df["Exp Date"] < pd.Timestamp(today))
)
filtered = df[mask].copy().reset_index(drop=True)
print(f" Expired: {len(filtered)}")
return filtered
def build_assigned_not_dispensed(df):
    """Return kits that have a subject assigned but no dispensing date."""
    has_subject = df["Subject ID"].notna()
    never_dispensed = df["Disp Date"].isna()
    pending = df.loc[has_subject & never_dispensed].copy().reset_index(drop=True)
    print(f" Assigned not dispensed: {len(pending)}")
    return pending
def build_not_returned(df):
    """Return dispensed kits without a return date from an earlier assignment.

    A kit qualifies when it has a subject, no return date, a dispensation
    status other than "NOT DISPENSED", and an assignment date older than the
    subject's latest assignment date (added as ``Max Visit Date``).  The
    return and destruction columns are dropped from the result.
    """
    flagged_not_dispensed = df["Disp Status"].fillna("").str.upper() == "NOT DISPENSED"
    outstanding = df.loc[
        df["Date Ret"].isna() & df["Subject ID"].notna() & ~flagged_not_dispensed
    ].copy()

    # Latest assignment date per subject, computed over all kits.
    latest_visit = df.groupby("Subject ID")["Date Asgn"].max().rename("Max Visit Date")
    outstanding = outstanding.join(latest_visit, on="Subject ID")

    from_earlier_visit = outstanding["Date Asgn"] < outstanding["Max Visit Date"]
    unused_cols = ["Qty Ret", "Date Ret", "Ret User", "Destroyed", "Basket No."]
    result = outstanding.loc[from_earlier_visit].drop(columns=unused_cols).reset_index(drop=True)
    print(f" Not returned: {len(result)}")
    return result
def build_kits_for_destruction(df):
    """Return kits awaiting destruction, ordered by site and return date.

    A kit qualifies when it is in no basket yet and was either returned or
    marked "NOT DISPENSED".  The destruction columns are dropped.
    """
    no_basket = df["Basket No."].isna()
    returned = df["Date Ret"].notna()
    not_dispensed = df["Disp Status"].fillna("").str.upper() == "NOT DISPENSED"

    candidates = df.loc[no_basket & (returned | not_dispensed)].copy()
    candidates = candidates.sort_values(["Site", "Date Ret"], ascending=[True, True])
    candidates = candidates.drop(columns=["Destroyed", "Basket No."]).reset_index(drop=True)
    print(f" Kits for destruction: {len(candidates)}")
    return candidates
# ── Formátování ───────────────────────────────────────────────────────────────
def format_sheet(ws, header_color, highlight_col=None, highlight_color=None):
    """Apply the common report styling to a worksheet in place.

    The header row gets ``header_color`` with white bold text.  Data cells get
    borders and centred text, columns named in ``DATE_COLUMNS`` get a
    DD-MMM-YYYY format, and the column called ``highlight_col`` is filled with
    ``highlight_color`` when a colour is given.  Widths come from
    ``COLUMN_WIDTHS`` (default 14).
    """
    edge = Side(style="thin", color="000000")
    box = Border(left=edge, right=edge, top=edge, bottom=edge)
    head_fill = PatternFill("solid", start_color=header_color)
    head_font = Font(bold=True, color="FFFFFF", name="Arial", size=10)
    body_font = Font(name="Arial", size=10)
    head_align = Alignment(horizontal="center", vertical="center", wrap_text=False)
    body_align = Alignment(horizontal="center")
    mark_fill = None
    if highlight_color:
        mark_fill = PatternFill("solid", start_color=highlight_color)

    header_cells = ws[1]
    titles = [c.value for c in header_cells]
    n_titles = len(titles)

    # Header styling and column widths are both driven by the header cells.
    for head in header_cells:
        head.fill = head_fill
        head.font = head_font
        head.alignment = head_align
        head.border = box
        ws.column_dimensions[get_column_letter(head.column)].width = COLUMN_WIDTHS.get(head.value, 14)

    for cells in ws.iter_rows(min_row=2, max_row=ws.max_row):
        for item in cells:
            # Cells beyond the header row's width have no column name.
            title = titles[item.column - 1] if item.column <= n_titles else None
            item.font = body_font
            item.border = box
            item.alignment = body_align
            if title in DATE_COLUMNS:
                item.number_format = "DD-MMM-YYYY"
            if mark_fill and title == highlight_col:
                item.fill = mark_fill

    ws.auto_filter.ref = ws.dimensions
    ws.freeze_panes = "A2"
def format_overview_sheet(ws):
    """Style the overview sheet and tint its destruction-related columns.

    Applies ``format_sheet`` with the blue header, then fills every data cell
    of the "Destroyed" and "Basket No." columns with a light green.
    """
    format_sheet(ws, header_color="1F4E79")
    tint = PatternFill("solid", start_color="E2EFDA")
    titles = [c.value for c in ws[1]]
    tinted = ("Destroyed", "Basket No.")
    for cells in ws.iter_rows(min_row=2, max_row=ws.max_row):
        for item in cells:
            if item.column > len(titles):
                continue  # no header above this cell
            if titles[item.column - 1] in tinted:
                item.fill = tint
def format_shipment_sheet(ws):
    """Style the two-tone "Shipments" worksheet in place.

    The first ``N_SHIP_COLS`` header cells are blue, the remaining ones green.
    Data cells get borders and centred text, and date values are shown as
    DD-MMM-YYYY.
    """
    thin = Side(style="thin", color="000000")
    border = Border(left=thin, right=thin, top=thin, bottom=thin)
    hfont = Font(bold=True, color="FFFFFF", name="Arial", size=10)
    dfont = Font(name="Arial", size=10)
    fill_ship = PatternFill("solid", start_color="1F4E79")
    fill_detail = PatternFill("solid", start_color="375623")
    # Style objects do not depend on the cell, so build each one only once.
    header_align = Alignment(horizontal="center", vertical="center", wrap_text=True)
    data_align = Alignment(horizontal="center", vertical="center")

    for cell in ws[1]:
        cell.fill = fill_ship if cell.column <= N_SHIP_COLS else fill_detail
        cell.font = hfont
        cell.alignment = header_align
        cell.border = border
        # Column width follows the header text only, capped at 35.
        ws.column_dimensions[get_column_letter(cell.column)].width = min(
            len(str(cell.value or "")) + 4, 35
        )
    ws.row_dimensions[1].height = 30

    for row in ws.iter_rows(min_row=2, max_row=ws.max_row):
        for cell in row:
            cell.font = dfont
            cell.border = border
            cell.alignment = data_align
            # datetime (and pandas Timestamp) are subclasses of date, so a
            # single isinstance check replaces the class-name comparison.
            if isinstance(cell.value, date):
                cell.number_format = "DD-MMM-YYYY"

    ws.auto_filter.ref = ws.dimensions
    ws.freeze_panes = "A2"
# ── Main ──────────────────────────────────────────────────────────────────────
# (sheet name, formatter key) in workbook order; process_study returns its
# data frames in this same order.
SHEETS_DEF = [
    ("CountryMedicationOverview", "overview"),
    ("Expired", "expired"),
    ("Assigned not dispensed", "assigned"),
    ("Not returned", "not_returned"),
    ("Kits for destruction", "destruction"),
    ("Shipments", "shipments"),
    ("Site Summary", "site_summary"),
]
# Formatter key -> callable taking the worksheet to style.
FORMAT_MAP = {
    "overview": format_overview_sheet,
    "expired": lambda ws: format_sheet(
        ws, header_color="C00000", highlight_col="Exp Date", highlight_color="FFE0E0"
    ),
    "assigned": lambda ws: format_sheet(
        ws, header_color="833C00", highlight_col="Subject ID", highlight_color="FFF2CC"
    ),
    "not_returned": lambda ws: format_sheet(
        ws, header_color="375623", highlight_col="Max Visit Date", highlight_color="E2EFDA"
    ),
    "destruction": lambda ws: format_sheet(ws, header_color="595959"),
    "shipments": format_shipment_sheet,
    "site_summary": lambda ws: format_sheet(ws, header_color="1F4E79"),
}
def process_study(cursor, study):
    """Load and derive all report data frames for one study.

    Args:
        cursor: open dictionary cursor used for every query.
        study: study identifier.

    Returns:
        List of seven data frames in ``SHEETS_DEF`` order: overview, expired,
        assigned-not-dispensed, not-returned, kits-for-destruction, shipments
        and site summary.

    Raises:
        RuntimeError: propagated from ``get_latest_import_id`` when the study
            has no import.
    """
    import_id = get_latest_import_id(cursor, study)
    print(f" import_id = {import_id}")
    df = load_inventory(cursor, study, import_id)
    shipments_df = load_shipments(cursor, study, import_id)
    expired_df = build_expired(df)
    assigned_df = build_assigned_not_dispensed(df)
    not_returned_df = build_not_returned(df)
    destruction_df = build_kits_for_destruction(df)
    site_summ_df = build_site_summary(shipments_df)
    return [
        df, expired_df, assigned_df, not_returned_df,
        destruction_df, shipments_df, site_summ_df,
    ]
def save_study_report(study, data_frames):
    """Write the study's data frames to a dated workbook and style every sheet.

    ``data_frames`` is paired positionally with ``SHEETS_DEF``.
    """
    stamp = date.today().strftime('%Y-%m-%d')
    output_file = OUTPUT_DIR / f"{stamp} {study} report.xlsx"

    sheet_names = [name for name, _ in SHEETS_DEF]
    with pd.ExcelWriter(output_file, engine="openpyxl") as writer:
        for name, frame in zip(sheet_names, data_frames):
            frame.to_excel(writer, index=False, sheet_name=name)

    # Reopen with openpyxl to apply the styling, then save over the same file.
    wb = load_workbook(output_file)
    for name, fmt_key in SHEETS_DEF:
        formatter = FORMAT_MAP[fmt_key]
        formatter(wb[name])
    wb.save(output_file)
    print(f" Uloženo: {output_file}")
def main():
    """Generate one report workbook per entry in ``STUDIES``.

    A failure in one study is printed with its traceback and does not stop
    the remaining studies.  The cursor and connection are always closed.
    """
    import traceback

    OUTPUT_DIR.mkdir(exist_ok=True)
    conn = get_conn()
    try:
        cursor = conn.cursor(dictionary=True)
        try:
            for study, _ in STUDIES:
                print(f"\n{'='*55}")
                print(f"[{study}]")
                print(f"{'='*55}")
                try:
                    data_frames = process_study(cursor, study)
                    save_study_report(study, data_frames)
                except Exception as e:
                    # Top-level boundary: report and continue with the next study.
                    print(f" CHYBA: {e}")
                    traceback.print_exc()
        finally:
            cursor.close()
    finally:
        conn.close()
    print("\nHotovo.")


if __name__ == "__main__":
    main()
@@ -1,76 +0,0 @@
from playwright.sync_api import sync_playwright
import os
# ── CONFIG ──────────────────────────────────────────────────────────────────
BASE_URL = "https://janssen.4gclinical.com"
# SECURITY: these credentials are committed in plain text. Set IWRS_EMAIL and
# IWRS_PASSWORD in the environment instead; the literals remain only as a
# fallback so existing runs keep working, and should be rotated and removed.
EMAIL = os.environ.get("IWRS_EMAIL", "vbuzalka@its.jnj.com")
PASSWORD = os.environ.get("IWRS_PASSWORD", "Vlado123++-+")
# Alternative study (swap with the active line below):
# STUDY = "42847922MDD3003"
STUDY = "77242113UCO3001"
OUTPUT_DIR = f"xls_ip_destruction_{STUDY}"
# ────────────────────────────────────────────────────────────────────────────
def run(page, study):
    """Download one IP-destruction XLS per basket offered by the report page.

    ``page`` is a logged-in Playwright page (see the ``__main__`` script in
    this file).  Files go to ``xls_ip_destruction_<study>``; baskets whose
    file already exists are skipped.
    """
    output_dir = f"xls_ip_destruction_{study}"
    os.makedirs(output_dir, exist_ok=True)
    page.goto(f"{BASE_URL}/report/ip_destruction_form")
    page.wait_for_load_state("networkidle", timeout=120000)
    # Open the search dropdown once to read the list of available baskets.
    page.locator('input[placeholder="search"], input[type="text"]').first.click()
    page.wait_for_timeout(1000)
    baskets = [b.strip() for b in page.locator('mat-option').all_inner_texts()
               if b.strip() and b.strip() != "No results found"]
    print(f" Nalezeno {len(baskets)} kosiku: {baskets}")
    # Close the dropdown again before starting the per-basket loop.
    page.keyboard.press("Escape")
    page.wait_for_timeout(500)
    if not baskets:
        print(" Zadne destruction kosite — preskakuji.")
        return
    for basket in baskets:
        filename = os.path.join(output_dir, f"ip_destruction_basket_{basket}.xlsx")
        if os.path.exists(filename):
            print(f" [{basket}] Preskakuji — existuje.")
            continue
        print(f" [{basket}] Stahuji...")
        input_field = page.locator('input[placeholder="search"], input[type="text"]').first
        input_field.click()
        input_field.fill(basket)
        page.wait_for_timeout(500)
        # NOTE(review): picks the first option left after typing the basket id;
        # confirm it is always the exact match.
        page.locator('mat-option').first.dispatch_event('click')
        page.wait_for_load_state("networkidle", timeout=120000)
        # The download object becomes available once the `with` block exits.
        with page.expect_download(timeout=120000) as dl:
            page.get_by_role("button", name="Download XLS").click()
        dl.value.save_as(filename)
        print(f" [{basket}] OK")
        # Reset the form so the next basket starts from a clean state.
        page.get_by_role("button", name="Clear").click()
        page.wait_for_load_state("networkidle", timeout=120000)
    print(" Destruction hotovo.")
# Stand-alone entry point: log in, select the study, then run the download.
if __name__ == "__main__":
    from playwright.sync_api import sync_playwright
    with sync_playwright() as p:
        # Headed browser so the run can be watched.
        browser = p.chromium.launch(headless=False)
        context = browser.new_context(accept_downloads=True)
        page = context.new_page()
        # Login form.
        page.goto(BASE_URL)
        page.wait_for_load_state("networkidle")
        page.get_by_label("Email *").fill(EMAIL)
        page.get_by_label("Password *").fill(PASSWORD)
        page.locator('#login__submit').click()
        page.wait_for_load_state("networkidle")
        # Study selection dialog.
        page.get_by_label("Study *").click()
        page.get_by_role("option", name=STUDY).click()
        page.get_by_role("button", name="SELECT").click()
        page.wait_for_load_state("networkidle")
        run(page, STUDY)
        browser.close()
-83
View File
@@ -1,83 +0,0 @@
from playwright.sync_api import sync_playwright
import os
# ── CONFIG ──────────────────────────────────────────────────────────────────
BASE_URL = "https://janssen.4gclinical.com"
# SECURITY: these credentials are committed in plain text. Set IWRS_EMAIL and
# IWRS_PASSWORD in the environment instead; the literals remain only as a
# fallback so existing runs keep working, and should be rotated and removed.
EMAIL = os.environ.get("IWRS_EMAIL", "vbuzalka@its.jnj.com")
PASSWORD = os.environ.get("IWRS_PASSWORD", "Vlado123++-+")
# Alternative study (swap with the active line below):
# STUDY = "42847922MDD3003"
STUDY = "77242113UCO3001"
# Site identifiers to download, per study.
SITES = {
    "42847922MDD3003": [
        "S10-CZ10002",
        "S10-CZ10004",
        "S10-CZ10005",
        "S10-CZ10008",
        "S10-CZ10011",
        "S10-CZ10012",
    ],
    "77242113UCO3001": [
        "DD5-CZ10001",
        "DD5-CZ10003",
        "DD5-CZ10006",
        "DD5-CZ10009",
        "DD5-CZ10010",
        "DD5-CZ10012",
        "DD5-CZ10013",
        "DD5-CZ10015",
        "DD5-CZ10016",
        "DD5-CZ10020",
        "DD5-CZ10021",
        "DD5-CZ10022",
    ],
}
OUTPUT_DIR = f"xls_reports_{STUDY}"
# ────────────────────────────────────────────────────────────────────────────
def run(page, study):
    """Download the on-site inventory detail XLS for every site of ``study``.

    ``page`` is a logged-in Playwright page (see the ``__main__`` script in
    this file).  Sites come from ``SITES[study]``; files go to
    ``xls_reports_<study>`` and existing files are overwritten.
    """
    output_dir = f"xls_reports_{study}"
    os.makedirs(output_dir, exist_ok=True)
    page.goto(f"{BASE_URL}/report/onsite_inventory_detail")
    page.wait_for_load_state("networkidle", timeout=120000)
    for site_id in SITES[study]:
        print(f" [{site_id}] Stahuji...")
        # Pick the site in the search dropdown.
        page.locator('input[placeholder="search"], input[type="text"]').first.click()
        page.get_by_role("option", name=site_id).click()
        page.wait_for_load_state("networkidle", timeout=120000)
        # The download object becomes available once the `with` block exits.
        with page.expect_download(timeout=120000) as dl:
            page.get_by_role("button", name="Download XLS").click()
        dl.value.save_as(os.path.join(output_dir, f"onsite_inventory_detail_{site_id}.xlsx"))
        print(f" [{site_id}] OK")
        # Reset the form so the next site starts from a clean state.
        page.get_by_role("button", name="Clear").click()
        page.wait_for_load_state("networkidle", timeout=120000)
    print(" Inventory hotovo.")
# Stand-alone entry point: log in, select the study, then run the download.
if __name__ == "__main__":
    from playwright.sync_api import sync_playwright
    with sync_playwright() as p:
        # Headed browser so the run can be watched.
        browser = p.chromium.launch(headless=False)
        context = browser.new_context(accept_downloads=True)
        page = context.new_page()
        # Login form.
        page.goto(BASE_URL)
        page.wait_for_load_state("networkidle")
        page.get_by_label("Email *").fill(EMAIL)
        page.get_by_label("Password *").fill(PASSWORD)
        page.locator('#login__submit').click()
        page.wait_for_load_state("networkidle")
        # Study selection dialog.
        page.get_by_label("Study *").click()
        page.get_by_role("option", name=STUDY).click()
        page.get_by_role("button", name="SELECT").click()
        page.wait_for_load_state("networkidle")
        run(page, STUDY)
        browser.close()
@@ -1,95 +0,0 @@
from playwright.sync_api import sync_playwright
import os
import pandas as pd
# ── CONFIG ──────────────────────────────────────────────────────────────────
BASE_URL = "https://janssen.4gclinical.com"
# SECURITY: these credentials are committed in plain text. Set IWRS_EMAIL and
# IWRS_PASSWORD in the environment instead; the literals remain only as a
# fallback so existing runs keep working, and should be rotated and removed.
EMAIL = os.environ.get("IWRS_EMAIL", "vbuzalka@its.jnj.com")
PASSWORD = os.environ.get("IWRS_PASSWORD", "Vlado123++-+")
STUDY = "42847922MDD3003"
# Alternative study (swap with the active line above):
#STUDY = "77242113UCO3001"
OUTPUT_DIR = f"xls_shipment_details_{STUDY}"
# ────────────────────────────────────────────────────────────────────────────
def get_cz_shipment_ids(study):
    """Return the shipment ids of Czech shipments from the downloaded report.

    Reads ``xls_shipments_<study>/shipments_report_<study>.xlsx`` relative to
    the current working directory, taking row 6 of the sheet as the header.

    Returns:
        List of shipment ids as strings, or ``None`` when the report file does
        not exist (the caller then falls back to the page dropdown).
    """
    path = f"xls_shipments_{study}/shipments_report_{study}.xlsx"
    if not os.path.exists(path):
        return None
    df = pd.read_excel(path, header=5)
    # str() first so non-string header cells cannot break the strip.
    df.columns = [str(c).strip() for c in df.columns]
    df = df.dropna(how="all")
    df["Shipment ID"] = df["Shipment ID"].astype(str).str.strip()
    # astype(str) keeps the filter working when "Location" holds no strings
    # at all (an all-empty column is read as float and has no .str accessor).
    is_czech = df["Location"].astype(str).str.contains("Czech", na=False, case=False)
    return df.loc[is_czech, "Shipment ID"].tolist()
def run(page, study):
    """Download one shipment-details XLS per shipment of ``study``.

    ``page`` is a logged-in Playwright page (see the ``__main__`` script in
    this file).  Shipment ids come from the previously downloaded shipments
    report when it exists, otherwise from the page's search dropdown.  Files
    go to ``xls_shipment_details_<study>``; existing files are skipped.
    """
    output_dir = f"xls_shipment_details_{study}"
    os.makedirs(output_dir, exist_ok=True)
    page.goto(f"{BASE_URL}/report/shipment_details_report")
    page.wait_for_load_state("networkidle", timeout=120000)
    cz_ids = get_cz_shipment_ids(study)
    if cz_ids is not None:
        shipments = cz_ids
        print(f" Filtrovano ze shipments reportu: {len(shipments)} CZ shipmentu")
    else:
        # Fallback: read every shipment offered by the dropdown (not CZ-filtered).
        page.locator('input[placeholder="search"], input[type="text"]').first.click()
        page.wait_for_timeout(1000)
        shipments = [s.strip() for s in page.locator('mat-option').all_inner_texts()
                     if s.strip() and s.strip() != "No results found"]
        print(f" Nalezeno {len(shipments)} shipmentu z dropdownu")
        # Close the dropdown again before starting the per-shipment loop.
        page.keyboard.press("Escape")
        page.wait_for_timeout(500)
    if not shipments:
        print(" Zadne shipments — preskakuji.")
        return
    for shipment in shipments:
        filename = os.path.join(output_dir, f"shipment_details_{shipment}.xlsx")
        if os.path.exists(filename):
            print(f" [{shipment}] Preskakuji — existuje.")
            continue
        print(f" [{shipment}] Stahuji...")
        input_field = page.locator('input[placeholder="search"], input[type="text"]').first
        input_field.click()
        input_field.fill(shipment)
        page.wait_for_timeout(500)
        # NOTE(review): picks the first option left after typing the id;
        # confirm it is always the exact match.
        page.locator('mat-option').first.dispatch_event('click')
        page.wait_for_load_state("networkidle", timeout=120000)
        # The download object becomes available once the `with` block exits.
        with page.expect_download(timeout=120000) as dl:
            page.get_by_role("button", name="Download XLS").click()
        dl.value.save_as(filename)
        print(f" [{shipment}] OK")
        # Reset the form so the next shipment starts from a clean state.
        page.get_by_role("button", name="Clear").click()
        page.wait_for_load_state("networkidle", timeout=120000)
    print(" Shipment details hotovo.")
# Stand-alone entry point: log in, select the study, then run the download.
if __name__ == "__main__":
    from playwright.sync_api import sync_playwright
    with sync_playwright() as p:
        # Headed browser so the run can be watched.
        browser = p.chromium.launch(headless=False)
        context = browser.new_context(accept_downloads=True)
        page = context.new_page()
        # Login form.
        page.goto(BASE_URL)
        page.wait_for_load_state("networkidle")
        page.get_by_label("Email *").fill(EMAIL)
        page.get_by_label("Password *").fill(PASSWORD)
        page.locator('#login__submit').click()
        page.wait_for_load_state("networkidle")
        # Study selection dialog.
        page.get_by_label("Study *").click()
        page.get_by_role("option", name=STUDY).click()
        page.get_by_role("button", name="SELECT").click()
        page.wait_for_load_state("networkidle")
        run(page, STUDY)
        browser.close()
@@ -1,47 +0,0 @@
from playwright.sync_api import sync_playwright
import os
# ── CONFIG ──────────────────────────────────────────────────────────────────
BASE_URL = "https://janssen.4gclinical.com"
# SECURITY: these credentials are committed in plain text. Set IWRS_EMAIL and
# IWRS_PASSWORD in the environment instead; the literals remain only as a
# fallback so existing runs keep working, and should be rotated and removed.
EMAIL = os.environ.get("IWRS_EMAIL", "vbuzalka@its.jnj.com")
PASSWORD = os.environ.get("IWRS_PASSWORD", "Vlado123++-+")
# Alternative study (swap with the active line below):
# STUDY = "42847922MDD3003"
STUDY = "77242113UCO3001"
OUTPUT_DIR = f"xls_shipments_{STUDY}"
# ────────────────────────────────────────────────────────────────────────────
def run(page, study):
    """Download the study-wide shipments report XLS.

    ``page`` is a logged-in Playwright page (see the ``__main__`` script in
    this file).  The file is saved as
    ``xls_shipments_<study>/shipments_report_<study>.xlsx``, overwriting any
    previous download.
    """
    output_dir = f"xls_shipments_{study}"
    os.makedirs(output_dir, exist_ok=True)
    page.goto(f"{BASE_URL}/report/shipments_report")
    page.wait_for_load_state("networkidle", timeout=120000)
    filename = os.path.join(output_dir, f"shipments_report_{study}.xlsx")
    # The download object becomes available once the `with` block exits.
    with page.expect_download(timeout=120000) as dl:
        page.get_by_role("button", name="Download XLS").click()
    dl.value.save_as(filename)
    print(f" Shipments report OK -> {filename}")
# Stand-alone entry point: log in, select the study, then run the download.
if __name__ == "__main__":
    from playwright.sync_api import sync_playwright
    with sync_playwright() as p:
        # Headed browser so the run can be watched.
        browser = p.chromium.launch(headless=False)
        context = browser.new_context(accept_downloads=True)
        page = context.new_page()
        # Login form.
        page.goto(BASE_URL)
        page.wait_for_load_state("networkidle")
        page.get_by_label("Email *").fill(EMAIL)
        page.get_by_label("Password *").fill(PASSWORD)
        page.locator('#login__submit').click()
        page.wait_for_load_state("networkidle")
        # Study selection dialog.
        page.get_by_label("Study *").click()
        page.get_by_role("option", name=STUDY).click()
        page.get_by_role("button", name="SELECT").click()
        page.wait_for_load_state("networkidle")
        run(page, STUDY)
        browser.close()
-441
View File
@@ -1,441 +0,0 @@
"""
Importuje drugs data z IWRS Excel reportů do MySQL.
Tabulky:
iwrs_shipments — zásilky (jen CZ, verzováno import_id)
iwrs_shipment_items — obsah zásilek (verzováno import_id)
iwrs_inventory — lékový sklad na centrech (verzováno import_id)
iwrs_destruction — destrukce (bez verzování, přeskočí již importované košíky)
Spustit po stažení souborů (nebo přes run_all.py).
"""
import os
import glob
import re
import datetime
import numpy as np
import pandas as pd
import mysql.connector
import db_config
# Downloaded report folders are looked up relative to this script's directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# Study identifiers handled by this importer.
STUDIES = ["77242113UCO3001", "42847922MDD3003"]
# Site identifiers per study.
SITES = {
    "77242113UCO3001": [
        "DD5-CZ10001", "DD5-CZ10003", "DD5-CZ10006", "DD5-CZ10009",
        "DD5-CZ10010", "DD5-CZ10012", "DD5-CZ10013", "DD5-CZ10015",
        "DD5-CZ10016", "DD5-CZ10020", "DD5-CZ10021", "DD5-CZ10022",
    ],
    "42847922MDD3003": [
        "S10-CZ10002", "S10-CZ10004", "S10-CZ10005",
        "S10-CZ10008", "S10-CZ10011", "S10-CZ10012",
    ],
}
# ── type converters ──────────────────────────────────────────────────────────
def _py(val):
if isinstance(val, np.generic):
return val.item()
return val
def to_date(val):
    """Convert a spreadsheet cell value to ``datetime.date`` or ``None``.

    Missing markers (``None``, NaN/NaT, blank text, "nan"/"nat"/"none") and
    text matching none of the known formats all yield ``None``.
    """
    val = _py(val)
    if val is None or (isinstance(val, float) and val != val):
        return None
    try:
        # If the missing-value test itself fails, treat the value as present.
        if pd.isna(val):
            return None
    except (TypeError, ValueError):
        pass

    # pd.Timestamp subclasses datetime, so one branch covers both.
    if isinstance(val, datetime.datetime):
        return val.date()
    if isinstance(val, datetime.date):
        return val

    text = str(val).strip()
    if text.lower() in ("nat", "nan", "none", ""):
        return None
    for pattern in ("%Y-%m-%d", "%d-%b-%Y", "%d-%m-%Y", "%Y-%m-%d %H:%M:%S"):
        try:
            parsed = datetime.datetime.strptime(text, pattern)
        except ValueError:
            continue
        return parsed.date()
    return None
def to_int(val):
    """Convert a cell value to ``int``, or ``None`` when it is not a finite number.

    Accepts ints, floats and numeric text; fractional values are truncated.
    ``None``, NaN, infinities and non-numeric text all yield ``None``.
    """
    val = _py(val)
    if isinstance(val, int):
        # Already integral (bool included): skip the float round-trip, which
        # loses precision above 2**53.
        return int(val)
    try:
        # int() raises ValueError for NaN and OverflowError for +/-inf.
        return int(float(val))
    except (TypeError, ValueError, OverflowError):
        return None
def to_str(val):
    """Convert a cell value to stripped text, or ``None`` for missing markers.

    ``None``, float NaN, blank text and the words "nan"/"nat"/"none" (any
    case) are treated as missing.
    """
    val = _py(val)
    if val is None or (isinstance(val, float) and val != val):
        return None
    text = str(val).strip()
    if text.lower() in ("nan", "nat", "none", ""):
        return None
    return text
# ── DB helpers ───────────────────────────────────────────────────────────────
def get_conn():
    """Open and return a new MySQL connection configured from ``db_config``."""
    settings = {
        "host": db_config.DB_HOST,
        "port": db_config.DB_PORT,
        "user": db_config.DB_USER,
        "password": db_config.DB_PASSWORD,
        "database": db_config.DB_NAME,
    }
    return mysql.connector.connect(**settings)
def insert_import(cursor, study, source_label):
    """Insert a 'drugs' row into ``iwrs_import`` and return its new row id.

    The row is stamped with the current local time.
    """
    statement = "INSERT INTO iwrs_import (study, imported_at, source_file, report_type) VALUES (%s, %s, %s, %s)"
    values = (study, datetime.datetime.now(), source_label, "drugs")
    cursor.execute(statement, values)
    return cursor.lastrowid
def basket_already_imported(cursor, study, basket_id):
    """Return True when ``iwrs_destruction`` already holds rows for this basket.

    ``basket_id`` is compared as text.
    """
    query = "SELECT 1 FROM iwrs_destruction WHERE study=%s AND basket_id=%s LIMIT 1"
    cursor.execute(query, (study, str(basket_id)))
    found = cursor.fetchone()
    return found is not None
# ── parsers ──────────────────────────────────────────────────────────────────
def parse_shipments_report(study):
    """Parse the shipments report workbook of a study into a list of dicts.

    The workbook is expected at
    ``BASE_DIR/xls_shipments_<study>/shipments_report_<study>.xlsx``.
    The header row is the first row containing a "Shipment ID" cell; only
    rows whose "Location" contains "Czech" (case-insensitive) are kept.

    Returns:
        One dict per shipment, or an empty list when the file or the
        header row is missing.
    """
    path = os.path.join(BASE_DIR, f"xls_shipments_{study}", f"shipments_report_{study}.xlsx")
    if not os.path.exists(path):
        print(f" CHYBÍ: {path}")
        return []

    # First pass without a header to locate the row holding the column names.
    raw = pd.read_excel(path, header=None)
    header_row = None
    for i, row in raw.iterrows():
        if "Shipment ID" in [str(v).strip() for v in row]:
            header_row = i
            break
    if header_row is None:
        return []

    df = pd.read_excel(path, header=header_row).dropna(how="all")
    # Header detection above ignores surrounding whitespace, so normalise the
    # labels the same way; otherwise a padded header passes detection and
    # then fails the lookups below.
    df.columns = [str(c).strip() for c in df.columns]

    # Czech shipments only.
    df = df[df["Location"].astype(str).str.contains("Czech", na=False, case=False)]

    rows = []
    for _, r in df.iterrows():
        rows.append({
            # Required columns: a missing one raises KeyError.
            "shipment_id": to_str(r["Shipment ID"]),
            "status": to_str(r["IRT Shipment Status"]),
            "type": to_str(r["Type"]),
            "ship_from": to_str(r["Shipment From"]),
            "ship_to_site": to_str(r["Ship To:"]),
            "location": to_str(r["Location"]),
            "request_date": to_date(r["Request Date"]),
            "shipped_date": to_date(r["Shipped Date"]),
            # Optional columns: r.get() yields None when absent, which the
            # converters map to None.
            "received_date": to_date(r.get("Received Date")),
            "received_by": to_str(r.get("Received by")),
            "delivered_date_utc": to_date(r.get("Delivered Date [UTC]")),
            "delivery_recipient": to_str(r.get("Delivery Recipient")),
            "delivery_details": to_str(r.get("Delivery Details")),
            "cancelled_date": to_date(r.get("Cancelled Date")),
            "total_medication_ids": to_int(r.get("Total Medication IDs")),
            "tracking_no": to_str(r.get("Tracking #")),
            "shipping_category": to_str(r.get("Shipping Category")),
            "expected_arrival": to_date(r.get("Expected Arrival")),
        })
    return rows
def parse_shipment_details(study):
    """Parse every shipment-detail workbook of a study into item dicts.

    Files are read from ``BASE_DIR/xls_shipment_details_<study>`` and must
    be named ``shipment_details_<shipment id>.xlsx``; the shipment id is
    taken from the file name. The header row is the first row containing a
    "Medication ID" cell; files without one are skipped.

    Returns:
        One dict per medication row across all files.
    """
    detail_dir = os.path.join(BASE_DIR, f"xls_shipment_details_{study}")
    files = sorted(glob.glob(os.path.join(detail_dir, "shipment_details_*.xlsx")))
    rows = []
    for path in files:
        # Shipment id comes from the file name.
        m = re.search(r"shipment_details_(.+)\.xlsx", os.path.basename(path))
        shipment_id = m.group(1) if m else "UNKNOWN"

        raw = pd.read_excel(path, header=None)
        header_row = None
        for i, row in raw.iterrows():
            if "Medication ID" in [str(v).strip() for v in row]:
                header_row = i
                break
        if header_row is None:
            continue

        df = pd.read_excel(path, header=header_row).dropna(how="all")
        # Match the whitespace-insensitive header detection above; with raw
        # labels a padded header would silently turn every r.get() into None.
        df.columns = [str(c).strip() for c in df.columns]

        for _, r in df.iterrows():
            # Column names differ between studies; take whichever is present.
            med_desc = (to_str(r.get("Medication Description"))
                        or to_str(r.get("Medication ID Description")))
            med_type = (to_str(r.get("Medication type"))
                        or to_str(r.get("Medication ID type")))
            rows.append({
                "shipment_id": shipment_id,
                "destination_location": to_str(r.get("Destination Location")),
                "shipment_status": to_str(r.get("IRT Shipment Status")),
                "shipment_type": to_str(r.get("Type")),
                "destination_site": to_str(r.get("Destination Site")),
                "investigator": to_str(r.get("Investigator")),
                "medication_description": med_desc,
                "medication_type": med_type,
                "medication_id": to_str(r.get("Medication ID")),
                "packaged_lot_no": to_str(r.get("Packaged Lot number")),
                "packaged_lot_description": to_str(r.get("Packaged Lot description")),
                "container_id": to_str(r.get("Container ID")),
                "quantity": to_int(r.get("Quantity of Medication IDs")),
                "expiration_date": to_date(r.get("Expiration Date")),
                "item_status": to_str(r.get("Status")),
            })
    return rows
def parse_inventory(study):
    """Parse all on-site inventory detail workbooks of a study.

    Files ``onsite_inventory_detail_*.xlsx`` are read from
    ``BASE_DIR/xls_reports_<study>``. Rows above the data table whose first
    cell starts with "Site:", "Investigator:" or "Location:" supply
    per-file metadata; the data header is the first row whose first cell is
    "Medication" or "Medication ID". Files without such a row are skipped.

    Returns:
        One dict per inventory row across all files.
    """
    inv_dir = os.path.join(BASE_DIR, f"xls_reports_{study}")
    files = sorted(glob.glob(os.path.join(inv_dir, "onsite_inventory_detail_*.xlsx")))
    rows = []
    for path in files:
        raw = pd.read_excel(path, header=None)

        # Pull metadata from the preamble and locate the header row.
        site = investigator = location = None
        header_row = None
        for i, row in raw.iterrows():
            first = str(row.iloc[0]).strip() if pd.notna(row.iloc[0]) else ""
            if first.startswith("Site:"):
                site = first.replace("Site:", "").strip()
            elif first.startswith("Investigator:"):
                investigator = first.replace("Investigator:", "").strip()
            elif first.startswith("Location:"):
                location = first.replace("Location:", "").strip()
            if first in ("Medication", "Medication ID") and header_row is None:
                header_row = i
        if header_row is None:
            continue

        df = pd.read_excel(path, header=header_row).dropna(how="all")
        # Header detection strips whitespace, so do the same for the labels
        # used in the lookups below.
        df.columns = [str(c).strip() for c in df.columns]
        # The first column is "Medication" or "Medication ID"; unify it.
        df = df.rename(columns={df.columns[0]: "medication_id"})

        # The dispensing-date column is spelled two ways; choose by column
        # presence rather than by the truthiness of each cell value.
        disp_col = "Dispensing date" if "Dispensing date" in df.columns else "Dispensing Date"

        for _, r in df.iterrows():
            rows.append({
                "site": site,
                "investigator": investigator,
                "location": location,
                "medication_id": to_str(r["medication_id"]),
                "packaged_lot_no": to_str(r.get("Packaged Lot number")),
                "original_expiration_date": to_date(r.get("Original Expiration Date when Packaged Lot was Added")),
                "expiration_date": to_date(r.get("Expiration date")),
                "received_date": to_date(r.get("Received Date")),
                "receipt_user": to_str(r.get("Shipment Receipt User")),
                "subject_identifier": to_str(r.get("Subject Identifier")),
                "quantity_assigned": to_int(r.get("Quantity Assigned")),
                "irt_transaction": to_str(r.get("IRT Transaction")),
                "date_assigned": to_date(r.get("Date Assigned")),
                "assignment_user": to_str(r.get("Assignment User")),
                "dispensation_status": to_str(r.get("Dispensation Status")),
                "dispensing_date": to_date(r.get(disp_col)),
                "quantity_dispensed": to_int(r.get("Quantity Dispensed")),
                "dispensing_user": to_str(r.get("Dispensing User")),
                "quantity_returned": to_int(r.get("Quantity Returned")),
                "date_returned": to_date(r.get("Date Returned")),
                "return_user": to_str(r.get("Return User")),
            })
    return rows
def parse_destruction_files(study):
    """Parse all IP-destruction basket workbooks of a study.

    Files ``ip_destruction_basket_*.xlsx`` are read from
    ``BASE_DIR/xls_ip_destruction_<study>``. Rows whose first cell starts
    with one of the known "<Label>:" prefixes supply basket metadata; the
    item table starts at the first row whose first cell is
    "Medication ID Description". Files without that row are skipped.

    Returns:
        One dict per basket with its metadata and an ``items`` list.
    """
    dest_dir = os.path.join(BASE_DIR, f"xls_ip_destruction_{study}")
    files = sorted(glob.glob(os.path.join(dest_dir, "ip_destruction_basket_*.xlsx")))
    # Preamble label -> key in the basket metadata.
    meta_keys = [
        ("Investigator Name:", "investigator"),
        ("Site ID:", "site_id"),
        ("Location:", "location"),
        ("Basket ID:", "basket_id"),
        ("Drug Destruction Created Date:", "destruction_date"),
    ]
    baskets = []
    for path in files:
        raw = pd.read_excel(path, header=None)
        meta = {}
        header_row = None
        for i, row in raw.iterrows():
            first = str(row.iloc[0]).strip() if pd.notna(row.iloc[0]) else ""
            for key, attr in meta_keys:
                if first.startswith(key):
                    meta[attr] = first.replace(key, "").strip()
            if first == "Medication ID Description" and header_row is None:
                header_row = i
        if header_row is None:
            continue

        df = pd.read_excel(path, header=header_row).dropna(how="all")
        # Header detection compares stripped text; normalise the labels so
        # the r.get() lookups below see the same names.
        df.columns = [str(c).strip() for c in df.columns]

        items = [
            {
                "medication_description": to_str(r.get("Medication ID Description")),
                "medication_id": to_str(r.get("Medication ID")),
                "packaged_lot_description": to_str(r.get("Packaged Lot description")),
                "comments": to_str(r.get("Comments")),
            }
            for _, r in df.iterrows()
        ]
        baskets.append({
            "site_id": meta.get("site_id"),
            "investigator": meta.get("investigator"),
            "location": meta.get("location"),
            "basket_id": meta.get("basket_id"),
            "destruction_date": to_date(meta.get("destruction_date")),
            "items": items,
        })
    return baskets
# ── inserters ────────────────────────────────────────────────────────────────
def insert_shipments(cursor, import_id, study, rows):
    """Insert parsed shipment dicts into ``iwrs_shipments``, one row each.

    Every dict in ``rows`` must contain all keys listed in ``fields``.
    """
    sql = """INSERT INTO iwrs_shipments
        (import_id, study, shipment_id, status, type, ship_from, ship_to_site,
        location, request_date, shipped_date, received_date, received_by,
        delivered_date_utc, delivery_recipient, delivery_details, cancelled_date,
        total_medication_ids, tracking_no, shipping_category, expected_arrival)
        VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"""
    # Row keys in the same order as the column list after import_id, study.
    fields = (
        "shipment_id", "status", "type", "ship_from", "ship_to_site", "location",
        "request_date", "shipped_date", "received_date", "received_by",
        "delivered_date_utc", "delivery_recipient", "delivery_details",
        "cancelled_date", "total_medication_ids", "tracking_no",
        "shipping_category", "expected_arrival",
    )
    for record in rows:
        values = tuple(record[name] for name in fields)
        cursor.execute(sql, (import_id, study) + values)
def insert_shipment_items(cursor, import_id, study, rows):
    """Insert parsed shipment-item dicts into ``iwrs_shipment_items``.

    Every dict in ``rows`` must contain all keys listed in ``fields``.
    """
    sql = """INSERT INTO iwrs_shipment_items
        (import_id, study, shipment_id, destination_location, shipment_status,
        shipment_type, destination_site, investigator, medication_description,
        medication_type, medication_id, packaged_lot_no, packaged_lot_description,
        container_id, quantity, expiration_date, item_status)
        VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"""
    # Row keys in the same order as the column list after import_id, study.
    fields = (
        "shipment_id", "destination_location", "shipment_status", "shipment_type",
        "destination_site", "investigator", "medication_description",
        "medication_type", "medication_id", "packaged_lot_no",
        "packaged_lot_description", "container_id", "quantity",
        "expiration_date", "item_status",
    )
    for record in rows:
        values = tuple(record[name] for name in fields)
        cursor.execute(sql, (import_id, study) + values)
def insert_inventory(cursor, import_id, study, rows):
    """Insert parsed inventory dicts into ``iwrs_inventory``, one row each.

    Every dict in ``rows`` must contain all keys listed in ``fields``.
    """
    sql = """INSERT INTO iwrs_inventory
        (import_id, study, site, investigator, location, medication_id,
        packaged_lot_no, original_expiration_date, expiration_date, received_date,
        receipt_user, subject_identifier, quantity_assigned, irt_transaction,
        date_assigned, assignment_user, dispensation_status, dispensing_date,
        quantity_dispensed, dispensing_user, quantity_returned, date_returned, return_user)
        VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"""
    # Row keys in the same order as the column list after import_id, study.
    fields = (
        "site", "investigator", "location", "medication_id", "packaged_lot_no",
        "original_expiration_date", "expiration_date", "received_date",
        "receipt_user", "subject_identifier", "quantity_assigned",
        "irt_transaction", "date_assigned", "assignment_user",
        "dispensation_status", "dispensing_date", "quantity_dispensed",
        "dispensing_user", "quantity_returned", "date_returned", "return_user",
    )
    for record in rows:
        values = tuple(record[name] for name in fields)
        cursor.execute(sql, (import_id, study) + values)
def insert_destruction(cursor, study, baskets):
    """Insert destruction-basket items that are not in the database yet.

    A basket is skipped entirely when ``basket_already_imported`` reports
    that rows for its id already exist. Items of the remaining baskets are
    written with ``INSERT IGNORE``.

    Returns:
        ``(imported, skipped)``: rows actually written and baskets skipped.
    """
    sql = """INSERT IGNORE INTO iwrs_destruction
        (study, site_id, investigator, location, basket_id, destruction_date,
        medication_description, medication_id, packaged_lot_description, comments)
        VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"""
    skipped = 0
    imported = 0
    for b in baskets:
        if basket_already_imported(cursor, study, b["basket_id"]):
            skipped += 1
            continue
        for item in b["items"]:
            cursor.execute(sql, (
                study, b["site_id"], b["investigator"], b["location"],
                b["basket_id"], b["destruction_date"],
                item["medication_description"], item["medication_id"],
                item["packaged_lot_description"], item["comments"],
            ))
            # INSERT IGNORE reports 0 affected rows when the row was dropped
            # as a duplicate; count only rows that were actually written.
            if cursor.rowcount != 0:
                imported += 1
    return imported, skipped
# ── main ─────────────────────────────────────────────────────────────────────
def import_study(study):
    """Parse all report files of one study and load them into MySQL.

    All inserts run in one transaction: it is committed only when every
    insert succeeds and rolled back otherwise. The cursor and connection
    are closed in every case, and any exception is re-raised to the caller.
    """
    print(f"\n Parsování dat pro {study}...")
    shipments = parse_shipments_report(study)
    items = parse_shipment_details(study)
    inventory = parse_inventory(study)
    baskets = parse_destruction_files(study)
    print(f" Zásilky: {len(shipments)} | Položky zásilek: {len(items)} | Sklad: {len(inventory)} | Destrukční košíky: {len(baskets)}")

    conn = get_conn()
    try:
        cursor = conn.cursor()
        try:
            import_id = insert_import(cursor, study, f"drugs_{study}")
            print(f" import_id = {import_id}")
            insert_shipments(cursor, import_id, study, shipments)
            insert_shipment_items(cursor, import_id, study, items)
            insert_inventory(cursor, import_id, study, inventory)
            dest_imported, dest_skipped = insert_destruction(cursor, study, baskets)
            conn.commit()
        except Exception:
            # Do not leave a half-written import behind.
            conn.rollback()
            raise
        finally:
            cursor.close()
    finally:
        conn.close()
    print(f" Destrukce: {dest_imported} nových | {dest_skipped} košíků přeskočeno (již importováno)")
def main():
    """Import every study in ``STUDIES``, reporting failures per study.

    An exception in one study is printed with its traceback and does not
    stop the remaining studies.
    """
    import traceback

    banner = "=" * 60
    for study in STUDIES:
        print(f"\n{banner}")
        print(f"[{study}]")
        print(banner)
        try:
            import_study(study)
        except Exception as e:
            print(f" CHYBA: {e}")
            traceback.print_exc()
        else:
            print(" OK")
    print("\nHotovo.")
main()
-85
View File
@@ -1,85 +0,0 @@
import sys
import os
from playwright.sync_api import sync_playwright
import download_reports
import download_ip_destruction
import download_shipments_report
import download_shipment_details
import create_accountability_report
# Portal URL and login used by the Playwright session.
BASE_URL = "https://janssen.4gclinical.com"
# SECURITY NOTE: credentials are committed in source. They can be overridden
# with the IWRS_EMAIL / IWRS_PASSWORD environment variables; the literals are
# kept only as a backward-compatible fallback and should be rotated and
# removed from the repository.
EMAIL = os.environ.get("IWRS_EMAIL", "vbuzalka@its.jnj.com")
PASSWORD = os.environ.get("IWRS_PASSWORD", "Vlado123++-+")
# Menu choice -> study identifier offered by pick_study().
STUDIES = {
    "1": "77242113UCO3001",
    "2": "42847922MDD3003",
}
def pick_study():
    """Prompt on stdin until a valid menu key is entered; return its study id."""
    print("Vyber studii:")
    for key, name in STUDIES.items():
        print(f" {key}) {name}")
    choice = input("Volba (1/2): ").strip()
    while choice not in STUDIES:
        print(" Neplatna volba, zkus znovu.")
        choice = input("Volba (1/2): ").strip()
    return STUDIES[choice]
def login_and_select_study(page, study):
    """Log in to the portal at ``BASE_URL`` and select the given study.

    Args:
        page: Playwright page to drive.
        study: Study identifier; must match an option name in the
            "Study *" picker.
    """
    print(f"\n[1/5] Prihlaseni a vyber studie {study}...")
    page.goto(BASE_URL)
    page.wait_for_load_state("networkidle")
    # Fill in and submit the login form.
    page.get_by_label("Email *").fill(EMAIL)
    page.get_by_label("Password *").fill(PASSWORD)
    page.locator('#login__submit').click()
    page.wait_for_load_state("networkidle")
    # Open the study picker, choose the study and confirm.
    page.get_by_label("Study *").click()
    page.get_by_role("option", name=study).click()
    page.get_by_role("button", name="SELECT").click()
    page.wait_for_load_state("networkidle")
    print(" OK")
def main():
    """Download all IWRS reports for a chosen study, then build the report.

    The working directory is switched to this script's directory, so the
    relative ``xls_*`` paths handed to ``create_accountability_report``
    resolve next to the script.
    """
    # Function-scope imports replace the original inline __import__() calls.
    import datetime
    from pathlib import Path

    os.chdir(os.path.dirname(os.path.abspath(__file__)))
    study = pick_study()
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=False)
        try:
            context = browser.new_context(accept_downloads=True)
            page = context.new_page()
            login_and_select_study(page, study)
            print(f"\n[2/5] Stahuji inventory reporty...")
            download_reports.run(page, study)
            print(f"\n[3/5] Stahuji IP destruction reporty...")
            download_ip_destruction.run(page, study)
            print(f"\n[4/5] Stahuji shipments report...")
            download_shipments_report.run(page, study)
            print(f"\n[5/5] Stahuji shipment details...")
            download_shipment_details.run(page, study)
        finally:
            # Close the browser even when a download step fails.
            browser.close()

    print(f"\n[6/6] Generuji accountability report...")
    # The report module is configured through its module-level globals.
    create_accountability_report.STUDY = study
    create_accountability_report.INVENTORY_DIR = Path(f"xls_reports_{study}")
    create_accountability_report.DESTRUCTION_DIR = Path(f"xls_ip_destruction_{study}")
    create_accountability_report.SHIPMENTS_FILE = Path(f"xls_shipments_{study}/shipments_report_{study}.xlsx")
    create_accountability_report.DETAILS_DIR = Path(f"xls_shipment_details_{study}")
    create_accountability_report.OUTPUT_FILE = (
        create_accountability_report.OUTPUT_DIR
        / f"{datetime.date.today().strftime('%Y-%m-%d')} {study} CZ IWRS overview.xlsx"
    )
    create_accountability_report.main()
    print("\nVse hotovo!")
main()
-649
View File
@@ -1,649 +0,0 @@
import os
import sys
import pandas as pd
from datetime import date
from pathlib import Path
from openpyxl import load_workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from common.mongo_writer import get_db
# Study identifiers this script knows about.
STUDIES = ["77242113UCO3001", "42847922MDD3003"]
# Directory containing this script; generated workbooks go to OUTPUT_DIR.
BASE_DIR = Path(os.path.dirname(os.path.abspath(__file__)))
OUTPUT_DIR = BASE_DIR / "output"
# Column labels that hold dates: converted with pd.to_datetime on load and
# given the DD-MMM-YYYY number format by format_sheet().
DATE_COLUMNS = {
    "Orig Exp Date", "Exp Date", "Rcv Date",
    "Date Asgn", "Disp Date", "Date Ret", "Destroyed", "Max Visit Date",
    "Visit Date", "Scheduled Date",
}
N_SHIP_COLS = 9  # number of shipment columns preceding the detail columns
# ── Loading data from MongoDB ────────────────────────────────────────────────
# (MongoDB field, sheet column label) pairs for the inventory sheet, in
# output column order.
INVENTORY_COLS = [
    ("site", "Site"),
    ("medication_id", "Med ID"),
    ("packaged_lot_no", "Lot No."),
    ("original_expiration_date", "Orig Exp Date"),
    ("expiration_date", "Exp Date"),
    ("received_date", "Rcv Date"),
    ("receipt_user", "Rcpt User"),
    ("subject_identifier", "Subject ID"),
    ("quantity_assigned", "Qty Asgn"),
    ("irt_transaction", "IRT Tx"),
    ("date_assigned", "Date Asgn"),
    ("assignment_user", "Asgn User"),
    ("dispensation_status", "Disp Status"),
    ("dispensing_date", "Disp Date"),
    ("quantity_dispensed", "Qty Disp"),
    ("dispensing_user", "Disp User"),
    ("quantity_returned", "Qty Ret"),
    ("date_returned", "Date Ret"),
    ("return_user", "Ret User"),
]
def load_inventory(study):
    """Load a study's inventory from MongoDB as a DataFrame.

    Columns follow ``INVENTORY_COLS`` plus "Destroyed" and "Basket No.",
    filled from the first ``iwrs_destruction`` document seen for each
    medication id. An empty DataFrame is returned when there is no
    inventory.
    """
    db = get_db()
    inventory_docs = list(db.iwrs_inventory.find({"study": study}))
    destruction_docs = list(db.iwrs_destruction.find({"study": study}))

    # medication_id -> (basket id, destruction date) of its first occurrence
    first_destruction = {}
    for entry in destruction_docs:
        med_id = entry.get("medication_id")
        if med_id:
            first_destruction.setdefault(
                med_id, (entry.get("basket_id"), entry.get("destruction_date"))
            )

    records = []
    for doc in inventory_docs:
        basket, destroyed_on = first_destruction.get(doc.get("medication_id"), (None, None))
        record = {label: doc.get(key) for key, label in INVENTORY_COLS}
        record["Destroyed"] = destroyed_on
        record["Basket No."] = basket
        records.append(record)

    df = pd.DataFrame(records)
    if df.empty:
        print(" Inventory: 0 kitu")
        return df

    df = df.sort_values(["Site", "Rcv Date", "Med ID"], na_position="last")
    df = df.reset_index(drop=True)
    for col in DATE_COLUMNS:
        if col in df.columns:
            df[col] = pd.to_datetime(df[col], errors="coerce")
    print(f" Inventory: {len(df)} kitu")
    return df
# (MongoDB field, sheet column label) pairs taken from iwrs_shipments documents.
SHIP_COLS = [
    ("shipment_id", "Shipment ID"),
    ("status", "IRT Shipment Status"),
    ("type", "Type"),
    ("ship_from", "Shipment From"),
    ("ship_to_site", "Ship To:"),
    ("request_date", "Request Date"),
    ("received_date", "Received Date"),
    ("received_by", "Received by"),
    ("expected_arrival", "Expected Arrival"),
]
# (MongoDB field, sheet column label) pairs taken from iwrs_shipment_items documents.
ITEM_COLS = [
    ("investigator", "Investigator"),
    ("medication_description", "Medication Description"),
    ("medication_id", "Medication ID"),
    ("packaged_lot_no", "Packaged Lot number"),
    ("expiration_date", "Expiration Date"),
    ("item_status", "Status"),
]
def load_shipments(study):
    """Load a study's shipments joined with their items as a DataFrame.

    One row is produced per shipment item; a shipment without items
    contributes no rows. Columns are the ``SHIP_COLS`` labels followed by
    the ``ITEM_COLS`` labels.
    """
    db = get_db()
    shipment_docs = list(db.iwrs_shipments.find({"study": study}))
    item_docs = list(db.iwrs_shipment_items.find({"study": study}))

    # Group items under their shipment id.
    items_by_shipment = {}
    for item in item_docs:
        items_by_shipment.setdefault(item.get("shipment_id"), []).append(item)

    records = []
    for shipment in shipment_docs:
        shipment_part = {label: shipment.get(key) for key, label in SHIP_COLS}
        for item in items_by_shipment.get(shipment.get("shipment_id"), []):
            item_part = {label: item.get(key) for key, label in ITEM_COLS}
            records.append({**shipment_part, **item_part})

    df = pd.DataFrame(records)
    if df.empty:
        print(" Shipments: 0 zásilek, 0 kitu")
        return df

    df = df.sort_values(["Ship To:", "Shipment ID", "Medication ID"], na_position="last")
    df = df.reset_index(drop=True)
    date_cols = ("Request Date", "Received Date", "Expiration Date", "Expected Arrival")
    for col in date_cols:
        if col in df.columns:
            df[col] = pd.to_datetime(df[col], errors="coerce")
    n_ship = df["Shipment ID"].nunique()
    print(f" Shipments: {n_ship} zásilek, {len(df)} kitu")
    return df
def load_visits(study):
    """Load past visits that have an IRT transaction, one row per visit.

    Documents from ``iwrs_visits`` are grouped by subject, visit date,
    scheduled date, transaction number, visit name and medication. Each
    group's medication ids are joined into "Med IDs" and its assigned
    quantities summed into "Qty".
    """
    db = get_db()
    query = {
        "study": study,
        "visit_type": "Past",
        "irt_transaction_no": {"$ne": None},
    }
    rows = [
        {
            "Subject": v.get("subject"),
            # Fall back to the scheduled date when no actual date is recorded.
            "Visit Date": v.get("actual_date") or v.get("scheduled_date"),
            "Scheduled Date": v.get("scheduled_date"),
            "IRT Tx No": v.get("irt_transaction_no"),
            "Visit": v.get("irt_transaction_description"),
            "Medication": v.get("medication_assignment"),
            "medication_id": v.get("medication_id"),
            "quantity_assigned": v.get("quantity_assigned"),
        }
        for v in db.iwrs_visits.find(query)
    ]
    df = pd.DataFrame(rows)
    if df.empty:
        print(" Visits: 0 radku")
        return df

    group_keys = ["Subject", "Visit Date", "Scheduled Date", "IRT Tx No", "Visit", "Medication"]
    aggregations = {
        "Med IDs": ("medication_id", lambda s: ", ".join(sorted([str(x) for x in s if pd.notna(x)]))),
        "Qty": ("quantity_assigned", "sum"),
    }
    grouped = df.groupby(group_keys, dropna=False, as_index=False).agg(**aggregations)
    grouped = grouped.sort_values(["Subject", "Visit Date"]).reset_index(drop=True)
    for col in ("Visit Date", "Scheduled Date"):
        if col in grouped.columns:
            grouped[col] = pd.to_datetime(grouped[col], errors="coerce")
    if study == "77242113UCO3001":
        # In this study the screening visit is recorded under this name.
        grouped["Visit"] = grouped["Visit"].replace("Subject Number Creation", "Screening")
    print(f" Visits: {len(grouped)} řádků")
    return grouped
# ── Odvozené sheety ───────────────────────────────────────────────────────────
def build_site_summary(shipments_df):
    """Count kits per site and item status.

    Args:
        shipments_df: Frame with at least "Ship To:" and "Status" columns,
            or an empty frame.

    Returns:
        A frame with columns Site, Available, Assigned, Dispensed, Returned
        and Total, one row per site, sorted by site. Statuses other than
        the four listed are not counted.
    """
    STATUS_COLS = ["Available", "Assigned", "Dispensed", "Returned by Subject"]
    out_cols = ["Site", "Available", "Assigned", "Dispensed", "Returned", "Total"]
    if shipments_df.empty:
        # load_shipments() returns a column-less frame when there is no
        # data; grouping it would raise KeyError.
        print(" Site Summary: 0 center")
        return pd.DataFrame(columns=out_cols)

    pivot = shipments_df.groupby("Ship To:")["Status"].value_counts().unstack(fill_value=0)
    # Ensure every expected status has a column even if it never occurred.
    for s in STATUS_COLS:
        if s not in pivot.columns:
            pivot[s] = 0
    pivot = (
        pivot[STATUS_COLS]
        .reset_index()
        .rename(columns={"Ship To:": "Site", "Returned by Subject": "Returned"})
        .sort_values("Site")
        .reset_index(drop=True)
    )
    pivot["Total"] = pivot[["Available", "Assigned", "Dispensed", "Returned"]].sum(axis=1)
    print(f" Site Summary: {len(pivot)} center")
    return pivot
def build_expired(df):
    """Select unassigned, undestroyed kits whose expiry date has passed.

    Args:
        df: Inventory frame with "Basket No.", "Subject ID" and "Exp Date"
            columns, or an empty frame.

    Returns:
        ``(filtered, sheet_name)`` where ``sheet_name`` contains today's
        date formatted as DD-Mon-YYYY.
    """
    today = date.today()
    sheet_name = f"Expired as of {today.strftime('%d-%b-%Y')}"
    if df.empty:
        # load_inventory() returns a column-less frame when there is no
        # data; the column lookups below would raise KeyError.
        print(" Expired: 0")
        return df.copy().reset_index(drop=True), sheet_name

    mask = (
        df["Basket No."].isna() &
        df["Subject ID"].isna() &
        (df["Exp Date"] < pd.Timestamp(today))
    )
    filtered = df[mask].copy().reset_index(drop=True)
    print(f" Expired: {len(filtered)}")
    return filtered, sheet_name
def build_assigned_not_dispensed(df):
    """Select kits assigned to a subject that have no dispensing date.

    Args:
        df: Inventory frame with "Subject ID" and "Disp Date" columns, or
            an empty frame.
    """
    if df.empty:
        # A column-less empty frame would raise KeyError below.
        print(" Assigned not dispensed: 0")
        return df.copy().reset_index(drop=True)
    mask = df["Subject ID"].notna() & df["Disp Date"].isna()
    filtered = df[mask].copy().reset_index(drop=True)
    print(f" Assigned not dispensed: {len(filtered)}")
    return filtered
def build_not_returned(df):
    """Select kits a subject still holds from before their latest assignment.

    A kit qualifies when it has no return date, belongs to a subject, is
    not marked "NOT DISPENSED" (case-insensitive), and was assigned
    strictly earlier than that subject's most recent assignment date.

    Returns:
        The matching rows with an added "Max Visit Date" column and the
        return/destruction columns removed; an empty frame for empty input.
    """
    if df.empty:
        # A column-less empty frame would raise KeyError below.
        print(" Not returned: 0")
        return df.copy().reset_index(drop=True)

    no_ret = df[
        df["Date Ret"].isna() &
        df["Subject ID"].notna() &
        (df["Disp Status"].fillna("").str.upper() != "NOT DISPENSED")
    ].copy()
    # Latest assignment date per subject, computed over all of the subject's kits.
    max_asgn = df.groupby("Subject ID")["Date Asgn"].max().rename("Max Visit Date")
    no_ret = no_ret.join(max_asgn, on="Subject ID")
    filtered = no_ret[no_ret["Date Asgn"] < no_ret["Max Visit Date"]].copy()
    filtered = filtered.drop(columns=["Qty Ret", "Date Ret", "Ret User", "Destroyed", "Basket No."])
    filtered = filtered.reset_index(drop=True)
    print(f" Not returned: {len(filtered)}")
    return filtered
def build_kits_for_destruction(df):
    """Select kits that are not in a destruction basket but are ready for one.

    A kit is ready when it has a return date or its dispensation status is
    "NOT DISPENSED" (case-insensitive).

    Returns:
        Matching rows sorted by site and return date, without the
        "Destroyed" and "Basket No." columns; an empty frame for empty
        input.
    """
    if df.empty:
        # A column-less empty frame would raise KeyError below.
        print(" Kits for destruction: 0")
        return df.copy().reset_index(drop=True)

    mask = (
        df["Basket No."].isna() &
        (df["Date Ret"].notna() | (df["Disp Status"].fillna("").str.upper() == "NOT DISPENSED"))
    )
    filtered = (
        df[mask]
        .copy()
        .sort_values(["Site", "Date Ret"], ascending=[True, True])
        .drop(columns=["Destroyed", "Basket No."])
        .reset_index(drop=True)
    )
    print(f" Kits for destruction: {len(filtered)}")
    return filtered
# ── Formátování ───────────────────────────────────────────────────────────────
# Alternating row fills used by format_sheet() and format_shipment_sheet().
STRIPE_GRAY = PatternFill("solid", start_color="F2F2F2")
STRIPE_WHITE = PatternFill("solid", start_color="FFFFFF")
# Patient sheets: styles kept from create_subject_report.py
_PAT_HEADER_FILL = PatternFill("solid", start_color="1F4E79")
_PAT_HEADER_FONT = Font(name="Arial", bold=True, color="FFFFFF", size=10)
_PAT_NORMAL_FONT = Font(name="Arial", size=10)
_PAT_BOLD_FONT = Font(name="Arial", bold=True, size=10)
_PAT_STRIKE_FONT = Font(name="Arial", size=10, strike=True, color="999999")
_PAT_ADOLESC_FONT = Font(name="Arial", bold=True, size=10)
_PAT_THIN = Side(style="thin", color="CCCCCC")
_PAT_BORDER = Border(left=_PAT_THIN, right=_PAT_THIN, top=_PAT_THIN, bottom=_PAT_THIN)
_PAT_EVEN_FILL = PatternFill("solid", start_color="EBF3FB")
_PAT_ODD_FILL = PatternFill("solid", start_color="FFFFFF")
_PAT_CENTER = Alignment(horizontal="center", vertical="center")
_PAT_LEFT = Alignment(horizontal="left", vertical="center")
def _autofit(ws):
    """Set each column's width from its longest cell, capped at 50.

    Date-like cells are measured as 11 characters because they are
    displayed as DD-MMM-YYYY.
    """
    for column in ws.columns:
        letter = get_column_letter(column[0].column)
        widest = 0
        for cell in column:
            value = cell.value
            if value is None:
                continue
            is_date = hasattr(value, "strftime") or cell.number_format == "DD-MMM-YYYY"
            shown_len = 11 if is_date else len(str(value))
            widest = max(widest, shown_len)
        ws.column_dimensions[letter].width = min(widest + 3, 50)
def format_sheet(ws, header_color, highlight_col=None, highlight_color=None):
    """Apply the standard table look to a worksheet whose header is row 1.

    Styles the header with ``header_color``, stripes the data rows, applies
    the DD-MMM-YYYY format to columns listed in ``DATE_COLUMNS``, and
    optionally fills the column named ``highlight_col`` with
    ``highlight_color``. Finally auto-fits column widths, enables the
    auto-filter and freezes the header row.
    """
    edge = Side(style="thin", color="000000")
    box = Border(left=edge, right=edge, top=edge, bottom=edge)
    head_fill = PatternFill("solid", start_color=header_color)
    head_font = Font(bold=True, color="FFFFFF", name="Arial", size=10)
    body_font = Font(name="Arial", size=10)
    hi_fill = PatternFill("solid", start_color=highlight_color) if highlight_color else None

    header_cells = ws[1]
    headers = [c.value for c in header_cells]
    for head in header_cells:
        head.fill = head_fill
        head.font = head_font
        head.alignment = Alignment(horizontal="center", vertical="center", wrap_text=False)
        head.border = box

    for row in ws.iter_rows(min_row=2, max_row=ws.max_row):
        # Even-numbered sheet rows are gray, odd ones white.
        stripe = STRIPE_GRAY if row[0].row % 2 == 0 else STRIPE_WHITE
        for cell in row:
            col_name = headers[cell.column - 1] if cell.column <= len(headers) else None
            cell.font = body_font
            cell.border = box
            cell.alignment = Alignment(horizontal="center")
            if col_name in DATE_COLUMNS:
                cell.number_format = "DD-MMM-YYYY"
            use_highlight = hi_fill and col_name == highlight_col
            cell.fill = hi_fill if use_highlight else stripe

    _autofit(ws)
    ws.auto_filter.ref = ws.dimensions
    ws.freeze_panes = "A2"
def format_shipment_sheet(ws, header_color_ship, header_color_detail, n_ship_cols):
    """Format the shipments sheet with a two-colour header.

    The first ``n_ship_cols`` header cells use ``header_color_ship`` and
    the rest ``header_color_detail``. Data rows are striped, and cells
    holding datetime/date/Timestamp values get the DD-MMM-YYYY format.
    """
    edge = Side(style="thin", color="000000")
    box = Border(left=edge, right=edge, top=edge, bottom=edge)
    head_font = Font(bold=True, color="FFFFFF", name="Arial", size=10)
    body_font = Font(name="Arial", size=10)
    ship_fill = PatternFill("solid", start_color=header_color_ship)
    detail_fill = PatternFill("solid", start_color=header_color_detail)

    for head in ws[1]:
        head.fill = ship_fill if head.column <= n_ship_cols else detail_fill
        head.font = head_font
        head.alignment = Alignment(horizontal="center", vertical="center", wrap_text=True)
        head.border = box
    ws.row_dimensions[1].height = 30

    date_type_names = ("datetime", "date", "Timestamp")
    for row in ws.iter_rows(min_row=2, max_row=ws.max_row):
        stripe = STRIPE_GRAY if row[0].row % 2 == 0 else STRIPE_WHITE
        for cell in row:
            cell.font = body_font
            cell.border = box
            cell.alignment = Alignment(horizontal="center", vertical="center")
            cell.fill = stripe
            # Date detection is by the value's class name.
            if cell.value.__class__.__name__ in date_type_names:
                cell.number_format = "DD-MMM-YYYY"

    _autofit(ws)
    ws.auto_filter.ref = ws.dimensions
    ws.freeze_panes = "A2"
# ── Pacienti ─────────────────────────────────────────────────────────────────
def load_patients(study):
    """Load the subject summary of a study from MongoDB as a DataFrame.

    Study 77242113UCO3001 gets six additional flag columns. Rows are
    sorted by "Subject" and the next-transaction date is parsed to
    datetime.

    Raises:
        RuntimeError: when ``iwrs_subject_summary`` has no documents for
            the study.
    """
    db = get_db()
    docs = list(db.iwrs_subject_summary.find({"study": study}))
    if not docs:
        raise RuntimeError(f"Žádná data v Mongo pro pacienty {study}")

    next_date_label = "Next Expected IRT Transaction Date [Local]"
    cols = [
        ("subject", "Subject"),
        ("investigator", "Investigator"),
        ("age", "Subject's age collection"),
        ("cohort_per_irt", "Cohort per IRT"),
        ("irt_subject_status", "IRT Subject Status"),
        ("last_irt_transaction", "Last Recorded IRT Transaction"),
        ("next_irt_transaction", "Next Expected IRT Transaction"),
        ("next_irt_transaction_date_local", next_date_label),
    ]
    if study == "77242113UCO3001":
        cols.extend([
            ("rescreened_subject", "Rescreened Subject"),
            ("adt_ir", "ADT-IR"),
            ("three_or_more_advanced_therapies", "3+ Adv. Therapies"),
            ("only_oral_5asa_compounds", "Only 5-ASA"),
            ("ustekinumab", "Ustekinumab"),
            ("isolated_proctitis", "Isolated Proctitis"),
        ])

    records = []
    for doc in docs:
        records.append({label: doc.get(key) for key, label in cols})
    df = pd.DataFrame(records).sort_values("Subject").reset_index(drop=True)
    if next_date_label in df.columns:
        df[next_date_label] = pd.to_datetime(df[next_date_label], errors="coerce")
    print(f" Pacienti: {len(df)} subjektů")
    return df
def _simplify_cohort(val):
    """Reduce an IRT cohort label to "Adolescent", "Adult" or the raw text.

    Missing values give an empty string. The "dolescent" substring test
    matches both "Adolescent" and "adolescent".
    """
    if pd.isna(val):
        return ""
    label = str(val)
    if "dolescent" in label:
        return "Adolescent"
    return "Adult" if label.startswith("Adult") else label
def _fmt_date(val):
    """Format a date-like value as YYYY-MM-DD; missing values give "".

    Values without ``strftime`` are stringified and cut to 10 characters.
    """
    if pd.isna(val):
        return ""
    formatter = getattr(val, "strftime", None)
    if formatter is None:
        return str(val)[:10]
    return formatter("%Y-%m-%d")
def _write_prehled(wb, df_raw, study):
    """Insert the "Přehled" (overview) sheet as the first sheet of ``wb``.

    Row 1 holds a merged title, row 2 the headers, and data starts at row 3
    with one row per subject, sorted by subject. Study 77242113UCO3001 gets
    six extra flag columns.

    Args:
        wb: openpyxl workbook to add the sheet to.
        df_raw: Patient frame using the column labels from load_patients().
        study: Study identifier; selects the column layout and the title.
    """
    ws = wb.create_sheet("Přehled", 0)
    ws.sheet_view.showGridLines = False
    is_uco = (study == "77242113UCO3001")
    if is_uco:
        display_headers = ["Subject", "Investigator", "Věk", "Cohort",
                           "Rescreened", "ADT-IR", "≥3 Adv.Th.", "5-ASA only",
                           "Uste.", "Isol.Proct.",
                           "Status", "Last IRT", "Next Visit", "Next Date"]
        col_widths = [14, 22, 6, 12, 11, 8, 11, 10, 8, 12, 14, 12, 12, 13]
        status_col = 11
        flag_cols = set(range(5, 11))  # 1-indexed columns holding Yes/No values
    else:
        display_headers = ["Subject", "Investigator", "Věk", "Cohort", "Status", "Last IRT", "Next Visit", "Next Date"]
        col_widths = [14, 22, 6, 12, 14, 12, 12, 13]
        status_col = 5
        flag_cols = set()
    # Title row merged across all displayed columns.
    last_col = get_column_letter(len(display_headers))
    ws.merge_cells(f"A1:{last_col}1")
    title = ws["A1"]
    title.value = f"Subject Summary — {study} ({date.today().strftime('%d-%b-%Y')})"
    title.font = Font(name="Arial", bold=True, size=12, color="1F4E79")
    title.alignment = Alignment(horizontal="left", vertical="center")
    ws.row_dimensions[1].height = 22
    # Header row (row 2) and fixed column widths.
    for c, (h, w) in enumerate(zip(display_headers, col_widths), 1):
        cell = ws.cell(row=2, column=c, value=h)
        cell.font = _PAT_HEADER_FONT
        cell.fill = _PAT_HEADER_FILL
        cell.alignment = _PAT_CENTER
        cell.border = _PAT_BORDER
        ws.column_dimensions[get_column_letter(c)].width = w
    ws.row_dimensions[2].height = 18
    # Build the display frame; insertion order of the keys is the column order.
    base = {
        "Subject": df_raw["Subject"].fillna(""),
        "Investigator": df_raw["Investigator"].fillna(""),
        "Věk": df_raw["Subject's age collection"].apply(lambda v: "" if pd.isna(v) else int(v)),
        "Cohort": df_raw["Cohort per IRT"].apply(_simplify_cohort),
    }
    if is_uco:
        base.update({
            "Rescreened": df_raw["Rescreened Subject"].fillna(""),
            "ADT-IR": df_raw["ADT-IR"].fillna(""),
            "≥3 Adv.Th.": df_raw["3+ Adv. Therapies"].fillna(""),
            "5-ASA only": df_raw["Only 5-ASA"].fillna(""),
            "Uste.": df_raw["Ustekinumab"].fillna(""),
            "Isol.Proct.": df_raw["Isolated Proctitis"].fillna(""),
        })
    base.update({
        "Status": df_raw["IRT Subject Status"].fillna(""),
        "Last IRT": df_raw["Last Recorded IRT Transaction"].fillna(""),
        "Next Visit": df_raw["Next Expected IRT Transaction"].fillna(""),
        "Next Date": df_raw["Next Expected IRT Transaction Date [Local]"].apply(_fmt_date),
    })
    display = pd.DataFrame(base).sort_values("Subject").reset_index(drop=True)
    for r_idx, row in display.iterrows():
        excel_row = r_idx + 3  # data starts below the title and header rows
        status = str(row["Status"])
        is_failed = "Screen Failed" in status or "Discontinued" in status
        is_randomized = "Randomized" in status
        is_adolescent = row["Cohort"] == "Adolescent"
        fill = _PAT_EVEN_FILL if r_idx % 2 == 0 else _PAT_ODD_FILL
        for c_idx, val in enumerate(row, 1):
            # Empty strings are written as empty cells.
            cell = ws.cell(row=excel_row, column=c_idx, value=val if val != "" else None)
            cell.fill = fill
            cell.border = _PAT_BORDER
            # Age (column 3) and the flag columns are centred; the rest left-aligned.
            cell.alignment = _PAT_CENTER if (c_idx == 3 or c_idx in flag_cols) else _PAT_LEFT
            # Font precedence: failed/discontinued row, then randomized
            # status cell, then adolescent cohort cell (column 4).
            if is_failed:
                cell.font = _PAT_STRIKE_FONT
            elif c_idx == status_col and is_randomized:
                cell.font = _PAT_BOLD_FONT
            elif c_idx == 4 and is_adolescent:
                cell.font = _PAT_ADOLESC_FONT
            else:
                cell.font = _PAT_NORMAL_FONT
        ws.row_dimensions[excel_row].height = 16
    ws.freeze_panes = "A3"
    ws.auto_filter.ref = f"A2:{last_col}{len(display) + 2}"
def _write_next_visits(wb, df_raw, study, visits_df=None):
    """Insert the "Next Visits" sheet at index 1 of ``wb``.

    Lists subjects with a known next-visit date, excluding screen-failed
    and discontinued subjects, sorted by that date. Row 1 is the title,
    row 2 the headers, and data starts at row 3.

    Args:
        wb: openpyxl workbook to add the sheet to.
        df_raw: Patient frame using the column labels from load_patients().
        study: Study identifier, used in the title.
        visits_df: Optional visits frame with "Subject", "Visit" and
            "Visit Date" columns; used to derive the date of "I-0" visits.
    """
    ws = wb.create_sheet("Next Visits", 1)
    ws.sheet_view.showGridLines = False
    ws.merge_cells("A1:D1")
    title = ws["A1"]
    title.value = f"Next Expected Visits — {study} ({date.today().strftime('%d-%b-%Y')})"
    title.font = Font(name="Arial", bold=True, size=12, color="1F4E79")
    title.alignment = Alignment(horizontal="left", vertical="center")
    ws.row_dimensions[1].height = 22
    nv_headers = ["Subject", "Investigator", "Next Visit", "Datum"]
    nv_widths = [14, 22, 26, 13]
    for c, (h, w) in enumerate(zip(nv_headers, nv_widths), 1):
        cell = ws.cell(row=2, column=c, value=h)
        cell.font = _PAT_HEADER_FONT
        cell.fill = _PAT_HEADER_FILL
        cell.alignment = _PAT_CENTER
        cell.border = _PAT_BORDER
        ws.column_dimensions[get_column_letter(c)].width = w
    ws.row_dimensions[2].height = 18
    # "Status" is only used for filtering below; it is not written to the sheet.
    df = pd.DataFrame({
        "Subject": df_raw["Subject"].fillna(""),
        "Investigator": df_raw["Investigator"].fillna(""),
        "Next Visit": df_raw["Next Expected IRT Transaction"].fillna(""),
        "Datum": df_raw["Next Expected IRT Transaction Date [Local]"],
        "Status": df_raw["IRT Subject Status"].fillna(""),
    })
    # I-0: date = screening date + 42 days
    if visits_df is not None and not visits_df.empty:
        # Earliest visit per subject whose name contains "Screen".
        screen = (
            visits_df[visits_df["Visit"].str.contains("Screen", case=False, na=False)]
            .groupby("Subject")["Visit Date"].min()
            .rename("Screening Date")
        )
        df = df.join(screen, on="Subject")
        mask_i0 = df["Next Visit"].str.contains("I-0", na=False)
        df.loc[mask_i0, "Datum"] = df.loc[mask_i0, "Screening Date"] + pd.Timedelta(days=42)
        df = df.drop(columns=["Screening Date"])
    df = df[df["Datum"].notna()]
    df = df[~df["Status"].str.contains("Screen Failed|Discontinued", na=False)]
    df = df.sort_values("Datum").reset_index(drop=True)
    for r_idx, row in df.iterrows():
        excel_row = r_idx + 3  # data starts below the title and header rows
        fill = _PAT_EVEN_FILL if r_idx % 2 == 0 else _PAT_ODD_FILL
        datum_val = row["Datum"]
        # The date is written as YYYY-MM-DD text, not as a date cell.
        datum_str = datum_val.strftime("%Y-%m-%d") if hasattr(datum_val, "strftime") else str(datum_val)[:10]
        for c_idx, val in enumerate([row["Subject"], row["Investigator"], row["Next Visit"], datum_str], 1):
            cell = ws.cell(row=excel_row, column=c_idx, value=val if val != "" else None)
            cell.fill = fill
            cell.border = _PAT_BORDER
            cell.font = _PAT_NORMAL_FONT
            cell.alignment = _PAT_LEFT
        ws.row_dimensions[excel_row].height = 16
    ws.freeze_panes = "A3"
    ws.auto_filter.ref = f"A2:D{len(df) + 2}"
# ── Jeden report pro jednu studii ─────────────────────────────────────────────
def create_study_report(study):
    """Build the dated, versioned IWRS overview workbook for one study.

    Data is loaded through the ``load_*`` helpers, the derived tables are
    produced by the ``build_*`` helpers, everything is written to a new
    ``.xlsx`` file in ``OUTPUT_DIR`` and the file is then reopened to apply
    formatting, add the patient sheets and move "Patient Visits" to the front.

    The file name carries today's date and a version number one higher than
    the highest version already present for that date and study.

    Args:
        study: Study code used in the file name and passed to the loaders.
    """
    today = date.today()

    # Versioning: compare the numeric suffixes. Sorting the file names as
    # strings would rank "v10" below "v9", so from the eleventh run of a day
    # the same version number would be reused and the file overwritten.
    versions = []
    for path in OUTPUT_DIR.glob(f"{today} {study} CZ IWRS overview v*.xlsx"):
        # e.g. "2026-05-12 42847922MDD3003 CZ IWRS overview v3" -> "3"
        suffix = path.stem.rsplit("v", 1)[-1]
        if suffix.isdecimal():  # ignore stray names such as "... v3 copy"
            versions.append(int(suffix))
    version = max(versions, default=0) + 1

    output_file = OUTPUT_DIR / f"{today} {study} CZ IWRS overview v{version}.xlsx"

    print(f"\n[{study}] Nacitam z MongoDB...")
    df = load_inventory(study)
    shipments_df = load_shipments(study)
    df_patients = load_patients(study)
    visits_df = load_visits(study)

    expired_df, expired_sheet = build_expired(df)
    assigned_df = build_assigned_not_dispensed(df)
    not_returned_df = build_not_returned(df)
    destruction_df = build_kits_for_destruction(df)
    site_summary_df = build_site_summary(shipments_df)

    # First pass: dump the raw tables, one sheet each.
    with pd.ExcelWriter(output_file, engine="openpyxl") as writer:
        df.to_excel(writer, index=False, sheet_name="CountryMedicationOverview")
        expired_df.to_excel(writer, index=False, sheet_name=expired_sheet)
        assigned_df.to_excel(writer, index=False, sheet_name="Assigned not dispensed")
        not_returned_df.to_excel(writer, index=False, sheet_name="Not returned")
        destruction_df.to_excel(writer, index=False, sheet_name="Kits for destruction")
        shipments_df.to_excel(writer, index=False, sheet_name="Shipments")
        site_summary_df.to_excel(writer, index=False, sheet_name="Site Summary")
        visits_df.to_excel(writer, index=False, sheet_name="Patient Visits")

    # Second pass: reopen the workbook with openpyxl to apply formatting.
    wb = load_workbook(output_file)
    ws_main = wb["CountryMedicationOverview"]
    format_sheet(ws_main, header_color="1F4E79")

    # Shade the "Destroyed" and "Basket No." columns green on the main sheet.
    green_fill = PatternFill("solid", start_color="E2EFDA")
    headers_main = [c.value for c in ws_main[1]]
    for row in ws_main.iter_rows(min_row=2, max_row=ws_main.max_row):
        for cell in row:
            col_name = headers_main[cell.column - 1] if cell.column <= len(headers_main) else None
            if col_name in ("Destroyed", "Basket No."):
                cell.fill = green_fill

    format_sheet(wb[expired_sheet], header_color="C00000", highlight_col="Exp Date", highlight_color="FFE0E0")
    format_sheet(wb["Assigned not dispensed"], header_color="833C00", highlight_col="Subject ID", highlight_color="FFF2CC")
    format_sheet(wb["Not returned"], header_color="375623", highlight_col="Max Visit Date", highlight_color="E2EFDA")
    format_sheet(wb["Kits for destruction"], header_color="595959")
    format_shipment_sheet(wb["Shipments"], "1F4E79", "375623", N_SHIP_COLS)
    format_sheet(wb["Site Summary"], header_color="1F4E79")
    format_sheet(wb["Patient Visits"], header_color="1F4E79")

    # Patient sheets (overview + next visits).
    _write_prehled(wb, df_patients, study)
    _write_next_visits(wb, df_patients, study, visits_df)

    # Sheet order: "Patient Visits" first, the rest in their current order.
    # NOTE: this assigns openpyxl's private `_sheets` list directly.
    names = wb.sheetnames
    wb._sheets = [wb["Patient Visits"]] + [wb[s] for s in names if s != "Patient Visits"]

    wb.save(output_file)
    print(f" Uloženo: {output_file.name} ({len(df)} řádků)")
# ── Main ──────────────────────────────────────────────────────────────────────
def main():
    """Create the overview report for every study listed in ``STUDIES``.

    A failure in one study is printed together with its traceback and does
    not prevent the remaining studies from being processed.
    """
    import traceback

    OUTPUT_DIR.mkdir(exist_ok=True)
    for study in STUDIES:
        try:
            create_study_report(study)
        except Exception as exc:
            print(f"\n[{study}] CHYBA: {exc}")
            traceback.print_exc()
    print("\nHotovo.")


main()
-5
View File
@@ -1,5 +0,0 @@
# Database connection settings (host, port, credentials, schema name).
# NOTE(review): the root password is stored here in plain text and has been
# committed to version control — consider rotating it and loading it from the
# environment or an untracked local file instead.
DB_HOST = "192.168.1.76"
DB_PORT = 3306
DB_USER = "root"
DB_PASSWORD = "Vlado9674+"
DB_NAME = "studie"
-220
View File
@@ -1,220 +0,0 @@
"""
download_drugs.py — stažení Drugs reportů pro jednu studii do IWRS/Incoming/.
Verze: 1.0 | Datum: 2026-06-10
Volá se z IWRS/run_all_v1.0.py s již přihlášenou Playwright page (login +
výběr studie zajišťuje common.iwrs_portal.login).
1. Onsite inventory detail (per site, stahuje se vždy)
2. IP destruction (per košík; přeskočí košíky už importované
v Mongo iwrs_destruction — destrukce se nemění)
3. Shipments report (jeden soubor na studii, stahuje se vždy)
4. Shipment details (per CZ zásilka; přeskočí zásilky, jejichž
položky jsou v Mongo iwrs_shipment_items se
statusem RECEIVED — finální stav)
Názvy souborů (datumované, aby zapadly do Incoming/ flow):
YYYY-MM-DD {study} Onsite Inventory {site}.xlsx
YYYY-MM-DD {study} IP Destruction {basket}.xlsx
YYYY-MM-DD {study} Shipments Report.xlsx
YYYY-MM-DD {study} Shipment Details {shipment_id}.xlsx
"""
import os
import sys
import datetime
import pandas as pd
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
IWRS_DIR = os.path.dirname(BASE_DIR)
for _p in (IWRS_DIR, BASE_DIR):
if _p not in sys.path:
sys.path.insert(0, _p)
from common.iwrs_portal import BASE_URL
from common.paths import INCOMING_DIR, unique_path
from common.mongo_writer import get_db
# Site IDs per study code. download_inventory() iterates over the list for the
# given study and downloads one Onsite Inventory report per site.
SITES = {
"77242113UCO3001": [
"DD5-CZ10001", "DD5-CZ10003", "DD5-CZ10006", "DD5-CZ10009",
"DD5-CZ10010", "DD5-CZ10012", "DD5-CZ10013", "DD5-CZ10015",
"DD5-CZ10016", "DD5-CZ10020", "DD5-CZ10021", "DD5-CZ10022",
],
"42847922MDD3003": [
"S10-CZ10002", "S10-CZ10004", "S10-CZ10005",
"S10-CZ10008", "S10-CZ10011", "S10-CZ10012",
],
}
def _today():
    """Return today's local date formatted as ``YYYY-MM-DD``."""
    return datetime.date.today().isoformat()
# ── skip-logika přes Mongo (náhrada za dřívější "soubor existuje") ───────────
def get_existing_baskets(study):
    """Return the basket IDs already stored in ``iwrs_destruction`` for *study*.

    Destruction records are treated as immutable, so baskets found here do
    not need to be downloaded again. If the lookup fails for any reason, a
    warning is printed and an empty set is returned, which makes the caller
    download every basket.
    """
    try:
        collection = get_db().iwrs_destruction
        basket_ids = collection.distinct("basket_id", {"study": study})
        return set(basket_ids)
    except Exception as e:
        print(f" UPOZORNĚNÍ: nelze načíst košíky z Mongo ({e}), stahuji vše")
        return set()
def get_received_shipments(study):
    """Return shipment IDs whose stored items have status RECEIVED for *study*.

    The status is matched case-insensitively against the whole value.
    RECEIVED is treated as a final state, so these shipments can be skipped.
    If the lookup fails for any reason, a warning is printed and an empty set
    is returned, which makes the caller download every shipment.
    """
    try:
        received_filter = {
            "study": study,
            "shipment_status": {"$regex": "^received$", "$options": "i"},
        }
        shipment_ids = get_db().iwrs_shipment_items.distinct("shipment_id", received_filter)
        return set(shipment_ids)
    except Exception as e:
        print(f" UPOZORNĚNÍ: nelze načíst zásilky z Mongo ({e}), stahuji vše")
        return set()
# ── download funkce ──────────────────────────────────────────────────────────
def download_inventory(page, study):
    """Download the Onsite Inventory report for every site of *study*.

    Opens the onsite inventory detail report and, for each site ID in
    ``SITES[study]``, selects the site, downloads the XLS export into
    ``INCOMING_DIR`` and clears the selection again.

    Args:
        page: Playwright page; presumably already logged in with the study
            selected (per the module docstring) — verify against the caller.
        study: Study code; must be a key of ``SITES``.
    """
    date_prefix = _today()
    search_box = 'input[placeholder="search"], input[type="text"]'
    wait_ms = 120000

    def settle():
        # Wait until the portal has finished its network activity.
        page.wait_for_load_state("networkidle", timeout=wait_ms)

    page.goto(f"{BASE_URL}/report/onsite_inventory_detail")
    settle()

    for site_id in SITES[study]:
        print(f" [{site_id}] inventory...")
        page.locator(search_box).first.click()
        page.get_by_role("option", name=site_id).click()
        settle()

        target = unique_path(INCOMING_DIR, f"{date_prefix} {study} Onsite Inventory {site_id}")
        with page.expect_download(timeout=wait_ms) as download_info:
            page.get_by_role("button", name="Download XLS").click()
        download_info.value.save_as(target)

        # Reset the filter so the next site starts from a clean form.
        page.get_by_role("button", name="Clear").click()
        settle()

    print(f" Inventory OK ({len(SITES[study])} center)")
def download_destruction(page, study):
    """Download the IP Destruction form for every basket not yet imported.

    The basket list is read from the options offered by the report's search
    box. Baskets already returned by ``get_existing_baskets`` are skipped;
    each remaining basket is selected, exported to XLS into ``INCOMING_DIR``
    and the selection is cleared.

    Args:
        page: Playwright page positioned in the IWRS portal.
        study: Study code used for the skip lookup and the file name.
    """
    date_prefix = _today()
    search_box = 'input[placeholder="search"], input[type="text"]'
    wait_ms = 120000

    page.goto(f"{BASE_URL}/report/ip_destruction_form")
    page.wait_for_load_state("networkidle", timeout=wait_ms)

    # Open the dropdown once to read the available baskets, then close it.
    page.locator(search_box).first.click()
    page.wait_for_timeout(1000)
    option_texts = page.locator("mat-option").all_inner_texts()
    baskets = [text for text in map(str.strip, option_texts)
               if text and text != "No results found"]
    page.keyboard.press("Escape")
    page.wait_for_timeout(500)

    if not baskets:
        print(" Žádné destruction košíky")
        return

    existing = get_existing_baskets(study)
    new_count = 0
    for basket in baskets:
        if basket in existing:
            # Already imported; destruction records do not change.
            continue

        print(f" [košík {basket}] stahování...")
        field = page.locator(search_box).first
        field.click()
        field.fill(basket)
        page.wait_for_timeout(500)
        page.locator("mat-option").first.dispatch_event("click")
        page.wait_for_load_state("networkidle", timeout=wait_ms)

        target = unique_path(INCOMING_DIR, f"{date_prefix} {study} IP Destruction {basket}")
        with page.expect_download(timeout=wait_ms) as download_info:
            page.get_by_role("button", name="Download XLS").click()
        download_info.value.save_as(target)
        new_count += 1

        page.get_by_role("button", name="Clear").click()
        page.wait_for_load_state("networkidle", timeout=wait_ms)

    print(f" Destruction OK ({new_count} nových, {len(baskets) - new_count} přeskočeno)")
def download_shipments_report(page, study):
    """Download the Shipments report for *study* and return the saved path.

    Args:
        page: Playwright page positioned in the IWRS portal.
        study: Study code used in the file name.

    Returns:
        The path produced by ``unique_path`` under ``INCOMING_DIR`` to which
        the download was saved.
    """
    date_prefix = _today()
    wait_ms = 120000

    page.goto(f"{BASE_URL}/report/shipments_report")
    page.wait_for_load_state("networkidle", timeout=wait_ms)

    target = unique_path(INCOMING_DIR, f"{date_prefix} {study} Shipments Report")
    with page.expect_download(timeout=wait_ms) as download_info:
        page.get_by_role("button", name="Download XLS").click()
    download_info.value.save_as(target)

    print(f" Shipments report OK -> {os.path.basename(target)}")
    return target
def download_shipment_details(page, study, shipments_report_path):
    """Download the Shipment Details report for each Czech shipment.

    The shipment IDs are read from the Shipments report at
    *shipments_report_path*: the header row is the first row containing a
    "Shipment ID" cell, and only rows whose "Location" contains "Czech"
    (case-insensitive) are used. Shipments returned by
    ``get_received_shipments`` are skipped; every other shipment is selected
    in the portal, exported to XLS into ``INCOMING_DIR`` and the selection is
    cleared.

    Args:
        page: Playwright page positioned in the IWRS portal.
        study: Study code used for the skip lookup and the file names.
        shipments_report_path: Path of the downloaded Shipments report.

    Raises:
        ValueError: If the report contains no "Shipment ID" header row.
    """
    today = _today()

    # Locate the header row; the report may carry preamble rows above it.
    raw = pd.read_excel(shipments_report_path, header=None)
    header_row = next(
        (i for i, row in raw.iterrows()
         if "Shipment ID" in [str(v).strip() for v in row]),
        None,
    )
    if header_row is None:
        # Fail with a clear message instead of an opaque KeyError on
        # "Location" below (same error as parse_shipments_file).
        raise ValueError("hlavičkový řádek 'Shipment ID' nenalezen")

    df = pd.read_excel(shipments_report_path, header=header_row)
    df = df.dropna(how="all")
    df = df[df["Location"].astype(str).str.contains("Czech", na=False, case=False)]
    # Rows without an ID would otherwise become the literal string "nan" and
    # be typed into the portal search box.
    df = df[df["Shipment ID"].notna()]

    shipment_ids = df["Shipment ID"].astype(str).str.strip()
    if "IRT Shipment Status" in df.columns:
        statuses = df["IRT Shipment Status"].astype(str).str.strip()
    else:
        statuses = [""] * len(df)
    cz_shipments = [(sid, status) for sid, status in zip(shipment_ids, statuses) if sid]
    print(f" CZ zásilek celkem: {len(cz_shipments)}")

    received = get_received_shipments(study)

    page.goto(f"{BASE_URL}/report/shipment_details_report")
    page.wait_for_load_state("networkidle", timeout=120000)

    skipped = 0
    for shipment, status in cz_shipments:
        if shipment in received:
            # Stored items already have the final RECEIVED status.
            skipped += 1
            continue

        input_field = page.locator('input[placeholder="search"], input[type="text"]').first
        input_field.click()
        input_field.fill(shipment)
        page.wait_for_timeout(500)
        page.locator("mat-option").first.dispatch_event("click")
        page.wait_for_load_state("networkidle", timeout=120000)

        filename = unique_path(INCOMING_DIR, f"{today} {study} Shipment Details {shipment}")
        with page.expect_download(timeout=120000) as dl:
            page.get_by_role("button", name="Download XLS").click()
        dl.value.save_as(filename)
        print(f" [{shipment}] ({status}) OK")

        page.get_by_role("button", name="Clear").click()
        page.wait_for_load_state("networkidle", timeout=120000)

    print(f" Přeskočeno (RECEIVED v Mongo): {skipped}")
def run(page, study):
    """Download all four Drugs report types for *study* into ``INCOMING_DIR``.

    The steps run in a fixed order because the shipment details step reads
    the shipment IDs from the Shipments report saved by the step before it.

    Args:
        page: Playwright page positioned in the IWRS portal.
        study: Study code to download reports for.
    """
    os.makedirs(INCOMING_DIR, exist_ok=True)

    print("\n [1/4] Onsite inventory...")
    download_inventory(page, study)

    print("\n [2/4] IP destruction...")
    download_destruction(page, study)

    print("\n [3/4] Shipments report...")
    shipments_report = download_shipments_report(page, study)

    print("\n [4/4] Shipment details (CZ)...")
    download_shipment_details(page, study, shipments_report)
-306
View File
@@ -1,306 +0,0 @@
"""
import_drugs.py — import Drugs reportů z IWRS/Incoming/ do MongoDB.
Verze: 1.0 | Datum: 2026-06-10
Nahrazuje Drugs/import_to_mongo.py (ten parsoval pevné adresáře xls_*;
nyní se parsují datumované soubory z IWRS/Incoming/).
Per studie a běh: jeden import_id. Soubory se zpracují nejstarší napřed,
při více souborech stejného záznamu vyhrává poslední (poslední stav).
Po úspěšném zápisu do Monga se zparsované soubory přesunou do
IWRS/Incoming/Processed/; soubor s chybou parsování zůstává v Incoming/.
Cílové kolekce (db `studie`):
iwrs_shipments / iwrs_shipment_items / iwrs_inventory (upsert + snapshot)
iwrs_destruction (upsert only, immutable)
Volá se z IWRS/run_all_v1.0.py (ensure_indexes volá orchestrátor);
lze spustit i samostatně: python import_drugs.py
"""
import os
import re
import sys
import glob
import pandas as pd
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
IWRS_DIR = os.path.dirname(BASE_DIR)
for _p in (IWRS_DIR, BASE_DIR):
if _p not in sys.path:
sys.path.insert(0, _p)
from common.paths import INCOMING_DIR, STUDIES, move_done, sorted_by_mtime
from common.mongo_writer import (
to_str, to_int, to_date,
ensure_indexes, log_import,
bulk_upsert_with_snapshot, bulk_upsert_only,
)
def _pending(pattern):
    """Return the files in ``INCOMING_DIR`` matching glob *pattern*.

    The matches are passed through ``sorted_by_mtime`` (presumably oldest
    first, as the module docstring describes — confirm in common.paths).
    """
    search = os.path.join(INCOMING_DIR, pattern)
    matches = glob.glob(search)
    return sorted_by_mtime(matches)
def _find_header_row(raw, marker):
    """Return the index label of the first row containing *marker*.

    Each cell is converted with ``str()`` and stripped of surrounding
    whitespace before the comparison. Returns ``None`` when no row matches.
    """
    return next(
        (label for label, cells in raw.iterrows()
         if any(str(value).strip() == marker for value in cells)),
        None,
    )
# ── XLSX parsery (per soubor) ────────────────────────────────────────────────
def parse_shipments_file(path, study):
    """Parse one Shipments report into a list of shipment documents.

    Only rows whose "Location" contains "Czech" (case-insensitive) are kept,
    and rows without a shipment ID are skipped. The shipment ID is used as
    the document ``_id``. Columns that may be missing from the report yield
    ``None`` for their field.

    Raises:
        ValueError: If no header row containing "Shipment ID" is found.
    """
    raw = pd.read_excel(path, header=None)
    header_row = _find_header_row(raw, "Shipment ID")
    if header_row is None:
        raise ValueError("hlavičkový řádek 'Shipment ID' nenalezen")

    df = pd.read_excel(path, header=header_row).dropna(how="all")
    is_czech = df["Location"].astype(str).str.contains("Czech", na=False, case=False)
    df = df[is_czech]
    available = df.columns.tolist()

    def optional(record, column, convert):
        # Convert the cell when the column exists in this report, else None.
        return convert(record[column]) if column in available else None

    rows = []
    for _, r in df.iterrows():
        sid = to_str(r["Shipment ID"])
        if not sid:
            continue
        rows.append({
            "_id": sid,
            "shipment_id": sid,
            "study": study,
            "status": to_str(r["IRT Shipment Status"]),
            "type": to_str(r["Type"]),
            "ship_from": to_str(r["Shipment From"]),
            "ship_to_site": to_str(r["Ship To:"]),
            "location": to_str(r["Location"]),
            "request_date": to_date(r["Request Date"]),
            "shipped_date": to_date(r["Shipped Date"]),
            "received_date": optional(r, "Received Date", to_date),
            "received_by": optional(r, "Received by", to_str),
            "delivered_date_utc": optional(r, "Delivered Date [UTC]", to_date),
            "delivery_recipient": optional(r, "Delivery Recipient", to_str),
            "delivery_details": optional(r, "Delivery Details", to_str),
            "cancelled_date": optional(r, "Cancelled Date", to_date),
            "total_medication_ids": optional(r, "Total Medication IDs", to_int),
            "tracking_no": optional(r, "Tracking #", to_str),
            "shipping_category": optional(r, "Shipping Category", to_str),
            "expected_arrival": optional(r, "Expected Arrival", to_date),
        })
    return rows
def parse_shipment_details_file(path, study):
    """Parse one Shipment Details report into a list of item documents.

    The shipment ID comes from the file name
    ("... Shipment Details {id}[ HHMM].xlsx"); when the name does not match,
    "UNKNOWN" is used. Rows without a medication ID are skipped. Each
    document's ``_id`` is "{shipment_id}:{medication_id}".

    Raises:
        ValueError: If no header row containing "Medication ID" is found.
    """
    match = re.search(r"Shipment Details (\S+?)(?: \d{4})?\.xlsx$", os.path.basename(path))
    if match:
        shipment_id = match.group(1)
    else:
        shipment_id = "UNKNOWN"

    raw = pd.read_excel(path, header=None)
    header_row = _find_header_row(raw, "Medication ID")
    if header_row is None:
        raise ValueError("hlavičkový řádek 'Medication ID' nenalezen")

    df = pd.read_excel(path, header=header_row).dropna(how="all")
    rows = []
    for _, rec in df.iterrows():
        med_id = to_str(rec.get("Medication ID"))
        if not med_id:
            continue

        # Description and type are looked up under two alternative headers.
        med_desc = (to_str(rec.get("Medication Description"))
                    or to_str(rec.get("Medication ID Description")))
        med_type = (to_str(rec.get("Medication type"))
                    or to_str(rec.get("Medication ID type")))

        rows.append({
            "_id": f"{shipment_id}:{med_id}",
            "study": study,
            "shipment_id": shipment_id,
            "destination_location": to_str(rec.get("Destination Location")),
            "shipment_status": to_str(rec.get("IRT Shipment Status")),
            "shipment_type": to_str(rec.get("Type")),
            "destination_site": to_str(rec.get("Destination Site")),
            "investigator": to_str(rec.get("Investigator")),
            "medication_description": med_desc,
            "medication_type": med_type,
            "medication_id": med_id,
            "packaged_lot_no": to_str(rec.get("Packaged Lot number")),
            "packaged_lot_description": to_str(rec.get("Packaged Lot description")),
            "container_id": to_str(rec.get("Container ID")),
            "quantity": to_int(rec.get("Quantity of Medication IDs")),
            "expiration_date": to_date(rec.get("Expiration Date")),
            "item_status": to_str(rec.get("Status")),
        })
    return rows
def parse_inventory_file(path, study):
    """Parse one Onsite Inventory report into a list of inventory documents.

    The rows above the table are scanned for "Site:", "Investigator:" and
    "Location:" lines in the first column; the table header is the first row
    whose first cell is "Medication" or "Medication ID". The first column of
    the table is treated as the medication ID. Rows are skipped when the
    medication ID is empty or no site was found. Each document's ``_id`` is
    "{site}:{medication_id}".

    Raises:
        ValueError: If the table header row is not found.
    """
    raw = pd.read_excel(path, header=None)

    # Prefix of the metadata line -> value found (last occurrence wins).
    meta = {"Site:": None, "Investigator:": None, "Location:": None}
    header_row = None
    for i, row in raw.iterrows():
        cell = row.iloc[0]
        first = str(cell).strip() if pd.notna(cell) else ""
        for prefix in meta:
            if first.startswith(prefix):
                meta[prefix] = first.replace(prefix, "").strip()
                break
        if header_row is None and first in ("Medication", "Medication ID"):
            header_row = i

    if header_row is None:
        raise ValueError("hlavičkový řádek 'Medication' nenalezen")

    site = meta["Site:"]
    investigator = meta["Investigator:"]
    location = meta["Location:"]

    df = pd.read_excel(path, header=header_row).dropna(how="all")
    df = df.rename(columns={df.columns[0]: "medication_id"})

    rows = []
    for _, rec in df.iterrows():
        med_id = to_str(rec["medication_id"])
        if not med_id or not site:
            continue
        rows.append({
            "_id": f"{site}:{med_id}",
            "study": study,
            "site": site,
            "investigator": investigator,
            "location": location,
            "medication_id": med_id,
            "packaged_lot_no": to_str(rec.get("Packaged Lot number")),
            "original_expiration_date": to_date(rec.get("Original Expiration Date when Packaged Lot was Added")),
            "expiration_date": to_date(rec.get("Expiration date")),
            "received_date": to_date(rec.get("Received Date")),
            "receipt_user": to_str(rec.get("Shipment Receipt User")),
            "subject_identifier": to_str(rec.get("Subject Identifier")),
            "quantity_assigned": to_int(rec.get("Quantity Assigned")),
            "irt_transaction": to_str(rec.get("IRT Transaction")),
            "date_assigned": to_date(rec.get("Date Assigned")),
            "assignment_user": to_str(rec.get("Assignment User")),
            "dispensation_status": to_str(rec.get("Dispensation Status")),
            # The column is looked up under both capitalisations.
            "dispensing_date": to_date(rec.get("Dispensing date") or rec.get("Dispensing Date")),
            "quantity_dispensed": to_int(rec.get("Quantity Dispensed")),
            "dispensing_user": to_str(rec.get("Dispensing User")),
            "quantity_returned": to_int(rec.get("Quantity Returned")),
            "date_returned": to_date(rec.get("Date Returned")),
            "return_user": to_str(rec.get("Return User")),
        })
    return rows
def parse_destruction_file(path, study):
    """Parse one IP Destruction form into a list of destruction documents.

    The rows above the table are scanned for the form's metadata lines in the
    first column (investigator, site ID, location, basket ID, creation date);
    the table header is the first row whose first cell is
    "Medication ID Description". Rows are skipped when the medication ID is
    empty or no basket ID was found. Each document's ``_id`` is
    "{basket_id}:{medication_id}".

    Raises:
        ValueError: If the table header row is not found.
    """
    # Prefix of the metadata line -> field name in the resulting documents.
    prefixes = {
        "Investigator Name:": "investigator",
        "Site ID:": "site_id",
        "Location:": "location",
        "Basket ID:": "basket_id",
        "Drug Destruction Created Date:": "destruction_date",
    }

    raw = pd.read_excel(path, header=None)
    meta = {}
    header_row = None
    for i, row in raw.iterrows():
        cell = row.iloc[0]
        first = str(cell).strip() if pd.notna(cell) else ""
        for prefix, field in prefixes.items():
            if first.startswith(prefix):
                meta[field] = first.replace(prefix, "").strip()
                break
        if header_row is None and first == "Medication ID Description":
            header_row = i

    if header_row is None:
        raise ValueError("hlavičkový řádek 'Medication ID Description' nenalezen")

    df = pd.read_excel(path, header=header_row).dropna(how="all")
    basket_id = meta.get("basket_id")

    rows = []
    for _, rec in df.iterrows():
        med_id = to_str(rec.get("Medication ID"))
        if not med_id or not basket_id:
            continue
        rows.append({
            "_id": f"{basket_id}:{med_id}",
            "study": study,
            "site_id": meta.get("site_id"),
            "investigator": meta.get("investigator"),
            "location": meta.get("location"),
            "basket_id": basket_id,
            "destruction_date": to_date(meta.get("destruction_date")),
            "medication_description": to_str(rec.get("Medication ID Description")),
            "medication_id": med_id,
            "packaged_lot_description": to_str(rec.get("Packaged Lot description")),
            "comments": to_str(rec.get("Comments")),
        })
    return rows
# ── zpracování souborů ───────────────────────────────────────────────────────
def _parse_files(files, parser, study, label):
    """Parse *files* in the given order and merge the documents by ``_id``.

    When several files yield a document with the same ``_id``, the one from
    the later file replaces the earlier one. A file whose parser raises is
    reported with a printed message and recorded as failed; the remaining
    files are still processed.

    Returns:
        A tuple ``(docs, ok_paths, failed_paths)``.
    """
    merged = {}
    parsed_paths = []
    failed_paths = []
    for path in files:
        try:
            for doc in parser(path, study):
                merged[doc["_id"]] = doc
        except Exception as e:
            failed_paths.append(path)
            print(f" [{study}] CHYBA parsování {label} {os.path.basename(path)}: {e}")
        else:
            parsed_paths.append(path)
    return list(merged.values()), parsed_paths, failed_paths
def import_study(study):
    """Import all pending Drugs report files of *study* into MongoDB.

    Collects the pending files of the four report types, parses them, logs
    one import (one ``import_id`` for the whole run), upserts the documents
    into their collections and finally moves the successfully parsed files
    away with ``move_done``. Files that failed to parse are not moved.
    Returns early when there is nothing to process or nothing could be
    parsed.
    """
    sources = [
        (f"* {study} Shipments Report*.xlsx", parse_shipments_file, "shipments"),
        (f"* {study} Shipment Details *.xlsx", parse_shipment_details_file, "details"),
        (f"* {study} Onsite Inventory *.xlsx", parse_inventory_file, "inventory"),
        (f"* {study} IP Destruction *.xlsx", parse_destruction_file, "destruction"),
    ]

    pending = [_pending(pattern) for pattern, _, _ in sources]
    if not any(pending):
        print(f" [{study}] drugs: nic ke zpracování")
        return

    parsed = []
    ok_files = []
    for files, (_, parser, label) in zip(pending, sources):
        docs, ok_paths, _failed = _parse_files(files, parser, study, label)
        parsed.append(docs)
        ok_files.extend(ok_paths)
    shipments, items, inventory, destruct = parsed

    if not ok_files:
        print(f" [{study}] drugs: žádný soubor se nepodařilo zparsovat")
        return

    print(f" [{study}] Zásilky: {len(shipments)} | Položky: {len(items)} | "
          f"Sklad: {len(inventory)} | Destrukce: {len(destruct)}")

    counts = {
        "shipments": len(shipments),
        "shipment_items": len(items),
        "inventory": len(inventory),
        "destruction": len(destruct),
    }
    import_id = log_import(study, f"drugs_{study}", "drugs", counts)
    print(f" [{study}] import_id = {import_id}")

    bulk_upsert_with_snapshot("iwrs_shipments", "iwrs_shipments_snapshots", shipments, import_id)
    bulk_upsert_with_snapshot("iwrs_shipment_items", "iwrs_shipment_items_snapshots", items, import_id)
    bulk_upsert_with_snapshot("iwrs_inventory", "iwrs_inventory_snapshots", inventory, import_id)
    bulk_upsert_only("iwrs_destruction", destruct, import_id)

    # The MongoDB writes went through, so the source files can be archived.
    for path in ok_files:
        move_done(path)
    print(f" [{study}] drugs: {len(ok_files)} soubor(ů) přesunuto do Processed")
def run(studies=None):
    """Import the pending Drugs reports for each study.

    Args:
        studies: Study codes to import; ``STUDIES`` is used when this is
            empty or ``None``.

    A failure in one study is printed together with its traceback and does
    not stop the remaining studies. Nothing is done when ``INCOMING_DIR``
    does not exist.
    """
    import traceback

    if not studies:
        studies = STUDIES
    if not os.path.isdir(INCOMING_DIR):
        print(f"Adresář neexistuje: {INCOMING_DIR}")
        return

    banner = "=" * 60
    print(banner)
    print("Import Drugs (shipments / items / inventory / destruction)")
    print(banner)

    for study in studies:
        try:
            import_study(study)
        except Exception as exc:
            print(f" [{study}] CHYBA importu drugs: {exc}")
            traceback.print_exc()


if __name__ == "__main__":
    # Standalone run: study codes may be given as command-line arguments.
    ensure_indexes()
    cli_studies = sys.argv[1:]
    run(cli_studies if cli_studies else None)
-52
View File
@@ -1,52 +0,0 @@
import mysql.connector
import pandas as pd
import db_config
# Ad-hoc check script: prints the first 20 grouped past visits from the latest
# 'patients' import of each study.
conn = mysql.connector.connect(
    host=db_config.DB_HOST, port=db_config.DB_PORT,
    user=db_config.DB_USER, password=db_config.DB_PASSWORD,
    database=db_config.DB_NAME,
)
try:
    cursor = conn.cursor(dictionary=True)
    try:
        # Use the latest import_id for each study.
        for study in ["77242113UCO3001", "42847922MDD3003"]:
            cursor.execute(
                "SELECT MAX(import_id) AS mid FROM iwrs_import WHERE study=%s AND report_type='patients'",
                (study,),
            )
            row = cursor.fetchone()
            mid = row["mid"]
            print(f"\n=== {study} (import_id={mid}) ===")

            cursor.execute("""
SELECT
v.subject,
v.actual_date,
v.scheduled_date,
v.irt_transaction_no,
v.irt_transaction_description,
v.medication_assignment,
GROUP_CONCAT(v.medication_id ORDER BY v.medication_id SEPARATOR ', ') AS medication_ids,
SUM(v.quantity_assigned) AS quantity_assigned
FROM iwrs_subject_visits v
WHERE v.import_id = %s AND v.study = %s AND v.visit_type = 'Past'
AND v.irt_transaction_no IS NOT NULL
GROUP BY v.subject, v.actual_date, v.scheduled_date, v.irt_transaction_no,
v.irt_transaction_description, v.medication_assignment
ORDER BY v.subject, v.actual_date
LIMIT 20
""", (mid, study))
            rows = cursor.fetchall()
            df = pd.DataFrame(rows)
            if df.empty:
                print(" Žádná data.")
            else:
                pd.set_option("display.max_columns", None)
                pd.set_option("display.width", 200)
                pd.set_option("display.max_colwidth", 30)
                print(df.to_string(index=False))
    finally:
        # Release the cursor even when a query fails.
        cursor.close()
finally:
    # Always close the connection, including on errors.
    conn.close()

Some files were not shown because too many files have changed in this diff Show More