This commit is contained in:
2026-05-05 12:19:51 +02:00
parent 5103cac2c9
commit 10eba225e7
5 changed files with 772 additions and 341 deletions
+226 -230
View File
@@ -1,3 +1,6 @@
import sys
import os
import mysql.connector
import pandas as pd
from datetime import date
from pathlib import Path
@@ -5,51 +8,15 @@ from openpyxl import load_workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter
STUDY = "42847922MDD3003"
#STUDY = "77242113UCO3001"
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), ".."))
import db_config
INVENTORY_DIR = Path(f"xls_reports_{STUDY}")
DESTRUCTION_DIR = Path(f"xls_ip_destruction_{STUDY}")
SHIPMENTS_FILE = Path(f"xls_shipments_{STUDY}/shipments_report_{STUDY}.xlsx")
DETAILS_DIR = Path(f"xls_shipment_details_{STUDY}")
OUTPUT_DIR = Path("output")
OUTPUT_FILE = OUTPUT_DIR / f"{date.today().strftime('%Y-%m-%d')} {STUDY} CZ IWRS overview.xlsx"
STUDY = "42847922MDD3003"
# STUDY = "77242113UCO3001"
SHIPMENT_DROP_COLS = {
"Location", "Shipped Date", "Delivered Date [UTC]",
"Delivery Recipient", "Delivery Details", "Cancelled Date",
"Tracking #", "Total Medication IDs",
"Shipping Category", "Study", "Destination Location", "Destination Site",
"Medication type", "Container ID", "Quantity of Medication IDs",
"Packaged Lot description",
}
# ── Shared constants ──────────────────────────────────────────────────────────
COLUMN_RENAMES = {
"Site": "Site",
"Medication ID": "Med ID",
"Packaged Lot number": "Lot No.",
"Original Expiration Date when Packaged Lot was Added": "Orig Exp Date",
"Expiration date": "Exp Date",
"Received Date": "Rcv Date",
"Shipment Receipt User": "Rcpt User",
"Subject Identifier": "Subject ID",
"Quantity Assigned": "Qty Asgn",
"IRT Transaction": "IRT Tx",
"Date Assigned": "Date Asgn",
"Assignment User": "Asgn User",
"Dispensation Status": "Disp Status",
"Dispensing Date": "Disp Date",
"Dispensing date": "Disp Date",
"Quantity Dispensed": "Qty Disp",
"Dispensing User": "Disp User",
"Quantity Returned": "Qty Ret",
"Date Returned": "Date Ret",
"Return User": "Ret User",
"DestroyedOn": "Destroyed",
"Basket number": "Basket No.",
}
BASE_DIR = Path(os.path.dirname(os.path.abspath(__file__)))
OUTPUT_DIR = BASE_DIR / "output"
OUTPUT_FILE = OUTPUT_DIR / f"{date.today().strftime('%Y-%m-%d')} {STUDY} CZ IWRS overview.xlsx"
DATE_COLUMNS = {
"Orig Exp Date", "Exp Date", "Rcv Date",
@@ -81,44 +48,197 @@ COLUMN_WIDTHS = {
"Max Visit Date": 16,
}
# ── Helpers ───────────────────────────────────────────────────────────────────
def read_inventory(path):
    """Read one inventory workbook and return ``(data, meta)``.

    The sheet is first loaded without a header so that the "Site:" line and
    the table's header row (first cell "Medication ID" or "Medication") can
    be located in the first column.  ``data`` is ``None`` when no header row
    is present; ``meta`` may contain the key ``"site"``.
    """
    raw = pd.read_excel(path, header=None)
    first_col = raw[0]

    # Every "Site:" line overwrites the previous one, so the last match wins.
    meta = {}
    for cell in first_col:
        text = str(cell) if pd.notna(cell) else ""
        if text.startswith("Site:"):
            meta["site"] = text.replace("Site:", "").strip()

    # Both header spellings occur: "Medication ID" (MDD3003) and "Medication" (UCO3001).
    header_hits = raw.index[first_col.isin(["Medication ID", "Medication"])]
    if len(header_hits) == 0:
        print(f" {path.name}: no data (skipping)")
        return None, meta

    # Second pass: re-read with the detected header row as column names.
    data = pd.read_excel(path, header=header_hits[0])
    return data.rename(columns={"Medication": "Medication ID"}), meta
# Shipments sheet: number of leading shipment-level columns; the detail columns start right after them (passed to format_shipment_sheet)
N_SHIP_COLS = 9
def read_destruction_lookup():
    """Build ``{medication_id: (basket_id, destroyed_on)}`` from destruction workbooks.

    Every ``*.xlsx`` file in ``DESTRUCTION_DIR`` is scanned.  "Basket ID:" and
    "Drug Destruction Created Date:" are taken from the first 15 rows of the
    first column; the kit table starts at the row whose first cell is
    "Medication ID Description".  Workbooks that are empty or lack that
    header row are reported and skipped instead of aborting the whole run.

    Returns:
        dict mapping ``int`` medication id to ``(basket_id, destroyed_on)``,
        both taken verbatim (as stripped strings) from the workbook header.
    """
    lookup = {}
    for path in DESTRUCTION_DIR.glob("*.xlsx"):
        df = pd.read_excel(path, header=None)
        if df.empty:
            print(f" {path.name}: no data (skipping)")
            continue
        first_col = df[0]

        basket_id = None
        destroyed_on = None
        # head(15) rather than range(15): a workbook with fewer than 15 rows
        # must not raise IndexError.
        for cell in first_col.head(15):
            val = str(cell) if pd.notna(cell) else ""
            if val.startswith("Basket ID:"):
                basket_id = val.replace("Basket ID:", "").strip()
            if val.startswith("Drug Destruction Created Date:"):
                destroyed_on = val.replace("Drug Destruction Created Date:", "").strip()

        header_hits = df.index[first_col == "Medication ID Description"]
        if len(header_hits) == 0:
            print(f" {path.name}: no data (skipping)")
            continue

        data = pd.read_excel(path, header=header_hits[0])
        for med_id in data["Medication ID"].dropna():
            lookup[int(med_id)] = (basket_id, destroyed_on)
    return lookup
# ── DB ────────────────────────────────────────────────────────────────────────
def get_conn():
    """Open a MySQL connection using the settings defined in ``db_config``."""
    settings = {
        "host": db_config.DB_HOST,
        "port": db_config.DB_PORT,
        "user": db_config.DB_USER,
        "password": db_config.DB_PASSWORD,
        "database": db_config.DB_NAME,
    }
    return mysql.connector.connect(**settings)
def get_latest_import_id(cursor, study):
    """Return the highest ``import_id`` of report type 'drugs' for ``study``.

    ``cursor`` must return rows as mappings (the result is read via the
    ``"mid"`` key).

    Raises:
        RuntimeError: if no matching import exists for the study.
    """
    query = "SELECT MAX(import_id) AS mid FROM iwrs_import WHERE study=%s AND report_type='drugs'"
    cursor.execute(query, (study,))
    latest = cursor.fetchone()["mid"]
    if latest is not None:
        return latest
    raise RuntimeError(f"Žádná data v MySQL pro studii {study}")
# ── Načítání dat z MySQL ──────────────────────────────────────────────────────
def load_inventory(cursor, study, import_id):
    """Return the inventory of one import as a DataFrame, joined with destruction data.

    Columns are aliased in SQL to the names the downstream functions use.
    Destruction rows are collapsed to one row per medication id before the
    LEFT JOIN.  Columns listed in ``DATE_COLUMNS`` are converted to
    datetimes (unparseable values become ``NaT``).

    The column names are taken from ``cursor.description`` so that an empty
    result set still yields a frame with the expected columns; otherwise the
    downstream ``df["..."]`` lookups would raise ``KeyError``.
    """
    sql = """
        SELECT
            i.site AS Site,
            i.medication_id AS `Med ID`,
            i.packaged_lot_no AS `Lot No.`,
            i.original_expiration_date AS `Orig Exp Date`,
            i.expiration_date AS `Exp Date`,
            i.received_date AS `Rcv Date`,
            i.receipt_user AS `Rcpt User`,
            i.subject_identifier AS `Subject ID`,
            i.quantity_assigned AS `Qty Asgn`,
            i.irt_transaction AS `IRT Tx`,
            i.date_assigned AS `Date Asgn`,
            i.assignment_user AS `Asgn User`,
            i.dispensation_status AS `Disp Status`,
            i.dispensing_date AS `Disp Date`,
            i.quantity_dispensed AS `Qty Disp`,
            i.dispensing_user AS `Disp User`,
            i.quantity_returned AS `Qty Ret`,
            i.date_returned AS `Date Ret`,
            i.return_user AS `Ret User`,
            d.destruction_date AS Destroyed,
            d.basket_id AS `Basket No.`
        FROM iwrs_inventory i
        LEFT JOIN (
            SELECT medication_id,
                   ANY_VALUE(basket_id) AS basket_id,
                   ANY_VALUE(destruction_date) AS destruction_date
            FROM iwrs_destruction
            WHERE study = %s
            GROUP BY medication_id
        ) d ON d.medication_id = i.medication_id
        WHERE i.import_id = %s
          AND i.study = %s
        ORDER BY i.site, i.received_date, i.medication_id
    """
    cursor.execute(sql, (study, import_id, study))
    rows = cursor.fetchall()

    # DB-API cursors expose the result column names in description[i][0].
    description = getattr(cursor, "description", None)
    columns = [col[0] for col in description] if description else None
    df = pd.DataFrame(rows, columns=columns)

    for col in DATE_COLUMNS:
        if col in df.columns:
            df[col] = pd.to_datetime(df[col], errors="coerce")
    print(f" Inventory: {len(df)} kitu")
    return df
def load_shipments(cursor, study, import_id):
    """Return shipments joined with their items for one import as a DataFrame.

    One row per shipment item; columns are aliased in SQL to the report
    headers.  The four date columns are converted to datetimes (unparseable
    values become ``NaT``).

    The column names are taken from ``cursor.description`` so that an empty
    result set still yields a frame with the expected columns (needed by
    ``groupby("Ship To:")`` and similar lookups downstream).
    """
    sql = """
        SELECT
            s.shipment_id AS `Shipment ID`,
            s.status AS `IRT Shipment Status`,
            s.type AS Type,
            s.ship_from AS `Shipment From`,
            s.ship_to_site AS `Ship To:`,
            s.request_date AS `Request Date`,
            s.received_date AS `Received Date`,
            s.received_by AS `Received by`,
            s.expected_arrival AS `Expected Arrival`,
            i.investigator AS Investigator,
            i.medication_description AS `Medication Description`,
            i.medication_id AS `Medication ID`,
            i.packaged_lot_no AS `Packaged Lot number`,
            i.expiration_date AS `Expiration Date`,
            i.item_status AS Status
        FROM iwrs_shipments s
        JOIN iwrs_shipment_items i
          ON i.study = s.study
         AND i.shipment_id = s.shipment_id
         AND i.import_id = %s
        WHERE s.import_id = %s
          AND s.study = %s
        ORDER BY s.ship_to_site, s.shipment_id, i.medication_id
    """
    cursor.execute(sql, (import_id, import_id, study))
    rows = cursor.fetchall()

    # DB-API cursors expose the result column names in description[i][0].
    description = getattr(cursor, "description", None)
    columns = [col[0] for col in description] if description else None
    df = pd.DataFrame(rows, columns=columns)

    for col in ("Request Date", "Received Date", "Expiration Date", "Expected Arrival"):
        if col in df.columns:
            df[col] = pd.to_datetime(df[col], errors="coerce")
    print(f" Shipments: {df['Shipment ID'].nunique() if len(df) else 0} zásilek, {len(df)} kitu")
    return df
# ── Odvozené sheety ───────────────────────────────────────────────────────────
def build_site_summary(shipments_df):
    """Count kits per site and item status.

    Groups ``shipments_df`` by "Ship To:" and counts the "Status" values
    "Available", "Assigned", "Dispensed" and "Returned by Subject" (output
    column "Returned"); other statuses are ignored.  "Total" is the sum of
    those four counts.

    An empty input (including a frame without any columns) yields an empty
    summary with the normal column layout instead of raising ``KeyError``.

    Returns:
        DataFrame with columns Site, Available, Assigned, Dispensed,
        Returned, Total, sorted by Site.
    """
    STATUS_COLS = ["Available", "Assigned", "Dispensed", "Returned by Subject"]
    count_cols = ["Available", "Assigned", "Dispensed", "Returned"]

    if shipments_df.empty:
        pivot = pd.DataFrame(columns=["Site", *count_cols, "Total"])
    else:
        pivot = (
            shipments_df.groupby("Ship To:")["Status"]
            .value_counts()
            .unstack(fill_value=0)
            # Keep exactly the four reported statuses; absent ones count as 0.
            .reindex(columns=STATUS_COLS, fill_value=0)
            .reset_index()
            .rename(columns={"Ship To:": "Site", "Returned by Subject": "Returned"})
            .sort_values("Site")
            .reset_index(drop=True)
        )
        pivot["Total"] = pivot[count_cols].sum(axis=1)
    print(f" Site Summary: {len(pivot)} center")
    return pivot
def build_expired(df):
    """Select kits with no basket number, no subject and an expiry date before today.

    Returns:
        ``(filtered, sheet_name)`` where ``filtered`` is a re-indexed copy of
        the matching rows and ``sheet_name`` embeds today's date.
    """
    today = date.today()
    cutoff = pd.Timestamp(today)

    no_basket = df["Basket No."].isna()
    no_subject = df["Subject ID"].isna()
    past_expiry = df["Exp Date"] < cutoff

    filtered = df.loc[no_basket & no_subject & past_expiry].copy().reset_index(drop=True)
    sheet_name = f"Expired as of {today.strftime('%d-%b-%Y')}"
    print(f" Expired: {len(filtered)}")
    return filtered, sheet_name
def build_assigned_not_dispensed(df):
    """Return the rows that have a subject but no dispensing date, re-indexed from 0."""
    has_subject = df["Subject ID"].notna()
    no_disp_date = df["Disp Date"].isna()
    filtered = df.loc[has_subject & no_disp_date].copy().reset_index(drop=True)
    print(f" Assigned not dispensed: {len(filtered)}")
    return filtered
def build_not_returned(df):
    """List assigned kits without a return date that predate the subject's latest assignment.

    A row qualifies when it has a subject, no "Date Ret", a dispensation
    status other than "NOT DISPENSED" (case-insensitive; missing status
    counts as other), and its "Date Asgn" is earlier than the subject's
    maximum "Date Asgn" (added as "Max Visit Date").  The return and
    destruction columns are dropped from the result.
    """
    status = df["Disp Status"].fillna("").str.upper()
    keep = df["Date Ret"].isna() & df["Subject ID"].notna() & (status != "NOT DISPENSED")
    candidates = df.loc[keep].copy()

    # The latest assignment per subject is computed over ALL rows, not just the candidates.
    latest = df.groupby("Subject ID")["Date Asgn"].max().rename("Max Visit Date")
    candidates = candidates.join(latest, on="Subject ID")

    older = candidates["Date Asgn"] < candidates["Max Visit Date"]
    filtered = (
        candidates.loc[older]
        .drop(columns=["Qty Ret", "Date Ret", "Ret User", "Destroyed", "Basket No."])
        .reset_index(drop=True)
    )
    print(f" Not returned: {len(filtered)}")
    return filtered
def build_kits_for_destruction(df):
    """List kits without a basket number that were returned or marked "NOT DISPENSED".

    The status comparison is case-insensitive and treats a missing status as
    empty.  The result is sorted by Site and return date, and the
    "Destroyed" / "Basket No." columns are removed.
    """
    status = df["Disp Status"].fillna("").str.upper()
    returned_or_unused = df["Date Ret"].notna() | (status == "NOT DISPENSED")
    pending = df.loc[df["Basket No."].isna() & returned_or_unused].copy()

    pending = pending.sort_values(["Site", "Date Ret"])
    filtered = pending.drop(columns=["Destroyed", "Basket No."]).reset_index(drop=True)
    print(f" Kits for destruction: {len(filtered)}")
    return filtered
# ── Formátování ───────────────────────────────────────────────────────────────
def format_sheet(ws, header_color, highlight_col=None, highlight_color=None):
thin = Side(style="thin", color="000000")
@@ -155,62 +275,11 @@ def format_sheet(ws, header_color, highlight_col=None, highlight_color=None):
ws.freeze_panes = "A2"
# ── Shipment helpers ─────────────────────────────────────────────────────────
def build_shipments():
    """Combine the shipments report with the per-shipment detail workbooks.

    Reads ``SHIPMENTS_FILE`` (header on the sixth row), strips the column
    names, drops ``SHIPMENT_DROP_COLS`` and, for every shipment that has a
    ``shipment_details_<id>.xlsx`` file in ``DETAILS_DIR``, emits one row per
    detail line consisting of the shipment columns followed by the detail
    columns not already present.  Shipments without a detail file are
    skipped.

    Detail columns are collected across all detail files in first-seen
    order, and the result always carries the shipment columns, so the
    function no longer fails with ``NameError``/``KeyError`` when no detail
    file exists and no longer depends on the layout of the last file read.
    """
    sh = pd.read_excel(SHIPMENTS_FILE, sheet_name=0, header=5)
    sh.columns = sh.columns.str.strip()
    sh = sh.dropna(how="all")
    sh["Shipment ID"] = sh["Shipment ID"].astype(str).str.strip()
    sh = sh.drop(columns=[c for c in SHIPMENT_DROP_COLS if c in sh.columns])

    shipment_cols = list(sh.columns)
    detail_cols = []  # union of detail-only columns over all files, first-seen order
    all_rows = []
    for _, s_row in sh.iterrows():
        sid = s_row["Shipment ID"]
        path = DETAILS_DIR / f"shipment_details_{sid}.xlsx"
        if not path.exists():
            continue
        det = pd.read_excel(path, sheet_name=0, header=5)
        det.columns = det.columns.str.strip()
        det = det.dropna(how="all")
        det["Shipment"] = det["Shipment"].astype(str).str.strip()

        extra_cols = [
            c for c in det.columns
            if c not in shipment_cols and c != "Shipment" and c not in SHIPMENT_DROP_COLS
        ]
        detail_cols.extend(c for c in extra_cols if c not in detail_cols)

        ship_values = s_row.to_dict()  # hoisted: identical for every detail row
        for _, d_row in det.iterrows():
            all_rows.append({**ship_values, **{c: d_row[c] for c in extra_cols}})

    result = pd.DataFrame(all_rows, columns=shipment_cols + detail_cols)
    for col in ["Request Date", "Received Date", "Expiration Date"]:
        if col in result.columns:
            result[col] = pd.to_datetime(result[col], errors="coerce")
    print(f" Shipments: {result['Shipment ID'].nunique()} shipments, {len(result)} kitu")
    return result
def build_site_summary(result):
    """Count kits per site and item status.

    Groups ``result`` by "Ship To:" and counts the "Status" values
    "Available", "Assigned", "Dispensed" and "Returned by Subject" (output
    column "Returned"); other statuses are ignored.  "Total" is the sum of
    those four counts.

    An empty input (including a frame without any columns) yields an empty
    summary with the normal column layout instead of raising ``KeyError``.

    Returns:
        DataFrame with columns Site, Available, Assigned, Dispensed,
        Returned, Total, sorted by Site.
    """
    STATUS_COLS = ["Available", "Assigned", "Dispensed", "Returned by Subject"]
    count_cols = ["Available", "Assigned", "Dispensed", "Returned"]

    if result.empty:
        pivot = pd.DataFrame(columns=["Site", *count_cols, "Total"])
    else:
        pivot = (
            result.groupby("Ship To:")["Status"]
            .value_counts()
            .unstack(fill_value=0)
            # Keep exactly the four reported statuses; absent ones count as 0.
            .reindex(columns=STATUS_COLS, fill_value=0)
            .reset_index()
            .rename(columns={"Ship To:": "Site", "Returned by Subject": "Returned"})
            .sort_values("Site")
            .reset_index(drop=True)
        )
        pivot["Total"] = pivot[count_cols].sum(axis=1)
    print(f" Site Summary: {len(pivot)} center")
    return pivot
def format_shipment_sheet(ws, header_color_ship, header_color_detail, n_ship_cols):
thin = Side(style="thin", color="000000")
border = Border(left=thin, right=thin, top=thin, bottom=thin)
hfont = Font(bold=True, color="FFFFFF", name="Arial", size=10)
dfont = Font(name="Arial", size=10)
thin = Side(style="thin", color="000000")
border = Border(left=thin, right=thin, top=thin, bottom=thin)
hfont = Font(bold=True, color="FFFFFF", name="Arial", size=10)
dfont = Font(name="Arial", size=10)
fill_ship = PatternFill("solid", start_color=header_color_ship)
fill_detail = PatternFill("solid", start_color=header_color_detail)
@@ -219,7 +288,9 @@ def format_shipment_sheet(ws, header_color_ship, header_color_detail, n_ship_col
cell.font = hfont
cell.alignment = Alignment(horizontal="center", vertical="center", wrap_text=True)
cell.border = border
ws.column_dimensions[get_column_letter(cell.column)].width = min(len(str(cell.value or "")) + 4, 35)
ws.column_dimensions[get_column_letter(cell.column)].width = min(
len(str(cell.value or "")) + 4, 35
)
ws.row_dimensions[1].height = 30
for row in ws.iter_rows(min_row=2, max_row=ws.max_row):
@@ -234,101 +305,29 @@ def format_shipment_sheet(ws, header_color_ship, header_color_detail, n_ship_col
ws.freeze_panes = "A2"
# ── Build DataFrames ──────────────────────────────────────────────────────────
def build_main(lookup):
    """Assemble the main overview frame from all on-site inventory workbooks.

    Each ``onsite_inventory_detail_*.xlsx`` in ``INVENTORY_DIR`` is read via
    ``read_inventory``; files without data are skipped.  ``lookup`` maps a
    medication id to ``(basket, destroyed_on)`` and fills the "Basket number"
    and "DestroyedOn" columns.  The combined frame is renamed with
    ``COLUMN_RENAMES``, its ``DATE_COLUMNS`` parsed day-first, and sorted by
    Site, Rcv Date and Med ID.
    """
    def destruction_field(med_id, position):
        # position 0 -> basket, 1 -> destroyed-on; missing ids give None.
        if pd.isna(med_id):
            return None
        return lookup.get(int(med_id), (None, None))[position]

    frames = []
    for path in sorted(INVENTORY_DIR.glob("onsite_inventory_detail_*.xlsx")):
        df, meta = read_inventory(path)
        if df is None:
            continue
        med_ids = df["Medication ID"]
        df["DestroyedOn"] = med_ids.apply(destruction_field, args=(1,))
        df["Basket number"] = med_ids.apply(destruction_field, args=(0,))
        # Fall back to the file stem when the workbook has no "Site:" line.
        df.insert(0, "Site", meta.get("site", path.stem))
        frames.append(df)
        print(f" {path.name}: {len(df)} kits")

    combined = pd.concat(frames, ignore_index=True).rename(columns=COLUMN_RENAMES)
    for col in DATE_COLUMNS:
        if col in combined.columns:
            combined[col] = pd.to_datetime(combined[col], dayfirst=True, errors="coerce")
    return combined.sort_values(["Site", "Rcv Date", "Med ID"], ignore_index=True)
def build_expired(df):
    """Select kits with no basket number, no subject and an expiry date before today.

    Returns:
        ``(filtered, sheet_name)`` where ``filtered`` is a re-indexed copy of
        the matching rows and ``sheet_name`` embeds today's date.
    """
    today = date.today()
    cutoff = pd.Timestamp(today)

    no_basket = df["Basket No."].isna()
    no_subject = df["Subject ID"].isna()
    past_expiry = df["Exp Date"] < cutoff

    filtered = df.loc[no_basket & no_subject & past_expiry].copy().reset_index(drop=True)
    sheet_name = f"Expired as of {today.strftime('%d-%b-%Y')}"
    print(f" Expired: {len(filtered)}")
    return filtered, sheet_name
def build_assigned_not_dispensed(df):
    """Return the rows that have a subject but no dispensing date, re-indexed from 0."""
    has_subject = df["Subject ID"].notna()
    no_disp_date = df["Disp Date"].isna()
    filtered = df.loc[has_subject & no_disp_date].copy().reset_index(drop=True)
    print(f" Assigned not dispensed: {len(filtered)}")
    return filtered
def build_not_returned(df):
    """List assigned kits without a return date that predate the subject's latest assignment.

    A row qualifies when it has a subject, no "Date Ret", a dispensation
    status other than "NOT DISPENSED" (case-insensitive; missing status
    counts as other), and its "Date Asgn" is earlier than the subject's
    maximum "Date Asgn" (added as "Max Visit Date").  The return and
    destruction columns are dropped from the result.
    """
    # fillna("") first: when the whole column is NaN it has float dtype and
    # the .str accessor would raise AttributeError.
    status = df["Disp Status"].fillna("").str.upper()
    no_ret = df[
        df["Date Ret"].isna() &
        df["Subject ID"].notna() &
        (status != "NOT DISPENSED")
    ].copy()

    # The latest assignment per subject is computed over ALL rows.
    max_asgn = df.groupby("Subject ID")["Date Asgn"].max().rename("Max Visit Date")
    no_ret = no_ret.join(max_asgn, on="Subject ID")

    filtered = no_ret[no_ret["Date Asgn"] < no_ret["Max Visit Date"]].copy()
    filtered = filtered.drop(columns=["Qty Ret", "Date Ret", "Ret User", "Destroyed", "Basket No."])
    filtered = filtered.reset_index(drop=True)
    print(f" Not returned: {len(filtered)}")
    return filtered
def build_kits_for_destruction(df):
    """List kits without a basket number that were returned or marked "NOT DISPENSED".

    The status comparison is case-insensitive and treats a missing status as
    empty.  The result is sorted by Site and return date, and the
    "Destroyed" / "Basket No." columns are removed.
    """
    # fillna("") first: when the whole column is NaN it has float dtype and
    # the .str accessor would raise AttributeError.
    status = df["Disp Status"].fillna("").str.upper()
    mask = (
        df["Basket No."].isna() &
        (df["Date Ret"].notna() | (status == "NOT DISPENSED"))
    )
    filtered = df[mask].copy().sort_values(["Site", "Date Ret"], ascending=[True, True])
    filtered = filtered.drop(columns=["Destroyed", "Basket No."]).reset_index(drop=True)
    print(f" Kits for destruction: {len(filtered)}")
    return filtered
# ── Main ──────────────────────────────────────────────────────────────────────
def main():
# Prepare output dir, remove any previous overview file
OUTPUT_DIR.mkdir(exist_ok=True)
for old in OUTPUT_DIR.glob(f"*{STUDY} CZ IWRS overview.xlsx"):
old.unlink()
print(f"Removed old file: {old.name}")
lookup = read_destruction_lookup()
print(f"Loaded {len(lookup)} kits from destruction reports")
print(f"\nNačítám data z MySQL pro {STUDY}...")
conn = get_conn()
cursor = conn.cursor(dictionary=True)
import_id = get_latest_import_id(cursor, STUDY)
print(f" import_id = {import_id}")
df = build_main(lookup)
df = load_inventory(cursor, STUDY, import_id)
shipments_df = load_shipments(cursor, STUDY, import_id)
cursor.close()
conn.close()
expired_df, expired_sheet = build_expired(df)
assigned_df = build_assigned_not_dispensed(df)
not_returned_df = build_not_returned(df)
destruction_df = build_kits_for_destruction(df)
site_summary_df = build_site_summary(shipments_df)
shipments_df = build_shipments()
site_summary_df = build_site_summary(shipments_df)
n_ship_cols = shipments_df.columns.tolist().index("Investigator") # first detail col index (0-based)
# Write all sheets
with pd.ExcelWriter(OUTPUT_FILE, engine="openpyxl") as writer:
df.to_excel( writer, index=False, sheet_name="CountryMedicationOverview")
expired_df.to_excel( writer, index=False, sheet_name=expired_sheet)
@@ -338,30 +337,27 @@ def main():
shipments_df.to_excel( writer, index=False, sheet_name="Shipments")
site_summary_df.to_excel( writer, index=False, sheet_name="Site Summary")
# Format all sheets
wb = load_workbook(OUTPUT_FILE)
# Main sheet — dark blue, green highlight for Destroyed/Basket No.
ws_main = wb["CountryMedicationOverview"]
ws_main = wb["CountryMedicationOverview"]
format_sheet(ws_main, header_color="1F4E79")
# Extra: green fill for Destroyed and Basket No. columns
new_col_fill = PatternFill("solid", start_color="E2EFDA")
headers_main = [c.value for c in ws_main[1]]
new_col_fill = PatternFill("solid", start_color="E2EFDA")
headers_main = [c.value for c in ws_main[1]]
for row in ws_main.iter_rows(min_row=2, max_row=ws_main.max_row):
for cell in row:
col_name = headers_main[cell.column - 1] if cell.column <= len(headers_main) else None
if col_name in ("Destroyed", "Basket No."):
cell.fill = new_col_fill
format_sheet(wb[expired_sheet], header_color="C00000", highlight_col="Exp Date", highlight_color="FFE0E0")
format_sheet(wb["Assigned not dispensed"], header_color="833C00", highlight_col="Subject ID", highlight_color="FFF2CC")
format_sheet(wb["Not returned"], header_color="375623", highlight_col="Max Visit Date", highlight_color="E2EFDA")
format_sheet(wb["Kits for destruction"], header_color="595959")
format_shipment_sheet(wb["Shipments"], "1F4E79", "375623", n_ship_cols)
format_sheet(wb["Site Summary"], header_color="1F4E79")
format_sheet(wb[expired_sheet], header_color="C00000", highlight_col="Exp Date", highlight_color="FFE0E0")
format_sheet(wb["Assigned not dispensed"], header_color="833C00", highlight_col="Subject ID", highlight_color="FFF2CC")
format_sheet(wb["Not returned"], header_color="375623", highlight_col="Max Visit Date", highlight_color="E2EFDA")
format_sheet(wb["Kits for destruction"], header_color="595959")
format_shipment_sheet(wb["Shipments"], "1F4E79", "375623", N_SHIP_COLS)
format_sheet(wb["Site Summary"], header_color="1F4E79")
wb.save(OUTPUT_FILE)
print(f"\nSaved: {OUTPUT_FILE} ({len(df)} rows on main sheet, {wb.sheetnames})")
print(f"\nUloženo: {OUTPUT_FILE} ({len(df)} řádků, sheety: {wb.sheetnames})")
if __name__ == "__main__":
+153 -111
View File
@@ -1,163 +1,205 @@
import pandas as pd
import sys
import os
import mysql.connector
import openpyxl
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter
from datetime import date
import os
import pandas as pd
STUDY = "77242113UCO3001"
SHIPMENTS_FILE = f"xls_shipments_{STUDY}/shipments_report_{STUDY}.xlsx"
DETAILS_DIR = f"xls_shipment_details_{STUDY}"
OUTPUT_DIR = "output"
TEST_SHIPMENT = None # None = vsechny shipments
# db_config.py je v nadřazeném adresáři (Drugs/)
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), ".."))
import db_config
DROP_COLS = {
"Location", "Shipped Date", "Delivered Date [UTC]",
"Delivery Recipient", "Delivery Details", "Cancelled Date",
"Tracking #", "Total Medication IDs",
"Shipping Category", "Study", "Destination Location", "Destination Site",
"Medication type", "Container ID", "Quantity of Medication IDs",
"Packaged Lot description",
}
STUDY = "77242113UCO3001"
OUTPUT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "output")
os.makedirs(OUTPUT_DIR, exist_ok=True)
def read_shipments():
    """Load the shipments report, tidy it and drop the columns listed in ``DROP_COLS``.

    The header is on the sixth row of the first sheet.  Column names and
    shipment ids are stripped of surrounding whitespace and fully empty rows
    are removed.
    """
    report = pd.read_excel(SHIPMENTS_FILE, sheet_name=0, header=5)
    report.columns = report.columns.str.strip()
    report = report.dropna(how="all")
    report["Shipment ID"] = report["Shipment ID"].astype(str).str.strip()
    unwanted = [name for name in DROP_COLS if name in report.columns]
    return report.drop(columns=unwanted)
def get_conn():
    """Open a MySQL connection using the settings defined in ``db_config``."""
    settings = {
        "host": db_config.DB_HOST,
        "port": db_config.DB_PORT,
        "user": db_config.DB_USER,
        "password": db_config.DB_PASSWORD,
        "database": db_config.DB_NAME,
    }
    return mysql.connector.connect(**settings)
def read_details(shipment_id):
    """Load the detail workbook of one shipment, or return ``None`` if it does not exist.

    The header is on the sixth row of the first sheet.  Column names and the
    "Shipment" values are stripped of surrounding whitespace and fully empty
    rows are removed.
    """
    path = os.path.join(DETAILS_DIR, f"shipment_details_{shipment_id}.xlsx")
    if os.path.exists(path):
        details = pd.read_excel(path, sheet_name=0, header=5)
        details.columns = details.columns.str.strip()
        details = details.dropna(how="all")
        details["Shipment"] = details["Shipment"].astype(str).str.strip()
        return details
    return None
def load_data(study):
    """Fetch all shipment items of the latest 'drugs' import for ``study``.

    Looks up the highest ``import_id`` for the study and returns the joined
    shipment/item rows of that import as a list of dicts (one per item),
    ordered by destination site, shipment id and medication id.

    The cursor and connection are closed in ``finally`` blocks so they are
    released even when the lookup raises or a query fails.

    Raises:
        RuntimeError: if the study has no 'drugs' import.
    """
    conn = get_conn()
    try:
        cursor = conn.cursor(dictionary=True)
        try:
            # Latest import_id for the given study.
            cursor.execute(
                "SELECT MAX(import_id) AS mid FROM iwrs_import WHERE study=%s AND report_type='drugs'",
                (study,),
            )
            row = cursor.fetchone()
            import_id = row["mid"]
            if import_id is None:
                raise RuntimeError(f"Žádná data v MySQL pro studii {study}")
            print(f" import_id = {import_id}")

            sql = """
                SELECT
                    s.shipment_id,
                    s.status AS irt_shipment_status,
                    s.type,
                    s.ship_from AS shipment_from,
                    s.ship_to_site AS ship_to,
                    s.request_date,
                    s.received_date,
                    s.received_by,
                    s.expected_arrival,
                    i.investigator,
                    i.medication_description,
                    i.medication_id,
                    i.packaged_lot_no,
                    i.expiration_date,
                    i.item_status AS status
                FROM iwrs_shipments s
                JOIN iwrs_shipment_items i
                  ON i.study = s.study
                 AND i.shipment_id = s.shipment_id
                 AND i.import_id = %s
                WHERE s.import_id = %s
                  AND s.study = %s
                ORDER BY s.ship_to_site, s.shipment_id, i.medication_id
            """
            cursor.execute(sql, (import_id, import_id, study))
            rows = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        conn.close()
    print(f" Načteno řádků: {len(rows)}")
    return rows
def build_report():
shipments = read_shipments()
if TEST_SHIPMENT:
shipments = shipments[shipments["Shipment ID"] == TEST_SHIPMENT]
# shipment sloupce (modrý header) / detail sloupce (zelený header)
SHIP_COLS = [
("shipment_id", "Shipment ID"),
("irt_shipment_status","IRT Shipment Status"),
("type", "Type"),
("shipment_from", "Shipment From"),
("ship_to", "Ship To:"),
("request_date", "Request Date"),
("received_date", "Received Date"),
("received_by", "Received by"),
("expected_arrival", "Expected Arrival"),
]
shipment_cols = list(shipments.columns)
all_rows = []
DETAIL_COLS = [
("investigator", "Investigator"),
("medication_description", "Medication Description"),
("medication_id", "Medication ID"),
("packaged_lot_no", "Packaged Lot number"),
("expiration_date", "Expiration Date"),
("status", "Status"),
]
for _, s_row in shipments.iterrows():
sid = s_row["Shipment ID"]
details = read_details(sid)
if details is None:
continue
extra_cols = [c for c in details.columns if c not in shipment_cols and c != "Shipment" and c not in DROP_COLS]
for _, d_row in details.iterrows():
row = {**s_row.to_dict(), **{c: d_row[c] for c in extra_cols}}
all_rows.append(row)
print(f" [{sid}] {len(details)} kitu")
ALL_COLS = SHIP_COLS + DETAIL_COLS
N_SHIP_COLS = len(SHIP_COLS)
result = pd.DataFrame(all_rows)
all_cols = shipment_cols + [c for c in extra_cols if c in result.columns]
result = result[all_cols]
HEADER_FILL_SHIP = PatternFill("solid", fgColor="1F4E79")
HEADER_FILL_DETAIL = PatternFill("solid", fgColor="375623")
HEADER_FONT = Font(name="Arial", bold=True, color="FFFFFF", size=10)
DATA_FONT = Font(name="Arial", size=10)
THIN_BORDER = Border(
left=Side(style="thin", color="BFBFBF"),
right=Side(style="thin", color="BFBFBF"),
bottom=Side(style="thin", color="BFBFBF"),
)
wb = openpyxl.Workbook()
def write_shipments_sheet(wb, rows):
ws = wb.active
ws.title = "Shipments"
HEADER_FILL_SHIP = PatternFill("solid", fgColor="1F4E79")
HEADER_FILL_DETAIL = PatternFill("solid", fgColor="375623")
HEADER_FONT = Font(name="Arial", bold=True, color="FFFFFF", size=10)
DATA_FONT = Font(name="Arial", size=10)
BORDER = Border(
left=Side(style="thin", color="BFBFBF"),
right=Side(style="thin", color="BFBFBF"),
bottom=Side(style="thin", color="BFBFBF"),
)
n_ship = len(shipment_cols)
for ci, col in enumerate(all_cols, 1):
cell = ws.cell(row=1, column=ci, value=col)
cell.font = HEADER_FONT
cell.fill = HEADER_FILL_SHIP if ci <= n_ship else HEADER_FILL_DETAIL
# záhlaví
for ci, (_, label) in enumerate(ALL_COLS, 1):
cell = ws.cell(row=1, column=ci, value=label)
cell.font = HEADER_FONT
cell.fill = HEADER_FILL_SHIP if ci <= N_SHIP_COLS else HEADER_FILL_DETAIL
cell.alignment = Alignment(horizontal="center", vertical="center", wrap_text=True)
cell.border = BORDER
cell.border = THIN_BORDER
ws.row_dimensions[1].height = 30
for ri, (_, row) in enumerate(result.iterrows(), 2):
for ci, col in enumerate(all_cols, 1):
val = row[col]
if pd.isna(val):
val = None
elif hasattr(val, "date"):
val = val.date()
# data
for ri, row in enumerate(rows, 2):
for ci, (key, _) in enumerate(ALL_COLS, 1):
val = row[key]
cell = ws.cell(row=ri, column=ci, value=val)
cell.font = DATA_FONT
cell.border = BORDER
cell.font = DATA_FONT
cell.border = THIN_BORDER
cell.alignment = Alignment(horizontal="center", vertical="center")
if isinstance(val, date):
cell.number_format = "DD-MMM-YYYY"
ws.auto_filter.ref = ws.dimensions
ws.freeze_panes = "A2"
ws.freeze_panes = "A2"
for ci, col in enumerate(all_cols, 1):
vals = [col] + [str(result.iloc[r][col]) for r in range(len(result)) if pd.notna(result.iloc[r][col])]
ws.column_dimensions[get_column_letter(ci)].width = min(max((len(v) for v in vals), default=10) + 2, 35)
# šířky sloupců
for ci, (key, label) in enumerate(ALL_COLS, 1):
vals = [label] + [str(r[key]) for r in rows if r[key] is not None]
ws.column_dimensions[get_column_letter(ci)].width = min(
max((len(v) for v in vals), default=10) + 2, 35
)
# --- Sheet 2: Site Summary ---
def write_summary_sheet(wb, rows):
STATUS_COLS = ["Available", "Assigned", "Dispensed", "Returned by Subject"]
pivot = result.groupby("Ship To:")["Status"].value_counts().unstack(fill_value=0)
df = pd.DataFrame(rows)
pivot = df.groupby("ship_to")["status"].value_counts().unstack(fill_value=0)
for s in STATUS_COLS:
if s not in pivot.columns:
pivot[s] = 0
pivot = pivot[STATUS_COLS].reset_index().rename(columns={"Ship To:": "Site", "Returned by Subject": "Returned"})
pivot = pivot.sort_values("Site").reset_index(drop=True)
pivot = (
pivot[STATUS_COLS]
.reset_index()
.rename(columns={"ship_to": "Site", "Returned by Subject": "Returned"})
.sort_values("Site")
.reset_index(drop=True)
)
pivot["Total"] = pivot[["Available", "Assigned", "Dispensed", "Returned"]].sum(axis=1)
ws2 = wb.create_sheet("Site Summary")
summary_cols = ["Site", "Available", "Assigned", "Dispensed", "Returned", "Total"]
HEADER_FILL_SUMM = PatternFill("solid", fgColor="1F4E79")
ws = wb.create_sheet("Site Summary")
s_cols = ["Site", "Available", "Assigned", "Dispensed", "Returned", "Total"]
for ci, col in enumerate(summary_cols, 1):
cell = ws2.cell(row=1, column=ci, value=col)
cell.font = HEADER_FONT
cell.fill = HEADER_FILL_SUMM
for ci, col in enumerate(s_cols, 1):
cell = ws.cell(row=1, column=ci, value=col)
cell.font = HEADER_FONT
cell.fill = PatternFill("solid", fgColor="1F4E79")
cell.alignment = Alignment(horizontal="center", vertical="center")
cell.border = BORDER
ws2.row_dimensions[1].height = 25
cell.border = THIN_BORDER
ws.row_dimensions[1].height = 25
for ri, (_, row) in enumerate(pivot.iterrows(), 2):
for ci, col in enumerate(summary_cols, 1):
cell = ws2.cell(row=ri, column=ci, value=row[col])
cell.font = DATA_FONT
cell.border = BORDER
for ci, col in enumerate(s_cols, 1):
cell = ws.cell(row=ri, column=ci, value=row[col])
cell.font = DATA_FONT
cell.border = THIN_BORDER
cell.alignment = Alignment(horizontal="center", vertical="center")
for ci, col in enumerate(summary_cols, 1):
for ci, col in enumerate(s_cols, 1):
vals = [col] + [str(pivot.iloc[r][col]) for r in range(len(pivot))]
ws2.column_dimensions[get_column_letter(ci)].width = min(max(len(v) for v in vals) + 4, 35)
ws.column_dimensions[get_column_letter(ci)].width = min(
max(len(v) for v in vals) + 4, 35
)
ws2.freeze_panes = "A2"
ws.freeze_panes = "A2"
suffix = f"_{TEST_SHIPMENT}" if TEST_SHIPMENT else ""
pattern = f"{STUDY} CZ Shipments{suffix}.xlsx"
for old in os.listdir(OUTPUT_DIR):
if old.endswith(pattern):
try:
os.remove(os.path.join(OUTPUT_DIR, old))
print(f"Smazan -> {old}")
except OSError:
print(f"Preskakuji smazani (soubor otevren?) -> {old}")
outfile = os.path.join(OUTPUT_DIR, f"{date.today()} {STUDY} CZ Shipments{suffix}.xlsx")
def build_report():
print(f"\nNačítám data z MySQL pro {STUDY}...")
rows = load_data(STUDY)
wb = openpyxl.Workbook()
write_shipments_sheet(wb, rows)
write_summary_sheet(wb, rows)
outfile = os.path.join(OUTPUT_DIR, f"{date.today()} {STUDY} CZ Shipments.xlsx")
wb.save(outfile)
print(f"\nUlozeno -> {outfile}")
print(f"\nUloženo -> {outfile}")
build_report()
+393
View File
@@ -0,0 +1,393 @@
import sys
import os
import mysql.connector
import pandas as pd
from datetime import date
from pathlib import Path
from openpyxl import load_workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), ".."))
import db_config
# (study identifier, short tag) pairs.
STUDIES = [
    ("77242113UCO3001", "UCO"),
    ("42847922MDD3003", "MDD"),
]
# Output is written to an "output" directory next to this script.
BASE_DIR = Path(os.path.dirname(os.path.abspath(__file__)))
OUTPUT_DIR = BASE_DIR / "output"
# Column names that load_inventory converts with pd.to_datetime when present.
DATE_COLUMNS = {
    "Orig Exp Date", "Exp Date", "Rcv Date",
    "Date Asgn", "Disp Date", "Date Ret", "Destroyed", "Max Visit Date",
}
# Per-column widths keyed by the report column names.
# NOTE(review): presumably consumed by the sheet-formatting code — usage is not visible in this part of the file.
COLUMN_WIDTHS = {
    "Site": 14,
    "Med ID": 10,
    "Lot No.": 12,
    "Orig Exp Date": 16,
    "Exp Date": 14,
    "Rcv Date": 14,
    "Rcpt User": 22,
    "Subject ID": 14,
    "Qty Asgn": 9,
    "IRT Tx": 8,
    "Date Asgn": 14,
    "Asgn User": 20,
    "Disp Status": 16,
    "Disp Date": 14,
    "Qty Disp": 9,
    "Disp User": 20,
    "Qty Ret": 10,
    "Date Ret": 14,
    "Ret User": 18,
    "Destroyed": 14,
    "Basket No.": 12,
    "Max Visit Date": 16,
}
N_SHIP_COLS = 9  # number of shipment columns (blue header in the Shipments sheet)
# ── DB ────────────────────────────────────────────────────────────────────────
def get_conn():
    """Open a MySQL connection using the settings defined in ``db_config``."""
    settings = {
        "host": db_config.DB_HOST,
        "port": db_config.DB_PORT,
        "user": db_config.DB_USER,
        "password": db_config.DB_PASSWORD,
        "database": db_config.DB_NAME,
    }
    return mysql.connector.connect(**settings)
def get_latest_import_id(cursor, study):
    """Return the highest ``import_id`` of report type 'drugs' for ``study``.

    ``cursor`` must return rows as mappings (the result is read via the
    ``"mid"`` key).

    Raises:
        RuntimeError: if no matching import exists for the study.
    """
    query = "SELECT MAX(import_id) AS mid FROM iwrs_import WHERE study=%s AND report_type='drugs'"
    cursor.execute(query, (study,))
    latest = cursor.fetchone()["mid"]
    if latest is not None:
        return latest
    raise RuntimeError(f"Žádná data v MySQL pro studii {study}")
# ── Načítání dat ──────────────────────────────────────────────────────────────
def load_inventory(cursor, study, import_id):
    """Return the inventory of one import as a DataFrame, joined with destruction data.

    Columns are aliased in SQL to the report column names.  Destruction rows
    are collapsed to one row per medication id before the LEFT JOIN.
    Columns listed in ``DATE_COLUMNS`` are converted to datetimes
    (unparseable values become ``NaT``).

    The column names are taken from ``cursor.description`` so that an empty
    result set still yields a frame with the expected columns; otherwise the
    downstream ``df["..."]`` lookups would raise ``KeyError``.
    """
    sql = """
        SELECT
            i.site AS Site,
            i.medication_id AS `Med ID`,
            i.packaged_lot_no AS `Lot No.`,
            i.original_expiration_date AS `Orig Exp Date`,
            i.expiration_date AS `Exp Date`,
            i.received_date AS `Rcv Date`,
            i.receipt_user AS `Rcpt User`,
            i.subject_identifier AS `Subject ID`,
            i.quantity_assigned AS `Qty Asgn`,
            i.irt_transaction AS `IRT Tx`,
            i.date_assigned AS `Date Asgn`,
            i.assignment_user AS `Asgn User`,
            i.dispensation_status AS `Disp Status`,
            i.dispensing_date AS `Disp Date`,
            i.quantity_dispensed AS `Qty Disp`,
            i.dispensing_user AS `Disp User`,
            i.quantity_returned AS `Qty Ret`,
            i.date_returned AS `Date Ret`,
            i.return_user AS `Ret User`,
            d.destruction_date AS Destroyed,
            d.basket_id AS `Basket No.`
        FROM iwrs_inventory i
        LEFT JOIN (
            SELECT medication_id,
                   ANY_VALUE(basket_id) AS basket_id,
                   ANY_VALUE(destruction_date) AS destruction_date
            FROM iwrs_destruction
            WHERE study = %s
            GROUP BY medication_id
        ) d ON d.medication_id = i.medication_id
        WHERE i.import_id = %s
          AND i.study = %s
        ORDER BY i.site, i.received_date, i.medication_id
    """
    cursor.execute(sql, (study, import_id, study))
    rows = cursor.fetchall()

    # DB-API cursors expose the result column names in description[i][0].
    description = getattr(cursor, "description", None)
    columns = [col[0] for col in description] if description else None
    df = pd.DataFrame(rows, columns=columns)

    for col in DATE_COLUMNS:
        if col in df.columns:
            df[col] = pd.to_datetime(df[col], errors="coerce")
    print(f" Inventory: {len(df)} kitu")
    return df
def load_shipments(cursor, study, import_id):
    """Load shipments of one import together with their shipped items.

    ``iwrs_shipments`` is inner-joined to ``iwrs_shipment_items`` on study and
    shipment id, both restricted to ``import_id``, giving one row per item.

    Args:
        cursor: DB cursor; rows may be dicts keyed by the SQL aliases or
            plain tuples in SELECT order.
        study: Study identifier to filter on.
        import_id: Import whose shipment snapshot is read.

    Returns:
        pandas.DataFrame with the shipment-level columns first, followed by
        the item-level columns. The four date columns are converted to
        datetimes; unparseable values become ``NaT``.
    """
    sql = """
        SELECT
            s.shipment_id AS `Shipment ID`,
            s.status AS `IRT Shipment Status`,
            s.type AS Type,
            s.ship_from AS `Shipment From`,
            s.ship_to_site AS `Ship To:`,
            s.request_date AS `Request Date`,
            s.received_date AS `Received Date`,
            s.received_by AS `Received by`,
            s.expected_arrival AS `Expected Arrival`,
            i.investigator AS Investigator,
            i.medication_description AS `Medication Description`,
            i.medication_id AS `Medication ID`,
            i.packaged_lot_no AS `Packaged Lot number`,
            i.expiration_date AS `Expiration Date`,
            i.item_status AS Status
        FROM iwrs_shipments s
        JOIN iwrs_shipment_items i
            ON i.study = s.study
           AND i.shipment_id = s.shipment_id
           AND i.import_id = %s
        WHERE s.import_id = %s
          AND s.study = %s
        ORDER BY s.ship_to_site, s.shipment_id, i.medication_id
    """
    cursor.execute(sql, (import_id, import_id, study))
    rows = cursor.fetchall()

    # Take the schema from the cursor so that an empty result set still
    # produces a frame with every expected column; the site summary groups
    # by "Ship To:" and would otherwise fail with KeyError.
    description = getattr(cursor, "description", None)
    columns = [col[0] for col in description] if description else None
    df = pd.DataFrame(rows, columns=columns)

    for col in ("Request Date", "Received Date", "Expiration Date", "Expected Arrival"):
        if col in df.columns:
            df[col] = pd.to_datetime(df[col], errors="coerce")
    n_ship = df["Shipment ID"].nunique() if len(df) else 0
    print(f" Shipments: {n_ship} zásilek, {len(df)} kitu")
    return df
# ── Derived sheets ────────────────────────────────────────────────────────────
def build_site_summary(shipments_df):
    """Count shipped kits per destination site and item status.

    Args:
        shipments_df: Frame with at least the ``"Ship To:"`` and ``"Status"``
            columns.

    Returns:
        pandas.DataFrame sorted by ``Site`` with the columns ``Site``,
        ``Available``, ``Assigned``, ``Dispensed``, ``Returned`` and
        ``Total`` (sum of the four status counts). Statuses other than
        these four are not reported.
    """
    wanted = ["Available", "Assigned", "Dispensed", "Returned by Subject"]

    counts = (
        shipments_df.groupby("Ship To:")["Status"]
        .value_counts()
        .unstack(fill_value=0)
    )
    # Statuses that never occur still need a (zero) column.
    absent = [name for name in wanted if name not in counts.columns]
    for name in absent:
        counts[name] = 0

    summary = counts[wanted].reset_index()
    summary = summary.rename(
        columns={"Ship To:": "Site", "Returned by Subject": "Returned"}
    )
    summary = summary.sort_values("Site").reset_index(drop=True)

    total_inputs = ["Available", "Assigned", "Dispensed", "Returned"]
    summary["Total"] = summary[total_inputs].sum(axis=1)
    print(f" Site Summary: {len(summary)} center")
    return summary
def build_expired(df):
    """Select kits that expired before today and are neither assigned nor in a basket.

    Args:
        df: Inventory frame with ``"Basket No."``, ``"Subject ID"`` and a
            datetime ``"Exp Date"`` column.

    Returns:
        A re-indexed copy of the rows whose basket number and subject are
        missing and whose expiration date lies before today's midnight.
    """
    cutoff = pd.Timestamp(date.today())
    no_basket = df["Basket No."].isna()
    unassigned = df["Subject ID"].isna()
    past_expiry = df["Exp Date"] < cutoff

    expired = df[no_basket & unassigned & past_expiry].copy()
    expired = expired.reset_index(drop=True)
    print(f" Expired: {len(expired)}")
    return expired
def build_assigned_not_dispensed(df):
    """Select kits that have a subject but no dispensing date.

    Args:
        df: Inventory frame with ``"Subject ID"`` and ``"Disp Date"`` columns.

    Returns:
        A re-indexed copy of the matching rows.
    """
    has_subject = df["Subject ID"].notna()
    not_dispensed = df["Disp Date"].isna()
    pending = df[has_subject & not_dispensed].copy().reset_index(drop=True)
    print(f" Assigned not dispensed: {len(pending)}")
    return pending
def build_not_returned(df):
    """Select kits still unreturned although the subject was assigned kits later.

    A kit qualifies when it has a subject, no return date, a dispensation
    status other than "NOT DISPENSED" (case-insensitive), and its assignment
    date is earlier than the latest assignment date of the same subject.

    Args:
        df: Inventory frame with the subject, assignment, dispensation,
            return and destruction columns.

    Returns:
        A re-indexed frame of the matching rows with a ``"Max Visit Date"``
        column added and the return/destruction columns removed.
    """
    status_upper = df["Disp Status"].fillna("").str.upper()
    open_rows = (
        df["Date Ret"].isna()
        & df["Subject ID"].notna()
        & (status_upper != "NOT DISPENSED")
    )
    candidates = df[open_rows].copy()

    # Latest assignment date per subject, computed over the whole inventory.
    latest_visit = df.groupby("Subject ID")["Date Asgn"].max()
    latest_visit = latest_visit.rename("Max Visit Date")
    candidates = candidates.join(latest_visit, on="Subject ID")

    superseded = candidates["Date Asgn"] < candidates["Max Visit Date"]
    overdue = candidates[superseded].copy()
    dropped = ["Qty Ret", "Date Ret", "Ret User", "Destroyed", "Basket No."]
    overdue = overdue.drop(columns=dropped).reset_index(drop=True)
    print(f" Not returned: {len(overdue)}")
    return overdue
def build_kits_for_destruction(df):
    """Select kits without a basket that were returned or never dispensed.

    Args:
        df: Inventory frame with ``"Basket No."``, ``"Date Ret"``,
            ``"Disp Status"``, ``"Site"`` and ``"Destroyed"`` columns.

    Returns:
        A re-indexed frame of the matching rows, sorted by site and return
        date (ascending), without the ``"Destroyed"`` and ``"Basket No."``
        columns.
    """
    no_basket = df["Basket No."].isna()
    was_returned = df["Date Ret"].notna()
    never_dispensed = df["Disp Status"].fillna("").str.upper() == "NOT DISPENSED"

    kits = df[no_basket & (was_returned | never_dispensed)].copy()
    kits = kits.sort_values(["Site", "Date Ret"], ascending=[True, True])
    kits = kits.drop(columns=["Destroyed", "Basket No."])
    kits = kits.reset_index(drop=True)
    print(f" Kits for destruction: {len(kits)}")
    return kits
# ── Formatting ────────────────────────────────────────────────────────────────
def format_sheet(ws, header_color, highlight_col=None, highlight_color=None):
    """Apply the common table styling to a worksheet.

    The header row gets a solid fill with bold white text. Data cells get
    borders, centered text and a date format for columns listed in the
    module-level ``DATE_COLUMNS``. Column widths come from the module-level
    ``COLUMN_WIDTHS`` (default 14). An auto-filter over the used range is
    enabled and the header row is frozen.

    Args:
        ws: openpyxl worksheet whose first row holds the column headers.
        header_color: Hex colour of the header fill.
        highlight_col: Header name of a column whose data cells are filled.
        highlight_color: Hex colour of that fill; no highlighting if falsy.
    """
    edge = Side(style="thin", color="000000")
    box = Border(left=edge, right=edge, top=edge, bottom=edge)
    head_fill = PatternFill("solid", start_color=header_color)
    head_font = Font(bold=True, color="FFFFFF", name="Arial", size=10)
    head_align = Alignment(horizontal="center", vertical="center", wrap_text=False)
    body_font = Font(name="Arial", size=10)
    body_align = Alignment(horizontal="center")
    accent = PatternFill("solid", start_color=highlight_color) if highlight_color else None

    header_row = ws[1]
    titles = [cell.value for cell in header_row]

    # Header styling and column widths in a single pass over row 1.
    for head in header_row:
        head.fill = head_fill
        head.font = head_font
        head.alignment = head_align
        head.border = box
        letter = get_column_letter(head.column)
        ws.column_dimensions[letter].width = COLUMN_WIDTHS.get(head.value, 14)

    for cells in ws.iter_rows(min_row=2, max_row=ws.max_row):
        for cell in cells:
            pos = cell.column - 1
            title = titles[pos] if pos < len(titles) else None
            cell.font = body_font
            cell.border = box
            cell.alignment = body_align
            if title in DATE_COLUMNS:
                cell.number_format = "DD-MMM-YYYY"
            if accent and title == highlight_col:
                cell.fill = accent

    ws.auto_filter.ref = ws.dimensions
    ws.freeze_panes = "A2"
def format_overview_sheet(ws):
    """Style the overview sheet and tint its destruction columns.

    Applies ``format_sheet`` with the dark-blue header, then fills every data
    cell of the ``"Destroyed"`` and ``"Basket No."`` columns with light green.

    Args:
        ws: openpyxl worksheet whose first row holds the column headers.
    """
    format_sheet(ws, header_color="1F4E79")

    tint = PatternFill("solid", start_color="E2EFDA")
    tinted_titles = ("Destroyed", "Basket No.")
    titles = [head.value for head in ws[1]]

    for cells in ws.iter_rows(min_row=2, max_row=ws.max_row):
        for cell in cells:
            pos = cell.column
            if pos > len(titles):
                continue
            if titles[pos - 1] in tinted_titles:
                cell.fill = tint
def format_shipment_sheet(ws):
    """Style the Shipments sheet with a two-colour header.

    The first ``N_SHIP_COLS`` header cells get the blue fill and the
    remaining ones the green fill. Column widths follow the header text
    length (plus 4, capped at 35). Data cells get borders and centered text,
    and cells holding a date or datetime value are shown as ``DD-MMM-YYYY``.
    An auto-filter over the used range is enabled and the header row frozen.

    Args:
        ws: openpyxl worksheet whose first row holds the column headers.
    """
    thin = Side(style="thin", color="000000")
    border = Border(left=thin, right=thin, top=thin, bottom=thin)
    hfont = Font(bold=True, color="FFFFFF", name="Arial", size=10)
    dfont = Font(name="Arial", size=10)
    fill_ship = PatternFill("solid", start_color="1F4E79")
    fill_detail = PatternFill("solid", start_color="375623")
    # One shared style object per role instead of a new one for every cell.
    header_alignment = Alignment(horizontal="center", vertical="center", wrap_text=True)
    data_alignment = Alignment(horizontal="center", vertical="center")

    for cell in ws[1]:
        cell.fill = fill_ship if cell.column <= N_SHIP_COLS else fill_detail
        cell.font = hfont
        cell.alignment = header_alignment
        cell.border = border
        ws.column_dimensions[get_column_letter(cell.column)].width = min(
            len(str(cell.value or "")) + 4, 35
        )
    ws.row_dimensions[1].height = 30

    for row in ws.iter_rows(min_row=2, max_row=ws.max_row):
        for cell in row:
            cell.font = dfont
            cell.border = border
            cell.alignment = data_alignment
            # datetime (and pandas Timestamp) subclass date, so a single
            # isinstance check replaces the fragile class-name comparison.
            if isinstance(cell.value, date):
                cell.number_format = "DD-MMM-YYYY"

    ws.auto_filter.ref = ws.dimensions
    ws.freeze_panes = "A2"
# ── Main ──────────────────────────────────────────────────────────────────────
# (sheet name, formatter key) for every sheet of the report, in workbook order.
SHEETS_DEF = [
    ("CountryMedicationOverview", "overview"),
    ("Expired", "expired"),
    ("Assigned not dispensed", "assigned"),
    ("Not returned", "not_returned"),
    ("Kits for destruction", "destruction"),
    ("Shipments", "shipments"),
    ("Site Summary", "site_summary"),
]

# Formatter key -> callable taking the worksheet to style.
FORMAT_MAP = {
    "overview": lambda ws: format_overview_sheet(ws),
    "expired": lambda ws: format_sheet(
        ws, header_color="C00000", highlight_col="Exp Date", highlight_color="FFE0E0"
    ),
    "assigned": lambda ws: format_sheet(
        ws, header_color="833C00", highlight_col="Subject ID", highlight_color="FFF2CC"
    ),
    "not_returned": lambda ws: format_sheet(
        ws, header_color="375623", highlight_col="Max Visit Date", highlight_color="E2EFDA"
    ),
    "destruction": lambda ws: format_sheet(ws, header_color="595959"),
    "shipments": lambda ws: format_shipment_sheet(ws),
    "site_summary": lambda ws: format_sheet(ws, header_color="1F4E79"),
}
def process_study(cursor, study):
    """Load the latest import of ``study`` and build every report frame.

    Args:
        cursor: DB cursor passed through to the loader functions.
        study: Study identifier.

    Returns:
        List of seven DataFrames in the order of ``SHEETS_DEF``: inventory
        overview, expired, assigned-not-dispensed, not-returned, kits for
        destruction, shipments and site summary.

    Raises:
        RuntimeError: Propagated from ``get_latest_import_id`` when the study
            has no import.
    """
    import_id = get_latest_import_id(cursor, study)
    print(f" import_id = {import_id}")

    df = load_inventory(cursor, study, import_id)
    shipments_df = load_shipments(cursor, study, import_id)

    expired_df = build_expired(df)
    assigned_df = build_assigned_not_dispensed(df)
    not_returned_df = build_not_returned(df)
    destruction_df = build_kits_for_destruction(df)
    site_summ_df = build_site_summary(shipments_df)

    # Order must match SHEETS_DEF; save_study_report pairs them positionally.
    return [
        df, expired_df, assigned_df, not_returned_df,
        destruction_df, shipments_df, site_summ_df,
    ]
def save_study_report(study, data_frames):
    """Write the report workbook of ``study`` and apply the sheet formatting.

    The frames are first written with pandas, one sheet per ``SHEETS_DEF``
    entry; the file is then reopened with openpyxl, each sheet is styled by
    its ``FORMAT_MAP`` formatter, and the workbook is saved in place.

    Args:
        study: Study identifier, used in the output file name.
        data_frames: Frames paired positionally with ``SHEETS_DEF``.
    """
    stamp = date.today().strftime('%Y-%m-%d')
    output_file = OUTPUT_DIR / f"{stamp} {study} report.xlsx"

    with pd.ExcelWriter(output_file, engine="openpyxl") as writer:
        for sheet_def, frame in zip(SHEETS_DEF, data_frames):
            frame.to_excel(writer, index=False, sheet_name=sheet_def[0])

    workbook = load_workbook(output_file)
    for sheet_name, fmt_key in SHEETS_DEF:
        apply_format = FORMAT_MAP[fmt_key]
        apply_format(workbook[sheet_name])
    workbook.save(output_file)
    print(f" Uloženo: {output_file}")
def main():
    """Generate the report workbook for every study listed in ``STUDIES``.

    A failure in one study is printed with its traceback and does not stop
    the remaining studies. The cursor and the connection are closed even if
    something escapes the loop (e.g. a keyboard interrupt).
    """
    import traceback

    OUTPUT_DIR.mkdir(exist_ok=True)
    conn = get_conn()
    try:
        cursor = conn.cursor(dictionary=True)
        try:
            for study, _ in STUDIES:
                print(f"\n{'='*55}")
                print(f"[{study}]")
                print(f"{'='*55}")
                try:
                    data_frames = process_study(cursor, study)
                    save_study_report(study, data_frames)
                except Exception as e:
                    # Best effort per study: report the error and continue.
                    print(f" CHYBA: {e}")
                    traceback.print_exc()
        finally:
            cursor.close()
    finally:
        conn.close()
    print("\nHotovo.")
# Run the report generation only when executed as a script, not on import.
if __name__ == "__main__":
    main()