z230
This commit is contained in:
@@ -0,0 +1,648 @@
|
||||
"""
|
||||
create_report.py
|
||||
Verze: 1.6
|
||||
Datum: 2026-06-02
|
||||
|
||||
Generuje Excel report (.xlsm) pro studii 77242113UCO3001 z MongoDB databáze Clario.
|
||||
Výstup: U:/Dropbox/!!!Days/Downloads Z230/YYYY-MM-DD 77242113UCO3001 Clario Reports.xlsm
|
||||
|
||||
Zdroj dat:
|
||||
MongoDB 192.168.1.76, databáze Clario
|
||||
Kolekce Clario.MayoScore — skóre Mayo per pacient × visit
|
||||
Kolekce Clario.MayoDiary — denní záznamy deníku pacienta
|
||||
Kolekce Clario.eCOA_DCRs — data correction requests eCOA
|
||||
Kolekce Clario.ECG_DCRs — data correction requests ECG
|
||||
|
||||
Listy:
|
||||
MayoScore — jeden řádek = pacient × visit
|
||||
sloupec „KLIKNI SEM" naviguje na filtrovaný EligibleDays
|
||||
řádky I-0 s Modified Mayo < 5 červeně tučně
|
||||
MayoDiary — jeden řádek = denní záznam deníku pacienta
|
||||
EligibleDays — jeden řádek = jeden eligible day z MayoScore obohacený o data z MayoDiary;
|
||||
included/excluded flag, excluded dny šedě na žlutém pozadí
|
||||
eCOA_DCRs — všechna pole z kolekce Clario.eCOA_DCRs
|
||||
ECG_DCRs — všechna pole z kolekce Clario.ECG_DCRs
|
||||
|
||||
VBA makro (Worksheet_SelectionChange na listu MayoScore):
|
||||
Klik na sloupec „KLIKNI SEM" → přepne na EligibleDays a vyfiltruje záznamy
|
||||
pro daného pacienta a visit. Vyžaduje povolení maker při otevření souboru.
|
||||
"""
|
||||
|
||||
VERSION = "1.6"
|
||||
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
import time
|
||||
|
||||
from pymongo import MongoClient
|
||||
from openpyxl import Workbook
|
||||
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
|
||||
from openpyxl.utils import get_column_letter
|
||||
import xlwings as xw
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Konfigurace
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
MONGO_URI = "mongodb://192.168.1.76:27017"
|
||||
DB_NAME = "Clario"
|
||||
OUTPUT_DIR = Path(r"U:\Dropbox\!!!Days\Downloads Z230")
|
||||
|
||||
VISIT_ORDER = ["I-0", "I-2", "I-4", "I-8", "I-12"]
|
||||
|
||||
COLUMNS_SCORE = [
|
||||
("KLIKNI SEM", lambda d: "▶ klikni sem"),
|
||||
("Site", lambda d: d.get("site", {}).get("name", "")),
|
||||
("Subject ID", lambda d: d.get("subject", {}).get("id", "")),
|
||||
("Visit", lambda d: d["fields"].get("Visit", "")),
|
||||
("Visit Date", lambda d: d["fields"].get("Visit Date", "")),
|
||||
("Baseline Stool Frequency", lambda d: _num(d["fields"].get("Baseline Stool Frequency", ""))),
|
||||
("Central Endoscopy Score", lambda d: _num(d["fields"].get("Central Endoscopy Score", ""))),
|
||||
("PGA Score", lambda d: _num(d["fields"].get("PGA Score", ""))),
|
||||
("Stool Frequency Sub-score", lambda d: _num(d["fields"].get("Stool Frequency Sub-score", ""))),
|
||||
("Rectal Bleeding Sub-score", lambda d: _num(d["fields"].get("Rectal Bleeding Sub-score", ""))),
|
||||
("Partial Mayo Score", lambda d: _num(d["fields"].get("Partial Mayo Score", ""))),
|
||||
("Modified Mayo Score", lambda d: _num(d["fields"].get("Modified Mayo Score", ""))),
|
||||
("Full Mayo Score", lambda d: _num(d["fields"].get("Full Mayo Score", ""))),
|
||||
("Site Action", lambda d: d.get("Site Action") or ""),
|
||||
("Last Mayo Score Submission", lambda d: d.get("Last Mayo Score Submission") or ""),
|
||||
("Wk I-12 Responder", lambda d: d.get("Week I-12 Clinical Responder") or ""),
|
||||
("Wk I-12 Remission", lambda d: d.get("Week I-12 Clinical Remission") or ""),
|
||||
("Clinical Flare", lambda d: d.get("Clinical Flare") or ""),
|
||||
("Loss of Response", lambda d: d.get("Loss of Response") or ""),
|
||||
("Partial Mayo Post LoR", lambda d: d.get("Partial Mayo Response Post Loss of Response") or ""),
|
||||
("Partial Mayo Non-Resp", lambda d: d.get("Partial Mayo Response for Clinical Non-Responders") or ""),
|
||||
]
|
||||
|
||||
COLUMNS_DIARY = [
|
||||
("Subject ID", lambda d: d.get("subject", {}).get("id", "")),
|
||||
("Report Date", lambda d: d["fields"].get("Report Date", "")),
|
||||
("Baseline Stool Count", lambda d: _num(d["fields"].get("Baseline Stool Count", ""))),
|
||||
("Stool Frequency", lambda d: _num(d["fields"].get("Stool Frequency", ""))),
|
||||
("MAYO050", lambda d: d["fields"].get("MAYO050", "")),
|
||||
("Not Applicable", lambda d: d["fields"].get("Not Applicable", "")),
|
||||
("Constipation", lambda d: d["fields"].get("Constipation", "")),
|
||||
("Diarrhea", lambda d: d["fields"].get("Diarrhea", "")),
|
||||
("Irregularity", lambda d: d["fields"].get("Irregularity", "")),
|
||||
]
|
||||
|
||||
COLUMNS_ECOA_DCRS = [
|
||||
("Site", lambda d: d.get("site", {}).get("name", "")),
|
||||
("Subject ID", lambda d: d.get("subject", {}).get("id", "")),
|
||||
("Data Correction ID", lambda d: d["fields"].get("Data Correction ID", "")),
|
||||
("PI Name", lambda d: d["fields"].get("PI Name", "")),
|
||||
("Creation Date UTC", lambda d: d["fields"].get("Creation Date UTC", "")),
|
||||
("Date of Last Action UTC", lambda d: d["fields"].get("Date of Last Action UTC", "")),
|
||||
("Status", lambda d: d["fields"].get("Status", "")),
|
||||
("Type", lambda d: d["fields"].get("Type", "")),
|
||||
("Next Action Required", lambda d: d["fields"].get("Next Action Required", "")),
|
||||
("Category", lambda d: d["fields"].get("Category", "")),
|
||||
("Total Open Period", lambda d: d["fields"].get("Total Open Period", "")),
|
||||
("Total Open Time (Days)", lambda d: _num(d["fields"].get("Total Open Time (Days)", ""))),
|
||||
("Current Status Time (Days)", lambda d: _num(d["fields"].get("Current Status Time (Days)", ""))),
|
||||
("Reason for Change", lambda d: d["fields"].get("Reason for Change", "")),
|
||||
("Description", lambda d: d["fields"].get("Description", "")),
|
||||
("Resolution", lambda d: d["fields"].get("Resolution", "")),
|
||||
("Query History", lambda d: d["fields"].get("Query History", "")),
|
||||
("Age at Informed Consent", lambda d: d["fields"].get("Age at Informed Consent", "")),
|
||||
("Baseline Stool Count", lambda d: _num(d["fields"].get("Baseline Stool Count", ""))),
|
||||
("firstSeen", lambda d: d.get("firstSeen", "")),
|
||||
("lastSeen", lambda d: d.get("lastSeen", "")),
|
||||
]
|
||||
|
||||
COLUMNS_ECG_DCRS = [
|
||||
("Site ID", lambda d: d.get("site", {}).get("name", "")),
|
||||
("Subject Number", lambda d: d.get("subject", {}).get("id", "")),
|
||||
("Data Correction ID", lambda d: d["fields"].get("Data Correction ID", "")),
|
||||
("PI Name", lambda d: d["fields"].get("PI_NAME", "")),
|
||||
("Age", lambda d: d["fields"].get("Age", "")),
|
||||
("Creation Date UTC", lambda d: d["fields"].get("Creation Date UTC", "")),
|
||||
("Date of Last Action UTC", lambda d: d["fields"].get("Date of Last Action UTC", "")),
|
||||
("Status", lambda d: d["fields"].get("Status", "")),
|
||||
("Type", lambda d: d["fields"].get("Type", "")),
|
||||
("Next Action Required", lambda d: d["fields"].get("Next Action Required", "")),
|
||||
("Category", lambda d: d["fields"].get("Category", "")),
|
||||
("Total Open Period", lambda d: d["fields"].get("Total Open Period", "")),
|
||||
("Total Open Time (Days)", lambda d: _num(d["fields"].get("Total Open Time (Days)", ""))),
|
||||
("Current Status Time (Days)", lambda d: _num(d["fields"].get("Current Status Time (Days)", ""))),
|
||||
("Reason for Change", lambda d: d["fields"].get("Reason for Change", "")),
|
||||
("Query History", lambda d: d["fields"].get("Query History", "")),
|
||||
("firstSeen", lambda d: d.get("firstSeen", "")),
|
||||
("lastSeen", lambda d: d.get("lastSeen", "")),
|
||||
]
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _num(value):
|
||||
"""Převede číselný string na int, jinak vrátí původní hodnotu nebo None."""
|
||||
if value == "" or value is None:
|
||||
return None
|
||||
try:
|
||||
return int(value)
|
||||
except (ValueError, TypeError):
|
||||
try:
|
||||
return float(value)
|
||||
except (ValueError, TypeError):
|
||||
return value
|
||||
|
||||
|
||||
def _visit_sort_key(doc):
|
||||
visit = doc["fields"].get("Visit", "")
|
||||
try:
|
||||
idx = VISIT_ORDER.index(visit)
|
||||
except ValueError:
|
||||
idx = len(VISIT_ORDER)
|
||||
return (doc.get("site", {}).get("name", ""), doc.get("subject", {}).get("id", ""), idx, visit)
|
||||
|
||||
|
||||
def _iso_to_date(value):
|
||||
"""ISO string → Python date pro Excel."""
|
||||
if not isinstance(value, str):
|
||||
return value
|
||||
try:
|
||||
return datetime.fromisoformat(value).date()
|
||||
except ValueError:
|
||||
return value
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Styly
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
HEADER_FILL = PatternFill("solid", fgColor="1F497D")
|
||||
HEADER_FONT = Font(bold=True, color="FFFFFF", size=10)
|
||||
CELL_FONT = Font(size=10)
|
||||
ALIGN_CTR = Alignment(horizontal="center", vertical="center", wrap_text=False)
|
||||
ALIGN_LEFT = Alignment(horizontal="left", vertical="center")
|
||||
|
||||
THIN = Side(style="thin", color="BFBFBF")
|
||||
BORDER = Border(left=THIN, right=THIN, top=THIN, bottom=THIN)
|
||||
|
||||
# zebra
|
||||
FILL_ODD = PatternFill("solid", fgColor="FFFFFF")
|
||||
FILL_EVEN = PatternFill("solid", fgColor="EBF1DE")
|
||||
|
||||
# DCR status barvy
|
||||
FILL_DCR_SITE = PatternFill("solid", fgColor="FFFF00") # žlutá — čeká lékař
|
||||
FILL_DCR_CLARIO = PatternFill("solid", fgColor="BDD7EE") # modrá — čeká Clario
|
||||
FILL_DCR_QC = PatternFill("solid", fgColor="F4B942") # oranžová — ReadyForQC
|
||||
FILL_DCR_DONE = PatternFill("solid", fgColor="FFFFFF") # bílá — Completed
|
||||
|
||||
SCORE_COLS = {"Partial Mayo Score", "Modified Mayo Score", "Full Mayo Score"}
|
||||
SCORE_FILL = PatternFill("solid", fgColor="FFC7CE") # červená pro skóre ≥ 5 (placeholder — nepoužíváme podmíněné formátování)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Sestavení sheetu
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _build_sheet(ws, docs, columns, date_cols, center_cols, col_widths, row_font_fn=None, wrap_cols=None, header_row=1):
|
||||
headers = [c[0] for c in columns]
|
||||
|
||||
for col_idx, header in enumerate(headers, 1):
|
||||
cell = ws.cell(row=header_row, column=col_idx, value=header)
|
||||
cell.font = HEADER_FONT
|
||||
cell.fill = HEADER_FILL
|
||||
cell.alignment = ALIGN_CTR
|
||||
cell.border = BORDER
|
||||
ws.row_dimensions[header_row].height = 28
|
||||
|
||||
data_start = header_row + 1
|
||||
for row_idx, doc in enumerate(docs, data_start):
|
||||
fill = FILL_EVEN if (row_idx - header_row) % 2 == 0 else FILL_ODD
|
||||
font = row_font_fn(doc) if row_font_fn else CELL_FONT
|
||||
for col_idx, (col_name, getter) in enumerate(columns, 1):
|
||||
value = getter(doc)
|
||||
if col_name in date_cols and isinstance(value, str):
|
||||
value = _iso_to_date(value)
|
||||
cell = ws.cell(row=row_idx, column=col_idx, value=value)
|
||||
cell.font = font
|
||||
cell.fill = fill
|
||||
cell.border = BORDER
|
||||
if wrap_cols and col_name in wrap_cols:
|
||||
cell.alignment = Alignment(horizontal="left", vertical="top", wrap_text=True)
|
||||
else:
|
||||
cell.alignment = ALIGN_CTR if col_name in center_cols else ALIGN_LEFT
|
||||
|
||||
for col_idx, (col_name, _) in enumerate(columns, 1):
|
||||
ws.column_dimensions[get_column_letter(col_idx)].width = col_widths.get(col_name, 14)
|
||||
|
||||
for col_name in date_cols:
|
||||
if col_name in headers:
|
||||
letter = get_column_letter(headers.index(col_name) + 1)
|
||||
for row_idx in range(data_start, len(docs) + data_start):
|
||||
ws[f"{letter}{row_idx}"].number_format = "DD-MMM-YYYY"
|
||||
|
||||
ws.freeze_panes = f"A{data_start}"
|
||||
ws.auto_filter.ref = f"A{header_row}:{get_column_letter(len(headers))}{header_row}"
|
||||
|
||||
|
||||
def _score_row_font(doc):
|
||||
visit = doc["fields"].get("Visit", "")
|
||||
try:
|
||||
mod_mayo = int(doc["fields"].get("Modified Mayo Score", ""))
|
||||
except (ValueError, TypeError):
|
||||
mod_mayo = None
|
||||
if visit == "I-0" and mod_mayo is not None and mod_mayo < 5:
|
||||
return Font(size=10, bold=True, color="FF0000")
|
||||
return CELL_FONT
|
||||
|
||||
|
||||
def build_mayo_score_sheet(ws, docs):
|
||||
_build_sheet(
|
||||
ws, docs, COLUMNS_SCORE,
|
||||
date_cols={"Visit Date", "Last Mayo Score Submission"},
|
||||
center_cols={"KLIKNI SEM", "Visit", "Central Endoscopy Score", "PGA Score",
|
||||
"Stool Frequency Sub-score", "Rectal Bleeding Sub-score",
|
||||
"Partial Mayo Score", "Modified Mayo Score", "Full Mayo Score",
|
||||
"Baseline Stool Frequency",
|
||||
"Wk I-12 Responder", "Wk I-12 Remission", "Clinical Flare",
|
||||
"Loss of Response", "Partial Mayo Post LoR", "Partial Mayo Non-Resp",
|
||||
"Last Mayo Score Submission"},
|
||||
col_widths={
|
||||
"KLIKNI SEM": 14,
|
||||
"Site": 18, "Subject ID": 16, "Visit": 12, "Visit Date": 14,
|
||||
"Baseline Stool Frequency": 14, "Central Endoscopy Score": 14,
|
||||
"PGA Score": 10, "Stool Frequency Sub-score": 14,
|
||||
"Rectal Bleeding Sub-score": 14, "Partial Mayo Score": 14,
|
||||
"Modified Mayo Score": 14, "Full Mayo Score": 13,
|
||||
"Site Action": 22, "Last Mayo Score Submission": 16,
|
||||
"Wk I-12 Responder": 14, "Wk I-12 Remission": 14,
|
||||
"Clinical Flare": 14, "Loss of Response": 14,
|
||||
"Partial Mayo Post LoR": 20, "Partial Mayo Non-Resp": 20,
|
||||
},
|
||||
row_font_fn=_score_row_font,
|
||||
)
|
||||
# Speciální styl pro sloupec KLIKNI SEM — vypadá jako tlačítko/odkaz
|
||||
link_font = Font(size=10, bold=True, color="FFFFFF")
|
||||
link_fill = PatternFill("solid", fgColor="2E75B6")
|
||||
for row in range(2, len(docs) + 2):
|
||||
cell = ws.cell(row=row, column=1)
|
||||
cell.font = link_font
|
||||
cell.fill = link_fill
|
||||
cell.alignment = ALIGN_CTR
|
||||
|
||||
|
||||
def build_mayo_diary_sheet(ws, docs):
|
||||
_build_sheet(
|
||||
ws, docs, COLUMNS_DIARY,
|
||||
date_cols={"Report Date"},
|
||||
center_cols={"Baseline Stool Count", "Stool Frequency", "Not Applicable",
|
||||
"Constipation", "Diarrhea", "Irregularity"},
|
||||
col_widths={
|
||||
"Subject ID": 16, "Report Date": 14, "Baseline Stool Count": 14,
|
||||
"Stool Frequency": 14, "MAYO050": 48, "Not Applicable": 14,
|
||||
"Constipation": 14, "Diarrhea": 12, "Irregularity": 14,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
def build_eligible_days_sheet(ws, score_docs, diary_docs):
|
||||
# Lookup diary records by (subject_id, date_part YYYY-MM-DD)
|
||||
diary_lookup: dict[tuple, dict] = {}
|
||||
for d in diary_docs:
|
||||
subj = d.get("subject", {}).get("id", "")
|
||||
date_iso = d["fields"].get("Report Date", "")
|
||||
date_part = date_iso[:10] if date_iso else ""
|
||||
if subj and date_part:
|
||||
diary_lookup[(subj, date_part)] = d
|
||||
|
||||
headers = [
|
||||
"Included", "Subject ID", "Visit", "Visit Date", "Day",
|
||||
"Report Date", "Baseline Stool Count", "Stool Frequency",
|
||||
"MAYO050", "Not Applicable", "Constipation", "Diarrhea", "Irregularity",
|
||||
]
|
||||
col_widths = {
|
||||
"Included": 10, "Subject ID": 16, "Visit": 10, "Visit Date": 14, "Day": 8,
|
||||
"Report Date": 14, "Baseline Stool Count": 14, "Stool Frequency": 14,
|
||||
"MAYO050": 48, "Not Applicable": 14, "Constipation": 14,
|
||||
"Diarrhea": 12, "Irregularity": 14,
|
||||
}
|
||||
center_cols = {"Included", "Visit", "Day", "Baseline Stool Count", "Stool Frequency",
|
||||
"Not Applicable", "Constipation", "Diarrhea", "Irregularity"}
|
||||
date_cols = {"Visit Date", "Report Date"}
|
||||
no_fill = PatternFill("solid", fgColor="FFF2CC") # žlutá pro excluded dny
|
||||
|
||||
for col_idx, header in enumerate(headers, 1):
|
||||
cell = ws.cell(row=1, column=col_idx, value=header)
|
||||
cell.font = HEADER_FONT
|
||||
cell.fill = HEADER_FILL
|
||||
cell.alignment = ALIGN_CTR
|
||||
cell.border = BORDER
|
||||
ws.row_dimensions[1].height = 28
|
||||
|
||||
row_idx = 2
|
||||
for score_doc in score_docs:
|
||||
subj = score_doc.get("subject", {}).get("id", "")
|
||||
visit = score_doc["fields"].get("Visit", "")
|
||||
visit_date = score_doc["fields"].get("Visit Date", "")
|
||||
|
||||
for n in range(1, 11):
|
||||
day_date_iso = score_doc["fields"].get(f"Eligible Day (-{n})")
|
||||
if not day_date_iso or day_date_iso == "-":
|
||||
continue
|
||||
date_part = day_date_iso[:10]
|
||||
excl_reason = score_doc["fields"].get(f"Day (-{n}) Excluded Reason(s)", "")
|
||||
included = "No" if excl_reason and excl_reason != "-" else "Yes"
|
||||
|
||||
diary = diary_lookup.get((subj, date_part), {})
|
||||
df = diary.get("fields", {})
|
||||
|
||||
fill = no_fill if included == "No" else (FILL_EVEN if row_idx % 2 == 0 else FILL_ODD)
|
||||
font = Font(size=10, color="808080") if included == "No" else CELL_FONT
|
||||
|
||||
values = [
|
||||
included,
|
||||
subj,
|
||||
visit,
|
||||
_iso_to_date(visit_date) if isinstance(visit_date, str) else visit_date,
|
||||
f"-{n}",
|
||||
_iso_to_date(day_date_iso),
|
||||
_num(df.get("Baseline Stool Count", "")),
|
||||
_num(df.get("Stool Frequency", "")),
|
||||
df.get("MAYO050", ""),
|
||||
df.get("Not Applicable", ""),
|
||||
df.get("Constipation", ""),
|
||||
df.get("Diarrhea", ""),
|
||||
df.get("Irregularity", ""),
|
||||
]
|
||||
|
||||
for col_idx, (header, value) in enumerate(zip(headers, values), 1):
|
||||
cell = ws.cell(row=row_idx, column=col_idx, value=value)
|
||||
cell.font = font
|
||||
cell.fill = fill
|
||||
cell.border = BORDER
|
||||
if header in date_cols:
|
||||
cell.number_format = "DD-MMM-YYYY"
|
||||
cell.alignment = ALIGN_CTR if header in center_cols else ALIGN_LEFT
|
||||
|
||||
row_idx += 1
|
||||
|
||||
for col_idx, header in enumerate(headers, 1):
|
||||
ws.column_dimensions[get_column_letter(col_idx)].width = col_widths.get(header, 14)
|
||||
|
||||
ws.freeze_panes = "A2"
|
||||
ws.auto_filter.ref = f"A1:{get_column_letter(len(headers))}1"
|
||||
|
||||
|
||||
def _build_dcr_legend(ws):
|
||||
"""Vloží legendu do řádků 1–4, prázdný řádek 5. Data začínají od řádku 6."""
|
||||
legend = [
|
||||
(FILL_DCR_SITE, "Čeká lékař — Next Action Required = Site (lékař musí odpovědět nebo potvrdit)"),
|
||||
(FILL_DCR_CLARIO, "Čeká Clario — Next Action Required = Clario DM (Clario dostalo podklady, provede změnu)"),
|
||||
(FILL_DCR_QC, "ReadyForQC — Clario provedlo změny, čeká na finální QC kontrolu"),
|
||||
(FILL_DCR_DONE, "Completed / Resolved — DCR je uzavřen"),
|
||||
]
|
||||
for i, (fill, text) in enumerate(legend, 1):
|
||||
a = ws.cell(row=i, column=1, value="")
|
||||
a.fill = fill
|
||||
a.border = BORDER
|
||||
b = ws.cell(row=i, column=2, value=text)
|
||||
b.font = Font(size=10, bold=True)
|
||||
b.alignment = ALIGN_LEFT
|
||||
# řádek 5 prázdný — nic nedělat
|
||||
|
||||
|
||||
def _dcr_row_fill(doc):
|
||||
"""Vrátí fill barvu dle stavu DCR."""
|
||||
status = doc["fields"].get("Status", "")
|
||||
next_action = doc["fields"].get("Next Action Required", "")
|
||||
if status in ("Completed", "Resolved"):
|
||||
return FILL_DCR_DONE
|
||||
if status == "ReadyForQC":
|
||||
return FILL_DCR_QC
|
||||
if "Site" in next_action:
|
||||
return FILL_DCR_SITE
|
||||
if "Clario" in next_action or next_action == "":
|
||||
return FILL_DCR_CLARIO
|
||||
return FILL_ODD
|
||||
|
||||
|
||||
def build_ecoa_dcrs_sheet(ws, docs):
|
||||
_build_dcr_legend(ws)
|
||||
docs_sorted = sorted(docs, key=lambda d: (
|
||||
d.get("site", {}).get("name", ""),
|
||||
d.get("subject", {}).get("id", ""),
|
||||
d["fields"].get("Creation Date UTC", ""),
|
||||
))
|
||||
_build_sheet(
|
||||
ws, docs_sorted, COLUMNS_ECOA_DCRS,
|
||||
date_cols={"Creation Date UTC", "Date of Last Action UTC"},
|
||||
center_cols={"Status", "Type", "Next Action Required", "Category",
|
||||
"Total Open Time (Days)", "Current Status Time (Days)",
|
||||
"Baseline Stool Count", "firstSeen", "lastSeen"},
|
||||
col_widths={
|
||||
"Site": 16, "Subject ID": 16, "Data Correction ID": 18,
|
||||
"PI Name": 18, "Creation Date UTC": 14, "Date of Last Action UTC": 14,
|
||||
"Status": 14, "Type": 16, "Next Action Required": 16, "Category": 20,
|
||||
"Total Open Period": 14, "Total Open Time (Days)": 14,
|
||||
"Current Status Time (Days)": 16, "Reason for Change": 20,
|
||||
"Description": 50, "Resolution": 50, "Query History": 60,
|
||||
"Age at Informed Consent": 14, "Baseline Stool Count": 14,
|
||||
"firstSeen": 12, "lastSeen": 12,
|
||||
},
|
||||
wrap_cols={"Reason for Change", "Description", "Resolution", "Query History"},
|
||||
header_row=6,
|
||||
row_font_fn=lambda doc: CELL_FONT,
|
||||
)
|
||||
# Přebarvení řádků dle DCR stavu (přepíše zebra fill)
|
||||
data_start = 7
|
||||
for row_idx, doc in enumerate(docs_sorted, data_start):
|
||||
fill = _dcr_row_fill(doc)
|
||||
for col_idx in range(1, len(COLUMNS_ECOA_DCRS) + 1):
|
||||
ws.cell(row=row_idx, column=col_idx).fill = fill
|
||||
|
||||
|
||||
def build_ecg_dcrs_sheet(ws, docs):
|
||||
_build_dcr_legend(ws)
|
||||
docs_sorted = sorted(docs, key=lambda d: (
|
||||
d.get("site", {}).get("name", ""),
|
||||
d.get("subject", {}).get("id", ""),
|
||||
d["fields"].get("Creation Date UTC", ""),
|
||||
))
|
||||
_build_sheet(
|
||||
ws, docs_sorted, COLUMNS_ECG_DCRS,
|
||||
date_cols={"Creation Date UTC", "Date of Last Action UTC"},
|
||||
center_cols={"Status", "Type", "Next Action Required", "Category",
|
||||
"Total Open Time (Days)", "Current Status Time (Days)",
|
||||
"firstSeen", "lastSeen"},
|
||||
col_widths={
|
||||
"Site ID": 14, "Subject Number": 16, "Data Correction ID": 16,
|
||||
"PI Name": 18, "Age": 10, "Creation Date UTC": 14,
|
||||
"Date of Last Action UTC": 14, "Status": 14, "Type": 12,
|
||||
"Next Action Required": 16, "Category": 14,
|
||||
"Total Open Period": 14, "Total Open Time (Days)": 14,
|
||||
"Current Status Time (Days)": 16, "Reason for Change": 20,
|
||||
"Query History": 60, "firstSeen": 12, "lastSeen": 12,
|
||||
},
|
||||
wrap_cols={"Query History"},
|
||||
header_row=6,
|
||||
row_font_fn=lambda doc: CELL_FONT,
|
||||
)
|
||||
# Přebarvení řádků dle DCR stavu
|
||||
data_start = 7
|
||||
for row_idx, doc in enumerate(docs_sorted, data_start):
|
||||
fill = _dcr_row_fill(doc)
|
||||
for col_idx in range(1, len(COLUMNS_ECG_DCRS) + 1):
|
||||
ws.cell(row=row_idx, column=col_idx).fill = fill
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers: výstupní cesta
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _unique_path(directory: Path, stem: str, suffix: str) -> Path:
|
||||
candidate = directory / f"{stem}{suffix}"
|
||||
if not candidate.exists():
|
||||
return candidate
|
||||
n = 2
|
||||
while True:
|
||||
candidate = directory / f"{stem} ({n}){suffix}"
|
||||
if not candidate.exists():
|
||||
return candidate
|
||||
n += 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Timing helper
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _tick(label: str, t0: float) -> float:
|
||||
"""Vypíše dobu od t0 a vrátí aktuální čas jako nový t0."""
|
||||
elapsed = time.perf_counter() - t0
|
||||
print(f" {label:<30} {elapsed:6.2f} s")
|
||||
return time.perf_counter()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def main():
|
||||
t_total = time.perf_counter()
|
||||
print("Spouštím generování reportu...")
|
||||
print()
|
||||
|
||||
# -- 1. MongoDB: připojení + načtení + seřazení --------------------------
|
||||
t = time.perf_counter()
|
||||
client = MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000)
|
||||
client.admin.command("ping")
|
||||
db = client[DB_NAME]
|
||||
score_docs = list(db["Clario.MayoScore"].find({}))
|
||||
diary_docs = list(db["Clario.MayoDiary"].find({}))
|
||||
ecoa_dcr_docs = list(db["Clario.eCOA_DCRs"].find({}))
|
||||
ecg_dcr_docs = list(db["Clario.ECG_DCRs"].find({}))
|
||||
client.close()
|
||||
score_docs.sort(key=_visit_sort_key)
|
||||
diary_docs.sort(key=lambda d: (
|
||||
d.get("subject", {}).get("id", ""),
|
||||
d["fields"].get("Report Date", ""),
|
||||
))
|
||||
t = _tick(f"MongoDB (ping, fetch, sort → {len(score_docs)} + {len(diary_docs)} + {len(ecoa_dcr_docs)} + {len(ecg_dcr_docs)} záznamů)", t)
|
||||
|
||||
# -- 2–4. Tvorba listů ---------------------------------------------------
|
||||
wb = Workbook()
|
||||
ws_score = wb.active
|
||||
ws_score.title = "MayoScore"
|
||||
build_mayo_score_sheet(ws_score, score_docs)
|
||||
t = _tick("List MayoScore (KLIKNI SEM, zebra, červené I-0, autofilter)", t)
|
||||
|
||||
ws_diary = wb.create_sheet("MayoDiary")
|
||||
build_mayo_diary_sheet(ws_diary, diary_docs)
|
||||
t = _tick("List MayoDiary (zebra, formátování dat, autofilter)", t)
|
||||
|
||||
ws_days = wb.create_sheet("EligibleDays")
|
||||
build_eligible_days_sheet(ws_days, score_docs, diary_docs)
|
||||
t = _tick("List EligibleDays (diary lookup, included/excluded flag, autofilter)", t)
|
||||
|
||||
ws_ecoa = wb.create_sheet("eCOA_DCRs")
|
||||
build_ecoa_dcrs_sheet(ws_ecoa, ecoa_dcr_docs)
|
||||
t = _tick(f"List eCOA_DCRs ({len(ecoa_dcr_docs)} záznamů)", t)
|
||||
|
||||
ws_ecg = wb.create_sheet("ECG_DCRs")
|
||||
build_ecg_dcrs_sheet(ws_ecg, ecg_dcr_docs)
|
||||
t = _tick(f"List ECG_DCRs ({len(ecg_dcr_docs)} záznamů)", t)
|
||||
|
||||
# -- 5. Uložení XLSX -----------------------------------------------------
|
||||
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
|
||||
today = datetime.now().strftime("%Y-%m-%d")
|
||||
base_stem = f"{today} 77242113UCO3001 Clario Reports"
|
||||
xlsm_path = _unique_path(OUTPUT_DIR, base_stem, ".xlsm")
|
||||
xlsx_path = xlsm_path.with_suffix(".xlsx")
|
||||
wb.save(str(xlsx_path))
|
||||
t = _tick("Uložení XLSX (openpyxl, dočasný soubor)", t)
|
||||
|
||||
# -- 6. Injektování VBA --------------------------------------------------
|
||||
inject_vba(xlsx_path, xlsm_path)
|
||||
xlsx_path.unlink(missing_ok=True)
|
||||
_tick("Injektování VBA (xlwings: open → AddFromString → SaveAs .xlsm)", t)
|
||||
|
||||
# -- Souhrn --------------------------------------------------------------
|
||||
total = time.perf_counter() - t_total
|
||||
print()
|
||||
print(f" {'Celkem':<30} {total:6.2f} s")
|
||||
print()
|
||||
print(f"Uloženo: {xlsm_path}")
|
||||
|
||||
|
||||
def inject_vba(xlsx_path: Path, xlsm_path: Path) -> None:
|
||||
vba_code = '''\
|
||||
Private Sub Worksheet_SelectionChange(ByVal Target As Range)
|
||||
If Target.Row < 2 Then Exit Sub
|
||||
If Target.Rows.Count > 1 Then Exit Sub
|
||||
If Target.Column <> 1 Then Exit Sub
|
||||
|
||||
Dim subjectId As String
|
||||
Dim visit As String
|
||||
subjectId = CStr(Me.Cells(Target.Row, 3).Value)
|
||||
visit = CStr(Me.Cells(Target.Row, 4).Value)
|
||||
|
||||
If subjectId = "" Or visit = "" Then Exit Sub
|
||||
|
||||
Dim ws As Worksheet
|
||||
On Error Resume Next
|
||||
Set ws = ThisWorkbook.Sheets("EligibleDays")
|
||||
On Error GoTo 0
|
||||
If ws Is Nothing Then Exit Sub
|
||||
|
||||
Application.ScreenUpdating = False
|
||||
|
||||
ws.AutoFilterMode = False
|
||||
ws.Range("A1").AutoFilter
|
||||
ws.Range("A1").AutoFilter Field:=2, Criteria1:=subjectId
|
||||
ws.Range("A1").AutoFilter Field:=3, Criteria1:=visit
|
||||
|
||||
ws.Activate
|
||||
ws.Range("A2").Select
|
||||
|
||||
Application.ScreenUpdating = True
|
||||
End Sub
|
||||
'''
|
||||
|
||||
app = xw.App(visible=False)
|
||||
try:
|
||||
wb = app.books.open(str(xlsx_path))
|
||||
# Najdi VBComponent odpovídající listu "MayoScore" podle tab názvu
|
||||
vb_comp = None
|
||||
for comp in wb.api.VBProject.VBComponents:
|
||||
if comp.Type == 100: # xlSheet
|
||||
try:
|
||||
if comp.Properties("Name").Value == "MayoScore":
|
||||
vb_comp = comp
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
if vb_comp is None:
|
||||
# fallback: první sheet (Sheet1)
|
||||
vb_comp = wb.api.VBProject.VBComponents("Sheet1")
|
||||
vb_comp.CodeModule.AddFromString(vba_code)
|
||||
wb.api.SaveAs(str(xlsm_path), FileFormat=52) # 52 = xlOpenXMLWorkbookMacroEnabled
|
||||
wb.close()
|
||||
finally:
|
||||
app.quit()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,302 @@
|
||||
"""
|
||||
import_to_mongo.py
|
||||
Verze: 1.2
|
||||
Datum: 2026-06-02
|
||||
|
||||
Import Clario CSV do MongoDB (databáze: Clario).
|
||||
|
||||
Kolekce: Clario.MayoDiary / Clario.MayoScore / Clario.eCOA_DCRs / Clario.ECG_DCRs
|
||||
Filtr: pouze řádky s Country == "Czech Republic"
|
||||
Klíč: MayoDiary → Subject ID + Form Number
|
||||
MayoScore → Participant ID + Visit
|
||||
eCOA_DCRs → Data Correction ID
|
||||
ECG_DCRs → Data Correction ID
|
||||
Historie: při změně fields se stará verze uloží do pole history[]
|
||||
Po importu přesune zpracované CSV do downloads/Zpracovano/
|
||||
|
||||
Použití:
|
||||
python import_to_mongo.py # importuje všechny CSV z downloads/
|
||||
python import_to_mongo.py downloads/konkretni.csv # jeden soubor
|
||||
"""
|
||||
|
||||
import csv
|
||||
import re
|
||||
import shutil
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from pymongo import MongoClient, ASCENDING
|
||||
|
||||
MONGO_URI = "mongodb://192.168.1.76:27017"
|
||||
DB_NAME = "Clario"
|
||||
DOWNLOADS_DIR = Path(__file__).parent / "downloads"
|
||||
PROCESSED_DIR = DOWNLOADS_DIR / "Zpracovano"
|
||||
|
||||
COUNTRY_FILTER = "Czech Republic"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Konfigurace kolekcí
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
COLLECTION_CONFIG = {
|
||||
"MayoDiary": {
|
||||
"collection": "Clario.MayoDiary",
|
||||
"subject_col": "Subject ID",
|
||||
"key_cols": ("Subject ID", "Form Number"),
|
||||
},
|
||||
"MayoScore": {
|
||||
"collection": "Clario.MayoScore",
|
||||
"subject_col": "Participant ID",
|
||||
"key_cols": ("Participant ID", "Visit"),
|
||||
"outcome_cols": (
|
||||
"Site Action",
|
||||
"Last Mayo Score Submission",
|
||||
"Week I-12 Clinical Responder",
|
||||
"Week I-12 Clinical Remission",
|
||||
"Clinical Flare",
|
||||
"Loss of Response",
|
||||
"Partial Mayo Response Post Loss of Response",
|
||||
"Partial Mayo Response for Clinical Non-Responders",
|
||||
),
|
||||
},
|
||||
"eCOA DCRs": {
|
||||
"collection": "Clario.eCOA_DCRs",
|
||||
"subject_col": "Subject ID",
|
||||
"key_cols": ("Data Correction ID",),
|
||||
},
|
||||
"ECG DCRs": {
|
||||
"collection": "Clario.ECG_DCRs",
|
||||
"subject_col": "Subject Number",
|
||||
"key_cols": ("Data Correction ID",),
|
||||
},
|
||||
}
|
||||
|
||||
DATE_FORMATS = [
|
||||
"%d-%b-%Y ",
|
||||
"%d-%b-%Y",
|
||||
"%d-%b-%Y %H:%M:%S",
|
||||
"%d %b %Y %H:%M:%S",
|
||||
"%d %b %Y %H:%M:%S:%f",
|
||||
"%d %b %Y",
|
||||
"%d %B %Y",
|
||||
"%Y%m%d %H:%M:%S.%f",
|
||||
"%Y-%m-%d %H:%M:%S",
|
||||
"%m/%d/%Y %I:%M:%S %p",
|
||||
]
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def clean_colname(name: str) -> str:
|
||||
"""Odstraní BOM a okolní uvozovky/mezery z názvu sloupce."""
|
||||
return name.lstrip("").strip().strip('"')
|
||||
|
||||
|
||||
def parse_date(value: str) -> str | None:
|
||||
v = value.strip()
|
||||
for fmt in DATE_FORMATS:
|
||||
try:
|
||||
dt = datetime.strptime(v, fmt.strip())
|
||||
return dt.replace(tzinfo=timezone.utc).isoformat()
|
||||
except ValueError:
|
||||
continue
|
||||
return None
|
||||
|
||||
|
||||
def extract_snapshot_date(filename: str) -> str:
|
||||
match = re.match(r"(\d{4}-\d{2}-\d{2})", Path(filename).name)
|
||||
return match.group(1) if match else datetime.now().strftime("%Y-%m-%d")
|
||||
|
||||
|
||||
def detect_collection_type(filename: str) -> str | None:
|
||||
"""Vrátí klíč do COLLECTION_CONFIG nebo None."""
|
||||
stem = Path(filename).stem
|
||||
for key in COLLECTION_CONFIG:
|
||||
if key in stem:
|
||||
return key
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CSV → dokument
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def map_row(row: dict, col_type: str) -> dict:
|
||||
cfg = COLLECTION_CONFIG[col_type]
|
||||
doc: dict = {}
|
||||
fields: dict = {}
|
||||
|
||||
cleaned = {clean_colname(k): v.strip() if v else "" for k, v in row.items()}
|
||||
|
||||
subject_col = cfg["subject_col"]
|
||||
doc["subject"] = {"id": cleaned.get(subject_col, "")}
|
||||
# ECG DCRs používají "Site ID" místo "Site"
|
||||
site_name = cleaned.get("Site") or cleaned.get("Site ID", "")
|
||||
doc["site"] = {"name": site_name}
|
||||
doc["country"] = cleaned.get("Country", "")
|
||||
doc["study"] = cleaned.get("Protocol", "")
|
||||
|
||||
key_parts = [cleaned.get(c, "") for c in cfg["key_cols"]]
|
||||
doc["recordKey"] = "_".join(key_parts)
|
||||
|
||||
outcome_cols = set(cfg.get("outcome_cols", ()))
|
||||
for col in outcome_cols:
|
||||
value = cleaned.get(col, "")
|
||||
if value and value != "-":
|
||||
parsed = parse_date(value)
|
||||
doc[col] = parsed if parsed else value
|
||||
else:
|
||||
doc[col] = None
|
||||
|
||||
skip_top = {"Protocol", "Country", "Site", subject_col} | outcome_cols
|
||||
for col, value in cleaned.items():
|
||||
if col in skip_top:
|
||||
continue
|
||||
if not value or value == "-":
|
||||
continue
|
||||
parsed = parse_date(value)
|
||||
fields[col] = parsed if parsed else value
|
||||
|
||||
doc["fields"] = fields
|
||||
return doc
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Import jednoho souboru
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def import_file(csv_path: str, db) -> dict:
|
||||
filename = Path(csv_path).name
|
||||
col_type = detect_collection_type(filename)
|
||||
if col_type is None:
|
||||
print(f" Preskakuji (neznamy typ): {filename}")
|
||||
return {"skipped": True}
|
||||
|
||||
cfg = COLLECTION_CONFIG[col_type]
|
||||
col_name = cfg["collection"]
|
||||
snapshot_date = extract_snapshot_date(filename)
|
||||
collection = db[col_name]
|
||||
|
||||
inserted = changed = unchanged = filtered_out = 0
|
||||
|
||||
with open(csv_path, encoding="utf-8-sig", newline="") as f:
|
||||
reader = csv.DictReader(f, delimiter=",", quotechar='"')
|
||||
|
||||
for row in reader:
|
||||
cleaned_row = {clean_colname(k): v for k, v in row.items()}
|
||||
country = cleaned_row.get("Country", "").strip()
|
||||
if COUNTRY_FILTER not in country:
|
||||
filtered_out += 1
|
||||
continue
|
||||
|
||||
doc = map_row(row, col_type)
|
||||
record_key = doc.get("recordKey")
|
||||
if not record_key:
|
||||
continue
|
||||
|
||||
doc["sourceFile"] = filename
|
||||
|
||||
existing = collection.find_one({"recordKey": record_key})
|
||||
|
||||
if existing is None:
|
||||
doc["firstSeen"] = snapshot_date
|
||||
doc["lastSeen"] = snapshot_date
|
||||
doc["history"] = []
|
||||
collection.insert_one(doc)
|
||||
inserted += 1
|
||||
|
||||
elif existing.get("fields") != doc["fields"]:
|
||||
old_entry = {
|
||||
"date": existing.get("lastSeen", snapshot_date),
|
||||
"fields": existing["fields"],
|
||||
}
|
||||
update_doc = {k: v for k, v in doc.items()}
|
||||
update_doc["lastSeen"] = snapshot_date
|
||||
collection.update_one(
|
||||
{"_id": existing["_id"]},
|
||||
{
|
||||
"$push": {"history": old_entry},
|
||||
"$set": update_doc,
|
||||
},
|
||||
)
|
||||
changed += 1
|
||||
|
||||
else:
|
||||
collection.update_one(
|
||||
{"_id": existing["_id"]},
|
||||
{"$set": {"lastSeen": snapshot_date, "sourceFile": filename}},
|
||||
)
|
||||
unchanged += 1
|
||||
|
||||
collection.create_index([("recordKey", ASCENDING)], unique=True)
|
||||
collection.create_index([("subject.id", ASCENDING)])
|
||||
collection.create_index([("site.name", ASCENDING)])
|
||||
if col_type == "MayoScore":
|
||||
collection.create_index([("Site Action", ASCENDING)])
|
||||
if col_type in ("eCOA DCRs", "ECG DCRs"):
|
||||
collection.create_index([("fields.Status", ASCENDING)])
|
||||
collection.create_index([("fields.Type", ASCENDING)])
|
||||
|
||||
stats = {
|
||||
"collection": col_name,
|
||||
"snapshot": snapshot_date,
|
||||
"inserted": inserted,
|
||||
"changed": changed,
|
||||
"unchanged": unchanged,
|
||||
"filtered_out": filtered_out,
|
||||
}
|
||||
print(f" {col_name} [{snapshot_date}]: +{inserted} new, ~{changed} changed, ={unchanged} same, -{filtered_out} non-CZ")
|
||||
return stats
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def main():
|
||||
paths: list[Path] = []
|
||||
|
||||
if len(sys.argv) > 1:
|
||||
for arg in sys.argv[1:]:
|
||||
p = Path(arg)
|
||||
if p.is_file():
|
||||
paths.append(p)
|
||||
else:
|
||||
print(f"Soubor nenalezen: {arg}")
|
||||
else:
|
||||
paths = sorted(DOWNLOADS_DIR.glob("*.csv"))
|
||||
|
||||
if not paths:
|
||||
print("Zadne CSV soubory k importu.")
|
||||
return
|
||||
|
||||
print(f"Nalezeno {len(paths)} souboru.\n")
|
||||
|
||||
client = MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000)
|
||||
client.admin.command("ping")
|
||||
db = client[DB_NAME]
|
||||
|
||||
PROCESSED_DIR.mkdir(exist_ok=True)
|
||||
|
||||
total = {"inserted": 0, "changed": 0, "unchanged": 0}
|
||||
|
||||
for csv_path in paths:
|
||||
print(f"Import: {csv_path.name}")
|
||||
stats = import_file(str(csv_path), db)
|
||||
if not stats.get("skipped"):
|
||||
for k in total:
|
||||
total[k] += stats.get(k, 0)
|
||||
|
||||
dest = PROCESSED_DIR / csv_path.name
|
||||
shutil.move(str(csv_path), str(dest))
|
||||
print(f" -> presunut do Zpracovano/")
|
||||
|
||||
client.close()
|
||||
|
||||
print(f"\nCelkem: +{total['inserted']} new, ~{total['changed']} changed, ={total['unchanged']} same")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user