diff --git a/Medidata/create_report_v1.0.md b/Medidata/create_report_v1.0.md new file mode 100644 index 0000000..e2cc0be --- /dev/null +++ b/Medidata/create_report_v1.0.md @@ -0,0 +1,43 @@ +# create_report_v1.0.md + +**Skript:** `create_report_v1.0.py` +**Verze:** 1.0 +**Datum:** 2026-06-01 + +## Popis + +Generuje Excel EDC DataListing report pro studii **77242113UCO3001** z MongoDB (db: `edc`). + +## Výstup + +`Medidata/reports/YYYY-MM-DD 77242113UCO3001 EDC DataListing v1.0.xlsx` + +Stará verze se automaticky přesune do `reports/TRASH/`. + +## Listy + +| List | Kolekce MongoDB | Záznamy (CZE) | +|------|----------------|---------------| +| DateofVisit | UCO3001.DateofVisit | 55 | +| ConcomitantTherapy | UCO3001.ConcomitantTherapy | 91 | +| TrialDisposition | UCO3001.TrialDispositionCompletion-Discontinuation | 3 | + +## Sloupce (každý list) + +**Pevné:** SiteNumber · SiteName · Subject · Visit · FolderSeq · RecordPos · LastModified + +**Dynamické:** všechny klíče z `fields{}` v pořadí výskytu v MongoDB + +## Formátování + +- Záhlaví: tmavomodrý fill, bílý tučný text, Calibri 10 +- Data: Calibri 10, tenké ohraničení +- Zmrazení řádku 1, autofilter, šířky sloupců auto (max 55) +- Datumy: DD-MMM-YYYY (čas jen pokud != 00:00) + +## Spuštění + +``` +cd Medidata +python create_report_v1.0.py +``` diff --git a/Medidata/create_report_v1.0.py b/Medidata/create_report_v1.0.py new file mode 100644 index 0000000..9d47e6d --- /dev/null +++ b/Medidata/create_report_v1.0.py @@ -0,0 +1,210 @@ +""" +create_report_v1.0.py +Verze: 1.0 +Datum: 2026-06-01 +Popis: Excel EDC DataListing report pro studii UCO3001 z MongoDB (db: edc). + Jeden list per kolekce (DateofVisit / ConcomitantTherapy / TrialDisposition). + Sloupce: SiteNumber, SiteName, Subject, Visit, FolderSeq, RecordPos, + LastModified + dynamické fields.* z MongoDB. + Výstup: reports/YYYY-MM-DD 77242113UCO3001 EDC DataListing v1.0.xlsx +""" + +import shutil +from datetime import datetime +from pathlib import Path + +from openpyxl import Workbook +from openpyxl.styles import Alignment, Border, Font, PatternFill, Side +from openpyxl.utils import get_column_letter +from pymongo import ASCENDING, MongoClient + +# ── Konfigurace ─────────────────────────────────────────────────────────────── + +MONGO_URI = "mongodb://192.168.1.76:27017" +DB_NAME = "edc" +STUDY_FULL = "77242113UCO3001" +VERSION = "1.0" + +OUTPUT_DIR = Path(__file__).parent / "reports" +TRASH_DIR = OUTPUT_DIR / "TRASH" + +COLLECTIONS = [ + "UCO3001.DateofVisit", + "UCO3001.ConcomitantTherapy", + "UCO3001.TrialDispositionCompletion-Discontinuation", +] + +# ── Formátování ─────────────────────────────────────────────────────────────── + +HEADER_FILL = PatternFill("solid", fgColor="1F4E79") +HEADER_FONT = Font(bold=True, color="FFFFFF", name="Calibri", size=10) +DATA_FONT = Font(name="Calibri", size=10) +THIN = Side(style="thin", color="CCCCCC") +BORDER = Border(left=THIN, right=THIN, top=THIN, bottom=THIN) + +# ── Pevné sloupce ───────────────────────────────────────────────────────────── + +FIXED_COLS = [ + ("SiteNumber", lambda d: d.get("site", {}).get("number", "")), + ("SiteName", lambda d: d.get("site", {}).get("name", "")), + ("Subject", lambda d: d.get("subject", {}).get("label", "")), + ("Visit", lambda d: d.get("form", {}).get("instanceName", "")), + ("FolderSeq", lambda d: d.get("form", {}).get("folderSeq", "")), + ("RecordPos", lambda d: d.get("form", {}).get("recordPosition", "")), + ("LastModified", lambda d: _fmt(d.get("lastModified", ""))), +] + + +# ── Helpers ─────────────────────────────────────────────────────────────────── + +def _fmt(value: str) -> str: + """ISO datetime string → 'DD-MMM-YYYY' nebo 'DD-MMM-YYYY HH:MM'.""" + if not value: + return "" + try: + dt = datetime.fromisoformat(value) + if dt.hour == 0 and dt.minute == 0 and dt.second == 0: + return dt.strftime("%d-%b-%Y") + return dt.strftime("%d-%b-%Y %H:%M") + except Exception: + return value + + +def _fmt_field(value) -> str: + """Naformátuje hodnotu z fields{} — datum nebo string.""" + if isinstance(value, str) and "T" in value and value.endswith(("+00:00", "Z")): + return _fmt(value) + return value if value is not None else "" + + +COLS_LAST_CT = [ + "CMTRT_ATC1", "CMTRT_ATC2", "CMTRT_ATC3", "CMTRT_ATC4", + "CMTRT_RXPREF", "CMTRT_TRADE_NAME", + "CMTRT_ATC1_CODE", "CMTRT_ATC2_CODE", "CMTRT_ATC3_CODE", "CMTRT_ATC4_CODE", + "CMTRT_RXPREF_CODE", "CMTRT_TRADE_NAME_CODE", +] + + +def _field_keys(docs: list, last: list | None = None) -> list: + """Vrátí seznam unikátních klíčů z fields{} — klíče v `last` přesunuty na konec.""" + seen = set() + keys = [] + for doc in docs: + for k in doc.get("fields", {}).keys(): + if k not in seen: + seen.add(k) + keys.append(k) + if last: + tail = [k for k in last if k in seen] + keys = [k for k in keys if k not in set(tail)] + tail + return keys + + +def _sheet_name(collection: str) -> str: + """UCO3001.SomeName → SomeName (max 31 znaků pro Excel).""" + name = collection.split(".", 1)[-1] + abbreviations = { + "TrialDispositionCompletion-Discontinuation": "TrialDisposition", + } + return abbreviations.get(name, name)[:31] + + +# ── Zápis listu ─────────────────────────────────────────────────────────────── + +def write_sheet(ws, docs: list, last_cols: list | None = None) -> None: + fixed_names = [c[0] for c in FIXED_COLS] + field_keys = _field_keys(docs, last=last_cols) + all_headers = fixed_names + field_keys + + # záhlaví + for col_i, header in enumerate(all_headers, 1): + cell = ws.cell(row=1, column=col_i, value=header) + cell.font = HEADER_FONT + cell.fill = HEADER_FILL + cell.border = BORDER + cell.alignment = Alignment(horizontal="center", vertical="center") + ws.row_dimensions[1].height = 18 + ws.freeze_panes = "A2" + + # data + for row_i, doc in enumerate(docs, 2): + fields = doc.get("fields", {}) + + for col_i, (_, getter) in enumerate(FIXED_COLS, 1): + cell = ws.cell(row=row_i, column=col_i, value=getter(doc)) + cell.font = DATA_FONT + cell.border = BORDER + cell.alignment = Alignment(vertical="top") + + for col_off, key in enumerate(field_keys): + col_i = len(FIXED_COLS) + col_off + 1 + cell = ws.cell(row=row_i, column=col_i, value=_fmt_field(fields.get(key, ""))) + cell.font = DATA_FONT + cell.border = BORDER + cell.alignment = Alignment(vertical="top") + + # autofilter + if all_headers: + ws.auto_filter.ref = f"A1:{get_column_letter(len(all_headers))}1" + + # šířky sloupců + widths = {i: len(h) for i, h in enumerate(all_headers, 1)} + for doc in docs: + fields = doc.get("fields", {}) + for col_i, (_, getter) in enumerate(FIXED_COLS, 1): + widths[col_i] = max(widths[col_i], len(str(getter(doc)))) + for col_off, key in enumerate(field_keys): + col_i = len(FIXED_COLS) + col_off + 1 + widths[col_i] = max(widths[col_i], len(str(fields.get(key, "")))) + + for col_i, w in widths.items(): + ws.column_dimensions[get_column_letter(col_i)].width = min(w + 2, 55) + + +# ── Main ────────────────────────────────────────────────────────────────────── + +def main() -> None: + client = MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000) + client.admin.command("ping") + db = client[DB_NAME] + + wb = Workbook() + wb.remove(wb.active) + + for coll_name in COLLECTIONS: + docs = list(db[coll_name].find( + {}, + {"_id": 0, "sourceFile": 0, "history": 0}, + sort=[ + ("site.number", ASCENDING), + ("subject.label", ASCENDING), + ("form.folderSeq", ASCENDING), + ("form.recordPosition", ASCENDING), + ], + )) + ws = wb.create_sheet(title=_sheet_name(coll_name)) + last = COLS_LAST_CT if "ConcomitantTherapy" in coll_name else None + write_sheet(ws, docs, last_cols=last) + print(f" {coll_name}: {len(docs)} zaznamu -> list '{ws.title}'") + + client.close() + + OUTPUT_DIR.mkdir(exist_ok=True) + TRASH_DIR.mkdir(exist_ok=True) + + # přesun starých verzí do TRASH + pattern = f"* {STUDY_FULL} EDC DataListing *.xlsx" + for old in OUTPUT_DIR.glob(pattern): + dest = TRASH_DIR / old.name + shutil.move(str(old), str(dest)) + print(f" Přesunuto do TRASH: {old.name}") + + today = datetime.now().strftime("%Y-%m-%d") + filename = f"{today} {STUDY_FULL} EDC DataListing v{VERSION}.xlsx" + out_path = OUTPUT_DIR / filename + wb.save(str(out_path)) + print(f"\nUloženo: {out_path}") + + +if __name__ == "__main__": + main() diff --git a/Medidata/reports/2026-06-01 77242113UCO3001 EDC DataListing v1.0.xlsx b/Medidata/reports/2026-06-01 77242113UCO3001 EDC DataListing v1.0.xlsx new file mode 100644 index 0000000..3efbaf6 Binary files /dev/null and b/Medidata/reports/2026-06-01 77242113UCO3001 EDC DataListing v1.0.xlsx differ diff --git a/Medidata/reports/TRASH/2026-06-01 77242113UCO3001 EDC DataListing v1.0.xlsx b/Medidata/reports/TRASH/2026-06-01 77242113UCO3001 EDC DataListing v1.0.xlsx new file mode 100644 index 0000000..b584d67 Binary files /dev/null and b/Medidata/reports/TRASH/2026-06-01 77242113UCO3001 EDC DataListing v1.0.xlsx differ