z230
This commit is contained in:
@@ -0,0 +1,43 @@
|
|||||||
|
# create_report_v1.0.md
|
||||||
|
|
||||||
|
**Skript:** `create_report_v1.0.py`
|
||||||
|
**Verze:** 1.0
|
||||||
|
**Datum:** 2026-06-01
|
||||||
|
|
||||||
|
## Popis
|
||||||
|
|
||||||
|
Generuje Excel EDC DataListing report pro studii **77242113UCO3001** z MongoDB (db: `edc`).
|
||||||
|
|
||||||
|
## Výstup
|
||||||
|
|
||||||
|
`Medidata/reports/YYYY-MM-DD 77242113UCO3001 EDC DataListing v1.0.xlsx`
|
||||||
|
|
||||||
|
Stará verze se automaticky přesune do `reports/TRASH/`.
|
||||||
|
|
||||||
|
## Listy
|
||||||
|
|
||||||
|
| List | Kolekce MongoDB | Záznamy (CZE) |
|
||||||
|
|------|----------------|---------------|
|
||||||
|
| DateofVisit | UCO3001.DateofVisit | 55 |
|
||||||
|
| ConcomitantTherapy | UCO3001.ConcomitantTherapy | 91 |
|
||||||
|
| TrialDisposition | UCO3001.TrialDispositionCompletion-Discontinuation | 3 |
|
||||||
|
|
||||||
|
## Sloupce (každý list)
|
||||||
|
|
||||||
|
**Pevné:** SiteNumber · SiteName · Subject · Visit · FolderSeq · RecordPos · LastModified
|
||||||
|
|
||||||
|
**Dynamické:** všechny klíče z `fields{}` v pořadí výskytu v MongoDB
|
||||||
|
|
||||||
|
## Formátování
|
||||||
|
|
||||||
|
- Záhlaví: tmavomodrý fill, bílý tučný text, Calibri 10
|
||||||
|
- Data: Calibri 10, tenké ohraničení
|
||||||
|
- Zmrazení řádku 1, autofilter, šířky sloupců auto (max 55)
|
||||||
|
- Datumy: DD-MMM-YYYY (čas jen pokud != 00:00)
|
||||||
|
|
||||||
|
## Spuštění
|
||||||
|
|
||||||
|
```
|
||||||
|
cd Medidata
|
||||||
|
python create_report_v1.0.py
|
||||||
|
```
|
||||||
@@ -0,0 +1,210 @@
|
|||||||
|
"""
|
||||||
|
create_report_v1.0.py
|
||||||
|
Verze: 1.0
|
||||||
|
Datum: 2026-06-01
|
||||||
|
Popis: Excel EDC DataListing report pro studii UCO3001 z MongoDB (db: edc).
|
||||||
|
Jeden list per kolekce (DateofVisit / ConcomitantTherapy / TrialDisposition).
|
||||||
|
Sloupce: SiteNumber, SiteName, Subject, Visit, FolderSeq, RecordPos,
|
||||||
|
LastModified + dynamické fields.* z MongoDB.
|
||||||
|
Výstup: reports/YYYY-MM-DD 77242113UCO3001 EDC DataListing v1.0.xlsx
|
||||||
|
"""
|
||||||
|
|
||||||
|
import shutil
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from openpyxl import Workbook
|
||||||
|
from openpyxl.styles import Alignment, Border, Font, PatternFill, Side
|
||||||
|
from openpyxl.utils import get_column_letter
|
||||||
|
from pymongo import ASCENDING, MongoClient
|
||||||
|
|
||||||
|
# ── Konfigurace ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
MONGO_URI = "mongodb://192.168.1.76:27017"
|
||||||
|
DB_NAME = "edc"
|
||||||
|
STUDY_FULL = "77242113UCO3001"
|
||||||
|
VERSION = "1.0"
|
||||||
|
|
||||||
|
OUTPUT_DIR = Path(__file__).parent / "reports"
|
||||||
|
TRASH_DIR = OUTPUT_DIR / "TRASH"
|
||||||
|
|
||||||
|
COLLECTIONS = [
|
||||||
|
"UCO3001.DateofVisit",
|
||||||
|
"UCO3001.ConcomitantTherapy",
|
||||||
|
"UCO3001.TrialDispositionCompletion-Discontinuation",
|
||||||
|
]
|
||||||
|
|
||||||
|
# ── Formátování ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
HEADER_FILL = PatternFill("solid", fgColor="1F4E79")
|
||||||
|
HEADER_FONT = Font(bold=True, color="FFFFFF", name="Calibri", size=10)
|
||||||
|
DATA_FONT = Font(name="Calibri", size=10)
|
||||||
|
THIN = Side(style="thin", color="CCCCCC")
|
||||||
|
BORDER = Border(left=THIN, right=THIN, top=THIN, bottom=THIN)
|
||||||
|
|
||||||
|
# ── Pevné sloupce ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
FIXED_COLS = [
|
||||||
|
("SiteNumber", lambda d: d.get("site", {}).get("number", "")),
|
||||||
|
("SiteName", lambda d: d.get("site", {}).get("name", "")),
|
||||||
|
("Subject", lambda d: d.get("subject", {}).get("label", "")),
|
||||||
|
("Visit", lambda d: d.get("form", {}).get("instanceName", "")),
|
||||||
|
("FolderSeq", lambda d: d.get("form", {}).get("folderSeq", "")),
|
||||||
|
("RecordPos", lambda d: d.get("form", {}).get("recordPosition", "")),
|
||||||
|
("LastModified", lambda d: _fmt(d.get("lastModified", ""))),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
# ── Helpers ───────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _fmt(value: str) -> str:
|
||||||
|
"""ISO datetime string → 'DD-MMM-YYYY' nebo 'DD-MMM-YYYY HH:MM'."""
|
||||||
|
if not value:
|
||||||
|
return ""
|
||||||
|
try:
|
||||||
|
dt = datetime.fromisoformat(value)
|
||||||
|
if dt.hour == 0 and dt.minute == 0 and dt.second == 0:
|
||||||
|
return dt.strftime("%d-%b-%Y")
|
||||||
|
return dt.strftime("%d-%b-%Y %H:%M")
|
||||||
|
except Exception:
|
||||||
|
return value
|
||||||
|
|
||||||
|
|
||||||
|
def _fmt_field(value) -> str:
|
||||||
|
"""Naformátuje hodnotu z fields{} — datum nebo string."""
|
||||||
|
if isinstance(value, str) and "T" in value and value.endswith(("+00:00", "Z")):
|
||||||
|
return _fmt(value)
|
||||||
|
return value if value is not None else ""
|
||||||
|
|
||||||
|
|
||||||
|
COLS_LAST_CT = [
|
||||||
|
"CMTRT_ATC1", "CMTRT_ATC2", "CMTRT_ATC3", "CMTRT_ATC4",
|
||||||
|
"CMTRT_RXPREF", "CMTRT_TRADE_NAME",
|
||||||
|
"CMTRT_ATC1_CODE", "CMTRT_ATC2_CODE", "CMTRT_ATC3_CODE", "CMTRT_ATC4_CODE",
|
||||||
|
"CMTRT_RXPREF_CODE", "CMTRT_TRADE_NAME_CODE",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def _field_keys(docs: list, last: list | None = None) -> list:
|
||||||
|
"""Vrátí seznam unikátních klíčů z fields{} — klíče v `last` přesunuty na konec."""
|
||||||
|
seen = set()
|
||||||
|
keys = []
|
||||||
|
for doc in docs:
|
||||||
|
for k in doc.get("fields", {}).keys():
|
||||||
|
if k not in seen:
|
||||||
|
seen.add(k)
|
||||||
|
keys.append(k)
|
||||||
|
if last:
|
||||||
|
tail = [k for k in last if k in seen]
|
||||||
|
keys = [k for k in keys if k not in set(tail)] + tail
|
||||||
|
return keys
|
||||||
|
|
||||||
|
|
||||||
|
def _sheet_name(collection: str) -> str:
|
||||||
|
"""UCO3001.SomeName → SomeName (max 31 znaků pro Excel)."""
|
||||||
|
name = collection.split(".", 1)[-1]
|
||||||
|
abbreviations = {
|
||||||
|
"TrialDispositionCompletion-Discontinuation": "TrialDisposition",
|
||||||
|
}
|
||||||
|
return abbreviations.get(name, name)[:31]
|
||||||
|
|
||||||
|
|
||||||
|
# ── Zápis listu ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def write_sheet(ws, docs: list, last_cols: list | None = None) -> None:
|
||||||
|
fixed_names = [c[0] for c in FIXED_COLS]
|
||||||
|
field_keys = _field_keys(docs, last=last_cols)
|
||||||
|
all_headers = fixed_names + field_keys
|
||||||
|
|
||||||
|
# záhlaví
|
||||||
|
for col_i, header in enumerate(all_headers, 1):
|
||||||
|
cell = ws.cell(row=1, column=col_i, value=header)
|
||||||
|
cell.font = HEADER_FONT
|
||||||
|
cell.fill = HEADER_FILL
|
||||||
|
cell.border = BORDER
|
||||||
|
cell.alignment = Alignment(horizontal="center", vertical="center")
|
||||||
|
ws.row_dimensions[1].height = 18
|
||||||
|
ws.freeze_panes = "A2"
|
||||||
|
|
||||||
|
# data
|
||||||
|
for row_i, doc in enumerate(docs, 2):
|
||||||
|
fields = doc.get("fields", {})
|
||||||
|
|
||||||
|
for col_i, (_, getter) in enumerate(FIXED_COLS, 1):
|
||||||
|
cell = ws.cell(row=row_i, column=col_i, value=getter(doc))
|
||||||
|
cell.font = DATA_FONT
|
||||||
|
cell.border = BORDER
|
||||||
|
cell.alignment = Alignment(vertical="top")
|
||||||
|
|
||||||
|
for col_off, key in enumerate(field_keys):
|
||||||
|
col_i = len(FIXED_COLS) + col_off + 1
|
||||||
|
cell = ws.cell(row=row_i, column=col_i, value=_fmt_field(fields.get(key, "")))
|
||||||
|
cell.font = DATA_FONT
|
||||||
|
cell.border = BORDER
|
||||||
|
cell.alignment = Alignment(vertical="top")
|
||||||
|
|
||||||
|
# autofilter
|
||||||
|
if all_headers:
|
||||||
|
ws.auto_filter.ref = f"A1:{get_column_letter(len(all_headers))}1"
|
||||||
|
|
||||||
|
# šířky sloupců
|
||||||
|
widths = {i: len(h) for i, h in enumerate(all_headers, 1)}
|
||||||
|
for doc in docs:
|
||||||
|
fields = doc.get("fields", {})
|
||||||
|
for col_i, (_, getter) in enumerate(FIXED_COLS, 1):
|
||||||
|
widths[col_i] = max(widths[col_i], len(str(getter(doc))))
|
||||||
|
for col_off, key in enumerate(field_keys):
|
||||||
|
col_i = len(FIXED_COLS) + col_off + 1
|
||||||
|
widths[col_i] = max(widths[col_i], len(str(fields.get(key, ""))))
|
||||||
|
|
||||||
|
for col_i, w in widths.items():
|
||||||
|
ws.column_dimensions[get_column_letter(col_i)].width = min(w + 2, 55)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Main ──────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def main() -> None:
|
||||||
|
client = MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000)
|
||||||
|
client.admin.command("ping")
|
||||||
|
db = client[DB_NAME]
|
||||||
|
|
||||||
|
wb = Workbook()
|
||||||
|
wb.remove(wb.active)
|
||||||
|
|
||||||
|
for coll_name in COLLECTIONS:
|
||||||
|
docs = list(db[coll_name].find(
|
||||||
|
{},
|
||||||
|
{"_id": 0, "sourceFile": 0, "history": 0},
|
||||||
|
sort=[
|
||||||
|
("site.number", ASCENDING),
|
||||||
|
("subject.label", ASCENDING),
|
||||||
|
("form.folderSeq", ASCENDING),
|
||||||
|
("form.recordPosition", ASCENDING),
|
||||||
|
],
|
||||||
|
))
|
||||||
|
ws = wb.create_sheet(title=_sheet_name(coll_name))
|
||||||
|
last = COLS_LAST_CT if "ConcomitantTherapy" in coll_name else None
|
||||||
|
write_sheet(ws, docs, last_cols=last)
|
||||||
|
print(f" {coll_name}: {len(docs)} zaznamu -> list '{ws.title}'")
|
||||||
|
|
||||||
|
client.close()
|
||||||
|
|
||||||
|
OUTPUT_DIR.mkdir(exist_ok=True)
|
||||||
|
TRASH_DIR.mkdir(exist_ok=True)
|
||||||
|
|
||||||
|
# přesun starých verzí do TRASH
|
||||||
|
pattern = f"* {STUDY_FULL} EDC DataListing *.xlsx"
|
||||||
|
for old in OUTPUT_DIR.glob(pattern):
|
||||||
|
dest = TRASH_DIR / old.name
|
||||||
|
shutil.move(str(old), str(dest))
|
||||||
|
print(f" Přesunuto do TRASH: {old.name}")
|
||||||
|
|
||||||
|
today = datetime.now().strftime("%Y-%m-%d")
|
||||||
|
filename = f"{today} {STUDY_FULL} EDC DataListing v{VERSION}.xlsx"
|
||||||
|
out_path = OUTPUT_DIR / filename
|
||||||
|
wb.save(str(out_path))
|
||||||
|
print(f"\nUloženo: {out_path}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user