z230
This commit is contained in:
@@ -0,0 +1,43 @@
|
||||
# create_report_v1.0.md
|
||||
|
||||
**Skript:** `create_report_v1.0.py`
|
||||
**Verze:** 1.0
|
||||
**Datum:** 2026-06-01
|
||||
|
||||
## Popis
|
||||
|
||||
Generuje Excel EDC DataListing report pro studii **77242113UCO3001** z MongoDB (db: `edc`).
|
||||
|
||||
## Výstup
|
||||
|
||||
`Medidata/reports/YYYY-MM-DD 77242113UCO3001 EDC DataListing v1.0.xlsx`
|
||||
|
||||
Stará verze se automaticky přesune do `reports/TRASH/`.
|
||||
|
||||
## Listy
|
||||
|
||||
| List | Kolekce MongoDB | Záznamy (CZE) |
|
||||
|------|----------------|---------------|
|
||||
| DateofVisit | UCO3001.DateofVisit | 55 |
|
||||
| ConcomitantTherapy | UCO3001.ConcomitantTherapy | 91 |
|
||||
| TrialDisposition | UCO3001.TrialDispositionCompletion-Discontinuation | 3 |
|
||||
|
||||
## Sloupce (každý list)
|
||||
|
||||
**Pevné:** SiteNumber · SiteName · Subject · Visit · FolderSeq · RecordPos · LastModified
|
||||
|
||||
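**Dynamické:** všechny klíče z `fields{}` v pořadí výskytu v MongoDB; u listu ConcomitantTherapy jsou sloupce `CMTRT_ATC1`–`CMTRT_ATC4`, `CMTRT_RXPREF`, `CMTRT_TRADE_NAME` a jejich varianty `*_CODE` přesunuty na konec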
|
||||
|
||||
## Formátování
|
||||
|
||||
- Záhlaví: tmavomodrý fill, bílý tučný text, Calibri 10
|
||||
- Data: Calibri 10, tenké ohraničení
|
||||
- Zmrazení řádku 1, autofilter, šířky sloupců auto (max 55)
|
||||
- Datumy: DD-MMM-YYYY (čas jen pokud != 00:00)
|
||||
|
||||
## Spuštění
|
||||
|
||||
```
|
||||
cd Medidata
|
||||
python create_report_v1.0.py
|
||||
```
|
||||
@@ -0,0 +1,210 @@
|
||||
"""
|
||||
create_report_v1.0.py
|
||||
Verze: 1.0
|
||||
Datum: 2026-06-01
|
||||
Popis: Excel EDC DataListing report pro studii UCO3001 z MongoDB (db: edc).
|
||||
Jeden list per kolekce (DateofVisit / ConcomitantTherapy / TrialDisposition).
|
||||
Sloupce: SiteNumber, SiteName, Subject, Visit, FolderSeq, RecordPos,
|
||||
LastModified + dynamické fields.* z MongoDB.
|
||||
Výstup: reports/YYYY-MM-DD 77242113UCO3001 EDC DataListing v1.0.xlsx
|
||||
"""
|
||||
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
from openpyxl import Workbook
|
||||
from openpyxl.styles import Alignment, Border, Font, PatternFill, Side
|
||||
from openpyxl.utils import get_column_letter
|
||||
from pymongo import ASCENDING, MongoClient
|
||||
|
||||
# ── Konfigurace ───────────────────────────────────────────────────────────────
|
||||
|
||||
# Connection target: MongoDB server URI and the database that is read.
DB_NAME = "edc"
MONGO_URI = "mongodb://192.168.1.76:27017"

# Study identifier and report version; both end up in the output file name.
VERSION = "1.0"
STUDY_FULL = "77242113UCO3001"

# Reports are written to a "reports" folder next to this script; reports
# that get replaced are moved into its "TRASH" sub-folder.
OUTPUT_DIR = Path(__file__).parent.joinpath("reports")
TRASH_DIR = OUTPUT_DIR.joinpath("TRASH")

# Collections exported to the workbook, one worksheet each, in this order.
COLLECTIONS = [
    f"UCO3001.{form}"
    for form in (
        "DateofVisit",
        "ConcomitantTherapy",
        "TrialDispositionCompletion-Discontinuation",
    )
]
|
||||
|
||||
# ── Formátování ───────────────────────────────────────────────────────────────
|
||||
|
||||
# Header row: solid dark-blue background with bold white Calibri 10 text.
HEADER_FILL = PatternFill(fill_type="solid", start_color="1F4E79")
HEADER_FONT = Font(name="Calibri", size=10, bold=True, color="FFFFFF")

# Data rows: regular Calibri 10.
DATA_FONT = Font(size=10, name="Calibri")

# Thin light-grey line used on all four sides of every cell.
THIN = Side(border_style="thin", color="CCCCCC")
BORDER = Border(top=THIN, bottom=THIN, left=THIN, right=THIN)
|
||||
|
||||
# ── Pevné sloupce ─────────────────────────────────────────────────────────────
|
||||
|
||||
def _nested(doc: dict, section: str, key: str):
    """Return ``doc[section][key]``, or ``""`` when either level is absent.

    A sub-document that is present but stored as null is treated like a
    missing one instead of raising ``AttributeError``.
    """
    return (doc.get(section) or {}).get(key, "")


# Fixed leading columns of every sheet as (header, getter) pairs.  Each
# getter receives one MongoDB document and returns the cell value.
FIXED_COLS = [
    ("SiteNumber", lambda d: _nested(d, "site", "number")),
    ("SiteName", lambda d: _nested(d, "site", "name")),
    ("Subject", lambda d: _nested(d, "subject", "label")),
    ("Visit", lambda d: _nested(d, "form", "instanceName")),
    ("FolderSeq", lambda d: _nested(d, "form", "folderSeq")),
    ("RecordPos", lambda d: _nested(d, "form", "recordPosition")),
    # _fmt is defined further down in the module; it is only looked up when
    # the getter is called, so the forward reference is safe.
    ("LastModified", lambda d: _fmt(d.get("lastModified", ""))),
]
|
||||
|
||||
|
||||
# ── Helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
def _fmt(value: str) -> str:
|
||||
"""ISO datetime string → 'DD-MMM-YYYY' nebo 'DD-MMM-YYYY HH:MM'."""
|
||||
if not value:
|
||||
return ""
|
||||
try:
|
||||
dt = datetime.fromisoformat(value)
|
||||
if dt.hour == 0 and dt.minute == 0 and dt.second == 0:
|
||||
return dt.strftime("%d-%b-%Y")
|
||||
return dt.strftime("%d-%b-%Y %H:%M")
|
||||
except Exception:
|
||||
return value
|
||||
|
||||
|
||||
def _fmt_field(value) -> str:
|
||||
"""Naformátuje hodnotu z fields{} — datum nebo string."""
|
||||
if isinstance(value, str) and "T" in value and value.endswith(("+00:00", "Z")):
|
||||
return _fmt(value)
|
||||
return value if value is not None else ""
|
||||
|
||||
|
||||
# Field keys moved to the end of the ConcomitantTherapy sheet: first the six
# plain CMTRT_* names, then the same six with a "_CODE" suffix.
COLS_LAST_CT = [
    f"CMTRT_{base}{suffix}"
    for suffix in ("", "_CODE")
    for base in ("ATC1", "ATC2", "ATC3", "ATC4", "RXPREF", "TRADE_NAME")
]
|
||||
|
||||
|
||||
def _field_keys(docs: list, last: list | None = None) -> list:
|
||||
"""Vrátí seznam unikátních klíčů z fields{} — klíče v `last` přesunuty na konec."""
|
||||
seen = set()
|
||||
keys = []
|
||||
for doc in docs:
|
||||
for k in doc.get("fields", {}).keys():
|
||||
if k not in seen:
|
||||
seen.add(k)
|
||||
keys.append(k)
|
||||
if last:
|
||||
tail = [k for k in last if k in seen]
|
||||
keys = [k for k in keys if k not in set(tail)] + tail
|
||||
return keys
|
||||
|
||||
|
||||
def _sheet_name(collection: str) -> str:
|
||||
"""UCO3001.SomeName → SomeName (max 31 znaků pro Excel)."""
|
||||
name = collection.split(".", 1)[-1]
|
||||
abbreviations = {
|
||||
"TrialDispositionCompletion-Discontinuation": "TrialDisposition",
|
||||
}
|
||||
return abbreviations.get(name, name)[:31]
|
||||
|
||||
|
||||
# ── Zápis listu ───────────────────────────────────────────────────────────────
|
||||
|
||||
def write_sheet(ws, docs: list, last_cols: list | None = None) -> None:
    """Fill worksheet *ws* with a styled header row and one row per document.

    Columns are the fixed columns from ``FIXED_COLS`` followed by the keys
    returned by ``_field_keys(docs, last=last_cols)``.  Row 1 is frozen, an
    autofilter is set over the header and all data rows, and every column
    width is set to its longest written value plus 2, capped at 55.

    Args:
        ws: Worksheet to write into.
        docs: Documents to export, one row each, in the given order.
        last_cols: Optional field keys to place at the end of the dynamic
            columns.
    """
    fixed_names = [name for name, _ in FIXED_COLS]
    field_keys = _field_keys(docs, last=last_cols)
    all_headers = fixed_names + field_keys

    # One Alignment object per role, shared by all cells of that role.
    header_align = Alignment(horizontal="center", vertical="center")
    data_align = Alignment(vertical="top")

    # Longest text seen per column, starting with the header itself.
    widths = [len(header) for header in all_headers]

    # header row
    for col_i, header in enumerate(all_headers, 1):
        cell = ws.cell(row=1, column=col_i, value=header)
        cell.font = HEADER_FONT
        cell.fill = HEADER_FILL
        cell.border = BORDER
        cell.alignment = header_align
    ws.row_dimensions[1].height = 18
    ws.freeze_panes = "A2"

    # data rows
    for row_i, doc in enumerate(docs, 2):
        fields = doc.get("fields") or {}
        values = [getter(doc) for _, getter in FIXED_COLS]
        values += [_fmt_field(fields.get(key, "")) for key in field_keys]
        for col_i, value in enumerate(values, 1):
            cell = ws.cell(row=row_i, column=col_i, value=value)
            cell.font = DATA_FONT
            cell.border = BORDER
            cell.alignment = data_align
            # Measure the value as written (after formatting), so the width
            # matches what is displayed rather than the raw stored value.
            widths[col_i - 1] = max(widths[col_i - 1], len(str(value)))

    # autofilter over the header and every data row
    if all_headers:
        last_col = get_column_letter(len(all_headers))
        ws.auto_filter.ref = f"A1:{last_col}{len(docs) + 1}"

    # column widths
    for col_i, width in enumerate(widths, 1):
        ws.column_dimensions[get_column_letter(col_i)].width = min(width + 2, 55)
|
||||
|
||||
|
||||
# ── Main ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
def main() -> None:
    """Build the report workbook from MongoDB and save it under OUTPUT_DIR.

    Steps:
        1. Connect to MongoDB and ping the server.
        2. For each name in COLLECTIONS, read all documents sorted by site
           number, subject label, folder sequence and record position, and
           write them to their own worksheet.
        3. Move existing report files for this study from OUTPUT_DIR to
           TRASH_DIR.
        4. Save the workbook under a file name that starts with today's date.

    Raises:
        Whatever pymongo, openpyxl or the file-system calls raise; nothing
        is caught here.
    """
    wb = Workbook()
    wb.remove(wb.active)  # drop the default empty sheet

    client = MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000)
    try:
        client.admin.command("ping")
        db = client[DB_NAME]

        for coll_name in COLLECTIONS:
            docs = list(db[coll_name].find(
                {},
                {"_id": 0, "sourceFile": 0, "history": 0},
                sort=[
                    ("site.number", ASCENDING),
                    ("subject.label", ASCENDING),
                    ("form.folderSeq", ASCENDING),
                    ("form.recordPosition", ASCENDING),
                ],
            ))
            ws = wb.create_sheet(title=_sheet_name(coll_name))
            last = COLS_LAST_CT if "ConcomitantTherapy" in coll_name else None
            write_sheet(ws, docs, last_cols=last)
            print(f"  {coll_name}: {len(docs)} zaznamu -> list '{ws.title}'")
    finally:
        # Close the connection even when the ping or a query fails.
        client.close()

    OUTPUT_DIR.mkdir(exist_ok=True)
    TRASH_DIR.mkdir(exist_ok=True)

    # move older reports to TRASH
    pattern = f"* {STUDY_FULL} EDC DataListing *.xlsx"
    for old in OUTPUT_DIR.glob(pattern):
        dest = TRASH_DIR / old.name
        shutil.move(str(old), str(dest))
        print(f"  Přesunuto do TRASH: {old.name}")

    today = datetime.now().strftime("%Y-%m-%d")
    filename = f"{today} {STUDY_FULL} EDC DataListing v{VERSION}.xlsx"
    out_path = OUTPUT_DIR / filename
    wb.save(str(out_path))
    print(f"\nUloženo: {out_path}")


if __name__ == "__main__":
    main()
|
||||
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user