Files
janssen/Covance/create_lab_results_report_v1.0.py
administrator 8c01fd6e1a Pridani novych skriptu, reportu a zpracovanych dat
- EmailsImport: jnj_mailbox_sync_v1.0 (sync JNJ schranky)
- Covance: create_lab_results_report_v1.0 + zpracovane CSV (samples/kits/equeries/test-results), browser profily
- Feasibility UCO2001: store_cda_*, store_sipiq_links, classify_krok, mark_sipiq_sent, report v1.1 (stary report do TRASH)
- IWRS/Drugs: pregenerovane onsite inventory / shipment reporty
- TrilliumMCP server + trilium upload/diacritics skripty
- .mcp.json

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-09 15:10:21 +02:00

1033 lines
40 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# =============================================================================
# Nazev: create_lab_results_report_v1.0.py
# Verze: 1.0
# Datum: 2026-06-09
# Popis: Samostatny skript — pekny Excel report laboratornich vysledku ze
# studii Covance/Labcorp (36940 UCO3001 + 35472 MDD3003).
# Zdroj dat: MongoDB db "covance", kolekce "results"
# (standard + microbiology). Vystupni .xlsx ve stylu hlavniho
# create_report_v2.0.py (modre hlavicky, ramecky, freeze, autofiltr,
# skutecne Excel datumy DD-MMM-YYYY, hyperlinky mezi listy).
#
# Listy:
# 1. PREHLED — KPI dashboard, kazde cislo je odkaz do detailu
# 2. SOUHRN CENTER — rollup per (studie, centrum, pacient)
# 3. DILI Hyuv zakon — verdikt + vsech 5 DILI kriterii + jaterni testy
# 4. Abnormality — pracovni seznam vsech vysledku mimo rozmezi
# 5. Matice pokryti — pacient x navsteva (kde chybi / je abnormalni)
# 6. Zrusene testy — vysledky s Cancellation Reason
# 7. Mikrobiologie — kultivace, auto-flag prvni pozitivni
# 8. Datove mezery — DILI gap hodnoty + prazdne klicove panely
# 9. Zdroj — plochy vypis vsech standardnich vysledku (kotva)
# 10. LEGENDA — vysvetleni flagu, barev, Hyova zakona, studii
#
# Pozn.: pri spojeni do hlavniho create_report.py staci prevzit sekce
# load + helpers + write_* a sdilet styly.
# =============================================================================
import re
from datetime import datetime
from collections import defaultdict
from pymongo import MongoClient
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Border, Side, Alignment
from openpyxl.utils import get_column_letter
# -- Konfigurace --------------------------------------------------------------
MONGO_URI = "mongodb://192.168.1.76:27017"
OUT_DIR = "U:/Dropbox/!!!Days/Downloads Z230/"
STUDY_LABEL = {"36940": "UCO3001", "35472": "MDD3003"}
PROTOCOL = {"36940": "77242113UCO3001", "35472": "42847922MDD3003"}
DILI_DANGER = {"ALT>/=3XULN", "AST>/=3XULN",
"ALT>/=3XULN & TBIL>/=2XULN", "AST>/=3XULN & TBIL>/=2XULN"} # ALP NE
DILI_GROUPS = DILI_DANGER | {"ALP<2XULN"}
GAP_VALUES = {"required testing unavailable", "unable to calc due to loq"}
KEY_PANELS = {"CHEMISTRY PANEL", "HEMATOLOGY&DIFFERENTIAL PANEL",
"LIPID PANEL", "URINE MICRO PANEL", "URINE MACRO PANEL"}
LIVER_TESTS = {"ALT (SGPT)", "AST (SGOT)", "Total Bilirubin", "Alkaline Phosphatase"}
CYTOPENIA_TESTS = {"Hemoglobin", "Platelets", "Neutrophils", "WBC"}
SEV_LABEL = {3: "Kriticka", 2: "Vysoka", 1: "Mirna", 0: ""}
# Poradi navstev pro matici pokryti (chronologicky dle protokolu).
VISIT_ORDER = [
"Screening", "Induction Week 0", "Induction Week 2", "Induction Week 4",
"Induction Week 12", "Part 1 Screening", "Part 1 DB Baseline",
"Part 2 Screening", "Part 2 DB Baseline", "Part 2 DB Week 13",
"Part 2 DB Every 12 Weeks", "Part 2 OL Induction Baseline",
"Part 2 OL Induction Day 29", "End of Phase/Treatment", "Early Withdrawal",
"Retest", "Biopsy", "TB Testing", "Stool Pathogens", "Unscheduled",
]
def visit_group(v):
if v in ("Screening",): return "SCREENING"
if v.startswith("Induction"): return "INDUKCE"
if v.startswith("Part 1"): return "PART 1"
if v.startswith("Part 2"): return "PART 2"
if v in ("End of Phase/Treatment", "Early Withdrawal", "Retest"): return "UKONCENI"
return "OSTATNI"
# -- Styly (house style z create_report_v2.0.py) ------------------------------
thin = Side(style="thin")
border = Border(left=thin, right=thin, top=thin, bottom=thin)
header_fill = PatternFill("solid", fgColor="4472C4")
header_font = Font(name="Calibri", bold=True, size=11, color="FFFFFF")
data_font = Font(name="Calibri", size=11)
title_fill = PatternFill("solid", fgColor="203764")
title_font = Font(name="Calibri", bold=True, size=14, color="FFFFFF")
problem_fill = PatternFill("solid", fgColor="FFE7E7")
attn_fill = PatternFill("solid", fgColor="FFF2CC")
ok_fill = PatternFill("solid", fgColor="E2EFDA")
total_fill = PatternFill("solid", fgColor="D9E1F2")
red_font = Font(name="Calibri", size=11, color="C00000")
red_bold = Font(name="Calibri", bold=True, size=11, color="C00000")
muted_font = Font(name="Calibri", size=11, color="BFBFBF")
link_font = Font(name="Calibri", size=11, color="0563C1", underline="single")
bold_font = Font(name="Calibri", bold=True, size=11)
white_bold = Font(name="Calibri", bold=True, size=12, color="FFFFFF")
CENTER = Alignment(horizontal="center", vertical="center")
CENTERW = Alignment(horizontal="center", vertical="center", wrap_text=True)
LEFT = Alignment(horizontal="left", vertical="center")
DATE_FMT = "DD-MMM-YYYY"
# -- Pomocne funkce -----------------------------------------------------------
def parse_flags(flags):
"""Vrati (smer, kvalifikatory). smer in {'H','L',None}."""
if not flags:
return None, set()
direction, quals = None, set()
for t in str(flags).split():
if t[:1] == "H":
direction = direction or "H"
elif t[:1] == "L":
direction = direction or "L"
if t in ("HT", "LT"):
quals.add("T")
elif t == "SF":
quals.add("SF")
elif t == "RX":
quals.add("RX")
return direction, quals
def parse_collected(s):
if not s:
return None
for fmt in ("%b %d, %Y %I:%M %p", "%b %d, %Y"):
try:
return datetime.strptime(str(s).strip(), fmt)
except ValueError:
pass
return None
def parse_value_num(v):
if v is None:
return None
v = str(v).strip()
return float(v) if re.fullmatch(r"-?\d+(\.\d+)?", v) else None
def parse_range(r):
if not r:
return (None, None)
m = re.match(r"^\s*(-?[\d.]+)\s*-\s*(-?[\d.]+)", str(r))
return (float(m.group(1)), float(m.group(2))) if m else (None, None)
def out_by_pct(value_num, lo, hi, direction):
if value_num is None or lo is None or hi is None:
return None
width = hi - lo
if width == 0:
bound = hi if direction == "H" else lo
return abs(value_num - bound) / bound * 100 if bound else None
if direction == "H":
return (value_num - hi) / width * 100
if direction == "L":
return (lo - value_num) / width * 100
return None
def severity(direction, quals, opct, test):
if direction is None:
return 0
if ("T" in quals
or (opct is not None and opct >= 100)
or (test in LIVER_TESTS and direction == "H")
or (test in CYTOPENIA_TESTS and direction == "L")):
return 3
if opct is not None and opct >= 50:
return 2
return 1
def is_criteria_met(value):
return value is not None and "criteria met" in str(value).lower()
def rev_flags(fields):
s = set()
if fields.get("Subject Data Revision") == "R":
s.add("S")
if fields.get("Visit Data Revision") == "R":
s.add("V")
if fields.get("Result Data Revision") == "R":
s.add("R")
return s
NEG_RE = re.compile(r"(?i)(no enteric pathogens|negative|no growth|no\b.*pathogen)")
def micro_positive(fields):
if fields.get("Organism") or fields.get("Growth"):
return True
tr = fields.get("Test Result")
return bool(tr and not NEG_RE.search(str(tr)))
# -- Nacteni dat --------------------------------------------------------------
print("Nacitam data z MongoDB...")
client = MongoClient(MONGO_URI)
col = client["covance"]["results"]
raw = list(col.find())
print(f" results: {len(raw)} dokumentu")
# Predpocet odvozenych poli pro standardni vysledky.
S = [] # standard
M = [] # microbiology
for doc in raw:
f = doc.get("fields", {})
rtype = doc.get("resultType")
base = {
"id": str(doc.get("_id")),
"study": doc.get("study"),
"studyl": STUDY_LABEL.get(doc.get("study"), doc.get("study")),
"site": doc.get("site"),
"subject": doc.get("subject"),
"gender": f.get("Gender"),
"accession": f.get("Accession"),
"cancel": f.get("Cancellation Reason"),
"comments": f.get("Comments"),
"rev": rev_flags(f),
"f": f,
}
if rtype == "microbiology":
base["visit"] = f.get("Visit Description")
base["collected"] = parse_collected(f.get("Collected Time"))
base["positive"] = micro_positive(f)
M.append(base)
else:
tg = f.get("Test Group")
test = f.get("Test")
val = f.get("Value")
flags = f.get("Flags")
direction, quals = parse_flags(flags)
vnum = parse_value_num(val)
lo, hi = parse_range(f.get("Range"))
opct = out_by_pct(vnum, lo, hi, direction)
base.update({
"visit": f.get("Visit"),
"collected": parse_collected(f.get("Collected Time")),
"tg": tg,
"test": test,
"value": val,
"vnum": vnum,
"units": f.get("Units"),
"range": f.get("Range"),
"flags": flags,
"direction": direction,
"quals": quals,
"opct": opct,
"sev": severity(direction, quals, opct, test),
"is_dili": tg in DILI_GROUPS,
"is_gap": (str(val).strip().lower() in GAP_VALUES if val else False)
or (not val and tg in KEY_PANELS and not f.get("Cancellation Reason")),
})
S.append(base)
print(f" standardnich: {len(S)}, mikrobiologie: {len(M)}")
MINDT = datetime.min
def skey(d):
return (d["study"] or "", d["site"] or "", d["subject"] or "",
d["collected"] or MINDT, d["accession"] or "",
d.get("tg") or "", d.get("test") or "")
S.sort(key=skey)
# -- Agregace pro KPI ---------------------------------------------------------
all_docs = S + M
sites = sorted({d["site"] for d in all_docs if d["site"]})
subjects = sorted({d["subject"] for d in all_docs if d["subject"]})
abn_rows = [d for d in S if d["direction"]]
crit_rows = [d for d in S if d["sev"] == 3]
cancel_std = [d for d in S if d["cancel"]]
cancel_mic = [d for d in M if d["cancel"]]
gap_rows = [d for d in S if d["is_gap"]]
rev_rows = [d for d in S if d["rev"]]
micro_pos = [d for d in M if d["positive"]]
collected_all = [d["collected"] for d in all_docs if d["collected"]]
dt_min = min(collected_all) if collected_all else None
dt_max = max(collected_all) if collected_all else None
# Hyuv zakon — pozitivni subjekty (konjunkce transaminaza & TBIL kombo).
dili_by_subject = defaultdict(list)
for d in S:
if d["is_dili"]:
dili_by_subject[(d["study"], d["site"], d["subject"])].append(d)
def hys_positive(rows):
met = {d["tg"] for d in rows if is_criteria_met(d["value"])}
transaminase = ("ALT>/=3XULN" in met) or ("AST>/=3XULN" in met)
tbil_combo = ("ALT>/=3XULN & TBIL>/=2XULN" in met) or ("AST>/=3XULN & TBIL>/=2XULN" in met)
return transaminase and tbil_combo
hys_subjects = [k for k, rows in dili_by_subject.items() if hys_positive(rows)]
# -- Workbook + listy (poradi zalozek) ---------------------------------------
wb = Workbook()
wb.remove(wb.active)
ws_over = wb.create_sheet("PREHLED")
ws_souhrn = wb.create_sheet("SOUHRN CENTER")
ws_dili = wb.create_sheet("DILI Hyuv zakon")
ws_abn = wb.create_sheet("Abnormality")
ws_matice = wb.create_sheet("Matice pokryti")
ws_cancel = wb.create_sheet("Zrusene testy")
ws_micro = wb.create_sheet("Mikrobiologie")
ws_gaps = wb.create_sheet("Datove mezery")
ws_zdroj = wb.create_sheet("Zdroj")
ws_legend = wb.create_sheet("LEGENDA")
# Nazvy listu pro hyperlinky.
N_OVER, N_SOUHRN, N_DILI, N_ABN = "PREHLED", "SOUHRN CENTER", "DILI Hyuv zakon", "Abnormality"
N_MAT, N_CANCEL, N_MICRO, N_GAP, N_ZDROJ = ("Matice pokryti", "Zrusene testy",
"Mikrobiologie", "Datove mezery", "Zdroj")
def link(target_sheet, row, col="A"):
return f"#'{target_sheet}'!{col}{row}"
# -- Pomocne pro psani --------------------------------------------------------
def write_header(ws, row, headers, widths=None):
for ci, h in enumerate(headers, 1):
c = ws.cell(row=row, column=ci, value=h)
c.font = header_font
c.fill = header_fill
c.border = border
c.alignment = CENTERW
if widths:
ws.column_dimensions[get_column_letter(ci)].width = widths[ci - 1]
ws.row_dimensions[row].height = 30
def cell(ws, r, c, value, font=data_font, fill=None, align=CENTER,
numfmt=None, hyperlink=None):
x = ws.cell(row=r, column=c, value=value)
x.font = font
x.border = border
x.alignment = align
if fill:
x.fill = fill
if numfmt:
x.number_format = numfmt
if hyperlink:
x.hyperlink = hyperlink
x.font = link_font
return x
# =============================================================================
# PREDVYPOCET ROZLOZENI RADKU (aby hyperlinky znaly cilove radky predem)
# =============================================================================
# Zdroj: kazdy standard doc -> radek (header row 1, data od 2).
zdroj_row = {d["id"]: i + 2 for i, d in enumerate(S)}
# Prvni radek Zdroje pro (subject, visit) — pro matici.
zdroj_first_sv = {}
for d in S:
k = (d["subject"], d["visit"])
if k not in zdroj_first_sv:
zdroj_first_sv[k] = zdroj_row[d["id"]]
# Abnormality: serazeno worst-first.
abn_sorted = sorted(
abn_rows,
key=lambda d: (-d["sev"], -(d["opct"] if d["opct"] is not None else -1),
d["study"] or "", d["site"] or "", d["subject"] or "",
d["collected"] or MINDT, d["test"] or ""),
)
abn_row = {d["id"]: i + 2 for i, d in enumerate(abn_sorted)}
abn_first_subject = {}
for d in abn_sorted:
k = (d["study"], d["site"], d["subject"])
if k not in abn_first_subject:
abn_first_subject[k] = abn_row[d["id"]]
# SOUHRN CENTER: rollup radky (per studie/centrum/pacient) + subtotaly + celkem.
agg = defaultdict(lambda: {"n": 0, "abn": 0, "crit": 0, "gap": 0,
"cancel": 0, "rev": 0, "dmin": None, "dmax": None})
for d in S:
a = agg[(d["study"], d["site"], d["subject"])]
a["n"] += 1
if d["direction"]:
a["abn"] += 1
if d["sev"] == 3:
a["crit"] += 1
if d["is_gap"]:
a["gap"] += 1
if d["cancel"]:
a["cancel"] += 1
if d["rev"]:
a["rev"] += 1
if d["collected"]:
a["dmin"] = d["collected"] if a["dmin"] is None else min(a["dmin"], d["collected"])
a["dmax"] = d["collected"] if a["dmax"] is None else max(a["dmax"], d["collected"])
souhrn_keys = sorted(agg.keys(), key=lambda k: (k[0] or "", k[1] or "", k[2] or ""))
# Predpocet radku v SOUHRN: header row1, pak data se subtotaly per centrum.
souhrn_row_by_ssp = {}
r = 2
prev_site = None
for k in souhrn_keys:
if prev_site is not None and (k[0], k[1]) != prev_site:
r += 1 # subtotal radek
souhrn_row_by_ssp[k] = r
prev_site = (k[0], k[1])
r += 1
# =============================================================================
# LIST 9: Zdroj (psano prvni interne — kotva pro vse)
# =============================================================================
zdroj_cols = [
("Studie", 12), ("Centrum", 12), ("Pacient", 16), ("Pohlavi", 10),
("Navsteva", 22), ("Datum odberu", 15), ("Accession", 14),
("Panel", 26), ("Test", 24), ("Hodnota", 14), ("Priznak", 10),
("Rozmezi", 14), ("Jednotky", 12), ("Duvod zruseni", 22),
("Komentar", 24), ("Revize", 10),
]
write_header(ws_zdroj, 1, [h for h, _ in zdroj_cols], [w for _, w in zdroj_cols])
ws_zdroj.freeze_panes = "A2"
ws_zdroj.auto_filter.ref = f"A1:{get_column_letter(len(zdroj_cols))}1"
for i, d in enumerate(S, 2):
cell(ws_zdroj, i, 1, d["studyl"])
cell(ws_zdroj, i, 2, d["site"])
cell(ws_zdroj, i, 3, d["subject"])
cell(ws_zdroj, i, 4, d["gender"])
cell(ws_zdroj, i, 5, d["visit"], align=LEFT)
cell(ws_zdroj, i, 6, d["collected"], numfmt=DATE_FMT)
cell(ws_zdroj, i, 7, d["accession"])
cell(ws_zdroj, i, 8, d["tg"], align=LEFT)
cell(ws_zdroj, i, 9, d["test"], align=LEFT)
cell(ws_zdroj, i, 10, d["vnum"] if d["vnum"] is not None else d["value"])
fc = cell(ws_zdroj, i, 11, d["flags"])
if d["direction"]:
fc.fill = attn_fill
cell(ws_zdroj, i, 12, d["range"])
cell(ws_zdroj, i, 13, d["units"])
cell(ws_zdroj, i, 14, d["cancel"], align=LEFT)
cell(ws_zdroj, i, 15, d["comments"], align=LEFT)
cell(ws_zdroj, i, 16, ",".join(sorted(d["rev"])) or None)
# =============================================================================
# LIST 1: PREHLED (KPI dashboard)
# =============================================================================
ws = ws_over
ws.merge_cells("A1:H1")
c = ws.cell(row=1, column=1, value="Laboratorni vysledky — Covance / Labcorp")
c.fill = title_fill
c.font = title_font
c.alignment = LEFT
ws.merge_cells("A2:H2")
fresh = ""
if dt_min and dt_max:
fresh = f" | sber: {dt_min.strftime('%d-%b-%Y')} {dt_max.strftime('%d-%b-%Y')}"
c2 = ws.cell(row=2, column=1,
value=f"Vygenerovano {datetime.now().strftime('%d-%b-%Y %H:%M')}{fresh}")
c2.fill = title_fill
c2.font = Font(name="Calibri", size=11, color="FFFFFF")
c2.alignment = LEFT
ws.row_dimensions[1].height = 26
ws.row_dimensions[2].height = 18
for col, w in (("A", 40), ("B", 16), ("C", 28)):
ws.column_dimensions[col].width = w
n_hys, n_mic = len(hys_subjects), len(micro_pos)
tiles = [
("Celkem vysledku", len(all_docs), total_fill, link(N_ZDROJ, 2), None),
("Studie", len({d["study"] for d in all_docs}), total_fill, link(N_SOUHRN, 2), None),
("Centra", len(sites), total_fill, link(N_SOUHRN, 2), None),
("Pacienti", len(subjects), total_fill, link(N_SOUHRN, 2), None),
("Abnormalni (mimo rozmezi)", len(abn_rows), attn_fill, link(N_ABN, 2), None),
("z toho kriticke", len(crit_rows), attn_fill, link(N_ABN, 2), None),
("Bezpecnostni signaly (Hyuv zakon)", n_hys,
ok_fill if n_hys == 0 else problem_fill, link(N_DILI, 3),
None if n_hys == 0 else red_bold),
("Mikrobiologie pozitivni", f"{n_mic}/{len(M)}",
ok_fill if n_mic == 0 else problem_fill, link(N_MICRO, 3),
None if n_mic == 0 else red_bold),
("Datove mezery (DILI/panely)", len(gap_rows), attn_fill, link(N_GAP, 2), None),
("Zrusene testy", len(cancel_std) + len(cancel_mic), attn_fill, link(N_CANCEL, 4), None),
("Revize dat (R)", len(rev_rows), attn_fill, link(N_ZDROJ, 2, "P"), None),
]
row = 4
for label, value, fill, tgt, vfont in tiles:
lc = ws.cell(row=row, column=1, value=label)
lc.font = bold_font
lc.fill = fill
lc.border = border
lc.alignment = LEFT
vc = ws.cell(row=row, column=2, value=value)
vc.fill = fill
vc.border = border
vc.alignment = CENTER
vc.hyperlink = tgt
vc.font = vfont or link_font
ws.row_dimensions[row].height = 20
row += 1
# Mini-pivot: abnormality dle studie a smeru.
row += 1
ws.cell(row=row, column=1, value="Abnormality dle studie a smeru").font = bold_font
row += 1
for ci, h in enumerate(["Studie", "Vysoke (H*)", "Nizke (L*)", "Kriticke"], 1):
hc = ws.cell(row=row, column=ci, value=h)
hc.font = header_font
hc.fill = header_fill
hc.border = border
hc.alignment = CENTER
row += 1
for st in sorted({d["study"] for d in abn_rows}):
rs = [d for d in abn_rows if d["study"] == st]
vals = [STUDY_LABEL.get(st, st),
sum(1 for d in rs if d["direction"] == "H"),
sum(1 for d in rs if d["direction"] == "L"),
sum(1 for d in rs if d["sev"] == 3)]
for ci, v in enumerate(vals, 1):
x = ws.cell(row=row, column=ci, value=v)
x.border = border
x.alignment = CENTER
x.font = data_font
if ci >= 2:
x.hyperlink = link(N_ABN, 2)
x.font = link_font
row += 1
# =============================================================================
# LIST 2: SOUHRN CENTER
# =============================================================================
ws = ws_souhrn
sc_cols = [
("Studie", 12), ("Centrum", 12), ("Pacient", 16), ("Pohlavi", 10),
("Vysledku", 11), ("Abnormalni", 12), ("Kriticke", 11), ("Mezery", 10),
("Zrusene", 10), ("Revize", 9), ("Prvni sber", 14), ("Posledni sber", 14),
]
write_header(ws, 1, [h for h, _ in sc_cols], [w for _, w in sc_cols])
ws.freeze_panes = "A2"
ws.auto_filter.ref = f"A1:{get_column_letter(len(sc_cols))}1"
def sc_count_cell(ws, r, ci, val, warn=False, crit=False, good=False):
x = ws.cell(row=r, column=ci, value=val)
x.border = border
x.alignment = CENTER
if crit and val > 0:
x.fill = problem_fill
x.font = red_bold
elif warn and val > 0:
x.fill = attn_fill
x.font = data_font
elif good and val == 0:
x.fill = ok_fill
x.font = data_font
elif val == 0:
x.font = muted_font
else:
x.font = data_font
return x
r = 2
prev_site = None
sub_tot = defaultdict(int)
grand = defaultdict(int)
def write_subtotal(ws, r, site_label):
vals = ["", f"MEZISOUCET {site_label}", "", "",
sub_tot["n"], sub_tot["abn"], sub_tot["crit"], sub_tot["gap"],
sub_tot["cancel"], sub_tot["rev"], "", ""]
for ci, v in enumerate(vals, 1):
x = ws.cell(row=r, column=ci, value=v)
x.fill = total_fill
x.border = border
x.font = bold_font
x.alignment = LEFT if ci == 2 else CENTER
return r + 1
for k in souhrn_keys:
if prev_site is not None and (k[0], k[1]) != prev_site:
r = write_subtotal(ws, r, f"{STUDY_LABEL.get(prev_site[0], prev_site[0])}/{prev_site[1]}")
sub_tot = defaultdict(int)
a = agg[k]
cell(ws, r, 1, STUDY_LABEL.get(k[0], k[0]))
cell(ws, r, 2, k[1])
pac = cell(ws, r, 3, k[2], align=LEFT)
tgt = abn_first_subject.get(k)
if tgt:
pac.hyperlink = link(N_ABN, tgt)
pac.font = link_font
g = next((d["gender"] for d in S if d["subject"] == k[2]), None)
cell(ws, r, 4, g)
cell(ws, r, 5, a["n"])
ab = sc_count_cell(ws, r, 6, a["abn"], warn=True, good=True)
if tgt and a["abn"]:
ab.hyperlink = link(N_ABN, tgt)
ab.font = link_font if a["crit"] == 0 else red_bold
sc_count_cell(ws, r, 7, a["crit"], crit=True)
sc_count_cell(ws, r, 8, a["gap"], warn=True)
sc_count_cell(ws, r, 9, a["cancel"], warn=True)
sc_count_cell(ws, r, 10, a["rev"], warn=True)
cell(ws, r, 11, a["dmin"], numfmt=DATE_FMT)
cell(ws, r, 12, a["dmax"], numfmt=DATE_FMT)
for key in ("n", "abn", "crit", "gap", "cancel", "rev"):
sub_tot[key] += a[key]
grand[key] += a[key]
prev_site = (k[0], k[1])
r += 1
if prev_site is not None:
r = write_subtotal(ws, r, f"{STUDY_LABEL.get(prev_site[0], prev_site[0])}/{prev_site[1]}")
# Grand total.
gvals = ["", "CELKEM", "", "", grand["n"], grand["abn"], grand["crit"],
grand["gap"], grand["cancel"], grand["rev"], "", ""]
for ci, v in enumerate(gvals, 1):
x = ws.cell(row=r, column=ci, value=v)
x.fill = total_fill
x.border = border
x.font = Font(name="Calibri", bold=True, size=12)
x.alignment = LEFT if ci == 2 else CENTER
# =============================================================================
# LIST 3: DILI Hyuv zakon
# =============================================================================
ws = ws_dili
ws.merge_cells("A1:H1")
t = ws.cell(row=1, column=1, value="DILI / HYUV ZAKON")
t.fill = title_fill
t.font = title_font
t.alignment = LEFT
ws.merge_cells("A2:H2")
v = ws.cell(row=2, column=1)
if hys_subjects:
v.value = f"POZOR: {len(hys_subjects)} pacient(u) splnuje Hyuv zakon"
v.fill = problem_fill
v.font = red_bold
else:
v.value = "Zadny pacient nesplnuje kriteria Hyova zakona"
v.fill = ok_fill
v.font = bold_font
v.alignment = LEFT
ws.row_dimensions[1].height = 24
dili_cols = [("Studie", 12), ("Centrum", 12), ("Pacient", 16), ("Accession", 14),
("Navsteva", 22), ("Datum", 14), ("Kriterium", 30), ("Hodnota", 22)]
write_header(ws, 4, [h for h, _ in dili_cols], [w for _, w in dili_cols])
ws.freeze_panes = "A5"
ws.auto_filter.ref = f"A4:{get_column_letter(len(dili_cols))}4"
dili_sorted = sorted([d for d in S if d["is_dili"]],
key=lambda d: (0 if is_criteria_met(d["value"]) and d["tg"] in DILI_DANGER else 1,
d["study"] or "", d["site"] or "", d["subject"] or "",
d["collected"] or MINDT))
r = 5
for d in dili_sorted:
cell(ws, r, 1, d["studyl"])
cell(ws, r, 2, d["site"])
pac = cell(ws, r, 3, d["subject"], align=LEFT)
sr = souhrn_row_by_ssp.get((d["study"], d["site"], d["subject"]))
if sr:
pac.hyperlink = link(N_SOUHRN, sr)
pac.font = link_font
acc = cell(ws, r, 4, d["accession"])
acc.hyperlink = link(N_ZDROJ, zdroj_row[d["id"]])
acc.font = link_font
cell(ws, r, 5, d["visit"], align=LEFT)
cell(ws, r, 6, d["collected"], numfmt=DATE_FMT)
cell(ws, r, 7, d["tg"], align=LEFT)
hc = cell(ws, r, 8, d["value"], align=LEFT)
met = is_criteria_met(d["value"])
if met and d["tg"] in DILI_DANGER:
for ci in range(1, 9):
ws.cell(row=r, column=ci).fill = problem_fill
hc.font = red_bold
elif met and d["tg"] == "ALP<2XULN":
hc.fill = attn_fill # cholestaza-exclusion, samo o sobe NENI pozitivni
elif str(d["value"]).strip().lower() in GAP_VALUES:
hc.fill = attn_fill
elif str(d["value"]).strip().lower() == "criteria not met":
hc.fill = ok_fill
r += 1
# =============================================================================
# LIST 4: Abnormality
# =============================================================================
ws = ws_abn
abn_cols = [
("Zavaznost", 11), ("Studie", 11), ("Centrum", 11), ("Pacient", 16),
("Pohlavi", 9), ("Navsteva", 20), ("Datum odberu", 14), ("Accession", 14),
("Panel", 26), ("Test", 24), ("Hodnota", 12), ("Jednotky", 11),
("Rozmezi", 13), ("Smer", 9), ("Mimo o %", 11), ("Priznak", 9),
("Kvalifikator", 16), ("Komentar", 22),
]
write_header(ws, 1, [h for h, _ in abn_cols], [w for _, w in abn_cols])
ws.freeze_panes = "A2"
ws.auto_filter.ref = f"A1:{get_column_letter(len(abn_cols))}1"
QUAL_EXP = {"T": "kriticka hodnota", "SF": "SF", "RX": "RX"}
r = 2
for d in abn_sorted:
sev = d["sev"]
row_fill = problem_fill if sev == 3 else (attn_fill if sev == 2 else None)
cell(ws, r, 1, SEV_LABEL[sev], font=bold_font if sev == 3 else data_font)
cell(ws, r, 2, d["studyl"])
cell(ws, r, 3, d["site"])
pac = cell(ws, r, 4, d["subject"], align=LEFT)
sr = souhrn_row_by_ssp.get((d["study"], d["site"], d["subject"]))
if sr:
pac.hyperlink = link(N_SOUHRN, sr)
pac.font = link_font
cell(ws, r, 5, d["gender"])
cell(ws, r, 6, d["visit"], align=LEFT)
cell(ws, r, 7, d["collected"], numfmt=DATE_FMT)
acc = cell(ws, r, 8, d["accession"])
acc.hyperlink = link(N_ZDROJ, zdroj_row[d["id"]])
acc.font = link_font
cell(ws, r, 9, d["tg"], align=LEFT)
cell(ws, r, 10, d["test"], align=LEFT)
hc = cell(ws, r, 11, d["vnum"] if d["vnum"] is not None else d["value"])
hc.font = red_bold if sev == 3 else red_font
cell(ws, r, 12, d["units"])
cell(ws, r, 13, d["range"])
sm = cell(ws, r, 14, "Vysoke" if d["direction"] == "H" else "Nizke")
sm.font = red_font
cell(ws, r, 15, round(d["opct"], 1) if d["opct"] is not None else None)
cell(ws, r, 16, d["flags"])
qexp = ", ".join(QUAL_EXP.get(q, q) for q in sorted(d["quals"]))
cell(ws, r, 17, qexp or None, font=bold_font if "T" in d["quals"] else data_font, align=LEFT)
cell(ws, r, 18, d["comments"], align=LEFT)
if row_fill:
for ci in range(1, len(abn_cols) + 1):
if ws.cell(row=r, column=ci).fill.fgColor.rgb in ("00000000", None):
ws.cell(row=r, column=ci).fill = row_fill
r += 1
# =============================================================================
# LIST 5: Matice pokryti
# =============================================================================
ws = ws_matice
present_visits = [v for v in VISIT_ORDER if any(d["visit"] == v for d in S)]
extra = sorted({d["visit"] for d in S if d["visit"] and d["visit"] not in VISIT_ORDER})
present_visits += extra
# Skupinove pasy (row 1).
ws.cell(row=1, column=1, value="").fill = header_fill
groups_seq = []
for v in present_visits:
g = visit_group(v)
if not groups_seq or groups_seq[-1][0] != g:
groups_seq.append([g, 1])
else:
groups_seq[-1][1] += 1
col = 3
for g, span in groups_seq:
ws.merge_cells(start_row=1, start_column=col, end_row=1, end_column=col + span - 1)
gc = ws.cell(row=1, column=col, value=g)
gc.font = bold_font
gc.alignment = CENTER
gc.fill = total_fill
for cc in range(col, col + span):
ws.cell(row=1, column=cc).border = border
col += span
# Hlavicka (row 2).
hdr = ["Centrum", "Pacient"] + present_visits
write_header(ws, 2, hdr,
[12, 16] + [max(10, min(16, len(v))) for v in present_visits])
ws.freeze_panes = "C3"
mat = defaultdict(lambda: {"n": 0, "abn": 0, "crit": 0, "cancel": 0, "rev": 0})
for d in S:
m = mat[(d["subject"], d["visit"])]
m["n"] += 1
if d["direction"]:
m["abn"] += 1
if d["sev"] == 3:
m["crit"] += 1
if d["cancel"]:
m["cancel"] += 1
if d["rev"]:
m["rev"] += 1
mat_subjects = sorted({(d["study"], d["site"], d["subject"]) for d in S},
key=lambda k: (k[0] or "", k[1] or "", k[2] or ""))
r = 3
for st, site, subj in mat_subjects:
cell(ws, r, 1, site)
cell(ws, r, 2, subj, align=LEFT)
for ci, v in enumerate(present_visits, 3):
m = mat.get((subj, v))
if not m:
# Prazdna bunka = navsteva bez dat. Necervenime ji — bez planu navstev
# per pacient nevime, zda byla ocekavana (screening-only pacient nema
# Week 12). Jen ramecek, vizualne neutralni.
x = ws.cell(row=r, column=ci, value=None)
x.border = border
continue
disp = str(m["n"])
if m["cancel"]:
disp += f" (x{m['cancel']})"
if m["rev"]:
disp += " R"
x = ws.cell(row=r, column=ci, value=disp)
x.border = border
x.alignment = CENTER
if m["crit"]:
x.fill = problem_fill
x.font = red_bold
elif m["abn"]:
x.fill = attn_fill
x.font = red_font
elif m["cancel"]:
x.fill = attn_fill
x.font = data_font
else:
x.fill = ok_fill
x.font = data_font
fr = zdroj_first_sv.get((subj, v))
if fr:
x.hyperlink = link(N_ZDROJ, fr)
r += 1
# =============================================================================
# LIST 6: Zrusene testy
# =============================================================================
ws = ws_cancel
# Pivot duvodu (rows 1..N).
reason_counts = defaultdict(int)
for d in cancel_std + cancel_mic:
reason_counts[d["cancel"]] += 1
ws.cell(row=1, column=1, value="Duvody zruseni").font = bold_font
pr = 2
for reason, n in sorted(reason_counts.items(), key=lambda x: -x[1]):
rc = ws.cell(row=pr, column=1, value=reason)
rc.fill = attn_fill
rc.border = border
rc.alignment = LEFT
rc.font = data_font
nc = ws.cell(row=pr, column=2, value=n)
nc.fill = attn_fill
nc.border = border
nc.alignment = CENTER
nc.font = data_font
pr += 1
hdr_row = pr + 1
cancel_cols = [("Studie", 12), ("Centrum", 12), ("Pacient", 16), ("Navsteva", 22),
("Datum", 14), ("Panel", 26), ("Test", 24), ("Duvod zruseni", 26),
("Accession", 14), ("Komentar", 22)]
write_header(ws, hdr_row, [h for h, _ in cancel_cols], [w for _, w in cancel_cols])
ws.freeze_panes = f"A{hdr_row + 1}"
ws.auto_filter.ref = f"A{hdr_row}:{get_column_letter(len(cancel_cols))}{hdr_row}"
LOST_RE = re.compile(r"(?i)(no specimen|quantity not sufficient|beyond stability|not received|qns)")
cancel_all = sorted(cancel_std + cancel_mic,
key=lambda d: (d["study"] or "", d["site"] or "", d["subject"] or "",
d["cancel"] or ""))
r = hdr_row + 1
for d in cancel_all:
for ci in range(1, len(cancel_cols) + 1):
ws.cell(row=r, column=ci).fill = attn_fill
cell(ws, r, 1, d["studyl"], fill=attn_fill)
cell(ws, r, 2, d["site"], fill=attn_fill)
pac = cell(ws, r, 3, d["subject"], fill=attn_fill, align=LEFT)
sr = souhrn_row_by_ssp.get((d["study"], d["site"], d["subject"]))
if sr:
pac.hyperlink = link(N_SOUHRN, sr)
pac.font = link_font
cell(ws, r, 4, d["visit"], fill=attn_fill, align=LEFT)
cell(ws, r, 5, d["collected"], fill=attn_fill, numfmt=DATE_FMT)
cell(ws, r, 6, d.get("tg"), fill=attn_fill, align=LEFT)
cell(ws, r, 7, d.get("test"), fill=attn_fill, align=LEFT)
rc = cell(ws, r, 8, d["cancel"], fill=attn_fill, align=LEFT)
if d["cancel"] and LOST_RE.search(str(d["cancel"])):
rc.font = red_font
ac = cell(ws, r, 9, d["accession"], fill=attn_fill)
if d["id"] in zdroj_row:
ac.hyperlink = link(N_ZDROJ, zdroj_row[d["id"]])
ac.font = link_font
cell(ws, r, 10, d["comments"], fill=attn_fill, align=LEFT)
r += 1
# =============================================================================
# LIST 7: Mikrobiologie
# =============================================================================
ws = ws_micro
ws.merge_cells("A1:N1")
mb = ws.cell(row=1, column=1)
if micro_pos:
mb.value = f"{len(micro_pos)} pozitivnich kultivaci"
mb.fill = problem_fill
mb.font = red_bold
else:
mb.value = "Zadne pozitivni kultivace"
mb.fill = ok_fill
mb.font = bold_font
mb.alignment = LEFT
micro_cols = [("Centrum", 12), ("Pacient", 16), ("Navsteva", 24), ("Datum", 14),
("Accession", 14), ("Panel", 18), ("Vzorek", 12), ("Organismus", 16),
("Rust", 12), ("Vysledek", 28), ("Stav reportu", 16),
("Zruseno", 18), ("MIC", 14), ("Disk", 14)]
write_header(ws, 2, [h for h, _ in micro_cols], [w for _, w in micro_cols])
ws.freeze_panes = "A3"
ws.auto_filter.ref = f"A2:{get_column_letter(len(micro_cols))}2"
micro_sorted = sorted(M, key=lambda d: (0 if d["positive"] else 1,
d["site"] or "", d["subject"] or "",
d["collected"] or MINDT))
r = 3
for d in micro_sorted:
f = d["f"]
pos = d["positive"]
pending = not f.get("Test Result") and not f.get("Test Description")
if pos:
row_fill, fnt = problem_fill, red_bold
elif d["cancel"] or pending:
row_fill, fnt = attn_fill, data_font
else:
row_fill, fnt = ok_fill, data_font
vals = [d["site"], d["subject"], d["visit"], None, d["accession"],
f.get("Test Group"), f.get("Specimen"), f.get("Organism"),
f.get("Growth"), f.get("Test Result"), f.get("Test Description"),
d["cancel"],
" ".join(x for x in [f.get("MIC Result"), f.get("MIC Interpretation")] if x) or None,
f.get("Disk Diffusion Interpretation")]
for ci, v in enumerate(vals, 1):
x = cell(ws, r, ci, v, font=fnt, fill=row_fill,
align=LEFT if ci in (3, 8, 9, 10, 11) else CENTER)
if ci == 4:
x.value = d["collected"]
x.number_format = DATE_FMT
r += 1
# =============================================================================
# LIST 8: Datove mezery
# =============================================================================
ws = ws_gaps
gap_cols = [("Studie", 12), ("Centrum", 12), ("Pacient", 16), ("Navsteva", 22),
("Datum", 14), ("Panel", 26), ("Test", 24), ("Problem", 28)]
write_header(ws, 1, [h for h, _ in gap_cols], [w for _, w in gap_cols])
ws.freeze_panes = "A2"
ws.auto_filter.ref = f"A1:{get_column_letter(len(gap_cols))}1"
gap_sorted = sorted(gap_rows, key=lambda d: (d["study"] or "", d["site"] or "",
d["subject"] or "", d["collected"] or MINDT))
r = 2
for d in gap_sorted:
for ci in range(1, len(gap_cols) + 1):
ws.cell(row=r, column=ci).fill = attn_fill
cell(ws, r, 1, d["studyl"], fill=attn_fill)
cell(ws, r, 2, d["site"], fill=attn_fill)
cell(ws, r, 3, d["subject"], fill=attn_fill, align=LEFT)
cell(ws, r, 4, d["visit"], fill=attn_fill, align=LEFT)
cell(ws, r, 5, d["collected"], fill=attn_fill, numfmt=DATE_FMT)
cell(ws, r, 6, d["tg"], fill=attn_fill, align=LEFT)
cell(ws, r, 7, d["test"], fill=attn_fill, align=LEFT)
if d["value"] and str(d["value"]).strip().lower() in GAP_VALUES:
problem = d["value"]
else:
problem = "Prazdna hodnota v klicovem panelu"
cell(ws, r, 8, problem, fill=attn_fill, align=LEFT)
r += 1
# =============================================================================
# LIST 10: LEGENDA
# =============================================================================
ws = ws_legend
ws.column_dimensions["A"].width = 26
ws.column_dimensions["B"].width = 60
lr = 1
def legend_title(text):
global lr
ws.merge_cells(start_row=lr, start_column=1, end_row=lr, end_column=2)
c = ws.cell(row=lr, column=1, value=text)
c.fill = title_fill
c.font = Font(name="Calibri", bold=True, size=12, color="FFFFFF")
c.alignment = LEFT
lr += 1
def legend_row(a, b, fill=None):
global lr
ca = ws.cell(row=lr, column=1, value=a)
ca.border = border
ca.alignment = LEFT
ca.font = bold_font
if fill:
ca.fill = fill
cb = ws.cell(row=lr, column=2, value=b)
cb.border = border
cb.alignment = Alignment(horizontal="left", vertical="center", wrap_text=True)
cb.font = data_font
lr += 1
legend_title("Priznaky (Flags)")
for a, b in [("H / L", "Hodnota nad / pod referencnim rozmezim"),
("HT / LT", "Kriticka (panic) hodnota — vysoka / nizka"),
("SF", "Vysledek mimo ocekavani (see footnote)"),
("RX", "Doplnujici/opakovane vysetreni")]:
legend_row(a, b)
lr += 1
legend_title("Zavaznost (list Abnormality)")
legend_row("Kriticka", "Panic flag (T), >100 % mimo rozmezi, nebo jaterni H / cytopenie L", problem_fill)
legend_row("Vysoka", ">=50 % mimo rozmezi", attn_fill)
legend_row("Mirna", "Mimo rozmezi do 50 %")
lr += 1
legend_title("Barvy")
legend_row("Cervena", "Bezpecnostni signal / kriticke / chybejici data", problem_fill)
legend_row("Zluta", "Pozornost — abnormalni, zrusene, mezery, revize", attn_fill)
legend_row("Zelena", "V poradku / bez nalezu", ok_fill)
legend_row("Modra", "Soucty a celkove radky", total_fill)
lr += 1
legend_title("Hyuv zakon (DILI)")
legend_row("Pozitivni", "ALT nebo AST >=3x ULN 'Criteria Met' SOUCASNE s TBIL >=2x ULN kombo")
legend_row("ALP<2XULN", "Sam o sobe NENI pozitivni (cholestaza-exclusion), proto jen zluta")
lr += 1
legend_title("Studie")
legend_row("36940", "Protokol 77242113UCO3001 (UCO3001)")
legend_row("35472", "Protokol 42847922MDD3003 (MDD3003)")
lr += 1
legend_title("Aktualnost dat")
if dt_min and dt_max:
legend_row("Sber", f"{dt_min.strftime('%d-%b-%Y')} {dt_max.strftime('%d-%b-%Y')}")
legend_row("Zdroj", "MongoDB covance.results (import z Labcorp XSP CSV)")
# -- Ulozeni ------------------------------------------------------------------
timestamp = datetime.now().strftime("%Y-%m-%d_%H%M%S")
out_path = OUT_DIR + f"{timestamp} Covance lab results report.xlsx"
wb.save(out_path)
client.close()
print(f"\nUlozeno: {out_path}")
print(f" Vysledku celkem: {len(all_docs)} (standard {len(S)}, micro {len(M)})")
print(f" Abnormalni: {len(abn_rows)} (kriticke {len(crit_rows)}), "
f"zrusene {len(cancel_std) + len(cancel_mic)}, mezery {len(gap_rows)}, "
f"revize {len(rev_rows)}")
print(f" Hyuv zakon pozitivni: {len(hys_subjects)}, mikrobiologie pozitivni: {len(micro_pos)}")
print(f" Centra: {len(sites)}, pacienti: {len(subjects)}")