# -*- coding: utf-8 -*- # ============================================================================= # Nazev: illuminator_gap_v1.1.py # Verze: 1.1 # Datum: 2026-06-12 # Zmeny v1.1: KROK 3.1/4 -> "Recommended for Assessment" (drive chybne SipIQ). # "Recommended for SipIQ" az od KROK 5 (CDA PODEPSANO). # Popis: Porovna Illuminator export (SDL_export.xlsx, filtr UCO3002 CZ+SK) # s aktualnim stavem Mongo feasibility.investigators (pole KROK) a # vypise nesoulady ke korekci v Illuminatoru: # A) Illuminator zaostava za nasim postupem (posunout vys) # B) odmitnuti / "nechceme" nepromitnuta (terminalni status) # C) v Mongo aktivni, ale chybi v exportu (zalozit/overit) # + v souladu # Match primarne podle e-mailu, fallback prijmeni+jmeno (bez diakritiky). # Pouziti: python illuminator_gap_v1.0.py # ============================================================================= import sys import unicodedata import openpyxl from pymongo import MongoClient XLSX = r"U:\Dropbox\!!!Days\Downloads Z230\SDL_export.xlsx" MONGO_URI = "mongodb://192.168.1.76:27017" # KROK -> (cilovy Illuminator status, rank, je_terminalni) FUNNEL_RANK = { "Recommended for Assessment": 1, "Recommended for SipIQ": 2, "SIPIQ Link Generated": 3, "Email Sent": 4, "SIPIQ Submitted": 5, "Recommended for SQV": 6, "Recommended for Selection": 7, "Selected": 8, } KROK_TARGET = { "1 - nabidka odeslana": ("Recommended for Assessment", False), "2 - nabidka pripomenuta": ("Recommended for Assessment", False), "3.1 - odpovedel: ma zajem": ("Recommended for Assessment", False), "4 - zajem, CDA vyzadano": ("Recommended for Assessment", False), "5 - CDA podepsano": ("Recommended for SipIQ", False), "6 - SIPIQ odeslan": ("Email Sent", False), "7 - SIPIQ vyplneny": ("SIPIQ Submitted", False), "3.2 - odpovedel: nema zajem": ("Not Interested", True), "0 - nechceme (nase rozhodnuti)": ("Not Recommended", True), "0 - mimo (nedoruceno/neplatny kontakt)": ("Not Recommended / No Response", True), } RANK_LOWER = {k.lower(): v for k, v in FUNNEL_RANK.items()} def rank_of(status): return RANK_LOWER.get((status or "").strip().lower(), 0) def norm(s): s = (s or "").strip() s = unicodedata.normalize("NFKD", s) s = "".join(c for c in s if not unicodedata.combining(c)) return " ".join(s.lower().split()) def load_export(): wb = openpyxl.load_workbook(XLSX, read_only=True) ws = wb.active rows = list(ws.iter_rows(values_only=True)) header = rows[2] idx = {h: i for i, h in enumerate(header) if h} out = [] for r in rows[3:]: if not any(r): continue rec = { "last": (r[idx["PI Last Name"]] or "").strip(), "first": (r[idx["PI First Name"]] or "").strip(), "email": (r[idx["PI Email Address"]] or "").strip(), "status": (r[idx["Feasibility Status in Site Funnel"]] or "").strip(), "reason": (r[idx["Reason Code"]] or "").strip(), "site": (r[idx["Site Name"]] or "").strip(), "city": (r[idx["Site City"]] or "").strip(), "country": (r[idx["Country/Territory"]] or "").strip(), "regdate": r[idx["Regulatory Check Complete Date"]], } out.append(rec) return out def load_mongo(): cli = MongoClient(MONGO_URI) col = cli["feasibility"]["investigators"] return list(col.find({}, {"prijmeni": 1, "jmeno": 1, "email": 1, "KROK": 1, "STATUS": 1, "zeme": 1})) def main(): exp = load_export() inv = load_mongo() by_email = {} by_name = {} for e in exp: if e["email"]: by_email[norm(e["email"])] = e by_name[(norm(e["last"]), norm(e["first"]))] = e matched_exp = set() A, B, sync, Cmiss = [], [], [], [] for d in inv: krok = d.get("KROK", "") tgt = KROK_TARGET.get(krok) last = norm(d.get("prijmeni")) first = norm(d.get("jmeno")) email = norm(d.get("email")) e = by_email.get(email) or by_name.get((last, first)) name = f"{d.get('prijmeni','')} {d.get('jmeno','')}".strip() if not e: Cmiss.append((d, krok, tgt)) continue matched_exp.add(id(e)) cur = e["status"] if tgt is None: continue target_status, terminal = tgt if terminal: # mel by byt terminalni; pokud Illuminator stale v aktivnim funnelu -> B if rank_of(cur) > 0: B.append((d, krok, e, target_status)) else: sync.append((name, krok, cur)) else: tr = FUNNEL_RANK[target_status] cr = rank_of(cur) # 0 = neni v aktivnim funnelu (napr. terminalni) if cr < tr: A.append((d, krok, e, target_status)) else: sync.append((name, krok, cur)) # export radky bez Mongo protejsku (info) exp_unmatched = [e for e in exp if id(e) not in matched_exp] # duplicity v exportu (stejny email nebo stejne jmeno vicekrat) from collections import Counter cnt_email = Counter(norm(e["email"]) for e in exp if e["email"]) cnt_name = Counter((norm(e["last"]), norm(e["first"])) for e in exp) dup_rows = [] for e in exp: if (e["email"] and cnt_email[norm(e["email"])] > 1) or \ cnt_name[(norm(e["last"]), norm(e["first"]))] > 1: dup_rows.append(e) def pname(d): return f"{d.get('prijmeni','')} {d.get('jmeno','')}".strip() print("=" * 70) print(f"EXPORT radku: {len(exp)} | Mongo investigatoru: {len(inv)}") print(f"A (zaostava): {len(A)} | B (odmitnuti neprom.): {len(B)} | " f"C (chybi v exportu): {len(Cmiss)} | v souladu: {len(sync)}") print(f"export radku bez Mongo protejsku: {len(exp_unmatched)}") print("=" * 70) print("\n### A) ILLUMINATOR ZAOSTAVA (posunout vys) ###") for d, krok, e, tgt in sorted(A, key=lambda x: x[1]): print(f"- {pname(d):28} | KROK {krok:32} | now: {e['status']:30} -> {tgt}") print("\n### B) ODMITNUTI / NECHCEME nepromitnuta (terminalni) ###") for d, krok, e, tgt in sorted(B, key=lambda x: x[1]): print(f"- {pname(d):28} | KROK {krok:32} | now: {e['status']:30} -> {tgt}") print("\n### C) V MONGO, CHYBI V EXPORTU (podle KROK) ###") from collections import defaultdict cg = defaultdict(list) for d, krok, tgt in Cmiss: cg[krok].append(pname(d) + f" [{d.get('zeme','')}]") for krok in sorted(cg): print(f"\n -- {krok} ({len(cg[krok])}):") for n in sorted(cg[krok]): print(f" {n}") print("\n### V SOULADU ###") for name, krok, cur in sorted(sync, key=lambda x: x[1]): print(f"- {name:28} | KROK {krok:32} | {cur}") print("\n### EXPORT radky bez Mongo protejsku (k overeni) ###") for e in exp_unmatched: print(f"- {e['last']} {e['first']} | {e['email']} | {e['status']} | {e['site']} ({e['city']})") print("\n### DUPLICITNI RADKY V EXPORTU (uklidit) ###") for e in sorted(dup_rows, key=lambda x: (norm(x['last']), norm(x['first']))): print(f"- {e['last']} {e['first']} | {e['email']} | {e['status']} | {e['site']} ({e['city']})") if __name__ == "__main__": main()