Files
janssen/Feasibility/TRASH/illuminator_gap_v1.0.py
2026-06-12 15:29:57 +02:00

196 lines
7.2 KiB
Python

# -*- coding: utf-8 -*-
# =============================================================================
# Nazev: illuminator_gap_v1.0.py
# Verze: 1.0
# Datum: 2026-06-12
# Popis: Porovna Illuminator export (SDL_export.xlsx, filtr UCO3002 CZ+SK)
# s aktualnim stavem Mongo feasibility.investigators (pole KROK) a
# vypise nesoulady ke korekci v Illuminatoru:
# A) Illuminator zaostava za nasim postupem (posunout vys)
# B) odmitnuti / "nechceme" nepromitnuta (terminalni status)
# C) v Mongo aktivni, ale chybi v exportu (zalozit/overit)
# + v souladu
# Match primarne podle e-mailu, fallback prijmeni+jmeno (bez diakritiky).
# Pouziti: python illuminator_gap_v1.0.py
# =============================================================================
import sys
import unicodedata
import openpyxl
from pymongo import MongoClient
XLSX = r"U:\Dropbox\!!!Days\Downloads Z230\SDL_export.xlsx"
MONGO_URI = "mongodb://192.168.1.76:27017"
# KROK -> (cilovy Illuminator status, rank, je_terminalni)
FUNNEL_RANK = {
"Recommended for Assessment": 1,
"Recommended for SipIQ": 2,
"SIPIQ Link Generated": 3,
"Email Sent": 4,
"SIPIQ Submitted": 5,
"Recommended for SQV": 6,
"Recommended for Selection": 7,
"Selected": 8,
}
KROK_TARGET = {
"1 - nabidka odeslana": ("Recommended for Assessment", False),
"2 - nabidka pripomenuta": ("Recommended for Assessment", False),
"3.1 - odpovedel: ma zajem": ("Recommended for SipIQ", False),
"4 - zajem, CDA vyzadano": ("Recommended for SipIQ", False),
"5 - CDA podepsano": ("Recommended for SipIQ", False),
"6 - SIPIQ odeslan": ("Email Sent", False),
"7 - SIPIQ vyplneny": ("SIPIQ Submitted", False),
"3.2 - odpovedel: nema zajem": ("Not Interested", True),
"0 - nechceme (nase rozhodnuti)": ("Not Recommended", True),
"0 - mimo (nedoruceno/neplatny kontakt)": ("Not Recommended / No Response", True),
}
RANK_LOWER = {k.lower(): v for k, v in FUNNEL_RANK.items()}
def rank_of(status):
return RANK_LOWER.get((status or "").strip().lower(), 0)
def norm(s):
s = (s or "").strip()
s = unicodedata.normalize("NFKD", s)
s = "".join(c for c in s if not unicodedata.combining(c))
return " ".join(s.lower().split())
def load_export():
wb = openpyxl.load_workbook(XLSX, read_only=True)
ws = wb.active
rows = list(ws.iter_rows(values_only=True))
header = rows[2]
idx = {h: i for i, h in enumerate(header) if h}
out = []
for r in rows[3:]:
if not any(r):
continue
rec = {
"last": (r[idx["PI Last Name"]] or "").strip(),
"first": (r[idx["PI First Name"]] or "").strip(),
"email": (r[idx["PI Email Address"]] or "").strip(),
"status": (r[idx["Feasibility Status in Site Funnel"]] or "").strip(),
"reason": (r[idx["Reason Code"]] or "").strip(),
"site": (r[idx["Site Name"]] or "").strip(),
"city": (r[idx["Site City"]] or "").strip(),
"country": (r[idx["Country/Territory"]] or "").strip(),
"regdate": r[idx["Regulatory Check Complete Date"]],
}
out.append(rec)
return out
def load_mongo():
cli = MongoClient(MONGO_URI)
col = cli["feasibility"]["investigators"]
return list(col.find({}, {"prijmeni": 1, "jmeno": 1, "email": 1,
"KROK": 1, "STATUS": 1, "zeme": 1}))
def main():
exp = load_export()
inv = load_mongo()
by_email = {}
by_name = {}
for e in exp:
if e["email"]:
by_email[norm(e["email"])] = e
by_name[(norm(e["last"]), norm(e["first"]))] = e
matched_exp = set()
A, B, sync, Cmiss = [], [], [], []
for d in inv:
krok = d.get("KROK", "")
tgt = KROK_TARGET.get(krok)
last = norm(d.get("prijmeni"))
first = norm(d.get("jmeno"))
email = norm(d.get("email"))
e = by_email.get(email) or by_name.get((last, first))
name = f"{d.get('prijmeni','')} {d.get('jmeno','')}".strip()
if not e:
Cmiss.append((d, krok, tgt))
continue
matched_exp.add(id(e))
cur = e["status"]
if tgt is None:
continue
target_status, terminal = tgt
if terminal:
# mel by byt terminalni; pokud Illuminator stale v aktivnim funnelu -> B
if rank_of(cur) > 0:
B.append((d, krok, e, target_status))
else:
sync.append((name, krok, cur))
else:
tr = FUNNEL_RANK[target_status]
cr = rank_of(cur) # 0 = neni v aktivnim funnelu (napr. terminalni)
if cr < tr:
A.append((d, krok, e, target_status))
else:
sync.append((name, krok, cur))
# export radky bez Mongo protejsku (info)
exp_unmatched = [e for e in exp if id(e) not in matched_exp]
# duplicity v exportu (stejny email nebo stejne jmeno vicekrat)
from collections import Counter
cnt_email = Counter(norm(e["email"]) for e in exp if e["email"])
cnt_name = Counter((norm(e["last"]), norm(e["first"])) for e in exp)
dup_rows = []
for e in exp:
if (e["email"] and cnt_email[norm(e["email"])] > 1) or \
cnt_name[(norm(e["last"]), norm(e["first"]))] > 1:
dup_rows.append(e)
def pname(d):
return f"{d.get('prijmeni','')} {d.get('jmeno','')}".strip()
print("=" * 70)
print(f"EXPORT radku: {len(exp)} | Mongo investigatoru: {len(inv)}")
print(f"A (zaostava): {len(A)} | B (odmitnuti neprom.): {len(B)} | "
f"C (chybi v exportu): {len(Cmiss)} | v souladu: {len(sync)}")
print(f"export radku bez Mongo protejsku: {len(exp_unmatched)}")
print("=" * 70)
print("\n### A) ILLUMINATOR ZAOSTAVA (posunout vys) ###")
for d, krok, e, tgt in sorted(A, key=lambda x: x[1]):
print(f"- {pname(d):28} | KROK {krok:32} | now: {e['status']:30} -> {tgt}")
print("\n### B) ODMITNUTI / NECHCEME nepromitnuta (terminalni) ###")
for d, krok, e, tgt in sorted(B, key=lambda x: x[1]):
print(f"- {pname(d):28} | KROK {krok:32} | now: {e['status']:30} -> {tgt}")
print("\n### C) V MONGO, CHYBI V EXPORTU (podle KROK) ###")
from collections import defaultdict
cg = defaultdict(list)
for d, krok, tgt in Cmiss:
cg[krok].append(pname(d) + f" [{d.get('zeme','')}]")
for krok in sorted(cg):
print(f"\n -- {krok} ({len(cg[krok])}):")
for n in sorted(cg[krok]):
print(f" {n}")
print("\n### V SOULADU ###")
for name, krok, cur in sorted(sync, key=lambda x: x[1]):
print(f"- {name:28} | KROK {krok:32} | {cur}")
print("\n### EXPORT radky bez Mongo protejsku (k overeni) ###")
for e in exp_unmatched:
print(f"- {e['last']} {e['first']} | {e['email']} | {e['status']} | {e['site']} ({e['city']})")
print("\n### DUPLICITNI RADKY V EXPORTU (uklidit) ###")
for e in sorted(dup_rows, key=lambda x: (norm(x['last']), norm(x['first']))):
print(f"- {e['last']} {e['first']} | {e['email']} | {e['status']} | {e['site']} ({e['city']})")
if __name__ == "__main__":
main()