Files
2026-06-10 11:59:03 +02:00

144 lines
5.6 KiB
Python

# -*- coding: utf-8 -*-
# =============================================================================
# Nazev: classify_krok_v1.0.py
# Verze: 1.0
# Datum: 2026-06-09
# Popis: Odvodi pole KROK (krok feasibility workflow) ze STATUS u kazdeho
# investigatora v Mongo feasibility.investigators a zapise jej.
# Workflow kroky (DAWN / 77242113UCO3002):
# 1 - nabidka odeslana
# 2 - nabidka pripomenuta
# 3.1 - odpovedel: ma zajem (CDA jeste nevyzadano)
# 3.2 - odpovedel: nema zajem (nezajem / nema zajem)
# 4 - zajem, CDA vyzadano (u CTA, nepodepsano)
# 5 - CDA podepsano (fully signed / mame CDA / master)
# 6 - SIPIQ odeslan
# 7 - SIPIQ vyplneny
# 0 - mimo (nedoruceno / neplatny kontakt)
# 0 - nechceme (nase rozhodnuti: Nechceme / Rejected; centrum se neoslovovalo)
# KROK se odvozuje ze STATUS (znovu-spustitelne). Pro pripady, kde
# STATUS neobsahuje klicove slovo, jsou explicitni OVERRIDES.
# Pouziti: python classify_krok_v1.0.py (dry-run, jen vypis)
# python classify_krok_v1.0.py --apply (zapise KROK do Mongo)
# =============================================================================
import os
import sys
import unicodedata

from pymongo import MongoClient
# Connection string; the MONGO_URI environment variable overrides the default.
MONGO_URI = os.getenv("MONGO_URI", "mongodb://192.168.1.76:27017")

# --- Step labels (the exact strings stored in the KROK field) ---
K0 = "0 - mimo (nedoruceno/neplatny kontakt)"
K0N = "0 - nechceme (nase rozhodnuti)"
K1 = "1 - nabidka odeslana"
K2 = "2 - nabidka pripomenuta"
K31 = "3.1 - odpovedel: ma zajem"
K32 = "3.2 - odpovedel: nema zajem"
K4 = "4 - zajem, CDA vyzadano"
K5 = "5 - CDA podepsano"
K6 = "6 - SIPIQ odeslan"
K7 = "7 - SIPIQ vyplneny"

# Explicit exceptions, keyed by the document _id rendered as a string, for
# records whose STATUS text carries no unambiguous keyword:
#   - Dzurikova: replied and wants a phone call to arrange participation
#     (i.e. interested), but the word "zajem" is not in STATUS -> 3.1
#   - Reif: the first official offer has only just been sent, although he
#     showed interest via EFERTUS -> keep in step 1 (awaiting a direct reply)
#   - Koskova: no capacity, will not take part -> 3.2
OVERRIDES = {
    "6a19832b5fc2213518257964": K31,  # Dzurikova Michaela
    "6a268cdeb84bf5597759b478": K1,  # Reif Stanislav
    "6a19832b5fc2213518257979": K32,  # Koskova Radka
}

# Expected number of records per step (per the classification agreed on
# 09JUN2026); used only as a sanity check of the computed counts.
EXPECTED = {
    K0: 4,
    K0N: 36,
    K1: 17,
    K2: 30,
    K31: 3,
    K32: 14,
    K4: 9,
    K5: 3,
    K6: 13,
    K7: 1,
}
def _fold(text: str) -> str:
    """Return *text* lower-cased with diacritical marks removed.

    NFKD decomposition splits each accented letter into its base letter plus
    combining marks; dropping the marks leaves the bare letters, e.g.
    "Připomínka odeslána" -> "pripominka odeslana".
    """
    decomposed = unicodedata.normalize("NFKD", text.lower())
    return "".join(ch for ch in decomposed if not unicodedata.combining(ch))


def classify(status: str) -> str:
    """Derive the KROK (workflow step) label from a free-text STATUS.

    Matching is case- and diacritics-insensitive: the status is folded to
    lower-case unaccented text once and compared against unaccented keywords,
    so fully accented, unaccented and mixed spellings are all recognised.

    Rules are evaluated in a fixed order and the first match wins:
    undeliverable contact, our own rejection, then the most advanced workflow
    step downwards (7 -> 2). A status that matches nothing is step 1.

    Args:
        status: The STATUS text; ``None``/empty is treated as an empty
            string, any other non-string value is converted with ``str()``.

    Returns:
        One of the ``K*`` step labels.
    """
    s = _fold(str(status or ""))

    def has(*needles: str) -> bool:
        """True when any of the keywords occurs in the folded status."""
        return any(needle in s for needle in needles)

    # 0 - undeliverable / wrong contact
    if has("nedorucen", "neplatna adresa", "spatny kontakt"):
        return K0
    # 0 - "nechceme" = our own decision up front (the site was never
    # approached, no further step). Distinct from "nezajem", which is the
    # physician's answer (3.2).
    if has("nechceme", "rejected"):
        return K0N
    # 7 - SIPIQ filled in
    if "sipiq" in s and "vyplnen" in s:
        return K7
    # 6 - SIPIQ sent (note: "nutno poslat/odeslat SIPIQ" is NOT sent, hence
    # the match on the participle "odeslan" adjacent to "sipiq")
    if has("sipiq odeslan", "odeslan sipiq"):
        return K6
    # 5 - CDA signed
    if has("fully signed", "mame cda", "cda podeps", "podepsano"):
        return K5
    # 4 - interested, CDA requested / waiting for CDA (not yet signed; the
    # signed case was handled above). Detected via the stem "vyzad".
    if ("cda" in s and "vyzad" in s) or "cekame na cda" in s:
        return K4
    # 3.2 - not interested. Must precede 3.1 because "nezajem" and
    # "nema zajem" both contain "zajem". ("nechceme"/"rejected" never reach
    # this point; they already returned K0N above.)
    if has("nezajem", "nema zajem"):
        return K32
    # 3.1 - interested
    if "zajem" in s:
        return K31
    # 2 - reminder sent
    if "pripominka odeslan" in s:
        return K2
    # 1 - offer sent (default)
    return K1
def main():
    """Classify every investigator and, with ``--apply``, store the result.

    Reads all documents from ``feasibility.investigators``, derives KROK for
    each one (an entry in OVERRIDES wins over ``classify``), prints the
    investigators grouped by step and compares the per-step counts with
    EXPECTED. Without ``--apply`` on the command line nothing is written
    (dry-run); with it, KROK is set on every document.

    NOTE(review): a count mismatch is only reported; it does not block the
    write when ``--apply`` is given.
    """
    apply = "--apply" in sys.argv

    # The context manager closes the client on every exit path, including
    # the early dry-run return and any exception.
    with MongoClient(MONGO_URI) as client:
        col = client["feasibility"]["investigators"]
        docs = list(col.find({}, {"prijmeni": 1, "jmeno": 1, "STATUS": 1}))
        print(f"Nacteno {len(docs)} zaznamu.\n")

        rows = []    # (krok, original _id) for the write phase
        groups = {}  # krok -> investigator names, in document order
        for d in docs:
            doc_id = d["_id"]
            krok = OVERRIDES.get(str(doc_id)) or classify(d.get("STATUS", ""))
            # Missing or None name parts are rendered as empty, not "None"
            surname = d.get("prijmeni") or ""
            first_name = d.get("jmeno") or ""
            groups.setdefault(krok, []).append(f"{surname} {first_name}".strip())
            rows.append((krok, doc_id))

        # Listing by step
        for krok in sorted(groups):
            members = groups[krok]
            print(f"[{krok}] ({len(members)})")
            print(" " + ", ".join(members))
            print()

        # Check against the expected counts
        print("--- Kontrola poctu (ocekavano vs. spocteno) ---")
        ok = True
        for k in [K0, K0N, K1, K2, K31, K32, K4, K5, K6, K7]:
            exp = EXPECTED.get(k, 0)
            got = len(groups.get(k, []))
            flag = "OK" if exp == got else "!!! MISMATCH"
            if exp != got:
                ok = False
            print(f" {k:45s} exp={exp:3d} got={got:3d} {flag}")
        print("Souhrn:", "VSE SEDI" if ok else "NESEDI - zkontroluj pravidla/OVERRIDES")
        print()

        if not apply:
            print(">>> DRY-RUN. Pro zapis spust s --apply")
            return

        # Write phase: filter on the _id exactly as it was read, so no
        # str -> ObjectId round trip is needed and ids of any type work.
        n = 0
        for krok, doc_id in rows:
            res = col.update_one({"_id": doc_id}, {"$set": {"KROK": krok}})
            n += res.modified_count
        print(f">>> ZAPSANO: KROK aktualizovan u {n} zaznamu (z {len(rows)}).")
# Run the classification only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()