z230
This commit is contained in:
@@ -0,0 +1,34 @@
|
||||
# store_cda_batch_v1.3.py
|
||||
|
||||
**Verze:** 1.3 · **Datum:** 2026-06-12
|
||||
|
||||
Dávkové uložení binárek CDA (PDF) do Mongo `feasibility.investigators` →
`cda.data_*`. Zdroj = `.msg` soubory na Toweru (`/mnt/user/JNJEMAILS`),
stažené přes SFTP (paramiko), příloha vytažena `extract_msg`.
|
||||
|
||||
## Princip
|
||||
- `MAPPING` = explicitní seznam `(investigator_id, msg_filename, attachment_filename, label)` — žádné hádání za běhu.
|
||||
- CDA chodí od 3 asistentek z CTA: **Wittenbergerová** (LWittenb), **Hrabalová**
|
||||
(LHrabalo), **Vojčová** (LVojcova).
|
||||
|
||||
## Co zapisuje
|
||||
`cda.data_base64`, `cda.data_sha256`, `cda.data_filename`, `cda.data_mime`,
|
||||
`cda.data_size`, `cda.data_stored_at`, `cda.data_source_msg`; doplní `cda.soubor`.
|
||||
|
||||
## Spuštění
|
||||
```
python store_cda_batch_v1.3.py            # dry-run / náhled
python store_cda_batch_v1.3.py --apply    # zápis do Mongo
```
|
||||
venv má `paramiko` + `extract_msg` + `pymongo`.
|
||||
|
||||
## Historie
|
||||
- v1.3 — DÁVKA 6 (12JUN2026): Gregušová Katarína (KM Management spol. s r.o.,
|
||||
krok 4 → 5), Drastich Pavel (IKEM — doplnění CDA k již odeslanému SIPIQ, krok 6;
|
||||
uzavírá výjimečný stav „SIPIQ před podpisem CDA").
|
||||
- v1.2 — DÁVKA 5 (11JUN2026): Mudr Robert.
|
||||
- v1.1 — DÁVKA 4 (11JUN2026): Konečný Michal, Baláž Jozef.
|
||||
- v1.0 — DÁVKY 1–3 (09–10JUN2026): Hlavatý, Fedurco, Tichý, Falc, Pešta,
|
||||
Jungwirthová, Matouš, Mihálkanin, Krížová, Gregar, Ďurina, Horváth.
|
||||
(Lukáč zvlášť přes store_cda_to_mongo.)
|
||||
@@ -0,0 +1,150 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# =============================================================================
|
||||
# Nazev: store_cda_batch_v1.3.py
|
||||
# Verze: 1.3
|
||||
# Datum: 2026-06-12
|
||||
# Popis: Davkove ulozi binarky CDA (PDF) do Mongo k investigatorum
|
||||
# (feasibility.investigators -> cda.data_*). Zdroj = .msg soubory na
|
||||
# Toweru (/mnt/user/JNJEMAILS), stazene pres SFTP, priloha vytazena
|
||||
# extract_msg. Mapovani investigator -> (.msg, attachment) je
|
||||
# explicitni (zadne hadani za behu). Drzi se domluvy: fyzicky
|
||||
# dokument z e-mailu -> do Mongo (CDA fyzicky ulozeno k lekarum).
|
||||
# Zapise: cda.data_base64, cda.data_sha256, cda.data_filename,
|
||||
# cda.data_mime, cda.data_size, cda.data_stored_at,
|
||||
# cda.data_source_msg; doplni cda.soubor pokud chybi.
|
||||
# Existujici cda.* (stav, datum_*, zdroj, poznamka) NEMENI.
|
||||
# Pouziti: python store_cda_batch_v1.3.py (dry-run / nahled)
|
||||
# python store_cda_batch_v1.3.py --apply (zapise do Mongo)
|
||||
# Zmeny v1.3: DAVKA 6 (12JUN2026) - Gregusova Katarina (krok 4 -> 5),
|
||||
# Drastich Pavel (doplneni CDA k jiz odeslanemu SIPIQ, krok 6).
|
||||
# =============================================================================
|
||||
|
||||
import os
|
||||
import sys
|
||||
import base64
|
||||
import hashlib
|
||||
import unicodedata
|
||||
import paramiko
|
||||
import extract_msg
|
||||
from pymongo import MongoClient
|
||||
from bson import ObjectId
|
||||
|
||||
# MongoDB connection string; override with the MONGO_URI environment variable.
MONGO_URI = os.environ.get("MONGO_URI", "mongodb://192.168.1.76:27017")

# SSH/SFTP endpoint of the Tower host that holds the source .msg files.
TOWER_HOST = "192.168.1.76"
TOWER_USER = "root"
# NOTE(review): a root password committed to source control is a leaked secret.
# It can be supplied through the TOWER_PASS environment variable; the literal
# fallback is kept only so existing invocations keep working and should be
# rotated and removed.
TOWER_PASS = os.environ.get("TOWER_PASS", "7309208104")
REMOTE_DIR = "/mnt/user/JNJEMAILS"

# Local cache directory for downloaded .msg files.
TMPDIR = r"u:\Dropbox\!!!Days\Downloads Z230\_cda_tmp"

# Value written to cda.data_stored_at for every document of this batch.
STORED_AT = "2026-06-12"

# One entry per investigator:
#   (investigator_id, msg_filename, attachment_filename, label)
# BATCH 6 (12JUN2026): new CDAs from 12 June.
MAPPING = [
    (
        "6a1c4275aa46d8b608065ce2",
        "FC130007E9D30EB30000.msg",
        "SK_CDA Institution_KM Management, spol. s r.o._fully signed 12Jun2026.pdf",
        "Gregusova Katarina (KM Management spol. s r.o.)",
    ),
    (
        "6a19832b5fc221351825796a",
        "FC130007E9D30EB10000.msg",
        "CZ_CDA PI_Doc. MUDr. Pavel Drastich, Ph.D._fully signed 11Jun2026.pdf",
        "Drastich Pavel (IKEM)",
    ),
]
|
||||
|
||||
# HISTORIE drivejsich davek (jiz ulozeno):
|
||||
# DAVKA 5 (11JUN2026): Mudr Robert FC130007DE92C232.
|
||||
# DAVKA 4 (11JUN2026): Konecny Michal FC130007DE92C231, Balaz Jozef FC130007DE92C20F.
|
||||
# DAVKA 3 (10JUN2026): Gregar FC130007DE92C204, Durina FC130007DE92C203,
|
||||
# Horvath/Accout Center FC130007DE92C1FE.
|
||||
# DAVKA 1+2 (09JUN2026): Hlavaty/Cliniq FC1300053049739C, Fedurco/ENDOMED
|
||||
# FC1300053049739B, Tichy FC13000530495B95, Falc FC130007D8A1F0E6, Pesta
|
||||
# FC130007D8A1F0E1, Jungwirthova FC130007D8A1F0E2, Lukac FC130007C9E971FF
|
||||
# (store_cda_to_mongo_v1.0), Matous/Axon FC130007D8A1F0E3, Mihalkanin/GastroLM
|
||||
# FC130007D8A1F0E6, Krizova/Motol FC130007C1643CA1.
|
||||
|
||||
|
||||
def norm(s):
    """Return *s* lower-cased, without diacritics, with whitespace collapsed.

    A falsy value (``None`` or ``""``) yields an empty string.
    """
    decomposed = unicodedata.normalize("NFKD", s or "")
    # NFKD splits accented letters into base letter + combining mark;
    # keeping only the non-combining characters removes the accents.
    base_chars = [ch for ch in decomposed if not unicodedata.combining(ch)]
    words = "".join(base_chars).lower().split()
    return " ".join(words)
|
||||
|
||||
|
||||
def _pick_attachment(msg, wanted_name):
    """Return ``(filename, data)`` of the attachment matching *wanted_name*.

    Names are compared after ``norm``. An attachment qualifies when its name
    equals the wanted one, contains it, or is a ``.pdf`` whose name is
    contained in the wanted one. Returns ``None`` when nothing qualifies.
    """
    target = norm(wanted_name)
    for att in msg.attachments:
        name = att.longFilename or att.shortFilename or ""
        candidate = norm(name)
        if (candidate == target
                or target in candidate
                or (candidate in target and name.lower().endswith(".pdf"))):
            return name, att.data
    return None


def _collect_plan():
    """Download every mapped .msg and extract its CDA attachment.

    Returns a list of tuples ``(inv_id, label, msg_name, attachment_name,
    info, status)`` where ``info`` is ``(size, sha256_hex, raw_bytes)``, or
    ``None`` when the attachment could not be used (``status`` then says why).
    """
    plan = []
    ssh = paramiko.SSHClient()
    # NOTE(review): AutoAddPolicy accepts an unknown host key without
    # verification; consider pinning the Tower's host key instead.
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect(TOWER_HOST, username=TOWER_USER, password=TOWER_PASS, timeout=30)
        sftp = ssh.open_sftp()
        try:
            for inv_id, msg_name, att_name, label in MAPPING:
                local_msg = os.path.join(TMPDIR, msg_name)
                if not os.path.exists(local_msg):
                    # Download under a temporary name so an interrupted
                    # transfer is never mistaken for a cached file next run.
                    partial = local_msg + ".part"
                    sftp.get(f"{REMOTE_DIR}/{msg_name}", partial)
                    os.replace(partial, local_msg)

                msg = extract_msg.Message(local_msg)
                try:
                    chosen = _pick_attachment(msg, att_name)
                finally:
                    msg.close()

                if not chosen:
                    plan.append((inv_id, label, msg_name, att_name, None,
                                 "!!! PRILOHA NENALEZENA"))
                    continue
                found_name, raw = chosen
                if not isinstance(raw, (bytes, bytearray)):
                    # Without a byte payload there is nothing to hash or
                    # store; report it instead of crashing in hashlib.
                    plan.append((inv_id, label, msg_name, found_name, None,
                                 "!!! PRILOHA NEMA BINARNI DATA"))
                    continue
                sha = hashlib.sha256(raw).hexdigest()
                plan.append((inv_id, label, msg_name, found_name,
                             (len(raw), sha, raw), "OK"))
        finally:
            sftp.close()
    finally:
        ssh.close()
    return plan


def _print_preview(col, plan):
    """Print the planned writes and whether each target already has data."""
    print("=== NAHLED DAVKY (CDA -> Mongo cda.data) ===\n")
    for inv_id, label, msg_name, att_name, info, status in plan:
        doc = col.find_one({"_id": ObjectId(inv_id)},
                           {"prijmeni": 1, "jmeno": 1, "cda.data_base64": 1})
        # "cda" may be absent or null, hence the "or {}".
        has = bool(doc and (doc.get("cda") or {}).get("data_base64"))
        print(f"[{status}] {label}  (_id {inv_id})")
        print(f"    .msg:     {msg_name}")
        print(f"    priloha:  {att_name}")
        if info:
            print(f"    velikost: {info[0]} B   sha256: {info[1]}")
        if doc is None:
            print("    !!! _id v Mongo NENALEZENO")
        print(f"    data_base64 jiz existuje: {has}")
        print()


def main():
    """Store the mapped CDA attachments in ``feasibility.investigators``.

    Without ``--apply`` on the command line only the preview is printed.
    With ``--apply`` the ``cda.data_*`` fields are written for every plan
    entry whose status is ``OK``; ``cda.soubor`` is filled in only when it is
    missing or empty, as stated in the file header.
    """
    apply = "--apply" in sys.argv
    os.makedirs(TMPDIR, exist_ok=True)

    plan = _collect_plan()

    client = MongoClient(MONGO_URI)
    try:
        col = client["feasibility"]["investigators"]
        _print_preview(col, plan)

        if not apply:
            print(">>> DRY-RUN. Pro zapis spust s --apply")
            return

        n = 0
        for inv_id, label, msg_name, att_name, info, status in plan:
            if status != "OK" or not info:
                print(f"PRESKAKUJI {label}: {status}")
                continue
            size, sha, raw = info
            oid = ObjectId(inv_id)
            res = col.update_one(
                {"_id": oid},
                {"$set": {
                    "cda.data_base64": base64.b64encode(raw).decode("ascii"),
                    "cda.data_sha256": sha,
                    "cda.data_filename": att_name,
                    "cda.data_mime": "application/pdf",
                    "cda.data_size": size,
                    "cda.data_stored_at": STORED_AT,
                    "cda.data_source_msg": msg_name,
                }},
            )
            if res.matched_count == 0:
                # No such investigator: nothing was stored, so say so rather
                # than reporting a successful write.
                print(f"!!! NENALEZEN v Mongo: {label} (_id {inv_id})")
                continue
            # Fill cda.soubor only when it is missing, null or empty, so an
            # existing value is left untouched.
            col.update_one(
                {"_id": oid, "$or": [{"cda.soubor": None}, {"cda.soubor": ""}]},
                {"$set": {"cda.soubor": att_name}},
            )
            n += res.modified_count
            print(f"ZAPSANO: {label}  (modified={res.modified_count})")
        print(f"\n>>> CELKEM ZAPSANO: {n}")
    finally:
        client.close()


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,195 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# =============================================================================
|
||||
# Nazev: illuminator_gap_v1.0.py
|
||||
# Verze: 1.0
|
||||
# Datum: 2026-06-12
|
||||
# Popis: Porovna Illuminator export (SDL_export.xlsx, filtr UCO3002 CZ+SK)
|
||||
# s aktualnim stavem Mongo feasibility.investigators (pole KROK) a
|
||||
# vypise nesoulady ke korekci v Illuminatoru:
|
||||
# A) Illuminator zaostava za nasim postupem (posunout vys)
|
||||
# B) odmitnuti / "nechceme" nepromitnuta (terminalni status)
|
||||
# C) v Mongo aktivni, ale chybi v exportu (zalozit/overit)
|
||||
# + v souladu
|
||||
# Match primarne podle e-mailu, fallback prijmeni+jmeno (bez diakritiky).
|
||||
# Pouziti: python illuminator_gap_v1.0.py
|
||||
# =============================================================================
|
||||
import sys
|
||||
import unicodedata
|
||||
import openpyxl
|
||||
from pymongo import MongoClient
|
||||
|
||||
# Illuminator export workbook and the MongoDB endpoint it is compared against.
XLSX = r"U:\Dropbox\!!!Days\Downloads Z230\SDL_export.xlsx"
MONGO_URI = "mongodb://192.168.1.76:27017"

# Active funnel statuses in ascending order; a status's rank is its 1-based
# position in this sequence.
_FUNNEL_ORDER = (
    "Recommended for Assessment",
    "Recommended for SipIQ",
    "SIPIQ Link Generated",
    "Email Sent",
    "SIPIQ Submitted",
    "Recommended for SQV",
    "Recommended for Selection",
    "Selected",
)
FUNNEL_RANK = {status: rank for rank, status in enumerate(_FUNNEL_ORDER, start=1)}

# KROK value -> (target Illuminator status, is_terminal)
KROK_TARGET = {
    "1 - nabidka odeslana": ("Recommended for Assessment", False),
    "2 - nabidka pripomenuta": ("Recommended for Assessment", False),
    "3.1 - odpovedel: ma zajem": ("Recommended for SipIQ", False),
    "4 - zajem, CDA vyzadano": ("Recommended for SipIQ", False),
    "5 - CDA podepsano": ("Recommended for SipIQ", False),
    "6 - SIPIQ odeslan": ("Email Sent", False),
    "7 - SIPIQ vyplneny": ("SIPIQ Submitted", False),
    "3.2 - odpovedel: nema zajem": ("Not Interested", True),
    "0 - nechceme (nase rozhodnuti)": ("Not Recommended", True),
    "0 - mimo (nedoruceno/neplatny kontakt)": ("Not Recommended / No Response", True),
}

# Lower-cased status -> rank, for case-insensitive lookups.
RANK_LOWER = {status.lower(): rank for status, rank in FUNNEL_RANK.items()}
|
||||
|
||||
|
||||
def rank_of(status):
    """Return the funnel rank of *status*, or 0 if it is not in RANK_LOWER.

    The lookup ignores case and surrounding whitespace; a falsy status is
    treated as an empty string.
    """
    key = status if status else ""
    return RANK_LOWER.get(key.strip().lower(), 0)
|
||||
|
||||
|
||||
def norm(s):
    """Fold *s* for matching: no diacritics, lower case, single spaces.

    A falsy value (``None`` or ``""``) yields an empty string.
    """
    text = unicodedata.normalize("NFKD", (s or "").strip())
    kept = []
    for ch in text:
        if unicodedata.combining(ch):
            continue  # drop the accent marks split off by NFKD
        kept.append(ch)
    return " ".join("".join(kept).lower().split())
|
||||
|
||||
|
||||
def load_export():
    """Read the Illuminator export workbook into a list of row dicts.

    The third sheet row is taken as the header and every later non-empty row
    becomes one dict with the keys ``last``, ``first``, ``email``, ``status``,
    ``reason``, ``site``, ``city``, ``country`` (stripped text) and
    ``regdate`` (raw cell value).

    Raises:
        IndexError: the sheet has fewer than three rows.
        KeyError: a required column is missing from the header row.
    """
    text_columns = {
        "last": "PI Last Name",
        "first": "PI First Name",
        "email": "PI Email Address",
        "status": "Feasibility Status in Site Funnel",
        "reason": "Reason Code",
        "site": "Site Name",
        "city": "Site City",
        "country": "Country/Territory",
    }
    regdate_column = "Regulatory Check Complete Date"

    wb = openpyxl.load_workbook(XLSX, read_only=True)
    try:
        rows = list(wb.active.iter_rows(values_only=True))
    finally:
        # A read-only workbook keeps the file open until closed explicitly.
        wb.close()

    if len(rows) < 3:
        raise IndexError(f"{XLSX}: header row 3 not found ({len(rows)} rows)")
    header = rows[2]
    idx = {h: i for i, h in enumerate(header) if h}

    missing = [c for c in (*text_columns.values(), regdate_column) if c not in idx]
    if missing:
        raise KeyError(f"{XLSX}: missing columns {missing}")

    def text(row, column):
        # Cells may hold numbers or dates, so convert before stripping.
        value = row[idx[column]]
        return "" if value is None else str(value).strip()

    out = []
    for r in rows[3:]:
        if not any(r):
            continue  # skip completely empty rows
        rec = {key: text(r, column) for key, column in text_columns.items()}
        rec["regdate"] = r[idx[regdate_column]]
        out.append(rec)
    return out
|
||||
|
||||
|
||||
def load_mongo():
    """Return all documents of ``feasibility.investigators`` as a list.

    Only the fields ``prijmeni``, ``jmeno``, ``email``, ``KROK``, ``STATUS``
    and ``zeme`` are projected (``_id`` comes along by default). The client
    is closed before returning so no connection is left open.
    """
    cli = MongoClient(MONGO_URI)
    try:
        col = cli["feasibility"]["investigators"]
        projection = {"prijmeni": 1, "jmeno": 1, "email": 1,
                      "KROK": 1, "STATUS": 1, "zeme": 1}
        return list(col.find({}, projection))
    finally:
        cli.close()
|
||||
|
||||
|
||||
def main():
    """Compare the Illuminator export with Mongo KROK values and print gaps.

    Each Mongo investigator is matched to an export row by e-mail first and
    by (last name, first name) second, both folded with ``norm``. The report
    lists:
      A) rows whose Illuminator status ranks below the target for the KROK,
      B) terminal KROK values whose Illuminator status is still in the funnel,
      C) Mongo investigators with no export row, grouped by KROK,
    followed by rows in sync, export rows without a Mongo counterpart and
    duplicate export rows.
    """
    from collections import Counter, defaultdict

    exp = load_export()
    inv = load_mongo()

    def name_key(last, first):
        return (norm(last), norm(first))

    def pname(d):
        # "or ''" keeps a null name from being printed as "None".
        return f"{d.get('prijmeni') or ''} {d.get('jmeno') or ''}".strip()

    by_email = {}
    by_name = {}
    for e in exp:
        if e["email"]:
            by_email[norm(e["email"])] = e
        key = name_key(e["last"], e["first"])
        if any(key):
            # Rows without any name are not indexed; otherwise every
            # nameless Mongo document would match them.
            by_name[key] = e

    matched_exp = set()
    A, B, sync, Cmiss = [], [], [], []

    for d in inv:
        # A null or missing KROK becomes "" so sorting and formatting below
        # never mix None with strings.
        krok = str(d.get("KROK") or "")
        tgt = KROK_TARGET.get(krok)
        email = norm(d.get("email"))
        e = by_email.get(email) or by_name.get(name_key(d.get("prijmeni"), d.get("jmeno")))

        if not e:
            Cmiss.append((d, krok, tgt))
            continue
        matched_exp.add(id(e))
        cur = e["status"]
        if tgt is None:
            continue  # KROK has no Illuminator target; nothing to compare
        target_status, terminal = tgt
        if terminal:
            # Should be terminal; still in the active funnel -> B.
            if rank_of(cur) > 0:
                B.append((d, krok, e, target_status))
            else:
                sync.append((pname(d), krok, cur))
        else:
            tr = FUNNEL_RANK[target_status]
            cr = rank_of(cur)  # 0 = not an active funnel status
            if cr < tr:
                A.append((d, krok, e, target_status))
            else:
                sync.append((pname(d), krok, cur))

    # Export rows that no Mongo document matched (informational).
    exp_unmatched = [e for e in exp if id(e) not in matched_exp]

    # Export rows sharing an e-mail or a name with another export row.
    cnt_email = Counter(norm(e["email"]) for e in exp if e["email"])
    cnt_name = Counter(name_key(e["last"], e["first"]) for e in exp)
    dup_rows = [
        e for e in exp
        if (e["email"] and cnt_email[norm(e["email"])] > 1)
        or cnt_name[name_key(e["last"], e["first"])] > 1
    ]

    print("=" * 70)
    print(f"EXPORT radku: {len(exp)} | Mongo investigatoru: {len(inv)}")
    print(f"A (zaostava): {len(A)} | B (odmitnuti neprom.): {len(B)} | "
          f"C (chybi v exportu): {len(Cmiss)} | v souladu: {len(sync)}")
    print(f"export radku bez Mongo protejsku: {len(exp_unmatched)}")
    print("=" * 70)

    print("\n### A) ILLUMINATOR ZAOSTAVA (posunout vys) ###")
    for d, krok, e, tgt in sorted(A, key=lambda x: x[1]):
        print(f"- {pname(d):28} | KROK {krok:32} | now: {e['status']:30} -> {tgt}")

    print("\n### B) ODMITNUTI / NECHCEME nepromitnuta (terminalni) ###")
    for d, krok, e, tgt in sorted(B, key=lambda x: x[1]):
        print(f"- {pname(d):28} | KROK {krok:32} | now: {e['status']:30} -> {tgt}")

    print("\n### C) V MONGO, CHYBI V EXPORTU (podle KROK) ###")
    cg = defaultdict(list)
    for d, krok, tgt in Cmiss:
        cg[krok].append(pname(d) + f" [{d.get('zeme','')}]")
    for krok in sorted(cg):
        print(f"\n  -- {krok} ({len(cg[krok])}):")
        for n in sorted(cg[krok]):
            print(f"     {n}")

    print("\n### V SOULADU ###")
    for name, krok, cur in sorted(sync, key=lambda x: x[1]):
        print(f"- {name:28} | KROK {krok:32} | {cur}")

    print("\n### EXPORT radky bez Mongo protejsku (k overeni) ###")
    for e in exp_unmatched:
        print(f"- {e['last']} {e['first']} | {e['email']} | {e['status']} | {e['site']} ({e['city']})")

    print("\n### DUPLICITNI RADKY V EXPORTU (uklidit) ###")
    for e in sorted(dup_rows, key=lambda x: (norm(x['last']), norm(x['first']))):
        print(f"- {e['last']} {e['first']} | {e['email']} | {e['status']} | {e['site']} ({e['city']})")


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,197 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# =============================================================================
|
||||
# Nazev: illuminator_gap_v1.1.py
|
||||
# Verze: 1.1
|
||||
# Datum: 2026-06-12
|
||||
# Zmeny v1.1: KROK 3.1/4 -> "Recommended for Assessment" (drive chybne SipIQ).
|
||||
# "Recommended for SipIQ" az od KROK 5 (CDA PODEPSANO).
|
||||
# Popis: Porovna Illuminator export (SDL_export.xlsx, filtr UCO3002 CZ+SK)
|
||||
# s aktualnim stavem Mongo feasibility.investigators (pole KROK) a
|
||||
# vypise nesoulady ke korekci v Illuminatoru:
|
||||
# A) Illuminator zaostava za nasim postupem (posunout vys)
|
||||
# B) odmitnuti / "nechceme" nepromitnuta (terminalni status)
|
||||
# C) v Mongo aktivni, ale chybi v exportu (zalozit/overit)
|
||||
# + v souladu
|
||||
# Match primarne podle e-mailu, fallback prijmeni+jmeno (bez diakritiky).
|
||||
# Pouziti: python illuminator_gap_v1.1.py
|
||||
# =============================================================================
|
||||
import sys
|
||||
import unicodedata
|
||||
import openpyxl
|
||||
from pymongo import MongoClient
|
||||
|
||||
# Illuminator export workbook and the MongoDB endpoint it is compared against.
XLSX = r"U:\Dropbox\!!!Days\Downloads Z230\SDL_export.xlsx"
MONGO_URI = "mongodb://192.168.1.76:27017"

# Active funnel statuses in ascending order; a status's rank is its 1-based
# position in this sequence.
_FUNNEL_ORDER = (
    "Recommended for Assessment",
    "Recommended for SipIQ",
    "SIPIQ Link Generated",
    "Email Sent",
    "SIPIQ Submitted",
    "Recommended for SQV",
    "Recommended for Selection",
    "Selected",
)
FUNNEL_RANK = {status: rank for rank, status in enumerate(_FUNNEL_ORDER, start=1)}

# KROK value -> (target Illuminator status, is_terminal)
# v1.1: "Recommended for SipIQ" applies only from KROK 5 (CDA signed) onward.
KROK_TARGET = {
    "1 - nabidka odeslana": ("Recommended for Assessment", False),
    "2 - nabidka pripomenuta": ("Recommended for Assessment", False),
    "3.1 - odpovedel: ma zajem": ("Recommended for Assessment", False),
    "4 - zajem, CDA vyzadano": ("Recommended for Assessment", False),
    "5 - CDA podepsano": ("Recommended for SipIQ", False),
    "6 - SIPIQ odeslan": ("Email Sent", False),
    "7 - SIPIQ vyplneny": ("SIPIQ Submitted", False),
    "3.2 - odpovedel: nema zajem": ("Not Interested", True),
    "0 - nechceme (nase rozhodnuti)": ("Not Recommended", True),
    "0 - mimo (nedoruceno/neplatny kontakt)": ("Not Recommended / No Response", True),
}

# Lower-cased status -> rank, for case-insensitive lookups.
RANK_LOWER = {status.lower(): rank for status, rank in FUNNEL_RANK.items()}
|
||||
|
||||
|
||||
def rank_of(status):
    """Return the funnel rank of *status*, or 0 if it is not in RANK_LOWER.

    The lookup ignores case and surrounding whitespace; a falsy status is
    treated as an empty string.
    """
    key = status if status else ""
    return RANK_LOWER.get(key.strip().lower(), 0)
|
||||
|
||||
|
||||
def norm(s):
    """Fold *s* for matching: no diacritics, lower case, single spaces.

    A falsy value (``None`` or ``""``) yields an empty string.
    """
    text = unicodedata.normalize("NFKD", (s or "").strip())
    kept = []
    for ch in text:
        if unicodedata.combining(ch):
            continue  # drop the accent marks split off by NFKD
        kept.append(ch)
    return " ".join("".join(kept).lower().split())
|
||||
|
||||
|
||||
def load_export():
    """Read the Illuminator export workbook into a list of row dicts.

    The third sheet row is taken as the header and every later non-empty row
    becomes one dict with the keys ``last``, ``first``, ``email``, ``status``,
    ``reason``, ``site``, ``city``, ``country`` (stripped text) and
    ``regdate`` (raw cell value).

    Raises:
        IndexError: the sheet has fewer than three rows.
        KeyError: a required column is missing from the header row.
    """
    text_columns = {
        "last": "PI Last Name",
        "first": "PI First Name",
        "email": "PI Email Address",
        "status": "Feasibility Status in Site Funnel",
        "reason": "Reason Code",
        "site": "Site Name",
        "city": "Site City",
        "country": "Country/Territory",
    }
    regdate_column = "Regulatory Check Complete Date"

    wb = openpyxl.load_workbook(XLSX, read_only=True)
    try:
        rows = list(wb.active.iter_rows(values_only=True))
    finally:
        # A read-only workbook keeps the file open until closed explicitly.
        wb.close()

    if len(rows) < 3:
        raise IndexError(f"{XLSX}: header row 3 not found ({len(rows)} rows)")
    header = rows[2]
    idx = {h: i for i, h in enumerate(header) if h}

    missing = [c for c in (*text_columns.values(), regdate_column) if c not in idx]
    if missing:
        raise KeyError(f"{XLSX}: missing columns {missing}")

    def text(row, column):
        # Cells may hold numbers or dates, so convert before stripping.
        value = row[idx[column]]
        return "" if value is None else str(value).strip()

    out = []
    for r in rows[3:]:
        if not any(r):
            continue  # skip completely empty rows
        rec = {key: text(r, column) for key, column in text_columns.items()}
        rec["regdate"] = r[idx[regdate_column]]
        out.append(rec)
    return out
|
||||
|
||||
|
||||
def load_mongo():
    """Return all documents of ``feasibility.investigators`` as a list.

    Only the fields ``prijmeni``, ``jmeno``, ``email``, ``KROK``, ``STATUS``
    and ``zeme`` are projected (``_id`` comes along by default). The client
    is closed before returning so no connection is left open.
    """
    cli = MongoClient(MONGO_URI)
    try:
        col = cli["feasibility"]["investigators"]
        projection = {"prijmeni": 1, "jmeno": 1, "email": 1,
                      "KROK": 1, "STATUS": 1, "zeme": 1}
        return list(col.find({}, projection))
    finally:
        cli.close()
|
||||
|
||||
|
||||
def main():
    """Compare the Illuminator export with Mongo KROK values and print gaps.

    Each Mongo investigator is matched to an export row by e-mail first and
    by (last name, first name) second, both folded with ``norm``. The report
    lists:
      A) rows whose Illuminator status ranks below the target for the KROK,
      B) terminal KROK values whose Illuminator status is still in the funnel,
      C) Mongo investigators with no export row, grouped by KROK,
    followed by rows in sync, export rows without a Mongo counterpart and
    duplicate export rows.
    """
    from collections import Counter, defaultdict

    exp = load_export()
    inv = load_mongo()

    def name_key(last, first):
        return (norm(last), norm(first))

    def pname(d):
        # "or ''" keeps a null name from being printed as "None".
        return f"{d.get('prijmeni') or ''} {d.get('jmeno') or ''}".strip()

    by_email = {}
    by_name = {}
    for e in exp:
        if e["email"]:
            by_email[norm(e["email"])] = e
        key = name_key(e["last"], e["first"])
        if any(key):
            # Rows without any name are not indexed; otherwise every
            # nameless Mongo document would match them.
            by_name[key] = e

    matched_exp = set()
    A, B, sync, Cmiss = [], [], [], []

    for d in inv:
        # A null or missing KROK becomes "" so sorting and formatting below
        # never mix None with strings.
        krok = str(d.get("KROK") or "")
        tgt = KROK_TARGET.get(krok)
        email = norm(d.get("email"))
        e = by_email.get(email) or by_name.get(name_key(d.get("prijmeni"), d.get("jmeno")))

        if not e:
            Cmiss.append((d, krok, tgt))
            continue
        matched_exp.add(id(e))
        cur = e["status"]
        if tgt is None:
            continue  # KROK has no Illuminator target; nothing to compare
        target_status, terminal = tgt
        if terminal:
            # Should be terminal; still in the active funnel -> B.
            if rank_of(cur) > 0:
                B.append((d, krok, e, target_status))
            else:
                sync.append((pname(d), krok, cur))
        else:
            tr = FUNNEL_RANK[target_status]
            cr = rank_of(cur)  # 0 = not an active funnel status
            if cr < tr:
                A.append((d, krok, e, target_status))
            else:
                sync.append((pname(d), krok, cur))

    # Export rows that no Mongo document matched (informational).
    exp_unmatched = [e for e in exp if id(e) not in matched_exp]

    # Export rows sharing an e-mail or a name with another export row.
    cnt_email = Counter(norm(e["email"]) for e in exp if e["email"])
    cnt_name = Counter(name_key(e["last"], e["first"]) for e in exp)
    dup_rows = [
        e for e in exp
        if (e["email"] and cnt_email[norm(e["email"])] > 1)
        or cnt_name[name_key(e["last"], e["first"])] > 1
    ]

    print("=" * 70)
    print(f"EXPORT radku: {len(exp)} | Mongo investigatoru: {len(inv)}")
    print(f"A (zaostava): {len(A)} | B (odmitnuti neprom.): {len(B)} | "
          f"C (chybi v exportu): {len(Cmiss)} | v souladu: {len(sync)}")
    print(f"export radku bez Mongo protejsku: {len(exp_unmatched)}")
    print("=" * 70)

    print("\n### A) ILLUMINATOR ZAOSTAVA (posunout vys) ###")
    for d, krok, e, tgt in sorted(A, key=lambda x: x[1]):
        print(f"- {pname(d):28} | KROK {krok:32} | now: {e['status']:30} -> {tgt}")

    print("\n### B) ODMITNUTI / NECHCEME nepromitnuta (terminalni) ###")
    for d, krok, e, tgt in sorted(B, key=lambda x: x[1]):
        print(f"- {pname(d):28} | KROK {krok:32} | now: {e['status']:30} -> {tgt}")

    print("\n### C) V MONGO, CHYBI V EXPORTU (podle KROK) ###")
    cg = defaultdict(list)
    for d, krok, tgt in Cmiss:
        cg[krok].append(pname(d) + f" [{d.get('zeme','')}]")
    for krok in sorted(cg):
        print(f"\n  -- {krok} ({len(cg[krok])}):")
        for n in sorted(cg[krok]):
            print(f"     {n}")

    print("\n### V SOULADU ###")
    for name, krok, cur in sorted(sync, key=lambda x: x[1]):
        print(f"- {name:28} | KROK {krok:32} | {cur}")

    print("\n### EXPORT radky bez Mongo protejsku (k overeni) ###")
    for e in exp_unmatched:
        print(f"- {e['last']} {e['first']} | {e['email']} | {e['status']} | {e['site']} ({e['city']})")

    print("\n### DUPLICITNI RADKY V EXPORTU (uklidit) ###")
    for e in sorted(dup_rows, key=lambda x: (norm(x['last']), norm(x['first']))):
        print(f"- {e['last']} {e['first']} | {e['email']} | {e['status']} | {e['site']} ({e['city']})")


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,69 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# =============================================================================
|
||||
# Nazev: watch_new_emails_v1.0.py
|
||||
# Verze: 1.0
|
||||
# Datum: 2026-06-12
|
||||
# Popis: Background poller pro feasibility session. Sleduje kolekci
|
||||
# emaily."vbuzalka@its.jnj.com" v Mongo (192.168.1.76:27017) a ceka,
|
||||
# az pribude email novejsi nez baseline (nejnovejsi received_at pri
|
||||
# startu). Jakmile neco najde, vypise prehled novych emailu a SKONCI
|
||||
# (exit 0) -> background task tim re-invokuje Claude, ktery je vyhodnoti.
|
||||
# Polluje kazdych POLL_SECONDS. Po MAX_HOURS se ukonci (exit 0) i bez
|
||||
# nalezu, aby session nedrzel proces donekonecna.
|
||||
# Pouziti: python watch_new_emails_v1.0.py
|
||||
# =============================================================================
|
||||
import sys
|
||||
import time
|
||||
from datetime import datetime, timedelta
|
||||
from pymongo import MongoClient
|
||||
|
||||
# MongoDB endpoint plus the database and mailbox collection being watched.
MONGO_URI = "mongodb://192.168.1.76:27017"
DB, COLL = "emaily", "vbuzalka@its.jnj.com"

# Only documents whose jnj_folder equals this path are considered.
FOLDER = f"/{COLL}/Inbox"

# Seconds between polls and the overall time budget in hours.
POLL_SECONDS, MAX_HOURS = 120, 12
|
||||
|
||||
def newest(col):
    """Return the highest ``received_at`` among FOLDER documents in *col*.

    Returns ``None`` when no document matches the folder filter.
    """
    lookup = {
        "sort": [("received_at", -1)],  # descending, so the first hit is the newest
        "projection": {"received_at": 1},
    }
    latest = col.find_one({"jnj_folder": FOLDER}, **lookup)
    if not latest:
        return None
    return latest["received_at"]
|
||||
|
||||
def fmt(v):
    """Render *v* for log output.

    ``datetime`` values are formatted as ``YYYY-MM-DD HH:MM:SS``; anything
    else is passed through ``str``.
    """
    return v.strftime("%Y-%m-%d %H:%M:%S") if isinstance(v, datetime) else str(v)
|
||||
|
||||
def main():
    """Poll the mailbox collection until a newer e-mail appears, then report.

    The newest ``received_at`` at start-up is the baseline. Every
    POLL_SECONDS the newest value is read again; once it exceeds the
    baseline (or the folder was empty at start and now has a document) the
    new documents are listed and the function returns. After MAX_HOURS
    without a hit it prints a notice and returns.
    """
    client = MongoClient(MONGO_URI, serverSelectionTimeoutMS=10000)
    try:
        col = client[DB][COLL]
        baseline = newest(col)
        print(f"[watch] start  baseline received_at = {fmt(baseline)}", flush=True)
        deadline = datetime.now() + timedelta(hours=MAX_HOURS)

        while datetime.now() < deadline:
            time.sleep(POLL_SECONDS)
            try:
                cur = newest(col)
                if cur is None:
                    continue
                if baseline is not None and not cur > baseline:
                    continue
                # With no baseline (folder empty at start) every document in
                # the folder is new, so the received_at filter is omitted.
                query = {"jnj_folder": FOLDER}
                if baseline is not None:
                    query["received_at"] = {"$gt": baseline}
                new_docs = list(col.find(
                    query,
                    projection={"received_at": 1, "sender.email": 1, "subject": 1,
                                "attachments.filename": 1},
                    sort=[("received_at", -1)]))
            except Exception as e:
                # Best-effort poller: log the failure and retry on the next
                # cycle rather than ending the watch.
                print(f"[watch] poll error: {e}", flush=True)
                continue

            print(f"\n=== NOVE EMAILY: {len(new_docs)} (po {fmt(baseline)}) ===", flush=True)
            for d in new_docs:
                snd = (d.get("sender") or {}).get("email", "")
                atts = [a.get("filename") for a in (d.get("attachments") or [])]
                print(f"- {fmt(d.get('received_at'))} | {snd} | {d.get('subject')}"
                      + (f" | prilohy: {atts}" if atts else ""), flush=True)
                print(f"    _id: {d.get('_id')}", flush=True)
            print("=== KONEC – task skoncil, vyhodnot v session ===", flush=True)
            return

        print("[watch] MAX_HOURS vyprselo bez noveho emailu – konec.", flush=True)
    finally:
        client.close()


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user