# -*- coding: utf-8 -*-
# =============================================================================
# Name:        store_cda_batch_v1.4.py
# Version:     1.4
# Date:        2026-06-15
# Description: Batch-stores CDA binaries (PDF) in Mongo on investigator
#              documents (feasibility.investigators -> cda.data_*).
#              Source = .msg files on the Tower host (/mnt/user/JNJEMAILS),
#              downloaded over SFTP; the attachment is extracted with
#              extract_msg. The investigator -> (.msg, attachment) mapping
#              is explicit. Writes cda.data_* and fills in cda.soubor.
# Usage:       python store_cda_batch_v1.4.py            (dry-run / preview)
#              python store_cda_batch_v1.4.py --apply    (writes to Mongo)
# Changes v1.4: BATCH 7 (15JUN2026) - Molnar Martin (GASTROMART s.r.o.,
#              step 4->5), Dzurikova Michaela (IBDcentrum s.r.o., step 4->5).
# =============================================================================
import base64
import hashlib
import os
import sys
import unicodedata

import extract_msg
import paramiko
from bson import ObjectId
from pymongo import MongoClient

# Connection settings. Each value can be overridden through an environment
# variable of the same name; the literals are the previous hard-coded values,
# kept as fallbacks so existing invocations keep working.
MONGO_URI = os.environ.get("MONGO_URI", "mongodb://192.168.1.76:27017")
TOWER_HOST = os.environ.get("TOWER_HOST", "192.168.1.76")
TOWER_USER = os.environ.get("TOWER_USER", "root")
# SECURITY NOTE(review): a root password is embedded in source as the fallback.
# Rotate it and drop the default once TOWER_PASS is provided by the environment
# (or switch to key-based SSH authentication).
TOWER_PASS = os.environ.get("TOWER_PASS", "7309208104")

# Remote directory holding the .msg files and the local download cache.
REMOTE_DIR = "/mnt/user/JNJEMAILS"
TMPDIR = r"u:\Dropbox\!!!Days\Downloads Z230\_cda_tmp"

# Value written to cda.data_stored_at for every document in this batch.
STORED_AT = "2026-06-15"

# investigator_id -> (msg_filename, attachment_filename, label)
# BATCH 7 (15JUN2026)
MAPPING = [
    (
        "6a19832b5fc221351825797f",
        "FC130007F372CFD10000.msg",
        "SK_CDA_Institution_GASTROMART s.r.o._fully signed 15Jun2026.pdf",
        "Molnar Martin (GASTROMART s.r.o.)",
    ),
    (
        "6a19832b5fc2213518257964",
        "FC130007F17E55100000.msg",
        "SK_CDA PI_MUDr. Michaela Dzurikova_IBDcentrum s.r.o_13Jun2026.pdf",
        "Dzurikova Michaela (IBDcentrum s.r.o.)",
    ),
]

# HISTORY of earlier batches (already stored):
# BATCH 6 (12JUN2026): Gregusova Katarina FC130007E9D30EB3, Drastich Pavel FC130007E9D30EB1.
# BATCH 5 (11JUN2026): Mudr Robert FC130007DE92C232.
# BATCH 4 (11JUN2026): Konecny Michal FC130007DE92C231, Balaz Jozef FC130007DE92C20F.
# BATCH 3 (10JUN2026): Gregar, Durina, Horvath.
# BATCH 1+2 (09JUN2026): Hlavaty, Fedurco, Tichy, Falc, Pesta, Jungwirthova, Lukac,
#                        Matous, Mihalkanin, Krizova.


def norm(s):
    """Return *s* normalized for loose filename comparison.

    The text is NFKD-decomposed, combining marks (accents) are dropped, the
    result is lower-cased and every run of whitespace collapses to a single
    space. ``None`` and the empty string both yield ``""``.
    """
    s = s or ""
    s = unicodedata.normalize("NFKD", s)
    s = "".join(c for c in s if not unicodedata.combining(c))
    return " ".join(s.lower().split())


def _find_attachment(attachments, att_name):
    """Return ``(filename, data)`` of the first matching attachment, or None.

    An attachment matches when its normalized name equals the normalized
    ``att_name``, contains it, or is contained in it while the raw name ends
    with ``.pdf``.
    """
    target = norm(att_name)
    for att in attachments:
        name = att.longFilename or att.shortFilename or ""
        candidate = norm(name)
        if (
            candidate == target
            or target in candidate
            or (candidate in target and name.lower().endswith(".pdf"))
        ):
            return name, att.data
    return None


def _fetch_msg(sftp, msg_name):
    """Ensure ``msg_name`` is present in TMPDIR and return its local path."""
    local_msg = os.path.join(TMPDIR, msg_name)
    if not os.path.exists(local_msg):
        # Download under a temporary name and rename afterwards, so that an
        # interrupted transfer never leaves a truncated file at the cached
        # path, where the exists() check would reuse it on the next run.
        partial = local_msg + ".part"
        sftp.get(f"{REMOTE_DIR}/{msg_name}", partial)
        os.replace(partial, local_msg)
    return local_msg


def _build_plan(sftp):
    """Download every mapped .msg and extract the mapped attachment.

    Returns a list of tuples
    ``(inv_id, label, msg_name, att_name, info, status)`` where ``info`` is
    ``(size, sha256_hex, raw_bytes)`` when ``status == "OK"`` and ``None``
    otherwise.
    """
    plan = []
    for inv_id, msg_name, att_name, label in MAPPING:
        local_msg = _fetch_msg(sftp, msg_name)
        m = extract_msg.Message(local_msg)
        try:
            chosen = _find_attachment(m.attachments, att_name)
        finally:
            # Close the message even when attachment parsing raises.
            m.close()
        if not chosen:
            plan.append((inv_id, label, msg_name, att_name, None, "!!! PRILOHA NENALEZENA"))
            continue
        found_name, raw = chosen
        if not isinstance(raw, (bytes, bytearray)):
            # Attachment data that is not a byte string (None, or some other
            # object) cannot be hashed or base64-encoded; skip, do not crash.
            plan.append((inv_id, label, msg_name, found_name, None, "!!! PRILOHA NEMA BINARNI DATA"))
            continue
        sha = hashlib.sha256(raw).hexdigest()
        plan.append((inv_id, label, msg_name, found_name, (len(raw), sha, raw), "OK"))
    return plan


def _print_preview(col, plan):
    """Print one summary per planned item, including whether data already exists."""
    print("=== NAHLED DAVKY (CDA -> Mongo cda.data) ===\n")
    for inv_id, label, msg_name, att_name, info, status in plan:
        doc = col.find_one(
            {"_id": ObjectId(inv_id)},
            {"prijmeni": 1, "jmeno": 1, "cda.data_base64": 1},
        )
        has = bool(doc and doc.get("cda", {}).get("data_base64"))
        print(f"[{status}] {label} (_id {inv_id})")
        print(f" .msg: {msg_name}")
        print(f" priloha: {att_name}")
        if info:
            print(f" velikost: {info[0]} B sha256: {info[1]}")
        if doc is None:
            # Without this the preview looks healthy even though --apply
            # would update nothing for this investigator.
            print(" !!! INVESTIGATOR V MONGO NENALEZEN")
        print(f" data_base64 jiz existuje: {has}")
        print()


def _apply_plan(col, plan):
    """Write every OK item to Mongo and return the number of modified documents."""
    n = 0
    for inv_id, label, msg_name, att_name, info, status in plan:
        if status != "OK" or not info:
            print(f"PRESKAKUJI {label}: {status}")
            continue
        size, sha, raw = info
        b64 = base64.b64encode(raw).decode("ascii")
        res = col.update_one(
            {"_id": ObjectId(inv_id)},
            {"$set": {
                "cda.data_base64": b64,
                "cda.data_sha256": sha,
                "cda.data_filename": att_name,
                "cda.data_mime": "application/pdf",
                "cda.data_size": size,
                "cda.data_stored_at": STORED_AT,
                "cda.data_source_msg": msg_name,
                "cda.soubor": att_name,
            }},
        )
        if res.matched_count == 0:
            # No document has this _id: report it instead of "ZAPSANO".
            print(f"!!! INVESTIGATOR NENALEZEN: {label} (_id {inv_id})")
            continue
        n += res.modified_count
        print(f"ZAPSANO: {label} (modified={res.modified_count})")
    return n


def main():
    """Preview the batch, and write it to Mongo when ``--apply`` is given.

    Downloads the mapped .msg files over SFTP, extracts the mapped PDF
    attachments, prints a preview, and only with ``--apply`` on the command
    line updates the investigator documents. All network handles are closed
    on every exit path.
    """
    apply = "--apply" in sys.argv
    os.makedirs(TMPDIR, exist_ok=True)

    ssh = paramiko.SSHClient()
    # NOTE(review): AutoAddPolicy accepts unknown host keys without
    # verification; consider loading known host keys instead.
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect(TOWER_HOST, username=TOWER_USER, password=TOWER_PASS, timeout=30)
        sftp = ssh.open_sftp()
        try:
            plan = _build_plan(sftp)
        finally:
            sftp.close()
    finally:
        ssh.close()

    client = MongoClient(MONGO_URI)
    try:
        col = client["feasibility"]["investigators"]
        _print_preview(col, plan)
        if not apply:
            print(">>> DRY-RUN. Pro zapis spust s --apply")
            return
        n = _apply_plan(col, plan)
        print(f"\n>>> CELKEM ZAPSANO: {n}")
    finally:
        client.close()


if __name__ == "__main__":
    main()