# -*- coding: utf-8 -*-
# =============================================================================
# Name:        store_cda_batch_v1.2.py
# Version:     1.2
# Date:        2026-06-11
# Description: Batch-stores CDA binaries (PDF) into Mongo on investigator
#              documents (feasibility.investigators -> cda.data_*). The source
#              is .msg files on the Tower (/mnt/user/JNJEMAILS), downloaded
#              over SFTP; the attachment is extracted with extract_msg. The
#              investigator -> (.msg, attachment) mapping is explicit (no
#              guessing at run time). Follows the agreement: the physical
#              document from the e-mail goes into Mongo (the CDA is physically
#              stored with the physicians).
# Writes:      cda.data_base64, cda.data_sha256, cda.data_filename,
#              cda.data_mime, cda.data_size, cda.data_stored_at,
#              cda.data_source_msg; fills in cda.soubor if it is missing.
#              Existing cda.* (stav, datum_*, zdroj, poznamka) are NOT changed.
# Usage:       python store_cda_batch_v1.2.py            (dry-run / preview)
#              python store_cda_batch_v1.2.py --apply    (writes to Mongo)
# Changes v1.2: BATCH 5 (11JUN2026) - Mudr Robert (step 4 -> 5).
# =============================================================================
import os
import sys
import base64
import hashlib
import unicodedata

import paramiko
import extract_msg
from pymongo import MongoClient
from bson import ObjectId

# Every connection setting can be overridden from the environment; the
# defaults are the values this script has always used.
MONGO_URI = os.environ.get("MONGO_URI", "mongodb://192.168.1.76:27017")
TOWER_HOST = os.environ.get("TOWER_HOST", "192.168.1.76")
TOWER_USER = os.environ.get("TOWER_USER", "root")
# SECURITY NOTE(review): the fallback below is a plaintext root password kept
# in source control. Set TOWER_PASS in the environment and remove the default
# (and rotate the password) as soon as every caller provides it.
TOWER_PASS = os.environ.get("TOWER_PASS", "7309208104")
REMOTE_DIR = "/mnt/user/JNJEMAILS"
TMPDIR = r"u:\Dropbox\!!!Days\Downloads Z230\_cda_tmp"
STORED_AT = "2026-06-11"

# One entry per CDA to store:
#   (investigator_id, msg_filename, attachment_filename, label)
# BATCH 5 (11JUN2026): new CDA from 11 June (step 4 -> 5)
MAPPING = [
    (
        "6a19832b5fc2213518257978",
        "FC130007DE92C2320000.msg",
        "CZ_ CDA PI_MUDr. Robert Mudr_11Jun2026.pdf",
        "Mudr Robert (Nemocnice Milosrdnych sester sv. Karla Boromejskeho)",
    ),
]

# HISTORY of earlier batches (already stored):
# BATCH 4 (11JUN2026): Konecny Michal FC130007DE92C231, Balaz Jozef FC130007DE92C20F.
# BATCH 3 (10JUN2026): Gregar FC130007DE92C204, Durina FC130007DE92C203,
#   Horvath/Accout Center FC130007DE92C1FE.
# BATCHES 1+2 (09JUN2026): Hlavaty/Cliniq FC1300053049739C, Fedurco/ENDOMED
#   FC1300053049739B, Tichy FC13000530495B95, Falc FC130007D8A1F0E6, Pesta
#   FC130007D8A1F0E1, Jungwirthova FC130007D8A1F0E2, Lukac FC130007C9E971FF
#   (store_cda_to_mongo_v1.0), Matous/Axon FC130007D8A1F0E3, Mihalkanin/GastroLM
#   FC130007D8A1F0E6, Krizova/Motol FC130007C1643CA1.


def norm(s):
    """Return *s* lower-cased, without diacritics, with whitespace collapsed.

    ``None`` (or any falsy value) is treated as the empty string.
    """
    decomposed = unicodedata.normalize("NFKD", s or "")
    without_marks = "".join(
        ch for ch in decomposed if not unicodedata.combining(ch)
    )
    return " ".join(without_marks.lower().split())


def _pick_attachment(attachments, wanted_name):
    """Return ``(filename, data)`` for the attachment matching *wanted_name*.

    An attachment whose normalised name equals the normalised *wanted_name*
    always wins. Only when there is no such attachment is the first fuzzy
    match used: the wanted name is contained in the attachment name, or the
    attachment name is contained in the wanted name and ends with ``.pdf``.

    Attachments whose ``data`` is not a bytes object are skipped because they
    cannot be hashed or base64-encoded. An empty *wanted_name* never matches
    (an empty string is a substring of every name).

    Returns ``None`` when nothing matches.
    """
    target = norm(wanted_name)
    if not target:
        return None

    fuzzy = None
    for att in attachments:
        name = att.longFilename or att.shortFilename or ""
        data = att.data
        if not isinstance(data, (bytes, bytearray)):
            continue
        candidate = norm(name)
        if candidate == target:
            return name, bytes(data)
        if fuzzy is None and (
            target in candidate
            or (candidate in target and name.lower().endswith(".pdf"))
        ):
            # Remember the first fuzzy hit but keep looking for an exact one.
            fuzzy = (name, bytes(data))
    return fuzzy


def _fetch_msg(sftp, msg_name):
    """Return the local path of *msg_name*, downloading it if it is not cached.

    The download goes to a ``.part`` file that is renamed on success, so an
    interrupted transfer is never mistaken for a complete cached file.
    """
    local_msg = os.path.join(TMPDIR, msg_name)
    if not os.path.exists(local_msg):
        partial = local_msg + ".part"
        sftp.get(f"{REMOTE_DIR}/{msg_name}", partial)
        os.replace(partial, local_msg)
    return local_msg


def _build_plan():
    """Download every mapped .msg and extract the mapped attachment.

    Returns a list of tuples
    ``(inv_id, label, msg_name, att_name, info, status)`` where ``info`` is
    ``(size, sha256_hex, raw_bytes)`` and ``status`` is ``"OK"``, or ``info``
    is ``None`` and ``status`` describes the failure.
    """
    plan = []
    with paramiko.SSHClient() as ssh:
        # NOTE(review): AutoAddPolicy accepts unknown host keys automatically,
        # so the server's identity is not verified. Kept as-is to avoid
        # breaking existing runs; consider loading known host keys instead.
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        ssh.connect(
            TOWER_HOST, username=TOWER_USER, password=TOWER_PASS, timeout=30
        )
        with ssh.open_sftp() as sftp:
            for inv_id, msg_name, att_name, label in MAPPING:
                local_msg = _fetch_msg(sftp, msg_name)
                m = extract_msg.Message(local_msg)
                try:
                    chosen = _pick_attachment(m.attachments, att_name)
                finally:
                    # Release the file handle even if parsing fails.
                    m.close()
                if chosen is None:
                    plan.append(
                        (inv_id, label, msg_name, att_name, None,
                         "!!! PRILOHA NENALEZENA")
                    )
                    continue
                found_name, raw = chosen
                sha = hashlib.sha256(raw).hexdigest()
                plan.append(
                    (inv_id, label, msg_name, found_name,
                     (len(raw), sha, raw), "OK")
                )
    return plan


def _print_preview(col, plan):
    """Print a summary of *plan* and whether each target already holds data."""
    print("=== NAHLED DAVKY (CDA -> Mongo cda.data) ===\n")
    for inv_id, label, msg_name, att_name, info, status in plan:
        doc = col.find_one(
            {"_id": ObjectId(inv_id)},
            {"prijmeni": 1, "jmeno": 1, "cda.data_base64": 1},
        )
        # "cda" may be absent or explicitly None; both mean "no data yet".
        has = bool(doc and (doc.get("cda") or {}).get("data_base64"))
        print(f"[{status}] {label} (_id {inv_id})")
        print(f" .msg: {msg_name}")
        print(f" priloha: {att_name}")
        if info:
            print(f" velikost: {info[0]} B sha256: {info[1]}")
        print(f" data_base64 jiz existuje: {has}")
        if doc is None:
            print(" !!! INVESTIGATOR V MONGO NENALEZEN")
        print()


def _apply_plan(col, plan):
    """Write every ``OK`` plan entry to Mongo; return the modified-doc count.

    ``cda.soubor`` is written only when it is absent, null or empty, so an
    existing value is preserved. Entries whose ``_id`` matches no document are
    reported instead of being counted as written.
    """
    written = 0
    for inv_id, label, msg_name, att_name, info, status in plan:
        if status != "OK" or not info:
            print(f"PRESKAKUJI {label}: {status}")
            continue
        size, sha, raw = info
        oid = ObjectId(inv_id)
        res = col.update_one(
            {"_id": oid},
            {"$set": {
                "cda.data_base64": base64.b64encode(raw).decode("ascii"),
                "cda.data_sha256": sha,
                "cda.data_filename": att_name,
                "cda.data_mime": "application/pdf",
                "cda.data_size": size,
                "cda.data_stored_at": STORED_AT,
                "cda.data_source_msg": msg_name,
            }},
        )
        if res.matched_count == 0:
            print(f"NENALEZENO: {label} (_id {inv_id})")
            continue
        # Equality with None matches both a null value and a missing field.
        col.update_one(
            {"_id": oid, "$or": [{"cda.soubor": None}, {"cda.soubor": ""}]},
            {"$set": {"cda.soubor": att_name}},
        )
        written += res.modified_count
        print(f"ZAPSANO: {label} (modified={res.modified_count})")
    return written


def main():
    """Preview the batch and, when ``--apply`` is given, store it in Mongo.

    Without ``--apply`` the script only prints the preview (dry run).
    """
    do_apply = "--apply" in sys.argv
    os.makedirs(TMPDIR, exist_ok=True)

    plan = _build_plan()

    with MongoClient(MONGO_URI) as client:
        col = client["feasibility"]["investigators"]
        _print_preview(col, plan)

        if not do_apply:
            print(">>> DRY-RUN. Pro zapis spust s --apply")
            return

        n = _apply_plan(col, plan)

    print(f"\n>>> CELKEM ZAPSANO: {n}")


if __name__ == "__main__":
    main()