diff --git a/Feasibility/77242113UCO2001/store_cda_batch_v1.0.md b/Feasibility/77242113UCO2001/store_cda_batch_v1.0.md deleted file mode 100644 index c459851..0000000 --- a/Feasibility/77242113UCO2001/store_cda_batch_v1.0.md +++ /dev/null @@ -1,30 +0,0 @@ -# store_cda_batch_v1.0.py - -**Verze:** 1.0 · **Datum:** 2026-06-09 - -Dávkové uložení binárek CDA (PDF) do Mongo `feasibility.investigators` → -`cda.data_*`. Zdroj = `.msg` soubory na Toweru (`/mnt/user/JNJEMAILS`), -stažené přes SFTP (paramiko), příloha vytažena `extract_msg`. - -## Princip -- `MAPPING` = explicitní seznam `(investigator_id, msg_filename, - attachment_filename, label)` — žádné hádání za běhu (matchování přílohy v `.msg` - přes normalizaci bez diakritiky). -- CDA chodí od 3 asistentek: **Wittenbergerová** (LWittenb), **Hrabalová** - (LHrabalo), **Vojčová** (LVojcova). Konkrétní soubor bývá jmenován v STATUS lékaře. - -## Co zapisuje -`cda.data_base64`, `cda.data_sha256`, `cda.data_filename`, `cda.data_mime`, -`cda.data_size`, `cda.data_stored_at`, `cda.data_source_msg`; doplní `cda.soubor`. - -## Spuštění -``` -python store_cda_batch_v1.0.py # dry-run / náhled -python store_cda_batch_v1.0.py --apply # zápis do Mongo -``` -venv má `paramiko` + `extract_msg` + `pymongo`. - -## Historie -- v1.0 — dávka 1 (6×): Hlavatý/Cliniq, Fedurco/ENDOMED, Tichý, Falc, Pešta, - Jungwirthová. Dávka 2 (3×): Matouš/Axon-master, Mihálkanin/Gastro LM, - Krížová/FN Motol. Aplikováno 09JUN2026. (Lukáč zvlášť přes store_cda_to_mongo.) 
# -*- coding: utf-8 -*-
# =============================================================================
# Name:    store_cda_batch_v1.0.py
# Version: 1.0
# Date:    2026-06-09
# Purpose: Batch-store CDA binaries (PDF) in Mongo on investigator documents
#          (feasibility.investigators -> cda.data_*). The source is .msg
#          files on the Tower (/mnt/user/JNJEMAILS), downloaded over SFTP;
#          the attachment is pulled out with extract_msg. The mapping
#          investigator -> (.msg, attachment) is explicit (no guessing at
#          run time). Follows the agreement: the physical document from the
#          e-mail goes into Mongo.
#          Writes: cda.data_base64, cda.data_sha256, cda.data_filename,
#                  cda.data_mime, cda.data_size, cda.data_stored_at,
#                  cda.data_source_msg; fills in cda.soubor if it is missing.
#          Existing cda.* fields (stav, datum_*, zdroj, poznamka) are NOT
#          changed.
# Usage:   python store_cda_batch_v1.0.py            (dry-run / preview)
#          python store_cda_batch_v1.0.py --apply    (writes to Mongo)
# =============================================================================

import base64
import hashlib
import os
import sys
import unicodedata

# Third-party packages (paramiko, extract_msg, pymongo/bson) are imported
# inside the functions that need them, so the pure helpers below can be
# imported and tested on a machine that does not have them installed.

MONGO_URI = os.environ.get("MONGO_URI", "mongodb://192.168.1.76:27017")
TOWER_HOST = os.environ.get("TOWER_HOST", "192.168.1.76")
TOWER_USER = os.environ.get("TOWER_USER", "root")
# NOTE(security): a root password committed to source control is exposed to
# everyone with repository access. The legacy default is kept only so that
# existing invocations keep working; set TOWER_PASS in the environment,
# rotate the credential and then drop the default.
TOWER_PASS = os.environ.get("TOWER_PASS", "7309208104")
REMOTE_DIR = "/mnt/user/JNJEMAILS"
TMPDIR = r"u:\Dropbox\!!!Days\Downloads Z230\_cda_tmp"
STORED_AT = "2026-06-10"

# (investigator_id, msg_filename, attachment_filename, label)
# BATCH 3 (10JUN2026): new CDAs from 10 June (step 4 -> 5)
MAPPING = [
    ("6a19832b5fc221351825796c", "FC130007DE92C2040000.msg",
     "CZ_CDA institution_MUDr. GREGAR s.r.o_Jan Gregar_fully signed_09Jun2026.pdf",
     "Gregar Jan (MUDr. GREGAR s.r.o.)"),
    ("6a19832b5fc2213518257969", "FC130007DE92C2030000.msg",
     "SK_CDA PI_Durina_FN Nove Zamky_fully signed 09Jun2026.pdf",
     "Durina Juraj (FN Nove Zamky)"),
    ("6a19832b5fc2213518257973", "FC130007DE92C1FE0000.msg",
     "SK_CDA_Institution_Accout Center s.r.o_09Jun2026.pdf",
     "Horvath Frantisek (Accout Center)"),
]

# BATCH 1+2 (09JUN2026) - already stored, kept here for history:
#   Hlavaty/Cliniq FC1300053049739C, Fedurco/ENDOMED FC1300053049739B,
#   Tichy FC13000530495B95, Falc FC130007D8A1F0E6, Pesta FC130007D8A1F0E1,
#   Jungwirthova FC130007D8A1F0E2, Lukac FC130007C9E971FF (store_cda_to_mongo_v1.0),
#   Matous/Axon FC130007D8A1F0E3, Mihalkanin/GastroLM FC130007D8A1F0E6,
#   Krizova/Motol FC130007C1643CA1
# NOTE(review): Falc and Mihalkanin list the same message id above - confirm.


def norm(s):
    """Return *s* lowercased, without diacritics, with whitespace collapsed.

    ``None`` and the empty string both normalise to ``""``.
    """
    s = s or ""
    s = unicodedata.normalize("NFKD", s)
    s = "".join(c for c in s if not unicodedata.combining(c))
    return " ".join(s.lower().split())


def select_attachment(candidates, wanted_name):
    """Pick the attachment that corresponds to *wanted_name*.

    Args:
        candidates: iterable of ``(filename, data)`` pairs in message order.
        wanted_name: attachment filename taken from ``MAPPING``.

    Returns:
        ``(filename, data_bytes)`` of the chosen attachment, or ``None`` when
        nothing matches.

    An exact match (after ``norm``) always wins. Only when there is none is
    the first loose match used: the wanted name is contained in the
    attachment name, or the attachment is a ``.pdf`` whose name is contained
    in the wanted name. Candidates whose payload is not raw bytes are skipped
    because they cannot be hashed or base64-encoded.
    """
    target = norm(wanted_name)
    loose = None
    for name, data in candidates:
        if not isinstance(data, (bytes, bytearray)):
            continue
        name = name or ""
        key = norm(name)
        if key == target:
            return name, bytes(data)
        if loose is None and (
            target in key
            or (key in target and name.lower().endswith(".pdf"))
        ):
            loose = (name, bytes(data))
    return loose


def _fetch_msg(sftp, msg_name):
    """Return the local path of *msg_name*, downloading it if not cached."""
    local_msg = os.path.join(TMPDIR, msg_name)
    if not os.path.exists(local_msg):
        # Download under a temporary name and rename on success, so that an
        # interrupted transfer never leaves a truncated file that a later
        # run would mistake for a complete cached copy.
        partial = local_msg + ".part"
        try:
            sftp.get(f"{REMOTE_DIR}/{msg_name}", partial)
            os.replace(partial, local_msg)
        finally:
            if os.path.exists(partial):
                os.remove(partial)
    return local_msg


def _read_attachments(local_msg):
    """Return ``[(filename, data), ...]`` for every attachment of the .msg."""
    import extract_msg

    msg = extract_msg.Message(local_msg)
    try:
        return [
            (att.longFilename or att.shortFilename or "", att.data)
            for att in msg.attachments
        ]
    finally:
        # Release the file handle even if reading an attachment fails.
        msg.close()


def _build_plan():
    """Download every mapped .msg and resolve its attachment.

    Returns a list of ``(inv_id, label, msg_name, att_name, info, status)``
    tuples, where ``info`` is ``(size, sha256_hex, raw_bytes)`` or ``None``
    when the attachment was not found.
    """
    import paramiko

    plan = []
    ssh = paramiko.SSHClient()
    # NOTE(security): AutoAddPolicy accepts any unknown host key, which does
    # not protect against a man-in-the-middle. Kept to preserve the existing
    # behaviour; prefer a known_hosts entry for the Tower.
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect(TOWER_HOST, username=TOWER_USER, password=TOWER_PASS, timeout=30)
        sftp = ssh.open_sftp()
        try:
            for inv_id, msg_name, att_name, label in MAPPING:
                local_msg = _fetch_msg(sftp, msg_name)
                chosen = select_attachment(_read_attachments(local_msg), att_name)
                if not chosen:
                    plan.append((inv_id, label, msg_name, att_name, None,
                                 "!!! PRILOHA NENALEZENA"))
                    continue
                found_name, raw = chosen
                sha = hashlib.sha256(raw).hexdigest()
                plan.append((inv_id, label, msg_name, found_name,
                             (len(raw), sha, raw), "OK"))
        finally:
            sftp.close()
    finally:
        ssh.close()
    return plan


def _print_preview(col, plan):
    """Print what would be stored and whether the target document has data."""
    from bson import ObjectId

    print("=== NAHLED DAVKY (CDA -> Mongo cda.data) ===\n")
    for inv_id, label, msg_name, att_name, info, status in plan:
        doc = col.find_one(
            {"_id": ObjectId(inv_id)},
            {"prijmeni": 1, "jmeno": 1, "cda.data_base64": 1},
        )
        has = bool(doc and doc.get("cda", {}).get("data_base64"))
        print(f"[{status}] {label} (_id {inv_id})")
        print(f" .msg: {msg_name}")
        print(f" priloha: {att_name}")
        if info:
            print(f" velikost: {info[0]} B sha256: {info[1]}")
        if doc is None:
            # A wrong _id would otherwise only show up as "modified=0".
            print(" !!! INVESTIGATOR V MONGO NENALEZEN")
        print(f" data_base64 jiz existuje: {has}")
        print()


def _apply_plan(col, plan):
    """Write every resolved attachment to Mongo; return the modified count."""
    from bson import ObjectId

    n = 0
    for inv_id, label, msg_name, att_name, info, status in plan:
        if status != "OK" or not info:
            print(f"PRESKAKUJI {label}: {status}")
            continue
        size, sha, raw = info
        oid = ObjectId(inv_id)
        res = col.update_one(
            {"_id": oid},
            {"$set": {
                "cda.data_base64": base64.b64encode(raw).decode("ascii"),
                "cda.data_sha256": sha,
                "cda.data_filename": att_name,
                "cda.data_mime": "application/pdf",
                "cda.data_size": size,
                "cda.data_stored_at": STORED_AT,
                "cda.data_source_msg": msg_name,
            }},
        )
        if res.matched_count == 0:
            print(f"PRESKAKUJI {label}: _id {inv_id} V MONGO NENALEZEN")
            continue
        # cda.soubor is only filled in when it is missing, null or empty, so
        # a value already curated on the document is never overwritten.
        col.update_one(
            {"_id": oid, "cda.soubor": {"$in": [None, ""]}},
            {"$set": {"cda.soubor": att_name}},
        )
        n += res.modified_count
        print(f"ZAPSANO: {label} (modified={res.modified_count})")
    return n


def main(argv=None):
    """Preview the batch and, with ``--apply``, store it in Mongo.

    Args:
        argv: command-line arguments without the program name; defaults to
            ``sys.argv[1:]``. Only ``--apply`` is recognised.
    """
    from pymongo import MongoClient

    args = sys.argv[1:] if argv is None else argv
    do_apply = "--apply" in args
    os.makedirs(TMPDIR, exist_ok=True)

    plan = _build_plan()

    client = MongoClient(MONGO_URI)
    try:
        col = client["feasibility"]["investigators"]
        _print_preview(col, plan)
        if not do_apply:
            print(">>> DRY-RUN. Pro zapis spust s --apply")
            return
        n = _apply_plan(col, plan)
        print(f"\n>>> CELKEM ZAPSANO: {n}")
    finally:
        client.close()


if __name__ == "__main__":
    main()