z230
This commit is contained in:
@@ -1,30 +0,0 @@
|
|||||||
# store_cda_batch_v1.0.py
|
|
||||||
|
|
||||||
**Verze:** 1.0 · **Datum:** 2026-06-09
|
|
||||||
|
|
||||||
Dávkové uložení binárek CDA (PDF) do Mongo `feasibility.investigators` →
|
|
||||||
`cda.data_*`. Zdroj = `.msg` soubory na Toweru (`/mnt/user/JNJEMAILS`),
|
|
||||||
stažené přes SFTP (paramiko), příloha vytažena `extract_msg`.
|
|
||||||
|
|
||||||
## Princip
|
|
||||||
- `MAPPING` = explicitní seznam `(investigator_id, msg_filename,
|
|
||||||
attachment_filename, label)` — žádné hádání za běhu (matchování přílohy v `.msg`
|
|
||||||
přes normalizaci bez diakritiky).
|
|
||||||
- CDA chodí od 3 asistentek: **Wittenbergerová** (LWittenb), **Hrabalová**
|
|
||||||
(LHrabalo), **Vojčová** (LVojcova). Konkrétní soubor bývá jmenován v STATUS lékaře.
|
|
||||||
|
|
||||||
## Co zapisuje
|
|
||||||
`cda.data_base64`, `cda.data_sha256`, `cda.data_filename`, `cda.data_mime`,
|
|
||||||
`cda.data_size`, `cda.data_stored_at`, `cda.data_source_msg`; doplní `cda.soubor`.
|
|
||||||
|
|
||||||
## Spuštění
|
|
||||||
```
|
|
||||||
python store_cda_batch_v1.0.py # dry-run / náhled
|
|
||||||
python store_cda_batch_v1.0.py --apply # zápis do Mongo
|
|
||||||
```
|
|
||||||
venv má `paramiko` + `extract_msg` + `pymongo`.
|
|
||||||
|
|
||||||
## Historie
|
|
||||||
- v1.0 — dávka 1 (6×): Hlavatý/Cliniq, Fedurco/ENDOMED, Tichý, Falc, Pešta,
|
|
||||||
Jungwirthová. Dávka 2 (3×): Matouš/Axon-master, Mihálkanin/Gastro LM,
|
|
||||||
Krížová/FN Motol. Aplikováno 09JUN2026. (Lukáč zvlášť přes store_cda_to_mongo.)
|
|
||||||
@@ -1,147 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
# =============================================================================
|
|
||||||
# Nazev: store_cda_batch_v1.0.py
|
|
||||||
# Verze: 1.0
|
|
||||||
# Datum: 2026-06-09
|
|
||||||
# Popis: Davkove ulozi binarky CDA (PDF) do Mongo k investigatorum
|
|
||||||
# (feasibility.investigators -> cda.data_*). Zdroj = .msg soubory na
|
|
||||||
# Toweru (/mnt/user/JNJEMAILS), stazene pres SFTP, priloha vytazena
|
|
||||||
# extract_msg. Mapovani investigator -> (.msg, attachment) je
|
|
||||||
# explicitni (zadne hadani za behu). Drzi se domluvy: fyzicky
|
|
||||||
# dokument z e-mailu -> do Mongo.
|
|
||||||
# Zapise: cda.data_base64, cda.data_sha256, cda.data_filename,
|
|
||||||
# cda.data_mime, cda.data_size, cda.data_stored_at,
|
|
||||||
# cda.data_source_msg; doplni cda.soubor pokud chybi.
|
|
||||||
# Existujici cda.* (stav, datum_*, zdroj, poznamka) NEMENI.
|
|
||||||
# Pouziti: python store_cda_batch_v1.0.py (dry-run / nahled)
|
|
||||||
# python store_cda_batch_v1.0.py --apply (zapise do Mongo)
|
|
||||||
# =============================================================================
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import base64
|
|
||||||
import hashlib
|
|
||||||
import unicodedata
|
|
||||||
import paramiko
|
|
||||||
import extract_msg
|
|
||||||
from pymongo import MongoClient
|
|
||||||
from bson import ObjectId
|
|
||||||
|
|
||||||
# Connection settings. Every value can be overridden through an environment
# variable of the same name; the literals are the previous hard-coded defaults,
# so running the script without any environment set behaves as before.
MONGO_URI = os.environ.get("MONGO_URI", "mongodb://192.168.1.76:27017")
TOWER_HOST = os.environ.get("TOWER_HOST", "192.168.1.76")
TOWER_USER = os.environ.get("TOWER_USER", "root")
# SECURITY: a root password is committed to source control here. Treat it as
# compromised: rotate it, supply the new one through the TOWER_PASS
# environment variable (or switch to key-based authentication), and then
# remove this default.
TOWER_PASS = os.environ.get("TOWER_PASS", "7309208104")

# Directory on the remote host that holds the source .msg files.
REMOTE_DIR = "/mnt/user/JNJEMAILS"
# Local directory where downloaded .msg files are cached between runs.
TMPDIR = r"u:\Dropbox\!!!Days\Downloads Z230\_cda_tmp"
# Value written to cda.data_stored_at for every record of this batch.
STORED_AT = "2026-06-10"

# Each entry: (investigator_id, msg_filename, attachment_filename, label)
# BATCH 3 (10JUN2026): new CDAs from 10 June (step 4 -> 5)
MAPPING = [
    (
        "6a19832b5fc221351825796c",
        "FC130007DE92C2040000.msg",
        "CZ_CDA institution_MUDr. GREGAR s.r.o_Jan Gregar_fully signed_09Jun2026.pdf",
        "Gregar Jan (MUDr. GREGAR s.r.o.)",
    ),
    (
        "6a19832b5fc2213518257969",
        "FC130007DE92C2030000.msg",
        "SK_CDA PI_Durina_FN Nove Zamky_fully signed 09Jun2026.pdf",
        "Durina Juraj (FN Nove Zamky)",
    ),
    (
        "6a19832b5fc2213518257973",
        "FC130007DE92C1FE0000.msg",
        "SK_CDA_Institution_Accout Center s.r.o_09Jun2026.pdf",
        "Horvath Frantisek (Accout Center)",
    ),
]

# BATCH 1+2 (09JUN2026) - already stored, kept here for history only:
#   Hlavaty/Cliniq FC1300053049739C, Fedurco/ENDOMED FC1300053049739B,
#   Tichy FC13000530495B95, Falc FC130007D8A1F0E6, Pesta FC130007D8A1F0E1,
#   Jungwirthova FC130007D8A1F0E2, Lukac FC130007C9E971FF (store_cda_to_mongo_v1.0),
#   Matous/Axon FC130007D8A1F0E3, Mihalkanin/GastroLM FC130007D8A1F0E6,
#   Krizova/Motol FC130007C1643CA1
# NOTE(review): Falc and Mihalkanin/GastroLM are listed with the same .msg id
# (FC130007D8A1F0E6) - confirm that one of them is not a typo.
|
|
||||||
|
|
||||||
|
|
||||||
def norm(s: "str | None") -> str:
    """Return *s* lower-cased, without diacritics and with collapsed whitespace.

    Used to compare attachment file names regardless of accents, letter case
    and spacing differences.

    Args:
        s: Text to normalise; ``None`` (or any falsy value) is treated as "".

    Returns:
        The NFKD-decomposed text with combining marks removed, lower-cased,
        stripped, and with every whitespace run replaced by a single space.
    """
    text = unicodedata.normalize("NFKD", s or "")
    # NFKD splits an accented letter into base letter + combining mark(s);
    # dropping the combining marks leaves the plain base letter.
    stripped = "".join(ch for ch in text if not unicodedata.combining(ch))
    return " ".join(stripped.lower().split())
|
|
||||||
|
|
||||||
|
|
||||||
def _find_attachment(msg, att_name):
    """Return ``(name, data)`` of the attachment in *msg* matching *att_name*.

    An exact match on the normalised file name always wins. Only when there is
    no exact match is the first loose match used: the wanted name contained in
    the attachment name, or a ``.pdf`` attachment whose name is contained in
    the wanted name. Attachments whose payload is not raw bytes are skipped
    because they cannot be hashed or base64-encoded.

    Returns ``None`` when nothing matches.
    """
    target = norm(att_name)
    fallback = None
    for att in msg.attachments:
        name = att.longFilename or att.shortFilename or ""
        data = att.data
        if not isinstance(data, (bytes, bytearray)):
            continue
        candidate = norm(name)
        if candidate == target:
            return name, bytes(data)
        if fallback is None and candidate and (
            target in candidate
            or (candidate in target and name.lower().endswith(".pdf"))
        ):
            fallback = (name, bytes(data))
    return fallback


def _download_msg(sftp, msg_name):
    """Make sure *msg_name* is cached in TMPDIR and return its local path.

    The file is fetched under a temporary ``.part`` name and renamed only once
    the transfer has finished, so an interrupted download is never mistaken
    for a complete cached file on the next run.
    """
    local_msg = os.path.join(TMPDIR, msg_name)
    if not os.path.exists(local_msg):
        partial = local_msg + ".part"
        sftp.get(f"{REMOTE_DIR}/{msg_name}", partial)
        os.replace(partial, local_msg)
    return local_msg


def _build_plan(sftp):
    """Download every mapped .msg and extract the wanted attachment.

    Returns a list of tuples
    ``(inv_id, label, msg_name, att_name, info, status)`` where ``info`` is
    ``(size, sha256_hex, raw_bytes)`` for a found attachment and ``None``
    otherwise, and ``status`` is ``"OK"`` or the not-found marker.
    """
    plan = []
    for inv_id, msg_name, att_name, label in MAPPING:
        local_msg = _download_msg(sftp, msg_name)
        msg = extract_msg.Message(local_msg)
        try:
            chosen = _find_attachment(msg, att_name)
        finally:
            # Release the .msg file handle even if attachment parsing fails.
            msg.close()
        if not chosen:
            plan.append((inv_id, label, msg_name, att_name, None, "!!! PRILOHA NENALEZENA"))
            continue
        found_name, raw = chosen
        sha = hashlib.sha256(raw).hexdigest()
        plan.append((inv_id, label, msg_name, found_name, (len(raw), sha, raw), "OK"))
    return plan


def _print_preview(col, plan):
    """Print what would be written for every plan entry (no writes happen)."""
    print("=== NAHLED DAVKY (CDA -> Mongo cda.data) ===\n")
    for inv_id, label, msg_name, att_name, info, status in plan:
        doc = col.find_one(
            {"_id": ObjectId(inv_id)},
            {"prijmeni": 1, "jmeno": 1, "cda.data_base64": 1},
        )
        # "cda" may be absent or null in the document; treat both as empty.
        has = bool(doc and (doc.get("cda") or {}).get("data_base64"))
        print(f"[{status}] {label}  (_id {inv_id})")
        print(f"   .msg:     {msg_name}")
        print(f"   priloha:  {att_name}")
        if info:
            print(f"   velikost: {info[0]} B   sha256: {info[1]}")
        if doc is None:
            # Surface a wrong _id in the dry run instead of at write time.
            print("   !!! INVESTIGATOR V MONGO NENALEZEN")
        print(f"   data_base64 jiz existuje: {has}")
        print()


def _apply_plan(col, plan):
    """Write the extracted binaries to Mongo; return the modified-document count.

    Entries without a found attachment and entries whose investigator document
    does not exist are skipped with a message. ``cda.soubor`` is set only when
    the document has no value for it, so an existing value is preserved.
    """
    written = 0
    for inv_id, label, msg_name, att_name, info, status in plan:
        if status != "OK" or not info:
            print(f"PRESKAKUJI {label}: {status}")
            continue
        size, sha, raw = info
        oid = ObjectId(inv_id)
        doc = col.find_one({"_id": oid}, {"cda.soubor": 1})
        if doc is None:
            print(f"PRESKAKUJI {label}: !!! INVESTIGATOR V MONGO NENALEZEN")
            continue
        fields = {
            "cda.data_base64": base64.b64encode(raw).decode("ascii"),
            "cda.data_sha256": sha,
            "cda.data_filename": att_name,
            "cda.data_mime": "application/pdf",
            "cda.data_size": size,
            "cda.data_stored_at": STORED_AT,
            "cda.data_source_msg": msg_name,
        }
        if not (doc.get("cda") or {}).get("soubor"):
            # Fill in the file name only when it is missing.
            fields["cda.soubor"] = att_name
        res = col.update_one({"_id": oid}, {"$set": fields})
        written += res.modified_count
        print(f"ZAPSANO: {label} (modified={res.modified_count})")
    return written


def main():
    """Store the mapped CDA PDFs into Mongo ``feasibility.investigators``.

    Downloads the mapped .msg files over SFTP (cached in TMPDIR), extracts the
    mapped attachment from each, and prints a preview. Nothing is written
    unless ``--apply`` is present on the command line. All network and
    database handles are closed even when a step raises.
    """
    apply = "--apply" in sys.argv
    os.makedirs(TMPDIR, exist_ok=True)

    # SSH/SFTP
    ssh = paramiko.SSHClient()
    # NOTE(review): AutoAddPolicy accepts any unknown host key without
    # verification; consider loading known host keys instead.
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect(TOWER_HOST, username=TOWER_USER, password=TOWER_PASS, timeout=30)
        sftp = ssh.open_sftp()
        try:
            plan = _build_plan(sftp)
        finally:
            sftp.close()
    finally:
        ssh.close()

    client = MongoClient(MONGO_URI)
    try:
        col = client["feasibility"]["investigators"]
        _print_preview(col, plan)

        if not apply:
            print(">>> DRY-RUN. Pro zapis spust s --apply")
            return

        n = _apply_plan(col, plan)
        print(f"\n>>> CELKEM ZAPSANO: {n}")
    finally:
        client.close()


if __name__ == "__main__":
    main()
|
|
||||||
Reference in New Issue
Block a user