z230
This commit is contained in:
@@ -0,0 +1,21 @@
|
||||
# store_cda_batch_v1.4.py
|
||||
|
||||
**Verze:** 1.4 · **Datum:** 2026-06-15
|
||||
|
||||
Dávkové uložení binárek CDA (PDF) do Mongo `feasibility.investigators` →
|
||||
`cda.data_*`. Zdroj = `.msg` na Toweru (`/mnt/user/JNJEMAILS`), SFTP + extract_msg.
|
||||
|
||||
## Spuštění
|
||||
```
|
||||
python store_cda_batch_v1.4.py # dry-run
|
||||
python store_cda_batch_v1.4.py --apply # zápis
|
||||
```
|
||||
|
||||
## Historie
|
||||
- v1.4 — DÁVKA 7 (15JUN2026): Molnár Martin (GASTROMART s.r.o., krok 4→5),
|
||||
Dzuriková Michaela (IBDcentrum s.r.o., krok 4→5).
|
||||
- v1.3 — DÁVKA 6 (12JUN2026): Gregušová Katarína, Drastich Pavel.
|
||||
- v1.2 — DÁVKA 5 (11JUN2026): Mudr Robert.
|
||||
- v1.1 — DÁVKA 4 (11JUN2026): Konečný Michal, Baláž Jozef.
|
||||
- v1.0 — DÁVKY 1–3 (09–10JUN2026): Hlavatý, Fedurco, Tichý, Falc, Pešta,
|
||||
Jungwirthová, Matouš, Mihálkanin, Krížová, Gregar, Ďurina, Horváth.
|
||||
@@ -0,0 +1,139 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# =============================================================================
|
||||
# Nazev: store_cda_batch_v1.4.py
|
||||
# Verze: 1.4
|
||||
# Datum: 2026-06-15
|
||||
# Popis: Davkove ulozi binarky CDA (PDF) do Mongo k investigatorum
|
||||
# (feasibility.investigators -> cda.data_*). Zdroj = .msg soubory na
|
||||
# Toweru (/mnt/user/JNJEMAILS), stazene pres SFTP, priloha vytazena
|
||||
# extract_msg. Mapovani investigator -> (.msg, attachment) je
|
||||
# explicitni. Zapise cda.data_* + doplni cda.soubor.
|
||||
# Pouziti: python store_cda_batch_v1.4.py (dry-run / nahled)
|
||||
# python store_cda_batch_v1.4.py --apply (zapise do Mongo)
|
||||
# Zmeny v1.4: DAVKA 7 (15JUN2026) - Molnar Martin (GASTROMART s.r.o., krok 4->5),
|
||||
# Dzurikova Michaela (IBDcentrum s.r.o., krok 4->5).
|
||||
# =============================================================================
|
||||
|
||||
import os
|
||||
import sys
|
||||
import base64
|
||||
import hashlib
|
||||
import unicodedata
|
||||
import paramiko
|
||||
import extract_msg
|
||||
from pymongo import MongoClient
|
||||
from bson import ObjectId
|
||||
|
||||
# --- Connection settings ------------------------------------------------------
# Every value below can be overridden through an environment variable of the
# same name (the same pattern MONGO_URI already used); the literals are the
# previous hard-coded values, kept as fallbacks so existing invocations keep
# working unchanged.
MONGO_URI = os.environ.get("MONGO_URI", "mongodb://192.168.1.76:27017")
TOWER_HOST = os.environ.get("TOWER_HOST", "192.168.1.76")
TOWER_USER = os.environ.get("TOWER_USER", "root")
# SECURITY: a root password committed to version control must be treated as
# compromised. Supply TOWER_PASS via the environment, rotate the credential,
# and then delete this fallback literal.
TOWER_PASS = os.environ.get("TOWER_PASS", "7309208104")

# Remote directory on the Tower host that holds the source .msg files.
REMOTE_DIR = "/mnt/user/JNJEMAILS"
# Local cache directory for downloaded .msg files (created on demand by main()).
TMPDIR = r"u:\Dropbox\!!!Days\Downloads Z230\_cda_tmp"
# Value written to cda.data_stored_at for every record of this batch.
STORED_AT = "2026-06-15"

# Explicit batch mapping: one 4-tuple per investigator, in the order
#   (investigator_id, msg_filename, attachment_filename, label)
# where investigator_id is the hex string of the Mongo _id and label is only
# used in console output.
# BATCH 7 (15JUN2026)
MAPPING = [
    (
        "6a19832b5fc221351825797f",
        "FC130007F372CFD10000.msg",
        "SK_CDA_Institution_GASTROMART s.r.o._fully signed 15Jun2026.pdf",
        "Molnar Martin (GASTROMART s.r.o.)",
    ),
    (
        "6a19832b5fc2213518257964",
        "FC130007F17E55100000.msg",
        "SK_CDA PI_MUDr. Michaela Dzurikova_IBDcentrum s.r.o_13Jun2026.pdf",
        "Dzurikova Michaela (IBDcentrum s.r.o.)",
    ),
]
|
||||
|
||||
# HISTORIE drivejsich davek (jiz ulozeno):
|
||||
# DAVKA 6 (12JUN2026): Gregusova Katarina FC130007E9D30EB3, Drastich Pavel FC130007E9D30EB1.
|
||||
# DAVKA 5 (11JUN2026): Mudr Robert FC130007DE92C232.
|
||||
# DAVKA 4 (11JUN2026): Konecny Michal FC130007DE92C231, Balaz Jozef FC130007DE92C20F.
|
||||
# DAVKA 3 (10JUN2026): Gregar, Durina, Horvath.
|
||||
# DAVKA 1+2 (09JUN2026): Hlavaty, Fedurco, Tichy, Falc, Pesta, Jungwirthova, Lukac,
|
||||
# Matous, Mihalkanin, Krizova.
|
||||
|
||||
|
||||
def norm(s):
    """Return a comparison key for a file name or other short text.

    The text is NFKD-decomposed, combining marks (diacritics) are dropped,
    the result is lower-cased and every run of whitespace is collapsed to a
    single space, with leading/trailing whitespace removed.

    Args:
        s: Text to normalize; ``None`` or an empty string yields ``""``.

    Returns:
        The normalized string.
    """
    decomposed = unicodedata.normalize("NFKD", s if s else "")
    # combining() returns 0 for base characters and non-zero for accents.
    base_chars = [ch for ch in decomposed if unicodedata.combining(ch) == 0]
    words = "".join(base_chars).lower().split()
    return " ".join(words)
|
||||
|
||||
|
||||
def _fetch_msg(sftp, msg_name):
    """Return the local path of ``msg_name``, downloading it via SFTP if not cached.

    Args:
        sftp: An open paramiko SFTP client.
        msg_name: File name of the .msg inside REMOTE_DIR.

    Returns:
        Path of the file inside TMPDIR.

    Raises:
        Whatever ``sftp.get`` raises; the partial local file is removed first.
    """
    local_msg = os.path.join(TMPDIR, msg_name)
    if not os.path.exists(local_msg):
        try:
            sftp.get(f"{REMOTE_DIR}/{msg_name}", local_msg)
        except BaseException:
            # A failed or interrupted (Ctrl-C) transfer can leave an empty or
            # truncated file behind; the existence check above would treat it
            # as a valid cache hit on the next run, so remove it.
            if os.path.exists(local_msg):
                os.remove(local_msg)
            raise
    return local_msg


def _pick_attachment(msg_path, att_name):
    """Find the attachment of a .msg file that corresponds to ``att_name``.

    Names are compared through norm(). An attachment matches when its
    normalized name equals the target, contains the target, or is contained
    in the target while the raw name ends with ".pdf". The first match wins.

    Args:
        msg_path: Local path of the .msg file.
        att_name: Expected attachment file name from MAPPING.

    Returns:
        ``(actual_name, data)`` for the first match, or ``None``.
    """
    target = norm(att_name)
    msg = extract_msg.Message(msg_path)
    try:
        for att in msg.attachments:
            name = att.longFilename or att.shortFilename or ""
            candidate = norm(name)
            if (
                candidate == target
                or target in candidate
                or (candidate in target and name.lower().endswith(".pdf"))
            ):
                # att.data is read here, before the message is closed below.
                return name, att.data
        return None
    finally:
        # Close the message even when parsing an attachment raises.
        msg.close()


def _build_plan():
    """Download every mapped .msg and extract its CDA attachment.

    Returns:
        A list of ``(inv_id, label, msg_name, att_name, info, status)`` tuples,
        one per MAPPING row. ``info`` is ``(size, sha256_hex, raw_bytes)`` when
        ``status`` is ``"OK"`` and ``None`` otherwise.
    """
    plan = []
    ssh = paramiko.SSHClient()
    # NOTE(security): AutoAddPolicy accepts any unknown host key, so the SSH
    # host is not authenticated. Left as-is because the known_hosts setup is
    # not visible here; prefer load_system_host_keys() + RejectPolicy.
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect(TOWER_HOST, username=TOWER_USER, password=TOWER_PASS, timeout=30)
        sftp = ssh.open_sftp()
        try:
            for inv_id, msg_name, att_name, label in MAPPING:
                local_msg = _fetch_msg(sftp, msg_name)
                chosen = _pick_attachment(local_msg, att_name)
                if not chosen:
                    plan.append((inv_id, label, msg_name, att_name, None, "!!! PRILOHA NENALEZENA"))
                    continue
                found_name, raw = chosen
                if not isinstance(raw, (bytes, bytearray)):
                    # Assumption to confirm against the extract_msg docs: data
                    # may be a non-bytes object (e.g. an embedded message).
                    # Skip such entries instead of crashing in hashlib.
                    plan.append((inv_id, label, msg_name, found_name, None, "!!! PRILOHA NENI BINARNI"))
                    continue
                sha = hashlib.sha256(raw).hexdigest()
                plan.append((inv_id, label, msg_name, found_name, (len(raw), sha, raw), "OK"))
        finally:
            sftp.close()
    finally:
        # Always release the SSH session, including on download/parse errors.
        ssh.close()
    return plan


def _print_preview(col, plan):
    """Print one summary entry per planned record, with its current Mongo state."""
    print("=== NAHLED DAVKY (CDA -> Mongo cda.data) ===\n")
    for inv_id, label, msg_name, att_name, info, status in plan:
        doc = col.find_one({"_id": ObjectId(inv_id)}, {"prijmeni": 1, "jmeno": 1, "cda.data_base64": 1})
        # "or {}" also covers a document whose cda field is stored as null.
        has = bool(doc and (doc.get("cda") or {}).get("data_base64"))
        print(f"[{status}] {label} (_id {inv_id})")
        print(f" .msg: {msg_name}")
        print(f" priloha: {att_name}")
        if info:
            print(f" velikost: {info[0]} B sha256: {info[1]}")
        if doc is None:
            # Surface a wrong _id already in the dry-run, not only after --apply.
            print(" !!! INVESTIGATOR V MONGO NENALEZEN")
        print(f" data_base64 jiz existuje: {has}")
        print()


def _write_plan(col, plan):
    """Write every "OK" plan entry to Mongo and return the number of modified documents."""
    n = 0
    for inv_id, label, msg_name, att_name, info, status in plan:
        if status != "OK" or not info:
            print(f"PRESKAKUJI {label}: {status}")
            continue
        size, sha, raw = info
        b64 = base64.b64encode(raw).decode("ascii")
        res = col.update_one(
            {"_id": ObjectId(inv_id)},
            {"$set": {
                "cda.data_base64": b64,
                "cda.data_sha256": sha,
                "cda.data_filename": att_name,
                "cda.data_mime": "application/pdf",
                "cda.data_size": size,
                "cda.data_stored_at": STORED_AT,
                "cda.data_source_msg": msg_name,
                "cda.soubor": att_name,
            }},
        )
        if res.matched_count == 0:
            # No document has this _id: nothing was written, so do not
            # report the record as stored.
            print(f"!!! NENALEZEN V MONGO: {label} (_id {inv_id})")
            continue
        n += res.modified_count
        print(f"ZAPSANO: {label} (modified={res.modified_count})")
    return n


def main():
    """Store the CDA PDFs of the current batch into Mongo.

    Downloads each .msg listed in MAPPING from the Tower host (cached in
    TMPDIR), extracts the matching attachment, prints a preview and, only when
    ``--apply`` is present on the command line, writes ``cda.data_*`` and
    ``cda.soubor`` on the ``feasibility.investigators`` documents. Without
    ``--apply`` nothing is written (dry-run).
    """
    apply = "--apply" in sys.argv
    os.makedirs(TMPDIR, exist_ok=True)

    plan = _build_plan()

    client = MongoClient(MONGO_URI)
    try:
        col = client["feasibility"]["investigators"]
        _print_preview(col, plan)

        if not apply:
            print(">>> DRY-RUN. Pro zapis spust s --apply")
            return

        n = _write_plan(col, plan)
        print(f"\n>>> CELKEM ZAPSANO: {n}")
    finally:
        # The original never closed the Mongo client.
        client.close()
|
||||
|
||||
|
||||
# Script entry point: run the batch only when this file is executed directly,
# never as a side effect of importing it.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user