Files
janssen/Feasibility/store_cda_seaweed_v1.1.py
2026-06-19 14:28:54 +02:00

106 lines
4.1 KiB
Python

# -*- coding: utf-8 -*-
# =============================================================================
# Nazev: store_cda_seaweed_v1.1.py
# Verze: 1.1
# Datum: 2026-06-19
# Popis: Ulozi podepsane CDA (PDF) z e-mailu asistentek do Mongo
# feasibility.investigators do pole cda.* a posune lekare na
# KROK "5 - CDA podepsano". PDF se stahuji primo ze SeaweedFS
# (seaweed_url z attachments v emaily."vbuzalka@its.jnj.com"),
# overuje se SHA256 proti metadatum z Mongo.
# Pouziti: python store_cda_seaweed_v1.1.py (dry-run / nahled)
# python store_cda_seaweed_v1.1.py --apply (zapise do Mongo)
# Zmena: v1.1 - STATUS radek + datum bere z DATE; MAPPING = Leksa Vaclav.
# v1.0 -> TRASH.
# Pozn.: MAPPING nize = explicitni parovani investigator -> CDA priloha.
# Jen stdlib + pymongo. SeaweedFS host 192.168.1.50:8888.
# =============================================================================
import sys
import base64
import hashlib
import urllib.request
from datetime import datetime, timezone
from pymongo import MongoClient
from bson import ObjectId
# MongoDB deployment, database and collection that hold the investigators.
MONGO_URI = "mongodb://192.168.1.76:27017"
DBN = "feasibility"
COL = "investigators"

# Date stamp that prefixes the STATUS line written for each stored CDA.
DATE = "19JUN2026"

# Explicit pairing of an investigator document with its signed CDA attachment.
# Each entry is a 7-tuple:
#   (investigator _id, seaweed_url, filename, sha256, size, source_msg_id, label)
# NOTE(review): the v1.1 change note in the file header mentions
# "Leksa Vaclav", while the only entry here is labelled "Reif Stanislav" -
# confirm the mapping is the intended one before running with --apply.
MAPPING = [
    (
        "6a268cdeb84bf5597759b478",
        "http://192.168.1.50:8888/mail-attachments/b5/c8/b5c8677c335f77e2b3184aca71628393bf30bd843334edfdecd32b544e91882d",
        "CZ_CDA PI_MUDr. Stanislav Reif_ICO_fully signed_18Jun2026.pdf",
        "b5c8677c335f77e2b3184aca71628393bf30bd843334edfdecd32b544e91882d",
        476306,
        "<CH2PR07MB7190C02CBCFF82E500D792B980E32@CH2PR07MB7190.namprd07.prod.outlook.com>",
        "Reif Stanislav",
    ),
]
def fetch(url, timeout=30):
    """Download *url* and return the complete response body as bytes.

    Args:
        url: Address passed unchanged to ``urllib.request.urlopen``.
        timeout: Timeout in seconds handed to ``urlopen``; defaults to the
            30 seconds that were previously hard-coded.

    Returns:
        The whole body as ``bytes``, read into memory in one call.

    Raises:
        Whatever ``urlopen`` / ``read`` raise (for example
        ``urllib.error.URLError`` or ``ValueError`` for a malformed URL);
        nothing is caught here, the caller decides how to report it.
    """
    # The context manager closes the response even if read() fails.
    with urllib.request.urlopen(url, timeout=timeout) as response:
        return response.read()
def main():
    """Verify every mapped CDA PDF and, with ``--apply``, store it in MongoDB.

    For each entry of ``MAPPING`` the investigator document is looked up by
    ``_id``, the PDF is downloaded with ``fetch`` and checked three ways:
    SHA-256 digest, byte size and the ``%PDF-`` file header. The results are
    printed for every entry.

    Without ``--apply`` in ``sys.argv`` the run is a dry run: nothing is
    written and entries that pass all checks are only counted. With
    ``--apply`` a verified PDF is base64-encoded into the ``cda.*`` fields,
    ``KROK`` is set to "5 - CDA podepsano" and a dated line is prepended to
    ``STATUS``.

    Entries whose lookup, download, verification or write fails are reported
    and skipped; the final summary counts only the successful ones.
    """
    apply = "--apply" in sys.argv
    ok = 0

    # Using the client as a context manager closes its connections when the
    # block is left, including when an exception propagates.
    with MongoClient(MONGO_URI) as cli:
        col = cli[DBN][COL]
        # One timestamp for the whole run, so all entries share stored_at.
        now = datetime.now(timezone.utc).isoformat()

        for _id, url, fname, sha, size, src, label in MAPPING:
            oid = ObjectId(_id)
            # NOTE(review): "cda.stav" is projected but never read below, so
            # a second --apply run stores the PDF again and prepends another
            # STATUS line - confirm whether a duplicate guard was intended.
            doc = col.find_one(
                {"_id": oid}, {"STATUS": 1, "KROK": 1, "cda.stav": 1}
            )
            if not doc:
                print(f" !! {label}: investigator _id={_id} NENALEZEN")
                continue

            # Broad catch is deliberate: any download problem is reported for
            # this entry and the remaining entries are still processed.
            try:
                raw = fetch(url)
            except Exception as e:
                print(f" !! {label}: stazeni selhalo: {e}")
                continue

            # Three independent integrity checks against the MAPPING metadata.
            got = hashlib.sha256(raw).hexdigest()
            sha_ok = got == sha
            size_ok = len(raw) == size
            head_ok = raw[:5] == b"%PDF-"

            print(f" [{label}]")
            print(f" soubor : {fname}")
            print(f" stazeno : {len(raw)} B (ocek. {size}) {'OK' if size_ok else 'MISMATCH'}")
            print(f" sha256 : {'OK' if sha_ok else 'MISMATCH! ' + got}")
            print(f" PDF hdr : {'OK' if head_ok else 'NENI PDF'}")
            print(f" KROK : {doc.get('KROK')} -> 5 - CDA podepsano")

            if not (sha_ok and size_ok and head_ok):
                print(" >> PRESKAKUJI (kontrola selhala)")
                continue

            if not apply:
                # Dry run: the entry passed verification, nothing is written.
                ok += 1
                continue

            b64 = base64.b64encode(raw).decode("ascii")
            old_status = doc.get("STATUS", "") or ""
            new_line = (f"{DATE}: podepsane CDA ULOZENO do Mongo (cda.data) — {fname} "
                        f"(z e-mailu asistentky CTA). KROK 5, pripraveno na SIPIQ.")
            # Newest line goes first; the separator is added only when there
            # is older text, so an empty STATUS gets no trailing newline.
            # NOTE(review): STATUS is read above and written here in two
            # separate operations, so a concurrent edit could be overwritten.
            new_status = new_line + "\n" + old_status if old_status else new_line

            result = col.update_one({"_id": oid}, {"$set": {
                "KROK": "5 - CDA podepsano",
                "STATUS": new_status,
                "cda.stav": "podepsano",
                "cda.soubor": fname,
                "cda.zdroj": "e-mail asistentky (SeaweedFS)",
                "cda.data_base64": b64,
                "cda.data_sha256": sha,
                "cda.data_filename": fname,
                "cda.data_mime": "application/pdf",
                "cda.data_size": len(raw),
                "cda.data_stored_at": now,
                "cda.data_source_msg": src,
            }})
            # Count the entry as stored only if the update matched a document
            # (it may have been removed since the find_one above).
            if result.matched_count != 1:
                print(" >> NEZAPSANO (dokument pri zapisu nenalezen)")
                continue

            ok += 1
            print(" >> ULOZENO + KROK 5")

    print(f"\n{'ZAPSANO' if apply else 'DRY-RUN OK'}: {ok}/{len(MAPPING)}")
    if not apply:
        print(">>> Pro zapis spust s --apply")
if __name__ == "__main__":
    # Run the import only when the file is executed as a script, so importing
    # the module does not touch MongoDB or SeaweedFS.
    main()