Files
janssen/Feasibility/77242113UCO2001/store_cda_to_mongo_v1.0.py
administrator 8c01fd6e1a Pridani novych skriptu, reportu a zpracovanych dat
- EmailsImport: jnj_mailbox_sync_v1.0 (sync JNJ schranky)
- Covance: create_lab_results_report_v1.0 + zpracovane CSV (samples/kits/equeries/test-results), browser profily
- Feasibility UCO2001: store_cda_*, store_sipiq_links, classify_krok, mark_sipiq_sent, report v1.1 (stary report do TRASH)
- IWRS/Drugs: pregenerovane onsite inventory / shipment reporty
- TrilliumMCP server + trilium upload/diacritics skripty
- .mcp.json

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-09 15:10:21 +02:00

92 lines
3.4 KiB
Python

# -*- coding: utf-8 -*-
# =============================================================================
# Nazev: store_cda_to_mongo_v1.0.py
# Verze: 1.0
# Datum: 2026-06-09
# Popis: Ulozi binarku dokumentu (napr. CDA PDF) do Mongo k investigatorovi
# (feasibility.investigators -> pole cda.data_*). Drzi se domluvy:
# "fyzicky dokument z e-mailu -> do Mongo".
# Zdroj dat = lokalni soubor (PDF uz extrahovany z .msg).
# Zapise: cda.data_base64, cda.data_sha256, cda.data_filename,
# cda.data_mime, cda.data_size, cda.data_stored_at.
# Existujici cda.* (stav, datum_*, soubor, zdroj, poznamka) NEMENI.
# Pouziti: python store_cda_to_mongo_v1.0.py (dry-run / nahled)
# python store_cda_to_mongo_v1.0.py --apply (zapise do Mongo)
# =============================================================================
import os
import sys
import base64
import hashlib
from pymongo import MongoClient
from bson import ObjectId
# Connection string; the MONGO_URI environment variable, when set, overrides
# the built-in default below.
MONGO_URI: str = os.getenv("MONGO_URI", "mongodb://192.168.1.76:27017")

# --- Settings for this particular write ---
# Hex form of the target investigator's _id (Lukac Ludovit).
INVESTIGATOR_ID: str = "6a1c4275aa46d8b608065cec"
# Local file whose bytes are read and stored.
PDF_PATH: str = r"u:\Dropbox\!!!Days\Downloads Z230\SK_CDA_ PI_doc. MUDr. Ludovít Lukác, Ph.D._fully signed_04Jun2026.pdf"
# File name and MIME type recorded next to the stored bytes.
DATA_FILENAME: str = "SK_CDA_ PI_doc. MUDr. Ľudovít Lukáč, Ph.D._fully signed_04Jun2026.pdf"
DATA_MIME: str = "application/pdf"
# Storage date (author's note: the date cannot be generated at run time).
STORED_AT: str = "2026-06-09"
def main():
    """Preview, or with ``--apply`` perform, storing a file on one investigator.

    Reads the file at ``PDF_PATH``, computes its size, SHA-256 hex digest and
    base64 text, looks up the investigator ``INVESTIGATOR_ID`` in the
    ``feasibility.investigators`` collection and prints a preview of the
    ``cda.data_*`` fields that would be written.

    Without ``--apply`` on the command line nothing is written (dry run).
    With ``--apply`` a single ``$set`` update writes ``cda.data_base64``,
    ``cda.data_sha256``, ``cda.data_filename``, ``cda.data_mime``,
    ``cda.data_size`` and ``cda.data_stored_at``; the update names no other
    field.

    A missing file or an unknown investigator is reported on stdout and the
    function returns without writing anything.

    Returns:
        None.
    """
    apply = "--apply" in sys.argv

    if not os.path.exists(PDF_PATH):
        print("!!! Soubor neexistuje:", PDF_PATH)
        return

    with open(PDF_PATH, "rb") as f:
        raw = f.read()
    size = len(raw)
    sha = hashlib.sha256(raw).hexdigest()
    # NOTE(review): the payload is embedded in the investigator document
    # itself. MongoDB caps a single BSON document at 16 MiB and base64 adds
    # about one third, so a large file would be rejected by the server.
    b64 = base64.b64encode(raw).decode("ascii")

    # Use the client as a context manager so its connections are closed on
    # every exit path, including the early returns below.
    with MongoClient(MONGO_URI) as client:
        col = client["feasibility"]["investigators"]
        doc = col.find_one(
            {"_id": ObjectId(INVESTIGATOR_ID)},
            {"prijmeni": 1, "jmeno": 1, "cda": 1},
        )
        if not doc:
            print("!!! Investigator nenalezen:", INVESTIGATOR_ID)
            return

        print("=== NAHLED ZAPISU ===")
        print(f"Investigator: {doc.get('prijmeni')} {doc.get('jmeno')} (_id {INVESTIGATOR_ID})")
        # A missing or null ``cda`` is treated as an empty sub-document.
        cda = doc.get("cda", {}) or {}
        print("Stavajici cda.* pole:", ", ".join(sorted(cda.keys())) or "(zadne)")
        print(" - cda.soubor:", cda.get("soubor"))
        print(" - cda.velikost_bytes:", cda.get("velikost_bytes"))
        has_data = "data_base64" in cda
        print(" - cda.data_base64 jiz existuje:", has_data)
        print()
        print("PRIDAM/PREPISI tato pole:")
        print(" cda.data_filename =", DATA_FILENAME)
        print(" cda.data_mime =", DATA_MIME)
        print(" cda.data_size =", size, "B")
        print(" cda.data_sha256 =", sha)
        print(" cda.data_stored_at=", STORED_AT)
        # Only the length of the base64 text is shown, never the text itself.
        print(f" cda.data_base64 = <base64 {len(b64)} znaku>")
        print()

        if not apply:
            print(">>> DRY-RUN. Pro zapis spust s --apply")
            return

        # Dotted keys make $set touch only these six fields under ``cda``.
        res = col.update_one(
            {"_id": ObjectId(INVESTIGATOR_ID)},
            {"$set": {
                "cda.data_base64": b64,
                "cda.data_sha256": sha,
                "cda.data_filename": DATA_FILENAME,
                "cda.data_mime": DATA_MIME,
                "cda.data_size": size,
                "cda.data_stored_at": STORED_AT,
            }},
        )
        print(f">>> ZAPSANO: matched={res.matched_count}, modified={res.modified_count}")


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()