diff --git a/.gitignore b/.gitignore index e9af3ec..2250532 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,7 @@ Outlook/outlook_profile/ **/browser_profile_*/ /OutlookVBCZ/ EmailsImport/_test_open/CORRUPT__899C000242F1A76E0000.msg +# Evernote — velké exporty a lokální backup DB neverzovat +Evernote/Export_2026-06-11/ +Evernote/en_backup.db +*.enex diff --git a/Evernote/en_backup.db b/Evernote/en_backup.db deleted file mode 100644 index 7f71dbd..0000000 Binary files a/Evernote/en_backup.db and /dev/null differ diff --git a/Evernote/evernote_to_joplin_mirror_v1.0.md b/Evernote/evernote_to_joplin_mirror_v1.0.md new file mode 100644 index 0000000..883ff4e --- /dev/null +++ b/Evernote/evernote_to_joplin_mirror_v1.0.md @@ -0,0 +1,69 @@ +# evernote_to_joplin_mirror_v1.0 + +**Verze:** 1.0 | **Datum:** 2026-06-11 | **Autor:** Claude (pro V. Buzalku) + +Jednosměrné zrcadlení **Evernote → self-hosted Joplin Server**. Záložní „únikový +východ" pro případ, že by Evernote (Bending Spoons) přestal vyhovovat — kompletní +kopie poznámek na vlastním Toweru, odkud lze plynule přejít na Joplin. + +## Architektura + +``` +Evernote cloud --(evernote-backup sync)--> en_backup.db --(tento skript)--> Joplin Server +``` + +- **Čtení**: přímo z `en_backup.db` přes knihovnu `evernote_backup.note_storage.SqliteStorage` + (žádný ENEX mezikrok — knihovna je vlastní kód nástroje, čtení je spolehlivé). +- **Zápis**: Joplin sync API `PUT /api/items/root:/<id>.md:/content`, autentizace + session tokenem z `POST /api/sessions` (header `X-API-AUTH`). + +## Mapování + +| Evernote | Joplin | poznámka | +|----------|--------|----------| +| notebook | folder (`type_ 2`) | | +| stack | nadřazený folder | notebooky ve stacku dostanou parent | +| note | note (`type_ 1`) | tělo = ENML → HTML (`markup_language: 2`) | +| en-media (příloha) | resource (`type_ 4`) + blob | odkaz v těle `:/<id>` | +| en-todo | ☐ / ☑ | | + +**ID v Joplinu** = `md5("evernote-<typ>:<guid>")` → 32 hex. 
Deterministická, +takže opakované běhy **aktualizují** (upsert), neduplikují. + +## Použití + +```bash +# pilot na jednom notebooku +python evernote_to_joplin_mirror_v1.0.py --notebook "CL2-78989-011" + +# test s limitem poznámek +python evernote_to_joplin_mirror_v1.0.py --notebook "Recepty" --limit 5 + +# plné zrcadlení +python evernote_to_joplin_mirror_v1.0.py --all + +# náhled bez zápisu +python evernote_to_joplin_mirror_v1.0.py --all --dry-run +``` + +Po běhu spustit v Joplin klientovi synchronizaci, aby se položky stáhly ze serveru. + +## Ověřeno (2026-06-11) + +- Pilot `CL2-78989-011`: 4 poznámky, text + HTML formátování věrně přeneseno. +- Pilot `Recepty --limit 1`: 1 poznámka + 20 příloh; blob byte-identický (md5 shoda), + tělo správně odkazuje `:/<resid>`. + +## Známá omezení / TODO v1.1 + +- **Upsert only** — položky smazané/přejmenované v Evernote se v Joplinu zatím NEMAŽOU. + Pro pravý mirror doplnit mazání (porovnat sadu GUID, smazat osiřelé `deleted_time`). +- Heslo je v hlavičce skriptu plaintextem — zvážit přesun do env/konfig. +- ENML→HTML je „best effort" (en-media, en-todo). Exotické prvky (en-crypt, tabulky + se zvláštním stylingem) neřešeny. +- Plánování zatím ručně na Z230; přesun na Tower (User Scripts cron) plánován. + +## Předchůdce + +evernote-backup `sync` musí proběhnout před tímto skriptem (naplní/aktualizuje +`en_backup.db`). Plná pipeline: `evernote-backup sync && python evernote_to_joplin_mirror_v1.0.py --all`. 
diff --git a/Evernote/evernote_to_joplin_mirror_v1.0.py b/Evernote/evernote_to_joplin_mirror_v1.0.py new file mode 100644 index 0000000..2ea84f3 --- /dev/null +++ b/Evernote/evernote_to_joplin_mirror_v1.0.py @@ -0,0 +1,347 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# ============================================================================= +# Nazev: evernote_to_joplin_mirror_v1.0.py +# Verze: 1.0 +# Datum: 2026-06-11 +# Autor: Claude (pro Vladimira Buzalku) +# Popis: Jednosmerne zrcadleni Evernote -> self-hosted Joplin Server. +# Cte poznamky primo z lokalni databaze nastroje evernote-backup +# (en_backup.db, pres knihovnu evernote_backup) a zapisuje je do +# Joplin Serveru pres jeho sync API (PUT /api/items, X-API-AUTH). +# +# Evernote notebook -> Joplin folder (stack -> nadrazeny folder) +# Evernote note -> Joplin note (telo = ENML prevedeny na HTML) +# Evernote resource -> Joplin resource (priloha + binarni blob) +# +# ID v Joplinu jsou odvozena deterministicky z Evernote GUID +# (md5), takze opakovane behy poznamky AKTUALIZUJI, neduplikuji. +# +# POZOR (v1.0): upsert only. Poznamky/notebooky smazane v Evernote tento +# skript zatim v Joplinu NEMAZE (planovano do v1.1). Mirror je +# jednosmerny - do Joplinu rucne nepiste, prepise se. 
#
# Usage:
#   # pilot run on a single notebook:
#   python evernote_to_joplin_mirror_v1.0.py --notebook "CL2-78989-011"
#   # more notebooks + per-notebook note limit (testing):
#   python evernote_to_joplin_mirror_v1.0.py --notebook "Recepty" --limit 5
#   # full mirror of everything:
#   python evernote_to_joplin_mirror_v1.0.py --all
#   # only print what would be done, write nothing:
#   python evernote_to_joplin_mirror_v1.0.py --all --dry-run
#
# Credentials:
#   JOPLIN_PASSWORD must be set in the environment for any non-dry run.
#   JOPLIN_BASE / JOPLIN_EMAIL may be overridden the same way.
# =============================================================================

import argparse
import hashlib
import json
import os
import re
import sys
import urllib.request
import urllib.error
from datetime import datetime, timezone
from pathlib import Path

# --- configuration -----------------------------------------------------------
# The evernote-backup database is expected next to this script.
DB_PATH = Path(__file__).with_name("en_backup.db")
JOPLIN_BASE = os.environ.get("JOPLIN_BASE", "https://joplin.buzalka.cz")
JOPLIN_EMAIL = os.environ.get("JOPLIN_EMAIL", "vladimir.buzalka@buzalka.cz")
# SECURITY: the password used to be a plaintext literal on this line (and,
# per the old comment, was shared with Postgres). It is now read from the
# environment only. The previously committed value stays in git history and
# should be treated as compromised and rotated.
JOPLIN_PASSWORD = os.environ.get("JOPLIN_PASSWORD", "")

# Joplin item types (the value written to the `type_` field of a sync item)
T_NOTE = 1
T_FOLDER = 2
T_RESOURCE = 4

# MIME prefixes treated as images; a tuple so it can be passed straight to
# str.startswith().
IMG_MIMES = ("image/",)

# --- helpers -----------------------------------------------------------------


def jid(prefix: str, *parts: str) -> str:
    """Return a deterministic 32-hex Joplin ID derived from Evernote identifiers.

    The ID is ``md5(prefix + ":" + ":".join(parts))``. md5 is used only as a
    stable fingerprint (same input -> same ID on every run), not for security.
    """
    key = prefix + ":" + ":".join(parts)
    return hashlib.md5(key.encode("utf-8")).hexdigest()


def iso(ms) -> str:
    """Convert a millisecond epoch timestamp to 'YYYY-MM-DDTHH:MM:SS.000Z' (UTC).

    A falsy value (``None``, ``0``) is rendered as the Unix epoch. The
    millisecond part of the input is not preserved; the output always ends
    in ``.000Z``.
    """
    if not ms:
        ms = 0
    moment = datetime.fromtimestamp(ms / 1000, tz=timezone.utc)
    return moment.strftime("%Y-%m-%dT%H:%M:%S.000Z")


def now_iso() -> str:
    """Return the current UTC time in the same format as :func:`iso`."""
    return datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.000Z")


# --- ENML -> Joplin HTML body conversion -------------------------------------
# NOTE(review): in the reviewed text these patterns had lost their leading
# "<en-..." part (e.g. the first one read r"]*>(.*)"). They are restored here
# from the surviving fragments -- confirm against the original script.

# Captures everything between <en-note ...> and the last </en-note>.
EN_NOTE_RE = re.compile(r"<en-note[^>]*>(.*)</en-note>", re.DOTALL)
# Matches an <en-media ...> or self-closing <en-media .../> tag.
EN_MEDIA_RE = re.compile(r"<en-media[^>]*?/?>", re.DOTALL)
# Matches an <en-todo ...> tag; group 1 holds its attribute text.
EN_TODO_RE = re.compile(r"<en-todo([^>]*?)/?>", re.DOTALL)
# Extracts the hex value of a hash="..." attribute.
HASH_ATTR_RE = re.compile(r'hash="([0-9a-fA-F]+)"')

+ +def enml_to_html(content: str, hash_to_res: dict) -> str: + """Vytahne vnitrek a prevede en-media/en-todo na Joplin HTML.""" + m = EN_NOTE_RE.search(content or "") + body = m.group(1) if m else (content or "") + + def repl_media(mt): + tag = mt.group(0) + hm = HASH_ATTR_RE.search(tag) + if not hm: + return "" + res = hash_to_res.get(hm.group(1).lower()) + if not res: + return "" + rid, mime, fname = res + if mime.startswith(IMG_MIMES): + return f'' + label = fname or "priloha" + return f'{label}' + + body = EN_MEDIA_RE.sub(repl_media, body) + + def repl_todo(tt): + checked = "checked" in tt.group(1).lower() and "true" in tt.group(1).lower() + return "☑ " if checked else "☐ " + + body = EN_TODO_RE.sub(repl_todo, body) + return body.strip() + + +# --- Joplin sync API klient ------------------------------------------------- + +class Joplin: + def __init__(self, base, email, password, dry_run=False): + self.base = base.rstrip("/") + self.dry_run = dry_run + self.token = None + if not dry_run: + self._login(email, password) + + def _login(self, email, password): + req = urllib.request.Request( + f"{self.base}/api/sessions", + data=json.dumps({"email": email, "password": password}).encode(), + headers={"Content-Type": "application/json"}, + ) + r = urllib.request.urlopen(req, timeout=30) + self.token = json.loads(r.read().decode())["id"] + + def put_item(self, path: str, body: bytes): + """PUT na /api/items/root:/:/content.""" + if self.dry_run: + return None + url = f"{self.base}/api/items/root:/{path}:/content" + req = urllib.request.Request( + url, data=body, method="PUT", + headers={"X-API-AUTH": self.token, + "Content-Type": "application/octet-stream"}, + ) + r = urllib.request.urlopen(req, timeout=120) + return json.loads(r.read().decode()) + + +# --- skladani Joplin sync polozek ------------------------------------------- + +def folder_item(fid, title, parent_id=""): + t = now_iso() + return ( + f"{title}\n\n" + f"id: {fid}\n" + f"created_time: {t}\n" + 
f"updated_time: {t}\n" + f"user_created_time: {t}\n" + f"user_updated_time: {t}\n" + f"encryption_cipher_text: \n" + f"encryption_applied: 0\n" + f"parent_id: {parent_id}\n" + f"is_shared: 0\n" + f"share_id: \n" + f"master_key_id: \n" + f"icon: \n" + f"user_data: \n" + f"deleted_time: 0\n" + f"type_: {T_FOLDER}" + ).encode("utf-8") + + +def note_item(nid, parent_id, title, body_html, created, updated, author): + ct, ut = iso(created), iso(updated) + safe_title = (title or "(bez nazvu)").replace("\n", " ") + return ( + f"{safe_title}\n\n" + f"{body_html}\n\n" + f"id: {nid}\n" + f"parent_id: {parent_id}\n" + f"created_time: {ct}\n" + f"updated_time: {ut}\n" + f"user_created_time: {ct}\n" + f"user_updated_time: {ut}\n" + f"is_conflict: 0\n" + f"latitude: 0.00000000\n" + f"longitude: 0.00000000\n" + f"altitude: 0.0000\n" + f"author: {author or ''}\n" + f"source_url: \n" + f"is_todo: 0\n" + f"todo_due: 0\n" + f"todo_completed: 0\n" + f"source: evernote-mirror\n" + f"source_application: evernote_to_joplin_mirror\n" + f"application_data: \n" + f"order: 0\n" + f"encryption_cipher_text: \n" + f"encryption_applied: 0\n" + f"markup_language: 2\n" # 2 = HTML + f"is_shared: 0\n" + f"share_id: \n" + f"conflict_original_id: \n" + f"master_key_id: \n" + f"user_data: \n" + f"deleted_time: 0\n" + f"type_: {T_NOTE}" + ).encode("utf-8") + + +def resource_item(rid, title, mime, filename, size, created, updated): + ct, ut = iso(created), iso(updated) + ext = "" + if filename and "." 
in filename: + ext = filename.rsplit(".", 1)[1].lower() + elif "/" in mime: + ext = mime.split("/", 1)[1] + return ( + f"{title}\n\n" + f"id: {rid}\n" + f"mime: {mime}\n" + f"filename: {filename or ''}\n" + f"created_time: {ct}\n" + f"updated_time: {ut}\n" + f"user_created_time: {ct}\n" + f"user_updated_time: {ut}\n" + f"file_extension: {ext}\n" + f"encryption_cipher_text: \n" + f"encryption_applied: 0\n" + f"encryption_blob_encrypted: 0\n" + f"size: {size}\n" + f"is_shared: 0\n" + f"share_id: \n" + f"master_key_id: \n" + f"user_data: \n" + f"blob_updated_time: {ut}\n" + f"ocr_text: \n" + f"ocr_details: \n" + f"ocr_status: 0\n" + f"ocr_error: \n" + f"type_: {T_RESOURCE}" + ).encode("utf-8") + + +# --- hlavni logika ---------------------------------------------------------- + +def main(): + ap = argparse.ArgumentParser(description="Evernote -> Joplin mirror v1.0") + ap.add_argument("--notebook", action="append", default=[], + help="nazev notebooku k zrcadleni (lze opakovat)") + ap.add_argument("--all", action="store_true", help="zrcadlit vsechny notebooky") + ap.add_argument("--limit", type=int, default=0, + help="max poznamek na notebook (0 = bez limitu, pro test)") + ap.add_argument("--dry-run", action="store_true", + help="nic nezapisovat, jen vypsat") + ap.add_argument("--db", default=str(DB_PATH), help="cesta k en_backup.db") + args = ap.parse_args() + + if not args.all and not args.notebook: + ap.error("zadej --notebook NAZEV nebo --all") + + from evernote_backup.note_storage import SqliteStorage + storage = SqliteStorage(Path(args.db)) + + jop = Joplin(JOPLIN_BASE, JOPLIN_EMAIL, JOPLIN_PASSWORD, dry_run=args.dry_run) + mode = "DRY-RUN" if args.dry_run else "ZAPIS" + print(f"[{mode}] Joplin {JOPLIN_BASE} | db {args.db}") + + notebooks = list(storage.notebooks.iter_notebooks()) + if not args.all: + wanted = set(args.notebook) + notebooks = [nb for nb in notebooks if nb.name in wanted] + if not notebooks: + print("Zadny odpovidajici notebook nenalezen.", 
file=sys.stderr) + sys.exit(1) + + # stack -> folder id (vytvarime nadrazene foldery podle stacku) + stack_ids = {} + n_nb = n_notes = n_res = n_err = 0 + + for nb in notebooks: + parent_id = "" + if nb.stack: + sid = stack_ids.get(nb.stack) + if sid is None: + sid = jid("evernote-stack", nb.stack) + stack_ids[nb.stack] = sid + try: + jop.put_item(f"{sid}.md", folder_item(sid, nb.stack)) + except Exception as e: + print(f" ! stack '{nb.stack}': {e}", file=sys.stderr) + parent_id = sid + + fid = jid("evernote-notebook", nb.guid) + try: + jop.put_item(f"{fid}.md", folder_item(fid, nb.name, parent_id)) + n_nb += 1 + except Exception as e: + print(f" ! notebook '{nb.name}': {e}", file=sys.stderr) + continue + print(f"[notebook] {nb.name}") + + count = 0 + for note in storage.notes.iter_notes(nb.guid): + if args.limit and count >= args.limit: + break + count += 1 + + # priprav resources + mapu hash->resid + hash_to_res = {} + for res in (note.resources or []): + if not (res.data and res.data.body): + continue + bh = res.data.bodyHash + hexhash = bh.hex() if isinstance(bh, (bytes, bytearray)) else str(bh) + fname = res.attributes.fileName if res.attributes else None + rid = jid("evernote-resource", note.guid, hexhash) + hash_to_res[hexhash.lower()] = (rid, res.mime or "application/octet-stream", fname) + try: + jop.put_item(f"{rid}.md", resource_item( + rid, fname or "priloha", res.mime or "application/octet-stream", + fname, len(res.data.body), note.created, note.updated)) + jop.put_item(f".resource/{rid}", res.data.body) + n_res += 1 + except Exception as e: + n_err += 1 + print(f" ! resource {fname}: {e}", file=sys.stderr) + + body = enml_to_html(note.content, hash_to_res) + nid = jid("evernote-note", note.guid) + author = note.attributes.author if note.attributes else None + try: + jop.put_item(f"{nid}.md", note_item( + nid, fid, note.title, body, note.created, note.updated, author)) + n_notes += 1 + except Exception as e: + n_err += 1 + print(f" ! 
note '{note.title}': {e}", file=sys.stderr) + + print(f" ({count} poznamek)") + + print(f"\nHOTOVO: notebooku={n_nb} poznamek={n_notes} priloh={n_res} chyb={n_err}") + if not args.dry_run: + print("V Joplin klientovi spust synchronizaci, aby se polozky stahly.") + + +if __name__ == "__main__": + main() diff --git a/Evernote/joplin_wipe_all.py b/Evernote/joplin_wipe_all.py new file mode 100644 index 0000000..df5c8a0 --- /dev/null +++ b/Evernote/joplin_wipe_all.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Smaze VSECHNY polozky z Joplin Serveru (pro cisty full-sync). Jednorazova pomucka. +import json, urllib.request + +BASE = "https://joplin.buzalka.cz" +EMAIL = "vladimir.buzalka@buzalka.cz" +PASSWORD = "Vlado7309208104++" + +tok = json.loads(urllib.request.urlopen(urllib.request.Request( + BASE + "/api/sessions", + data=json.dumps({"email": EMAIL, "password": PASSWORD}).encode(), + headers={"Content-Type": "application/json"})).read())["id"] + + +def req(method, path, data=None): + r = urllib.request.Request(BASE + path, data=data, method=method, + headers={"X-API-AUTH": tok}) + return urllib.request.urlopen(r, timeout=60).read().decode() + + +deleted = 0 +while True: + page = json.loads(req("GET", "/api/items/root/children")) + items = page.get("items", []) + if not items: + break + for it in items: + req("DELETE", "/api/items/root:/{}:".format(it["name"])) + deleted += 1 + print(" smazano {} (has_more={})".format(deleted, page.get("has_more"))) + +print("CELKEM smazano polozek:", deleted) +chk = json.loads(req("GET", "/api/items/root/children")) +print("zbyva polozek na serveru:", len(chk.get("items", []))) diff --git a/Feasibility/77242113UCO2001/TRASH/store_cda_batch_v1.0.md b/Feasibility/77242113UCO2001/TRASH/store_cda_batch_v1.0.md new file mode 100644 index 0000000..c459851 --- /dev/null +++ b/Feasibility/77242113UCO2001/TRASH/store_cda_batch_v1.0.md @@ -0,0 +1,30 @@ +# store_cda_batch_v1.0.py + +**Verze:** 1.0 · **Datum:** 
2026-06-09 + +Dávkové uložení binárek CDA (PDF) do Mongo `feasibility.investigators` → +`cda.data_*`. Zdroj = `.msg` soubory na Toweru (`/mnt/user/JNJEMAILS`), +stažené přes SFTP (paramiko), příloha vytažena `extract_msg`. + +## Princip +- `MAPPING` = explicitní seznam `(investigator_id, msg_filename, + attachment_filename, label)` — žádné hádání za běhu (matchování přílohy v `.msg` + přes normalizaci bez diakritiky). +- CDA chodí od 3 asistentek: **Wittenbergerová** (LWittenb), **Hrabalová** + (LHrabalo), **Vojčová** (LVojcova). Konkrétní soubor bývá jmenován v STATUS lékaře. + +## Co zapisuje +`cda.data_base64`, `cda.data_sha256`, `cda.data_filename`, `cda.data_mime`, +`cda.data_size`, `cda.data_stored_at`, `cda.data_source_msg`; doplní `cda.soubor`. + +## Spuštění +``` +python store_cda_batch_v1.0.py # dry-run / náhled +python store_cda_batch_v1.0.py --apply # zápis do Mongo +``` +venv má `paramiko` + `extract_msg` + `pymongo`. + +## Historie +- v1.0 — dávka 1 (6×): Hlavatý/Cliniq, Fedurco/ENDOMED, Tichý, Falc, Pešta, + Jungwirthová. Dávka 2 (3×): Matouš/Axon-master, Mihálkanin/Gastro LM, + Krížová/FN Motol. Aplikováno 09JUN2026. (Lukáč zvlášť přes store_cda_to_mongo.) diff --git a/Feasibility/77242113UCO2001/TRASH/store_cda_batch_v1.0.py b/Feasibility/77242113UCO2001/TRASH/store_cda_batch_v1.0.py new file mode 100644 index 0000000..99bad63 --- /dev/null +++ b/Feasibility/77242113UCO2001/TRASH/store_cda_batch_v1.0.py @@ -0,0 +1,147 @@ +# -*- coding: utf-8 -*- +# ============================================================================= +# Nazev: store_cda_batch_v1.0.py +# Verze: 1.0 +# Datum: 2026-06-09 +# Popis: Davkove ulozi binarky CDA (PDF) do Mongo k investigatorum +# (feasibility.investigators -> cda.data_*). Zdroj = .msg soubory na +# Toweru (/mnt/user/JNJEMAILS), stazene pres SFTP, priloha vytazena +# extract_msg. Mapovani investigator -> (.msg, attachment) je +# explicitni (zadne hadani za behu). 
Drzi se domluvy: fyzicky +# dokument z e-mailu -> do Mongo. +# Zapise: cda.data_base64, cda.data_sha256, cda.data_filename, +# cda.data_mime, cda.data_size, cda.data_stored_at, +# cda.data_source_msg; doplni cda.soubor pokud chybi. +# Existujici cda.* (stav, datum_*, zdroj, poznamka) NEMENI. +# Pouziti: python store_cda_batch_v1.0.py (dry-run / nahled) +# python store_cda_batch_v1.0.py --apply (zapise do Mongo) +# ============================================================================= + +import os +import sys +import base64 +import hashlib +import unicodedata +import paramiko +import extract_msg +from pymongo import MongoClient +from bson import ObjectId + +MONGO_URI = os.environ.get("MONGO_URI", "mongodb://192.168.1.76:27017") +TOWER_HOST = "192.168.1.76" +TOWER_USER = "root" +TOWER_PASS = "7309208104" +REMOTE_DIR = "/mnt/user/JNJEMAILS" +TMPDIR = r"u:\Dropbox\!!!Days\Downloads Z230\_cda_tmp" +STORED_AT = "2026-06-10" + +# investigator_id -> (msg_filename, attachment_filename, label) +# DAVKA 3 (10JUN2026): nove CDA z 10.6. (krok 4 -> 5) +MAPPING = [ + ("6a19832b5fc221351825796c", "FC130007DE92C2040000.msg", + "CZ_CDA institution_MUDr. GREGAR s.r.o_Jan Gregar_fully signed_09Jun2026.pdf", + "Gregar Jan (MUDr. 
GREGAR s.r.o.)"), + ("6a19832b5fc2213518257969", "FC130007DE92C2030000.msg", + "SK_CDA PI_Durina_FN Nove Zamky_fully signed 09Jun2026.pdf", + "Durina Juraj (FN Nove Zamky)"), + ("6a19832b5fc2213518257973", "FC130007DE92C1FE0000.msg", + "SK_CDA_Institution_Accout Center s.r.o_09Jun2026.pdf", + "Horvath Frantisek (Accout Center)"), +] + +# DAVKA 1+2 (09JUN2026) - jiz ulozeno, ponechano pro historii: +# Hlavaty/Cliniq FC1300053049739C, Fedurco/ENDOMED FC1300053049739B, +# Tichy FC13000530495B95, Falc FC130007D8A1F0E6, Pesta FC130007D8A1F0E1, +# Jungwirthova FC130007D8A1F0E2, Lukac FC130007C9E971FF (store_cda_to_mongo_v1.0), +# Matous/Axon FC130007D8A1F0E3, Mihalkanin/GastroLM FC130007D8A1F0E6, +# Krizova/Motol FC130007C1643CA1 + + +def norm(s): + """lowercase, bez diakritiky, sjednocene mezery""" + s = s or "" + s = unicodedata.normalize("NFKD", s) + s = "".join(c for c in s if not unicodedata.combining(c)) + return " ".join(s.lower().split()) + + +def main(): + apply = "--apply" in sys.argv + os.makedirs(TMPDIR, exist_ok=True) + + # SSH/SFTP + ssh = paramiko.SSHClient() + ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + ssh.connect(TOWER_HOST, username=TOWER_USER, password=TOWER_PASS, timeout=30) + sftp = ssh.open_sftp() + + client = MongoClient(MONGO_URI) + col = client["feasibility"]["investigators"] + + plan = [] + for inv_id, msg_name, att_name, label in MAPPING: + local_msg = os.path.join(TMPDIR, msg_name) + if not os.path.exists(local_msg): + sftp.get(f"{REMOTE_DIR}/{msg_name}", local_msg) + m = extract_msg.Message(local_msg) + target = norm(att_name) + chosen = None + for att in m.attachments: + name = att.longFilename or att.shortFilename or "" + if norm(name) == target or (target in norm(name)) or (norm(name) in target and name.lower().endswith(".pdf")): + chosen = (name, att.data) + break + m.close() + if not chosen: + plan.append((inv_id, label, msg_name, att_name, None, "!!! 
PRILOHA NENALEZENA")) + continue + raw = chosen[1] + sha = hashlib.sha256(raw).hexdigest() + plan.append((inv_id, label, msg_name, chosen[0], (len(raw), sha, raw), "OK")) + + sftp.close(); ssh.close() + + # Nahled + print("=== NAHLED DAVKY (CDA -> Mongo cda.data) ===\n") + for inv_id, label, msg_name, att_name, info, status in plan: + doc = col.find_one({"_id": ObjectId(inv_id)}, {"prijmeni": 1, "jmeno": 1, "cda.data_base64": 1}) + has = bool(doc and doc.get("cda", {}).get("data_base64")) + print(f"[{status}] {label} (_id {inv_id})") + print(f" .msg: {msg_name}") + print(f" priloha: {att_name}") + if info: + print(f" velikost: {info[0]} B sha256: {info[1]}") + print(f" data_base64 jiz existuje: {has}") + print() + + if not apply: + print(">>> DRY-RUN. Pro zapis spust s --apply") + return + + n = 0 + for inv_id, label, msg_name, att_name, info, status in plan: + if status != "OK" or not info: + print(f"PRESKAKUJI {label}: {status}") + continue + size, sha, raw = info + b64 = base64.b64encode(raw).decode("ascii") + res = col.update_one( + {"_id": ObjectId(inv_id)}, + {"$set": { + "cda.data_base64": b64, + "cda.data_sha256": sha, + "cda.data_filename": att_name, + "cda.data_mime": "application/pdf", + "cda.data_size": size, + "cda.data_stored_at": STORED_AT, + "cda.data_source_msg": msg_name, + "cda.soubor": att_name, + }}, + ) + n += res.modified_count + print(f"ZAPSANO: {label} (modified={res.modified_count})") + print(f"\n>>> CELKEM ZAPSANO: {n}") + + +if __name__ == "__main__": + main() diff --git a/Feasibility/77242113UCO2001/TRASH/store_cda_batch_v1.1.md b/Feasibility/77242113UCO2001/TRASH/store_cda_batch_v1.1.md new file mode 100644 index 0000000..d70692e --- /dev/null +++ b/Feasibility/77242113UCO2001/TRASH/store_cda_batch_v1.1.md @@ -0,0 +1,34 @@ +# store_cda_batch_v1.1.py + +**Verze:** 1.1 · **Datum:** 2026-06-11 + +Dávkové uložení binárek CDA (PDF) do Mongo `feasibility.investigators` → +`cda.data_*`. 
Zdroj = `.msg` soubory na Toweru (`/mnt/user/JNJEMAILS`), +stažené přes SFTP (paramiko), příloha vytažena `extract_msg`. + +## Princip +- `MAPPING` = explicitní seznam `(investigator_id, msg_filename, + attachment_filename, label)` — žádné hádání za běhu (matchování přílohy v `.msg` + přes normalizaci bez diakritiky). +- CDA chodí od 3 asistentek: **Wittenbergerová** (LWittenb), **Hrabalová** + (LHrabalo), **Vojčová** (LVojcova). Konkrétní soubor bývá jmenován v STATUS lékaře. + +## Co zapisuje +`cda.data_base64`, `cda.data_sha256`, `cda.data_filename`, `cda.data_mime`, +`cda.data_size`, `cda.data_stored_at`, `cda.data_source_msg`; doplní `cda.soubor`. + +## Spuštění +``` +python store_cda_batch_v1.1.py # dry-run / náhled +python store_cda_batch_v1.1.py --apply # zápis do Mongo +``` +venv má `paramiko` + `extract_msg` + `pymongo`. + +## Historie +- v1.1 — DÁVKA 4 (11JUN2026): Konečný Michal (MUDr. Michal Konečný, Ph.D. s.r.o., + od Hrabalové), Baláž Jozef (FNsP F. D. Roosevelta Banská Bystrica, od Vojčové). + Krok 4 → 5. +- v1.0 — DÁVKA 1 (6×): Hlavatý/Cliniq, Fedurco/ENDOMED, Tichý, Falc, Pešta, + Jungwirthová. DÁVKA 2 (3×): Matouš/Axon-master, Mihálkanin/Gastro LM, + Krížová/FN Motol. DÁVKA 3 (3×): Gregar, Ďurina, Horváth. Aplikováno 09–10JUN2026. + (Lukáč zvlášť přes store_cda_to_mongo.) diff --git a/Feasibility/77242113UCO2001/TRASH/store_cda_batch_v1.1.py b/Feasibility/77242113UCO2001/TRASH/store_cda_batch_v1.1.py new file mode 100644 index 0000000..a9ca1a2 --- /dev/null +++ b/Feasibility/77242113UCO2001/TRASH/store_cda_batch_v1.1.py @@ -0,0 +1,147 @@ +# -*- coding: utf-8 -*- +# ============================================================================= +# Nazev: store_cda_batch_v1.1.py +# Verze: 1.1 +# Datum: 2026-06-11 +# Popis: Davkove ulozi binarky CDA (PDF) do Mongo k investigatorum +# (feasibility.investigators -> cda.data_*). Zdroj = .msg soubory na +# Toweru (/mnt/user/JNJEMAILS), stazene pres SFTP, priloha vytazena +# extract_msg. 
Mapovani investigator -> (.msg, attachment) je +# explicitni (zadne hadani za behu). Drzi se domluvy: fyzicky +# dokument z e-mailu -> do Mongo (CDA fyzicky ulozeno k lekarum). +# Zapise: cda.data_base64, cda.data_sha256, cda.data_filename, +# cda.data_mime, cda.data_size, cda.data_stored_at, +# cda.data_source_msg; doplni cda.soubor pokud chybi. +# Existujici cda.* (stav, datum_*, zdroj, poznamka) NEMENI. +# Pouziti: python store_cda_batch_v1.1.py (dry-run / nahled) +# python store_cda_batch_v1.1.py --apply (zapise do Mongo) +# Zmeny v1.1: DAVKA 4 (11JUN2026) - Konecny Michal + Balaz Jozef (krok 4 -> 5). +# ============================================================================= + +import os +import sys +import base64 +import hashlib +import unicodedata +import paramiko +import extract_msg +from pymongo import MongoClient +from bson import ObjectId + +MONGO_URI = os.environ.get("MONGO_URI", "mongodb://192.168.1.76:27017") +TOWER_HOST = "192.168.1.76" +TOWER_USER = "root" +TOWER_PASS = "7309208104" +REMOTE_DIR = "/mnt/user/JNJEMAILS" +TMPDIR = r"u:\Dropbox\!!!Days\Downloads Z230\_cda_tmp" +STORED_AT = "2026-06-11" + +# investigator_id -> (msg_filename, attachment_filename, label) +# DAVKA 4 (11JUN2026): nove CDA z 10.6. (krok 4 -> 5) +MAPPING = [ + ("6a19832b5fc221351825796f", "FC130007DE92C2310000.msg", + "CZ_CDA Institution_MUDr. Michal Konecný, Ph.D. s.r.o._fully signed 10Jun2026.pdf", + "Konecny Michal (MUDr. Michal Konecny, Ph.D. s.r.o.)"), + ("6a19832b5fc2213518257953", "FC130007DE92C20F0000.msg", + "SK_CDA PI_MUDr. Jozef Balaz_FD Roosevelta_BB_10Jun2026.pdf", + "Balaz Jozef (FNsP F. D. Roosevelta Banska Bystrica)"), +] + +# HISTORIE drivejsich davek (jiz ulozeno): +# DAVKA 3 (10JUN2026): Gregar/MUDr.GREGAR FC130007DE92C204, Durina/FN Nove Zamky +# FC130007DE92C203, Horvath/Accout Center FC130007DE92C1FE. 
+# DAVKA 1+2 (09JUN2026): Hlavaty/Cliniq FC1300053049739C, Fedurco/ENDOMED +# FC1300053049739B, Tichy FC13000530495B95, Falc FC130007D8A1F0E6, Pesta +# FC130007D8A1F0E1, Jungwirthova FC130007D8A1F0E2, Lukac FC130007C9E971FF +# (store_cda_to_mongo_v1.0), Matous/Axon FC130007D8A1F0E3, Mihalkanin/GastroLM +# FC130007D8A1F0E6, Krizova/Motol FC130007C1643CA1. + + +def norm(s): + """lowercase, bez diakritiky, sjednocene mezery""" + s = s or "" + s = unicodedata.normalize("NFKD", s) + s = "".join(c for c in s if not unicodedata.combining(c)) + return " ".join(s.lower().split()) + + +def main(): + apply = "--apply" in sys.argv + os.makedirs(TMPDIR, exist_ok=True) + + # SSH/SFTP + ssh = paramiko.SSHClient() + ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + ssh.connect(TOWER_HOST, username=TOWER_USER, password=TOWER_PASS, timeout=30) + sftp = ssh.open_sftp() + + client = MongoClient(MONGO_URI) + col = client["feasibility"]["investigators"] + + plan = [] + for inv_id, msg_name, att_name, label in MAPPING: + local_msg = os.path.join(TMPDIR, msg_name) + if not os.path.exists(local_msg): + sftp.get(f"{REMOTE_DIR}/{msg_name}", local_msg) + m = extract_msg.Message(local_msg) + target = norm(att_name) + chosen = None + for att in m.attachments: + name = att.longFilename or att.shortFilename or "" + if norm(name) == target or (target in norm(name)) or (norm(name) in target and name.lower().endswith(".pdf")): + chosen = (name, att.data) + break + m.close() + if not chosen: + plan.append((inv_id, label, msg_name, att_name, None, "!!! 
PRILOHA NENALEZENA")) + continue + raw = chosen[1] + sha = hashlib.sha256(raw).hexdigest() + plan.append((inv_id, label, msg_name, chosen[0], (len(raw), sha, raw), "OK")) + + sftp.close(); ssh.close() + + # Nahled + print("=== NAHLED DAVKY (CDA -> Mongo cda.data) ===\n") + for inv_id, label, msg_name, att_name, info, status in plan: + doc = col.find_one({"_id": ObjectId(inv_id)}, {"prijmeni": 1, "jmeno": 1, "cda.data_base64": 1}) + has = bool(doc and doc.get("cda", {}).get("data_base64")) + print(f"[{status}] {label} (_id {inv_id})") + print(f" .msg: {msg_name}") + print(f" priloha: {att_name}") + if info: + print(f" velikost: {info[0]} B sha256: {info[1]}") + print(f" data_base64 jiz existuje: {has}") + print() + + if not apply: + print(">>> DRY-RUN. Pro zapis spust s --apply") + return + + n = 0 + for inv_id, label, msg_name, att_name, info, status in plan: + if status != "OK" or not info: + print(f"PRESKAKUJI {label}: {status}") + continue + size, sha, raw = info + b64 = base64.b64encode(raw).decode("ascii") + res = col.update_one( + {"_id": ObjectId(inv_id)}, + {"$set": { + "cda.data_base64": b64, + "cda.data_sha256": sha, + "cda.data_filename": att_name, + "cda.data_mime": "application/pdf", + "cda.data_size": size, + "cda.data_stored_at": STORED_AT, + "cda.data_source_msg": msg_name, + "cda.soubor": att_name, + }}, + ) + n += res.modified_count + print(f"ZAPSANO: {label} (modified={res.modified_count})") + print(f"\n>>> CELKEM ZAPSANO: {n}") + + +if __name__ == "__main__": + main() diff --git a/Feasibility/77242113UCO2001/store_cda_batch_v1.2.md b/Feasibility/77242113UCO2001/store_cda_batch_v1.2.md new file mode 100644 index 0000000..5061207 --- /dev/null +++ b/Feasibility/77242113UCO2001/store_cda_batch_v1.2.md @@ -0,0 +1,32 @@ +# store_cda_batch_v1.2.py + +**Verze:** 1.2 · **Datum:** 2026-06-11 + +Dávkové uložení binárek CDA (PDF) do Mongo `feasibility.investigators` → +`cda.data_*`. 
Zdroj = `.msg` soubory na Toweru (`/mnt/user/JNJEMAILS`), +stažené přes SFTP (paramiko), příloha vytažena `extract_msg`. + +## Princip +- `MAPPING` = explicitní seznam `(investigator_id, msg_filename, + attachment_filename, label)` — žádné hádání za běhu. +- CDA chodí od 3 asistentek z CTA: **Wittenbergerová** (LWittenb), **Hrabalová** + (LHrabalo), **Vojčová** (LVojcova). + +## Co zapisuje +`cda.data_base64`, `cda.data_sha256`, `cda.data_filename`, `cda.data_mime`, +`cda.data_size`, `cda.data_stored_at`, `cda.data_source_msg`; doplní `cda.soubor`. + +## Spuštění +``` +python store_cda_batch_v1.2.py # dry-run / náhled +python store_cda_batch_v1.2.py --apply # zápis do Mongo +``` +venv má `paramiko` + `extract_msg` + `pymongo`. + +## Historie +- v1.2 — DÁVKA 5 (11JUN2026): Mudr Robert (Nemocnice Milosrdných sester sv. Karla + Boromejského, od Vojčové). Krok 4 → 5. +- v1.1 — DÁVKA 4 (11JUN2026): Konečný Michal, Baláž Jozef. +- v1.0 — DÁVKY 1–3 (09–10JUN2026): Hlavatý, Fedurco, Tichý, Falc, Pešta, + Jungwirthová, Matouš, Mihálkanin, Krížová, Gregar, Ďurina, Horváth. + (Lukáč zvlášť přes store_cda_to_mongo.) diff --git a/Feasibility/77242113UCO2001/store_cda_batch_v1.2.py b/Feasibility/77242113UCO2001/store_cda_batch_v1.2.py new file mode 100644 index 0000000..9a85729 --- /dev/null +++ b/Feasibility/77242113UCO2001/store_cda_batch_v1.2.py @@ -0,0 +1,145 @@ +# -*- coding: utf-8 -*- +# ============================================================================= +# Nazev: store_cda_batch_v1.2.py +# Verze: 1.2 +# Datum: 2026-06-11 +# Popis: Davkove ulozi binarky CDA (PDF) do Mongo k investigatorum +# (feasibility.investigators -> cda.data_*). Zdroj = .msg soubory na +# Toweru (/mnt/user/JNJEMAILS), stazene pres SFTP, priloha vytazena +# extract_msg. Mapovani investigator -> (.msg, attachment) je +# explicitni (zadne hadani za behu). Drzi se domluvy: fyzicky +# dokument z e-mailu -> do Mongo (CDA fyzicky ulozeno k lekarum). 
#            Writes: cda.data_base64, cda.data_sha256, cda.data_filename,
#            cda.data_mime, cda.data_size, cda.data_stored_at,
#            cda.data_source_msg; also cda.soubor (intended: only when it
#            is missing - NOTE(review): verify against main()).
#            Existing cda.* fields (stav, datum_*, zdroj, poznamka) are
#            NOT changed.
# Usage:     python store_cda_batch_v1.2.py            (dry-run / preview)
#            python store_cda_batch_v1.2.py --apply    (writes to Mongo)
#            The Tower SSH password is taken from the TOWER_PASS environment
#            variable; when it is unset, paramiko falls back to the SSH
#            agent / local key files.
# Changes v1.2: BATCH 5 (11JUN2026) - Mudr Robert (step 4 -> 5).
# =============================================================================

import os
import sys
import base64
import hashlib
import unicodedata
import paramiko
import extract_msg
from pymongo import MongoClient
from bson import ObjectId

# Mongo connection string; overridable through the environment.
MONGO_URI = os.environ.get("MONGO_URI", "mongodb://192.168.1.76:27017")

# Tower (SFTP source of the .msg files).
TOWER_HOST = "192.168.1.76"
TOWER_USER = "root"
# SECURITY: the root password must never live in the repository.  It is read
# from the environment; an unset/empty value becomes None, which makes
# paramiko try agent / key-file authentication instead of a password.
TOWER_PASS = os.environ.get("TOWER_PASS") or None
REMOTE_DIR = "/mnt/user/JNJEMAILS"

# Local cache directory for the downloaded .msg files.
TMPDIR = r"u:\Dropbox\!!!Days\Downloads Z230\_cda_tmp"

# Date stamp written to cda.data_stored_at (fixed per batch, not "today").
STORED_AT = "2026-06-11"

# Each entry: (investigator_id, msg_filename, attachment_filename, label)
# BATCH 5 (11JUN2026): new CDA from 11 June (step 4 -> 5)
MAPPING = [
    ("6a19832b5fc2213518257978", "FC130007DE92C2320000.msg",
     "CZ_ CDA PI_MUDr. Robert Mudr_11Jun2026.pdf",
     "Mudr Robert (Nemocnice Milosrdnych sester sv. Karla Boromejskeho)"),
]

# HISTORY of earlier batches (already stored):
#   BATCH 4 (11JUN2026): Konecny Michal FC130007DE92C231, Balaz Jozef FC130007DE92C20F.
#   BATCH 3 (10JUN2026): Gregar FC130007DE92C204, Durina FC130007DE92C203,
#     Horvath/Accout Center FC130007DE92C1FE.
#   BATCH 1+2 (09JUN2026): Hlavaty/Cliniq FC1300053049739C, Fedurco/ENDOMED
#     FC1300053049739B, Tichy FC13000530495B95, Falc FC130007D8A1F0E6, Pesta
#     FC130007D8A1F0E1, Jungwirthova FC130007D8A1F0E2, Lukac FC130007C9E971FF
#     (store_cda_to_mongo_v1.0), Matous/Axon FC130007D8A1F0E3, Mihalkanin/GastroLM
#     FC130007D8A1F0E6, Krizova/Motol FC130007C1643CA1.
#   NOTE(review): Falc and Mihalkanin list the same .msg id (FC130007D8A1F0E6)
#     - confirm whether that is one e-mail with two CDAs or a copy/paste slip.
def norm(s):
    """Return *s* lowercased, without diacritics, with whitespace collapsed.

    ``None`` or an empty value yields ``""``.  Used to compare attachment
    file names regardless of accents, letter case and spacing.
    """
    s = s or ""
    # NFKD splits accented letters into base letter + combining mark, so the
    # marks can be dropped on their own.
    s = unicodedata.normalize("NFKD", s)
    s = "".join(c for c in s if not unicodedata.combining(c))
    return " ".join(s.lower().split())


def _pick_attachment(attachments, wanted_name):
    """Return ``(name, data)`` of the attachment matching *wanted_name*.

    An exact match on the normalised name always wins.  Only when no exact
    match exists is the first loose match used: the wanted name contained in
    the attachment name, or a ``.pdf`` attachment name contained in the
    wanted name.  Attachments whose payload is not raw ``bytes`` are ignored
    because they cannot be hashed or base64-encoded.  Returns ``None`` when
    nothing matches.
    """
    target = norm(wanted_name)
    fallback = None
    for att in attachments:
        name = att.longFilename or att.shortFilename or ""
        candidate = norm(name)
        exact = candidate == target
        loose = target in candidate or (
            candidate in target and name.lower().endswith(".pdf")
        )
        if not (exact or loose):
            continue
        data = att.data
        if not isinstance(data, bytes):
            continue
        if exact:
            return name, data
        if fallback is None:
            fallback = (name, data)
    return fallback


def _fetch_msg(sftp, msg_name):
    """Return the local path of *msg_name*, downloading it first if absent."""
    local_msg = os.path.join(TMPDIR, msg_name)
    if not os.path.exists(local_msg):
        # Download under a temporary name so an interrupted transfer never
        # leaves a truncated file that later runs would trust as cached.
        partial = local_msg + ".part"
        sftp.get(f"{REMOTE_DIR}/{msg_name}", partial)
        os.replace(partial, local_msg)
    return local_msg


def _build_plan(sftp):
    """Resolve every MAPPING entry to its attachment bytes.

    Returns a list of ``(inv_id, label, msg_name, att_name, info, status)``
    tuples where ``info`` is ``(size, sha256_hex, raw_bytes)`` or ``None``
    when the attachment was not found.
    """
    plan = []
    for inv_id, msg_name, att_name, label in MAPPING:
        local_msg = _fetch_msg(sftp, msg_name)
        m = extract_msg.Message(local_msg)
        try:
            chosen = _pick_attachment(m.attachments, att_name)
        finally:
            # Release the .msg file handle even if parsing fails.
            m.close()
        if not chosen:
            plan.append((inv_id, label, msg_name, att_name, None, "!!! PRILOHA NENALEZENA"))
            continue
        found_name, raw = chosen
        sha = hashlib.sha256(raw).hexdigest()
        plan.append((inv_id, label, msg_name, found_name, (len(raw), sha, raw), "OK"))
    return plan


def _print_preview(col, plan):
    """Print what would be written for each plan entry (no writes)."""
    print("=== NAHLED DAVKY (CDA -> Mongo cda.data) ===\n")
    for inv_id, label, msg_name, att_name, info, status in plan:
        doc = col.find_one(
            {"_id": ObjectId(inv_id)},
            {"prijmeni": 1, "jmeno": 1, "cda.data_base64": 1},
        )
        # "cda" may be absent or null; treat both as "nothing stored yet".
        cda = (doc or {}).get("cda") or {}
        has = bool(cda.get("data_base64"))
        print(f"[{status}] {label} (_id {inv_id})")
        if doc is None:
            print(" !!! investigator v Mongo nenalezen")
        print(f" .msg: {msg_name}")
        print(f" priloha: {att_name}")
        if info:
            print(f" velikost: {info[0]} B sha256: {info[1]}")
        print(f" data_base64 jiz existuje: {has}")
        print()


def _store_plan(col, plan):
    """Write every resolved plan entry to Mongo; return the modified count."""
    written = 0
    for inv_id, label, msg_name, att_name, info, status in plan:
        if status != "OK" or not info:
            print(f"PRESKAKUJI {label}: {status}")
            continue
        size, sha, raw = info
        oid = ObjectId(inv_id)
        res = col.update_one(
            {"_id": oid},
            {"$set": {
                "cda.data_base64": base64.b64encode(raw).decode("ascii"),
                "cda.data_sha256": sha,
                "cda.data_filename": att_name,
                "cda.data_mime": "application/pdf",
                "cda.data_size": size,
                "cda.data_stored_at": STORED_AT,
                "cda.data_source_msg": msg_name,
            }},
        )
        if res.matched_count == 0:
            # A wrong/stale _id must not be reported as a successful write.
            print(f"!!! NENALEZEN: {label} (_id {inv_id}) - nic nezapsano")
            continue
        # cda.soubor is only filled in when missing/empty, as the file header
        # promises; an existing value is left untouched.
        col.update_one(
            {"_id": oid, "$or": [
                {"cda.soubor": {"$exists": False}},
                {"cda.soubor": None},
                {"cda.soubor": ""},
            ]},
            {"$set": {"cda.soubor": att_name}},
        )
        written += res.modified_count
        print(f"ZAPSANO: {label} (modified={res.modified_count})")
    return written


def main():
    """Fetch the mapped CDA attachments and store them in Mongo.

    Without ``--apply`` on the command line only the preview is printed
    (dry-run).  All network handles (SSH, SFTP, Mongo) are closed even when
    an error interrupts the run.
    """
    apply = "--apply" in sys.argv
    os.makedirs(TMPDIR, exist_ok=True)

    # SSH/SFTP
    ssh = paramiko.SSHClient()
    # NOTE(review): AutoAddPolicy trusts any host key on first contact;
    # consider load_system_host_keys() + RejectPolicy once the key is pinned.
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect(TOWER_HOST, username=TOWER_USER, password=TOWER_PASS, timeout=30)
        sftp = ssh.open_sftp()
        try:
            plan = _build_plan(sftp)
        finally:
            sftp.close()
    finally:
        ssh.close()

    client = MongoClient(MONGO_URI)
    try:
        col = client["feasibility"]["investigators"]

        # Preview
        _print_preview(col, plan)

        if not apply:
            print(">>> DRY-RUN. Pro zapis spust s --apply")
            return

        n = _store_plan(col, plan)
        print(f"\n>>> CELKEM ZAPSANO: {n}")
    finally:
        client.close()


if __name__ == "__main__":
    main()