"
+ if isinstance(val, datetime):
+ return parse_date(val)
+ if isinstance(val, int):
+ return val if _INT64_MIN <= val <= _INT64_MAX else str(val)
+ if isinstance(val, (str, float, type(None))):
+ return val
+ if isinstance(val, list):
+ return [to_bson(v) for v in val]
+ try:
+ iv = int(val)
+ return iv if _INT64_MIN <= iv <= _INT64_MAX else str(iv)
+ except Exception:
+ pass
+ return str(val)
+
+
def extract_headers(msg) -> dict:
    """Collect the Internet headers of ``msg`` into a plain dict.

    Header names are lower-cased and ``-`` is replaced by ``_``. RFC 2047
    encoded words are decoded. A header occurring once maps to a string, a
    repeated header maps to a list of strings.

    Keys are emitted in first-occurrence order, so the resulting document is
    deterministic from run to run.

    Args:
        msg: Object whose ``header`` attribute offers ``keys()`` and
            ``get_all(name, default)``.

    Returns:
        The header dict; empty when there is no header block. Any failure is
        logged and whatever was collected so far is returned.
    """
    headers = {}
    try:
        hdr = msg.header
        if not hdr:
            return {}
        from email.header import decode_header as _dh

        def _decode(v: str) -> str:
            # Best effort: an undecodable value is returned untouched.
            try:
                return "".join(
                    part.decode(enc or "utf-8", errors="replace")
                    if isinstance(part, bytes) else part
                    for part, enc in _dh(v)
                )
            except Exception:
                return v

        # dict.fromkeys de-duplicates repeated header names while keeping
        # their original order (a set would shuffle them).
        for key in dict.fromkeys(hdr.keys()):
            k = key.lower().replace("-", "_")
            vals = [_decode(v) for v in hdr.get_all(key, [])]
            headers[k] = vals if len(vals) > 1 else (vals[0] if vals else "")
    except Exception as e:
        logging.error("extract_headers: %s", e)
    return headers
+
+
def extract_recipients(msg) -> list:
    """List the recipients of ``msg`` as ``{"type", "email", "name"}`` dicts.

    The recipient's ``type`` attribute is mapped 1 -> "to", 2 -> "cc",
    3 -> "bcc"; anything missing, unknown or unconvertible counts as "to".
    A failure while iterating is logged and the recipients gathered so far
    are returned.
    """
    kind_names = {1: "to", 2: "cc", 3: "bcc"}

    def _kind_code(recipient) -> int:
        """Return the numeric recipient type, defaulting to 1."""
        code = getattr(recipient, "type", 1)
        try:
            return int(code)
        except Exception:
            pass
        try:
            # enum-like value that is not directly convertible
            return int(code.value)
        except Exception:
            return 1

    collected = []
    try:
        for recipient in msg.recipients:
            collected.append({
                "type": kind_names.get(_kind_code(recipient), "to"),
                "email": safe(recipient, "email", default=""),
                "name": safe(recipient, "name", default=""),
            })
    except Exception as exc:
        logging.error("extract_recipients: %s", exc)
    return collected
+
+
def extract_attachments(msg) -> list:
    """Describe every named attachment of ``msg`` and try to store its bytes.

    Attachments without a file name are skipped. Each entry carries the file
    name, size, MIME type, content id and inline flag. When raw bytes are
    available they are handed to ``sw.store`` keyed by their SHA-256, and the
    entry additionally gets ``sha256``, ``seaweed_path`` and ``seaweed_url``.
    """

    def _payload_and_size(att):
        """Return ``(raw_bytes_or_None, size)`` for one attachment."""
        try:
            data = att.data
            if isinstance(data, (bytes, bytearray)):
                blob = bytes(data)
                return blob, len(blob)
            if data:
                # e.g. an embedded message: a length, but no bytes
                return None, len(data)
        except Exception:
            pass
        return None, 0

    collected = []
    try:
        for att in msg.attachments:
            fname = safe(att, "longFilename", "shortFilename", default="")
            if not fname:
                continue
            payload, size = _payload_and_size(att)
            mime = safe(att, "mimetype", "mimeType", default="application/octet-stream")
            entry = {
                "filename": fname,
                "size_bytes": size,
                "mime_type": mime,
                "content_id": safe(att, "cid", default=None),
                "is_inline": bool(safe(att, "isInline", default=False)),
            }
            # SeaweedFS upload (deduplicated by content, shared with the
            # Graph/mailstore branch). A SeaweedFS outage must NOT break the
            # parse: the fields are simply left out and
            # seaweed_attachments_backfill_jnj.py fills them in later.
            if payload:
                try:
                    digest = hashlib.sha256(payload).hexdigest()
                    path, url, _ = sw.store(digest, payload, mime)
                    entry.update(sha256=digest, seaweed_path=path, seaweed_url=url)
                except Exception as exc:
                    logging.warning("SeaweedFS upload selhal (%s): %s", fname, exc)
            collected.append(entry)
    except Exception as exc:
        logging.error("extract_attachments: %s", exc)
    return collected
+
+
def extract_mapi_props(msg) -> dict:
    """Return all raw MAPI properties of ``msg`` as ``{"0xXXXX": value}``.

    The key is built from the first four characters of the property name,
    upper-cased; values go through ``to_bson``. A property that cannot be
    converted is skipped; a failure reading ``msg.props`` is logged.
    """
    collected = {}
    try:
        props = msg.props
        if not hasattr(props, "items"):
            return {}
        for name, prop in props.items():
            try:
                value = to_bson(prop.value)
                # slicing already copes with names shorter than four chars
                collected[f"0x{name[:4].upper()}"] = value
            except Exception:
                pass
    except Exception as exc:
        logging.error("extract_mapi_props: %s", exc)
    return collected
+
+
+# ─── Tolerantni otevirani a raw-OLE fallback ─────────────────────────────────
# Windows code page identifier -> Python codec name.
# The East-Asian entries use Python's Windows code-page codecs (cp932, gbk,
# cp949, cp950) rather than the narrower shift_jis / gb2312 / euc_kr / big5:
# text written under those Windows code pages can contain characters the
# narrower codecs reject under strict decoding.
_CPID_TO_CODEC = {
    1250: "cp1250", 1251: "cp1251", 1252: "cp1252", 1253: "cp1253",
    1254: "cp1254", 1255: "cp1255", 1256: "cp1256", 1257: "cp1257",
    1258: "cp1258", 874: "cp874", 932: "cp932", 936: "gbk",
    949: "cp949", 950: "cp950", 65001: "utf-8", 28591: "iso-8859-1",
    28592: "iso-8859-2", 20127: "ascii",
}
+
+
def _read_u32_prop(ole, propid):
    """Read a 32-bit MAPI property from the top-level property stream.

    Scans ``__properties_version1.0`` as a sequence of 16-byte entries that
    follow a 32-byte header. In each entry the first little-endian 32-bit
    word is the property tag (property id in the upper 16 bits) and the
    value is read from bytes 8..11.

    Args:
        ole: Object with ``openstream(name)`` returning something readable.
        propid: 16-bit MAPI property id, e.g. ``0x3FDE``.

    Returns:
        The value as an unsigned int, or ``None`` when the stream cannot be
        read or holds no such property.
    """
    try:
        data = ole.openstream("__properties_version1.0").read()
    except Exception:
        return None
    body = data[32:]  # skip the 32-byte header of the top-level stream
    # Only complete 16-byte entries are inspected; a trailing partial entry
    # is ignored.
    for offset in range(0, len(body) - 16 + 1, 16):
        tag = struct.unpack_from("<I", body, offset)[0]
        if ((tag >> 16) & 0xFFFF) == propid:
            return struct.unpack_from("<I", body, offset + 8)[0]
    return None


def _detect_cpid(ole) -> Optional[str]:
    """Codec suggested by PR_INTERNET_CPID / PR_MESSAGE_CODEPAGE.

    The result is a hint, not a guarantee. Returns ``None`` when neither
    property maps to a usable codec.
    """
    for pid in (0x3FDE, 0x3FFD):  # INTERNET_CPID, MESSAGE_CODEPAGE
        codec = _CPID_TO_CODEC.get(_read_u32_prop(ole, pid))
        # utf-8/ascii are poor hints for an 8-bit stream (the property is
        # often wrong in that case), so they are not returned.
        if codec and codec not in ("utf-8", "ascii"):
            return codec
    return None
+
+
def _cascade_decode(raw: bytes, is_unicode: bool, cpid_codec: Optional[str]) -> str:
    """Decode the bytes of a MAPI string without trusting the headers.

    PT_UNICODE data is UTF-16-LE (strict first, then with replacement
    characters). 8-bit data is tried strictly against a priority list and
    the first codec that succeeds wins; latin-1 is the final fallback.
    """
    if not raw:
        return ""

    if is_unicode:  # PT_UNICODE = utf-16-le
        try:
            return raw.decode("utf-16-le")
        except Exception:
            return raw.decode("utf-16-le", errors="replace")

    hinted = [cpid_codec] if cpid_codec else []
    # strict utf-8 goes first because it is a strong discriminator
    for codec in ["utf-8", *hinted, "cp1250", "cp1252", "gb2312", "big5"]:
        try:
            return raw.decode(codec, errors="strict")
        except Exception:
            continue
    return raw.decode("latin-1", errors="replace")  # never fails
+
+
def _raw_mapi_strings(msg_path: Path) -> dict:
    """Read the key textual MAPI fields straight from the OLE container.

    Bypasses extract_msg; meant for the case where extract_msg returned a
    degraded field. Only top-level ``__substg1.0_`` streams are considered.
    When a field exists in several types, Unicode (001F) beats ANSI (001E),
    which beats binary (0102). Fields that are missing stay ``""``.
    """
    out = dict.fromkeys(
        ("subject", "normalized_subject", "sender_name", "sender_email",
         "sender_smtp", "body_text", "body_html"),
        "",
    )
    try:
        ole = olefile.OleFileIO(str(msg_path))
    except Exception:
        return out

    tag_to_field = {  # MAPI tag -> key in out
        "0037": "subject", "0E1D": "normalized_subject",
        "0C1A": "sender_name", "5D01": "sender_smtp",
        "0C1F": "sender_email", "1000": "body_text", "1013": "body_html",
    }
    type_rank = {"001F": 3, "001E": 2, "0102": 1}
    prefix = "__substg1.0_"
    tag_end = len(prefix) + 4

    try:
        cpid = _detect_cpid(ole)
        best = {}  # field -> (type rank, decoded value)
        for entry in ole.listdir():
            # top level only (not nested messages), property streams only
            if len(entry) != 1 or not entry[0].startswith(prefix):
                continue
            stream_name = entry[0]
            field = tag_to_field.get(stream_name[len(prefix):tag_end].upper())
            if not field:
                continue
            kind = stream_name[-4:].upper()
            rank = type_rank.get(kind, 0)
            if rank == 0:
                continue
            # prefer unicode > ansi > binary
            if field in best and best[field][0] >= rank:
                continue
            try:
                data = ole.openstream(entry).read()
                text = _cascade_decode(data, kind == "001F", cpid)
            except Exception:
                continue
            best[field] = (rank, text)
        out.update({field: text for field, (_, text) in best.items()})
    finally:
        ole.close()
    return out
+
+
def _degraded(s) -> bool:
    """Tell whether a field is degraded: empty, or containing U+FFFD."""
    if not s:
        return True
    return "�" in s
+
+
def open_message(msg_path: Path):
    """Open a .msg file, falling back to ever more tolerant modes.

    Attempts, in order:
      1. a plain ``extract_msg.Message``                      -> "normal"
      2. the same with ``ErrorBehavior.SUPPRESS_ALL``         -> "suppress_all"
      3. SUPPRESS_ALL plus ``overrideEncoding``: first the codec detected
         from the file's code-page properties (if any), then cp1250 and
         cp1252                                               -> "override:<codec>"

    Returns:
        ``(msg, mode)`` for the first attempt that succeeds, or
        ``(None, None)`` when every attempt fails.
    """
    try:
        return extract_msg.Message(str(msg_path)), "normal"
    except Exception:
        pass
    try:
        return extract_msg.Message(
            str(msg_path), errorBehavior=ErrorBehavior.SUPPRESS_ALL), "suppress_all"
    except Exception:
        pass

    encs = []
    try:
        ole = olefile.OleFileIO(str(msg_path))
        try:
            detected = _detect_cpid(ole)
        finally:
            # always release the OLE handle, even if detection blows up
            ole.close()
        if detected:
            encs.append(detected)
    except Exception:
        pass

    # dict.fromkeys drops a detected codec that equals one of the defaults,
    # so the same failing open is not repeated.
    for enc in dict.fromkeys(encs + ["cp1250", "cp1252"]):
        try:
            return extract_msg.Message(
                str(msg_path), errorBehavior=ErrorBehavior.SUPPRESS_ALL,
                overrideEncoding=enc), f"override:{enc}"
        except Exception:
            continue
    return None, None
+
+
def detect_send_failure(*texts):
    """Look for traces of a send error in the given texts.

    Non-string arguments are ignored. The traces only show up in an unsent
    .msg (e.g. SendAsDenied) after Outlook has appended the error and a
    re-upload (jnj_mailbox_sync v1.3) has brought it to Tower.

    Returns:
        ``(send_failed, send_error)``: ``(False, None)`` when there is
        nothing to search or no marker from ``SEND_FAIL_MARKERS`` is present,
        otherwise ``(True, description)``.
    """
    haystack = "\n".join([t for t in texts if isinstance(t, str)])
    if not haystack:
        return False, None

    for marker in SEND_FAIL_MARKERS:
        if marker in haystack:
            break
    else:
        return False, None

    code = re.search(r"ec=(\d+)", haystack)
    detail = re.search(r"Error is \[([0-9xA-Fa-f\-]+)\]", haystack)
    reason = f"SendAsDenied (ec={code.group(1)})" if code else "send failed"
    if detail:
        reason += f" {detail.group(1)}"
    return True, reason
+
+
def extract_message(msg_path: Path) -> Optional[dict]:
    """Parse one .msg file into a MongoDB document.

    The file is opened through ``open_message`` and every field is read
    defensively (a failing accessor falls back to a default). The message is
    closed in all cases, including when parsing raises. Afterwards, text
    fields that came back empty or contain U+FFFD (and all of them when the
    file could only be opened in a non-"normal" mode) are re-read straight
    from the OLE container via ``_raw_mapi_strings``.

    Args:
        msg_path: Path of the .msg file.

    Returns:
        The document dict, whose ``_id`` is the Message-ID or
        ``filename:<stem>`` when no Message-ID is available. ``None`` when
        the file cannot be opened or parsing fails; both cases are logged.
    """
    msg, parse_mode = open_message(msg_path)
    if msg is None:
        logging.error("open failed [%s]: vsechny pokusy o otevreni selhaly", msg_path.name)
        return None

    max_html = 2 * 1024 * 1024  # stored HTML body is capped at this length

    try:
        try:
            # ── Message-ID ────────────────────────────────────────────
            mid = None
            for attr in ("messageId", "message_id", "internetMessageId"):
                mid = safe(msg, attr)
                if mid:
                    break
            if not mid:
                mid = f"filename:{msg_path.stem}"
            mid = str(mid).strip()

            # ── Subject ───────────────────────────────────────────────
            try:
                subject = msg.subject or ""
            except Exception:
                subject = ""

            normalized_subject = safe(msg, "normalizedSubject", "normalized_subject", default="")

            # ── Body ──────────────────────────────────────────────────
            try:
                body_text = msg.body or ""
            except Exception:
                body_text = ""

            body_html = None
            try:
                bh = msg.htmlBody
                if isinstance(bh, bytes):
                    bh = bh.decode("utf-8", errors="replace")
                if bh:
                    body_html = bh[:max_html]
            except Exception:
                pass

            # ── Sender ────────────────────────────────────────────────
            try:
                sender_email = msg.sender or ""
            except Exception:
                sender_email = ""

            sender_name = safe(msg, "senderName", "sender_name", default="")
            sender_smtp = safe(msg, "senderSmtpAddress", "sent_representing_smtp_address", default="")

            # ── Recipients ────────────────────────────────────────────
            recipients = extract_recipients(msg)

            try:
                to_raw = msg.to or ""
            except Exception:
                to_raw = ""
            try:
                cc_raw = msg.cc or ""
            except Exception:
                cc_raw = ""
            try:
                bcc_raw = getattr(msg, "bcc", None) or ""
            except Exception:
                bcc_raw = ""

            display_to = safe(msg, "displayTo", "display_to", default="")
            display_cc = safe(msg, "displayCc", "display_cc", default="")

            # ── Times ─────────────────────────────────────────────────
            try:
                received_at = parse_date(msg.date)
            except Exception:
                received_at = None

            sent_at = None
            for attr in ("clientSubmitTime", "client_submit_time", "sentOn"):
                v = safe(msg, attr)
                if v:
                    sent_at = parse_date(v)
                    break

            # ── MAPI properties ───────────────────────────────────────
            importance = 1
            try:
                v = msg.importance
                if v is not None:
                    importance = int(v)
            except Exception:
                pass

            sensitivity = 0
            try:
                v = getattr(msg, "sensitivity", None)
                if v is not None:
                    sensitivity = int(v)
            except Exception:
                pass

            flag_status = 0
            try:
                v = safe(msg, "flagStatus", "flag_status")
                if v is not None:
                    flag_status = int(v)
            except Exception:
                pass

            conversation_topic = safe(msg, "conversationTopic", "conversation_topic", default="")

            conversation_index = ""
            try:
                ci = safe(msg, "conversationIndex", "conversation_index")
                if isinstance(ci, bytes):
                    conversation_index = base64.b64encode(ci).decode()
                elif ci:
                    conversation_index = str(ci)
            except Exception:
                pass

            in_reply_to = safe(msg, "inReplyTo", "in_reply_to", default="")

            internet_refs = []
            try:
                refs = safe(msg, "internetReferences", "internet_references")
                if isinstance(refs, list):
                    internet_refs = refs
                elif isinstance(refs, str) and refs:
                    internet_refs = [r.strip() for r in refs.split() if r.strip()]
            except Exception:
                pass

            categories = []
            try:
                cats = safe(msg, "categories")
                if isinstance(cats, list):
                    categories = [str(c) for c in cats if c]
                elif isinstance(cats, str) and cats:
                    categories = [c.strip() for c in re.split(r"[;,]", cats) if c.strip()]
            except Exception:
                pass

            read_receipt = bool(safe(msg, "readReceiptRequested", "read_receipt_requested", default=False))
            delivery_receipt = bool(safe(msg, "deliveryReceiptRequested", "delivery_receipt_requested", default=False))

            # ── Internet headers ──────────────────────────────────────
            headers = extract_headers(msg)

            # Fall back to the raw headers for threading information.
            if not in_reply_to:
                in_reply_to = headers.get("in_reply_to", "")
            if not internet_refs:
                refs_str = headers.get("references", "")
                if isinstance(refs_str, str) and refs_str:
                    internet_refs = [r.strip() for r in refs_str.split() if r.strip()]

            # ── Attachments ───────────────────────────────────────────
            attachments = extract_attachments(msg)

            # ── Raw MAPI ──────────────────────────────────────────────
            mapi_raw = extract_mapi_props(msg)
        finally:
            # Release the message on every path; previously an exception
            # above left it open.
            try:
                msg.close()
            except Exception as close_err:
                logging.warning("msg.close failed [%s]: %s", msg_path.name, close_err)

        # ── Raw-OLE fallback for degraded text fields ─────────────────
        parse_degraded = parse_mode != "normal"
        forced = parse_degraded
        if (forced or _degraded(subject) or _degraded(body_text)
                or _degraded(sender_email) or (body_html and "\ufffd" in body_html)):
            raw = _raw_mapi_strings(msg_path)
            if raw["subject"] and (forced or _degraded(subject)):
                subject = raw["subject"]
            if raw["normalized_subject"] and (forced or _degraded(normalized_subject)):
                normalized_subject = raw["normalized_subject"]
            if raw["body_text"] and (forced or _degraded(body_text)):
                body_text = raw["body_text"]
            if raw["body_html"] and (forced or not body_html or "\ufffd" in body_html):
                body_html = raw["body_html"][:max_html]
            if (raw["sender_smtp"] or raw["sender_email"]) and (forced or _degraded(sender_email)):
                sender_email = raw["sender_smtp"] or raw["sender_email"]
            if raw["sender_name"] and (forced or _degraded(sender_name)):
                sender_name = raw["sender_name"]
            if raw["sender_smtp"] and not sender_smtp:
                sender_smtp = raw["sender_smtp"]

        # ── Detection of an unsent e-mail (v1.4) ──────────────────────
        send_failed, send_error = detect_send_failure(body_text, body_html)

        parsed_at = datetime.now(timezone.utc).replace(tzinfo=None)

        # ── Document ──────────────────────────────────────────────────
        return {
            "_id": mid,
            "filename": msg_path.name,

            "subject": subject,
            "normalized_subject": normalized_subject,
            "importance": importance,
            "sensitivity": sensitivity,
            "flag_status": flag_status,
            "read_receipt_requested": read_receipt,
            "delivery_receipt_requested": delivery_receipt,
            "has_attachments": len(attachments) > 0,
            "attachment_count": len(attachments),
            "message_size_bytes": msg_path.stat().st_size,

            "conversation_topic": conversation_topic,
            "conversation_index": conversation_index,
            "in_reply_to": in_reply_to,
            "internet_references": internet_refs,
            "categories": categories,

            "received_at": received_at,
            "sent_at": sent_at,

            "sender": {
                "email": sender_email,
                "name": sender_name,
                "smtp": sender_smtp,
            },
            "to": to_raw,
            "cc": cc_raw,
            "bcc": bcc_raw,
            "display_to": display_to,
            "display_cc": display_cc,
            "recipients": recipients,

            "body_text": body_text,
            "body_html": body_html,

            "attachments": attachments,
            "headers": headers,
            "mapi": mapi_raw,

            "parse_mode": parse_mode,
            "parse_degraded": parse_degraded,
            "send_failed": send_failed,
            "send_error": send_error,

            "parsed_at": parsed_at,
            # marks that the attachments (if any) are in SeaweedFS; used by
            # the backfill
            "seaweed_synced_at": (parsed_at
                                  if any(a.get("seaweed_path") for a in attachments)
                                  else None),
        }

    except Exception as e:
        logging.error("extract_message failed [%s]: %s", msg_path.name, e)
        return None
+
+
def create_indexes(col):
    """Create the lookup indexes and the combined text index on ``col``."""
    print(" Vytvarim indexy...")

    def ascending(*fields):
        """Create one single-field ascending index per given field."""
        for field in fields:
            col.create_index([(field, ASCENDING)])

    ascending("received_at", "sent_at", "sender.email")
    col.create_index([("filename", ASCENDING)], unique=True, sparse=True)
    ascending("conversation_topic", "has_attachments", "categories",
              "importance", "flag_status")

    text_fields = ("subject", "body_text", "to", "cc")
    col.create_index(
        [(field, TEXT) for field in text_fields],
        name="text_search",
        default_language="none",
    )
    print(" Indexy hotovy.")
+
+
def run_parse(col, state_col, args, now) -> dict:
    """PHASE 1: incrementally parse .msg files into the e-mail collection.

    Modes:
      * ``full`` (``--full``): every .msg file is parsed again.
      * ``seed`` (no stored watermark): files whose name is not in Mongo yet.
      * ``incremental``: files with an mtime newer than the stored watermark.

    After a run the watermark (``parse_state.last_parse_mtime``) is moved to
    the newest file's mtime. It is NOT moved when ``--limit`` cut candidates
    off: those files were not parsed, and advancing the watermark past them
    would hide them from every later incremental run.

    Args:
        col: Target e-mail collection.
        state_col: Collection holding the ``parse_state`` document.
        args: Parsed CLI arguments (``full``, ``limit``, ``dry_run``,
            ``reindex`` are read).
        now: Timestamp stored as ``last_parse_at``.

    Returns:
        Statistics dict with ``mode``, ``total_files``, ``candidates``,
        ``ok`` and ``err``.
    """
    stats = {"mode": None, "total_files": 0, "candidates": 0, "ok": 0, "err": 0}
    print("\n=== FAZE 1: PARSE (.msg -> emaily) ===")

    all_files = sorted(MSGS_DIR.glob("*.msg"))
    stats["total_files"] = len(all_files)
    if not all_files:
        print(" Zadne .msg ve zdroji -> preskakuji.")
        return stats
    max_mtime = max(f.stat().st_mtime for f in all_files)

    ps = state_col.find_one({"_id": "parse_state"}) or {}
    last_mtime = ps.get("last_parse_mtime")

    if args.full:
        candidates = all_files
        mode = "full"
    elif last_mtime is None:
        print(" Prvni beh (zadny mtime watermark) -> seed dle filename v Mongu...")
        existing = set(col.distinct("filename"))
        candidates = [f for f in all_files if f.name not in existing]
        mode = "seed"
        print(f" V Mongu jiz {len(existing)} filename; nove k naparsovani: {len(candidates)}")
    else:
        candidates = [f for f in all_files if f.stat().st_mtime > last_mtime]
        mode = "incremental"

    skipped_by_limit = 0
    if args.limit:
        before = len(candidates)
        candidates = candidates[:args.limit]
        skipped_by_limit = before - len(candidates)

    stats["mode"] = mode
    stats["candidates"] = len(candidates)
    wm_str = datetime.fromtimestamp(last_mtime).strftime("%Y-%m-%d %H:%M:%S") if last_mtime else "(zadny)"
    print(f" Rezim: {mode} | .msg celkem {len(all_files)} | watermark {wm_str} | ke zpracovani {len(candidates)}")

    if not candidates:
        print(" Nic noveho k parsovani.")
        # Still move the watermark to the newest file (except for --full and
        # dry-run).
        if not args.dry_run and mode != "full":
            state_col.update_one({"_id": "parse_state"},
                                 {"$set": {"last_parse_mtime": max_mtime, "last_parse_at": now}}, upsert=True)
        return stats

    if args.dry_run:
        print(f" DRY-RUN: naparsoval bych {len(candidates)} souboru (Mongo se nemeni). Ukazka:")
        for f in candidates[:10]:
            mt = datetime.fromtimestamp(f.stat().st_mtime).strftime("%Y-%m-%d %H:%M:%S")
            print(f" + {f.name} (mtime {mt})")
        if len(candidates) > 10:
            print(f" ... a dalsich {len(candidates) - 10}")
        return stats

    batch = []
    verbose = len(candidates) <= 30

    def flush():
        """Write the pending batch; on failure retry document by document."""
        if not batch:
            return
        try:
            col.bulk_write(batch, ordered=False)
        except Exception as e:
            logging.error("bulk_write spadl (%s) -- prepinam na per-dokument", e)
            print(f" CHYBA bulk_write: {e} -- zkousim per-dokument")
            for op in batch:
                try:
                    col.bulk_write([op], ordered=False)
                except Exception as e2:
                    try:
                        bad_id = getattr(op, "_filter", {}).get("_id", "?")
                    except Exception:
                        bad_id = "?"
                    logging.error("per-dokument selhal [_id=%s]: %s", bad_id, e2)
                    print(f" ZAHOZEN _id={bad_id}: {e2}")
                    # the document was counted as ok when it was queued
                    stats["ok"] -= 1
                    stats["err"] += 1
        batch.clear()

    for i, msg_path in enumerate(candidates, 1):
        doc = extract_message(msg_path)
        if doc is None:
            stats["err"] += 1
        else:
            batch.append(UpdateOne({"_id": doc["_id"]}, {"$set": doc}, upsert=True))
            stats["ok"] += 1
            if len(batch) >= BATCH_SIZE:
                flush()
        if verbose:
            status = "ERR " if doc is None else "OK "
            subj = (doc.get("subject") or "")[:60] if doc else "?"
            print(f" {i:>5}/{len(candidates)} {status} {subj}")
        elif i % 500 == 0:
            print(f" prubeh {i}/{len(candidates)} ok={stats['ok']} err={stats['err']}")
    flush()

    # Indexes only for full/seed/--reindex (they already exist in an
    # incremental run).
    if mode in ("full", "seed") or args.reindex:
        create_indexes(col)

    if skipped_by_limit:
        # --limit left unparsed candidates behind: keep the old watermark so
        # the next run still sees them.
        print(f" PARSE hotovo: ok={stats['ok']} err={stats['err']} "
              f"watermark NEPOSUNUT (--limit vynechal {skipped_by_limit} souboru)")
        return stats

    # Move the watermark to the newest file.
    state_col.update_one({"_id": "parse_state"},
                         {"$set": {"last_parse_mtime": max_mtime, "last_parse_at": now,
                                   "last_parsed_count": stats["ok"], "last_parse_mode": mode}},
                         upsert=True)
    print(f" PARSE hotovo: ok={stats['ok']} err={stats['err']} "
          f"watermark={datetime.fromtimestamp(max_mtime):%Y-%m-%d %H:%M:%S}")
    return stats
+
+
+# ══════════════════════════════════════════════════════════════════════════════
+# FAZE 2 — SYNC (SQLite -> Mongo jnj_messages + emaily cesta)
+# [drive sync_jnj_state_v1.0.py]
+# ══════════════════════════════════════════════════════════════════════════════
+
def norm_mid(s: str) -> str:
    """Normalise a Message-ID: trim whitespace and surrounding ``<``/``>``."""
    text = s if s else ""
    without_brackets = text.strip().strip("<>")
    return without_brackets.strip()
+
+
def coalesce_path(jnjf, fld) -> str:
    """Return ``jnjf`` unless it is empty or blank, else ``fld`` (or "")."""
    if jnjf and jnjf.strip():
        return jnjf
    if fld:
        return fld
    return ""
+
+
def newest_db():
    """Return the most recently modified SQLite file in ``DB_DIR``.

    Files named ``jnjemails_*.db`` take precedence; any ``*.db`` is used only
    when there are none. Returns ``None`` when nothing matches.
    """
    for pattern in ("jnjemails_*.db", "*.db"):
        found = glob.glob(os.path.join(DB_DIR, pattern))
        if found:
            return max(found, key=os.path.getmtime)
    return None
+
+
def run_sync(db, args, now) -> dict:
    """PHASE 2: SQLite -> jnj_messages (mirror) + e-mail folder/state fields.

    Reads the newest SQLite database, mirrors every selected row into
    ``MIRROR_COL`` and, where the row can be matched to an e-mail document
    (exact ``_id``, normalised Message-ID, or file name derived from the
    entry id), updates that document's ``jnj_*`` fields.

    The SQLite connection is always closed, also on errors. When ``--limit``
    cut rows off, neither the watermark nor ``last_db`` is advanced:
    otherwise the rows that were left out would be skipped by later runs.

    Args:
        db: Mongo database handle.
        args: Parsed CLI arguments (``full``, ``force``, ``limit``,
            ``dry_run`` are read).
        now: Timestamp written to the synced/mirrored fields.

    Returns:
        Statistics dict with ``total``, ``matched`` and ``skipped``.
    """
    stats = {"total": 0, "matched": 0, "skipped": False}
    print("\n=== FAZE 2: SYNC (SQLite -> jnj_messages + emaily cesta) ===")

    emails = db[EMAILS_COL]
    state_col = db[STATE_COL]

    db_path = newest_db()
    if not db_path:
        print(f" Zadna .db v {DB_DIR} -> preskakuji.")
        stats["skipped"] = True
        return stats
    db_name = os.path.basename(db_path)
    print(f" SQLite: {db_name}")

    st = state_col.find_one({"_id": "watermark"}) or {}

    # ── Shortcut: was this DB processed already? (incremental mode only) ──
    if not args.full and not args.force and st.get("last_db") == db_name:
        print(f" DB {db_name} uz byla zpracovana (last_db) -> nic na praci.")
        stats["skipped"] = True
        return stats

    wm = None if args.full else st.get("last_updated_at")
    print(f" Watermark: {wm or '(zadny -> vse)'}")

    # ── NULL-safe row selection ───────────────────────────────────────────
    # The old inbox_full_sync writes rows with updated_at=NULL; a plain
    # watermark filter "updated_at > wm" silently drops them in SQL
    # (NULL > x is false). Rows with updated_at IS NULL are therefore taken
    # too, as long as they are NOT yet in the jnj_messages mirror (so each
    # is processed exactly once). --full takes everything.
    mirrored_ids = set()
    if not args.full:
        mirrored_ids = {d["_id"] for d in db[MIRROR_COL].find({}, {"_id": 1})}

    # ── SQLite (read-only) ────────────────────────────────────────────────
    con = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True)
    try:
        con.row_factory = sqlite3.Row
        available = {row[1] for row in con.execute("PRAGMA table_info(messages)")}
        sel_cols = [c for c in ROW_COLS if c in available]
        missing = [c for c in ROW_COLS if c not in available]
        if missing:
            print(f" (DB nema sloupce: {', '.join(missing)} -> default None/0)")
        has_updated = "updated_at" in available

        q = f"SELECT {', '.join(sel_cols)} FROM messages"
        params = ()
        if not args.full and wm and has_updated:
            q += " WHERE updated_at > ? OR updated_at IS NULL"
            params = (wm,)
        elif not args.full and wm and not has_updated:
            print(" (DB nema updated_at -> watermark ignorovan, beru vse)")
            wm = None
        raw_rows = [dict(row) for row in con.execute(q, params).fetchall()]
    finally:
        con.close()

    rows = []
    skipped_null = 0
    for d in raw_rows:
        if (not args.full) and d.get("updated_at") is None and d.get("message_id") in mirrored_ids:
            skipped_null += 1  # NULL row already mirrored -> done, not counted again
            continue
        rows.append(d)
    if skipped_null:
        print(f" (NULL-safe: preskoceno {skipped_null} NULL-updated_at radku uz v jnj_messages)")

    skipped_by_limit = 0
    if args.limit:
        before = len(rows)
        rows = rows[:args.limit]
        skipped_by_limit = before - len(rows)

    total = len(rows)
    stats["total"] = total
    print(f" Radku ke zpracovani: {total}")
    if total == 0:
        print(" Neni co synchronizovat (zadne nove radky).")
        if not args.dry_run:
            state_col.update_one({"_id": "watermark"},
                                 {"$set": {"last_db": db_name, "synced_at": now}}, upsert=True)
        return stats

    # ── Lookup tables from Mongo ──────────────────────────────────────────
    print(" Nacitam _id + filename + jnj_folder z Mongo...")
    ids_exact = set()
    ids_norm = {}
    fnames = {}
    has_path = set()
    for d in emails.find({}, {"_id": 1, "filename": 1, "jnj_folder": 1}):
        _id = d["_id"]
        ids_exact.add(_id)
        ids_norm.setdefault(norm_mid(_id), _id)
        fn = d.get("filename")
        if fn:
            fnames[fn] = _id
        if d.get("jnj_folder"):
            has_path.add(_id)
    print(f" Mongo dokumentu v {EMAILS_COL}: {len(ids_exact)} (z toho s jnj_folder: {len(has_path)})")

    # ── Plan ──────────────────────────────────────────────────────────────
    m_exact = m_norm = m_fname = unmatched = 0
    examples = []
    mirror_ops = []
    emaily_ops = []
    max_wm = wm or ""

    for r in rows:
        mid = r.get("message_id")
        uv = r.get("updated_at")
        if uv and uv > max_wm:
            max_wm = uv

        # Step A: mirror (always)
        doc = {k: r.get(k) for k in ROW_COLS}
        doc["mirrored_at"] = now
        mirror_ops.append(UpdateOne({"_id": mid}, {"$set": doc}, upsert=True))

        # Step B: match into the e-mail collection
        target = None
        if mid in ids_exact:
            target = mid
            m_exact += 1
        elif norm_mid(mid) in ids_norm:
            target = ids_norm[norm_mid(mid)]
            m_norm += 1
        else:
            eid = r.get("entry_id")
            fn = (eid[-20:] + ".msg") if eid else None
            if fn and fn in fnames:
                target = fnames[fn]
                m_fname += 1
            else:
                unmatched += 1
                if len(examples) < 6:
                    examples.append(mid)

        if target is not None:
            setdoc = {
                "jnj_folder": coalesce_path(r.get("jnj_folder"), r.get("folder")),
                "jnj_is_read": bool(r.get("is_read")),
                "jnj_not_in_mailbox": bool(r.get("not_in_mailbox_anymore")),
                "jnj_left_mailbox_at": r.get("left_mailbox_at"),
                "jnj_folder_synced_at": now,
            }
            emaily_ops.append(UpdateOne({"_id": target}, {"$set": setdoc}))

    matched = m_exact + m_norm + m_fname
    stats["matched"] = matched
    print(" --- PLAN ---")
    print(f" Zrcadlo -> {MIRROR_COL}: {len(mirror_ops)} upsert")
    print(f" Emaily match exact (_id): {m_exact}")
    print(f" Emaily match norm (<>): {m_norm}")
    print(f" Emaily match filename: {m_fname}")
    print(f" Emaily match CELKEM: {matched}/{total} ({100.0*matched/total:.1f}%)")
    print(f" NEnamatchovano: {unmatched}")
    if examples:
        print(" Priklady nenamatchovanych message_id:")
        for e in examples:
            print(f" {str(e)[:72]}")

    # ── Write ─────────────────────────────────────────────────────────────
    if args.dry_run:
        print(" DRY-RUN: Mongo se NEMENI.")
        return stats

    print(" Zapisuji...")
    if mirror_ops:
        db[MIRROR_COL].bulk_write(mirror_ops, ordered=False)
    if emaily_ops:
        emails.bulk_write(emaily_ops, ordered=False)

    if skipped_by_limit:
        # Rows were left out by --limit: keep watermark and last_db so the
        # next run still picks them up.
        print(f" SYNC hotovo: zrcadlo={len(mirror_ops)} emaily={len(emaily_ops)} "
              f"watermark NEPOSUNUT (--limit vynechal {skipped_by_limit} radku)")
        return stats

    state_col.update_one(
        {"_id": "watermark"},
        {"$set": {"last_updated_at": max_wm, "synced_at": now, "last_db": db_name,
                  "last_total": total, "last_matched": matched}},
        upsert=True,
    )
    print(f" SYNC hotovo: zrcadlo={len(mirror_ops)} emaily={len(emaily_ops)} watermark={max_wm}")
    return stats
+
+
+# ══════════════════════════════════════════════════════════════════════════════
+# FAZE 3 — ENRICH (Mongo -> PG fulltext, deleguje na sdileny 5_enrich)
+# [drive jnj_emails_to_fulltext_v1.0.py]
+# ══════════════════════════════════════════════════════════════════════════════
+
def newest_enrich():
    """Pick the enrich script matching ``ENRICH_GLOB`` with the highest vX.Y.

    Returns ``None`` when nothing matches the glob. A file name without a
    ``_vX.Y`` part counts as version (0, 0).
    """
    scripts = glob.glob(ENRICH_GLOB)
    if not scripts:
        return None

    def version_of(path):
        hit = re.search(r"_v(\d+)\.(\d+)", os.path.basename(path))
        if not hit:
            return (0, 0)
        major, minor = hit.groups()
        return (int(major), int(minor))

    return max(scripts, key=version_of)
+
+
def run_enrich(args, new_docs, force) -> dict:
    """PHASE 3: index the JNJ mailbox into the PG fulltext via the shared
    enrich script.

    Runs only when the parse phase added documents, or when ``force`` is set.
    Returns a dict with ``ran``, ``rc`` and ``skipped_reason``.
    """
    outcome = {"ran": False, "rc": None, "skipped_reason": None}
    print("\n=== FAZE 3: ENRICH (PG fulltext) ===")

    def skipped(reason, message):
        """Record why the phase did not run, report it, hand back stats."""
        outcome["skipped_reason"] = reason
        print(message)
        return outcome

    if args.no_enrich:
        return skipped("--no-enrich", " Preskoceno [--no-enrich].")

    if args.dry_run:
        script = newest_enrich()
        return skipped(
            "dry-run",
            f" DRY-RUN: zavolal bych {script or '(enrich nenalezen!)'} --mailbox {EMAILS_COL}"
            f" (nove doc z parse: {new_docs}, force={force})",
        )

    if new_docs <= 0 and not force:
        return skipped(
            "zadne nove doc",
            " Zadne nove maily z parse -> enrich preskocen "
            "(JNJ stejne enrichuje pipeline v 6:00/18:00; --enrich-always vynuti).",
        )

    script = newest_enrich()
    if not script:
        return skipped(
            "enrich skript nenalezen",
            f" CHYBA: zadny enrich skript ({ENRICH_GLOB}) -> preskakuji.",
        )

    command = [sys.executable, script, "--mailbox", EMAILS_COL]
    print(f" Spoustim: {' '.join(command)}")
    sys.stdout.flush()  # keep our output ahead of the child's
    finished = subprocess.run(command)
    outcome["ran"] = True
    outcome["rc"] = finished.returncode
    print(f" ENRICH hotovo: exit code {finished.returncode}")
    return outcome
+
+
+# ══════════════════════════════════════════════════════════════════════════════
+# FAZE RECONCILE — smaz provizorni duplikat (no-ID Sent kopie s ID-dvojcetem)
+# ══════════════════════════════════════════════════════════════════════════════
+
# Matches one e-mail address inside free text.
_EMAIL_RE = re.compile(r"[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}")


def _to_emails(s):
    """Return the lower-cased e-mail addresses found in ``s`` as a frozenset."""
    text = s or ""
    return frozenset(map(str.lower, _EMAIL_RE.findall(text)))
+
+
def _subj_key(d):
    """Comparison key for a subject: normalised subject, falling back to the
    plain subject, trimmed and lower-cased."""
    chosen = d.get("normalized_subject") or d.get("subject") or ""
    return chosen.strip().lower()
+
+
def _is_provisional_id(_id):
    """True for a string id that starts with ``filename:`` or ``entryid:``."""
    if not isinstance(_id, str):
        return False
    return _id.startswith(("filename:", "entryid:"))
+
+
def run_reconcile(db, args, now):
    """Delete provisional no-ID Sent copies that have a twin with a real
    Message-ID (same 'to' recipients + same subject + received_at within
    24 h). Copies without a twin are kept. With --dry-run only the plan is
    printed and nothing is deleted.

    The match uses STABLE content (e-mail addresses + normalised subject +
    time), NOT the EntryID: the provisional and the final copy have
    different EntryIDs.
    """
    counts = {"provisional": 0, "deletable": 0, "deleted": 0, "kept": 0}
    print("\n=== FAZE RECONCILE (smaz provizorni duplikaty Sent bez Message-ID) ===")
    emails = db[EMAILS_COL]
    window_seconds = 24 * 3600

    # 1) twin index: real-ID Sent documents -> (to_emails, subject) -> [received_at]
    twins = {}
    real_cursor = emails.find(
        {"jnj_folder": {"$regex": "Sent Items"}},
        {"_id": 1, "to": 1, "normalized_subject": 1, "subject": 1, "received_at": 1})
    for doc in real_cursor:
        if _is_provisional_id(doc.get("_id")):
            continue  # only documents with a real Message-ID count as twins
        key = (_to_emails(doc.get("to")), _subj_key(doc))
        if key[0] and key[1]:
            twins.setdefault(key, []).append(doc.get("received_at"))

    def has_close_twin(key, stamp):
        """True when a twin under ``key`` lies within the time window."""
        if not (key[0] and key[1]) or key not in twins or stamp is None:
            return False
        for twin_stamp in twins[key]:
            if twin_stamp is None:
                continue
            try:
                if abs((twin_stamp - stamp).total_seconds()) <= window_seconds:
                    return True
            except Exception:
                continue
        return False

    # 2) walk the provisional documents and look for a twin within 24 h
    doomed = []
    kept_unsent = []
    provisional_cursor = emails.find(
        {"jnj_folder": {"$regex": "Sent Items"},
         "_id": {"$regex": "^(filename:|entryid:)"}},
        {"_id": 1, "to": 1, "normalized_subject": 1, "subject": 1,
         "received_at": 1, "send_failed": 1})
    for prov in provisional_cursor:
        counts["provisional"] += 1
        key = (_to_emails(prov.get("to")), _subj_key(prov))
        if has_close_twin(key, prov.get("received_at")):
            counts["deletable"] += 1
            doomed.append((prov["_id"], prov.get("to")))
            continue
        counts["kept"] += 1
        if prov.get("send_failed") and len(kept_unsent) < 8:
            kept_unsent.append(prov.get("to"))

    print(f" Provizornich (Sent bez Message-ID): {counts['provisional']}")
    print(f" S nalezenym ID-dvojcetem (smazat): {counts['deletable']}")
    print(f" Bez dvojcete (ponechat): {counts['kept']}")
    if kept_unsent:
        print(" Priklady ponechanych s priznakem NEODESLANO:")
        for to in kept_unsent:
            print(f" NEODESLANO | {to}")

    if not doomed:
        print(" Nic ke smazani.")
        return counts

    if args.dry_run:
        print(" DRY-RUN: NIC se nemaze. Ukazka kandidatu na smazani:")
        for _id, to in doomed[:15]:
            print(f" - {_id} ({to})")
        if len(doomed) > 15:
            print(f" ... a dalsich {len(doomed) - 15}")
        return counts

    outcome = emails.delete_many({"_id": {"$in": [_id for _id, _ in doomed]}})
    counts["deleted"] = outcome.deleted_count
    print(f" SMAZANO provizornich duplikatu: {counts['deleted']}")
    return counts
+
+
+# ══════════════════════════════════════════════════════════════════════════════
+# MAIN
+# ══════════════════════════════════════════════════════════════════════════════
+
def main():
    """Command-line entry point: run the PARSE, SYNC, RECONCILE and ENRICH
    phases selected by the arguments, then print a summary.

    Exits with status 1 when MongoDB does not answer a ping. The Mongo client
    is closed on every path, including when a phase raises.
    """
    ap = argparse.ArgumentParser(description=f"jnj_tower_ingest v{SCRIPT_VERSION}")
    ap.add_argument("--dry-run", action="store_true", help="nic nezapise, jen plan")
    ap.add_argument("--full", action="store_true",
                    help="parse: reparsuj vse; sync: ignoruj watermark")
    ap.add_argument("--limit", type=int, default=0, help="max N souboru/radku (test)")
    ap.add_argument("--reindex", action="store_true", help="vynut indexy po parse")
    ap.add_argument("--force", action="store_true",
                    help="sync: ignoruj last_db zkratku")
    ap.add_argument("--parse-only", action="store_true", help="jen faze PARSE")
    ap.add_argument("--sync-only", action="store_true", help="jen faze SYNC")
    ap.add_argument("--enrich-only", action="store_true", help="jen faze ENRICH")
    ap.add_argument("--no-enrich", action="store_true", help="preskoc fazi ENRICH")
    ap.add_argument("--enrich-always", action="store_true",
                    help="spust enrich i bez novych dokumentu z parse")
    ap.add_argument("--reconcile", action="store_true",
                    help="spust fazi RECONCILE (smaz provizorni Sent duplikaty; "
                         "s --dry-run jen plan)")
    args = ap.parse_args()

    now = datetime.now(timezone.utc).replace(tzinfo=None)

    print(f"=== jnj_tower_ingest v{SCRIPT_VERSION} {'[DRY-RUN]' if args.dry_run else ''} ===")
    print(f"Start: {datetime.now():%Y-%m-%d %H:%M:%S}")
    print(f"MongoDB: {MONGO_URI} -> {MONGO_DB}")

    client = MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000)
    try:
        try:
            client.admin.command("ping")
            print(" MongoDB OK")
        except Exception as e:
            print(f"CHYBA: MongoDB nedostupna -- {e}")
            sys.exit(1)

        db = client[MONGO_DB]
        col = db[EMAILS_COL]
        state_col = db[STATE_COL]

        p_stats = s_stats = e_stats = r_stats = None
        if not args.sync_only and not args.enrich_only:
            p_stats = run_parse(col, state_col, args, now)
        if not args.parse_only and not args.enrich_only:
            s_stats = run_sync(db, args, now)
        # RECONCILE runs only on request (--reconcile); it needs the
        # jnj_folder values written by the sync phase.
        if args.reconcile and not args.parse_only and not args.enrich_only:
            r_stats = run_reconcile(db, args, now)
        if not args.parse_only and not args.sync_only:
            new_docs = p_stats["ok"] if p_stats else 0
            force = args.enrich_only or args.enrich_always or args.full
            e_stats = run_enrich(args, new_docs, force)

        # ── Summary ───────────────────────────────────────────────────────
        print("\n=== SOUHRN ===")
        if p_stats is not None:
            print(f" PARSE: rezim={p_stats['mode']} kandidatu={p_stats['candidates']} "
                  f"ok={p_stats['ok']} err={p_stats['err']}")
        if s_stats is not None:
            if s_stats.get("skipped"):
                print(" SYNC: preskoceno (zadna nova DB / uz zpracovana)")
            else:
                print(f" SYNC: radku={s_stats['total']} match={s_stats['matched']}")
        if r_stats is not None:
            akce = "plan" if args.dry_run else f"smazano={r_stats['deleted']}"
            print(f" RECON: provizornich={r_stats['provisional']} "
                  f"smazatelnych={r_stats['deletable']} {akce}")
        if e_stats is not None:
            if e_stats.get("ran"):
                print(f" ENRICH: spusten, exit code {e_stats['rc']}")
            else:
                print(f" ENRICH: preskoceno ({e_stats.get('skipped_reason')})")
        print(f"Konec: {datetime.now():%Y-%m-%d %H:%M:%S}")
    finally:
        # also reached on sys.exit() and when a phase raises
        client.close()


if __name__ == "__main__":
    main()
diff --git a/EmailsImport/jnj_unsent_probe_v1.0.py b/EmailsImport/jnj_unsent_probe_v1.0.py
new file mode 100644
index 0000000..815e3a6
--- /dev/null
+++ b/EmailsImport/jnj_unsent_probe_v1.0.py
@@ -0,0 +1,272 @@
+"""
+jnj_unsent_probe v1.1
+Nazev: jnj_unsent_probe_v1.0.py (verze 1.1.0 — bohatsi vypis)
+Verze: 1.1.0
+Datum: 2026-06-16
+Autor: vladimir.buzalka
+Bezi: JNJ stroj (Outlook MAPI), Python z Thonny. JEN CTE, nic nezapisuje/nenahrava.
+
+UCEL (diagnostika):
+ Cte e-maily PRIMO z ziveho Outlooku (MAPI) a vypisuje "identifikatory
+ neodeslani", ktere se pri exportu do .msg ztraci nebo nejsou spolehlive.
+ Slouzi k OVERENI, ktery zivy priznak spolehlive oznaci NEODESLANY e-mail
+ (napr. hustakova nabidka, kterou Exchange odmitl SendAsDenied).
+
+ Pro kazdou nalezenou polozku vypise vedle sebe:
+ - folder, subject, prijemce
+ - item.Sent (object model bool — odeslano?)
+ - PR_MESSAGE_FLAGS + dekodovane bity UNSENT / SUBMIT / READ
+ - ma Internet Message-ID? (PR_0x1035)
+ - ma PR_CLIENT_SUBMIT_TIME? (0x0039)
+ - PR_LAST_VERB_EXECUTED (0x1081)
+ - body_has_error (zive item.Body obsahuje SendAsDenied / could not be sent?)
+ - pokud ano -> vypise i snippet chyby
+
+ DULEZITE: tohle je SONDA. Z jejiho vystupu se rozhodne, ktery priznak je
+ spolehlivy detektor, a teprve pak se z toho udela produkcni flagovani.
+
+Filtry (argumenty):
+ --to SUBSTR jen polozky, jejichz prijemce obsahuje SUBSTR (napr. hustak)
+ --subject SUBSTR jen polozky s SUBSTR v predmetu (napr. icotrokinra)
+ --days N okno poslednich N dni dle ReceivedTime (default 90; 0 = vse)
+ --all vypsat VSE (jinak jen "podezrele" = bez Internet Message-ID)
+ --limit N max N vypsanych polozek (default 60)
+ --folders LIST carkou oddelene: inbox,sent,drafts,deleted,outbox,archive
+ (default vse uvedene)
+
+Priklady:
+ python jnj_unsent_probe_v1.0.py --to hustak --all
+ python jnj_unsent_probe_v1.0.py --subject icotrokinra --days 60
+"""
+import argparse
+import sys
+from datetime import datetime, timedelta
+
+import win32com.client
+
# Switch stdout to UTF-8 where the stream supports it; characters that cannot
# be encoded are replaced instead of raising.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")

# MAPI property tags (schema URLs passed to PropertyAccessor.GetProperty)
PR_MESSAGE_FLAGS = "http://schemas.microsoft.com/mapi/proptag/0x0E070003"
PR_INTERNET_MSG_ID = "http://schemas.microsoft.com/mapi/proptag/0x1035001E"
PR_CLIENT_SUBMIT_TIME = "http://schemas.microsoft.com/mapi/proptag/0x00390040"
PR_LAST_VERB = "http://schemas.microsoft.com/mapi/proptag/0x10810003"

# MSGFLAG bits tested against the PR_MESSAGE_FLAGS value
MSGFLAG_READ = 0x1
MSGFLAG_UNSENT = 0x8
MSGFLAG_SUBMIT = 0x4

# Default folder IDs (OlDefaultFolders), keyed by the names accepted in --folders
DEFAULT_FOLDERS = {
    "inbox": 6, "sent": 5, "drafts": 16, "deleted": 3, "outbox": 4,
}

# Substrings searched for in the live item body as traces of a failed send
ERR_MARKERS = ("SendAsDenied", "could not be sent", "TransportSend",
               "MapiExceptionSendAs", "nemáte oprávnění", "on behalf of")
+
+
def prop(item, tag, default=None):
    """Read one MAPI property from ``item`` through its PropertyAccessor.

    Returns ``default`` when the lookup raises or yields ``None``; any other
    value (including falsy ones such as 0) is returned unchanged.
    """
    try:
        value = item.PropertyAccessor.GetProperty(tag)
    except Exception:
        return default
    if value is None:
        return default
    return value
+
+
def get_to(item):
    """Return the item's ``To`` value, or "" when it is empty or unreadable."""
    try:
        to_line = item.To
        return to_line if to_line else ""
    except Exception:
        return ""
+
+
def body_error_snippet(item):
    """Look for a send-failure trace in the live body (``item.Body``).

    Markers are tried in ERR_MARKERS order. For the first one found, a window
    from 10 characters before the hit to 90 characters after its start is
    returned, with CR/LF turned into spaces. Returns ``None`` when the body
    cannot be read or no marker occurs.
    """
    try:
        body = item.Body or ""
    except Exception:
        return None
    for marker in ERR_MARKERS:
        pos = body.find(marker)
        if pos < 0:
            continue
        window = body[max(0, pos - 10):pos + 90]
        return window.replace("\r", " ").replace("\n", " ")
    return None
+
+
def describe(item):
    """Collect the send-status indicators of one Outlook item into a dict.

    The result holds the truncated subject and To line, ``item.Sent``, the raw
    PR_MESSAGE_FLAGS value with its UNSENT/SUBMIT/READ bits decoded, whether an
    Internet Message-ID and a client submit time are present, the last-verb
    property, an error snippet from the body, the received date and the last
    20 characters of the EntryID. Unreadable fields fall back to None, "?" or
    a falsy default rather than raising.
    """
    subject_text = str(getattr(item, "Subject", "") or "")[:42]
    to_text = get_to(item)[:32]

    try:
        was_sent = bool(item.Sent)
    except Exception:
        was_sent = None

    flag_bits = prop(item, PR_MESSAGE_FLAGS, 0) or 0

    # Try the 0x1035001E tag first, then the Unicode (0x1035001F) variant.
    message_id = prop(item, PR_INTERNET_MSG_ID) or prop(
        item, "http://schemas.microsoft.com/mapi/proptag/0x1035001F")

    submitted_at = prop(item, PR_CLIENT_SUBMIT_TIME)
    verb = prop(item, PR_LAST_VERB)
    error_text = body_error_snippet(item)

    try:
        if item.ReceivedTime:
            received = item.ReceivedTime.strftime("%Y-%m-%d %H:%M")
        else:
            received = "?"
    except Exception:
        received = "?"

    try:
        entry_tail = str(item.EntryID)[-20:]
    except Exception:
        entry_tail = "?"

    info = {}
    info["subject"] = subject_text
    info["to"] = to_text
    info["sent"] = was_sent
    info["flags"] = flag_bits
    info["unsent"] = bool(flag_bits & MSGFLAG_UNSENT)
    info["submit"] = bool(flag_bits & MSGFLAG_SUBMIT)
    info["read"] = bool(flag_bits & MSGFLAG_READ)
    info["has_mid"] = bool(message_id)
    info["mid_val"] = str(message_id)[:60] if message_id else "-"
    info["submit_time"] = bool(submitted_at)
    info["last_verb"] = verb
    info["err"] = error_text
    info["rdate"] = received
    info["eid"] = entry_tail
    return info
+
+
def matches(item, args):
    """Tell whether ``item`` passes the --to and --subject filters.

    Both filters are case-insensitive substring tests and an empty filter
    always passes. For --to, the To line is checked first and, only if that
    misses, the joined Address/Name of every recipient.
    """
    if args.to:
        needle = args.to.lower()
        if needle not in get_to(item).lower():
            try:
                # Second chance: look through the individual recipients.
                joined = "; ".join(
                    str(rcp.Address or rcp.Name or "") for rcp in item.Recipients)
            except Exception:
                joined = ""
            if needle not in joined.lower():
                return False

    if args.subject:
        subject_text = str(getattr(item, "Subject", "") or "")
        if args.subject.lower() not in subject_text.lower():
            return False

    return True
+
+
def walk(folder, path, args, cutoff, out, counters):
    """Recursively scan ``folder`` and append described items to ``out``.

    Only items whose MessageClass starts with IPM.NOTE are considered. Items
    older than ``cutoff`` (when one is given) or failing ``matches`` are
    skipped. Every item passing those filters increments ``counters["seen"]``;
    unless ``args.all`` is set, items that carry a Message-ID are then dropped
    as not suspicious. Scanning stops as soon as ``out`` holds ``args.limit``
    entries. Subfolders are visited after the folder's own items.
    """
    here = f"{path}/{folder.Name}"

    try:
        entries = folder.Items
        try:
            entries.Sort("[ReceivedTime]", True)
        except Exception:
            pass  # sorting is best effort only
    except Exception:
        return

    for entry in entries:
        if len(out) >= args.limit:
            return

        try:
            message_class = str(getattr(entry, "MessageClass", "")).upper()
        except Exception:
            continue
        if not message_class.startswith("IPM.NOTE"):
            continue

        if cutoff is not None:
            try:
                received = entry.ReceivedTime
                too_old = (received is not None
                           and received.replace(tzinfo=None) < cutoff)
            except Exception:
                too_old = False  # unreadable date: keep the item
            if too_old:
                continue

        if not matches(entry, args):
            continue

        counters["seen"] += 1
        info = describe(entry)
        if info["has_mid"] and not args.all:
            continue  # has a Message-ID -> not suspicious (unless --all)
        info["folder"] = here
        out.append(info)

    try:
        children = list(folder.Folders)
    except Exception:
        children = []
    for child in children:
        if len(out) >= args.limit:
            return
        walk(child, here, args, cutoff, out, counters)
+
+
def find_archive(ns):
    """Find the folder named "archive" next to the default Inbox.

    The siblings of default folder 6 are compared by name, ignoring case and
    surrounding whitespace. Returns ``(folder, mailbox_name)`` for the first
    hit and ``(None, None)`` when nothing matches or any lookup fails.
    """
    try:
        mailbox = ns.GetDefaultFolder(6).Parent
        for candidate in mailbox.Folders:
            try:
                if str(candidate.Name).strip().lower() != "archive":
                    continue
                return candidate, mailbox.Name
            except Exception:
                continue
    except Exception:
        pass
    return None, None
+
+
def main():
    """Command-line entry point of the probe.

    Parses the filter arguments, walks the requested Outlook folders through
    MAPI, prints the collected indicators for each reported item and ends with
    a one-line summary. The READ bit and the last-verb value are printed as
    well, and folder names not known to the script are reported instead of
    being skipped silently.
    """
    ap = argparse.ArgumentParser(description="jnj_unsent_probe v1.0 (diagnostika)")
    ap.add_argument("--to", default="")
    ap.add_argument("--subject", default="")
    ap.add_argument("--days", type=int, default=90)
    ap.add_argument("--all", action="store_true")
    ap.add_argument("--limit", type=int, default=60)
    ap.add_argument("--folders", default="inbox,sent,drafts,deleted,outbox,archive")
    args = ap.parse_args()

    # --days 0 disables the time window entirely.
    cutoff = None if args.days == 0 else (datetime.now() - timedelta(days=args.days))
    want = [x.strip().lower() for x in args.folders.split(",") if x.strip()]

    print("=== jnj_unsent_probe v1.0 ===")
    print(f"Filtr: to~'{args.to}' subject~'{args.subject}' okno={'vse' if cutoff is None else str(args.days)+'d'} "
          f"| {'VSE' if args.all else 'jen bez Message-ID'} | slozky={want}")

    outlook = win32com.client.Dispatch("Outlook.Application")
    ns = outlook.GetNamespace("MAPI")

    out = []
    counters = {"seen": 0}
    for name in want:
        if len(out) >= args.limit:
            break
        if name == "archive":
            # Archive is not a default folder; it is looked up by name.
            arch, mbox = find_archive(ns)
            if arch is not None:
                walk(arch, f"/{mbox}", args, cutoff, out, counters)
            else:
                print(" (Archive nenalezena)")
            continue
        fid = DEFAULT_FOLDERS.get(name)
        if fid is None:
            # A typo in --folders would otherwise shrink the scan unnoticed.
            print(f" ({name}: neznama slozka, preskakuji)")
            continue
        try:
            root = ns.GetDefaultFolder(fid)
        except Exception as e:
            print(f" ({name} nedostupna: {e})")
            continue
        walk(root, f"/{root.Parent.Name}", args, cutoff, out, counters)

    print(f"\nProsmatrovano polozek: {counters['seen']} vypsano: {len(out)}\n")
    n_unsent = n_noid = n_err = 0
    for i, d in enumerate(out, 1):
        if d["unsent"]:
            n_unsent += 1
        if not d["has_mid"]:
            n_noid += 1
        if d["err"]:
            n_err += 1
        print(f"[{i}] {d['folder']} ({d['rdate']})")
        print(f" subject : {d['subject']}")
        print(f" to : {d['to']}")
        print(f" Sent={d['sent']} UNSENT={d['unsent']} SUBMIT={d['submit']} READ={d['read']} "
              f"has_MsgID={d['has_mid']} submit_time={d['submit_time']} "
              f"last_verb={d['last_verb']} ERR={'YES' if d['err'] else '-'}")
        print(f" MsgID : {d['mid_val']}")
        print(f" EntryID[-20:] (=jmeno .msg): {d['eid']}")
        if d["err"]:
            print(f" ERR : ...{d['err']}...")
        print()

    print(f"SOUHRN: vypsano={len(out)} UNSENT-flag={n_unsent} bez-MsgID={n_noid} s-chybou-v-tele={n_err}")
+
+
# Script entry point: run the probe only when the file is executed directly.
if __name__ == "__main__":
    main()
diff --git a/EmailsImport/mcp_emaily.py b/EmailsImport/mcp_emaily.py
index f32f954..0b3d971 100644
--- a/EmailsImport/mcp_emaily.py
+++ b/EmailsImport/mcp_emaily.py
@@ -264,7 +264,7 @@ def search(
LIMIT %(limit)s
"""
params = {
- "query": query, "mboxes": mboxes,
+ "query": tsq_text, "mboxes": mboxes,
"since": since_dt, "until": until_dt,
"folder": folder_contains,
"folder_like": f"%{folder_contains}%" if folder_contains else None,
diff --git a/Feasibility/TRASH/forward_offer_outlook_v1.0.md b/Feasibility/TRASH/forward_offer_outlook_v1.0.md
new file mode 100644
index 0000000..6f3ef7c
--- /dev/null
+++ b/Feasibility/TRASH/forward_offer_outlook_v1.0.md
@@ -0,0 +1,48 @@
+# forward_offer_outlook_v1.0.py
+
+**Verze:** 1.0 · **Datum:** 2026-06-16
+
+JNJ-native skript (pywin32 / MAPI). V odeslané poště Outlooku najde **původní
+úvodní nabídku** odeslanou konkrétnímu lékaři dne **31.05.2026** a vytvoří její
+**skutečný Outlook Forward** — zachová originál včetně data, formátování i
+hlavičky (tj. to, co `vbcz-email` `.eml` udělat nedokáže).
+
+## Spuštění (na JNJ stroji s Outlookem)
+```
+pip install pywin32 # jednorázově
+python forward_offer_outlook_v1.0.py
+```
+
+## Co dělá
+1. Otevře MAPI namespace, najde složku **Odeslané** účtu `vbuzalka@its.jnj.com`.
+2. Pro každého lékaře v `TARGETS` najde původní e-mail podle:
+ - subjekt začíná „Nabídka spolupráce na klinickém hodnocení…" (odliší od
+ připomínek `[2. připomínka]` a odpovědí `RE:`),
+ - datum odeslání = **31.05.2026**,
+ - příjemce **To** = e-mail lékaře.
+3. Zavolá `.Forward()` → předvyplní **To** (lékař) + **CC** (Kocourková,
+ Bartošová), volitelně přidá krátký úvod a podle `ACTION`:
+ - `display` (default) — jen **otevře okno** Forwardu, NEODESÍLÁ,
+ - `draft` — uloží do Konceptů,
+ - `send` — odešle.
+
+## Konfigurace (nahoře ve skriptu)
+- `TARGETS` — seznam adres. **Defaultně jen Hušták** (odladění); ostatní
+ (Voska, Šerclová, Mináříková) odkomentovat až po ověření.
+- `CC_RECIPIENTS` — Kocourková + Bartošová.
+- `ADD_INTRO` / `INTRO_HTML` — krátký úvod nad přeposlaným originálem
+ (`False` = čisté přeposlání bez textu navíc).
+- `ACTION` — `display` / `draft` / `send`.
+- `SUBJECT_STARTSWITH`, `ORIG_DATE` — kritéria pro nalezení originálu.
+
+## Pozn.
+- Porovnání subjektu je bez diakritiky a malými písmeny (robustní vůči
+ „prípravku"/„přípravku").
+- Když nenajde právě jednu shodu, lékaře **přeskočí** a vypíše varování
+ (nehádá).
+- `display` nevyvolává Outlookový „program se snaží odeslat" dialog —
+ odeslání je vždy na tobě.
+- Pokud by JNJ Outlook měl JNJ schránku jako jiný než výchozí účet, skript
+ si složku Odeslané najde podle `SENDER_SMTP`.
diff --git a/Feasibility/TRASH/forward_offer_outlook_v1.0.py b/Feasibility/TRASH/forward_offer_outlook_v1.0.py
new file mode 100644
index 0000000..e3cf2cb
--- /dev/null
+++ b/Feasibility/TRASH/forward_offer_outlook_v1.0.py
@@ -0,0 +1,175 @@
+# -*- coding: utf-8 -*-
+# =============================================================================
+# Nazev: forward_offer_outlook_v1.0.py
+# Verze: 1.0
+# Datum: 2026-06-16
+# Popis: JNJ-native skript. Pres MAPI (Outlook, pywin32) najde v odeslane
+# poste PUVODNI uvodni nabidku ("Nabidka spoluprace na klinickem
+# hodnoceni pripravku icotrokinra...") odeslanou konkretnimu lekari
+# dne 31.05.2026 a vytvori jeji FORWARD (skutecny Outlook Forward,
+# tj. zachova original vcetne data, formatovani i hlavicky).
+# Forward predvyplni prijemce (lekar) + CC (Kocourkova, Bartosova),
+# volitelne prida kratky uvod a OTEVRE okno k rucni kontrole/odeslani.
+# Pouziti: Spustit v JNJ Pythonu, kde je nakonfigurovany Outlook s JNJ schrankou.
+# Vyzaduje pywin32: pip install pywin32
+# python forward_offer_outlook_v1.0.py
+# Bezpecnost: ACTION = "display" => jen otevre Forward, NEODESILA.
+# "draft" => ulozi do Konceptu. "send" => odesle (uvazene zapnout).
+# =============================================================================
+
+import sys
+import datetime
+import win32com.client # pywin32
+
+# ----------------------------- KONFIGURACE -----------------------------------
+
+# JNJ schranka (odesilatel puvodnich nabidek). Pouzije se jeji slozka Odeslane.
+SENDER_SMTP = "vbuzalka@its.jnj.com"
+
+# Komu forwardovat. Pro odladeni zatim JEN Hustak; ostatni odkomentuj az to klapne.
+TARGETS = [
+ "rastislav.hustak@fntt.sk",
+ # "voska@nemocnice-horovice.cz",
+ # "sercl@seznam.cz",
+ # "petra.minarikova@uvn.cz",
+]
+
+# CC na kazdy forward (nas lokalni tym).
+CC_RECIPIENTS = ["AKocourk@ITS.JNJ.com", "EBartoso@ITS.JNJ.com"]
+
+# Identifikace puvodniho e-mailu:
+# - subjekt zacina na (po ocisteni) tento text (odlisi nabidku od pripominek/RE)
+SUBJECT_STARTSWITH = "nabidka spoluprace na klinickem hodnoceni"
+# - datum odeslani originalu
+ORIG_DATE = datetime.date(2026, 5, 31)
+
# Optional short introduction placed above the forwarded original.
# ADD_INTRO = False => plain forward with no extra text at all.
ADD_INTRO = True
# NOTE(review): the line-break markup of this literal was lost, leaving the
# strings split across physical lines (a syntax error). It is rebuilt here with
# one <br> per lost break; confirm against the intended HTML.
INTRO_HTML = (
    "Dobry den,<br>"
    "dovoluji si Vam znovu preposlat nize uvedenou nabidku ze dne "
    "31. kvetna 2026. Velmi bych ocenil Vase vyjadreni — a to "
    "i v pripade, ze o ucast nemate zajem. Dekuji.<br>"
    "S pozdravem<br>MUDr. Vladimir Buzalka<br>"
    "<br>"
)
+
+# Co s vytvorenym forwardem: "display" | "draft" | "send"
+ACTION = "display"
+
+# -----------------------------------------------------------------------------
+
+OL_FOLDER_SENT = 5 # olFolderSentMail
+OL_TO, OL_CC = 1, 2 # olTo, olCC
+PR_SMTP = "http://schemas.microsoft.com/mapi/proptag/0x39FE001E"
+
+
def norm(s):
    """Lower-case ``s``, strip diacritics and collapse whitespace.

    Used for subject comparison; ``None`` and "" both give "".
    """
    import unicodedata
    decomposed = unicodedata.normalize("NFKD", s or "")
    base_chars = [ch for ch in decomposed if unicodedata.combining(ch) == 0]
    words = "".join(base_chars).lower().split()
    return " ".join(words)
+
+
def smtp_of(recipient):
    """Return the recipient's address in lower case, or "" when unavailable.

    The PR_SMTP MAPI property is tried first. When reading it fails OR it comes
    back empty, ``recipient.Address`` is used instead. Previously an empty
    property value returned "" without ever consulting ``Address``.
    """
    try:
        smtp = (recipient.PropertyAccessor.GetProperty(PR_SMTP) or "").lower()
    except Exception:
        smtp = ""
    if smtp:
        return smtp
    try:
        return (recipient.Address or "").lower()
    except Exception:
        return ""
+
+
def get_sent_folder(ns):
    """Return the Sent Items folder of the account matching SENDER_SMTP.

    Falls back to the namespace's default Sent Items folder when no account
    matches or the account lookup raises.
    """
    try:
        wanted = SENDER_SMTP.lower()
        for account in ns.Accounts:
            address = (account.SmtpAddress or "").lower()
            if address == wanted:
                return account.DeliveryStore.GetDefaultFolder(OL_FOLDER_SENT)
    except Exception:
        pass
    return ns.GetDefaultFolder(OL_FOLDER_SENT)
+
+
def find_original(items, target_email):
    """Find the original offer(s): subject prefix + send date + To recipient.

    Returns every mail item (Class 43) whose normalized subject starts with
    SUBJECT_STARTSWITH, whose SentOn date equals ORIG_DATE and which has a To
    recipient equal to ``target_email`` (case-insensitive). Items that raise
    while being inspected are skipped.
    """
    wanted = target_email.lower()
    hits = []
    for mail in items:
        try:
            if mail.Class != 43:  # 43 = olMail
                continue
            if not norm(mail.Subject).startswith(SUBJECT_STARTSWITH):
                continue
            sent_on = mail.SentOn
            if sent_on is None or sent_on.date() != ORIG_DATE:
                continue
            if any(rcp.Type == OL_TO and smtp_of(rcp) == wanted
                   for rcp in mail.Recipients):
                hits.append(mail)
        except Exception:
            continue
    return hits
+
+
def main():
    """Create an Outlook Forward of the original offer for every target.

    For each address in TARGETS the original mail is looked up in Sent Items;
    targets with zero or several matches are skipped with a warning. The
    forward gets the target as To and CC_RECIPIENTS as CC, optionally the
    INTRO_HTML prefix, and is then displayed, saved or sent according to
    ACTION.
    """
    outlook = win32com.client.Dispatch("Outlook.Application")
    ns = outlook.GetNamespace("MAPI")
    sent = get_sent_folder(ns)
    items = sent.Items
    items.Sort("[SentOn]", True)  # newest first

    print("Slozka Odeslane:", sent.FolderPath)
    print("Rezim ACTION :", ACTION)
    print("=" * 60)

    for email in TARGETS:
        found = find_original(items, email)
        if not found:
            print(f"[!] {email}: PUVODNI NABIDKA NENALEZENA (subjekt/datum/prijemce). Preskakuji.")
            continue
        if len(found) > 1:
            print(f"[!] {email}: nalezeno {len(found)} shod — nejednoznacne, preskakuji (over rucne).")
            continue

        orig = found[0]
        fwd = orig.Forward()  # real Outlook Forward of the original item

        # recipients
        fwd.Recipients.Add(email).Type = OL_TO
        for cc in CC_RECIPIENTS:
            fwd.Recipients.Add(cc).Type = OL_CC
        fwd.Recipients.ResolveAll()

        # optional introduction above the forwarded block
        if ADD_INTRO:
            try:
                fwd.HTMLBody = INTRO_HTML + fwd.HTMLBody
            except Exception as exc:
                # Best effort: continue without the intro, but say so.
                print(f"[!] {email}: uvod se nepodarilo vlozit ({exc}); pokracuji bez nej.")

        # Read the subject BEFORE acting: after Send() the item reference may
        # no longer be usable, and a failure there would abort the remaining
        # targets even though this mail has already gone out.
        subject = fwd.Subject

        if ACTION == "send":
            fwd.Send()
            print(f"[ODESLANO] {email} (subjekt: {subject})")
        elif ACTION == "draft":
            fwd.Save()
            print(f"[KONCEPT ] {email} (subjekt: {subject})")
        else:  # display
            fwd.Display()
            print(f"[OTEVRENO] {email} (subjekt: {subject}) — zkontroluj a posli rucne")

    print("=" * 60)
    print("Hotovo.")
+
+
# Script entry point: any exception escaping main() is printed with the
# "CHYBA:" prefix and the process exits with status 1.
if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        print("CHYBA:", e)
        sys.exit(1)
diff --git a/Feasibility/forward_offer_outlook_v1.1.md b/Feasibility/forward_offer_outlook_v1.1.md
new file mode 100644
index 0000000..921dfc4
--- /dev/null
+++ b/Feasibility/forward_offer_outlook_v1.1.md
@@ -0,0 +1,45 @@
+# forward_offer_outlook_v1.1.py
+
+**Verze:** 1.1 · **Datum:** 2026-06-16
+
+JNJ-native skript (pywin32 / MAPI). Pro daného lékaře vytvoří **skutečný Outlook
+Forward** jeho původní úvodní nabídky (zachová originál včetně data 31.05.2026,
+formátování i hlavičky).
+
+## Změna oproti v1.0
+- **Primárně hledá zprávu přímo podle jednoznačného `EntryID`** (MAPI) přes
+ `Namespace.GetItemFromID(entry_id, store_id)` → nalezení „na první dobrou",
+ nulová nejednoznačnost.
+- `EntryID` se bere z **JNJ SQLite** (`messages.entry_id`). Pro Huštáka je už
+ předvyplněný v `TARGETS`.
+- **Fallback** (když EntryID nesedne) = původní heuristika subjekt + datum
+ 31.05.2026 + příjemce To.
+
+## Spuštění (JNJ stroj s Outlookem)
+```
+pip install pywin32
+python forward_offer_outlook_v1.1.py
+```
+
+## Konfigurace
+- `TARGETS` — list `{"email", "entry_id"}`. Defaultně jen **Hušták**
+ (ostatní zakomentované; doplň jim EntryID ze SQLite, jinak poběží fallback).
+- `CC_RECIPIENTS` — Kocourková + Bartošová.
+- `ADD_INTRO` / `INTRO_HTML` — krátký úvod nad forwardem (`False` = čistý forward).
+- `ACTION` — `display` (default, jen otevře) / `draft` / `send`.
+
+## Kde vzít EntryID dalších lékařů
+JNJ SQLite, tabulka `messages`:
+```sql
+SELECT entry_id, subject, received_at, source
+FROM messages
+WHERE source LIKE '%<hledany_retezec>%';
+```
+(`message_id` je tam uložený jako `entryid:` + EntryID; sloupec `entry_id`
+obsahuje čistý EntryID. Internet Message-ID `<…@…>` v datech NENÍ —
+`.msg` jsou X-Unsent drafty, RFC Message-ID přiděluje až Exchange při odeslání.)
+
+## Pozn.
+- `GetItemFromID` zkusí variantu se `store_id` i bez něj.
+- `display` nevyvolá Outlookový „program se snaží odeslat" dialog.
+- Když se nenajde ani EntryID, ani jednoznačný fallback → lékaře přeskočí.
diff --git a/Feasibility/forward_offer_outlook_v1.1.py b/Feasibility/forward_offer_outlook_v1.1.py
new file mode 100644
index 0000000..edaf0a2
--- /dev/null
+++ b/Feasibility/forward_offer_outlook_v1.1.py
@@ -0,0 +1,203 @@
+# -*- coding: utf-8 -*-
+# =============================================================================
+# Nazev: forward_offer_outlook_v1.1.py
+# Verze: 1.1
+# Datum: 2026-06-16
+# Popis: JNJ-native skript (MAPI / pywin32). Pro daneho lekare najde PUVODNI
+# uvodni nabidku ve slozce Odeslane a vytvori jeji skutecny Outlook
+# FORWARD (zachova original vcetne data 31.05.2026, formatu i hlavicky).
+# PRIMARNE hleda zpravu PRIMO podle jednoznacneho EntryID (MAPI) pres
+# GetItemFromID -> nalezeni "na prvni dobrou", nulova nejednoznacnost.
+# FALLBACK (kdyz EntryID nesedne) = subjekt + datum + prijemce To.
+# Forward predvyplni To (lekar) + CC (Kocourkova, Bartosova),
+# volitelne prida kratky uvod a OTEVRE okno k rucni kontrole/odeslani.
+# Pouziti: Spustit v JNJ Pythonu s nakonfigurovanym Outlookem (JNJ schranka).
+# pip install pywin32 ; python forward_offer_outlook_v1.1.py
+# Bezpecnost: ACTION="display" => jen otevre Forward, NEODESILA.
+# Zmeny v1.1: primarni hledani podle EntryID (GetItemFromID) ziskaneho z JNJ
+# SQLite (tabulka messages.entry_id). v1.0 hledalo jen heuristicky.
+# =============================================================================
+
+import sys
+import datetime
+import win32com.client # pywin32
+
+# ----------------------------- KONFIGURACE -----------------------------------
+
+SENDER_SMTP = "vbuzalka@its.jnj.com"
+
+# Cile. entry_id = jednoznacny MAPI EntryID puvodni nabidky (z JNJ SQLite,
+# tabulka messages.entry_id). Kdyz entry_id chybi/nesedne, pouzije se fallback
+# podle subjektu+data+prijemce.
+TARGETS = [
+ {
+ "email": "rastislav.hustak@fntt.sk",
+ "entry_id": "000000008431528824F96740840A72BAD506477D070092544C32292E3A46AC27E91F5A4CDB1100000007A91B00005BFD391558BBC54FA9172E1614A2FC13000530495B210000",
+ },
+ # {"email": "voska@nemocnice-horovice.cz", "entry_id": ""},
+ # {"email": "sercl@seznam.cz", "entry_id": ""},
+ # {"email": "petra.minarikova@uvn.cz", "entry_id": ""},
+]
+
+CC_RECIPIENTS = ["AKocourk@ITS.JNJ.com", "EBartoso@ITS.JNJ.com"]
+
+# Fallback kriteria (kdyz EntryID nesedne):
+SUBJECT_STARTSWITH = "nabidka spoluprace na klinickem hodnoceni"
+ORIG_DATE = datetime.date(2026, 5, 31)
+
# ADD_INTRO = False => plain forward without the introduction below.
ADD_INTRO = True
# NOTE(review): the line-break markup of this literal was lost, leaving the
# strings split across physical lines (a syntax error). It is rebuilt here with
# one <br> per lost break; confirm against the intended HTML.
INTRO_HTML = (
    "Dobry den,<br>"
    "dovoluji si Vam znovu preposlat nize uvedenou nabidku ze dne "
    "31. kvetna 2026. Velmi bych ocenil Vase vyjadreni — a to "
    "i v pripade, ze o ucast nemate zajem. Dekuji.<br>"
    "S pozdravem<br>MUDr. Vladimir Buzalka<br>"
    "<br>"
)
+
+ACTION = "display" # "display" | "draft" | "send"
+
+# -----------------------------------------------------------------------------
+
+OL_FOLDER_SENT = 5
+OL_TO, OL_CC = 1, 2
+PR_SMTP = "http://schemas.microsoft.com/mapi/proptag/0x39FE001E"
+
+
def norm(s):
    """Return ``s`` lower-cased, without diacritics, whitespace collapsed.

    ``None`` is treated as the empty string.
    """
    import unicodedata
    text = unicodedata.normalize("NFKD", s if s else "")
    kept = []
    for ch in text:
        if unicodedata.combining(ch):
            continue  # drop combining marks left over from decomposition
        kept.append(ch)
    return " ".join("".join(kept).lower().split())
+
+
def smtp_of(recipient):
    """Return the recipient's address in lower case, or "" when unavailable.

    PR_SMTP is read through the PropertyAccessor first. ``recipient.Address``
    is the fallback both when that read raises and when it yields an empty
    value. Previously an empty value returned "" without trying ``Address``.
    """
    try:
        smtp = (recipient.PropertyAccessor.GetProperty(PR_SMTP) or "").lower()
    except Exception:
        smtp = ""
    if not smtp:
        try:
            smtp = (recipient.Address or "").lower()
        except Exception:
            smtp = ""
    return smtp
+
+
def get_sent_folder(ns):
    """Pick the Sent Items folder of the account whose SMTP is SENDER_SMTP.

    When no account matches, or enumerating accounts raises, the namespace's
    default Sent Items folder is returned instead.
    """
    try:
        target = SENDER_SMTP.lower()
        for acct in ns.Accounts:
            if (acct.SmtpAddress or "").lower() != target:
                continue
            return acct.DeliveryStore.GetDefaultFolder(OL_FOLDER_SENT)
    except Exception:
        pass
    return ns.GetDefaultFolder(OL_FOLDER_SENT)
+
+
def get_by_entryid(ns, entry_id, store_id):
    """Fetch an item directly by EntryID; return it, or None when not found.

    ``GetItemFromID`` is tried first with ``store_id`` and then without it.
    A call that raises or returns None counts as a miss. An empty ``entry_id``
    returns None without calling Outlook.
    """
    if not entry_id:
        return None
    attempts = [(entry_id, store_id), (entry_id,)]
    for call_args in attempts:
        try:
            item = ns.GetItemFromID(*call_args)
        except Exception:
            item = None
        if item is not None:
            return item
    return None
+
+
def find_original_fallback(items, target_email):
    """Fallback lookup by subject prefix, send date and To recipient.

    Returns the list of mail items (Class 43) whose normalized subject starts
    with SUBJECT_STARTSWITH, whose SentOn date is ORIG_DATE and which list
    ``target_email`` (case-insensitive) among their To recipients. Items that
    raise during inspection are ignored.
    """
    wanted = target_email.lower()

    def _is_match(mail):
        # All three criteria must hold; cheapest checks first.
        if mail.Class != 43:  # 43 = olMail
            return False
        if not norm(mail.Subject).startswith(SUBJECT_STARTSWITH):
            return False
        sent_on = mail.SentOn
        if sent_on is None or sent_on.date() != ORIG_DATE:
            return False
        for rcp in mail.Recipients:
            if rcp.Type == OL_TO and smtp_of(rcp) == wanted:
                return True
        return False

    result = []
    for mail in items:
        try:
            if _is_match(mail):
                result.append(mail)
        except Exception:
            continue
    return result
+
+
def make_forward(orig, email):
    """Build a Forward of ``orig`` addressed to ``email`` and return it.

    ``email`` is added as a To recipient and every CC_RECIPIENTS entry as CC,
    then all recipients are resolved. When ADD_INTRO is set, INTRO_HTML is
    put in front of the HTML body; a failure to do so is ignored.
    """
    forward = orig.Forward()
    recipients = forward.Recipients
    recipients.Add(email).Type = OL_TO
    for address in CC_RECIPIENTS:
        recipients.Add(address).Type = OL_CC
    recipients.ResolveAll()

    if not ADD_INTRO:
        return forward
    try:
        forward.HTMLBody = INTRO_HTML + forward.HTMLBody
    except Exception:
        pass  # the intro is optional; keep the plain forward
    return forward
+
+
def main():
    """Forward the original offer to every target in TARGETS.

    Each target is first looked up directly by its EntryID. If that misses,
    the Sent Items folder is searched by subject, date and recipient; the
    target is skipped unless that search yields exactly one item. The forward
    is then sent, saved as a draft or displayed, depending on ACTION.
    """
    outlook = win32com.client.Dispatch("Outlook.Application")
    ns = outlook.GetNamespace("MAPI")
    sent_folder = get_sent_folder(ns)
    store_id = sent_folder.Store.StoreID
    fallback_items = None  # loaded lazily, only if a fallback search is needed

    separator = "=" * 60
    print("Slozka Odeslane:", sent_folder.FolderPath)
    print("Rezim ACTION :", ACTION)
    print(separator)

    for target in TARGETS:
        email = target["email"]
        source = get_by_entryid(ns, target.get("entry_id", ""), store_id)
        found_via = "EntryID"

        if source is None:
            # heuristic fallback
            if fallback_items is None:
                fallback_items = sent_folder.Items
                fallback_items.Sort("[SentOn]", True)
            candidates = find_original_fallback(fallback_items, email)
            if len(candidates) == 0:
                print(f"[!] {email}: NENALEZENO (EntryID nesedl ani fallback). Preskakuji.")
                continue
            if len(candidates) > 1:
                print(f"[!] {email}: fallback nasel {len(candidates)} shod — nejednoznacne, preskakuji.")
                continue
            source = candidates[0]
            found_via = "fallback(subjekt+datum+prijemce)"

        # Show what was found so the operator can verify it is the right offer.
        try:
            print(f" nalezeno pres {found_via}: \"{source.Subject}\" | odeslano {source.SentOn}")
        except Exception:
            pass

        forward = make_forward(source, email)

        if ACTION == "send":
            forward.Send()
            print(f"[ODESLANO] {email}")
        elif ACTION == "draft":
            forward.Save()
            print(f"[KONCEPT ] {email}")
        else:
            forward.Display()
            print(f"[OTEVRENO] {email} — zkontroluj a posli rucne")

    print(separator)
    print("Hotovo.")
+
+
# Script entry point: any exception escaping main() is printed with the
# "CHYBA:" prefix and the process exits with status 1.
if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        print("CHYBA:", e)
        sys.exit(1)
diff --git a/claude-memory/MEMORY.md b/claude-memory/MEMORY.md
index eee1662..14bdd5b 100644
--- a/claude-memory/MEMORY.md
+++ b/claude-memory/MEMORY.md
@@ -17,3 +17,4 @@
- [Claude Code learning path](project_claude_learning.md) — Level 2 Intermediate, mezery: Skills/Subagenty/Hooks/Print mode, tutoriál v `claude-howto/`
- [Admin PowerShell nefunguje](feedback_admin_powershell.md) — příkazy vyžadující admin práva rovnou napsat uživateli, nezkoušet alternativy
- [SeaweedFS na Tower1](project_seaweedfs.md) — kontejner na .50 (bloby na array, metadata SSD), noční backup filer metadat na tower .76, retence 7+4
+- [SeaweedFS offsite záloha](project_seaweedfs_offsite.md) — offsite na Synology DS213+ v ordinaci; NAS-pull rsync přes MikroTik 2250→.50 FUSE mount; ~50 GiB ověřeno; runbook v Triliu
diff --git a/claude-memory/project_seaweedfs_offsite.md b/claude-memory/project_seaweedfs_offsite.md
new file mode 100644
index 0000000..af6d5ac
--- /dev/null
+++ b/claude-memory/project_seaweedfs_offsite.md
@@ -0,0 +1,23 @@
+---
+name: project-seaweedfs-offsite
+description: "Offsite záloha SeaweedFS blobů na Synology DS213+ v ordinaci — NAS si tahá rsync přes MikroTik na Tower1 .50 FUSE mount; postaveno 2026-06-14/15, runbook v Triliu"
+metadata:
+ node_type: memory
+ type: project
+ originSessionId: a121e6b4-5ecf-4cb2-87e5-328fd3488630
+---
+
+Offsite záloha SeaweedFS příloh (viz [[project-seaweedfs]]) na staré **Synology DS213+** v ordinaci. Postaveno v noci 2026-06-14→15 koordinací dvou instancí Claude přes sdílenou Trilium note (HQ = doma u Tower1, ORDINACE = na Lenovu v ordinaci). První plný mirror **~50 GiB / ~109k souborů, 0 mismatchů (sha256==jméno)**.
+
+**Datová cesta (NAS si tahá sám, „pull"):**
+`Synology .40 (ordinace) ──ssh -p2250──> 78.80.38.51 (domácí WAN, PPPoE pevná) ──MikroTik NAT──> Tower1 192.168.1.50:22 ──> /mnt/seaweedfuse (FUSE RO)`
+
+- **Zdroj .50:** read-only FUSE mount celého fileru `/mnt/seaweedfuse` (weed mount, `/topics` skryté). Persistence: `/boot/config/seaweed-mount.sh` (volá se `bash`, protože /boot je VFAT noexec) + watchdog cron `/boot/config/plugins/dynamix/seaweed-fuse.cron` á 5 min + hook v `/boot/config/go`. weed binárka zkopírovaná z kontejneru do `/usr/local/sbin/weed`.
+- **MikroTik (192.168.1.2, admin):** NAT dstnat `WAN:2250 → 192.168.1.50:22` (src-address-list=ordinace), filter forward accept ordinace→.50:22. **Oddělené od Medicus rsyncu** (ten je `2299→.76`). Address-list `ordinace` = 78.44.195.114 (MySQL/eRecept) + 185.140.244.138 (Synology rsync).
+- **Cíl Synology DS213+ „synologymaly" (192.168.1.40, DSM 6.2.4, PPC, ext4):** `/volume1/SeaweedFS-offsite`. NAS dělá `rsync -a --partial` **aditivně, BEZ --delete** (immutable content-addressed bloby → bezpečný narůstající archiv). Žádné Hyper Backup (rozhodnutí majitele). Docker/Tailscale/rclone na PPC nejdou — proto plain rsync-over-ssh.
+- **Auth:** klíč `swbackup` (ed25519, na NASu `/root/.ssh/swbackup`), autorizován na .50 root s **forced-command** wrapperem `/boot/config/ssh/swbackup-rsync.sh` (pustí jen read-only `rsync --server --sender`).
+- **Plán:** běží přes **`/etc/crontab`** (`0 4 * * * root sh /volume1/SeaweedFS-offsite/_sync.sh`) — DSM Task Scheduler na téhle DSM nešel čistě (chybí v PATH); crond se restartuje přes `/usr/syno/sbin/synoservicecfg --restart crond`. Záloha původního crontabu `/etc/crontab.bak_swoffsite`. Sync skript: rsync `-a --partial --stats --timeout=300`, PID-lock `/tmp/swoffsite.lock`, log `_sync.log`, `known_hosts_sw`. Při `rc!=0` pošle mail, při úspěchu mlčí.
+- **Monitoring:** e-mail **varianta A = NAS posílá sám** přes mailer `/root/swoffsite_mailer.py` (root, chmod 700). POZOR: klinická knihovna `EmailMessagingGraph.py` jede přes **Microsoft Graph (OAuth client-cred, sender reports@buzalka.cz), NE SMTP** — proto mailer napsán čistě ve stdlib Pythonu (urllib/json/ssl, žádný pip; msal/requests na PPC DSM 6.2 nejdou). Alert na vladimir.buzalka@buzalka.cz při chybě. Otestováno (status 202).
+- **Restore (Fáze 5) ověřen:** 3 bloby vytaženy mimo strom, `sha256(obsah)==název` → záloha čitelná i bez SeaweedFS. **Projekt KOMPLETNÍ 2026-06-15.**
+
+**Kompletní runbook (přístupy, provozní úkony, restore, troubleshooting):** Trilium note „OrdinaceOffsiteBackupSeaWeedFS — RUNBOOK". **Komunikační kanál** mezi instancemi: Trilium note `OrdinaceOffsiteBackupSeaWeedFS` (noteId Gh5wzhrjTlv8). Viz [[setup-memory-sync]] princip — Trilium note jako async kanál mezi dvěma Claude.