""" jnj_mailbox_sync v1.1 Nazev: jnj_mailbox_sync_v1.1.py Verze: 1.1.0 Datum: 2026-06-10 Autor: vladimir.buzalka Popis: Synchronizace JNJ Outlooku (MAPI) -> osobni schranka + bookkeeping v SQLite. Nasledník inbox_full_sync_v1.1. Nove navic sleduje PRESUN emailu mezi slozkami a priznak "uz neni ve schrance" — BEZ opetovneho prenosu tela. Scope: primarni schranka, Inbox + Sent Items + Deleted Items vcetne vsech podsložek. (v1.1: pridano Deleted Items — uzivatel po precteni maily MAZE, takze precteny-smazany mail se ted sleduje jako /Deleted Items misto aby skoncil jako "ghost" s posledni cestou /Inbox.) Online Archive se NEskenuje — firemni pravidla tam presouvaji nejstarsi emaily, ktere uz mame davno stazene. Kdyz email ze skenovane schranky zmizi (presun do nesken. slozky / vyprazdneni Deleted), ponecha se POSLEDNI ZNAMA cesta a nastavi se priznak not_in_mailbox_anymore=1. Identita emailu = Internet Message-ID (stabilni pres presuny). EntryID se pri presunu meni — drzime ho jen jako pomocny. Sloupce cest v SQLite: folder = cesta pri PRVNIM zachyceni (historie, neprepisuje se) jnj_folder = AKTUALNI ziva cesta (prepisuje se pri presunu) Sloupec updated_at se bumpne pri insertu i kazde zmene — slouzi pro inkrementalni sync na domaci strane (watermark). Rezimy (--mode): capture (default) Projde cely Inbox+Sent, nove emaily ulozi a nahraje (jako inbox_full_sync). Okno --days se IGNORUJE (bere VSE). Detekce "opustilo schranku" se v tomto rezimu NEdela (neskenuje se archiv, takze by to delalo falesne poplachy). update-paths Jen METADATA. Projde okno poslednich --days dni, aktualizuje cesty/precteno znamych emailu a oznaci ty, co ze schranky zmizely. NIC nenahrava (zadny .msg upload). full-update update-paths + navic dorovna chybejici emaily (SaveAs+upload). Argumenty: --mode {capture,update-paths,full-update} default capture --days N velikost okna ve dnech (default 30). 0 = cely Inbox+Sent. --dry-run NIC nezapise/nenahraje, jen vypise co by udelal (+ souhrn). --limit N zpracovat max N polozek (rychly test). --no-db-upload na konci nenahravat SQLite na server. Spousteni: # 1) Nejdriv si PRECIST, co by full-update prinesl (NIC nezmeni): python jnj_mailbox_sync_v1.1.py --mode full-update --days 30 --dry-run # 2) Pak naostro: python jnj_mailbox_sync_v1.1.py --mode full-update --days 30 Zavislosti: pywin32, requests, cryptography, sqlite3 (stdlib). Python 3.10+, Windows, Outlook musi byt spusteny a prihlaseny. Historie verzi: 1.0.0 2026-06-09 Nova generace: rezimy capture/update-paths/full-update, sledovani presunu (jnj_folder), priznak not_in_mailbox_anymore, sloupec updated_at pro inkrementalni sync domu. Nasledník inbox_full_sync_v1.1. 1.1.0 2026-06-10 + Deleted Items do SYNC_FOLDERS (olFolderDeletedItems=3). Precteny-smazany mail se ted sleduje jako /Deleted Items; drive ghost s posledni cestou /Inbox. Pri 1. behu se drive zghostovane maily najdou v Deleted -> jnj_folder opraven na /Deleted Items + not_in_mailbox_anymore=0. """ import argparse import base64 import hashlib import logging import sqlite3 import sys import tempfile from datetime import datetime, timedelta from pathlib import Path import win32com.client import requests import urllib3 from cryptography.fernet import Fernet if hasattr(sys.stdout, "reconfigure"): sys.stdout.reconfigure(encoding="utf-8", errors="replace") urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) # ─── KONFIGURACE ────────────────────────────────────────────────────────────── TOKEN = "13e1bb01-9fd5-44a8-8ce9-4ee27133d340" UPLOAD_URL = "https://msgs.buzalka.cz/upload" DB_UPLOAD_URL = "https://msgs.buzalka.cz/upload-db" DB_PATH = r"C:\Users\vbuzalka\SQLITE\jnjemails.db" LOG_PATH = r"C:\Users\vbuzalka\SQLITE\jnj_mailbox_sync_errors.log" PR_INTERNET_MESSAGE_ID = "http://schemas.microsoft.com/mapi/proptag/0x1035001E" SCRIPT_NAME = "jnj_mailbox_sync" SCRIPT_VERSION = "1.1.0" # olFolderInbox=6, olFolderSentMail=5, olFolderDeletedItems=3 SYNC_FOLDERS = [(6, "Inbox"), (5, "Sent Items"), (3, "Deleted Items")] OLSAVE_MSG = 3 # OlSaveAsType.olMSG # Sifrovaci klic odvozeny z TOKENu (stejny algoritmus jako server) _FERNET = Fernet(base64.urlsafe_b64encode(hashlib.sha256(TOKEN.encode()).digest())) logging.basicConfig( filename=LOG_PATH, level=logging.ERROR, format="%(asctime)s | %(message)s", datefmt="%Y-%m-%d %H:%M:%S", encoding="utf-8", ) # ────────────────────────────────────────────────────────────────────────────── # ─── SQLite ─────────────────────────────────────────────────────────────────── def init_db(conn): conn.execute(""" CREATE TABLE IF NOT EXISTS messages ( id INTEGER PRIMARY KEY AUTOINCREMENT, message_id TEXT NOT NULL, subject TEXT, sender TEXT, received_at TEXT, folder TEXT, source TEXT, uploaded_at TEXT DEFAULT (datetime('now')), entry_id TEXT, graph_id TEXT, is_read INTEGER DEFAULT 0, jnj_folder TEXT, not_in_mailbox_anymore INTEGER DEFAULT 0, left_mailbox_at TEXT, updated_at TEXT ) """) conn.execute("CREATE UNIQUE INDEX IF NOT EXISTS idx_message_id ON messages(message_id)") conn.execute(""" CREATE TABLE IF NOT EXISTS runs ( id INTEGER PRIMARY KEY AUTOINCREMENT, script TEXT NOT NULL, version TEXT, started_at TEXT NOT NULL, finished_at TEXT, mode TEXT, window_days INTEGER, dry_run INTEGER DEFAULT 0, found INTEGER DEFAULT 0, new_captured INTEGER DEFAULT 0, path_updated INTEGER DEFAULT 0, read_updated INTEGER DEFAULT 0, returned INTEGER DEFAULT 0, left_mailbox INTEGER DEFAULT 0, skipped INTEGER DEFAULT 0, errors INTEGER DEFAULT 0 ) """) conn.execute(""" CREATE TABLE IF NOT EXISTS log ( id INTEGER PRIMARY KEY AUTOINCREMENT, run_id INTEGER REFERENCES runs(id), level TEXT NOT NULL, event TEXT NOT NULL, subject TEXT, folder TEXT, graph_id TEXT, detail TEXT, created_at TEXT DEFAULT (datetime('now')) ) """) conn.execute("CREATE INDEX IF NOT EXISTS idx_log_run_id ON log(run_id)") # Migrace existujici jnjemails.db (z inbox_full_sync) — pridej chybejici sloupce for col, ddl in [ ("entry_id", "TEXT"), ("graph_id", "TEXT"), ("is_read", "INTEGER DEFAULT 0"), ("jnj_folder", "TEXT"), ("not_in_mailbox_anymore", "INTEGER DEFAULT 0"), ("left_mailbox_at", "TEXT"), ("updated_at", "TEXT"), ]: try: conn.execute(f"ALTER TABLE messages ADD COLUMN {col} {ddl}") except Exception: pass for col, ddl in [ ("mode", "TEXT"), ("window_days", "INTEGER"), ("dry_run", "INTEGER DEFAULT 0"), ("found", "INTEGER DEFAULT 0"), ("new_captured", "INTEGER DEFAULT 0"), ("path_updated", "INTEGER DEFAULT 0"), ("read_updated", "INTEGER DEFAULT 0"), ("returned", "INTEGER DEFAULT 0"), ("left_mailbox", "INTEGER DEFAULT 0"), ]: try: conn.execute(f"ALTER TABLE runs ADD COLUMN {col} {ddl}") except Exception: pass # Indexy na sloupce, ktere mohly vzniknout az migraci vyse conn.execute("CREATE INDEX IF NOT EXISTS idx_updated_at ON messages(updated_at)") conn.commit() def start_run(conn, mode, days, dry): cur = conn.execute( """INSERT INTO runs (script, version, started_at, mode, window_days, dry_run) VALUES (?, ?, datetime('now'), ?, ?, ?)""", (SCRIPT_NAME, SCRIPT_VERSION, mode, days, 1 if dry else 0), ) conn.commit() return cur.lastrowid def finish_run(conn, run_id, stats): conn.execute( """UPDATE runs SET finished_at=datetime('now'), found=?, new_captured=?, path_updated=?, read_updated=?, returned=?, left_mailbox=?, skipped=?, errors=? WHERE id=?""", (stats["found"], stats["new_captured"], stats["path_updated"], stats["read_updated"], stats["returned"], stats["left_mailbox"], stats["skipped"], stats["errors"], run_id), ) conn.commit() def db_log(conn, run_id, level, event, subject=None, folder=None, graph_id=None, detail=None): conn.execute( """INSERT INTO log (run_id, level, event, subject, folder, graph_id, detail) VALUES (?, ?, ?, ?, ?, ?, ?)""", (run_id, level, event, subject, folder, graph_id, detail), ) conn.commit() def info(conn, run_id, event, **kw): db_log(conn, run_id, "INFO", event, **kw) def error(conn, run_id, event, **kw): db_log(conn, run_id, "ERROR", event, **kw) def db_get(conn, mid): cur = conn.execute( """SELECT message_id, folder, jnj_folder, is_read, not_in_mailbox_anymore FROM messages WHERE message_id=?""", (mid,)) r = cur.fetchone() if not r: return None return {"message_id": r[0], "folder": r[1], "jnj_folder": r[2], "is_read": r[3], "not_in_mailbox_anymore": r[4]} def apply_update(conn, mid, changes): sets, vals = [], [] for k, v in changes.items(): sets.append(f"{k}=?") vals.append(v) sets.append("updated_at=datetime('now')") vals.append(mid) conn.execute(f"UPDATE messages SET {', '.join(sets)} WHERE message_id=?", vals) conn.commit() # ─── Outlook / prenos ──────────────────────────────────────────────────────── def get_mid(item) -> str: try: mid = item.PropertyAccessor.GetProperty(PR_INTERNET_MESSAGE_ID) except Exception: mid = None return mid or f"entryid:{item.EntryID}" def upload_msg(msg_path, filename, folder=""): with open(msg_path, "rb") as f: encrypted = _FERNET.encrypt(f.read()) enc_filename = Path(filename).stem + ".emsg" resp = requests.post( UPLOAD_URL, headers={"Authorization": f"Bearer {TOKEN}"}, files={"file": (enc_filename, encrypted, "application/octet-stream")}, data={"folder": folder}, timeout=60, ) if not resp.ok: raise requests.HTTPError(f"{resp.status_code} {resp.reason} | {resp.text[:200]}") return resp.json() def upload_db(db_path): ts = datetime.now().strftime("%Y%m%d_%H%M%S") filename = f"jnjemails_{ts}.db" try: with open(db_path, "rb") as f: resp = requests.post( DB_UPLOAD_URL, headers={"Authorization": f"Bearer {TOKEN}"}, files={"file": (filename, f, "application/octet-stream")}, timeout=120, ) print(f" DB upload: {resp.json()}") except Exception as e: print(f" DB upload CHYBA: {e}") def capture_new(conn, run_id, item, mid, current, is_read, subject, stats): """Novy email: SaveAs -> upload -> insert. Vraci True pri uspechu.""" with tempfile.TemporaryDirectory() as tmp: safe = f"{item.EntryID[-20:]}.msg" p = Path(tmp) / safe item.SaveAs(str(p), OLSAVE_MSG) result = upload_msg(p, safe, current) graph_id = result.get("graph_id") try: received = item.ReceivedTime.isoformat() if item.ReceivedTime else None except Exception: received = None try: sender = item.SenderEmailAddress or "" except Exception: sender = "" conn.execute( """INSERT OR IGNORE INTO messages (message_id, subject, sender, received_at, folder, source, entry_id, graph_id, is_read, jnj_folder, not_in_mailbox_anymore, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 0, datetime('now'))""", (mid, subject, sender, received, current, SCRIPT_NAME, item.EntryID, graph_id, is_read, current), ) conn.commit() info(conn, run_id, "captured", subject=subject, folder=current, graph_id=graph_id) print(f" NEW | {subject[:70]}") return True def process_item(conn, run_id, item, current, stats, seen, mode, dry): try: mid = get_mid(item) except Exception: return seen.add(mid) stats["found"] += 1 try: is_read = 0 if item.UnRead else 1 except Exception: is_read = 0 subject = str(getattr(item, "Subject", "") or "") row = db_get(conn, mid) # ── Novy email (neni v DB) ──────────────────────────────────────────── if row is None: if mode in ("capture", "full-update"): if dry: stats["new_captured"] += 1 print(f" NEW* | {subject[:70]}") else: try: if capture_new(conn, run_id, item, mid, current, is_read, subject, stats): stats["new_captured"] += 1 except Exception as e: stats["errors"] += 1 error(conn, run_id, "capture_error", subject=subject, folder=current, detail=str(e)) print(f" CHYBA NEW | {subject[:50]} | {e}") else: # update-paths — telo nemame, nelze dorovnat stats["new_uncaptured"] += 1 return # ── Znamy email — porovnej zmeny ────────────────────────────────────── changes = {} current_known = row.get("jnj_folder") or row.get("folder") if current_known != current: changes["jnj_folder"] = current stats["path_updated"] += 1 if row.get("is_read") != is_read: changes["is_read"] = is_read stats["read_updated"] += 1 if row.get("not_in_mailbox_anymore"): changes["not_in_mailbox_anymore"] = 0 changes["left_mailbox_at"] = None stats["returned"] += 1 if changes: if not dry: apply_update(conn, mid, changes) what = [] if "jnj_folder" in changes: what.append(f"-> {current}") if "is_read" in changes: what.append("precteno" if is_read else "neprecteno") if "not_in_mailbox_anymore" in changes: what.append("vraceno do schranky") marker = "*" if dry else " " print(f" UPD{marker} | {subject[:55]} | {', '.join(what)}") info(conn, run_id, "path_update", subject=subject, folder=current, detail="; ".join(what)) else: stats["skipped"] += 1 def walk(conn, run_id, folder, folder_path, cutoff_local, stats, seen, mode, dry, limit): current = f"{folder_path}/{folder.Name}" try: items = folder.Items if cutoff_local is not None: restrict = ("@SQL=\"urn:schemas:httpmail:datereceived\" >= '%s'" % cutoff_local.strftime("%Y/%m/%d %H:%M:%S")) items = items.Restrict(restrict) items.Sort("[ReceivedTime]", True) # newest first except Exception as e: print(f" CHYBA slozka {current}: {e}") error(conn, run_id, "folder_error", folder=current, detail=str(e)) return n = 0 for item in items: if limit and stats["found"] >= limit: break try: if not str(getattr(item, "MessageClass", "")).upper().startswith("IPM.NOTE"): continue except Exception: continue process_item(conn, run_id, item, current, stats, seen, mode, dry) n += 1 print(f" {current}: {n} polozek") info(conn, run_id, "folder_done", folder=current, detail=str(n)) try: subs = list(folder.Folders) except Exception: subs = [] for sub in subs: if limit and stats["found"] >= limit: break walk(conn, run_id, sub, current, cutoff_local, stats, seen, mode, dry, limit) def _parse_dt(s): if not s: return None try: dt = datetime.fromisoformat(s) if dt.tzinfo: dt = dt.astimezone().replace(tzinfo=None) return dt except Exception: return None def flag_left_mailbox(conn, run_id, cutoff_local, seen, scanned_roots, stats, dry): """Emaily v DB v okne, ktere jsme ve SKENOVANE casti schranky (Inbox/Sent) NEvideli -> opustily pracovni schranku. Ponecha posledni znamou cestu, nastavi priznak. DULEZITE: hodnotime JEN emaily, jejichz POSLEDNI ZNAMA cesta je pod nekterym skenovanym korenem (scanned_roots = Inbox/Sent/Deleted Items primarni schranky). Emaily naposledy videne MIMO skenovany rozsah (Archive, Online Archive, Junk, Drafts, Sync Issues, vlastni top-level slozky, ...) se NEhodnoti — tam jsme je necekali, takze jejich absence nic neznamena (jinak falesne GONE). Pozn.: po vyprazdneni Deleted Items se tamni maily korektne oznaci GONE (posledni cesta /Deleted Items zustane).""" cur = conn.execute( """SELECT message_id, received_at, jnj_folder, folder, not_in_mailbox_anymore FROM messages""") to_flag = [] for mid, received_at, jnjf, fld, flag in cur.fetchall(): if mid in seen or flag: continue path = jnjf or fld or "" if not any(path.startswith(root) for root in scanned_roots): continue # posledni znama cesta mimo skenovany rozsah -> nehodnotime rec = _parse_dt(received_at) if rec is None or rec < cutoff_local: continue # mimo okno / neparsovatelne -> nehodnotime to_flag.append((mid, path)) for mid, path in to_flag: if not dry: conn.execute( """UPDATE messages SET not_in_mailbox_anymore=1, left_mailbox_at=datetime('now'), updated_at=datetime('now') WHERE message_id=?""", (mid,)) stats["left_mailbox"] += 1 print(f" GONE{'*' if dry else ' '} | {path}") if not dry and to_flag: conn.commit() info(conn, run_id, "left_mailbox", detail=str(len(to_flag))) # ─── MAIN ───────────────────────────────────────────────────────────────────── def main(): ap = argparse.ArgumentParser(description=f"jnj_mailbox_sync v{SCRIPT_VERSION}") ap.add_argument("--mode", choices=["capture", "update-paths", "full-update"], default="capture") ap.add_argument("--days", type=int, default=30, help="Okno ve dnech pro update-paths/full-update (0 = vse)") ap.add_argument("--dry-run", action="store_true", help="Nic nezapise/nenahraje, jen vypise co by udelal") ap.add_argument("--limit", type=int, default=0, help="Max N polozek (test)") ap.add_argument("--no-db-upload", action="store_true") args = ap.parse_args() mode, dry = args.mode, args.dry_run # capture ignoruje okno (bere vse); ostatni rezimy okno pouzivaji (0 = vse) if mode == "capture": cutoff_local = None else: cutoff_local = None if args.days == 0 else (datetime.now() - timedelta(days=args.days)) win = "vse" if cutoff_local is None else f"{args.days} dni (od {cutoff_local:%Y-%m-%d %H:%M})" print(f"=== jnj_mailbox_sync v{SCRIPT_VERSION} ===") print(f"Start: {datetime.now():%Y-%m-%d %H:%M:%S}") print(f"Rezim: {mode} Okno: {win} {'[DRY-RUN — nic se nemeni]' if dry else ''}") print(f"DB: {DB_PATH}") conn = sqlite3.connect(DB_PATH) init_db(conn) run_id = start_run(conn, mode, args.days, dry) outlook = win32com.client.Dispatch("Outlook.Application") ns = outlook.GetNamespace("MAPI") stats = {"found": 0, "new_captured": 0, "new_uncaptured": 0, "path_updated": 0, "read_updated": 0, "returned": 0, "left_mailbox": 0, "skipped": 0, "errors": 0} seen = set() scanned_roots = set() for fid, label in SYNC_FOLDERS: root = ns.GetDefaultFolder(fid) mailbox = root.Parent.Name scanned_roots.add(f"/{mailbox}/{root.Name}") print(f"\n=== {label} ({mailbox}) ===") walk(conn, run_id, root, f"/{mailbox}", cutoff_local, stats, seen, mode, dry, args.limit) # Detekce "opustilo schranku" — jen oknove rezimy s platnym cutoff. # Hodnoti jen emaily naposledy videne pod scanned_roots (Inbox/Sent/Deleted). if mode in ("update-paths", "full-update") and cutoff_local is not None and not (args.limit): print("\n--- Kontrola 'opustilo schranku' (v okne, Inbox/Sent/Deleted) ---") flag_left_mailbox(conn, run_id, cutoff_local, seen, scanned_roots, stats, dry) elif args.limit: print("\n(--limit aktivni -> detekce 'opustilo schranku' preskocena)") finish_run(conn, run_id, stats) # ── Souhrn ───────────────────────────────────────────────────────────── print(f"\n{'='*60}") print(f"SOUHRN [{mode}{' / DRY-RUN' if dry else ''}]") print(f" Nalezeno ve schrance: {stats['found']}") if mode in ("capture", "full-update"): lbl = "by se nahralo" if dry else "nahrano" print(f" Nove zachyceno ({lbl}): {stats['new_captured']}") else: print(f" Nove (bez tela, nedorovnano):{stats['new_uncaptured']}") print(f" Aktualizovana cesta: {stats['path_updated']}") print(f" Zmena precteno/neprecteno: {stats['read_updated']}") print(f" Vraceno do schranky: {stats['returned']}") print(f" Opustilo schranku (GONE): {stats['left_mailbox']}") print(f" Beze zmeny (skip): {stats['skipped']}") print(f" Chyby: {stats['errors']}") print(f"{'='*60}") if dry: print("DRY-RUN: SQLite ani server se NEMENILY.") elif not args.no_db_upload: print("\nUpload SQLite na server...") upload_db(DB_PATH) print(f"\nKonec: {datetime.now():%Y-%m-%d %H:%M:%S}") if stats["errors"]: print(f"Chyby logovany do: {LOG_PATH}") conn.close() if __name__ == "__main__": main()