665 lines
27 KiB
Python
665 lines
27 KiB
Python
"""
|
|
jnj_mailbox_sync v1.3

Nazev:  jnj_mailbox_sync_v1.3.py
Verze:  1.3.0
Datum:  2026-06-16
Autor:  vladimir.buzalka

Popis:
    Synchronizace JNJ Outlooku (MAPI) -> osobni schranka + bookkeeping v SQLite.
    Nasledník inbox_full_sync_v1.1 / jnj_mailbox_sync_v1.2. Sleduje PRESUN emailu
    mezi slozkami a priznak "uz neni ve schrance" — BEZ opetovneho prenosu tela.

    Scope: primarni schranka, Inbox + Sent Items + Deleted Items vcetne vsech
    podsložek. Online Archive se NEskenuje.

    Identita emailu = Internet Message-ID (stabilni pres presuny). Kdyz Message-ID
    chybi (typicky cerstve odeslane / NEODESLANE Sent polozky — Exchange ho doplni
    az po skutecnem transportu), pouzije se fallback "entryid:<EntryID>".

    Sloupce cest v SQLite:
        folder      = cesta pri PRVNIM zachyceni (historie, neprepisuje se)
        jnj_folder  = AKTUALNI ziva cesta (prepisuje se pri presunu)
    updated_at se bumpne pri insertu i kazde zmene — watermark pro domaci sync.

NOVINKA v1.3 — DETEKCE ZMENY OBSAHU (re-upload zmeneneho emailu)
    Problem: e-mail bez Message-ID (napr. NEODESLANY Sent kvuli SendAsDenied) ma
    STABILNI EntryID. Kdyz do nej Outlook PO zachyceni dopise chybu odeslani,
    obsah se zmeni, ale identita (entryid:<EID>) zustane — stary sync to vyhodnotil
    jako "zname, beze zmeny" a aktualizovany (chybovy) e-mail uz domu NEPRENESL.
    Naproti tomu uspesne odeslany e-mail dostane NOVE EntryID + Message-ID, takze
    se zachytil jako novy. Vznikla asymetrie: failed-update se ztracel.

    Reseni: identita zustava (Message-ID / entryid:), ale navic se sleduje VERZNI
    OTISK = PR_LAST_MODIFICATION_TIME (0x30080040). U ZNAMEHO emailu BEZ Message-ID
    (mid zacina "entryid:") se otisk porovna; kdyz se posunul, e-mail se znovu
    ulozi (SaveAs) a nahraje s priznakem overwrite=true (server prepise puvodni
    .msg na miste -> Tower ho preparsuje -> dokument v Mongu se aktualizuje, vc.
    tela s chybou). Tim doteche i "zmeneny hustak". Hlidani je levne — druhe cteni
    property jen u znamych no-ID polozek (desitky kusu); polozky s Message-ID jsou
    finalizovane a nesleduji se.

    Re-upload bezi jen v rezimech, ktere smeji nahravat (capture, full-update),
    a posila se BEZ folderu (folder="") => server NEdela Graph re-import (zadny
    duplikat v Graph zrcadle); jen prepise /msgs soubor pro Tower parse.

    Vyzaduje msgreceiver app.py >= v2.4 (overwrite na /upload). Bez nej se
    re-upload chova jako "exists" (stary skip) — neprepise, ale nic nerozbije.

Upload SQLite (zustava z v1.2): DB se pred odeslanim KOMPRIMUJE (lzma/xz, max) a
SIFRUJE (Fernet, klic z TOKENu) a nahrava jako .db.xz.enc.

Rezimy (--mode):
    capture (default)  Projde cely Inbox+Sent+Deleted, nove emaily ulozi a
                       nahraje + NOVE re-uploadne zmenene znamé no-ID polozky.
                       Okno --days se IGNORUJE (bere VSE).
    update-paths       Jen METADATA cesty/precteno + "opustilo schranku". NIC nenahrava
                       (ani re-upload).
    full-update        update-paths + dorovna chybejici (SaveAs+upload) + re-upload
                       zmenenych znamých no-ID polozek.

Argumenty:
    --mode {capture,update-paths,full-update}   default capture
    --days N         velikost okna ve dnech (default 30). 0 = cely Inbox+Sent+Deleted.
    --dry-run        NIC nezapise/nenahraje, jen vypise co by udelal.
    --limit N        zpracovat max N polozek (rychly test).
    --no-db-upload   na konci nenahravat SQLite na server.

Spousteni:
    # Refresh poslednich 60 dni + zachytit zmenene (chybove) Sent polozky:
    python jnj_mailbox_sync_v1.3.py --mode full-update --days 60

Zavislosti:
    pywin32, requests, cryptography, sqlite3 + lzma (stdlib).
    Python 3.10+, Windows, Outlook musi byt spusteny a prihlaseny.

Historie verzi:
    1.0.0  2026-06-09  Rezimy capture/update-paths/full-update, sledovani presunu,
                       not_in_mailbox_anymore, updated_at watermark.
    1.1.0  2026-06-10  + Deleted Items do SYNC_FOLDERS.
    1.2.0  2026-06-10  Upload SQLite komprimovan (lzma) + sifrovan (Fernet) ->
                       .db.xz.enc. Vyzaduje app.py >= v2.1.
    1.3.0  2026-06-16  + DETEKCE ZMENY OBSAHU pres PR_LAST_MODIFICATION_TIME:
                       zname no-ID polozky (entryid:), ktere se po zachyceni
                       zmenily (napr. dopsana chyba SendAsDenied), se znovu
                       nahravaji s overwrite=true. Nove SQLite sloupce
                       last_mod_time, content_uploads; runs.content_updated.
                       Vyzaduje app.py >= v2.4 (overwrite na /upload).
|
|
"""
|
|
import argparse
import base64
import hashlib
import logging
import lzma
import sqlite3
import sys
import tempfile
from datetime import datetime, timedelta, timezone
from pathlib import Path

import requests
import urllib3
import win32com.client
from cryptography.fernet import Fernet
|
|
|
|
# Switch stdout to UTF-8 when the stream supports reconfigure(); characters
# that cannot be encoded are replaced instead of raising.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
# Silence urllib3's InsecureRequestWarning for the whole process.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# ─── CONFIGURATION ────────────────────────────────────────────────────────────
# NOTE(review): the bearer token is hard-coded here and also seeds the
# encryption key below — consider loading it from the environment or a
# secret store instead of committing it to source.
TOKEN = "13e1bb01-9fd5-44a8-8ce9-4ee27133d340"
UPLOAD_URL = "https://msgs.buzalka.cz/upload"
DB_UPLOAD_URL = "https://msgs.buzalka.cz/upload-db"
DB_PATH = r"C:\Users\vbuzalka\SQLITE\jnjemails.db"
LOG_PATH = r"C:\Users\vbuzalka\SQLITE\jnj_mailbox_sync_errors.log"
PR_INTERNET_MESSAGE_ID = "http://schemas.microsoft.com/mapi/proptag/0x1035001E"
PR_LAST_MOD_TIME = "http://schemas.microsoft.com/mapi/proptag/0x30080040"  # PR_LAST_MODIFICATION_TIME
SCRIPT_NAME = "jnj_mailbox_sync"
SCRIPT_VERSION = "1.3.0"

# Outlook default-folder ids: olFolderInbox=6, olFolderSentMail=5, olFolderDeletedItems=3
SYNC_FOLDERS = [(6, "Inbox"), (5, "Sent Items"), (3, "Deleted Items")]
OLSAVE_MSG = 3  # OlSaveAsType.olMSG

# Encryption key derived from TOKEN: SHA-256 digest, urlsafe-base64 encoded
# (per the original note, the server uses the same algorithm).
_FERNET = Fernet(base64.urlsafe_b64encode(hashlib.sha256(TOKEN.encode()).digest()))

# File log at LOG_PATH: ERROR level and above only, UTF-8 encoded.
logging.basicConfig(
    filename=LOG_PATH,
    level=logging.ERROR,
    format="%(asctime)s | %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    encoding="utf-8",
)
# ──────────────────────────────────────────────────────────────────────────────
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
|
|
|
|
# ─── SQLite ───────────────────────────────────────────────────────────────────
|
|
|
|
def _add_missing_columns(conn, table, columns):
    """Add every (name, ddl) column from `columns` that `table` lacks.

    The existing columns are read from PRAGMA table_info, so an ALTER TABLE
    is issued only for genuinely missing columns and any real failure
    (locked database, bad DDL) surfaces instead of being swallowed.
    """
    # table_info rows are (cid, name, type, notnull, dflt_value, pk).
    existing = {row[1] for row in conn.execute(f"PRAGMA table_info({table})")}
    for col, ddl in columns:
        if col not in existing:
            conn.execute(f"ALTER TABLE {table} ADD COLUMN {col} {ddl}")


def init_db(conn):
    """Create the bookkeeping schema and migrate an older database in place.

    Creates the `messages`, `runs` and `log` tables with their indexes when
    absent, then adds any columns introduced by later versions to existing
    `messages` / `runs` tables. Safe to call repeatedly. Commits at the end.

    Args:
        conn: open sqlite3 connection to the bookkeeping database.
    """
    conn.execute("""
        CREATE TABLE IF NOT EXISTS messages (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            message_id TEXT NOT NULL,
            subject TEXT,
            sender TEXT,
            received_at TEXT,
            folder TEXT,
            source TEXT,
            uploaded_at TEXT DEFAULT (datetime('now')),
            entry_id TEXT,
            graph_id TEXT,
            is_read INTEGER DEFAULT 0,
            jnj_folder TEXT,
            not_in_mailbox_anymore INTEGER DEFAULT 0,
            left_mailbox_at TEXT,
            updated_at TEXT,
            last_mod_time TEXT,
            content_uploads INTEGER DEFAULT 1
        )
    """)
    conn.execute("CREATE UNIQUE INDEX IF NOT EXISTS idx_message_id ON messages(message_id)")

    conn.execute("""
        CREATE TABLE IF NOT EXISTS runs (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            script TEXT NOT NULL,
            version TEXT,
            started_at TEXT NOT NULL,
            finished_at TEXT,
            mode TEXT,
            window_days INTEGER,
            dry_run INTEGER DEFAULT 0,
            found INTEGER DEFAULT 0,
            new_captured INTEGER DEFAULT 0,
            path_updated INTEGER DEFAULT 0,
            read_updated INTEGER DEFAULT 0,
            returned INTEGER DEFAULT 0,
            left_mailbox INTEGER DEFAULT 0,
            content_updated INTEGER DEFAULT 0,
            skipped INTEGER DEFAULT 0,
            errors INTEGER DEFAULT 0
        )
    """)

    conn.execute("""
        CREATE TABLE IF NOT EXISTS log (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            run_id INTEGER REFERENCES runs(id),
            level TEXT NOT NULL,
            event TEXT NOT NULL,
            subject TEXT,
            folder TEXT,
            graph_id TEXT,
            detail TEXT,
            created_at TEXT DEFAULT (datetime('now'))
        )
    """)
    conn.execute("CREATE INDEX IF NOT EXISTS idx_log_run_id ON log(run_id)")

    # Migration of an existing jnjemails.db — add the columns it is missing.
    _add_missing_columns(conn, "messages", [
        ("entry_id", "TEXT"), ("graph_id", "TEXT"), ("is_read", "INTEGER DEFAULT 0"),
        ("jnj_folder", "TEXT"), ("not_in_mailbox_anymore", "INTEGER DEFAULT 0"),
        ("left_mailbox_at", "TEXT"), ("updated_at", "TEXT"),
        ("last_mod_time", "TEXT"), ("content_uploads", "INTEGER DEFAULT 1"),
    ])
    _add_missing_columns(conn, "runs", [
        ("mode", "TEXT"), ("window_days", "INTEGER"), ("dry_run", "INTEGER DEFAULT 0"),
        ("found", "INTEGER DEFAULT 0"), ("new_captured", "INTEGER DEFAULT 0"),
        ("path_updated", "INTEGER DEFAULT 0"), ("read_updated", "INTEGER DEFAULT 0"),
        ("returned", "INTEGER DEFAULT 0"), ("left_mailbox", "INTEGER DEFAULT 0"),
        ("content_updated", "INTEGER DEFAULT 0"),
    ])

    # Created after the migration because a legacy table may lack updated_at.
    conn.execute("CREATE INDEX IF NOT EXISTS idx_updated_at ON messages(updated_at)")
    conn.commit()
|
|
|
|
|
|
def start_run(conn, mode, days, dry):
    """Insert a new `runs` row for this execution and return its id.

    Args:
        conn: open sqlite3 connection.
        mode: run mode string stored in `runs.mode`.
        days: window size stored in `runs.window_days`.
        dry: truthy for a dry run; stored as 1/0 in `runs.dry_run`.

    Returns:
        The rowid of the inserted run record.
    """
    dry_flag = 1 if dry else 0
    params = (SCRIPT_NAME, SCRIPT_VERSION, mode, days, dry_flag)
    cursor = conn.execute(
        """INSERT INTO runs (script, version, started_at, mode, window_days, dry_run)
        VALUES (?, ?, datetime('now'), ?, ?, ?)""",
        params,
    )
    conn.commit()
    return cursor.lastrowid
|
|
|
|
|
|
def finish_run(conn, run_id, stats):
    """Stamp the run as finished and persist its counters.

    Args:
        conn: open sqlite3 connection.
        run_id: id of the `runs` row to update.
        stats: mapping holding the counters found, new_captured,
            path_updated, read_updated, returned, left_mailbox,
            content_updated, skipped and errors.
    """
    # Order must match the placeholders in the UPDATE statement below.
    counter_names = (
        "found", "new_captured", "path_updated", "read_updated",
        "returned", "left_mailbox", "content_updated", "skipped", "errors",
    )
    params = [stats[name] for name in counter_names]
    params.append(run_id)
    conn.execute(
        """UPDATE runs SET finished_at=datetime('now'),
        found=?, new_captured=?, path_updated=?, read_updated=?,
        returned=?, left_mailbox=?, content_updated=?, skipped=?, errors=?
        WHERE id=?""",
        params,
    )
    conn.commit()
|
|
|
|
|
|
def db_log(conn, run_id, level, event, subject=None, folder=None, graph_id=None, detail=None):
    """Append one entry to the `log` table and commit immediately.

    Args:
        conn: open sqlite3 connection.
        run_id: id of the run the entry belongs to.
        level: severity label stored as-is (callers in this file pass
            "INFO" or "ERROR").
        event: short event name.
        subject, folder, graph_id, detail: optional context columns.
    """
    record = (run_id, level, event, subject, folder, graph_id, detail)
    conn.execute(
        """INSERT INTO log (run_id, level, event, subject, folder, graph_id, detail)
        VALUES (?, ?, ?, ?, ?, ?, ?)""",
        record,
    )
    conn.commit()
|
|
|
|
|
|
def info(conn, run_id, event, **kw):
    """Record an INFO-level entry in the `log` table.

    Keyword arguments are forwarded unchanged to db_log().
    """
    level = "INFO"
    db_log(conn, run_id, level, event, **kw)
|
|
|
|
|
|
def error(conn, run_id, event, **kw):
    """Record an ERROR-level entry in the file log and in the `log` table.

    The module configures a file logger at LOG_PATH and main() tells the
    user errors are logged there, so the error is written to that logger
    first (it does not depend on the database being writable) and then to
    the database.

    Keyword arguments are forwarded unchanged to db_log().
    """
    context = " | ".join(
        f"{key}={value}" for key, value in kw.items() if value is not None
    )
    logging.error("run=%s | %s | %s", run_id, event, context)
    db_log(conn, run_id, "ERROR", event, **kw)
|
|
|
|
|
|
def db_get(conn, mid):
    """Fetch the tracked state of one message by its identity.

    Args:
        conn: open sqlite3 connection.
        mid: value of `messages.message_id` to look up.

    Returns:
        A dict keyed by message_id, folder, jnj_folder, is_read,
        not_in_mailbox_anymore, last_mod_time and content_uploads,
        or None when no row matches.
    """
    # Key order mirrors the SELECT column order.
    keys = ("message_id", "folder", "jnj_folder", "is_read",
            "not_in_mailbox_anymore", "last_mod_time", "content_uploads")
    record = conn.execute(
        """SELECT message_id, folder, jnj_folder, is_read, not_in_mailbox_anymore,
        last_mod_time, content_uploads
        FROM messages WHERE message_id=?""", (mid,)).fetchone()
    if not record:
        return None
    return dict(zip(keys, record))
|
|
|
|
|
|
def apply_update(conn, mid, changes):
    """Write the changed columns of one message and bump `updated_at`.

    Args:
        conn: open sqlite3 connection.
        mid: `messages.message_id` of the row to update.
        changes: mapping of column name -> new value. Column names are
            interpolated into the SQL text, so they must come from code,
            never from external input. An empty mapping still bumps
            `updated_at`.
    """
    assignments = [f"{column}=?" for column in changes]
    assignments.append("updated_at=datetime('now')")
    params = [*changes.values(), mid]
    conn.execute(f"UPDATE messages SET {', '.join(assignments)} WHERE message_id=?", params)
    conn.commit()
|
|
|
|
|
|
# ─── Outlook / prenos ────────────────────────────────────────────────────────
|
|
|
|
def get_mid(item) -> str:
    """Return the identity string used to track `item`.

    Reads the Internet Message-ID property through the item's
    PropertyAccessor. When the read fails or yields an empty value, falls
    back to "entryid:<EntryID>".
    """
    try:
        internet_id = item.PropertyAccessor.GetProperty(PR_INTERNET_MESSAGE_ID)
    except Exception:
        internet_id = None
    if internet_id:
        return internet_id
    return f"entryid:{item.EntryID}"
|
|
|
|
|
|
def get_lastmod(item):
    """Return the item's last-modification time as a string fingerprint.

    Reads the PR_LAST_MOD_TIME property and returns its isoformat(); when
    the value has no usable isoformat(), str(value) is returned instead.
    Returns None when the property is missing or anything fails.
    """
    try:
        stamp = item.PropertyAccessor.GetProperty(PR_LAST_MOD_TIME)
    except Exception:
        return None
    if stamp is None:
        return None
    try:
        return stamp.isoformat()
    except Exception:
        pass
    try:
        return str(stamp)
    except Exception:
        return None
|
|
|
|
|
|
def upload_msg(msg_path, filename, folder="", overwrite=False):
    """Encrypt a saved .msg file and POST it to UPLOAD_URL.

    Args:
        msg_path: path of the .msg file on disk.
        filename: name whose stem becomes the uploaded "<stem>.emsg" name.
        folder: value sent in the "folder" form field.
        overwrite: when true, the form field overwrite="1" is added.

    Returns:
        The decoded JSON body of the server response.

    Raises:
        requests.HTTPError: when the response status is not OK.
    """
    with open(msg_path, "rb") as handle:
        payload = _FERNET.encrypt(handle.read())
    upload_name = Path(filename).stem + ".emsg"

    form = {"folder": folder}
    if overwrite:
        form["overwrite"] = "1"

    response = requests.post(
        UPLOAD_URL,
        headers={"Authorization": f"Bearer {TOKEN}"},
        files={"file": (upload_name, payload, "application/octet-stream")},
        data=form,
        timeout=60,
    )
    if response.ok:
        return response.json()
    raise requests.HTTPError(f"{response.status_code} {response.reason} | {response.text[:200]}")
|
|
|
|
|
|
def save_and_upload(item, folder="", overwrite=False):
    """Save `item` as .msg into a temporary directory and upload it.

    The file name is the last 20 characters of the item's EntryID plus
    ".msg". The temporary directory is removed when the upload finishes.

    Returns:
        (filename, server_json) — the .msg name used and the decoded
        server response from upload_msg().
    """
    msg_name = f"{item.EntryID[-20:]}.msg"
    with tempfile.TemporaryDirectory() as workdir:
        target = Path(workdir) / msg_name
        item.SaveAs(str(target), OLSAVE_MSG)
        server_reply = upload_msg(target, msg_name, folder, overwrite=overwrite)
    return msg_name, server_reply
|
|
|
|
|
|
def capture_new(conn, run_id, item, mid, current, is_read, subject, stats):
    """Capture a message that is not in the database yet.

    Saves and uploads the item, then inserts its bookkeeping row
    (INSERT OR IGNORE), commits, logs a "captured" event and prints a NEW
    line. Exceptions from the save/upload step propagate to the caller.

    Args:
        conn: open sqlite3 connection.
        run_id: id of the current run (for the log entry).
        item: Outlook mail item.
        mid: identity string stored in `messages.message_id`.
        current: current folder path; stored as both `folder` and `jnj_folder`.
        is_read: 1/0 read flag.
        subject: subject text.
        stats: run counters (not modified here).

    Returns:
        True on success.
    """
    _, server_reply = save_and_upload(item, current, overwrite=False)
    graph_id = server_reply.get("graph_id")
    fingerprint = get_lastmod(item)

    # Both reads are best-effort; a failing property leaves a neutral value.
    try:
        received = item.ReceivedTime.isoformat() if item.ReceivedTime else None
    except Exception:
        received = None
    try:
        sender = item.SenderEmailAddress or ""
    except Exception:
        sender = ""

    record = (mid, subject, sender, received, current, SCRIPT_NAME,
              item.EntryID, graph_id, is_read, current, fingerprint)
    conn.execute(
        """INSERT OR IGNORE INTO messages
        (message_id, subject, sender, received_at, folder, source,
        entry_id, graph_id, is_read, jnj_folder,
        not_in_mailbox_anymore, updated_at, last_mod_time, content_uploads)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 0, datetime('now'), ?, 1)""",
        record,
    )
    conn.commit()
    info(conn, run_id, "captured", subject=subject, folder=current, graph_id=graph_id)
    print(f" NEW | {subject[:70]}")
    return True
|
|
|
|
|
|
def reupload_changed(item, current):
    """Upload a changed, already-known message again with overwrite enabled.

    The upload is sent with an empty folder; per the original note this makes
    the server skip the Graph re-import and only replace the stored /msgs
    file. `current` is accepted but not used.
    """
    empty_folder = ""
    save_and_upload(item, folder=empty_folder, overwrite=True)
|
|
|
|
|
|
def process_item(conn, run_id, item, current, stats, seen, mode, dry):
    """Reconcile one mail item with its database record.

    Unknown items are captured (modes "capture" / "full-update") or only
    counted as uncaptured (other modes). Known items are compared for folder
    move, read-flag change and return to the mailbox; known items whose
    identity starts with "entryid:" are additionally re-uploaded when their
    last-modification fingerprint differs from the stored one.

    Args:
        conn: open sqlite3 connection.
        run_id: id of the current run.
        item: Outlook mail item.
        current: path of the folder the item was found in.
        stats: run counters, updated in place.
        seen: set of identities seen during this run, updated in place.
        mode: "capture", "update-paths" or "full-update".
        dry: when true, nothing is uploaded and message rows are not written.
    """
    try:
        mid = get_mid(item)
    except Exception:
        # Identity unreadable — the item is skipped without being counted.
        return
    seen.add(mid)
    stats["found"] += 1

    try:
        is_read = 0 if item.UnRead else 1
    except Exception:
        is_read = 0
    subject = str(getattr(item, "Subject", "") or "")

    may_upload = mode in ("capture", "full-update")
    row = db_get(conn, mid)

    # ── New message (not in the DB) ───────────────────────────────────────
    if row is None:
        if not may_upload:
            # update-paths: the body was never captured, nothing to reconcile.
            stats["new_uncaptured"] += 1
            return
        if dry:
            stats["new_captured"] += 1
            print(f" NEW* | {subject[:70]}")
            return
        try:
            if capture_new(conn, run_id, item, mid, current, is_read, subject, stats):
                stats["new_captured"] += 1
        except Exception as e:
            stats["errors"] += 1
            error(conn, run_id, "capture_error", subject=subject, folder=current, detail=str(e))
            print(f" CHYBA NEW | {subject[:50]} | {e}")
        return

    # ── Known message — collect what changed ──────────────────────────────
    changes = {}
    known_path = row.get("jnj_folder") or row.get("folder")
    if known_path != current:
        changes["jnj_folder"] = current
        stats["path_updated"] += 1
    if row.get("is_read") != is_read:
        changes["is_read"] = is_read
        stats["read_updated"] += 1
    if row.get("not_in_mailbox_anymore"):
        changes["not_in_mailbox_anymore"] = 0
        changes["left_mailbox_at"] = None
        stats["returned"] += 1

    # ── CONTENT-CHANGE DETECTION (v1.3) ───────────────────────────────────
    # Only for known items WITHOUT a Message-ID (identity "entryid:..."):
    # their content can change under the same identity. Runs only in modes
    # that are allowed to upload; a dry run only counts and prints.
    if may_upload and mid.startswith("entryid:"):
        stored_lm = row.get("last_mod_time")
        cur_lm = get_lastmod(item)
        if cur_lm and cur_lm != stored_lm:
            stats["content_updated"] += 1
            if dry:
                print(f" REUP* | {subject[:55]} | obsah zmenen -> by se re-uploadl")
            else:
                try:
                    reupload_changed(item, current)
                    changes["last_mod_time"] = cur_lm
                    changes["content_uploads"] = (row.get("content_uploads") or 1) + 1
                    print(f" REUP | {subject[:55]} | obsah zmenen -> re-upload")
                    info(conn, run_id, "content_reupload", subject=subject, folder=current,
                         detail=f"last_mod {stored_lm} -> {cur_lm}")
                except Exception as e:
                    # Undo the optimistic count; the fingerprint stays
                    # unchanged so the next run retries the re-upload.
                    stats["content_updated"] -= 1
                    stats["errors"] += 1
                    error(conn, run_id, "reupload_error", subject=subject, folder=current, detail=str(e))
                    print(f" CHYBA REUP | {subject[:50]} | {e}")

    if not changes:
        stats["skipped"] += 1
        return

    if not dry:
        apply_update(conn, mid, changes)

    # Human-readable summary, in a fixed order.
    labels = (
        ("jnj_folder", f"-> {current}"),
        ("is_read", "precteno" if is_read else "neprecteno"),
        ("not_in_mailbox_anymore", "vraceno do schranky"),
        ("last_mod_time", "obsah aktualizovan"),
    )
    what = [text for key, text in labels if key in changes]
    marker = "*" if dry else " "
    print(f" UPD{marker} | {subject[:55]} | {', '.join(what)}")
    info(conn, run_id, "path_update", subject=subject, folder=current, detail="; ".join(what))
|
|
|
|
|
|
def walk(conn, run_id, folder, folder_path, cutoff_local, stats, seen, mode, dry, limit):
    """Process the mail items of `folder`, then recurse into its subfolders.

    Args:
        conn: open sqlite3 connection.
        run_id: id of the current run.
        folder: Outlook folder object.
        folder_path: path of the parent; this folder's path becomes
            "<folder_path>/<folder.Name>".
        cutoff_local: local-time datetime; only items received at or after it
            are visited. None means no date filter.
        stats: run counters, updated in place.
        seen: set of identities seen during this run, updated in place.
        mode: run mode, forwarded to process_item().
        dry: dry-run flag, forwarded to process_item().
        limit: stop once stats["found"] reaches this value; 0 means no limit.
    """
    current = f"{folder_path}/{folder.Name}"
    try:
        items = folder.Items
        if cutoff_local is not None:
            # Outlook compares date-times in DASL ("@SQL=") filters in UTC,
            # so the local cutoff is converted first; otherwise the window is
            # shifted by the machine's UTC offset. A naive datetime is
            # interpreted as local time by astimezone().
            cutoff_utc = cutoff_local.astimezone(timezone.utc)
            restrict = ("@SQL=\"urn:schemas:httpmail:datereceived\" >= '%s'"
                        % cutoff_utc.strftime("%Y/%m/%d %H:%M:%S"))
            items = items.Restrict(restrict)
        items.Sort("[ReceivedTime]", True)  # newest first
    except Exception as e:
        print(f" CHYBA slozka {current}: {e}")
        error(conn, run_id, "folder_error", folder=current, detail=str(e))
        return

    n = 0
    for item in items:
        if limit and stats["found"] >= limit:
            break
        try:
            # Only mail items (message class IPM.Note and subclasses).
            if not str(getattr(item, "MessageClass", "")).upper().startswith("IPM.NOTE"):
                continue
        except Exception:
            continue
        process_item(conn, run_id, item, current, stats, seen, mode, dry)
        n += 1

    print(f" {current}: {n} polozek")
    info(conn, run_id, "folder_done", folder=current, detail=str(n))

    try:
        subs = list(folder.Folders)
    except Exception:
        # Subfolders that cannot be listed are skipped.
        subs = []
    for sub in subs:
        if limit and stats["found"] >= limit:
            break
        walk(conn, run_id, sub, current, cutoff_local, stats, seen, mode, dry, limit)
|
|
|
|
|
|
def _parse_dt(s):
|
|
if not s:
|
|
return None
|
|
try:
|
|
dt = datetime.fromisoformat(s)
|
|
if dt.tzinfo:
|
|
dt = dt.astimezone().replace(tzinfo=None)
|
|
return dt
|
|
except Exception:
|
|
return None
|
|
|
|
|
|
def flag_left_mailbox(conn, run_id, cutoff_local, seen, scanned_roots, stats, dry):
    """Flag database messages that were not seen in the scanned folders.

    A message is flagged when it was not seen in this run, is not flagged
    yet, its last known path starts with one of `scanned_roots`, and its
    received time parses and is not older than `cutoff_local`. The last known
    path is kept; only the flag and timestamps are set.

    Args:
        conn: open sqlite3 connection.
        run_id: id of the current run.
        cutoff_local: naive local datetime bounding the window (must not be None).
        seen: identities seen during this run.
        scanned_roots: path prefixes of the scanned root folders.
        stats: run counters; "left_mailbox" is incremented per flagged row.
        dry: when true, rows are only counted and printed, not updated.
    """
    rows = conn.execute(
        """SELECT message_id, received_at, jnj_folder, folder, not_in_mailbox_anymore
        FROM messages""").fetchall()

    to_flag = []
    for mid, received_at, live_path, first_path, already_flagged in rows:
        if mid in seen or already_flagged:
            continue
        path = live_path or first_path or ""
        in_scope = any(path.startswith(root) for root in scanned_roots)
        if not in_scope:
            continue
        received = _parse_dt(received_at)
        if received is None or received < cutoff_local:
            continue
        to_flag.append((mid, path))

    gone_marker = "*" if dry else " "
    for mid, path in to_flag:
        if not dry:
            conn.execute(
                """UPDATE messages SET not_in_mailbox_anymore=1,
                left_mailbox_at=datetime('now'), updated_at=datetime('now')
                WHERE message_id=?""", (mid,))
        stats["left_mailbox"] += 1
        print(f" GONE{gone_marker} | {path}")

    if to_flag and not dry:
        conn.commit()
    info(conn, run_id, "left_mailbox", detail=str(len(to_flag)))
|
|
|
|
|
|
# ─── MAIN ─────────────────────────────────────────────────────────────────────
|
|
|
|
def main():
    """Command-line entry point: scan the mailbox and sync the bookkeeping DB.

    Parses the arguments, opens the SQLite database, walks every folder in
    SYNC_FOLDERS, optionally flags messages that left the mailbox, stores the
    run counters, prints a summary and (unless disabled or in a dry run)
    uploads the database. The database connection is closed even when the
    run aborts with an exception.
    """
    ap = argparse.ArgumentParser(description=f"jnj_mailbox_sync v{SCRIPT_VERSION}")
    ap.add_argument("--mode", choices=["capture", "update-paths", "full-update"],
                    default="capture")
    ap.add_argument("--days", type=int, default=30,
                    help="Okno ve dnech pro update-paths/full-update (0 = vse)")
    ap.add_argument("--dry-run", action="store_true",
                    help="Nic nezapise/nenahraje, jen vypise co by udelal")
    ap.add_argument("--limit", type=int, default=0, help="Max N polozek (test)")
    ap.add_argument("--no-db-upload", action="store_true")
    args = ap.parse_args()

    mode, dry = args.mode, args.dry_run

    # capture always scans everything; the other modes honour --days (0 = all).
    if mode == "capture" or args.days == 0:
        cutoff_local = None
    else:
        cutoff_local = datetime.now() - timedelta(days=args.days)

    win = "vse" if cutoff_local is None else f"{args.days} dni (od {cutoff_local:%Y-%m-%d %H:%M})"
    print(f"=== jnj_mailbox_sync v{SCRIPT_VERSION} ===")
    print(f"Start: {datetime.now():%Y-%m-%d %H:%M:%S}")
    print(f"Rezim: {mode} Okno: {win} {'[DRY-RUN — nic se nemeni]' if dry else ''}")
    print(f"DB: {DB_PATH}")

    conn = sqlite3.connect(DB_PATH)
    try:
        init_db(conn)
        run_id = start_run(conn, mode, args.days, dry)

        outlook = win32com.client.Dispatch("Outlook.Application")
        ns = outlook.GetNamespace("MAPI")

        stats = {"found": 0, "new_captured": 0, "new_uncaptured": 0, "path_updated": 0,
                 "read_updated": 0, "returned": 0, "left_mailbox": 0, "content_updated": 0,
                 "skipped": 0, "errors": 0}
        seen = set()

        scanned_roots = set()
        for fid, label in SYNC_FOLDERS:
            root = ns.GetDefaultFolder(fid)
            mailbox = root.Parent.Name
            scanned_roots.add(f"/{mailbox}/{root.Name}")
            print(f"\n=== {label} ({mailbox}) ===")
            walk(conn, run_id, root, f"/{mailbox}", cutoff_local, stats, seen, mode, dry, args.limit)

        # "Left the mailbox" detection needs a complete scan of the window,
        # so it is skipped when --limit truncated the walk.
        if mode in ("update-paths", "full-update") and cutoff_local is not None and not (args.limit):
            print("\n--- Kontrola 'opustilo schranku' (v okne, Inbox/Sent/Deleted) ---")
            flag_left_mailbox(conn, run_id, cutoff_local, seen, scanned_roots, stats, dry)
        elif args.limit:
            print("\n(--limit aktivni -> detekce 'opustilo schranku' preskocena)")

        finish_run(conn, run_id, stats)

        # ── Summary ────────────────────────────────────────────────────────
        print(f"\n{'='*60}")
        print(f"SOUHRN [{mode}{' / DRY-RUN' if dry else ''}]")
        print(f" Nalezeno ve schrance: {stats['found']}")
        if mode in ("capture", "full-update"):
            lbl = "by se nahralo" if dry else "nahrano"
            print(f" Nove zachyceno ({lbl}): {stats['new_captured']}")
        else:
            print(f" Nove (bez tela, nedorovnano):{stats['new_uncaptured']}")
        print(f" Aktualizovana cesta: {stats['path_updated']}")
        print(f" Zmena precteno/neprecteno: {stats['read_updated']}")
        print(f" Vraceno do schranky: {stats['returned']}")
        print(f" Obsah zmenen (re-upload): {stats['content_updated']}")
        print(f" Opustilo schranku (GONE): {stats['left_mailbox']}")
        print(f" Beze zmeny (skip): {stats['skipped']}")
        print(f" Chyby: {stats['errors']}")
        print(f"{'='*60}")

        if dry:
            print("DRY-RUN: SQLite ani server se NEMENILY.")
        elif not args.no_db_upload:
            print("\nUpload SQLite na server...")
            upload_db(DB_PATH)

        print(f"\nKonec: {datetime.now():%Y-%m-%d %H:%M:%S}")
        if stats["errors"]:
            print(f"Chyby logovany do: {LOG_PATH}")
    finally:
        # Release the database handle even when the run fails part-way.
        conn.close()
|
|
|
|
|
|
def upload_db(db_path):
    """Compress, encrypt and upload the SQLite database file.

    The file is read whole, compressed with lzma (preset 9 | EXTREME),
    encrypted with the module's Fernet key and POSTed to DB_UPLOAD_URL as
    "jnjemails_<timestamp>.db.xz.enc". Any failure — including a non-OK HTTP
    status — is printed and swallowed, so a failed upload never aborts the run.

    Args:
        db_path: path of the SQLite database file to upload.
    """
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"jnjemails_{ts}.db"
    try:
        with open(db_path, "rb") as f:
            raw = f.read()
        compressed = lzma.compress(raw, preset=9 | lzma.PRESET_EXTREME)
        encrypted = _FERNET.encrypt(compressed)
        enc_filename = filename + ".xz.enc"
        resp = requests.post(
            DB_UPLOAD_URL,
            headers={"Authorization": f"Bearer {TOKEN}"},
            files={"file": (enc_filename, encrypted, "application/octet-stream")},
            timeout=300,
        )
        # Same status handling as upload_msg(): a rejected upload must not be
        # reported as a success just because the error body is valid JSON.
        if not resp.ok:
            raise requests.HTTPError(f"{resp.status_code} {resp.reason} | {resp.text[:200]}")
        mb_raw, mb_xz, mb_enc = (len(raw) / 1048576,
                                 len(compressed) / 1048576,
                                 len(encrypted) / 1048576)
        print(f" DB upload: {resp.json()} "
              f"({mb_raw:.1f} MB -> xz {mb_xz:.1f} MB -> enc {mb_enc:.1f} MB)")
    except Exception as e:
        print(f" DB upload CHYBA: {e}")
|
|
|
|
|
|
# Run the sync only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|