Files
janssen/EmailsImport/inbox_full_sync_v1.1.py
T

390 lines
14 KiB
Python

"""
inbox_full_sync v1.1
Název: inbox_full_sync_v1.1.py
Verze: 1.1.0
Datum: 2026-06-08
Autor: vladimir.buzalka
Popis:
Jednorázový skript pro úplný přenos Inboxu A Sent Items z JNJ Outlooku (MAPI)
do osobní schránky vladimir.buzalka@buzalka.cz přes Graph API.
Prochází celý Inbox i Sent Items včetně všech podsložek. Online Archive se
nepřenáší (GetDefaultFolder(5/6) vrátí pouze primární schránku).
Každý email se uloží jako .msg do temp složky, odešle na https://msgs.buzalka.cz/upload
a přes Graph API se importuje do odpovídající složky v osobní schránce.
Dedup zajišťuje SQLite DB — email který je v DB (message_id) se přeskočí.
Spouštění:
Spouštět ručně jako záchranná síť nebo iniciální sync.
Bezpečné opakovat — duplicity se přeskočí.
Závislosti:
win32com, requests, sqlite3 (stdlib)
Python 3.10+, Windows, Outlook musí být spuštěn
Konfigurace (konstanty v kódu):
TOKEN Bearer token pro msgs.buzalka.cz
UPLOAD_URL https://msgs.buzalka.cz/upload
DB_UPLOAD_URL https://msgs.buzalka.cz/upload-db
DB_PATH C:\\Users\\vbuzalka\\SQLITE\\jnjemails.db
LOG_PATH C:\\Users\\vbuzalka\\SQLITE\\inbox_full_sync_errors.log
SQLite DB (jnjemails.db):
messages — přenesené emaily (message_id, entry_id, graph_id, is_read, jnj_folder, ...)
runs — jeden záznam na běh (script, version, started_at, finished_at, counts)
log — flat event log per run (level, event, subject, folder, graph_id, detail)
Dotaz pro posledn běh:
SELECT r.script, r.version, r.started_at, l.level, l.event,
l.subject, l.folder, l.detail, l.created_at
FROM log l JOIN runs r ON r.id = l.run_id
WHERE l.run_id = (SELECT MAX(id) FROM runs)
ORDER BY l.created_at
Log události (log.event):
run_start — start skriptu
mailbox — název schránky
folder_start — vstup do složky (detail = počet položek)
folder_done — konec složky (detail = přeneseno/skip)
upload_saved — nový email přenesen
upload_exists — email již v DB, přeskočen
upload_error — chyba při uploadu (detail = chybová zpráva)
progress — každých 100 přenesených
db_upload — úspěšný upload DB na server
db_upload_error — chyba uploadu DB
run_done — konec skriptu (detail = souhrn)
Historie verzí:
1.0.0 2026-06-01 Základní funkce: Inbox full scan, dedup přes DB, entry_id/graph_id/is_read
1.0.1 2026-06-01 DB upload každých 100 emailů + finální upload
1.0.2 2026-06-01 SQLite tabulky runs + log
1.0.3 2026-06-01 Kompletní konzolový výstup zrcadlen do log tabulky, skipped counter
1.0.4 2026-06-01 Šifrování Fernet (.emsg) pro bypass Zscaler DLP; rozšířený error detail
1.1.0 2026-06-08 Synchronizace i složky Sent Items (GetDefaultFolder(5)) vedle Inboxu
"""
import win32com.client
import requests
import sqlite3
import urllib3
import logging
import hashlib
import base64
from pathlib import Path
from datetime import datetime
from cryptography.fernet import Fernet
import tempfile
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
TOKEN = "13e1bb01-9fd5-44a8-8ce9-4ee27133d340"
UPLOAD_URL = "https://msgs.buzalka.cz/upload"
DB_PATH = r"C:\Users\vbuzalka\SQLITE\jnjemails.db"
LOG_PATH = r"C:\Users\vbuzalka\SQLITE\inbox_full_sync_errors.log"
PR_INTERNET_MESSAGE_ID = "http://schemas.microsoft.com/mapi/proptag/0x1035001E"
DB_UPLOAD_URL = "https://msgs.buzalka.cz/upload-db"
SCRIPT_NAME = "inbox_full_sync"
SCRIPT_VERSION = "1.1.0"
# Výchozí složky k synchronizaci: (olFolderID, label) — Inbox=6, Sent Items=5
SYNC_FOLDERS = [(6, "Inbox"), (5, "Sent Items")]
# Šifrovací klíč odvozený z TOKENu — stejný algoritmus jako na serveru
_FERNET = Fernet(base64.urlsafe_b64encode(hashlib.sha256(TOKEN.encode()).digest()))
logging.basicConfig(
filename=LOG_PATH,
level=logging.ERROR,
format="%(asctime)s | %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
encoding="utf-8",
)
def init_db(conn):
conn.execute("""
CREATE TABLE IF NOT EXISTS messages (
id INTEGER PRIMARY KEY AUTOINCREMENT,
message_id TEXT NOT NULL,
subject TEXT,
sender TEXT,
received_at TEXT,
folder TEXT,
source TEXT,
uploaded_at TEXT DEFAULT (datetime('now')),
entry_id TEXT,
graph_id TEXT,
is_read INTEGER DEFAULT 0,
jnj_folder TEXT
)
""")
conn.execute("CREATE UNIQUE INDEX IF NOT EXISTS idx_message_id ON messages(message_id)")
conn.execute("""
CREATE TABLE IF NOT EXISTS runs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
script TEXT NOT NULL,
version TEXT,
started_at TEXT NOT NULL,
finished_at TEXT,
transferred INTEGER DEFAULT 0,
skipped INTEGER DEFAULT 0,
sync_updated INTEGER DEFAULT 0,
sync_deleted INTEGER DEFAULT 0,
errors INTEGER DEFAULT 0
)
""")
conn.execute("""
CREATE TABLE IF NOT EXISTS log (
id INTEGER PRIMARY KEY AUTOINCREMENT,
run_id INTEGER REFERENCES runs(id),
level TEXT NOT NULL,
event TEXT NOT NULL,
subject TEXT,
folder TEXT,
graph_id TEXT,
detail TEXT,
created_at TEXT DEFAULT (datetime('now'))
)
""")
conn.execute("CREATE INDEX IF NOT EXISTS idx_log_run_id ON log(run_id)")
for col, definition in [
("entry_id", "TEXT"),
("graph_id", "TEXT"),
("is_read", "INTEGER DEFAULT 0"),
("jnj_folder", "TEXT"),
]:
try:
conn.execute(f"ALTER TABLE messages ADD COLUMN {col} {definition}")
except Exception:
pass
conn.commit()
def start_run(conn):
cur = conn.execute(
"INSERT INTO runs (script, version, started_at) VALUES (?, ?, datetime('now'))",
(SCRIPT_NAME, SCRIPT_VERSION)
)
conn.commit()
return cur.lastrowid
def finish_run(conn, run_id, transferred, skipped, errors):
conn.execute("""
UPDATE runs SET finished_at=datetime('now'), transferred=?, skipped=?, errors=?
WHERE id=?
""", (transferred, skipped, errors, run_id))
conn.commit()
def db_log(conn, run_id, level, event, subject=None, folder=None, graph_id=None, detail=None):
conn.execute("""
INSERT INTO log (run_id, level, event, subject, folder, graph_id, detail)
VALUES (?, ?, ?, ?, ?, ?, ?)
""", (run_id, level, event, subject, folder, graph_id, detail))
conn.commit()
def info(conn, run_id, event, **kwargs):
db_log(conn, run_id, "INFO", event, **kwargs)
def error(conn, run_id, event, **kwargs):
db_log(conn, run_id, "ERROR", event, **kwargs)
def is_uploaded(conn, message_id):
row = conn.execute(
"SELECT 1 FROM messages WHERE message_id = ? LIMIT 1", (message_id,)
).fetchone()
return row is not None
def save_to_db(conn, message_id, subject, sender, received_at, folder,
entry_id=None, graph_id=None, is_read=0):
conn.execute("""
INSERT OR IGNORE INTO messages
(message_id, subject, sender, received_at, folder, source,
entry_id, graph_id, is_read, jnj_folder)
VALUES (?, ?, ?, ?, ?, 'inbox_full_sync', ?, ?, ?, ?)
""", (message_id, subject, sender, received_at, folder,
entry_id, graph_id, is_read, folder))
conn.commit()
def upload_db(conn, run_id):
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"jnjemails_{timestamp}.db"
try:
with open(DB_PATH, "rb") as f:
resp = requests.post(
DB_UPLOAD_URL,
headers={"Authorization": f"Bearer {TOKEN}"},
files={"file": (filename, f, "application/octet-stream")},
timeout=60,
)
result = resp.json()
msg = f"DB upload: {result}"
print(f" {msg}")
info(conn, run_id, "db_upload", detail=msg)
except Exception as e:
msg = str(e)
print(f" DB upload CHYBA: {msg}")
error(conn, run_id, "db_upload_error", detail=msg)
def upload_msg(msg_path, filename, folder=""):
size_kb = Path(msg_path).stat().st_size // 1024
with open(msg_path, "rb") as f:
encrypted = _FERNET.encrypt(f.read())
enc_filename = Path(filename).stem + ".emsg"
resp = requests.post(
UPLOAD_URL,
headers={"Authorization": f"Bearer {TOKEN}"},
files={"file": (enc_filename, encrypted, "application/octet-stream")},
data={"folder": folder},
timeout=60,
)
if not resp.ok:
raise requests.HTTPError(
f"{resp.status_code} {resp.reason} | size={size_kb}KB | body={resp.text[:300]}",
response=resp,
)
return resp.json()
def process_folder(conn, run_id, folder, folder_path, counter, skipped_counter, error_counter):
current_path = f"{folder_path}/{folder.Name}"
items = folder.Items
items.Sort("[ReceivedTime]", False)
count = 0
skipped = 0
total = items.Count
msg = f"Složka: {current_path} ({total} položek)"
print(f"\n {msg}")
info(conn, run_id, "folder_start", folder=current_path, detail=str(total))
for item in items:
subject = getattr(item, 'Subject', '?')
try:
if not item.MessageClass.upper().startswith("IPM.NOTE"):
continue
try:
mid = item.PropertyAccessor.GetProperty(PR_INTERNET_MESSAGE_ID)
except Exception:
mid = None
if not mid:
mid = f"entryid:{item.EntryID}"
if is_uploaded(conn, mid):
skipped += 1
skipped_counter[0] += 1
continue
try:
with tempfile.TemporaryDirectory() as tmp:
safe_name = f"{item.EntryID[-20:]}.msg"
tmp_path = Path(tmp) / safe_name
item.SaveAs(str(tmp_path), 3)
size_kb = tmp_path.stat().st_size // 1024
result = upload_msg(tmp_path, safe_name, current_path)
status = result.get("status", "?")
graph_id = result.get("graph_id")
is_read = 0 if item.UnRead else 1
received = item.ReceivedTime.isoformat() if item.ReceivedTime else None
save_to_db(conn, mid, subject, item.SenderEmailAddress,
received, current_path,
entry_id=item.EntryID, graph_id=graph_id, is_read=is_read)
info(conn, run_id, f"upload_{status}",
subject=subject, folder=current_path, graph_id=graph_id,
detail=f"size={size_kb}KB")
counter[0] += 1
count += 1
if counter[0] % 100 == 0:
msg = f"celkem přeneseno: {counter[0]}"
print(f"{msg}, uploaduji DB...")
info(conn, run_id, "progress", detail=msg)
upload_db(conn, run_id)
print(f" {status.upper():6} | {subject[:70]}")
except Exception as e:
sender_str = getattr(item, 'SenderEmailAddress', '?')
received_str = getattr(item, 'ReceivedTime', None)
received_str = received_str.isoformat() if received_str else '?'
entry_id_str = getattr(item, 'EntryID', '?')
detail = (
f"error={e} | "
f"sender={sender_str} | "
f"received={received_str} | "
f"entry_id={entry_id_str} | "
f"message_id={mid}"
)
print(f" CHYBA | {subject[:50]} | sender={sender_str} | received={received_str} | {e}")
error(conn, run_id, "upload_error",
subject=subject, folder=current_path, detail=detail)
logging.error("folder=%s | %s", current_path, detail)
error_counter[0] += 1
except Exception as e:
# Neočekávaná chyba mimo upload blok (MessageClass, EntryID, apod.)
print(f" CHYBA (item) | {subject[:50]} | {e}")
logging.error("folder=%s | item_error | subject=%s | error=%s", current_path, subject, e)
error_counter[0] += 1
msg = f"složka hotova: přeneseno {count} | skip {skipped}"
print(f"{msg}")
info(conn, run_id, "folder_done", folder=current_path, detail=msg)
for subfolder in folder.Folders:
process_folder(conn, run_id, subfolder, current_path, counter, skipped_counter, error_counter)
# --- MAIN ---
print(f"=== inbox_full_sync v{SCRIPT_VERSION} ===")
print(f"Start: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
conn = sqlite3.connect(DB_PATH)
init_db(conn)
run_id = start_run(conn)
info(conn, run_id, "run_start", detail=f"script={SCRIPT_NAME} version={SCRIPT_VERSION}")
outlook = win32com.client.Dispatch("Outlook.Application")
ns = outlook.GetNamespace("MAPI")
counter = [0]
skipped_counter = [0]
error_counter = [0]
for folder_id, folder_label in SYNC_FOLDERS:
root = ns.GetDefaultFolder(folder_id) # primární schránka, bez Online Archive
mailbox_name = root.Parent.Name
print(f"\nSchránka: {mailbox_name} | kořen: {folder_label}")
info(conn, run_id, "mailbox", detail=f"{mailbox_name} | {folder_label}")
process_folder(conn, run_id, root, f"/{mailbox_name}", counter, skipped_counter, error_counter)
finish_run(conn, run_id,
transferred=counter[0],
skipped=skipped_counter[0],
errors=error_counter[0])
summary = f"přeneseno {counter[0]} | skip {skipped_counter[0]} | chyby {error_counter[0]}"
print(f"\n=== Hotovo: {summary} ===")
info(conn, run_id, "run_done", detail=summary)
print("Uploaduji DB...")
upload_db(conn, run_id)
print(f"Konec: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"Chyby logovány do: {LOG_PATH}")
conn.close()