""" mailbox_restore_v1.0.py | 2026-06-08 Importuje emaily z .msg souborů na Toweru do schránky vladimir.buzalka@buzalka.cz přes Graph API. Zpracuje záznamy v SQLite které mají entry_id ale nemají graph_id. Spouštět doma — přistupuje přímo na \\tower\JNJEMAILS\. Bezpečně opakovatelný — přeskočí záznamy které graph_id již mají. Závislosti: msal, requests, extract_msg, python-dateutil """ import sqlite3 import sys import base64 import hashlib import time from pathlib import Path from datetime import timezone, datetime import msal import requests import extract_msg as extract_msg_lib from dateutil import parser as dtparser from cryptography.fernet import Fernet sys.stdout.reconfigure(encoding="utf-8") DB_DIR = Path(r"\\tower\JNJEMAILS\db") MSGS_DIR = Path(r"\\tower\JNJEMAILS") GRAPH_TENANT_ID = "7d269944-37a4-43a1-8140-c7517dc426e9" GRAPH_CLIENT_ID = "4b222bfd-78c9-4239-a53f-43006b3ed07f" GRAPH_CLIENT_SECRET = "Txg8Q~MjhocuopxsJyJBhPmDfMxZ2r5WpTFj1dfk" GRAPH_MAILBOX = "vladimir.buzalka@buzalka.cz" GRAPH_ROOT_FOLDER = "JNJ" GRAPH_URL = "https://graph.microsoft.com/v1.0" BATCH_COMMIT = 50 # commit do DB každých N importů RATE_DELAY = 0.1 # sekund mezi requesty (Graph limit ~10k/10min) _graph_token: str | None = None _folder_cache: dict[str, str] = {} def get_latest_db() -> Path: files = sorted(DB_DIR.glob("jnjemails_*.db"), key=lambda f: f.name) if not files: raise FileNotFoundError(f"Žádný jnjemails_*.db v {DB_DIR}") return files[-1] def get_token() -> str: global _graph_token app = msal.ConfidentialClientApplication( GRAPH_CLIENT_ID, authority=f"https://login.microsoftonline.com/{GRAPH_TENANT_ID}", client_credential=GRAPH_CLIENT_SECRET, ) result = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"]) if "access_token" not in result: raise RuntimeError(f"Graph auth failed: {result}") _graph_token = result["access_token"] return _graph_token def graph_headers() -> dict: return {"Authorization": f"Bearer {_graph_token or get_token()}"} def ensure_folder(path_parts: list[str]) -> str: cache_key = "/".join(path_parts) if cache_key in _folder_cache: return _folder_cache[cache_key] headers = graph_headers() parent_id = "Inbox" for i, part in enumerate(path_parts): partial_key = "/".join(path_parts[: i + 1]) if partial_key in _folder_cache: parent_id = _folder_cache[partial_key] continue if parent_id == "Inbox": url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/Inbox/childFolders" else: url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{parent_id}/childFolders" r = requests.get(url, headers=headers, timeout=15) if r.status_code == 401: get_token() headers = graph_headers() r = requests.get(url, headers=headers, timeout=15) found = None for f in r.json().get("value", []): if f["displayName"].lower() == part.lower(): found = f["id"] break if not found: cr = requests.post(url, headers=headers, json={"displayName": part}, timeout=15) if cr.status_code in (200, 201): found = cr.json()["id"] elif cr.status_code == 409: r2 = requests.get(url, headers=headers, timeout=15) for f in r2.json().get("value", []): if f["displayName"].lower() == part.lower(): found = f["id"] break if not found: raise RuntimeError(f"Cannot create folder '{part}': {cr.text}") _folder_cache[partial_key] = found parent_id = found return parent_id def map_folder(jnj_folder: str) -> list[str]: parts = [p for p in jnj_folder.split("/") if p] if not parts: return [GRAPH_ROOT_FOLDER] mailbox = parts[0] rest = parts[1:] prefix = [GRAPH_ROOT_FOLDER] if "online archive" in mailbox.lower(): prefix.append("Online Archive") return prefix + rest if rest else prefix def make_recipient(addr: str) -> dict: if "<" in addr and ">" in addr: name = addr[: addr.index("<")].strip().strip('"') email = addr[addr.index("<") + 1 : addr.index(">")].strip() else: name = addr email = addr return {"emailAddress": {"name": name, "address": email}} def import_msg(msg_path: Path, jnj_folder: str) -> str | None: try: msg = extract_msg_lib.Message(str(msg_path)) subject = msg.subject or "(no subject)" try: body_html = msg.htmlBody if isinstance(body_html, bytes): body_html = body_html.decode("utf-8", errors="replace") except Exception: body_html = None try: body_text = msg.body or "" except Exception: body_text = "" sender_email = "" sender_name = "" to_raw = "" cc_raw = "" date_raw = None try: sender_email = msg.sender or "" except Exception: pass try: sender_name = getattr(msg, "senderName", None) or sender_email except Exception: sender_name = sender_email try: to_raw = msg.to or "" except Exception: pass try: cc_raw = msg.cc or "" except Exception: pass try: date_raw = msg.date except Exception: pass att_list = [] for att in msg.attachments: if att.data and att.longFilename: att_list.append({ "@odata.type": "#microsoft.graph.fileAttachment", "name": att.longFilename, "contentType": getattr(att, "mimetype", None) or "application/octet-stream", "contentBytes": base64.b64encode(att.data).decode(), }) msg.close() to_list = [a.strip() for a in to_raw.split(";") if a.strip()] cc_list = [a.strip() for a in cc_raw.split(";") if a.strip()] folder_parts = map_folder(jnj_folder) folder_id = ensure_folder(folder_parts) ext_props = [{"id": "Integer 0x0E07", "value": "1"}] dt_str = None if date_raw: try: dt = dtparser.parse(str(date_raw)) dt_str = dt.astimezone(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") ext_props.append({"id": "SystemTime 0x0E06", "value": dt_str}) except Exception: pass payload = { "subject": subject, "body": { "contentType": "HTML" if body_html else "Text", "content": body_html or body_text, }, "from": make_recipient(f"{sender_name} <{sender_email}>"), "toRecipients": [make_recipient(a) for a in to_list], "ccRecipients": [make_recipient(a) for a in cc_list], "isRead": True, "singleValueExtendedProperties": ext_props, } if dt_str: payload["sentDateTime"] = dt_str if att_list: payload["attachments"] = att_list headers = graph_headers() url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{folder_id}/messages" r = requests.post(url, headers=headers, json=payload, timeout=30) if r.status_code == 401: get_token() headers = graph_headers() r = requests.post(url, headers=headers, json=payload, timeout=30) if r.status_code in (200, 201): return r.json().get("id") else: print(f" Graph FAIL [{r.status_code}]: {r.text[:200]}") return None except Exception as e: print(f" Chyba import: {e}") return None def main(): print(f"=== mailbox_restore v1.0 ===") print(f"Start: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") db_path = get_latest_db() print(f"DB: {db_path.name}") conn = sqlite3.connect(db_path) conn.row_factory = sqlite3.Row rows = conn.execute(""" SELECT id, entry_id, jnj_folder, subject FROM messages WHERE entry_id IS NOT NULL AND graph_id IS NULL ORDER BY received_at """).fetchall() total = len(rows) print(f"K importu: {total}\n") if not total: print("Nic k importu.") conn.close() return get_token() imported = 0 skipped = 0 errors = 0 for i, row in enumerate(rows, 1): msg_file = MSGS_DIR / (row["entry_id"][-20:] + ".msg") folder = row["jnj_folder"] or "/vbuzalka@its.jnj.com/Inbox" if not msg_file.exists(): skipped += 1 continue graph_id = import_msg(msg_file, folder) if graph_id: conn.execute( "UPDATE messages SET graph_id = ? WHERE id = ?", (graph_id, row["id"]) ) imported += 1 if imported % BATCH_COMMIT == 0: conn.commit() print(f" [{datetime.now().strftime('%H:%M:%S')}] " f"{imported}/{total} importováno | skip {skipped} | chyby {errors}") else: errors += 1 time.sleep(RATE_DELAY) conn.commit() conn.close() print(f"\n=== Hotovo ===") print(f"Importováno: {imported}") print(f"Chybí soubor: {skipped}") print(f"Chyby Graph: {errors}") print(f"Konec: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") if __name__ == "__main__": main()