notebook
This commit is contained in:
@@ -0,0 +1,316 @@
|
||||
"""
|
||||
mailbox_restore_v1.0.py | 2026-06-08
|
||||
Importuje emaily z .msg souborů na Toweru do schránky vladimir.buzalka@buzalka.cz
|
||||
přes Graph API. Zpracuje záznamy v SQLite které mají entry_id ale nemají graph_id.
|
||||
|
||||
Spouštět doma — přistupuje přímo na \\tower\JNJEMAILS\.
|
||||
Bezpečně opakovatelný — přeskočí záznamy které graph_id již mají.
|
||||
|
||||
Závislosti: msal, requests, extract_msg, python-dateutil
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
import sys
|
||||
import base64
|
||||
import hashlib
|
||||
import time
|
||||
from pathlib import Path
|
||||
from datetime import timezone, datetime
|
||||
|
||||
import msal
|
||||
import requests
|
||||
import extract_msg as extract_msg_lib
|
||||
from dateutil import parser as dtparser
|
||||
from cryptography.fernet import Fernet
|
||||
|
||||
sys.stdout.reconfigure(encoding="utf-8")
|
||||
|
||||
DB_DIR = Path(r"\\tower\JNJEMAILS\db")
|
||||
MSGS_DIR = Path(r"\\tower\JNJEMAILS")
|
||||
|
||||
GRAPH_TENANT_ID = "7d269944-37a4-43a1-8140-c7517dc426e9"
|
||||
GRAPH_CLIENT_ID = "4b222bfd-78c9-4239-a53f-43006b3ed07f"
|
||||
GRAPH_CLIENT_SECRET = "Txg8Q~MjhocuopxsJyJBhPmDfMxZ2r5WpTFj1dfk"
|
||||
GRAPH_MAILBOX = "vladimir.buzalka@buzalka.cz"
|
||||
GRAPH_ROOT_FOLDER = "JNJ"
|
||||
GRAPH_URL = "https://graph.microsoft.com/v1.0"
|
||||
|
||||
BATCH_COMMIT = 50 # commit do DB každých N importů
|
||||
RATE_DELAY = 0.1 # sekund mezi requesty (Graph limit ~10k/10min)
|
||||
|
||||
_graph_token: str | None = None
|
||||
_folder_cache: dict[str, str] = {}
|
||||
|
||||
|
||||
def get_latest_db() -> Path:
|
||||
files = sorted(DB_DIR.glob("jnjemails_*.db"), key=lambda f: f.name)
|
||||
if not files:
|
||||
raise FileNotFoundError(f"Žádný jnjemails_*.db v {DB_DIR}")
|
||||
return files[-1]
|
||||
|
||||
|
||||
def get_token() -> str:
|
||||
global _graph_token
|
||||
app = msal.ConfidentialClientApplication(
|
||||
GRAPH_CLIENT_ID,
|
||||
authority=f"https://login.microsoftonline.com/{GRAPH_TENANT_ID}",
|
||||
client_credential=GRAPH_CLIENT_SECRET,
|
||||
)
|
||||
result = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"])
|
||||
if "access_token" not in result:
|
||||
raise RuntimeError(f"Graph auth failed: {result}")
|
||||
_graph_token = result["access_token"]
|
||||
return _graph_token
|
||||
|
||||
|
||||
def graph_headers() -> dict:
|
||||
return {"Authorization": f"Bearer {_graph_token or get_token()}"}
|
||||
|
||||
|
||||
def ensure_folder(path_parts: list[str]) -> str:
|
||||
cache_key = "/".join(path_parts)
|
||||
if cache_key in _folder_cache:
|
||||
return _folder_cache[cache_key]
|
||||
|
||||
headers = graph_headers()
|
||||
parent_id = "Inbox"
|
||||
|
||||
for i, part in enumerate(path_parts):
|
||||
partial_key = "/".join(path_parts[: i + 1])
|
||||
if partial_key in _folder_cache:
|
||||
parent_id = _folder_cache[partial_key]
|
||||
continue
|
||||
|
||||
if parent_id == "Inbox":
|
||||
url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/Inbox/childFolders"
|
||||
else:
|
||||
url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{parent_id}/childFolders"
|
||||
|
||||
r = requests.get(url, headers=headers, timeout=15)
|
||||
if r.status_code == 401:
|
||||
get_token()
|
||||
headers = graph_headers()
|
||||
r = requests.get(url, headers=headers, timeout=15)
|
||||
|
||||
found = None
|
||||
for f in r.json().get("value", []):
|
||||
if f["displayName"].lower() == part.lower():
|
||||
found = f["id"]
|
||||
break
|
||||
|
||||
if not found:
|
||||
cr = requests.post(url, headers=headers, json={"displayName": part}, timeout=15)
|
||||
if cr.status_code in (200, 201):
|
||||
found = cr.json()["id"]
|
||||
elif cr.status_code == 409:
|
||||
r2 = requests.get(url, headers=headers, timeout=15)
|
||||
for f in r2.json().get("value", []):
|
||||
if f["displayName"].lower() == part.lower():
|
||||
found = f["id"]
|
||||
break
|
||||
if not found:
|
||||
raise RuntimeError(f"Cannot create folder '{part}': {cr.text}")
|
||||
|
||||
_folder_cache[partial_key] = found
|
||||
parent_id = found
|
||||
|
||||
return parent_id
|
||||
|
||||
|
||||
def map_folder(jnj_folder: str) -> list[str]:
|
||||
parts = [p for p in jnj_folder.split("/") if p]
|
||||
if not parts:
|
||||
return [GRAPH_ROOT_FOLDER]
|
||||
mailbox = parts[0]
|
||||
rest = parts[1:]
|
||||
prefix = [GRAPH_ROOT_FOLDER]
|
||||
if "online archive" in mailbox.lower():
|
||||
prefix.append("Online Archive")
|
||||
return prefix + rest if rest else prefix
|
||||
|
||||
|
||||
def make_recipient(addr: str) -> dict:
|
||||
if "<" in addr and ">" in addr:
|
||||
name = addr[: addr.index("<")].strip().strip('"')
|
||||
email = addr[addr.index("<") + 1 : addr.index(">")].strip()
|
||||
else:
|
||||
name = addr
|
||||
email = addr
|
||||
return {"emailAddress": {"name": name, "address": email}}
|
||||
|
||||
|
||||
def import_msg(msg_path: Path, jnj_folder: str) -> str | None:
|
||||
try:
|
||||
msg = extract_msg_lib.Message(str(msg_path))
|
||||
subject = msg.subject or "(no subject)"
|
||||
|
||||
try:
|
||||
body_html = msg.htmlBody
|
||||
if isinstance(body_html, bytes):
|
||||
body_html = body_html.decode("utf-8", errors="replace")
|
||||
except Exception:
|
||||
body_html = None
|
||||
|
||||
try:
|
||||
body_text = msg.body or ""
|
||||
except Exception:
|
||||
body_text = ""
|
||||
|
||||
sender_email = ""
|
||||
sender_name = ""
|
||||
to_raw = ""
|
||||
cc_raw = ""
|
||||
date_raw = None
|
||||
|
||||
try:
|
||||
sender_email = msg.sender or ""
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
sender_name = getattr(msg, "senderName", None) or sender_email
|
||||
except Exception:
|
||||
sender_name = sender_email
|
||||
try:
|
||||
to_raw = msg.to or ""
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
cc_raw = msg.cc or ""
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
date_raw = msg.date
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
att_list = []
|
||||
for att in msg.attachments:
|
||||
if att.data and att.longFilename:
|
||||
att_list.append({
|
||||
"@odata.type": "#microsoft.graph.fileAttachment",
|
||||
"name": att.longFilename,
|
||||
"contentType": getattr(att, "mimetype", None) or "application/octet-stream",
|
||||
"contentBytes": base64.b64encode(att.data).decode(),
|
||||
})
|
||||
msg.close()
|
||||
|
||||
to_list = [a.strip() for a in to_raw.split(";") if a.strip()]
|
||||
cc_list = [a.strip() for a in cc_raw.split(";") if a.strip()]
|
||||
|
||||
folder_parts = map_folder(jnj_folder)
|
||||
folder_id = ensure_folder(folder_parts)
|
||||
|
||||
ext_props = [{"id": "Integer 0x0E07", "value": "1"}]
|
||||
dt_str = None
|
||||
if date_raw:
|
||||
try:
|
||||
dt = dtparser.parse(str(date_raw))
|
||||
dt_str = dt.astimezone(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||
ext_props.append({"id": "SystemTime 0x0E06", "value": dt_str})
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
payload = {
|
||||
"subject": subject,
|
||||
"body": {
|
||||
"contentType": "HTML" if body_html else "Text",
|
||||
"content": body_html or body_text,
|
||||
},
|
||||
"from": make_recipient(f"{sender_name} <{sender_email}>"),
|
||||
"toRecipients": [make_recipient(a) for a in to_list],
|
||||
"ccRecipients": [make_recipient(a) for a in cc_list],
|
||||
"isRead": True,
|
||||
"singleValueExtendedProperties": ext_props,
|
||||
}
|
||||
if dt_str:
|
||||
payload["sentDateTime"] = dt_str
|
||||
if att_list:
|
||||
payload["attachments"] = att_list
|
||||
|
||||
headers = graph_headers()
|
||||
url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{folder_id}/messages"
|
||||
r = requests.post(url, headers=headers, json=payload, timeout=30)
|
||||
if r.status_code == 401:
|
||||
get_token()
|
||||
headers = graph_headers()
|
||||
r = requests.post(url, headers=headers, json=payload, timeout=30)
|
||||
|
||||
if r.status_code in (200, 201):
|
||||
return r.json().get("id")
|
||||
else:
|
||||
print(f" Graph FAIL [{r.status_code}]: {r.text[:200]}")
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
print(f" Chyba import: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def main():
|
||||
print(f"=== mailbox_restore v1.0 ===")
|
||||
print(f"Start: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
|
||||
|
||||
db_path = get_latest_db()
|
||||
print(f"DB: {db_path.name}")
|
||||
|
||||
conn = sqlite3.connect(db_path)
|
||||
conn.row_factory = sqlite3.Row
|
||||
|
||||
rows = conn.execute("""
|
||||
SELECT id, entry_id, jnj_folder, subject
|
||||
FROM messages
|
||||
WHERE entry_id IS NOT NULL AND graph_id IS NULL
|
||||
ORDER BY received_at
|
||||
""").fetchall()
|
||||
|
||||
total = len(rows)
|
||||
print(f"K importu: {total}\n")
|
||||
|
||||
if not total:
|
||||
print("Nic k importu.")
|
||||
conn.close()
|
||||
return
|
||||
|
||||
get_token()
|
||||
|
||||
imported = 0
|
||||
skipped = 0
|
||||
errors = 0
|
||||
|
||||
for i, row in enumerate(rows, 1):
|
||||
msg_file = MSGS_DIR / (row["entry_id"][-20:] + ".msg")
|
||||
folder = row["jnj_folder"] or "/vbuzalka@its.jnj.com/Inbox"
|
||||
|
||||
if not msg_file.exists():
|
||||
skipped += 1
|
||||
continue
|
||||
|
||||
graph_id = import_msg(msg_file, folder)
|
||||
|
||||
if graph_id:
|
||||
conn.execute(
|
||||
"UPDATE messages SET graph_id = ? WHERE id = ?",
|
||||
(graph_id, row["id"])
|
||||
)
|
||||
imported += 1
|
||||
if imported % BATCH_COMMIT == 0:
|
||||
conn.commit()
|
||||
print(f" [{datetime.now().strftime('%H:%M:%S')}] "
|
||||
f"{imported}/{total} importováno | skip {skipped} | chyby {errors}")
|
||||
else:
|
||||
errors += 1
|
||||
|
||||
time.sleep(RATE_DELAY)
|
||||
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
print(f"\n=== Hotovo ===")
|
||||
print(f"Importováno: {imported}")
|
||||
print(f"Chybí soubor: {skipped}")
|
||||
print(f"Chyby Graph: {errors}")
|
||||
print(f"Konec: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user