Files
janssen/EmailsImport/Onetime/backfill_entry_id_v1.1.py
T
2026-06-08 07:20:37 +02:00

121 lines
3.6 KiB
Python

"""
backfill_entry_id.py | v1.0 | 2026-06-08
Dohledá entry_id pro záznamy v jnjemails.db které ho nemají (69k starých emailů
přenesených skriptem v1.1). Prochází celý Outlook MAPI strom a páruje emaily
dle Internet Message-ID.
Spouštět na JNJ PC s běžícím Outlookem.
Bezpečné opakovat — přeskočí záznamy které už entry_id mají.
"""
import sqlite3
import win32com.client
from datetime import datetime
DB_PATH = r"C:\Users\vbuzalka\SQLITE\jnjemails.db"
PR_INTERNET_MESSAGE_ID = "http://schemas.microsoft.com/mapi/proptag/0x1035001E"
def load_missing(conn) -> dict:
"""Vrátí dict {message_id: db_id} pro záznamy bez entry_id."""
rows = conn.execute(
"SELECT id, message_id FROM messages WHERE entry_id IS NULL"
).fetchall()
return {r[1]: r[0] for r in rows}
def update_entry_id(conn, db_id: int, entry_id: str):
conn.execute(
"UPDATE messages SET entry_id = ? WHERE id = ?",
(entry_id, db_id)
)
def scan_folder(conn, folder, lookup: dict, stats: dict, path: str = ""):
current = f"{path}/{folder.Name}"
try:
items = folder.Items
for item in items:
try:
if not item.MessageClass.upper().startswith("IPM.NOTE"):
continue
stats["checked"] += 1
try:
mid = item.PropertyAccessor.GetProperty(PR_INTERNET_MESSAGE_ID)
except Exception:
mid = None
if not mid:
mid = f"entryid:{item.EntryID}"
if mid in lookup:
db_id = lookup.pop(mid)
update_entry_id(conn, db_id, item.EntryID)
stats["updated"] += 1
if stats["updated"] % 100 == 0:
conn.commit()
print(f" [{datetime.now().strftime('%H:%M:%S')}] "
f"aktualizováno {stats['updated']} | "
f"zbývá {len(lookup)} | složka: {current}")
except Exception as e:
stats["errors"] += 1
except Exception as e:
print(f" CHYBA složka {current}: {e}")
stats["errors"] += 1
return # nelze ani procházet podsložky
try:
subfolders = list(folder.Folders)
except Exception as e:
print(f" CHYBA podsložky {current}: {e}")
return
for subfolder in subfolders:
if not lookup:
return
scan_folder(conn, subfolder, lookup, stats, current)
def main():
print(f"=== backfill_entry_id v1.0 ===")
print(f"Start: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
conn = sqlite3.connect(DB_PATH)
lookup = load_missing(conn)
total_missing = len(lookup)
print(f"Záznamy bez entry_id: {total_missing}")
if not lookup:
print("Nic k doplnění.")
conn.close()
return
outlook = win32com.client.Dispatch("Outlook.Application")
ns = outlook.GetNamespace("MAPI")
stats = {"checked": 0, "updated": 0, "errors": 0}
for i in range(1, ns.Folders.Count + 1):
if not lookup:
break
root = ns.Folders.Item(i)
print(f"\nSložka: {root.Name}")
scan_folder(conn, root, lookup, stats, "")
conn.commit()
conn.close()
print(f"\n=== Hotovo ===")
print(f"Zkontrolováno emailů: {stats['checked']}")
print(f"Doplněno entry_id: {stats['updated']} / {total_missing}")
print(f"Nenalezeno: {len(lookup)}")
print(f"Chyby: {stats['errors']}")
print(f"Konec: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
if __name__ == "__main__":
main()