This commit is contained in:
2026-06-08 07:20:37 +02:00
parent 0d3407e664
commit 70899149e4
14 changed files with 1162 additions and 14 deletions
+28
View File
@@ -0,0 +1,28 @@
# Diagnostic script: open the first .msg file found on the share and print its
# Internet Message-ID, the MAPI property ids it contains, and whichever
# entry-id style attributes the parsed message object exposes.
import glob
import os
import sys

sys.stdout.reconfigure(encoding="utf-8")

import extract_msg

files = glob.glob(r"\\tower\JNJEMAILS\*.msg")
if not files:
    # Without this guard files[0] fails with a bare IndexError.
    raise SystemExit(r"Žádný .msg soubor v \\tower\JNJEMAILS")

f = files[0]
fname = os.path.basename(f)
print(f"Soubor: {fname}")
print("(filename = posledních 20 znaků entry_id)")
print()

m = extract_msg.Message(f)
try:
    print(f"messageId (Internet Message-ID): {m.messageId!r}")
    print()
    print("--- MAPI properties v souboru ---")
    try:
        for pid in sorted(m.props.keys()):
            prop = m.props[pid]
            name = getattr(prop, "name", "")
            print(f" {pid} {name}")
    except Exception as e:
        # Property listing is best-effort; keep going to the attribute probe.
        print(f" (props nedostupné: {e})")
    print()
    # Probe several spellings because the attribute name is not known up front.
    for attr in ("entryId", "entryID", "entry_id"):
        print(f" m.{attr} = {getattr(m, attr, '<není>')!r}")
finally:
    # Release the file handle even when one of the prints above raises.
    m.close()
+19
View File
@@ -0,0 +1,19 @@
# Diagnostic script: print every table of the newest jnjemails_*.db snapshot
# with its columns (name, type, PK / NOT NULL / default flags) and row count.
import glob
import os
import sqlite3
import sys

sys.stdout.reconfigure(encoding="utf-8")

files = sorted(glob.glob(r"\\tower\JNJEMAILS\db\jnjemails_*.db"))
if not files:
    # Without this guard files[-1] fails with a bare IndexError.
    raise SystemExit(r"Žádný jnjemails_*.db v \\tower\JNJEMAILS\db")

# Names sort lexicographically, so the last one is taken as the newest.
db = files[-1]
print(f"DB: {os.path.basename(db)}\n")

conn = sqlite3.connect(db)
try:
    tables = [
        row[0]
        for row in conn.execute(
            "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name"
        )
    ]
    for tbl in tables:
        # Identifiers cannot be bound as parameters; quote them so table names
        # containing spaces, keywords or quotes do not break the statement.
        quoted = '"' + tbl.replace('"', '""') + '"'
        print(f"=== {tbl} ===")
        for cid, name, ctype, notnull, dflt, pk in conn.execute(
            f"PRAGMA table_info({quoted})"
        ):
            flags = []
            if pk:
                flags.append("PK")
            if notnull:
                flags.append("NOT NULL")
            if dflt is not None:
                flags.append(f"default={dflt}")
            print(f" {name:14} {ctype:10} {' '.join(flags)}")
        cnt = conn.execute(f"SELECT COUNT(*) FROM {quoted}").fetchone()[0]
        print(f"{cnt} řádků\n")
finally:
    conn.close()
@@ -0,0 +1,120 @@
"""
backfill_entry_id.py | v1.0 | 2026-06-08
Dohledá entry_id pro záznamy v jnjemails.db které ho nemají (69k starých emailů
přenesených skriptem v1.1). Prochází celý Outlook MAPI strom a páruje emaily
dle Internet Message-ID.
Spouštět na JNJ PC s běžícím Outlookem.
Bezpečné opakovat — přeskočí záznamy které už entry_id mají.
"""
import sqlite3
import win32com.client
from datetime import datetime
# SQLite database opened by main(); its `messages` rows get entry_id backfilled.
DB_PATH = r"C:\Users\vbuzalka\SQLITE\jnjemails.db"
# Property name handed to PropertyAccessor.GetProperty() in scan_folder() to read
# an item's Internet Message-ID (MAPI property tag 0x1035, type suffix 001E).
PR_INTERNET_MESSAGE_ID = "http://schemas.microsoft.com/mapi/proptag/0x1035001E"
def load_missing(conn) -> dict:
    """Return {message_id: db_id} for every `messages` row whose entry_id is NULL.

    Args:
        conn: open sqlite3 connection to the database holding `messages`.
    """
    missing = {}
    cursor = conn.execute(
        "SELECT id, message_id FROM messages WHERE entry_id IS NULL"
    )
    for db_id, message_id in cursor:
        # A later row with the same message_id overwrites the earlier one.
        missing[message_id] = db_id
    return missing
def update_entry_id(conn, db_id: int, entry_id: str):
    """Write *entry_id* onto the `messages` row whose id is *db_id*.

    The statement is executed on *conn* without committing; the caller
    decides when to commit.
    """
    statement = "UPDATE messages SET entry_id = ? WHERE id = ?"
    params = (entry_id, db_id)
    conn.execute(statement, params)
def scan_folder(conn, folder, lookup: dict, stats: dict, path: str = ""):
    """Walk an Outlook folder tree and backfill entry_id for matching mails.

    Every item whose MessageClass starts with "IPM.NOTE" is keyed by its
    Internet Message-ID (or by "entryid:<EntryID>" when that property is
    missing or unreadable). If the key is in *lookup*, the item's EntryID is
    written to the database and the key is removed from *lookup*.

    Args:
        conn: open sqlite3 connection; committed after every 100th update.
        folder: Outlook folder COM object (uses Name, Items, Folders).
        lookup: {message_id: db_id} of rows still lacking entry_id; mutated
            in place as matches are found.
        stats: counters "checked", "updated", "errors"; mutated in place.
        path: display path of the parent folder, used only in messages.
    """
    current = f"{path}/{folder.Name}"
    try:
        for item in folder.Items:
            if not lookup:
                # Everything is matched; reading more items is wasted COM work.
                break
            try:
                if not item.MessageClass.upper().startswith("IPM.NOTE"):
                    continue
                stats["checked"] += 1
                try:
                    mid = item.PropertyAccessor.GetProperty(PR_INTERNET_MESSAGE_ID)
                except Exception:
                    mid = None
                if not mid:
                    # Same fallback key format that the DB rows use.
                    mid = f"entryid:{item.EntryID}"
                if mid not in lookup:
                    continue
                update_entry_id(conn, lookup[mid], item.EntryID)
                # Drop the key only after the UPDATE went through, so a failed
                # update is still reported as outstanding at the end.
                del lookup[mid]
                stats["updated"] += 1
                if stats["updated"] % 100 == 0:
                    conn.commit()
                    print(f" [{datetime.now().strftime('%H:%M:%S')}] "
                          f"aktualizováno {stats['updated']} | "
                          f"zbývá {len(lookup)} | složka: {current}")
            except Exception as e:
                # One bad item must not stop the scan, but say what happened.
                stats["errors"] += 1
                print(f" CHYBA položka {current}: {e}")
    except Exception as e:
        print(f" CHYBA složka {current}: {e}")
        stats["errors"] += 1
        return  # the folder is unreadable, so its subfolders are not walked either
    try:
        subfolders = list(folder.Folders)
    except Exception as e:
        print(f" CHYBA podsložky {current}: {e}")
        return
    for subfolder in subfolders:
        if not lookup:
            return
        scan_folder(conn, subfolder, lookup, stats, current)
def main():
    """Backfill entry_id for all DB rows lacking it by scanning every Outlook store.

    Loads the rows without entry_id, walks each top-level MAPI folder with
    scan_folder() until nothing is left to match, then prints a summary.
    """
    print("=== backfill_entry_id v1.0 ===")
    print(f"Start: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    conn = sqlite3.connect(DB_PATH)
    stats = {"checked": 0, "updated": 0, "errors": 0}
    try:
        lookup = load_missing(conn)
        total_missing = len(lookup)
        print(f"Záznamy bez entry_id: {total_missing}")
        if not lookup:
            print("Nic k doplnění.")
            return
        outlook = win32com.client.Dispatch("Outlook.Application")
        ns = outlook.GetNamespace("MAPI")
        # The Folders collection is addressed with 1-based indexes here.
        for i in range(1, ns.Folders.Count + 1):
            if not lookup:
                break
            root = ns.Folders.Item(i)
            print(f"\nSložka: {root.Name}")
            scan_folder(conn, root, lookup, stats, "")
    finally:
        # scan_folder commits only every 100 updates; flush the remainder and
        # release the database even when Outlook or the scan raised.
        try:
            conn.commit()
        finally:
            conn.close()
    print("\n=== Hotovo ===")
    print(f"Zkontrolováno emailů: {stats['checked']}")
    print(f"Doplněno entry_id: {stats['updated']} / {total_missing}")
    print(f"Nenalezeno: {len(lookup)}")
    print(f"Chyby: {stats['errors']}")
    print(f"Konec: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")


if __name__ == "__main__":
    main()
+90
View File
@@ -0,0 +1,90 @@
"""
check_msg_files.py
Zkontroluje, zda má každý záznam v jnjemails SQLite odpovídající .msg soubor
fyzicky uložený na \\\\tower\\JNJEMAILS\\.
DB: \\\\tower\\JNJEMAILS\\db\\jnjemails_*.db (nejnovější)
Soubory: \\\\tower\\JNJEMAILS\\*.msg
Název souboru = entry_id[-20:] + ".msg"
Záznamy bez entry_id mají fallback message_id "entryid:..." — ty se přeskočí
zvlášť (server je nemohl uložit standardním názvem).
"""
import sqlite3
import sys
from pathlib import Path
# Switch stdout to UTF-8; the script prints Czech text.
sys.stdout.reconfigure(encoding="utf-8")
# Directory searched by get_latest_db() for jnjemails_*.db files.
DB_DIR = Path(r"\\tower\JNJEMAILS\db")
# Directory in which main() expects one "<entry_id[-20:]>.msg" file per record.
MSGS_DIR = Path(r"\\tower\JNJEMAILS")
def get_latest_db() -> Path:
    """Return the jnjemails_*.db file in DB_DIR whose file name sorts last.

    Raises:
        FileNotFoundError: if DB_DIR contains no matching file.
    """
    candidates = list(DB_DIR.glob("jnjemails_*.db"))
    if not candidates:
        raise FileNotFoundError(f"Žádný jnjemails_*.db v {DB_DIR}")
    return max(candidates, key=lambda candidate: candidate.name)
def main():
    """Report which DB records have no matching .msg file in MSGS_DIR.

    Reads all rows of `messages` from the newest database, derives the expected
    file name (last 20 characters of entry_id + ".msg") for each row that has
    an entry_id, and prints counts plus details of the missing files and of the
    rows that have no entry_id at all.
    """
    db_path = get_latest_db()
    print(f"DB: {db_path.name}")
    conn = sqlite3.connect(db_path)
    try:
        conn.row_factory = sqlite3.Row
        rows = conn.execute(
            "SELECT id, message_id, subject, sender, received_at, entry_id, source FROM messages"
        ).fetchall()
    finally:
        conn.close()
    print(f"Celkem záznamů: {len(rows)}\n")

    # List the directory once instead of one exists() round trip per record.
    # Names are compared case-insensitively, as the file system lookup would.
    file_names = [p.name for p in MSGS_DIR.glob("*.msg")]
    msg_files = len(file_names)
    present = {file_name.casefold() for file_name in file_names}

    missing = []
    no_entry_id = []
    for row in rows:
        entry_id = row["entry_id"]
        if not entry_id:
            no_entry_id.append(dict(row))
            continue
        expected_name = entry_id[-20:] + ".msg"
        if expected_name.casefold() not in present:
            missing.append({**dict(row), "expected_file": expected_name})

    print(f"Záznamy bez entry_id (nelze zkontrolovat): {len(no_entry_id)}")
    print(f"Záznamy s entry_id: {len(rows) - len(no_entry_id)}")
    print(f"Chybějící .msg soubory: {len(missing)}")
    print("\n--- POROVNÁNÍ POČTŮ ---")
    print(f"Záznamy v DB celkem: {len(rows)}")
    print(f"Soubory .msg na serveru: {msg_files}")
    diff = msg_files - len(rows)
    if diff < 0:
        print(f"Rozdíl: {diff} — CHYBÍ {abs(diff)} souborů!")
    elif missing:
        # A surplus of files does not prove that every record has its own file.
        print(f"Rozdíl: +{diff} souborů navíc, ale {len(missing)} záznamů soubor nemá!")
    else:
        print(f"Rozdíl: +{diff} souborů navíc (OK — všechny záznamy mají soubor)")

    if missing:
        print("\n--- CHYBĚJÍCÍ SOUBORY ---")
        for r in missing:
            # subject may be NULL in the database; slicing None would raise.
            print(f" id={r['id']} | {r['received_at']} | {(r['subject'] or '')[:60]!r}")
            print(f" sender={r['sender']} | source={r['source']}")
            print(f" entry_id={r['entry_id']}")
            print(f" očekávaný soubor: {r['expected_file']}")

    if no_entry_id:
        print(f"\n--- ZÁZNAMY BEZ ENTRY_ID ({len(no_entry_id)}) ---")
        # Only the first 20 are listed to keep the report readable.
        for r in no_entry_id[:20]:
            print(f" id={r['id']} | {r['received_at']} | {(r['subject'] or '')[:60]!r} | source={r['source']}")
        if len(no_entry_id) > 20:
            print(f" ... a dalších {len(no_entry_id) - 20}")
    print("\nHotovo.")


if __name__ == "__main__":
    main()
+129
View File
@@ -0,0 +1,129 @@
"""
wipe_jnj_mailbox.py | 2026-06-08
Vyčistí složku Inbox/JNJ ve schránce vladimir.buzalka@buzalka.cz PŘED testem mirroru.
- Zachová samotnou složku Inbox/JNJ
- Trvale smaže (permanentDelete — obchází Deleted Items) všechny zprávy v JNJ
i ve všech podsložkách
- Smaže všechny podsložky JNJ (Inbox, Sent Items, Deleted Items, ...)
Výsledek: Inbox/JNJ existuje a je prázdná. Mirror si podsložky vytvoří znovu.
"""
import os
import sys

import msal
import requests
# Switch stdout to UTF-8; the script prints Czech text.
sys.stdout.reconfigure(encoding="utf-8")

# App registration used by token() for the client-credentials flow.
GRAPH_TENANT_ID = "7d269944-37a4-43a1-8140-c7517dc426e9"
GRAPH_CLIENT_ID = "4b222bfd-78c9-4239-a53f-43006b3ed07f"
# SECURITY: a client secret does not belong in version control. Set the
# GRAPH_CLIENT_SECRET environment variable instead; the literal fallback is
# kept only so existing invocations keep working and should be rotated and
# removed from the source.
GRAPH_CLIENT_SECRET = (
    os.environ.get("GRAPH_CLIENT_SECRET")
    or "Txg8Q~MjhocuopxsJyJBhPmDfMxZ2r5WpTFj1dfk"
)
# Mailbox whose Inbox/JNJ folder is wiped.
GRAPH_MAILBOX = "vladimir.buzalka@buzalka.cz"
GRAPH_URL = "https://graph.microsoft.com/v1.0"

# Cached bearer token; set by token() and read by H().
_token = None
def token():
    """Acquire a Graph access token via client credentials, cache and return it.

    The token is stored in the module-level ``_token`` so that H() can reuse it.

    Raises:
        RuntimeError: if the response contains no "access_token".
    """
    global _token
    authority_url = f"https://login.microsoftonline.com/{GRAPH_TENANT_ID}"
    client = msal.ConfidentialClientApplication(
        GRAPH_CLIENT_ID,
        authority=authority_url,
        client_credential=GRAPH_CLIENT_SECRET,
    )
    result = client.acquire_token_for_client(
        scopes=["https://graph.microsoft.com/.default"]
    )
    if "access_token" in result:
        _token = result["access_token"]
        return _token
    raise RuntimeError(f"auth failed: {result}")
def H():
    """Return the Authorization header, fetching a token first if none is cached."""
    bearer = _token if _token else token()
    return {"Authorization": f"Bearer {bearer}"}
def get_jnj_id():
    """Return the id of the folder named "JNJ" directly under Inbox, or None.

    Uses child_folders(), which follows @odata.nextLink, so the folder is found
    even when Inbox has more children than fit into one page of 100.
    """
    for f in child_folders("Inbox"):
        if f["displayName"] == "JNJ":
            return f["id"]
    return None
def child_folders(fid):
    """Return all direct child folders of folder *fid* as a list of Graph dicts.

    Pages of up to 100 folders are requested and @odata.nextLink is followed
    until the response no longer carries one.
    """
    folders = []
    next_url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{fid}/childFolders?$top=100"
    while next_url:
        page = requests.get(next_url, headers=H(), timeout=20).json()
        folders.extend(page.get("value", []))
        next_url = page.get("@odata.nextLink")
    return folders
def all_descendants(root_id):
    """Return [(id, displayName)] for the root and all folders below it.

    The root comes first, labelled "JNJ"; the rest follow in breadth-first
    order, one tree level after another.
    """
    collected = [(root_id, "JNJ")]
    level = [root_id]
    while level:
        next_level = []
        for parent_id in level:
            for child in child_folders(parent_id):
                collected.append((child["id"], child["displayName"]))
                next_level.append(child["id"])
        level = next_level
    return collected
def _graph(method, url, timeout):
    """Send one Graph request, refreshing the token once on HTTP 401."""
    resp = requests.request(method, url, headers=H(), timeout=timeout)
    if resp.status_code == 401:
        # The cached token has expired; a long wipe can outlive it.
        token()
        resp = requests.request(method, url, headers=H(), timeout=timeout)
    return resp


def _delete_message(msg_id):
    """Delete one message; return True only if Graph confirmed the deletion."""
    url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/messages/{msg_id}"
    if _graph("POST", f"{url}/permanentDelete", 20).status_code in (200, 204):
        return True
    # Fallback: regular delete when permanentDelete is refused.
    return _graph("DELETE", url, 20).status_code in (200, 204)


def wipe_messages(fid, name):
    """Delete every message in folder *fid* and return how many were deleted.

    Always re-reads the first page of up to 100 message ids, because each
    successful pass removes the messages it has just listed.

    Args:
        fid: Graph id of the mail folder to empty.
        name: folder display name, used only in the printed report.

    Returns:
        Number of messages whose deletion Graph confirmed.
    """
    deleted = 0
    list_url = (f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{fid}"
                f"/messages?$select=id&$top=100")
    while True:
        resp = _graph("GET", list_url, 30)
        if resp.status_code != 200:
            # An error body has no "value"; do not mistake it for an empty folder.
            print(f" {name}: CHYBA {resp.status_code} při čtení zpráv")
            break
        msgs = resp.json().get("value", [])
        if not msgs:
            break
        removed = 0
        for m in msgs:
            if _delete_message(m["id"]):
                removed += 1
        deleted += removed
        if removed == 0:
            # The same page would come back forever; stop instead of looping.
            print(f" {name}: CHYBA — {len(msgs)} zpráv se nepodařilo smazat")
            break
    print(f" {name}: smazáno {deleted} zpráv")
    return deleted
def main():
    """Empty Inbox/JNJ: delete all messages below it, then its subfolders.

    The JNJ folder itself is kept. Messages are removed first from JNJ and
    every descendant folder, after which the direct subfolders of JNJ are
    deleted. The closing line reports failures instead of always claiming
    that the folder is empty.
    """
    print("=== wipe_jnj_mailbox ===")
    token()
    jnj_id = get_jnj_id()
    if not jnj_id:
        print("Složka Inbox/JNJ neexistuje — není co mazat.")
        return

    folders = all_descendants(jnj_id)
    print(f"Nalezeno složek pod JNJ (vč. JNJ): {len(folders)}\n")

    print("Mažu zprávy (trvale)...")
    total = 0
    for fid, name in folders:
        total += wipe_messages(fid, name)

    # Delete the subfolders of JNJ, but not JNJ itself.
    print("\nMažu podsložky JNJ...")
    failed = 0
    for f in child_folders(jnj_id):
        r = requests.delete(f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{f['id']}",
                            headers=H(), timeout=20)
        if r.status_code in (200, 204):
            outcome = "smazána"
        else:
            outcome = "CHYBA " + str(r.status_code)
            failed += 1
        print(f" podsložka {f['displayName']}: {outcome}")

    if failed:
        print(f"\n=== Hotovo s chybami: smazáno {total} zpráv, "
              f"{failed} podsložek se nepodařilo smazat ===")
    else:
        print(f"\n=== Hotovo: smazáno {total} zpráv, Inbox/JNJ je prázdná ===")


if __name__ == "__main__":
    main()