This commit is contained in:
2026-06-08 07:20:37 +02:00
parent 0d3407e664
commit 70899149e4
14 changed files with 1162 additions and 14 deletions
+118 -12
View File
@@ -1,8 +1,9 @@
# app.py | v1.7 | 2026-06-05
# app.py | v1.9 | 2026-06-08
# FastAPI server pro příjem .msg a .db souborů, upload do Dropboxu a import do Graph API.
# Endpointy: /upload (.msg → /msgs + Graph import), /upload-db (.db → /msgs/db),
# /upload-dropbox (→ Dropbox /!!!Days/Downloads Z230),
# /message-delete, /message-update (sync: smazání, přečtení, přesun složky),
# /mirror-plan (diff manifestu z JNJ vůči schránce → smaže přebytky, vrátí to_add),
# /pending-files (seznam souborů k odeslání na JNJ), /download-file/{filename}.
from fastapi import FastAPI, UploadFile, File, Form, Header, HTTPException, Response
@@ -151,6 +152,55 @@ def _map_jnj_folder(folder: str) -> list[str]:
return prefix + rest if rest else prefix
def _norm_mid(mid: str) -> str:
"""Normalizuj Internet Message-ID pro porovnání (osekej <> a whitespace)."""
return (mid or "").strip().strip("<>").strip()
def _enumerate_jnj_mailbox(cutoff_iso: str) -> dict[str, str]:
"""Vrať {normalizované internetMessageId: graph_id} pro všechny zprávy ve
složkách JNJ/* schránky, které mají receivedDateTime >= cutoff_iso.
Slouží jako 'co už ve schránce je' pro mirror diff. Starší zprávy než cutoff
(např. únorový archiv) se nenačtou — mirror se jich tedy nikdy nedotkne.
"""
jnj_id = _ensure_folder([GRAPH_ROOT_FOLDER])
# BFS přes JNJ root + všechny podsložky
all_folders = [jnj_id]
i = 0
while i < len(all_folders):
fid = all_folders[i]
i += 1
url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{fid}/childFolders?$top=100"
while url:
r = _retry_graph(http_requests.get, url, _graph_headers, timeout=20)
data = r.json()
for f in data.get("value", []):
all_folders.append(f["id"])
url = data.get("@odata.nextLink")
# Posbírej message-id z každé složky (filtrováno na okno)
result: dict[str, str] = {}
cutoff_enc = cutoff_iso.replace(":", "%3A")
for fid in all_folders:
url = (
f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{fid}/messages"
f"?$filter=receivedDateTime ge {cutoff_enc}"
f"&$select=id,internetMessageId&$top=200"
)
while url:
r = _retry_graph(http_requests.get, url, _graph_headers, timeout=30)
data = r.json()
for m in data.get("value", []):
mid = _norm_mid(m.get("internetMessageId", ""))
if mid:
result[mid] = m["id"]
url = data.get("@odata.nextLink")
return result
def _make_recipient(addr: str) -> dict:
if "<" in addr and ">" in addr:
name = addr[: addr.index("<")].strip().strip('"')
@@ -221,6 +271,20 @@ def _import_msg_to_graph(msg_path: Path, folder: str) -> Optional[str]:
folder_parts = _map_jnj_folder(folder)
folder_id = _ensure_folder(folder_parts)
ext_props = [{"id": "Integer 0x0E07", "value": "1"}]
if date_raw:
try:
dt = dtparser.parse(str(date_raw))
dt_str = dt.astimezone(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
# PR_MESSAGE_DELIVERY_TIME (0x0E06) — jediný způsob jak nastavit
# receivedDateTime přes Graph API (přímé pole je read-only)
ext_props.append({"id": "SystemTime 0x0E06", "value": dt_str})
except Exception:
dt_str = None
else:
dt_str = None
payload = {
"subject": subject,
"body": {
@@ -231,19 +295,11 @@ def _import_msg_to_graph(msg_path: Path, folder: str) -> Optional[str]:
"toRecipients": [_make_recipient(a) for a in to_list],
"ccRecipients": [_make_recipient(a) for a in cc_list],
"isRead": True,
"singleValueExtendedProperties": [
{"id": "Integer 0x0E07", "value": "1"}
],
"singleValueExtendedProperties": ext_props,
}
if date_raw:
try:
dt = dtparser.parse(str(date_raw))
payload["receivedDateTime"] = dt.astimezone(timezone.utc).strftime(
"%Y-%m-%dT%H:%M:%SZ"
)
except Exception:
pass
if dt_str:
payload["sentDateTime"] = dt_str
if att_list:
payload["attachments"] = att_list
@@ -393,6 +449,56 @@ async def message_update(req: MessageUpdateRequest, authorization: str = Header(
return result
class MirrorPlanRequest(BaseModel):
manifest: list[dict] # [{"message_id": ..., "folder": ..., "is_read": ...}]
cutoff: str # ISO8601 UTC, např. "2026-05-09T00:00:00Z"
@app.post("/mirror-plan")
async def mirror_plan(req: MirrorPlanRequest, authorization: str = Header(None)):
"""Porovná manifest zpráv z JNJ (posledních 30 dní) se stavem schránky.
- smaže ze schránky zprávy které v manifestu nejsou (smazané v JNJ / vypadlé z okna)
- vrátí to_add = message_id které ve schránce chybí (klient je pak nahraje na /upload)
Maže POUZE v rámci okna (cutoff) — starší archiv zůstává nedotčen.
"""
if authorization != f"Bearer {TOKEN}":
raise HTTPException(status_code=401, detail="Unauthorized")
# manifest: normalizované id → původní message_id (pro echo zpět klientovi)
manifest_map: dict[str, str] = {}
for e in req.manifest:
mid = _norm_mid(e.get("message_id", ""))
if mid:
manifest_map[mid] = e["message_id"]
mailbox = _enumerate_jnj_mailbox(req.cutoff) # {norm_mid: graph_id}
to_add = [orig for nmid, orig in manifest_map.items() if nmid not in mailbox]
to_delete = [(nmid, gid) for nmid, gid in mailbox.items() if nmid not in manifest_map]
deleted = 0
for nmid, gid in to_delete:
url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/messages/{gid}"
r = _retry_graph(http_requests.delete, url, _graph_headers, timeout=15)
if r.status_code in (200, 204):
deleted += 1
else:
log.error("mirror delete FAIL [%d]: %s", r.status_code, r.text[:150])
log.info(
"mirror-plan: manifest=%d mailbox=%d → add=%d delete=%d",
len(manifest_map), len(mailbox), len(to_add), deleted,
)
return {
"to_add": to_add,
"deleted": deleted,
"manifest_count": len(manifest_map),
"mailbox_count": len(mailbox),
}
@app.post("/upload-file")
async def upload_file(
file: UploadFile = File(...),
+29 -2
View File
@@ -1,3 +1,4 @@
import time
import win32com.client
import pandas as pd
from pathlib import Path
@@ -18,9 +19,14 @@ entries = gal.AddressEntries
rows = []
print(f"Počet položek v GAL: {entries.Count}")
total = entries.Count
print(f"Počet položek v GAL: {total}")
for i in range(1, entries.Count + 1): # Outlook COM je 1-based
start = time.perf_counter()
last = start
PROGRESS_EVERY = 100 # každých N položek vypsat rychlost
for i in range(1, total + 1): # Outlook COM je 1-based
try:
entry = entries.Item(i)
@@ -80,6 +86,27 @@ for i in range(1, entries.Count + 1): # Outlook COM je 1-based
"error": str(e),
})
# průběžný výpis rychlosti
if i % PROGRESS_EVERY == 0 or i == total:
now = time.perf_counter()
elapsed = now - start
rate = i / elapsed if elapsed else 0
recent_rate = PROGRESS_EVERY / (now - last) if now > last else 0
remaining = (total - i) / rate if rate else 0
print(
f" {i}/{total} ({i / total:.0%}) | "
f"{rate:.1f} pol./s (akt. {recent_rate:.1f}) | "
f"uplynulo {elapsed:.1f}s | zbývá ~{remaining:.0f}s",
flush=True,
)
last = now
total_elapsed = time.perf_counter() - start
print(
f"Zpracováno {total} položek za {total_elapsed:.1f}s "
f"({total / total_elapsed:.1f} pol./s)"
)
df = pd.DataFrame(rows)
df.to_excel(OUT_XLSX, index=False)
+28
View File
@@ -0,0 +1,28 @@
import sys, glob, os
sys.stdout.reconfigure(encoding="utf-8")
import extract_msg
files = glob.glob(r"\\tower\JNJEMAILS\*.msg")
f = files[0]
fname = os.path.basename(f)
print(f"Soubor: {fname}")
print("(filename = posledních 20 znaků entry_id)")
print()
m = extract_msg.Message(f)
print(f"messageId (Internet Message-ID): {m.messageId!r}")
print()
print("--- MAPI properties v souboru ---")
try:
for pid in sorted(m.props.keys()):
prop = m.props[pid]
name = getattr(prop, "name", "")
print(f" {pid} {name}")
except Exception as e:
print(f" (props nedostupné: {e})")
print()
for attr in ("entryId", "entryID", "entry_id"):
print(f" m.{attr} = {getattr(m, attr, '<není>')!r}")
m.close()
+19
View File
@@ -0,0 +1,19 @@
import sys, glob, os, sqlite3
sys.stdout.reconfigure(encoding="utf-8")
files = sorted(glob.glob(r"\\tower\JNJEMAILS\db\jnjemails_*.db"))
db = files[-1]
print(f"DB: {os.path.basename(db)}\n")
conn = sqlite3.connect(db)
for (tbl,) in conn.execute("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name"):
print(f"=== {tbl} ===")
for cid, name, ctype, notnull, dflt, pk in conn.execute(f"PRAGMA table_info({tbl})"):
flags = []
if pk: flags.append("PK")
if notnull: flags.append("NOT NULL")
if dflt is not None: flags.append(f"default={dflt}")
print(f" {name:14} {ctype:10} {' '.join(flags)}")
cnt = conn.execute(f"SELECT COUNT(*) FROM {tbl}").fetchone()[0]
print(f"{cnt} řádků\n")
conn.close()
@@ -0,0 +1,120 @@
"""
backfill_entry_id.py | v1.0 | 2026-06-08
Dohledá entry_id pro záznamy v jnjemails.db které ho nemají (69k starých emailů
přenesených skriptem v1.1). Prochází celý Outlook MAPI strom a páruje emaily
dle Internet Message-ID.
Spouštět na JNJ PC s běžícím Outlookem.
Bezpečné opakovat — přeskočí záznamy které už entry_id mají.
"""
import sqlite3
import win32com.client
from datetime import datetime
DB_PATH = r"C:\Users\vbuzalka\SQLITE\jnjemails.db"
PR_INTERNET_MESSAGE_ID = "http://schemas.microsoft.com/mapi/proptag/0x1035001E"
def load_missing(conn) -> dict:
"""Vrátí dict {message_id: db_id} pro záznamy bez entry_id."""
rows = conn.execute(
"SELECT id, message_id FROM messages WHERE entry_id IS NULL"
).fetchall()
return {r[1]: r[0] for r in rows}
def update_entry_id(conn, db_id: int, entry_id: str):
conn.execute(
"UPDATE messages SET entry_id = ? WHERE id = ?",
(entry_id, db_id)
)
def scan_folder(conn, folder, lookup: dict, stats: dict, path: str = ""):
current = f"{path}/{folder.Name}"
try:
items = folder.Items
for item in items:
try:
if not item.MessageClass.upper().startswith("IPM.NOTE"):
continue
stats["checked"] += 1
try:
mid = item.PropertyAccessor.GetProperty(PR_INTERNET_MESSAGE_ID)
except Exception:
mid = None
if not mid:
mid = f"entryid:{item.EntryID}"
if mid in lookup:
db_id = lookup.pop(mid)
update_entry_id(conn, db_id, item.EntryID)
stats["updated"] += 1
if stats["updated"] % 100 == 0:
conn.commit()
print(f" [{datetime.now().strftime('%H:%M:%S')}] "
f"aktualizováno {stats['updated']} | "
f"zbývá {len(lookup)} | složka: {current}")
except Exception as e:
stats["errors"] += 1
except Exception as e:
print(f" CHYBA složka {current}: {e}")
stats["errors"] += 1
return # nelze ani procházet podsložky
try:
subfolders = list(folder.Folders)
except Exception as e:
print(f" CHYBA podsložky {current}: {e}")
return
for subfolder in subfolders:
if not lookup:
return
scan_folder(conn, subfolder, lookup, stats, current)
def main():
print(f"=== backfill_entry_id v1.0 ===")
print(f"Start: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
conn = sqlite3.connect(DB_PATH)
lookup = load_missing(conn)
total_missing = len(lookup)
print(f"Záznamy bez entry_id: {total_missing}")
if not lookup:
print("Nic k doplnění.")
conn.close()
return
outlook = win32com.client.Dispatch("Outlook.Application")
ns = outlook.GetNamespace("MAPI")
stats = {"checked": 0, "updated": 0, "errors": 0}
for i in range(1, ns.Folders.Count + 1):
if not lookup:
break
root = ns.Folders.Item(i)
print(f"\nSložka: {root.Name}")
scan_folder(conn, root, lookup, stats, "")
conn.commit()
conn.close()
print(f"\n=== Hotovo ===")
print(f"Zkontrolováno emailů: {stats['checked']}")
print(f"Doplněno entry_id: {stats['updated']} / {total_missing}")
print(f"Nenalezeno: {len(lookup)}")
print(f"Chyby: {stats['errors']}")
print(f"Konec: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
if __name__ == "__main__":
main()
+90
View File
@@ -0,0 +1,90 @@
"""
check_msg_files.py
Zkontroluje, zda má každý záznam v jnjemails SQLite odpovídající .msg soubor
fyzicky uložený na \\\\tower\\JNJEMAILS\\.
DB: \\\\tower\\JNJEMAILS\\db\\jnjemails_*.db (nejnovější)
Soubory: \\\\tower\\JNJEMAILS\\*.msg
Název souboru = entry_id[-20:] + ".msg"
Záznamy bez entry_id mají fallback message_id "entryid:..." — ty se přeskočí
zvlášť (server je nemohl uložit standardním názvem).
"""
import sqlite3
import sys
from pathlib import Path
sys.stdout.reconfigure(encoding="utf-8")
DB_DIR = Path(r"\\tower\JNJEMAILS\db")
MSGS_DIR = Path(r"\\tower\JNJEMAILS")
def get_latest_db() -> Path:
files = sorted(DB_DIR.glob("jnjemails_*.db"), key=lambda f: f.name)
if not files:
raise FileNotFoundError(f"Žádný jnjemails_*.db v {DB_DIR}")
return files[-1]
def main():
db_path = get_latest_db()
print(f"DB: {db_path.name}")
conn = sqlite3.connect(db_path)
conn.row_factory = sqlite3.Row
rows = conn.execute(
"SELECT id, message_id, subject, sender, received_at, entry_id, source FROM messages"
).fetchall()
conn.close()
print(f"Celkem záznamů: {len(rows)}\n")
missing = []
no_entry_id = []
for row in rows:
entry_id = row["entry_id"]
if not entry_id:
no_entry_id.append(dict(row))
continue
expected_file = MSGS_DIR / (entry_id[-20:] + ".msg")
if not expected_file.exists():
missing.append({**dict(row), "expected_file": expected_file.name})
msg_files = sum(1 for _ in MSGS_DIR.glob("*.msg"))
print(f"Záznamy bez entry_id (nelze zkontrolovat): {len(no_entry_id)}")
print(f"Záznamy s entry_id: {len(rows) - len(no_entry_id)}")
print(f"Chybějící .msg soubory: {len(missing)}")
print(f"\n--- POROVNÁNÍ POČTŮ ---")
print(f"Záznamy v DB celkem: {len(rows)}")
print(f"Soubory .msg na serveru: {msg_files}")
diff = msg_files - len(rows)
if diff >= 0:
print(f"Rozdíl: +{diff} souborů navíc (OK — všechny záznamy mají soubor)")
else:
print(f"Rozdíl: {diff} — CHYBÍ {abs(diff)} souborů!")
if missing:
print("\n--- CHYBĚJÍCÍ SOUBORY ---")
for r in missing:
print(f" id={r['id']} | {r['received_at']} | {r['subject'][:60]!r}")
print(f" sender={r['sender']} | source={r['source']}")
print(f" entry_id={r['entry_id']}")
print(f" očekávaný soubor: {r['expected_file']}")
if no_entry_id:
print(f"\n--- ZÁZNAMY BEZ ENTRY_ID ({len(no_entry_id)}) ---")
for r in no_entry_id[:20]:
print(f" id={r['id']} | {r['received_at']} | {r['subject'][:60]!r} | source={r['source']}")
if len(no_entry_id) > 20:
print(f" ... a dalších {len(no_entry_id) - 20}")
print("\nHotovo.")
if __name__ == "__main__":
main()
+129
View File
@@ -0,0 +1,129 @@
"""
wipe_jnj_mailbox.py | 2026-06-08
Vyčistí složku Inbox/JNJ ve schránce vladimir.buzalka@buzalka.cz PŘED testem mirroru.
- Zachová samotnou složku Inbox/JNJ
- Trvale smaže (permanentDelete — obchází Deleted Items) všechny zprávy v JNJ
i ve všech podsložkách
- Smaže všechny podsložky JNJ (Inbox, Sent Items, Deleted Items, ...)
Výsledek: Inbox/JNJ existuje a je prázdná. Mirror si podsložky vytvoří znovu.
"""
import sys
import msal
import requests
sys.stdout.reconfigure(encoding="utf-8")
GRAPH_TENANT_ID = "7d269944-37a4-43a1-8140-c7517dc426e9"
GRAPH_CLIENT_ID = "4b222bfd-78c9-4239-a53f-43006b3ed07f"
GRAPH_CLIENT_SECRET = "Txg8Q~MjhocuopxsJyJBhPmDfMxZ2r5WpTFj1dfk"
GRAPH_MAILBOX = "vladimir.buzalka@buzalka.cz"
GRAPH_URL = "https://graph.microsoft.com/v1.0"
_token = None
def token():
global _token
app = msal.ConfidentialClientApplication(
GRAPH_CLIENT_ID,
authority=f"https://login.microsoftonline.com/{GRAPH_TENANT_ID}",
client_credential=GRAPH_CLIENT_SECRET,
)
res = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"])
if "access_token" not in res:
raise RuntimeError(f"auth failed: {res}")
_token = res["access_token"]
return _token
def H():
return {"Authorization": f"Bearer {_token or token()}"}
def get_jnj_id():
r = requests.get(f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/Inbox/childFolders?$top=100",
headers=H(), timeout=20).json()
for f in r.get("value", []):
if f["displayName"] == "JNJ":
return f["id"]
return None
def child_folders(fid):
out = []
url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{fid}/childFolders?$top=100"
while url:
r = requests.get(url, headers=H(), timeout=20).json()
out += r.get("value", [])
url = r.get("@odata.nextLink")
return out
def all_descendants(root_id):
"""Vrať [(id, displayName)] root + všech podsložek (BFS)."""
result = [(root_id, "JNJ")]
i = 0
while i < len(result):
fid = result[i][0]
i += 1
for f in child_folders(fid):
result.append((f["id"], f["displayName"]))
return result
def wipe_messages(fid, name):
deleted = 0
while True:
r = requests.get(
f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{fid}/messages?$select=id&$top=100",
headers=H(), timeout=30).json()
msgs = r.get("value", [])
if not msgs:
break
for m in msgs:
pd = requests.post(
f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/messages/{m['id']}/permanentDelete",
headers=H(), timeout=20)
if pd.status_code in (200, 204):
deleted += 1
else:
# fallback: běžné smazání
requests.delete(f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/messages/{m['id']}",
headers=H(), timeout=20)
deleted += 1
print(f" {name}: smazáno {deleted} zpráv")
return deleted
def main():
print("=== wipe_jnj_mailbox ===")
token()
jnj_id = get_jnj_id()
if not jnj_id:
print("Složka Inbox/JNJ neexistuje — není co mazat.")
return
folders = all_descendants(jnj_id)
print(f"Nalezeno složek pod JNJ (vč. JNJ): {len(folders)}\n")
print("Mažu zprávy (trvale)...")
total = 0
for fid, name in folders:
total += wipe_messages(fid, name)
# smaž podsložky JNJ (ne samotnou JNJ)
print("\nMažu podsložky JNJ...")
subs = child_folders(jnj_id)
for f in subs:
r = requests.delete(f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{f['id']}",
headers=H(), timeout=20)
print(f" podsložka {f['displayName']}: {'smazána' if r.status_code in (200,204) else 'CHYBA '+str(r.status_code)}")
print(f"\n=== Hotovo: smazáno {total} zpráv, Inbox/JNJ je prázdná ===")
if __name__ == "__main__":
main()
+199
View File
@@ -0,0 +1,199 @@
"""
mailbox_mirror v1.0 | 2026-06-08 | vladimir.buzalka
Zrcadlí primární JNJ schránku (BEZ Online Archive) za posledních 30 dní do
osobní schránky vladimir.buzalka@buzalka.cz.
Princip — bezestavový diff přes Internet Message-ID:
1. Projdi Inbox(+podsložky), Sent, Deleted; vyber emaily z posledních 30 dní.
Sestav manifest = [{message_id, folder, is_read}] (jen metadata, žádná těla).
2. POST /mirror-plan → server porovná manifest se stavem schránky:
- smaže ze schránky zprávy které v manifestu nejsou (smazané v JNJ)
- vrátí to_add = message_id které ve schránce chybí
3. Pro každé to_add: ulož .msg, zašifruj (Fernet → .emsg), POST /upload.
Žádná SQLite, žádný graph_id bookkeeping — zdrojem pravdy jsou obě schránky.
Mazání běží jen v rámci 30denního okna, starší archiv zůstává nedotčen.
Omezení JNJ:
- Zscaler DLP → soubory se posílají šifrované (.emsg)
- Online Archive vynechán (GetDefaultFolder vrací jen primární schránku)
Spouštění: opakovaně (Task Scheduler). Bezpečně opakovatelné a idempotentní.
Závislosti: pywin32, requests, cryptography. Outlook musí běžet.
"""
import sys
import base64
import hashlib
import tempfile
from pathlib import Path
from datetime import datetime, timedelta, timezone
import win32com.client
import requests
import urllib3
from cryptography.fernet import Fernet
sys.stdout.reconfigure(encoding="utf-8")
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
TOKEN = "13e1bb01-9fd5-44a8-8ce9-4ee27133d340"
BASE_URL = "https://msgs.buzalka.cz"
PLAN_URL = f"{BASE_URL}/mirror-plan"
UPLOAD_URL = f"{BASE_URL}/upload"
WINDOW_DAYS = 30
PR_INTERNET_MESSAGE_ID = "http://schemas.microsoft.com/mapi/proptag/0x1035001E"
# olFolderInbox=6, olFolderSentMail=5, olFolderDeletedItems=3
FOLDERS_TO_MIRROR = [6, 5, 3]
# Šifrovací klíč odvozený z TOKENu (stejný algoritmus jako server)
_FERNET = Fernet(base64.urlsafe_b64encode(hashlib.sha256(TOKEN.encode()).digest()))
def get_mid(item) -> str:
try:
mid = item.PropertyAccessor.GetProperty(PR_INTERNET_MESSAGE_ID)
except Exception:
mid = None
return mid or f"entryid:{item.EntryID}"
def collect_manifest(ns, cutoff_local):
"""Projdi cílové složky + podsložky, vrať (manifest, index).
manifest = [{message_id, folder, is_read}]
index = {message_id: (entry_id, folder_path)} — pro fázi uploadu
"""
restrict = (
"@SQL=\"urn:schemas:httpmail:datereceived\" >= '%s'"
% cutoff_local.strftime("%Y/%m/%d %H:%M:%S")
)
manifest = []
index = {}
def walk(folder, folder_path):
current = f"{folder_path}/{folder.Name}"
try:
items = folder.Items.Restrict(restrict)
items.Sort("[ReceivedTime]", False)
n = 0
for item in items:
try:
if not item.MessageClass.upper().startswith("IPM.NOTE"):
continue
mid = get_mid(item)
manifest.append({
"message_id": mid,
"folder": current,
"is_read": (not item.UnRead),
})
index[mid] = (item.EntryID, current)
n += 1
except Exception as e:
print(f" chyba položky v {current}: {e}")
print(f" {current}: {n}")
except Exception as e:
print(f" CHYBA složka {current}: {e}")
return # nedostupná složka → nelez do podsložek
try:
subfolders = list(folder.Folders)
except Exception:
subfolders = []
for sub in subfolders:
walk(sub, current)
seen_roots = set()
for fid in FOLDERS_TO_MIRROR:
root = ns.GetDefaultFolder(fid)
mailbox = root.Parent.Name
key = (mailbox, root.Name)
if key in seen_roots:
continue
seen_roots.add(key)
walk(root, f"/{mailbox}")
return manifest, index
def upload_one(ns, entry_id, folder):
"""Ulož email jako .msg, zašifruj a nahraj na /upload (server naimportuje)."""
item = ns.GetItemFromID(entry_id)
with tempfile.TemporaryDirectory() as tmp:
safe_name = f"{entry_id[-20:]}.msg"
tmp_path = Path(tmp) / safe_name
item.SaveAs(str(tmp_path), 3) # 3 = olMSG
with open(tmp_path, "rb") as f:
encrypted = _FERNET.encrypt(f.read())
enc_name = safe_name[:-4] + ".emsg"
resp = requests.post(
UPLOAD_URL,
headers={"Authorization": f"Bearer {TOKEN}"},
files={"file": (enc_name, encrypted, "application/octet-stream")},
data={"folder": folder},
timeout=60,
)
resp.raise_for_status()
return resp.json()
def main():
print(f"=== mailbox_mirror v1.0 ===")
print(f"Start: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
cutoff_utc = datetime.now(timezone.utc) - timedelta(days=WINDOW_DAYS)
cutoff_graph = cutoff_utc.strftime("%Y-%m-%dT%H:%M:%SZ")
cutoff_local = cutoff_utc.astimezone()
print(f"Okno: posledních {WINDOW_DAYS} dní (cutoff {cutoff_graph})\n")
outlook = win32com.client.Dispatch("Outlook.Application")
ns = outlook.GetNamespace("MAPI")
print("1) Sestavuji manifest z JNJ schránky...")
manifest, index = collect_manifest(ns, cutoff_local)
print(f"{len(manifest)} emailů v okně\n")
print("2) Posílám plán na server (diff + mazání přebytků)...")
resp = requests.post(
PLAN_URL,
headers={"Authorization": f"Bearer {TOKEN}"},
json={"manifest": manifest, "cutoff": cutoff_graph},
timeout=300,
)
resp.raise_for_status()
plan = resp.json()
to_add = plan.get("to_add", [])
print(f" schránka={plan.get('mailbox_count')} | manifest={plan.get('manifest_count')}")
print(f" smazáno ze schránky: {plan.get('deleted')}")
print(f" k nahrání: {len(to_add)}\n")
if not to_add:
print("Schránka je v synchronu, nic nenahrávám.")
else:
print("3) Nahrávám chybějící emaily...")
uploaded = 0
errors = 0
for i, mid in enumerate(to_add, 1):
entry_id, folder = index.get(mid, (None, None))
if not entry_id:
print(f" [{i}/{len(to_add)}] chybí index pro {mid[:40]} — přeskočeno")
errors += 1
continue
try:
upload_one(ns, entry_id, folder)
uploaded += 1
if uploaded % 50 == 0:
print(f" [{datetime.now().strftime('%H:%M:%S')}] "
f"nahráno {uploaded}/{len(to_add)}")
except Exception as e:
print(f" CHYBA upload {mid[:40]}: {e}")
errors += 1
print(f"\n nahráno {uploaded} | chyby {errors}")
print(f"\n=== Hotovo === {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
if __name__ == "__main__":
main()
+316
View File
@@ -0,0 +1,316 @@
"""
mailbox_restore_v1.0.py | 2026-06-08
Importuje emaily z .msg souborů na Toweru do schránky vladimir.buzalka@buzalka.cz
přes Graph API. Zpracuje záznamy v SQLite které mají entry_id ale nemají graph_id.
Spouštět doma — přistupuje přímo na \\tower\JNJEMAILS\.
Bezpečně opakovatelný — přeskočí záznamy které graph_id již mají.
Závislosti: msal, requests, extract_msg, python-dateutil
"""
import sqlite3
import sys
import base64
import hashlib
import time
from pathlib import Path
from datetime import timezone, datetime
import msal
import requests
import extract_msg as extract_msg_lib
from dateutil import parser as dtparser
from cryptography.fernet import Fernet
sys.stdout.reconfigure(encoding="utf-8")
DB_DIR = Path(r"\\tower\JNJEMAILS\db")
MSGS_DIR = Path(r"\\tower\JNJEMAILS")
GRAPH_TENANT_ID = "7d269944-37a4-43a1-8140-c7517dc426e9"
GRAPH_CLIENT_ID = "4b222bfd-78c9-4239-a53f-43006b3ed07f"
GRAPH_CLIENT_SECRET = "Txg8Q~MjhocuopxsJyJBhPmDfMxZ2r5WpTFj1dfk"
GRAPH_MAILBOX = "vladimir.buzalka@buzalka.cz"
GRAPH_ROOT_FOLDER = "JNJ"
GRAPH_URL = "https://graph.microsoft.com/v1.0"
BATCH_COMMIT = 50 # commit do DB každých N importů
RATE_DELAY = 0.1 # sekund mezi requesty (Graph limit ~10k/10min)
_graph_token: str | None = None
_folder_cache: dict[str, str] = {}
def get_latest_db() -> Path:
files = sorted(DB_DIR.glob("jnjemails_*.db"), key=lambda f: f.name)
if not files:
raise FileNotFoundError(f"Žádný jnjemails_*.db v {DB_DIR}")
return files[-1]
def get_token() -> str:
global _graph_token
app = msal.ConfidentialClientApplication(
GRAPH_CLIENT_ID,
authority=f"https://login.microsoftonline.com/{GRAPH_TENANT_ID}",
client_credential=GRAPH_CLIENT_SECRET,
)
result = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"])
if "access_token" not in result:
raise RuntimeError(f"Graph auth failed: {result}")
_graph_token = result["access_token"]
return _graph_token
def graph_headers() -> dict:
return {"Authorization": f"Bearer {_graph_token or get_token()}"}
def ensure_folder(path_parts: list[str]) -> str:
cache_key = "/".join(path_parts)
if cache_key in _folder_cache:
return _folder_cache[cache_key]
headers = graph_headers()
parent_id = "Inbox"
for i, part in enumerate(path_parts):
partial_key = "/".join(path_parts[: i + 1])
if partial_key in _folder_cache:
parent_id = _folder_cache[partial_key]
continue
if parent_id == "Inbox":
url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/Inbox/childFolders"
else:
url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{parent_id}/childFolders"
r = requests.get(url, headers=headers, timeout=15)
if r.status_code == 401:
get_token()
headers = graph_headers()
r = requests.get(url, headers=headers, timeout=15)
found = None
for f in r.json().get("value", []):
if f["displayName"].lower() == part.lower():
found = f["id"]
break
if not found:
cr = requests.post(url, headers=headers, json={"displayName": part}, timeout=15)
if cr.status_code in (200, 201):
found = cr.json()["id"]
elif cr.status_code == 409:
r2 = requests.get(url, headers=headers, timeout=15)
for f in r2.json().get("value", []):
if f["displayName"].lower() == part.lower():
found = f["id"]
break
if not found:
raise RuntimeError(f"Cannot create folder '{part}': {cr.text}")
_folder_cache[partial_key] = found
parent_id = found
return parent_id
def map_folder(jnj_folder: str) -> list[str]:
parts = [p for p in jnj_folder.split("/") if p]
if not parts:
return [GRAPH_ROOT_FOLDER]
mailbox = parts[0]
rest = parts[1:]
prefix = [GRAPH_ROOT_FOLDER]
if "online archive" in mailbox.lower():
prefix.append("Online Archive")
return prefix + rest if rest else prefix
def make_recipient(addr: str) -> dict:
if "<" in addr and ">" in addr:
name = addr[: addr.index("<")].strip().strip('"')
email = addr[addr.index("<") + 1 : addr.index(">")].strip()
else:
name = addr
email = addr
return {"emailAddress": {"name": name, "address": email}}
def import_msg(msg_path: Path, jnj_folder: str) -> str | None:
try:
msg = extract_msg_lib.Message(str(msg_path))
subject = msg.subject or "(no subject)"
try:
body_html = msg.htmlBody
if isinstance(body_html, bytes):
body_html = body_html.decode("utf-8", errors="replace")
except Exception:
body_html = None
try:
body_text = msg.body or ""
except Exception:
body_text = ""
sender_email = ""
sender_name = ""
to_raw = ""
cc_raw = ""
date_raw = None
try:
sender_email = msg.sender or ""
except Exception:
pass
try:
sender_name = getattr(msg, "senderName", None) or sender_email
except Exception:
sender_name = sender_email
try:
to_raw = msg.to or ""
except Exception:
pass
try:
cc_raw = msg.cc or ""
except Exception:
pass
try:
date_raw = msg.date
except Exception:
pass
att_list = []
for att in msg.attachments:
if att.data and att.longFilename:
att_list.append({
"@odata.type": "#microsoft.graph.fileAttachment",
"name": att.longFilename,
"contentType": getattr(att, "mimetype", None) or "application/octet-stream",
"contentBytes": base64.b64encode(att.data).decode(),
})
msg.close()
to_list = [a.strip() for a in to_raw.split(";") if a.strip()]
cc_list = [a.strip() for a in cc_raw.split(";") if a.strip()]
folder_parts = map_folder(jnj_folder)
folder_id = ensure_folder(folder_parts)
ext_props = [{"id": "Integer 0x0E07", "value": "1"}]
dt_str = None
if date_raw:
try:
dt = dtparser.parse(str(date_raw))
dt_str = dt.astimezone(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
ext_props.append({"id": "SystemTime 0x0E06", "value": dt_str})
except Exception:
pass
payload = {
"subject": subject,
"body": {
"contentType": "HTML" if body_html else "Text",
"content": body_html or body_text,
},
"from": make_recipient(f"{sender_name} <{sender_email}>"),
"toRecipients": [make_recipient(a) for a in to_list],
"ccRecipients": [make_recipient(a) for a in cc_list],
"isRead": True,
"singleValueExtendedProperties": ext_props,
}
if dt_str:
payload["sentDateTime"] = dt_str
if att_list:
payload["attachments"] = att_list
headers = graph_headers()
url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{folder_id}/messages"
r = requests.post(url, headers=headers, json=payload, timeout=30)
if r.status_code == 401:
get_token()
headers = graph_headers()
r = requests.post(url, headers=headers, json=payload, timeout=30)
if r.status_code in (200, 201):
return r.json().get("id")
else:
print(f" Graph FAIL [{r.status_code}]: {r.text[:200]}")
return None
except Exception as e:
print(f" Chyba import: {e}")
return None
def main():
print(f"=== mailbox_restore v1.0 ===")
print(f"Start: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
db_path = get_latest_db()
print(f"DB: {db_path.name}")
conn = sqlite3.connect(db_path)
conn.row_factory = sqlite3.Row
rows = conn.execute("""
SELECT id, entry_id, jnj_folder, subject
FROM messages
WHERE entry_id IS NOT NULL AND graph_id IS NULL
ORDER BY received_at
""").fetchall()
total = len(rows)
print(f"K importu: {total}\n")
if not total:
print("Nic k importu.")
conn.close()
return
get_token()
imported = 0
skipped = 0
errors = 0
for i, row in enumerate(rows, 1):
msg_file = MSGS_DIR / (row["entry_id"][-20:] + ".msg")
folder = row["jnj_folder"] or "/vbuzalka@its.jnj.com/Inbox"
if not msg_file.exists():
skipped += 1
continue
graph_id = import_msg(msg_file, folder)
if graph_id:
conn.execute(
"UPDATE messages SET graph_id = ? WHERE id = ?",
(graph_id, row["id"])
)
imported += 1
if imported % BATCH_COMMIT == 0:
conn.commit()
print(f" [{datetime.now().strftime('%H:%M:%S')}] "
f"{imported}/{total} importováno | skip {skipped} | chyby {errors}")
else:
errors += 1
time.sleep(RATE_DELAY)
conn.commit()
conn.close()
print(f"\n=== Hotovo ===")
print(f"Importováno: {imported}")
print(f"Chybí soubor: {skipped}")
print(f"Chyby Graph: {errors}")
print(f"Konec: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
if __name__ == "__main__":
main()
+79
View File
@@ -42,6 +42,17 @@ from pathlib import Path
SCRIPTS_DIR = Path("/scripts")
LOGS_DIR = SCRIPTS_DIR # vse do /scripts/
# --- Auto-install dependencies ---
_REQ_FILE = SCRIPTS_DIR / "requirements.txt"
if _REQ_FILE.exists():
_ret = subprocess.run(
[sys.executable, "-m", "pip", "install", "-q", "-r", str(_REQ_FILE)],
capture_output=True, text=True,
)
if _ret.returncode != 0:
print(f"[WARN] pip install selhal:\n{_ret.stderr.strip()}")
# ---------------------------------
# Definice pipeline (step_id, label, executable filename)
STEPS = [
("1b", "Graph delta sync", "1b_parse_emails_graph_delta_v1.0.py"),
@@ -165,9 +176,77 @@ def main() -> int:
print(f" Konec: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(f" Per-krok logy: {LOGS_DIR}/pipeline_<id>.log")
_send_report(results, failed, total_dur)
return 1 if failed else 0
def _send_report(results: list, failed: int, total_dur: float) -> None:
try:
import importlib.util, sys as _sys
_lib = SCRIPTS_DIR / "EmailMessagingGraph.py"
spec = importlib.util.spec_from_file_location("EmailMessagingGraph", _lib)
mod = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mod)
except Exception as e:
print(f"[report] Nelze nacist EmailMessagingGraph: {e}")
return
ok_icon = ""
err_icon = ""
overall = ok_icon if failed == 0 else err_icon
rows = ""
for sid, label, ret, dur in results:
icon = ok_icon if ret == 0 else err_icon
color = "#d4edda" if ret == 0 else "#f8d7da"
status = "OK" if ret == 0 else f"FAIL ({ret})"
rows += (
f"<tr style='background:{color}'>"
f"<td style='padding:4px 10px'>{icon} {label}</td>"
f"<td style='padding:4px 10px;text-align:center'>{status}</td>"
f"<td style='padding:4px 10px;text-align:right'>{fmt_dur(dur)}</td>"
f"</tr>"
)
body = f"""
<html><body style="font-family:sans-serif;font-size:14px">
<p>{overall} <b>Email pipeline</b> — {datetime.now().strftime('%Y-%m-%d %H:%M')}
&nbsp;|&nbsp; celkem {fmt_dur(total_dur)}
&nbsp;|&nbsp; {len(results)} kroků, {failed} chyb</p>
<table border="0" cellspacing="1" cellpadding="0" style="border-collapse:collapse">
<tr style="background:#343a40;color:white">
<th style="padding:4px 10px;text-align:left">Krok</th>
<th style="padding:4px 10px">Status</th>
<th style="padding:4px 10px;text-align:right">Čas</th>
</tr>
{rows}
</table>
</body></html>
"""
# Attach logs of failed steps
attachments = []
for sid, label, ret, dur in results:
if ret != 0:
log_path = LOGS_DIR / f"pipeline_{sid}.log"
if log_path.exists() and log_path.stat().st_size > 0:
attachments.append(log_path)
subject = f"{overall} Email pipeline — {datetime.now().strftime('%Y-%m-%d %H:%M')}"
try:
mod.send_mail(
"vladimir.buzalka@buzalka.cz",
subject,
body,
html=True,
attachments=attachments or None,
)
print(f"[report] Email odeslan na vladimir.buzalka@buzalka.cz")
except Exception as e:
print(f"[report] Chyba pri odesilani: {e}")
if __name__ == "__main__":
try:
raise SystemExit(main())
+18
View File
@@ -0,0 +1,18 @@
msal
requests
pymongo
python-dateutil
extract-msg
cryptography
asn1crypto
beautifulsoup4
oletools
msoffcrypto-tool
olefile
RTFDE
compressed-rtf
lark
pcodedmp
tzlocal
six
psycopg
+1
View File
@@ -14,3 +14,4 @@
- [MCP emaily](project_mcp_emaily.md) — MCP server nad PG fulltextem + Mongo emailů z Graph importu (9 schránek, ~268k mailů; search/read_email/by_sender/conversation_thread/find_attachment/...)
- [Python-runner pipeline](project_python_runner.md) — Docker kontejner na Unraidu, email pipeline 2×/den, auto-install deps, report z reports@buzalka.cz
- [Claude Code learning path](project_claude_learning.md) — Level 2 Intermediate, mezery: Skills/Subagenty/Hooks/Print mode, tutoriál v `claude-howto/`
- [Admin PowerShell nefunguje](feedback_admin_powershell.md) — příkazy vyžadující admin práva rovnou napsat uživateli, nezkoušet alternativy
@@ -0,0 +1,14 @@
---
name: feedback-admin-powershell
description: "PowerShell jako admin nefunguje z Claude Code — když je to potřeba, rovnou napsat uživateli"
metadata:
node_type: memory
type: feedback
originSessionId: 49cbd8a2-c71e-49be-8c52-59dfa5ac7680
---
PowerShell příkazy vyžadující admin práva (winget install, Enable-PSRemoting, Set-Item WSMan, Start-Service WinRM) nelze spustit z Claude Code — vždy selžou s "Access is denied" nebo jsou blokovány permission promptem.
**Why:** Claude Code neběží jako Administrator a bypass permissions to neřeší pro privilegované systémové operace.
**How to apply:** Jakmile identifikuji že příkaz vyžaduje admin práva, okamžitě napíšu uživateli příkaz k ručnímu spuštění v "PowerShell jako Administrator". Neplýtvat časem zkoušením alternativ — rovnou říct co má udělat.
@@ -19,6 +19,8 @@ Kontejner msgreceiver nyní také importuje JNJ emaily do Graph API — viz [[gr
**Why:** JNJ počítač nemá přímý přístup k Dropboxu, přenos jde přes Unraid jako prostředníka.
**JNJ web-proxy blokuje GET podle názvu URL (2026-06-07):** `file_send` (POST /upload) prochází, ale `file_receive` (GET) začal vracet 403 Forbidden + proxy přepsala URL na `?_sm_nck=1`. Příčina = bezpečnostní brána JNJ práská GET requesty podle "mluvícího" názvu cesty (`pending-files`, `download-file`). Řešení: přejmenovat endpointy na neutrální → `/pending-files``/status`, `/download-file``/item` (na klientu `janssenpc_file_receive.py` i serverovém `app.py`). Metoda zůstala GET, projde. Tj. filtr je keyword-based na názvu URL, ne method-based. `app.py` je na bind-mountu `/mnt/user/appdata/msgreceiver/`, takže redeploy = nahrát soubor + `docker restart msgreceiver` (rebuild netřeba). SSH: paramiko root@192.168.1.76.
**How to apply:** Při změnách v `DockerCustomApp/` je potřeba rebuild image na Unraidu (SSH root@192.168.1.76, heslo v BUILD.md). Postup: SFTP upload souborů → `docker build``docker stop/rm/run`. Bez redeploye se změny neprojeví (2026-05-29: 442 .db souborů se nehromadilo kvůli chybějícímu redeployi). Refresh token z `10 GetOneTimeDropBoxAuth.py` platí dokud se appka neodvolá.
Souvisí s [[edc-mongo-import]] — stejný Docker server.