This commit is contained in:
2026-06-05 21:21:30 +02:00
parent 1ec9e40196
commit a347051145
28 changed files with 7402 additions and 0 deletions
+753
View File
@@ -0,0 +1,753 @@
#!/usr/bin/env python3
"""
==============================================================================
MCP server: EMAILY (all mailboxes imported from Microsoft Graph)
Hybrid query over:
  - PostgreSQL  192.168.1.76  db=MongoEmaily  table=emails
                (full-text tsvector - subject + sender + recipients +
                 attachments + body, GIN index, ts_headline, ts_rank)
  - MongoDB     192.168.1.76  db=emaily       collection=<mailbox>
                (original documents from parse_emails_graph_v1.3.py:
                 headers, body_html, recipients[], attachments[], ...)
Source: U:\\janssen\\EmailsImport\\enrich_fulltext_emails_v1.0.py
Usage:
  python mcp_emaily.py           (stdio MCP)
==============================================================================
"""
from __future__ import annotations

import os
import sys
import traceback
from datetime import datetime, timezone, timedelta
from typing import Optional, Union

import psycopg
from mcp.server.fastmcp import FastMCP
from pymongo import MongoClient
# Connection settings. MONGO_URI and PG_DSN can be overridden through the
# environment so hosts and credentials do not have to live in source control;
# the literals are kept only as backward-compatible fallbacks.
MONGO_URI = os.environ.get("EMAILY_MONGO_URI", "mongodb://192.168.1.76:27017")
MONGO_DB = "emaily"
# SECURITY NOTE(review): the fallback DSN embeds a plaintext password. Supply
# EMAILY_PG_DSN instead, then rotate the credential and remove the literal.
PG_DSN = os.environ.get(
    "EMAILY_PG_DSN",
    "host=192.168.1.76 port=5432 dbname=MongoEmaily "
    "user=vladimir.buzalka password=Vlado7309208104++",
)

# Body slicing limits used by read_email (characters, not bytes).
DEFAULT_BODY_CHARS = 8000
MAX_BODY_CHARS = 200_000

# Collections in MONGO_DB that are not mailboxes and must be skipped when
# enumerating mailbox collections.
SKIP_COLLECTIONS = {"attachments_index", "sync_state"}
def log(msg: str) -> None:
    """Emit ``msg`` followed by a newline on stderr and flush right away.

    Diagnostics go to stderr rather than stdout — presumably because stdout
    carries the stdio MCP traffic (see the module docstring).
    """
    stream = sys.stderr
    stream.write(f"{msg}\n")
    stream.flush()
# Fail fast at import time: both backends must answer before the server is
# allowed to start. Any failure is logged to stderr and the process exits 1.

# MongoDB: build the shared client and force a round-trip with "ping".
try:
    mongo = MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000)
    mongo.admin.command("ping")
except Exception as e:
    log(f"Mongo connection failed: {e}")
    sys.exit(1)
else:
    log(f"Mongo OK ({MONGO_URI})")

# PostgreSQL: open a throw-away connection and close it again; the tools open
# their own connections later through pg_conn().
try:
    _t = psycopg.connect(PG_DSN, connect_timeout=10)
    _t.close()
except Exception as e:
    log(f"Postgres connection failed: {e}")
    sys.exit(1)
else:
    log("Postgres OK")
def pg_conn():
    """Open and return a brand-new psycopg connection to ``PG_DSN``.

    Each call connects afresh with ``connect_timeout=10``; nothing is pooled
    or cached here, so the caller is responsible for closing the connection
    (the tools in this module do so via ``with pg_conn() as pg``).
    """
    connection = psycopg.connect(PG_DSN, connect_timeout=10)
    return connection
def serialize(obj):
    """Recursively turn ``obj`` into plain, JSON-friendly values.

    * ``dict``     -> new dict with every value serialized (keys untouched)
    * ``list``     -> new list with every item serialized
    * ``datetime`` -> ISO-8601 string via ``isoformat()``
    * ``bytes``    -> UTF-8 text, undecodable bytes replaced with U+FFFD
    * anything else is returned unchanged (tuples and sets included).
    """
    if isinstance(obj, dict):
        return {key: serialize(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return list(map(serialize, obj))
    if isinstance(obj, datetime):
        return obj.isoformat()
    if isinstance(obj, bytes):
        return obj.decode("utf-8", errors="replace")
    return obj
def normalize_mailbox(mailbox: Optional[Union[str, list]]) -> Optional[list[str]]:
    """Coerce the ``mailbox`` tool argument into a list of names, or ``None``.

    ``None``, ``""`` and ``[]`` all mean "no mailbox filter" and yield
    ``None``. A non-empty string becomes a one-element list; any other value
    is copied into a new list.
    """
    if isinstance(mailbox, str):
        return [mailbox] if mailbox else None
    if mailbox is None or mailbox == []:
        return None
    return [*mailbox]
def parse_since(s: Optional[str]) -> Optional[datetime]:
    """Parse a date/time filter string into a timezone-aware ``datetime``.

    Accepted forms:
      * ``"YYYY-MM-DD"``            -> midnight UTC of that day
      * ISO-8601 timestamp with "T" -> parsed with ``datetime.fromisoformat``;
        a trailing ``Z`` is accepted as UTC.

    A timestamp that carries no UTC offset is interpreted as UTC, so both
    branches consistently return aware values (a naive value would otherwise
    be interpreted in the database session's time zone).

    Returns ``None`` for ``None`` or an empty string.
    Raises ``ValueError`` (chained to the underlying error) for anything that
    cannot be parsed.
    """
    if not s:
        return None
    try:
        text = s.strip()
        if "T" in text:
            # fromisoformat() only understands a literal "Z" suffix on newer
            # Python versions, so normalise it to an explicit offset first.
            parsed = datetime.fromisoformat(text.replace("Z", "+00:00"))
        else:
            parsed = datetime.strptime(text, "%Y-%m-%d")
    except (ValueError, TypeError, AttributeError) as e:
        raise ValueError(f"Bad date {s!r}: {e}") from e
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed
# --- MCP --------------------------------------------------------------------
# FastMCP application object named "emaily". Every function decorated with
# @mcp.tool() below is attached to this instance.
mcp = FastMCP("emaily")
@mcp.tool()
def ping() -> dict:
    """Quick health check. Reports Mongo + Postgres connectivity, total mailboxes,
    PG indexed emails count, ok/error breakdown.
    """
    # Any failure (Mongo or Postgres) is logged with its traceback and reported
    # as {"status": "error", ...} instead of being raised to the MCP client.
    try:
        build_info = mongo.admin.command("buildInfo")
        db = mongo[MONGO_DB]
        # Every collection except the bookkeeping ones is treated as a mailbox.
        mailboxes = [name for name in db.list_collection_names()
                     if name not in SKIP_COLLECTIONS]
        mongo_counts = {name: db[name].estimated_document_count()
                        for name in mailboxes}

        with pg_conn() as pg, pg.cursor() as cur:
            cur.execute("SELECT mailbox, ok, count(*) FROM emails "
                        "GROUP BY mailbox, ok ORDER BY mailbox, ok")
            grouped = cur.fetchall()

        # Fold (mailbox, ok, count) rows into {mailbox: {"ok": n, "error": m}}.
        pg_summary: dict = {}
        for mailbox_name, is_ok, n in grouped:
            bucket = pg_summary.setdefault(mailbox_name, {})
            bucket["ok" if is_ok else "error"] = n

        return {
            "status": "ok",
            "mongo_version": build_info.get("version"),
            "mailboxes": mailboxes,
            "mongo_email_count": mongo_counts,
            "pg_indexed_per_mailbox": pg_summary,
        }
    except Exception as e:
        log(traceback.format_exc())
        return {"status": "error", "error": str(e)}
@mcp.tool()
def list_mailboxes() -> dict:
    """Overview of all mailboxes — totals, indexed coverage, earliest/latest received_at,
    top senders by volume. Use to understand the corpus before searching.
    """
    # Returns {"mailboxes": {name: {...}}} on success and {"error": "..."} on
    # any failure (the traceback is logged to stderr).
    try:
        mailboxes = [c for c in mongo[MONGO_DB].list_collection_names()
                     if c not in SKIP_COLLECTIONS]
        out = {}
        # One connection and cursor serve every mailbox: connecting inside the
        # loop would pay a full connect/authenticate round-trip per mailbox.
        with pg_conn() as pg, pg.cursor() as cur:
            for mb in mailboxes:
                # Aggregate counters and the received_at range for the mailbox.
                cur.execute("""
                    SELECT count(*) FILTER (WHERE ok) AS ok,
                           count(*) AS total,
                           min(received_at) AS first_at,
                           max(received_at) AS last_at,
                           count(*) FILTER (WHERE has_attachments) AS with_att
                    FROM emails WHERE mailbox = %s
                """, (mb,))
                ok, total, first_at, last_at, with_att = cur.fetchone()

                # Five most frequent sender addresses in the mailbox.
                cur.execute("""
                    SELECT sender_email, count(*) c FROM emails
                    WHERE mailbox = %s AND sender_email IS NOT NULL
                    GROUP BY sender_email ORDER BY c DESC LIMIT 5
                """, (mb,))
                top_senders = [{"email": s, "count": c} for s, c in cur.fetchall()]

                out[mb] = {
                    "indexed_ok": ok,
                    "indexed_total": total,
                    "with_attachments": with_att,
                    "first_received": serialize(first_at),
                    "last_received": serialize(last_at),
                    "top_senders": top_senders,
                }
        return {"mailboxes": out}
    except Exception as e:
        log(traceback.format_exc())
        return {"error": str(e)}
@mcp.tool()
def search(
    query: str,
    mailbox: Optional[Union[str, list]] = None,
    since: Optional[str] = None,
    until: Optional[str] = None,
    folder_contains: Optional[str] = None,
    sender_contains: Optional[str] = None,
    has_attachments: Optional[bool] = None,
    limit: int = 20,
) -> dict:
    """PRIMARY TOOL — fulltext search across all indexed emails.

    Index includes: subject, sender (email + name), recipients (to/cc),
    attachment filenames, AND full body text.

    query: websearch_to_tsquery syntax:
        invoice payment            -> AND
        "lot expiration"           -> phrase
        SAE OR "serious adverse"   -> OR
        urgent -newsletter         -> exclude
    mailbox: one mailbox string or list (e.g. "vbuzalka@its.jnj.com"). None = all.
    since/until: ISO date "YYYY-MM-DD" on received_at
                 (since is inclusive, until is exclusive)
    folder_contains: substring match against folder_path (case-insensitive)
    sender_contains: substring match against sender_email OR sender_name (case-insensitive)
    has_attachments: True / False / None (any)
    limit: max 100

    Returns ranked results with `snippet` showing matches highlighted as <<...>>.
    Use `read_email` to fetch full body of any hit.
    """

    def like_pattern(fragment: Optional[str]) -> Optional[str]:
        # Build a "%fragment%" ILIKE pattern in which the LIKE metacharacters
        # (backslash, %, _) are escaped, so the filter really is a literal
        # substring match. None/"" means "no filter" and yields None.
        if not fragment:
            return None
        escaped = (fragment.replace("\\", "\\\\")
                           .replace("%", "\\%")
                           .replace("_", "\\_"))
        return f"%{escaped}%"

    try:
        mboxes = normalize_mailbox(mailbox)
        since_dt = parse_since(since)
        until_dt = parse_since(until)
        limit = min(max(1, limit), 100)
        sql = """
            WITH q AS (
                SELECT websearch_to_tsquery('soubory'::regconfig, %(query)s) AS tsq
            )
            SELECT
                e.id, e.mailbox, e.message_id, e.conversation_id, e.folder_path,
                e.subject, e.sender_email, e.sender_name,
                e.to_addrs, e.cc_addrs,
                e.received_at, e.sent_at, e.is_read,
                e.has_attachments, e.attachment_count, e.attachments_summary,
                e.body_length, e.body_source,
                ts_rank(e.tsv, q.tsq) AS rank,
                ts_headline('soubory'::regconfig,
                            left(coalesce(e.body, e.subject), 200000),
                            q.tsq,
                            'MaxFragments=3, MinWords=4, MaxWords=18, '
                            'StartSel=<<, StopSel=>>, FragmentDelimiter= ... ') AS snippet
            FROM emails e, q
            WHERE e.tsv @@ q.tsq
              AND e.ok = TRUE
              AND (%(mboxes)s::text[] IS NULL OR e.mailbox = ANY(%(mboxes)s::text[]))
              AND (%(since)s::timestamptz IS NULL OR e.received_at >= %(since)s::timestamptz)
              AND (%(until)s::timestamptz IS NULL OR e.received_at < %(until)s::timestamptz)
              AND (%(folder)s::text IS NULL OR e.folder_path ILIKE %(folder_like)s)
              AND (%(sender)s::text IS NULL
                   OR e.sender_email ILIKE %(sender_like)s
                   OR e.sender_name ILIKE %(sender_like)s)
              AND (%(has_att)s::boolean IS NULL OR e.has_attachments = %(has_att)s::boolean)
            ORDER BY rank DESC, e.received_at DESC NULLS LAST
            LIMIT %(limit)s
        """
        params = {
            "query": query, "mboxes": mboxes,
            "since": since_dt, "until": until_dt,
            # An empty string must behave like "no filter": if it reached SQL
            # as '' the IS NULL guard would fail while the pattern is NULL,
            # and `ILIKE NULL` silently filters out every row.
            "folder": folder_contains or None,
            "folder_like": like_pattern(folder_contains),
            "sender": sender_contains or None,
            "sender_like": like_pattern(sender_contains),
            "has_att": has_attachments,
            "limit": limit,
        }
        with pg_conn() as pg, pg.cursor() as cur:
            cur.execute(sql, params)
            cols = [c.name for c in cur.description]
            rows = [dict(zip(cols, r)) for r in cur.fetchall()]

        results = []
        for r in rows:
            name, email = r["sender_name"], r["sender_email"]
            # Only use the "Name <addr>" form when both parts exist, so a
            # missing address is not rendered as "Name <None>".
            sender_label = f"{name} <{email}>" if name and email else (name or email)
            results.append({
                "mailbox": r["mailbox"],
                "message_id": r["message_id"],
                "conversation_id": r["conversation_id"],
                "folder": r["folder_path"],
                "subject": r["subject"],
                "from": sender_label,
                "to": r["to_addrs"],
                "cc": r["cc_addrs"],
                "received_at": serialize(r["received_at"]),
                "is_read": r["is_read"],
                "has_attachments": r["has_attachments"],
                "attachment_count": r["attachment_count"],
                "attachments": r["attachments_summary"],
                "body_length": r["body_length"],
                "body_source": r["body_source"],
                "rank": round(float(r["rank"]), 5),
                "snippet": (r["snippet"] or "").strip(),
            })
        return {
            "query": query,
            "filters": {"mailbox": mboxes, "since": since, "until": until,
                        "folder_contains": folder_contains,
                        "sender_contains": sender_contains,
                        "has_attachments": has_attachments,
                        "limit": limit},
            "count": len(results),
            "results": results,
            "tip": "Use read_email(mailbox=..., message_id=...) for full body or thread.",
        }
    except Exception as e:
        log(traceback.format_exc())
        return {"error": str(e), "query": query}
@mcp.tool()
def read_email(
    message_id: Optional[str] = None,
    mailbox: Optional[str] = None,
    offset: int = 0,
    length: int = DEFAULT_BODY_CHARS,
    around_match: Optional[str] = None,
    include_html: bool = False,
) -> dict:
    """Read one email — full plain text body + metadata.

    Identify by `message_id` (Internet Message-ID, the _id in Mongo).
    `mailbox` narrows the lookup if the same Message-ID appears in multiple mailboxes
    (e.g. you got copies in both work and personal accounts).

    offset, length: slice the body. length max 200000. A negative offset is
                    treated as 0.
    around_match:   case-insensitive substring; returns up to 3 windows of ~1000 chars
                    centered on matches, instead of a flat slice.
    include_html:   also return raw body_html from Mongo (typically large — only if you
                    really need the original markup).
    """
    if not message_id:
        return {"error": "Provide message_id."}
    try:
        length = min(max(1, length), MAX_BODY_CHARS)
        # A negative offset would make body[offset:offset + length] slice from
        # the END of the body and break has_more/next_offset; clamp it.
        offset = max(0, offset)
        sql = """
            SELECT id, mailbox, message_id, graph_id, conversation_id, folder_path,
                   subject, sender_email, sender_name,
                   to_addrs, cc_addrs, bcc_addrs,
                   sent_at, received_at, modified_at, is_read, is_draft,
                   has_attachments, attachment_count, attachments_summary,
                   body, body_length, body_source,
                   extractor_version, extracted_at, ok, error
            FROM emails WHERE message_id = %s
        """
        params = [message_id]
        if mailbox:
            sql += " AND mailbox = %s"
            params.append(mailbox)
        sql += " LIMIT 1"
        with pg_conn() as pg, pg.cursor() as cur:
            cur.execute(sql, params)
            row = cur.fetchone()
            cols = [c.name for c in cur.description]
        if not row:
            return {"error": "Email not found.",
                    "message_id": message_id, "mailbox": mailbox}
        rec = dict(zip(cols, row))
        body = rec.get("body") or ""

        if around_match and body:
            # Collect up to three context windows (400 chars before the match
            # start, 600 after) around successive non-overlapping matches.
            needle = around_match.lower()
            hay = body.lower()
            windows = []
            start = 0
            while len(windows) < 3:
                pos = hay.find(needle, start)
                if pos < 0:
                    break
                lo = max(0, pos - 400)
                hi = min(len(body), pos + 600)
                windows.append({"offset": lo, "text": body[lo:hi]})
                start = pos + len(needle)
            body_out = None
            slice_info = {"mode": "around_match", "match": around_match,
                          "windows_found": len(windows), "windows": windows}
        else:
            end = offset + length
            body_out = body[offset:end]
            has_more = end < len(body)
            slice_info = {
                "mode": "slice", "offset": offset,
                "length_returned": len(body_out),
                "has_more": has_more,
                "next_offset": end if has_more else None,
            }

        name, email = rec["sender_name"], rec["sender_email"]
        # Only use the "Name <addr>" form when both parts exist, so a missing
        # address is not rendered as "Name <None>".
        sender_label = f"{name} <{email}>" if name and email else (name or email)
        out = {
            "mailbox": rec["mailbox"],
            "message_id": rec["message_id"],
            "conversation_id": rec["conversation_id"],
            "folder": rec["folder_path"],
            "subject": rec["subject"],
            "from": sender_label,
            "to": rec["to_addrs"],
            "cc": rec["cc_addrs"],
            "bcc": rec["bcc_addrs"],
            "received_at": serialize(rec["received_at"]),
            "sent_at": serialize(rec["sent_at"]),
            "is_read": rec["is_read"],
            "is_draft": rec["is_draft"],
            "has_attachments": rec["has_attachments"],
            "attachment_count": rec["attachment_count"],
            "attachments": rec["attachments_summary"],
            "body_length": rec["body_length"],
            "body_source": rec["body_source"],
            "extractor_version": rec["extractor_version"],
            "ok": rec["ok"],
            "error": rec["error"],
        }
        if body_out is not None:
            out["body"] = body_out
        out["slice"] = slice_info

        if include_html:
            # The raw HTML and per-attachment details are read from the
            # mailbox's Mongo collection, keyed by the Message-ID.
            mdoc = mongo[MONGO_DB][rec["mailbox"]].find_one(
                {"_id": rec["message_id"]}, {"body_html": 1, "attachments": 1})
            if mdoc:
                out["body_html"] = mdoc.get("body_html")
                out["attachments_detail"] = mdoc.get("attachments")
        return out
    except Exception as e:
        log(traceback.format_exc())
        return {"error": str(e)}
@mcp.tool()
def by_sender(
    sender: str,
    mailbox: Optional[Union[str, list]] = None,
    since: Optional[str] = None,
    has_attachments: Optional[bool] = None,
    limit: int = 30,
) -> dict:
    """List emails from a specific sender (substring match on sender_email or sender_name,
    case-insensitive). Use for "what did X send me" or "all newsletters from Y".
    Returned sorted by received_at DESC.
    """
    # limit is clamped to 1..200. Failures are logged and returned as
    # {"error": "..."} rather than raised.
    try:
        mboxes = normalize_mailbox(mailbox)
        since_dt = parse_since(since)
        limit = min(max(1, limit), 200)
        # Escape the LIKE metacharacters so the documented "substring match"
        # is literal: addresses such as first_last@... contain "_", which
        # ILIKE would otherwise treat as a single-character wildcard.
        escaped = (sender.replace("\\", "\\\\")
                         .replace("%", "\\%")
                         .replace("_", "\\_"))
        sql = """
            SELECT mailbox, message_id, subject, sender_email, sender_name,
                   to_addrs, folder_path, received_at, has_attachments, attachment_count,
                   attachments_summary, body_length
            FROM emails
            WHERE ok = TRUE
              AND (sender_email ILIKE %(s)s OR sender_name ILIKE %(s)s)
              AND (%(mboxes)s::text[] IS NULL OR mailbox = ANY(%(mboxes)s::text[]))
              AND (%(since)s::timestamptz IS NULL OR received_at >= %(since)s::timestamptz)
              AND (%(has_att)s::boolean IS NULL OR has_attachments = %(has_att)s::boolean)
            ORDER BY received_at DESC NULLS LAST
            LIMIT %(limit)s
        """
        with pg_conn() as pg, pg.cursor() as cur:
            cur.execute(sql, {"s": f"%{escaped}%", "mboxes": mboxes,
                              "since": since_dt, "has_att": has_attachments,
                              "limit": limit})
            cols = [c.name for c in cur.description]
            rows = [dict(zip(cols, r)) for r in cur.fetchall()]
        for r in rows:
            r["received_at"] = serialize(r["received_at"])
        return {"sender_match": sender, "count": len(rows), "results": rows}
    except Exception as e:
        log(traceback.format_exc())
        return {"error": str(e)}
@mcp.tool()
def recent_emails(
    mailbox: Optional[Union[str, list]] = None,
    days: int = 7,
    folder_contains: Optional[str] = None,
    has_attachments: Optional[bool] = None,
    limit: int = 30,
) -> dict:
    """List recent emails (by received_at). Use for "what came in today/this week".
    days=0 to ignore time window (just top-N newest).
    """
    # limit is clamped to 1..200. Failures are logged and returned as
    # {"error": "..."} rather than raised.
    try:
        mboxes = normalize_mailbox(mailbox)
        limit = min(max(1, limit), 200)
        since_dt = None
        if days and days > 0:
            since_dt = datetime.now(timezone.utc) - timedelta(days=days)

        # Literal-substring ILIKE pattern: escape backslash, % and _ so they
        # are not interpreted as wildcards. Empty/None means "no filter".
        folder_like = None
        if folder_contains:
            escaped = (folder_contains.replace("\\", "\\\\")
                                      .replace("%", "\\%")
                                      .replace("_", "\\_"))
            folder_like = f"%{escaped}%"

        sql = """
            SELECT mailbox, message_id, subject, sender_email, sender_name,
                   folder_path, received_at, has_attachments, attachment_count,
                   attachments_summary, body_length, is_read
            FROM emails
            WHERE ok = TRUE
              AND (%(mboxes)s::text[] IS NULL OR mailbox = ANY(%(mboxes)s::text[]))
              AND (%(since)s::timestamptz IS NULL OR received_at >= %(since)s::timestamptz)
              AND (%(folder)s::text IS NULL OR folder_path ILIKE %(folder_like)s)
              AND (%(has_att)s::boolean IS NULL OR has_attachments = %(has_att)s::boolean)
            ORDER BY received_at DESC NULLS LAST
            LIMIT %(limit)s
        """
        with pg_conn() as pg, pg.cursor() as cur:
            cur.execute(sql, {
                "mboxes": mboxes, "since": since_dt,
                # "" must count as "no filter"; passed through as '' it would
                # defeat the IS NULL guard while the pattern is NULL, and
                # `ILIKE NULL` drops every row.
                "folder": folder_contains or None,
                "folder_like": folder_like,
                "has_att": has_attachments, "limit": limit,
            })
            cols = [c.name for c in cur.description]
            rows = [dict(zip(cols, r)) for r in cur.fetchall()]
        for r in rows:
            r["received_at"] = serialize(r["received_at"])
        return {"days": days, "count": len(rows), "results": rows}
    except Exception as e:
        log(traceback.format_exc())
        return {"error": str(e)}
@mcp.tool()
def conversation_thread(conversation_id: str, limit: int = 50) -> dict:
    """Return all emails in one Outlook conversation thread (conversation_id from Graph).
    Ordered chronologically. Use to see the full back-and-forth on a topic.
    """
    # The row cap is clamped to 1..200. Only rows flagged ok are returned.
    # Failures are logged and reported as {"error": "..."}.
    thread_sql = """
        SELECT mailbox, message_id, subject, sender_email, sender_name,
               to_addrs, received_at, folder_path, body_length, has_attachments,
               attachments_summary
        FROM emails
        WHERE conversation_id = %s AND ok = TRUE
        ORDER BY received_at ASC NULLS LAST
        LIMIT %s
    """
    try:
        capped = min(max(1, limit), 200)
        with pg_conn() as pg, pg.cursor() as cur:
            cur.execute(thread_sql, (conversation_id, capped))
            names = [col.name for col in cur.description]
            fetched = cur.fetchall()
        thread = []
        for record in fetched:
            item = dict(zip(names, record))
            # Timestamps are returned as ISO-8601 strings.
            item["received_at"] = serialize(item["received_at"])
            thread.append(item)
        return {"conversation_id": conversation_id, "count": len(thread), "thread": thread}
    except Exception as e:
        log(traceback.format_exc())
        return {"error": str(e)}
@mcp.tool()
def find_attachment(
    name_contains: str,
    mailbox: Optional[Union[str, list]] = None,
    since: Optional[str] = None,
    limit: int = 30,
) -> dict:
    """Find emails whose attachment filename contains the substring (case-insensitive).
    Use for "find emails with that protocol PDF" or "any invoice attachments".
    Returns emails ordered by received_at DESC.
    """
    # limit is clamped to 1..200. Failures are logged and returned as
    # {"error": "..."} rather than raised.
    try:
        mboxes = normalize_mailbox(mailbox)
        since_dt = parse_since(since)
        limit = min(max(1, limit), 200)
        # Filenames routinely contain "_" (and sometimes "%"); escape the LIKE
        # metacharacters so e.g. "report_2024" does not also match
        # "reportX2024".
        escaped = (name_contains.replace("\\", "\\\\")
                                .replace("%", "\\%")
                                .replace("_", "\\_"))
        sql = """
            SELECT mailbox, message_id, subject, sender_email, sender_name,
                   received_at, attachment_count, attachments_summary, folder_path
            FROM emails
            WHERE ok = TRUE
              AND has_attachments = TRUE
              AND attachments_summary ILIKE %(s)s
              AND (%(mboxes)s::text[] IS NULL OR mailbox = ANY(%(mboxes)s::text[]))
              AND (%(since)s::timestamptz IS NULL OR received_at >= %(since)s::timestamptz)
            ORDER BY received_at DESC NULLS LAST
            LIMIT %(limit)s
        """
        with pg_conn() as pg, pg.cursor() as cur:
            cur.execute(sql, {"s": f"%{escaped}%",
                              "mboxes": mboxes, "since": since_dt, "limit": limit})
            cols = [c.name for c in cur.description]
            rows = [dict(zip(cols, r)) for r in cur.fetchall()]
        for r in rows:
            r["received_at"] = serialize(r["received_at"])
        return {"name_match": name_contains, "count": len(rows), "results": rows}
    except Exception as e:
        log(traceback.format_exc())
        return {"error": str(e)}
@mcp.tool()
def top_senders(
    mailbox: Optional[Union[str, list]] = None,
    since: Optional[str] = None,
    limit: int = 20,
) -> dict:
    """Top senders by volume (count of received emails). Optionally limit by mailbox or date window.
    Use for "who emails me most" or "top senders this month".
    """
    # The row cap is clamped to 1..100. Only rows flagged ok with a non-NULL
    # sender_email are counted. Failures are logged and returned as
    # {"error": "..."}.
    ranking_sql = """
        SELECT sender_email, count(*) AS c, max(received_at) AS last_at
        FROM emails
        WHERE ok = TRUE AND sender_email IS NOT NULL
          AND (%(mboxes)s::text[] IS NULL OR mailbox = ANY(%(mboxes)s::text[]))
          AND (%(since)s::timestamptz IS NULL OR received_at >= %(since)s::timestamptz)
        GROUP BY sender_email
        ORDER BY c DESC
        LIMIT %(limit)s
    """
    try:
        bind = {
            "mboxes": normalize_mailbox(mailbox),
            "since": parse_since(since),
            "limit": min(max(1, limit), 100),
        }
        with pg_conn() as pg, pg.cursor() as cur:
            cur.execute(ranking_sql, bind)
            fetched = cur.fetchall()
        ranking = []
        for address, volume, latest in fetched:
            ranking.append({"sender_email": address, "count": volume,
                            "last_at": serialize(latest)})
        return {"count": len(ranking), "results": ranking}
    except Exception as e:
        log(traceback.format_exc())
        return {"error": str(e)}
@mcp.tool()
def pipeline_status(mailbox: Optional[Union[str, list]] = None) -> dict:
    """End-to-end status of the email-ingest pipeline per mailbox.

    Reports, for each mailbox, where it stands in the 5-step pipeline:
      1. parse_emails_graph       -> mongo_total
      2. (refetch text bodies)    -> body_text_missing (legacy v1.3 emails)
      3. download_attachments     -> attach_done / attach_pending
                                     attach_missing (404 — marked, won't retry)
                                     attach_reference (OneDrive/SharePoint link, no content)
      4. unwrap_smime             -> smime_p7m_total / smime_unwrapped / smime_pending
                                     smime_p7s_count (informational; not unwrapped by design)
      5. enrich_fulltext          -> pg_indexed
    Plus:
      - permanently_deleted (marked by delta sync)

    Use this instead of running multiple Mongo count queries by hand. Returns
    one row per mailbox; if `mailbox` is given, returns just those rows.
    """
    # Mongo filters, named once so the per-mailbox loop below reads as a list
    # of counters. They are identical for every mailbox.
    with_att_q = {"has_attachments": True}
    # Emails with at least one non-inline attachment that has no file_hash yet
    # and is not marked missing or reference.
    pending_q = {
        "has_attachments": True,
        "attachments": {"$elemMatch": {
            "is_inline": False,
            "file_hash": {"$exists": False},
            "attachment_missing": {"$ne": True},
            "attachment_reference": {"$ne": True},
        }},
    }
    missing_q = {"attachments.attachment_missing": True}
    reference_q = {"attachments.attachment_reference": True}
    p7m_q = {"attachments.filename": {"$regex": r"^smime\.p7m$", "$options": "i"}}
    p7m_unwrapped_q = {**p7m_q, "smime_unwrapped": True}
    p7s_q = {"attachments.filename": {"$regex": r"^smime\.p7s$", "$options": "i"}}
    body_missing_q = {
        "body_html": {"$in": [None, ""]},
        "body_text": {"$exists": False},
        "graph_id": {"$exists": True},
    }
    deleted_q = {"permanently_deleted": True}

    try:
        wanted = normalize_mailbox(mailbox)
        db = mongo[MONGO_DB]
        all_mb = [name for name in db.list_collection_names()
                  if name not in SKIP_COLLECTIONS]
        # Requested names that do not exist as collections are simply absent
        # from the result.
        targets = all_mb if wanted is None else [m for m in all_mb if m in wanted]

        # PG counts in one pass
        with pg_conn() as pg, pg.cursor() as cur:
            cur.execute("SELECT mailbox, count(*) FROM emails "
                        "WHERE ok = true GROUP BY mailbox")
            pg_counts: dict[str, int] = dict(cur.fetchall())

        report = {}
        for name in targets:
            col = db[name]
            mongo_total = col.estimated_document_count()
            with_att = col.count_documents(with_att_q)
            attach_pending = col.count_documents(pending_q)
            attach_missing = col.count_documents(missing_q)
            attach_reference = col.count_documents(reference_q)
            # Derived by subtraction, not counted directly.
            # NOTE(review): an email matching more than one of the three
            # filters above is subtracted more than once — confirm whether
            # that overlap can occur in practice.
            attach_done = with_att - attach_pending - attach_missing - attach_reference
            smime_p7m_total = col.count_documents(p7m_q)
            smime_unwrapped = col.count_documents(p7m_unwrapped_q)
            smime_p7s_count = col.count_documents(p7s_q)
            body_text_missing = col.count_documents(body_missing_q)
            permanently_deleted = col.count_documents(deleted_q)

            report[name] = {
                "mongo_total": mongo_total,
                "with_attachments": with_att,
                "attach_done": attach_done,
                "attach_pending": attach_pending,
                "attach_missing": attach_missing,
                "attach_reference": attach_reference,
                "smime_p7m_total": smime_p7m_total,
                "smime_unwrapped": smime_unwrapped,
                "smime_pending": smime_p7m_total - smime_unwrapped,
                "smime_p7s_count": smime_p7s_count,
                "body_text_missing": body_text_missing,
                "pg_indexed": pg_counts.get(name, 0),
                "permanently_deleted": permanently_deleted,
            }
        return {"mailboxes": report}
    except Exception as e:
        log(traceback.format_exc())
        return {"error": str(e)}
@mcp.tool()
def sync_state_overview(mailbox: Optional[Union[str, list]] = None) -> dict:
    """Delta-sync state across mailboxes (collection `emaily.sync_state`).

    For each (mailbox, folder) pair shows: deltaLink present?, last_run_at,
    cumulative new/sync/removed/run_count. Use to confirm a mailbox is
    incrementally synced and to spot folders that haven't run in a while.
    """
    # Failures (including a sync_state document without a "mailbox" key) are
    # logged and returned as {"error": "..."}.
    projection = {
        "mailbox": 1, "folder_path": 1, "folder_id": 1,
        "delta_link": 1, "last_run_at": 1,
        "cumulative_new": 1, "cumulative_sync": 1,
        "cumulative_removed": 1, "run_count": 1,
    }
    counter_fields = ("cumulative_new", "cumulative_sync",
                      "cumulative_removed", "run_count")
    try:
        db = mongo[MONGO_DB]
        wanted = normalize_mailbox(mailbox)
        criteria: dict = {"mailbox": {"$in": wanted}} if wanted else {}
        docs = db["sync_state"].find(criteria, projection).sort(
            [("mailbox", 1), ("folder_path", 1)])

        by_mailbox: dict[str, list] = {}
        for doc in docs:
            entry = {
                "folder_path": doc.get("folder_path"),
                "folder_id": doc.get("folder_id"),
                # Only the presence of the delta link is reported, not its value.
                "has_delta_link": bool(doc.get("delta_link")),
                "last_run_at": serialize(doc.get("last_run_at")),
            }
            # Counters default to 0 when absent from the document.
            for field in counter_fields:
                entry[field] = doc.get(field, 0)
            by_mailbox.setdefault(doc["mailbox"], []).append(entry)

        # mailboxes that have collections but ZERO sync_state entries
        # (restricted to the requested mailboxes when a filter was given)
        never_synced = sorted(
            name for name in db.list_collection_names()
            if name not in SKIP_COLLECTIONS
            and name not in by_mailbox
            and (not wanted or name in wanted)
        )
        return {
            "mailboxes": by_mailbox,
            "never_delta_synced": never_synced,
        }
    except Exception as e:
        log(traceback.format_exc())
        return {"error": str(e)}
# Script entry point: announce start-up on stderr, then run the FastMCP server.
if __name__ == "__main__":
    log("MCP emaily server started (FastMCP)")
    mcp.run()