Add batch embeddings to avoid MCP timeout on store_conversation

store_conversation now collects all texts (summary + key_memories)
and calls Voyage AI once via get_embeddings_batch() instead of N
sequential calls. Prevents MCP timeout for sessions with many memories.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-06-07 06:45:28 +02:00
parent 586c2c4484
commit 797de01e60
+38 -13
View File
@@ -74,21 +74,37 @@ def get_conn() -> psycopg.Connection:
_voyage_client = None
def get_embedding(text: str) -> Optional[list[float]]:
"""Return 1024-dim embedding via Voyage AI, or None if unavailable."""
def _get_voyage_client():
    """Return the module-wide Voyage AI client, creating it on first use.

    The client is cached in the module-level ``_voyage_client`` variable so
    that later calls reuse the same instance.
    """
    global _voyage_client
    if _voyage_client is not None:
        return _voyage_client
    # Imported here rather than at module level, so voyageai is only loaded
    # when a client is actually requested.
    import voyageai
    _voyage_client = voyageai.Client(api_key=VOYAGE_API_KEY)
    return _voyage_client
def get_embedding(text: str) -> Optional[list[float]]:
    """Return a single embedding via Voyage AI, or None.

    Args:
        text: Text to embed; only its first 8000 characters are sent.

    Returns:
        The embedding vector for ``text``, or None when ``VOYAGE_API_KEY``
        is unset or the request fails (the error is logged, not raised).
    """
    if not VOYAGE_API_KEY:
        return None
    try:
        # Obtain the client only through the shared accessor. Assigning
        # ``_voyage_client`` inside this function without a ``global``
        # statement would make the name local and raise UnboundLocalError
        # on the ``is None`` check, which the handler below would swallow.
        result = _get_voyage_client().embed([text[:8000]], model=EMBED_MODEL)
        return result.embeddings[0]
    except Exception as e:
        # Best-effort: callers treat a missing embedding as "unavailable".
        log(f"Embedding error: {e}")
        return None
def get_embeddings_batch(texts: list[str]) -> list[Optional[list[float]]]:
    """Embed several texts with a single Voyage AI call.

    One API request for N texts instead of N individual requests.

    Args:
        texts: Texts to embed; each is truncated to its first 8000
            characters before being sent.

    Returns:
        A list with exactly ``len(texts)`` entries, in the same order as
        ``texts``. Every entry is None when ``VOYAGE_API_KEY`` is unset,
        ``texts`` is empty (yielding ``[]``), the request fails, or the
        response does not contain one embedding per input text.
    """
    if not VOYAGE_API_KEY or not texts:
        return [None] * len(texts)
    try:
        truncated = [t[:8000] for t in texts]
        # NOTE(review): the provider may cap the number of texts accepted
        # per request; this sends everything in one call - confirm the limit.
        result = _get_voyage_client().embed(truncated, model=EMBED_MODEL)
        embeddings = result.embeddings
        if len(embeddings) != len(texts):
            # Callers pair embeddings with texts by position, so a short or
            # long response must not be handed back as if it were aligned.
            log(
                "Batch embedding error: expected "
                f"{len(texts)} embeddings, got {len(embeddings)}"
            )
            return [None] * len(texts)
        return embeddings
    except Exception as e:
        # Best-effort: report the failure and fall back to "no embeddings".
        log(f"Batch embedding error: {e}")
        return [None] * len(texts)
# ─── Helpers ─────────────────────────────────────────────────────────────────
def _row_to_dict(row: dict) -> dict:
@@ -266,9 +282,22 @@ def store_conversation(
(sid, msg.get("role", "unknown"), msg.get("content", ""), i),
)
# ── Batch embedding: summary + všechny key_memories najednou ──
valid_kms = [km for km in (key_memories or []) if km.get("content")]
batch_texts = []
if summary:
batch_texts.append(f"{title or ''} {summary}")
for km in valid_kms:
batch_texts.append(f"{km.get('title','') or ''} {km.get('content','')}")
embeddings = get_embeddings_batch(batch_texts) # 1 API volání
emb_iter = iter(embeddings)
# summary jako paměťový záznam
if summary:
emb = get_embedding(f"{title or ''} {summary}")
emb = next(emb_iter)
_insert_memory_in_tx(conn, {
"mem_type": "summary",
"title": title or f"Session {sid}",
@@ -282,12 +311,8 @@ def store_conversation(
# key memories
stored_km = 0
for km in (key_memories or []):
if not km.get("content"):
continue
emb = get_embedding(
f"{km.get('title','') or ''} {km.get('content','')}"
)
for km in valid_kms:
emb = next(emb_iter)
_insert_memory_in_tx(conn, {
"mem_type": km.get("mem_type", "fact"),
"title": km.get("title"),