Add batch embeddings to avoid MCP timeout on store_conversation
store_conversation now collects all texts (summary + key_memories) and calls Voyage AI once via get_embeddings_batch() instead of making N sequential calls. This prevents MCP timeouts for sessions with many memories. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+38
-13
@@ -74,21 +74,37 @@ def get_conn() -> psycopg.Connection:
|
||||
|
||||
_voyage_client = None
|
||||
|
||||
def get_embedding(text: str) -> Optional[list[float]]:
|
||||
"""Return 1024-dim embedding via Voyage AI, or None if unavailable."""
|
||||
def _get_voyage_client():
    """Return the module-wide Voyage AI client, creating it on first use.

    The client is stored in the module-level ``_voyage_client`` variable so
    later calls reuse the same instance.
    """
    global _voyage_client
    if _voyage_client is not None:
        return _voyage_client

    # Imported here rather than at module level, so the package is only
    # loaded when a client is actually requested.
    import voyageai

    _voyage_client = voyageai.Client(api_key=VOYAGE_API_KEY)
    return _voyage_client
|
||||
|
||||
def get_embedding(text: str) -> Optional[list[float]]:
    """Return a single Voyage AI embedding for ``text``, or None.

    None is returned when ``VOYAGE_API_KEY`` is not set or when the embedding
    request raises; failures are logged via ``log`` instead of propagating.

    Args:
        text: The text to embed. Only its first 8000 characters are sent.

    Returns:
        The first embedding of the response, or None if unavailable.
    """
    if not VOYAGE_API_KEY:
        return None
    try:
        # Always go through the shared accessor. Assigning `_voyage_client`
        # in this function without a `global` declaration would make the name
        # local and raise UnboundLocalError on the `is None` check.
        result = _get_voyage_client().embed([text[:8000]], model=EMBED_MODEL)
        return result.embeddings[0]
    except Exception as e:
        log(f"Embedding error: {e}")
        return None
|
||||
|
||||
def get_embeddings_batch(texts: list[str]) -> list[Optional[list[float]]]:
    """Embed several texts with one Voyage AI request instead of one per text.

    The result always has exactly ``len(texts)`` entries so callers can pair
    it with their inputs by position. Every entry is None when
    ``VOYAGE_API_KEY`` is not set, ``texts`` is empty, the request raises, or
    the response does not contain one embedding per input.

    Args:
        texts: Texts to embed. Each is truncated to its first 8000 characters.

    Returns:
        A list of embeddings (or None placeholders), one per input text.
    """
    if not VOYAGE_API_KEY or not texts:
        return [None] * len(texts)

    try:
        truncated = [t[:8000] for t in texts]
        result = _get_voyage_client().embed(truncated, model=EMBED_MODEL)
        embeddings = list(result.embeddings)
    except Exception as e:
        log(f"Batch embedding error: {e}")
        return [None] * len(texts)

    # Callers consume the result positionally; a short or long response would
    # silently misalign embeddings with their texts, so treat it as a failure.
    if len(embeddings) != len(texts):
        log(
            f"Batch embedding error: expected {len(texts)} embeddings, "
            f"got {len(embeddings)}"
        )
        return [None] * len(texts)

    return embeddings
|
||||
|
||||
# ─── Helpers ─────────────────────────────────────────────────────────────────
|
||||
|
||||
def _row_to_dict(row: dict) -> dict:
|
||||
@@ -266,9 +282,22 @@ def store_conversation(
|
||||
(sid, msg.get("role", "unknown"), msg.get("content", ""), i),
|
||||
)
|
||||
|
||||
# ── Batch embedding: summary + všechny key_memories najednou ──
|
||||
valid_kms = [km for km in (key_memories or []) if km.get("content")]
|
||||
|
||||
batch_texts = []
|
||||
if summary:
|
||||
batch_texts.append(f"{title or ''} {summary}")
|
||||
for km in valid_kms:
|
||||
batch_texts.append(f"{km.get('title','') or ''} {km.get('content','')}")
|
||||
|
||||
embeddings = get_embeddings_batch(batch_texts) # 1 API volání
|
||||
|
||||
emb_iter = iter(embeddings)
|
||||
|
||||
# summary jako paměťový záznam
|
||||
if summary:
|
||||
emb = get_embedding(f"{title or ''} {summary}")
|
||||
emb = next(emb_iter)
|
||||
_insert_memory_in_tx(conn, {
|
||||
"mem_type": "summary",
|
||||
"title": title or f"Session {sid}",
|
||||
@@ -282,12 +311,8 @@ def store_conversation(
|
||||
|
||||
# key memories
|
||||
stored_km = 0
|
||||
for km in (key_memories or []):
|
||||
if not km.get("content"):
|
||||
continue
|
||||
emb = get_embedding(
|
||||
f"{km.get('title','') or ''} {km.get('content','')}"
|
||||
)
|
||||
for km in valid_kms:
|
||||
emb = next(emb_iter)
|
||||
_insert_memory_in_tx(conn, {
|
||||
"mem_type": km.get("mem_type", "fact"),
|
||||
"title": km.get("title"),
|
||||
|
||||
Reference in New Issue
Block a user