diff --git a/Knowledgebase/requirements.txt b/Knowledgebase/requirements.txt new file mode 100644 index 0000000..4c5d3f7 --- /dev/null +++ b/Knowledgebase/requirements.txt @@ -0,0 +1,4 @@ +mcp[cli] +psycopg[binary] +voyageai +anthropic diff --git a/Knowledgebase/schema.sql b/Knowledgebase/schema.sql new file mode 100644 index 0000000..45e4d9c --- /dev/null +++ b/Knowledgebase/schema.sql @@ -0,0 +1,97 @@ +-- Knowledgebase schema +-- PostgreSQL, bez pgvector (embeddingy jako double precision[]) +-- pg_trgm pro fuzzy matching (volitelné) + +CREATE EXTENSION IF NOT EXISTS pg_trgm; + +-- ─── Conversation sessions ──────────────────────────────────────────────────── + +CREATE TABLE IF NOT EXISTS kb_sessions ( + id VARCHAR(255) PRIMARY KEY, + title TEXT, + summary TEXT, + project VARCHAR(255), + tags TEXT[] DEFAULT '{}', + msg_count INT DEFAULT 0, + started_at TIMESTAMPTZ DEFAULT NOW(), + ended_at TIMESTAMPTZ, + meta JSONB DEFAULT '{}' +); + +CREATE INDEX IF NOT EXISTS kb_sessions_project_idx ON kb_sessions(project); +CREATE INDEX IF NOT EXISTS kb_sessions_started_idx ON kb_sessions(started_at DESC); +CREATE INDEX IF NOT EXISTS kb_sessions_tags_idx ON kb_sessions USING GIN(tags); + +-- ─── Individual messages within a session ───────────────────────────────────── + +CREATE TABLE IF NOT EXISTS kb_messages ( + id BIGSERIAL PRIMARY KEY, + session_id VARCHAR(255) REFERENCES kb_sessions(id) ON DELETE CASCADE, + role VARCHAR(20) NOT NULL, -- 'user' | 'assistant' | 'system' + content TEXT NOT NULL, + seq INT NOT NULL, + created_at TIMESTAMPTZ DEFAULT NOW() +); + +CREATE INDEX IF NOT EXISTS kb_messages_session_idx ON kb_messages(session_id, seq); + +-- ─── Memories (facts, decisions, summaries, document extracts, …) ───────────── + +CREATE TABLE IF NOT EXISTS kb_memories ( + id BIGSERIAL PRIMARY KEY, + + -- classification + mem_type VARCHAR(50) NOT NULL DEFAULT 'fact', + -- fact | decision | preference | summary | document | email | project | person | other + + title TEXT, + 
content TEXT NOT NULL,
+    summary TEXT,
+
+    -- linking
+    session_id VARCHAR(255), -- optional: came from this conversation
+    source TEXT, -- file path, email id, URL, …
+    project VARCHAR(255),
+
+    -- search
+    tags TEXT[] DEFAULT '{}',
+    importance FLOAT DEFAULT 0.5, -- 0..1
+    embedding double precision[], -- Voyage AI voyage-3-lite vector (512-dim per Voyage docs — TODO confirm), Python-side similarity
+    fts TSVECTOR,
+
+    -- lifecycle
+    created_at TIMESTAMPTZ DEFAULT NOW(),
+    updated_at TIMESTAMPTZ DEFAULT NOW(),
+    expires_at TIMESTAMPTZ, -- NULL = never
+    deleted BOOLEAN DEFAULT FALSE,
+
+    meta JSONB DEFAULT '{}'
+);
+
+-- full-text (Czech + English, 'simple' covers both without stemming noise); the trigger also refreshes updated_at
+CREATE OR REPLACE FUNCTION kb_memories_fts_update() RETURNS TRIGGER AS $$
+BEGIN
+    NEW.fts := to_tsvector('simple',
+        coalesce(NEW.title, '') || ' ' ||
+        coalesce(NEW.summary, '') || ' ' ||
+        NEW.content
+    );
+    NEW.updated_at := NOW();
+    RETURN NEW;
+END;
+$$ LANGUAGE plpgsql;
+
+DROP TRIGGER IF EXISTS kb_memories_fts_trig ON kb_memories;
+CREATE TRIGGER kb_memories_fts_trig
+    BEFORE INSERT OR UPDATE ON kb_memories
+    FOR EACH ROW EXECUTE FUNCTION kb_memories_fts_update();
+
+-- indexes
+CREATE INDEX IF NOT EXISTS kb_memories_fts_idx ON kb_memories USING GIN(fts);
+CREATE INDEX IF NOT EXISTS kb_memories_tags_idx ON kb_memories USING GIN(tags);
+CREATE INDEX IF NOT EXISTS kb_memories_type_idx ON kb_memories(mem_type);
+CREATE INDEX IF NOT EXISTS kb_memories_project_idx ON kb_memories(project);
+CREATE INDEX IF NOT EXISTS kb_memories_importance_idx ON kb_memories(importance DESC);
+CREATE INDEX IF NOT EXISTS kb_memories_created_idx ON kb_memories(created_at DESC);
+CREATE INDEX IF NOT EXISTS kb_memories_session_idx ON kb_memories(session_id);
+-- Note: embedding is a plain double precision[] — cosine similarity is computed Python-side in server.py, not in SQL
diff --git a/Knowledgebase/server.py b/Knowledgebase/server.py
new file mode 100644
index 0000000..ee28e5e
--- /dev/null
+++ b/Knowledgebase/server.py
@@ -0,0 +1,798 @@
+#!/usr/bin/env python3 +""" +Knowledgebase MCP server +======================== +Persistentní paměť pro Claude konverzace a znalosti. + +Vyhledávání: + - Full-text (tsvector, vždy dostupné) + - Sémantické (Voyage AI embeddingy + Python cosine similarity reranking) + - Hybridní kombinace obou + +Env proměnné: + PG_HOST, PG_PORT, PG_USER, PG_PASSWORD, PG_DB + VOYAGE_API_KEY — pro vektorové embeddingy (volitelné) +""" + +import json +import math +import os +import sys +import traceback +from datetime import datetime +from typing import Any, Optional + +import psycopg +from psycopg.rows import dict_row +from mcp.server.fastmcp import FastMCP + +# ─── Config ────────────────────────────────────────────────────────────────── + +PG_HOST = os.getenv("PG_HOST", "192.168.1.76") +PG_PORT = int(os.getenv("PG_PORT", "5432")) +PG_USER = os.getenv("PG_USER", "vladimir.buzalka") +PG_PASSWORD = os.getenv("PG_PASSWORD", "Vlado7309208104++") +PG_DB = os.getenv("PG_DB", "knowledgebase") + +VOYAGE_API_KEY = os.getenv("VOYAGE_API_KEY", "") +EMBED_MODEL = "voyage-3-lite" # 1024-dim, fast & cheap + +# ─── Logging ───────────────────────────────────────────────────────────────── + +def log(msg: str): + print(f"[KB] {msg}", file=sys.stderr, flush=True) + +# ─── DB connection ─────────────────────────────────────────────────────────── + +_conn: Optional[psycopg.Connection] = None + +def get_conn() -> psycopg.Connection: + global _conn + if _conn is not None and not _conn.closed: + try: + _conn.execute("SELECT 1") + return _conn + except Exception: + pass + _conn = psycopg.connect( + host=PG_HOST, port=PG_PORT, + user=PG_USER, password=PG_PASSWORD, + dbname=PG_DB, + row_factory=dict_row, + autocommit=False, + ) + log(f"Connected to {PG_DB}@{PG_HOST}") + return _conn + +# ─── Embeddings ────────────────────────────────────────────────────────────── + +_voyage_client = None + +def get_embedding(text: str) -> Optional[list[float]]: + """Return 1024-dim embedding via Voyage AI, or None if 
unavailable.""" + global _voyage_client + if not VOYAGE_API_KEY: + return None + try: + if _voyage_client is None: + import voyageai + _voyage_client = voyageai.Client(api_key=VOYAGE_API_KEY) + result = _voyage_client.embed([text[:8000]], model=EMBED_MODEL) + return result.embeddings[0] + except Exception as e: + log(f"Embedding error: {e}") + return None + +# ─── Helpers ───────────────────────────────────────────────────────────────── + +def _row_to_dict(row: dict) -> dict: + """Serialize DB row for JSON output.""" + out = {} + for k, v in row.items(): + if isinstance(v, datetime): + out[k] = v.isoformat() + elif isinstance(v, list) and k == "embedding": + out[k] = None # don't return raw vectors + else: + out[k] = v + return out + +def _fmt_memories(rows: list[dict]) -> str: + if not rows: + return "No results." + parts = [] + for r in rows: + score = f" score={r.get('score', ''):.3f}" if r.get('score') is not None else "" + parts.append( + f"[{r['id']}] {r.get('mem_type','?').upper()} | {r.get('project') or '—'}" + f"{score}\n" + f" Title: {r.get('title') or '—'}\n" + f" Tags: {', '.join(r.get('tags') or []) or '—'}\n" + f" Date: {r.get('created_at','')}\n" + f" Content:\n{_indent(r.get('content',''), 4)}" + ) + return "\n\n".join(parts) + +def _indent(text: str, n: int) -> str: + pad = " " * n + return "\n".join(pad + line for line in text.splitlines()) + +def _cosine(a: list[float], b: list[float]) -> float: + dot = sum(x * y for x, y in zip(a, b)) + na = math.sqrt(sum(x * x for x in a)) + nb = math.sqrt(sum(x * x for x in b)) + if na == 0 or nb == 0: + return 0.0 + return dot / (na * nb) + +# ─── MCP server ────────────────────────────────────────────────────────────── + +mcp = FastMCP("knowledgebase") + +# ────────────────────────────────────────────────────────────────────────────── +# STORE MEMORY +# ────────────────────────────────────────────────────────────────────────────── + +@mcp.tool() +def store_memory( + content: str, + mem_type: str = "fact", + 
title: Optional[str] = None, + summary: Optional[str] = None, + tags: Optional[list[str]] = None, + project: Optional[str] = None, + source: Optional[str] = None, + session_id: Optional[str] = None, + importance: float = 0.5, + meta: Optional[dict] = None, +) -> str: + """ + Uloží jedno paměťové záznam (fakt, rozhodnutí, preference, summary, …). + + mem_type: fact | decision | preference | summary | document | email | project | person | other + importance: 0.0 (triviální) … 1.0 (kritické), default 0.5 + tags: seznam klíčových slov pro filtrování + session_id: pokud pochází z konkrétní konverzace + + Příklad: + store_memory("Vlado preferuje stručné odpovědi bez trailing summary", + mem_type="preference", tags=["komunikace", "styl"]) + """ + embedding = get_embedding( + f"{title or ''} {summary or ''} {content}" + ) + conn = get_conn() + try: + with conn.transaction(): + row = conn.execute( + """ + INSERT INTO kb_memories + (mem_type, title, content, summary, tags, project, + source, session_id, importance, embedding, meta) + VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) + RETURNING id, created_at + """, + (mem_type, title, content, summary, + tags or [], project, source, session_id, + importance, embedding, json.dumps(meta or {})), + ).fetchone() + return f"Stored memory id={row['id']} at {row['created_at']}" + except Exception as e: + conn.rollback() + log(traceback.format_exc()) + return f"Error: {e}" + + +# ────────────────────────────────────────────────────────────────────────────── +# STORE CONVERSATION +# ────────────────────────────────────────────────────────────────────────────── + +@mcp.tool() +def store_conversation( + messages: list[dict], + session_id: Optional[str] = None, + title: Optional[str] = None, + summary: Optional[str] = None, + project: Optional[str] = None, + tags: Optional[list[str]] = None, + key_memories: Optional[list[dict]] = None, +) -> str: + """ + Uloží celou konverzaci (seznam zpráv) jako session + automaticky + extrahuje key_memories 
jako samostatné záznamy. + + messages: [{"role": "user"|"assistant", "content": "..."}] + session_id: unikátní ID session (pokud není, vygeneruje se z timestampu) + summary: shrnutí konverzace (doporučeno předat) + key_memories: klíčové fakty/rozhodnutí z konverzace k samostatnému uložení + [{"content": "...", "mem_type": "fact", "title": "...", "tags": [...], "importance": 0.7}] + + Příklad: + store_conversation( + messages=[...], + session_id="2026-06-06-knowledgebase", + title="Návrh Knowledgebase systému", + summary="Vlado požadoval paměťový MCP server, rozhodli jsme se pro PG+pgvector+Voyage", + project="knowledgebase", + key_memories=[ + {"content": "Rozhodnutí: PostgreSQL + pgvector + Voyage AI embeddings", + "mem_type": "decision", "importance": 0.9, "tags": ["architektura"]}, + ] + ) + """ + import uuid + sid = session_id or f"session-{datetime.utcnow().strftime('%Y%m%d-%H%M%S')}-{uuid.uuid4().hex[:6]}" + + conn = get_conn() + try: + with conn.transaction(): + # upsert session + conn.execute( + """ + INSERT INTO kb_sessions (id, title, summary, project, tags, msg_count, ended_at) + VALUES (%s, %s, %s, %s, %s, %s, NOW()) + ON CONFLICT (id) DO UPDATE SET + title = EXCLUDED.title, + summary = EXCLUDED.summary, + project = EXCLUDED.project, + tags = EXCLUDED.tags, + msg_count = EXCLUDED.msg_count, + ended_at = NOW() + """, + (sid, title, summary, project, tags or [], len(messages)), + ) + + # upsert messages + conn.execute("DELETE FROM kb_messages WHERE session_id = %s", (sid,)) + for i, msg in enumerate(messages): + conn.execute( + """ + INSERT INTO kb_messages (session_id, role, content, seq) + VALUES (%s, %s, %s, %s) + """, + (sid, msg.get("role", "unknown"), msg.get("content", ""), i), + ) + + # summary jako paměťový záznam + if summary: + emb = get_embedding(f"{title or ''} {summary}") + _insert_memory_in_tx(conn, { + "mem_type": "summary", + "title": title or f"Session {sid}", + "content": summary, + "session_id": sid, + "project": project, + "tags": 
tags or [], + "importance": 0.6, + "embedding": emb, + }) + + # key memories + stored_km = 0 + for km in (key_memories or []): + if not km.get("content"): + continue + emb = get_embedding( + f"{km.get('title','') or ''} {km.get('content','')}" + ) + _insert_memory_in_tx(conn, { + "mem_type": km.get("mem_type", "fact"), + "title": km.get("title"), + "content": km["content"], + "summary": km.get("summary"), + "session_id": sid, + "project": km.get("project", project), + "tags": km.get("tags", tags or []), + "importance": km.get("importance", 0.5), + "source": km.get("source"), + "embedding": emb, + "meta": km.get("meta", {}), + }) + stored_km += 1 + + return ( + f"Stored session '{sid}' with {len(messages)} messages" + f" + {stored_km} key memories." + ) + except Exception as e: + conn.rollback() + log(traceback.format_exc()) + return f"Error: {e}" + + +def _insert_memory_in_tx(conn, data: dict): + """Helper: insert memory within an existing transaction.""" + conn.execute( + """ + INSERT INTO kb_memories + (mem_type, title, content, summary, tags, project, + source, session_id, importance, embedding, meta) + VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) + """, + (data.get("mem_type","fact"), data.get("title"), + data["content"], data.get("summary"), + data.get("tags",[]), data.get("project"), + data.get("source"), data.get("session_id"), + data.get("importance",0.5), + data.get("embedding"), + json.dumps(data.get("meta",{}))), + ) + + +# ────────────────────────────────────────────────────────────────────────────── +# SEARCH +# ────────────────────────────────────────────────────────────────────────────── + +@mcp.tool() +def search( + query: str, + types: Optional[list[str]] = None, + project: Optional[str] = None, + tags: Optional[list[str]] = None, + limit: int = 10, + min_importance: float = 0.0, + include_sessions: bool = False, +) -> str: + """ + Hybridní vyhledávání v paměti. + Kombinuje full-text (vždy) + vektorové sémantické (pokud embeddingy dostupné). 
+ + query: přirozený jazyk nebo klíčová slova + types: ['fact','decision','preference','summary','document','email','project','person'] + project: filtrovat dle projektu + tags: musí obsahovat alespoň jeden z těchto tagů + limit: max počet výsledků (default 10) + min_importance: min hodnota důležitosti 0..1 + include_sessions: zahrnout i výsledky z session summaries + + Příklad: + search("PostgreSQL architektura", project="knowledgebase", types=["decision"]) + """ + conn = get_conn() + results = [] + + try: + # ── Full-text search ── + conditions = ["deleted = FALSE", "fts @@ plainto_tsquery('simple', %s)"] + params: list[Any] = [query] + + if types: + conditions.append(f"mem_type = ANY(%s)") + params.append(types) + if project: + conditions.append("project = %s") + params.append(project) + if tags: + conditions.append("tags && %s") + params.append(tags) + if min_importance > 0: + conditions.append("importance >= %s") + params.append(min_importance) + if not include_sessions: + conditions.append("mem_type != 'summary' OR session_id IS NOT NULL") + + where = " AND ".join(conditions) + rows = conn.execute( + f""" + SELECT id, mem_type, title, content, summary, tags, + project, source, session_id, importance, created_at, + ts_rank(fts, plainto_tsquery('simple', %s)) AS score + FROM kb_memories + WHERE {where} + ORDER BY score DESC, importance DESC + LIMIT %s + """, + [query] + params + [limit], + ).fetchall() + + fts_ids = {r["id"] for r in rows} + results = [_row_to_dict(r) for r in rows] + + # ── Vector reranking (Python-side cosine similarity) ── + # Fetch candidates with embeddings, compute cosine similarity, merge + query_emb = get_embedding(query) + if query_emb: + try: + vec_conditions = ["deleted = FALSE", "embedding IS NOT NULL"] + vec_params2: list[Any] = [] + + if types: + vec_conditions.append("mem_type = ANY(%s)") + vec_params2.append(types) + if project: + vec_conditions.append("project = %s") + vec_params2.append(project) + if tags: + 
vec_conditions.append("tags && %s") + vec_params2.append(tags) + if min_importance > 0: + vec_conditions.append("importance >= %s") + vec_params2.append(min_importance) + + vec_where = " AND ".join(vec_conditions) + vec_rows = conn.execute( + f""" + SELECT id, mem_type, title, content, summary, tags, + project, source, session_id, importance, created_at, + embedding + FROM kb_memories + WHERE {vec_where} + LIMIT 200 + """, + vec_params2, + ).fetchall() + + for r in vec_rows: + if r["id"] not in fts_ids and r["embedding"]: + sim = _cosine(query_emb, r["embedding"]) + if sim > 0.5: # threshold + d = _row_to_dict(r) + d["score"] = sim + results.append(d) + + except Exception as e: + log(f"Vector reranking error: {e}") + + # deduplicate & sort by score + seen = set() + deduped = [] + for r in results: + if r["id"] not in seen: + seen.add(r["id"]) + deduped.append(r) + + deduped.sort(key=lambda x: (x.get("score") or 0, x.get("importance", 0)), reverse=True) + + return _fmt_memories(deduped[:limit]) + + except Exception as e: + log(traceback.format_exc()) + return f"Error: {e}" + + +# ────────────────────────────────────────────────────────────────────────────── +# GET CONTEXT (kontext pro aktuální konverzaci) +# ────────────────────────────────────────────────────────────────────────────── + +@mcp.tool() +def get_context( + topic: str, + project: Optional[str] = None, + limit: int = 8, + include_preferences: bool = True, +) -> str: + """ + Vrátí nejrelevantnější paměti pro daný kontext/téma. + Automaticky přidá preference uživatele (pokud exist.) 
+ + Použití na začátku konverzace: + get_context("IWRS pacienti, notifikace, MongoDB") + """ + conn = get_conn() + parts = [] + + # ── Main context ── + main_result = search( + query=topic, + project=project, + limit=limit, + min_importance=0.3, + ) + parts.append("=== RELEVANTNÍ PAMĚTI ===\n" + main_result) + + # ── User preferences ── + if include_preferences: + try: + rows = conn.execute( + """ + SELECT id, mem_type, title, content, tags, importance, created_at + FROM kb_memories + WHERE mem_type = 'preference' + AND deleted = FALSE + ORDER BY importance DESC, created_at DESC + LIMIT 5 + """, + ).fetchall() + if rows: + pref_lines = [_fmt_memories([_row_to_dict(r) for r in rows])] + parts.append("=== PREFERENCE UŽIVATELE ===\n" + "\n".join(pref_lines)) + except Exception as e: + log(f"Preferences error: {e}") + + return "\n\n".join(parts) + + +# ────────────────────────────────────────────────────────────────────────────── +# GET RECENT +# ────────────────────────────────────────────────────────────────────────────── + +@mcp.tool() +def get_recent( + limit: int = 10, + mem_type: Optional[str] = None, + project: Optional[str] = None, +) -> str: + """ + Vrátí nejnovější paměti, volitelně filtrované. 
+ + Příklad: + get_recent(limit=5, mem_type="decision") + """ + conn = get_conn() + conditions = ["deleted = FALSE"] + params: list[Any] = [] + + if mem_type: + conditions.append("mem_type = %s") + params.append(mem_type) + if project: + conditions.append("project = %s") + params.append(project) + + where = " AND ".join(conditions) + try: + rows = conn.execute( + f""" + SELECT id, mem_type, title, content, tags, project, importance, created_at + FROM kb_memories + WHERE {where} + ORDER BY created_at DESC + LIMIT %s + """, + params + [limit], + ).fetchall() + return _fmt_memories([_row_to_dict(r) for r in rows]) + except Exception as e: + log(traceback.format_exc()) + return f"Error: {e}" + + +# ────────────────────────────────────────────────────────────────────────────── +# LIST SESSIONS +# ────────────────────────────────────────────────────────────────────────────── + +@mcp.tool() +def list_sessions( + limit: int = 20, + project: Optional[str] = None, +) -> str: + """ + Vypíše přehled uložených konverzačních sessions. + """ + conn = get_conn() + conditions = [] + params: list[Any] = [] + if project: + conditions.append("project = %s") + params.append(project) + + where = "WHERE " + " AND ".join(conditions) if conditions else "" + try: + rows = conn.execute( + f""" + SELECT id, title, summary, project, tags, msg_count, started_at, ended_at + FROM kb_sessions + {where} + ORDER BY ended_at DESC NULLS LAST + LIMIT %s + """, + params + [limit], + ).fetchall() + if not rows: + return "No sessions found." 
+ lines = [] + for r in rows: + lines.append( + f"[{r['id']}] {r.get('title') or '—'} | {r.get('project') or '—'}" + f" | {r['msg_count']} msgs | {r.get('ended_at','')}\n" + f" {(r.get('summary') or '')[:120]}" + ) + return "\n\n".join(lines) + except Exception as e: + log(traceback.format_exc()) + return f"Error: {e}" + + +# ────────────────────────────────────────────────────────────────────────────── +# GET SESSION +# ────────────────────────────────────────────────────────────────────────────── + +@mcp.tool() +def get_session( + session_id: str, + include_messages: bool = True, + messages_limit: int = 100, +) -> str: + """ + Vrátí detail konkrétní session (metadata + zprávy). + """ + conn = get_conn() + try: + sess = conn.execute( + "SELECT * FROM kb_sessions WHERE id = %s", (session_id,) + ).fetchone() + if not sess: + return f"Session '{session_id}' not found." + + out = [ + f"Session: {sess['id']}", + f"Title: {sess.get('title') or '—'}", + f"Project: {sess.get('project') or '—'}", + f"Tags: {', '.join(sess.get('tags') or [])}", + f"Date: {sess.get('ended_at') or sess.get('started_at')}", + f"Summary:\n{_indent(sess.get('summary') or '—', 2)}", + ] + + if include_messages: + msgs = conn.execute( + """ + SELECT role, content, seq FROM kb_messages + WHERE session_id = %s ORDER BY seq LIMIT %s + """, + (session_id, messages_limit), + ).fetchall() + out.append(f"\n--- Messages ({len(msgs)}) ---") + for m in msgs: + role = m["role"].upper() + content = m["content"] + if len(content) > 500: + content = content[:500] + "…" + out.append(f"\n[{role}]\n{content}") + + return "\n".join(out) + except Exception as e: + log(traceback.format_exc()) + return f"Error: {e}" + + +# ────────────────────────────────────────────────────────────────────────────── +# UPDATE MEMORY +# ────────────────────────────────────────────────────────────────────────────── + +@mcp.tool() +def update_memory( + memory_id: int, + content: Optional[str] = None, + title: Optional[str] = None, + 
summary: Optional[str] = None, + tags: Optional[list[str]] = None, + importance: Optional[float] = None, + project: Optional[str] = None, +) -> str: + """ + Aktualizuje existující paměťový záznam. + Předej jen pole, která chceš změnit. + """ + conn = get_conn() + updates = [] + params: list[Any] = [] + + if content is not None: + updates.append("content = %s") + params.append(content) + new_emb = get_embedding(f"{title or ''} {content}") + if new_emb: + updates.append("embedding = %s") + params.append(new_emb) + if title is not None: + updates.append("title = %s") + params.append(title) + if summary is not None: + updates.append("summary = %s") + params.append(summary) + if tags is not None: + updates.append("tags = %s") + params.append(tags) + if importance is not None: + updates.append("importance = %s") + params.append(importance) + if project is not None: + updates.append("project = %s") + params.append(project) + + if not updates: + return "Nothing to update." + + params.append(memory_id) + try: + with conn.transaction(): + conn.execute( + f"UPDATE kb_memories SET {', '.join(updates)} WHERE id = %s", + params, + ) + return f"Memory {memory_id} updated." + except Exception as e: + conn.rollback() + return f"Error: {e}" + + +# ────────────────────────────────────────────────────────────────────────────── +# DELETE MEMORY +# ────────────────────────────────────────────────────────────────────────────── + +@mcp.tool() +def delete_memory(memory_id: int, hard: bool = False) -> str: + """ + Soft-delete (default) nebo hard-delete paměti. + Soft: záznám zůstane v DB, jen se skryje z výsledků. + Hard: smaže fyzicky. + """ + conn = get_conn() + try: + with conn.transaction(): + if hard: + conn.execute("DELETE FROM kb_memories WHERE id = %s", (memory_id,)) + return f"Memory {memory_id} permanently deleted." + else: + conn.execute( + "UPDATE kb_memories SET deleted = TRUE WHERE id = %s", (memory_id,) + ) + return f"Memory {memory_id} soft-deleted." 
+ except Exception as e: + conn.rollback() + return f"Error: {e}" + + +# ────────────────────────────────────────────────────────────────────────────── +# STATS +# ────────────────────────────────────────────────────────────────────────────── + +@mcp.tool() +def stats() -> str: + """ + Přehled obsahu databáze: počty záznamů dle typu, sessions, projekty. + """ + conn = get_conn() + try: + type_counts = conn.execute( + """ + SELECT mem_type, COUNT(*) AS cnt + FROM kb_memories + WHERE deleted = FALSE + GROUP BY mem_type ORDER BY cnt DESC + """ + ).fetchall() + + session_count = conn.execute( + "SELECT COUNT(*) AS cnt FROM kb_sessions" + ).fetchone()["cnt"] + + projects = conn.execute( + """ + SELECT project, COUNT(*) AS cnt + FROM kb_memories + WHERE deleted = FALSE AND project IS NOT NULL + GROUP BY project ORDER BY cnt DESC LIMIT 10 + """ + ).fetchall() + + embed_count = conn.execute( + "SELECT COUNT(*) AS cnt FROM kb_memories WHERE embedding IS NOT NULL" + ).fetchone()["cnt"] + + lines = ["=== Knowledgebase Stats ==="] + lines.append(f"\nSessions: {session_count}") + lines.append(f"Embeddings: {embed_count} (Voyage AI {'active' if VOYAGE_API_KEY else 'not configured'})") + + lines.append("\nMemories by type:") + for r in type_counts: + lines.append(f" {r['mem_type']:15} {r['cnt']:>5}") + + if projects: + lines.append("\nTop projects:") + for r in projects: + lines.append(f" {r['project']:25} {r['cnt']:>5}") + + return "\n".join(lines) + except Exception as e: + log(traceback.format_exc()) + return f"Error: {e}" + + +# ────────────────────────────────────────────────────────────────────────────── + +if __name__ == "__main__": + log("Starting Knowledgebase MCP server...") + log(f" DB: {PG_DB}@{PG_HOST}:{PG_PORT}") + log(f" Embeddings: {'Voyage AI' if VOYAGE_API_KEY else 'disabled (set VOYAGE_API_KEY)'}") + mcp.run(transport="stdio") diff --git a/Knowledgebase/setup_db.py b/Knowledgebase/setup_db.py new file mode 100644 index 0000000..f60f0eb --- /dev/null +++ 
b/Knowledgebase/setup_db.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python3 +""" +Inicializace Knowledgebase databáze. +Vytvoří databázi 'knowledgebase' a aplikuje schema.sql. + +Spustit jednou: + python setup_db.py +""" +import os +import sys +import psycopg +from psycopg.rows import dict_row +from pathlib import Path + +PG_HOST = os.getenv("PG_HOST", "192.168.1.76") +PG_PORT = int(os.getenv("PG_PORT", "5432")) +PG_USER = os.getenv("PG_USER", "vladimir.buzalka") +PG_PASSWORD = os.getenv("PG_PASSWORD", "Vlado7309208104++") + +SCHEMA_FILE = Path(__file__).parent / "schema.sql" + + +def main(): + # ── Připoj se k postgres a vytvoř DB ── + print("Connecting to postgres...") + with psycopg.connect( + host=PG_HOST, port=PG_PORT, + user=PG_USER, password=PG_PASSWORD, + dbname="postgres", + autocommit=True, + ) as conn: + exists = conn.execute( + "SELECT 1 FROM pg_database WHERE datname = 'knowledgebase'" + ).fetchone() + if not exists: + conn.execute("CREATE DATABASE knowledgebase") + print("Created database 'knowledgebase'") + else: + print("Database 'knowledgebase' already exists") + + # ── Aplikuj schema ── + print("Applying schema...") + schema = SCHEMA_FILE.read_text(encoding="utf-8") + with psycopg.connect( + host=PG_HOST, port=PG_PORT, + user=PG_USER, password=PG_PASSWORD, + dbname="knowledgebase", + autocommit=True, + ) as conn: + conn.execute(schema) + print("Schema applied successfully.") + print() + print("Done! Knowledgebase database is ready.") + print() + print("Next steps:") + print(" 1. pip install -r requirements.txt") + print(" 2. Optional: set VOYAGE_API_KEY=...") + print(" 3. 
Add server to Claude Code settings (see below)") + print() + print(' "knowledgebase": {') + print(' "command": "python",') + print(' "args": ["U:/janssen/Knowledgebase/server.py"],') + print(' "env": {"VOYAGE_API_KEY": "..."}') + print(' }') + + +if __name__ == "__main__": + main() diff --git a/claude-memory/MEMORY.md b/claude-memory/MEMORY.md index 698d8d8..d9807c1 100644 --- a/claude-memory/MEMORY.md +++ b/claude-memory/MEMORY.md @@ -10,5 +10,7 @@ - [Graph email import](project_graph_email_import.md) — import JNJ emailů do schránky vladimir.buzalka@buzalka.cz přes Graph API - [Memory sync přes Giteu](setup_memory_sync.md) — paměť je v `claude-memory/` v janssen repu, junction + git push synchronizuje mezi PC - [MCP soubory](project_mcp_soubory.md) — MCP server nad PG fulltextem + Mongo metadaty pro soubory studií (search/read/duplicates/by_author/...) +- [Knowledgebase MCP](project_knowledgebase_mcp.md) — persistentní paměť konverzací/znalostí; PG+tsvector+cosine; nástroje: get_context/store_memory/store_conversation/search - [MCP emaily](project_mcp_emaily.md) — MCP server nad PG fulltextem + Mongo emailů z Graph importu (9 schránek, ~268k mailů; search/read_email/by_sender/conversation_thread/find_attachment/...) 
+- [Python-runner pipeline](project_python_runner.md) — Docker kontejner na Unraidu, email pipeline 2×/den, auto-install deps, report z reports@buzalka.cz - [Claude Code learning path](project_claude_learning.md) — Level 2 Intermediate, mezery: Skills/Subagenty/Hooks/Print mode, tutoriál v `claude-howto/` diff --git a/claude-memory/project_knowledgebase_mcp.md b/claude-memory/project_knowledgebase_mcp.md new file mode 100644 index 0000000..882cb6e --- /dev/null +++ b/claude-memory/project_knowledgebase_mcp.md @@ -0,0 +1,50 @@ +--- +name: project-knowledgebase-mcp +description: "Knowledgebase MCP server — persistentní paměť pro Claude konverzace a znalosti, PostgreSQL, Voyage AI embeddingy" +metadata: + node_type: memory + type: project + originSessionId: 31aa5741-7484-4b3c-b608-1a5833b91602 +--- + +Knowledgebase MCP server pro persistentní paměť konverzací a znalostí. + +**Why:** Claude má statická file-based memories v claude-memory/, ale bez fulltextového/sémantického vyhledávání. Tento systém umožňuje efektivní retrospektivní dotazy přes všechny minulé konverzace a uložené znalosti. + +**How to apply:** Na začátku relevantní konverzace zavolej `get_context("téma")` pro orientaci. Na konci konverzace zavolej `store_conversation(...)` se summary a `key_memories`. Fakta/rozhodnutí ukládej průběžně přes `store_memory(...)`. 
+
+## Technický stack
+
+- **Server:** `U:/janssen/Knowledgebase/server.py` (FastMCP, stdio transport)
+- **DB:** PostgreSQL `knowledgebase` @ 192.168.1.76:5432
+- **Tabulky:** `kb_memories`, `kb_sessions`, `kb_messages`
+- **Vyhledávání:** Full-text (tsvector, vždy) + Python-side cosine similarity (pokud VOYAGE_API_KEY)
+- **Embeddingy:** Voyage AI voyage-3-lite (512-dim) — volitelné, bez nich funguje FTS
+
+## MCP nástroje
+
+- `store_memory(content, mem_type, title, tags, project, importance)` — uloží fakt/rozhodnutí/preferenci
+- `store_conversation(messages, session_id, title, summary, project, key_memories)` — uloží celou konverzaci
+- `search(query, types, project, tags, limit)` — hybridní vyhledávání
+- `get_context(topic, project)` — kontext pro novou konverzaci (search + preferences)
+- `get_recent(limit, mem_type, project)` — nejnovější záznamy
+- `list_sessions(limit, project)` — přehled konverzací
+- `get_session(session_id)` — detail konverzace s messages
+- `update_memory(id, ...)` — aktualizace
+- `delete_memory(id)` — soft/hard delete
+- `stats()` — přehled obsahu DB
+
+## mem_type hodnoty
+
+fact | decision | preference | summary | document | email | project | person | other
+
+## Konfigurace
+
+MCP server je zaregistrován v `claude_desktop_config.json` jako `"knowledgebase"`.
+Pro Voyage AI embeddingy nastav `VOYAGE_API_KEY` v env sekci configu.
+
+## Workflow
+
+1. `get_context("téma")` na začátku session
+2. `store_memory(...)` pro klíčová fakta/rozhodnutí průběžně
+3. 
`store_conversation(messages, session_id, summary, key_memories=[...])` na konci session diff --git a/claude-memory/project_python_runner.md b/claude-memory/project_python_runner.md new file mode 100644 index 0000000..9a40d6f --- /dev/null +++ b/claude-memory/project_python_runner.md @@ -0,0 +1,33 @@ +--- +name: python-runner +description: "Docker kontejner na Unraidu s email pipeline — architektura, závislosti, scheduling, hlášení" +metadata: + node_type: memory + type: project + originSessionId: ec13e72a-c0a8-4862-93d9-6da9b29b82f3 +--- + +Email pipeline běží v Docker kontejneru `python-runner` na Unraidu (192.168.1.76). +Skripty jsou v `/mnt/user/Scripts/` (volume → `/scripts/`). + +**Pipeline** spouštěna 2× denně cronem (`0 6,18 * * *`) přes `run_pipeline.sh`: +- `1b` — Graph delta sync (nové/změněné emaily → MongoDB) +- `3` — Download attachments +- `4` — Unwrap S/MIME +- `5` — Enrich fulltext → PostgreSQL + +**Wrapper:** `0_run_pipeline_v1.0.py` +- Na začátku auto-install: `pip install -q -r /scripts/requirements.txt` — odolné vůči recreate kontejneru +- Na konci email report: HTML tabulka výsledků z `reports@buzalka.cz` → `vladimir.buzalka@buzalka.cz`; při chybě přiloží log selhavšího kroku + +**Závislosti:** `requirements.txt` v `/scripts/` + v repo `Python-runner/requirements.txt` +Klíčové: `msal`, `asn1crypto`, `pymongo`, `psycopg`, `extract-msg`, `beautifulsoup4` + +**Incident 2026-06-05:** kontejner byl recreated → ztraceny balíčky → pipeline padala 24h (kroky 1b/3/4 FAIL s ModuleNotFoundError). Opraveno auto-installem. + +**Knihovna pro odesílání emailů:** `Knihovny/EmailMessagingGraph.py` (kopie na serveru `/scripts/EmailMessagingGraph.py`) +- SENDER: `reports@buzalka.cz`, tenant TrialHelp s.r.o. + +**Logy:** `/mnt/user/Scripts/logs/pipeline_YYYYMMDD_HHMM.log`, symlink `pipeline_latest.log`, retence 30 dní. + +Souvisí s [[graph-email-import]], [[project-mcp-emaily]].