Files
administrator 82d7bc375f Fix pgvector integration: VECTOR(512) for voyage-3-lite, register_vector on connect
- voyage-3-lite returns 512 dims (not 1024) — migrated column + schema
- register_vector now called once at connection time, not per-query
- Removes per-function register_vector calls that caused type cast conflicts

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-07 06:16:16 +02:00

100 lines
4.0 KiB
PL/PgSQL

-- Knowledgebase schema
-- PostgreSQL, bez pgvector (embeddingy jako double precision[])
-- pg_trgm pro fuzzy matching (volitelné)
CREATE EXTENSION IF NOT EXISTS vector;
CREATE EXTENSION IF NOT EXISTS pg_trgm;
-- ─── Conversation sessions ────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS kb_sessions (
id VARCHAR(255) PRIMARY KEY,
title TEXT,
summary TEXT,
project VARCHAR(255),
tags TEXT[] DEFAULT '{}',
msg_count INT DEFAULT 0,
started_at TIMESTAMPTZ DEFAULT NOW(),
ended_at TIMESTAMPTZ,
meta JSONB DEFAULT '{}'
);
CREATE INDEX IF NOT EXISTS kb_sessions_project_idx ON kb_sessions(project);
CREATE INDEX IF NOT EXISTS kb_sessions_started_idx ON kb_sessions(started_at DESC);
CREATE INDEX IF NOT EXISTS kb_sessions_tags_idx ON kb_sessions USING GIN(tags);
-- ─── Individual messages within a session ─────────────────────────────────────
CREATE TABLE IF NOT EXISTS kb_messages (
id BIGSERIAL PRIMARY KEY,
session_id VARCHAR(255) REFERENCES kb_sessions(id) ON DELETE CASCADE,
role VARCHAR(20) NOT NULL, -- 'user' | 'assistant' | 'system'
content TEXT NOT NULL,
seq INT NOT NULL,
created_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS kb_messages_session_idx ON kb_messages(session_id, seq);
-- ─── Memories (facts, decisions, summaries, document extracts, …) ─────────────
CREATE TABLE IF NOT EXISTS kb_memories (
id BIGSERIAL PRIMARY KEY,
-- classification
mem_type VARCHAR(50) NOT NULL DEFAULT 'fact',
-- fact | decision | preference | summary | document | email | project | person | other
title TEXT,
content TEXT NOT NULL,
summary TEXT,
-- linking
session_id VARCHAR(255), -- optional: came from this conversation
source TEXT, -- file path, email id, URL, …
project VARCHAR(255),
-- search
tags TEXT[] DEFAULT '{}',
importance FLOAT DEFAULT 0.5, -- 0..1
embedding vector(512), -- Voyage AI voyage-3-lite (512-dim)
fts TSVECTOR,
-- lifecycle
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW(),
expires_at TIMESTAMPTZ, -- NULL = never
deleted BOOLEAN DEFAULT FALSE,
meta JSONB DEFAULT '{}'
);
-- full-text (Czech + English, 'simple' covers both without stemming noise)
CREATE OR REPLACE FUNCTION kb_memories_fts_update() RETURNS TRIGGER AS $$
BEGIN
NEW.fts := to_tsvector('simple',
coalesce(NEW.title, '') || ' ' ||
coalesce(NEW.summary, '') || ' ' ||
NEW.content
);
NEW.updated_at := NOW();
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
DROP TRIGGER IF EXISTS kb_memories_fts_trig ON kb_memories;
CREATE TRIGGER kb_memories_fts_trig
BEFORE INSERT OR UPDATE ON kb_memories
FOR EACH ROW EXECUTE FUNCTION kb_memories_fts_update();
-- indexes
CREATE INDEX IF NOT EXISTS kb_memories_fts_idx ON kb_memories USING GIN(fts);
CREATE INDEX IF NOT EXISTS kb_memories_tags_idx ON kb_memories USING GIN(tags);
CREATE INDEX IF NOT EXISTS kb_memories_type_idx ON kb_memories(mem_type);
CREATE INDEX IF NOT EXISTS kb_memories_project_idx ON kb_memories(project);
CREATE INDEX IF NOT EXISTS kb_memories_importance_idx ON kb_memories(importance DESC);
CREATE INDEX IF NOT EXISTS kb_memories_created_idx ON kb_memories(created_at DESC);
CREATE INDEX IF NOT EXISTS kb_memories_session_idx ON kb_memories(session_id);
-- Vector index (aktivovat po ~1k řádcích pro lepší recall)
-- CREATE INDEX kb_memories_vec_idx ON kb_memories USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);