Fix pgvector integration: VECTOR(512) for voyage-3-lite, register_vector on connect
- voyage-3-lite returns 512 dims (not 1024) — migrated column + schema
- register_vector now called once at connection time, not per-query
- Removes per-function register_vector calls that caused type cast conflicts

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,86 @@
|
|||||||
|
import win32com.client
|
||||||
|
import pandas as pd
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Destination workbook that the exported address-list rows are written to.
OUT_XLSX = Path(r"C:\Temp\GAL_export.xlsx")
|
||||||
|
|
||||||
|
def safe_get(obj, attr):
    """Read attribute ``attr`` from ``obj``, tolerating any failure.

    Returns the attribute value, or ``None`` when the lookup raises any
    ``Exception`` subclass (a missing attribute, or an error raised while
    the attribute is being read).
    """
    try:
        value = getattr(obj, attr)
    except Exception:
        value = None
    return value
|
||||||
|
|
||||||
|
# Column order of the exported sheet. Every row carries all of these keys so
# the sheet has the same schema whether or not any entry failed.
_COLUMNS = (
    "name",
    "smtp",
    "address",
    "entry_type",
    "job_title",
    "department",
    "company",
    "office",
    "phone",
    "mobile",
    "error",
)

# Exported column -> attribute read from the Exchange user object.
_EXCHANGE_USER_FIELDS = {
    "smtp": "PrimarySmtpAddress",
    "job_title": "JobTitle",
    "department": "Department",
    "company": "CompanyName",
    "office": "OfficeLocation",
    "phone": "BusinessTelephoneNumber",
    "mobile": "MobileTelephoneNumber",
}


def _call_or_none(obj, method_name):
    """Call the zero-argument method ``method_name`` on ``obj``.

    Returns the call's result, or ``None`` if looking up or calling the
    method raises.
    """
    try:
        return getattr(obj, method_name)()
    except Exception:
        return None


def _entry_to_row(entry):
    """Build one export row (a dict keyed by ``_COLUMNS``) from an address entry.

    Fields that cannot be read are left as ``None``.
    """
    row = dict.fromkeys(_COLUMNS)
    row["name"] = safe_get(entry, "Name")
    row["address"] = safe_get(entry, "Address")
    row["entry_type"] = safe_get(entry, "AddressEntryUserType")

    # Exchange user
    exch_user = _call_or_none(entry, "GetExchangeUser")
    if exch_user:
        for column, attr in _EXCHANGE_USER_FIELDS.items():
            row[column] = safe_get(exch_user, attr)

    # Distribution list: its SMTP address is only a fallback, so the lookup
    # is skipped entirely when the user object already supplied one.
    if not row["smtp"]:
        exch_dl = _call_or_none(entry, "GetExchangeDistributionList")
        if exch_dl:
            row["smtp"] = safe_get(exch_dl, "PrimarySmtpAddress")

    return row


# Fail fast: make sure the destination folder exists before the (potentially
# long) enumeration instead of discovering the problem when saving.
OUT_XLSX.parent.mkdir(parents=True, exist_ok=True)

outlook = win32com.client.Dispatch("Outlook.Application")
ns = outlook.GetNamespace("MAPI")

gal = ns.GetGlobalAddressList()
entries = gal.AddressEntries

total = entries.Count
print(f"Počet položek v GAL: {total}")

rows = []
for i in range(1, total + 1):  # Outlook COM collections are 1-based
    try:
        rows.append(_entry_to_row(entries.Item(i)))
    except Exception as e:
        # Keep a placeholder row so the export stays aligned with the address
        # list, and record the position so the failure can be traced.
        failed = dict.fromkeys(_COLUMNS)
        failed["error"] = f"entry {i}: {e}"
        rows.append(failed)

# Passing the columns explicitly keeps the header even for an empty list.
df = pd.DataFrame(rows, columns=list(_COLUMNS))
df.to_excel(OUT_XLSX, index=False)

print(f"Hotovo: {OUT_XLSX}")
|
||||||
@@ -2,6 +2,7 @@
|
|||||||
-- PostgreSQL, bez pgvector (embeddingy jako double precision[])
|
-- PostgreSQL with pgvector (embeddings stored as vector(512))
|
||||||
-- pg_trgm pro fuzzy matching (volitelné)
|
-- pg_trgm pro fuzzy matching (volitelné)
|
||||||
|
|
||||||
|
CREATE EXTENSION IF NOT EXISTS vector;
|
||||||
CREATE EXTENSION IF NOT EXISTS pg_trgm;
|
CREATE EXTENSION IF NOT EXISTS pg_trgm;
|
||||||
|
|
||||||
-- ─── Conversation sessions ────────────────────────────────────────────────────
|
-- ─── Conversation sessions ────────────────────────────────────────────────────
|
||||||
@@ -56,7 +57,7 @@ CREATE TABLE IF NOT EXISTS kb_memories (
|
|||||||
-- search
|
-- search
|
||||||
tags TEXT[] DEFAULT '{}',
|
tags TEXT[] DEFAULT '{}',
|
||||||
importance FLOAT DEFAULT 0.5, -- 0..1
|
importance FLOAT DEFAULT 0.5, -- 0..1
|
||||||
embedding double precision[], -- Voyage AI voyage-3-lite (1024-dim), Python-side similarity
|
embedding vector(512), -- Voyage AI voyage-3-lite (512-dim)
|
||||||
fts TSVECTOR,
|
fts TSVECTOR,
|
||||||
|
|
||||||
-- lifecycle
|
-- lifecycle
|
||||||
@@ -94,4 +95,5 @@ CREATE INDEX IF NOT EXISTS kb_memories_project_idx ON kb_memories(project);
|
|||||||
CREATE INDEX IF NOT EXISTS kb_memories_importance_idx ON kb_memories(importance DESC);
|
CREATE INDEX IF NOT EXISTS kb_memories_importance_idx ON kb_memories(importance DESC);
|
||||||
CREATE INDEX IF NOT EXISTS kb_memories_created_idx ON kb_memories(created_at DESC);
|
CREATE INDEX IF NOT EXISTS kb_memories_created_idx ON kb_memories(created_at DESC);
|
||||||
CREATE INDEX IF NOT EXISTS kb_memories_session_idx ON kb_memories(session_id);
|
CREATE INDEX IF NOT EXISTS kb_memories_session_idx ON kb_memories(session_id);
|
||||||
-- Note: embedding je double precision[] — similarity se počítá Python-side po FTS pre-filtru
|
-- Vector index (aktivovat po ~1k řádcích pro lepší recall)
|
||||||
|
-- CREATE INDEX kb_memories_vec_idx ON kb_memories USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);
|
||||||
|
|||||||
@@ -34,7 +34,8 @@ PG_PASSWORD = os.getenv("PG_PASSWORD", "Vlado7309208104++")
|
|||||||
PG_DB = os.getenv("PG_DB", "knowledgebase")
|
PG_DB = os.getenv("PG_DB", "knowledgebase")
|
||||||
|
|
||||||
VOYAGE_API_KEY = os.getenv("VOYAGE_API_KEY", "")
|
VOYAGE_API_KEY = os.getenv("VOYAGE_API_KEY", "")
|
||||||
EMBED_MODEL = "voyage-3-lite" # 1024-dim, fast & cheap
|
EMBED_MODEL = "voyage-3-lite" # 512-dim, fast & cheap
|
||||||
|
EMBED_DIM = 512
|
||||||
|
|
||||||
# ─── Logging ─────────────────────────────────────────────────────────────────
|
# ─── Logging ─────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
@@ -60,6 +61,12 @@ def get_conn() -> psycopg.Connection:
|
|||||||
row_factory=dict_row,
|
row_factory=dict_row,
|
||||||
autocommit=False,
|
autocommit=False,
|
||||||
)
|
)
|
||||||
|
# register_vector jednou na connection — umožní psycopg správně serializovat np.array jako vector
|
||||||
|
try:
|
||||||
|
from pgvector.psycopg import register_vector
|
||||||
|
register_vector(_conn)
|
||||||
|
except Exception as e:
|
||||||
|
log(f"pgvector register warning: {e}")
|
||||||
log(f"Connected to {PG_DB}@{PG_HOST}")
|
log(f"Connected to {PG_DB}@{PG_HOST}")
|
||||||
return _conn
|
return _conn
|
||||||
|
|
||||||
@@ -156,9 +163,7 @@ def store_memory(
|
|||||||
try:
|
try:
|
||||||
with conn.transaction():
|
with conn.transaction():
|
||||||
if embedding:
|
if embedding:
|
||||||
from pgvector.psycopg import register_vector
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
register_vector(conn)
|
|
||||||
row = conn.execute(
|
row = conn.execute(
|
||||||
"""
|
"""
|
||||||
INSERT INTO kb_memories
|
INSERT INTO kb_memories
|
||||||
@@ -312,9 +317,7 @@ def _insert_memory_in_tx(conn, data: dict):
|
|||||||
"""Helper: insert memory within an existing transaction."""
|
"""Helper: insert memory within an existing transaction."""
|
||||||
embedding = data.get("embedding")
|
embedding = data.get("embedding")
|
||||||
if embedding:
|
if embedding:
|
||||||
from pgvector.psycopg import register_vector
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
register_vector(conn)
|
|
||||||
conn.execute(
|
conn.execute(
|
||||||
"""
|
"""
|
||||||
INSERT INTO kb_memories
|
INSERT INTO kb_memories
|
||||||
@@ -420,9 +423,7 @@ def search(
|
|||||||
query_emb = get_embedding(query)
|
query_emb = get_embedding(query)
|
||||||
if query_emb:
|
if query_emb:
|
||||||
try:
|
try:
|
||||||
from pgvector.psycopg import register_vector
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
register_vector(conn)
|
|
||||||
|
|
||||||
vec_conditions = ["deleted = FALSE", "embedding IS NOT NULL"]
|
vec_conditions = ["deleted = FALSE", "embedding IS NOT NULL"]
|
||||||
vec_params2: list[Any] = []
|
vec_params2: list[Any] = []
|
||||||
@@ -700,9 +701,7 @@ def update_memory(
|
|||||||
params.append(content)
|
params.append(content)
|
||||||
new_emb = get_embedding(f"{title or ''} {content}")
|
new_emb = get_embedding(f"{title or ''} {content}")
|
||||||
if new_emb:
|
if new_emb:
|
||||||
from pgvector.psycopg import register_vector
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
register_vector(conn)
|
|
||||||
updates.append("embedding = %s")
|
updates.append("embedding = %s")
|
||||||
params.append(np.array(new_emb))
|
params.append(np.array(new_emb))
|
||||||
if title is not None:
|
if title is not None:
|
||||||
|
|||||||
Reference in New Issue
Block a user