Vapora/migrations/012_kg_hybrid_search.surql

32 lines
1.4 KiB
Plaintext
Raw Permalink Normal View History

-- Migration 012: Knowledge Graph Hybrid Search
-- Adds HNSW vector index and BM25 full-text index for hybrid retrieval
-- Fixes missing fields in kg_executions (agent_role, provider, cost_cents)
-- that caused deserialization failures on SELECT queries.
-- Declare the three fields that kg_executions records were missing:
--   agent_role  optional string (may be absent)
--   provider    string, set to 'unknown' when a write omits it
--   cost_cents  integer, set to 0 when a write omits it
-- NOTE(review): DEFAULT takes effect when records are written; rows stored
-- before this migration may still lack these fields — confirm whether a
-- backfill is needed.
DEFINE FIELD agent_role
    ON TABLE kg_executions
    TYPE option<string>;

DEFINE FIELD provider
    ON TABLE kg_executions
    TYPE string
    DEFAULT 'unknown';

DEFINE FIELD cost_cents
    ON TABLE kg_executions
    TYPE int
    DEFAULT 0;
-- Full-text search (BM25-ranked) over kg_executions.task_description.
-- Analyzer pipeline:
--   class              starts a new token wherever the Unicode character
--                      class changes (letter / digit / punctuation / blank)
--   lowercase          folds case so matching is case-insensitive
--   snowball(english)  stems English words, e.g. "compiling" -> "compil",
--                      trading some precision for better recall
DEFINE ANALYZER kg_text_analyzer
    TOKENIZERS class
    FILTERS lowercase, snowball(english);

DEFINE INDEX idx_kg_executions_ft
    ON TABLE kg_executions
    FIELDS task_description
    SEARCH ANALYZER kg_text_analyzer BM25;
-- HNSW approximate-nearest-neighbour index over kg_executions.embedding,
-- used for semantic-similarity lookups.
--   DIMENSION 1536  vector length (OpenAI ada-002 / compatible providers)
--   DIST COSINE     cosine distance; presumably the same metric the
--                   application code uses — confirm against callers
--   TYPE F32        vector components stored as 32-bit floats
--   EFC 200         ef_construction: size of the candidate list while the
--                   index is built (higher = better quality, slower build)
--   M 16            maximum number of graph connections per node
-- SurrealQL spells the ef_construction option `EFC`; `EF_CONSTRUCTION` is
-- not a recognised keyword and makes the statement fail to parse.
DEFINE INDEX idx_kg_executions_hnsw
    ON TABLE kg_executions
    FIELDS embedding
    HNSW DIMENSION 1536 DIST COSINE TYPE F32 EFC 200 M 16;