32 lines
1.4 KiB
Plaintext
32 lines
1.4 KiB
Plaintext
|
|
-- Migration 012: Knowledge Graph Hybrid Search
|
||
|
|
-- Adds HNSW vector index and BM25 full-text index for hybrid retrieval
|
||
|
|
-- Fixes missing fields in kg_executions (agent_role, provider, cost_cents)
|
||
|
|
-- that caused deserialization failures on SELECT queries.
|
||
|
|
|
||
|
|
-- Fields that were missing from the kg_executions schema. Defining them lets
-- records round-trip (write, then SELECT) with these values present.
DEFINE FIELD agent_role ON TABLE kg_executions TYPE option<string>;
DEFINE FIELD provider ON TABLE kg_executions TYPE string DEFAULT 'unknown';
DEFINE FIELD cost_cents ON TABLE kg_executions TYPE int DEFAULT 0;

-- DEFAULT is only applied when a record is written, so rows created before
-- this migration would still come back without provider / cost_cents.
-- Backfill the two non-optional fields with the same values as their DEFAULTs.
-- agent_role is option<string>, so NONE is already a valid value there.
-- Both statements are idempotent: a second run matches no rows.
UPDATE kg_executions SET provider = 'unknown' WHERE provider = NONE RETURN NONE;
UPDATE kg_executions SET cost_cents = 0 WHERE cost_cents = NONE RETURN NONE;
|
||
|
|
|
||
|
|
-- Text analyzer backing BM25 full-text search on task descriptions.
--   class tokenizer    starts a new token wherever the character class changes
--                      (letter / digit / punctuation / whitespace)
--   lowercase          makes matching case-insensitive
--   snowball(english)  stems terms ("compiling" -> "compil") for better recall
DEFINE ANALYZER kg_text_analyzer TOKENIZERS class FILTERS lowercase, snowball(english);
|
||
|
|
|
||
|
|
-- Full-text index over kg_executions.task_description, tokenized by
-- kg_text_analyzer and ranked with BM25 (default parameters).
DEFINE INDEX idx_kg_executions_ft ON kg_executions COLUMNS task_description SEARCH ANALYZER kg_text_analyzer BM25;
|
||
|
|
|
||
|
|
-- HNSW approximate nearest neighbor index for semantic similarity.
--   DIMENSION 1536  length of the embedding vectors stored in `embedding`
--   DIST COSINE     matches the cosine similarity used throughout the codebase
--   TYPE F32        float32 embeddings (OpenAI ada-002 / compatible providers)
--   M 16            graph connectivity: up to 16 connections per node
--   EFC 200         ef-construction: index build quality vs speed tradeoff
-- The construction parameter keyword is EFC; DEFINE INDEX does not accept
-- the spelled-out form EF_CONSTRUCTION.
DEFINE INDEX idx_kg_executions_hnsw
    ON TABLE kg_executions
    FIELDS embedding
    HNSW DIMENSION 1536 DIST COSINE TYPE F32 M 16 EFC 200;
|