-- Vapora/migrations/004_rag.surql

-- Migration 004: RAG (Retrieval-Augmented Generation)
-- Creates tables for document storage and semantic search

-- Documents table
-- Stores the source documents used for retrieval. Every operation is limited
-- to rows whose tenant_id equals the tenant_id of the authenticated record.
DEFINE TABLE documents SCHEMAFULL
    PERMISSIONS
        FOR select WHERE tenant_id = $auth.tenant_id
        FOR create, update, delete WHERE tenant_id = $auth.tenant_id;

DEFINE FIELD id ON TABLE documents TYPE record<documents>;
DEFINE FIELD tenant_id ON TABLE documents TYPE string ASSERT $value != NONE;
DEFINE FIELD project_id ON TABLE documents TYPE option<string>;
DEFINE FIELD title ON TABLE documents TYPE string ASSERT $value != NONE;
DEFINE FIELD content ON TABLE documents TYPE string ASSERT $value != NONE;
-- content_type is restricted to a fixed set of values and falls back to "text".
DEFINE FIELD content_type ON TABLE documents TYPE string ASSERT $value INSIDE ["markdown", "code", "text", "json"] DEFAULT "text";
-- FLEXIBLE is required here: on a SCHEMAFULL table a plain `TYPE object` only
-- keeps sub-fields that are themselves declared, so arbitrary metadata keys
-- would be dropped on write and the stored value would always be {}.
DEFINE FIELD metadata ON TABLE documents FLEXIBLE TYPE object DEFAULT {};
-- Optional embedding vector; documents may be stored before they are embedded.
DEFINE FIELD embedding ON TABLE documents TYPE option<array<float>>;
DEFINE FIELD source_path ON TABLE documents TYPE option<string>;
DEFINE FIELD tags ON TABLE documents TYPE array<string> DEFAULT [];
DEFINE FIELD created_at ON TABLE documents TYPE datetime DEFAULT time::now();
-- VALUE re-evaluates on every write, so updated_at always holds the last write time.
DEFINE FIELD updated_at ON TABLE documents TYPE datetime DEFAULT time::now() VALUE time::now();

-- Secondary indexes for the common filter columns.
DEFINE INDEX idx_documents_tenant ON TABLE documents COLUMNS tenant_id;
DEFINE INDEX idx_documents_project ON TABLE documents COLUMNS project_id;
DEFINE INDEX idx_documents_content_type ON TABLE documents COLUMNS content_type;
DEFINE INDEX idx_documents_tags ON TABLE documents COLUMNS tags;

-- Vector index for semantic search over document embeddings.
-- Note: this is an M-Tree (MTREE) index, not HNSW; SurrealDB 2.x+ supports
-- vector search with MTREE indexes.
-- DIMENSION 1536 is the vector length the index is declared for.
-- NOTE(review): no DIST clause is given, so the engine's default distance
-- metric applies — confirm it matches how the embeddings are queried.
DEFINE INDEX idx_documents_embedding ON TABLE documents FIELDS embedding MTREE DIMENSION 1536;

-- Document chunks table (for large documents split into chunks)
-- Chunks have no tenant_id of their own, so access is derived from the owning
-- document: the rule looks up documents:<document_id> and compares its
-- tenant_id with the authenticated record's tenant_id.
--
-- `$parent` is only bound inside a subquery, where it refers to the outer
-- record (the chunk being checked). Used directly in a PERMISSIONS clause it
-- is NONE, which denies every tenant and lets a session without a tenant_id
-- match via NONE = NONE. The explicit `$auth.tenant_id != NONE` guard closes
-- that hole for the case where the document lookup finds nothing.
--
-- NOTE(review): assumes document_id stores the id part of a `documents`
-- record (not the full "documents:..." string) — confirm against the writer.
-- If the assumption is wrong the lookup finds nothing and access is denied.
DEFINE TABLE document_chunks SCHEMAFULL
    PERMISSIONS
        FOR select, create, update, delete WHERE
            $auth.tenant_id != NONE
            AND (SELECT VALUE tenant_id FROM type::thing("documents", $parent.document_id))[0] = $auth.tenant_id;

DEFINE FIELD id ON TABLE document_chunks TYPE record<document_chunks>;
DEFINE FIELD document_id ON TABLE document_chunks TYPE string ASSERT $value != NONE;
-- Zero-based position of the chunk inside its document.
DEFINE FIELD chunk_index ON TABLE document_chunks TYPE int ASSERT $value >= 0;
DEFINE FIELD content ON TABLE document_chunks TYPE string ASSERT $value != NONE;
-- Optional embedding vector; chunks may be stored before they are embedded.
DEFINE FIELD embedding ON TABLE document_chunks TYPE option<array<float>>;
DEFINE FIELD token_count ON TABLE document_chunks TYPE option<int>;
DEFINE FIELD created_at ON TABLE document_chunks TYPE datetime DEFAULT time::now();

DEFINE INDEX idx_document_chunks_document ON TABLE document_chunks COLUMNS document_id;
-- A document cannot contain two chunks with the same chunk_index.
DEFINE INDEX idx_document_chunks_document_index ON TABLE document_chunks COLUMNS document_id, chunk_index UNIQUE;
-- M-Tree vector index over chunk embeddings, declared for 1536-component vectors.
DEFINE INDEX idx_document_chunks_embedding ON TABLE document_chunks FIELDS embedding MTREE DIMENSION 1536;

-- Search history table (for analytics and improvement)
-- A tenant may read and append rows carrying its own tenant_id; no update or
-- delete rule is declared for this table.
DEFINE TABLE search_history SCHEMAFULL
    PERMISSIONS
        FOR select, create WHERE tenant_id = $auth.tenant_id;

-- Identity and ownership
DEFINE FIELD id ON search_history TYPE record<search_history>;
DEFINE FIELD tenant_id ON search_history TYPE string ASSERT $value != NONE;

-- What was searched and what came back
DEFINE FIELD query ON search_history TYPE string ASSERT $value != NONE;
DEFINE FIELD results_count ON search_history TYPE int DEFAULT 0;
DEFINE FIELD top_result_id ON search_history TYPE option<string>;

-- Timing: search duration in milliseconds (per the field name) and row creation time
DEFINE FIELD search_time_ms ON search_history TYPE int;
DEFINE FIELD created_at ON search_history TYPE datetime DEFAULT time::now();

-- Lookups by tenant and by time
DEFINE INDEX idx_search_history_tenant ON search_history FIELDS tenant_id;
DEFINE INDEX idx_search_history_created ON search_history FIELDS created_at;