// RLM Engine - Core Orchestration // Coordinates chunking, storage, hybrid search, and LLM dispatch use std::sync::Arc; use std::time::Instant; use tracing::{debug, info, warn}; use vapora_llm_router::providers::LLMClient; use crate::chunking::{create_chunker, ChunkingConfig}; use crate::dispatch::{AggregatedResult, LLMDispatcher}; use crate::embeddings::{EmbeddingConfig, EmbeddingGenerator}; use crate::metrics::{CHUNKS_TOTAL, QUERY_DURATION}; use crate::search::bm25::BM25Index; use crate::search::hybrid::{HybridSearch, ScoredChunk}; use crate::storage::{Chunk, Storage}; use crate::RLMError; /// RLM Engine configuration #[derive(Debug, Clone)] pub struct RLMEngineConfig { /// Default chunking configuration pub chunking: ChunkingConfig, /// Embedding configuration (optional - if None, no embeddings generated) pub embedding: Option, /// Enable automatic BM25 index rebuilds pub auto_rebuild_bm25: bool, /// Maximum chunks per document (safety limit) pub max_chunks_per_doc: usize, } impl Default for RLMEngineConfig { fn default() -> Self { Self { chunking: ChunkingConfig::default(), embedding: Some(EmbeddingConfig::default()), // Enable embeddings by default auto_rebuild_bm25: true, max_chunks_per_doc: 10_000, } } } /// RLM Engine - orchestrates chunking, storage, and hybrid search pub struct RLMEngine { storage: Arc, bm25_index: Arc, hybrid_search: HybridSearch, embedding_generator: Option>, dispatcher: Arc, config: RLMEngineConfig, } impl RLMEngine { /// Create a new RLM engine pub fn new(storage: Arc, bm25_index: Arc) -> crate::Result { let hybrid_search = HybridSearch::new(storage.clone(), bm25_index.clone())?; let config = RLMEngineConfig::default(); let embedding_generator = config .embedding .as_ref() .map(|cfg| Arc::new(EmbeddingGenerator::new(cfg.clone()))); // Phase 6: No LLM client configured by default let dispatcher = Arc::new(LLMDispatcher::new(None)); Ok(Self { storage, bm25_index, hybrid_search, embedding_generator, dispatcher, config, }) } /// Create with custom configuration pub fn with_config( storage: Arc, bm25_index: Arc, config: RLMEngineConfig, ) -> crate::Result { let hybrid_search = HybridSearch::new(storage.clone(), bm25_index.clone())?; let embedding_generator = config .embedding .as_ref() .map(|cfg| Arc::new(EmbeddingGenerator::new(cfg.clone()))); // Phase 6: No LLM client configured by default let dispatcher = Arc::new(LLMDispatcher::new(None)); Ok(Self { storage, bm25_index, hybrid_search, embedding_generator, dispatcher, config, }) } /// Create with LLM client for production use pub fn with_llm_client( storage: Arc, bm25_index: Arc, llm_client: Arc, config: Option, ) -> crate::Result { let config = config.unwrap_or_default(); let hybrid_search = HybridSearch::new(storage.clone(), bm25_index.clone())?; let embedding_generator = config .embedding .as_ref() .map(|cfg| Arc::new(EmbeddingGenerator::new(cfg.clone()))); // Production: LLM client configured let dispatcher = Arc::new(LLMDispatcher::new(Some(llm_client))); Ok(Self { storage, bm25_index, hybrid_search, embedding_generator, dispatcher, config, }) } /// Load a document: chunk → embed (placeholder) → persist → index /// /// # Arguments /// - `doc_id`: Unique document identifier /// - `content`: Document content to chunk /// - `chunking_config`: Optional chunking configuration (uses default if /// None) /// /// # Returns /// Number of chunks created pub async fn load_document( &self, doc_id: &str, content: &str, chunking_config: Option, ) -> crate::Result { let start = Instant::now(); info!("Loading document: {}", doc_id); // Use provided config or default let config = chunking_config.unwrap_or_else(|| self.config.chunking.clone()); // Create chunker and chunk content let chunker = create_chunker(&config); let chunk_results = chunker.chunk(content)?; // Safety check if chunk_results.len() > self.config.max_chunks_per_doc { warn!( "Document {} has {} chunks, exceeds max {}", doc_id, chunk_results.len(), self.config.max_chunks_per_doc ); return Err(RLMError::ChunkingError(format!( "Document exceeds max chunks: {} > {}", chunk_results.len(), self.config.max_chunks_per_doc ))); } debug!( "Chunked document {} into {} chunks using {:?} strategy", doc_id, chunk_results.len(), config.strategy ); // Generate embeddings if enabled let embeddings = if let Some(ref generator) = self.embedding_generator { debug!("Generating embeddings for {} chunks", chunk_results.len()); let texts: Vec = chunk_results.iter().map(|c| c.content.clone()).collect(); Some(generator.embed_batch(&texts).await?) } else { debug!("Embedding generation disabled"); None }; // Convert ChunkResult to Chunk and persist let mut chunks = Vec::new(); for (idx, chunk_result) in chunk_results.iter().enumerate() { let chunk_id = format!("{}-chunk-{}", doc_id, idx); // Get embedding for this chunk (if generated) let embedding = embeddings.as_ref().and_then(|embs| embs.get(idx)).cloned(); let chunk = Chunk { chunk_id: chunk_id.clone(), doc_id: doc_id.to_string(), content: chunk_result.content.clone(), embedding, // Phase 5: Real embeddings from multi-provider start_idx: chunk_result.start_idx, end_idx: chunk_result.end_idx, metadata: None, created_at: chrono::Utc::now().to_rfc3339(), }; // Save to storage self.storage.save_chunk(chunk.clone()).await?; // Add to BM25 index self.bm25_index.add_document(&chunk)?; chunks.push(chunk); } // Commit BM25 index self.bm25_index.commit()?; // Update metrics CHUNKS_TOTAL .with_label_values(&[&format!("{:?}", config.strategy)]) .inc_by(chunks.len() as u64); let duration = start.elapsed(); info!( "Loaded document {} with {} chunks in {:?}", doc_id, chunks.len(), duration ); Ok(chunks.len()) } /// Query with hybrid search (semantic + BM25 + RRF fusion) /// /// # Arguments /// - `doc_id`: Document to search within /// - `query_text`: Keyword query for BM25 /// - `query_embedding`: Optional vector embedding for semantic search /// - `limit`: Maximum results to return /// /// # Returns /// Scored chunks ranked by hybrid search pub async fn query( &self, doc_id: &str, query_text: &str, query_embedding: Option<&[f32]>, limit: usize, ) -> crate::Result> { let start = Instant::now(); let results = if let Some(embedding) = query_embedding { // Full hybrid search: BM25 + semantic + RRF debug!( "Hybrid query: doc={}, query='{}', limit={}", doc_id, query_text, limit ); self.hybrid_search .search(doc_id, query_text, embedding, limit) .await? } else { // BM25-only search (no embedding provided) debug!( "BM25-only query: doc={}, query='{}', limit={}", doc_id, query_text, limit ); let bm25_results = self.hybrid_search.bm25_search(query_text, limit)?; // Get chunks from storage let all_chunks = self.storage.get_chunks(doc_id).await?; // Map BM25 results to ScoredChunk bm25_results .into_iter() .filter_map(|bm25_result| { all_chunks .iter() .find(|c| c.chunk_id == bm25_result.chunk_id) .map(|chunk| ScoredChunk { chunk: chunk.clone(), score: bm25_result.score, bm25_score: Some(bm25_result.score), semantic_score: None, }) }) .collect() }; let duration = start.elapsed(); QUERY_DURATION .with_label_values(&[if query_embedding.is_some() { "hybrid" } else { "bm25_only" }]) .observe(duration.as_secs_f64()); debug!("Query returned {} results in {:?}", results.len(), duration); Ok(results) } /// Dispatch subtask to LLM for distributed reasoning /// /// # Arguments /// - `doc_id`: Document to query /// - `query_text`: Query/task description /// - `query_embedding`: Optional embedding for hybrid search /// - `limit`: Max chunks to retrieve /// /// # Returns /// Aggregated result from LLM analysis of relevant chunks pub async fn dispatch_subtask( &self, doc_id: &str, query_text: &str, query_embedding: Option<&[f32]>, limit: usize, ) -> crate::Result { info!("Dispatching subtask: doc={}, query={}", doc_id, query_text); // Step 1: Retrieve relevant chunks via hybrid search let chunks = self .query(doc_id, query_text, query_embedding, limit) .await?; debug!("Retrieved {} chunks for dispatch", chunks.len()); // Step 2: Dispatch to LLM let result = self.dispatcher.dispatch(query_text, &chunks).await?; info!( "Dispatch completed: {} LLM calls, {} total tokens", result.num_calls, result.total_input_tokens + result.total_output_tokens ); Ok(result) } /// Get BM25 index statistics pub fn index_stats(&self) -> crate::search::bm25::IndexStats { self.bm25_index.stats() } /// Rebuild BM25 index from all chunks for a document pub async fn rebuild_index(&self, doc_id: &str) -> crate::Result<()> { info!("Rebuilding BM25 index for document: {}", doc_id); let chunks = self.storage.get_chunks(doc_id).await?; self.bm25_index.rebuild_from_chunks(&chunks)?; info!( "Rebuilt BM25 index for {} with {} chunks", doc_id, chunks.len() ); Ok(()) } /// Delete all chunks for a document pub async fn delete_document(&self, doc_id: &str) -> crate::Result { info!("Deleting document: {}", doc_id); let deleted_count = self.storage.delete_chunks(doc_id).await?; // Rebuild BM25 index to remove deleted chunks if self.config.auto_rebuild_bm25 { // For now, we can't selectively delete from BM25, so we'd need to rebuild // For Phase 3, we'll just warn - full rebuild happens on next load warn!( "BM25 index may contain stale entries for deleted doc {}. Rebuild recommended.", doc_id ); } Ok(deleted_count) } } #[cfg(test)] mod tests { use std::collections::HashMap; use std::sync::Mutex; use async_trait::async_trait; use super::*; use crate::chunking::ChunkingStrategy; use crate::storage::{Buffer, ExecutionHistory}; // Mock storage for testing struct MockStorage { chunks: Arc>>>, } impl MockStorage { fn new() -> Self { Self { chunks: Arc::new(Mutex::new(HashMap::new())), } } } #[async_trait] impl Storage for MockStorage { async fn save_chunk(&self, chunk: Chunk) -> crate::Result<()> { let mut chunks = self.chunks.lock().unwrap(); chunks.entry(chunk.doc_id.clone()).or_default().push(chunk); Ok(()) } async fn get_chunks(&self, doc_id: &str) -> crate::Result> { let chunks = self.chunks.lock().unwrap(); Ok(chunks.get(doc_id).cloned().unwrap_or_default()) } async fn get_chunk(&self, chunk_id: &str) -> crate::Result> { let chunks = self.chunks.lock().unwrap(); for chunk_list in chunks.values() { if let Some(chunk) = chunk_list.iter().find(|c| c.chunk_id == chunk_id) { return Ok(Some(chunk.clone())); } } Ok(None) } async fn search_by_embedding( &self, _embedding: &[f32], _limit: usize, ) -> crate::Result> { Ok(Vec::new()) } async fn save_buffer(&self, _buffer: Buffer) -> crate::Result<()> { Ok(()) } async fn get_buffer(&self, _buffer_id: &str) -> crate::Result> { Ok(None) } async fn cleanup_expired_buffers(&self) -> crate::Result { Ok(0) } async fn save_execution(&self, _execution: ExecutionHistory) -> crate::Result<()> { Ok(()) } async fn get_executions( &self, _doc_id: &str, _limit: usize, ) -> crate::Result> { Ok(Vec::new()) } async fn delete_chunks(&self, doc_id: &str) -> crate::Result { let mut chunks = self.chunks.lock().unwrap(); let count = chunks.remove(doc_id).map(|v| v.len()).unwrap_or(0); Ok(count as u64) } } #[tokio::test] async fn test_engine_creation() { let storage = Arc::new(MockStorage::new()); let bm25_index = Arc::new(BM25Index::new().unwrap()); let engine = RLMEngine::new(storage, bm25_index); assert!(engine.is_ok()); } #[tokio::test] async fn test_load_document_fixed_chunking() { let storage = Arc::new(MockStorage::new()); let bm25_index = Arc::new(BM25Index::new().unwrap()); let engine = RLMEngine::new(storage.clone(), bm25_index).unwrap(); let content = "a".repeat(250); // 250 chars let config = ChunkingConfig { strategy: ChunkingStrategy::Fixed, chunk_size: 100, overlap: 20, }; let chunk_count = engine .load_document("doc-1", &content, Some(config)) .await .unwrap(); assert!(chunk_count >= 2, "Should create at least 2 chunks"); // Verify chunks are persisted let chunks = storage.get_chunks("doc-1").await.unwrap(); assert_eq!(chunks.len(), chunk_count); } #[tokio::test] async fn test_load_document_semantic_chunking() { let storage = Arc::new(MockStorage::new()); let bm25_index = Arc::new(BM25Index::new().unwrap()); let engine = RLMEngine::new(storage.clone(), bm25_index).unwrap(); let content = "First sentence. Second sentence! Third sentence?"; let config = ChunkingConfig { strategy: ChunkingStrategy::Semantic, chunk_size: 50, overlap: 10, }; let chunk_count = engine .load_document("doc-2", content, Some(config)) .await .unwrap(); assert!(chunk_count > 0, "Should create at least 1 chunk"); // Verify chunks are persisted let chunks = storage.get_chunks("doc-2").await.unwrap(); assert_eq!(chunks.len(), chunk_count); } #[tokio::test] async fn test_query_bm25_only() { let storage = Arc::new(MockStorage::new()); let bm25_index = Arc::new(BM25Index::new().unwrap()); let engine = RLMEngine::new(storage.clone(), bm25_index).unwrap(); // Load document let content = "Rust programming language. Python programming tutorial. Rust async patterns."; engine.load_document("doc-3", content, None).await.unwrap(); // Query (BM25-only, no embedding) let results = engine.query("doc-3", "Rust", None, 5).await.unwrap(); assert!(!results.is_empty(), "Should find results for 'Rust'"); assert!(results[0].bm25_score.is_some(), "Should have BM25 score"); assert!( results[0].semantic_score.is_none(), "Should not have semantic score" ); } #[tokio::test] async fn test_query_hybrid_search() { let storage = Arc::new(MockStorage::new()); let bm25_index = Arc::new(BM25Index::new().unwrap()); let engine = RLMEngine::new(storage.clone(), bm25_index).unwrap(); // Load document with manual chunk creation (to add embeddings) let chunk = Chunk { chunk_id: "doc-4-chunk-0".to_string(), doc_id: "doc-4".to_string(), content: "Rust programming language".to_string(), embedding: Some(vec![1.0, 0.0, 0.0]), start_idx: 0, end_idx: 26, metadata: None, created_at: chrono::Utc::now().to_rfc3339(), }; storage.save_chunk(chunk.clone()).await.unwrap(); engine.bm25_index.add_document(&chunk).unwrap(); engine.bm25_index.commit().unwrap(); // Query with embedding (hybrid search) let query_embedding = vec![0.9, 0.1, 0.0]; let results = engine .query("doc-4", "Rust", Some(&query_embedding), 5) .await .unwrap(); assert!(!results.is_empty(), "Should find results"); // In hybrid search, we should have both scores (if RRF found matches in both) // But with only 1 chunk, we might only get BM25 or semantic assert!( results[0].bm25_score.is_some() || results[0].semantic_score.is_some(), "Should have at least one score" ); } #[tokio::test] async fn test_delete_document() { let storage = Arc::new(MockStorage::new()); let bm25_index = Arc::new(BM25Index::new().unwrap()); let engine = RLMEngine::new(storage.clone(), bm25_index).unwrap(); // Load document engine .load_document("doc-5", "Test content", None) .await .unwrap(); // Verify it exists let chunks_before = storage.get_chunks("doc-5").await.unwrap(); assert!(!chunks_before.is_empty()); // Delete let deleted = engine.delete_document("doc-5").await.unwrap(); assert_eq!(deleted, chunks_before.len() as u64); // Verify deletion let chunks_after = storage.get_chunks("doc-5").await.unwrap(); assert!(chunks_after.is_empty()); } #[tokio::test] async fn test_max_chunks_safety_limit() { let storage = Arc::new(MockStorage::new()); let bm25_index = Arc::new(BM25Index::new().unwrap()); let config = RLMEngineConfig { max_chunks_per_doc: 5, // Very low limit for testing ..Default::default() }; let engine = RLMEngine::with_config(storage, bm25_index, config).unwrap(); // Create content that will exceed limit let content = "a".repeat(1000); // Will create many small chunks let chunking_config = ChunkingConfig { strategy: ChunkingStrategy::Fixed, chunk_size: 10, overlap: 0, }; let result = engine .load_document("doc-6", &content, Some(chunking_config)) .await; assert!( result.is_err(), "Should fail when exceeding max chunks limit" ); } #[tokio::test] async fn test_index_stats() { let storage = Arc::new(MockStorage::new()); let bm25_index = Arc::new(BM25Index::new().unwrap()); let engine = RLMEngine::new(storage, bm25_index).unwrap(); // Initially empty let stats = engine.index_stats(); assert_eq!(stats.num_docs, 0); // Load document engine .load_document("doc-7", "Test content", None) .await .unwrap(); // Check stats again let stats = engine.index_stats(); assert!(stats.num_docs > 0); } #[tokio::test] async fn test_embeddings_generated() { use crate::embeddings::EmbeddingConfig; let storage = Arc::new(MockStorage::new()); let bm25_index = Arc::new(BM25Index::new().unwrap()); // Create config with embeddings enabled let config = RLMEngineConfig { embedding: Some(EmbeddingConfig::openai_small()), ..Default::default() }; let engine = RLMEngine::with_config(storage.clone(), bm25_index, config).unwrap(); // Load document let content = "First chunk. Second chunk. Third chunk."; engine.load_document("doc-8", content, None).await.unwrap(); // Verify chunks have embeddings let chunks = storage.get_chunks("doc-8").await.unwrap(); assert!(!chunks.is_empty(), "Should have created chunks"); for chunk in &chunks { assert!( chunk.embedding.is_some(), "Chunk {} should have embedding", chunk.chunk_id ); assert_eq!( chunk.embedding.as_ref().unwrap().len(), 1536, "Embedding should have 1536 dimensions (OpenAI small)" ); } } #[tokio::test] async fn test_embeddings_disabled() { let storage = Arc::new(MockStorage::new()); let bm25_index = Arc::new(BM25Index::new().unwrap()); // Create config with embeddings disabled let config = RLMEngineConfig { embedding: None, ..Default::default() }; let engine = RLMEngine::with_config(storage.clone(), bm25_index, config).unwrap(); // Load document let content = "Test content without embeddings"; engine.load_document("doc-9", content, None).await.unwrap(); // Verify chunks do NOT have embeddings let chunks = storage.get_chunks("doc-9").await.unwrap(); assert!(!chunks.is_empty(), "Should have created chunks"); for chunk in &chunks { assert!( chunk.embedding.is_none(), "Chunk {} should not have embedding when disabled", chunk.chunk_id ); } } #[tokio::test] async fn test_query_with_embeddings() { use crate::embeddings::EmbeddingConfig; let storage = Arc::new(MockStorage::new()); let bm25_index = Arc::new(BM25Index::new().unwrap()); // Create config with embeddings enabled let config = RLMEngineConfig { embedding: Some(EmbeddingConfig::openai_small()), ..Default::default() }; let engine = RLMEngine::with_config(storage.clone(), bm25_index, config).unwrap(); // Load document with embeddings let content = "Rust programming language. Python tutorial. JavaScript guide."; engine.load_document("doc-10", content, None).await.unwrap(); // Get a chunk to use its embedding as query let chunks = storage.get_chunks("doc-10").await.unwrap(); assert!(!chunks.is_empty()); let query_embedding = chunks[0].embedding.as_ref().unwrap(); // Query with embedding (hybrid search) let results = engine .query("doc-10", "Rust", Some(query_embedding), 3) .await .unwrap(); assert!(!results.is_empty(), "Should find results"); // With real embeddings, should get both BM25 and semantic scores } }