// RLM Engine - Core Orchestration
// Coordinates chunking, storage, hybrid search, and LLM dispatch
use std::sync::Arc;
use std::time::Instant;
use tracing::{debug, info, warn};
use vapora_llm_router::providers::LLMClient;
use crate::chunking::{create_chunker, ChunkingConfig};
use crate::dispatch::{AggregatedResult, LLMDispatcher};
use crate::embeddings::{EmbeddingConfig, EmbeddingGenerator};
use crate::metrics::{CHUNKS_TOTAL, QUERY_DURATION};
use crate::search::bm25::BM25Index;
use crate::search::hybrid::{HybridSearch, ScoredChunk};
use crate::storage::{Chunk, Storage};
use crate::RLMError;
/// RLM Engine configuration
#[derive(Debug, Clone)]
pub struct RLMEngineConfig {
    /// Default chunking configuration, used by `load_document` when the
    /// caller does not supply an explicit `ChunkingConfig`.
    pub chunking: ChunkingConfig,
    /// Embedding configuration (optional - if None, no embeddings generated)
    pub embedding: Option<EmbeddingConfig>,
    /// Enable automatic BM25 index rebuilds.
    /// NOTE(review): currently only consulted in `delete_document`, where it
    /// gates a staleness *warning* rather than an actual rebuild — confirm
    /// intended semantics before relying on it.
    pub auto_rebuild_bm25: bool,
    /// Maximum chunks per document (safety limit). `load_document` returns
    /// `RLMError::ChunkingError` when a document chunks into more than this.
    pub max_chunks_per_doc: usize,
}
impl Default for RLMEngineConfig {
    /// Defaults: default chunking, embeddings enabled, automatic BM25
    /// rebuilds on, and a 10,000 chunk-per-document safety cap.
    fn default() -> Self {
        Self {
            max_chunks_per_doc: 10_000,
            auto_rebuild_bm25: true,
            // Embeddings are on by default.
            embedding: Some(EmbeddingConfig::default()),
            chunking: ChunkingConfig::default(),
        }
    }
}
/// RLM Engine - orchestrates chunking, storage, and hybrid search
pub struct RLMEngine<S: Storage> {
    /// Chunk persistence backend (also shared with `hybrid_search`).
    storage: Arc<S>,
    /// Keyword (BM25) index (also shared with `hybrid_search`).
    bm25_index: Arc<BM25Index>,
    /// Combined BM25 + semantic search with RRF fusion.
    hybrid_search: HybridSearch<S>,
    /// Present only when `config.embedding` is `Some`.
    embedding_generator: Option<Arc<EmbeddingGenerator>>,
    /// LLM dispatcher; holds no client unless constructed via
    /// `with_llm_client`.
    dispatcher: Arc<LLMDispatcher>,
    /// Engine-wide configuration (chunking defaults, limits, flags).
    config: RLMEngineConfig,
}
impl<S: Storage> RLMEngine<S> {
/// Create a new RLM engine
pub fn new(storage: Arc<S>, bm25_index: Arc<BM25Index>) -> crate::Result<Self> {
let hybrid_search = HybridSearch::new(storage.clone(), bm25_index.clone())?;
let config = RLMEngineConfig::default();
let embedding_generator = config
.embedding
.as_ref()
.map(|cfg| Arc::new(EmbeddingGenerator::new(cfg.clone())));
// Phase 6: No LLM client configured by default
let dispatcher = Arc::new(LLMDispatcher::new(None));
Ok(Self {
storage,
bm25_index,
hybrid_search,
embedding_generator,
dispatcher,
config,
})
}
/// Create with custom configuration
pub fn with_config(
storage: Arc<S>,
bm25_index: Arc<BM25Index>,
config: RLMEngineConfig,
) -> crate::Result<Self> {
let hybrid_search = HybridSearch::new(storage.clone(), bm25_index.clone())?;
let embedding_generator = config
.embedding
.as_ref()
.map(|cfg| Arc::new(EmbeddingGenerator::new(cfg.clone())));
// Phase 6: No LLM client configured by default
let dispatcher = Arc::new(LLMDispatcher::new(None));
Ok(Self {
storage,
bm25_index,
hybrid_search,
embedding_generator,
dispatcher,
config,
})
}
/// Create with LLM client for production use.
///
/// Uses `RLMEngineConfig::default()` when `config` is `None`.
///
/// # Errors
/// Returns an error if the hybrid search layer fails to initialize.
pub fn with_llm_client(
    storage: Arc<S>,
    bm25_index: Arc<BM25Index>,
    llm_client: Arc<dyn LLMClient + Send + Sync>,
    config: Option<RLMEngineConfig>,
) -> crate::Result<Self> {
    let config = config.unwrap_or_default();
    let hybrid_search = HybridSearch::new(Arc::clone(&storage), Arc::clone(&bm25_index))?;
    let embedding_generator = match config.embedding {
        Some(ref embed_cfg) => Some(Arc::new(EmbeddingGenerator::new(embed_cfg.clone()))),
        None => None,
    };
    // Production path: route dispatch through the supplied LLM client.
    let dispatcher = Arc::new(LLMDispatcher::new(Some(llm_client)));
    Ok(Self { storage, bm25_index, hybrid_search, embedding_generator, dispatcher, config })
}
/// Load a document: chunk → embed → persist → index
///
/// # Arguments
/// - `doc_id`: Unique document identifier
/// - `content`: Document content to chunk
/// - `chunking_config`: Optional chunking configuration (uses default if
///   None)
///
/// # Returns
/// Number of chunks created
///
/// # Errors
/// Fails if chunking errors, if the chunk count exceeds
/// `max_chunks_per_doc`, or if embedding, persistence, or indexing fails.
pub async fn load_document(
    &self,
    doc_id: &str,
    content: &str,
    chunking_config: Option<ChunkingConfig>,
) -> crate::Result<usize> {
    let start = Instant::now();
    info!("Loading document: {}", doc_id);
    // Use provided config or the engine-wide default.
    let config = chunking_config.unwrap_or_else(|| self.config.chunking.clone());
    // Create chunker and chunk content.
    let chunker = create_chunker(&config);
    let chunk_results = chunker.chunk(content)?;
    let num_chunks = chunk_results.len();
    // Safety check: reject pathological documents before doing any
    // embedding/storage work.
    if num_chunks > self.config.max_chunks_per_doc {
        warn!(
            "Document {} has {} chunks, exceeds max {}",
            doc_id, num_chunks, self.config.max_chunks_per_doc
        );
        return Err(RLMError::ChunkingError(format!(
            "Document exceeds max chunks: {} > {}",
            num_chunks, self.config.max_chunks_per_doc
        )));
    }
    debug!(
        "Chunked document {} into {} chunks using {:?} strategy",
        doc_id, num_chunks, config.strategy
    );
    // Generate embeddings up-front as one batch call, if enabled.
    let embeddings = if let Some(ref generator) = self.embedding_generator {
        debug!("Generating embeddings for {} chunks", num_chunks);
        let texts: Vec<String> = chunk_results.iter().map(|c| c.content.clone()).collect();
        Some(generator.embed_batch(&texts).await?)
    } else {
        debug!("Embedding generation disabled");
        None
    };
    // Convert each ChunkResult to a Chunk, index it, and persist it.
    // (Previously every chunk was cloned into a local Vec that was only
    // used for its length, and `chunk_id` carried a dead clone.)
    for (idx, chunk_result) in chunk_results.iter().enumerate() {
        let chunk_id = format!("{}-chunk-{}", doc_id, idx);
        // Pair each chunk with its embedding by position (if generated).
        let embedding = embeddings.as_ref().and_then(|embs| embs.get(idx)).cloned();
        let chunk = Chunk {
            chunk_id,
            doc_id: doc_id.to_string(),
            content: chunk_result.content.clone(),
            embedding, // Phase 5: Real embeddings from multi-provider
            start_idx: chunk_result.start_idx,
            end_idx: chunk_result.end_idx,
            metadata: None,
            created_at: chrono::Utc::now().to_rfc3339(),
        };
        // Index first, then move the chunk into storage — this avoids a
        // full clone per chunk. NOTE(review): assumes BM25 additions are
        // not visible until `commit()` below, so the swapped order does
        // not change observable success-path behavior — confirm.
        self.bm25_index.add_document(&chunk)?;
        self.storage.save_chunk(chunk).await?;
    }
    // Make the new BM25 entries searchable.
    self.bm25_index.commit()?;
    // Update metrics, labeled by chunking strategy.
    CHUNKS_TOTAL
        .with_label_values(&[&format!("{:?}", config.strategy)])
        .inc_by(num_chunks as u64);
    let duration = start.elapsed();
    info!(
        "Loaded document {} with {} chunks in {:?}",
        doc_id, num_chunks, duration
    );
    Ok(num_chunks)
}
/// Query with hybrid search (semantic + BM25 + RRF fusion)
///
/// # Arguments
/// - `doc_id`: Document to search within
/// - `query_text`: Keyword query for BM25
/// - `query_embedding`: Optional vector embedding for semantic search
/// - `limit`: Maximum results to return
///
/// # Returns
/// Scored chunks ranked by hybrid search
pub async fn query(
&self,
doc_id: &str,
query_text: &str,
query_embedding: Option<&[f32]>,
limit: usize,
) -> crate::Result<Vec<ScoredChunk>> {
let start = Instant::now();
let results = if let Some(embedding) = query_embedding {
// Full hybrid search: BM25 + semantic + RRF
debug!(
"Hybrid query: doc={}, query='{}', limit={}",
doc_id, query_text, limit
);
self.hybrid_search
.search(doc_id, query_text, embedding, limit)
.await?
} else {
// BM25-only search (no embedding provided)
debug!(
"BM25-only query: doc={}, query='{}', limit={}",
doc_id, query_text, limit
);
let bm25_results = self.hybrid_search.bm25_search(query_text, limit)?;
// Get chunks from storage
let all_chunks = self.storage.get_chunks(doc_id).await?;
// Map BM25 results to ScoredChunk
bm25_results
.into_iter()
.filter_map(|bm25_result| {
all_chunks
.iter()
.find(|c| c.chunk_id == bm25_result.chunk_id)
.map(|chunk| ScoredChunk {
chunk: chunk.clone(),
score: bm25_result.score,
bm25_score: Some(bm25_result.score),
semantic_score: None,
})
})
.collect()
};
let duration = start.elapsed();
QUERY_DURATION
.with_label_values(&[if query_embedding.is_some() {
"hybrid"
} else {
"bm25_only"
}])
.observe(duration.as_secs_f64());
debug!("Query returned {} results in {:?}", results.len(), duration);
Ok(results)
}
/// Dispatch subtask to LLM for distributed reasoning
///
/// # Arguments
/// - `doc_id`: Document to query
/// - `query_text`: Query/task description
/// - `query_embedding`: Optional embedding for hybrid search
/// - `limit`: Max chunks to retrieve
///
/// # Returns
/// Aggregated result from LLM analysis of relevant chunks
pub async fn dispatch_subtask(
    &self,
    doc_id: &str,
    query_text: &str,
    query_embedding: Option<&[f32]>,
    limit: usize,
) -> crate::Result<AggregatedResult> {
    info!("Dispatching subtask: doc={}, query={}", doc_id, query_text);
    // Retrieve the most relevant chunks first, then hand them to the LLM.
    let relevant = self
        .query(doc_id, query_text, query_embedding, limit)
        .await?;
    debug!("Retrieved {} chunks for dispatch", relevant.len());
    let aggregated = self.dispatcher.dispatch(query_text, &relevant).await?;
    info!(
        "Dispatch completed: {} LLM calls, {} total tokens",
        aggregated.num_calls,
        aggregated.total_input_tokens + aggregated.total_output_tokens
    );
    Ok(aggregated)
}
/// Get BM25 index statistics
///
/// Thin accessor delegating to the underlying index's `stats()`.
pub fn index_stats(&self) -> crate::search::bm25::IndexStats {
    self.bm25_index.stats()
}
/// Rebuild BM25 index from all chunks for a document
///
/// # Errors
/// Fails if storage retrieval or the index rebuild fails.
pub async fn rebuild_index(&self, doc_id: &str) -> crate::Result<()> {
    info!("Rebuilding BM25 index for document: {}", doc_id);
    // Pull every persisted chunk for this document and re-index from scratch.
    let doc_chunks = self.storage.get_chunks(doc_id).await?;
    self.bm25_index.rebuild_from_chunks(&doc_chunks)?;
    info!(
        "Rebuilt BM25 index for {} with {} chunks",
        doc_id,
        doc_chunks.len()
    );
    Ok(())
}
/// Delete all chunks for a document
///
/// Returns the number of chunks removed from storage. The BM25 index is
/// not touched here — see the warning below.
pub async fn delete_document(&self, doc_id: &str) -> crate::Result<u64> {
    info!("Deleting document: {}", doc_id);
    let removed = self.storage.delete_chunks(doc_id).await?;
    if self.config.auto_rebuild_bm25 {
        // Selective deletion from BM25 isn't supported yet (Phase 3
        // limitation), so we only surface a staleness warning; a full
        // rebuild happens on the next load.
        warn!(
            "BM25 index may contain stale entries for deleted doc {}. Rebuild recommended.",
            doc_id
        );
    }
    Ok(removed)
}
}
#[cfg(test)]
mod tests {
    //! Unit tests for the engine, using an in-memory `MockStorage`.
    //! BM25 indexing runs against the real `BM25Index`, so assertions on
    //! search results exercise real tokenization/scoring.
    use std::collections::HashMap;
    use std::sync::Mutex;

    use async_trait::async_trait;

    use super::*;
    use crate::chunking::ChunkingStrategy;
    use crate::storage::{Buffer, ExecutionHistory};

    // Mock storage for testing: chunks live in a doc_id -> Vec<Chunk> map
    // behind a Mutex so the async trait methods can share it. Buffer and
    // execution-history methods are inert stubs.
    struct MockStorage {
        chunks: Arc<Mutex<HashMap<String, Vec<Chunk>>>>,
    }

    impl MockStorage {
        fn new() -> Self {
            Self {
                chunks: Arc::new(Mutex::new(HashMap::new())),
            }
        }
    }

    #[async_trait]
    impl Storage for MockStorage {
        // Append the chunk under its document id.
        async fn save_chunk(&self, chunk: Chunk) -> crate::Result<()> {
            let mut chunks = self.chunks.lock().unwrap();
            chunks.entry(chunk.doc_id.clone()).or_default().push(chunk);
            Ok(())
        }
        // All chunks for a document, in insertion order; empty if unknown.
        async fn get_chunks(&self, doc_id: &str) -> crate::Result<Vec<Chunk>> {
            let chunks = self.chunks.lock().unwrap();
            Ok(chunks.get(doc_id).cloned().unwrap_or_default())
        }
        // Linear scan across all documents for a single chunk id.
        async fn get_chunk(&self, chunk_id: &str) -> crate::Result<Option<Chunk>> {
            let chunks = self.chunks.lock().unwrap();
            for chunk_list in chunks.values() {
                if let Some(chunk) = chunk_list.iter().find(|c| c.chunk_id == chunk_id) {
                    return Ok(Some(chunk.clone()));
                }
            }
            Ok(None)
        }
        // Semantic search stub: always empty (hybrid tests rely on BM25).
        async fn search_by_embedding(
            &self,
            _embedding: &[f32],
            _limit: usize,
        ) -> crate::Result<Vec<Chunk>> {
            Ok(Vec::new())
        }
        // Buffer APIs: inert stubs, not exercised by these tests.
        async fn save_buffer(&self, _buffer: Buffer) -> crate::Result<()> {
            Ok(())
        }
        async fn get_buffer(&self, _buffer_id: &str) -> crate::Result<Option<Buffer>> {
            Ok(None)
        }
        async fn cleanup_expired_buffers(&self) -> crate::Result<u64> {
            Ok(0)
        }
        // Execution-history APIs: inert stubs.
        async fn save_execution(&self, _execution: ExecutionHistory) -> crate::Result<()> {
            Ok(())
        }
        async fn get_executions(
            &self,
            _doc_id: &str,
            _limit: usize,
        ) -> crate::Result<Vec<ExecutionHistory>> {
            Ok(Vec::new())
        }
        // Remove a whole document; returns how many chunks were dropped.
        async fn delete_chunks(&self, doc_id: &str) -> crate::Result<u64> {
            let mut chunks = self.chunks.lock().unwrap();
            let count = chunks.remove(doc_id).map(|v| v.len()).unwrap_or(0);
            Ok(count as u64)
        }
    }

    // Engine construction with default config should succeed.
    #[tokio::test]
    async fn test_engine_creation() {
        let storage = Arc::new(MockStorage::new());
        let bm25_index = Arc::new(BM25Index::new().unwrap());
        let engine = RLMEngine::new(storage, bm25_index);
        assert!(engine.is_ok());
    }

    // Fixed-size chunking: 250 chars at size 100 must yield >= 2 chunks,
    // and every chunk must be persisted to storage.
    #[tokio::test]
    async fn test_load_document_fixed_chunking() {
        let storage = Arc::new(MockStorage::new());
        let bm25_index = Arc::new(BM25Index::new().unwrap());
        let engine = RLMEngine::new(storage.clone(), bm25_index).unwrap();
        let content = "a".repeat(250); // 250 chars
        let config = ChunkingConfig {
            strategy: ChunkingStrategy::Fixed,
            chunk_size: 100,
            overlap: 20,
        };
        let chunk_count = engine
            .load_document("doc-1", &content, Some(config))
            .await
            .unwrap();
        assert!(chunk_count >= 2, "Should create at least 2 chunks");
        // Verify chunks are persisted
        let chunks = storage.get_chunks("doc-1").await.unwrap();
        assert_eq!(chunks.len(), chunk_count);
    }

    // Semantic (sentence-based) chunking: at least one chunk, all persisted.
    #[tokio::test]
    async fn test_load_document_semantic_chunking() {
        let storage = Arc::new(MockStorage::new());
        let bm25_index = Arc::new(BM25Index::new().unwrap());
        let engine = RLMEngine::new(storage.clone(), bm25_index).unwrap();
        let content = "First sentence. Second sentence! Third sentence?";
        let config = ChunkingConfig {
            strategy: ChunkingStrategy::Semantic,
            chunk_size: 50,
            overlap: 10,
        };
        let chunk_count = engine
            .load_document("doc-2", content, Some(config))
            .await
            .unwrap();
        assert!(chunk_count > 0, "Should create at least 1 chunk");
        // Verify chunks are persisted
        let chunks = storage.get_chunks("doc-2").await.unwrap();
        assert_eq!(chunks.len(), chunk_count);
    }

    // Without a query embedding, `query` must take the BM25-only path:
    // results carry a BM25 score and no semantic score.
    #[tokio::test]
    async fn test_query_bm25_only() {
        let storage = Arc::new(MockStorage::new());
        let bm25_index = Arc::new(BM25Index::new().unwrap());
        let engine = RLMEngine::new(storage.clone(), bm25_index).unwrap();
        // Load document
        let content =
            "Rust programming language. Python programming tutorial. Rust async patterns.";
        engine.load_document("doc-3", content, None).await.unwrap();
        // Query (BM25-only, no embedding)
        let results = engine.query("doc-3", "Rust", None, 5).await.unwrap();
        assert!(!results.is_empty(), "Should find results for 'Rust'");
        assert!(results[0].bm25_score.is_some(), "Should have BM25 score");
        assert!(
            results[0].semantic_score.is_none(),
            "Should not have semantic score"
        );
    }

    // Hybrid path: a hand-built chunk with an embedding is indexed
    // manually, then queried with a nearby query vector.
    #[tokio::test]
    async fn test_query_hybrid_search() {
        let storage = Arc::new(MockStorage::new());
        let bm25_index = Arc::new(BM25Index::new().unwrap());
        let engine = RLMEngine::new(storage.clone(), bm25_index).unwrap();
        // Load document with manual chunk creation (to control the embedding)
        let chunk = Chunk {
            chunk_id: "doc-4-chunk-0".to_string(),
            doc_id: "doc-4".to_string(),
            content: "Rust programming language".to_string(),
            embedding: Some(vec![1.0, 0.0, 0.0]),
            start_idx: 0,
            end_idx: 26,
            metadata: None,
            created_at: chrono::Utc::now().to_rfc3339(),
        };
        storage.save_chunk(chunk.clone()).await.unwrap();
        engine.bm25_index.add_document(&chunk).unwrap();
        engine.bm25_index.commit().unwrap();
        // Query with embedding (hybrid search)
        let query_embedding = vec![0.9, 0.1, 0.0];
        let results = engine
            .query("doc-4", "Rust", Some(&query_embedding), 5)
            .await
            .unwrap();
        assert!(!results.is_empty(), "Should find results");
        // In hybrid search, we should have both scores (if RRF found matches in both)
        // But with only 1 chunk, we might only get BM25 or semantic
        assert!(
            results[0].bm25_score.is_some() || results[0].semantic_score.is_some(),
            "Should have at least one score"
        );
    }

    // Deleting a document removes all its chunks from storage and reports
    // the exact count removed.
    #[tokio::test]
    async fn test_delete_document() {
        let storage = Arc::new(MockStorage::new());
        let bm25_index = Arc::new(BM25Index::new().unwrap());
        let engine = RLMEngine::new(storage.clone(), bm25_index).unwrap();
        // Load document
        engine
            .load_document("doc-5", "Test content", None)
            .await
            .unwrap();
        // Verify it exists
        let chunks_before = storage.get_chunks("doc-5").await.unwrap();
        assert!(!chunks_before.is_empty());
        // Delete
        let deleted = engine.delete_document("doc-5").await.unwrap();
        assert_eq!(deleted, chunks_before.len() as u64);
        // Verify deletion
        let chunks_after = storage.get_chunks("doc-5").await.unwrap();
        assert!(chunks_after.is_empty());
    }

    // A document chunking into more than `max_chunks_per_doc` chunks is
    // rejected with an error (nothing should be persisted).
    #[tokio::test]
    async fn test_max_chunks_safety_limit() {
        let storage = Arc::new(MockStorage::new());
        let bm25_index = Arc::new(BM25Index::new().unwrap());
        let config = RLMEngineConfig {
            max_chunks_per_doc: 5, // Very low limit for testing
            ..Default::default()
        };
        let engine = RLMEngine::with_config(storage, bm25_index, config).unwrap();
        // Create content that will exceed limit
        let content = "a".repeat(1000); // Will create many small chunks
        let chunking_config = ChunkingConfig {
            strategy: ChunkingStrategy::Fixed,
            chunk_size: 10,
            overlap: 0,
        };
        let result = engine
            .load_document("doc-6", &content, Some(chunking_config))
            .await;
        assert!(
            result.is_err(),
            "Should fail when exceeding max chunks limit"
        );
    }

    // Index stats reflect loads: empty before, non-empty after a document
    // is loaded and committed.
    #[tokio::test]
    async fn test_index_stats() {
        let storage = Arc::new(MockStorage::new());
        let bm25_index = Arc::new(BM25Index::new().unwrap());
        let engine = RLMEngine::new(storage, bm25_index).unwrap();
        // Initially empty
        let stats = engine.index_stats();
        assert_eq!(stats.num_docs, 0);
        // Load document
        engine
            .load_document("doc-7", "Test content", None)
            .await
            .unwrap();
        // Check stats again
        let stats = engine.index_stats();
        assert!(stats.num_docs > 0);
    }

    // With embeddings enabled, every persisted chunk must carry a
    // 1536-dimensional vector.
    // NOTE(review): this assumes EmbeddingGenerator produces vectors
    // locally/deterministically (no live API call) — confirm, otherwise
    // this test is network-dependent.
    #[tokio::test]
    async fn test_embeddings_generated() {
        use crate::embeddings::EmbeddingConfig;
        let storage = Arc::new(MockStorage::new());
        let bm25_index = Arc::new(BM25Index::new().unwrap());
        // Create config with embeddings enabled
        let config = RLMEngineConfig {
            embedding: Some(EmbeddingConfig::openai_small()),
            ..Default::default()
        };
        let engine = RLMEngine::with_config(storage.clone(), bm25_index, config).unwrap();
        // Load document
        let content = "First chunk. Second chunk. Third chunk.";
        engine.load_document("doc-8", content, None).await.unwrap();
        // Verify chunks have embeddings
        let chunks = storage.get_chunks("doc-8").await.unwrap();
        assert!(!chunks.is_empty(), "Should have created chunks");
        for chunk in &chunks {
            assert!(
                chunk.embedding.is_some(),
                "Chunk {} should have embedding",
                chunk.chunk_id
            );
            assert_eq!(
                chunk.embedding.as_ref().unwrap().len(),
                1536,
                "Embedding should have 1536 dimensions (OpenAI small)"
            );
        }
    }

    // With embeddings disabled (`embedding: None`), chunks are persisted
    // without vectors.
    #[tokio::test]
    async fn test_embeddings_disabled() {
        let storage = Arc::new(MockStorage::new());
        let bm25_index = Arc::new(BM25Index::new().unwrap());
        // Create config with embeddings disabled
        let config = RLMEngineConfig {
            embedding: None,
            ..Default::default()
        };
        let engine = RLMEngine::with_config(storage.clone(), bm25_index, config).unwrap();
        // Load document
        let content = "Test content without embeddings";
        engine.load_document("doc-9", content, None).await.unwrap();
        // Verify chunks do NOT have embeddings
        let chunks = storage.get_chunks("doc-9").await.unwrap();
        assert!(!chunks.is_empty(), "Should have created chunks");
        for chunk in &chunks {
            assert!(
                chunk.embedding.is_none(),
                "Chunk {} should not have embedding when disabled",
                chunk.chunk_id
            );
        }
    }

    // End-to-end hybrid query: reuse a stored chunk's own embedding as the
    // query vector so at least one semantic match is guaranteed.
    #[tokio::test]
    async fn test_query_with_embeddings() {
        use crate::embeddings::EmbeddingConfig;
        let storage = Arc::new(MockStorage::new());
        let bm25_index = Arc::new(BM25Index::new().unwrap());
        // Create config with embeddings enabled
        let config = RLMEngineConfig {
            embedding: Some(EmbeddingConfig::openai_small()),
            ..Default::default()
        };
        let engine = RLMEngine::with_config(storage.clone(), bm25_index, config).unwrap();
        // Load document with embeddings
        let content = "Rust programming language. Python tutorial. JavaScript guide.";
        engine.load_document("doc-10", content, None).await.unwrap();
        // Get a chunk to use its embedding as query
        let chunks = storage.get_chunks("doc-10").await.unwrap();
        assert!(!chunks.is_empty());
        let query_embedding = chunks[0].embedding.as_ref().unwrap();
        // Query with embedding (hybrid search)
        let results = engine
            .query("doc-10", "Rust", Some(query_embedding), 3)
            .await
            .unwrap();
        assert!(!results.is_empty(), "Should find results");
        // With real embeddings, should get both BM25 and semantic scores
    }
}