// Vapora/crates/vapora-rlm/tests/e2e_integration.rs
// End-to-End Integration Tests for RLM
//
// Requirements: SurrealDB (ws://127.0.0.1:8000), NATS (nats://127.0.0.1:4222), and Docker.
//
// Run with:
//   cargo test -p vapora-rlm --test e2e_integration -- --ignored --test-threads=1
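//
// If SurrealDB and NATS are not already running, one common way to start them
// locally is via Docker (a suggestion only; this repo may provide its own
// compose file or scripts):
//   docker run --rm -p 8000:8000 surrealdb/surrealdb:latest start --user root --pass root
//   docker run --rm -p 4222:4222 nats:latest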
use std::sync::Arc;
use std::time::Instant;
use surrealdb::engine::remote::ws::Ws;
use surrealdb::opt::auth::Root;
use surrealdb::Surreal;
use vapora_knowledge_graph::persistence::{KGPersistence, PersistedRlmExecution};
use vapora_knowledge_graph::TimePeriod;
use vapora_rlm::chunking::{ChunkingConfig, ChunkingStrategy};
use vapora_rlm::dispatch::AggregationStrategy;
use vapora_rlm::search::bm25::BM25Index;
use vapora_rlm::storage::SurrealDBStorage;
use vapora_rlm::RLMEngine;
async fn setup_test_environment() -> (
Arc<RLMEngine<SurrealDBStorage>>,
Arc<KGPersistence>,
Arc<BM25Index>,
Arc<SurrealDBStorage>,
) {
// Connect to SurrealDB
let db = Surreal::new::<Ws>("127.0.0.1:8000").await.unwrap();
db.signin(Root {
username: "root",
password: "root",
})
.await
.unwrap();
db.use_ns("test_rlm_e2e")
.use_db("test_rlm_e2e")
.await
.unwrap();
// Create RLM engine
let storage = Arc::new(SurrealDBStorage::new(db.clone()));
let bm25_index = Arc::new(BM25Index::new().unwrap());
let engine = Arc::new(RLMEngine::new(storage.clone(), bm25_index.clone()).unwrap());
// Create KG persistence
let kg_persistence = Arc::new(KGPersistence::new(db));
(engine, kg_persistence, bm25_index, storage)
}
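// A minimal connectivity smoke test (added sketch): it only checks that the
// SurrealDB connection and engine construction in `setup_test_environment`
// succeed, which helps diagnose environment problems before running the
// heavier workflows below.
#[tokio::test]
#[ignore] // Requires SurrealDB
async fn test_e2e_environment_smoke() {
    let (_engine, _kg_persistence, _bm25_index, _storage) = setup_test_environment().await;
    println!("✓ Test environment constructed successfully");
}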
#[tokio::test]
#[ignore] // Requires SurrealDB + NATS + Docker
async fn test_e2e_full_workflow() {
let (engine, kg_persistence, _bm25_index, _storage) = setup_test_environment().await;
let doc_id = format!("e2e-doc-{}", uuid::Uuid::new_v4());
// Step 1: Load large document
let large_content = generate_large_document(1000); // 1000 lines
let start = Instant::now();
let chunk_count = engine
.load_document(&doc_id, &large_content, None)
.await
.unwrap();
let load_duration = start.elapsed();
println!(
"✓ Document loaded: {} chunks in {:?}",
chunk_count, load_duration
);
assert!(chunk_count > 0, "Should create at least one chunk");
// Small delay to ensure BM25 index commit completes
tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
// Step 2: Query with hybrid search
let query = "error handling patterns";
let start = Instant::now();
let results = engine.query(&doc_id, query, None, 5).await.unwrap();
let query_duration = start.elapsed();
println!(
"✓ Query completed: {} results in {:?}",
results.len(),
query_duration
);
assert!(!results.is_empty(), "Should find relevant chunks");
// Verify hybrid scores
for (i, result) in results.iter().enumerate() {
println!(
" Chunk {}: score={:.3}, bm25={:?}, semantic={:?}",
i + 1,
result.score,
result.bm25_score,
result.semantic_score
);
assert!(result.score > 0.0, "Score should be positive");
}
// Step 3: Dispatch to LLM (with mock for now)
let start = Instant::now();
let dispatch_result = engine.dispatch_subtask(&doc_id, query, None, 5).await;
let dispatch_duration = start.elapsed();
match dispatch_result {
Ok(result) => {
println!("✓ LLM dispatch completed in {:?}", dispatch_duration);
println!(" Result: {} chars", result.text.len());
println!(
" Tokens: {} in, {} out",
result.total_input_tokens, result.total_output_tokens
);
println!(" LLM calls: {}", result.num_calls);
}
Err(e) => {
// Expected when no LLM client configured
println!("⚠ LLM dispatch skipped (no client): {}", e);
}
}
// Step 4: Persist to Knowledge Graph
let execution = PersistedRlmExecution::builder(
format!("exec-{}", uuid::Uuid::new_v4()),
doc_id.clone(),
query.to_string(),
)
.chunks_used(results.iter().map(|r| r.chunk.chunk_id.clone()).collect())
.duration_ms(query_duration.as_millis() as u64)
.tokens(1000, 500)
.provider("mock".to_string())
.success(true)
.build();
kg_persistence
.persist_rlm_execution(execution)
.await
.unwrap();
println!("✓ Execution persisted to Knowledge Graph");
// Step 5: Verify retrieval
let executions = kg_persistence
.get_rlm_executions_by_doc(&doc_id, 10)
.await
.unwrap();
assert!(
!executions.is_empty(),
"Should retrieve persisted execution"
);
println!("✓ Retrieved {} executions from KG", executions.len());
// Performance assertion
let total_duration = load_duration + query_duration;
println!("\n📊 Total workflow duration: {:?}", total_duration);
assert!(
total_duration.as_millis() < 5000,
"Full workflow should complete in <5s"
);
}
#[tokio::test]
#[ignore] // Requires SurrealDB
async fn test_e2e_chunking_strategies() {
let (engine, _, _bm25_index, _storage) = setup_test_environment().await;
let content = "fn main() {\n println!(\"Hello, world!\");\n}\n\nfn add(a: i32, b: i32) -> \
i32 {\n a + b\n}";
// Test different chunking strategies
let strategies = vec![
("fixed", ChunkingStrategy::Fixed),
("semantic", ChunkingStrategy::Semantic),
("code", ChunkingStrategy::Code),
];
for (name, strategy) in strategies {
let doc_id = format!("chunk-test-{}-{}", name, uuid::Uuid::new_v4());
let config = ChunkingConfig {
strategy,
chunk_size: 1000,
overlap: 100,
};
let chunk_count = engine
.load_document(&doc_id, content, Some(config))
.await
.unwrap();
println!("✓ Strategy '{}': {} chunks created", name, chunk_count);
assert!(chunk_count > 0, "Strategy '{}' should create chunks", name);
}
}
#[tokio::test]
#[ignore] // Requires SurrealDB
async fn test_e2e_hybrid_search_quality() {
let (engine, _, _bm25_index, _storage) = setup_test_environment().await;
let doc_id = format!("search-quality-{}", uuid::Uuid::new_v4());
// Load document with known content
let content = r#"
Error handling in Rust uses the Result type.
The Result<T, E> enum has two variants: Ok(T) and Err(E).
The ? operator propagates errors automatically.
Panic should be used for unrecoverable errors.
Custom error types can be created with thiserror.
The anyhow crate provides easy error handling.
Ownership rules prevent memory safety issues.
Borrowing allows temporary access without ownership transfer.
Lifetimes ensure references are valid.
"#;
engine.load_document(&doc_id, content, None).await.unwrap();
// Query for error handling
let results = engine
.query(&doc_id, "error handling Result", None, 5)
.await
.unwrap();
assert!(!results.is_empty(), "Should find relevant chunks");
// First result should be most relevant
assert!(
results[0].chunk.content.contains("Error handling")
|| results[0].chunk.content.contains("Result"),
"Top result should contain query terms"
);
println!("✓ Top result score: {:.3}", results[0].score);
println!(
" Content: {}",
results[0].chunk.content.lines().next().unwrap()
);
// Verify hybrid scoring
for result in &results {
if let (Some(bm25), Some(semantic)) = (result.bm25_score, result.semantic_score) {
println!(
" Chunk: bm25={:.3}, semantic={:.3}, combined={:.3}",
bm25, semantic, result.score
);
}
}
}
#[tokio::test]
#[ignore] // Requires SurrealDB
async fn test_e2e_knowledge_graph_learning() {
let (engine, kg_persistence, _bm25_index, _storage) = setup_test_environment().await;
let doc_id = format!("learning-{}", uuid::Uuid::new_v4());
// Load document
let content = generate_large_document(100);
engine.load_document(&doc_id, &content, None).await.unwrap();
// Simulate multiple queries over time
for i in 0..10 {
let query = format!("query number {}", i);
let results = engine.query(&doc_id, &query, None, 3).await.unwrap();
let execution =
PersistedRlmExecution::builder(format!("exec-learning-{}", i), doc_id.clone(), query)
.chunks_used(results.iter().map(|r| r.chunk.chunk_id.clone()).collect())
.duration_ms(100 + (i as u64 * 10))
.tokens(800, 400)
.provider("claude".to_string())
.success(i % 3 != 0) // fails when i % 3 == 0 (i = 0, 3, 6, 9), so 6 of 10 succeed: 60% success rate
.build();
kg_persistence
.persist_rlm_execution(execution)
.await
.unwrap();
// Small delay
tokio::time::sleep(tokio::time::Duration::from_millis(10)).await;
}
// Get learning curve
let curve = kg_persistence
.get_rlm_learning_curve(&doc_id, 1)
.await
.unwrap();
println!("✓ Learning curve: {} data points", curve.len());
assert!(!curve.is_empty(), "Should have learning data");
// Get success rate
let success_rate = kg_persistence.get_rlm_success_rate(&doc_id).await.unwrap();
println!(" Success rate: {:.2}%", success_rate * 100.0);
assert!(
(success_rate - 0.6).abs() < 0.1,
"Success rate should be ~60%"
);
// Get cost summary
let (cost, input_tokens, output_tokens) = kg_persistence
.get_rlm_cost_summary(&doc_id, TimePeriod::LastDay)
.await
.unwrap();
println!(
" Cost summary: ${:.2}, {} input, {} output tokens",
cost / 100.0,
input_tokens,
output_tokens
);
assert_eq!(input_tokens, 8000, "Should track input tokens");
assert_eq!(output_tokens, 4000, "Should track output tokens");
}
#[tokio::test]
#[ignore] // Requires SurrealDB
async fn test_e2e_large_document_performance() {
let (engine, _, _bm25_index, _storage) = setup_test_environment().await;
let doc_id = format!("perf-{}", uuid::Uuid::new_v4());
// Generate 10,000 line document
let large_content = generate_large_document(10_000);
let start = Instant::now();
let chunk_count = engine
.load_document(&doc_id, &large_content, None)
.await
.unwrap();
let load_duration = start.elapsed();
println!("✓ Loaded 10k line document in {:?}", load_duration);
println!(" Created {} chunks", chunk_count);
// Should create a reasonable number of chunks:
// 10k generated lines at ~235 chars each ≈ 2.3M chars; with the default
// chunk_size=1000, expect roughly 2300-2900 chunks depending on overlap
assert!(chunk_count > 100, "Should create multiple chunks");
assert!(chunk_count < 3000, "Should not create excessive chunks");
// Query performance
let start = Instant::now();
let results = engine
.query(&doc_id, "test query pattern", None, 10)
.await
.unwrap();
let query_duration = start.elapsed();
println!("✓ Query completed in {:?}", query_duration);
println!(" Found {} results", results.len());
// Performance assertions (adjusted for real persistence + BM25 indexing)
assert!(
load_duration.as_millis() < 30_000,
"Load should complete in <30s (actual: {}ms)",
load_duration.as_millis()
);
assert!(
query_duration.as_millis() < 2_000,
"Query should complete in <2s (actual: {}ms)",
query_duration.as_millis()
);
}
#[tokio::test]
#[ignore] // Requires SurrealDB
async fn test_e2e_concurrent_queries() {
let (engine, _, _bm25_index, _storage) = setup_test_environment().await;
let doc_id = format!("concurrent-{}", uuid::Uuid::new_v4());
// Load document
let content = generate_large_document(500);
engine.load_document(&doc_id, &content, None).await.unwrap();
// Run 10 concurrent queries
let mut handles = vec![];
for i in 0..10 {
let engine = engine.clone();
let doc_id = doc_id.clone();
let handle = tokio::spawn(async move {
let query = format!("concurrent query {}", i);
let start = Instant::now();
let results = engine.query(&doc_id, &query, None, 5).await.unwrap();
let duration = start.elapsed();
(results.len(), duration)
});
handles.push(handle);
}
// Wait for all queries
let start = Instant::now();
let mut total_results = 0;
for handle in handles {
let (count, duration) = handle.await.unwrap();
total_results += count;
println!(" Query completed: {} results in {:?}", count, duration);
}
let total_duration = start.elapsed();
println!("✓ 10 concurrent queries completed in {:?}", total_duration);
println!(" Total results: {}", total_results);
// Should handle concurrency well
assert!(
total_duration.as_millis() < 5_000,
"Concurrent queries should complete in <5s"
);
}
#[tokio::test]
#[ignore] // Requires SurrealDB
async fn test_e2e_aggregation_strategies() {
let (engine, _, _bm25_index, _storage) = setup_test_environment().await;
let doc_id = format!("agg-{}", uuid::Uuid::new_v4());
// Load document
let content = "Test content for aggregation strategies.";
engine.load_document(&doc_id, content, None).await.unwrap();
// Test different aggregation strategies
let strategies = vec![
("concatenate", AggregationStrategy::Concatenate),
("first_only", AggregationStrategy::FirstOnly),
("majority_vote", AggregationStrategy::MajorityVote),
];
for (name, _strategy) in strategies {
// Note: dispatch_subtask does not yet accept an aggregation config, so each
// strategy iteration currently exercises the same default path; this loop is a
// placeholder until the config is exposed
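// Hypothetical future call shape, purely illustrative (no such parameter or
// method exists in the current API):
//   engine.dispatch_subtask_with_config(&doc_id, "test query", None, 3, strategy).await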
let result = engine
.dispatch_subtask(&doc_id, "test query", None, 3)
.await;
match result {
Ok(dispatch) => {
println!(
"✓ Strategy '{}': {} chars, {} calls",
name,
dispatch.text.len(),
dispatch.num_calls
);
}
Err(_) => {
println!("⚠ Strategy '{}': skipped (no LLM client)", name);
}
}
}
}
// Helper function to generate large test documents
fn generate_large_document(lines: usize) -> String {
let mut content = String::new();
for i in 0..lines {
content.push_str(&format!(
"Line {}: This is test content with some keywords like error, handling, pattern, and \
Rust. It contains enough text to be meaningful for chunking and search. The content \
varies slightly on each line to ensure diversity in the chunks.\n",
i + 1
));
}
content
}
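// A small sanity check for the helper above (added as a sketch): it runs without
// SurrealDB/NATS/Docker and only verifies that the generator produces the requested
// number of lines and the keywords the search tests rely on.
#[test]
fn test_generate_large_document_shape() {
    let doc = generate_large_document(25);
    assert_eq!(doc.lines().count(), 25, "Should emit one line per requested line");
    assert!(doc.contains("error"), "Generated text should contain the 'error' keyword");
    assert!(doc.contains("Line 25:"), "Lines should be numbered starting at 1");
}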
#[tokio::test]
#[ignore] // Requires SurrealDB
async fn test_e2e_empty_and_edge_cases() {
let (engine, _, _bm25_index, _storage) = setup_test_environment().await;
// Test empty document
let doc_id = format!("empty-{}", uuid::Uuid::new_v4());
let result = engine.load_document(&doc_id, "", None).await;
assert!(result.is_ok(), "Should handle empty document");
assert_eq!(result.unwrap(), 0, "Empty document should create 0 chunks");
// Test single word
let doc_id = format!("single-{}", uuid::Uuid::new_v4());
let result = engine.load_document(&doc_id, "word", None).await;
assert!(result.is_ok(), "Should handle single word");
// Test very long line
let doc_id = format!("long-{}", uuid::Uuid::new_v4());
let long_line = "word ".repeat(10_000);
let result = engine.load_document(&doc_id, &long_line, None).await;
assert!(result.is_ok(), "Should handle very long line");
// Test special characters
let doc_id = format!("special-{}", uuid::Uuid::new_v4());
let special = "!@#$%^&*(){}[]|\\:;\"'<>?,./~`\n\t\r";
let result = engine.load_document(&doc_id, special, None).await;
assert!(result.is_ok(), "Should handle special characters");
println!("✓ All edge cases handled correctly");
}
#[tokio::test]
#[ignore] // Requires SurrealDB
async fn test_e2e_cleanup_and_maintenance() {
let (_, kg_persistence, _bm25_index, _storage) = setup_test_environment().await;
let initial_count = kg_persistence.get_rlm_execution_count().await.unwrap();
println!("Initial execution count: {}", initial_count);
// Create test executions
for i in 0..5 {
let execution = PersistedRlmExecution::builder(
format!("cleanup-{}", i),
"cleanup-doc".to_string(),
"test query".to_string(),
)
.success(true)
.build();
kg_persistence
.persist_rlm_execution(execution)
.await
.unwrap();
}
let after_count = kg_persistence.get_rlm_execution_count().await.unwrap();
assert!(after_count >= initial_count + 5, "Should add 5 executions");
// Cleanup old executions
let result = kg_persistence.cleanup_old_rlm_executions(0).await;
assert!(result.is_ok(), "Cleanup should succeed");
println!("✓ Cleanup completed successfully");
}