// Performance Tests for RLM
// Tests require: SurrealDB (ws://127.0.0.1:8000)
//
// Run with:
//   cargo test -p vapora-rlm --test performance_test -- --ignored --nocapture

use std::sync::Arc;
use std::time::{Duration, Instant};

use surrealdb::engine::remote::ws::Ws;
use surrealdb::opt::auth::Root;
use surrealdb::Surreal;

use vapora_rlm::search::bm25::BM25Index;
use vapora_rlm::storage::SurrealDBStorage;
use vapora_rlm::RLMEngine;

async fn setup_engine() -> Arc<RLMEngine> {
    let db = Surreal::new::<Ws>("127.0.0.1:8000").await.unwrap();
    db.signin(Root {
        username: "root",
        password: "root",
    })
    .await
    .unwrap();
    db.use_ns("test_rlm_perf")
        .use_db("test_rlm_perf")
        .await
        .unwrap();

    let storage = Arc::new(SurrealDBStorage::new(db));
    let bm25_index = Arc::new(BM25Index::new().unwrap());
    Arc::new(RLMEngine::new(storage, bm25_index).unwrap())
}

fn generate_document(lines: usize) -> String {
    (0..lines)
        .map(|i| {
            format!(
                "Line {}: Sample content with error handling, ownership, borrowing, lifetimes, \
                 and Rust programming patterns. This line contains meaningful text for search.\n",
                i + 1
            )
        })
        .collect()
}

#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_document_loading_1k_lines() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-1k-{}", uuid::Uuid::new_v4());
    let content = generate_document(1_000);

    let start = Instant::now();
    let chunk_count = engine.load_document(&doc_id, &content, None).await.unwrap();
    let duration = start.elapsed();

    println!("\nšŸ“Š Load 1K lines:");
    println!("  Duration: {:?}", duration);
    println!("  Chunks: {}", chunk_count);
    println!(
        "  Throughput: {:.0} lines/sec",
        1_000.0 / duration.as_secs_f64()
    );

    assert!(duration.as_millis() < 2_000, "Should load 1K lines in <2s");
}

#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_document_loading_10k_lines() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-10k-{}", uuid::Uuid::new_v4());
    let content = generate_document(10_000);

    let start = Instant::now();
    let chunk_count = engine.load_document(&doc_id, &content, None).await.unwrap();
    let duration = start.elapsed();

    println!("\nšŸ“Š Load 10K lines:");
    println!("  Duration: {:?}", duration);
    println!("  Chunks: {}", chunk_count);
    println!(
        "  Throughput: {:.0} lines/sec",
        10_000.0 / duration.as_secs_f64()
    );

    assert!(
        duration.as_millis() < 10_000,
        "Should load 10K lines in <10s"
    );
}

#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_document_loading_100k_lines() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-100k-{}", uuid::Uuid::new_v4());
    let content = generate_document(100_000);

    let start = Instant::now();
    let chunk_count = engine.load_document(&doc_id, &content, None).await.unwrap();
    let duration = start.elapsed();

    println!("\nšŸ“Š Load 100K lines:");
    println!("  Duration: {:?}", duration);
    println!("  Chunks: {}", chunk_count);
    println!(
        "  Throughput: {:.0} lines/sec",
        100_000.0 / duration.as_secs_f64()
    );

    assert!(duration.as_secs() < 60, "Should load 100K lines in <60s");
}

#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_query_latency() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-query-{}", uuid::Uuid::new_v4());
    let content = generate_document(1_000);

    // Load document first
    engine.load_document(&doc_id, &content, None).await.unwrap();

    // Warm up
    for _ in 0..5 {
        engine.query(&doc_id, "test query", None, 5).await.unwrap();
    }

    // Measure query latency
    let mut latencies = Vec::new();
    for _ in 0..100 {
        let start = Instant::now();
        engine
            .query(&doc_id, "error handling", None, 5)
            .await
            .unwrap();
        latencies.push(start.elapsed());
    }

    // Sort so the percentile indexing below is meaningful
    latencies.sort();

    let avg_latency = latencies.iter().sum::<Duration>() / latencies.len() as u32;
    let min_latency = latencies.iter().min().unwrap();
    let max_latency = latencies.iter().max().unwrap();
    let p50 = latencies[latencies.len() / 2];
    let p95 = latencies[latencies.len() * 95 / 100];
    let p99 = latencies[latencies.len() * 99 / 100];

    println!("\nšŸ“Š Query Latency (100 queries):");
    println!("  Average: {:?}", avg_latency);
    println!("  Min: {:?}", min_latency);
    println!("  Max: {:?}", max_latency);
    println!("  P50: {:?}", p50);
    println!("  P95: {:?}", p95);
    println!("  P99: {:?}", p99);

    assert!(
        avg_latency.as_millis() < 500,
        "Average query should be <500ms"
    );
    assert!(p95.as_millis() < 1_000, "P95 query should be <1s");
}

#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_concurrent_query_throughput() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-concurrent-{}", uuid::Uuid::new_v4());
    let content = generate_document(5_000);

    // Load document
    engine.load_document(&doc_id, &content, None).await.unwrap();

    // Run 50 concurrent queries
    let start = Instant::now();
    let mut handles = vec![];

    for i in 0..50 {
        let engine = engine.clone();
        let doc_id = doc_id.clone();
        let handle = tokio::spawn(async move {
            let query = format!("query {}", i);
            engine.query(&doc_id, &query, None, 5).await.unwrap()
        });
        handles.push(handle);
    }

    let mut total_results = 0;
    for handle in handles {
        let results = handle.await.unwrap();
        total_results += results.len();
    }

    let duration = start.elapsed();
    let throughput = 50.0 / duration.as_secs_f64();

    println!("\nšŸ“Š Concurrent Query Throughput:");
    println!("  Total queries: 50");
    println!("  Duration: {:?}", duration);
    println!("  Throughput: {:.1} queries/sec", throughput);
    println!("  Total results: {}", total_results);

    assert!(
        duration.as_secs() < 10,
        "50 concurrent queries should complete in <10s"
    );
}

#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_bm25_index_build() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-bm25-{}", uuid::Uuid::new_v4());
    let content = generate_document(10_000);

    // Load document (includes BM25 indexing)
    let start = Instant::now();
    engine.load_document(&doc_id, &content, None).await.unwrap();
    let index_duration = start.elapsed();

    println!("\nšŸ“Š BM25 Index Build (10K lines):");
    println!("  Duration: {:?}", index_duration);

    // Query to verify the index works
    let start = Instant::now();
    let results = engine
        .query(&doc_id, "error handling", None, 10)
        .await
        .unwrap();
    let query_duration = start.elapsed();

    println!(
        "  First query: {:?} ({} results)",
        query_duration,
        results.len()
    );

    // Verify BM25 scores are computed
    assert!(
        results.iter().any(|r| r.bm25_score.is_some()),
        "Should have BM25 scores"
    );
}

#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_full_workflow_target() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-workflow-{}", uuid::Uuid::new_v4());
    let content = generate_document(1_000);

    // Full workflow: load → query → (dispatch would go here)
    let workflow_start = Instant::now();

    // Load
    let load_start = Instant::now();
    let chunk_count = engine.load_document(&doc_id, &content, None).await.unwrap();
    let load_duration = load_start.elapsed();

    // Query
    let query_start = Instant::now();
    let results = engine
        .query(&doc_id, "error handling", None, 5)
        .await
        .unwrap();
    let query_duration = query_start.elapsed();

    let workflow_duration = workflow_start.elapsed();

    println!("\nšŸ“Š Full Workflow Performance:");
    println!("  Load: {:?} ({} chunks)", load_duration, chunk_count);
println!(" Query: {:?} ({} results)", query_duration, results.len()); println!(" Total: {:?}", workflow_duration); // Target: <500ms for the workflow (excluding LLM dispatch) println!("\nšŸŽÆ Performance Target:"); if workflow_duration.as_millis() < 500 { println!( " āœ… PASS - Completed in {:?} (<500ms target)", workflow_duration ); } else { println!( " āš ļø SLOW - Completed in {:?} (target: <500ms)", workflow_duration ); } // Don't fail test, just report if workflow_duration.as_millis() >= 500 { println!("\n Note: Performance target not met but this may be acceptable"); println!(" Consider optimizations if this becomes a bottleneck"); } } #[tokio::test] #[ignore] // Requires SurrealDB async fn perf_memory_efficiency() { let engine = setup_engine().await; // Measure memory usage pattern println!("\nšŸ“Š Memory Efficiency Test:"); for doc_size in [100, 1_000, 10_000] { let doc_id = format!("perf-mem-{}-{}", doc_size, uuid::Uuid::new_v4()); let content = generate_document(doc_size); let chunk_count = engine.load_document(&doc_id, &content, None).await.unwrap(); // Query to ensure everything works let results = engine.query(&doc_id, "test query", None, 5).await.unwrap(); println!( " {} lines: {} chunks, {} results", doc_size, chunk_count, results.len() ); } println!(" āœ“ Memory test completed (manual monitoring recommended)"); }