// Vapora/crates/vapora-rlm/tests/performance_test.rs
// Performance Tests for RLM
// Tests require: SurrealDB (ws://127.0.0.1:8000)
//
// Run with:
// cargo test -p vapora-rlm --test performance_test -- --ignored --nocapture
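//
// A local SurrealDB instance can be started with the SurrealDB CLI, e.g.
// (exact flags may vary by version):
//   surreal start --user root --pass root --bind 127.0.0.1:8000 memory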
use std::sync::Arc;
use std::time::Instant;
use surrealdb::engine::remote::ws::Ws;
use surrealdb::opt::auth::Root;
use surrealdb::Surreal;
use vapora_rlm::search::bm25::BM25Index;
use vapora_rlm::storage::SurrealDBStorage;
use vapora_rlm::RLMEngine;

/// Connect to a local SurrealDB instance and build an `RLMEngine` for benchmarking.
///
/// All tests share the `test_rlm_perf` namespace/database; documents are keyed by
/// fresh UUIDs, so data from previous runs is left in place rather than cleaned up.
async fn setup_engine() -> Arc<RLMEngine<SurrealDBStorage>> {
    let db = Surreal::new::<Ws>("127.0.0.1:8000").await.unwrap();
    db.signin(Root {
        username: "root",
        password: "root",
    })
    .await
    .unwrap();
    db.use_ns("test_rlm_perf")
        .use_db("test_rlm_perf")
        .await
        .unwrap();
    let storage = Arc::new(SurrealDBStorage::new(db));
    let bm25_index = Arc::new(BM25Index::new().unwrap());
    Arc::new(RLMEngine::new(storage, bm25_index).unwrap())
}

/// Generate a synthetic document with `lines` numbered lines of searchable text.
fn generate_document(lines: usize) -> String {
    (0..lines)
        .map(|i| {
            format!(
                "Line {}: Sample content with error handling, ownership, borrowing, lifetimes, \
                 and Rust programming patterns. This line contains meaningful text for search.\n",
                i + 1
            )
        })
        .collect()
}

#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_document_loading_1k_lines() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-1k-{}", uuid::Uuid::new_v4());
    let content = generate_document(1_000);

    let start = Instant::now();
    let chunk_count = engine.load_document(&doc_id, &content, None).await.unwrap();
    let duration = start.elapsed();

    println!("\n📊 Load 1K lines:");
    println!(" Duration: {:?}", duration);
    println!(" Chunks: {}", chunk_count);
    println!(
        " Throughput: {:.0} lines/sec",
        1_000.0 / duration.as_secs_f64()
    );

    assert!(duration.as_millis() < 2_000, "Should load 1K lines in <2s");
}

#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_document_loading_10k_lines() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-10k-{}", uuid::Uuid::new_v4());
    let content = generate_document(10_000);

    let start = Instant::now();
    let chunk_count = engine.load_document(&doc_id, &content, None).await.unwrap();
    let duration = start.elapsed();

    println!("\n📊 Load 10K lines:");
    println!(" Duration: {:?}", duration);
    println!(" Chunks: {}", chunk_count);
    println!(
        " Throughput: {:.0} lines/sec",
        10_000.0 / duration.as_secs_f64()
    );

    assert!(
        duration.as_millis() < 10_000,
        "Should load 10K lines in <10s"
    );
}

#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_document_loading_100k_lines() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-100k-{}", uuid::Uuid::new_v4());
    let content = generate_document(100_000);

    let start = Instant::now();
    let chunk_count = engine.load_document(&doc_id, &content, None).await.unwrap();
    let duration = start.elapsed();

    println!("\n📊 Load 100K lines:");
    println!(" Duration: {:?}", duration);
    println!(" Chunks: {}", chunk_count);
    println!(
        " Throughput: {:.0} lines/sec",
        100_000.0 / duration.as_secs_f64()
    );

    assert!(duration.as_secs() < 60, "Should load 100K lines in <60s");
}

#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_query_latency() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-query-{}", uuid::Uuid::new_v4());
    let content = generate_document(1_000);

    // Load document first
    engine.load_document(&doc_id, &content, None).await.unwrap();

    // Warm up
    for _ in 0..5 {
        engine.query(&doc_id, "test query", None, 5).await.unwrap();
    }

    // Measure query latency
    let mut latencies = Vec::new();
    for _ in 0..100 {
        let start = Instant::now();
        engine
            .query(&doc_id, "error handling", None, 5)
            .await
            .unwrap();
        latencies.push(start.elapsed());
    }

    // Percentiles require the samples in ascending order.
    latencies.sort();

    let avg_latency = latencies.iter().sum::<std::time::Duration>() / latencies.len() as u32;
    let min_latency = latencies.iter().min().unwrap();
    let max_latency = latencies.iter().max().unwrap();
    let p50 = latencies[latencies.len() / 2];
    let p95 = latencies[latencies.len() * 95 / 100];
    let p99 = latencies[latencies.len() * 99 / 100];

    println!("\n📊 Query Latency (100 queries):");
    println!(" Average: {:?}", avg_latency);
    println!(" Min: {:?}", min_latency);
    println!(" Max: {:?}", max_latency);
    println!(" P50: {:?}", p50);
    println!(" P95: {:?}", p95);
    println!(" P99: {:?}", p99);

    assert!(
        avg_latency.as_millis() < 500,
        "Average query should be <500ms"
    );
    assert!(p95.as_millis() < 1_000, "P95 query should be <1s");
}

#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_concurrent_query_throughput() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-concurrent-{}", uuid::Uuid::new_v4());
    let content = generate_document(5_000);

    // Load document
    engine.load_document(&doc_id, &content, None).await.unwrap();

    // Run 50 concurrent queries
    let start = Instant::now();
    let mut handles = vec![];
    for i in 0..50 {
        let engine = engine.clone();
        let doc_id = doc_id.clone();
        let handle = tokio::spawn(async move {
            let query = format!("query {}", i);
            engine.query(&doc_id, &query, None, 5).await.unwrap()
        });
        handles.push(handle);
    }

    let mut total_results = 0;
    for handle in handles {
        let results = handle.await.unwrap();
        total_results += results.len();
    }
    let duration = start.elapsed();
    let throughput = 50.0 / duration.as_secs_f64();

    println!("\n📊 Concurrent Query Throughput:");
    println!(" Total queries: 50");
    println!(" Duration: {:?}", duration);
    println!(" Throughput: {:.1} queries/sec", throughput);
    println!(" Total results: {}", total_results);

    assert!(
        duration.as_secs() < 10,
        "50 concurrent queries should complete in <10s"
    );
}

#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_bm25_index_build() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-bm25-{}", uuid::Uuid::new_v4());
    let content = generate_document(10_000);

    // Load document (includes BM25 indexing)
    let start = Instant::now();
    engine.load_document(&doc_id, &content, None).await.unwrap();
    let index_duration = start.elapsed();

    println!("\n📊 BM25 Index Build (10K lines):");
    println!(" Duration: {:?}", index_duration);

    // Query to verify the index works
    let start = Instant::now();
    let results = engine
        .query(&doc_id, "error handling", None, 10)
        .await
        .unwrap();
    let query_duration = start.elapsed();

    println!(
        " First query: {:?} ({} results)",
        query_duration,
        results.len()
    );

    // Verify BM25 scores are computed
    assert!(
        results.iter().any(|r| r.bm25_score.is_some()),
        "Should have BM25 scores"
    );
}

#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_full_workflow_target() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-workflow-{}", uuid::Uuid::new_v4());
    let content = generate_document(1_000);

    // Full workflow: load → query. The LLM dispatch step a real pipeline would run
    // next is intentionally excluded from this timing.
    let workflow_start = Instant::now();

    // Load
    let load_start = Instant::now();
    let chunk_count = engine.load_document(&doc_id, &content, None).await.unwrap();
    let load_duration = load_start.elapsed();

    // Query
    let query_start = Instant::now();
    let results = engine
        .query(&doc_id, "error handling", None, 5)
        .await
        .unwrap();
    let query_duration = query_start.elapsed();
    let workflow_duration = workflow_start.elapsed();

    println!("\n📊 Full Workflow Performance:");
    println!(" Load: {:?} ({} chunks)", load_duration, chunk_count);
    println!(" Query: {:?} ({} results)", query_duration, results.len());
    println!(" Total: {:?}", workflow_duration);

    // Target: <500ms for the workflow (excluding LLM dispatch).
    // This test reports the outcome instead of failing when the target is missed.
    println!("\n🎯 Performance Target:");
    if workflow_duration.as_millis() < 500 {
        println!(
            " ✅ PASS - Completed in {:?} (<500ms target)",
            workflow_duration
        );
    } else {
        println!(
            " ⚠️ SLOW - Completed in {:?} (target: <500ms)",
            workflow_duration
        );
        println!("\n Note: Performance target not met but this may be acceptable");
        println!(" Consider optimizations if this becomes a bottleneck");
    }
}

#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_memory_efficiency() {
    let engine = setup_engine().await;

    // Exercise progressively larger documents; memory usage itself is not measured
    // here and should be monitored externally (e.g. with `top` or a profiler).
    println!("\n📊 Memory Efficiency Test:");
    for doc_size in [100, 1_000, 10_000] {
        let doc_id = format!("perf-mem-{}-{}", doc_size, uuid::Uuid::new_v4());
        let content = generate_document(doc_size);
        let chunk_count = engine.load_document(&doc_id, &content, None).await.unwrap();

        // Query to ensure everything works
        let results = engine.query(&doc_id, "test query", None, 5).await.unwrap();
        println!(
            " {} lines: {} chunks, {} results",
            doc_size,
            chunk_count,
            results.len()
        );
    }

    println!(" ✓ Memory test completed (manual monitoring recommended)");
}