// Performance Tests for RLM
// Tests require: SurrealDB (ws://127.0.0.1:8000)
//
// Run with:
// cargo test -p vapora-rlm --test performance_test -- --ignored --nocapture

use std::sync::Arc;
use std::time::Instant;

use surrealdb::engine::remote::ws::Ws;
use surrealdb::opt::auth::Root;
use surrealdb::Surreal;
use vapora_rlm::search::bm25::BM25Index;
use vapora_rlm::storage::SurrealDBStorage;
use vapora_rlm::RLMEngine;

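/// Connects to a local SurrealDB instance over WebSocket, switches to the
/// `test_rlm_perf` namespace/database, and wires storage plus a fresh BM25
/// index into an `RLMEngine` shared behind an `Arc`.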
async fn setup_engine() -> Arc<RLMEngine<SurrealDBStorage>> {
    let db = Surreal::new::<Ws>("127.0.0.1:8000").await.unwrap();
    db.signin(Root {
        username: "root",
        password: "root",
    })
    .await
    .unwrap();
    db.use_ns("test_rlm_perf")
        .use_db("test_rlm_perf")
        .await
        .unwrap();

    let storage = Arc::new(SurrealDBStorage::new(db));
    let bm25_index = Arc::new(BM25Index::new().unwrap());
    Arc::new(RLMEngine::new(storage, bm25_index).unwrap())
}

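/// Builds a synthetic document of `lines` numbered lines, each carrying the
/// same searchable Rust-related vocabulary, so loading and query benchmarks
/// operate on predictable input.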
fn generate_document(lines: usize) -> String {
    (0..lines)
        .map(|i| {
            format!(
                "Line {}: Sample content with error handling, ownership, borrowing, lifetimes, \
                 and Rust programming patterns. This line contains meaningful text for search.\n",
                i + 1
            )
        })
        .collect()
}

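/// Loads a 1,000-line document and asserts end-to-end ingestion stays under 2s.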
#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_document_loading_1k_lines() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-1k-{}", uuid::Uuid::new_v4());
    let content = generate_document(1_000);

    let start = Instant::now();
    let chunk_count = engine.load_document(&doc_id, &content, None).await.unwrap();
    let duration = start.elapsed();

    println!("\n📊 Load 1K lines:");
    println!(" Duration: {:?}", duration);
    println!(" Chunks: {}", chunk_count);
    println!(
        " Throughput: {:.0} lines/sec",
        1_000.0 / duration.as_secs_f64()
    );

    assert!(duration.as_millis() < 2_000, "Should load 1K lines in <2s");
}

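/// Loads a 10,000-line document and asserts ingestion stays under 10s.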
#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_document_loading_10k_lines() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-10k-{}", uuid::Uuid::new_v4());
    let content = generate_document(10_000);

    let start = Instant::now();
    let chunk_count = engine.load_document(&doc_id, &content, None).await.unwrap();
    let duration = start.elapsed();

    println!("\n📊 Load 10K lines:");
    println!(" Duration: {:?}", duration);
    println!(" Chunks: {}", chunk_count);
    println!(
        " Throughput: {:.0} lines/sec",
        10_000.0 / duration.as_secs_f64()
    );

    assert!(
        duration.as_millis() < 10_000,
        "Should load 10K lines in <10s"
    );
}

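/// Loads a 100,000-line document and asserts ingestion stays under 60s.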
#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_document_loading_100k_lines() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-100k-{}", uuid::Uuid::new_v4());
    let content = generate_document(100_000);

    let start = Instant::now();
    let chunk_count = engine.load_document(&doc_id, &content, None).await.unwrap();
    let duration = start.elapsed();

    println!("\n📊 Load 100K lines:");
    println!(" Duration: {:?}", duration);
    println!(" Chunks: {}", chunk_count);
    println!(
        " Throughput: {:.0} lines/sec",
        100_000.0 / duration.as_secs_f64()
    );

    assert!(duration.as_secs() < 60, "Should load 100K lines in <60s");
}

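/// Runs 100 sequential queries against a preloaded 1,000-line document after a
/// short warm-up, then reports average, min/max, and p50/p95/p99 latencies.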
#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_query_latency() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-query-{}", uuid::Uuid::new_v4());
    let content = generate_document(1_000);

    // Load document first
    engine.load_document(&doc_id, &content, None).await.unwrap();

    // Warm up
    for _ in 0..5 {
        engine.query(&doc_id, "test query", None, 5).await.unwrap();
    }

    // Measure query latency
    let mut latencies = Vec::new();
    for _ in 0..100 {
        let start = Instant::now();
        engine
            .query(&doc_id, "error handling", None, 5)
            .await
            .unwrap();
        latencies.push(start.elapsed());
    }

    let avg_latency = latencies.iter().sum::<std::time::Duration>() / latencies.len() as u32;
    let min_latency = *latencies.iter().min().unwrap();
    let max_latency = *latencies.iter().max().unwrap();

    // Sort before indexing so p50/p95/p99 are true percentiles
    latencies.sort();
    let p50 = latencies[latencies.len() / 2];
    let p95 = latencies[latencies.len() * 95 / 100];
    let p99 = latencies[latencies.len() * 99 / 100];

    println!("\n📊 Query Latency (100 queries):");
    println!(" Average: {:?}", avg_latency);
    println!(" Min: {:?}", min_latency);
    println!(" Max: {:?}", max_latency);
    println!(" P50: {:?}", p50);
    println!(" P95: {:?}", p95);
    println!(" P99: {:?}", p99);

    assert!(
        avg_latency.as_millis() < 500,
        "Average query should be <500ms"
    );
    assert!(p95.as_millis() < 1_000, "P95 query should be <1s");
}

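/// Spawns 50 concurrent query tasks against a 5,000-line document and asserts
/// they all complete within 10s, reporting aggregate throughput.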
#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_concurrent_query_throughput() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-concurrent-{}", uuid::Uuid::new_v4());
    let content = generate_document(5_000);

    // Load document
    engine.load_document(&doc_id, &content, None).await.unwrap();

    // Run 50 concurrent queries
    let start = Instant::now();
    let mut handles = vec![];

    for i in 0..50 {
        let engine = engine.clone();
        let doc_id = doc_id.clone();
        let handle = tokio::spawn(async move {
            let query = format!("query {}", i);
            engine.query(&doc_id, &query, None, 5).await.unwrap()
        });
        handles.push(handle);
    }

    let mut total_results = 0;
    for handle in handles {
        let results = handle.await.unwrap();
        total_results += results.len();
    }

    let duration = start.elapsed();
    let throughput = 50.0 / duration.as_secs_f64();

    println!("\n📊 Concurrent Query Throughput:");
    println!(" Total queries: 50");
    println!(" Duration: {:?}", duration);
    println!(" Throughput: {:.1} queries/sec", throughput);
    println!(" Total results: {}", total_results);

    assert!(
        duration.as_secs() < 10,
        "50 concurrent queries should complete in <10s"
    );
}

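/// Loads a 10,000-line document (which builds the BM25 index as part of
/// ingestion) and verifies that query results carry BM25 scores.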
#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_bm25_index_build() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-bm25-{}", uuid::Uuid::new_v4());
    let content = generate_document(10_000);

    // Load document (includes BM25 indexing)
    let start = Instant::now();
    engine.load_document(&doc_id, &content, None).await.unwrap();
    let index_duration = start.elapsed();

    println!("\n📊 BM25 Index Build (10K lines):");
    println!(" Duration: {:?}", index_duration);

    // Query to verify index works
    let start = Instant::now();
    let results = engine
        .query(&doc_id, "error handling", None, 10)
        .await
        .unwrap();
    let query_duration = start.elapsed();

    println!(
        " First query: {:?} ({} results)",
        query_duration,
        results.len()
    );

    // Verify BM25 scores are computed
    assert!(
        results.iter().any(|r| r.bm25_score.is_some()),
        "Should have BM25 scores"
    );
}

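/// Measures the load → query workflow against the informal <500ms target
/// (excluding LLM dispatch); reports the outcome without failing the test.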
#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_full_workflow_target() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-workflow-{}", uuid::Uuid::new_v4());
    let content = generate_document(1_000);

    // Full workflow: load → query → (dispatch would go here)
    let workflow_start = Instant::now();

    // Load
    let load_start = Instant::now();
    let chunk_count = engine.load_document(&doc_id, &content, None).await.unwrap();
    let load_duration = load_start.elapsed();

    // Query
    let query_start = Instant::now();
    let results = engine
        .query(&doc_id, "error handling", None, 5)
        .await
        .unwrap();
    let query_duration = query_start.elapsed();

    let workflow_duration = workflow_start.elapsed();

    println!("\n📊 Full Workflow Performance:");
    println!(" Load: {:?} ({} chunks)", load_duration, chunk_count);
    println!(" Query: {:?} ({} results)", query_duration, results.len());
    println!(" Total: {:?}", workflow_duration);

    // Target: <500ms for the workflow (excluding LLM dispatch).
    // Don't fail the test on a miss; just report.
    println!("\n🎯 Performance Target:");
    if workflow_duration.as_millis() < 500 {
        println!(
            " ✅ PASS - Completed in {:?} (<500ms target)",
            workflow_duration
        );
    } else {
        println!(
            " ⚠️ SLOW - Completed in {:?} (target: <500ms)",
            workflow_duration
        );
        println!("\n Note: Performance target not met but this may be acceptable");
        println!(" Consider optimizations if this becomes a bottleneck");
    }
}

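/// Loads documents of increasing size and runs a query against each; memory
/// usage itself is not measured here, so external monitoring is recommended.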
#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_memory_efficiency() {
    let engine = setup_engine().await;

    // Measure memory usage pattern
    println!("\n📊 Memory Efficiency Test:");

    for doc_size in [100, 1_000, 10_000] {
        let doc_id = format!("perf-mem-{}-{}", doc_size, uuid::Uuid::new_v4());
        let content = generate_document(doc_size);

        let chunk_count = engine.load_document(&doc_id, &content, None).await.unwrap();

        // Query to ensure everything works
        let results = engine.query(&doc_id, "test query", None, 5).await.unwrap();

        println!(
            "   {} lines: {} chunks, {} results",
            doc_size,
            chunk_count,
            results.len()
        );
    }

    println!(" ✓ Memory test completed (manual monitoring recommended)");
}