// Performance Tests for RLM
// Tests require: SurrealDB (ws://127.0.0.1:8000)
//
// Run with:
// cargo test -p vapora-rlm --test performance_test -- --ignored --nocapture

use std::sync::Arc;
use std::time::Instant;

use surrealdb::engine::remote::ws::Ws;
use surrealdb::opt::auth::Root;
use surrealdb::Surreal;
use vapora_rlm::search::bm25::BM25Index;
use vapora_rlm::storage::SurrealDBStorage;
use vapora_rlm::RLMEngine;

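/// Connects to a local SurrealDB instance over WebSocket, switches to the
/// `test_rlm_perf` namespace/database, and wires storage plus a fresh BM25
/// index into an `RLMEngine` shared behind an `Arc`.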
async fn setup_engine() -> Arc<RLMEngine<SurrealDBStorage>> {
    let db = Surreal::new::<Ws>("127.0.0.1:8000").await.unwrap();
    db.signin(Root {
        username: "root",
        password: "root",
    })
    .await
    .unwrap();
    db.use_ns("test_rlm_perf")
        .use_db("test_rlm_perf")
        .await
        .unwrap();

    let storage = Arc::new(SurrealDBStorage::new(db));
    let bm25_index = Arc::new(BM25Index::new().unwrap());
    Arc::new(RLMEngine::new(storage, bm25_index).unwrap())
}

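/// Builds a synthetic document of `lines` numbered lines, each carrying the
/// same searchable Rust-related vocabulary, so loading and query benchmarks
/// operate on predictable input.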
fn generate_document(lines: usize) -> String {
    (0..lines)
        .map(|i| {
            format!(
                "Line {}: Sample content with error handling, ownership, borrowing, lifetimes, \
                 and Rust programming patterns. This line contains meaningful text for search.\n",
                i + 1
            )
        })
        .collect()
}

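/// Loads a 1,000-line document and asserts end-to-end ingestion stays under 2s.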
#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_document_loading_1k_lines() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-1k-{}", uuid::Uuid::new_v4());
    let content = generate_document(1_000);

    let start = Instant::now();
    let chunk_count = engine.load_document(&doc_id, &content, None).await.unwrap();
    let duration = start.elapsed();

    println!("\n📊 Load 1K lines:");
    println!(" Duration: {:?}", duration);
    println!(" Chunks: {}", chunk_count);
    println!(
        " Throughput: {:.0} lines/sec",
        1_000.0 / duration.as_secs_f64()
    );

    assert!(duration.as_millis() < 2_000, "Should load 1K lines in <2s");
}

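/// Loads a 10,000-line document and asserts ingestion stays under 10s.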
#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_document_loading_10k_lines() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-10k-{}", uuid::Uuid::new_v4());
    let content = generate_document(10_000);

    let start = Instant::now();
    let chunk_count = engine.load_document(&doc_id, &content, None).await.unwrap();
    let duration = start.elapsed();

    println!("\n📊 Load 10K lines:");
    println!(" Duration: {:?}", duration);
    println!(" Chunks: {}", chunk_count);
    println!(
        " Throughput: {:.0} lines/sec",
        10_000.0 / duration.as_secs_f64()
    );

    assert!(
        duration.as_millis() < 10_000,
        "Should load 10K lines in <10s"
    );
}

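/// Loads a 100,000-line document and asserts ingestion stays under 60s.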
#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_document_loading_100k_lines() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-100k-{}", uuid::Uuid::new_v4());
    let content = generate_document(100_000);

    let start = Instant::now();
    let chunk_count = engine.load_document(&doc_id, &content, None).await.unwrap();
    let duration = start.elapsed();

    println!("\n📊 Load 100K lines:");
    println!(" Duration: {:?}", duration);
    println!(" Chunks: {}", chunk_count);
    println!(
        " Throughput: {:.0} lines/sec",
        100_000.0 / duration.as_secs_f64()
    );

    assert!(duration.as_secs() < 60, "Should load 100K lines in <60s");
}

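/// Runs 100 sequential queries against a preloaded 1,000-line document after a
/// short warm-up, then reports average, min/max, and p50/p95/p99 latencies.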
#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_query_latency() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-query-{}", uuid::Uuid::new_v4());
    let content = generate_document(1_000);

    // Load document first
    engine.load_document(&doc_id, &content, None).await.unwrap();

    // Warm up
    for _ in 0..5 {
        engine.query(&doc_id, "test query", None, 5).await.unwrap();
    }

    // Measure query latency
    let mut latencies = Vec::new();
    for _ in 0..100 {
        let start = Instant::now();
        engine
            .query(&doc_id, "error handling", None, 5)
            .await
            .unwrap();
        latencies.push(start.elapsed());
    }

    let avg_latency = latencies.iter().sum::<std::time::Duration>() / latencies.len() as u32;
    let min_latency = *latencies.iter().min().unwrap();
    let max_latency = *latencies.iter().max().unwrap();

    // Sort before indexing so p50/p95/p99 are true percentiles
    latencies.sort();
    let p50 = latencies[latencies.len() / 2];
    let p95 = latencies[latencies.len() * 95 / 100];
    let p99 = latencies[latencies.len() * 99 / 100];

    println!("\n📊 Query Latency (100 queries):");
    println!(" Average: {:?}", avg_latency);
    println!(" Min: {:?}", min_latency);
    println!(" Max: {:?}", max_latency);
    println!(" P50: {:?}", p50);
    println!(" P95: {:?}", p95);
    println!(" P99: {:?}", p99);

    assert!(
        avg_latency.as_millis() < 500,
        "Average query should be <500ms"
    );
    assert!(p95.as_millis() < 1_000, "P95 query should be <1s");
}

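/// Spawns 50 concurrent query tasks against a 5,000-line document and asserts
/// they all complete within 10s, reporting aggregate throughput.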
#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_concurrent_query_throughput() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-concurrent-{}", uuid::Uuid::new_v4());
    let content = generate_document(5_000);

    // Load document
    engine.load_document(&doc_id, &content, None).await.unwrap();

    // Run 50 concurrent queries
    let start = Instant::now();
    let mut handles = vec![];

    for i in 0..50 {
        let engine = engine.clone();
        let doc_id = doc_id.clone();
        let handle = tokio::spawn(async move {
            let query = format!("query {}", i);
            engine.query(&doc_id, &query, None, 5).await.unwrap()
        });
        handles.push(handle);
    }

    let mut total_results = 0;
    for handle in handles {
        let results = handle.await.unwrap();
        total_results += results.len();
    }

    let duration = start.elapsed();
    let throughput = 50.0 / duration.as_secs_f64();

    println!("\n📊 Concurrent Query Throughput:");
    println!(" Total queries: 50");
    println!(" Duration: {:?}", duration);
    println!(" Throughput: {:.1} queries/sec", throughput);
    println!(" Total results: {}", total_results);

    assert!(
        duration.as_secs() < 10,
        "50 concurrent queries should complete in <10s"
    );
}

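/// Loads a 10,000-line document (which builds the BM25 index as part of
/// ingestion) and verifies that query results carry BM25 scores.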
#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_bm25_index_build() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-bm25-{}", uuid::Uuid::new_v4());
    let content = generate_document(10_000);

    // Load document (includes BM25 indexing)
    let start = Instant::now();
    engine.load_document(&doc_id, &content, None).await.unwrap();
    let index_duration = start.elapsed();

    println!("\n📊 BM25 Index Build (10K lines):");
    println!(" Duration: {:?}", index_duration);

    // Query to verify index works
    let start = Instant::now();
    let results = engine
        .query(&doc_id, "error handling", None, 10)
        .await
        .unwrap();
    let query_duration = start.elapsed();

    println!(
        " First query: {:?} ({} results)",
        query_duration,
        results.len()
    );

    // Verify BM25 scores are computed
    assert!(
        results.iter().any(|r| r.bm25_score.is_some()),
        "Should have BM25 scores"
    );
}

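/// Measures the load → query workflow against the informal <500ms target
/// (excluding LLM dispatch); reports the outcome without failing the test.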
#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_full_workflow_target() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-workflow-{}", uuid::Uuid::new_v4());
    let content = generate_document(1_000);

    // Full workflow: load → query → (dispatch would go here)
    let workflow_start = Instant::now();

    // Load
    let load_start = Instant::now();
    let chunk_count = engine.load_document(&doc_id, &content, None).await.unwrap();
    let load_duration = load_start.elapsed();

    // Query
    let query_start = Instant::now();
    let results = engine
        .query(&doc_id, "error handling", None, 5)
        .await
        .unwrap();
    let query_duration = query_start.elapsed();

    let workflow_duration = workflow_start.elapsed();

    println!("\n📊 Full Workflow Performance:");
    println!(" Load: {:?} ({} chunks)", load_duration, chunk_count);
    println!(" Query: {:?} ({} results)", query_duration, results.len());
    println!(" Total: {:?}", workflow_duration);

    // Target: <500ms for the workflow (excluding LLM dispatch).
    // Don't fail the test on a miss; just report.
    println!("\n🎯 Performance Target:");
    if workflow_duration.as_millis() < 500 {
        println!(
            " ✅ PASS - Completed in {:?} (<500ms target)",
            workflow_duration
        );
    } else {
        println!(
            " ⚠️ SLOW - Completed in {:?} (target: <500ms)",
            workflow_duration
        );
        println!("\n Note: Performance target not met but this may be acceptable");
        println!(" Consider optimizations if this becomes a bottleneck");
    }
}

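/// Loads documents of increasing size and runs a query against each; memory
/// usage itself is not measured here, so external monitoring is recommended.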
#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_memory_efficiency() {
    let engine = setup_engine().await;

    // Measure memory usage pattern
    println!("\n📊 Memory Efficiency Test:");

    for doc_size in [100, 1_000, 10_000] {
        let doc_id = format!("perf-mem-{}-{}", doc_size, uuid::Uuid::new_v4());
        let content = generate_document(doc_size);

        let chunk_count = engine.load_document(&doc_id, &content, None).await.unwrap();

        // Query to ensure everything works
        let results = engine.query(&doc_id, "test query", None, 5).await.unwrap();

        println!(
            "   {} lines: {} chunks, {} results",
            doc_size,
            chunk_count,
            results.len()
        );
    }

    println!(" ✓ Memory test completed (manual monitoring recommended)");
}