// Vapora/crates/vapora-rlm/tests/performance_test.rs
// Performance Tests for RLM
// Tests require: SurrealDB (ws://127.0.0.1:8000)
//
// Run with:
// cargo test -p vapora-rlm --test performance_test -- --ignored --nocapture
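//
// A local SurrealDB instance can be started with the SurrealDB CLI, e.g.
// (exact flags may vary by version):
//   surreal start --user root --pass root --bind 127.0.0.1:8000 memory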
use std::sync::Arc;
use std::time::Instant;
use surrealdb::engine::remote::ws::Ws;
use surrealdb::opt::auth::Root;
use surrealdb::Surreal;
use vapora_rlm::search::bm25::BM25Index;
use vapora_rlm::storage::SurrealDBStorage;
use vapora_rlm::RLMEngine;

/// Connect to a local SurrealDB instance and build an `RLMEngine` for benchmarking.
///
/// All tests share the `test_rlm_perf` namespace/database; documents are keyed by
/// fresh UUIDs, so data from previous runs is left in place rather than cleaned up.
async fn setup_engine() -> Arc<RLMEngine<SurrealDBStorage>> {
    let db = Surreal::new::<Ws>("127.0.0.1:8000").await.unwrap();
    db.signin(Root {
        username: "root",
        password: "root",
    })
    .await
    .unwrap();
    db.use_ns("test_rlm_perf")
        .use_db("test_rlm_perf")
        .await
        .unwrap();
    let storage = Arc::new(SurrealDBStorage::new(db));
    let bm25_index = Arc::new(BM25Index::new().unwrap());
    Arc::new(RLMEngine::new(storage, bm25_index).unwrap())
}

/// Generate a synthetic document with `lines` numbered lines of searchable text.
fn generate_document(lines: usize) -> String {
    (0..lines)
        .map(|i| {
            format!(
                "Line {}: Sample content with error handling, ownership, borrowing, lifetimes, \
                 and Rust programming patterns. This line contains meaningful text for search.\n",
                i + 1
            )
        })
        .collect()
}

#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_document_loading_1k_lines() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-1k-{}", uuid::Uuid::new_v4());
    let content = generate_document(1_000);

    let start = Instant::now();
    let chunk_count = engine.load_document(&doc_id, &content, None).await.unwrap();
    let duration = start.elapsed();

    println!("\n📊 Load 1K lines:");
    println!(" Duration: {:?}", duration);
    println!(" Chunks: {}", chunk_count);
    println!(
        " Throughput: {:.0} lines/sec",
        1_000.0 / duration.as_secs_f64()
    );

    assert!(duration.as_millis() < 2_000, "Should load 1K lines in <2s");
}

#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_document_loading_10k_lines() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-10k-{}", uuid::Uuid::new_v4());
    let content = generate_document(10_000);

    let start = Instant::now();
    let chunk_count = engine.load_document(&doc_id, &content, None).await.unwrap();
    let duration = start.elapsed();

    println!("\n📊 Load 10K lines:");
    println!(" Duration: {:?}", duration);
    println!(" Chunks: {}", chunk_count);
    println!(
        " Throughput: {:.0} lines/sec",
        10_000.0 / duration.as_secs_f64()
    );

    assert!(
        duration.as_millis() < 10_000,
        "Should load 10K lines in <10s"
    );
}

#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_document_loading_100k_lines() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-100k-{}", uuid::Uuid::new_v4());
    let content = generate_document(100_000);

    let start = Instant::now();
    let chunk_count = engine.load_document(&doc_id, &content, None).await.unwrap();
    let duration = start.elapsed();

    println!("\n📊 Load 100K lines:");
    println!(" Duration: {:?}", duration);
    println!(" Chunks: {}", chunk_count);
    println!(
        " Throughput: {:.0} lines/sec",
        100_000.0 / duration.as_secs_f64()
    );

    assert!(duration.as_secs() < 60, "Should load 100K lines in <60s");
}

#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_query_latency() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-query-{}", uuid::Uuid::new_v4());
    let content = generate_document(1_000);

    // Load document first
    engine.load_document(&doc_id, &content, None).await.unwrap();

    // Warm up
    for _ in 0..5 {
        engine.query(&doc_id, "test query", None, 5).await.unwrap();
    }

    // Measure query latency
    let mut latencies = Vec::new();
    for _ in 0..100 {
        let start = Instant::now();
        engine
            .query(&doc_id, "error handling", None, 5)
            .await
            .unwrap();
        latencies.push(start.elapsed());
    }

    // Percentiles require the samples in ascending order.
    latencies.sort();

    let avg_latency = latencies.iter().sum::<std::time::Duration>() / latencies.len() as u32;
    let min_latency = latencies.iter().min().unwrap();
    let max_latency = latencies.iter().max().unwrap();
    let p50 = latencies[latencies.len() / 2];
    let p95 = latencies[latencies.len() * 95 / 100];
    let p99 = latencies[latencies.len() * 99 / 100];

    println!("\n📊 Query Latency (100 queries):");
    println!(" Average: {:?}", avg_latency);
    println!(" Min: {:?}", min_latency);
    println!(" Max: {:?}", max_latency);
    println!(" P50: {:?}", p50);
    println!(" P95: {:?}", p95);
    println!(" P99: {:?}", p99);

    assert!(
        avg_latency.as_millis() < 500,
        "Average query should be <500ms"
    );
    assert!(p95.as_millis() < 1_000, "P95 query should be <1s");
}

#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_concurrent_query_throughput() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-concurrent-{}", uuid::Uuid::new_v4());
    let content = generate_document(5_000);

    // Load document
    engine.load_document(&doc_id, &content, None).await.unwrap();

    // Run 50 concurrent queries
    let start = Instant::now();
    let mut handles = vec![];
    for i in 0..50 {
        let engine = engine.clone();
        let doc_id = doc_id.clone();
        let handle = tokio::spawn(async move {
            let query = format!("query {}", i);
            engine.query(&doc_id, &query, None, 5).await.unwrap()
        });
        handles.push(handle);
    }

    let mut total_results = 0;
    for handle in handles {
        let results = handle.await.unwrap();
        total_results += results.len();
    }
    let duration = start.elapsed();
    let throughput = 50.0 / duration.as_secs_f64();

    println!("\n📊 Concurrent Query Throughput:");
    println!(" Total queries: 50");
    println!(" Duration: {:?}", duration);
    println!(" Throughput: {:.1} queries/sec", throughput);
    println!(" Total results: {}", total_results);

    assert!(
        duration.as_secs() < 10,
        "50 concurrent queries should complete in <10s"
    );
}

#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_bm25_index_build() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-bm25-{}", uuid::Uuid::new_v4());
    let content = generate_document(10_000);

    // Load document (includes BM25 indexing)
    let start = Instant::now();
    engine.load_document(&doc_id, &content, None).await.unwrap();
    let index_duration = start.elapsed();

    println!("\n📊 BM25 Index Build (10K lines):");
    println!(" Duration: {:?}", index_duration);

    // Query to verify the index works
    let start = Instant::now();
    let results = engine
        .query(&doc_id, "error handling", None, 10)
        .await
        .unwrap();
    let query_duration = start.elapsed();

    println!(
        " First query: {:?} ({} results)",
        query_duration,
        results.len()
    );

    // Verify BM25 scores are computed
    assert!(
        results.iter().any(|r| r.bm25_score.is_some()),
        "Should have BM25 scores"
    );
}

#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_full_workflow_target() {
    let engine = setup_engine().await;
    let doc_id = format!("perf-workflow-{}", uuid::Uuid::new_v4());
    let content = generate_document(1_000);

    // Full workflow: load → query. The LLM dispatch step a real pipeline would run
    // next is intentionally excluded from this timing.
    let workflow_start = Instant::now();

    // Load
    let load_start = Instant::now();
    let chunk_count = engine.load_document(&doc_id, &content, None).await.unwrap();
    let load_duration = load_start.elapsed();

    // Query
    let query_start = Instant::now();
    let results = engine
        .query(&doc_id, "error handling", None, 5)
        .await
        .unwrap();
    let query_duration = query_start.elapsed();
    let workflow_duration = workflow_start.elapsed();

    println!("\n📊 Full Workflow Performance:");
    println!(" Load: {:?} ({} chunks)", load_duration, chunk_count);
    println!(" Query: {:?} ({} results)", query_duration, results.len());
    println!(" Total: {:?}", workflow_duration);

    // Target: <500ms for the workflow (excluding LLM dispatch).
    // This test reports the outcome instead of failing when the target is missed.
    println!("\n🎯 Performance Target:");
    if workflow_duration.as_millis() < 500 {
        println!(
            " ✅ PASS - Completed in {:?} (<500ms target)",
            workflow_duration
        );
    } else {
        println!(
            " ⚠️ SLOW - Completed in {:?} (target: <500ms)",
            workflow_duration
        );
        println!("\n Note: Performance target not met but this may be acceptable");
        println!(" Consider optimizations if this becomes a bottleneck");
    }
}

#[tokio::test]
#[ignore] // Requires SurrealDB
async fn perf_memory_efficiency() {
    let engine = setup_engine().await;

    // Exercise progressively larger documents; memory usage itself is not measured
    // here and should be monitored externally (e.g. with `top` or a profiler).
    println!("\n📊 Memory Efficiency Test:");
    for doc_size in [100, 1_000, 10_000] {
        let doc_id = format!("perf-mem-{}-{}", doc_size, uuid::Uuid::new_v4());
        let content = generate_document(doc_size);
        let chunk_count = engine.load_document(&doc_id, &content, None).await.unwrap();

        // Query to ensure everything works
        let results = engine.query(&doc_id, "test query", None, 5).await.unwrap();
        println!(
            " {} lines: {} chunks, {} results",
            doc_size,
            chunk_count,
            results.len()
        );
    }

    println!(" ✓ Memory test completed (manual monitoring recommended)");
}