// Vapora/crates/vapora-rlm/tests/integration_test.rs

// RLM Integration Tests
// Phase 1: Storage + Chunking tests
// The storage tests require SurrealDB to be running, so they are marked with
// #[ignore]; the chunking tests are not ignored and do not touch the database.

use chrono::Utc;
use surrealdb::opt::auth::Root;
use vapora_rlm::chunking::{
    create_chunker, Chunker, ChunkingConfig, ChunkingStrategy, FixedChunker, SemanticChunker,
};
use vapora_rlm::storage::{Buffer, Chunk, ExecutionHistory, Storage, SurrealDBStorage};

/// Round-trips a single `Chunk` through `SurrealDBStorage`: save it, fetch it
/// by chunk id, list it by document id, and finally delete it.
///
/// Ignored by default because it connects to a SurrealDB server at
/// `127.0.0.1:8000` and signs in with the `root`/`root` credentials.
#[tokio::test]
#[ignore] // Requires SurrealDB
async fn test_storage_chunk_persistence() {
    // Connect to SurrealDB
    let db = surrealdb::Surreal::new::<surrealdb::engine::remote::ws::Ws>("127.0.0.1:8000")
        .await
        .expect("Failed to connect to SurrealDB");
    db.signin(Root {
        username: "root",
        password: "root",
    })
    .await
    .expect("Failed to sign in");
    db.use_ns("vapora")
        .use_db("test")
        .await
        .expect("Failed to use namespace/database");

    let storage = SurrealDBStorage::new(db);

    // Create a test chunk
    let chunk = Chunk {
        chunk_id: "test-chunk-1".to_string(),
        doc_id: "test-doc-1".to_string(),
        content: "This is a test chunk".to_string(),
        embedding: Some(vec![0.1, 0.2, 0.3, 0.4, 0.5]),
        start_idx: 0,
        end_idx: 20,
        metadata: None,
        created_at: Utc::now().to_rfc3339(),
    };

    // Save chunk (cloned because the original is still needed for the
    // comparisons below)
    storage
        .save_chunk(chunk.clone())
        .await
        .expect("Failed to save chunk");

    // Retrieve chunk: the first `expect` covers the storage call failing, the
    // second covers the lookup succeeding but finding nothing.
    let retrieved = storage
        .get_chunk(&chunk.chunk_id)
        .await
        .expect("Failed to get chunk")
        .expect("Saved chunk was not found by its chunk_id");
    assert_eq!(retrieved.chunk_id, chunk.chunk_id);
    assert_eq!(retrieved.content, chunk.content);

    // Get chunks by doc_id
    let chunks = storage
        .get_chunks(&chunk.doc_id)
        .await
        .expect("Failed to get chunks");
    assert!(!chunks.is_empty());
    assert_eq!(chunks[0].chunk_id, chunk.chunk_id);

    // Delete chunks so the document does not linger in the test database
    storage
        .delete_chunks(&chunk.doc_id)
        .await
        .expect("Failed to delete chunks");
}

/// Saves a `Buffer` through `SurrealDBStorage` and reads it back by id,
/// checking that the id and content survive the round trip.
///
/// Ignored by default because it connects to a SurrealDB server at
/// `127.0.0.1:8000` and signs in with the `root`/`root` credentials.
/// Note that the saved buffer is not removed when the test finishes.
#[tokio::test]
#[ignore] // Requires SurrealDB
async fn test_storage_buffer_operations() {
    let db = surrealdb::Surreal::new::<surrealdb::engine::remote::ws::Ws>("127.0.0.1:8000")
        .await
        .expect("Failed to connect to SurrealDB");
    db.signin(Root {
        username: "root",
        password: "root",
    })
    .await
    .expect("Failed to sign in");
    db.use_ns("vapora")
        .use_db("test")
        .await
        .expect("Failed to use namespace/database");

    let storage = SurrealDBStorage::new(db);

    // Create a test buffer
    let buffer = Buffer {
        buffer_id: "test-buffer-1".to_string(),
        content: "Large buffer content".to_string(),
        metadata: None,
        expires_at: None,
        created_at: Utc::now().to_rfc3339(),
    };

    // Save buffer (cloned because the original is compared against below)
    storage
        .save_buffer(buffer.clone())
        .await
        .expect("Failed to save buffer");

    // Retrieve buffer: the first `expect` covers the storage call failing, the
    // second covers the lookup succeeding but finding nothing.
    let retrieved = storage
        .get_buffer(&buffer.buffer_id)
        .await
        .expect("Failed to get buffer")
        .expect("Saved buffer was not found by its buffer_id");
    assert_eq!(retrieved.buffer_id, buffer.buffer_id);
    assert_eq!(retrieved.content, buffer.content);
}

/// Persists one `ExecutionHistory` record and reads it back through
/// `get_executions`, checking the id and the success flag of the first hit.
///
/// Ignored by default: it connects to a SurrealDB server at `127.0.0.1:8000`.
#[tokio::test]
#[ignore] // Requires SurrealDB
async fn test_storage_execution_history() {
    // Connect over the websocket engine, authenticate as root, and select the
    // test namespace/database before handing the client to the storage layer.
    let client = surrealdb::Surreal::new::<surrealdb::engine::remote::ws::Ws>("127.0.0.1:8000")
        .await
        .expect("Failed to connect to SurrealDB");
    let credentials = Root {
        username: "root",
        password: "root",
    };
    client
        .signin(credentials)
        .await
        .expect("Failed to sign in");
    client
        .use_ns("vapora")
        .use_db("test")
        .await
        .expect("Failed to use namespace/database");

    let storage = SurrealDBStorage::new(client);

    // A single timestamp is shared by `created_at` and `executed_at`.
    let timestamp = Utc::now().to_rfc3339();
    let record = ExecutionHistory {
        execution_id: "test-exec-1".to_string(),
        doc_id: "test-doc-1".to_string(),
        query: "test query".to_string(),
        chunks_used: vec!["chunk-1".to_string(), "chunk-2".to_string()],
        result: Some("test result".to_string()),
        duration_ms: 1000,
        cost_cents: 0.5,
        provider: Some("claude".to_string()),
        success: true,
        error_message: None,
        metadata: None,
        created_at: timestamp.clone(),
        executed_at: timestamp,
    };

    // Store a copy; the original stays around for the comparisons below.
    storage
        .save_execution(record.clone())
        .await
        .expect("Failed to save execution");

    // Fetch the executions recorded for the document (the `10` is presumably
    // a result limit — confirm against the `Storage` trait).
    let history = storage
        .get_executions(&record.doc_id, 10)
        .await
        .expect("Failed to get executions");
    assert!(!history.is_empty());

    let first = &history[0];
    assert_eq!(first.execution_id, record.execution_id);
    assert!(first.success);
}

/// Stores two chunks with different embeddings and checks that an embedding
/// search with a query vector along the first axis returns the first chunk
/// at the top of the results.
///
/// Ignored by default: it connects to a SurrealDB server at `127.0.0.1:8000`.
#[tokio::test]
#[ignore] // Requires SurrealDB
async fn test_storage_embedding_search() {
    // Connect over the websocket engine, authenticate as root, and select the
    // test namespace/database before handing the client to the storage layer.
    let client = surrealdb::Surreal::new::<surrealdb::engine::remote::ws::Ws>("127.0.0.1:8000")
        .await
        .expect("Failed to connect to SurrealDB");
    let credentials = Root {
        username: "root",
        password: "root",
    };
    client
        .signin(credentials)
        .await
        .expect("Failed to sign in");
    client
        .use_ns("vapora")
        .use_db("test")
        .await
        .expect("Failed to use namespace/database");

    let storage = SurrealDBStorage::new(client);

    // Two chunks of the same document whose embeddings are dominated by
    // different components.
    let chunk1 = Chunk {
        chunk_id: "emb-chunk-1".to_string(),
        doc_id: "emb-doc-1".to_string(),
        content: "Test content 1".to_string(),
        embedding: Some(vec![0.9, 0.1, 0.1]),
        start_idx: 0,
        end_idx: 14,
        metadata: None,
        created_at: Utc::now().to_rfc3339(),
    };
    let chunk2 = Chunk {
        chunk_id: "emb-chunk-2".to_string(),
        doc_id: "emb-doc-1".to_string(),
        content: "Test content 2".to_string(),
        embedding: Some(vec![0.1, 0.9, 0.1]),
        start_idx: 14,
        end_idx: 28,
        metadata: None,
        created_at: Utc::now().to_rfc3339(),
    };

    // Persist both chunks in order, each with its own failure message.
    for (chunk, failure) in [
        (&chunk1, "Failed to save chunk1"),
        (&chunk2, "Failed to save chunk2"),
    ] {
        storage.save_chunk(chunk.clone()).await.expect(failure);
    }

    // Query vector pointing along the first axis, like chunk1's embedding
    // (the `2` is presumably a result limit — confirm against `Storage`).
    let probe = vec![1.0, 0.0, 0.0];
    let hits = storage
        .search_by_embedding(&probe, 2)
        .await
        .expect("Failed to search by embedding");

    assert!(!hits.is_empty());
    // chunk1 is expected to rank first.
    assert_eq!(hits[0].chunk_id, chunk1.chunk_id);

    // Remove the document's chunks again.
    storage
        .delete_chunks("emb-doc-1")
        .await
        .expect("Failed to delete chunks");
}

/// Checks that the chunker built by `create_chunker` for the `Fixed` strategy
/// splits 250 bytes into at least two chunks, that the first chunk is at most
/// 100 bytes long, and that the second chunk starts before offset 100.
#[test]
fn test_chunking_fixed() {
    let settings = ChunkingConfig {
        strategy: ChunkingStrategy::Fixed,
        chunk_size: 100,
        overlap: 20,
    };
    let input = "a".repeat(250);

    let pieces = create_chunker(&settings)
        .chunk(&input)
        .expect("Failed to chunk");

    assert!(pieces.len() > 1);
    let (first, second) = (&pieces[0], &pieces[1]);
    assert!(first.content.len() <= 100);
    // The second chunk begins inside the first 100 bytes, i.e. it overlaps.
    assert!(second.start_idx < 100);
}

/// Checks that the chunker built by `create_chunker` for the `Semantic`
/// strategy yields at least one chunk for a five-sentence text and that no
/// chunk has empty content.
#[test]
fn test_chunking_semantic() {
    let settings = ChunkingConfig {
        strategy: ChunkingStrategy::Semantic,
        chunk_size: 50,
        overlap: 10,
    };
    let text = "Sentence one. Sentence two! Sentence three? Sentence four. Sentence five.";

    let pieces = create_chunker(&settings)
        .chunk(text)
        .expect("Failed to chunk");

    assert!(!pieces.is_empty());
    // Only non-emptiness is verified here; sentence boundaries are not inspected.
    assert!(!pieces.iter().any(|piece| piece.content.is_empty()));
}

/// Checks that the chunker built by `create_chunker` for the `Code` strategy
/// produces at least one chunk for a tiny Rust snippet.
#[test]
fn test_chunking_code() {
    let settings = ChunkingConfig {
        strategy: ChunkingStrategy::Code,
        chunk_size: 100,
        overlap: 20,
    };
    // A minimal `main` function, surrounded by a leading and trailing newline.
    let snippet = concat!(
        "\n",
        "fn main() {\n",
        "    println!(\"Hello, world!\");\n",
        "}\n",
    );

    let pieces = create_chunker(&settings)
        .chunk(snippet)
        .expect("Failed to chunk");

    assert!(!pieces.is_empty());
}

/// Drives `FixedChunker::new(10, 2)` directly over a 20-byte string and checks
/// the chunk count plus the content and offsets of the first chunk.
#[test]
fn test_fixed_chunker_direct() {
    let input = "0123456789ABCDEFGHIJ";
    let pieces = FixedChunker::new(10, 2)
        .chunk(input)
        .expect("Failed to chunk");

    assert_eq!(pieces.len(), 3);

    // The first chunk covers the first ten bytes of the input.
    let head = &pieces[0];
    assert_eq!(head.content, "0123456789");
    assert_eq!(head.start_idx, 0);
    assert_eq!(head.end_idx, 10);
}

/// Drives `SemanticChunker::new(50, 10)` directly over three short sentences
/// and checks that every chunk ends after it starts.
#[test]
fn test_semantic_chunker_direct() {
    let text = "First sentence. Second sentence! Third sentence?";
    let pieces = SemanticChunker::new(50, 10)
        .chunk(text)
        .expect("Failed to chunk");

    assert!(!pieces.is_empty());
    // Each chunk must span a strictly positive range.
    assert!(pieces.iter().all(|piece| piece.start_idx < piece.end_idx));
}