Vapora/crates/vapora-rlm/tests/integration_test.rs
2026-02-16 05:09:51 +00:00

316 lines
8.8 KiB
Rust

// RLM Integration Tests
// Phase 1: Storage + Chunking tests
// The storage tests require SurrealDB to be running, so they're marked with #[ignore];
// the chunking tests need no database and always run.
use chrono::Utc;
use surrealdb::opt::auth::Root;
use vapora_rlm::chunking::{
create_chunker, Chunker, ChunkingConfig, ChunkingStrategy, FixedChunker, SemanticChunker,
};
use vapora_rlm::storage::{Buffer, Chunk, ExecutionHistory, Storage, SurrealDBStorage};
/// Round-trips a single `Chunk` through the SurrealDB-backed storage:
/// save, fetch by chunk id, fetch by document id, delete, and confirm the
/// delete took effect.
///
/// Connects to `127.0.0.1:8000` as `root`/`root` and uses namespace `vapora`,
/// database `test`.
#[tokio::test]
#[ignore] // Requires SurrealDB
async fn test_storage_chunk_persistence() {
    // Connect to SurrealDB
    let db = surrealdb::Surreal::new::<surrealdb::engine::remote::ws::Ws>("127.0.0.1:8000")
        .await
        .expect("Failed to connect to SurrealDB");
    db.signin(Root {
        username: "root",
        password: "root",
    })
    .await
    .expect("Failed to sign in");
    db.use_ns("vapora")
        .use_db("test")
        .await
        .expect("Failed to use namespace/database");
    let storage = SurrealDBStorage::new(db);

    // Create a test chunk
    let chunk = Chunk {
        chunk_id: "test-chunk-1".to_string(),
        doc_id: "test-doc-1".to_string(),
        content: "This is a test chunk".to_string(),
        embedding: Some(vec![0.1, 0.2, 0.3, 0.4, 0.5]),
        start_idx: 0,
        end_idx: 20,
        metadata: None,
        created_at: Utc::now().to_rfc3339(),
    };

    // Drop anything a previously aborted run left behind. The cleanup at the
    // end of this test is skipped whenever an assertion panics, so without
    // this step a single failure would poison every later run.
    storage
        .delete_chunks(&chunk.doc_id)
        .await
        .expect("Failed to delete stale chunks");

    // Save chunk
    storage
        .save_chunk(chunk.clone())
        .await
        .expect("Failed to save chunk");

    // Retrieve chunk by id; a missing chunk fails with a descriptive message.
    let retrieved = storage
        .get_chunk(&chunk.chunk_id)
        .await
        .expect("Failed to get chunk")
        .expect("Saved chunk was not found");
    assert_eq!(retrieved.chunk_id, chunk.chunk_id);
    assert_eq!(retrieved.content, chunk.content);

    // Get chunks by doc_id
    let chunks = storage
        .get_chunks(&chunk.doc_id)
        .await
        .expect("Failed to get chunks");
    assert!(!chunks.is_empty());
    assert_eq!(chunks[0].chunk_id, chunk.chunk_id);

    // Delete chunks
    storage
        .delete_chunks(&chunk.doc_id)
        .await
        .expect("Failed to delete chunks");

    // Confirm the delete actually removed the document's chunks.
    let remaining = storage
        .get_chunks(&chunk.doc_id)
        .await
        .expect("Failed to get chunks after delete");
    assert!(
        remaining.is_empty(),
        "delete_chunks left chunks behind for the document"
    );
}
/// Saves a `Buffer` through the SurrealDB-backed storage and reads it back
/// by its id, checking that the id and content survive the round trip.
///
/// Connects to `127.0.0.1:8000` as `root`/`root` and uses namespace `vapora`,
/// database `test`.
#[tokio::test]
#[ignore] // Requires SurrealDB
async fn test_storage_buffer_operations() {
    // Open the WebSocket connection, authenticate, and select the test database.
    let client = surrealdb::Surreal::new::<surrealdb::engine::remote::ws::Ws>("127.0.0.1:8000")
        .await
        .expect("Failed to connect to SurrealDB");
    let credentials = Root {
        username: "root",
        password: "root",
    };
    client
        .signin(credentials)
        .await
        .expect("Failed to sign in");
    client
        .use_ns("vapora")
        .use_db("test")
        .await
        .expect("Failed to use namespace/database");
    let store = SurrealDBStorage::new(client);

    // Fixture: a buffer with no metadata and no expiry.
    let original = Buffer {
        buffer_id: "test-buffer-1".to_string(),
        content: "Large buffer content".to_string(),
        metadata: None,
        expires_at: None,
        created_at: Utc::now().to_rfc3339(),
    };

    // Persist a copy so the original stays available for comparison.
    store
        .save_buffer(original.clone())
        .await
        .expect("Failed to save buffer");

    // Read it back by id and compare the fields that must round-trip.
    let lookup = store
        .get_buffer(&original.buffer_id)
        .await
        .expect("Failed to get buffer");
    assert!(lookup.is_some());
    let fetched = lookup.unwrap();
    assert_eq!(fetched.buffer_id, original.buffer_id);
    assert_eq!(fetched.content, original.content);
}
/// Saves one `ExecutionHistory` record through the SurrealDB-backed storage
/// and checks that listing executions for its document returns it first and
/// marked successful.
///
/// Connects to `127.0.0.1:8000` as `root`/`root` and uses namespace `vapora`,
/// database `test`.
#[tokio::test]
#[ignore] // Requires SurrealDB
async fn test_storage_execution_history() {
    // Open the WebSocket connection, authenticate, and select the test database.
    let client = surrealdb::Surreal::new::<surrealdb::engine::remote::ws::Ws>("127.0.0.1:8000")
        .await
        .expect("Failed to connect to SurrealDB");
    let credentials = Root {
        username: "root",
        password: "root",
    };
    client
        .signin(credentials)
        .await
        .expect("Failed to sign in");
    client
        .use_ns("vapora")
        .use_db("test")
        .await
        .expect("Failed to use namespace/database");
    let store = SurrealDBStorage::new(client);

    // Fixture: a successful execution; both timestamps share one instant.
    let timestamp = Utc::now().to_rfc3339();
    let record = ExecutionHistory {
        execution_id: "test-exec-1".to_string(),
        doc_id: "test-doc-1".to_string(),
        query: "test query".to_string(),
        chunks_used: vec!["chunk-1".to_string(), "chunk-2".to_string()],
        result: Some("test result".to_string()),
        duration_ms: 1000,
        cost_cents: 0.5,
        provider: Some("claude".to_string()),
        success: true,
        error_message: None,
        metadata: None,
        created_at: timestamp.clone(),
        executed_at: timestamp,
    };

    // Persist a copy so the fixture stays available for comparison.
    store
        .save_execution(record.clone())
        .await
        .expect("Failed to save execution");

    // List up to 10 executions for the document and inspect the first one.
    let history = store
        .get_executions(&record.doc_id, 10)
        .await
        .expect("Failed to get executions");
    assert!(!history.is_empty());
    let first = &history[0];
    assert_eq!(first.execution_id, record.execution_id);
    assert!(first.success);
}
/// Stores two chunks with different embeddings and checks that searching
/// with a query vector close to the first chunk's embedding returns that
/// chunk as the top result.
///
/// Connects to `127.0.0.1:8000` as `root`/`root` and uses namespace `vapora`,
/// database `test`. The test chunks are removed both before the test (in case
/// an earlier run aborted) and before the assertions run, so a failing
/// assertion cannot leave them behind.
#[tokio::test]
#[ignore] // Requires SurrealDB
async fn test_storage_embedding_search() {
    // Single source of truth for the document id used by save and cleanup.
    const DOC_ID: &str = "emb-doc-1";

    let db = surrealdb::Surreal::new::<surrealdb::engine::remote::ws::Ws>("127.0.0.1:8000")
        .await
        .expect("Failed to connect to SurrealDB");
    db.signin(Root {
        username: "root",
        password: "root",
    })
    .await
    .expect("Failed to sign in");
    db.use_ns("vapora")
        .use_db("test")
        .await
        .expect("Failed to use namespace/database");
    let storage = SurrealDBStorage::new(db);

    // Remove chunks left over from a previously aborted run so they cannot
    // show up as extra search hits.
    storage
        .delete_chunks(DOC_ID)
        .await
        .expect("Failed to delete stale chunks");

    // Create test chunks with embeddings
    let chunk1 = Chunk {
        chunk_id: "emb-chunk-1".to_string(),
        doc_id: DOC_ID.to_string(),
        content: "Test content 1".to_string(),
        embedding: Some(vec![0.9, 0.1, 0.1]),
        start_idx: 0,
        end_idx: 14,
        metadata: None,
        created_at: Utc::now().to_rfc3339(),
    };
    let chunk2 = Chunk {
        chunk_id: "emb-chunk-2".to_string(),
        doc_id: DOC_ID.to_string(),
        content: "Test content 2".to_string(),
        embedding: Some(vec![0.1, 0.9, 0.1]),
        start_idx: 14,
        end_idx: 28,
        metadata: None,
        created_at: Utc::now().to_rfc3339(),
    };
    // chunk1 is compared against the search result later, so save a copy;
    // chunk2 is not used again and can be moved.
    storage
        .save_chunk(chunk1.clone())
        .await
        .expect("Failed to save chunk1");
    storage
        .save_chunk(chunk2)
        .await
        .expect("Failed to save chunk2");

    // Search by embedding (query similar to chunk1). Keep the raw result so
    // cleanup can run before anything below is allowed to panic.
    let query_embedding = vec![1.0, 0.0, 0.0];
    let search_outcome = storage.search_by_embedding(&query_embedding, 2).await;

    // Cleanup
    storage
        .delete_chunks(DOC_ID)
        .await
        .expect("Failed to delete chunks");

    let results = search_outcome.expect("Failed to search by embedding");
    assert!(!results.is_empty());
    // First result should be chunk1 (highest similarity)
    assert_eq!(results[0].chunk_id, chunk1.chunk_id);
}
/// Builds a chunker from a `Fixed`-strategy config (chunk_size 100,
/// overlap 20) and chunks 250 repeated `a` characters, expecting at least two
/// chunks, a first chunk no longer than 100, and a second chunk that starts
/// before index 100.
#[test]
fn test_chunking_fixed() {
    let settings = ChunkingConfig {
        strategy: ChunkingStrategy::Fixed,
        chunk_size: 100,
        overlap: 20,
    };
    let chunker = create_chunker(&settings);

    let input = "a".repeat(250);
    let pieces = chunker.chunk(&input).expect("Failed to chunk");

    assert!(pieces.len() >= 2);
    let (first, second) = (&pieces[0], &pieces[1]);
    assert!(first.content.len() <= 100);
    assert!(second.start_idx < 100); // Overlap present
}
/// Builds a chunker from a `Semantic`-strategy config (chunk_size 50,
/// overlap 10) and chunks five short sentences, expecting a non-empty result
/// in which no chunk has empty content.
#[test]
fn test_chunking_semantic() {
    let settings = ChunkingConfig {
        strategy: ChunkingStrategy::Semantic,
        chunk_size: 50,
        overlap: 10,
    };
    let chunker = create_chunker(&settings);

    let text = "Sentence one. Sentence two! Sentence three? Sentence four. Sentence five.";
    let pieces = chunker.chunk(text).expect("Failed to chunk");

    assert!(!pieces.is_empty());
    // Semantic chunking should respect sentence boundaries
    let has_blank_piece = pieces.iter().any(|piece| piece.content.is_empty());
    assert!(!has_blank_piece);
}
/// Builds a chunker from a `Code`-strategy config (chunk_size 100,
/// overlap 20) and chunks a tiny Rust snippet, expecting at least one chunk.
#[test]
fn test_chunking_code() {
    let settings = ChunkingConfig {
        strategy: ChunkingStrategy::Code,
        chunk_size: 100,
        overlap: 20,
    };
    let chunker = create_chunker(&settings);

    // Raw string: the snippet's lines start at column 0 and are part of the input.
    let snippet = r#"
fn main() {
println!("Hello, world!");
}
"#;
    let pieces = chunker.chunk(snippet).expect("Failed to chunk");

    assert!(!pieces.is_empty());
}
/// Uses `FixedChunker::new(10, 2)` directly on a 20-character string and
/// expects three chunks, the first covering indices 0..10 with content
/// `0123456789`.
#[test]
fn test_fixed_chunker_direct() {
    let input = "0123456789ABCDEFGHIJ";
    let pieces = FixedChunker::new(10, 2)
        .chunk(input)
        .expect("Failed to chunk");

    assert_eq!(pieces.len(), 3);

    let head = &pieces[0];
    assert_eq!(head.content, "0123456789");
    assert_eq!(head.start_idx, 0);
    assert_eq!(head.end_idx, 10);
}
/// Uses `SemanticChunker::new(50, 10)` directly on three short sentences and
/// expects a non-empty result in which every chunk's `end_idx` is greater than
/// its `start_idx`.
#[test]
fn test_semantic_chunker_direct() {
    let text = "First sentence. Second sentence! Third sentence?";
    let pieces = SemanticChunker::new(50, 10)
        .chunk(text)
        .expect("Failed to chunk");

    assert!(!pieces.is_empty());
    let spans_are_forward = pieces.iter().all(|piece| piece.start_idx < piece.end_idx);
    assert!(spans_are_forward);
}