316 lines
8.8 KiB
Rust
316 lines
8.8 KiB
Rust
|
|
// RLM Integration Tests
|
||
|
|
// Phase 1: Storage + Chunking tests
|
||
|
|
// The storage tests require a running SurrealDB instance, so they are marked with #[ignore];
// the chunking tests are not ignored and run by default.
|
||
|
|
|
||
|
|
use chrono::Utc;
|
||
|
|
use surrealdb::opt::auth::Root;
|
||
|
|
use vapora_rlm::chunking::{
|
||
|
|
create_chunker, Chunker, ChunkingConfig, ChunkingStrategy, FixedChunker, SemanticChunker,
|
||
|
|
};
|
||
|
|
use vapora_rlm::storage::{Buffer, Chunk, ExecutionHistory, Storage, SurrealDBStorage};
|
||
|
|
|
||
|
|
#[tokio::test]
|
||
|
|
#[ignore] // Requires SurrealDB
|
||
|
|
async fn test_storage_chunk_persistence() {
|
||
|
|
// Connect to SurrealDB
|
||
|
|
let db = surrealdb::Surreal::new::<surrealdb::engine::remote::ws::Ws>("127.0.0.1:8000")
|
||
|
|
.await
|
||
|
|
.expect("Failed to connect to SurrealDB");
|
||
|
|
db.signin(Root {
|
||
|
|
username: "root",
|
||
|
|
password: "root",
|
||
|
|
})
|
||
|
|
.await
|
||
|
|
.expect("Failed to sign in");
|
||
|
|
db.use_ns("vapora")
|
||
|
|
.use_db("test")
|
||
|
|
.await
|
||
|
|
.expect("Failed to use namespace/database");
|
||
|
|
|
||
|
|
let storage = SurrealDBStorage::new(db);
|
||
|
|
|
||
|
|
// Create a test chunk
|
||
|
|
let chunk = Chunk {
|
||
|
|
chunk_id: "test-chunk-1".to_string(),
|
||
|
|
doc_id: "test-doc-1".to_string(),
|
||
|
|
content: "This is a test chunk".to_string(),
|
||
|
|
embedding: Some(vec![0.1, 0.2, 0.3, 0.4, 0.5]),
|
||
|
|
start_idx: 0,
|
||
|
|
end_idx: 20,
|
||
|
|
metadata: None,
|
||
|
|
created_at: Utc::now().to_rfc3339(),
|
||
|
|
};
|
||
|
|
|
||
|
|
// Save chunk
|
||
|
|
storage
|
||
|
|
.save_chunk(chunk.clone())
|
||
|
|
.await
|
||
|
|
.expect("Failed to save chunk");
|
||
|
|
|
||
|
|
// Retrieve chunk
|
||
|
|
let retrieved = storage
|
||
|
|
.get_chunk(&chunk.chunk_id)
|
||
|
|
.await
|
||
|
|
.expect("Failed to get chunk");
|
||
|
|
assert!(retrieved.is_some());
|
||
|
|
let retrieved = retrieved.unwrap();
|
||
|
|
assert_eq!(retrieved.chunk_id, chunk.chunk_id);
|
||
|
|
assert_eq!(retrieved.content, chunk.content);
|
||
|
|
|
||
|
|
// Get chunks by doc_id
|
||
|
|
let chunks = storage
|
||
|
|
.get_chunks(&chunk.doc_id)
|
||
|
|
.await
|
||
|
|
.expect("Failed to get chunks");
|
||
|
|
assert!(!chunks.is_empty());
|
||
|
|
assert_eq!(chunks[0].chunk_id, chunk.chunk_id);
|
||
|
|
|
||
|
|
// Delete chunks
|
||
|
|
storage
|
||
|
|
.delete_chunks(&chunk.doc_id)
|
||
|
|
.await
|
||
|
|
.expect("Failed to delete chunks");
|
||
|
|
}
|
||
|
|
|
||
|
|
#[tokio::test]
|
||
|
|
#[ignore] // Requires SurrealDB
|
||
|
|
async fn test_storage_buffer_operations() {
|
||
|
|
let db = surrealdb::Surreal::new::<surrealdb::engine::remote::ws::Ws>("127.0.0.1:8000")
|
||
|
|
.await
|
||
|
|
.expect("Failed to connect to SurrealDB");
|
||
|
|
db.signin(Root {
|
||
|
|
username: "root",
|
||
|
|
password: "root",
|
||
|
|
})
|
||
|
|
.await
|
||
|
|
.expect("Failed to sign in");
|
||
|
|
db.use_ns("vapora")
|
||
|
|
.use_db("test")
|
||
|
|
.await
|
||
|
|
.expect("Failed to use namespace/database");
|
||
|
|
|
||
|
|
let storage = SurrealDBStorage::new(db);
|
||
|
|
|
||
|
|
// Create a test buffer
|
||
|
|
let buffer = Buffer {
|
||
|
|
buffer_id: "test-buffer-1".to_string(),
|
||
|
|
content: "Large buffer content".to_string(),
|
||
|
|
metadata: None,
|
||
|
|
expires_at: None,
|
||
|
|
created_at: Utc::now().to_rfc3339(),
|
||
|
|
};
|
||
|
|
|
||
|
|
// Save buffer
|
||
|
|
storage
|
||
|
|
.save_buffer(buffer.clone())
|
||
|
|
.await
|
||
|
|
.expect("Failed to save buffer");
|
||
|
|
|
||
|
|
// Retrieve buffer
|
||
|
|
let retrieved = storage
|
||
|
|
.get_buffer(&buffer.buffer_id)
|
||
|
|
.await
|
||
|
|
.expect("Failed to get buffer");
|
||
|
|
assert!(retrieved.is_some());
|
||
|
|
let retrieved = retrieved.unwrap();
|
||
|
|
assert_eq!(retrieved.buffer_id, buffer.buffer_id);
|
||
|
|
assert_eq!(retrieved.content, buffer.content);
|
||
|
|
}
|
||
|
|
|
||
|
|
#[tokio::test]
|
||
|
|
#[ignore] // Requires SurrealDB
|
||
|
|
async fn test_storage_execution_history() {
|
||
|
|
let db = surrealdb::Surreal::new::<surrealdb::engine::remote::ws::Ws>("127.0.0.1:8000")
|
||
|
|
.await
|
||
|
|
.expect("Failed to connect to SurrealDB");
|
||
|
|
db.signin(Root {
|
||
|
|
username: "root",
|
||
|
|
password: "root",
|
||
|
|
})
|
||
|
|
.await
|
||
|
|
.expect("Failed to sign in");
|
||
|
|
db.use_ns("vapora")
|
||
|
|
.use_db("test")
|
||
|
|
.await
|
||
|
|
.expect("Failed to use namespace/database");
|
||
|
|
|
||
|
|
let storage = SurrealDBStorage::new(db);
|
||
|
|
|
||
|
|
// Create a test execution
|
||
|
|
let now = Utc::now().to_rfc3339();
|
||
|
|
let execution = ExecutionHistory {
|
||
|
|
execution_id: "test-exec-1".to_string(),
|
||
|
|
doc_id: "test-doc-1".to_string(),
|
||
|
|
query: "test query".to_string(),
|
||
|
|
chunks_used: vec!["chunk-1".to_string(), "chunk-2".to_string()],
|
||
|
|
result: Some("test result".to_string()),
|
||
|
|
duration_ms: 1000,
|
||
|
|
cost_cents: 0.5,
|
||
|
|
provider: Some("claude".to_string()),
|
||
|
|
success: true,
|
||
|
|
error_message: None,
|
||
|
|
metadata: None,
|
||
|
|
created_at: now.clone(),
|
||
|
|
executed_at: now,
|
||
|
|
};
|
||
|
|
|
||
|
|
// Save execution
|
||
|
|
storage
|
||
|
|
.save_execution(execution.clone())
|
||
|
|
.await
|
||
|
|
.expect("Failed to save execution");
|
||
|
|
|
||
|
|
// Retrieve executions
|
||
|
|
let executions = storage
|
||
|
|
.get_executions(&execution.doc_id, 10)
|
||
|
|
.await
|
||
|
|
.expect("Failed to get executions");
|
||
|
|
assert!(!executions.is_empty());
|
||
|
|
assert_eq!(executions[0].execution_id, execution.execution_id);
|
||
|
|
assert!(executions[0].success);
|
||
|
|
}
|
||
|
|
|
||
|
|
#[tokio::test]
|
||
|
|
#[ignore] // Requires SurrealDB
|
||
|
|
async fn test_storage_embedding_search() {
|
||
|
|
let db = surrealdb::Surreal::new::<surrealdb::engine::remote::ws::Ws>("127.0.0.1:8000")
|
||
|
|
.await
|
||
|
|
.expect("Failed to connect to SurrealDB");
|
||
|
|
db.signin(Root {
|
||
|
|
username: "root",
|
||
|
|
password: "root",
|
||
|
|
})
|
||
|
|
.await
|
||
|
|
.expect("Failed to sign in");
|
||
|
|
db.use_ns("vapora")
|
||
|
|
.use_db("test")
|
||
|
|
.await
|
||
|
|
.expect("Failed to use namespace/database");
|
||
|
|
|
||
|
|
let storage = SurrealDBStorage::new(db);
|
||
|
|
|
||
|
|
// Create test chunks with embeddings
|
||
|
|
let chunk1 = Chunk {
|
||
|
|
chunk_id: "emb-chunk-1".to_string(),
|
||
|
|
doc_id: "emb-doc-1".to_string(),
|
||
|
|
content: "Test content 1".to_string(),
|
||
|
|
embedding: Some(vec![0.9, 0.1, 0.1]),
|
||
|
|
start_idx: 0,
|
||
|
|
end_idx: 14,
|
||
|
|
metadata: None,
|
||
|
|
created_at: Utc::now().to_rfc3339(),
|
||
|
|
};
|
||
|
|
|
||
|
|
let chunk2 = Chunk {
|
||
|
|
chunk_id: "emb-chunk-2".to_string(),
|
||
|
|
doc_id: "emb-doc-1".to_string(),
|
||
|
|
content: "Test content 2".to_string(),
|
||
|
|
embedding: Some(vec![0.1, 0.9, 0.1]),
|
||
|
|
start_idx: 14,
|
||
|
|
end_idx: 28,
|
||
|
|
metadata: None,
|
||
|
|
created_at: Utc::now().to_rfc3339(),
|
||
|
|
};
|
||
|
|
|
||
|
|
storage
|
||
|
|
.save_chunk(chunk1.clone())
|
||
|
|
.await
|
||
|
|
.expect("Failed to save chunk1");
|
||
|
|
storage
|
||
|
|
.save_chunk(chunk2.clone())
|
||
|
|
.await
|
||
|
|
.expect("Failed to save chunk2");
|
||
|
|
|
||
|
|
// Search by embedding (query similar to chunk1)
|
||
|
|
let query_embedding = vec![1.0, 0.0, 0.0];
|
||
|
|
let results = storage
|
||
|
|
.search_by_embedding(&query_embedding, 2)
|
||
|
|
.await
|
||
|
|
.expect("Failed to search by embedding");
|
||
|
|
|
||
|
|
assert!(!results.is_empty());
|
||
|
|
// First result should be chunk1 (highest similarity)
|
||
|
|
assert_eq!(results[0].chunk_id, chunk1.chunk_id);
|
||
|
|
|
||
|
|
// Cleanup
|
||
|
|
storage
|
||
|
|
.delete_chunks("emb-doc-1")
|
||
|
|
.await
|
||
|
|
.expect("Failed to delete chunks");
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Chunks 250 characters with the fixed strategy (size 100, overlap 20) via
/// `create_chunker` and checks chunk count, first chunk size and overlap.
#[test]
fn test_chunking_fixed() {
    let fixed_cfg = ChunkingConfig {
        strategy: ChunkingStrategy::Fixed,
        chunk_size: 100,
        overlap: 20,
    };
    let chunker = create_chunker(&fixed_cfg);

    let text = "a".repeat(250);
    let pieces = chunker.chunk(&text).expect("Failed to chunk");

    // 250 characters cannot fit in a single 100-character chunk.
    assert!(pieces.len() >= 2);

    let (first, second) = (&pieces[0], &pieces[1]);
    assert!(first.content.len() <= 100);
    // The second chunk starting before index 100 shows that overlap is applied.
    assert!(second.start_idx < 100);
}
|
||
|
|
|
||
|
|
/// Runs the semantic strategy (size 50, overlap 10) from `create_chunker` over
/// five short sentences and checks that chunks are produced and none is empty.
#[test]
fn test_chunking_semantic() {
    let semantic_cfg = ChunkingConfig {
        strategy: ChunkingStrategy::Semantic,
        chunk_size: 50,
        overlap: 10,
    };
    let chunker = create_chunker(&semantic_cfg);

    let text = "Sentence one. Sentence two! Sentence three? Sentence four. Sentence five.";
    let pieces = chunker.chunk(text).expect("Failed to chunk");

    assert!(!pieces.is_empty());
    // No produced chunk may have empty content.
    assert!(!pieces.iter().any(|piece| piece.content.is_empty()));
}
|
||
|
|
|
||
|
|
/// Runs the code strategy (size 100, overlap 20) from `create_chunker` over a
/// tiny Rust snippet and checks that at least one chunk is produced.
#[test]
fn test_chunking_code() {
    let code_cfg = ChunkingConfig {
        strategy: ChunkingStrategy::Code,
        chunk_size: 100,
        overlap: 20,
    };
    let chunker = create_chunker(&code_cfg);

    // Minimal program used as the input to the chunker.
    let snippet = r#"
fn main() {
    println!("Hello, world!");
}
"#;
    let pieces = chunker.chunk(snippet).expect("Failed to chunk");

    assert!(!pieces.is_empty());
}
|
||
|
|
|
||
|
|
/// Uses `FixedChunker` directly (size 10, overlap 2) on a 20-character string
/// and checks the chunk count plus the content and bounds of the first chunk.
#[test]
fn test_fixed_chunker_direct() {
    let text = "0123456789ABCDEFGHIJ";
    let pieces = FixedChunker::new(10, 2)
        .chunk(text)
        .expect("Failed to chunk");

    assert_eq!(pieces.len(), 3);

    // The first chunk covers exactly the first ten characters.
    let first = &pieces[0];
    assert_eq!(first.content, "0123456789");
    assert_eq!(first.start_idx, 0);
    assert_eq!(first.end_idx, 10);
}
|
||
|
|
|
||
|
|
/// Uses `SemanticChunker` directly (size 50, overlap 10) on three sentences
/// and checks that every chunk spans a non-empty index range.
#[test]
fn test_semantic_chunker_direct() {
    let text = "First sentence. Second sentence! Third sentence?";
    let pieces = SemanticChunker::new(50, 10)
        .chunk(text)
        .expect("Failed to chunk");

    assert!(!pieces.is_empty());
    // Each chunk must end strictly after it starts.
    assert!(pieces.iter().all(|piece| piece.start_idx < piece.end_idx));
}
|