// RLM Integration Tests // Phase 1: Storage + Chunking tests // These tests require SurrealDB to be running, so they're marked with #[ignore] use chrono::Utc; use surrealdb::opt::auth::Root; use vapora_rlm::chunking::{ create_chunker, Chunker, ChunkingConfig, ChunkingStrategy, FixedChunker, SemanticChunker, }; use vapora_rlm::storage::{Buffer, Chunk, ExecutionHistory, Storage, SurrealDBStorage}; #[tokio::test] #[ignore] // Requires SurrealDB async fn test_storage_chunk_persistence() { // Connect to SurrealDB let db = surrealdb::Surreal::new::("127.0.0.1:8000") .await .expect("Failed to connect to SurrealDB"); db.signin(Root { username: "root", password: "root", }) .await .expect("Failed to sign in"); db.use_ns("vapora") .use_db("test") .await .expect("Failed to use namespace/database"); let storage = SurrealDBStorage::new(db); // Create a test chunk let chunk = Chunk { chunk_id: "test-chunk-1".to_string(), doc_id: "test-doc-1".to_string(), content: "This is a test chunk".to_string(), embedding: Some(vec![0.1, 0.2, 0.3, 0.4, 0.5]), start_idx: 0, end_idx: 20, metadata: None, created_at: Utc::now().to_rfc3339(), }; // Save chunk storage .save_chunk(chunk.clone()) .await .expect("Failed to save chunk"); // Retrieve chunk let retrieved = storage .get_chunk(&chunk.chunk_id) .await .expect("Failed to get chunk"); assert!(retrieved.is_some()); let retrieved = retrieved.unwrap(); assert_eq!(retrieved.chunk_id, chunk.chunk_id); assert_eq!(retrieved.content, chunk.content); // Get chunks by doc_id let chunks = storage .get_chunks(&chunk.doc_id) .await .expect("Failed to get chunks"); assert!(!chunks.is_empty()); assert_eq!(chunks[0].chunk_id, chunk.chunk_id); // Delete chunks storage .delete_chunks(&chunk.doc_id) .await .expect("Failed to delete chunks"); } #[tokio::test] #[ignore] // Requires SurrealDB async fn test_storage_buffer_operations() { let db = surrealdb::Surreal::new::("127.0.0.1:8000") .await .expect("Failed to connect to SurrealDB"); db.signin(Root { username: 
"root", password: "root", }) .await .expect("Failed to sign in"); db.use_ns("vapora") .use_db("test") .await .expect("Failed to use namespace/database"); let storage = SurrealDBStorage::new(db); // Create a test buffer let buffer = Buffer { buffer_id: "test-buffer-1".to_string(), content: "Large buffer content".to_string(), metadata: None, expires_at: None, created_at: Utc::now().to_rfc3339(), }; // Save buffer storage .save_buffer(buffer.clone()) .await .expect("Failed to save buffer"); // Retrieve buffer let retrieved = storage .get_buffer(&buffer.buffer_id) .await .expect("Failed to get buffer"); assert!(retrieved.is_some()); let retrieved = retrieved.unwrap(); assert_eq!(retrieved.buffer_id, buffer.buffer_id); assert_eq!(retrieved.content, buffer.content); } #[tokio::test] #[ignore] // Requires SurrealDB async fn test_storage_execution_history() { let db = surrealdb::Surreal::new::("127.0.0.1:8000") .await .expect("Failed to connect to SurrealDB"); db.signin(Root { username: "root", password: "root", }) .await .expect("Failed to sign in"); db.use_ns("vapora") .use_db("test") .await .expect("Failed to use namespace/database"); let storage = SurrealDBStorage::new(db); // Create a test execution let now = Utc::now().to_rfc3339(); let execution = ExecutionHistory { execution_id: "test-exec-1".to_string(), doc_id: "test-doc-1".to_string(), query: "test query".to_string(), chunks_used: vec!["chunk-1".to_string(), "chunk-2".to_string()], result: Some("test result".to_string()), duration_ms: 1000, cost_cents: 0.5, provider: Some("claude".to_string()), success: true, error_message: None, metadata: None, created_at: now.clone(), executed_at: now, }; // Save execution storage .save_execution(execution.clone()) .await .expect("Failed to save execution"); // Retrieve executions let executions = storage .get_executions(&execution.doc_id, 10) .await .expect("Failed to get executions"); assert!(!executions.is_empty()); assert_eq!(executions[0].execution_id, 
execution.execution_id); assert!(executions[0].success); } #[tokio::test] #[ignore] // Requires SurrealDB async fn test_storage_embedding_search() { let db = surrealdb::Surreal::new::("127.0.0.1:8000") .await .expect("Failed to connect to SurrealDB"); db.signin(Root { username: "root", password: "root", }) .await .expect("Failed to sign in"); db.use_ns("vapora") .use_db("test") .await .expect("Failed to use namespace/database"); let storage = SurrealDBStorage::new(db); // Create test chunks with embeddings let chunk1 = Chunk { chunk_id: "emb-chunk-1".to_string(), doc_id: "emb-doc-1".to_string(), content: "Test content 1".to_string(), embedding: Some(vec![0.9, 0.1, 0.1]), start_idx: 0, end_idx: 14, metadata: None, created_at: Utc::now().to_rfc3339(), }; let chunk2 = Chunk { chunk_id: "emb-chunk-2".to_string(), doc_id: "emb-doc-1".to_string(), content: "Test content 2".to_string(), embedding: Some(vec![0.1, 0.9, 0.1]), start_idx: 14, end_idx: 28, metadata: None, created_at: Utc::now().to_rfc3339(), }; storage .save_chunk(chunk1.clone()) .await .expect("Failed to save chunk1"); storage .save_chunk(chunk2.clone()) .await .expect("Failed to save chunk2"); // Search by embedding (query similar to chunk1) let query_embedding = vec![1.0, 0.0, 0.0]; let results = storage .search_by_embedding(&query_embedding, 2) .await .expect("Failed to search by embedding"); assert!(!results.is_empty()); // First result should be chunk1 (highest similarity) assert_eq!(results[0].chunk_id, chunk1.chunk_id); // Cleanup storage .delete_chunks("emb-doc-1") .await .expect("Failed to delete chunks"); } #[test] fn test_chunking_fixed() { let config = ChunkingConfig { strategy: ChunkingStrategy::Fixed, chunk_size: 100, overlap: 20, }; let chunker = create_chunker(&config); let content = "a".repeat(250); let chunks = chunker.chunk(&content).expect("Failed to chunk"); assert!(chunks.len() >= 2); assert!(chunks[0].content.len() <= 100); assert!(chunks[1].start_idx < 100); // Overlap present } 
/// Semantic strategy through the factory: a five-sentence input must produce
/// at least one chunk, and no chunk may be empty.
#[test]
fn test_chunking_semantic() {
    let text = "Sentence one. Sentence two! Sentence three? Sentence four. Sentence five.";
    let settings = ChunkingConfig {
        strategy: ChunkingStrategy::Semantic,
        chunk_size: 50,
        overlap: 10,
    };

    let splitter = create_chunker(&settings);
    let pieces = splitter.chunk(text).expect("Failed to chunk");

    assert!(!pieces.is_empty());
    // Semantic chunking should respect sentence boundaries
    assert!(!pieces.iter().any(|piece| piece.content.is_empty()));
}

/// Code strategy through the factory: a small Rust snippet must produce at
/// least one chunk.
#[test]
fn test_chunking_code() {
    let snippet = r#" fn main() { println!("Hello, world!"); } "#;
    let settings = ChunkingConfig {
        strategy: ChunkingStrategy::Code,
        chunk_size: 100,
        overlap: 20,
    };

    let splitter = create_chunker(&settings);
    let pieces = splitter.chunk(snippet).expect("Failed to chunk");

    assert!(!pieces.is_empty());
}

/// `FixedChunker` constructed directly with size 10 and overlap 2: a
/// 20-character input is expected to split into three chunks, the first
/// covering indices 0..10.
#[test]
fn test_fixed_chunker_direct() {
    let text = "0123456789ABCDEFGHIJ";
    let pieces = FixedChunker::new(10, 2)
        .chunk(text)
        .expect("Failed to chunk");

    assert_eq!(pieces.len(), 3);

    let head = &pieces[0];
    assert_eq!(head.content, "0123456789");
    assert_eq!(head.start_idx, 0);
    assert_eq!(head.end_idx, 10);
}

/// `SemanticChunker` constructed directly with size 50 and overlap 10: every
/// chunk it returns must span a non-empty index range.
#[test]
fn test_semantic_chunker_direct() {
    let text = "First sentence. Second sentence! Third sentence?";
    let pieces = SemanticChunker::new(50, 10)
        .chunk(text)
        .expect("Failed to chunk");

    assert!(!pieces.is_empty());
    assert!(pieces.iter().all(|piece| piece.start_idx < piece.end_idx));
}