Vapora/crates/vapora-rlm/tests/bm25_debug_test.rs

75 lines
2.2 KiB
Rust
Raw Normal View History

2026-02-16 05:09:51 +00:00
// BM25 Debug Test - Verify indexing and search work
use vapora_rlm::search::bm25::BM25Index;
use vapora_rlm::storage::Chunk;
/// Smoke test for the BM25 index: add one chunk, commit, then query for
/// "error handling" and expect that chunk back as the first hit with a
/// strictly positive score. Progress is printed along the way so a failing
/// run shows how far indexing/search got.
#[test]
fn test_bm25_basic_functionality() {
    let bm25 = BM25Index::new().unwrap();

    // The single document under test.
    let doc = Chunk {
        chunk_id: String::from("test-1"),
        doc_id: String::from("doc-1"),
        content: String::from("error handling patterns in Rust programming"),
        embedding: None,
        start_idx: 0,
        end_idx: 42,
        metadata: None,
        created_at: chrono::Utc::now().to_rfc3339(),
    };

    println!("Adding document: {}", doc.content);
    bm25.add_document(&doc).unwrap();

    // The test commits before searching.
    println!("Committing index...");
    bm25.commit().unwrap();

    println!("Searching for 'error handling'...");
    let hits = bm25.search("error handling", 5).unwrap();
    println!("BM25 Results: {} found", hits.len());

    // Dump every hit with a 1-based rank for easier debugging.
    for (hit, rank) in hits.iter().zip(1..) {
        println!(
            " Result {}: chunk_id={}, score={}",
            rank, hit.chunk_id, hit.score
        );
    }

    assert!(!hits.is_empty(), "BM25 search should find the document");

    // Safe to index: the assertion above guarantees at least one hit.
    let top = &hits[0];
    assert_eq!(top.chunk_id, "test-1");
    assert!(top.score > 0.0);
}
/// Indexes five chunks of the same document, each containing the phrase
/// "error handling", commits, and then searches for that phrase with a limit
/// of 10. The test only requires that at least one and at most five results
/// come back.
#[test]
fn test_bm25_multiple_documents() {
    let bm25 = BM25Index::new().unwrap();

    // Lazily describe the five chunks; each one is built right before it is
    // added to the index. Chunk `n` spans [n * 100, (n + 1) * 100).
    let chunks = (0..5).map(|n| Chunk {
        chunk_id: format!("chunk-{}", n),
        doc_id: String::from("doc-1"),
        content: format!("Line {}: Sample content with error handling patterns", n),
        embedding: None,
        start_idx: n * 100,
        end_idx: (n + 1) * 100,
        metadata: None,
        created_at: chrono::Utc::now().to_rfc3339(),
    });

    for chunk in chunks {
        bm25.add_document(&chunk).unwrap();
    }
    bm25.commit().unwrap();

    let hits = bm25.search("error handling", 10).unwrap();
    let found = hits.len();
    println!("Found {} results for 'error handling'", found);

    assert!(!hits.is_empty(), "Should find documents");
    assert!(found <= 5, "Should not return more than available");
}