75 lines
2.2 KiB
Rust
75 lines
2.2 KiB
Rust
|
|
// BM25 Debug Test - Verify indexing and search work
|
||
|
|
use vapora_rlm::search::bm25::BM25Index;
|
||
|
|
use vapora_rlm::storage::Chunk;
|
||
|
|
|
||
|
|
/// Indexes a single chunk, commits the index, and checks that a query for
/// two words from the chunk's content returns that chunk as the first hit
/// with a strictly positive score.
///
/// Progress messages and every returned hit are printed to stdout.
#[test]
fn test_bm25_basic_functionality() {
    let index = BM25Index::new().unwrap();

    // NOTE(review): the content below is 43 bytes long while `end_idx` is 42;
    // confirm whether `end_idx` is meant to be inclusive.
    let doc = Chunk {
        chunk_id: "test-1".to_string(),
        doc_id: "doc-1".to_string(),
        content: "error handling patterns in Rust programming".to_string(),
        embedding: None,
        start_idx: 0,
        end_idx: 42,
        metadata: None,
        created_at: chrono::Utc::now().to_rfc3339(),
    };

    println!("Adding document: {}", doc.content);
    index.add_document(&doc).unwrap();

    // The index is committed before it is queried.
    println!("Committing index...");
    index.commit().unwrap();

    println!("Searching for 'error handling'...");
    let hits = index.search("error handling", 5).unwrap();

    println!("BM25 Results: {} found", hits.len());
    // Ranks are reported 1-based.
    for (rank, hit) in (1usize..).zip(hits.iter()) {
        println!(
            " Result {}: chunk_id={}, score={}",
            rank, hit.chunk_id, hit.score
        );
    }

    assert!(!hits.is_empty(), "BM25 search should find the document");

    // Safe to index: the assertion above guarantees at least one hit.
    let best = &hits[0];
    assert_eq!(best.chunk_id, "test-1");
    assert!(best.score > 0.0);
}
|
||
|
|
|
||
|
|
/// Indexes five chunks that all contain the phrase "error handling", commits
/// the index, and checks that a query for that phrase returns at least one
/// hit and no more hits than the number of chunks that were added.
#[test]
fn test_bm25_multiple_documents() {
    let index = BM25Index::new().unwrap();

    // Five chunks of the same document, each covering its own 100-wide range.
    for n in 0..5 {
        let chunk_id = format!("chunk-{}", n);
        let content = format!("Line {}: Sample content with error handling patterns", n);
        let start_idx = n * 100;
        let end_idx = (n + 1) * 100;

        let doc = Chunk {
            chunk_id,
            doc_id: "doc-1".to_string(),
            content,
            embedding: None,
            start_idx,
            end_idx,
            metadata: None,
            created_at: chrono::Utc::now().to_rfc3339(),
        };
        index.add_document(&doc).unwrap();
    }

    // The index is committed once, after all chunks have been added.
    index.commit().unwrap();

    // The requested limit (10) is larger than the number of indexed chunks (5).
    let hits = index.search("error handling", 10).unwrap();
    let hit_count = hits.len();
    println!("Found {} results for 'error handling'", hit_count);

    assert!(!hits.is_empty(), "Should find documents");
    assert!(hit_count <= 5, "Should not return more than available");
}
|