// File: Vapora/crates/vapora-rlm/tests/e2e_minimal_debug.rs
// (repository web-view header: 100 lines, 2.9 KiB, Rust; revision dated 2026-02-16 05:09:51 +00:00)

// Minimal E2E Debug Test - Trace why BM25 returns 0 results
use std::sync::Arc;
use surrealdb::engine::remote::ws::Ws;
use surrealdb::opt::auth::Root;
use surrealdb::Surreal;
use vapora_rlm::search::bm25::BM25Index;
use vapora_rlm::storage::SurrealDBStorage;
use vapora_rlm::RLMEngine;
/// Walks one small document through load, a direct BM25 search and an engine
/// query, printing the BM25 index stats at each step before asserting that
/// every stage produced something.
///
/// Ignored by default: it connects over WebSocket to `127.0.0.1:8000` and
/// signs in with the `root`/`root` credentials.
#[tokio::test]
#[ignore] // Requires SurrealDB
async fn test_e2e_minimal_trace() {
    // Connection setup - same as the E2E test.
    let client = Surreal::new::<Ws>("127.0.0.1:8000").await.unwrap();
    let root_login = Root {
        username: "root",
        password: "root",
    };
    client.signin(root_login).await.unwrap();
    client
        .use_ns("test_e2e_minimal")
        .use_db("test_e2e_minimal")
        .await
        .unwrap();

    // The index handle stays in this scope so it can be inspected and searched
    // directly; the engine receives a clone of the same `Arc`.
    let store = Arc::new(SurrealDBStorage::new(client.clone()));
    let index = Arc::new(BM25Index::new().unwrap());
    let engine = Arc::new(RLMEngine::new(store, Arc::clone(&index)).unwrap());

    // One short document under a fresh, random id.
    let doc_id = format!("minimal-{}", uuid::Uuid::new_v4());
    let content = "This is test content with error handling patterns in Rust programming.";
    let query_text = "error handling";

    let stats_before_load = index.stats();
    println!(
        "1. BEFORE LOAD - BM25 Index stats: {:?}",
        stats_before_load
    );

    let chunk_count = engine.load_document(&doc_id, content, None).await.unwrap();
    println!("2. AFTER LOAD - Chunk count: {}", chunk_count);
    let stats_after_load = index.stats();
    println!("2. AFTER LOAD - BM25 Index stats: {:?}", stats_after_load);

    // Short pause in case anything is still settling asynchronously.
    let settle_time = tokio::time::Duration::from_millis(100);
    tokio::time::sleep(settle_time).await;
    let stats_after_delay = index.stats();
    println!(
        "3. AFTER DELAY - BM25 Index stats: {:?}",
        stats_after_delay
    );

    // Search the index itself, without going through the engine.
    println!("4. DIRECT BM25 SEARCH:");
    let direct_hits = index.search(query_text, 5).unwrap();
    println!(
        " Direct BM25 search returned {} results",
        direct_hits.len()
    );
    for (rank, hit) in (1..).zip(direct_hits.iter()) {
        println!(
            " Result {}: chunk_id={}, score={}",
            rank, hit.chunk_id, hit.score
        );
    }

    // Same query text, this time through the engine for the loaded document.
    println!("5. ENGINE QUERY:");
    let engine_hits = engine.query(&doc_id, query_text, None, 5).await.unwrap();
    println!(" Engine query returned {} results", engine_hits.len());
    for (rank, hit) in (1..).zip(engine_hits.iter()) {
        println!(
            " Result {}: score={}, bm25={:?}, semantic={:?}",
            rank, hit.score, hit.bm25_score, hit.semantic_score
        );
    }

    // Checks run last so the whole trace above is printed before any failure.
    assert!(chunk_count > 0, "Should create chunks");
    let indexed_docs = index.stats().num_docs;
    assert!(indexed_docs > 0, "BM25 should have documents");
    assert!(
        !direct_hits.is_empty(),
        "Direct BM25 search should find results"
    );
    assert!(
        !engine_hits.is_empty(),
        "Engine query should find results"
    );
}