// Minimal E2E Debug Test - Trace why BM25 returns 0 results
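//
// Trace stages (printed as steps 1-5 below): BM25 index stats before and after
// loading a document, stats again after a short delay, a direct BM25 search that
// bypasses the engine, and a full engine query over the same document.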

use std::sync::Arc;

use surrealdb::engine::remote::ws::Ws;
use surrealdb::opt::auth::Root;
use surrealdb::Surreal;
use vapora_rlm::search::bm25::BM25Index;
use vapora_rlm::storage::SurrealDBStorage;
use vapora_rlm::RLMEngine;

#[tokio::test]
#[ignore] // Requires SurrealDB
async fn test_e2e_minimal_trace() {
    // Setup - exactly like E2E test
    let db = Surreal::new::<Ws>("127.0.0.1:8000").await.unwrap();
    db.signin(Root {
        username: "root",
        password: "root",
    })
    .await
    .unwrap();
    db.use_ns("test_e2e_minimal")
        .use_db("test_e2e_minimal")
        .await
        .unwrap();

    let storage = Arc::new(SurrealDBStorage::new(db.clone()));
    let bm25_index = Arc::new(BM25Index::new().unwrap());
    let engine = Arc::new(RLMEngine::new(storage, bm25_index.clone()).unwrap());

    // Load a simple document
    let doc_id = format!("minimal-{}", uuid::Uuid::new_v4());
    let content = "This is test content with error handling patterns in Rust programming.";

    println!(
        "1. BEFORE LOAD - BM25 Index stats: {:?}",
        bm25_index.stats()
    );

    let chunk_count = engine.load_document(&doc_id, content, None).await.unwrap();
    println!("2. AFTER LOAD - Chunk count: {}", chunk_count);
    println!("2. AFTER LOAD - BM25 Index stats: {:?}", bm25_index.stats());

    // Small delay to ensure async operations complete
    tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;

    println!(
        "3. AFTER DELAY - BM25 Index stats: {:?}",
        bm25_index.stats()
    );

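    // Diagnostic split: if the direct search below returns results but the
    // engine query in step 5 does not, the problem lies in the engine's query
    // path rather than in indexing.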
    // Direct BM25 search (bypassing engine)
    println!("4. DIRECT BM25 SEARCH:");
    let direct_results = bm25_index.search("error handling", 5).unwrap();
    println!(
        "   Direct BM25 search returned {} results",
        direct_results.len()
    );
    for (i, result) in direct_results.iter().enumerate() {
        println!(
            "   Result {}: chunk_id={}, score={}",
            i + 1,
            result.chunk_id,
            result.score
        );
    }

    // Engine query
    println!("5. ENGINE QUERY:");
    let query_results = engine
        .query(&doc_id, "error handling", None, 5)
        .await
        .unwrap();
    println!("   Engine query returned {} results", query_results.len());
    for (i, result) in query_results.iter().enumerate() {
        println!(
            "   Result {}: score={}, bm25={:?}, semantic={:?}",
            i + 1,
            result.score,
            result.bm25_score,
            result.semantic_score
        );
    }

    // Verify
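    // Each assertion isolates a different failure point: chunking, BM25
    // indexing, direct BM25 retrieval, and the engine query path.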
    assert!(chunk_count > 0, "Should create chunks");
    assert!(
        bm25_index.stats().num_docs > 0,
        "BM25 should have documents"
    );
    assert!(
        !direct_results.is_empty(),
        "Direct BM25 search should find results"
    );
    assert!(
        !query_results.is_empty(),
        "Engine query should find results"
    );
}