// Minimal E2E Debug Test - Trace why BM25 returns 0 results use std::sync::Arc; use surrealdb::engine::remote::ws::Ws; use surrealdb::opt::auth::Root; use surrealdb::Surreal; use vapora_rlm::search::bm25::BM25Index; use vapora_rlm::storage::SurrealDBStorage; use vapora_rlm::RLMEngine; #[tokio::test] #[ignore] // Requires SurrealDB async fn test_e2e_minimal_trace() { // Setup - exactly like E2E test let db = Surreal::new::("127.0.0.1:8000").await.unwrap(); db.signin(Root { username: "root", password: "root", }) .await .unwrap(); db.use_ns("test_e2e_minimal") .use_db("test_e2e_minimal") .await .unwrap(); let storage = Arc::new(SurrealDBStorage::new(db.clone())); let bm25_index = Arc::new(BM25Index::new().unwrap()); let engine = Arc::new(RLMEngine::new(storage, bm25_index.clone()).unwrap()); // Load a simple document let doc_id = format!("minimal-{}", uuid::Uuid::new_v4()); let content = "This is test content with error handling patterns in Rust programming."; println!( "1. BEFORE LOAD - BM25 Index stats: {:?}", bm25_index.stats() ); let chunk_count = engine.load_document(&doc_id, content, None).await.unwrap(); println!("2. AFTER LOAD - Chunk count: {}", chunk_count); println!("2. AFTER LOAD - BM25 Index stats: {:?}", bm25_index.stats()); // Small delay to ensure async operations complete tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; println!( "3. AFTER DELAY - BM25 Index stats: {:?}", bm25_index.stats() ); // Direct BM25 search (bypassing engine) println!("4. DIRECT BM25 SEARCH:"); let direct_results = bm25_index.search("error handling", 5).unwrap(); println!( " Direct BM25 search returned {} results", direct_results.len() ); for (i, result) in direct_results.iter().enumerate() { println!( " Result {}: chunk_id={}, score={}", i + 1, result.chunk_id, result.score ); } // Engine query println!("5. ENGINE QUERY:"); let query_results = engine .query(&doc_id, "error handling", None, 5) .await .unwrap(); println!(" Engine query returned {} results", query_results.len()); for (i, result) in query_results.iter().enumerate() { println!( " Result {}: score={}, bm25={:?}, semantic={:?}", i + 1, result.score, result.bm25_score, result.semantic_score ); } // Verify assert!(chunk_count > 0, "Should create chunks"); assert!( bm25_index.stats().num_docs > 0, "BM25 should have documents" ); assert!( !direct_results.is_empty(), "Direct BM25 search should find results" ); assert!( !query_results.is_empty(), "Engine query should find results" ); }