// Production Setup Example for RLM
// Shows how to configure RLM with LLM client and embeddings

use std::sync::Arc;

use surrealdb::engine::remote::ws::Ws;
use surrealdb::opt::auth::Root;
use surrealdb::Surreal;
use vapora_llm_router::providers::OpenAIClient;
use vapora_rlm::chunking::{ChunkingConfig, ChunkingStrategy};
use vapora_rlm::embeddings::EmbeddingConfig;
use vapora_rlm::engine::RLMEngineConfig;
use vapora_rlm::search::bm25::BM25Index;
use vapora_rlm::storage::SurrealDBStorage;
use vapora_rlm::RLMEngine;

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    println!("šŸš€ Production RLM Setup with OpenAI");
    println!("Prerequisites:");
    println!("  - SurrealDB running on port 8000");
    println!("  - OPENAI_API_KEY environment variable set\n");

    // 1. Setup SurrealDB (connect over WebSocket, authenticate as root,
    //    then select the namespace and database)
    let db = Surreal::new::<Ws>("127.0.0.1:8000").await?;
    db.signin(Root {
        username: "root",
        password: "root",
    })
    .await?;
    db.use_ns("production").use_db("rlm").await?;

    // 2. Setup OpenAI client (reads OPENAI_API_KEY from env)
    let api_key = std::env::var("OPENAI_API_KEY")
        .expect("OPENAI_API_KEY environment variable not set");
    let llm_client = Arc::new(OpenAIClient::new(
        api_key,
        "gpt-4".to_string(),
        4096, // max_tokens
        0.7,  // temperature
        5.0,  // cost per 1M input tokens (dollars)
        15.0, // cost per 1M output tokens (dollars)
    )?);

    // 3. Create storage and BM25 index
    let storage = Arc::new(SurrealDBStorage::new(db));
    let bm25_index = Arc::new(BM25Index::new()?);

    // 4. Configure RLM engine for production
    let rlm_config = RLMEngineConfig {
        chunking: ChunkingConfig {
            strategy: ChunkingStrategy::Semantic,
            chunk_size: 1000,
            overlap: 200,
        },
        embedding: Some(EmbeddingConfig::openai_small()),
        auto_rebuild_bm25: true,
        max_chunks_per_doc: 10_000,
    };

    // 5. Create RLM engine with LLM client
    let engine = RLMEngine::with_llm_client(storage, bm25_index, llm_client, Some(rlm_config))?;
    println!("āœ“ RLM Engine configured for production");

    // 6. Example usage: Load document and query
    let doc_id = "production-doc-1";
    let content = "
        Rust is a systems programming language that runs blazingly fast,
        prevents segfaults, and guarantees thread safety. It has a rich type
        system and ownership model that ensure memory safety and prevent data
        races at compile time.
    ";

    println!("\nšŸ“„ Loading document...");
    let chunk_count = engine.load_document(doc_id, content, None).await?;
    println!("āœ“ Loaded {} chunks", chunk_count);

    println!("\nšŸ” Querying...");
    let results = engine.query(doc_id, "memory safety", None, 5).await?;
    println!("āœ“ Found {} results:", results.len());
    for (i, result) in results.iter().enumerate() {
        println!(
            "  {}. Score: {:.3} - {}",
            i + 1,
            result.score,
            &result.chunk.content[..50.min(result.chunk.content.len())]
        );
    }

    println!("\nšŸš€ Dispatching to LLM...");
    let dispatch_result = engine
        .dispatch_subtask(doc_id, "Explain Rust's memory safety", None, 5)
        .await?;
    println!("āœ“ LLM Response:\n{}", dispatch_result.text);
    println!(
        "  Tokens: {} in, {} out",
        dispatch_result.total_input_tokens, dispatch_result.total_output_tokens
    );

    Ok(())
}
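
// Running this example (a minimal sketch; the example file name below is
// hypothetical and depends on how this repo lays out its examples/ directory):
//
//   # start a local SurrealDB instance on the default port 8000
//   surreal start --user root --pass root --bind 127.0.0.1:8000
//
//   # provide the OpenAI credentials the example expects
//   export OPENAI_API_KEY=sk-...
//
//   # run the example (assumes it lives at examples/production_setup.rs)
//   cargo run --example production_setup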