// Vapora/crates/vapora-rlm/examples/production_setup.rs
// Production Setup Example for RLM
// Shows how to configure RLM with an LLM client and embeddings

use std::sync::Arc;
use surrealdb::engine::remote::ws::Ws;
use surrealdb::opt::auth::Root;
use surrealdb::Surreal;
use vapora_llm_router::providers::OpenAIClient;
use vapora_rlm::chunking::{ChunkingConfig, ChunkingStrategy};
use vapora_rlm::embeddings::EmbeddingConfig;
use vapora_rlm::engine::RLMEngineConfig;
use vapora_rlm::search::bm25::BM25Index;
use vapora_rlm::storage::SurrealDBStorage;
use vapora_rlm::RLMEngine;

#[tokio::main]
async fn main() -> anyhow::Result<()> {
println!("🚀 Production RLM Setup with OpenAI");
println!("Prerequisites:");
println!(" - SurrealDB running on port 8000");
println!(" - OPENAI_API_KEY environment variable set\n");

    // 1. Setup SurrealDB
    let db = Surreal::new::<Ws>("127.0.0.1:8000").await?;
    db.signin(Root {
        username: "root",
        password: "root",
    })
    .await?;
    db.use_ns("production").use_db("rlm").await?;

    // 2. Setup OpenAI client (reads OPENAI_API_KEY from env)
    let api_key =
        std::env::var("OPENAI_API_KEY").expect("OPENAI_API_KEY environment variable not set");
    let llm_client = Arc::new(OpenAIClient::new(
        api_key,
        "gpt-4".to_string(),
        4096, // max_tokens
        0.7,  // temperature
        5.0,  // cost per 1M input tokens (dollars)
        15.0, // cost per 1M output tokens (dollars)
    )?);
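
    // Note: the two pricing values passed above are presumably used by the
    // router for cost accounting (an inference from this example alone);
    // update them to match your provider's current per-1M-token rates.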

    // 3. Create storage and BM25 index
    let storage = Arc::new(SurrealDBStorage::new(db));
    let bm25_index = Arc::new(BM25Index::new()?);

    // 4. Configure RLM engine for production
    let rlm_config = RLMEngineConfig {
        chunking: ChunkingConfig {
            strategy: ChunkingStrategy::Semantic,
            chunk_size: 1000,
            overlap: 200,
        },
        embedding: Some(EmbeddingConfig::openai_small()),
        auto_rebuild_bm25: true,
        max_chunks_per_doc: 10_000,
    };
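
    // Assumed semantics of the config above: `Semantic` chunking splits on
    // meaning boundaries rather than fixed offsets, `overlap` carries shared
    // context across adjacent chunks, and `openai_small()` selects OpenAI's
    // small embedding model. Treat these values as reasonable defaults for
    // the example, not tuned production settings.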

    // 5. Create RLM engine with LLM client
    let engine = RLMEngine::with_llm_client(storage, bm25_index, llm_client, Some(rlm_config))?;
    println!("✓ RLM Engine configured for production");

    // 6. Example usage: Load document and query
    let doc_id = "production-doc-1";
    let content = "
        Rust is a systems programming language that runs blazingly fast,
        prevents segfaults, and guarantees thread safety. It has a rich
        type system and ownership model that ensure memory safety and
        prevent data races at compile time.
    ";
println!("\n📄 Loading document...");
let chunk_count = engine.load_document(doc_id, content, None).await?;
println!("✓ Loaded {} chunks", chunk_count);
println!("\n🔍 Querying...");
let results = engine.query(doc_id, "memory safety", None, 5).await?;
println!("✓ Found {} results:", results.len());
for (i, result) in results.iter().enumerate() {
println!(
" {}. Score: {:.3} - {}",
i + 1,
result.score,
&result.chunk.content[..50.min(result.chunk.content.len())]
);
}
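
    // dispatch_subtask appears to retrieve the top-k matching chunks for the
    // prompt and send them to the configured LLM as context, returning the
    // model's text plus token usage (inferred from the fields used below).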
println!("\n🚀 Dispatching to LLM...");
let dispatch_result = engine
.dispatch_subtask(doc_id, "Explain Rust's memory safety", None, 5)
.await?;
println!("✓ LLM Response:\n{}", dispatch_result.text);
println!(
" Tokens: {} in, {} out",
dispatch_result.total_input_tokens, dispatch_result.total_output_tokens
);
Ok(())
}