// Production Setup Example for RLM
//
// Shows how to configure RLM with an LLM client and embeddings.

use std::sync::Arc;

use surrealdb::engine::remote::ws::Ws;
use surrealdb::opt::auth::Root;
use surrealdb::Surreal;
use vapora_llm_router::providers::OpenAIClient;
use vapora_rlm::chunking::{ChunkingConfig, ChunkingStrategy};
use vapora_rlm::embeddings::EmbeddingConfig;
use vapora_rlm::engine::RLMEngineConfig;
use vapora_rlm::search::bm25::BM25Index;
use vapora_rlm::storage::SurrealDBStorage;
use vapora_rlm::RLMEngine;

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    println!("🚀 Production RLM Setup with OpenAI");
    println!("Prerequisites:");
    println!(" - SurrealDB running on port 8000");
    println!(" - OPENAI_API_KEY environment variable set\n");

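    // Tip: for the prerequisites above, a local SurrealDB instance can be
    // started with, e.g.:
    //   surreal start --user root --pass root memory
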
    // 1. Setup SurrealDB
    let db = Surreal::new::<Ws>("127.0.0.1:8000").await?;
    db.signin(Root {
        username: "root",
        password: "root",
    })
    .await?;
    db.use_ns("production").use_db("rlm").await?;
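
    // NOTE: root credentials are convenient for local development; a real
    // production deployment should sign in as a dedicated, least-privileged
    // user rather than root.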

    // 2. Setup OpenAI client (reads OPENAI_API_KEY from env)
    let api_key =
        std::env::var("OPENAI_API_KEY").expect("OPENAI_API_KEY environment variable not set");
    let llm_client = Arc::new(OpenAIClient::new(
        api_key,
        "gpt-4".to_string(),
        4096, // max_tokens
        0.7,  // temperature
        5.0,  // cost per 1M input tokens (dollars)
        15.0, // cost per 1M output tokens (dollars)
    )?);
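
    // The two cost figures above are illustrative gpt-4 rates; adjust them to
    // the provider's current pricing, since the router presumably uses them
    // for cost accounting (see the token-based estimate at the end of main).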

    // 3. Create storage and BM25 index
    let storage = Arc::new(SurrealDBStorage::new(db));
    let bm25_index = Arc::new(BM25Index::new()?);
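
    // BM25 gives lexical (keyword) scoring; with the vector embeddings
    // configured below, the engine presumably combines both for hybrid
    // retrieval.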

    // 4. Configure RLM engine for production
    let rlm_config = RLMEngineConfig {
        chunking: ChunkingConfig {
            strategy: ChunkingStrategy::Semantic,
            chunk_size: 1000,
            overlap: 200,
        },
        embedding: Some(EmbeddingConfig::openai_small()),
        auto_rebuild_bm25: true,
        max_chunks_per_doc: 10_000,
    };
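
    // The overlap of 200 (20% of chunk_size) lets context straddle chunk
    // boundaries so boundary-spanning passages stay retrievable. Whether
    // chunk_size/overlap count characters or tokens is crate-specific; check
    // the ChunkingConfig docs before tuning.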

    // 5. Create RLM engine with LLM client
    let engine = RLMEngine::with_llm_client(storage, bm25_index, llm_client, Some(rlm_config))?;
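
    // Passing Some(rlm_config) overrides the engine defaults; None would
    // presumably fall back to the crate's default configuration.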

    println!("✓ RLM Engine configured for production");

    // 6. Example usage: Load document and query
    let doc_id = "production-doc-1";
    let content = "
        Rust is a systems programming language that runs blazingly fast,
        prevents segfaults, and guarantees thread safety. It has a rich
        type system and ownership model that ensure memory safety and
        prevent data races at compile time.
    ";

    println!("\n📄 Loading document...");
    let chunk_count = engine.load_document(doc_id, content, None).await?;
    println!("✓ Loaded {} chunks", chunk_count);

    println!("\n🔍 Querying...");
    let results = engine.query(doc_id, "memory safety", None, 5).await?;
    println!("✓ Found {} results:", results.len());
    for (i, result) in results.iter().enumerate() {
        // chars() keeps the preview safe on multi-byte UTF-8, where slicing
        // by byte index could panic off a char boundary
        let preview: String = result.chunk.content.chars().take(50).collect();
        println!(" {}. Score: {:.3} - {}", i + 1, result.score, preview);
    }

    println!("\n🚀 Dispatching to LLM...");
    let dispatch_result = engine
        .dispatch_subtask(doc_id, "Explain Rust's memory safety", None, 5)
        .await?;
    println!("✓ LLM Response:\n{}", dispatch_result.text);
    println!(
        " Tokens: {} in, {} out",
        dispatch_result.total_input_tokens, dispatch_result.total_output_tokens
    );
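
    // Hypothetical back-of-the-envelope cost check using the same per-1M-token
    // rates passed to OpenAIClient above (the router may already track this
    // internally).
    let est_cost = dispatch_result.total_input_tokens as f64 / 1_000_000.0 * 5.0
        + dispatch_result.total_output_tokens as f64 / 1_000_000.0 * 15.0;
    println!(" Estimated cost: ${:.6}", est_cost);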

    Ok(())
}