// Local Development Setup with Ollama
// No API keys required - uses local Ollama for LLM and embeddings

use std::sync::Arc;

use surrealdb::engine::remote::ws::Ws;
use surrealdb::opt::auth::Root;
use surrealdb::Surreal;
use vapora_llm_router::providers::OllamaClient;
use vapora_rlm::chunking::{ChunkingConfig, ChunkingStrategy};
use vapora_rlm::embeddings::EmbeddingConfig;
use vapora_rlm::engine::RLMEngineConfig;
use vapora_rlm::search::bm25::BM25Index;
use vapora_rlm::storage::SurrealDBStorage;
use vapora_rlm::RLMEngine;

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    println!("🦙 Local RLM Setup with Ollama");
    println!("Prerequisites:");
    println!(" - SurrealDB: docker run -p 8000:8000 surrealdb/surrealdb:latest start");
    println!(" - Ollama: brew install ollama && ollama serve");
    println!(" - Model: ollama pull llama3.2\n");

    // 1. Setup SurrealDB
    let db = Surreal::new::<Ws>("127.0.0.1:8000").await?;
    db.signin(Root {
        username: "root",
        password: "root",
    })
    .await?;
    db.use_ns("local").use_db("rlm").await?;

    // 2. Setup Ollama client (local, no API key needed)
    let llm_client = Arc::new(OllamaClient::new(
        "http://localhost:11434".to_string(),
        "llama3.2".to_string(),
        4096, // max_tokens
        0.7,  // temperature
    )?);
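    // Note: 11434 is Ollama's default local port; adjust the URL if `ollama serve`
    // listens elsewhere. The model name must match one already pulled locally.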

    // 3. Create storage and BM25 index
    let storage = Arc::new(SurrealDBStorage::new(db));
    let bm25_index = Arc::new(BM25Index::new()?);

    // 4. Configure RLM engine for local development
    let rlm_config = RLMEngineConfig {
        chunking: ChunkingConfig {
            strategy: ChunkingStrategy::Fixed,
            chunk_size: 500,
            overlap: 100,
        },
        embedding: Some(EmbeddingConfig::ollama("llama3.2")),
        auto_rebuild_bm25: true,
        max_chunks_per_doc: 5_000,
    };
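    // Embedding generation also goes through the local Ollama instance; llama3.2 is
    // reused here, though presumably any locally pulled model name can be passed to
    // EmbeddingConfig::ollama.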

    // 5. Create RLM engine with Ollama client
    let engine = RLMEngine::with_llm_client(storage, bm25_index, llm_client, Some(rlm_config))?;

    println!("✓ RLM Engine configured with Ollama\n");

    // 6. Example: Analyze Rust code
    let doc_id = "rust-example";
    let content = r#"
fn fibonacci(n: u32) -> u32 {
    match n {
        0 => 0,
        1 => 1,
        _ => fibonacci(n - 1) + fibonacci(n - 2),
    }
}

// This recursive implementation has exponential time complexity.
// A better approach would use dynamic programming or iteration.
"#;

    println!("📄 Loading Rust code...");
    let chunk_count = engine.load_document(doc_id, content, None).await?;
    println!("✓ Loaded {} chunks\n", chunk_count);

    println!("🔍 Searching for 'complexity'...");
    let results = engine.query(doc_id, "complexity", None, 3).await?;
    println!("✓ Found {} results\n", results.len());

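    // dispatch_subtask appears to combine retrieval with generation: the top
    // matching chunks (3 here) are sent to Ollama together with the instruction.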
println!("🦙 Asking Ollama to explain the code...");
|
|
let response = engine
|
|
.dispatch_subtask(
|
|
doc_id,
|
|
"Explain this Rust code and suggest improvements",
|
|
None,
|
|
3,
|
|
)
|
|
.await?;
|
|
println!("✓ Ollama says:\n{}\n", response.text);
|
|
println!(
|
|
" (Used {} tokens)",
|
|
response.total_input_tokens + response.total_output_tokens
|
|
);
|
|
|
|
Ok(())
|
|
}
|