Vapora/crates/vapora-llm-router/examples/01-provider-selection.rs
//! # LLM Provider Selection Example
//!
//! Demonstrates how to describe available LLM providers and route a task to the
//! most suitable provider using simple routing rules.
//!
//! ## What This Example Shows
//! - Listing multiple LLM providers (Claude, OpenAI, Ollama) with their costs and use cases
//! - Modeling routing preferences as a task-type -> provider map with a "default" rule
//! - Selecting a provider (and its fallback) for a given task type
//! - Comparing provider capabilities and per-token costs
//!
//! ## Run
//! ```bash
//! cargo run --example 01-provider-selection -p vapora-llm-router
//! ```
//!
//! ## Expected Output (abridged)
//! ```text
//! === LLM Provider Selection Example ===
//!
//! Available Providers:
//! 1. claude (models: claude-opus-4-5, claude-sonnet-4)
//! 2. gpt-4 (models: gpt-4-turbo, gpt-4)
//! 3. ollama (models: llama2, mistral)
//!
//! Task: code_analysis
//!  Selected provider: claude
//!  Model: claude-opus-4-5
//!  Cost: $15.00 per 1M input tokens
//!  Fallback: gpt-4
//!
//! Task: documentation
//!  Selected provider: ollama
//!  Model: llama2 (local)
//!  Cost: $0.00 (local execution)
//!  Fallback: None (local-only)
//! ```

use std::collections::HashMap;

fn main() {
println!("=== LLM Provider Selection Example ===\n");
// Step 1: Display available providers
println!("Available Providers:");
println!("1. claude (models: claude-opus-4-5, claude-sonnet-4)");
println!(" - Use case: Complex reasoning, code generation");
println!(" - Cost: $15 per 1M input tokens");
println!(" - Throughput: High\n");
println!("2. gpt-4 (models: gpt-4-turbo, gpt-4)");
println!(" - Use case: General-purpose, multimodal");
println!(" - Cost: $10 per 1M input tokens");
println!(" - Throughput: High\n");
println!("3. ollama (models: llama2, mistral)");
println!(" - Use case: Local execution, no cost");
println!(" - Cost: $0.00 (local/on-premise)");
println!(" - Throughput: Depends on hardware\n");
// Step 2: Define provider preferences for different task types
let mut routing_rules: HashMap<&str, &str> = HashMap::new();
routing_rules.insert("code_analysis", "claude");
routing_rules.insert("documentation", "ollama");
routing_rules.insert("creative_writing", "gpt-4");
routing_rules.insert("default", "claude");
    // Step 3: Demonstrate routing for different task types
    let task_types = vec!["code_analysis", "documentation", "creative_writing"];
    for task_type in task_types {
        println!("Task: {}", task_type);
        // Look up the task type; fall back to the "default" rule for task types
        // that have no explicit entry.
        let provider = routing_rules
            .get(task_type)
            .or_else(|| routing_rules.get("default"))
            .copied()
            .unwrap_or("claude");
        println!(" Selected provider: {}", provider);
        // Display provider-specific info
        match provider {
            "claude" => {
                println!(" Model: claude-opus-4-5");
                println!(" Cost: $15.00 per 1M input tokens");
                println!(" Fallback: gpt-4");
            }
            "gpt-4" => {
                println!(" Model: gpt-4-turbo");
                println!(" Cost: $10.00 per 1M input tokens");
                println!(" Fallback: ollama");
            }
            "ollama" => {
                println!(" Model: llama2 (local)");
                println!(" Cost: $0.00 (local execution)");
                println!(" Fallback: None (local-only)");
            }
            _ => {
                println!(" Model: unknown");
            }
        }
        println!();
    }
    // Step 4: Demonstrate cost comparison
    println!("=== Cost Comparison for 1,000,000 Input Tokens ===");
    println!("Provider | Model               | Cost    | Speed   | Best For");
    println!("---------|---------------------|---------|---------|-------------------------");
    println!("Claude   | claude-opus-4-5     | $15.00  | 100 RPS | Complex reasoning");
    println!("OpenAI   | gpt-4-turbo         | $10.00  | 500 RPS | General-purpose");
    println!("Ollama   | llama2 (local)      | $0.00   | 20 RPS  | No cost, privacy");
println!("\n=== Recommendation ===");
println!("For this workload:");
println!(" - HIGH quality required: Use Claude");
println!(" - Cost-sensitive: Use Ollama (local)");
println!(" - Balanced: Use OpenAI GPT-4");
println!(" - Budget-aware: Use Claude with Ollama fallback");
}
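
// Minimal sanity check for the routing-rule idea shown above. This is an added
// sketch, not part of the original example: the map is rebuilt here because the
// one in main() is local to that function, and the tests only run if this example
// is compiled with its test harness (e.g. `cargo test --examples`).
#[cfg(test)]
mod tests {
    use std::collections::HashMap;

    fn build_rules() -> HashMap<&'static str, &'static str> {
        let mut rules = HashMap::new();
        rules.insert("code_analysis", "claude");
        rules.insert("documentation", "ollama");
        rules.insert("creative_writing", "gpt-4");
        rules.insert("default", "claude");
        rules
    }

    #[test]
    fn unknown_task_type_falls_back_to_default_rule() {
        let rules = build_rules();
        // "translation" has no explicit rule, so the "default" entry applies.
        let provider = rules
            .get("translation")
            .or_else(|| rules.get("default"))
            .copied()
            .unwrap_or("claude");
        assert_eq!(provider, "claude");
    }

    #[test]
    fn explicit_rules_take_precedence() {
        let rules = build_rules();
        assert_eq!(rules.get("documentation").copied(), Some("ollama"));
    }
}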