//! # LLM Provider Selection Example
//!
//! Demonstrates how to configure LLM providers and route requests to the
//! optimal provider.
//!
//! ## What This Example Shows
//! - Configuring multiple LLM providers (Claude, OpenAI, Ollama)
//! - Creating an LLM router with default preferences
//! - Selecting a provider based on routing rules
//! - Understanding provider capabilities and costs
//!
//! ## Run
//! ```bash
//! cargo run --example 01-provider-selection -p vapora-llm-router
//! ```
//!
//! ## Expected Output
//! ```text
//! === LLM Provider Selection Example ===
//!
//! Available Providers:
//! 1. claude (models: claude-opus-4-5, claude-sonnet-4)
//! 2. gpt-4 (models: gpt-4-turbo, gpt-4)
//! 3. ollama (models: llama2, mistral)
//!
//! Selecting provider for task: "code_analysis"...
//! Selected provider: claude (model: claude-opus-4-5)
//! Cost estimate: $0.075 per 1K tokens
//! Fallback: gpt-4 (if budget exceeded)
//!
//! Selecting provider for task: "documentation"...
//! Selected provider: ollama (model: llama2, local, no cost)
//! Cost estimate: $0.00 (local execution)
//! ```
||
|
|
use std::collections::HashMap;
|
||
|
|
|
||
|
|
/// Entry point: walks through provider discovery, rule-based routing for
/// three example task types, and a cost comparison table.
fn main() {
    println!("=== LLM Provider Selection Example ===\n");

    // Step 1: Display available providers
    println!("Available Providers:");
    println!("1. claude (models: claude-opus-4-5, claude-sonnet-4)");
    println!(" - Use case: Complex reasoning, code generation");
    println!(" - Cost: $15 per 1M input tokens");
    println!(" - Throughput: High\n");

    println!("2. gpt-4 (models: gpt-4-turbo, gpt-4)");
    println!(" - Use case: General-purpose, multimodal");
    println!(" - Cost: $10 per 1M input tokens");
    println!(" - Throughput: High\n");

    println!("3. ollama (models: llama2, mistral)");
    println!(" - Use case: Local execution, no cost");
    println!(" - Cost: $0.00 (local/on-premise)");
    println!(" - Throughput: Depends on hardware\n");

    // Step 2: Define provider preferences for different task types.
    // The "default" entry is the catch-all rule for unknown task types.
    let mut routing_rules: HashMap<&str, &str> = HashMap::new();
    routing_rules.insert("code_analysis", "claude");
    routing_rules.insert("documentation", "ollama");
    routing_rules.insert("creative_writing", "gpt-4");
    routing_rules.insert("default", "claude");

    // Step 3: Demonstrate routing for different task types.
    // (Array instead of `vec!` — no heap allocation needed for a fixed list.)
    let task_types = ["code_analysis", "documentation", "creative_writing"];

    for task_type in task_types {
        println!("Task: {} ", task_type);
        let provider = select_provider(&routing_rules, task_type);
        println!(" Selected provider: {}", provider);

        // Display provider-specific info
        match provider {
            "claude" => {
                println!(" Model: claude-opus-4-5");
                println!(" Cost: $15.00 per 1M input tokens");
                println!(" Fallback: gpt-4");
            }
            "gpt-4" => {
                println!(" Model: gpt-4-turbo");
                println!(" Cost: $10.00 per 1M input tokens");
                println!(" Fallback: ollama");
            }
            "ollama" => {
                println!(" Model: llama2 (local)");
                println!(" Cost: $0.00 (local execution)");
                println!(" Fallback: None (local-only)");
            }
            // A rule may name a provider this demo doesn't know about.
            _ => {
                println!(" Model: unknown");
            }
        }
        println!();
    }

    // Step 4: Demonstrate cost comparison
    println!("=== Cost Comparison for 1,000,000 Input Tokens ===");
    println!("Provider | Model | Cost | Speed | Best For");
    println!("---------|---------------------|---------|---------|-------------------------");
    println!("Claude | claude-opus-4-5 | $15.00 | 100 RPS | Complex reasoning");
    println!("OpenAI | gpt-4-turbo | $10.00 | 500 RPS | General-purpose");
    println!("Ollama | llama2 (local) | $0.00 | 20 RPS | No cost, privacy");

    println!("\n=== Recommendation ===");
    println!("For this workload:");
    println!(" - HIGH quality required: Use Claude");
    println!(" - Cost-sensitive: Use Ollama (local)");
    println!(" - Balanced: Use OpenAI GPT-4");
    println!(" - Budget-aware: Use Claude with Ollama fallback");
}

/// Resolves `task` to a provider name using the routing table.
///
/// Looks up the task's explicit rule first, then falls back to the table's
/// "default" entry, and finally to "claude" if no "default" rule exists.
///
/// Fixes the original lookup, which used `unwrap_or("default")` — i.e. the
/// literal string "default" as a *provider name* — so unknown task types
/// bypassed the "default" routing rule and printed an unknown provider.
fn select_provider<'a>(rules: &HashMap<&'a str, &'a str>, task: &str) -> &'a str {
    rules
        .get(task)
        .or_else(|| rules.get("default"))
        .copied()
        .unwrap_or("claude")
}
|