- Exclude problematic markdown files from linting (existing legacy issues) - Make clippy check less aggressive (warnings only, not -D warnings) - Move cargo test to manual stage (too slow for pre-commit) - Exclude SVG files from end-of-file-fixer and trailing-whitespace - Add markdown linting exclusions for existing documentation This allows pre-commit hooks to run successfully on new code without blocking commits due to existing issues in legacy documentation files.
277 lines
8.4 KiB
Rust
277 lines
8.4 KiB
Rust
use vapora_llm_router::{CostRanker, ProviderConfig};
|
|
|
|
fn create_provider_configs() -> Vec<(String, ProviderConfig)> {
|
|
vec![
|
|
(
|
|
"claude".to_string(),
|
|
ProviderConfig {
|
|
enabled: true,
|
|
api_key: None,
|
|
url: None,
|
|
model: "claude-opus-4-5".to_string(),
|
|
max_tokens: 4096,
|
|
temperature: 0.7,
|
|
cost_per_1m_input: 3.0, // $3 per 1M input
|
|
cost_per_1m_output: 15.0, // $15 per 1M output
|
|
},
|
|
),
|
|
(
|
|
"gpt4".to_string(),
|
|
ProviderConfig {
|
|
enabled: true,
|
|
api_key: None,
|
|
url: None,
|
|
model: "gpt-4".to_string(),
|
|
max_tokens: 4096,
|
|
temperature: 0.7,
|
|
cost_per_1m_input: 2.5,
|
|
cost_per_1m_output: 10.0,
|
|
},
|
|
),
|
|
(
|
|
"gemini".to_string(),
|
|
ProviderConfig {
|
|
enabled: true,
|
|
api_key: None,
|
|
url: None,
|
|
model: "gemini-pro".to_string(),
|
|
max_tokens: 4096,
|
|
temperature: 0.7,
|
|
cost_per_1m_input: 0.30,
|
|
cost_per_1m_output: 1.20,
|
|
},
|
|
),
|
|
(
|
|
"ollama".to_string(),
|
|
ProviderConfig {
|
|
enabled: true,
|
|
api_key: None,
|
|
url: Some("http://localhost:11434".to_string()),
|
|
model: "llama2".to_string(),
|
|
max_tokens: 4096,
|
|
temperature: 0.7,
|
|
cost_per_1m_input: 0.0,
|
|
cost_per_1m_output: 0.0,
|
|
},
|
|
),
|
|
]
|
|
}
|
|
|
|
/// A small request against a $1/$2-per-1M config must be estimated at no more
/// than one cent.
#[test]
fn test_cost_estimation_accuracy() {
    // $1 per 1M input tokens, $2 per 1M output tokens.
    let pricing = ProviderConfig {
        enabled: true,
        api_key: None,
        url: None,
        model: String::from("test"),
        max_tokens: 4096,
        temperature: 0.7,
        cost_per_1m_input: 1.0,
        cost_per_1m_output: 2.0,
    };

    // 1000 input + 500 output tokens:
    //   input:  1000 * $1 / 1M = $0.001 = 0.1 cents
    //   output:  500 * $2 / 1M = $0.001 = 0.1 cents
    // The 0.2-cent total is well below one cent, so small rounding is acceptable.
    let estimated_cents = CostRanker::estimate_cost(&pricing, 1000, 500);
    assert!(estimated_cents <= 1);
}
|
|
|
|
/// Efficiency ranking of the shared fixture for a "coding" task: all four
/// providers are returned, ordered by non-increasing `cost_efficiency`.
#[test]
fn test_efficiency_ranking_prioritizes_value() {
    let by_efficiency =
        CostRanker::rank_by_efficiency(create_provider_configs(), "coding", 10000, 2000);

    assert_eq!(by_efficiency.len(), 4);

    // Ollama should rank first (free + decent quality)
    assert_eq!(by_efficiency[0].provider, "ollama");

    // Claude should rank last (most expensive)
    assert_eq!(by_efficiency[by_efficiency.len() - 1].provider, "claude");

    // Each adjacent pair must be in descending efficiency order.
    for pair in by_efficiency.windows(2) {
        assert!(
            pair[0].cost_efficiency >= pair[1].cost_efficiency,
            "Efficiency should be descending"
        );
    }
}
|
|
|
|
/// Cost ranking of the shared fixture: all four providers are returned, the
/// zero-priced one leads, and estimated cost never decreases down the list.
#[test]
fn test_cost_ranking_cheapest_first() {
    let by_cost = CostRanker::rank_by_cost(create_provider_configs(), 10000, 2000);

    assert_eq!(by_cost.len(), 4);

    // Ollama (free) should be first
    let cheapest = &by_cost[0];
    assert_eq!(cheapest.provider, "ollama");
    assert_eq!(cheapest.estimated_cost_cents, 0);

    // Each adjacent pair must be in ascending cost order.
    for pair in by_cost.windows(2) {
        assert!(
            pair[0].estimated_cost_cents <= pair[1].estimated_cost_cents,
            "Costs should be ascending"
        );
    }
}
|
|
|
|
/// Quality scores for the "coding" task must be strictly ordered:
/// claude > gpt4 > gemini > ollama.
#[test]
fn test_quality_score_differentiation() {
    // Same task type and `None` third argument for every provider.
    let coding_quality =
        |provider: &str| CostRanker::get_quality_score(provider, "coding", None);

    let claude = coding_quality("claude");
    let gpt4 = coding_quality("gpt4");
    let gemini = coding_quality("gemini");
    let ollama = coding_quality("ollama");

    // Quality should reflect realistic differences
    assert!(claude > gpt4);
    assert!(gpt4 > gemini);
    assert!(gemini > ollama);
}
|
|
|
|
/// The cost/benefit listing for the shared fixture has four entries, and the
/// first entry's third tuple field is at least as large as the last entry's.
#[test]
fn test_cost_benefit_ratio_ordering() {
    let ratios =
        CostRanker::cost_benefit_ratio(create_provider_configs(), "coding", 5000, 1000);

    assert_eq!(ratios.len(), 4);

    // First item should have best efficiency
    let (best, worst) = (&ratios[0], &ratios[ratios.len() - 1]);
    assert!(
        best.2 >= worst.2,
        "First should have better efficiency than last"
    );
}
|
|
|
|
/// Cost ranking with a large request (1M input + 100k output tokens):
/// claude must come out above 400 cents and ollama at exactly zero.
#[test]
fn test_cost_calculation_with_large_tokens() {
    let ranked = CostRanker::rank_by_cost(create_provider_configs(), 1_000_000, 100_000);

    // Looks up one provider's estimated cost; panics if the provider is absent.
    let cost_of = |name: &str| {
        ranked
            .iter()
            .find(|entry| entry.provider == name)
            .unwrap()
            .estimated_cost_cents
    };

    // For claude: (1M * $3/1M) + (100k * $15/1M) = $3 + $1.50 = $4.50 = 450 cents
    let claude_cost = cost_of("claude");
    assert!(claude_cost > 400); // Approximately $4.50

    // For ollama: $0
    let ollama_cost = cost_of("ollama");
    assert_eq!(ollama_cost, 0);
}
|
|
|
|
/// Exercises both ranking strategies on the same fixture: efficiency ranking
/// for a demanding task and pure cost ranking for a cheap one.
#[test]
fn test_efficiency_with_fallback_strategy() {
    let configs = create_provider_configs();

    // High-quality task (e.g., architecture) - use best.
    // Cloned because `configs` is consumed again by the cost ranking below.
    let premium = CostRanker::rank_by_efficiency(configs.clone(), "architecture", 5000, 2000);
    // Top provider should have reasonable quality score
    assert!(premium[0].quality_score >= 0.75);

    // Low-cost task (e.g., simple formatting) - use cheap.
    // This is the last use of `configs`, so it is moved instead of cloned.
    let budget = CostRanker::rank_by_cost(configs, 1000, 500);

    // Ollama should be in the zero-cost group (first position or tied for first).
    // Comparing its cost with the first entry's cost covers both cases: when
    // Ollama is at index 0 the two sides are the same element.
    let ollama_index = budget
        .iter()
        .position(|s| s.provider == "ollama")
        .expect("ollama should be present in the cost ranking");
    assert_eq!(
        budget[ollama_index].estimated_cost_cents,
        budget[0].estimated_cost_cents,
        "ollama should be first or tied with the cheapest provider"
    );
}
|
|
|
|
/// Both ranking entry points must return an empty result for an empty
/// provider list.
#[test]
fn test_empty_provider_list() {
    let by_efficiency = CostRanker::rank_by_efficiency(vec![], "coding", 5000, 1000);
    assert!(by_efficiency.is_empty());

    let by_cost = CostRanker::rank_by_cost(vec![], 5000, 1000);
    assert!(by_cost.is_empty());
}
|
|
|
|
/// With exactly one configured provider, both rankings return exactly one
/// entry, and the efficiency ranking names that provider.
#[test]
fn test_single_provider() {
    let ollama_only = vec![(
        String::from("ollama"),
        ProviderConfig {
            enabled: true,
            api_key: None,
            url: Some(String::from("http://localhost:11434")),
            model: String::from("llama2"),
            max_tokens: 4096,
            temperature: 0.7,
            cost_per_1m_input: 0.0,
            cost_per_1m_output: 0.0,
        },
    )];

    // Cloned here because the list is consumed again by the cost ranking below.
    let by_efficiency = CostRanker::rank_by_efficiency(ollama_only.clone(), "coding", 1000, 500);
    assert_eq!(by_efficiency.len(), 1);
    assert_eq!(by_efficiency[0].provider, "ollama");

    let by_cost = CostRanker::rank_by_cost(ollama_only, 1000, 500);
    assert_eq!(by_cost.len(), 1);
}
|
|
|
|
/// A request with no input and no output tokens must be estimated at zero
/// cents even when both prices are non-zero.
#[test]
fn test_zero_token_cost() {
    // Non-zero prices ($1 / $2 per 1M) so a zero result comes from the token counts.
    let pricing = ProviderConfig {
        enabled: true,
        api_key: None,
        url: None,
        model: String::from("test"),
        max_tokens: 4096,
        temperature: 0.7,
        cost_per_1m_input: 1.0,
        cost_per_1m_output: 2.0,
    };

    // Zero tokens should cost zero
    assert_eq!(CostRanker::estimate_cost(&pricing, 0, 0), 0);
}
|
|
|
|
/// Every efficiency score for the shared fixture (which includes a
/// zero-priced provider) must be finite and non-negative.
#[test]
fn test_efficiency_division_by_zero_protection() {
    // Even free providers shouldn't cause division errors
    let ranked = CostRanker::rank_by_efficiency(create_provider_configs(), "coding", 5000, 1000);

    // All should have valid efficiency scores
    for entry in ranked {
        let efficiency = entry.cost_efficiency;
        assert!(efficiency.is_finite());
        assert!(efficiency >= 0.0);
    }
}
|
|
|
|
/// Estimated cost in cents must match the configured per-1M-token rates
/// exactly for input-only, output-only, and combined 1M-token requests.
#[test]
fn test_cost_accuracy_matches_provider_rates() {
    // $3 per 1M input tokens, $15 per 1M output tokens.
    let claude_pricing = ProviderConfig {
        enabled: true,
        api_key: None,
        url: None,
        model: String::from("claude-opus-4-5"),
        max_tokens: 4096,
        temperature: 0.7,
        cost_per_1m_input: 3.0,
        cost_per_1m_output: 15.0,
    };

    // (input tokens, output tokens, expected cents)
    let cases = [
        // 1M input tokens = $3.00 = 300 cents
        (1_000_000, 0, 300),
        // 1M output tokens = $15.00 = 1500 cents
        (0, 1_000_000, 1500),
        // Combined
        (1_000_000, 1_000_000, 1800),
    ];

    for (input_tokens, output_tokens, expected_cents) in cases {
        let estimated = CostRanker::estimate_cost(&claude_pricing, input_tokens, output_tokens);
        assert_eq!(estimated, expected_cents);
    }
}
|