// Source: Vapora/crates/vapora-backend/tests/provider_analytics_test.rs
// Provider Analytics Integration Tests
// Phase 7: Comprehensive tests for provider analytics functionality
//
// These are model-level fixture tests: each test constructs the analytics
// structs by hand and checks field round-trips plus internal consistency.
// Two invariants are enforced throughout:
//   * ranks must agree with efficiency ratios (rank 1 = highest ratio);
//   * cost_per_1m_tokens must agree with the canonical formula
//     `cents * 1_000_000 / total_tokens` exercised in
//     test_cost_per_token_calculation.
#[cfg(test)]
mod provider_analytics_tests {
    use vapora_knowledge_graph::models::{
        ProviderAnalytics, ProviderCostForecast, ProviderEfficiency, ProviderTaskTypeMetrics,
    };

    /// Tolerance for comparing derived `f64` metrics.
    const EPS: f64 = 0.01;

    #[test]
    fn test_provider_analytics_creation() {
        // 1000 cents over 75k total tokens:
        // 1000 * 1_000_000 / 75_000 ≈ 13_333.33 cents per 1M tokens.
        // (A previous fixture said 13.3, off by a factor of 1000 relative to
        // the formula used elsewhere in this file.)
        let analytics = ProviderAnalytics {
            provider: "claude".to_string(),
            total_cost_cents: 1000,
            total_tasks: 10,
            successful_tasks: 9,
            failed_tasks: 1,
            success_rate: 0.9,
            avg_cost_per_task_cents: 100.0,
            total_input_tokens: 50000,
            total_output_tokens: 25000,
            cost_per_1m_tokens: 13_333.33,
        };
        assert_eq!(analytics.provider, "claude");
        assert_eq!(analytics.total_tasks, 10);
        assert_eq!(analytics.successful_tasks, 9);
        assert_eq!(analytics.failed_tasks, 1);
        assert_eq!(analytics.success_rate, 0.9);
        assert_eq!(analytics.avg_cost_per_task_cents, 100.0);
        assert_eq!(analytics.total_input_tokens, 50000);
        assert_eq!(analytics.total_output_tokens, 25000);
        assert!((analytics.cost_per_1m_tokens - 13_333.33).abs() < EPS);
    }

    #[test]
    fn test_provider_efficiency_calculation() {
        // efficiency_ratio is the product quality_score * cost_score
        // (0.9 * 0.8 = 0.72).
        let efficiency = ProviderEfficiency {
            provider: "claude".to_string(),
            quality_score: 0.9,
            cost_score: 0.8,
            efficiency_ratio: 0.72,
            rank: 1,
        };
        assert_eq!(efficiency.provider, "claude");
        assert_eq!(efficiency.quality_score, 0.9);
        assert_eq!(efficiency.cost_score, 0.8);
        assert_eq!(efficiency.efficiency_ratio, 0.72);
        assert_eq!(efficiency.rank, 1);
        assert!(efficiency.efficiency_ratio > 0.7);
        // The ratio must actually equal the product of its two components.
        assert!(
            (efficiency.efficiency_ratio - efficiency.quality_score * efficiency.cost_score).abs()
                < EPS
        );
    }

    #[test]
    fn test_provider_efficiency_ranking_order() {
        // Fixtures sorted by rank. Ranks must be consistent with
        // efficiency_ratio: the highest ratio gets rank 1. Note that gemini
        // (0.7125) out-ranks gpt-4 (0.68) — an earlier fixture had these two
        // swapped, contradicting its own ordering invariant.
        let efficiencies = vec![
            ProviderEfficiency {
                provider: "claude".to_string(),
                quality_score: 0.95,
                cost_score: 0.9,
                efficiency_ratio: 0.855,
                rank: 1,
            },
            ProviderEfficiency {
                provider: "gemini".to_string(),
                quality_score: 0.75,
                cost_score: 0.95,
                efficiency_ratio: 0.7125,
                rank: 2,
            },
            ProviderEfficiency {
                provider: "gpt-4".to_string(),
                quality_score: 0.85,
                cost_score: 0.8,
                efficiency_ratio: 0.68,
                rank: 3,
            },
        ];
        // Verify ordering across EVERY adjacent pair (not just the first):
        // higher efficiency_ratio => numerically lower (better) rank.
        for pair in efficiencies.windows(2) {
            assert!(pair[0].efficiency_ratio > pair[1].efficiency_ratio);
            assert!(pair[0].rank < pair[1].rank);
        }
    }

    #[test]
    fn test_provider_task_type_metrics() {
        let metrics = ProviderTaskTypeMetrics {
            provider: "claude".to_string(),
            task_type: "code_review".to_string(),
            total_cost_cents: 500,
            task_count: 5,
            success_rate: 1.0,
            avg_duration_ms: 2500.0,
        };
        assert_eq!(metrics.provider, "claude");
        assert_eq!(metrics.task_type, "code_review");
        assert_eq!(metrics.total_cost_cents, 500);
        assert_eq!(metrics.task_count, 5);
        assert_eq!(metrics.success_rate, 1.0);
        assert_eq!(metrics.avg_duration_ms, 2500.0);
        // Integer average cost per task: 500 / 5 = 100 cents (exact division
        // here, so integer arithmetic is safe).
        assert_eq!(metrics.total_cost_cents / metrics.task_count as u32, 100);
    }

    #[test]
    fn test_provider_cost_forecast() {
        let forecast = ProviderCostForecast {
            provider: "claude".to_string(),
            current_daily_cost_cents: 500,
            projected_weekly_cost_cents: 3500,
            projected_monthly_cost_cents: 15000,
            trend: "stable".to_string(),
            confidence: 0.9,
        };
        assert_eq!(forecast.provider, "claude");
        assert_eq!(forecast.current_daily_cost_cents, 500);
        assert_eq!(forecast.projected_weekly_cost_cents, 3500);
        assert_eq!(forecast.projected_monthly_cost_cents, 15000);
        assert_eq!(forecast.trend, "stable");
        assert_eq!(forecast.confidence, 0.9);
        // For a "stable" trend the weekly projection should be roughly 7x the
        // daily cost; allow a 100-cent tolerance for smoothing.
        let expected_weekly = forecast.current_daily_cost_cents as u32 * 7;
        assert!(
            (forecast.projected_weekly_cost_cents as i32 - expected_weekly as i32).abs() <= 100
        );
    }

    #[test]
    fn test_success_rate_calculation_with_zero_tasks() {
        // Degenerate case: a provider with no recorded tasks. All derived
        // rates must be 0 rather than NaN or a division-by-zero panic.
        let analytics = ProviderAnalytics {
            provider: "ollama".to_string(),
            total_cost_cents: 0,
            total_tasks: 0,
            successful_tasks: 0,
            failed_tasks: 0,
            success_rate: 0.0,
            avg_cost_per_task_cents: 0.0,
            total_input_tokens: 0,
            total_output_tokens: 0,
            cost_per_1m_tokens: 0.0,
        };
        assert_eq!(analytics.success_rate, 0.0);
        assert_eq!(analytics.avg_cost_per_task_cents, 0.0);
    }

    #[test]
    fn test_cost_per_token_calculation() {
        // Canonical formula used across this suite:
        // cents * 1_000_000 / total_tokens = cents per 1M tokens.
        let total_tokens = 1_000_000u64;
        let cost_cents = 10u32;
        let cost_per_1m_tokens = (cost_cents as f64 * 1_000_000.0) / (total_tokens as f64);
        assert!((cost_per_1m_tokens - 10.0).abs() < EPS);
    }

    #[test]
    fn test_cost_per_token_with_different_volumes() {
        // 1M tokens for 10 cents => 10 cents per 1M tokens.
        let analytics1 = ProviderAnalytics {
            provider: "claude".to_string(),
            total_cost_cents: 10,
            total_tasks: 1,
            successful_tasks: 1,
            failed_tasks: 0,
            success_rate: 1.0,
            avg_cost_per_task_cents: 10.0,
            total_input_tokens: 500_000,
            total_output_tokens: 500_000,
            cost_per_1m_tokens: 10.0,
        };
        // 10M tokens for 50 cents => 5 cents per 1M tokens (bulk is cheaper).
        let analytics2 = ProviderAnalytics {
            provider: "gpt-4".to_string(),
            total_cost_cents: 50,
            total_tasks: 1,
            successful_tasks: 1,
            failed_tasks: 0,
            success_rate: 1.0,
            avg_cost_per_task_cents: 50.0,
            total_input_tokens: 5_000_000,
            total_output_tokens: 5_000_000,
            cost_per_1m_tokens: 5.0, // 50 cents / 10M tokens = 5 cents per 1M
        };
        // Higher volume, lower per-token rate.
        assert!(analytics2.cost_per_1m_tokens < analytics1.cost_per_1m_tokens);
        assert_eq!(analytics2.total_cost_cents, 50);
    }

    #[test]
    fn test_efficiency_ratio_quality_vs_cost() {
        // High quality, high cost provider.
        let high_quality_high_cost = ProviderEfficiency {
            provider: "claude".to_string(),
            quality_score: 0.95,
            cost_score: 0.5, // Lower score because expensive
            efficiency_ratio: 0.475, // 0.95 * 0.5
            rank: 1,
        };
        // Lower quality, low cost provider.
        let low_quality_low_cost = ProviderEfficiency {
            provider: "ollama".to_string(),
            quality_score: 0.7,
            cost_score: 0.95, // Higher score because cheap
            efficiency_ratio: 0.665, // 0.7 * 0.95
            rank: 1,
        };
        // Claude wins on quality, Ollama on cost...
        assert!(high_quality_high_cost.quality_score > low_quality_low_cost.quality_score);
        assert!(high_quality_high_cost.cost_score < low_quality_low_cost.cost_score);
        // ...and with this scoring the cheap provider wins on overall
        // efficiency: 0.665 > 0.475.
        assert!(low_quality_low_cost.efficiency_ratio > high_quality_high_cost.efficiency_ratio);
    }

    #[test]
    fn test_forecast_trend_detection() {
        // Stable: weekly projection ≈ 7x daily (500 * 7 = 3500).
        let stable_forecast = ProviderCostForecast {
            provider: "claude".to_string(),
            current_daily_cost_cents: 500,
            projected_weekly_cost_cents: 3500,
            projected_monthly_cost_cents: 15000,
            trend: "stable".to_string(),
            confidence: 0.9,
        };
        // Increasing: weekly projection above 7x daily (5200 > 4200).
        let increasing_forecast = ProviderCostForecast {
            provider: "gpt-4".to_string(),
            current_daily_cost_cents: 600,
            projected_weekly_cost_cents: 5200,
            projected_monthly_cost_cents: 20000,
            trend: "increasing".to_string(),
            confidence: 0.85,
        };
        // Decreasing: weekly projection below 7x daily (1750 < 2100).
        let decreasing_forecast = ProviderCostForecast {
            provider: "gemini".to_string(),
            current_daily_cost_cents: 300,
            projected_weekly_cost_cents: 1750,
            projected_monthly_cost_cents: 7500,
            trend: "decreasing".to_string(),
            confidence: 0.7,
        };
        assert_eq!(stable_forecast.trend, "stable");
        assert_eq!(increasing_forecast.trend, "increasing");
        assert_eq!(decreasing_forecast.trend, "decreasing");
        // These fixtures model confidence dropping as the trend gets noisier.
        assert!(stable_forecast.confidence > increasing_forecast.confidence);
        assert!(increasing_forecast.confidence > decreasing_forecast.confidence);
    }

    #[test]
    fn test_forecast_confidence_based_on_data_volume() {
        // High confidence: models 7+ days of historical data.
        let high_confidence = ProviderCostForecast {
            provider: "claude".to_string(),
            current_daily_cost_cents: 500,
            projected_weekly_cost_cents: 3500,
            projected_monthly_cost_cents: 15000,
            trend: "stable".to_string(),
            confidence: 0.9,
        };
        // Medium confidence: models 3-6 days of data.
        let medium_confidence = ProviderCostForecast {
            provider: "gpt-4".to_string(),
            current_daily_cost_cents: 400,
            projected_weekly_cost_cents: 2800,
            projected_monthly_cost_cents: 12000,
            trend: "stable".to_string(),
            confidence: 0.7,
        };
        // Low confidence: models < 3 days of data — not enough to call a trend.
        let low_confidence = ProviderCostForecast {
            provider: "gemini".to_string(),
            current_daily_cost_cents: 300,
            projected_weekly_cost_cents: 2100,
            projected_monthly_cost_cents: 9000,
            trend: "insufficient_data".to_string(),
            confidence: 0.3,
        };
        assert!(high_confidence.confidence > medium_confidence.confidence);
        assert!(medium_confidence.confidence > low_confidence.confidence);
    }

    #[test]
    fn test_provider_comparison_cost_quality_tradeoff() {
        // Ranks here are consistent with efficiency_ratio ordering:
        // gemini (0.68) > gpt-4 (0.63) > ollama (0.60) > claude (0.57).
        // (An earlier fixture assigned rank 1 twice and put claude above
        // ollama despite a lower ratio.)
        // Claude: highest quality, most expensive — lowest overall efficiency.
        let claude = ProviderEfficiency {
            provider: "claude".to_string(),
            quality_score: 0.95,
            cost_score: 0.6,
            efficiency_ratio: 0.57,
            rank: 4,
        };
        // GPT-4: high quality, medium cost.
        let gpt4 = ProviderEfficiency {
            provider: "gpt-4".to_string(),
            quality_score: 0.90,
            cost_score: 0.7,
            efficiency_ratio: 0.63,
            rank: 2,
        };
        // Gemini: good quality, low cost — best overall tradeoff.
        let gemini = ProviderEfficiency {
            provider: "gemini".to_string(),
            quality_score: 0.80,
            cost_score: 0.85,
            efficiency_ratio: 0.68,
            rank: 1,
        };
        // Ollama: lowest quality, free.
        let ollama = ProviderEfficiency {
            provider: "ollama".to_string(),
            quality_score: 0.6,
            cost_score: 1.0,
            efficiency_ratio: 0.6,
            rank: 3,
        };
        // Verify quality ordering.
        assert!(claude.quality_score > gpt4.quality_score);
        assert!(gpt4.quality_score > gemini.quality_score);
        assert!(gemini.quality_score > ollama.quality_score);
        // Verify cost score ordering (higher = cheaper).
        assert!(ollama.cost_score > gemini.cost_score);
        assert!(gemini.cost_score > gpt4.cost_score);
        assert!(gpt4.cost_score > claude.cost_score);
        // Verify ranks follow efficiency ratios.
        assert!(gemini.rank < gpt4.rank);
        assert!(gpt4.rank < ollama.rank);
        assert!(ollama.rank < claude.rank);
    }

    #[test]
    fn test_multiple_task_types_per_provider() {
        // One provider ("claude") broken down across three task types with
        // differing cost, volume, and reliability.
        let code_review = ProviderTaskTypeMetrics {
            provider: "claude".to_string(),
            task_type: "code_review".to_string(),
            total_cost_cents: 500,
            task_count: 5,
            success_rate: 1.0,
            avg_duration_ms: 2500.0,
        };
        let documentation = ProviderTaskTypeMetrics {
            provider: "claude".to_string(),
            task_type: "documentation".to_string(),
            total_cost_cents: 300,
            task_count: 10,
            success_rate: 0.9,
            avg_duration_ms: 1500.0,
        };
        let testing = ProviderTaskTypeMetrics {
            provider: "claude".to_string(),
            task_type: "testing".to_string(),
            total_cost_cents: 200,
            task_count: 8,
            success_rate: 0.875, // 7 of 8
            avg_duration_ms: 3000.0,
        };
        // Same provider, distinct task types.
        assert_eq!(code_review.provider, documentation.provider);
        assert_eq!(documentation.provider, testing.provider);
        assert_ne!(code_review.task_type, documentation.task_type);
        assert_ne!(documentation.task_type, testing.task_type);
        // Success rates vary by task type.
        assert!(code_review.success_rate > documentation.success_rate);
        assert!(documentation.success_rate > testing.success_rate);
    }

    #[test]
    fn test_provider_cost_tracking_over_time() {
        // Three independent per-day snapshots for the same provider. The
        // per-task rate (50 cents) and the token rate
        // (500 * 1_000_000 / 150_000 ≈ 3_333.33 cents per 1M tokens) are
        // identical every day; only the volume varies.
        // Day 1: $5, 10 tasks.
        let day1 = ProviderAnalytics {
            provider: "claude".to_string(),
            total_cost_cents: 500,
            total_tasks: 10,
            successful_tasks: 10,
            failed_tasks: 0,
            success_rate: 1.0,
            avg_cost_per_task_cents: 50.0,
            total_input_tokens: 100_000,
            total_output_tokens: 50_000,
            cost_per_1m_tokens: 3_333.33,
        };
        // Day 2: $7, 14 tasks.
        let day2 = ProviderAnalytics {
            provider: "claude".to_string(),
            total_cost_cents: 700,
            total_tasks: 14,
            successful_tasks: 14,
            failed_tasks: 0,
            success_rate: 1.0,
            avg_cost_per_task_cents: 50.0,
            total_input_tokens: 140_000,
            total_output_tokens: 70_000,
            cost_per_1m_tokens: 3_333.33,
        };
        // Day 3: $6, 12 tasks.
        let day3 = ProviderAnalytics {
            provider: "claude".to_string(),
            total_cost_cents: 600,
            total_tasks: 12,
            successful_tasks: 12,
            failed_tasks: 0,
            success_rate: 1.0,
            avg_cost_per_task_cents: 50.0,
            total_input_tokens: 120_000,
            total_output_tokens: 60_000,
            cost_per_1m_tokens: 3_333.33,
        };
        // Cost and volume move together day over day.
        assert!(day2.total_cost_cents > day1.total_cost_cents);
        assert!(day3.total_cost_cents > day1.total_cost_cents);
        assert!(day2.total_tasks > day1.total_tasks);
        // The per-task rate stays constant across days.
        assert!((day1.avg_cost_per_task_cents - day2.avg_cost_per_task_cents).abs() < EPS);
        assert!((day2.avg_cost_per_task_cents - day3.avg_cost_per_task_cents).abs() < EPS);
    }

    #[test]
    fn test_provider_with_high_failure_rate() {
        // "unstable": half of all tasks fail, but each attempt is cheap.
        let failing_provider = ProviderAnalytics {
            provider: "unstable".to_string(),
            total_cost_cents: 1000,
            total_tasks: 100,
            successful_tasks: 50,
            failed_tasks: 50,
            success_rate: 0.5,
            avg_cost_per_task_cents: 10.0,
            total_input_tokens: 500_000,
            total_output_tokens: 250_000,
            // 1000 * 1_000_000 / 750_000 ≈ 1_333.33
            cost_per_1m_tokens: 1_333.33,
        };
        // "reliable": twice the per-attempt cost, 95% success.
        let reliable_provider = ProviderAnalytics {
            provider: "reliable".to_string(),
            total_cost_cents: 2000,
            total_tasks: 100,
            successful_tasks: 95,
            failed_tasks: 5,
            success_rate: 0.95,
            avg_cost_per_task_cents: 20.0,
            total_input_tokens: 500_000,
            total_output_tokens: 250_000,
            // 2000 * 1_000_000 / 750_000 ≈ 2_666.67
            cost_per_1m_tokens: 2_666.67,
        };
        // The unstable provider is cheaper per attempted task...
        assert!(
            failing_provider.avg_cost_per_task_cents < reliable_provider.avg_cost_per_task_cents
        );
        // ...but far less reliable.
        assert!(failing_provider.success_rate < reliable_provider.success_rate);
        // Cost per *successful* task nearly erases the price advantage:
        // unstable: 1000 / 50 = 20.0; reliable: 2000 / 95 ≈ 21.05.
        let failing_per_success =
            failing_provider.total_cost_cents as f64 / failing_provider.successful_tasks as f64;
        let reliable_per_success =
            reliable_provider.total_cost_cents as f64 / reliable_provider.successful_tasks as f64;
        assert!((failing_per_success - 20.0).abs() < EPS);
        assert!((reliable_per_success - 2000.0 / 95.0).abs() < EPS);
    }
}