Vapora/crates/vapora-backend/src/services/provider_analytics_service.rs

454 lines
15 KiB
Rust
Raw Normal View History

// Provider Analytics Service - Phase 7
// Analyzes provider costs, efficiency, and performance
use std::collections::HashMap;
use surrealdb::engine::remote::ws::Client;
use surrealdb::Surreal;
use tracing::debug;
use vapora_knowledge_graph::models::{
ProviderAnalytics, ProviderCostForecast, ProviderEfficiency, ProviderTaskTypeMetrics,
};
/// Service that derives provider cost, efficiency, and performance figures
/// from the execution records it queries through a SurrealDB client.
///
/// The client handle is stored behind an `Arc`, so cloning the service shares
/// the same handle instead of duplicating it.
#[derive(Clone)]
pub struct ProviderAnalyticsService {
// Shared SurrealDB WebSocket client handle used by every query in this service.
db: std::sync::Arc<Surreal<Client>>,
}
impl ProviderAnalyticsService {
/// Builds a service around `db`.
///
/// The client is moved behind an `Arc` so clones of the service share one
/// handle.
pub fn new(db: Surreal<Client>) -> Self {
    let db = std::sync::Arc::new(db);
    Self { db }
}
/// Get analytics for a specific provider.
///
/// Reads up to 10,000 `kg_executions` rows whose `provider` field equals
/// `provider` and aggregates them:
///
/// - `total_tasks` counts only rows that carry a string `outcome`; any outcome
///   other than `"success"` is counted as failed.
/// - `cost_cents`, `input_tokens` and `output_tokens` values that are missing
///   or not unsigned integers contribute nothing.
/// - `total_cost_cents` is clamped to `u32::MAX` rather than wrapping; the
///   derived averages are computed from the unclamped sum.
/// - Every ratio is `0.0` when its denominator is zero, so a provider with no
///   rows yields an all-zero result.
///
/// # Errors
/// Returns an error if the query fails or its result cannot be decoded.
pub async fn get_provider_analytics(
    &self,
    provider: &str,
) -> anyhow::Result<ProviderAnalytics> {
    debug!("Querying analytics for provider: {}", provider);

    // The provider is bound as a query parameter instead of being spliced
    // into the statement, so quotes in the value cannot alter the query.
    let rows: Vec<serde_json::Value> = self
        .db
        .query("SELECT * FROM kg_executions WHERE provider = $provider LIMIT 10000")
        .bind(("provider", provider.to_string()))
        .await?
        .take(0)?;

    // Costs are summed in u64 so large values neither truncate nor overflow.
    let mut cost_cents: u64 = 0;
    let mut total_tasks: u64 = 0;
    let mut successful_tasks: u64 = 0;
    let mut total_input_tokens: u64 = 0;
    let mut total_output_tokens: u64 = 0;

    for row in rows.iter().filter_map(serde_json::Value::as_object) {
        let uint = |key: &str| {
            row.get(key)
                .and_then(serde_json::Value::as_u64)
                .unwrap_or(0)
        };
        cost_cents = cost_cents.saturating_add(uint("cost_cents"));
        total_input_tokens = total_input_tokens.saturating_add(uint("input_tokens"));
        total_output_tokens = total_output_tokens.saturating_add(uint("output_tokens"));

        if let Some(outcome) = row.get("outcome").and_then(serde_json::Value::as_str) {
            total_tasks += 1;
            if outcome == "success" {
                successful_tasks += 1;
            }
        }
    }

    // Ratio helper that yields 0.0 when there is nothing to divide by.
    let per = |numerator: f64, denominator: u64| {
        if denominator > 0 {
            numerator / denominator as f64
        } else {
            0.0
        }
    };
    let total_tokens = total_input_tokens.saturating_add(total_output_tokens);

    Ok(ProviderAnalytics {
        provider: provider.to_string(),
        // Clamped first, so the narrowing cast cannot truncate.
        total_cost_cents: cost_cents.min(u64::from(u32::MAX)) as u32,
        total_tasks,
        successful_tasks,
        failed_tasks: total_tasks - successful_tasks,
        success_rate: per(successful_tasks as f64, total_tasks),
        avg_cost_per_task_cents: per(cost_cents as f64, total_tasks),
        total_input_tokens,
        total_output_tokens,
        cost_per_1m_tokens: per(cost_cents as f64 * 1_000_000.0, total_tokens),
    })
}
/// Get efficiency ranking for all providers.
///
/// For each distinct `provider` found in `kg_executions` this runs
/// `get_provider_analytics` (one query per provider) and derives:
///
/// - `quality_score`: the provider's success rate;
/// - `cost_score`: `1 / (1 + avg_cost_per_task_cents / 100)`, or `1.0` when
///   the average cost is zero;
/// - `efficiency_ratio`: `quality_score * cost_score`.
///
/// The result is ordered by `efficiency_ratio`, highest first, and `rank`
/// starts at 1.
///
/// # Errors
/// Returns an error if any query fails or its result cannot be decoded.
pub async fn get_provider_efficiency_ranking(&self) -> anyhow::Result<Vec<ProviderEfficiency>> {
    debug!("Calculating provider efficiency ranking");

    // SurrealQL has no `SELECT DISTINCT`; grouping by the field returns one
    // row per distinct provider.
    let rows: Vec<serde_json::Value> = self
        .db
        .query("SELECT provider FROM kg_executions GROUP BY provider")
        .await?
        .take(0)?;

    let providers: Vec<String> = rows
        .iter()
        .filter_map(|row| row.get("provider")?.as_str())
        .map(str::to_owned)
        .collect();

    let mut ranking = Vec::with_capacity(providers.len());
    for provider in providers {
        let analytics = self.get_provider_analytics(&provider).await?;
        let quality_score = analytics.success_rate;
        let cost_score = if analytics.avg_cost_per_task_cents > 0.0 {
            1.0 / (1.0 + analytics.avg_cost_per_task_cents / 100.0)
        } else {
            1.0
        };
        ranking.push(ProviderEfficiency {
            provider,
            quality_score,
            cost_score,
            efficiency_ratio: quality_score * cost_score,
            // Placeholder; real ranks are assigned after sorting.
            rank: 0,
        });
    }

    // Highest ratio first. The sort is stable, so ties keep query order.
    ranking.sort_by(|a, b| b.efficiency_ratio.total_cmp(&a.efficiency_ratio));
    for (idx, entry) in ranking.iter_mut().enumerate() {
        // The number of providers is far below u32::MAX, so this cannot truncate.
        entry.rank = (idx + 1) as u32;
    }

    Ok(ranking)
}
/// Get provider performance by task type.
///
/// Reads up to 1,000 `kg_executions` rows matching both `provider` and
/// `task_type` and aggregates them:
///
/// - `task_count` counts every returned row that is an object, whether or not
///   it carries an `outcome`;
/// - `success_rate` is the share of those rows whose `outcome` is `"success"`;
/// - `avg_duration_ms` averages `duration_ms` over `task_count`; rows without a
///   duration contribute zero to the sum;
/// - `total_cost_cents` is clamped to `u32::MAX` rather than wrapping.
///
/// Both ratios are `0.0` when no rows match.
///
/// # Errors
/// Returns an error if the query fails or its result cannot be decoded.
pub async fn get_provider_task_type_metrics(
    &self,
    provider: &str,
    task_type: &str,
) -> anyhow::Result<ProviderTaskTypeMetrics> {
    debug!(
        "Querying provider {} task type {} metrics",
        provider, task_type
    );

    // Both filters are bound as parameters instead of being spliced into the
    // statement, so quotes in either value cannot alter the query.
    let rows: Vec<serde_json::Value> = self
        .db
        .query(
            "SELECT * FROM kg_executions WHERE provider = $provider AND task_type = $task_type LIMIT 1000",
        )
        .bind(("provider", provider.to_string()))
        .bind(("task_type", task_type.to_string()))
        .await?
        .take(0)?;

    // Costs are summed in u64 so large values neither truncate nor overflow.
    let mut cost_cents: u64 = 0;
    let mut task_count: u64 = 0;
    let mut successful_count: u64 = 0;
    let mut total_duration_ms: u64 = 0;

    for row in rows.iter().filter_map(serde_json::Value::as_object) {
        let uint = |key: &str| {
            row.get(key)
                .and_then(serde_json::Value::as_u64)
                .unwrap_or(0)
        };
        task_count += 1;
        cost_cents = cost_cents.saturating_add(uint("cost_cents"));
        total_duration_ms = total_duration_ms.saturating_add(uint("duration_ms"));

        if row.get("outcome").and_then(serde_json::Value::as_str) == Some("success") {
            successful_count += 1;
        }
    }

    let (success_rate, avg_duration_ms) = if task_count > 0 {
        (
            successful_count as f64 / task_count as f64,
            total_duration_ms as f64 / task_count as f64,
        )
    } else {
        (0.0, 0.0)
    };

    Ok(ProviderTaskTypeMetrics {
        provider: provider.to_string(),
        task_type: task_type.to_string(),
        // Clamped first, so the narrowing cast cannot truncate.
        total_cost_cents: cost_cents.min(u64::from(u32::MAX)) as u32,
        task_count,
        success_rate,
        avg_duration_ms,
    })
}
/// Get cost forecast for a provider.
///
/// Works from the 100 most recent `kg_executions` rows for `provider`
/// (ordered by `executed_at`, newest first):
///
/// 1. Costs are bucketed per calendar day, where the day is the part of
///    `executed_at` before the `'T'`. Days whose total is zero are not
///    recorded. A row without a string `executed_at` is added to the day of
///    the row before it.
/// 2. `current_daily_cost_cents` is the newest bucket; weekly and monthly
///    projections are the mean bucket multiplied by 7 and 30.
/// 3. `trend` compares the mean of the newest buckets (up to 5) with the mean
///    of the older ones: `"stable"` when they differ by less than 10% of the
///    older mean, otherwise `"increasing"` or `"decreasing"`. At least one
///    bucket is always kept on the older side, so 2–5 days of data give a real
///    comparison. With fewer than two buckets the trend is
///    `"insufficient_data"`.
/// 4. `confidence` is 0.9 with 7 or more buckets, 0.7 with 3 or more,
///    otherwise 0.3.
///
/// When no rows exist the forecast is all zeros with trend `"stable"` and
/// confidence `0.0`.
///
/// # Errors
/// Returns an error if the query fails or its result cannot be decoded.
pub async fn forecast_provider_costs(
    &self,
    provider: &str,
) -> anyhow::Result<ProviderCostForecast> {
    // Upper bound on how many of the newest days form the "recent" window.
    const RECENT_WINDOW_DAYS: usize = 5;

    debug!("Forecasting costs for provider: {}", provider);

    // The provider is bound as a query parameter instead of being spliced
    // into the statement, so quotes in the value cannot alter the query.
    let rows: Vec<serde_json::Value> = self
        .db
        .query(
            "SELECT * FROM kg_executions WHERE provider = $provider ORDER BY executed_at DESC LIMIT 100",
        )
        .bind(("provider", provider.to_string()))
        .await?
        .take(0)?;

    if rows.is_empty() {
        return Ok(ProviderCostForecast {
            provider: provider.to_string(),
            current_daily_cost_cents: 0,
            projected_weekly_cost_cents: 0,
            projected_monthly_cost_cents: 0,
            trend: "stable".to_string(),
            confidence: 0.0,
        });
    }

    // Bucket costs per calendar day. Rows arrive newest first, so
    // `daily_costs[0]` is the most recent day. Sums are kept in u64 so large
    // values neither truncate nor overflow.
    let mut daily_costs: Vec<u64> = Vec::new();
    let mut current_day_cost: u64 = 0;
    let mut last_date: Option<&str> = None;

    for row in rows.iter().filter_map(serde_json::Value::as_object) {
        if let Some(executed_at) = row.get("executed_at").and_then(serde_json::Value::as_str) {
            let date = executed_at.split('T').next().unwrap_or("");
            let day_changed = matches!(last_date, Some(previous) if previous != date);
            if day_changed && current_day_cost > 0 {
                daily_costs.push(current_day_cost);
                current_day_cost = 0;
            }
            last_date = Some(date);
        }
        if let Some(cost) = row.get("cost_cents").and_then(serde_json::Value::as_u64) {
            current_day_cost = current_day_cost.saturating_add(cost);
        }
    }
    if current_day_cost > 0 {
        daily_costs.push(current_day_cost);
    }

    // Mean of a non-empty slice of daily totals.
    let mean = |days: &[u64]| days.iter().map(|&cents| cents as f64).sum::<f64>() / days.len() as f64;

    let newest_day_cost = daily_costs.first().copied().unwrap_or(0);
    let avg_daily_cost = if daily_costs.is_empty() {
        0.0
    } else {
        mean(&daily_costs)
    };

    let trend = if daily_costs.len() >= 2 {
        // Always leave at least one day on the "older" side. Without this,
        // 2–5 days of data compared against an empty window (average 0) and
        // were always reported as "increasing".
        let split = RECENT_WINDOW_DAYS.min(daily_costs.len() - 1);
        let (recent, older) = daily_costs.split_at(split);
        let recent_avg = mean(recent);
        let older_avg = mean(older);
        if (recent_avg - older_avg).abs() < older_avg * 0.1 {
            "stable"
        } else if recent_avg > older_avg {
            "increasing"
        } else {
            "decreasing"
        }
    } else {
        "insufficient_data"
    };

    let confidence = match daily_costs.len() {
        n if n >= 7 => 0.9,
        n if n >= 3 => 0.7,
        _ => 0.3,
    };

    Ok(ProviderCostForecast {
        provider: provider.to_string(),
        // Clamped first, so the narrowing cast cannot truncate.
        current_daily_cost_cents: newest_day_cost.min(u64::from(u32::MAX)) as u32,
        // Float-to-integer `as` casts saturate at the target type's bounds.
        projected_weekly_cost_cents: (avg_daily_cost * 7.0) as u32,
        projected_monthly_cost_cents: (avg_daily_cost * 30.0) as u32,
        trend: trend.to_string(),
        confidence,
    })
}
/// Get cost breakdown by provider.
///
/// Scans every `kg_executions` row and sums `cost_cents` per `provider`.
/// Rows lacking a string `provider` or an unsigned-integer `cost_cents` are
/// skipped. Each total saturates at `u32::MAX` instead of wrapping.
///
/// # Errors
/// Returns an error if the query fails or its result cannot be decoded.
pub async fn get_cost_breakdown_by_provider(&self) -> anyhow::Result<HashMap<String, u32>> {
    debug!("Getting cost breakdown by provider");

    let rows: Vec<serde_json::Value> = self
        .db
        .query("SELECT provider, cost_cents FROM kg_executions")
        .await?
        .take(0)?;

    let mut breakdown: HashMap<String, u32> = HashMap::new();
    for row in rows.iter().filter_map(serde_json::Value::as_object) {
        let (Some(provider), Some(cost)) = (
            row.get("provider").and_then(serde_json::Value::as_str),
            row.get("cost_cents").and_then(serde_json::Value::as_u64),
        ) else {
            continue;
        };
        // Clamped first, so the narrowing cast cannot truncate.
        let cost = cost.min(u64::from(u32::MAX)) as u32;
        let total = breakdown.entry(provider.to_string()).or_insert(0);
        *total = total.saturating_add(cost);
    }

    Ok(breakdown)
}
/// Get cost breakdown by task type and provider.
///
/// Scans every `kg_executions` row and sums `cost_cents` into a two-level
/// map keyed first by `provider`, then by `task_type`. Rows lacking a string
/// `provider`, a string `task_type`, or an unsigned-integer `cost_cents` are
/// skipped. Each total saturates at `u32::MAX` instead of wrapping.
///
/// # Errors
/// Returns an error if the query fails or its result cannot be decoded.
#[allow(dead_code)]
pub async fn get_cost_breakdown_by_task_and_provider(
    &self,
) -> anyhow::Result<HashMap<String, HashMap<String, u32>>> {
    debug!("Getting cost breakdown by task type and provider");

    let rows: Vec<serde_json::Value> = self
        .db
        .query("SELECT provider, task_type, cost_cents FROM kg_executions")
        .await?
        .take(0)?;

    let mut breakdown: HashMap<String, HashMap<String, u32>> = HashMap::new();
    for row in rows.iter().filter_map(serde_json::Value::as_object) {
        let (Some(provider), Some(task_type), Some(cost)) = (
            row.get("provider").and_then(serde_json::Value::as_str),
            row.get("task_type").and_then(serde_json::Value::as_str),
            row.get("cost_cents").and_then(serde_json::Value::as_u64),
        ) else {
            continue;
        };
        // Clamped first, so the narrowing cast cannot truncate.
        let cost = cost.min(u64::from(u32::MAX)) as u32;
        let total = breakdown
            .entry(provider.to_string())
            .or_default()
            .entry(task_type.to_string())
            .or_insert(0);
        *total = total.saturating_add(cost);
    }

    Ok(breakdown)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A `ProviderAnalytics` literal exposes the values it was built with.
    #[test]
    fn test_provider_analytics_creation() {
        let sample = ProviderAnalytics {
            provider: "claude".to_string(),
            total_cost_cents: 1000,
            total_tasks: 10,
            successful_tasks: 9,
            failed_tasks: 1,
            success_rate: 0.9,
            avg_cost_per_task_cents: 100.0,
            total_input_tokens: 50000,
            total_output_tokens: 25000,
            cost_per_1m_tokens: 13.3,
        };

        let ProviderAnalytics {
            provider,
            total_tasks,
            success_rate,
            ..
        } = sample;
        assert_eq!(provider, "claude");
        assert_eq!(total_tasks, 10);
        assert_eq!(success_rate, 0.9);
    }

    /// A `ProviderEfficiency` literal keeps its rank and ratio.
    #[test]
    fn test_provider_efficiency_calculation() {
        let entry = ProviderEfficiency {
            provider: "claude".to_string(),
            quality_score: 0.9,
            cost_score: 0.8,
            efficiency_ratio: 0.72,
            rank: 1,
        };

        let ProviderEfficiency {
            rank,
            efficiency_ratio,
            ..
        } = entry;
        assert_eq!(rank, 1);
        assert!(efficiency_ratio > 0.7);
    }

    /// A `ProviderCostForecast` literal keeps its three cost figures.
    #[test]
    fn test_cost_forecast() {
        let projection = ProviderCostForecast {
            provider: "claude".to_string(),
            current_daily_cost_cents: 500,
            projected_weekly_cost_cents: 3500,
            projected_monthly_cost_cents: 15000,
            trend: "stable".to_string(),
            confidence: 0.9,
        };

        let ProviderCostForecast {
            current_daily_cost_cents,
            projected_weekly_cost_cents,
            projected_monthly_cost_cents,
            ..
        } = projection;
        assert_eq!(current_daily_cost_cents, 500);
        assert_eq!(projected_weekly_cost_cents, 3500);
        assert_eq!(projected_monthly_cost_cents, 15000);
    }
}