2026-01-11 21:32:56 +00:00
|
|
|
// Provider Analytics Service - Phase 7
|
|
|
|
|
// Analyzes provider costs, efficiency, and performance
|
|
|
|
|
|
|
|
|
|
use std::collections::HashMap;
|
2026-01-11 21:46:08 +00:00
|
|
|
|
2026-01-11 21:32:56 +00:00
|
|
|
use surrealdb::engine::remote::ws::Client;
|
|
|
|
|
use surrealdb::Surreal;
|
|
|
|
|
use tracing::debug;
|
|
|
|
|
use vapora_knowledge_graph::models::{
|
2026-01-11 21:46:08 +00:00
|
|
|
ProviderAnalytics, ProviderCostForecast, ProviderEfficiency, ProviderTaskTypeMetrics,
|
2026-01-11 21:32:56 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
#[derive(Clone)]
|
|
|
|
|
pub struct ProviderAnalyticsService {
|
|
|
|
|
db: std::sync::Arc<Surreal<Client>>,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl ProviderAnalyticsService {
|
|
|
|
|
pub fn new(db: Surreal<Client>) -> Self {
|
|
|
|
|
Self {
|
|
|
|
|
db: std::sync::Arc::new(db),
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Get analytics for a specific provider
|
2026-01-11 21:46:08 +00:00
|
|
|
pub async fn get_provider_analytics(
|
|
|
|
|
&self,
|
|
|
|
|
provider: &str,
|
|
|
|
|
) -> anyhow::Result<ProviderAnalytics> {
|
2026-01-11 21:32:56 +00:00
|
|
|
debug!("Querying analytics for provider: {}", provider);
|
|
|
|
|
|
|
|
|
|
let query = format!(
|
|
|
|
|
"SELECT * FROM kg_executions WHERE provider = '{}' LIMIT 10000",
|
|
|
|
|
provider
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
let mut response: Vec<serde_json::Value> = self.db.query(&query).await?.take(0)?;
|
|
|
|
|
|
|
|
|
|
if response.is_empty() {
|
|
|
|
|
return Ok(ProviderAnalytics {
|
|
|
|
|
provider: provider.to_string(),
|
|
|
|
|
total_cost_cents: 0,
|
|
|
|
|
total_tasks: 0,
|
|
|
|
|
successful_tasks: 0,
|
|
|
|
|
failed_tasks: 0,
|
|
|
|
|
success_rate: 0.0,
|
|
|
|
|
avg_cost_per_task_cents: 0.0,
|
|
|
|
|
total_input_tokens: 0,
|
|
|
|
|
total_output_tokens: 0,
|
|
|
|
|
cost_per_1m_tokens: 0.0,
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let mut total_cost_cents: u32 = 0;
|
|
|
|
|
let mut total_tasks: u64 = 0;
|
|
|
|
|
let mut successful_tasks: u64 = 0;
|
|
|
|
|
let mut failed_tasks: u64 = 0;
|
|
|
|
|
let mut total_input_tokens: u64 = 0;
|
|
|
|
|
let mut total_output_tokens: u64 = 0;
|
|
|
|
|
|
|
|
|
|
for record in response.iter_mut() {
|
|
|
|
|
if let Some(obj) = record.as_object_mut() {
|
|
|
|
|
if let Some(cost) = obj.get("cost_cents").and_then(|v| v.as_u64()) {
|
|
|
|
|
total_cost_cents += cost as u32;
|
|
|
|
|
}
|
2026-02-03 21:35:00 +00:00
|
|
|
if let Some(outcome) = obj.get("outcome").and_then(|v| v.as_str()) {
|
2026-01-11 21:32:56 +00:00
|
|
|
total_tasks += 1;
|
2026-02-03 21:35:00 +00:00
|
|
|
match outcome {
|
|
|
|
|
"success" => successful_tasks += 1,
|
|
|
|
|
_ => failed_tasks += 1,
|
2026-01-11 21:32:56 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if let Some(input) = obj.get("input_tokens").and_then(|v| v.as_u64()) {
|
|
|
|
|
total_input_tokens += input;
|
|
|
|
|
}
|
|
|
|
|
if let Some(output) = obj.get("output_tokens").and_then(|v| v.as_u64()) {
|
|
|
|
|
total_output_tokens += output;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let success_rate = if total_tasks > 0 {
|
|
|
|
|
successful_tasks as f64 / total_tasks as f64
|
|
|
|
|
} else {
|
|
|
|
|
0.0
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let avg_cost_per_task_cents = if total_tasks > 0 {
|
|
|
|
|
total_cost_cents as f64 / total_tasks as f64
|
|
|
|
|
} else {
|
|
|
|
|
0.0
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let total_tokens = total_input_tokens + total_output_tokens;
|
|
|
|
|
let cost_per_1m_tokens = if total_tokens > 0 {
|
|
|
|
|
(total_cost_cents as f64 * 1_000_000.0) / (total_tokens as f64)
|
|
|
|
|
} else {
|
|
|
|
|
0.0
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
Ok(ProviderAnalytics {
|
|
|
|
|
provider: provider.to_string(),
|
|
|
|
|
total_cost_cents,
|
|
|
|
|
total_tasks,
|
|
|
|
|
successful_tasks,
|
|
|
|
|
failed_tasks,
|
|
|
|
|
success_rate,
|
|
|
|
|
avg_cost_per_task_cents,
|
|
|
|
|
total_input_tokens,
|
|
|
|
|
total_output_tokens,
|
|
|
|
|
cost_per_1m_tokens,
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Get efficiency ranking for all providers
|
|
|
|
|
pub async fn get_provider_efficiency_ranking(&self) -> anyhow::Result<Vec<ProviderEfficiency>> {
|
|
|
|
|
debug!("Calculating provider efficiency ranking");
|
|
|
|
|
|
|
|
|
|
let query = "SELECT DISTINCT(provider) as provider FROM kg_executions";
|
|
|
|
|
let response: Vec<serde_json::Value> = self.db.query(query).await?.take(0)?;
|
|
|
|
|
|
|
|
|
|
let mut providers = Vec::new();
|
|
|
|
|
for record in response.iter() {
|
|
|
|
|
if let Some(obj) = record.as_object() {
|
|
|
|
|
if let Some(provider) = obj.get("provider").and_then(|v| v.as_str()) {
|
|
|
|
|
providers.push(provider.to_string());
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let mut efficiency_scores = Vec::new();
|
|
|
|
|
|
|
|
|
|
for provider in providers {
|
|
|
|
|
let analytics = self.get_provider_analytics(&provider).await?;
|
|
|
|
|
|
|
|
|
|
let quality_score = analytics.success_rate;
|
|
|
|
|
let cost_score = if analytics.avg_cost_per_task_cents > 0.0 {
|
|
|
|
|
1.0 / (1.0 + analytics.avg_cost_per_task_cents / 100.0)
|
|
|
|
|
} else {
|
|
|
|
|
1.0
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let efficiency_ratio = quality_score * cost_score;
|
|
|
|
|
|
|
|
|
|
efficiency_scores.push((
|
|
|
|
|
provider.clone(),
|
|
|
|
|
efficiency_ratio,
|
|
|
|
|
ProviderEfficiency {
|
|
|
|
|
provider,
|
|
|
|
|
quality_score,
|
|
|
|
|
cost_score,
|
|
|
|
|
efficiency_ratio,
|
|
|
|
|
rank: 0,
|
|
|
|
|
},
|
|
|
|
|
));
|
|
|
|
|
}
|
|
|
|
|
|
2026-01-11 21:46:08 +00:00
|
|
|
efficiency_scores
|
|
|
|
|
.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
|
2026-01-11 21:32:56 +00:00
|
|
|
|
|
|
|
|
let result: Vec<ProviderEfficiency> = efficiency_scores
|
|
|
|
|
.into_iter()
|
|
|
|
|
.enumerate()
|
|
|
|
|
.map(|(idx, (_, _, mut eff))| {
|
|
|
|
|
eff.rank = (idx + 1) as u32;
|
|
|
|
|
eff
|
|
|
|
|
})
|
|
|
|
|
.collect();
|
|
|
|
|
|
|
|
|
|
Ok(result)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Get provider performance by task type
|
|
|
|
|
pub async fn get_provider_task_type_metrics(
|
|
|
|
|
&self,
|
|
|
|
|
provider: &str,
|
|
|
|
|
task_type: &str,
|
|
|
|
|
) -> anyhow::Result<ProviderTaskTypeMetrics> {
|
|
|
|
|
debug!(
|
|
|
|
|
"Querying provider {} task type {} metrics",
|
|
|
|
|
provider, task_type
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
let query = format!(
|
|
|
|
|
"SELECT * FROM kg_executions WHERE provider = '{}' AND task_type = '{}' LIMIT 1000",
|
|
|
|
|
provider, task_type
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
let response: Vec<serde_json::Value> = self.db.query(&query).await?.take(0)?;
|
|
|
|
|
|
|
|
|
|
let mut total_cost_cents: u32 = 0;
|
|
|
|
|
let mut task_count: u64 = 0;
|
|
|
|
|
let mut successful_count: u64 = 0;
|
|
|
|
|
let mut total_duration_ms: u64 = 0;
|
|
|
|
|
|
|
|
|
|
for record in response.iter() {
|
|
|
|
|
if let Some(obj) = record.as_object() {
|
|
|
|
|
if let Some(cost) = obj.get("cost_cents").and_then(|v| v.as_u64()) {
|
|
|
|
|
total_cost_cents += cost as u32;
|
|
|
|
|
}
|
|
|
|
|
task_count += 1;
|
|
|
|
|
|
|
|
|
|
if let Some(outcome) = obj.get("outcome").and_then(|v| v.as_str()) {
|
|
|
|
|
if outcome == "success" {
|
|
|
|
|
successful_count += 1;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if let Some(duration) = obj.get("duration_ms").and_then(|v| v.as_u64()) {
|
|
|
|
|
total_duration_ms += duration;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let success_rate = if task_count > 0 {
|
|
|
|
|
successful_count as f64 / task_count as f64
|
|
|
|
|
} else {
|
|
|
|
|
0.0
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let avg_duration_ms = if task_count > 0 {
|
|
|
|
|
total_duration_ms as f64 / task_count as f64
|
|
|
|
|
} else {
|
|
|
|
|
0.0
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
Ok(ProviderTaskTypeMetrics {
|
|
|
|
|
provider: provider.to_string(),
|
|
|
|
|
task_type: task_type.to_string(),
|
|
|
|
|
total_cost_cents,
|
|
|
|
|
task_count,
|
|
|
|
|
success_rate,
|
|
|
|
|
avg_duration_ms,
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Get cost forecast for a provider
|
2026-01-11 21:46:08 +00:00
|
|
|
pub async fn forecast_provider_costs(
|
|
|
|
|
&self,
|
|
|
|
|
provider: &str,
|
|
|
|
|
) -> anyhow::Result<ProviderCostForecast> {
|
2026-01-11 21:32:56 +00:00
|
|
|
debug!("Forecasting costs for provider: {}", provider);
|
|
|
|
|
|
|
|
|
|
let query = format!(
|
|
|
|
|
"SELECT * FROM kg_executions WHERE provider = '{}' ORDER BY executed_at DESC LIMIT 100",
|
|
|
|
|
provider
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
let response: Vec<serde_json::Value> = self.db.query(&query).await?.take(0)?;
|
|
|
|
|
|
|
|
|
|
if response.is_empty() {
|
|
|
|
|
return Ok(ProviderCostForecast {
|
|
|
|
|
provider: provider.to_string(),
|
|
|
|
|
current_daily_cost_cents: 0,
|
|
|
|
|
projected_weekly_cost_cents: 0,
|
|
|
|
|
projected_monthly_cost_cents: 0,
|
|
|
|
|
trend: "stable".to_string(),
|
|
|
|
|
confidence: 0.0,
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Group costs by day for the last 30 days
|
|
|
|
|
let mut daily_costs: Vec<u32> = Vec::new();
|
|
|
|
|
let mut current_day_cost: u32 = 0;
|
|
|
|
|
let mut last_date_str: Option<String> = None;
|
|
|
|
|
|
|
|
|
|
for record in response.iter() {
|
2026-02-03 21:35:00 +00:00
|
|
|
let Some(obj) = record.as_object() else {
|
|
|
|
|
continue;
|
|
|
|
|
};
|
2026-01-11 21:32:56 +00:00
|
|
|
|
2026-02-03 21:35:00 +00:00
|
|
|
if let Some(executed_at) = obj.get("executed_at").and_then(|v| v.as_str()) {
|
|
|
|
|
let date_str = executed_at.split('T').next().unwrap_or("").to_string();
|
2026-01-11 21:32:56 +00:00
|
|
|
|
2026-02-03 21:35:00 +00:00
|
|
|
if let Some(ref last_date) = last_date_str {
|
|
|
|
|
if last_date != &date_str && current_day_cost > 0 {
|
|
|
|
|
daily_costs.push(current_day_cost);
|
|
|
|
|
current_day_cost = 0;
|
|
|
|
|
}
|
2026-01-11 21:32:56 +00:00
|
|
|
}
|
2026-02-03 21:35:00 +00:00
|
|
|
|
|
|
|
|
last_date_str = Some(date_str);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if let Some(cost) = obj.get("cost_cents").and_then(|v| v.as_u64()) {
|
|
|
|
|
current_day_cost += cost as u32;
|
2026-01-11 21:32:56 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if current_day_cost > 0 {
|
|
|
|
|
daily_costs.push(current_day_cost);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let current_daily_cost_cents = if !daily_costs.is_empty() {
|
|
|
|
|
daily_costs[0]
|
|
|
|
|
} else {
|
|
|
|
|
0
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let avg_daily_cost = if !daily_costs.is_empty() {
|
|
|
|
|
daily_costs.iter().sum::<u32>() as f64 / daily_costs.len() as f64
|
|
|
|
|
} else {
|
|
|
|
|
0.0
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let projected_weekly_cost_cents = (avg_daily_cost * 7.0) as u32;
|
|
|
|
|
let projected_monthly_cost_cents = (avg_daily_cost * 30.0) as u32;
|
|
|
|
|
|
|
|
|
|
let trend = if daily_costs.len() >= 2 {
|
2026-01-11 21:46:08 +00:00
|
|
|
let recent_avg = daily_costs[0..daily_costs.len().min(5)].iter().sum::<u32>() as f64
|
|
|
|
|
/ daily_costs[0..daily_costs.len().min(5)].len() as f64;
|
2026-01-11 21:32:56 +00:00
|
|
|
let older_avg = daily_costs[daily_costs.len().min(5)..].iter().sum::<u32>() as f64
|
|
|
|
|
/ daily_costs[daily_costs.len().min(5)..].len().max(1) as f64;
|
|
|
|
|
|
|
|
|
|
if (recent_avg - older_avg).abs() < older_avg * 0.1 {
|
|
|
|
|
"stable".to_string()
|
|
|
|
|
} else if recent_avg > older_avg {
|
|
|
|
|
"increasing".to_string()
|
|
|
|
|
} else {
|
|
|
|
|
"decreasing".to_string()
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
"insufficient_data".to_string()
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let confidence = if daily_costs.len() >= 7 {
|
|
|
|
|
0.9
|
|
|
|
|
} else if daily_costs.len() >= 3 {
|
|
|
|
|
0.7
|
|
|
|
|
} else {
|
|
|
|
|
0.3
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
Ok(ProviderCostForecast {
|
|
|
|
|
provider: provider.to_string(),
|
|
|
|
|
current_daily_cost_cents,
|
|
|
|
|
projected_weekly_cost_cents,
|
|
|
|
|
projected_monthly_cost_cents,
|
|
|
|
|
trend,
|
|
|
|
|
confidence,
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Get cost breakdown by provider
|
|
|
|
|
pub async fn get_cost_breakdown_by_provider(&self) -> anyhow::Result<HashMap<String, u32>> {
|
|
|
|
|
debug!("Getting cost breakdown by provider");
|
|
|
|
|
|
|
|
|
|
let query = "SELECT provider, cost_cents FROM kg_executions";
|
|
|
|
|
let response: Vec<serde_json::Value> = self.db.query(query).await?.take(0)?;
|
|
|
|
|
|
|
|
|
|
let mut breakdown: HashMap<String, u32> = HashMap::new();
|
|
|
|
|
|
|
|
|
|
for record in response.iter() {
|
|
|
|
|
if let Some(obj) = record.as_object() {
|
2026-01-11 21:46:08 +00:00
|
|
|
if let (Some(provider), Some(cost)) = (
|
|
|
|
|
obj.get("provider").and_then(|v| v.as_str()),
|
|
|
|
|
obj.get("cost_cents").and_then(|v| v.as_u64()),
|
|
|
|
|
) {
|
2026-01-11 21:32:56 +00:00
|
|
|
*breakdown.entry(provider.to_string()).or_insert(0) += cost as u32;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Ok(breakdown)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Get cost breakdown by task type and provider
|
2026-02-03 21:35:00 +00:00
|
|
|
#[allow(dead_code)]
|
2026-01-11 21:32:56 +00:00
|
|
|
pub async fn get_cost_breakdown_by_task_and_provider(
|
|
|
|
|
&self,
|
|
|
|
|
) -> anyhow::Result<HashMap<String, HashMap<String, u32>>> {
|
|
|
|
|
debug!("Getting cost breakdown by task type and provider");
|
|
|
|
|
|
|
|
|
|
let query = "SELECT provider, task_type, cost_cents FROM kg_executions";
|
|
|
|
|
let response: Vec<serde_json::Value> = self.db.query(query).await?.take(0)?;
|
|
|
|
|
|
|
|
|
|
let mut breakdown: HashMap<String, HashMap<String, u32>> = HashMap::new();
|
|
|
|
|
|
|
|
|
|
for record in response.iter() {
|
|
|
|
|
if let Some(obj) = record.as_object() {
|
2026-01-11 21:46:08 +00:00
|
|
|
if let (Some(provider), Some(task_type), Some(cost)) = (
|
|
|
|
|
obj.get("provider").and_then(|v| v.as_str()),
|
|
|
|
|
obj.get("task_type").and_then(|v| v.as_str()),
|
|
|
|
|
obj.get("cost_cents").and_then(|v| v.as_u64()),
|
|
|
|
|
) {
|
2026-01-11 21:32:56 +00:00
|
|
|
breakdown
|
|
|
|
|
.entry(provider.to_string())
|
|
|
|
|
.or_default()
|
|
|
|
|
.entry(task_type.to_string())
|
|
|
|
|
.and_modify(|v| *v += cost as u32)
|
|
|
|
|
.or_insert(cost as u32);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Ok(breakdown)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// A hand-built `ProviderAnalytics` keeps the values it was given.
    #[test]
    fn test_provider_analytics_creation() {
        let sample = ProviderAnalytics {
            provider: "claude".to_string(),
            total_cost_cents: 1000,
            total_tasks: 10,
            successful_tasks: 9,
            failed_tasks: 1,
            success_rate: 0.9,
            avg_cost_per_task_cents: 100.0,
            total_input_tokens: 50000,
            total_output_tokens: 25000,
            cost_per_1m_tokens: 13.3,
        };

        assert_eq!(sample.provider, "claude");
        assert_eq!(sample.total_tasks, 10);
        assert_eq!(sample.success_rate, 0.9);
    }

    /// A hand-built `ProviderEfficiency` exposes its rank and ratio.
    #[test]
    fn test_provider_efficiency_calculation() {
        let sample = ProviderEfficiency {
            provider: "claude".to_string(),
            quality_score: 0.9,
            cost_score: 0.8,
            efficiency_ratio: 0.72,
            rank: 1,
        };

        assert_eq!(sample.rank, 1);
        assert!(sample.efficiency_ratio > 0.7);
    }

    /// A hand-built `ProviderCostForecast` keeps its cost projections.
    #[test]
    fn test_cost_forecast() {
        let sample = ProviderCostForecast {
            provider: "claude".to_string(),
            current_daily_cost_cents: 500,
            projected_weekly_cost_cents: 3500,
            projected_monthly_cost_cents: 15000,
            trend: "stable".to_string(),
            confidence: 0.9,
        };

        assert_eq!(sample.current_daily_cost_cents, 500);
        assert_eq!(sample.projected_weekly_cost_cents, 3500);
        assert_eq!(sample.projected_monthly_cost_cents, 15000);
    }
}
|