// Vapora/crates/vapora-backend/src/services/provider_analytics_service.rs

// Provider Analytics Service - Phase 7
// Analyzes provider costs, efficiency, and performance

use std::collections::HashMap;

use surrealdb::engine::remote::ws::Client;
use surrealdb::Surreal;
use tracing::debug;
use vapora_knowledge_graph::models::{
    ProviderAnalytics, ProviderCostForecast, ProviderEfficiency, ProviderTaskTypeMetrics,
};

/// Service that derives provider cost, efficiency and performance analytics
/// from records in the `kg_executions` table.
///
/// The database handle is stored behind an `Arc`, so cloning the service
/// shares the same handle instead of copying it.
#[derive(Clone)]
pub struct ProviderAnalyticsService {
    /// Shared SurrealDB handle (remote WebSocket client) used by every query.
    db: std::sync::Arc<Surreal<Client>>,
}

impl ProviderAnalyticsService {
    /// Build a service around an existing SurrealDB connection.
    ///
    /// The connection is moved into an `Arc` so that clones of the service
    /// share it.
    pub fn new(db: Surreal<Client>) -> Self {
        let db = std::sync::Arc::new(db);
        Self { db }
    }

    /// Get aggregate analytics for a specific provider.
    ///
    /// Reads up to 10,000 `kg_executions` rows whose `provider` field equals
    /// `provider` and folds them into cost, outcome and token totals plus the
    /// derived ratios. A provider with no rows yields an all-zero result.
    ///
    /// Only rows that carry a string `outcome` count as tasks; any outcome
    /// other than `"success"` is counted as failed.
    ///
    /// # Errors
    ///
    /// Returns an error if the query fails or its first result set cannot be
    /// read as a list of JSON values.
    pub async fn get_provider_analytics(
        &self,
        provider: &str,
    ) -> anyhow::Result<ProviderAnalytics> {
        debug!("Querying analytics for provider: {}", provider);

        // The provider name is bound as a parameter instead of being spliced
        // into the statement, so quotes in the value cannot alter the query.
        let response: Vec<serde_json::Value> = self
            .db
            .query("SELECT * FROM kg_executions WHERE provider = $provider LIMIT 10000")
            .bind(("provider", provider.to_string()))
            .await?
            .take(0)?;

        // Costs are accumulated in u64 and clamped once at the end so that a
        // large value can neither truncate nor overflow the u32 total.
        let mut cost_sum: u64 = 0;
        let mut total_tasks: u64 = 0;
        let mut successful_tasks: u64 = 0;
        let mut failed_tasks: u64 = 0;
        let mut total_input_tokens: u64 = 0;
        let mut total_output_tokens: u64 = 0;

        for obj in response.iter().filter_map(|record| record.as_object()) {
            if let Some(cost) = obj.get("cost_cents").and_then(|v| v.as_u64()) {
                cost_sum = cost_sum.saturating_add(cost);
            }
            if let Some(outcome) = obj.get("outcome").and_then(|v| v.as_str()) {
                total_tasks += 1;
                match outcome {
                    "success" => successful_tasks += 1,
                    _ => failed_tasks += 1,
                }
            }
            if let Some(input) = obj.get("input_tokens").and_then(|v| v.as_u64()) {
                total_input_tokens = total_input_tokens.saturating_add(input);
            }
            if let Some(output) = obj.get("output_tokens").and_then(|v| v.as_u64()) {
                total_output_tokens = total_output_tokens.saturating_add(output);
            }
        }

        // Saturate at u32::MAX; the value is clamped first, so the cast is lossless.
        let total_cost_cents = cost_sum.min(u64::from(u32::MAX)) as u32;

        let (success_rate, avg_cost_per_task_cents) = if total_tasks > 0 {
            (
                successful_tasks as f64 / total_tasks as f64,
                total_cost_cents as f64 / total_tasks as f64,
            )
        } else {
            (0.0, 0.0)
        };

        let total_tokens = total_input_tokens.saturating_add(total_output_tokens);
        let cost_per_1m_tokens = if total_tokens > 0 {
            (total_cost_cents as f64 * 1_000_000.0) / (total_tokens as f64)
        } else {
            0.0
        };

        Ok(ProviderAnalytics {
            provider: provider.to_string(),
            total_cost_cents,
            total_tasks,
            successful_tasks,
            failed_tasks,
            success_rate,
            avg_cost_per_task_cents,
            total_input_tokens,
            total_output_tokens,
            cost_per_1m_tokens,
        })
    }

    /// Get the efficiency ranking for all providers.
    ///
    /// For every provider found in `kg_executions` this computes:
    /// - `quality_score`: the provider's success rate,
    /// - `cost_score`: `1 / (1 + avg_cost_per_task_cents / 100)`, or `1.0`
    ///   when the average cost is zero,
    /// - `efficiency_ratio`: the product of the two.
    ///
    /// Providers are returned sorted by `efficiency_ratio`, best first, with
    /// `rank` starting at 1.
    ///
    /// # Errors
    ///
    /// Returns an error if listing the providers or computing any provider's
    /// analytics fails.
    pub async fn get_provider_efficiency_ranking(&self) -> anyhow::Result<Vec<ProviderEfficiency>> {
        debug!("Calculating provider efficiency ranking");

        // `GROUP BY` yields one row per distinct provider.
        // NOTE(review): replaces `SELECT DISTINCT(provider)`, which is not in
        // the documented SurrealQL SELECT grammar - confirm against the
        // deployed SurrealDB version.
        let response: Vec<serde_json::Value> = self
            .db
            .query("SELECT provider FROM kg_executions GROUP BY provider")
            .await?
            .take(0)?;

        let providers: Vec<String> = response
            .iter()
            .filter_map(|record| record.as_object())
            .filter_map(|obj| obj.get("provider").and_then(|v| v.as_str()))
            .map(str::to_string)
            .collect();

        let mut ranking: Vec<ProviderEfficiency> = Vec::with_capacity(providers.len());

        for provider in providers {
            let analytics = self.get_provider_analytics(&provider).await?;

            let quality_score = analytics.success_rate;
            let cost_score = if analytics.avg_cost_per_task_cents > 0.0 {
                1.0 / (1.0 + analytics.avg_cost_per_task_cents / 100.0)
            } else {
                1.0
            };

            ranking.push(ProviderEfficiency {
                provider,
                quality_score,
                cost_score,
                efficiency_ratio: quality_score * cost_score,
                // Placeholder; real ranks are assigned after sorting.
                rank: 0,
            });
        }

        // Descending by efficiency; the sort is stable, so ties keep query order.
        ranking.sort_by(|a, b| b.efficiency_ratio.total_cmp(&a.efficiency_ratio));

        for (idx, entry) in ranking.iter_mut().enumerate() {
            entry.rank = (idx + 1) as u32;
        }

        Ok(ranking)
    }

    /// Get a provider's performance for one task type.
    ///
    /// Reads up to 1,000 `kg_executions` rows matching both `provider` and
    /// `task_type`. Every matching row counts as a task; rows whose `outcome`
    /// is `"success"` count as successful. With no rows, all figures are zero.
    ///
    /// # Errors
    ///
    /// Returns an error if the query fails or its first result set cannot be
    /// read as a list of JSON values.
    pub async fn get_provider_task_type_metrics(
        &self,
        provider: &str,
        task_type: &str,
    ) -> anyhow::Result<ProviderTaskTypeMetrics> {
        debug!(
            "Querying provider {} task type {} metrics",
            provider, task_type
        );

        // Both filter values are bound as parameters instead of being spliced
        // into the statement, so quotes in either cannot alter the query.
        let response: Vec<serde_json::Value> = self
            .db
            .query(
                "SELECT * FROM kg_executions WHERE provider = $provider \
                 AND task_type = $task_type LIMIT 1000",
            )
            .bind(("provider", provider.to_string()))
            .bind(("task_type", task_type.to_string()))
            .await?
            .take(0)?;

        // Costs are accumulated in u64 and clamped once at the end so that a
        // large value can neither truncate nor overflow the u32 total.
        let mut cost_sum: u64 = 0;
        let mut task_count: u64 = 0;
        let mut successful_count: u64 = 0;
        let mut total_duration_ms: u64 = 0;

        for obj in response.iter().filter_map(|record| record.as_object()) {
            task_count += 1;

            if let Some(cost) = obj.get("cost_cents").and_then(|v| v.as_u64()) {
                cost_sum = cost_sum.saturating_add(cost);
            }

            if obj.get("outcome").and_then(|v| v.as_str()) == Some("success") {
                successful_count += 1;
            }

            if let Some(duration) = obj.get("duration_ms").and_then(|v| v.as_u64()) {
                total_duration_ms = total_duration_ms.saturating_add(duration);
            }
        }

        // Saturate at u32::MAX; the value is clamped first, so the cast is lossless.
        let total_cost_cents = cost_sum.min(u64::from(u32::MAX)) as u32;

        let (success_rate, avg_duration_ms) = if task_count > 0 {
            (
                successful_count as f64 / task_count as f64,
                total_duration_ms as f64 / task_count as f64,
            )
        } else {
            (0.0, 0.0)
        };

        Ok(ProviderTaskTypeMetrics {
            provider: provider.to_string(),
            task_type: task_type.to_string(),
            total_cost_cents,
            task_count,
            success_rate,
            avg_duration_ms,
        })
    }

    /// Get a cost forecast for a provider.
    ///
    /// Takes the 100 most recent `kg_executions` rows for `provider` (newest
    /// first), groups their costs by calendar day (the part of `executed_at`
    /// before `'T'`), and derives:
    /// - `current_daily_cost_cents`: the cost of the most recent day,
    /// - weekly / monthly projections: average daily cost times 7 / 30,
    /// - `trend`: `"stable"`, `"increasing"` or `"decreasing"`, comparing the
    ///   most recent days (up to 5) with the older ones, or
    ///   `"insufficient_data"` with fewer than two days,
    /// - `confidence`: 0.9 with at least 7 days, 0.7 with at least 3, else 0.3.
    ///
    /// A provider with no rows yields zero costs, a `"stable"` trend and a
    /// confidence of 0.0.
    ///
    /// # Errors
    ///
    /// Returns an error if the query fails or its first result set cannot be
    /// read as a list of JSON values.
    pub async fn forecast_provider_costs(
        &self,
        provider: &str,
    ) -> anyhow::Result<ProviderCostForecast> {
        debug!("Forecasting costs for provider: {}", provider);

        // The provider name is bound as a parameter instead of being spliced
        // into the statement, so quotes in the value cannot alter the query.
        let response: Vec<serde_json::Value> = self
            .db
            .query(
                "SELECT * FROM kg_executions WHERE provider = $provider \
                 ORDER BY executed_at DESC LIMIT 100",
            )
            .bind(("provider", provider.to_string()))
            .await?
            .take(0)?;

        if response.is_empty() {
            return Ok(ProviderCostForecast {
                provider: provider.to_string(),
                current_daily_cost_cents: 0,
                projected_weekly_cost_cents: 0,
                projected_monthly_cost_cents: 0,
                trend: "stable".to_string(),
                confidence: 0.0,
            });
        }

        // Group costs by day over the fetched window (the latest 100 records,
        // newest first). Sums are kept in u64 so they cannot overflow; days
        // whose cost is zero are not recorded.
        let mut daily_costs: Vec<u64> = Vec::new();
        let mut current_day_cost: u64 = 0;
        let mut last_date: Option<&str> = None;

        for obj in response.iter().filter_map(|record| record.as_object()) {
            if let Some(executed_at) = obj.get("executed_at").and_then(|v| v.as_str()) {
                let date = executed_at.split('T').next().unwrap_or("");

                // A change of date closes the day accumulated so far.
                if let Some(previous) = last_date {
                    if previous != date && current_day_cost > 0 {
                        daily_costs.push(current_day_cost);
                        current_day_cost = 0;
                    }
                }

                last_date = Some(date);
            }

            // Rows without a usable `executed_at` are charged to the current day.
            if let Some(cost) = obj.get("cost_cents").and_then(|v| v.as_u64()) {
                current_day_cost = current_day_cost.saturating_add(cost);
            }
        }

        if current_day_cost > 0 {
            daily_costs.push(current_day_cost);
        }

        // Mean of a slice of daily costs; 0.0 for an empty slice.
        let mean = |days: &[u64]| -> f64 {
            if days.is_empty() {
                0.0
            } else {
                days.iter().map(|&cents| cents as f64).sum::<f64>() / days.len() as f64
            }
        };

        // Saturate at u32::MAX; the value is clamped first, so the cast is lossless.
        let current_daily_cost_cents =
            daily_costs.first().copied().unwrap_or(0).min(u64::from(u32::MAX)) as u32;

        let avg_daily_cost = mean(&daily_costs);

        // Float-to-integer `as` casts saturate, so these cannot wrap.
        let projected_weekly_cost_cents = (avg_daily_cost * 7.0) as u32;
        let projected_monthly_cost_cents = (avg_daily_cost * 30.0) as u32;

        let trend = if daily_costs.len() >= 2 {
            // Compare up to the 5 most recent days with everything older, but
            // always leave at least one day in the older window. Without
            // that, 2-5 days of data compared against an empty window
            // (average 0) and were always reported as "increasing".
            let split = (daily_costs.len() - 1).min(5);
            let (recent, older) = daily_costs.split_at(split);
            let recent_avg = mean(recent);
            let older_avg = mean(older);

            if (recent_avg - older_avg).abs() < older_avg * 0.1 {
                "stable"
            } else if recent_avg > older_avg {
                "increasing"
            } else {
                "decreasing"
            }
        } else {
            "insufficient_data"
        };

        let confidence = if daily_costs.len() >= 7 {
            0.9
        } else if daily_costs.len() >= 3 {
            0.7
        } else {
            0.3
        };

        Ok(ProviderCostForecast {
            provider: provider.to_string(),
            current_daily_cost_cents,
            projected_weekly_cost_cents,
            projected_monthly_cost_cents,
            trend: trend.to_string(),
            confidence,
        })
    }

    /// Get the total cost, in cents, per provider.
    ///
    /// Scans every `kg_executions` row and sums `cost_cents` per `provider`.
    /// Rows missing either field (or with a non-string provider / non-u64
    /// cost) are skipped. Each total saturates at `u32::MAX` instead of
    /// overflowing.
    ///
    /// # Errors
    ///
    /// Returns an error if the query fails or its first result set cannot be
    /// read as a list of JSON values.
    pub async fn get_cost_breakdown_by_provider(&self) -> anyhow::Result<HashMap<String, u32>> {
        debug!("Getting cost breakdown by provider");

        let query = "SELECT provider, cost_cents FROM kg_executions";
        let response: Vec<serde_json::Value> = self.db.query(query).await?.take(0)?;

        let mut breakdown: HashMap<String, u32> = HashMap::new();

        for obj in response.iter().filter_map(|record| record.as_object()) {
            let provider = obj.get("provider").and_then(|v| v.as_str());
            let cost = obj.get("cost_cents").and_then(|v| v.as_u64());

            if let (Some(provider), Some(cost)) = (provider, cost) {
                // Clamp before narrowing so the cast cannot truncate.
                let cents = cost.min(u64::from(u32::MAX)) as u32;
                let total = breakdown.entry(provider.to_string()).or_insert(0);
                *total = total.saturating_add(cents);
            }
        }

        Ok(breakdown)
    }

    /// Get the total cost, in cents, per provider and task type.
    ///
    /// Scans every `kg_executions` row and sums `cost_cents` into a nested
    /// map keyed first by `provider`, then by `task_type`. Rows missing any
    /// of the three fields (or with an unexpected JSON type) are skipped.
    /// Each total saturates at `u32::MAX` instead of overflowing.
    ///
    /// # Errors
    ///
    /// Returns an error if the query fails or its first result set cannot be
    /// read as a list of JSON values.
    // NOTE(review): presumably not called anywhere yet, hence the lint
    // suppression - confirm and drop the attribute once it has a caller.
    #[allow(dead_code)]
    pub async fn get_cost_breakdown_by_task_and_provider(
        &self,
    ) -> anyhow::Result<HashMap<String, HashMap<String, u32>>> {
        debug!("Getting cost breakdown by task type and provider");

        let query = "SELECT provider, task_type, cost_cents FROM kg_executions";
        let response: Vec<serde_json::Value> = self.db.query(query).await?.take(0)?;

        let mut breakdown: HashMap<String, HashMap<String, u32>> = HashMap::new();

        for obj in response.iter().filter_map(|record| record.as_object()) {
            let provider = obj.get("provider").and_then(|v| v.as_str());
            let task_type = obj.get("task_type").and_then(|v| v.as_str());
            let cost = obj.get("cost_cents").and_then(|v| v.as_u64());

            if let (Some(provider), Some(task_type), Some(cost)) = (provider, task_type, cost) {
                // Clamp before narrowing so the cast cannot truncate.
                let cents = cost.min(u64::from(u32::MAX)) as u32;
                let total = breakdown
                    .entry(provider.to_string())
                    .or_default()
                    .entry(task_type.to_string())
                    .or_insert(0);
                *total = total.saturating_add(cents);
            }
        }

        Ok(breakdown)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A hand-built `ProviderAnalytics` exposes the values it was given.
    #[test]
    fn test_provider_analytics_creation() {
        let sample = ProviderAnalytics {
            provider: "claude".to_string(),
            total_cost_cents: 1000,
            total_tasks: 10,
            successful_tasks: 9,
            failed_tasks: 1,
            success_rate: 0.9,
            avg_cost_per_task_cents: 100.0,
            total_input_tokens: 50000,
            total_output_tokens: 25000,
            cost_per_1m_tokens: 13.3,
        };

        assert_eq!(sample.provider, "claude");
        assert_eq!(sample.total_tasks, 10);
        assert_eq!(sample.success_rate, 0.9);
    }

    /// A hand-built `ProviderEfficiency` keeps its rank and ratio.
    #[test]
    fn test_provider_efficiency_calculation() {
        let entry = ProviderEfficiency {
            provider: "claude".to_string(),
            quality_score: 0.9,
            cost_score: 0.8,
            efficiency_ratio: 0.72,
            rank: 1,
        };

        assert_eq!(entry.rank, 1);
        assert!(entry.efficiency_ratio > 0.7);
    }

    /// A hand-built `ProviderCostForecast` keeps its daily, weekly and
    /// monthly figures.
    #[test]
    fn test_cost_forecast() {
        let projection = ProviderCostForecast {
            provider: "claude".to_string(),
            current_daily_cost_cents: 500,
            projected_weekly_cost_cents: 3500,
            projected_monthly_cost_cents: 15000,
            trend: "stable".to_string(),
            confidence: 0.9,
        };

        assert_eq!(projection.current_daily_cost_cents, 500);
        assert_eq!(projection.projected_weekly_cost_cents, 3500);
        assert_eq!(projection.projected_monthly_cost_cents, 15000);
    }
}