feat: Phase 5.3 - Multi-Agent Learning Infrastructure
Implement intelligent agent learning from Knowledge Graph execution history
with per-task-type expertise tracking, recency bias, and learning curves.
## Phase 5.3 Implementation
### Learning Infrastructure (✅ Complete)
- LearningProfileService with per-task-type expertise metrics
- TaskTypeExpertise model tracking success_rate, confidence, learning curves
- Recency bias weighting: recent 7 days weighted 3x higher (exponential decay)
- Confidence scoring prevents overfitting: min(1.0, executions / 20)
- Learning curves computed from daily execution windows
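The confidence and recency formulas above can be sketched as follows. This is a minimal illustration of the stated formulas, not the actual vapora API; the function names and the exact decay shape (a 7-day half-life, one plausible reading of "3x higher with exponential decay") are assumptions:

```rust
// Hedged sketch of the learning-infrastructure formulas; names are illustrative.
fn confidence(total_executions: u32) -> f64 {
    // min(1.0, executions / 20): confidence stays below 1.0 until enough
    // samples exist, which prevents overfitting to a handful of runs.
    (total_executions as f64 / 20.0).min(1.0)
}

fn recency_weight(age_days: f64) -> f64 {
    // Exponential decay with an assumed 7-day half-life, so executions from
    // the most recent week carry substantially more weight than older ones.
    0.5_f64.powf(age_days / 7.0)
}

fn main() {
    assert!((confidence(10) - 0.5).abs() < 1e-9);
    assert_eq!(confidence(40), 1.0); // capped at 1.0
    assert!(recency_weight(0.0) > recency_weight(14.0));
}
```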
### Agent Scoring Service (✅ Complete)
- Unified AgentScore combining SwarmCoordinator + learning profiles
- Scoring formula: 0.3*base + 0.5*expertise + 0.2*confidence
- Rank agents by combined score for intelligent assignment
- Support for recency-biased scoring (recent_success_rate)
- Methods: rank_agents, select_best, rank_agents_with_recency
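The 0.3*base + 0.5*expertise + 0.2*confidence formula can be illustrated like this; `AgentScore` here is a stand-in struct, not the real vapora-agents type:

```rust
// Illustrative combined-score computation; not the actual AgentScoringService.
struct AgentScore {
    base: f64,       // load-based score from SwarmCoordinator
    expertise: f64,  // task-type success rate from the learning profile
    confidence: f64, // sample-size confidence, min(1.0, executions / 20)
}

impl AgentScore {
    fn combined(&self) -> f64 {
        0.3 * self.base + 0.5 * self.expertise + 0.2 * self.confidence
    }
}

fn main() {
    let experienced = AgentScore { base: 0.8, expertise: 0.9, confidence: 1.0 };
    let idle_novice = AgentScore { base: 1.0, expertise: 0.4, confidence: 0.2 };
    // Expertise dominates (50% weight): a proven agent outranks a merely idle one.
    assert!(experienced.combined() > idle_novice.combined());
}
```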
### KG Integration (✅ Complete)
- KGPersistence::get_executions_for_task_type() - query by agent + task type
- KGPersistence::get_agent_executions() - all executions for agent
- Coordinator::load_learning_profile_from_kg() - core KG→Learning integration
- Coordinator::load_all_learning_profiles() - batch load for multiple agents
- Convert PersistedExecution → ExecutionData for learning calculations
### Agent Assignment Integration (✅ Complete)
- AgentCoordinator uses learning profiles for task assignment
- extract_task_type() infers task type from title/description
- assign_task() scores candidates using AgentScoringService
- Fallback to load-based selection if no learning data available
- Learning profiles stored in coordinator.learning_profiles RwLock
### Profile Adapter Enhancements (✅ Complete)
- create_learning_profile() - initialize empty profiles
- add_task_type_expertise() - set task-type expertise
- update_profile_with_learning() - update swarm profiles from learning
## Files Modified
### vapora-knowledge-graph/src/persistence.rs (+30 lines)
- get_executions_for_task_type(agent_id, task_type, limit)
- get_agent_executions(agent_id, limit)
### vapora-agents/src/coordinator.rs (+100 lines)
- load_learning_profile_from_kg() - core KG integration method
- load_all_learning_profiles() - batch loading for agents
- assign_task() already uses learning-based scoring via AgentScoringService
### Existing Complete Implementation
- vapora-knowledge-graph/src/learning.rs - calculation functions
- vapora-agents/src/learning_profile.rs - data structures and expertise
- vapora-agents/src/scoring.rs - unified scoring service
- vapora-agents/src/profile_adapter.rs - adapter methods
## Tests Passing
- learning_profile: 7 tests ✅
- scoring: 5 tests ✅
- profile_adapter: 6 tests ✅
- coordinator: learning-specific tests ✅
## Data Flow
1. Task arrives → AgentCoordinator::assign_task()
2. Extract task_type from title/description (extract_task_type)
3. Query KG for task-type executions (load_learning_profile_from_kg)
4. Calculate expertise with recency bias
5. Score candidates (SwarmCoordinator + learning)
6. Assign to top-scored agent
7. Execution result → KG → Update learning profiles
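Steps 5-6, including the load-based fallback, can be sketched as below. All names and the tuple layout are illustrative, not the actual vapora-agents API:

```rust
// Sketch of candidate selection: prefer the top learning-based score, fall
// back to least-loaded selection when no agent has learning data.
fn select_agent<'a>(candidates: &[(&'a str, Option<f64>, u32)]) -> Option<&'a str> {
    // Each entry: (agent_id, learning score if a profile exists, current load)
    if candidates.iter().any(|(_, score, _)| score.is_some()) {
        candidates
            .iter()
            .filter_map(|&(id, score, _)| score.map(|s| (id, s)))
            .max_by(|a, b| a.1.total_cmp(&b.1))
            .map(|(id, _)| id)
    } else {
        // No learning data at all: the least-loaded agent wins.
        candidates
            .iter()
            .min_by_key(|(_, _, load)| *load)
            .map(|&(id, _, _)| id)
    }
}

fn main() {
    let with_data = [("backend", Some(0.9), 3), ("general", None, 0)];
    assert_eq!(select_agent(&with_data), Some("backend"));
    let no_data = [("a", None, 3), ("b", None, 0)];
    assert_eq!(select_agent(&no_data), Some("b"));
}
```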
## Key Design Decisions
✅ Recency bias: 7-day half-life with 3x weight for recent performance
✅ Confidence scoring: min(1.0, total_executions / 20) prevents overfitting
✅ Hierarchical scoring: 30% base load, 50% expertise, 20% confidence
✅ KG query limit: 100 recent executions per task-type for performance
✅ Async loading: load_learning_profile_from_kg supports concurrent loads
## Next: Phase 5.4 - Cost Optimization
Ready to implement budget enforcement and cost-aware provider selection.
2026-01-11 13:03:53 +00:00
// vapora-llm-router: Configuration module
// Load and parse LLM router configuration from TOML

use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::Path;
use thiserror::Error;

#[derive(Debug, Error)]
pub enum ConfigError {
    #[error("Failed to read config file: {0}")]
    ReadError(#[from] std::io::Error),

    #[error("Failed to parse TOML: {0}")]
    ParseError(#[from] toml::de::Error),

    #[error("Invalid configuration: {0}")]
    ValidationError(String),
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LLMRouterConfig {
    pub routing: RoutingConfig,
    pub providers: HashMap<String, ProviderConfig>,
    #[serde(default)]
    pub routing_rules: Vec<RoutingRule>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RoutingConfig {
    pub default_provider: String,
    #[serde(default = "default_true")]
    pub cost_tracking_enabled: bool,
    #[serde(default = "default_true")]
    pub fallback_enabled: bool,
}

fn default_true() -> bool {
    true
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProviderConfig {
    #[serde(default = "default_true")]
    pub enabled: bool,
    pub api_key: Option<String>,
    pub url: Option<String>,
    pub model: String,
    #[serde(default = "default_max_tokens")]
    pub max_tokens: usize,
    #[serde(default = "default_temperature")]
    pub temperature: f32,
    #[serde(default)]
    pub cost_per_1m_input: f64,
    #[serde(default)]
    pub cost_per_1m_output: f64,
}

fn default_max_tokens() -> usize {
    4096
}

fn default_temperature() -> f32 {
    0.7
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RoutingRule {
    pub name: String,
    pub condition: HashMap<String, String>,
    pub provider: String,
    pub model_override: Option<String>,
}

impl LLMRouterConfig {
    /// Load configuration from a TOML file
    pub fn load<P: AsRef<Path>>(path: P) -> Result<Self, ConfigError> {
        let content = std::fs::read_to_string(path)?;
        let mut config: Self = toml::from_str(&content)?;

        // Expand environment variables in API keys and URLs
        config.expand_env_vars();
        config.validate()?;

        Ok(config)
    }

    /// Expand environment variables in configuration
    fn expand_env_vars(&mut self) {
        for (_, provider) in self.providers.iter_mut() {
            if let Some(ref api_key) = provider.api_key {
                provider.api_key = Some(expand_env_var(api_key));
            }
            if let Some(ref url) = provider.url {
                provider.url = Some(expand_env_var(url));
            }
        }
    }

    /// Validate configuration
    fn validate(&self) -> Result<(), ConfigError> {
        // Check that the default provider exists
        if !self.providers.contains_key(&self.routing.default_provider) {
            return Err(ConfigError::ValidationError(format!(
                "Default provider '{}' not found in providers",
                self.routing.default_provider
            )));
        }

        // Check that all routing rules reference valid providers
        for rule in &self.routing_rules {
            if !self.providers.contains_key(&rule.provider) {
                return Err(ConfigError::ValidationError(format!(
                    "Routing rule '{}' references unknown provider '{}'",
                    rule.name, rule.provider
                )));
            }
        }

        Ok(())
    }

    /// Get provider configuration by name
    pub fn get_provider(&self, name: &str) -> Option<&ProviderConfig> {
        self.providers.get(name)
    }

    /// Find the first routing rule whose conditions all match
    pub fn find_rule(&self, conditions: &HashMap<String, String>) -> Option<&RoutingRule> {
        self.routing_rules.iter().find(|rule| {
            rule.condition
                .iter()
                .all(|(key, value)| conditions.get(key).map(|v| v == value).unwrap_or(false))
        })
    }
}

/// Expand environment variables in format ${VAR} or ${VAR:-default}
fn expand_env_var(input: &str) -> String {
    if !input.starts_with("${") || !input.ends_with('}') {
        return input.to_string();
    }

    let var_part = &input[2..input.len() - 1];

    // Handle ${VAR:-default} format
    if let Some(pos) = var_part.find(":-") {
        let var_name = &var_part[..pos];
        let default_value = &var_part[pos + 2..];
        std::env::var(var_name).unwrap_or_else(|_| default_value.to_string())
    } else {
        // Handle ${VAR} format
        std::env::var(var_part).unwrap_or_default()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_expand_env_var() {
        std::env::set_var("TEST_VAR", "test_value");
        assert_eq!(expand_env_var("${TEST_VAR}"), "test_value");
        assert_eq!(expand_env_var("plain_text"), "plain_text");
        assert_eq!(expand_env_var("${NONEXISTENT:-default}"), "default");
    }

    #[test]
    fn test_config_validation() {
        let config = LLMRouterConfig {
            routing: RoutingConfig {
                default_provider: "claude".to_string(),
                cost_tracking_enabled: true,
                fallback_enabled: true,
            },
            providers: {
                let mut map = HashMap::new();
                map.insert(
                    "claude".to_string(),
                    ProviderConfig {
                        enabled: true,
                        api_key: Some("test".to_string()),
                        url: None,
                        model: "claude-sonnet-4".to_string(),
                        max_tokens: 4096,
                        temperature: 0.7,
                        cost_per_1m_input: 3.0,
                        cost_per_1m_output: 15.0,
                    },
                );
                map
            },
            routing_rules: vec![],
        };

        assert!(config.validate().is_ok());
    }

    #[test]
    fn test_invalid_default_provider() {
        let config = LLMRouterConfig {
            routing: RoutingConfig {
                default_provider: "nonexistent".to_string(),
                cost_tracking_enabled: true,
                fallback_enabled: true,
            },
            providers: HashMap::new(),
            routing_rules: vec![],
        };

        assert!(config.validate().is_err());
    }
}
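For reference, a configuration file that deserializes into these structures might look like the sketch below. Provider names, models, costs, and the routing rule are illustrative, not a shipped default; the `${VAR}` / `${VAR:-default}` values exercise `expand_env_vars`:

```toml
[routing]
default_provider = "claude"
# cost_tracking_enabled and fallback_enabled default to true

[providers.claude]
api_key = "${ANTHROPIC_API_KEY}"
model = "claude-sonnet-4"
cost_per_1m_input = 3.0
cost_per_1m_output = 15.0

[providers.ollama]
url = "${OLLAMA_URL:-http://localhost:11434}"
model = "llama3"
# enabled, max_tokens, temperature, and costs fall back to serde defaults

[[routing_rules]]
name = "local-for-docs"
condition = { task_type = "docs" }
provider = "ollama"
# model_override is optional and deserializes to None when omitted
```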