Implement intelligent agent learning from Knowledge Graph execution history with per-task-type expertise tracking, recency bias, and learning curves.

## Phase 5.3 Implementation

### Learning Infrastructure (✅ Complete)
- LearningProfileService with per-task-type expertise metrics
- TaskTypeExpertise model tracking success_rate, confidence, learning curves
- Recency bias weighting: recent 7 days weighted 3x higher (exponential decay)
- Confidence scoring prevents overfitting: min(1.0, executions / 20)
- Learning curves computed from daily execution windows

### Agent Scoring Service (✅ Complete)
- Unified AgentScore combining SwarmCoordinator + learning profiles
- Scoring formula: 0.3*base + 0.5*expertise + 0.2*confidence
- Rank agents by combined score for intelligent assignment
- Support for recency-biased scoring (recent_success_rate)
- Methods: rank_agents, select_best, rank_agents_with_recency

### KG Integration (✅ Complete)
- KGPersistence::get_executions_for_task_type() - query by agent + task type
- KGPersistence::get_agent_executions() - all executions for agent
- Coordinator::load_learning_profile_from_kg() - core KG→Learning integration
- Coordinator::load_all_learning_profiles() - batch load for multiple agents
- Convert PersistedExecution → ExecutionData for learning calculations

### Agent Assignment Integration (✅ Complete)
- AgentCoordinator uses learning profiles for task assignment
- extract_task_type() infers task type from title/description
- assign_task() scores candidates using AgentScoringService
- Fallback to load-based selection if no learning data available
- Learning profiles stored in coordinator.learning_profiles RwLock

### Profile Adapter Enhancements (✅ Complete)
- create_learning_profile() - initialize empty profiles
- add_task_type_expertise() - set task-type expertise
- update_profile_with_learning() - update swarm profiles from learning

## Files Modified

### vapora-knowledge-graph/src/persistence.rs (+30 lines)
- get_executions_for_task_type(agent_id, task_type, limit)
- get_agent_executions(agent_id, limit)

### vapora-agents/src/coordinator.rs (+100 lines)
- load_learning_profile_from_kg() - core KG integration method
- load_all_learning_profiles() - batch loading for agents
- assign_task() already uses learning-based scoring via AgentScoringService

### Existing Complete Implementation
- vapora-knowledge-graph/src/learning.rs - calculation functions
- vapora-agents/src/learning_profile.rs - data structures and expertise
- vapora-agents/src/scoring.rs - unified scoring service
- vapora-agents/src/profile_adapter.rs - adapter methods

## Tests Passing
- learning_profile: 7 tests ✅
- scoring: 5 tests ✅
- profile_adapter: 6 tests ✅
- coordinator: learning-specific tests ✅

## Data Flow
1. Task arrives → AgentCoordinator::assign_task()
2. Extract task_type from description
3. Query KG for task-type executions (load_learning_profile_from_kg)
4. Calculate expertise with recency bias
5. Score candidates (SwarmCoordinator + learning)
6. Assign to top-scored agent
7. Execution result → KG → Update learning profiles

## Key Design Decisions
- ✅ Recency bias: 7-day half-life with 3x weight for recent performance
- ✅ Confidence scoring: min(1.0, total_executions / 20) prevents overfitting
- ✅ Hierarchical scoring: 30% base load, 50% expertise, 20% confidence
- ✅ KG query limit: 100 recent executions per task-type for performance
- ✅ Async loading: load_learning_profile_from_kg supports concurrent loads

## Next: Phase 5.4 - Cost Optimization
Ready to implement budget enforcement and cost-aware provider selection.
# Source metadata (from paste artifact): 333 lines, 9.8 KiB
"""
VAPORA Multi-IA Router Configuration

Defines LLM routing rules, model mappings, cost thresholds, and fallback chains
"""

import k.api.all as k
# ===== LLM PROVIDER DEFINITIONS =====

# Registry of LLM backends available to the router.
# Each provider declares its API endpoint, its models with context window
# (tokens) and cost_per_mtok (presumably input price in USD per million
# tokens — matches cost_tracking.pricing.input below; confirm), plus
# deployment availability and serving regions.
llm_providers = {
    "claude": {
        name = "Anthropic Claude"
        endpoint = "https://api.anthropic.com/v1"
        models = [
            {name = "claude-opus-4-1", context = 200000, cost_per_mtok = 15.0}
            {name = "claude-sonnet-4-20250514", context = 200000, cost_per_mtok = 3.0}
            {name = "claude-haiku-3-5-20241022", context = 200000, cost_per_mtok = 0.80}
        ]
        availability = "production"
        regions = ["us-east-1", "us-west-2", "eu-west-1"]
    }
    "openai": {
        name = "OpenAI"
        endpoint = "https://api.openai.com/v1"
        models = [
            {name = "gpt-4-turbo", context = 128000, cost_per_mtok = 10.0}
            {name = "gpt-4o", context = 128000, cost_per_mtok = 5.0}
            {name = "gpt-3.5-turbo", context = 16384, cost_per_mtok = 0.50}
        ]
        availability = "production"
        regions = ["us-east-1", "us-west-2", "eu-west-1"]
    }
    "gemini": {
        name = "Google Gemini"
        endpoint = "https://generativelanguage.googleapis.com/v1beta"
        models = [
            {name = "gemini-2.0-pro", context = 1000000, cost_per_mtok = 10.0}
            {name = "gemini-2.0-flash", context = 1000000, cost_per_mtok = 0.075}
            {name = "gemini-1.5-pro", context = 1000000, cost_per_mtok = 1.25}
        ]
        availability = "production"
        regions = ["us-central-1"]
    }
    "ollama": {
        name = "Ollama Local"
        # In-cluster service endpoint; zero marginal token cost for local models.
        endpoint = "http://ollama.vapora-system:11434"
        models = [
            {name = "llama2", context = 4096, cost_per_mtok = 0.0}
            {name = "mistral", context = 8192, cost_per_mtok = 0.0}
            {name = "neural-chat", context = 4096, cost_per_mtok = 0.0}
        ]
        availability = "local"
        regions = ["on-premise"]
    }
}
# ===== TASK CONTEXT CLASSIFIERS =====

# Per-task-type routing hints: complexity tier, latency/cost sensitivity,
# minimum context window needed (tokens), and an ordered list of
# recommended models (most preferred first).
task_classifiers = {
    "code_generation": {
        complexity = "high"
        latency_sensitive = false
        context_needs = 32000
        quality_critical = true
        cost_sensitive = false
        recommended = ["claude-opus-4-1", "gpt-4-turbo", "claude-sonnet-4-20250514"]
    }
    "code_review": {
        complexity = "medium"
        latency_sensitive = false
        context_needs = 16000
        quality_critical = true
        cost_sensitive = true
        recommended = ["claude-sonnet-4-20250514", "gpt-4o", "gemini-2.0-flash"]
    }
    "documentation": {
        complexity = "medium"
        latency_sensitive = false
        context_needs = 8000
        quality_critical = true
        cost_sensitive = true
        recommended = ["gpt-4-turbo", "gemini-1.5-pro", "claude-sonnet-4-20250514"]
    }
    "testing": {
        complexity = "medium"
        latency_sensitive = false
        context_needs = 16000
        quality_critical = true
        cost_sensitive = true
        recommended = ["claude-sonnet-4-20250514", "gpt-4o"]
    }
    "quick_query": {
        complexity = "low"
        latency_sensitive = true
        context_needs = 4000
        quality_critical = false
        cost_sensitive = true
        recommended = ["gemini-2.0-flash", "gpt-3.5-turbo", "llama2"]
    }
    "embeddings": {
        complexity = "low"
        latency_sensitive = true
        context_needs = 512
        quality_critical = false
        cost_sensitive = true
        # NOTE(review): "ollama/neural-chat" uses provider-qualified naming,
        # while every other recommended list uses bare model names — confirm
        # which form the router resolves.
        recommended = ["ollama/neural-chat"]
    }
    "summarization": {
        complexity = "medium"
        latency_sensitive = false
        context_needs = 32000
        quality_critical = true
        cost_sensitive = true
        recommended = ["claude-sonnet-4-20250514", "gemini-2.0-flash"]
    }
    "real_time_monitoring": {
        complexity = "low"
        latency_sensitive = true
        context_needs = 2000
        quality_critical = false
        cost_sensitive = true
        recommended = ["gemini-2.0-flash", "gpt-3.5-turbo"]
    }
}
# ===== DEFAULT LLM MAPPINGS =====

# Role × task-type defaults: which model each agent role uses by default,
# its ordered fallback chain, and whether the agent may override the choice.
# task_type = "*" applies the mapping to all task types for that role.
default_mappings = [
    {
        agent_role = "Architect"
        task_type = "*"  # All tasks
        default_llm = "claude-opus-4-1"
        fallback = ["gpt-4-turbo"]
        override_allowed = false  # Critical decisions
    }
    {
        agent_role = "Developer"
        task_type = "code_generation"
        default_llm = "claude-sonnet-4-20250514"
        fallback = ["gpt-4o", "claude-opus-4-1"]
        override_allowed = true
    }
    {
        agent_role = "CodeReviewer"
        task_type = "code_review"
        default_llm = "claude-sonnet-4-20250514"
        fallback = ["gpt-4o", "gemini-2.0-flash"]
        override_allowed = true
    }
    {
        agent_role = "Tester"
        task_type = "testing"
        default_llm = "claude-sonnet-4-20250514"
        fallback = ["gpt-4o"]
        override_allowed = true
    }
    {
        agent_role = "Documenter"
        task_type = "documentation"
        default_llm = "gpt-4-turbo"
        fallback = ["claude-sonnet-4-20250514", "gemini-1.5-pro"]
        override_allowed = true
    }
    {
        agent_role = "Marketer"
        task_type = "*"
        default_llm = "claude-sonnet-4-20250514"
        fallback = ["gpt-4o"]
        override_allowed = true
    }
    {
        agent_role = "Monitor"
        task_type = "real_time_monitoring"
        default_llm = "gemini-2.0-flash"
        fallback = ["gpt-3.5-turbo"]
        override_allowed = false  # Must be fast
    }
    {
        agent_role = "Security"
        task_type = "*"
        default_llm = "claude-opus-4-1"
        fallback = ["gpt-4-turbo"]
        override_allowed = false  # Critical security
    }
]
# ===== COST TRACKING CONFIGURATION =====

# Budget limits (presumably USD — confirm currency/unit with the billing
# integration), per-role allocations, and the per-model price table.
cost_tracking = {
    enabled = true
    daily_warn_threshold = 5000  # Warn if daily cost > $5000
    daily_hard_limit = 10000  # Hard stop if daily cost > $10000
    monthly_warn_threshold = 100000
    monthly_hard_limit = 150000

    # Cost allocation by agent role
    budget_per_agent = {
        "Architect": {daily = 500, monthly = 10000}
        "Developer": {daily = 2000, monthly = 40000}
        "CodeReviewer": {daily = 1000, monthly = 20000}
        "Tester": {daily = 800, monthly = 16000}
        "Documenter": {daily = 300, monthly = 6000}
        "Security": {daily = 500, monthly = 10000}
        "Monitor": {daily = 100, monthly = 2000}
        "Other": {daily = 800, monthly = 16000}
    }

    # Price tracking: input/output price per million tokens.
    # NOTE(review): no entries for "gemini-1.5-pro" (recommended for
    # documentation above) or the zero-cost Ollama models — confirm whether
    # missing models fall back to a default price or are treated as free.
    pricing = {
        "claude-opus-4-1": {input = 15.0, output = 75.0}
        "claude-sonnet-4-20250514": {input = 3.0, output = 15.0}
        "gpt-4-turbo": {input = 10.0, output = 30.0}
        "gpt-4o": {input = 5.0, output = 15.0}
        "gpt-3.5-turbo": {input = 0.50, output = 1.50}
        "gemini-2.0-pro": {input = 10.0, output = 30.0}
        "gemini-2.0-flash": {input = 0.075, output = 0.30}
    }
}
# ===== LATENCY AND PERFORMANCE TARGETS =====

# Latency percentile targets per task type, in milliseconds.
# NOTE(review): only 4 of the 8 task types declared in task_classifiers
# have targets here — confirm whether the others are intentionally untracked.
performance_targets = {
    "code_generation": {p50 = 5000, p95 = 15000, p99 = 30000}  # milliseconds
    "code_review": {p50 = 3000, p95 = 10000, p99 = 20000}
    "quick_query": {p50 = 500, p95 = 2000, p99 = 5000}
    "real_time_monitoring": {p50 = 200, p95 = 1000, p99 = 2000}
}
# ===== CIRCUIT BREAKER SETTINGS =====

# Per-provider circuit breaker tuning. Thresholds/timeouts in milliseconds
# except failure_threshold (consecutive error count) and
# half_open_max_calls (probe calls allowed while half-open).
circuit_breakers = {
    "claude": {
        failure_threshold = 5  # Fail after 5 consecutive errors
        timeout_threshold = 60000  # 60s timeout
        half_open_max_calls = 3
        reset_timeout = 30000
    }
    "openai": {
        failure_threshold = 5
        timeout_threshold = 45000
        half_open_max_calls = 3
        reset_timeout = 30000
    }
    "gemini": {
        failure_threshold = 5
        timeout_threshold = 30000
        half_open_max_calls = 3
        reset_timeout = 30000
    }
    "ollama": {
        failure_threshold = 3  # Local failures more critical
        timeout_threshold = 15000
        half_open_max_calls = 5
        reset_timeout = 10000
    }
}
# ===== ROUTING RULES =====

# Ordered condition/action pairs evaluated by the router. Conditions are
# expression strings interpreted by the routing engine (not evaluated here);
# action names must match handlers registered in the router.
routing_rules = [
    {
        condition = "task.complexity == high && cost < 1000"
        action = "use_claude_opus"
    }
    {
        condition = "task.latency_sensitive == true"
        action = "use_fastest_available"
    }
    {
        condition = "task.cost_sensitive == true && daily_cost > 4000"
        action = "use_ollama_or_cheap"
    }
    {
        condition = "provider_status[claude] == down"
        action = "fallback_to_gpt4"
    }
    {
        condition = "time_of_day == peak_hours && usage_high"
        action = "load_balance_all_providers"
    }
]
# ===== MONITORING AND ALERTING =====

# Metric collection toggles, retention window, and alert definitions.
# Alert conditions are expression strings interpreted by the monitoring
# engine; actions must match registered alert handlers.
monitoring = {
    track_latencies = true
    track_costs = true
    track_failures = true
    track_token_usage = true

    metrics_retention = 30  # days

    alerts = [
        {
            name = "high_daily_cost"
            condition = "cost_today > 5000"
            severity = "warning"
            actions = ["notify_ops", "switch_to_cheap_provider"]
        }
        {
            name = "provider_down"
            condition = "provider_status == down"
            severity = "critical"
            actions = ["failover", "notify_ops"]
        }
        {
            name = "high_latency"
            condition = "p95_latency > performance_target * 2"
            severity = "warning"
            actions = ["notify_team", "consider_load_rebalance"]
        }
        {
            name = "budget_exceeded"
            condition = "monthly_cost > monthly_hard_limit"
            severity = "critical"
            actions = ["stop_new_requests", "notify_management"]
        }
    ]
}
# ===== OUTPUT =====

# Aggregate exported configuration: the single value consumed by the router.
output = {
    providers = llm_providers
    classifiers = task_classifiers
    default_mappings = default_mappings
    cost_tracking = cost_tracking
    performance_targets = performance_targets
    circuit_breakers = circuit_breakers
    routing_rules = routing_rules
    monitoring = monitoring
}