333 lines
9.8 KiB
Plaintext
Raw Permalink Normal View History

feat: Phase 5.3 - Multi-Agent Learning Infrastructure Implement intelligent agent learning from Knowledge Graph execution history with per-task-type expertise tracking, recency bias, and learning curves. ## Phase 5.3 Implementation ### Learning Infrastructure (✅ Complete) - LearningProfileService with per-task-type expertise metrics - TaskTypeExpertise model tracking success_rate, confidence, learning curves - Recency bias weighting: recent 7 days weighted 3x higher (exponential decay) - Confidence scoring prevents overfitting: min(1.0, executions / 20) - Learning curves computed from daily execution windows ### Agent Scoring Service (✅ Complete) - Unified AgentScore combining SwarmCoordinator + learning profiles - Scoring formula: 0.3*base + 0.5*expertise + 0.2*confidence - Rank agents by combined score for intelligent assignment - Support for recency-biased scoring (recent_success_rate) - Methods: rank_agents, select_best, rank_agents_with_recency ### KG Integration (✅ Complete) - KGPersistence::get_executions_for_task_type() - query by agent + task type - KGPersistence::get_agent_executions() - all executions for agent - Coordinator::load_learning_profile_from_kg() - core KG→Learning integration - Coordinator::load_all_learning_profiles() - batch load for multiple agents - Convert PersistedExecution → ExecutionData for learning calculations ### Agent Assignment Integration (✅ Complete) - AgentCoordinator uses learning profiles for task assignment - extract_task_type() infers task type from title/description - assign_task() scores candidates using AgentScoringService - Fallback to load-based selection if no learning data available - Learning profiles stored in coordinator.learning_profiles RwLock ### Profile Adapter Enhancements (✅ Complete) - create_learning_profile() - initialize empty profiles - add_task_type_expertise() - set task-type expertise - update_profile_with_learning() - update swarm profiles from learning ## Files Modified ### 
vapora-knowledge-graph/src/persistence.rs (+30 lines) - get_executions_for_task_type(agent_id, task_type, limit) - get_agent_executions(agent_id, limit) ### vapora-agents/src/coordinator.rs (+100 lines) - load_learning_profile_from_kg() - core KG integration method - load_all_learning_profiles() - batch loading for agents - assign_task() already uses learning-based scoring via AgentScoringService ### Existing Complete Implementation - vapora-knowledge-graph/src/learning.rs - calculation functions - vapora-agents/src/learning_profile.rs - data structures and expertise - vapora-agents/src/scoring.rs - unified scoring service - vapora-agents/src/profile_adapter.rs - adapter methods ## Tests Passing - learning_profile: 7 tests ✅ - scoring: 5 tests ✅ - profile_adapter: 6 tests ✅ - coordinator: learning-specific tests ✅ ## Data Flow 1. Task arrives → AgentCoordinator::assign_task() 2. Extract task_type from description 3. Query KG for task-type executions (load_learning_profile_from_kg) 4. Calculate expertise with recency bias 5. Score candidates (SwarmCoordinator + learning) 6. Assign to top-scored agent 7. Execution result → KG → Update learning profiles ## Key Design Decisions ✅ Recency bias: 7-day half-life with 3x weight for recent performance ✅ Confidence scoring: min(1.0, total_executions / 20) prevents overfitting ✅ Hierarchical scoring: 30% base load, 50% expertise, 20% confidence ✅ KG query limit: 100 recent executions per task-type for performance ✅ Async loading: load_learning_profile_from_kg supports concurrent loads ## Next: Phase 5.4 - Cost Optimization Ready to implement budget enforcement and cost-aware provider selection.
2026-01-11 13:03:53 +00:00
"""
VAPORA Multi-IA Router Configuration
Defines LLM routing rules, model mappings, cost thresholds, and fallback chains
"""
import k.api.all as k
# ===== LLM PROVIDER DEFINITIONS =====
# Registry of LLM backends available to the router.
# Per provider:
#   name         - display name
#   endpoint     - base API URL (ollama resolves to an in-cluster service)
#   models       - per model: API name, context window in tokens, and
#                  cost_per_mtok in USD per million tokens (values match the
#                  `input` rate in cost_tracking.pricing; 0.0 = free local model)
#   availability - "production" = hosted SaaS, "local" = on-premise
#   regions      - regions the provider is served from
llm_providers = {
"claude": {
name = "Anthropic Claude"
endpoint = "https://api.anthropic.com/v1"
models = [
{name = "claude-opus-4-1", context = 200000, cost_per_mtok = 15.0}
{name = "claude-sonnet-4-20250514", context = 200000, cost_per_mtok = 3.0}
{name = "claude-haiku-3-5-20241022", context = 200000, cost_per_mtok = 0.80}
]
availability = "production"
regions = ["us-east-1", "us-west-2", "eu-west-1"]
}
"openai": {
name = "OpenAI"
endpoint = "https://api.openai.com/v1"
models = [
{name = "gpt-4-turbo", context = 128000, cost_per_mtok = 10.0}
{name = "gpt-4o", context = 128000, cost_per_mtok = 5.0}
{name = "gpt-3.5-turbo", context = 16384, cost_per_mtok = 0.50}
]
availability = "production"
regions = ["us-east-1", "us-west-2", "eu-west-1"]
}
"gemini": {
name = "Google Gemini"
endpoint = "https://generativelanguage.googleapis.com/v1beta"
models = [
{name = "gemini-2.0-pro", context = 1000000, cost_per_mtok = 10.0}
{name = "gemini-2.0-flash", context = 1000000, cost_per_mtok = 0.075}
{name = "gemini-1.5-pro", context = 1000000, cost_per_mtok = 1.25}
]
availability = "production"
regions = ["us-central-1"]
}
"ollama": {
name = "Ollama Local"
# Kubernetes-internal service endpoint; no per-token cost.
endpoint = "http://ollama.vapora-system:11434"
models = [
{name = "llama2", context = 4096, cost_per_mtok = 0.0}
{name = "mistral", context = 8192, cost_per_mtok = 0.0}
{name = "neural-chat", context = 4096, cost_per_mtok = 0.0}
]
availability = "local"
regions = ["on-premise"]
}
}
# ===== TASK CONTEXT CLASSIFIERS =====
# Per-task-type routing hints consumed by the routing layer.
#   complexity        - "low" | "medium" | "high"
#   latency_sensitive - prefer fast models when true (see routing_rules)
#   context_needs     - tokens of context the task typically requires
#   quality_critical  - prefer higher-quality models when true
#   cost_sensitive    - eligible for cheap-provider downgrade when true
#   recommended       - candidate models, presumably in preference order
#                       (TODO confirm ordering semantics with the router)
task_classifiers = {
"code_generation": {
complexity = "high"
latency_sensitive = false
context_needs = 32000
quality_critical = true
cost_sensitive = false
recommended = ["claude-opus-4-1", "gpt-4-turbo", "claude-sonnet-4-20250514"]
}
"code_review": {
complexity = "medium"
latency_sensitive = false
context_needs = 16000
quality_critical = true
cost_sensitive = true
recommended = ["claude-sonnet-4-20250514", "gpt-4o", "gemini-2.0-flash"]
}
"documentation": {
complexity = "medium"
latency_sensitive = false
context_needs = 8000
quality_critical = true
cost_sensitive = true
recommended = ["gpt-4-turbo", "gemini-1.5-pro", "claude-sonnet-4-20250514"]
}
"testing": {
complexity = "medium"
latency_sensitive = false
context_needs = 16000
quality_critical = true
cost_sensitive = true
recommended = ["claude-sonnet-4-20250514", "gpt-4o"]
}
"quick_query": {
complexity = "low"
latency_sensitive = true
context_needs = 4000
quality_critical = false
cost_sensitive = true
recommended = ["gemini-2.0-flash", "gpt-3.5-turbo", "llama2"]
}
"embeddings": {
complexity = "low"
latency_sensitive = true
context_needs = 512
quality_critical = false
cost_sensitive = true
# NOTE(review): only entry using a provider-qualified name ("ollama/…")
# rather than a bare model name — confirm the router accepts both forms.
recommended = ["ollama/neural-chat"]
}
"summarization": {
complexity = "medium"
latency_sensitive = false
context_needs = 32000
quality_critical = true
cost_sensitive = true
recommended = ["claude-sonnet-4-20250514", "gemini-2.0-flash"]
}
"real_time_monitoring": {
complexity = "low"
latency_sensitive = true
context_needs = 2000
quality_critical = false
cost_sensitive = true
recommended = ["gemini-2.0-flash", "gpt-3.5-turbo"]
}
}
# ===== DEFAULT LLM MAPPINGS =====
# Per-agent-role routing defaults.
#   agent_role       - role this mapping applies to
#   task_type        - specific classifier key, or "*" wildcard for all tasks
#   default_llm      - model used first
#   fallback         - models tried in order if the default is unavailable
#   override_allowed - false pins the mapping for critical roles
#                      (Architect, Monitor, Security)
default_mappings = [
{
agent_role = "Architect"
task_type = "*" # All tasks
default_llm = "claude-opus-4-1"
fallback = ["gpt-4-turbo"]
override_allowed = false # Critical decisions
}
{
agent_role = "Developer"
task_type = "code_generation"
default_llm = "claude-sonnet-4-20250514"
fallback = ["gpt-4o", "claude-opus-4-1"]
override_allowed = true
}
{
agent_role = "CodeReviewer"
task_type = "code_review"
default_llm = "claude-sonnet-4-20250514"
fallback = ["gpt-4o", "gemini-2.0-flash"]
override_allowed = true
}
{
agent_role = "Tester"
task_type = "testing"
default_llm = "claude-sonnet-4-20250514"
fallback = ["gpt-4o"]
override_allowed = true
}
{
agent_role = "Documenter"
task_type = "documentation"
default_llm = "gpt-4-turbo"
fallback = ["claude-sonnet-4-20250514", "gemini-1.5-pro"]
override_allowed = true
}
{
agent_role = "Marketer"
task_type = "*"
default_llm = "claude-sonnet-4-20250514"
fallback = ["gpt-4o"]
override_allowed = true
}
{
agent_role = "Monitor"
task_type = "real_time_monitoring"
default_llm = "gemini-2.0-flash"
fallback = ["gpt-3.5-turbo"]
override_allowed = false # Must be fast
}
{
agent_role = "Security"
task_type = "*"
default_llm = "claude-opus-4-1"
fallback = ["gpt-4-turbo"]
override_allowed = false # Critical security
}
]
# ===== COST TRACKING CONFIGURATION =====
# All monetary values are USD. Warn thresholds raise alerts (see
# monitoring.alerts); hard limits stop new requests.
cost_tracking = {
enabled = true
daily_warn_threshold = 5000 # Warn if daily cost > $5000
daily_hard_limit = 10000 # Hard stop if daily cost > $10000
monthly_warn_threshold = 100000
monthly_hard_limit = 150000
# Cost allocation by agent role
# NOTE(review): per-role daily budgets sum to $6000, above the global
# daily_warn_threshold ($5000) — confirm this over-allocation is
# intentional headroom (roles rarely all hit their caps at once).
budget_per_agent = {
"Architect": {daily = 500, monthly = 10000}
"Developer": {daily = 2000, monthly = 40000}
"CodeReviewer": {daily = 1000, monthly = 20000}
"Tester": {daily = 800, monthly = 16000}
"Documenter": {daily = 300, monthly = 6000}
"Security": {daily = 500, monthly = 10000}
"Monitor": {daily = 100, monthly = 2000}
"Other": {daily = 800, monthly = 16000}
}
# Price tracking: USD per million tokens, split into input/output rates.
# Every model reachable via llm_providers, task_classifiers.recommended,
# or a default_mappings fallback must be priced here, otherwise its usage
# cannot be attributed to the budgets above.
pricing = {
"claude-opus-4-1": {input = 15.0, output = 75.0}
"claude-sonnet-4-20250514": {input = 3.0, output = 15.0}
"claude-haiku-3-5-20241022": {input = 0.80, output = 4.0}
"gpt-4-turbo": {input = 10.0, output = 30.0}
"gpt-4o": {input = 5.0, output = 15.0}
"gpt-3.5-turbo": {input = 0.50, output = 1.50}
"gemini-2.0-pro": {input = 10.0, output = 30.0}
"gemini-2.0-flash": {input = 0.075, output = 0.30}
"gemini-1.5-pro": {input = 1.25, output = 5.0}
# Local Ollama models incur no per-token cost.
"llama2": {input = 0.0, output = 0.0}
"mistral": {input = 0.0, output = 0.0}
"neural-chat": {input = 0.0, output = 0.0}
}
}
# ===== LATENCY AND PERFORMANCE TARGETS =====
# Per-task-type latency percentile targets in milliseconds; the
# high_latency alert fires when observed p95 exceeds twice these values
# (see monitoring.alerts). Only latency-relevant task types are listed.
performance_targets = {
"code_generation": {p50 = 5000, p95 = 15000, p99 = 30000} # milliseconds
"code_review": {p50 = 3000, p95 = 10000, p99 = 20000}
"quick_query": {p50 = 500, p95 = 2000, p99 = 5000}
"real_time_monitoring": {p50 = 200, p95 = 1000, p99 = 2000}
}
# ===== CIRCUIT BREAKER SETTINGS =====
# Per-provider circuit-breaker tuning. All durations in milliseconds.
#   failure_threshold   - consecutive errors before the breaker opens
#   timeout_threshold   - per-request timeout
#   half_open_max_calls - probe calls allowed while half-open
#   reset_timeout       - wait before transitioning open -> half-open
circuit_breakers = {
"claude": {
failure_threshold = 5 # Fail after 5 consecutive errors
timeout_threshold = 60000 # 60s timeout
half_open_max_calls = 3
reset_timeout = 30000
}
"openai": {
failure_threshold = 5
timeout_threshold = 45000
half_open_max_calls = 3
reset_timeout = 30000
}
"gemini": {
failure_threshold = 5
timeout_threshold = 30000
half_open_max_calls = 3
reset_timeout = 30000
}
"ollama": {
failure_threshold = 3 # Local failures more critical
timeout_threshold = 15000
half_open_max_calls = 5
reset_timeout = 10000
}
}
# ===== ROUTING RULES =====
# Declarative condition -> action pairs evaluated by the router.
# Conditions are expression strings interpreted at runtime; evaluation
# order / first-match semantics are defined by the router, not here —
# TODO confirm whether multiple matching rules can fire.
routing_rules = [
{
condition = "task.complexity == high && cost < 1000"
action = "use_claude_opus"
}
{
condition = "task.latency_sensitive == true"
action = "use_fastest_available"
}
{
# NOTE(review): 4000 is below cost_tracking.daily_warn_threshold (5000)
# — presumably a pre-emptive downgrade before the warn fires; confirm.
condition = "task.cost_sensitive == true && daily_cost > 4000"
action = "use_ollama_or_cheap"
}
{
condition = "provider_status[claude] == down"
action = "fallback_to_gpt4"
}
{
condition = "time_of_day == peak_hours && usage_high"
action = "load_balance_all_providers"
}
]
# ===== MONITORING AND ALERTING =====
# Metric collection toggles plus alert definitions. Each alert has a
# runtime-evaluated condition string, a severity ("warning"/"critical"),
# and a list of action identifiers executed when it fires.
monitoring = {
track_latencies = true
track_costs = true
track_failures = true
track_token_usage = true
metrics_retention = 30 # days
alerts = [
{
# Mirrors cost_tracking.daily_warn_threshold ($5000).
name = "high_daily_cost"
condition = "cost_today > 5000"
severity = "warning"
actions = ["notify_ops", "switch_to_cheap_provider"]
}
{
name = "provider_down"
condition = "provider_status == down"
severity = "critical"
actions = ["failover", "notify_ops"]
}
{
# Compares against performance_targets for the task type.
name = "high_latency"
condition = "p95_latency > performance_target * 2"
severity = "warning"
actions = ["notify_team", "consider_load_rebalance"]
}
{
# Enforces cost_tracking.monthly_hard_limit.
name = "budget_exceeded"
condition = "monthly_cost > monthly_hard_limit"
severity = "critical"
actions = ["stop_new_requests", "notify_management"]
}
]
}
# ===== OUTPUT =====
# Exported configuration object: aggregates every section above into the
# single value consumed by the router.
output = {
providers = llm_providers
classifiers = task_classifiers
default_mappings = default_mappings
cost_tracking = cost_tracking
performance_targets = performance_targets
circuit_breakers = circuit_breakers
routing_rules = routing_rules
monitoring = monitoring
}