# Multi-IA Router Configuration
# Phase 0: Configuration for LLM provider selection

[routing]
# Default provider if no specific routing rules match
default_provider = "claude"

# Enable cost tracking
cost_tracking_enabled = true

# Enable fallback on provider failure
fallback_enabled = true

[providers.claude]
enabled = true
# ANTHROPIC_API_KEY environment variable required
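# Note: TOML itself has no variable interpolation; ${VAR} placeholders like the
# one below are presumably expanded by the router when this file is loaded.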
api_key = "${ANTHROPIC_API_KEY}"
model = "claude-sonnet-4-5-20250929"
max_tokens = 8192
temperature = 0.7

# Cost per 1M tokens (input/output)
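# Rates appear to be USD per 1M tokens; presumably consumed by the cost
# tracking enabled under [routing].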
cost_per_1m_input = 3.00
cost_per_1m_output = 15.00

[providers.openai]
enabled = true
# OPENAI_API_KEY environment variable required
api_key = "${OPENAI_API_KEY}"
model = "gpt-4o"
max_tokens = 4096
temperature = 0.7

# Cost per 1M tokens (input/output)
cost_per_1m_input = 2.50
cost_per_1m_output = 10.00

[providers.gemini]
enabled = true
# GOOGLE_API_KEY environment variable required
api_key = "${GOOGLE_API_KEY}"
model = "gemini-2.0-flash"
max_tokens = 8192
temperature = 0.7

# Cost per 1M tokens (input/output)
cost_per_1m_input = 0.30
cost_per_1m_output = 1.20

[providers.ollama]
enabled = true
# Local Ollama instance, no API key needed
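# The shell-style ":-" default below presumably resolves to
# http://localhost:11434 when OLLAMA_URL is unset.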
url = "${OLLAMA_URL:-http://localhost:11434}"
model = "llama3.2"
max_tokens = 4096
temperature = 0.7

# No cost for local models
cost_per_1m_input = 0.00
cost_per_1m_output = 0.00

# Routing rules: assign providers based on task characteristics
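# Presumably matched top to bottom, first match wins; model_override, where
# present, replaces the provider's default model.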
[[routing_rules]]
name = "architecture_design"
condition = { task_type = "architecture" }
provider = "claude"
model_override = "claude-opus-4-20250514"

[[routing_rules]]
name = "code_generation"
condition = { task_type = "development" }
provider = "claude"

[[routing_rules]]
name = "documentation"
condition = { task_type = "documentation" }
provider = "openai"

[[routing_rules]]
name = "monitoring"
condition = { task_type = "monitoring" }
provider = "gemini"

[[routing_rules]]
name = "local_testing"
condition = { environment = "development" }
provider = "ollama"