Vapora/config/llm-router.ncl
Jesús Pérez c5f4caa2ab
feat(agents): stable identity + hot-reload for zero learning loss on config change
Introduce stable_id = role on AgentMetadata so learning profiles and KG
  execution records survive process restarts and hot-reloads. Previously
  every Uuid::new_v4() rotation orphaned accumulated expertise.

  - registry: add stable_id field (serde default, backward-compatible),
    stable_id_or_role() fallback helper, drain_role(), list_roles()
  - coordinator: profile lookup and KG writes use stable_id_or_role()
    instead of the ephemeral UUID; drain_role() drops Sender to close
    mpsc channels after in-flight messages drain; registry_arc() accessor
  - executor: agent_id written to KG now uses stable_id_or_role()
  - server: reload_agents() drain-and-respawn function; SIGHUP handler
    via while sighup.recv().await.is_some(); POST /reload endpoint;
    AppState extended with config_path, router, cap_registry
  - fix: SIGHUP recv() spin-loop guard (is_some())
  - fix: io_other_error clippy lint in vapora-agents, vapora-llm-router,
    vapora-workflow-engine (std::io::Error::other instead of Error::new)
  - docs: ADR-0040, CHANGELOG entry, README hot-reload section
2026-03-02 22:54:28 +00:00

80 lines
1.9 KiB
Text

# Vapora LLM router configuration.
# Field shapes are enforced by the contracts in ../nickel/llm-router/contracts.ncl
# (C.RoutingConfig, C.ProviderConfig, C.RoutingRule).
let C = import "../nickel/llm-router/contracts.ncl" in
{
  # Global routing behaviour: the provider used when no rule matches,
  # whether per-request cost is tracked, and whether fallback to another
  # provider is permitted.
  routing | C.RoutingConfig = {
    default_provider = "claude",
    cost_tracking_enabled = true,
    fallback_enabled = true,
  },

  # Provider catalogue. Credentials and endpoints are environment-expanded
  # at deploy time ("${VAR}" / "${VAR:-default}"). Cost fields are priced
  # per one million tokens.
  providers = {
    claude | C.ProviderConfig = {
      enabled = true,
      api_key = "${ANTHROPIC_API_KEY}",
      model = "claude-sonnet-4-5-20250929",
      max_tokens = 8192,
      temperature = 0.7,
      cost_per_1m_input = 3.00,
      cost_per_1m_output = 15.00,
    },
    openai | C.ProviderConfig = {
      enabled = true,
      api_key = "${OPENAI_API_KEY}",
      model = "gpt-4o",
      max_tokens = 4096,
      temperature = 0.7,
      cost_per_1m_input = 2.50,
      cost_per_1m_output = 10.00,
    },
    gemini | C.ProviderConfig = {
      enabled = true,
      api_key = "${GOOGLE_API_KEY}",
      model = "gemini-2.0-flash",
      max_tokens = 8192,
      temperature = 0.7,
      cost_per_1m_input = 0.30,
      cost_per_1m_output = 1.20,
    },
    # Local model: addressed by URL rather than API key; zero cost.
    ollama | C.ProviderConfig = {
      enabled = true,
      url = "${OLLAMA_URL:-http://localhost:11434}",
      model = "llama3.2",
      max_tokens = 4096,
      temperature = 0.7,
      cost_per_1m_input = 0.00,
      cost_per_1m_output = 0.00,
    },
  },

  # Rules mapping request attributes to a provider; an optional
  # model_override selects a specific model within that provider.
  routing_rules | Array C.RoutingRule = [
    {
      name = "architecture_design",
      condition = { task_type = "architecture" },
      provider = "claude",
      model_override = "claude-opus-4-20250514",
    },
    {
      name = "code_generation",
      condition = { task_type = "development" },
      provider = "claude",
    },
    {
      name = "documentation",
      condition = { task_type = "documentation" },
      provider = "openai",
    },
    {
      name = "monitoring",
      condition = { task_type = "monitoring" },
      provider = "gemini",
    },
    {
      name = "local_testing",
      condition = { environment = "development" },
      provider = "ollama",
    },
  ],
}