Vapora/provisioning/schemas/vapora/llm-router.ncl

# VAPORA LLM Router Schema with Cost Tracking and Budget Enforcement

# Contract helpers local to this schema. They are bound with `let` so the file
# still evaluates to a single record, exactly as before.

# Accepts only strings that appear in `allowed`. Used for fields whose doc
# string enumerates a closed set of values, so that typos fail at validation
# time instead of reaching the router.
let OneOf = fun allowed =>
  std.contract.from_predicate (fun value =>
    std.is_string value && std.array.elem value allowed
  )
in
# Accepts numbers within the inclusive range [lo, hi].
let InRange = fun lo hi =>
  std.contract.from_predicate (fun value =>
    std.is_number value && value >= lo && value <= hi
  )
in
# Accepts integers within the inclusive range [lo, hi].
let IntInRange = fun lo hi =>
  std.contract.from_predicate (fun value =>
    std.is_number value
    && std.number.is_integer value
    && value >= lo
    && value <= hi
  )
in
# Accepts any number >= 0 (durations, intervals, budgets).
let NonNegative =
  std.contract.from_predicate (fun value => std.is_number value && value >= 0)
in
# Accepts any integer >= 0 (counts).
let NonNegativeInt =
  std.contract.from_predicate (fun value =>
    std.is_number value && std.number.is_integer value && value >= 0
  )
in
{
  # Network endpoint the router listens on.
  host | String | doc "LLM Router bind address" | default = "0.0.0.0",
  # Must be a valid TCP port number (0-65535, integer).
  port | Number | (IntInRange 0 65535) | doc "LLM Router port" | default = 8003,

  # What usage data is collected and how often it is reported.
  cost_tracking = {
    enabled | Bool | doc "Enable cost tracking per provider" | default = true,
    track_tokens | Bool | doc "Track input/output tokens" | default = true,
    track_latency | Bool | doc "Track provider latency" | default = true,
    reporting_interval | Number | NonNegative | doc "Cost report interval in seconds" | default = 3600,
  },

  # Budget limits and what happens as spending approaches them.
  budget_enforcement = {
    enabled | Bool | doc "Enable budget enforcement with automatic fallback" | default = true,
    # Restricted to the values listed in the doc string.
    window | String | (OneOf ["daily", "weekly", "monthly"]) | doc "Budget window: daily, weekly, monthly" | default = "monthly",
    # Expressed as a percentage, so it is bounded to 0-100.
    near_threshold_percent | Number | (InRange 0 100) | doc "Alert threshold percentage (80 = 80%)" | default = 80,
    auto_fallback | Bool | doc "Automatically fallback to cheaper provider" | default = true,
    detailed_tracking | Bool | doc "Detailed cost tracking per role" | default = true,

    # Per-role spending caps in USD cents (500000 = $5,000.00).
    # NOTE(review): the doc strings say "monthly" while `window` above is
    # configurable - confirm whether these caps apply per `window` instead.
    role_limits = {
      architect_cents | Number | NonNegative | doc "Architect monthly budget (USD cents)" | default = 500000,
      developer_cents | Number | NonNegative | doc "Developer monthly budget (USD cents)" | default = 300000,
      reviewer_cents | Number | NonNegative | doc "Reviewer monthly budget (USD cents)" | default = 200000,
      testing_cents | Number | NonNegative | doc "Testing monthly budget (USD cents)" | default = 100000,
    },
  },

  # Per-provider on/off switches. Only Claude is enabled by default.
  providers = {
    claude_enabled | Bool | doc "Enable Anthropic Claude provider" | default = true,
    openai_enabled | Bool | doc "Enable OpenAI provider" | default = false,
    gemini_enabled | Bool | doc "Enable Google Gemini provider" | default = false,
    ollama_enabled | Bool | doc "Enable local Ollama provider" | default = false,
    ollama_url | String | doc "Ollama server URL" | default = "http://localhost:11434",
  },

  # Provider selection, retry and timeout behaviour.
  routing = {
    # Restricted to the values listed in the doc string.
    strategy | String | (OneOf ["cost_aware", "performance", "balanced"]) | doc "Routing strategy: cost_aware, performance, balanced" | default = "balanced",
    # NOTE(review): "gpt-4" looks like a model name, whereas the toggles in
    # `providers` use the provider name `openai` - confirm which identifiers
    # the router expects here. Left unvalidated for that reason.
    fallback_chain | Array String | doc "Fallback provider chain" | default = ["claude", "gpt-4", "ollama"],
    retry_attempts | Number | NonNegativeInt | doc "Retry attempts for failed requests" | default = 3,
    # Note the unit mismatch: retry_delay is in milliseconds, request_timeout in seconds.
    retry_delay | Number | NonNegative | doc "Retry delay in milliseconds" | default = 1000,
    request_timeout | Number | NonNegative | doc "Request timeout in seconds" | default = 60,
  },

  # Router logging configuration.
  logging = {
    # Restricted to the values listed in the doc string.
    level | String | (OneOf ["trace", "debug", "info", "warn", "error"]) | doc "Log level: trace, debug, info, warn, error" | default = "info",
    detailed_cost_logs | Bool | doc "Log detailed cost information" | default = true,
  },
}