# AI Integration Configuration for Provisioning Platform
# This file configures the AI system including LLM providers, RAG, MCP, and security policies.
# ============================================================================
# Core AI Configuration
# ============================================================================
[ai]
# Enable/disable AI features globally
enabled = true
# LLM Provider Selection
# Options: "anthropic" | "openai" | "local" | "azure-openai"
provider = "anthropic"
# Model Selection
# Anthropic: "claude-sonnet-4", "claude-opus-4", "claude-haiku-4"
# OpenAI: "gpt-4-turbo", "gpt-4", "gpt-3.5-turbo"
# Local: "llama-3-70b", "mistral-large", "codellama-34b"
model = "claude-sonnet-4"
# Model Temperature (0.0-1.0)
# Lower = more deterministic, Higher = more creative
temperature = 0.7
# Maximum tokens for responses
max_tokens = 4096
# Request timeout (seconds)
timeout = 60
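# Example (illustrative): an air-gapped deployment overriding the defaults
# above; the values are assumptions, and [ai.local] below holds the matching
# server settings.
#   provider = "local"
#   model = "llama-3-70b"
#   temperature = 0.2   # lower for more deterministic config output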
# ============================================================================
# AI Features - Fine-Grained Control
# ============================================================================
[ai.features]
# AI-assisted form filling (typdialog-ai)
# Real-time suggestions and field value predictions
form_assistance = true
# Natural language configuration generation (typdialog-prov-gen)
# Convert plain English to Nickel configs
config_generation = true
# Autonomous AI agents (typdialog-ag)
# WARNING: Agents can execute multi-step workflows
# Recommended: false for production (enable per use case)
autonomous_agents = false
# AI-powered troubleshooting
# Analyze logs and suggest fixes for failed deployments
troubleshooting = true
# Configuration optimization
# AI reviews configs and suggests improvements
optimization = true
# Validation error explanations
# AI explains Nickel validation errors in plain language
error_explanations = true
# ============================================================================
# LLM Provider Configuration
# ============================================================================
[ai.anthropic]
# Anthropic Claude API configuration
api_key = "env:ANTHROPIC_API_KEY" # Load from environment variable
api_url = "https://api.anthropic.com/v1"
max_retries = 3
retry_delay_ms = 1000
# Rate limits (per minute)
max_requests_per_minute = 50
max_tokens_per_minute = 100000
[ai.openai]
# OpenAI GPT-4 API configuration
api_key = "env:OPENAI_API_KEY"
api_url = "https://api.openai.com/v1"
max_retries = 3
organization_id = "" # Optional
retry_delay_ms = 1000
# Rate limits (per minute)
max_requests_per_minute = 60
max_tokens_per_minute = 150000
[ai.local]
# Local LLM configuration (Ollama, LlamaCpp, vLLM)
# Use for air-gapped deployments or privacy-critical scenarios
context_length = 8192
model_path = "/opt/provisioning/models/llama-3-70b"
num_gpu_layers = 40 # GPU acceleration
server_url = "http://localhost:11434" # Ollama default
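# Example (illustrative): trading the 70B model for a smaller one when GPU
# memory is tight; the path and layer count are assumptions for your host.
#   model_path = "/opt/provisioning/models/mistral-large"
#   num_gpu_layers = 20   # fewer offloaded layers fit in less VRAM
#   context_length = 4096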
# ============================================================================
# Model Context Protocol (MCP) Server
# ============================================================================
[ai.mcp]
# MCP server configuration
enabled = true
max_retries = 3
server_url = "http://localhost:9000"
timeout = 30
# Tool calling configuration
[ai.mcp.tools]
enabled = true
# Available tools for LLM
# Tools provide structured actions the LLM can invoke
tools = [
    "nickel_validate",      # Validate Nickel configuration
    "schema_query",         # Query Nickel schema information
    "config_generate",      # Generate configuration snippets
    "cedar_check",          # Check Cedar authorization policies
    "deployment_status",    # Query deployment status
    "log_analyze",          # Analyze deployment logs
]
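# Under MCP, a tool invocation arrives as a JSON-RPC "tools/call" request.
# A minimal sketch of a payload the LLM might emit (the argument shape is an
# assumption, not a documented contract of this platform):
#   { "jsonrpc": "2.0", "id": 1, "method": "tools/call",
#     "params": { "name": "nickel_validate",
#                 "arguments": { "config": "<nickel source>" } } }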
# ============================================================================
# Retrieval-Augmented Generation (RAG)
# ============================================================================
[ai.rag]
# Enable RAG system
enabled = true
# Vector Store Configuration
# Options: "qdrant" | "milvus" | "pgvector" | "chromadb"
collection_name = "provisioning-knowledge"
vector_store = "qdrant"
vector_store_url = "http://localhost:6333"
# Embedding Model
# OpenAI: "text-embedding-3-large", "text-embedding-3-small"
# Local: "all-MiniLM-L6-v2", "bge-large-en-v1.5"
embedding_api_key = "env:OPENAI_API_KEY" # For OpenAI embeddings
embedding_model = "text-embedding-3-large"
# Document Chunking
chunk_overlap = 50 # Overlap between chunks
chunk_size = 512 # Characters per chunk
max_chunks_per_query = 10 # Top-k retrieval
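# Worked example: with chunk_size = 512 and chunk_overlap = 50, each chunk
# advances 462 characters (512 - 50), so a 10,000-character document yields
# about ceil((10000 - 512) / 462) + 1 = 22 chunks; a query then retrieves at
# most the top 10 of them by embedding similarity (max_chunks_per_query).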
# ============================================================================
# RAG Index Configuration
# ============================================================================
[ai.rag.index]
# What to index for RAG retrieval
# Index Nickel schemas (RECOMMENDED: true)
# Provides AI with schema definitions and contracts
schemas = true
schemas_path = "provisioning/schemas"
# Index documentation (RECOMMENDED: true)
# Provides AI with user guides and best practices
docs = true
docs_path = "docs"
# Index past deployments (RECOMMENDED: true)
# AI learns from successful deployment patterns
deployments = true
deployments_path = "workspaces"
# Index best practices (RECOMMENDED: true)
# Inject organizational patterns and conventions
best_practices = true
best_practices_path = ".claude/patterns"
# Index deployment logs (WARNING: privacy concerns)
# Logs may contain sensitive data; enable only if they are sanitized
logs = false
logs_retention_days = 30
# Reindexing schedule
auto_reindex = true
reindex_interval_hours = 24
# ============================================================================
# Security and Access Control
# ============================================================================
[ai.security]
# Cedar policy store for AI access control
cedar_policy_store = "/etc/provisioning/cedar-policies/ai"
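# A minimal Cedar policy sketch for this store (illustrative; the principal,
# action, and group names are assumptions, not this platform's actual schema):
#   permit(
#     principal in Group::"platform-engineers",
#     action == Action::"ai:GenerateConfig",
#     resource
#   );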
# AI must not suggest or reveal secret values (CRITICAL: keep at 0)
# AI can suggest secret names/paths but not retrieve actual secrets
max_secret_suggestions = 0
# Require human approval for critical operations (CRITICAL: keep true)
# Operations requiring approval:
# - Deployments to production
# - Configuration changes affecting security
# - Secret rotation
# - Infrastructure deletion
require_human_approval = true
# Audit all AI operations (CRITICAL: keep true)
# Log every AI request, response, and action
audit_all_operations = true
# Data sanitization before sending to LLM
# Remove sensitive data from prompts
[ai.security.sanitization]
sanitize_credentials = true # Remove passwords, API keys
sanitize_ip_addresses = false # Keep for troubleshooting
sanitize_pii = true # Remove personally identifiable info
sanitize_secrets = true # Remove secret values
# Allowed data for LLM
allowed_data = [
    "nickel_schemas",   # Schema definitions (public)
    "documentation",    # User docs (public)
    "error_messages",   # Validation errors (sanitized)
    "resource_names",   # Infrastructure resource identifiers
]
# Forbidden data for LLM (NEVER send to external LLM)
forbidden_data = [
    "secret_values",    # Passwords, API keys, tokens
    "private_keys",     # SSH keys, TLS keys, encryption keys
    "pii",              # Email addresses, names, phone numbers
    "credentials",      # Authentication credentials
    "session_tokens",   # User session data
]
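# Sanitization example (illustrative): with the flags above, a log line like
#   "login failed for admin@example.com with password hunter2 from 10.0.0.5"
# would reach the LLM roughly as
#   "login failed for [PII] with password [REDACTED] from 10.0.0.5"
# The IP survives because sanitize_ip_addresses = false; the placeholder
# tokens themselves are assumptions about the sanitizer's output format.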
# ============================================================================
# Rate Limiting and Cost Control
# ============================================================================
[ai.rate_limiting]
# Per-user rate limits
requests_per_day = 2000
requests_per_hour = 500
requests_per_minute = 60
# Token limits (to control LLM API costs)
tokens_per_day = 1000000 # 1M tokens/day
tokens_per_month = 30000000 # 30M tokens/month
# Cost limits (USD)
cost_limit_per_day = "100.00"
cost_limit_per_month = "2000.00"
# Alert thresholds
cost_alert_threshold = 0.8 # Alert at 80% of limit
# Rate limit exceeded behavior
# Options: "queue" | "reject" | "throttle"
exceed_behavior = "queue"
max_queue_size = 100
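# Worked example (illustrative pricing): at an assumed blended rate of $10
# per 1M tokens, the tokens_per_day cap of 1,000,000 corresponds to roughly
# $10/day, far below cost_limit_per_day = "100.00"; the token cap therefore
# binds first unless per-token prices are about 10x higher. Check your
# provider's current pricing, which this file does not encode.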
# ============================================================================
# Caching
# ============================================================================
[ai.caching]
# Enable response caching to reduce LLM API calls
enabled = true
# Cache TTL (time-to-live)
ttl = "1h"
# Cache backend
# Options: "redis" | "memcached" | "in-memory"
backend = "redis"
redis_url = "redis://localhost:6379"
# Cache key strategy
# "prompt" = Cache by exact prompt (high precision, low hit rate)
# "semantic" = Cache by semantic similarity (lower precision, high hit rate)
cache_strategy = "semantic"
semantic_similarity_threshold = 0.95
# Cache statistics
log_cache_misses = false
track_hit_rate = true
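# Example: with cache_strategy = "semantic" and a 0.95 threshold, the prompts
# "create a 3-node kubernetes cluster" and "set up a kubernetes cluster with
# three nodes" share one cache entry if their embedding similarity is >= 0.95,
# whereas the "prompt" strategy would treat them as distinct keys. The sample
# prompts are illustrative.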
# ============================================================================
# Observability and Monitoring
# ============================================================================
[ai.observability]
# Logging level for AI operations
# Options: "trace" | "debug" | "info" | "warn" | "error"
log_level = "info"
# Trace all AI requests (detailed logging)
# WARNING: Generates large log volume
trace_all_requests = true
# Store conversation history (for debugging and learning)
conversation_retention_days = 30
store_conversations = true
# Metrics collection
[ai.observability.metrics]
enabled = true
export_format = "prometheus" # "prometheus" | "opentelemetry"
export_port = 9090
# Metrics to collect
metrics = [
    "request_count",             # Total AI requests
    "request_duration",          # Latency histogram
    "token_usage",               # Input/output tokens
    "cost_tracking",             # USD cost per request
    "cache_hit_rate",            # Cache effectiveness
    "validation_success_rate",   # Generated config validity
    "human_approval_rate",       # How often humans approve AI output
]
# Distributed tracing
[ai.observability.tracing]
enabled = true
jaeger_endpoint = "http://localhost:14268/api/traces"
sample_rate = 0.1 # Sample 10% of requests
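# Example: at the per-user ceiling of 60 requests/minute ([ai.rate_limiting]),
# sample_rate = 0.1 emits on the order of 6 traces per minute per active user.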
# ============================================================================
# AI Agent Configuration (typdialog-ag)
# ============================================================================
[ai.agents]
# WARNING: Autonomous agents can execute multi-step workflows
# Enable with caution, only for trusted users
# Enable AI agents globally
enabled = false
# Maximum iterations per agent execution
# Prevents infinite loops
max_iterations = 20
# Agent timeout (seconds)
timeout = 300
# Require approval for each agent action (RECOMMENDED: true)
# If false, agent executes entire workflow autonomously
require_step_approval = true
# Agent types
[ai.agents.types]
# Provisioning agent: End-to-end infrastructure setup
provisioning_agent = false
# Troubleshooting agent: Diagnose and fix deployment issues
troubleshooting_agent = true
# Optimization agent: Analyze and improve configurations
optimization_agent = true
# Security audit agent: Review configs for vulnerabilities
security_audit_agent = true
# ============================================================================
# Configuration Generation (typdialog-prov-gen)
# ============================================================================
[ai.config_generation]
# Default schema for generated configs
default_schema = "workspace"
# Validation mode
# "strict" = Reject any invalid config
# "permissive" = Allow configs with warnings
validation_mode = "strict"
# Best practice injection
# Automatically add security/performance best practices
inject_best_practices = true
# Template usage
# Use pre-defined templates as starting points
template_directory = "provisioning/templates"
use_templates = true
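# Example (illustrative): a request such as "a web workspace with 3 replicas"
# might generate a Nickel snippet along these lines; the field names are
# assumptions, since the real shape comes from your schemas and templates.
#   { name = "web", replicas = 3 }
# With validation_mode = "strict", the snippet is rejected unless it satisfies
# the schema's contracts.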
# ============================================================================
# Form Assistance (typdialog-ai)
# ============================================================================
[ai.form_assistance]
# Real-time suggestions as user types
real_time_suggestions = true
# Minimum characters before triggering suggestions
min_chars_for_suggestions = 3
# Maximum suggestions per field
max_suggestions = 5
# Suggestion confidence threshold (0.0-1.0)
# Only show suggestions with confidence above threshold
confidence_threshold = 0.7
# Natural language form filling
# Users can describe the entire form in plain English
nl_form_filling = true
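# Example: after a user types at least 3 characters into a field, up to 5
# suggestions appear, and only those scoring at or above the 0.7 confidence
# threshold; a candidate at 0.65 is silently dropped.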
# ============================================================================
# Environment-Specific Overrides
# ============================================================================
# Development environment
[ai.environments.dev]
cost_limit_per_day = "10.00"
enabled = true
model = "gpt-4-turbo"
provider = "openai" # Cheaper for dev
require_human_approval = false # Faster iteration
# Staging environment
[ai.environments.staging]
cost_limit_per_day = "50.00"
enabled = true
model = "claude-sonnet-4"
provider = "anthropic"
require_human_approval = true
# Production environment
[ai.environments.production]
autonomous_agents = false # NEVER enable in production
cost_limit_per_day = "100.00"
enabled = true
model = "claude-sonnet-4"
provider = "anthropic"
require_human_approval = true # ALWAYS true for production
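# These sections presumably merge over the base [ai] keys (last-wins), so the
# effective production config keeps inherited values such as temperature = 0.7
# while forcing the overrides listed here, e.g.:
#   require_human_approval = true   # forced by this section
#   autonomous_agents = false       # forced by this section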
# ============================================================================
# Integration with Other Services
# ============================================================================
[ai.integration]
# Orchestrator integration
orchestrator_api_key = "env:ORCHESTRATOR_API_KEY"
orchestrator_url = "https://orchestrator.example.com"
# SecretumVault integration (for secret name suggestions only)
# AI can query secret names/paths but NEVER values
secretum_vault_token = "env:VAULT_TOKEN"
secretum_vault_url = "https://vault.example.com:8200"
# Typdialog Web UI integration
typdialog_url = "https://forms.provisioning.example.com"
typdialog_websocket_enabled = true
# ============================================================================
# Advanced Settings
# ============================================================================
[ai.advanced]
# Prompt engineering
system_prompt_template = "provisioning/ai/prompts/system.txt"
user_prompt_template = "provisioning/ai/prompts/user.txt"
# Context window management
context_truncation_strategy = "sliding_window" # "sliding_window" | "summarize"
max_context_tokens = 100000 # Claude Sonnet 4 context window
# Streaming responses
enable_streaming = true
stream_chunk_size = 100 # Characters per chunk
# Concurrent requests
max_concurrent_requests = 10
# ============================================================================
# Experimental Features (Use at Your Own Risk)
# ============================================================================
[ai.experimental]
# Multi-agent collaboration
# Multiple AI agents work together on complex tasks
multi_agent_collaboration = false
# Reinforcement learning from human feedback (RLHF)
# Learn from user corrections to improve over time
rlhf_enabled = false
# Fine-tuning on deployment history
# Train custom models on organization-specific patterns
fine_tuning = false
fine_tuning_dataset_path = "provisioning/ai/fine-tuning-data"
# ============================================================================
# Compliance and Legal
# ============================================================================
[ai.compliance]
# Data residency requirements
# Ensure LLM provider complies with data residency laws
data_residency = "us" # "us" | "eu" | "local"
# GDPR compliance mode
gdpr_data_retention_days = 90
gdpr_mode = false
# SOC 2 compliance logging
soc2_logging = false
# Terms of service acceptance
# Must explicitly accept LLM provider TOS
tos_accepted = false
tos_version = "2025-01-08"
# IMPORTANT NOTES:
#
# 1. API Keys: NEVER hardcode API keys. Always use environment variables.
# Example: api_key = "env:ANTHROPIC_API_KEY"
#
# 2. Security: Keep require_human_approval = true for production.
# AI-generated configs must be reviewed by humans.
#
# 3. Costs: Monitor LLM API usage. Set appropriate cost_limit_per_day.
# Default limits are conservative but may need adjustment.
#
# 4. Privacy: For sensitive workloads, use local models (no external API calls).
# Set provider = "local" and configure local model path.
#
# 5. RAG Index: Regularly reindex to keep AI knowledge up-to-date.
# Set auto_reindex = true and adjust reindex_interval_hours.
#
# 6. Cedar Policies: Define fine-grained AI access control in Cedar.
# Location: /etc/provisioning/cedar-policies/ai
#
# 7. Audit Logs: AI operations are security-critical. Keep audit_all_operations = true.
# Logs stored in: /var/log/provisioning/ai-audit.log
#
# 8. Agents: Autonomous agents are powerful but risky.
# Enable only for specific use cases, never globally in production.
# Version: 1.0
# Last Updated: 2025-01-08