# AI Integration Configuration for Provisioning Platform
# This file configures the AI system including LLM providers, RAG, MCP, and security policies.

# ============================================================================
# Core AI Configuration
# ============================================================================

[ai]
# Enable/disable AI features globally
enabled = true

# LLM Provider Selection
# Options: "anthropic" | "openai" | "local" | "azure-openai"
provider = "anthropic"

# Model Selection
# Anthropic: "claude-sonnet-4", "claude-opus-4", "claude-haiku-4"
# OpenAI: "gpt-4-turbo", "gpt-4", "gpt-3.5-turbo"
# Local: "llama-3-70b", "mistral-large", "codellama-34b"
model = "claude-sonnet-4"

# Model Temperature (0.0-1.0)
# Lower = more deterministic, Higher = more creative
temperature = 0.7

# Maximum tokens for responses
max_tokens = 4096

# Request timeout (seconds)
timeout = 60

# ============================================================================
# AI Features - Fine-Grained Control
# ============================================================================

  [ai.features]
  # AI-assisted form filling (typdialog-ai)
  # Real-time suggestions and field value predictions
  form_assistance = true

  # Natural language configuration generation (typdialog-prov-gen)
  # Convert plain English to Nickel configs
  config_generation = true

  # Autonomous AI agents (typdialog-ag)
  # WARNING: Agents can execute multi-step workflows
  # Recommended: false for production (enable per-use-case)
  autonomous_agents = false

  # AI-powered troubleshooting
  # Analyze logs and suggest fixes for failed deployments
  troubleshooting = true

  # Configuration optimization
  # AI reviews configs and suggests improvements
  optimization = true

  # Validation error explanations
  # AI explains Nickel validation errors in plain language
  error_explanations = true

  # ============================================================================
  # LLM Provider Configuration
  # ============================================================================

  [ai.anthropic]
  # Anthropic Claude API configuration
  api_key = "env:ANTHROPIC_API_KEY"        # Load from environment variable
  api_url = "https://api.anthropic.com/v1"
  max_retries = 3
  retry_delay_ms = 1000

  # Rate limits (per minute)
  max_requests_per_minute = 50
  max_tokens_per_minute = 100000

  [ai.openai]
  # OpenAI GPT-4 API configuration
  api_key = "env:OPENAI_API_KEY"
  api_url = "https://api.openai.com/v1"
  max_retries = 3
  organization_id = ""                  # Optional
  retry_delay_ms = 1000

  # Rate limits (per minute)
  max_requests_per_minute = 60
  max_tokens_per_minute = 150000

  [ai.local]
  # Local LLM configuration (Ollama, LlamaCpp, vLLM)
  # Use for air-gapped deployments or privacy-critical scenarios
  context_length = 8192
  model_path = "/opt/provisioning/models/llama-3-70b"
  num_gpu_layers = 40                                 # GPU acceleration
  server_url = "http://localhost:11434"               # Ollama default

  # ============================================================================
  # Model Context Protocol (MCP) Server
  # ============================================================================

  [ai.mcp]
  # MCP server configuration
  enabled = true
  max_retries = 3
  server_url = "http://localhost:9000"
  timeout = 30

    # Tool calling configuration
    [ai.mcp.tools]
    enabled = true

    # Available tools for LLM
    # Tools provide structured actions the LLM can invoke
    tools = [
      "nickel_validate",   # Validate Nickel configuration
      "schema_query",      # Query Nickel schema information
      "config_generate",   # Generate configuration snippets
      "cedar_check",       # Check Cedar authorization policies
      "deployment_status", # Query deployment status
      "log_analyze",       # Analyze deployment logs
    ]

    # ============================================================================
    # Retrieval-Augmented Generation (RAG)
    # ============================================================================

  [ai.rag]
  # Enable RAG system
  enabled = true

  # Vector Store Configuration
  # Options: "qdrant" | "milvus" | "pgvector" | "chromadb"
  collection_name = "provisioning-knowledge"
  vector_store = "qdrant"
  vector_store_url = "http://localhost:6333"

  # Embedding Model
  # OpenAI: "text-embedding-3-large", "text-embedding-3-small"
  # Local: "all-MiniLM-L6-v2", "bge-large-en-v1.5"
  embedding_api_key = "env:OPENAI_API_KEY"   # For OpenAI embeddings
  embedding_model = "text-embedding-3-large"

  # Document Chunking
  chunk_overlap = 50        # Overlap between chunks
  chunk_size = 512          # Characters per chunk
  max_chunks_per_query = 10 # Top-k retrieval

  # ============================================================================
  # RAG Index Configuration
  # ============================================================================

    [ai.rag.index]
    # What to index for RAG retrieval

    # Index Nickel schemas (RECOMMENDED: true)
    # Provides AI with schema definitions and contracts
    schemas = true
    schemas_path = "provisioning/schemas"

    # Index documentation (RECOMMENDED: true)
    # Provides AI with user guides and best practices
    docs = true
    docs_path = "docs"

    # Index past deployments (RECOMMENDED: true)
    # AI learns from successful deployment patterns
    deployments = true
    deployments_path = "workspaces"

    # Index best practices (RECOMMENDED: true)
    # Inject organizational patterns and conventions
    best_practices = true
    best_practices_path = ".claude/patterns"

    # Index deployment logs (WARNING: Privacy concerns)
    # Logs may contain sensitive data, enable only if sanitized
    logs = false
    logs_retention_days = 30

    # Reindexing schedule
    auto_reindex = true
    reindex_interval_hours = 24

    # ============================================================================
    # Security and Access Control
    # ============================================================================

  [ai.security]
  # Cedar policy store for AI access control
  cedar_policy_store = "/etc/provisioning/cedar-policies/ai"

  # AI cannot suggest secret values (CRITICAL: keep true)
  # AI can suggest secret names/paths but not retrieve actual secrets
  max_secret_suggestions = 0

  # Require human approval for critical operations (CRITICAL: keep true)
  # Operations requiring approval:
  # - Deployments to production
  # - Configuration changes affecting security
  # - Secret rotation
  # - Infrastructure deletion
  require_human_approval = true

  # Audit all AI operations (CRITICAL: keep true)
  # Log every AI request, response, and action
  audit_all_operations = true

    # Data sanitization before sending to LLM
    # Remove sensitive data from prompts
    [ai.security.sanitization]
    sanitize_credentials = true   # Remove passwords, API keys
    sanitize_ip_addresses = false # Keep for troubleshooting
    sanitize_pii = true           # Remove personally identifiable info
    sanitize_secrets = true       # Remove secret values

    # Allowed data for LLM
    allowed_data = [
      "nickel_schemas", # Schema definitions (public)
      "documentation",  # User docs (public)
      "error_messages", # Validation errors (sanitized)
      "resource_names", # Infrastructure resource identifiers
    ]

    # Forbidden data for LLM (NEVER send to external LLM)
    forbidden_data = [
      "secret_values",  # Passwords, API keys, tokens
      "private_keys",   # SSH keys, TLS keys, encryption keys
      "pii",            # Email addresses, names, phone numbers
      "credentials",    # Authentication credentials
      "session_tokens", # User session data
    ]

    # ============================================================================
    # Rate Limiting and Cost Control
    # ============================================================================

  [ai.rate_limiting]
  # Per-user rate limits
  requests_per_day = 2000
  requests_per_hour = 500
  requests_per_minute = 60

  # Token limits (to control LLM API costs)
  tokens_per_day = 1000000    # 1M tokens/day
  tokens_per_month = 30000000 # 30M tokens/month

  # Cost limits (USD)
  cost_limit_per_day = "100.00"
  cost_limit_per_month = "2000.00"

  # Alert thresholds
  cost_alert_threshold = 0.8 # Alert at 80% of limit

  # Rate limit exceeded behavior
  # Options: "queue" | "reject" | "throttle"
  exceed_behavior = "queue"
  max_queue_size = 100

  # ============================================================================
  # Caching
  # ============================================================================

  [ai.caching]
  # Enable response caching to reduce LLM API calls
  enabled = true

  # Cache TTL (time-to-live)
  ttl = "1h"

  # Cache backend
  # Options: "redis" | "memcached" | "in-memory"
  backend = "redis"
  redis_url = "redis://localhost:6379"

  # Cache key strategy
  # "prompt" = Cache by exact prompt (high precision, low hit rate)
  # "semantic" = Cache by semantic similarity (lower precision, high hit rate)
  cache_strategy = "semantic"
  semantic_similarity_threshold = 0.95

  # Cache statistics
  log_cache_misses = false
  track_hit_rate = true

  # ============================================================================
  # Observability and Monitoring
  # ============================================================================

  [ai.observability]
  # Logging level for AI operations
  # Options: "trace" | "debug" | "info" | "warn" | "error"
  log_level = "info"

  # Trace all AI requests (detailed logging)
  # WARNING: Generates large log volume
  trace_all_requests = true

  # Store conversation history (for debugging and learning)
  conversation_retention_days = 30
  store_conversations = true

    # Metrics collection
    [ai.observability.metrics]
    enabled = true
    export_format = "prometheus" # "prometheus" | "opentelemetry"
    export_port = 9090

    # Metrics to collect
    metrics = [
      "request_count",           # Total AI requests
      "request_duration",        # Latency histogram
      "token_usage",             # Input/output tokens
      "cost_tracking",           # USD cost per request
      "cache_hit_rate",          # Cache effectiveness
      "validation_success_rate", # Generated config validity
      "human_approval_rate",     # How often humans approve AI output
    ]

    # Distributed tracing
    [ai.observability.tracing]
    enabled = true
    jaeger_endpoint = "http://localhost:14268/api/traces"
    sample_rate = 0.1                                     # Sample 10% of requests

    # ============================================================================
    # AI Agent Configuration (typdialog-ag)
    # ============================================================================

  [ai.agents]
  # WARNING: Autonomous agents can execute multi-step workflows
  # Enable with caution, only for trusted users

  # Enable AI agents globally
  enabled = false

  # Maximum iterations per agent execution
  # Prevents infinite loops
  max_iterations = 20

  # Agent timeout (seconds)
  timeout = 300

  # Require approval for each agent action (RECOMMENDED: true)
  # If false, agent executes entire workflow autonomously
  require_step_approval = true

    # Agent types
    [ai.agents.types]
    # Provisioning agent: End-to-end infrastructure setup
    provisioning_agent = false

    # Troubleshooting agent: Diagnose and fix deployment issues
    troubleshooting_agent = true

    # Optimization agent: Analyze and improve configurations
    optimization_agent = true

    # Security audit agent: Review configs for vulnerabilities
    security_audit_agent = true

    # ============================================================================
    # Configuration Generation (typdialog-prov-gen)
    # ============================================================================

  [ai.config_generation]
  # Default schema for generated configs
  default_schema = "workspace"

  # Validation mode
  # "strict" = Reject any invalid config
  # "permissive" = Allow configs with warnings
  validation_mode = "strict"

  # Best practice injection
  # Automatically add security/performance best practices
  inject_best_practices = true

  # Template usage
  # Use pre-defined templates as starting points
  template_directory = "provisioning/templates"
  use_templates = true

  # ============================================================================
  # Form Assistance (typdialog-ai)
  # ============================================================================

  [ai.form_assistance]
  # Real-time suggestions as user types
  real_time_suggestions = true

  # Minimum characters before triggering suggestions
  min_chars_for_suggestions = 3

  # Maximum suggestions per field
  max_suggestions = 5

  # Suggestion confidence threshold (0.0-1.0)
  # Only show suggestions with confidence above threshold
  confidence_threshold = 0.7

  # Natural language form filling
  # User can describe entire form in plain English
  nl_form_filling = true

  # ============================================================================
  # Environment-Specific Overrides
  # ============================================================================

  # Development environment
  [ai.environments.dev]
  cost_limit_per_day = "10.00"
  enabled = true
  model = "gpt-4-turbo"
  provider = "openai"            # Cheaper for dev
  require_human_approval = false # Faster iteration

  # Staging environment
  [ai.environments.staging]
  cost_limit_per_day = "50.00"
  enabled = true
  model = "claude-sonnet-4"
  provider = "anthropic"
  require_human_approval = true

  # Production environment
  [ai.environments.production]
  autonomous_agents = false     # NEVER enable in production
  cost_limit_per_day = "100.00"
  enabled = true
  model = "claude-sonnet-4"
  provider = "anthropic"
  require_human_approval = true # ALWAYS true for production

  # ============================================================================
  # Integration with Other Services
  # ============================================================================

  [ai.integration]
  # Orchestrator integration
  orchestrator_api_key = "env:ORCHESTRATOR_API_KEY"
  orchestrator_url = "https://orchestrator.example.com"

  # SecretumVault integration (for secret name suggestions only)
  secretum_vault_token = "env:VAULT_TOKEN"
  secretum_vault_url = "https://vault.example.com:8200"
  # AI can query secret names/paths but NEVER values

  # Typdialog Web UI integration
  typdialog_url = "https://forms.provisioning.example.com"
  typdialog_websocket_enabled = true

  # ============================================================================
  # Advanced Settings
  # ============================================================================

  [ai.advanced]
  # Prompt engineering
  system_prompt_template = "provisioning/ai/prompts/system.txt"
  user_prompt_template = "provisioning/ai/prompts/user.txt"

  # Context window management
  context_truncation_strategy = "sliding_window" # "sliding_window" | "summarize"
  max_context_tokens = 100000                    # Claude Sonnet 4 context window

  # Streaming responses
  enable_streaming = true
  stream_chunk_size = 100 # Characters per chunk

  # Concurrent requests
  max_concurrent_requests = 10

  # ============================================================================
  # Experimental Features (Use at Your Own Risk)
  # ============================================================================

  [ai.experimental]
  # Multi-agent collaboration
  # Multiple AI agents work together on complex tasks
  multi_agent_collaboration = false

  # Reinforcement learning from human feedback (RLHF)
  # Learn from user corrections to improve over time
  rlhf_enabled = false

  # Fine-tuning on deployment history
  # Train custom models on organization-specific patterns
  fine_tuning = false
  fine_tuning_dataset_path = "provisioning/ai/fine-tuning-data"

  # ============================================================================
  # Compliance and Legal
  # ============================================================================

  [ai.compliance]
  # Data residency requirements
  # Ensure LLM provider complies with data residency laws
  data_residency = "us" # "us" | "eu" | "local"

  # GDPR compliance mode
  gdpr_data_retention_days = 90
  gdpr_mode = false

  # SOC 2 compliance logging
  soc2_logging = false

  # Terms of service acceptance
  # Must explicitly accept LLM provider TOS
  tos_accepted = false
  tos_version = "2025-01-08"

  # IMPORTANT NOTES:
  #
  # 1. API Keys: NEVER hardcode API keys. Always use environment variables.
  #    Example: api_key = "env:ANTHROPIC_API_KEY"
  #
  # 2. Security: Keep require_human_approval = true for production.
  #    AI-generated configs must be reviewed by humans.
  #
  # 3. Costs: Monitor LLM API usage. Set appropriate cost_limit_per_day.
  #    Default limits are conservative but may need adjustment.
  #
  # 4. Privacy: For sensitive workloads, use local models (no external API calls).
  #    Set provider = "local" and configure local model path.
  #
  # 5. RAG Index: Regularly reindex to keep AI knowledge up-to-date.
  #    Set auto_reindex = true and adjust reindex_interval_hours.
  #
  # 6. Cedar Policies: Define fine-grained AI access control in Cedar.
  #    Location: /etc/provisioning/cedar-policies/ai
  #
  # 7. Audit Logs: AI operations are security-critical. Keep audit_all_operations = true.
  #    Logs stored in: /var/log/provisioning/ai-audit.log
  #
  # 8. Agents: Autonomous agents are powerful but risky.
  #    Enable only for specific use cases, never globally in production.

  # Version: 1.0
  # Last Updated: 2025-01-08