provisioning/docs/src/ai/configuration.md
2026-01-14 03:09:18 +00:00

13 KiB

AI System Configuration Guide\n\nStatus: Production-Ready (Configuration system)\n\nComplete setup guide for AI features in the provisioning platform. This guide covers LLM provider configuration, feature enablement, cache setup, cost\ncontrols, and security settings.\n\n## Quick Start\n\n### Minimal Configuration\n\n\n# provisioning/config/ai.toml\n[ai]\nenabled = true\nprovider = "anthropic" # or "openai" or "local"\nmodel = "claude-sonnet-4"\napi_key = "sk-ant-..." # Set via PROVISIONING_AI_API_KEY env var\n\n[ai.cache]\nenabled = true\n\n[ai.limits]\nmax_tokens = 4096\ntemperature = 0.7\n\n\n### Initialize Configuration\n\n\n# Generate default configuration\nprovisioning config init ai\n\n# Edit configuration\nprovisioning config edit ai\n\n# Validate configuration\nprovisioning config validate ai\n\n# Show current configuration\nprovisioning config show ai\n\n\n## Provider Configuration\n\n### Anthropic Claude\n\n\n[ai]\nenabled = true\nprovider = "anthropic"\nmodel = "claude-sonnet-4" # or "claude-opus-4", "claude-haiku-4"\napi_key = "${PROVISIONING_AI_API_KEY}"\napi_base = "https://api.anthropic.com"\n\n# Request parameters\n[ai.request]\nmax_tokens = 4096\ntemperature = 0.7\ntop_p = 0.95\ntop_k = 40\n\n# Supported models\n# - claude-opus-4: Most capable, for complex reasoning ($15/MTok input, $45/MTok output)\n# - claude-sonnet-4: Balanced (recommended), ($3/MTok input, $15/MTok output)\n# - claude-haiku-4: Fast, for simple tasks ($0.80/MTok input, $4/MTok output)\n\n\n### OpenAI GPT-4\n\n\n[ai]\nenabled = true\nprovider = "openai"\nmodel = "gpt-4-turbo" # or "gpt-4", "gpt-4o"\napi_key = "${OPENAI_API_KEY}"\napi_base = "https://api.openai.com/v1"\n\n[ai.request]\nmax_tokens = 4096\ntemperature = 0.7\ntop_p = 0.95\n\n# Supported models\n# - gpt-4: Most capable ($0.03/1K input, $0.06/1K output)\n# - gpt-4-turbo: Better at code ($0.01/1K input, $0.03/1K output)\n# - gpt-4o: Latest, multi-modal 
($5/MTok input, $15/MTok output)\n\n\n### Local Models\n\n\n[ai]\nenabled = true\nprovider = "local"\nmodel = "llama2-70b" # or "mistral", "neural-chat"\napi_base = "http://localhost:8000" # Local Ollama or LM Studio\n\n# Local model support\n# - Ollama: docker run -d -v ollama:/root/.ollama -p 11434:11434 ollama/ollama\n# - LM Studio: GUI app with API\n# - vLLM: High-throughput serving\n# - llama.cpp: CPU inference\n\n[ai.local]\ngpu_enabled = true\ngpu_memory_gb = 24\nmax_batch_size = 4\n\n\n## Feature Configuration\n\n### Enable Specific Features\n\n\n[ai.features]\n# Core features (production-ready)\nrag_search = true # Retrieval-Augmented Generation\nconfig_generation = true # Generate Nickel from natural language\nmcp_server = true # Model Context Protocol server\ntroubleshooting = true # AI-assisted debugging\n\n# Form assistance (planned Q2 2025)\nform_assistance = false # AI suggestions in forms\nform_explanations = false # AI explains validation errors\n\n# Agents (planned Q2 2025)\nautonomous_agents = false # AI agents for workflows\nagent_learning = false # Agents learn from deployments\n\n# Advanced features\nfine_tuning = false # Fine-tune models for domain\nknowledge_base = false # Custom knowledge base per workspace\n\n\n## Cache Configuration\n\n### Cache Strategy\n\n\n[ai.cache]\nenabled = true\ncache_type = "memory" # or "redis", "disk"\nttl_seconds = 3600 # Cache entry lifetime\n\n# Memory cache (recommended for single server)\n[ai.cache.memory]\nmax_size_mb = 500\neviction_policy = "lru" # Least Recently Used\n\n# Redis cache (recommended for distributed)\n[ai.cache.redis]\nurl = "redis://localhost:6379"\ndb = 0\npassword = "${REDIS_PASSWORD}"\nttl_seconds = 3600\n\n# Disk cache (recommended for persistent caching)\n[ai.cache.disk]\npath = "/var/cache/provisioning/ai"\nmax_size_mb = 5000\n\n# Semantic caching (for RAG)\n[ai.cache.semantic]\nenabled = true\nsimilarity_threshold = 0.95 # Cache hit if query similarity > 
0.95\ncache_embeddings = true # Cache embedding vectors\n\n\n### Cache Metrics\n\n\n# Monitor cache performance\nprovisioning admin cache stats ai\n\n# Clear cache\nprovisioning admin cache clear ai\n\n# Analyze cache efficiency\nprovisioning admin cache analyze ai --hours 24\n\n\n## Rate Limiting and Cost Control\n\n### Rate Limits\n\n\n[ai.limits]\n# Tokens per request\nmax_tokens = 4096\nmax_input_tokens = 8192\nmax_output_tokens = 4096\n\n# Requests per minute/hour\nrpm_limit = 60 # Requests per minute\nrpm_burst = 100 # Allow bursts up to 100 RPM\n\n# Daily cost limit\ndaily_cost_limit_usd = 100\nwarn_at_percent = 80 # Warn when at 80% of daily limit\nstop_at_percent = 95 # Stop accepting requests at 95%\n\n# Token usage tracking\ntrack_token_usage = true\ntrack_cost_per_request = true\n\n\n### Cost Budgeting\n\n\n[ai.budget]\nenabled = true\nmonthly_limit_usd = 1000\n\n# Budget alerts\nalert_at_percent = [50, 75, 90]\nalert_email = "ops@company.com"\nalert_slack = "https://hooks.slack.com/services/..."\n\n# Cost by provider\n[ai.budget.providers]\nanthropic_limit = 500\nopenai_limit = 300\nlocal_limit = 0 # Free (run locally)\n\n\n### Track Costs\n\n\n# View cost metrics\nprovisioning admin costs show ai --period month\n\n# Forecast cost\nprovisioning admin costs forecast ai --days 30\n\n# Analyze cost by feature\nprovisioning admin costs analyze ai --by feature\n\n# Export cost report\nprovisioning admin costs export ai --format csv --output costs.csv\n\n\n## Security Configuration\n\n### Authentication\n\n\n[ai.auth]\n# API key from environment variable\napi_key = "${PROVISIONING_AI_API_KEY}"\n\n# Or from secure store\napi_key_vault = "secrets/ai-api-key"\n\n# Token rotation\nrotate_key_days = 90\nrotation_alert_days = 7\n\n# Request signing (for cloud providers)\nsign_requests = true\nsigning_method = "hmac-sha256"\n\n\n### Authorization (Cedar)\n\n\n[ai.authorization]\nenabled = true\npolicy_file = 
"provisioning/policies/ai-policies.cedar"\n\n# Example policies:\n# allow(principal, action, resource) when principal.role == "admin"\n# allow(principal == ?principal, action == "ai_generate_config", resource)\n# when principal.workspace == resource.workspace\n\n\n### Data Protection\n\n\n[ai.security]\n# Sanitize data before sending to external LLM\nsanitize_pii = true\nsanitize_secrets = true\nredact_patterns = [\n "(?i)password\\s*[:=]\\s*[^\\s]+", # Passwords\n "(?i)api[_-]?key\\s*[:=]\\s*[^\\s]+", # API keys\n "(?i)secret\\s*[:=]\\s*[^\\s]+", # Secrets\n]\n\n# Encryption\nencryption_enabled = true\nencryption_algorithm = "aes-256-gcm"\nkey_derivation = "argon2id"\n\n# Local-only mode (never send to external LLM)\nlocal_only = false # Set true for air-gapped deployments\n\n\n## RAG Configuration\n\n### Vector Store Setup\n\n\n[ai.rag]\nenabled = true\n\n# SurrealDB backend\n[ai.rag.database]\nurl = "surreal://localhost:8000"\nusername = "root"\npassword = "${SURREALDB_PASSWORD}"\nnamespace = "provisioning"\ndatabase = "ai_rag"\n\n# Embedding model\n[ai.rag.embedding]\nprovider = "openai" # or "anthropic", "local"\nmodel = "text-embedding-3-small"\nbatch_size = 100\ncache_embeddings = true\n\n# Search configuration\n[ai.rag.search]\nhybrid_enabled = true\nvector_weight = 0.7 # Weight for vector search\nkeyword_weight = 0.3 # Weight for BM25 search\ntop_k = 5 # Number of results to return\nrerank_enabled = false # Use cross-encoder to rerank results\n\n# Chunking strategy\n[ai.rag.chunking]\nmarkdown_chunk_size = 1024\nmarkdown_overlap = 256\ncode_chunk_size = 512\ncode_overlap = 128\n\n\n### Index Management\n\n\n# Create indexes\nprovisioning ai index create rag\n\n# Rebuild indexes\nprovisioning ai index rebuild rag\n\n# Show index status\nprovisioning ai index status rag\n\n# Remove old indexes\nprovisioning ai index cleanup rag --older-than 30days\n\n\n## MCP Server Configuration\n\n### MCP Server Setup\n\n\n[ai.mcp]\nenabled = true\nport = 3000\nhost = 
"127.0.0.1" # Change to 0.0.0.0 for network access\n\n# Tool registry\n[ai.mcp.tools]\ngenerate_config = true\nvalidate_config = true\nsearch_docs = true\ntroubleshoot_deployment = true\nget_schema = true\ncheck_compliance = true\n\n# Rate limiting for tool calls\nrpm_limit = 30\nburst_limit = 50\n\n# Tool request timeout\ntimeout_seconds = 30\n\n\n### MCP Client Configuration\n\n\n~/.claude/claude_desktop_config.json:\n{\n "mcpServers": {\n "provisioning": {\n "command": "provisioning-mcp-server",\n "args": ["--config", "/etc/provisioning/ai.toml"],\n "env": {\n "PROVISIONING_API_KEY": "sk-ant-...",\n "RUST_LOG": "info"\n }\n }\n }\n}\n\n\n## Logging and Observability\n\n### Logging Configuration\n\n\n[ai.logging]\nlevel = "info" # or "debug", "warn", "error"\nformat = "json" # or "text"\noutput = "stdout" # or "file"\n\n# Log file\n[ai.logging.file]\npath = "/var/log/provisioning/ai.log"\nmax_size_mb = 100\nmax_backups = 10\nretention_days = 30\n\n# Log filters\n[ai.logging.filters]\nlog_requests = true\nlog_responses = false # Don't log full responses (verbose)\nlog_token_usage = true\nlog_costs = true\n\n\n### Metrics and Monitoring\n\n\n# View AI service metrics\nprovisioning admin metrics show ai\n\n# Prometheus metrics endpoint\ncurl http://localhost:8083/metrics\n\n# Key metrics:\n# - ai_requests_total: Total requests by provider/model\n# - ai_request_duration_seconds: Request latency\n# - ai_token_usage_total: Token consumption by provider\n# - ai_cost_total: Cumulative cost by provider\n# - ai_cache_hits: Cache hit rate\n# - ai_errors_total: Errors by type\n\n\n## Health Checks\n\n### Configuration Validation\n\n\n# Validate configuration syntax\nprovisioning config validate ai\n\n# Test provider connectivity\nprovisioning ai test provider anthropic\n\n# Test RAG system\nprovisioning ai test rag\n\n# Test MCP server\nprovisioning ai test mcp\n\n# Full health check\nprovisioning ai health-check\n\n\n## Environment 
Variables\n\n### Common Settings\n\n\n# Provider configuration\nexport PROVISIONING_AI_PROVIDER="anthropic"\nexport PROVISIONING_AI_MODEL="claude-sonnet-4"\nexport PROVISIONING_AI_API_KEY="sk-ant-..."\n\n# Feature flags\nexport PROVISIONING_AI_ENABLED="true"\nexport PROVISIONING_AI_CACHE_ENABLED="true"\nexport PROVISIONING_AI_RAG_ENABLED="true"\n\n# Cost control\nexport PROVISIONING_AI_DAILY_LIMIT_USD="100"\nexport PROVISIONING_AI_RPM_LIMIT="60"\n\n# Security\nexport PROVISIONING_AI_SANITIZE_PII="true"\nexport PROVISIONING_AI_LOCAL_ONLY="false"\n\n# Logging\nexport RUST_LOG="provisioning::ai=info"\n\n\n## Troubleshooting Configuration\n\n### Common Issues\n\nIssue: API key not recognized\n\n# Check environment variable is set\necho $PROVISIONING_AI_API_KEY\n\n# Test connectivity\nprovisioning ai test provider anthropic\n\n# Verify key format (should start with sk-ant- or sk-)\nprovisioning config show ai | grep api_key\n\n\nIssue: Cache not working\n\n# Check cache status\nprovisioning admin cache stats ai\n\n# Clear cache and restart\nprovisioning admin cache clear ai\nprovisioning service restart ai-service\n\n# Enable cache debugging\nRUST_LOG=provisioning::cache=debug provisioning-ai-service\n\n\nIssue: RAG search not finding results\n\n# Rebuild RAG indexes\nprovisioning ai index rebuild rag\n\n# Test search\nprovisioning ai query "test query"\n\n# Check index status\nprovisioning ai index status rag\n\n\n## Upgrading Configuration\n\n### Backward Compatibility\n\nNew AI versions automatically migrate old configurations:\n\n\n# Check configuration version\nprovisioning config version ai\n\n# Migrate configuration to latest version\nprovisioning config migrate ai --auto\n\n# Backup before migration\nprovisioning config backup ai\n\n\n## Production Deployment\n\n### Recommended Production Settings\n\n\n[ai]\nenabled = true\nprovider = "anthropic"\nmodel = "claude-sonnet-4"\napi_key = "${PROVISIONING_AI_API_KEY}"\n\n[ai.features]\nrag_search = 
true\nconfig_generation = true\nmcp_server = true\ntroubleshooting = true\n\n[ai.cache]\nenabled = true\ncache_type = "redis"\nttl_seconds = 3600\n\n[ai.limits]\nrpm_limit = 60\ndaily_cost_limit_usd = 1000\nmax_tokens = 4096\n\n[ai.security]\nsanitize_pii = true\nsanitize_secrets = true\nencryption_enabled = true\n\n[ai.logging]\nlevel = "warn" # Less verbose in production\nformat = "json"\noutput = "file"\n\n[ai.rag.database]\nurl = "surreal://surrealdb-cluster:8000"\n\n\n## Related Documentation\n\n- Architecture - System overview\n- RAG System - Vector database setup\n- MCP Integration - MCP configuration\n- Security Policies - Authorization policies\n- Cost Management - Budget tracking\n\n---\n\nLast Updated: 2025-01-13\nStatus: Production-Ready\nVersions Supported: v1.0+