AI System Configuration Guide
Status: ✅ Production-Ready (Configuration system)
Complete setup guide for AI features in the provisioning platform. This guide covers LLM provider configuration, feature enablement, cache setup, cost controls, and security settings.
Quick Start
Minimal Configuration
# provisioning/config/ai.toml
[ai]
enabled = true
provider = "anthropic" # or "openai" or "local"
model = "claude-sonnet-4"
api_key = "sk-ant-..." # Set via PROVISIONING_AI_API_KEY env var
[ai.cache]
enabled = true
[ai.limits]
max_tokens = 4096
temperature = 0.7
Initialize Configuration
# Generate default configuration
provisioning config init ai
# Edit configuration
provisioning config edit ai
# Validate configuration
provisioning config validate ai
# Show current configuration
provisioning config show ai
Provider Configuration
Anthropic Claude
[ai]
enabled = true
provider = "anthropic"
model = "claude-sonnet-4" # or "claude-opus-4", "claude-haiku-4"
api_key = "${PROVISIONING_AI_API_KEY}"
api_base = "[https://api.anthropic.com"](https://api.anthropic.com")
# Request parameters
[ai.request]
max_tokens = 4096
temperature = 0.7
top_p = 0.95
top_k = 40
# Supported models
# - claude-opus-4: Most capable, for complex reasoning ($15/MTok input, $45/MTok output)
# - claude-sonnet-4: Balanced (recommended), ($3/MTok input, $15/MTok output)
# - claude-haiku-4: Fast, for simple tasks ($0.80/MTok input, $4/MTok output)
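As a quick sanity check on these rates, the sketch below estimates the cost of a single request. The token counts are hypothetical; the prices are the per-million-token rates listed above.

```python
# Back-of-the-envelope cost for one claude-sonnet-4 request at the rates
# above ($3/MTok input, $15/MTok output); token counts are hypothetical.
input_tokens, output_tokens = 2_000, 1_000
cost_usd = input_tokens / 1e6 * 3.00 + output_tokens / 1e6 * 15.00
print(f"${cost_usd:.4f}")  # $0.0210
```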
OpenAI GPT-4
[ai]
enabled = true
provider = "openai"
model = "gpt-4-turbo" # or "gpt-4", "gpt-4o"
api_key = "${OPENAI_API_KEY}"
api_base = "[https://api.openai.com/v1"](https://api.openai.com/v1")
[ai.request]
max_tokens = 4096
temperature = 0.7
top_p = 0.95
# Supported models
# - gpt-4: Most capable ($30/MTok input, $60/MTok output)
# - gpt-4-turbo: Better at code ($10/MTok input, $30/MTok output)
# - gpt-4o: Latest, multi-modal ($5/MTok input, $15/MTok output)
Local Models
[ai]
enabled = true
provider = "local"
model = "llama2-70b" # or "mistral", "neural-chat"
api_base = "[http://localhost:8000"](http://localhost:8000") # Local Ollama or LM Studio
# Local model support
# - Ollama: docker run -d -v ollama:/root/.ollama -p 11434:11434 ollama/ollama
# - LM Studio: GUI app with API
# - vLLM: High-throughput serving
# - llama.cpp: CPU inference
[ai.local]
gpu_enabled = true
gpu_memory_gb = 24
max_batch_size = 4
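Before enabling a local provider, it is worth verifying the endpoint responds. The check below assumes the server exposes an OpenAI-compatible /v1/chat/completions route (vLLM does by default, and recent Ollama releases provide one); adjust the URL and model name to match the [ai] settings above.

```python
# Minimal connectivity check for a local model server, assuming an
# OpenAI-compatible /v1/chat/completions endpoint; adjust URL and model.
import json
import urllib.request

req = urllib.request.Request(
    "http://localhost:8000/v1/chat/completions",
    data=json.dumps({
        "model": "llama2-70b",
        "messages": [{"role": "user", "content": "ping"}],
        "max_tokens": 8,
    }).encode(),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req, timeout=30) as resp:
    print(json.load(resp)["choices"][0]["message"]["content"])
```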
Feature Configuration
Enable Specific Features
[ai.features]
# Core features (production-ready)
rag_search = true # Retrieval-Augmented Generation
config_generation = true # Generate Nickel from natural language
mcp_server = true # Model Context Protocol server
troubleshooting = true # AI-assisted debugging
# Form assistance (planned Q2 2025)
form_assistance = false # AI suggestions in forms
form_explanations = false # AI explains validation errors
# Agents (planned Q2 2025)
autonomous_agents = false # AI agents for workflows
agent_learning = false # Agents learn from deployments
# Advanced features
fine_tuning = false # Fine-tune models for domain
knowledge_base = false # Custom knowledge base per workspace
Cache Configuration
Cache Strategy
[ai.cache]
enabled = true
cache_type = "memory" # or "redis", "disk"
ttl_seconds = 3600 # Cache entry lifetime
# Memory cache (recommended for single server)
[ai.cache.memory]
max_size_mb = 500
eviction_policy = "lru" # Least Recently Used
# Redis cache (recommended for distributed)
[ai.cache.redis]
url = "redis://localhost:6379"
db = 0
password = "${REDIS_PASSWORD}"
ttl_seconds = 3600
# Disk cache (recommended for persistent caching)
[ai.cache.disk]
path = "/var/cache/provisioning/ai"
max_size_mb = 5000
# Semantic caching (for RAG)
[ai.cache.semantic]
enabled = true
similarity_threshold = 0.95 # Cache hit if query similarity > 0.95
cache_embeddings = true # Cache embedding vectors
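The semantic-cache decision reduces to an embedding-similarity test against similarity_threshold. A minimal sketch of that lookup (the service's actual implementation may differ):

```python
# Sketch of a semantic-cache lookup: return a cached answer when the new
# query's embedding is closer than similarity_threshold to a stored one.
import math

def cosine(a: list[float], b: list[float]) -> float:
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(y * y for y in b))
    return dot / (norm_a * norm_b)

def semantic_lookup(query_vec, cache_entries, threshold=0.95):
    best = max(cache_entries, key=lambda e: cosine(query_vec, e["vec"]), default=None)
    if best is not None and cosine(query_vec, best["vec"]) > threshold:
        return best["answer"]  # cache hit: skip the LLM call
    return None                # cache miss: call the LLM and store the result
```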
Cache Metrics
# Monitor cache performance
provisioning admin cache stats ai
# Clear cache
provisioning admin cache clear ai
# Analyze cache efficiency
provisioning admin cache analyze ai --hours 24
Rate Limiting and Cost Control
Rate Limits
[ai.limits]
# Tokens per request
max_tokens = 4096
max_input_tokens = 8192
max_output_tokens = 4096
# Requests per minute/hour
rpm_limit = 60 # Requests per minute
rpm_burst = 100 # Allow bursts up to 100 RPM
# Daily cost limit
daily_cost_limit_usd = 100
warn_at_percent = 80 # Warn when at 80% of daily limit
stop_at_percent = 95 # Stop accepting requests at 95%
# Token usage tracking
track_token_usage = true
track_cost_per_request = true
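The warn_at_percent/stop_at_percent thresholds amount to a simple admission check against the running daily spend. A sketch of that logic (function and variable names are illustrative, not the service's internals):

```python
# Illustrative admission check for daily_cost_limit_usd with the
# warn_at_percent / stop_at_percent thresholds configured above.
def admit(request_cost: float, spent_today: float,
          daily_limit: float = 100.0, warn_pct: int = 80, stop_pct: int = 95) -> str:
    projected = spent_today + request_cost
    if projected >= daily_limit * stop_pct / 100:
        return "reject"  # at 95% of the limit, stop accepting requests
    if projected >= daily_limit * warn_pct / 100:
        return "warn"    # at 80%, allow but emit a budget warning
    return "allow"
```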
Cost Budgeting
[ai.budget]
enabled = true
monthly_limit_usd = 1000
# Budget alerts
alert_at_percent = [50, 75, 90]
alert_email = "ops@company.com"
alert_slack = "[https://hooks.slack.com/services/..."](https://hooks.slack.com/services/...")
# Cost by provider
[ai.budget.providers]
anthropic_limit = 500
openai_limit = 300
local_limit = 0 # Free (run locally)
Track Costs
# View cost metrics
provisioning admin costs show ai --period month
# Forecast cost
provisioning admin costs forecast ai --days 30
# Analyze cost by feature
provisioning admin costs analyze ai --by feature
# Export cost report
provisioning admin costs export ai --format csv --output costs.csv
Security Configuration
Authentication
[ai.auth]
# API key from environment variable
api_key = "${PROVISIONING_AI_API_KEY}"
# Or from secure store
api_key_vault = "secrets/ai-api-key"
# Token rotation
rotate_key_days = 90
rotation_alert_days = 7
# Request signing (for cloud providers)
sign_requests = true
signing_method = "hmac-sha256"
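For reference, hmac-sha256 request signing in its simplest form computes a keyed digest over the request body. The exact canonicalization the service applies (headers, timestamps) is not specified in this guide, so treat this as a sketch:

```python
# Minimal hmac-sha256 signature over a request body; the service's exact
# canonicalization rules are not specified here.
import hashlib
import hmac

def sign(body: bytes, key: bytes) -> str:
    return hmac.new(key, body, hashlib.sha256).hexdigest()

print(sign(b'{"prompt":"..."}', b"shared-signing-key"))
```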
Authorization (Cedar)
[ai.authorization]
enabled = true
policy_file = "provisioning/policies/ai-policies.cedar"
# Example policies:
# allow(principal, action, resource) when principal.role == "admin"
# allow(principal == ?principal, action == "ai_generate_config", resource)
# when principal.workspace == resource.workspace
Data Protection
[ai.security]
# Sanitize data before sending to external LLM
sanitize_pii = true
sanitize_secrets = true
redact_patterns = [
"(?i)password\\s*[:=]\\s*[^\\s]+", # Passwords
"(?i)api[_-]?key\\s*[:=]\\s*[^\\s]+", # API keys
"(?i)secret\\s*[:=]\\s*[^\\s]+", # Secrets
]
# Encryption
encryption_enabled = true
encryption_algorithm = "aes-256-gcm"
key_derivation = "argon2id"
# Local-only mode (never send to external LLM)
local_only = false # Set true for air-gapped deployments
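Applied in order, the redact_patterns above strip credential-looking substrings before a prompt leaves the host. A small sketch of that pass, using the exact patterns from the config:

```python
# Redaction pass using the redact_patterns above: each match is replaced
# before the text is sent to an external LLM.
import re

REDACT_PATTERNS = [
    r"(?i)password\s*[:=]\s*[^\s]+",
    r"(?i)api[_-]?key\s*[:=]\s*[^\s]+",
    r"(?i)secret\s*[:=]\s*[^\s]+",
]

def sanitize(text: str) -> str:
    for pattern in REDACT_PATTERNS:
        text = re.sub(pattern, "[REDACTED]", text)
    return text

print(sanitize("db password: hunter2 api_key=sk-abc123"))
# -> "db [REDACTED] [REDACTED]"
```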
RAG Configuration
Vector Store Setup
[ai.rag]
enabled = true
# SurrealDB backend
[ai.rag.database]
url = "surreal://localhost:8000"
username = "root"
password = "${SURREALDB_PASSWORD}"
namespace = "provisioning"
database = "ai_rag"
# Embedding model
[ai.rag.embedding]
provider = "openai" # or "anthropic", "local"
model = "text-embedding-3-small"
batch_size = 100
cache_embeddings = true
# Search configuration
[ai.rag.search]
hybrid_enabled = true
vector_weight = 0.7 # Weight for vector search
keyword_weight = 0.3 # Weight for BM25 search
top_k = 5 # Number of results to return
rerank_enabled = false # Use cross-encoder to rerank results
# Chunking strategy
[ai.rag.chunking]
markdown_chunk_size = 1024
markdown_overlap = 256
code_chunk_size = 512
code_overlap = 128
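Two of these settings are easy to sanity-check by hand: hybrid scoring blends the two normalized scores with the configured weights, and the chunkers slide a fixed-size window with the configured overlap. The sketches below assume character-based sizes and normalized scores; the service's real units may be tokens.

```python
# Hybrid score as implied by vector_weight/keyword_weight: a weighted
# blend of normalized vector and BM25 scores.
def hybrid_score(vector_score: float, bm25_score: float) -> float:
    return 0.7 * vector_score + 0.3 * bm25_score

print(round(hybrid_score(0.90, 0.20), 2))  # 0.69: strong semantic, weak keyword match

# Sliding-window chunker matching markdown_chunk_size/markdown_overlap;
# sizes here are characters for simplicity (the real unit may be tokens).
def chunk(text: str, size: int = 1024, overlap: int = 256) -> list[str]:
    step = size - overlap
    return [text[i:i + size] for i in range(0, max(len(text) - overlap, 1), step)]
```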
Index Management
# Create indexes
provisioning ai index create rag
# Rebuild indexes
provisioning ai index rebuild rag
# Show index status
provisioning ai index status rag
# Remove old indexes
provisioning ai index cleanup rag --older-than 30days
MCP Server Configuration
MCP Server Setup
[ai.mcp]
enabled = true
port = 3000
host = "127.0.0.1" # Change to 0.0.0.0 for network access
# Tool registry
[ai.mcp.tools]
generate_config = true
validate_config = true
search_docs = true
troubleshoot_deployment = true
get_schema = true
check_compliance = true
# Rate limiting for tool calls
rpm_limit = 30
burst_limit = 50
# Tool request timeout
timeout_seconds = 30
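The rpm_limit/burst_limit pair describes classic token-bucket behavior: a steady refill at the RPM rate with headroom up to the burst size. A sketch under that assumption:

```python
# Token-bucket sketch for rpm_limit/burst_limit above: refills at
# 30 tokens/minute, caps at 50, spends one token per tool call.
import time

class TokenBucket:
    def __init__(self, rate_per_min: int = 30, burst: int = 50):
        self.rate = rate_per_min / 60.0   # tokens added per second
        self.capacity = float(burst)
        self.tokens = float(burst)
        self.last = time.monotonic()

    def allow(self) -> bool:
        now = time.monotonic()
        self.tokens = min(self.capacity, self.tokens + (now - self.last) * self.rate)
        self.last = now
        if self.tokens >= 1.0:
            self.tokens -= 1.0
            return True
        return False
```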
MCP Client Configuration
~/.claude/claude_desktop_config.json:
{
"mcpServers": {
"provisioning": {
"command": "provisioning-mcp-server",
"args": ["--config", "/etc/provisioning/ai.toml"],
"env": {
"PROVISIONING_API_KEY": "sk-ant-...",
"RUST_LOG": "info"
}
}
}
}
Logging and Observability
Logging Configuration
[ai.logging]
level = "info" # or "debug", "warn", "error"
format = "json" # or "text"
output = "stdout" # or "file"
# Log file
[ai.logging.file]
path = "/var/log/provisioning/ai.log"
max_size_mb = 100
max_backups = 10
retention_days = 30
# Log filters
[ai.logging.filters]
log_requests = true
log_responses = false # Don't log full responses (verbose)
log_token_usage = true
log_costs = true
Metrics and Monitoring
# View AI service metrics
provisioning admin metrics show ai
# Prometheus metrics endpoint
curl http://localhost:8083/metrics
# Key metrics:
# - ai_requests_total: Total requests by provider/model
# - ai_request_duration_seconds: Request latency
# - ai_token_usage_total: Token consumption by provider
# - ai_cost_total: Cumulative cost by provider
# - ai_cache_hits: Cache hit rate
# - ai_errors_total: Errors by type
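To eyeball these counters without a full Prometheus setup, scraping the endpoint directly works. This assumes the metrics service listens on port 8083 as shown above.

```python
# Print only the AI counters from the Prometheus text endpoint.
import urllib.request

with urllib.request.urlopen("http://localhost:8083/metrics", timeout=5) as resp:
    for line in resp.read().decode().splitlines():
        if line.startswith("ai_"):
            print(line)
```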
Health Checks
Configuration Validation
# Validate configuration syntax
provisioning config validate ai
# Test provider connectivity
provisioning ai test provider anthropic
# Test RAG system
provisioning ai test rag
# Test MCP server
provisioning ai test mcp
# Full health check
provisioning ai health-check
Environment Variables
Common Settings
# Provider configuration
export PROVISIONING_AI_PROVIDER="anthropic"
export PROVISIONING_AI_MODEL="claude-sonnet-4"
export PROVISIONING_AI_API_KEY="sk-ant-..."
# Feature flags
export PROVISIONING_AI_ENABLED="true"
export PROVISIONING_AI_CACHE_ENABLED="true"
export PROVISIONING_AI_RAG_ENABLED="true"
# Cost control
export PROVISIONING_AI_DAILY_LIMIT_USD="100"
export PROVISIONING_AI_RPM_LIMIT="60"
# Security
export PROVISIONING_AI_SANITIZE_PII="true"
export PROVISIONING_AI_LOCAL_ONLY="false"
# Logging
export RUST_LOG="provisioning::ai=info"
Troubleshooting Configuration
Common Issues
Issue: API key not recognized
# Check environment variable is set
echo $PROVISIONING_AI_API_KEY
# Test connectivity
provisioning ai test provider anthropic
# Verify key format (should start with sk-ant- or sk-)
provisioning config show ai | grep api_key
Issue: Cache not working
# Check cache status
provisioning admin cache stats ai
# Clear cache and restart
provisioning admin cache clear ai
provisioning service restart ai-service
# Enable cache debugging
RUST_LOG=provisioning::cache=debug provisioning-ai-service
Issue: RAG search not finding results
# Rebuild RAG indexes
provisioning ai index rebuild rag
# Test search
provisioning ai query "test query"
# Check index status
provisioning ai index status rag
Upgrading Configuration
Backward Compatibility
New AI versions automatically migrate old configurations:
# Check configuration version
provisioning config version ai
# Migrate configuration to latest version
provisioning config migrate ai --auto
# Backup before migration
provisioning config backup ai
Production Deployment
Recommended Production Settings
[ai]
enabled = true
provider = "anthropic"
model = "claude-sonnet-4"
api_key = "${PROVISIONING_AI_API_KEY}"
[ai.features]
rag_search = true
config_generation = true
mcp_server = true
troubleshooting = true
[ai.cache]
enabled = true
cache_type = "redis"
ttl_seconds = 3600
[ai.limits]
rpm_limit = 60
daily_cost_limit_usd = 1000
max_tokens = 4096
[ai.security]
sanitize_pii = true
sanitize_secrets = true
encryption_enabled = true
[ai.logging]
level = "warn" # Less verbose in production
format = "json"
output = "file"
[ai.rag.database]
url = "surreal://surrealdb-cluster:8000"
Related Documentation
- Architecture - System overview
- RAG System - Vector database setup
- MCP Integration - MCP configuration
- Security Policies - Authorization policies
- Cost Management - Budget tracking
Last Updated: 2025-01-13 Status: ✅ Production-Ready Versions Supported: v1.0+