# AI System Configuration Guide
**Status**: ✅ Production-Ready (Configuration system)
Complete setup guide for AI features in the provisioning platform. This guide covers LLM provider configuration, feature enablement, cache setup, cost
controls, and security settings.
## Quick Start
### Minimal Configuration
```toml
# provisioning/config/ai.toml
[ai]
enabled = true
provider = "anthropic" # or "openai" or "local"
model = "claude-sonnet-4"
api_key = "sk-ant-..." # Set via PROVISIONING_AI_API_KEY env var
[ai.cache]
enabled = true
[ai.limits]
max_tokens = 4096
temperature = 0.7
```
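
The literal `sk-ant-...` key above is a placeholder; in practice the value comes from the `PROVISIONING_AI_API_KEY` environment variable, and later examples reference it as `${PROVISIONING_AI_API_KEY}`. A minimal sketch of how such `${VAR}` references can be expanded at load time (the helper below is illustrative, not the platform's actual loader):

```rust
// Illustrative ${VAR} expansion, assuming the loader resolves
// placeholders from the environment. Requires regex = "1" in Cargo.toml.
use regex::Regex;
use std::env;

fn expand_env(raw: &str) -> String {
    let re = Regex::new(r"\$\{([A-Za-z0-9_]+)\}").unwrap();
    re.replace_all(raw, |caps: &regex::Captures| {
        // Unset variables expand to the empty string here; a real
        // loader would more likely raise a configuration error.
        env::var(&caps[1]).unwrap_or_default()
    })
    .into_owned()
}

fn main() {
    let line = r#"api_key = "${PROVISIONING_AI_API_KEY}""#;
    println!("{}", expand_env(line));
}
```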
### Initialize Configuration
```bash
# Generate default configuration
provisioning config init ai
# Edit configuration
provisioning config edit ai
# Validate configuration
provisioning config validate ai
# Show current configuration
provisioning config show ai
```
## Provider Configuration
### Anthropic Claude
```toml
[ai]
enabled = true
provider = "anthropic"
model = "claude-sonnet-4" # or "claude-opus-4", "claude-haiku-4"
api_key = "${PROVISIONING_AI_API_KEY}"
api_base = "[https://api.anthropic.com"](https://api.anthropic.com")
# Request parameters
[ai.request]
max_tokens = 4096
temperature = 0.7
top_p = 0.95
top_k = 40
# Supported models
# - claude-opus-4: Most capable, for complex reasoning ($15/MTok input, $45/MTok output)
# - claude-sonnet-4: Balanced, recommended ($3/MTok input, $15/MTok output)
# - claude-haiku-4: Fast, for simple tasks ($0.80/MTok input, $4/MTok output)
```
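
The per-MTok prices in the comments above make per-request cost estimates straightforward. A rough sketch using the claude-sonnet-4 rates (prices hard-coded from the comments; actual billing is whatever the provider reports):

```rust
// Rough per-request cost estimate from token counts, using the
// claude-sonnet-4 rates listed above ($3/MTok input, $15/MTok output).
fn request_cost_usd(input_tokens: u64, output_tokens: u64) -> f64 {
    const INPUT_PER_MTOK: f64 = 3.0;
    const OUTPUT_PER_MTOK: f64 = 15.0;
    (input_tokens as f64 / 1e6) * INPUT_PER_MTOK
        + (output_tokens as f64 / 1e6) * OUTPUT_PER_MTOK
}

fn main() {
    // 2,000 input tokens and 500 output tokens:
    // 0.002 * $3 + 0.0005 * $15 = $0.006 + $0.0075 = $0.0135
    println!("${:.4}", request_cost_usd(2_000, 500));
}
```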
### OpenAI GPT-4
```toml
[ai]
enabled = true
provider = "openai"
model = "gpt-4-turbo" # or "gpt-4", "gpt-4o"
api_key = "${OPENAI_API_KEY}"
api_base = "[https://api.openai.com/v1"](https://api.openai.com/v1")
[ai.request]
max_tokens = 4096
temperature = 0.7
top_p = 0.95
# Supported models
# - gpt-4: Most capable ($30/MTok input, $60/MTok output)
# - gpt-4-turbo: Better at code ($10/MTok input, $30/MTok output)
# - gpt-4o: Latest, multimodal ($5/MTok input, $15/MTok output)
```
### Local Models
```toml
[ai]
enabled = true
provider = "local"
model = "llama2-70b" # or "mistral", "neural-chat"
api_base = "[http://localhost:8000"](http://localhost:8000") # Local Ollama or LM Studio
# Local model support
# - Ollama: docker run -d -v ollama:/root/.ollama -p 11434:11434 ollama/ollama
# - LM Studio: GUI app with API
# - vLLM: High-throughput serving
# - llama.cpp: CPU inference
[ai.local]
gpu_enabled = true
gpu_memory_gb = 24
max_batch_size = 4
```
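
Before enabling local mode it is worth confirming that the endpoint actually answers. A quick probe, assuming the local server exposes an OpenAI-compatible `/v1/chat/completions` route (vLLM does by default; Ollama and LM Studio also offer one, on their own default ports):

```rust
// Connectivity probe for a local model server. Assumes an
// OpenAI-compatible /v1/chat/completions endpoint. Requires
// reqwest = { version = "0.12", features = ["blocking", "json"] }
// and serde_json = "1" in Cargo.toml.
use serde_json::json;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let body = json!({
        "model": "llama2-70b",
        "messages": [{ "role": "user", "content": "ping" }],
        "max_tokens": 8
    });
    let resp = reqwest::blocking::Client::new()
        .post("http://localhost:8000/v1/chat/completions")
        .json(&body)
        .send()?;
    println!("status: {}", resp.status());
    println!("{}", resp.text()?);
    Ok(())
}
```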
## Feature Configuration
### Enable Specific Features
```toml
[ai.features]
# Core features (production-ready)
rag_search = true # Retrieval-Augmented Generation
config_generation = true # Generate Nickel from natural language
mcp_server = true # Model Context Protocol server
troubleshooting = true # AI-assisted debugging
# Form assistance (planned Q2 2025)
form_assistance = false # AI suggestions in forms
form_explanations = false # AI explains validation errors
# Agents (planned Q2 2025)
autonomous_agents = false # AI agents for workflows
agent_learning = false # Agents learn from deployments
# Advanced features
fine_tuning = false # Fine-tune models for domain
knowledge_base = false # Custom knowledge base per workspace
```
## Cache Configuration
### Cache Strategy
```toml
[ai.cache]
enabled = true
cache_type = "memory" # or "redis", "disk"
ttl_seconds = 3600 # Cache entry lifetime
# Memory cache (recommended for single server)
[ai.cache.memory]
max_size_mb = 500
eviction_policy = "lru" # Least Recently Used
# Redis cache (recommended for distributed)
[ai.cache.redis]
url = "redis://localhost:6379"
db = 0
password = "${REDIS_PASSWORD}"
ttl_seconds = 3600
# Disk cache (recommended for persistent caching)
[ai.cache.disk]
path = "/var/cache/provisioning/ai"
max_size_mb = 5000
# Semantic caching (for RAG)
[ai.cache.semantic]
enabled = true
similarity_threshold = 0.95 # Cache hit if query similarity > 0.95
cache_embeddings = true # Cache embedding vectors
```
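
The `similarity_threshold` operates on embedding vectors: each incoming query is embedded, compared against cached query embeddings, and served from cache when cosine similarity exceeds the threshold. A minimal lookup sketch under that assumption (embeddings precomputed, plain Rust):

```rust
// Minimal semantic-cache lookup sketch: serve a cached answer when the
// cosine similarity between query embeddings exceeds the threshold.
fn cosine(a: &[f32], b: &[f32]) -> f32 {
    let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    let na: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
    let nb: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
    dot / (na * nb)
}

fn cache_lookup<'a>(
    query_emb: &[f32],
    cache: &'a [(Vec<f32>, String)], // (query embedding, cached answer)
    threshold: f32,                  // 0.95 in the config above
) -> Option<&'a str> {
    cache
        .iter()
        .find(|(emb, _)| cosine(query_emb, emb) > threshold)
        .map(|(_, answer)| answer.as_str())
}

fn main() {
    let cache = vec![(vec![0.6, 0.8], "cached answer".to_string())];
    let query = vec![0.59, 0.81]; // nearly identical embedding -> cache hit
    println!("{:?}", cache_lookup(&query, &cache, 0.95));
}
```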
### Cache Metrics
```bash
# Monitor cache performance
provisioning admin cache stats ai
# Clear cache
provisioning admin cache clear ai
# Analyze cache efficiency
provisioning admin cache analyze ai --hours 24
```
## Rate Limiting and Cost Control
### Rate Limits
```toml
[ai.limits]
# Tokens per request
max_tokens = 4096
max_input_tokens = 8192
max_output_tokens = 4096
# Requests per minute/hour
rpm_limit = 60 # Requests per minute
rpm_burst = 100 # Allow bursts up to 100 RPM
# Daily cost limit
daily_cost_limit_usd = 100
warn_at_percent = 80 # Warn when at 80% of daily limit
stop_at_percent = 95 # Stop accepting requests at 95%
# Token usage tracking
track_token_usage = true
track_cost_per_request = true
```
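
The `warn_at_percent` / `stop_at_percent` pair amounts to a simple gate on the running daily spend. An illustrative version of that check (names and shapes are assumptions, not the service's internals):

```rust
// Illustrative daily-cost gate matching warn_at_percent / stop_at_percent.
enum Budget {
    Ok,
    Warn(f64), // percent of daily limit used
    Stop(f64),
}

fn check_daily_budget(spent_usd: f64, limit_usd: f64) -> Budget {
    let pct = spent_usd / limit_usd * 100.0;
    if pct >= 95.0 {
        Budget::Stop(pct) // stop_at_percent: reject new requests
    } else if pct >= 80.0 {
        Budget::Warn(pct) // warn_at_percent: alert, keep serving
    } else {
        Budget::Ok
    }
}

fn main() {
    match check_daily_budget(85.0, 100.0) {
        Budget::Stop(p) => println!("rejecting requests at {p:.0}% of daily limit"),
        Budget::Warn(p) => println!("warning: {p:.0}% of daily limit used"),
        Budget::Ok => println!("within budget"),
    }
}
```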
### Cost Budgeting
```toml
[ai.budget]
enabled = true
monthly_limit_usd = 1000
# Budget alerts
alert_at_percent = [50, 75, 90]
alert_email = "ops@company.com"
alert_slack = "[https://hooks.slack.com/services/..."](https://hooks.slack.com/services/...")
# Cost by provider
[ai.budget.providers]
anthropic_limit = 500
openai_limit = 300
local_limit = 0 # Free (run locally)
```
### Track Costs
```bash
# View cost metrics
provisioning admin costs show ai --period month
# Forecast cost
provisioning admin costs forecast ai --days 30
# Analyze cost by feature
provisioning admin costs analyze ai --by feature
# Export cost report
provisioning admin costs export ai --format csv --output costs.csv
```
## Security Configuration
### Authentication
```toml
[ai.auth]
# API key from environment variable
api_key = "${PROVISIONING_AI_API_KEY}"
# Or from secure store
api_key_vault = "secrets/ai-api-key"
# Token rotation
rotate_key_days = 90
rotation_alert_days = 7
# Request signing (for cloud providers)
sign_requests = true
signing_method = "hmac-sha256"
```
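
For `signing_method = "hmac-sha256"`, each outbound request carries an HMAC tag computed with a shared key. A sketch using the standard RustCrypto crates; exactly what gets canonicalized and signed (body, headers, timestamp) is not specified here, so treat the body-only version below as an assumption:

```rust
// HMAC-SHA256 request signing sketch. Requires hmac = "0.12" and
// sha2 = "0.10" in Cargo.toml. Signing only the raw body is an
// assumption; check the service's signing spec.
use hmac::{Hmac, Mac};
use sha2::Sha256;

type HmacSha256 = Hmac<Sha256>;

fn sign(key: &[u8], body: &[u8]) -> String {
    let mut mac = HmacSha256::new_from_slice(key).expect("HMAC accepts any key length");
    mac.update(body);
    // Hex-encode the tag so it can travel in an HTTP header.
    mac.finalize()
        .into_bytes()
        .iter()
        .map(|b| format!("{b:02x}"))
        .collect()
}

fn main() {
    let sig = sign(b"signing-key", br#"{"prompt":"hello"}"#);
    println!("X-Signature: {sig}");
}
```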
### Authorization (Cedar)
```toml
[ai.authorization]
enabled = true
policy_file = "provisioning/policies/ai-policies.cedar"
# Example policies:
# permit(principal, action, resource) when { principal.role == "admin" };
# permit(principal == ?principal, action == Action::"ai_generate_config", resource)
#   when { principal.workspace == resource.workspace };
```
### Data Protection
```toml
[ai.security]
# Sanitize data before sending to external LLM
sanitize_pii = true
sanitize_secrets = true
redact_patterns = [
    "(?i)password\\s*[:=]\\s*[^\\s]+",    # Passwords
    "(?i)api[_-]?key\\s*[:=]\\s*[^\\s]+", # API keys
    "(?i)secret\\s*[:=]\\s*[^\\s]+",      # Secrets
]
# Encryption
encryption_enabled = true
encryption_algorithm = "aes-256-gcm"
key_derivation = "argon2id"
# Local-only mode (never send to external LLM)
local_only = false # Set true for air-gapped deployments
```
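
The `redact_patterns` entries are ordinary regexes applied to outbound text before it leaves the host. A sketch of that sanitization pass using the same three patterns:

```rust
// Outbound sanitization sketch applying the redact_patterns from the
// config above. Requires regex = "1" in Cargo.toml.
use regex::Regex;

fn redact(text: &str) -> String {
    let patterns = [
        r"(?i)password\s*[:=]\s*[^\s]+",
        r"(?i)api[_-]?key\s*[:=]\s*[^\s]+",
        r"(?i)secret\s*[:=]\s*[^\s]+",
    ];
    let mut out = text.to_string();
    for p in patterns {
        out = Regex::new(p).unwrap().replace_all(&out, "[REDACTED]").into_owned();
    }
    out
}

fn main() {
    let prompt = "connect with password: hunter2 and api_key=sk-live-123";
    // Prints: connect with [REDACTED] and [REDACTED]
    println!("{}", redact(prompt));
}
```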
## RAG Configuration
### Vector Store Setup
```toml
[ai.rag]
enabled = true
# SurrealDB backend
[ai.rag.database]
url = "surreal://localhost:8000"
username = "root"
password = "${SURREALDB_PASSWORD}"
namespace = "provisioning"
database = "ai_rag"
# Embedding model
[ai.rag.embedding]
provider = "openai" # or "anthropic", "local"
model = "text-embedding-3-small"
batch_size = 100
cache_embeddings = true
# Search configuration
[ai.rag.search]
hybrid_enabled = true
vector_weight = 0.7 # Weight for vector search
keyword_weight = 0.3 # Weight for BM25 search
top_k = 5 # Number of results to return
rerank_enabled = false # Use cross-encoder to rerank results
# Chunking strategy
[ai.rag.chunking]
markdown_chunk_size = 1024
markdown_overlap = 256
code_chunk_size = 512
code_overlap = 128
```
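
With `hybrid_enabled = true`, each result's vector and keyword scores are blended using the configured weights. A sketch of the fusion step, assuming both scores are already normalized to [0, 1] (real implementations typically normalize per result set before blending):

```rust
// Hybrid score fusion sketch: blend vector and BM25 scores with the
// weights from the config above, then keep the top_k results.
fn fuse(vector: f32, bm25: f32) -> f32 {
    const VECTOR_WEIGHT: f32 = 0.7;  // vector_weight
    const KEYWORD_WEIGHT: f32 = 0.3; // keyword_weight
    VECTOR_WEIGHT * vector + KEYWORD_WEIGHT * bm25
}

fn main() {
    // (doc id, vector score, normalized BM25 score)
    let mut results = vec![("a", 0.90, 0.20), ("b", 0.70, 0.95), ("c", 0.40, 0.10)];
    results.sort_by(|x, y| fuse(y.1, y.2).partial_cmp(&fuse(x.1, x.2)).unwrap());
    let top_k = 2; // top_k = 5 in the config; 2 here for brevity
    for (id, v, k) in results.iter().take(top_k) {
        println!("{id}: fused = {:.2}", fuse(*v, *k));
    }
}
```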
### Index Management
```bash
# Create indexes
provisioning ai index create rag
# Rebuild indexes
provisioning ai index rebuild rag
# Show index status
provisioning ai index status rag
# Remove old indexes
provisioning ai index cleanup rag --older-than 30days
```
## MCP Server Configuration
### MCP Server Setup
```toml
[ai.mcp]
enabled = true
port = 3000
host = "127.0.0.1" # Change to 0.0.0.0 for network access
# Tool registry
[ai.mcp.tools]
generate_config = true
validate_config = true
search_docs = true
troubleshoot_deployment = true
get_schema = true
check_compliance = true
# Rate limiting for tool calls
rpm_limit = 30
burst_limit = 50
# Tool request timeout
timeout_seconds = 30
```
### MCP Client Configuration
`~/.claude/claude_desktop_config.json`:

```json
{
  "mcpServers": {
    "provisioning": {
      "command": "provisioning-mcp-server",
      "args": ["--config", "/etc/provisioning/ai.toml"],
      "env": {
        "PROVISIONING_API_KEY": "sk-ant-...",
        "RUST_LOG": "info"
      }
    }
  }
}
```
## Logging and Observability
### Logging Configuration
```toml
[ai.logging]
level = "info" # or "debug", "warn", "error"
format = "json" # or "text"
output = "stdout" # or "file"
# Log file
[ai.logging.file]
path = "/var/log/provisioning/ai.log"
max_size_mb = 100
max_backups = 10
retention_days = 30
# Log filters
[ai.logging.filters]
log_requests = true
log_responses = false # Don't log full responses (verbose)
log_token_usage = true
log_costs = true
```
### Metrics and Monitoring
```bash
# View AI service metrics
provisioning admin metrics show ai
# Prometheus metrics endpoint
curl http://localhost:8083/metrics
# Key metrics:
# - ai_requests_total: Total requests by provider/model
# - ai_request_duration_seconds: Request latency
# - ai_token_usage_total: Token consumption by provider
# - ai_cost_total: Cumulative cost by provider
# - ai_cache_hits: Cache hit rate
# - ai_errors_total: Errors by type
```
## Health Checks
### Configuration Validation
```bash
# Validate configuration syntax
provisioning config validate ai
# Test provider connectivity
provisioning ai test provider anthropic
# Test RAG system
provisioning ai test rag
# Test MCP server
provisioning ai test mcp
# Full health check
provisioning ai health-check
```
## Environment Variables
### Common Settings
```bash
# Provider configuration
export PROVISIONING_AI_PROVIDER="anthropic"
export PROVISIONING_AI_MODEL="claude-sonnet-4"
export PROVISIONING_AI_API_KEY="sk-ant-..."
# Feature flags
export PROVISIONING_AI_ENABLED="true"
export PROVISIONING_AI_CACHE_ENABLED="true"
export PROVISIONING_AI_RAG_ENABLED="true"
# Cost control
export PROVISIONING_AI_DAILY_LIMIT_USD="100"
export PROVISIONING_AI_RPM_LIMIT="60"
# Security
export PROVISIONING_AI_SANITIZE_PII="true"
export PROVISIONING_AI_LOCAL_ONLY="false"
# Logging
export RUST_LOG="provisioning::ai=info"
```
## Troubleshooting Configuration
### Common Issues
**Issue**: API key not recognized
```bash
# Check environment variable is set
echo $PROVISIONING_AI_API_KEY
# Test connectivity
provisioning ai test provider anthropic
# Verify key format (should start with sk-ant- or sk-)
provisioning config show ai | grep api_key
```
**Issue**: Cache not working
```bash
# Check cache status
provisioning admin cache stats ai
# Clear cache and restart
provisioning admin cache clear ai
provisioning service restart ai-service
# Enable cache debugging
RUST_LOG=provisioning::cache=debug provisioning-ai-service
```
**Issue**: RAG search not finding results
```bash
# Rebuild RAG indexes
provisioning ai index rebuild rag
# Test search
provisioning ai query "test query"
# Check index status
provisioning ai index status rag
```
## Upgrading Configuration
### Backward Compatibility
New AI versions automatically migrate old configurations:
```bash
# Check configuration version
provisioning config version ai
# Migrate configuration to latest version
provisioning config migrate ai --auto
# Backup before migration
provisioning config backup ai
```
## Production Deployment
### Recommended Production Settings
```toml
[ai]
enabled = true
provider = "anthropic"
model = "claude-sonnet-4"
api_key = "${PROVISIONING_AI_API_KEY}"
[ai.features]
rag_search = true
config_generation = true
mcp_server = true
troubleshooting = true
[ai.cache]
enabled = true
cache_type = "redis"
ttl_seconds = 3600
[ai.limits]
rpm_limit = 60
daily_cost_limit_usd = 1000
max_tokens = 4096
[ai.security]
sanitize_pii = true
sanitize_secrets = true
encryption_enabled = true
[ai.logging]
level = "warn" # Less verbose in production
format = "json"
output = "file"
[ai.rag.database]
url = "surreal://surrealdb-cluster:8000"
```
## Related Documentation
- [Architecture](architecture.md) - System overview
- [RAG System](rag-system.md) - Vector database setup
- [MCP Integration](mcp-integration.md) - MCP configuration
- [Security Policies](security-policies.md) - Authorization policies
- [Cost Management](cost-management.md) - Budget tracking
---
**Last Updated**: 2025-01-13
**Status**: ✅ Production-Ready
**Versions Supported**: v1.0+