# AI System Configuration Guide
**Status**: ✅ Production-Ready (Configuration system)
Complete setup guide for AI features in the provisioning platform. This guide covers LLM provider configuration, feature enablement, cache setup, cost
controls, and security settings.
## Quick Start
### Minimal Configuration
```toml
# provisioning/config/ai.toml
[ai]
enabled = true
provider = "anthropic" # or "openai" or "local"
model = "claude-sonnet-4"
api_key = "sk-ant-..." # Set via PROVISIONING_AI_API_KEY env var
[ai.cache]
enabled = true
[ai.limits]
max_tokens = 4096
temperature = 0.7
```
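
The literal `sk-ant-...` key above is a placeholder; in practice the value comes from the `PROVISIONING_AI_API_KEY` environment variable, and later examples reference it as `${PROVISIONING_AI_API_KEY}`. A minimal sketch of how such `${VAR}` references can be expanded at load time (the helper below is illustrative, not the platform's actual loader):

```rust
// Illustrative ${VAR} expansion, assuming the loader resolves
// placeholders from the environment. Requires regex = "1" in Cargo.toml.
use regex::Regex;
use std::env;

fn expand_env(raw: &str) -> String {
    let re = Regex::new(r"\$\{([A-Za-z0-9_]+)\}").unwrap();
    re.replace_all(raw, |caps: &regex::Captures| {
        // Unset variables expand to the empty string here; a real
        // loader would more likely raise a configuration error.
        env::var(&caps[1]).unwrap_or_default()
    })
    .into_owned()
}

fn main() {
    let line = r#"api_key = "${PROVISIONING_AI_API_KEY}""#;
    println!("{}", expand_env(line));
}
```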
### Initialize Configuration
```bash
# Generate default configuration
provisioning config init ai
# Edit configuration
provisioning config edit ai
# Validate configuration
provisioning config validate ai
# Show current configuration
provisioning config show ai
```
## Provider Configuration
### Anthropic Claude
```toml
[ai]
enabled = true
provider = "anthropic"
model = "claude-sonnet-4" # or "claude-opus-4", "claude-haiku-4"
api_key = "${PROVISIONING_AI_API_KEY}"
api_base = "[https://api.anthropic.com"](https://api.anthropic.com")
# Request parameters
[ai.request]
max_tokens = 4096
temperature = 0.7
top_p = 0.95
top_k = 40
# Supported models
# - claude-opus-4: Most capable, for complex reasoning ($15/MTok input, $45/MTok output)
# - claude-sonnet-4: Balanced, recommended ($3/MTok input, $15/MTok output)
# - claude-haiku-4: Fast, for simple tasks ($0.80/MTok input, $4/MTok output)
```
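
The per-MTok prices in the comments above make per-request cost estimates straightforward. A rough sketch using the claude-sonnet-4 rates (prices hard-coded from the comments; actual billing is whatever the provider reports):

```rust
// Rough per-request cost estimate from token counts, using the
// claude-sonnet-4 rates listed above ($3/MTok input, $15/MTok output).
fn request_cost_usd(input_tokens: u64, output_tokens: u64) -> f64 {
    const INPUT_PER_MTOK: f64 = 3.0;
    const OUTPUT_PER_MTOK: f64 = 15.0;
    (input_tokens as f64 / 1e6) * INPUT_PER_MTOK
        + (output_tokens as f64 / 1e6) * OUTPUT_PER_MTOK
}

fn main() {
    // 2,000 input tokens and 500 output tokens:
    // 0.002 * $3 + 0.0005 * $15 = $0.006 + $0.0075 = $0.0135
    println!("${:.4}", request_cost_usd(2_000, 500));
}
```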
### OpenAI GPT-4
```toml
[ai]
enabled = true
provider = "openai"
model = "gpt-4-turbo" # or "gpt-4", "gpt-4o"
api_key = "${OPENAI_API_KEY}"
api_base = "[https://api.openai.com/v1"](https://api.openai.com/v1")
[ai.request]
max_tokens = 4096
temperature = 0.7
top_p = 0.95
# Supported models
# - gpt-4: Most capable ($30/MTok input, $60/MTok output)
# - gpt-4-turbo: Better at code ($10/MTok input, $30/MTok output)
# - gpt-4o: Latest, multimodal ($5/MTok input, $15/MTok output)
```
### Local Models
```toml
[ai]
enabled = true
provider = "local"
model = "llama2-70b" # or "mistral", "neural-chat"
api_base = "[http://localhost:8000"](http://localhost:8000") # Local Ollama or LM Studio
# Local model support
# - Ollama: docker run -d -v ollama:/root/.ollama -p 11434:11434 ollama/ollama
# - LM Studio: GUI app with API
# - vLLM: High-throughput serving
# - llama.cpp: CPU inference
[ai.local]
gpu_enabled = true
gpu_memory_gb = 24
max_batch_size = 4
```
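
Before enabling local mode it is worth confirming that the endpoint actually answers. A quick probe, assuming the local server exposes an OpenAI-compatible `/v1/chat/completions` route (vLLM does by default; Ollama and LM Studio also offer one, on their own default ports):

```rust
// Connectivity probe for a local model server. Assumes an
// OpenAI-compatible /v1/chat/completions endpoint. Requires
// reqwest = { version = "0.12", features = ["blocking", "json"] }
// and serde_json = "1" in Cargo.toml.
use serde_json::json;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let body = json!({
        "model": "llama2-70b",
        "messages": [{ "role": "user", "content": "ping" }],
        "max_tokens": 8
    });
    let resp = reqwest::blocking::Client::new()
        .post("http://localhost:8000/v1/chat/completions")
        .json(&body)
        .send()?;
    println!("status: {}", resp.status());
    println!("{}", resp.text()?);
    Ok(())
}
```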
## Feature Configuration
### Enable Specific Features
```toml
[ai.features]
# Core features (production-ready)
rag_search = true # Retrieval-Augmented Generation
config_generation = true # Generate Nickel from natural language
mcp_server = true # Model Context Protocol server
troubleshooting = true # AI-assisted debugging
# Form assistance (planned Q2 2025)
form_assistance = false # AI suggestions in forms
form_explanations = false # AI explains validation errors
# Agents (planned Q2 2025)
autonomous_agents = false # AI agents for workflows
agent_learning = false # Agents learn from deployments
# Advanced features
fine_tuning = false # Fine-tune models for domain
knowledge_base = false # Custom knowledge base per workspace
```
## Cache Configuration
### Cache Strategy
```toml
[ai.cache]
enabled = true
cache_type = "memory" # or "redis", "disk"
ttl_seconds = 3600 # Cache entry lifetime
# Memory cache (recommended for single server)
[ai.cache.memory]
max_size_mb = 500
eviction_policy = "lru" # Least Recently Used
# Redis cache (recommended for distributed)
[ai.cache.redis]
url = "redis://localhost:6379"
db = 0
password = "${REDIS_PASSWORD}"
ttl_seconds = 3600
# Disk cache (recommended for persistent caching)
[ai.cache.disk]
path = "/var/cache/provisioning/ai"
max_size_mb = 5000
# Semantic caching (for RAG)
[ai.cache.semantic]
enabled = true
similarity_threshold = 0.95 # Cache hit if query similarity > 0.95
cache_embeddings = true # Cache embedding vectors
```
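
The `similarity_threshold` operates on embedding vectors: each incoming query is embedded, compared against cached query embeddings, and served from cache when cosine similarity exceeds the threshold. A minimal lookup sketch under that assumption (embeddings precomputed, plain Rust):

```rust
// Minimal semantic-cache lookup sketch: serve a cached answer when the
// cosine similarity between query embeddings exceeds the threshold.
fn cosine(a: &[f32], b: &[f32]) -> f32 {
    let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    let na: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
    let nb: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
    dot / (na * nb)
}

fn cache_lookup<'a>(
    query_emb: &[f32],
    cache: &'a [(Vec<f32>, String)], // (query embedding, cached answer)
    threshold: f32,                  // 0.95 in the config above
) -> Option<&'a str> {
    cache
        .iter()
        .find(|(emb, _)| cosine(query_emb, emb) > threshold)
        .map(|(_, answer)| answer.as_str())
}

fn main() {
    let cache = vec![(vec![0.6, 0.8], "cached answer".to_string())];
    let query = vec![0.59, 0.81]; // nearly identical embedding -> cache hit
    println!("{:?}", cache_lookup(&query, &cache, 0.95));
}
```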
### Cache Metrics
```bash
# Monitor cache performance
provisioning admin cache stats ai
# Clear cache
provisioning admin cache clear ai
# Analyze cache efficiency
provisioning admin cache analyze ai --hours 24
```
## Rate Limiting and Cost Control
### Rate Limits
```toml
[ai.limits]
# Tokens per request
max_tokens = 4096
max_input_tokens = 8192
max_output_tokens = 4096
# Requests per minute/hour
rpm_limit = 60 # Requests per minute
rpm_burst = 100 # Allow bursts up to 100 RPM
# Daily cost limit
daily_cost_limit_usd = 100
warn_at_percent = 80 # Warn when at 80% of daily limit
stop_at_percent = 95 # Stop accepting requests at 95%
# Token usage tracking
track_token_usage = true
track_cost_per_request = true
```
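
The `warn_at_percent` / `stop_at_percent` pair amounts to a simple gate on the running daily spend. An illustrative version of that check (names and shapes are assumptions, not the service's internals):

```rust
// Illustrative daily-cost gate matching warn_at_percent / stop_at_percent.
enum Budget {
    Ok,
    Warn(f64), // percent of daily limit used
    Stop(f64),
}

fn check_daily_budget(spent_usd: f64, limit_usd: f64) -> Budget {
    let pct = spent_usd / limit_usd * 100.0;
    if pct >= 95.0 {
        Budget::Stop(pct) // stop_at_percent: reject new requests
    } else if pct >= 80.0 {
        Budget::Warn(pct) // warn_at_percent: alert, keep serving
    } else {
        Budget::Ok
    }
}

fn main() {
    match check_daily_budget(85.0, 100.0) {
        Budget::Stop(p) => println!("rejecting requests at {p:.0}% of daily limit"),
        Budget::Warn(p) => println!("warning: {p:.0}% of daily limit used"),
        Budget::Ok => println!("within budget"),
    }
}
```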
### Cost Budgeting
```toml
[ai.budget]
enabled = true
monthly_limit_usd = 1000
# Budget alerts
alert_at_percent = [50, 75, 90]
alert_email = "ops@company.com"
alert_slack = "[https://hooks.slack.com/services/..."](https://hooks.slack.com/services/...")
# Cost by provider
[ai.budget.providers]
anthropic_limit = 500
openai_limit = 300
local_limit = 0 # Free (run locally)
```
### Track Costs
```bash
# View cost metrics
provisioning admin costs show ai --period month
# Forecast cost
provisioning admin costs forecast ai --days 30
# Analyze cost by feature
provisioning admin costs analyze ai --by feature
# Export cost report
provisioning admin costs export ai --format csv --output costs.csv
```
## Security Configuration
### Authentication
```toml
[ai.auth]
# API key from environment variable
api_key = "${PROVISIONING_AI_API_KEY}"
# Or from secure store
api_key_vault = "secrets/ai-api-key"
# Token rotation
rotate_key_days = 90
rotation_alert_days = 7
# Request signing (for cloud providers)
sign_requests = true
signing_method = "hmac-sha256"
```
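
For `signing_method = "hmac-sha256"`, each outbound request carries an HMAC tag computed with a shared key. A sketch using the standard RustCrypto crates; exactly what gets canonicalized and signed (body, headers, timestamp) is not specified here, so treat the body-only version below as an assumption:

```rust
// HMAC-SHA256 request signing sketch. Requires hmac = "0.12" and
// sha2 = "0.10" in Cargo.toml. Signing only the raw body is an
// assumption; check the service's signing spec.
use hmac::{Hmac, Mac};
use sha2::Sha256;

type HmacSha256 = Hmac<Sha256>;

fn sign(key: &[u8], body: &[u8]) -> String {
    let mut mac = HmacSha256::new_from_slice(key).expect("HMAC accepts any key length");
    mac.update(body);
    // Hex-encode the tag so it can travel in an HTTP header.
    mac.finalize()
        .into_bytes()
        .iter()
        .map(|b| format!("{b:02x}"))
        .collect()
}

fn main() {
    let sig = sign(b"signing-key", br#"{"prompt":"hello"}"#);
    println!("X-Signature: {sig}");
}
```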
### Authorization (Cedar)
```toml
[ai.authorization]
enabled = true
policy_file = "provisioning/policies/ai-policies.cedar"
# Example policies:
# permit(principal, action, resource) when { principal.role == "admin" };
# permit(principal == ?principal, action == Action::"ai_generate_config", resource)
#   when { principal.workspace == resource.workspace };
```
### Data Protection
```toml
[ai.security]
# Sanitize data before sending to external LLM
sanitize_pii = true
sanitize_secrets = true
redact_patterns = [
    "(?i)password\\s*[:=]\\s*[^\\s]+",    # Passwords
    "(?i)api[_-]?key\\s*[:=]\\s*[^\\s]+", # API keys
    "(?i)secret\\s*[:=]\\s*[^\\s]+",      # Secrets
]
# Encryption
encryption_enabled = true
encryption_algorithm = "aes-256-gcm"
key_derivation = "argon2id"
# Local-only mode (never send to external LLM)
local_only = false # Set true for air-gapped deployments
```
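
The `redact_patterns` entries are ordinary regexes applied to outbound text before it leaves the host. A sketch of that sanitization pass using the same three patterns:

```rust
// Outbound sanitization sketch applying the redact_patterns from the
// config above. Requires regex = "1" in Cargo.toml.
use regex::Regex;

fn redact(text: &str) -> String {
    let patterns = [
        r"(?i)password\s*[:=]\s*[^\s]+",
        r"(?i)api[_-]?key\s*[:=]\s*[^\s]+",
        r"(?i)secret\s*[:=]\s*[^\s]+",
    ];
    let mut out = text.to_string();
    for p in patterns {
        out = Regex::new(p).unwrap().replace_all(&out, "[REDACTED]").into_owned();
    }
    out
}

fn main() {
    let prompt = "connect with password: hunter2 and api_key=sk-live-123";
    // Prints: connect with [REDACTED] and [REDACTED]
    println!("{}", redact(prompt));
}
```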
## RAG Configuration
### Vector Store Setup
```toml
[ai.rag]
enabled = true
# SurrealDB backend
[ai.rag.database]
url = "surreal://localhost:8000"
username = "root"
password = "${SURREALDB_PASSWORD}"
namespace = "provisioning"
database = "ai_rag"
# Embedding model
[ai.rag.embedding]
provider = "openai" # or "anthropic", "local"
model = "text-embedding-3-small"
batch_size = 100
cache_embeddings = true
# Search configuration
[ai.rag.search]
hybrid_enabled = true
vector_weight = 0.7 # Weight for vector search
keyword_weight = 0.3 # Weight for BM25 search
top_k = 5 # Number of results to return
rerank_enabled = false # Use cross-encoder to rerank results
# Chunking strategy
[ai.rag.chunking]
markdown_chunk_size = 1024
markdown_overlap = 256
code_chunk_size = 512
code_overlap = 128
```
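
With `hybrid_enabled = true`, each result's vector and keyword scores are blended using the configured weights. A sketch of the fusion step, assuming both scores are already normalized to [0, 1] (real implementations typically normalize per result set before blending):

```rust
// Hybrid score fusion sketch: blend vector and BM25 scores with the
// weights from the config above, then keep the top_k results.
fn fuse(vector: f32, bm25: f32) -> f32 {
    const VECTOR_WEIGHT: f32 = 0.7;  // vector_weight
    const KEYWORD_WEIGHT: f32 = 0.3; // keyword_weight
    VECTOR_WEIGHT * vector + KEYWORD_WEIGHT * bm25
}

fn main() {
    // (doc id, vector score, normalized BM25 score)
    let mut results = vec![("a", 0.90, 0.20), ("b", 0.70, 0.95), ("c", 0.40, 0.10)];
    results.sort_by(|x, y| fuse(y.1, y.2).partial_cmp(&fuse(x.1, x.2)).unwrap());
    let top_k = 2; // top_k = 5 in the config; 2 here for brevity
    for (id, v, k) in results.iter().take(top_k) {
        println!("{id}: fused = {:.2}", fuse(*v, *k));
    }
}
```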
### Index Management
```bash
# Create indexes
provisioning ai index create rag
# Rebuild indexes
provisioning ai index rebuild rag
# Show index status
provisioning ai index status rag
# Remove old indexes
provisioning ai index cleanup rag --older-than 30days
```
## MCP Server Configuration
### MCP Server Setup
```toml
[ai.mcp]
enabled = true
port = 3000
host = "127.0.0.1" # Change to 0.0.0.0 for network access
# Tool registry
[ai.mcp.tools]
generate_config = true
validate_config = true
search_docs = true
troubleshoot_deployment = true
get_schema = true
check_compliance = true
# Rate limiting for tool calls
rpm_limit = 30
burst_limit = 50
# Tool request timeout
timeout_seconds = 30
```
### MCP Client Configuration
`~/.claude/claude_desktop_config.json`:

```json
{
  "mcpServers": {
    "provisioning": {
      "command": "provisioning-mcp-server",
      "args": ["--config", "/etc/provisioning/ai.toml"],
      "env": {
        "PROVISIONING_API_KEY": "sk-ant-...",
        "RUST_LOG": "info"
      }
    }
  }
}
```
## Logging and Observability
### Logging Configuration
```toml
[ai.logging]
level = "info" # or "debug", "warn", "error"
format = "json" # or "text"
output = "stdout" # or "file"
# Log file
[ai.logging.file]
path = "/var/log/provisioning/ai.log"
max_size_mb = 100
max_backups = 10
retention_days = 30
# Log filters
[ai.logging.filters]
log_requests = true
log_responses = false # Don't log full responses (verbose)
log_token_usage = true
log_costs = true
```
### Metrics and Monitoring
```bash
# View AI service metrics
provisioning admin metrics show ai
# Prometheus metrics endpoint
curl http://localhost:8083/metrics
# Key metrics:
# - ai_requests_total: Total requests by provider/model
# - ai_request_duration_seconds: Request latency
# - ai_token_usage_total: Token consumption by provider
# - ai_cost_total: Cumulative cost by provider
# - ai_cache_hits: Cache hit rate
# - ai_errors_total: Errors by type
```
## Health Checks
### Configuration Validation
```bash
# Validate configuration syntax
provisioning config validate ai
# Test provider connectivity
provisioning ai test provider anthropic
# Test RAG system
provisioning ai test rag
# Test MCP server
provisioning ai test mcp
# Full health check
provisioning ai health-check
```
## Environment Variables
### Common Settings
```bash
# Provider configuration
export PROVISIONING_AI_PROVIDER="anthropic"
export PROVISIONING_AI_MODEL="claude-sonnet-4"
export PROVISIONING_AI_API_KEY="sk-ant-..."
# Feature flags
export PROVISIONING_AI_ENABLED="true"
export PROVISIONING_AI_CACHE_ENABLED="true"
export PROVISIONING_AI_RAG_ENABLED="true"
# Cost control
export PROVISIONING_AI_DAILY_LIMIT_USD="100"
export PROVISIONING_AI_RPM_LIMIT="60"
# Security
export PROVISIONING_AI_SANITIZE_PII="true"
export PROVISIONING_AI_LOCAL_ONLY="false"
# Logging
export RUST_LOG="provisioning::ai=info"
```
## Troubleshooting Configuration
### Common Issues
**Issue**: API key not recognized
```bash
# Check environment variable is set
echo $PROVISIONING_AI_API_KEY
# Test connectivity
provisioning ai test provider anthropic
# Verify key format (should start with sk-ant- or sk-)
provisioning config show ai | grep api_key
```
**Issue**: Cache not working
```bash
# Check cache status
provisioning admin cache stats ai
# Clear cache and restart
provisioning admin cache clear ai
provisioning service restart ai-service
# Enable cache debugging
RUST_LOG=provisioning::cache=debug provisioning-ai-service
```
**Issue**: RAG search not finding results
```bash
# Rebuild RAG indexes
provisioning ai index rebuild rag
# Test search
provisioning ai query "test query"
# Check index status
provisioning ai index status rag
```
## Upgrading Configuration
### Backward Compatibility
New AI versions automatically migrate old configurations:
```bash
# Check configuration version
provisioning config version ai
# Migrate configuration to latest version
provisioning config migrate ai --auto
# Backup before migration
provisioning config backup ai
```
## Production Deployment
### Recommended Production Settings
```toml
[ai]
enabled = true
provider = "anthropic"
model = "claude-sonnet-4"
api_key = "${PROVISIONING_AI_API_KEY}"
[ai.features]
rag_search = true
config_generation = true
mcp_server = true
troubleshooting = true
[ai.cache]
enabled = true
cache_type = "redis"
ttl_seconds = 3600
[ai.limits]
rpm_limit = 60
daily_cost_limit_usd = 1000
max_tokens = 4096
[ai.security]
sanitize_pii = true
sanitize_secrets = true
encryption_enabled = true
[ai.logging]
level = "warn" # Less verbose in production
format = "json"
output = "file"
[ai.rag.database]
url = "surreal://surrealdb-cluster:8000"
```
## Related Documentation
- [Architecture](architecture.md) - System overview
- [RAG System](rag-system.md) - Vector database setup
- [MCP Integration](mcp-integration.md) - MCP configuration
- [Security Policies](security-policies.md) - Authorization policies
- [Cost Management](cost-management.md) - Budget tracking
---
**Last Updated**: 2025-01-13
**Status**: ✅ Production-Ready
**Versions Supported**: v1.0+