2026-01-14 04:53:21 +00:00
|
|
|
# AI System Configuration Guide
|
|
|
|
|
|
|
|
|
|
**Status**: ✅ Production-Ready (Configuration system)
|
|
|
|
|
|
|
|
|
|
Complete setup guide for AI features in the provisioning platform. This guide covers LLM provider configuration, feature enablement, cache setup, cost
|
|
|
|
|
controls, and security settings.
|
|
|
|
|
|
|
|
|
|
## Quick Start
|
|
|
|
|
|
|
|
|
|
### Minimal Configuration
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# provisioning/config/ai.toml
|
|
|
|
|
[ai]
|
|
|
|
|
enabled = true
|
|
|
|
|
provider = "anthropic" # or "openai" or "local"
|
|
|
|
|
model = "claude-sonnet-4"
|
|
|
|
|
api_key = "sk-ant-..." # Set via PROVISIONING_AI_API_KEY env var
|
|
|
|
|
|
|
|
|
|
[ai.cache]
|
|
|
|
|
enabled = true
|
|
|
|
|
|
|
|
|
|
[ai.limits]
|
|
|
|
|
max_tokens = 4096
|
|
|
|
|
temperature = 0.7
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Initialize Configuration
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# Generate default configuration
|
|
|
|
|
provisioning config init ai
|
|
|
|
|
|
|
|
|
|
# Edit configuration
|
|
|
|
|
provisioning config edit ai
|
|
|
|
|
|
|
|
|
|
# Validate configuration
|
|
|
|
|
provisioning config validate ai
|
|
|
|
|
|
|
|
|
|
# Show current configuration
|
|
|
|
|
provisioning config show ai
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
## Provider Configuration
|
|
|
|
|
|
|
|
|
|
### Anthropic Claude
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
[ai]
|
|
|
|
|
enabled = true
|
|
|
|
|
provider = "anthropic"
|
|
|
|
|
model = "claude-sonnet-4" # or "claude-opus-4", "claude-haiku-4"
|
|
|
|
|
api_key = "${PROVISIONING_AI_API_KEY}"
|
|
|
|
|
api_base = "https://api.anthropic.com"
|
|
|
|
|
|
|
|
|
|
# Request parameters
|
|
|
|
|
[ai.request]
|
|
|
|
|
max_tokens = 4096
|
|
|
|
|
temperature = 0.7
|
|
|
|
|
top_p = 0.95
|
|
|
|
|
top_k = 40
|
|
|
|
|
|
|
|
|
|
# Supported models
|
|
|
|
|
# - claude-opus-4: Most capable, for complex reasoning ($15/MTok input, $45/MTok output)
|
|
|
|
|
# - claude-sonnet-4: Balanced (recommended), ($3/MTok input, $15/MTok output)
|
|
|
|
|
# - claude-haiku-4: Fast, for simple tasks ($0.80/MTok input, $4/MTok output)
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### OpenAI GPT-4
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
[ai]
|
|
|
|
|
enabled = true
|
|
|
|
|
provider = "openai"
|
|
|
|
|
model = "gpt-4-turbo" # or "gpt-4", "gpt-4o"
|
|
|
|
|
api_key = "${OPENAI_API_KEY}"
|
|
|
|
|
api_base = "https://api.openai.com/v1"
|
|
|
|
|
|
|
|
|
|
[ai.request]
|
|
|
|
|
max_tokens = 4096
|
|
|
|
|
temperature = 0.7
|
|
|
|
|
top_p = 0.95
|
|
|
|
|
|
|
|
|
|
# Supported models
|
|
|
|
|
# - gpt-4: Most capable ($0.03/1K input, $0.06/1K output)
|
|
|
|
|
# - gpt-4-turbo: Better at code ($0.01/1K input, $0.03/1K output)
|
|
|
|
|
# - gpt-4o: Latest, multi-modal ($5/MTok input, $15/MTok output)
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Local Models
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
[ai]
|
|
|
|
|
enabled = true
|
|
|
|
|
provider = "local"
|
|
|
|
|
model = "llama2-70b" # or "mistral", "neural-chat"
|
|
|
|
|
api_base = "http://localhost:8000" # Local Ollama or LM Studio
|
|
|
|
|
|
|
|
|
|
# Local model support
|
|
|
|
|
# - Ollama: docker run -d -v ollama:/root/.ollama -p 11434:11434 ollama/ollama
|
|
|
|
|
# - LM Studio: GUI app with API
|
|
|
|
|
# - vLLM: High-throughput serving
|
|
|
|
|
# - llama.cpp: CPU inference
|
|
|
|
|
|
|
|
|
|
[ai.local]
|
|
|
|
|
gpu_enabled = true
|
|
|
|
|
gpu_memory_gb = 24
|
|
|
|
|
max_batch_size = 4
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
## Feature Configuration
|
|
|
|
|
|
|
|
|
|
### Enable Specific Features
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
[ai.features]
|
|
|
|
|
# Core features (production-ready)
|
|
|
|
|
rag_search = true # Retrieve-Augmented Generation
|
|
|
|
|
config_generation = true # Generate Nickel from natural language
|
|
|
|
|
mcp_server = true # Model Context Protocol server
|
|
|
|
|
troubleshooting = true # AI-assisted debugging
|
|
|
|
|
|
|
|
|
|
# Form assistance (planned Q2 2025)
|
|
|
|
|
form_assistance = false # AI suggestions in forms
|
|
|
|
|
form_explanations = false # AI explains validation errors
|
|
|
|
|
|
|
|
|
|
# Agents (planned Q2 2025)
|
|
|
|
|
autonomous_agents = false # AI agents for workflows
|
|
|
|
|
agent_learning = false # Agents learn from deployments
|
|
|
|
|
|
|
|
|
|
# Advanced features
|
|
|
|
|
fine_tuning = false # Fine-tune models for domain
|
|
|
|
|
knowledge_base = false # Custom knowledge base per workspace
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
## Cache Configuration
|
|
|
|
|
|
|
|
|
|
### Cache Strategy
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
[ai.cache]
|
|
|
|
|
enabled = true
|
|
|
|
|
cache_type = "memory" # or "redis", "disk"
|
|
|
|
|
ttl_seconds = 3600 # Cache entry lifetime
|
|
|
|
|
|
|
|
|
|
# Memory cache (recommended for single server)
|
|
|
|
|
[ai.cache.memory]
|
|
|
|
|
max_size_mb = 500
|
|
|
|
|
eviction_policy = "lru" # Least Recently Used
|
|
|
|
|
|
|
|
|
|
# Redis cache (recommended for distributed)
|
|
|
|
|
[ai.cache.redis]
|
|
|
|
|
url = "redis://localhost:6379"
|
|
|
|
|
db = 0
|
|
|
|
|
password = "${REDIS_PASSWORD}"
|
|
|
|
|
ttl_seconds = 3600
|
|
|
|
|
|
|
|
|
|
# Disk cache (recommended for persistent caching)
|
|
|
|
|
[ai.cache.disk]
|
|
|
|
|
path = "/var/cache/provisioning/ai"
|
|
|
|
|
max_size_mb = 5000
|
|
|
|
|
|
|
|
|
|
# Semantic caching (for RAG)
|
|
|
|
|
[ai.cache.semantic]
|
|
|
|
|
enabled = true
|
|
|
|
|
similarity_threshold = 0.95 # Cache hit if query similarity > 0.95
|
|
|
|
|
cache_embeddings = true # Cache embedding vectors
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Cache Metrics
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# Monitor cache performance
|
|
|
|
|
provisioning admin cache stats ai
|
|
|
|
|
|
|
|
|
|
# Clear cache
|
|
|
|
|
provisioning admin cache clear ai
|
|
|
|
|
|
|
|
|
|
# Analyze cache efficiency
|
|
|
|
|
provisioning admin cache analyze ai --hours 24
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
## Rate Limiting and Cost Control
|
|
|
|
|
|
|
|
|
|
### Rate Limits
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
[ai.limits]
|
|
|
|
|
# Tokens per request
|
|
|
|
|
max_tokens = 4096
|
|
|
|
|
max_input_tokens = 8192
|
|
|
|
|
max_output_tokens = 4096
|
|
|
|
|
|
|
|
|
|
# Requests per minute/hour
|
|
|
|
|
rpm_limit = 60 # Requests per minute
|
|
|
|
|
rpm_burst = 100 # Allow bursts up to 100 RPM
|
|
|
|
|
|
|
|
|
|
# Daily cost limit
|
|
|
|
|
daily_cost_limit_usd = 100
|
|
|
|
|
warn_at_percent = 80 # Warn when at 80% of daily limit
|
|
|
|
|
stop_at_percent = 95 # Stop accepting requests at 95%
|
|
|
|
|
|
|
|
|
|
# Token usage tracking
|
|
|
|
|
track_token_usage = true
|
|
|
|
|
track_cost_per_request = true
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Cost Budgeting
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
[ai.budget]
|
|
|
|
|
enabled = true
|
|
|
|
|
monthly_limit_usd = 1000
|
|
|
|
|
|
|
|
|
|
# Budget alerts
|
|
|
|
|
alert_at_percent = [50, 75, 90]
|
|
|
|
|
alert_email = "ops@company.com"
|
|
|
|
|
alert_slack = "https://hooks.slack.com/services/..."
|
|
|
|
|
|
|
|
|
|
# Cost by provider
|
|
|
|
|
[ai.budget.providers]
|
|
|
|
|
anthropic_limit = 500
|
|
|
|
|
openai_limit = 300
|
|
|
|
|
local_limit = 0 # Free (run locally)
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Track Costs
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# View cost metrics
|
|
|
|
|
provisioning admin costs show ai --period month
|
|
|
|
|
|
|
|
|
|
# Forecast cost
|
|
|
|
|
provisioning admin costs forecast ai --days 30
|
|
|
|
|
|
|
|
|
|
# Analyze cost by feature
|
|
|
|
|
provisioning admin costs analyze ai --by feature
|
|
|
|
|
|
|
|
|
|
# Export cost report
|
|
|
|
|
provisioning admin costs export ai --format csv --output costs.csv
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
## Security Configuration
|
|
|
|
|
|
|
|
|
|
### Authentication
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
[ai.auth]
|
|
|
|
|
# API key from environment variable
|
|
|
|
|
api_key = "${PROVISIONING_AI_API_KEY}"
|
|
|
|
|
|
|
|
|
|
# Or from secure store
|
|
|
|
|
api_key_vault = "secrets/ai-api-key"
|
|
|
|
|
|
|
|
|
|
# Token rotation
|
|
|
|
|
rotate_key_days = 90
|
|
|
|
|
rotation_alert_days = 7
|
|
|
|
|
|
|
|
|
|
# Request signing (for cloud providers)
|
|
|
|
|
sign_requests = true
|
|
|
|
|
signing_method = "hmac-sha256"
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Authorization (Cedar)
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
[ai.authorization]
|
|
|
|
|
enabled = true
|
|
|
|
|
policy_file = "provisioning/policies/ai-policies.cedar"
|
|
|
|
|
|
|
|
|
|
# Example policies:
|
|
|
|
|
# allow(principal, action, resource) when principal.role == "admin"
|
|
|
|
|
# allow(principal == ?principal, action == "ai_generate_config", resource)
|
|
|
|
|
# when principal.workspace == resource.workspace
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Data Protection
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
[ai.security]
|
|
|
|
|
# Sanitize data before sending to external LLM
|
|
|
|
|
sanitize_pii = true
|
|
|
|
|
sanitize_secrets = true
|
|
|
|
|
redact_patterns = [
|
|
|
|
|
"(?i)password\\s*[:=]\\s*[^\\s]+", # Passwords
|
|
|
|
|
"(?i)api[_-]?key\\s*[:=]\\s*[^\\s]+", # API keys
|
|
|
|
|
"(?i)secret\\s*[:=]\\s*[^\\s]+", # Secrets
|
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
# Encryption
|
|
|
|
|
encryption_enabled = true
|
|
|
|
|
encryption_algorithm = "aes-256-gcm"
|
|
|
|
|
key_derivation = "argon2id"
|
|
|
|
|
|
|
|
|
|
# Local-only mode (never send to external LLM)
|
|
|
|
|
local_only = false # Set true for air-gapped deployments
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
## RAG Configuration
|
|
|
|
|
|
|
|
|
|
### Vector Store Setup
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
[ai.rag]
|
|
|
|
|
enabled = true
|
|
|
|
|
|
|
|
|
|
# SurrealDB backend
|
|
|
|
|
[ai.rag.database]
|
|
|
|
|
url = "surreal://localhost:8000"
|
|
|
|
|
username = "root"
|
|
|
|
|
password = "${SURREALDB_PASSWORD}"
|
|
|
|
|
namespace = "provisioning"
|
|
|
|
|
database = "ai_rag"
|
|
|
|
|
|
|
|
|
|
# Embedding model
|
|
|
|
|
[ai.rag.embedding]
|
|
|
|
|
provider = "openai" # or "anthropic", "local"
|
|
|
|
|
model = "text-embedding-3-small"
|
|
|
|
|
batch_size = 100
|
|
|
|
|
cache_embeddings = true
|
|
|
|
|
|
|
|
|
|
# Search configuration
|
|
|
|
|
[ai.rag.search]
|
|
|
|
|
hybrid_enabled = true
|
|
|
|
|
vector_weight = 0.7 # Weight for vector search
|
|
|
|
|
keyword_weight = 0.3 # Weight for BM25 search
|
|
|
|
|
top_k = 5 # Number of results to return
|
|
|
|
|
rerank_enabled = false # Use cross-encoder to rerank results
|
|
|
|
|
|
|
|
|
|
# Chunking strategy
|
|
|
|
|
[ai.rag.chunking]
|
|
|
|
|
markdown_chunk_size = 1024
|
|
|
|
|
markdown_overlap = 256
|
|
|
|
|
code_chunk_size = 512
|
|
|
|
|
code_overlap = 128
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Index Management
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# Create indexes
|
|
|
|
|
provisioning ai index create rag
|
|
|
|
|
|
|
|
|
|
# Rebuild indexes
|
|
|
|
|
provisioning ai index rebuild rag
|
|
|
|
|
|
|
|
|
|
# Show index status
|
|
|
|
|
provisioning ai index status rag
|
|
|
|
|
|
|
|
|
|
# Remove old indexes
|
|
|
|
|
provisioning ai index cleanup rag --older-than 30days
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
## MCP Server Configuration
|
|
|
|
|
|
|
|
|
|
### MCP Server Setup
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
[ai.mcp]
|
|
|
|
|
enabled = true
|
|
|
|
|
port = 3000
|
|
|
|
|
host = "127.0.0.1" # Change to 0.0.0.0 for network access
|
|
|
|
|
|
|
|
|
|
# Tool registry
|
|
|
|
|
[ai.mcp.tools]
|
|
|
|
|
generate_config = true
|
|
|
|
|
validate_config = true
|
|
|
|
|
search_docs = true
|
|
|
|
|
troubleshoot_deployment = true
|
|
|
|
|
get_schema = true
|
|
|
|
|
check_compliance = true
|
|
|
|
|
|
|
|
|
|
# Rate limiting for tool calls
|
|
|
|
|
rpm_limit = 30
|
|
|
|
|
burst_limit = 50
|
|
|
|
|
|
|
|
|
|
# Tool request timeout
|
|
|
|
|
timeout_seconds = 30
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### MCP Client Configuration
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
~/.claude/claude_desktop_config.json:
|
|
|
|
|
{
|
|
|
|
|
"mcpServers": {
|
|
|
|
|
"provisioning": {
|
|
|
|
|
"command": "provisioning-mcp-server",
|
|
|
|
|
"args": ["--config", "/etc/provisioning/ai.toml"],
|
|
|
|
|
"env": {
|
|
|
|
|
"PROVISIONING_API_KEY": "sk-ant-...",
|
|
|
|
|
"RUST_LOG": "info"
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
## Logging and Observability
|
|
|
|
|
|
|
|
|
|
### Logging Configuration
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
[ai.logging]
|
|
|
|
|
level = "info" # or "debug", "warn", "error"
|
|
|
|
|
format = "json" # or "text"
|
|
|
|
|
output = "stdout" # or "file"
|
|
|
|
|
|
|
|
|
|
# Log file
|
|
|
|
|
[ai.logging.file]
|
|
|
|
|
path = "/var/log/provisioning/ai.log"
|
|
|
|
|
max_size_mb = 100
|
|
|
|
|
max_backups = 10
|
|
|
|
|
retention_days = 30
|
|
|
|
|
|
|
|
|
|
# Log filters
|
|
|
|
|
[ai.logging.filters]
|
|
|
|
|
log_requests = true
|
|
|
|
|
log_responses = false # Don't log full responses (verbose)
|
|
|
|
|
log_token_usage = true
|
|
|
|
|
log_costs = true
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Metrics and Monitoring
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# View AI service metrics
|
|
|
|
|
provisioning admin metrics show ai
|
|
|
|
|
|
|
|
|
|
# Prometheus metrics endpoint
|
|
|
|
|
curl http://localhost:8083/metrics
|
|
|
|
|
|
|
|
|
|
# Key metrics:
|
|
|
|
|
# - ai_requests_total: Total requests by provider/model
|
|
|
|
|
# - ai_request_duration_seconds: Request latency
|
|
|
|
|
# - ai_token_usage_total: Token consumption by provider
|
|
|
|
|
# - ai_cost_total: Cumulative cost by provider
|
|
|
|
|
# - ai_cache_hits: Cache hit rate
|
|
|
|
|
# - ai_errors_total: Errors by type
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
## Health Checks
|
|
|
|
|
|
|
|
|
|
### Configuration Validation
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# Validate configuration syntax
|
|
|
|
|
provisioning config validate ai
|
|
|
|
|
|
|
|
|
|
# Test provider connectivity
|
|
|
|
|
provisioning ai test provider anthropic
|
|
|
|
|
|
|
|
|
|
# Test RAG system
|
|
|
|
|
provisioning ai test rag
|
|
|
|
|
|
|
|
|
|
# Test MCP server
|
|
|
|
|
provisioning ai test mcp
|
|
|
|
|
|
|
|
|
|
# Full health check
|
|
|
|
|
provisioning ai health-check
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
## Environment Variables
|
|
|
|
|
|
|
|
|
|
### Common Settings
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# Provider configuration
|
|
|
|
|
export PROVISIONING_AI_PROVIDER="anthropic"
|
|
|
|
|
export PROVISIONING_AI_MODEL="claude-sonnet-4"
|
|
|
|
|
export PROVISIONING_AI_API_KEY="sk-ant-..."
|
|
|
|
|
|
|
|
|
|
# Feature flags
|
|
|
|
|
export PROVISIONING_AI_ENABLED="true"
|
|
|
|
|
export PROVISIONING_AI_CACHE_ENABLED="true"
|
|
|
|
|
export PROVISIONING_AI_RAG_ENABLED="true"
|
|
|
|
|
|
|
|
|
|
# Cost control
|
|
|
|
|
export PROVISIONING_AI_DAILY_LIMIT_USD="100"
|
|
|
|
|
export PROVISIONING_AI_RPM_LIMIT="60"
|
|
|
|
|
|
|
|
|
|
# Security
|
|
|
|
|
export PROVISIONING_AI_SANITIZE_PII="true"
|
|
|
|
|
export PROVISIONING_AI_LOCAL_ONLY="false"
|
|
|
|
|
|
|
|
|
|
# Logging
|
|
|
|
|
export RUST_LOG="provisioning::ai=info"
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
## Troubleshooting Configuration
|
|
|
|
|
|
|
|
|
|
### Common Issues
|
|
|
|
|
|
|
|
|
|
**Issue**: API key not recognized
|
|
|
|
|
```text
|
|
|
|
|
# Check environment variable is set
|
|
|
|
|
echo $PROVISIONING_AI_API_KEY
|
|
|
|
|
|
|
|
|
|
# Test connectivity
|
|
|
|
|
provisioning ai test provider anthropic
|
|
|
|
|
|
|
|
|
|
# Verify key format (should start with sk-ant- or sk-)
|
|
|
|
|
provisioning config show ai | grep api_key
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
**Issue**: Cache not working
|
|
|
|
|
```text
|
|
|
|
|
# Check cache status
|
|
|
|
|
provisioning admin cache stats ai
|
|
|
|
|
|
|
|
|
|
# Clear cache and restart
|
|
|
|
|
provisioning admin cache clear ai
|
|
|
|
|
provisioning service restart ai-service
|
|
|
|
|
|
|
|
|
|
# Enable cache debugging
|
|
|
|
|
RUST_LOG=provisioning::cache=debug provisioning-ai-service
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
**Issue**: RAG search not finding results
|
|
|
|
|
```text
|
|
|
|
|
# Rebuild RAG indexes
|
|
|
|
|
provisioning ai index rebuild rag
|
|
|
|
|
|
|
|
|
|
# Test search
|
|
|
|
|
provisioning ai query "test query"
|
|
|
|
|
|
|
|
|
|
# Check index status
|
|
|
|
|
provisioning ai index status rag
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
## Upgrading Configuration
|
|
|
|
|
|
|
|
|
|
### Backward Compatibility
|
|
|
|
|
|
|
|
|
|
New AI versions automatically migrate old configurations:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# Check configuration version
|
|
|
|
|
provisioning config version ai
|
|
|
|
|
|
|
|
|
|
# Migrate configuration to latest version
|
|
|
|
|
provisioning config migrate ai --auto
|
|
|
|
|
|
|
|
|
|
# Backup before migration
|
|
|
|
|
provisioning config backup ai
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
## Production Deployment
|
|
|
|
|
|
|
|
|
|
### Recommended Production Settings
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
[ai]
|
|
|
|
|
enabled = true
|
|
|
|
|
provider = "anthropic"
|
|
|
|
|
model = "claude-sonnet-4"
|
|
|
|
|
api_key = "${PROVISIONING_AI_API_KEY}"
|
|
|
|
|
|
|
|
|
|
[ai.features]
|
|
|
|
|
rag_search = true
|
|
|
|
|
config_generation = true
|
|
|
|
|
mcp_server = true
|
|
|
|
|
troubleshooting = true
|
|
|
|
|
|
|
|
|
|
[ai.cache]
|
|
|
|
|
enabled = true
|
|
|
|
|
cache_type = "redis"
|
|
|
|
|
ttl_seconds = 3600
|
|
|
|
|
|
|
|
|
|
[ai.limits]
|
|
|
|
|
rpm_limit = 60
|
|
|
|
|
daily_cost_limit_usd = 1000
|
|
|
|
|
max_tokens = 4096
|
|
|
|
|
|
|
|
|
|
[ai.security]
|
|
|
|
|
sanitize_pii = true
|
|
|
|
|
sanitize_secrets = true
|
|
|
|
|
encryption_enabled = true
|
|
|
|
|
|
|
|
|
|
[ai.logging]
|
|
|
|
|
level = "warn" # Less verbose in production
|
|
|
|
|
format = "json"
|
|
|
|
|
output = "file"
|
|
|
|
|
|
|
|
|
|
[ai.rag.database]
|
|
|
|
|
url = "surreal://surrealdb-cluster:8000"
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
## Related Documentation
|
|
|
|
|
|
|
|
|
|
- [Architecture](architecture.md) - System overview
|
|
|
|
|
- [RAG System](rag-system.md) - Vector database setup
|
|
|
|
|
- [MCP Integration](mcp-integration.md) - MCP configuration
|
|
|
|
|
- [Security Policies](security-policies.md) - Authorization policies
|
|
|
|
|
- [Cost Management](cost-management.md) - Budget tracking
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
**Last Updated**: 2025-01-13
|
|
|
|
|
**Status**: ✅ Production-Ready
|
|
|
|
|
**Versions Supported**: v1.0+
|