# AI Integration Configuration for Provisioning Platform
# This file configures the AI system, including LLM providers, RAG, MCP, and security policies.

# ============================================================================
# Core AI Configuration
# ============================================================================

[ai]
# Enable/disable AI features globally
enabled = true

# LLM Provider Selection
# Options: "anthropic" | "openai" | "local" | "azure-openai"
provider = "anthropic"

# Model Selection
# Anthropic: "claude-sonnet-4", "claude-opus-4", "claude-haiku-4"
# OpenAI: "gpt-4-turbo", "gpt-4", "gpt-3.5-turbo"
# Local: "llama-3-70b", "mistral-large", "codellama-34b"
model = "claude-sonnet-4"

# Model temperature (0.0-1.0)
# Lower = more deterministic, higher = more creative
temperature = 0.7

# Maximum tokens for responses
max_tokens = 4096

# Request timeout (seconds)
timeout = 60

# ============================================================================
# AI Features - Fine-Grained Control
# ============================================================================

[ai.features]
# AI-assisted form filling (typdialog-ai)
# Real-time suggestions and field value predictions
form_assistance = true

# Natural language configuration generation (typdialog-prov-gen)
# Convert plain English to Nickel configs
config_generation = true

# Autonomous AI agents (typdialog-ag)
# WARNING: Agents can execute multi-step workflows
# Recommended: false for production (enable per use case)
autonomous_agents = false

# AI-powered troubleshooting
# Analyze logs and suggest fixes for failed deployments
troubleshooting = true

# Configuration optimization
# AI reviews configs and suggests improvements
optimization = true

# Validation error explanations
# AI explains Nickel validation errors in plain language
error_explanations = true

# ============================================================================
# LLM Provider Configuration
# ============================================================================

[ai.anthropic]
# Anthropic Claude API configuration
api_key = "env:ANTHROPIC_API_KEY"  # Load from environment variable
api_url = "https://api.anthropic.com/v1"
max_retries = 3
retry_delay_ms = 1000

# Rate limits (per minute)
max_requests_per_minute = 50
max_tokens_per_minute = 100000
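
# Example (shell, not TOML): the "env:" prefix above means the value is read
# from the process environment at startup, so a typical deployment exports
# the key before launching the service:
#   export ANTHROPIC_API_KEY="<your-key-here>"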

[ai.openai]
# OpenAI GPT-4 API configuration
api_key = "env:OPENAI_API_KEY"
api_url = "https://api.openai.com/v1"
organization_id = ""  # Optional
max_retries = 3
retry_delay_ms = 1000

# Rate limits (per minute)
max_requests_per_minute = 60
max_tokens_per_minute = 150000

[ai.local]
# Local LLM configuration (Ollama, LlamaCpp, vLLM)
# Use for air-gapped deployments or privacy-critical scenarios
model_path = "/opt/provisioning/models/llama-3-70b"
server_url = "http://localhost:11434"  # Ollama default
context_length = 8192
num_gpu_layers = 40  # GPU acceleration
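
# Example (assuming Ollama as the local backend; these are Ollama CLI
# commands, not part of this platform): pull a model and start the server
# before switching provider = "local":
#   ollama pull llama3:70b
#   ollama serve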

# ============================================================================
# Model Context Protocol (MCP) Server
# ============================================================================

[ai.mcp]
# MCP server configuration
enabled = true
server_url = "http://localhost:9000"
timeout = 30
max_retries = 3

# Tool calling configuration
[ai.mcp.tools]
enabled = true

# Available tools for the LLM
# Tools provide structured actions the LLM can invoke
tools = [
  "nickel_validate",    # Validate Nickel configuration
  "schema_query",       # Query Nickel schema information
  "config_generate",    # Generate configuration snippets
  "cedar_check",        # Check Cedar authorization policies
  "deployment_status",  # Query deployment status
  "log_analyze",        # Analyze deployment logs
]
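
# Example (hypothetical payload, for illustration only; the exact wire format
# depends on the MCP server): a tool call is a structured request the LLM
# emits and the server executes, conceptually like:
#   { "tool": "nickel_validate",
#     "arguments": { "config_path": "workspaces/dev/servers.ncl" } }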

# ============================================================================
# Retrieval-Augmented Generation (RAG)
# ============================================================================

[ai.rag]
# Enable RAG system
enabled = true

# Vector Store Configuration
# Options: "qdrant" | "milvus" | "pgvector" | "chromadb"
vector_store = "qdrant"
vector_store_url = "http://localhost:6333"
collection_name = "provisioning-knowledge"

# Embedding Model
# OpenAI: "text-embedding-3-large", "text-embedding-3-small"
# Local: "all-MiniLM-L6-v2", "bge-large-en-v1.5"
embedding_model = "text-embedding-3-large"
embedding_api_key = "env:OPENAI_API_KEY"  # For OpenAI embeddings

# Document Chunking
chunk_size = 512           # Characters per chunk
chunk_overlap = 50         # Overlap between chunks
max_chunks_per_query = 10  # Top-k retrieval
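
# Worked example: with chunk_size = 512 and chunk_overlap = 50, consecutive
# chunks start 512 - 50 = 462 characters apart, so a 2,000-character document
# splits into ceil((2000 - 512) / 462) + 1 = 5 chunks; each query then
# retrieves at most the 10 best-matching chunks.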

# ============================================================================
# RAG Index Configuration
# ============================================================================

[ai.rag.index]
# What to index for RAG retrieval

# Index Nickel schemas (RECOMMENDED: true)
# Provides AI with schema definitions and contracts
schemas = true
schemas_path = "provisioning/schemas"

# Index documentation (RECOMMENDED: true)
# Provides AI with user guides and best practices
docs = true
docs_path = "docs"

# Index past deployments (RECOMMENDED: true)
# AI learns from successful deployment patterns
deployments = true
deployments_path = "workspaces"

# Index best practices (RECOMMENDED: true)
# Inject organizational patterns and conventions
best_practices = true
best_practices_path = ".claude/patterns"

# Index deployment logs (WARNING: privacy concerns)
# Logs may contain sensitive data; enable only if sanitized
logs = false
logs_retention_days = 30

# Reindexing schedule
auto_reindex = true
reindex_interval_hours = 24

# ============================================================================
# Security and Access Control
# ============================================================================

[ai.security]
# Cedar policy store for AI access control
cedar_policy_store = "/etc/provisioning/cedar-policies/ai"
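
# Example (illustrative Cedar policy; the entity and action names are
# assumptions, not this platform's actual schema): a policy in the store
# above might restrict config generation to a single operator group:
#   permit (
#     principal in Group::"operators",
#     action == Action::"ai:GenerateConfig",
#     resource
#   );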

# AI cannot suggest secret values (CRITICAL: keep at 0)
# AI can suggest secret names/paths but not retrieve actual secrets
max_secret_suggestions = 0

# Require human approval for critical operations (CRITICAL: keep true)
# Operations requiring approval:
# - Deployments to production
# - Configuration changes affecting security
# - Secret rotation
# - Infrastructure deletion
require_human_approval = true

# Audit all AI operations (CRITICAL: keep true)
# Log every AI request, response, and action
audit_all_operations = true

# Data sanitization before sending to LLM
# Remove sensitive data from prompts
[ai.security.sanitization]
sanitize_secrets = true        # Remove secret values
sanitize_pii = true            # Remove personally identifiable info
sanitize_credentials = true    # Remove passwords, API keys
sanitize_ip_addresses = false  # Keep IPs, useful for troubleshooting

# Allowed data for LLM
allowed_data = [
  "nickel_schemas",  # Schema definitions (public)
  "documentation",   # User docs (public)
  "error_messages",  # Validation errors (sanitized)
  "resource_names",  # Infrastructure resource identifiers
]

# Forbidden data for LLM (NEVER send to external LLM)
forbidden_data = [
  "secret_values",   # Passwords, API keys, tokens
  "private_keys",    # SSH keys, TLS keys, encryption keys
  "pii",             # Email addresses, names, phone numbers
  "credentials",     # Authentication credentials
  "session_tokens",  # User session data
]
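
# Example (hypothetical prompt fragment): with the sanitizers above enabled,
#   "auth failed for alice@example.com using token ghp_abc123"
# would leave the platform as something like:
#   "auth failed for [REDACTED_PII] using token [REDACTED_CREDENTIAL]"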

# ============================================================================
# Rate Limiting and Cost Control
# ============================================================================

[ai.rate_limiting]
# Per-user rate limits
requests_per_minute = 60
requests_per_hour = 500
requests_per_day = 2000

# Token limits (to control LLM API costs)
tokens_per_day = 1000000     # 1M tokens/day
tokens_per_month = 30000000  # 30M tokens/month

# Cost limits (USD)
cost_limit_per_day = "100.00"
cost_limit_per_month = "2000.00"

# Alert thresholds
cost_alert_threshold = 0.8  # Alert at 80% of limit

# Rate limit exceeded behavior
# Options: "queue" | "reject" | "throttle"
exceed_behavior = "queue"
max_queue_size = 100
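
# Worked example: with cost_limit_per_day = "100.00" and
# cost_alert_threshold = 0.8, an alert fires once daily spend reaches
# 0.8 * 100.00 = 80.00 USD; at 100.00 USD, exceed_behavior = "queue" takes
# over and further requests wait in a queue of at most 100 entries.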

# ============================================================================
# Caching
# ============================================================================

[ai.caching]
# Enable response caching to reduce LLM API calls
enabled = true

# Cache TTL (time-to-live)
ttl = "1h"

# Cache backend
# Options: "redis" | "memcached" | "in-memory"
backend = "redis"
redis_url = "redis://localhost:6379"

# Cache key strategy
# "prompt" = Cache by exact prompt (high precision, low hit rate)
# "semantic" = Cache by semantic similarity (lower precision, high hit rate)
cache_strategy = "semantic"
semantic_similarity_threshold = 0.95

# Cache statistics
track_hit_rate = true
log_cache_misses = false
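
# Example (illustrative): with cache_strategy = "semantic" and a 0.95
# threshold, "create three web servers in eu-west" and "provision 3 web
# servers in eu-west" would likely embed similarly enough to share one
# cached response, while the "prompt" strategy would treat them as distinct
# cache keys.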

# ============================================================================
# Observability and Monitoring
# ============================================================================

[ai.observability]
# Logging level for AI operations
# Options: "trace" | "debug" | "info" | "warn" | "error"
log_level = "info"

# Trace all AI requests (detailed logging)
# WARNING: Generates a large log volume
trace_all_requests = true

# Store conversation history (for debugging and learning)
store_conversations = true
conversation_retention_days = 30

# Metrics collection
[ai.observability.metrics]
enabled = true
export_format = "prometheus"  # "prometheus" | "opentelemetry"
export_port = 9090

# Metrics to collect
metrics = [
  "request_count",            # Total AI requests
  "request_duration",         # Latency histogram
  "token_usage",              # Input/output tokens
  "cost_tracking",            # USD cost per request
  "cache_hit_rate",           # Cache effectiveness
  "validation_success_rate",  # Generated config validity
  "human_approval_rate",      # How often humans approve AI output
]

# Distributed tracing
[ai.observability.tracing]
enabled = true
jaeger_endpoint = "http://localhost:14268/api/traces"
sample_rate = 0.1  # Sample 10% of requests
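
# Example (hypothetical metric name; actual names depend on the exporter):
# with Prometheus scraping export_port = 9090, p95 request latency could be
# graphed with a query such as:
#   histogram_quantile(0.95, rate(ai_request_duration_seconds_bucket[5m]))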

# ============================================================================
# AI Agent Configuration (typdialog-ag)
# ============================================================================

[ai.agents]
# WARNING: Autonomous agents can execute multi-step workflows
# Enable with caution, and only for trusted users

# Enable AI agents globally
enabled = false

# Maximum iterations per agent execution
# Prevents infinite loops
max_iterations = 20

# Agent timeout (seconds)
timeout = 300

# Require approval for each agent action (RECOMMENDED: true)
# If false, the agent executes the entire workflow autonomously
require_step_approval = true

# Agent types
[ai.agents.types]
# Provisioning agent: end-to-end infrastructure setup
provisioning_agent = false

# Troubleshooting agent: diagnose and fix deployment issues
troubleshooting_agent = true

# Optimization agent: analyze and improve configurations
optimization_agent = true

# Security audit agent: review configs for vulnerabilities
security_audit_agent = true

# ============================================================================
# Configuration Generation (typdialog-prov-gen)
# ============================================================================

[ai.config_generation]
# Default schema for generated configs
default_schema = "workspace"

# Validation mode
# "strict" = Reject any invalid config
# "permissive" = Allow configs with warnings
validation_mode = "strict"

# Best practice injection
# Automatically add security/performance best practices
inject_best_practices = true

# Template usage
# Use pre-defined templates as starting points
use_templates = true
template_directory = "provisioning/templates"
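
# Example (illustrative only; the Nickel snippet is a sketch, not
# schema-exact output): a request like "two web servers with 4 GB RAM"
# could generate something like:
#   {
#     servers = [
#       { name = "web-1", memory = "4GB" },
#       { name = "web-2", memory = "4GB" }
#     ]
#   }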

# ============================================================================
# Form Assistance (typdialog-ai)
# ============================================================================

[ai.form_assistance]
# Real-time suggestions as the user types
real_time_suggestions = true

# Minimum characters before triggering suggestions
min_chars_for_suggestions = 3

# Maximum suggestions per field
max_suggestions = 5

# Suggestion confidence threshold (0.0-1.0)
# Only show suggestions with confidence above the threshold
confidence_threshold = 0.7

# Natural language form filling
# Users can describe the entire form in plain English
nl_form_filling = true

# ============================================================================
# Environment-Specific Overrides
# ============================================================================

# Development environment
[ai.environments.dev]
enabled = true
provider = "openai"  # Cheaper for dev
model = "gpt-4-turbo"
require_human_approval = false  # Faster iteration
cost_limit_per_day = "10.00"

# Staging environment
[ai.environments.staging]
enabled = true
provider = "anthropic"
model = "claude-sonnet-4"
require_human_approval = true
cost_limit_per_day = "50.00"

# Production environment
[ai.environments.production]
enabled = true
provider = "anthropic"
model = "claude-sonnet-4"
require_human_approval = true  # ALWAYS true for production
autonomous_agents = false      # NEVER enable in production
cost_limit_per_day = "100.00"
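
# Example (hypothetical): additional environments follow the same pattern,
# e.g. a QA environment that reuses the staging provider with a tighter
# budget (uncomment and adjust to use):
#   [ai.environments.qa]
#   enabled = true
#   provider = "anthropic"
#   model = "claude-haiku-4"
#   require_human_approval = true
#   cost_limit_per_day = "25.00"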

# ============================================================================
# Integration with Other Services
# ============================================================================

[ai.integration]
# Orchestrator integration
orchestrator_url = "https://orchestrator.example.com"
orchestrator_api_key = "env:ORCHESTRATOR_API_KEY"

# SecretumVault integration (for secret name suggestions only)
# AI can query secret names/paths but NEVER values
secretum_vault_url = "https://vault.example.com:8200"
secretum_vault_token = "env:VAULT_TOKEN"

# Typdialog Web UI integration
typdialog_url = "https://forms.provisioning.example.com"
typdialog_websocket_enabled = true

# ============================================================================
# Advanced Settings
# ============================================================================

[ai.advanced]
# Prompt engineering
system_prompt_template = "provisioning/ai/prompts/system.txt"
user_prompt_template = "provisioning/ai/prompts/user.txt"

# Context window management
max_context_tokens = 100000  # Conservative cap, below Claude Sonnet 4's full context window
context_truncation_strategy = "sliding_window"  # "sliding_window" | "summarize"

# Streaming responses
enable_streaming = true
stream_chunk_size = 100  # Characters per chunk

# Concurrent requests
max_concurrent_requests = 10

# ============================================================================
# Experimental Features (Use at Your Own Risk)
# ============================================================================

[ai.experimental]
# Multi-agent collaboration
# Multiple AI agents work together on complex tasks
multi_agent_collaboration = false

# Reinforcement learning from human feedback (RLHF)
# Learn from user corrections to improve over time
rlhf_enabled = false

# Fine-tuning on deployment history
# Train custom models on organization-specific patterns
fine_tuning = false
fine_tuning_dataset_path = "provisioning/ai/fine-tuning-data"

# ============================================================================
# Compliance and Legal
# ============================================================================

[ai.compliance]
# Data residency requirements
# Ensure the LLM provider complies with data residency laws
data_residency = "us"  # "us" | "eu" | "local"

# GDPR compliance mode
gdpr_mode = false
gdpr_data_retention_days = 90

# SOC 2 compliance logging
soc2_logging = false

# Terms of service acceptance
# The LLM provider's TOS must be explicitly accepted
tos_accepted = false
tos_version = "2025-01-08"

# IMPORTANT NOTES:
#
# 1. API Keys: NEVER hardcode API keys. Always use environment variables.
#    Example: api_key = "env:ANTHROPIC_API_KEY"
#
# 2. Security: Keep require_human_approval = true for production.
#    AI-generated configs must be reviewed by humans.
#
# 3. Costs: Monitor LLM API usage and set an appropriate cost_limit_per_day.
#    The default limits are conservative but may need adjustment.
#
# 4. Privacy: For sensitive workloads, use local models (no external API calls).
#    Set provider = "local" and configure the local model path.
#
# 5. RAG Index: Reindex regularly to keep AI knowledge up to date.
#    Set auto_reindex = true and adjust reindex_interval_hours.
#
# 6. Cedar Policies: Define fine-grained AI access control in Cedar.
#    Location: /etc/provisioning/cedar-policies/ai
#
# 7. Audit Logs: AI operations are security-critical. Keep audit_all_operations = true.
#    Logs are stored in /var/log/provisioning/ai-audit.log
#
# 8. Agents: Autonomous agents are powerful but risky.
#    Enable them only for specific use cases, never globally in production.

# Version: 1.0
# Last Updated: 2025-01-08