# AI Integration Configuration for Provisioning Platform
# This file configures the AI system including LLM providers, RAG, MCP, and security policies.
# ============================================================================
# Core AI Configuration
# ============================================================================
[ai]
# Enable/disable AI features globally
enabled = true
# LLM Provider Selection
# Options: "anthropic" | "openai" | "local" | "azure-openai"
provider = "anthropic"
# Model Selection
# Anthropic: "claude-sonnet-4", "claude-opus-4", "claude-haiku-4"
# OpenAI: "gpt-4-turbo", "gpt-4", "gpt-3.5-turbo"
# Local: "llama-3-70b", "mistral-large", "codellama-34b"
model = "claude-sonnet-4"
# Model Temperature (0.0-1.0)
# Lower = more deterministic, Higher = more creative
temperature = 0.7
# Maximum tokens for responses
max_tokens = 4096
# Request timeout (seconds)
timeout = 60
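# Example (illustrative): an air-gapped deployment overriding the defaults
# above; the values are assumptions, and [ai.local] below holds the matching
# server settings.
#   provider = "local"
#   model = "llama-3-70b"
#   temperature = 0.2   # lower for more deterministic config output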
# ============================================================================
# AI Features - Fine-Grained Control
# ============================================================================
[ai.features]
# AI-assisted form filling (typdialog-ai)
# Real-time suggestions and field value predictions
form_assistance = true
# Natural language configuration generation (typdialog-prov-gen)
# Convert plain English to Nickel configs
config_generation = true
# Autonomous AI agents (typdialog-ag)
# WARNING: Agents can execute multi-step workflows
# Recommended: false for production (enable per use case)
autonomous_agents = false
# AI-powered troubleshooting
# Analyze logs and suggest fixes for failed deployments
troubleshooting = true
# Configuration optimization
# AI reviews configs and suggests improvements
optimization = true
# Validation error explanations
# AI explains Nickel validation errors in plain language
error_explanations = true
# ============================================================================
# LLM Provider Configuration
# ============================================================================
[ai.anthropic]
# Anthropic Claude API configuration
api_key = "env:ANTHROPIC_API_KEY" # Load from environment variable
api_url = "https://api.anthropic.com/v1"
max_retries = 3
retry_delay_ms = 1000
# Rate limits (per minute)
max_requests_per_minute = 50
max_tokens_per_minute = 100000
[ai.openai]
# OpenAI GPT-4 API configuration
api_key = "env:OPENAI_API_KEY"
api_url = "https://api.openai.com/v1"
max_retries = 3
organization_id = "" # Optional
retry_delay_ms = 1000
# Rate limits (per minute)
max_requests_per_minute = 60
max_tokens_per_minute = 150000
[ai.local]
# Local LLM configuration (Ollama, LlamaCpp, vLLM)
# Use for air-gapped deployments or privacy-critical scenarios
context_length = 8192
model_path = "/opt/provisioning/models/llama-3-70b"
num_gpu_layers = 40 # GPU acceleration
server_url = "http://localhost:11434" # Ollama default
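# Example (illustrative): trading the 70B model for a smaller one when GPU
# memory is tight; the path and layer count are assumptions for your host.
#   model_path = "/opt/provisioning/models/mistral-large"
#   num_gpu_layers = 20   # fewer offloaded layers fit in less VRAM
#   context_length = 4096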
# ============================================================================
# Model Context Protocol (MCP) Server
# ============================================================================
[ai.mcp]
# MCP server configuration
enabled = true
max_retries = 3
server_url = "http://localhost:9000"
timeout = 30
# Tool calling configuration
[ai.mcp.tools]
enabled = true
# Available tools for LLM
# Tools provide structured actions the LLM can invoke
tools = [
    "nickel_validate",      # Validate Nickel configuration
    "schema_query",         # Query Nickel schema information
    "config_generate",      # Generate configuration snippets
    "cedar_check",          # Check Cedar authorization policies
    "deployment_status",    # Query deployment status
    "log_analyze",          # Analyze deployment logs
]
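# Under MCP, a tool invocation arrives as a JSON-RPC "tools/call" request.
# A minimal sketch of a payload the LLM might emit (the argument shape is an
# assumption, not a documented contract of this platform):
#   { "jsonrpc": "2.0", "id": 1, "method": "tools/call",
#     "params": { "name": "nickel_validate",
#                 "arguments": { "config": "<nickel source>" } } }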
# ============================================================================
# Retrieval-Augmented Generation (RAG)
# ============================================================================
[ai.rag]
# Enable RAG system
enabled = true
# Vector Store Configuration
# Options: "qdrant" | "milvus" | "pgvector" | "chromadb"
collection_name = "provisioning-knowledge"
vector_store = "qdrant"
vector_store_url = "http://localhost:6333"
# Embedding Model
# OpenAI: "text-embedding-3-large", "text-embedding-3-small"
# Local: "all-MiniLM-L6-v2", "bge-large-en-v1.5"
embedding_api_key = "env:OPENAI_API_KEY" # For OpenAI embeddings
embedding_model = "text-embedding-3-large"
# Document Chunking
chunk_overlap = 50 # Overlap between chunks
chunk_size = 512 # Characters per chunk
max_chunks_per_query = 10 # Top-k retrieval
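# Worked example: with chunk_size = 512 and chunk_overlap = 50, each chunk
# advances 462 characters (512 - 50), so a 10,000-character document yields
# about ceil((10000 - 512) / 462) + 1 = 22 chunks; a query then retrieves at
# most the top 10 of them by embedding similarity (max_chunks_per_query).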
# ============================================================================
# RAG Index Configuration
# ============================================================================
[ai.rag.index]
# What to index for RAG retrieval
# Index Nickel schemas (RECOMMENDED: true)
# Provides AI with schema definitions and contracts
schemas = true
schemas_path = "provisioning/schemas"
# Index documentation (RECOMMENDED: true)
# Provides AI with user guides and best practices
docs = true
docs_path = "docs"
# Index past deployments (RECOMMENDED: true)
# AI learns from successful deployment patterns
deployments = true
deployments_path = "workspaces"
# Index best practices (RECOMMENDED: true)
# Inject organizational patterns and conventions
best_practices = true
best_practices_path = ".claude/patterns"
# Index deployment logs (WARNING: privacy concerns)
# Logs may contain sensitive data; enable only if they are sanitized
logs = false
logs_retention_days = 30
# Reindexing schedule
auto_reindex = true
reindex_interval_hours = 24
# ============================================================================
# Security and Access Control
# ============================================================================
[ai.security]
# Cedar policy store for AI access control
cedar_policy_store = "/etc/provisioning/cedar-policies/ai"
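# A minimal Cedar policy sketch for this store (illustrative; the principal,
# action, and group names are assumptions, not this platform's actual schema):
#   permit(
#     principal in Group::"platform-engineers",
#     action == Action::"ai:GenerateConfig",
#     resource
#   );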
# AI must not suggest or reveal secret values (CRITICAL: keep at 0)
# AI can suggest secret names/paths but not retrieve actual secrets
max_secret_suggestions = 0
# Require human approval for critical operations (CRITICAL: keep true)
# Operations requiring approval:
# - Deployments to production
# - Configuration changes affecting security
# - Secret rotation
# - Infrastructure deletion
require_human_approval = true
# Audit all AI operations (CRITICAL: keep true)
# Log every AI request, response, and action
audit_all_operations = true
# Data sanitization before sending to LLM
# Remove sensitive data from prompts
[ai.security.sanitization]
sanitize_credentials = true # Remove passwords, API keys
sanitize_ip_addresses = false # Keep for troubleshooting
sanitize_pii = true # Remove personally identifiable info
sanitize_secrets = true # Remove secret values
# Allowed data for LLM
allowed_data = [
    "nickel_schemas",   # Schema definitions (public)
    "documentation",    # User docs (public)
    "error_messages",   # Validation errors (sanitized)
    "resource_names",   # Infrastructure resource identifiers
]
# Forbidden data for LLM (NEVER send to external LLM)
forbidden_data = [
    "secret_values",    # Passwords, API keys, tokens
    "private_keys",     # SSH keys, TLS keys, encryption keys
    "pii",              # Email addresses, names, phone numbers
    "credentials",      # Authentication credentials
    "session_tokens",   # User session data
]
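# Sanitization example (illustrative): with the flags above, a log line like
#   "login failed for admin@example.com with password hunter2 from 10.0.0.5"
# would reach the LLM roughly as
#   "login failed for [PII] with password [REDACTED] from 10.0.0.5"
# The IP survives because sanitize_ip_addresses = false; the placeholder
# tokens themselves are assumptions about the sanitizer's output format.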
# ============================================================================
# Rate Limiting and Cost Control
# ============================================================================
[ai.rate_limiting]
# Per-user rate limits
requests_per_day = 2000
requests_per_hour = 500
requests_per_minute = 60
# Token limits (to control LLM API costs)
tokens_per_day = 1000000 # 1M tokens/day
tokens_per_month = 30000000 # 30M tokens/month
# Cost limits (USD)
cost_limit_per_day = "100.00"
cost_limit_per_month = "2000.00"
# Alert thresholds
cost_alert_threshold = 0.8 # Alert at 80% of limit
# Rate limit exceeded behavior
# Options: "queue" | "reject" | "throttle"
exceed_behavior = "queue"
max_queue_size = 100
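# Worked example (illustrative pricing): at an assumed blended rate of $10
# per 1M tokens, the tokens_per_day cap of 1,000,000 corresponds to roughly
# $10/day, far below cost_limit_per_day = "100.00"; the token cap therefore
# binds first unless per-token prices are about 10x higher. Check your
# provider's current pricing, which this file does not encode.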
# ============================================================================
# Caching
# ============================================================================
[ai.caching]
# Enable response caching to reduce LLM API calls
enabled = true
# Cache TTL (time-to-live)
ttl = "1h"
# Cache backend
# Options: "redis" | "memcached" | "in-memory"
backend = "redis"
redis_url = "redis://localhost:6379"
# Cache key strategy
# "prompt" = Cache by exact prompt (high precision, low hit rate)
# "semantic" = Cache by semantic similarity (lower precision, high hit rate)
cache_strategy = "semantic"
semantic_similarity_threshold = 0.95
# Cache statistics
log_cache_misses = false
track_hit_rate = true
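# Example: with cache_strategy = "semantic" and a 0.95 threshold, the prompts
# "create a 3-node kubernetes cluster" and "set up a kubernetes cluster with
# three nodes" share one cache entry if their embedding similarity is >= 0.95,
# whereas the "prompt" strategy would treat them as distinct keys. The sample
# prompts are illustrative.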
# ============================================================================
# Observability and Monitoring
# ============================================================================
[ai.observability]
# Logging level for AI operations
# Options: "trace" | "debug" | "info" | "warn" | "error"
log_level = "info"
# Trace all AI requests (detailed logging)
# WARNING: Generates large log volume
trace_all_requests = true
# Store conversation history (for debugging and learning)
conversation_retention_days = 30
store_conversations = true
# Metrics collection
[ai.observability.metrics]
enabled = true
export_format = "prometheus" # "prometheus" | "opentelemetry"
export_port = 9090
# Metrics to collect
metrics = [
    "request_count",             # Total AI requests
    "request_duration",          # Latency histogram
    "token_usage",               # Input/output tokens
    "cost_tracking",             # USD cost per request
    "cache_hit_rate",            # Cache effectiveness
    "validation_success_rate",   # Generated config validity
    "human_approval_rate",       # How often humans approve AI output
]
# Distributed tracing
[ai.observability.tracing]
enabled = true
jaeger_endpoint = "http://localhost:14268/api/traces"
sample_rate = 0.1 # Sample 10% of requests
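# Example: at the per-user ceiling of 60 requests/minute ([ai.rate_limiting]),
# sample_rate = 0.1 emits on the order of 6 traces per minute per active user.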
# ============================================================================
# AI Agent Configuration (typdialog-ag)
# ============================================================================
[ai.agents]
# WARNING: Autonomous agents can execute multi-step workflows
# Enable with caution, only for trusted users
# Enable AI agents globally
enabled = false
# Maximum iterations per agent execution
# Prevents infinite loops
max_iterations = 20
# Agent timeout (seconds)
timeout = 300
# Require approval for each agent action (RECOMMENDED: true)
# If false, agent executes entire workflow autonomously
require_step_approval = true
# Agent types
[ai.agents.types]
# Provisioning agent: End-to-end infrastructure setup
provisioning_agent = false
# Troubleshooting agent: Diagnose and fix deployment issues
troubleshooting_agent = true
# Optimization agent: Analyze and improve configurations
optimization_agent = true
# Security audit agent: Review configs for vulnerabilities
security_audit_agent = true
# ============================================================================
# Configuration Generation (typdialog-prov-gen)
# ============================================================================
[ai.config_generation]
# Default schema for generated configs
default_schema = "workspace"
# Validation mode
# "strict" = Reject any invalid config
# "permissive" = Allow configs with warnings
validation_mode = "strict"
# Best practice injection
# Automatically add security/performance best practices
inject_best_practices = true
# Template usage
# Use pre-defined templates as starting points
template_directory = "provisioning/templates"
use_templates = true
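# Example (illustrative): a request such as "a web workspace with 3 replicas"
# might generate a Nickel snippet along these lines; the field names are
# assumptions, since the real shape comes from your schemas and templates.
#   { name = "web", replicas = 3 }
# With validation_mode = "strict", the snippet is rejected unless it satisfies
# the schema's contracts.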
# ============================================================================
# Form Assistance (typdialog-ai)
# ============================================================================
[ai.form_assistance]
# Real-time suggestions as user types
real_time_suggestions = true
# Minimum characters before triggering suggestions
min_chars_for_suggestions = 3
# Maximum suggestions per field
max_suggestions = 5
# Suggestion confidence threshold (0.0-1.0)
# Only show suggestions with confidence above threshold
confidence_threshold = 0.7
# Natural language form filling
# Users can describe the entire form in plain English
nl_form_filling = true
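# Example: after a user types at least 3 characters into a field, up to 5
# suggestions appear, and only those scoring at or above the 0.7 confidence
# threshold; a candidate at 0.65 is silently dropped.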
# ============================================================================
# Environment-Specific Overrides
# ============================================================================
# Development environment
[ai.environments.dev]
cost_limit_per_day = "10.00"
enabled = true
model = "gpt-4-turbo"
provider = "openai" # Cheaper for dev
require_human_approval = false # Faster iteration
# Staging environment
[ai.environments.staging]
cost_limit_per_day = "50.00"
enabled = true
model = "claude-sonnet-4"
provider = "anthropic"
require_human_approval = true
# Production environment
[ai.environments.production]
autonomous_agents = false # NEVER enable in production
cost_limit_per_day = "100.00"
enabled = true
model = "claude-sonnet-4"
provider = "anthropic"
require_human_approval = true # ALWAYS true for production
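# These sections presumably merge over the base [ai] keys (last-wins), so the
# effective production config keeps inherited values such as temperature = 0.7
# while forcing the overrides listed here, e.g.:
#   require_human_approval = true   # forced by this section
#   autonomous_agents = false       # forced by this section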
# ============================================================================
# Integration with Other Services
# ============================================================================
[ai.integration]
# Orchestrator integration
orchestrator_api_key = "env:ORCHESTRATOR_API_KEY"
orchestrator_url = "https://orchestrator.example.com"
# SecretumVault integration (for secret name suggestions only)
# AI can query secret names/paths but NEVER values
secretum_vault_token = "env:VAULT_TOKEN"
secretum_vault_url = "https://vault.example.com:8200"
# Typdialog Web UI integration
typdialog_url = "https://forms.provisioning.example.com"
typdialog_websocket_enabled = true
# ============================================================================
# Advanced Settings
# ============================================================================
[ai.advanced]
# Prompt engineering
system_prompt_template = "provisioning/ai/prompts/system.txt"
user_prompt_template = "provisioning/ai/prompts/user.txt"
# Context window management
context_truncation_strategy = "sliding_window" # "sliding_window" | "summarize"
max_context_tokens = 100000 # Claude Sonnet 4 context window
# Streaming responses
enable_streaming = true
stream_chunk_size = 100 # Characters per chunk
# Concurrent requests
max_concurrent_requests = 10
# ============================================================================
# Experimental Features (Use at Your Own Risk)
# ============================================================================
[ai.experimental]
# Multi-agent collaboration
# Multiple AI agents work together on complex tasks
multi_agent_collaboration = false
# Reinforcement learning from human feedback (RLHF)
# Learn from user corrections to improve over time
rlhf_enabled = false
# Fine-tuning on deployment history
# Train custom models on organization-specific patterns
fine_tuning = false
fine_tuning_dataset_path = "provisioning/ai/fine-tuning-data"
# ============================================================================
# Compliance and Legal
# ============================================================================
[ai.compliance]
# Data residency requirements
# Ensure LLM provider complies with data residency laws
data_residency = "us" # "us" | "eu" | "local"
# GDPR compliance mode
gdpr_data_retention_days = 90
gdpr_mode = false
# SOC 2 compliance logging
soc2_logging = false
# Terms of service acceptance
# Must explicitly accept LLM provider TOS
tos_accepted = false
tos_version = "2025-01-08"
# IMPORTANT NOTES:
#
# 1. API Keys: NEVER hardcode API keys. Always use environment variables.
# Example: api_key = "env:ANTHROPIC_API_KEY"
#
# 2. Security: Keep require_human_approval = true for production.
# AI-generated configs must be reviewed by humans.
#
# 3. Costs: Monitor LLM API usage. Set appropriate cost_limit_per_day.
# Default limits are conservative but may need adjustment.
#
# 4. Privacy: For sensitive workloads, use local models (no external API calls).
# Set provider = "local" and configure local model path.
#
# 5. RAG Index: Regularly reindex to keep AI knowledge up-to-date.
# Set auto_reindex = true and adjust reindex_interval_hours.
#
# 6. Cedar Policies: Define fine-grained AI access control in Cedar.
# Location: /etc/provisioning/cedar-policies/ai
#
# 7. Audit Logs: AI operations are security-critical. Keep audit_all_operations = true.
# Logs stored in: /var/log/provisioning/ai-audit.log
#
# 8. Agents: Autonomous agents are powerful but risky.
# Enable only for specific use cases, never globally in production.
# Version: 1.0
# Last Updated: 2025-01-08