# AI Integration Configuration for Provisioning Platform
# This file configures the AI system, including LLM providers, RAG, MCP, and security policies.

# ============================================================================
# Core AI Configuration
# ============================================================================

[ai]
# Enable/disable AI features globally
enabled = true

# LLM Provider Selection
# Options: "anthropic" | "openai" | "local" | "azure-openai"
provider = "anthropic"

# Model Selection
# Anthropic: "claude-sonnet-4", "claude-opus-4", "claude-haiku-4"
# OpenAI: "gpt-4-turbo", "gpt-4", "gpt-3.5-turbo"
# Local: "llama-3-70b", "mistral-large", "codellama-34b"
model = "claude-sonnet-4"

# Model temperature (0.0-1.0)
# Lower = more deterministic, higher = more creative
temperature = 0.7

# Maximum tokens for responses
max_tokens = 4096

# Request timeout (seconds)
timeout = 60

# ============================================================================
# AI Features - Fine-Grained Control
# ============================================================================

[ai.features]
# AI-assisted form filling (typdialog-ai)
# Real-time suggestions and field value predictions
form_assistance = true

# Natural language configuration generation (typdialog-prov-gen)
# Convert plain English to Nickel configs
config_generation = true

# Autonomous AI agents (typdialog-ag)
# WARNING: Agents can execute multi-step workflows
# Recommended: false for production (enable per use case)
autonomous_agents = false

# AI-powered troubleshooting
# Analyze logs and suggest fixes for failed deployments
troubleshooting = true

# Configuration optimization
# AI reviews configs and suggests improvements
optimization = true

# Validation error explanations
# AI explains Nickel validation errors in plain language
error_explanations = true

# ============================================================================
# LLM Provider Configuration
# ============================================================================

[ai.anthropic]
# Anthropic Claude API configuration
api_key = "env:ANTHROPIC_API_KEY"  # Load from environment variable
api_url = "https://api.anthropic.com/v1"
max_retries = 3
retry_delay_ms = 1000

# Rate limits (per minute)
max_requests_per_minute = 50
max_tokens_per_minute = 100000
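
# Example (shell, not TOML): the "env:" prefix above means the value is read
# from the process environment at startup, so a typical deployment exports
# the key before launching the service:
#   export ANTHROPIC_API_KEY="<your-key-here>"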

[ai.openai]
# OpenAI GPT-4 API configuration
api_key = "env:OPENAI_API_KEY"
api_url = "https://api.openai.com/v1"
organization_id = ""  # Optional
max_retries = 3
retry_delay_ms = 1000

# Rate limits (per minute)
max_requests_per_minute = 60
max_tokens_per_minute = 150000

[ai.local]
# Local LLM configuration (Ollama, LlamaCpp, vLLM)
# Use for air-gapped deployments or privacy-critical scenarios
model_path = "/opt/provisioning/models/llama-3-70b"
server_url = "http://localhost:11434"  # Ollama default
context_length = 8192
num_gpu_layers = 40  # GPU acceleration
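
# Example (assuming Ollama as the local backend; these are Ollama CLI
# commands, not part of this platform): pull a model and start the server
# before switching provider = "local":
#   ollama pull llama3:70b
#   ollama serve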

# ============================================================================
# Model Context Protocol (MCP) Server
# ============================================================================

[ai.mcp]
# MCP server configuration
enabled = true
server_url = "http://localhost:9000"
timeout = 30
max_retries = 3

# Tool calling configuration
[ai.mcp.tools]
enabled = true

# Available tools for the LLM
# Tools provide structured actions the LLM can invoke
tools = [
  "nickel_validate",    # Validate Nickel configuration
  "schema_query",       # Query Nickel schema information
  "config_generate",    # Generate configuration snippets
  "cedar_check",        # Check Cedar authorization policies
  "deployment_status",  # Query deployment status
  "log_analyze",        # Analyze deployment logs
]
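
# Example (hypothetical payload, for illustration only; the exact wire format
# depends on the MCP server): a tool call is a structured request the LLM
# emits and the server executes, conceptually like:
#   { "tool": "nickel_validate",
#     "arguments": { "config_path": "workspaces/dev/servers.ncl" } }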

# ============================================================================
# Retrieval-Augmented Generation (RAG)
# ============================================================================

[ai.rag]
# Enable RAG system
enabled = true

# Vector Store Configuration
# Options: "qdrant" | "milvus" | "pgvector" | "chromadb"
vector_store = "qdrant"
vector_store_url = "http://localhost:6333"
collection_name = "provisioning-knowledge"

# Embedding Model
# OpenAI: "text-embedding-3-large", "text-embedding-3-small"
# Local: "all-MiniLM-L6-v2", "bge-large-en-v1.5"
embedding_model = "text-embedding-3-large"
embedding_api_key = "env:OPENAI_API_KEY"  # For OpenAI embeddings

# Document Chunking
chunk_size = 512           # Characters per chunk
chunk_overlap = 50         # Overlap between chunks
max_chunks_per_query = 10  # Top-k retrieval
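
# Worked example: with chunk_size = 512 and chunk_overlap = 50, consecutive
# chunks start 512 - 50 = 462 characters apart, so a 2,000-character document
# splits into ceil((2000 - 512) / 462) + 1 = 5 chunks; each query then
# retrieves at most the 10 best-matching chunks.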

# ============================================================================
# RAG Index Configuration
# ============================================================================

[ai.rag.index]
# What to index for RAG retrieval

# Index Nickel schemas (RECOMMENDED: true)
# Provides AI with schema definitions and contracts
schemas = true
schemas_path = "provisioning/schemas"

# Index documentation (RECOMMENDED: true)
# Provides AI with user guides and best practices
docs = true
docs_path = "docs"

# Index past deployments (RECOMMENDED: true)
# AI learns from successful deployment patterns
deployments = true
deployments_path = "workspaces"

# Index best practices (RECOMMENDED: true)
# Inject organizational patterns and conventions
best_practices = true
best_practices_path = ".claude/patterns"

# Index deployment logs (WARNING: privacy concerns)
# Logs may contain sensitive data; enable only if sanitized
logs = false
logs_retention_days = 30

# Reindexing schedule
auto_reindex = true
reindex_interval_hours = 24

# ============================================================================
# Security and Access Control
# ============================================================================

[ai.security]
# Cedar policy store for AI access control
cedar_policy_store = "/etc/provisioning/cedar-policies/ai"
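
# Example (illustrative Cedar policy; the entity and action names are
# assumptions, not this platform's actual schema): a policy in the store
# above might restrict config generation to a single operator group:
#   permit (
#     principal in Group::"operators",
#     action == Action::"ai:GenerateConfig",
#     resource
#   );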

# AI cannot suggest secret values (CRITICAL: keep at 0)
# AI can suggest secret names/paths but not retrieve actual secrets
max_secret_suggestions = 0

# Require human approval for critical operations (CRITICAL: keep true)
# Operations requiring approval:
# - Deployments to production
# - Configuration changes affecting security
# - Secret rotation
# - Infrastructure deletion
require_human_approval = true

# Audit all AI operations (CRITICAL: keep true)
# Log every AI request, response, and action
audit_all_operations = true

# Data sanitization before sending to LLM
# Remove sensitive data from prompts
[ai.security.sanitization]
sanitize_secrets = true        # Remove secret values
sanitize_pii = true            # Remove personally identifiable info
sanitize_credentials = true    # Remove passwords, API keys
sanitize_ip_addresses = false  # Keep IPs, useful for troubleshooting

# Allowed data for LLM
allowed_data = [
  "nickel_schemas",  # Schema definitions (public)
  "documentation",   # User docs (public)
  "error_messages",  # Validation errors (sanitized)
  "resource_names",  # Infrastructure resource identifiers
]

# Forbidden data for LLM (NEVER send to external LLM)
forbidden_data = [
  "secret_values",   # Passwords, API keys, tokens
  "private_keys",    # SSH keys, TLS keys, encryption keys
  "pii",             # Email addresses, names, phone numbers
  "credentials",     # Authentication credentials
  "session_tokens",  # User session data
]
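
# Example (hypothetical prompt fragment): with the sanitizers above enabled,
#   "auth failed for alice@example.com using token ghp_abc123"
# would leave the platform as something like:
#   "auth failed for [REDACTED_PII] using token [REDACTED_CREDENTIAL]"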

# ============================================================================
# Rate Limiting and Cost Control
# ============================================================================

[ai.rate_limiting]
# Per-user rate limits
requests_per_minute = 60
requests_per_hour = 500
requests_per_day = 2000

# Token limits (to control LLM API costs)
tokens_per_day = 1000000     # 1M tokens/day
tokens_per_month = 30000000  # 30M tokens/month

# Cost limits (USD)
cost_limit_per_day = "100.00"
cost_limit_per_month = "2000.00"

# Alert thresholds
cost_alert_threshold = 0.8  # Alert at 80% of limit

# Rate limit exceeded behavior
# Options: "queue" | "reject" | "throttle"
exceed_behavior = "queue"
max_queue_size = 100
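
# Worked example: with cost_limit_per_day = "100.00" and
# cost_alert_threshold = 0.8, an alert fires once daily spend reaches
# 0.8 * 100.00 = 80.00 USD; at 100.00 USD, exceed_behavior = "queue" takes
# over and further requests wait in a queue of at most 100 entries.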

# ============================================================================
# Caching
# ============================================================================

[ai.caching]
# Enable response caching to reduce LLM API calls
enabled = true

# Cache TTL (time-to-live)
ttl = "1h"

# Cache backend
# Options: "redis" | "memcached" | "in-memory"
backend = "redis"
redis_url = "redis://localhost:6379"

# Cache key strategy
# "prompt" = Cache by exact prompt (high precision, low hit rate)
# "semantic" = Cache by semantic similarity (lower precision, high hit rate)
cache_strategy = "semantic"
semantic_similarity_threshold = 0.95

# Cache statistics
track_hit_rate = true
log_cache_misses = false
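
# Example (illustrative): with cache_strategy = "semantic" and a 0.95
# threshold, "create three web servers in eu-west" and "provision 3 web
# servers in eu-west" would likely embed similarly enough to share one
# cached response, while the "prompt" strategy would treat them as distinct
# cache keys.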

# ============================================================================
# Observability and Monitoring
# ============================================================================

[ai.observability]
# Logging level for AI operations
# Options: "trace" | "debug" | "info" | "warn" | "error"
log_level = "info"

# Trace all AI requests (detailed logging)
# WARNING: Generates a large log volume
trace_all_requests = true

# Store conversation history (for debugging and learning)
store_conversations = true
conversation_retention_days = 30

# Metrics collection
[ai.observability.metrics]
enabled = true
export_format = "prometheus"  # "prometheus" | "opentelemetry"
export_port = 9090

# Metrics to collect
metrics = [
  "request_count",            # Total AI requests
  "request_duration",         # Latency histogram
  "token_usage",              # Input/output tokens
  "cost_tracking",            # USD cost per request
  "cache_hit_rate",           # Cache effectiveness
  "validation_success_rate",  # Generated config validity
  "human_approval_rate",      # How often humans approve AI output
]

# Distributed tracing
[ai.observability.tracing]
enabled = true
jaeger_endpoint = "http://localhost:14268/api/traces"
sample_rate = 0.1  # Sample 10% of requests
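
# Example (hypothetical metric name; actual names depend on the exporter):
# with Prometheus scraping export_port = 9090, p95 request latency could be
# graphed with a query such as:
#   histogram_quantile(0.95, rate(ai_request_duration_seconds_bucket[5m]))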

# ============================================================================
# AI Agent Configuration (typdialog-ag)
# ============================================================================

[ai.agents]
# WARNING: Autonomous agents can execute multi-step workflows
# Enable with caution, and only for trusted users

# Enable AI agents globally
enabled = false

# Maximum iterations per agent execution
# Prevents infinite loops
max_iterations = 20

# Agent timeout (seconds)
timeout = 300

# Require approval for each agent action (RECOMMENDED: true)
# If false, the agent executes the entire workflow autonomously
require_step_approval = true

# Agent types
[ai.agents.types]
# Provisioning agent: end-to-end infrastructure setup
provisioning_agent = false

# Troubleshooting agent: diagnose and fix deployment issues
troubleshooting_agent = true

# Optimization agent: analyze and improve configurations
optimization_agent = true

# Security audit agent: review configs for vulnerabilities
security_audit_agent = true

# ============================================================================
# Configuration Generation (typdialog-prov-gen)
# ============================================================================

[ai.config_generation]
# Default schema for generated configs
default_schema = "workspace"

# Validation mode
# "strict" = Reject any invalid config
# "permissive" = Allow configs with warnings
validation_mode = "strict"

# Best practice injection
# Automatically add security/performance best practices
inject_best_practices = true

# Template usage
# Use pre-defined templates as starting points
use_templates = true
template_directory = "provisioning/templates"
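
# Example (illustrative only; the Nickel snippet is a sketch, not
# schema-exact output): a request like "two web servers with 4 GB RAM"
# could generate something like:
#   {
#     servers = [
#       { name = "web-1", memory = "4GB" },
#       { name = "web-2", memory = "4GB" }
#     ]
#   }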

# ============================================================================
# Form Assistance (typdialog-ai)
# ============================================================================

[ai.form_assistance]
# Real-time suggestions as the user types
real_time_suggestions = true

# Minimum characters before triggering suggestions
min_chars_for_suggestions = 3

# Maximum suggestions per field
max_suggestions = 5

# Suggestion confidence threshold (0.0-1.0)
# Only show suggestions with confidence above the threshold
confidence_threshold = 0.7

# Natural language form filling
# Users can describe the entire form in plain English
nl_form_filling = true

# ============================================================================
# Environment-Specific Overrides
# ============================================================================

# Development environment
[ai.environments.dev]
enabled = true
provider = "openai"  # Cheaper for dev
model = "gpt-4-turbo"
require_human_approval = false  # Faster iteration
cost_limit_per_day = "10.00"

# Staging environment
[ai.environments.staging]
enabled = true
provider = "anthropic"
model = "claude-sonnet-4"
require_human_approval = true
cost_limit_per_day = "50.00"

# Production environment
[ai.environments.production]
enabled = true
provider = "anthropic"
model = "claude-sonnet-4"
require_human_approval = true  # ALWAYS true for production
autonomous_agents = false      # NEVER enable in production
cost_limit_per_day = "100.00"
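
# Example (hypothetical): additional environments follow the same pattern,
# e.g. a QA environment that reuses the staging provider with a tighter
# budget (uncomment and adjust to use):
#   [ai.environments.qa]
#   enabled = true
#   provider = "anthropic"
#   model = "claude-haiku-4"
#   require_human_approval = true
#   cost_limit_per_day = "25.00"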

# ============================================================================
# Integration with Other Services
# ============================================================================

[ai.integration]
# Orchestrator integration
orchestrator_url = "https://orchestrator.example.com"
orchestrator_api_key = "env:ORCHESTRATOR_API_KEY"

# SecretumVault integration (for secret name suggestions only)
# AI can query secret names/paths but NEVER values
secretum_vault_url = "https://vault.example.com:8200"
secretum_vault_token = "env:VAULT_TOKEN"

# Typdialog Web UI integration
typdialog_url = "https://forms.provisioning.example.com"
typdialog_websocket_enabled = true

# ============================================================================
# Advanced Settings
# ============================================================================

[ai.advanced]
# Prompt engineering
system_prompt_template = "provisioning/ai/prompts/system.txt"
user_prompt_template = "provisioning/ai/prompts/user.txt"

# Context window management
max_context_tokens = 100000  # Conservative cap, below Claude Sonnet 4's full context window
context_truncation_strategy = "sliding_window"  # "sliding_window" | "summarize"

# Streaming responses
enable_streaming = true
stream_chunk_size = 100  # Characters per chunk

# Concurrent requests
max_concurrent_requests = 10

# ============================================================================
# Experimental Features (Use at Your Own Risk)
# ============================================================================

[ai.experimental]
# Multi-agent collaboration
# Multiple AI agents work together on complex tasks
multi_agent_collaboration = false

# Reinforcement learning from human feedback (RLHF)
# Learn from user corrections to improve over time
rlhf_enabled = false

# Fine-tuning on deployment history
# Train custom models on organization-specific patterns
fine_tuning = false
fine_tuning_dataset_path = "provisioning/ai/fine-tuning-data"

# ============================================================================
# Compliance and Legal
# ============================================================================

[ai.compliance]
# Data residency requirements
# Ensure the LLM provider complies with data residency laws
data_residency = "us"  # "us" | "eu" | "local"

# GDPR compliance mode
gdpr_mode = false
gdpr_data_retention_days = 90

# SOC 2 compliance logging
soc2_logging = false

# Terms of service acceptance
# The LLM provider's TOS must be explicitly accepted
tos_accepted = false
tos_version = "2025-01-08"

# IMPORTANT NOTES:
#
# 1. API Keys: NEVER hardcode API keys. Always use environment variables.
#    Example: api_key = "env:ANTHROPIC_API_KEY"
#
# 2. Security: Keep require_human_approval = true for production.
#    AI-generated configs must be reviewed by humans.
#
# 3. Costs: Monitor LLM API usage and set an appropriate cost_limit_per_day.
#    The default limits are conservative but may need adjustment.
#
# 4. Privacy: For sensitive workloads, use local models (no external API calls).
#    Set provider = "local" and configure the local model path.
#
# 5. RAG Index: Reindex regularly to keep AI knowledge up to date.
#    Set auto_reindex = true and adjust reindex_interval_hours.
#
# 6. Cedar Policies: Define fine-grained AI access control in Cedar.
#    Location: /etc/provisioning/cedar-policies/ai
#
# 7. Audit Logs: AI operations are security-critical. Keep audit_all_operations = true.
#    Logs are stored in /var/log/provisioning/ai-audit.log
#
# 8. Agents: Autonomous agents are powerful but risky.
#    Enable them only for specific use cases, never globally in production.

# Version: 1.0
# Last Updated: 2025-01-08