# AI Integration Configuration for Provisioning Platform # This file configures the AI system including LLM providers, RAG, MCP, and security policies. # ============================================================================ # Core AI Configuration # ============================================================================ [ai] # Enable/disable AI features globally enabled = true # LLM Provider Selection # Options: "anthropic" | "openai" | "local" | "azure-openai" provider = "anthropic" # Model Selection # Anthropic: "claude-sonnet-4", "claude-opus-4", "claude-haiku-4" # OpenAI: "gpt-4-turbo", "gpt-4", "gpt-3.5-turbo" # Local: "llama-3-70b", "mistral-large", "codellama-34b" model = "claude-sonnet-4" # Model Temperature (0.0-1.0) # Lower = more deterministic, Higher = more creative temperature = 0.7 # Maximum tokens for responses max_tokens = 4096 # Request timeout (seconds) timeout = 60 # ============================================================================ # AI Features - Fine-Grained Control # ============================================================================ [ai.features] # AI-assisted form filling (typdialog-ai) # Real-time suggestions and field value predictions form_assistance = true # Natural language configuration generation (typdialog-prov-gen) # Convert plain English to Nickel configs config_generation = true # Autonomous AI agents (typdialog-ag) # WARNING: Agents can execute multi-step workflows # Recommended: false for production (enable per-use-case) autonomous_agents = false # AI-powered troubleshooting # Analyze logs and suggest fixes for failed deployments troubleshooting = true # Configuration optimization # AI reviews configs and suggests improvements optimization = true # Validation error explanations # AI explains Nickel validation errors in plain language error_explanations = true # ============================================================================ # LLM Provider Configuration # ============================================================================ [ai.anthropic] # Anthropic Claude API configuration api_key = "env:ANTHROPIC_API_KEY" # Load from environment variable api_url = "https://api.anthropic.com/v1" max_retries = 3 retry_delay_ms = 1000 # Rate limits (per minute) max_requests_per_minute = 50 max_tokens_per_minute = 100000 [ai.openai] # OpenAI GPT-4 API configuration api_key = "env:OPENAI_API_KEY" api_url = "https://api.openai.com/v1" max_retries = 3 organization_id = "" # Optional retry_delay_ms = 1000 # Rate limits (per minute) max_requests_per_minute = 60 max_tokens_per_minute = 150000 [ai.local] # Local LLM configuration (Ollama, LlamaCpp, vLLM) # Use for air-gapped deployments or privacy-critical scenarios context_length = 8192 model_path = "/opt/provisioning/models/llama-3-70b" num_gpu_layers = 40 # GPU acceleration server_url = "http://localhost:11434" # Ollama default # ============================================================================ # Model Context Protocol (MCP) Server # ============================================================================ [ai.mcp] # MCP server configuration enabled = true max_retries = 3 server_url = "http://localhost:9000" timeout = 30 # Tool calling configuration [ai.mcp.tools] enabled = true # Available tools for LLM # Tools provide structured actions the LLM can invoke tools = [ "nickel_validate", # Validate Nickel configuration "schema_query", # Query Nickel schema information "config_generate", # Generate configuration snippets "cedar_check", # Check Cedar authorization policies "deployment_status", # Query deployment status "log_analyze", # Analyze deployment logs ] # ============================================================================ # Retrieval-Augmented Generation (RAG) # ============================================================================ [ai.rag] # Enable RAG system enabled = true # Vector Store Configuration # Options: "qdrant" | "milvus" | "pgvector" | "chromadb" collection_name = "provisioning-knowledge" vector_store = "qdrant" vector_store_url = "http://localhost:6333" # Embedding Model # OpenAI: "text-embedding-3-large", "text-embedding-3-small" # Local: "all-MiniLM-L6-v2", "bge-large-en-v1.5" embedding_api_key = "env:OPENAI_API_KEY" # For OpenAI embeddings embedding_model = "text-embedding-3-large" # Document Chunking chunk_overlap = 50 # Overlap between chunks chunk_size = 512 # Characters per chunk max_chunks_per_query = 10 # Top-k retrieval # ============================================================================ # RAG Index Configuration # ============================================================================ [ai.rag.index] # What to index for RAG retrieval # Index Nickel schemas (RECOMMENDED: true) # Provides AI with schema definitions and contracts schemas = true schemas_path = "provisioning/schemas" # Index documentation (RECOMMENDED: true) # Provides AI with user guides and best practices docs = true docs_path = "docs" # Index past deployments (RECOMMENDED: true) # AI learns from successful deployment patterns deployments = true deployments_path = "workspaces" # Index best practices (RECOMMENDED: true) # Inject organizational patterns and conventions best_practices = true best_practices_path = ".claude/patterns" # Index deployment logs (WARNING: Privacy concerns) # Logs may contain sensitive data, enable only if sanitized logs = false logs_retention_days = 30 # Reindexing schedule auto_reindex = true reindex_interval_hours = 24 # ============================================================================ # Security and Access Control # ============================================================================ [ai.security] # Cedar policy store for AI access control cedar_policy_store = "/etc/provisioning/cedar-policies/ai" # AI cannot suggest secret values (CRITICAL: keep true) # AI can suggest secret names/paths but not retrieve actual secrets max_secret_suggestions = 0 # Require human approval for critical operations (CRITICAL: keep true) # Operations requiring approval: # - Deployments to production # - Configuration changes affecting security # - Secret rotation # - Infrastructure deletion require_human_approval = true # Audit all AI operations (CRITICAL: keep true) # Log every AI request, response, and action audit_all_operations = true # Data sanitization before sending to LLM # Remove sensitive data from prompts [ai.security.sanitization] sanitize_credentials = true # Remove passwords, API keys sanitize_ip_addresses = false # Keep for troubleshooting sanitize_pii = true # Remove personally identifiable info sanitize_secrets = true # Remove secret values # Allowed data for LLM allowed_data = [ "nickel_schemas", # Schema definitions (public) "documentation", # User docs (public) "error_messages", # Validation errors (sanitized) "resource_names", # Infrastructure resource identifiers ] # Forbidden data for LLM (NEVER send to external LLM) forbidden_data = [ "secret_values", # Passwords, API keys, tokens "private_keys", # SSH keys, TLS keys, encryption keys "pii", # Email addresses, names, phone numbers "credentials", # Authentication credentials "session_tokens", # User session data ] # ============================================================================ # Rate Limiting and Cost Control # ============================================================================ [ai.rate_limiting] # Per-user rate limits requests_per_day = 2000 requests_per_hour = 500 requests_per_minute = 60 # Token limits (to control LLM API costs) tokens_per_day = 1000000 # 1M tokens/day tokens_per_month = 30000000 # 30M tokens/month # Cost limits (USD) cost_limit_per_day = "100.00" cost_limit_per_month = "2000.00" # Alert thresholds cost_alert_threshold = 0.8 # Alert at 80% of limit # Rate limit exceeded behavior # Options: "queue" | "reject" | "throttle" exceed_behavior = "queue" max_queue_size = 100 # ============================================================================ # Caching # ============================================================================ [ai.caching] # Enable response caching to reduce LLM API calls enabled = true # Cache TTL (time-to-live) ttl = "1h" # Cache backend # Options: "redis" | "memcached" | "in-memory" backend = "redis" redis_url = "redis://localhost:6379" # Cache key strategy # "prompt" = Cache by exact prompt (high precision, low hit rate) # "semantic" = Cache by semantic similarity (lower precision, high hit rate) cache_strategy = "semantic" semantic_similarity_threshold = 0.95 # Cache statistics log_cache_misses = false track_hit_rate = true # ============================================================================ # Observability and Monitoring # ============================================================================ [ai.observability] # Logging level for AI operations # Options: "trace" | "debug" | "info" | "warn" | "error" log_level = "info" # Trace all AI requests (detailed logging) # WARNING: Generates large log volume trace_all_requests = true # Store conversation history (for debugging and learning) conversation_retention_days = 30 store_conversations = true # Metrics collection [ai.observability.metrics] enabled = true export_format = "prometheus" # "prometheus" | "opentelemetry" export_port = 9090 # Metrics to collect metrics = [ "request_count", # Total AI requests "request_duration", # Latency histogram "token_usage", # Input/output tokens "cost_tracking", # USD cost per request "cache_hit_rate", # Cache effectiveness "validation_success_rate", # Generated config validity "human_approval_rate", # How often humans approve AI output ] # Distributed tracing [ai.observability.tracing] enabled = true jaeger_endpoint = "http://localhost:14268/api/traces" sample_rate = 0.1 # Sample 10% of requests # ============================================================================ # AI Agent Configuration (typdialog-ag) # ============================================================================ [ai.agents] # WARNING: Autonomous agents can execute multi-step workflows # Enable with caution, only for trusted users # Enable AI agents globally enabled = false # Maximum iterations per agent execution # Prevents infinite loops max_iterations = 20 # Agent timeout (seconds) timeout = 300 # Require approval for each agent action (RECOMMENDED: true) # If false, agent executes entire workflow autonomously require_step_approval = true # Agent types [ai.agents.types] # Provisioning agent: End-to-end infrastructure setup provisioning_agent = false # Troubleshooting agent: Diagnose and fix deployment issues troubleshooting_agent = true # Optimization agent: Analyze and improve configurations optimization_agent = true # Security audit agent: Review configs for vulnerabilities security_audit_agent = true # ============================================================================ # Configuration Generation (typdialog-prov-gen) # ============================================================================ [ai.config_generation] # Default schema for generated configs default_schema = "workspace" # Validation mode # "strict" = Reject any invalid config # "permissive" = Allow configs with warnings validation_mode = "strict" # Best practice injection # Automatically add security/performance best practices inject_best_practices = true # Template usage # Use pre-defined templates as starting points template_directory = "provisioning/templates" use_templates = true # ============================================================================ # Form Assistance (typdialog-ai) # ============================================================================ [ai.form_assistance] # Real-time suggestions as user types real_time_suggestions = true # Minimum characters before triggering suggestions min_chars_for_suggestions = 3 # Maximum suggestions per field max_suggestions = 5 # Suggestion confidence threshold (0.0-1.0) # Only show suggestions with confidence above threshold confidence_threshold = 0.7 # Natural language form filling # User can describe entire form in plain English nl_form_filling = true # ============================================================================ # Environment-Specific Overrides # ============================================================================ # Development environment [ai.environments.dev] cost_limit_per_day = "10.00" enabled = true model = "gpt-4-turbo" provider = "openai" # Cheaper for dev require_human_approval = false # Faster iteration # Staging environment [ai.environments.staging] cost_limit_per_day = "50.00" enabled = true model = "claude-sonnet-4" provider = "anthropic" require_human_approval = true # Production environment [ai.environments.production] autonomous_agents = false # NEVER enable in production cost_limit_per_day = "100.00" enabled = true model = "claude-sonnet-4" provider = "anthropic" require_human_approval = true # ALWAYS true for production # ============================================================================ # Integration with Other Services # ============================================================================ [ai.integration] # Orchestrator integration orchestrator_api_key = "env:ORCHESTRATOR_API_KEY" orchestrator_url = "https://orchestrator.example.com" # SecretumVault integration (for secret name suggestions only) secretum_vault_token = "env:VAULT_TOKEN" secretum_vault_url = "https://vault.example.com:8200" # AI can query secret names/paths but NEVER values # Typdialog Web UI integration typdialog_url = "https://forms.provisioning.example.com" typdialog_websocket_enabled = true # ============================================================================ # Advanced Settings # ============================================================================ [ai.advanced] # Prompt engineering system_prompt_template = "provisioning/ai/prompts/system.txt" user_prompt_template = "provisioning/ai/prompts/user.txt" # Context window management context_truncation_strategy = "sliding_window" # "sliding_window" | "summarize" max_context_tokens = 100000 # Claude Sonnet 4 context window # Streaming responses enable_streaming = true stream_chunk_size = 100 # Characters per chunk # Concurrent requests max_concurrent_requests = 10 # ============================================================================ # Experimental Features (Use at Your Own Risk) # ============================================================================ [ai.experimental] # Multi-agent collaboration # Multiple AI agents work together on complex tasks multi_agent_collaboration = false # Reinforcement learning from human feedback (RLHF) # Learn from user corrections to improve over time rlhf_enabled = false # Fine-tuning on deployment history # Train custom models on organization-specific patterns fine_tuning = false fine_tuning_dataset_path = "provisioning/ai/fine-tuning-data" # ============================================================================ # Compliance and Legal # ============================================================================ [ai.compliance] # Data residency requirements # Ensure LLM provider complies with data residency laws data_residency = "us" # "us" | "eu" | "local" # GDPR compliance mode gdpr_data_retention_days = 90 gdpr_mode = false # SOC 2 compliance logging soc2_logging = false # Terms of service acceptance # Must explicitly accept LLM provider TOS tos_accepted = false tos_version = "2025-01-08" # IMPORTANT NOTES: # # 1. API Keys: NEVER hardcode API keys. Always use environment variables. # Example: api_key = "env:ANTHROPIC_API_KEY" # # 2. Security: Keep require_human_approval = true for production. # AI-generated configs must be reviewed by humans. # # 3. Costs: Monitor LLM API usage. Set appropriate cost_limit_per_day. # Default limits are conservative but may need adjustment. # # 4. Privacy: For sensitive workloads, use local models (no external API calls). # Set provider = "local" and configure local model path. # # 5. RAG Index: Regularly reindex to keep AI knowledge up-to-date. # Set auto_reindex = true and adjust reindex_interval_hours. # # 6. Cedar Policies: Define fine-grained AI access control in Cedar. # Location: /etc/provisioning/cedar-policies/ai # # 7. Audit Logs: AI operations are security-critical. Keep audit_all_operations = true. # Logs stored in: /var/log/provisioning/ai-audit.log # # 8. Agents: Autonomous agents are powerful but risky. # Enable only for specific use cases, never globally in production. # Version: 1.0 # Last Updated: 2025-01-08