From 52904472de68412eaa9c8b88d1802d75d8959b23 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jesu=CC=81s=20Pe=CC=81rez?=
Date: Mon, 12 Jan 2026 05:19:06 +0000
Subject: [PATCH] chore: reformat configs

Pure reformat of config/ai.toml and the example configs: nested TOML
tables are indented one level and keys are sorted alphabetically within
each table. No keys or values change (755 insertions, 755 deletions).
---
 config/ai.toml                                     | 1006 ++++++++---------
 .../examples/control-center.solo.example.toml      |  152 +--
 ...extension-registry.enterprise.example.toml      |   42 +-
 ...ension-registry.multi-backend.example.toml      |   42 +-
 .../extension-registry.solo.example.toml           |   12 +-
 .../orchestrator.enterprise.example.toml           |  128 +--
 .../examples/orchestrator.solo.example.toml        |  128 +--
 7 files changed, 755 insertions(+), 755 deletions(-)

diff --git a/config/ai.toml b/config/ai.toml
index a334009..015f3cc 100644
--- a/config/ai.toml
+++ b/config/ai.toml
@@ -33,506 +33,506 @@ timeout = 60
 # AI Features - Fine-Grained Control
 # ============================================================================
 
-[ai.features]
-# AI-assisted form filling (typdialog-ai)
-# Real-time suggestions and field value predictions
-form_assistance = true
-
-# Natural language configuration generation (typdialog-prov-gen)
-# Convert plain English to Nickel configs
-config_generation = true
-
-# Autonomous AI agents (typdialog-ag)
-# WARNING: Agents can execute multi-step workflows
-# Recommended: false for production (enable per-use-case)
-autonomous_agents = false
-
-# AI-powered troubleshooting
-# Analyze logs and suggest fixes for failed deployments
-troubleshooting = true
-
-# Configuration optimization
-# AI reviews configs and suggests improvements
-optimization = true
-
-# Validation error explanations
-# AI explains Nickel validation errors in plain language
-error_explanations = true
-
-# ============================================================================
-# LLM Provider Configuration
-# ============================================================================
-
-[ai.anthropic]
-# Anthropic Claude API configuration
-api_key = "env:ANTHROPIC_API_KEY"  # Load from environment variable
-api_url = "https://api.anthropic.com/v1"
-max_retries = 3
-retry_delay_ms = 1000
-
-# Rate limits (per minute)
-max_requests_per_minute = 50
-max_tokens_per_minute = 100000
-
-[ai.openai]
-# OpenAI GPT-4 API configuration
-api_key = "env:OPENAI_API_KEY"
-api_url = "https://api.openai.com/v1"
-organization_id = ""  # Optional
-max_retries = 3
-retry_delay_ms = 1000
-
-# Rate limits (per minute)
-max_requests_per_minute = 60
-max_tokens_per_minute = 150000
-
-[ai.local]
-# Local LLM configuration (Ollama, LlamaCpp, vLLM)
-# Use for air-gapped deployments or privacy-critical scenarios
-model_path = "/opt/provisioning/models/llama-3-70b"
-server_url = "http://localhost:11434"  # Ollama default
-context_length = 8192
-num_gpu_layers = 40  # GPU acceleration
-
-# ============================================================================
-# Model Context Protocol (MCP) Server
-# ============================================================================
-
-[ai.mcp]
-# MCP server configuration
-enabled = true
-server_url = "http://localhost:9000"
-timeout = 30
-max_retries = 3
-
-# Tool calling configuration
-[ai.mcp.tools]
-enabled = true
-
-# Available tools for LLM
-# Tools provide structured actions the LLM can invoke
-tools = [
-    "nickel_validate",     # Validate Nickel configuration
-    "schema_query",        # Query Nickel schema information
-    "config_generate",     # Generate configuration snippets
-    "cedar_check",         # Check Cedar authorization policies
-    "deployment_status",   # Query deployment status
-    "log_analyze",         # Analyze deployment logs
-]
-
-# ============================================================================
-# Retrieval-Augmented Generation (RAG)
-# ============================================================================
-
-[ai.rag]
-# Enable RAG system
-enabled = true
-
-# Vector Store Configuration
-# Options: "qdrant" | "milvus" | "pgvector" | "chromadb"
-vector_store = "qdrant"
-vector_store_url = "http://localhost:6333"
-collection_name = "provisioning-knowledge"
-
-# Embedding Model
-# OpenAI: "text-embedding-3-large", "text-embedding-3-small"
-# Local: "all-MiniLM-L6-v2", "bge-large-en-v1.5"
-embedding_model = "text-embedding-3-large"
-embedding_api_key = "env:OPENAI_API_KEY"  # For OpenAI embeddings
-
-# Document Chunking
-chunk_size = 512  # Characters per chunk
-chunk_overlap = 50  # Overlap between chunks
-max_chunks_per_query = 10  # Top-k retrieval
-
-# ============================================================================
-# RAG Index Configuration
-# ============================================================================
-
-[ai.rag.index]
-# What to index for RAG retrieval
-
-# Index Nickel schemas (RECOMMENDED: true)
-# Provides AI with schema definitions and contracts
-schemas = true
-schemas_path = "provisioning/schemas"
-
-# Index documentation (RECOMMENDED: true)
-# Provides AI with user guides and best practices
-docs = true
-docs_path = "docs"
-
-# Index past deployments (RECOMMENDED: true)
-# AI learns from successful deployment patterns
-deployments = true
-deployments_path = "workspaces"
-
-# Index best practices (RECOMMENDED: true)
-# Inject organizational patterns and conventions
-best_practices = true
-best_practices_path = ".claude/patterns"
-
-# Index deployment logs (WARNING: Privacy concerns)
-# Logs may contain sensitive data, enable only if sanitized
-logs = false
-logs_retention_days = 30
-
-# Reindexing schedule
-auto_reindex = true
-reindex_interval_hours = 24
-
-# ============================================================================
-# Security and Access Control
-# ============================================================================
-
-[ai.security]
-# Cedar policy store for AI access control
-cedar_policy_store = "/etc/provisioning/cedar-policies/ai"
-
-# AI cannot suggest secret values (CRITICAL: keep true)
-# AI can suggest secret names/paths but not retrieve actual secrets
-max_secret_suggestions = 0
-
-# Require human approval for critical operations (CRITICAL: keep true)
-# Operations requiring approval:
-#   - Deployments to production
-#   - Configuration changes affecting security
-#   - Secret rotation
-#   - Infrastructure deletion
-require_human_approval = true
-
-# Audit all AI operations (CRITICAL: keep true)
-# Log every AI request, response, and action
-audit_all_operations = true
-
-# Data sanitization before sending to LLM
-# Remove sensitive data from prompts
-[ai.security.sanitization]
-sanitize_secrets = true  # Remove secret values
-sanitize_pii = true  # Remove personally identifiable info
-sanitize_credentials = true  # Remove passwords, API keys
-sanitize_ip_addresses = false  # Keep for troubleshooting
-
-# Allowed data for LLM
-allowed_data = [
-    "nickel_schemas",   # Schema definitions (public)
-    "documentation",    # User docs (public)
-    "error_messages",   # Validation errors (sanitized)
-    "resource_names",   # Infrastructure resource identifiers
-]
-
-# Forbidden data for LLM (NEVER send to external LLM)
-forbidden_data = [
-    "secret_values",    # Passwords, API keys, tokens
-    "private_keys",     # SSH keys, TLS keys, encryption keys
-    "pii",              # Email addresses, names, phone numbers
-    "credentials",     # Authentication credentials
-    "session_tokens",  # User session data
-]
-
-# ============================================================================
-# Rate Limiting and Cost Control
-# ============================================================================
-
-[ai.rate_limiting]
-# Per-user rate limits
-requests_per_minute = 60
-requests_per_hour = 500
-requests_per_day = 2000
-
-# Token limits (to control LLM API costs)
-tokens_per_day = 1000000  # 1M tokens/day
-tokens_per_month = 30000000  # 30M tokens/month
-
-# Cost limits (USD)
-cost_limit_per_day = "100.00"
-cost_limit_per_month = "2000.00"
-
-# Alert thresholds
-cost_alert_threshold = 0.8  # Alert at 80% of limit
-
-# Rate limit exceeded behavior
-# Options: "queue" | "reject" | "throttle"
-exceed_behavior = "queue"
-max_queue_size = 100
-
-# ============================================================================
-# Caching
-# ============================================================================
-
-[ai.caching]
-# Enable response caching to reduce LLM API calls
-enabled = true
-
-# Cache TTL (time-to-live)
-ttl = "1h"
-
-# Cache backend
-# Options: "redis" | "memcached" | "in-memory"
-backend = "redis"
-redis_url = "redis://localhost:6379"
-
-# Cache key strategy
-# "prompt" = Cache by exact prompt (high precision, low hit rate)
-# "semantic" = Cache by semantic similarity (lower precision, high hit rate)
-cache_strategy = "semantic"
-semantic_similarity_threshold = 0.95
-
-# Cache statistics
-track_hit_rate = true
-log_cache_misses = false
-
-# ============================================================================
-# Observability and Monitoring
-# ============================================================================
-
-[ai.observability]
-# Logging level for AI operations
-# Options: "trace" | "debug" | "info" | "warn" | "error"
-log_level = "info"
-
-# Trace all AI requests (detailed logging)
-# WARNING: Generates large log volume
-trace_all_requests = true
-
-# Store conversation history (for debugging and learning)
-store_conversations = true
-conversation_retention_days = 30
-
-# Metrics collection
-[ai.observability.metrics]
-enabled = true
-export_format = "prometheus"  # "prometheus" | "opentelemetry"
-export_port = 9090
-
-# Metrics to collect
-metrics = [
-    "request_count",             # Total AI requests
-    "request_duration",          # Latency histogram
-    "token_usage",               # Input/output tokens
-    "cost_tracking",             # USD cost per request
-    "cache_hit_rate",            # Cache effectiveness
-    "validation_success_rate",   # Generated config validity
-    "human_approval_rate",       # How often humans approve AI output
-]
-
-# Distributed tracing
-[ai.observability.tracing]
-enabled = true
-jaeger_endpoint = "http://localhost:14268/api/traces"
-sample_rate = 0.1  # Sample 10% of requests
-
-# ============================================================================
-# AI Agent Configuration (typdialog-ag)
-# ============================================================================
-
-[ai.agents]
-# WARNING: Autonomous agents can execute multi-step workflows
-# Enable with caution, only for trusted users
-
-# Enable AI agents globally
-enabled = false
-
-# Maximum iterations per agent execution
-# Prevents infinite loops
-max_iterations = 20
-
-# Agent timeout (seconds)
-timeout = 300
-
-# Require approval for each agent action (RECOMMENDED: true)
-# If false, agent executes entire workflow autonomously
-require_step_approval = true
-
-# Agent types
-[ai.agents.types]
-# Provisioning agent: End-to-end infrastructure setup
-provisioning_agent = false
-
-# Troubleshooting agent: Diagnose and fix deployment issues
-troubleshooting_agent = true
-
-# Optimization agent: Analyze and improve configurations
-optimization_agent = true
-
-# Security audit agent: Review configs for vulnerabilities
-security_audit_agent = true
-
-# ============================================================================
-# Configuration Generation (typdialog-prov-gen)
-# ============================================================================
-
-[ai.config_generation]
-# Default schema for generated configs
-default_schema = "workspace"
-
-# Validation mode
-# "strict" = Reject any invalid config
-# "permissive" = Allow configs with warnings
-validation_mode = "strict"
-
-# Best practice injection
-# Automatically add security/performance best practices
-inject_best_practices = true
-
-# Template usage
-# Use pre-defined templates as starting points
-use_templates = true
-template_directory = "provisioning/templates"
-
-# ============================================================================
-# Form Assistance (typdialog-ai)
-# ============================================================================
-
-[ai.form_assistance]
-# Real-time suggestions as user types
-real_time_suggestions = true
-
-# Minimum characters before triggering suggestions
-min_chars_for_suggestions = 3
-
-# Maximum suggestions per field
-max_suggestions = 5
-
-# Suggestion confidence threshold (0.0-1.0)
-# Only show suggestions with confidence above threshold
-confidence_threshold = 0.7
-
-# Natural language form filling
-# User can describe entire form in plain English
-nl_form_filling = true
-
-# ============================================================================
-# Environment-Specific Overrides
-# ============================================================================
-
-# Development environment
-[ai.environments.dev]
-enabled = true
-provider = "openai"  # Cheaper for dev
-model = "gpt-4-turbo"
-require_human_approval = false  # Faster iteration
-cost_limit_per_day = "10.00"
-
-# Staging environment
-[ai.environments.staging]
-enabled = true
-provider = "anthropic"
-model = "claude-sonnet-4"
-require_human_approval = true
-cost_limit_per_day = "50.00"
-
-# Production environment
-[ai.environments.production]
-enabled = true
-provider = "anthropic"
-model = "claude-sonnet-4"
-require_human_approval = true  # ALWAYS true for production
-autonomous_agents = false  # NEVER enable in production
-cost_limit_per_day = "100.00"
-
-# ============================================================================
-# Integration with Other Services
-# ============================================================================
-
-[ai.integration]
-# Orchestrator integration
-orchestrator_url = "https://orchestrator.example.com"
-orchestrator_api_key = "env:ORCHESTRATOR_API_KEY"
-
-# SecretumVault integration (for secret name suggestions only)
-secretum_vault_url = "https://vault.example.com:8200"
-secretum_vault_token = "env:VAULT_TOKEN"
-# AI can query secret names/paths but NEVER values
-
-# Typdialog Web UI integration
-typdialog_url = "https://forms.provisioning.example.com"
-typdialog_websocket_enabled = true
-
-# ============================================================================
-# Advanced Settings
-# ============================================================================
-
-[ai.advanced]
-# Prompt engineering
-system_prompt_template = "provisioning/ai/prompts/system.txt"
-user_prompt_template = "provisioning/ai/prompts/user.txt"
-
-# Context window management
-max_context_tokens = 100000  # Claude Sonnet 4 context window
-context_truncation_strategy = "sliding_window"  # "sliding_window" | "summarize"
-
-# Streaming responses
-enable_streaming = true
-stream_chunk_size = 100  # Characters per chunk
-
-# Concurrent requests
-max_concurrent_requests = 10
-
-# ============================================================================
-# Experimental Features (Use at Your Own Risk)
-# ============================================================================
-
-[ai.experimental]
-# Multi-agent collaboration
-# Multiple AI agents work together on complex tasks
-multi_agent_collaboration = false
-
-# Reinforcement learning from human feedback (RLHF)
-# Learn from user corrections to improve over time
-rlhf_enabled = false
-
-# Fine-tuning on deployment history
-# Train custom models on organization-specific patterns
-fine_tuning = false
-fine_tuning_dataset_path = "provisioning/ai/fine-tuning-data"
-
-# ============================================================================
-# Compliance and Legal
-# ============================================================================
-
-[ai.compliance]
-# Data residency requirements
-# Ensure LLM provider complies with data residency laws
-data_residency = "us"  # "us" | "eu" | "local"
-
-# GDPR compliance mode
-gdpr_mode = false
-gdpr_data_retention_days = 90
-
-# SOC 2 compliance logging
-soc2_logging = false
-
-# Terms of service acceptance
-# Must explicitly accept LLM provider TOS
-tos_accepted = false
-tos_version = "2025-01-08"
-
-# IMPORTANT NOTES:
-#
-# 1. API Keys: NEVER hardcode API keys. Always use environment variables.
-#    Example: api_key = "env:ANTHROPIC_API_KEY"
-#
-# 2. Security: Keep require_human_approval = true for production.
-#    AI-generated configs must be reviewed by humans.
-#
-# 3. Costs: Monitor LLM API usage. Set appropriate cost_limit_per_day.
-#    Default limits are conservative but may need adjustment.
-#
-# 4. Privacy: For sensitive workloads, use local models (no external API calls).
-#    Set provider = "local" and configure local model path.
-#
-# 5. RAG Index: Regularly reindex to keep AI knowledge up-to-date.
-#    Set auto_reindex = true and adjust reindex_interval_hours.
-#
-# 6. Cedar Policies: Define fine-grained AI access control in Cedar.
-#    Location: /etc/provisioning/cedar-policies/ai
-#
-# 7. Audit Logs: AI operations are security-critical. Keep audit_all_operations = true.
-#    Logs stored in: /var/log/provisioning/ai-audit.log
-#
-# 8. Agents: Autonomous agents are powerful but risky.
-#    Enable only for specific use cases, never globally in production.
-
-# Version: 1.0
-# Last Updated: 2025-01-08
+ [ai.features]
+ # AI-assisted form filling (typdialog-ai)
+ # Real-time suggestions and field value predictions
+ form_assistance = true
+
+ # Natural language configuration generation (typdialog-prov-gen)
+ # Convert plain English to Nickel configs
+ config_generation = true
+
+ # Autonomous AI agents (typdialog-ag)
+ # WARNING: Agents can execute multi-step workflows
+ # Recommended: false for production (enable per-use-case)
+ autonomous_agents = false
+
+ # AI-powered troubleshooting
+ # Analyze logs and suggest fixes for failed deployments
+ troubleshooting = true
+
+ # Configuration optimization
+ # AI reviews configs and suggests improvements
+ optimization = true
+
+ # Validation error explanations
+ # AI explains Nickel validation errors in plain language
+ error_explanations = true
+
+ # ============================================================================
+ # LLM Provider Configuration
+ # ============================================================================
+
+ [ai.anthropic]
+ # Anthropic Claude API configuration
+ api_key = "env:ANTHROPIC_API_KEY"  # Load from environment variable
+ api_url = "https://api.anthropic.com/v1"
+ max_retries = 3
+ retry_delay_ms = 1000
+
+ # Rate limits (per minute)
+ max_requests_per_minute = 50
+ max_tokens_per_minute = 100000
+
+ [ai.openai]
+ # OpenAI GPT-4 API configuration
+ api_key = "env:OPENAI_API_KEY"
+ api_url = "https://api.openai.com/v1"
+ max_retries = 3
+ organization_id = ""  # Optional
+ retry_delay_ms = 1000
+
+ # Rate limits (per minute)
+ max_requests_per_minute = 60
+ max_tokens_per_minute = 150000
+
+ [ai.local]
+ # Local LLM configuration (Ollama, LlamaCpp, vLLM)
+ # Use for air-gapped deployments or privacy-critical scenarios
+ context_length = 8192
+ model_path = "/opt/provisioning/models/llama-3-70b"
+ num_gpu_layers = 40  # GPU acceleration
+ server_url = "http://localhost:11434"  # Ollama default
+
+ # ============================================================================
+ # Model Context Protocol (MCP) Server
+ # ============================================================================
+
+ [ai.mcp]
+ # MCP server configuration
+ enabled = true
+ max_retries = 3
+ server_url = "http://localhost:9000"
+ timeout = 30
+
+ # Tool calling configuration
+ [ai.mcp.tools]
+ enabled = true
+
+ # Available tools for LLM
+ # Tools provide structured actions the LLM can invoke
+ tools = [
+     "nickel_validate",     # Validate Nickel configuration
+     "schema_query",        # Query Nickel schema information
+     "config_generate",     # Generate configuration snippets
+     "cedar_check",         # Check Cedar authorization policies
+     "deployment_status",   # Query deployment status
+     "log_analyze",         # Analyze deployment logs
+ ]
+
+ # ============================================================================
+ # Retrieval-Augmented Generation (RAG)
+ # ============================================================================
+
+ [ai.rag]
+ # Enable RAG system
+ enabled = true
+
+ # Vector Store Configuration
+ # Options: "qdrant" | "milvus" | "pgvector" | "chromadb"
+ collection_name = "provisioning-knowledge"
+ vector_store = "qdrant"
+ vector_store_url = "http://localhost:6333"
+
+ # Embedding Model
+ # OpenAI: "text-embedding-3-large", "text-embedding-3-small"
+ # Local: "all-MiniLM-L6-v2", "bge-large-en-v1.5"
+ embedding_api_key = "env:OPENAI_API_KEY"  # For OpenAI embeddings
+ embedding_model = "text-embedding-3-large"
+
+ # Document Chunking
+ chunk_overlap = 50  # Overlap between chunks
+ chunk_size = 512  # Characters per chunk
+ max_chunks_per_query = 10  # Top-k retrieval
+
+ # ============================================================================
+ # RAG Index Configuration
+ # ============================================================================
+
+ [ai.rag.index]
+ # What to index for RAG retrieval
+
+ # Index Nickel schemas (RECOMMENDED: true)
+ # Provides AI with schema definitions and contracts
+ schemas = true
+ schemas_path = "provisioning/schemas"
+
+ # Index documentation (RECOMMENDED: true)
+ # Provides AI with user guides and best practices
+ docs = true
+ docs_path = "docs"
+
+ # Index past deployments (RECOMMENDED: true)
+ # AI learns from successful deployment patterns
+ deployments = true
+ deployments_path = "workspaces"
+
+ # Index best practices (RECOMMENDED: true)
+ # Inject organizational patterns and conventions
+ best_practices = true
+ best_practices_path = ".claude/patterns"
+
+ # Index deployment logs (WARNING: Privacy concerns)
+ # Logs may contain sensitive data, enable only if sanitized
+ logs = false
+ logs_retention_days = 30
+
+ # Reindexing schedule
+ auto_reindex = true
+ reindex_interval_hours = 24
+
+ # ============================================================================
+ # Security and Access Control
+ # ============================================================================
+
+ [ai.security]
+ # Cedar policy store for AI access control
+ cedar_policy_store = "/etc/provisioning/cedar-policies/ai"
+
+ # AI cannot suggest secret values (CRITICAL: keep true)
+ # AI can suggest secret names/paths but not retrieve actual secrets
+ max_secret_suggestions = 0
+
+ # Require human approval for critical operations (CRITICAL: keep true)
+ # Operations requiring approval:
+ #   - Deployments to production
+ #   - Configuration changes affecting security
+ #   - Secret rotation
+ #   - Infrastructure deletion
+ require_human_approval = true
+
+ # Audit all AI operations (CRITICAL: keep true)
+ # Log every AI request, response, and action
+ audit_all_operations = true
+
+ # Data sanitization before sending to LLM
+ # Remove sensitive data from prompts
+ [ai.security.sanitization]
+ sanitize_credentials = true  # Remove passwords, API keys
+ sanitize_ip_addresses = false  # Keep for troubleshooting
+ sanitize_pii = true  # Remove personally identifiable info
+ sanitize_secrets = true  # Remove secret values
+
+ # Allowed data for LLM
+ allowed_data = [
+     "nickel_schemas",   # Schema definitions (public)
+     "documentation",    # User docs (public)
+     "error_messages",   # Validation errors (sanitized)
+     "resource_names",   # Infrastructure resource identifiers
+ ]
+
+ # Forbidden data for LLM (NEVER send to external LLM)
+ forbidden_data = [
+     "secret_values",    # Passwords, API keys, tokens
+     "private_keys",     # SSH keys, TLS keys, encryption keys
+     "pii",              # Email addresses, names, phone numbers
+     "credentials",      # Authentication credentials
+     "session_tokens",   # User session data
+ ]
+
+ # ============================================================================
+ # Rate Limiting and Cost Control
+ # ============================================================================
+
+ [ai.rate_limiting]
+ # Per-user rate limits
+ requests_per_day = 2000
+ requests_per_hour = 500
+ requests_per_minute = 60
+
+ # Token limits (to control LLM API costs)
+ tokens_per_day = 1000000  # 1M tokens/day
+ tokens_per_month = 30000000  # 30M tokens/month
+
+ # Cost limits (USD)
+ cost_limit_per_day = "100.00"
+ cost_limit_per_month = "2000.00"
+
+ # Alert thresholds
+ cost_alert_threshold = 0.8  # Alert at 80% of limit
+
+ # Rate limit exceeded behavior
+ # Options: "queue" | "reject" | "throttle"
+ exceed_behavior = "queue"
+ max_queue_size = 100
+
+ # ============================================================================
+ # Caching
+ # ============================================================================
+
+ [ai.caching]
+ # Enable response caching to reduce LLM API calls
+ enabled = true
+
+ # Cache TTL (time-to-live)
+ ttl = "1h"
+
+ # Cache backend
+ # Options: "redis" | "memcached" | "in-memory"
+ backend = "redis"
+ redis_url = "redis://localhost:6379"
+
+ # Cache key strategy
+ # "prompt" = Cache by exact prompt (high precision, low hit rate)
+ # "semantic" = Cache by semantic similarity (lower precision, high hit rate)
+ cache_strategy = "semantic"
+ semantic_similarity_threshold = 0.95
+
+ # Cache statistics
+ log_cache_misses = false
+ track_hit_rate = true
+
+ # ============================================================================
+ # Observability and Monitoring
+ # ============================================================================
+
+ [ai.observability]
+ # Logging level for AI operations
+ # Options: "trace" | "debug" | "info" | "warn" | "error"
+ log_level = "info"
+
+ # Trace all AI requests (detailed logging)
+ # WARNING: Generates large log volume
+ trace_all_requests = true
+
+ # Store conversation history (for debugging and learning)
+ conversation_retention_days = 30
+ store_conversations = true
+
+ # Metrics collection
+ [ai.observability.metrics]
+ enabled = true
+ export_format = "prometheus"  # "prometheus" | "opentelemetry"
+ export_port = 9090
+
+ # Metrics to collect
+ metrics = [
+     "request_count",             # Total AI requests
+     "request_duration",          # Latency histogram
+     "token_usage",               # Input/output tokens
+     "cost_tracking",             # USD cost per request
+     "cache_hit_rate",            # Cache effectiveness
+     "validation_success_rate",   # Generated config validity
+     "human_approval_rate",       # How often humans approve AI output
+ ]
+
+ # Distributed tracing
+ [ai.observability.tracing]
+ enabled = true
+ jaeger_endpoint = "http://localhost:14268/api/traces"
+ sample_rate = 0.1  # Sample 10% of requests
+
+ # ============================================================================
+ # AI Agent Configuration (typdialog-ag)
+ # ============================================================================
+
+ [ai.agents]
+ # WARNING: Autonomous agents can execute multi-step workflows
+ # Enable with caution, only for trusted users
+
+ # Enable AI agents globally
+ enabled = false
+
+ # Maximum iterations per agent execution
+ # Prevents infinite loops
+ max_iterations = 20
+
+ # Agent timeout (seconds)
+ timeout = 300
+
+ # Require approval for each agent action (RECOMMENDED: true)
+ # If false, agent executes entire workflow autonomously
+ require_step_approval = true
+
+ # Agent types
+ [ai.agents.types]
+ # Provisioning agent: End-to-end infrastructure setup
+ provisioning_agent = false
+
+ # Troubleshooting agent: Diagnose and fix deployment issues
+ troubleshooting_agent = true
+
+ # Optimization agent: Analyze and improve configurations
+ optimization_agent = true
+
+ # Security audit agent: Review configs for vulnerabilities
+ security_audit_agent = true
+
+ # ============================================================================
+ # Configuration Generation (typdialog-prov-gen)
+ # ============================================================================
+
+ [ai.config_generation]
+ # Default schema for generated configs
+ default_schema = "workspace"
+
+ # Validation mode
+ # "strict" = Reject any invalid config
+ # "permissive" = Allow configs with warnings
+ validation_mode = "strict"
+
+ # Best practice injection
+ # Automatically add security/performance best practices
+ inject_best_practices = true
+
+ # Template usage
+ # Use pre-defined templates as starting points
+ template_directory = "provisioning/templates"
+ use_templates = true
+
+ # ============================================================================
+ # Form Assistance (typdialog-ai)
+ # ============================================================================
+
+ [ai.form_assistance]
+ # Real-time suggestions as user types
+ real_time_suggestions = true
+
+ # Minimum characters before triggering suggestions
+ min_chars_for_suggestions = 3
+
+ # Maximum suggestions per field
+ max_suggestions = 5
+
+ # Suggestion confidence threshold (0.0-1.0)
+ # Only show suggestions with confidence above threshold
+ confidence_threshold = 0.7
+
+ # Natural language form filling
+ # User can describe entire form in plain English
+ nl_form_filling = true
+
+ # ============================================================================
+ # Environment-Specific Overrides
+ # ============================================================================
+
+ # Development environment
+ [ai.environments.dev]
+ cost_limit_per_day = "10.00"
+ enabled = true
+ model = "gpt-4-turbo"
+ provider = "openai"  # Cheaper for dev
+ require_human_approval = false  # Faster iteration
+
+ # Staging environment
+ [ai.environments.staging]
+ cost_limit_per_day = "50.00"
+ enabled = true
+ model = "claude-sonnet-4"
+ provider = "anthropic"
+ require_human_approval = true
+
+ # Production environment
+ [ai.environments.production]
+ autonomous_agents = false  # NEVER enable in production
+ cost_limit_per_day = "100.00"
+ enabled = true
+ model = "claude-sonnet-4"
+ provider = "anthropic"
+ require_human_approval = true  # ALWAYS true for production
+
+ # ============================================================================
+ # Integration with Other Services
+ # ============================================================================
+
+ [ai.integration]
+ # Orchestrator integration
+ orchestrator_api_key = "env:ORCHESTRATOR_API_KEY"
+ orchestrator_url = "https://orchestrator.example.com"
+
+ # SecretumVault integration (for secret name suggestions only)
+ secretum_vault_token = "env:VAULT_TOKEN"
+ secretum_vault_url = "https://vault.example.com:8200"
+ # AI can query secret names/paths but NEVER values
+
+ # Typdialog Web UI integration
+ typdialog_url = "https://forms.provisioning.example.com"
+ typdialog_websocket_enabled = true
+
+ # ============================================================================
+ # Advanced Settings
+ # ============================================================================
+
+ [ai.advanced]
+ # Prompt engineering
+ system_prompt_template = "provisioning/ai/prompts/system.txt"
+ user_prompt_template = "provisioning/ai/prompts/user.txt"
+
+ # Context window management
+ context_truncation_strategy = "sliding_window"  # "sliding_window" | "summarize"
+ max_context_tokens = 100000  # Claude Sonnet 4 context window
+
+ # Streaming responses
+ enable_streaming = true
+ stream_chunk_size = 100  # Characters per chunk
+
+ # Concurrent requests
+ max_concurrent_requests = 10
+
+ # ============================================================================
+ # Experimental Features (Use at Your Own Risk)
+ # ============================================================================
+
+ [ai.experimental]
+ # Multi-agent collaboration
+ # Multiple AI agents work together on complex tasks
+ multi_agent_collaboration = false
+
+ # Reinforcement learning from human feedback (RLHF)
+ # Learn from user corrections to improve over time
+ rlhf_enabled = false
+
+ # Fine-tuning on deployment history
+ # Train custom models on organization-specific patterns
+ fine_tuning = false
+ fine_tuning_dataset_path = "provisioning/ai/fine-tuning-data"
+
+ # ============================================================================
+ # Compliance and Legal
+ # ============================================================================
+
+ [ai.compliance]
+ # Data residency requirements
+ # Ensure LLM provider complies with data residency laws
+ data_residency = "us"  # "us" | "eu" | "local"
+
+ # GDPR compliance mode
+ gdpr_data_retention_days = 90
+ gdpr_mode = false
+
+ # SOC 2 compliance logging
+ soc2_logging = false
+
+ # Terms of service acceptance
+ # Must explicitly accept LLM provider TOS
+ tos_accepted = false
+ tos_version = "2025-01-08"
+
+ # IMPORTANT NOTES:
+ #
+ # 1. API Keys: NEVER hardcode API keys. Always use environment variables.
+ #    Example: api_key = "env:ANTHROPIC_API_KEY"
+ #
+ # 2. Security: Keep require_human_approval = true for production.
+ #    AI-generated configs must be reviewed by humans.
+ #
+ # 3. Costs: Monitor LLM API usage. Set appropriate cost_limit_per_day.
+ #    Default limits are conservative but may need adjustment.
+ #
+ # 4. Privacy: For sensitive workloads, use local models (no external API calls).
+ #    Set provider = "local" and configure local model path.
+ #
+ # 5. RAG Index: Regularly reindex to keep AI knowledge up-to-date.
+ #    Set auto_reindex = true and adjust reindex_interval_hours.
+ #
+ # 6. Cedar Policies: Define fine-grained AI access control in Cedar.
+ #    Location: /etc/provisioning/cedar-policies/ai
+ #
+ # 7. Audit Logs: AI operations are security-critical. Keep audit_all_operations = true.
+ #    Logs stored in: /var/log/provisioning/ai-audit.log
+ #
+ # 8. Agents: Autonomous agents are powerful but risky.
+ #    Enable only for specific use cases, never globally in production.
+
+ # Version: 1.0
+ # Last Updated: 2025-01-08
diff --git a/config/examples/control-center.solo.example.toml b/config/examples/control-center.solo.example.toml
index 69365ef..5fe0a3b 100644
--- a/config/examples/control-center.solo.example.toml
+++ b/config/examples/control-center.solo.example.toml
@@ -2,21 +2,21 @@
 enabled = false
 redact_sensitive = true
 
-[control_center.audit.storage]
-immutable = false
-retention_days = 90
+ [control_center.audit.storage]
+ immutable = false
+ retention_days = 90
 
 [control_center.compliance]
 enabled = false
 encryption_required = false
 
-[control_center.compliance.data_retention]
-audit_log_days = 2555
-policy_years = 7
+ [control_center.compliance.data_retention]
+ audit_log_days = 2555
+ policy_years = 7
 
-[control_center.compliance.validation]
-enabled = false
-interval_hours = 24
+ [control_center.compliance.validation]
+ enabled = false
+ interval_hours = 24
 
 [control_center.database]
 backend = "rocksdb"
@@ -40,78 +40,78 @@ format = "&"
 level = "&"
 outputs = ["stdout"]
 
-[control_center.logging.fields]
-caller = false
-hostname = true
-pid = true
-service_name = true
-stack_trace = false
-timestamp = true
+ [control_center.logging.fields]
+ caller = false
+ hostname = true
+ pid = true
+ service_name = true
+ stack_trace = false
+ timestamp = true
 
-[control_center.logging.file]
-compress = false
-max_age = 30
-max_backups = 10
-max_size = 104857600
-path = "/var/log/provisioning/service.log"
+ [control_center.logging.file]
+ compress = false
+ max_age = 30
+ max_backups = 10
+ max_size = 104857600
+ path = "/var/log/provisioning/service.log"
 
-[control_center.logging.performance]
-enabled = false
-memory_info = false
-slow_threshold = 1000
+ [control_center.logging.performance]
+ enabled = false
+ memory_info = false
+ slow_threshold = 1000
 
-[control_center.logging.sampling]
-enabled = false
-initial = 100
-thereafter = 100
+ [control_center.logging.sampling]
+ enabled = false
+ initial = 100
+ thereafter = 100
 
-[control_center.logging.syslog]
-protocol = "udp"
+ [control_center.logging.syslog]
+ protocol = "udp"
 
 [control_center.monitoring]
 enabled = false
 
-[control_center.monitoring.alerting]
-enabled = false
+ [control_center.monitoring.alerting]
+ enabled = false
 
-[control_center.monitoring.health_check]
-enabled = false
-endpoint = "/health"
-healthy_threshold = 2
-interval = 30
-timeout = 5000
-type = "&"
-unhealthy_threshold = 3
+ [control_center.monitoring.health_check]
+ enabled = false
+ endpoint = "/health"
+ healthy_threshold = 2
+ interval = 30
+ timeout = 5000
+ type = "&"
+ unhealthy_threshold = 3
 
-[control_center.monitoring.metrics]
-buffer_size = 1000
-enabled = false
-interval = 60
-prometheus_path = "/metrics"
-retention_days = 30
+ [control_center.monitoring.metrics]
+ buffer_size = 1000
+ enabled = false
+ interval = 60
+ prometheus_path = "/metrics"
+ retention_days = 30
 
-[control_center.monitoring.resources]
-alert_threshold = 80
-cpu = false
-disk = false
-memory = false
-network = false
+ [control_center.monitoring.resources]
+ alert_threshold = 80
+ cpu = false
+ disk = false
+ memory = false
+ network = false
 
-[control_center.monitoring.tracing]
-enabled = false
-sample_rate = 0.1
+ [control_center.monitoring.tracing]
+ enabled = false
+ sample_rate = 0.1
 
 [control_center.policy]
 enabled = true
 
-[control_center.policy.cache]
-enabled = true
-max_policies = 10000
-ttl = 3600
+ [control_center.policy.cache]
+ enabled = true
+ max_policies = 10000
+ ttl = 3600
 
-[control_center.policy.versioning]
-enabled = true
-max_versions = 20
+ [control_center.policy.versioning]
+ enabled = true
+ max_versions = 20
 
 [control_center.rbac]
 attribute_based = false
@@ -120,10 +120,10 @@ dynamic_roles = false
 enabled = true
 hierarchy = true
 
-[control_center.rbac.roles]
-admin = true
-operator = true
-viewer = true
+ [control_center.rbac.roles]
+ admin = true
+ operator = true
+ viewer = true
 
 [control_center.security.cors]
 allow_credentials = false
@@ -176,15 +176,15 @@ workers = 4
 audit_enabled = false
 enabled = true
 
-[control_center.users.registration]
-auto_assign_role = "user"
-enabled = true
-requires_approval = false
+ [control_center.users.registration]
+ auto_assign_role = "user"
+ enabled = true
+ requires_approval = false
 
-[control_center.users.sessions]
-absolute_timeout = 86400
-idle_timeout = 3600
-max_active = 5
+ [control_center.users.sessions]
+ absolute_timeout = 86400
+ idle_timeout = 3600
+ max_active = 5
 
 [control_center.workspace]
 enabled = true
diff --git a/config/examples/extension-registry.enterprise.example.toml b/config/examples/extension-registry.enterprise.example.toml
index eb55813..c2744c3 100644
--- a/config/examples/extension-registry.enterprise.example.toml
+++ b/config/examples/extension-registry.enterprise.example.toml
@@ -2,85 +2,85 @@
 # High-availability, multi-source, multi-registry production deployment
 
 [server]
+enable_compression = true
+enable_cors = true
 host = "0.0.0.0"
 port = 8082
 workers = 8
-enable_cors = true
-enable_compression = true
 
 # Primary internal Gitea instance
 [[sources.gitea]]
 id = "primary-internal-gitea"
-url = "https://gitea.internal.company.com"
 organization = "platform-extensions"
-token_path = "/etc/secrets/gitea-primary-token.txt"
 timeout_seconds = 30
+token_path = "/etc/secrets/gitea-primary-token.txt"
+url = "https://gitea.internal.company.com"
 verify_ssl = true
 
 # Secondary internal Gitea (failover)
 [[sources.gitea]]
 id = "secondary-internal-gitea"
-url = "https://gitea-secondary.internal.company.com"
 organization = "platform-extensions"
-token_path = "/etc/secrets/gitea-secondary-token.txt"
 timeout_seconds = 30
+token_path = "/etc/secrets/gitea-secondary-token.txt"
+url = "https://gitea-secondary.internal.company.com"
 verify_ssl = true
 
 # Forgejo for community extensions
 [[sources.forgejo]]
 id = "enterprise-forgejo"
-url = "https://forge.company.com"
 organization = "platform"
-token_path = "/etc/secrets/forgejo-token.txt"
 timeout_seconds = 30
+token_path = "/etc/secrets/forgejo-token.txt"
+url = "https://forge.company.com"
 verify_ssl = true
 
 # GitHub organization
 [[sources.github]]
 id = "company-github"
 organization = "company-platform"
-token_path = "/etc/secrets/github-token.txt"
 timeout_seconds = 30
+token_path = "/etc/secrets/github-token.txt"
 verify_ssl = true
 
 # Primary enterprise OCI registry (Zot)
 [[distributions.oci]]
 id = "primary-oci-zot"
-registry = "zot.internal.company.com"
 namespace = "platform/extensions"
+registry = "zot.internal.company.com"
 timeout_seconds = 30
 verify_ssl = true
 
 # Secondary enterprise OCI registry (Harbor)
 [[distributions.oci]]
-id = "secondary-oci-harbor"
-registry = "harbor.internal.company.com"
-namespace = "platform"
 auth_token_path = "/etc/secrets/harbor-token.txt"
+id = "secondary-oci-harbor"
+namespace = "platform"
+registry = "harbor.internal.company.com"
 timeout_seconds = 30
 verify_ssl = true
 
 # Public Docker Hub for external distribution
 [[distributions.oci]]
-id = "public-docker-hub"
-registry = "docker.io"
-namespace = "company-open-source"
 auth_token_path = "/etc/secrets/docker-hub-token.txt"
+id = "public-docker-hub"
+namespace = "company-open-source"
+registry = "docker.io"
 timeout_seconds = 30
 verify_ssl = true
 
 # Public GHCR for open-source projects
 [[distributions.oci]]
-id = "public-ghcr"
-registry = "ghcr.io"
-namespace = "company-open-source"
 auth_token_path = "/etc/secrets/ghcr-token.txt"
+id = "public-ghcr"
+namespace = "company-open-source"
+registry = "ghcr.io"
 timeout_seconds = 30
 verify_ssl = true
 
 # Caching configuration for high-traffic enterprise environment
 [cache]
 capacity = 5000
-ttl_seconds = 600
-enable_metadata_cache = true
 enable_list_cache = true
+enable_metadata_cache = true
+ttl_seconds = 600
diff --git a/config/examples/extension-registry.multi-backend.example.toml b/config/examples/extension-registry.multi-backend.example.toml
index dbeedab..9001b6b 100644
--- a/config/examples/extension-registry.multi-backend.example.toml
+++ b/config/examples/extension-registry.multi-backend.example.toml
@@ -3,87 +3,87 @@
 # multiple Git-based sources (Gitea, Forgejo, GitHub) and multiple OCI registries
 
 [server]
+enable_compression = true
+enable_cors = false
 host = "0.0.0.0"
 port = 8082
 workers = 4
-enable_cors = false
-enable_compression = true
 
 # Multiple Git-based source backends
 # Internal Gitea instance for private extensions
 [[sources.gitea]]
 id = "internal-gitea"
-url = "https://gitea.internal.example.com"
 organization = "provisioning"
-token_path = "/etc/secrets/gitea-internal-token.txt"
 timeout_seconds = 30
+token_path = "/etc/secrets/gitea-internal-token.txt"
+url = "https://gitea.internal.example.com"
 verify_ssl = true
 
 # Public Gitea instance for community extensions
 [[sources.gitea]]
 id = "public-gitea"
-url = "https://gitea.public.example.com"
 organization = "provisioning-extensions"
-token_path = "/etc/secrets/gitea-public-token.txt"
 timeout_seconds = 30
+token_path = "/etc/secrets/gitea-public-token.txt"
+url = "https://gitea.public.example.com"
 verify_ssl = true
 
 # Forgejo sources (Git-compatible)
 [[sources.forgejo]]
 id = "community-forgejo"
-url = "https://forgejo.community.example.com"
 organization = "provisioning"
-token_path = "/etc/secrets/forgejo-token.txt"
 timeout_seconds = 30
+token_path = "/etc/secrets/forgejo-token.txt"
+url = "https://forgejo.community.example.com"
 verify_ssl = true
 
 # GitHub sources
 [[sources.github]]
 id = "org-github"
 organization = "my-organization"
-token_path = "/etc/secrets/github-token.txt"
 timeout_seconds = 30
+token_path = "/etc/secrets/github-token.txt"
 verify_ssl = true
 
 # Multiple OCI distribution registries
 # Internal Zot registry
 [[distributions.oci]]
 id = "internal-zot"
-registry = "zot.internal.example.com"
 namespace = "provisioning/extensions"
+registry = "zot.internal.example.com"
 timeout_seconds = 30
 verify_ssl = true
 
 # Public Harbor registry
 [[distributions.oci]]
-id = "public-harbor"
-registry = "harbor.public.example.com"
-namespace = "provisioning"
 auth_token_path = "/etc/secrets/harbor-token.txt"
+id = "public-harbor"
+namespace = "provisioning"
+registry = "harbor.public.example.com"
 timeout_seconds = 30
 verify_ssl = true
 
 # Docker Hub
 [[distributions.oci]]
-id = "docker-hub"
-registry = "docker.io"
-namespace = "myorg"
 auth_token_path = "/etc/secrets/docker-hub-token.txt"
+id = "docker-hub"
+namespace = "myorg"
+registry = "docker.io"
 timeout_seconds = 30
 verify_ssl = true
 
 # GHCR (GitHub Container Registry)
 [[distributions.oci]]
-id = "ghcr"
-registry = "ghcr.io"
-namespace = "my-organization"
 auth_token_path = "/etc/secrets/ghcr-token.txt"
+id = "ghcr"
+namespace = "my-organization"
+registry = "ghcr.io"
 timeout_seconds = 30
 verify_ssl = true
 
 # Caching configuration
 [cache]
 capacity = 1000
-ttl_seconds = 300
-enable_metadata_cache = true
 enable_list_cache = true
+enable_metadata_cache = true
+ttl_seconds = 300
diff --git a/config/examples/extension-registry.solo.example.toml b/config/examples/extension-registry.solo.example.toml
index 64fd538..60757e4 100644
--- a/config/examples/extension-registry.solo.example.toml
+++ b/config/examples/extension-registry.solo.example.toml
@@ -3,23 +3,23 @@
 # Old single-instance format (auto-migrated to multi-instance on startup)
 
 [server]
+enable_compression = true
+enable_cors = false
 host = "127.0.0.1"
 port = 8082
 workers = 2
-enable_cors = false
-enable_compression = true
 
 # Single Gitea backend (auto-migrated to sources.gitea[0])
 [gitea]
-url = "http://localhost:3000"
 organization = "provisioning"
-token_path = "/etc/secrets/gitea-token.txt"
 timeout_seconds = 30
+token_path = "/etc/secrets/gitea-token.txt"
+url = "http://localhost:3000"
 verify_ssl = false
 
 # Caching configuration
 [cache]
 capacity = 100
-ttl_seconds = 300
-enable_metadata_cache = true
 enable_list_cache = true
+enable_metadata_cache = true
+ttl_seconds = 300
diff --git a/config/examples/orchestrator.enterprise.example.toml b/config/examples/orchestrator.enterprise.example.toml
index 7d15ba4..a76830d 100644
--- a/config/examples/orchestrator.enterprise.example.toml
+++ b/config/examples/orchestrator.enterprise.example.toml
@@ -3,15 +3,15 @@ metrics = false
 operation_timeout = 1800000
 parallel_limit = 5
 
-[orchestrator.batch.checkpointing]
-enabled = true
-interval = 100
-max_checkpoints = 10
+ [orchestrator.batch.checkpointing]
+ enabled = true
+ interval = 100
+ max_checkpoints = 10
 
-[orchestrator.batch.rollback]
-enabled = true
-max_rollback_depth = 5
-strategy = "checkpoint_based"
+ [orchestrator.batch.rollback]
+ enabled = true
+ max_rollback_depth = 5
+ strategy = "checkpoint_based"
 
 [orchestrator.extensions]
 auto_load = false
@@ -25,66 +25,66 @@ format = "&"
 level = "&"
 outputs = ["stdout"]
 
-[orchestrator.logging.fields]
-caller = false
-hostname = true
-pid = true
-service_name = true
-stack_trace = false
-timestamp = true
+ [orchestrator.logging.fields]
+ caller = false
+ hostname = true
+ pid = true
+ service_name = true
+ stack_trace = false
+ timestamp = true
 
-[orchestrator.logging.file]
-compress = false
-max_age = 30
-max_backups = 10
-max_size = 104857600
-path = "/var/log/provisioning/service.log"
+ [orchestrator.logging.file]
+ compress = false
+ max_age = 30
+ max_backups = 10
+ max_size = 104857600
+ path = "/var/log/provisioning/service.log"
 
-[orchestrator.logging.performance]
-enabled = false
-memory_info = false
-slow_threshold = 1000
+ [orchestrator.logging.performance]
+ enabled = false
+ memory_info = false
+ slow_threshold = 1000
 
-[orchestrator.logging.sampling]
-enabled = false
-initial = 100
-thereafter = 100
+ [orchestrator.logging.sampling]
+ enabled = false
+ initial = 100
+ thereafter = 100
 
-[orchestrator.logging.syslog]
-protocol = "udp"
+ [orchestrator.logging.syslog]
+ protocol = "udp"
 
 [orchestrator.monitoring]
 enabled = false
 
-[orchestrator.monitoring.alerting]
-enabled = false
+ [orchestrator.monitoring.alerting]
+ enabled = false
 
-[orchestrator.monitoring.health_check]
-enabled = false
-endpoint = "/health"
-healthy_threshold = 2
-interval = 30
-timeout = 5000
-type = "&"
-unhealthy_threshold = 3
+ [orchestrator.monitoring.health_check]
+ enabled = false
+ endpoint = "/health"
+ healthy_threshold = 2
+ interval = 30
+ timeout = 5000
+ type = "&"
+ unhealthy_threshold = 3
 
-[orchestrator.monitoring.metrics]
-buffer_size = 1000
-enabled = false
-interval = 60
-prometheus_path = "/metrics"
-retention_days = 30
+ [orchestrator.monitoring.metrics]
+ buffer_size = 1000
+ enabled = false
+ interval = 60
+ prometheus_path = "/metrics"
+ retention_days = 30
 
-[orchestrator.monitoring.resources]
-alert_threshold = 80
-cpu = false
-disk = false
-memory = false
-network = false
+ [orchestrator.monitoring.resources]
+ alert_threshold = 80
+ cpu = false
+ disk = false
+ memory = false
+ network = false
 
-[orchestrator.monitoring.tracing]
-enabled = false
-sample_rate = 0.1
+ [orchestrator.monitoring.tracing]
+ enabled = false
+ sample_rate = 0.1
 
 [orchestrator.queue]
 max_concurrent_tasks = 5
@@ -95,9 +95,9 @@ retry_attempts = 3
 retry_delay = 5000
 task_timeout = 3600000
 
-[orchestrator.queue.dead_letter_queue]
-enabled = true
-max_size = 1000
+ [orchestrator.queue.dead_letter_queue]
+ enabled = true
+ max_size = 1000
 
 [orchestrator.server]
 graceful_shutdown = true
@@ -113,11 +113,11 @@ workers = 4
 backend = "filesystem"
 path = "/var/lib/provisioning/orchestrator/data"
 
-[orchestrator.storage.cache]
-enabled = true
-eviction_policy = "lru"
-ttl = 3600
-type = "in_memory"
+ [orchestrator.storage.cache]
+ enabled = true
+ eviction_policy = "lru"
+ ttl = 3600
+ type = "in_memory"
 
 [orchestrator.workspace]
 enabled = true
diff --git a/config/examples/orchestrator.solo.example.toml b/config/examples/orchestrator.solo.example.toml
index 7d15ba4..a76830d 100644
--- a/config/examples/orchestrator.solo.example.toml
+++ b/config/examples/orchestrator.solo.example.toml
@@ -3,15 +3,15 @@ metrics = false
 operation_timeout = 1800000
 parallel_limit = 5
 
-[orchestrator.batch.checkpointing]
-enabled = true
-interval = 100
-max_checkpoints = 10
+ [orchestrator.batch.checkpointing]
+ enabled = true
+ interval = 100
+ max_checkpoints = 10
 
-[orchestrator.batch.rollback]
-enabled = true
-max_rollback_depth = 5
-strategy = "checkpoint_based"
+ [orchestrator.batch.rollback]
+ enabled = true
+ max_rollback_depth = 5
+ strategy = "checkpoint_based"
 
 [orchestrator.extensions]
 auto_load = false
@@ -25,66 +25,66 @@ format = "&"
 level = "&"
 outputs = ["stdout"]
 
-[orchestrator.logging.fields]
-caller = false
-hostname = true
-pid = true
-service_name = true
-stack_trace = false
-timestamp = true
+ [orchestrator.logging.fields]
+ caller = false
+ hostname = true
+ pid = true
+ service_name = true
+ stack_trace = false
+ timestamp = true
 
-[orchestrator.logging.file]
-compress = false
-max_age = 30
-max_backups = 10
-max_size = 104857600
-path = "/var/log/provisioning/service.log"
+ [orchestrator.logging.file]
+ compress = false
+ max_age = 30
+ max_backups = 10
+ max_size = 104857600
+ path = "/var/log/provisioning/service.log"
 
-[orchestrator.logging.performance]
-enabled = false
-memory_info = false
-slow_threshold = 1000
+ [orchestrator.logging.performance]
+ enabled = false
+ memory_info = false
+ slow_threshold = 1000
 
-[orchestrator.logging.sampling]
-enabled = false
-initial = 100
-thereafter = 100
+ [orchestrator.logging.sampling]
+ enabled = false
+ initial = 100
+ thereafter = 100
 
-[orchestrator.logging.syslog]
-protocol = "udp"
+ [orchestrator.logging.syslog]
+ protocol = "udp"
 
 [orchestrator.monitoring]
 enabled = false
 
-[orchestrator.monitoring.alerting]
-enabled = false
+ [orchestrator.monitoring.alerting]
+ enabled = false
 
-[orchestrator.monitoring.health_check]
-enabled = false
-endpoint = "/health"
-healthy_threshold = 2
-interval = 30
-timeout = 5000
-type = "&"
-unhealthy_threshold = 3
+ [orchestrator.monitoring.health_check]
+ enabled = false
+ endpoint = "/health"
+ healthy_threshold = 2
+ interval = 30
+ timeout = 5000
+ type = "&"
+ unhealthy_threshold = 3
 
-[orchestrator.monitoring.metrics]
-buffer_size = 1000
-enabled = false
-interval = 60
-prometheus_path = "/metrics"
-retention_days = 30
+ [orchestrator.monitoring.metrics]
+ buffer_size = 1000
+ enabled = false
+ interval = 60
+ prometheus_path = "/metrics"
+ retention_days = 30
 
-[orchestrator.monitoring.resources]
-alert_threshold = 80
-cpu = false
-disk = false
-memory = false
-network = false
+ [orchestrator.monitoring.resources]
+ alert_threshold = 80
+ cpu = false
+ disk = false
+ memory = false
+ network = false
 
-[orchestrator.monitoring.tracing]
-enabled = false
-sample_rate = 0.1
+ [orchestrator.monitoring.tracing]
+ enabled = false
+ sample_rate = 0.1
 
 [orchestrator.queue]
 max_concurrent_tasks = 5
@@ -95,9 +95,9 @@ retry_attempts = 3
 retry_delay = 5000
 task_timeout = 3600000
 
-[orchestrator.queue.dead_letter_queue]
-enabled = true
-max_size = 1000
+ [orchestrator.queue.dead_letter_queue]
+ enabled = true
+ max_size = 1000
 
 [orchestrator.server]
 graceful_shutdown = true
@@ -113,11 +113,11 @@ workers = 4
 backend = "filesystem"
 path = "/var/lib/provisioning/orchestrator/data"
 
-[orchestrator.storage.cache]
-enabled = true
-eviction_policy = "lru"
-ttl = 3600
-type = "in_memory"
+ [orchestrator.storage.cache]
+ enabled = true
+ eviction_policy = "lru"
+ ttl = 3600
+ type = "in_memory"
 
 [orchestrator.workspace]
 enabled = true