# Example: Orchestrator Configuration - Enterprise Mode (Production HA)
#
# This example shows a production-grade orchestrator setup with:
# - SurrealDB cluster for distributed storage
# - High concurrency and throughput
# - Comprehensive monitoring and observability
# - Full security and audit logging
# - Advanced performance tuning
#
# Usage:
#   nickel export --format toml orchestrator-enterprise.ncl > orchestrator.enterprise.toml
#   ORCHESTRATOR_CONFIG=orchestrator.enterprise.toml cargo run --bin orchestrator
#
# Layout note: `#` starts a comment that runs to the end of the line, so every
# field below sits on its own line. Joining lines would comment out the fields
# that follow a `#`.

{
  # Workspace Configuration
  workspace = {
    name = "production",
    path = "/var/lib/provisioning/orchestrator",
    enabled = true,
    multi_workspace = true, # Support multiple workspaces in production
  },

  # Server Configuration: High performance
  server = {
    host = "0.0.0.0", # Listen on all interfaces
    port = 9090,
    workers = 16, # Multiple workers for high concurrency
    keep_alive = 30, # Shorter keep-alive for better connection management
    max_connections = 4096, # High limit for production
  },

  # Storage: SurrealDB Cluster (for HA)
  storage = {
    backend = "surrealdb_cluster",
    # Comma-separated list of the two cluster nodes.
    surrealdb_url = "surrealdb://surrealdb-1.provisioning.svc.cluster.local:8000,surrealdb-2.provisioning.svc.cluster.local:8000",
    surrealdb_namespace = "provisioning",
    surrealdb_database = "orchestrator-prod",
  },

  # Queue/Task Processing: High throughput
  queue = {
    max_concurrent_tasks = 100, # Maximum concurrency for production
    retry_attempts = 5, # More retries for reliability
    retry_delay = 2000, # ms (exponential backoff)
    task_timeout = 7200000, # ms; 2 hours for long-running tasks
    deadletter_queue = {
      enabled = true,
      max_messages = 10000, # Large queue for error handling
      # NOTE(review): 604800 is 7 days in seconds, while the timeouts above
      # are in milliseconds - confirm the unit the orchestrator expects.
      retention_period = 604800, # 7 days
    },
    priority_levels = ["low", "normal", "high", "critical"],
    default_priority = "normal",
  },

  # Batch Workflow: Optimized for throughput
  batch = {
    parallel_limit = 50, # High parallelism
    operation_timeout = 3600000, # ms; 1 hour
    checkpoint = {
      enabled = true,
      interval = 1000, # Checkpoint frequently for reliability
      auto_cleanup = true,
      max_checkpoints = 100, # Keep many checkpoints
    },
    rollback = {
      strategy = "automatic",
      retain_logs = true,
    },
  },

  # Monitoring: Comprehensive production observability
  monitoring = {
    enabled = true,
    metrics = {
      enabled = true,
      interval = 10, # Frequent metrics collection
      export_format = "prometheus",
    },
    # NOTE(review): units for interval/timeout are not stated in this file
    # (presumably seconds) - confirm against the orchestrator schema.
    health_check = {
      enabled = true,
      interval = 30,
      timeout = 5,
    },
    resources = {
      track_cpu = true,
      track_memory = true,
      track_disk = true,
      alert_threshold_cpu = 85, # Alert at 85% CPU
      alert_threshold_memory = 90, # Alert at 90% memory
      alert_threshold_disk = 95, # Alert at 95% disk
    },
    profiling = {
      enabled = true,
      sample_rate = 0.1, # Profile 10% of requests
    },
  },

  # Logging: Production-grade with audit trail
  logging = {
    level = "info", # Information level for production
    format = "json", # Structured logging for aggregation
    outputs = [
      {
        destination = "stdout",
        level = "warn", # Only warnings and above to stdout
      },
      {
        destination = "file",
        path = "/var/log/provisioning/orchestrator/orchestrator.log",
        level = "info",
        rotation = {
          max_size = "500MB", # Larger files in production
          max_backups = 30, # Keep many backups
          max_age = 90, # Keep for 90 days
        },
      },
      {
        destination = "file",
        path = "/var/log/provisioning/orchestrator/audit.log",
        level = "info",
        rotation = {
          max_size = "200MB",
          max_backups = 365, # Keep audit logs for 1 year
          max_age = 365,
        },
      },
    ],
    include_fields = [
      "timestamp",
      "level",
      "message",
      "task_id",
      "workflow_id",
      "user_id",
      "duration",
      "status",
      "error",
      "context",
    ],
  },

  # Security: Full production security
  security = {
    auth = {
      enabled = true,
      method = "jwt",
      # NOTE(review): Nickel interpolation is written %{...}, so the value
      # below is exported as the literal placeholder text. Presumably the
      # orchestrator substitutes it from the environment at load time -
      # confirm.
      jwt_secret = "${JWT_SECRET}", # From environment
      jwt_issuer = "provisioning.production",
      jwt_audience = "orchestrator-prod",
      token_expiration = 3600, # seconds; 1 hour
    },
    cors = {
      enabled = true,
      allowed_origins = [
        "https://orchestrator.example.com",
        "https://control-center.example.com",
      ],
      allowed_methods = ["GET", "POST", "PUT"],
      allowed_headers = ["Content-Type", "Authorization"],
      expose_headers = ["X-Request-ID", "X-Total-Count"],
    },
    tls = {
      enabled = true, # TLS required for production
      cert_path = "/etc/provisioning/certs/orchestrator.crt",
      key_path = "/etc/provisioning/certs/orchestrator.key",
      min_version = "TLSv1.3",
    },
    rate_limit = {
      enabled = true,
      requests_per_second = 10000, # High limit for production
      burst_size = 1000,
    },
  },

  # Extensions: Enabled for production capability
  extensions = {
    auto_load = true,
    oci_registry_url = "registry.example.com:5000",
    oci_namespace = "provisioning/extensions",
    refresh_interval = 24, # hours; check for updates daily
    max_concurrent_init = 10, # Load extensions in parallel
  },

  # Database: Connection pooling for performance
  database = {
    pool = {
      min_size = 20, # Pre-allocated connections
      max_size = 100, # Maximum connections
      connection_timeout = 10, # Shorter timeout for production
      idle_timeout = 600, # seconds; 10 minutes
      max_lifetime = 3600, # seconds; 1 hour
    },
    # NOTE(review): backoff units are not stated in this file (presumably
    # milliseconds) - confirm against the orchestrator schema.
    retry = {
      max_attempts = 5,
      initial_backoff = 100,
      max_backoff = 30000,
    },
  },

  # Features: Production-ready with full auditing
  features = {
    enable_audit_logging = true, # Full audit trail
    enable_task_history = true, # Keep task history
    enable_performance_tracking = true, # Track all metrics
    enable_experimental_features = false, # No experimental features in production
  },
}