# provisioning/schemas/platform/examples/orchestrator-enterprise.ncl

# Example: Orchestrator Configuration - Enterprise Mode (Production HA)
#
# This example shows a production-grade orchestrator setup with:
# - SurrealDB cluster for distributed storage
# - High concurrency and throughput
# - Comprehensive monitoring and observability
# - Full security and audit logging
# - Advanced performance tuning
#
# Usage:
#   nickel export --format toml orchestrator-enterprise.ncl > orchestrator.enterprise.toml
#   ORCHESTRATOR_CONFIG=orchestrator.enterprise.toml cargo run --bin orchestrator

{
  # Workspace Configuration
  # Names the workspace, gives its filesystem location, and turns on
  # multi-workspace support.
  workspace = {
    name = "production",
    # NOTE(review): presumably the orchestrator's working/data directory —
    # confirm against the schema.
    path = "/var/lib/provisioning/orchestrator",
    enabled = true,
    multi_workspace = true,  # Support multiple workspaces in production
  },

  # Server Configuration: High performance
  # Bind address/port plus worker and connection limits.
  server = {
    host = "0.0.0.0",  # Listen on all interfaces
    port = 9090,
    workers = 16,  # Multiple workers for high concurrency
    # NOTE(review): the unit is not stated here (presumably seconds) —
    # confirm against the schema.
    keep_alive = 30,  # Shorter keep-alive for better connection management
    max_connections = 4096,  # High limit for production
  },

  # Storage: SurrealDB Cluster (for HA)
  # Selects the clustered SurrealDB backend and the namespace/database to use.
  storage = {
    backend = "surrealdb_cluster",
    # Two endpoints (surrealdb-1 and surrealdb-2, both on port 8000) are packed,
    # comma-separated, into a single URL string.
    # NOTE(review): confirm the orchestrator's storage client accepts this
    # multi-host form and the `surrealdb://` scheme.
    surrealdb_url = "surrealdb://surrealdb-1.provisioning.svc.cluster.local:8000,surrealdb-2.provisioning.svc.cluster.local:8000",
    surrealdb_namespace = "provisioning",
    surrealdb_database = "orchestrator-prod",
  },

  # Queue/Task Processing: High throughput
  # Concurrency, retry, timeout, dead-letter and priority settings for tasks.
  # Units are mixed in this section: going by the annotated durations,
  # retry_delay and task_timeout are in milliseconds, while
  # deadletter_queue.retention_period is in seconds.
  queue = {
    max_concurrent_tasks = 100,  # Maximum concurrency for production
    retry_attempts = 5,  # More retries for reliability
    retry_delay = 2000,  # Milliseconds (2 s); retries use exponential backoff
    task_timeout = 7200000,  # Milliseconds: 2 hours for long-running tasks

    deadletter_queue = {
      enabled = true,
      max_messages = 10000,  # Large queue for error handling
      retention_period = 604800,  # Seconds: 7 days (7 * 24 * 3600)
    },

    # NOTE(review): levels look ordered from lowest to highest — confirm the
    # orchestrator relies on list order.
    priority_levels = ["low", "normal", "high", "critical"],
    default_priority = "normal",  # Matches an entry in priority_levels
  },

  # Batch Workflow: Optimized for throughput
  # Parallelism and timeout for batch operations, plus checkpoint and
  # rollback behavior.
  batch = {
    parallel_limit = 50,  # High parallelism
    operation_timeout = 3600000,  # Milliseconds: 1 hour

    checkpoint = {
      enabled = true,
      # NOTE(review): the unit is not stated here (milliseconds? number of
      # operations?) — confirm against the schema.
      interval = 1000,  # Checkpoint frequently for reliability
      auto_cleanup = true,
      max_checkpoints = 100,  # Keep many checkpoints
    },

    rollback = {
      strategy = "automatic",
      retain_logs = true,
    },
  },

  # Monitoring: Comprehensive production observability
  # Enables metrics export, health checks, resource tracking with alert
  # thresholds, and sampled profiling.
  monitoring = {
    enabled = true,

    metrics = {
      enabled = true,
      # NOTE(review): the unit is not stated here (presumably seconds) — confirm.
      interval = 10,  # Frequent metrics collection
      export_format = "prometheus",
    },

    # NOTE(review): interval/timeout units are not stated here (presumably
    # seconds) — confirm against the schema.
    health_check = {
      enabled = true,
      interval = 30,
      timeout = 5,
    },

    # Alert thresholds are percentages of the tracked resource.
    resources = {
      track_cpu = true,
      track_memory = true,
      track_disk = true,
      alert_threshold_cpu = 85,  # Alert at 85% CPU
      alert_threshold_memory = 90,  # Alert at 90% memory
      alert_threshold_disk = 95,  # Alert at 95% disk
    },

    profiling = {
      enabled = true,
      sample_rate = 0.1,  # Fraction of requests profiled (0.1 = 10%)
    },
  },

  # Logging: Production-grade with audit trail
  # Sets the global level and format, three output sinks (stdout, main log
  # file, audit log file), and the fields included in each record.
  logging = {
    level = "info",  # Information level for production
    format = "json",  # Structured logging for aggregation

    outputs = [
      # Sink 1: console, with its own stricter level.
      {
        destination = "stdout",
        level = "warn",  # Only warnings and above to stdout
      },
      # Sink 2: main log file with size/count/age-based rotation.
      {
        destination = "file",
        path = "/var/log/provisioning/orchestrator/orchestrator.log",
        level = "info",
        rotation = {
          max_size = "500MB",  # Larger files in production
          max_backups = 30,  # Keep many backups
          max_age = 90,  # Keep for 90 days
        },
      },
      # Sink 3: audit log file.
      # NOTE(review): apart from path and rotation this entry is identical to
      # the main file sink; nothing here restricts it to audit events —
      # confirm how the orchestrator routes audit records to this file.
      {
        destination = "file",
        path = "/var/log/provisioning/orchestrator/audit.log",
        level = "info",
        rotation = {
          max_size = "200MB",
          max_backups = 365,  # Keep audit logs for 1 year
          max_age = 365,  # Presumably days, as for the main log (1 year) — confirm
        },
      },
    ],

    # Field names to include in each log record.
    include_fields = [
      "timestamp",
      "level",
      "message",
      "task_id",
      "workflow_id",
      "user_id",
      "duration",
      "status",
      "error",
      "context",
    ],
  },

  # Security: Full production security
  # JWT authentication, CORS policy, TLS, and request rate limiting.
  security = {
    auth = {
      enabled = true,
      method = "jwt",
      # This is a placeholder, not a Nickel interpolation: Nickel interpolates
      # with `%{...}`, so `nickel export` emits the literal text ${JWT_SECRET}.
      # NOTE(review): presumably the orchestrator substitutes the JWT_SECRET
      # environment variable when it loads the config — confirm.
      jwt_secret = "${JWT_SECRET}",  # From environment
      jwt_issuer = "provisioning.production",
      jwt_audience = "orchestrator-prod",
      token_expiration = 3600,  # Seconds: 1 hour
    },

    cors = {
      enabled = true,
      allowed_origins = [
        "https://orchestrator.example.com",
        "https://control-center.example.com",
      ],
      # NOTE(review): only GET/POST/PUT are listed — confirm no browser client
      # needs DELETE or PATCH.
      allowed_methods = ["GET", "POST", "PUT"],
      allowed_headers = ["Content-Type", "Authorization"],
      expose_headers = ["X-Request-ID", "X-Total-Count"],
    },

    tls = {
      enabled = true,  # TLS required for production
      cert_path = "/etc/provisioning/certs/orchestrator.crt",
      key_path = "/etc/provisioning/certs/orchestrator.key",
      min_version = "TLSv1.3",
    },

    rate_limit = {
      enabled = true,
      requests_per_second = 10000,  # High limit for production
      burst_size = 1000,
    },
  },

  # Extensions: Enabled for production capability
  # Where extensions are fetched from (OCI registry + namespace) and how they
  # are loaded and refreshed.
  extensions = {
    auto_load = true,
    oci_registry_url = "registry.example.com:5000",
    oci_namespace = "provisioning/extensions",
    refresh_interval = 24,  # Check for updates daily (value is in hours)
    max_concurrent_init = 10,  # Load extensions in parallel
  },

  # Database: Connection pooling for performance
  # Pool sizing/timeouts and the retry policy for database operations.
  database = {
    # Going by the annotated durations, the pool timeouts are in seconds.
    pool = {
      min_size = 20,  # Pre-allocated connections
      max_size = 100,  # Maximum connections
      connection_timeout = 10,  # Shorter timeout for production
      idle_timeout = 600,  # 10 minutes
      max_lifetime = 3600,  # 1 hour
    },

    # NOTE(review): backoff units are not stated here (presumably
    # milliseconds, i.e. 100 ms up to 30 s) — confirm against the schema.
    retry = {
      max_attempts = 5,
      initial_backoff = 100,
      max_backoff = 30000,
    },
  },

  # Features: Production-ready with full auditing
  # Boolean feature flags; everything is on except experimental features.
  features = {
    enable_audit_logging = true,  # Full audit trail
    enable_task_history = true,  # Keep task history
    enable_performance_tracking = true,  # Track all metrics
    enable_experimental_features = false,  # No experimental features in production
  },
}
chore: complete nickel migration and consolidate legacy configs - Remove KCL ecosystem (~220 files deleted) - Migrate all infrastructure to Nickel schema system - Consolidate documentation: legacy docs → provisioning/docs/src/ - Add CI/CD workflows (.github/) and Rust build config (.cargo/) - Update core system for Nickel schema parsing - Update README.md and CHANGES.md for v5.0.0 release - Fix pre-commit hooks: end-of-file, trailing-whitespace - Breaking changes: KCL workspaces require migration - Migration bridge available in docs/src/development/ 2026-01-08 09:55:37 +00:00			`# Example: Orchestrator Configuration - Enterprise Mode (Production HA)`
			`#`
			`# This example shows a production-grade orchestrator setup with:`
			`# - SurrealDB cluster for distributed storage`
			`# - High concurrency and throughput`
			`# - Comprehensive monitoring and observability`
			`# - Full security and audit logging`
			`# - Advanced performance tuning`
			`#`
			`# Usage:`
			`# nickel export --format toml orchestrator-enterprise.ncl > orchestrator.enterprise.toml`
			`# ORCHESTRATOR_CONFIG=orchestrator.enterprise.toml cargo run --bin orchestrator`

			`{`
			`# Workspace Configuration`
			`workspace = {`
			`name = "production",`
			`path = "/var/lib/provisioning/orchestrator",`
			`enabled = true,`
			`multi_workspace = true, # Support multiple workspaces in production`
			`},`

			`# Server Configuration: High performance`
			`server = {`
			`host = "0.0.0.0", # Listen on all interfaces`
			`port = 9090,`
			`workers = 16, # Multiple workers for high concurrency`
			`keep_alive = 30, # Shorter keep-alive for better connection management`
			`max_connections = 4096, # High limit for production`
			`},`

			`# Storage: SurrealDB Cluster (for HA)`
			`storage = {`
			`backend = "surrealdb_cluster",`
			`surrealdb_url = "surrealdb://surrealdb-1.provisioning.svc.cluster.local:8000,surrealdb-2.provisioning.svc.cluster.local:8000",`
			`surrealdb_namespace = "provisioning",`
			`surrealdb_database = "orchestrator-prod",`
			`},`

			`# Queue/Task Processing: High throughput`
			`queue = {`
			`max_concurrent_tasks = 100, # Maximum concurrency for production`
			`retry_attempts = 5, # More retries for reliability`
			`retry_delay = 2000, # ms (exponential backoff)`
			`task_timeout = 7200000, # 2 hours for long-running tasks`

			`deadletter_queue = {`
			`enabled = true,`
			`max_messages = 10000, # Large queue for error handling`
			`retention_period = 604800, # 7 days`
			`},`

			`priority_levels = ["low", "normal", "high", "critical"],`
			`default_priority = "normal",`
			`},`

			`# Batch Workflow: Optimized for throughput`
			`batch = {`
			`parallel_limit = 50, # High parallelism`
			`operation_timeout = 3600000, # 1 hour`

			`checkpoint = {`
			`enabled = true,`
			`interval = 1000, # Checkpoint frequently for reliability`
			`auto_cleanup = true,`
			`max_checkpoints = 100, # Keep many checkpoints`
			`},`

			`rollback = {`
			`strategy = "automatic",`
			`retain_logs = true,`
			`},`
			`},`

			`# Monitoring: Comprehensive production observability`
			`monitoring = {`
			`enabled = true,`

			`metrics = {`
			`enabled = true,`
			`interval = 10, # Frequent metrics collection`
			`export_format = "prometheus",`
			`},`

			`health_check = {`
			`enabled = true,`
			`interval = 30,`
			`timeout = 5,`
			`},`

			`resources = {`
			`track_cpu = true,`
			`track_memory = true,`
			`track_disk = true,`
			`alert_threshold_cpu = 85, # Alert at 85% CPU`
			`alert_threshold_memory = 90, # Alert at 90% memory`
			`alert_threshold_disk = 95, # Alert at 95% disk`
			`},`

			`profiling = {`
			`enabled = true,`
			`sample_rate = 0.1, # Profile 10% of requests`
			`},`
			`},`

			`# Logging: Production-grade with audit trail`
			`logging = {`
			`level = "info", # Information level for production`
			`format = "json", # Structured logging for aggregation`

			`outputs = [`
			`{`
			`destination = "stdout",`
			`level = "warn", # Only warnings and above to stdout`
			`},`
			`{`
			`destination = "file",`
			`path = "/var/log/provisioning/orchestrator/orchestrator.log",`
			`level = "info",`
			`rotation = {`
			`max_size = "500MB", # Larger files in production`
			`max_backups = 30, # Keep many backups`
			`max_age = 90, # Keep for 90 days`
			`},`
			`},`
			`{`
			`destination = "file",`
			`path = "/var/log/provisioning/orchestrator/audit.log",`
			`level = "info",`
			`rotation = {`
			`max_size = "200MB",`
			`max_backups = 365, # Keep audit logs for 1 year`
			`max_age = 365,`
			`},`
			`},`
			`],`

			`include_fields = [`
			`"timestamp",`
			`"level",`
			`"message",`
			`"task_id",`
			`"workflow_id",`
			`"user_id",`
			`"duration",`
			`"status",`
			`"error",`
			`"context",`
			`],`
			`},`

			`# Security: Full production security`
			`security = {`
			`auth = {`
			`enabled = true,`
			`method = "jwt",`
			`jwt_secret = "${JWT_SECRET}", # From environment`
			`jwt_issuer = "provisioning.production",`
			`jwt_audience = "orchestrator-prod",`
			`token_expiration = 3600, # 1 hour`
			`},`

			`cors = {`
			`enabled = true,`
			`allowed_origins = [`
			`"https://orchestrator.example.com",`
			`"https://control-center.example.com",`
			`],`
			`allowed_methods = ["GET", "POST", "PUT"],`
			`allowed_headers = ["Content-Type", "Authorization"],`
			`expose_headers = ["X-Request-ID", "X-Total-Count"],`
			`},`

			`tls = {`
			`enabled = true, # TLS required for production`
			`cert_path = "/etc/provisioning/certs/orchestrator.crt",`
			`key_path = "/etc/provisioning/certs/orchestrator.key",`
			`min_version = "TLSv1.3",`
			`},`

			`rate_limit = {`
			`enabled = true,`
			`requests_per_second = 10000, # High limit for production`
			`burst_size = 1000,`
			`},`
			`},`

			`# Extensions: Enabled for production capability`
			`extensions = {`
			`auto_load = true,`
			`oci_registry_url = "registry.example.com:5000",`
			`oci_namespace = "provisioning/extensions",`
			`refresh_interval = 24, # Check for updates daily`
			`max_concurrent_init = 10, # Load extensions in parallel`
			`},`

			`# Database: Connection pooling for performance`
			`database = {`
			`pool = {`
			`min_size = 20, # Pre-allocated connections`
			`max_size = 100, # Maximum connections`
			`connection_timeout = 10, # Shorter timeout for production`
			`idle_timeout = 600, # 10 minutes`
			`max_lifetime = 3600, # 1 hour`
			`},`

			`retry = {`
			`max_attempts = 5,`
			`initial_backoff = 100,`
			`max_backoff = 30000,`
			`},`
			`},`

			`# Features: Production-ready with full auditing`
			`features = {`
			`enable_audit_logging = true, # Full audit trail`
			`enable_task_history = true, # Keep task history`
			`enable_performance_tracking = true, # Track all metrics`
			`enable_experimental_features = false, # No experimental features in production`
			`},`
			`}`