# Orchestrator Service Configuration - TOML Export
# Generates TOML configuration for the Orchestrator service.
# Supports 4 deployment modes: solo, multiuser, cicd, enterprise
#
# Usage:
#   nickel export --format toml orchestrator-config.toml.ncl > orchestrator.toml
#   ORCHESTRATOR_MODE=solo nickel export --format toml orchestrator-config.toml.ncl > orchestrator.solo.toml
#
# NOTE(review): nothing in this file reads ORCHESTRATOR_MODE; the record holds
# static defaults, so mode-specific output presumably relies on external
# tooling or a merge layer - confirm.

{
# Workspace Configuration
# Default workspace: its name, on-disk path, and the enable / multi-workspace switches.
workspace = {
  name = "default",
  path = "/var/lib/provisioning/orchestrator",
  enabled = true,
  # Multi-workspace support is switched off in this default.
  multi_workspace = false,
},

# Server Configuration
# Listener address, port, and connection-handling limits.
server = {
  # 0.0.0.0 is the IPv4 wildcard address (all interfaces).
  host = "0.0.0.0",
  port = 9090,
  workers = 4,
  # NOTE(review): no unit is stated for keep_alive; presumably seconds - confirm.
  keep_alive = 75,
  max_connections = 512,
},

# Storage Backend Configuration
storage = {
  # Mode-specific overrides:
  #   - solo:       "filesystem"
  #   - multiuser:  "surrealdb_server"
  #   - cicd:       "filesystem" (ephemeral)
  #   - enterprise: "surrealdb_cluster"
  # NOTE(review): this record only sets the "filesystem" default; the overrides
  # listed above are not applied here and presumably come from elsewhere - confirm.
  backend = "filesystem",

  # Filesystem storage (solo, cicd modes)
  path = "/var/lib/provisioning/orchestrator/data",

  # SurrealDB connection (multiuser, enterprise modes)
  # Kept commented out, so these keys are absent from the exported TOML.
  # surrealdb_url = "surrealdb://localhost:8000",
  # surrealdb_namespace = "provisioning",
  # surrealdb_database = "orchestrator",
},

# Queue/Task Processing Configuration
queue = {
  # Maximum concurrent tasks running simultaneously
  # Constraints: min=1, max=100
  # (documented only; no contract in this record enforces the range)
  max_concurrent_tasks = 5,

  # Retry strategy for failed tasks
  retry_attempts = 3,
  retry_delay = 5000, # milliseconds

  # Task execution timeout
  task_timeout = 3600000, # milliseconds (1 hour)

  # Task deadletter queue configuration
  deadletter_queue = {
    enabled = true,
    max_messages = 1000,
    # Unit is seconds here, unlike the millisecond values above.
    retention_period = 86400, # seconds (24 hours)
  },

  # Task priority levels
  # default_priority names one of the entries in priority_levels.
  priority_levels = ["low", "normal", "high"],
  default_priority = "normal",
},

# Batch Workflow Configuration
batch = {
  # Maximum parallel operations within a batch
  # Constraints: min=1, max=50
  # (documented only; no contract in this record enforces the range)
  parallel_limit = 5,

  # Operation execution timeout
  operation_timeout = 1800000, # milliseconds (30 minutes)

  # Batch checkpoint strategy
  checkpoint = {
    enabled = true,
    interval = 100, # Save checkpoint every N operations
    auto_cleanup = true,
    max_checkpoints = 10,
  },

  # Rollback strategy
  rollback = {
    strategy = "automatic", # "automatic" or "manual"
    retain_logs = true,
  },
},

# Monitoring and Observability
# Top-level switch plus four sub-records: metrics, health_check, resources, profiling.
monitoring = {
  enabled = true,

  # Metrics collection
  metrics = {
    enabled = true,
    interval = 30, # seconds
    export_format = "prometheus",
  },

  # Health check configuration
  health_check = {
    enabled = true,
    interval = 30, # seconds
    timeout = 10, # seconds
  },

  # Resource monitoring
  # Each tracked resource has its own alert threshold, expressed in percent.
  resources = {
    track_cpu = true,
    track_memory = true,
    track_disk = true,
    alert_threshold_cpu = 90, # percent
    alert_threshold_memory = 85, # percent
    alert_threshold_disk = 90, # percent
  },

  # Performance profiling
  # Disabled in this default; sample_rate is a fraction (0.1 = 10%).
  profiling = {
    enabled = false,
    sample_rate = 0.1, # 10% of requests
  },
},

# Logging Configuration
logging = {
  # Log level: trace, debug, info, warn, error
  level = "info",

  # Log format: json, text
  format = "json",

  # Log output destinations
  # Each entry carries its own level: stdout at "info", the file at "debug".
  # NOTE(review): the top-level level is "info" while the file output asks for
  # "debug"; how the two combine is decided by the service - confirm the file
  # actually receives debug entries.
  outputs = [
    {
      destination = "stdout",
      level = "info",
    },
    {
      destination = "file",
      path = "/var/log/provisioning/orchestrator/orchestrator.log",
      level = "debug",
      # Rotation limits for the log file.
      rotation = {
        max_size = "100MB",
        max_backups = 10,
        max_age = 30, # days
      },
    },
  ],

  # Structured logging fields
  include_fields = [
    "timestamp",
    "level",
    "message",
    "task_id",
    "workflow_id",
    "duration",
    "status",
  ],
},

# Security Configuration
# Sub-records: auth (JWT), cors, tls, rate_limit.
security = {
  # Authentication
  auth = {
    enabled = true,
    method = "jwt",
    # Nickel string interpolation is written %{...}, so the value below is NOT
    # expanded by `nickel export`; the TOML receives the literal text ${JWT_SECRET}.
    # NOTE(review): presumably substituted at deploy/run time - confirm, and
    # never commit a real secret here.
    jwt_secret = "${JWT_SECRET}",
    jwt_issuer = "provisioning.local",
    jwt_audience = "orchestrator",
    token_expiration = 3600, # seconds (1 hour)
  },

  # CORS configuration
  cors = {
    enabled = true,
    allowed_origins = ["https://control-center:8080"],
    allowed_methods = ["GET", "POST", "PUT", "DELETE"],
    allowed_headers = ["Content-Type", "Authorization"],
    expose_headers = ["X-Request-ID"],
  },

  # TLS/SSL configuration
  # cert_path / key_path are set even though TLS is disabled in this default.
  tls = {
    enabled = false, # Typically behind reverse proxy
    cert_path = "/etc/provisioning/certs/cert.pem",
    key_path = "/etc/provisioning/certs/key.pem",
    min_version = "TLSv1.2",
  },

  # Rate limiting
  rate_limit = {
    enabled = true,
    requests_per_second = 1000,
    burst_size = 100,
  },
},

# Extension Management
extensions = {
  # Auto-load extensions from OCI registry
  auto_load = false,
  # NOTE(review): registry.example.com is a reserved example domain, i.e. a
  # placeholder; set a real registry before turning auto_load on.
  oci_registry_url = "registry.example.com",
  oci_namespace = "provisioning/extensions",

  # Refresh interval for extension updates (hours)
  refresh_interval = 24,

  # Maximum concurrent extension initializations
  max_concurrent_init = 5,
},

# Database Connection Pool (for non-filesystem storage)
database = {
  # Connection pool settings
  # Size bounds are connection counts; all timeouts in this record are in seconds.
  pool = {
    min_size = 5,
    max_size = 20,
    connection_timeout = 30, # seconds
    idle_timeout = 300, # seconds
    max_lifetime = 1800, # seconds
  },

  # Retry strategy
  # Backoff values are in milliseconds, unlike the pool timeouts above.
  retry = {
    max_attempts = 3,
    initial_backoff = 100, # milliseconds
    max_backoff = 30000, # milliseconds
  },
},

# Feature Flags
# Boolean switches; only the experimental flag is off in this default.
features = {
  enable_audit_logging = true,
  enable_task_history = true,
  enable_performance_tracking = true,
  enable_experimental_features = false,
},
}