provisioning/schemas/platform/examples/full-platform-enterprise.ncl

703 lines
15 KiB
Plaintext
Raw Normal View History

# Example: Complete Provisioning Platform - Enterprise Deployment
#
# This example demonstrates deploying all three platform services together
# in enterprise mode with full integration, security, and observability.
#
# Deployment Architecture:
# - 3x Orchestrator replicas (load balanced)
# - 1x Control Center (with PostgreSQL)
# - 1x MCP Server (for AI/LLM integration)
# - SurrealDB cluster for workflow storage
# - Comprehensive monitoring (Prometheus, Grafana, Loki)
# - Full RBAC, audit logging, and compliance
#
# Usage:
# # Generate all three configs
# nickel export --format toml orchestrator.ncl > orchestrator.enterprise.toml
# nickel export --format toml control-center.ncl > control-center.enterprise.toml
# nickel export --format toml mcp-server.ncl > mcp-server.enterprise.toml
#
# # Deploy to Kubernetes
# kubectl apply -f provisioning/platform/infrastructure/kubernetes/namespace.yaml
# kubectl apply -f provisioning/platform/infrastructure/kubernetes/resource-quota.yaml
# kubectl apply -f provisioning/platform/infrastructure/kubernetes/rbac.yaml
# kubectl apply -f 'provisioning/platform/infrastructure/kubernetes/*.deployment.yaml'
# kubectl apply -f 'provisioning/platform/infrastructure/kubernetes/*.service.yaml'
# kubectl apply -f provisioning/platform/infrastructure/kubernetes/platform-ingress.yaml
# ============================================================================
# Global Configuration
# ============================================================================
# Deployment-wide values shared by the service records that follow.
# Each binding is chained with `in` so that it stays in scope for the rest of
# the file (a Nickel `let` is an expression and requires `in`).
# `deployment_mode` is not referenced anywhere else in this file.
let deployment_mode = "enterprise" in
let namespace = "provisioning" in
let domain = "provisioning.example.com" in
let environment = "production" in
# ============================================================================
# 1. ORCHESTRATOR CONFIGURATION
# ============================================================================
# Orchestrator service configuration.
#
# Reads the outer `namespace`, `environment` and `domain` bindings through
# Nickel string interpolation (`%{name}`). Strings written as `${NAME}` are NOT
# interpolated by Nickel and are emitted literally; presumably they are secret
# placeholders substituted at deploy time (TODO confirm).
let orchestrator = {
  # Workspace: Production workspace
  workspace = {
    name = "production",
    path = "/var/lib/provisioning/orchestrator",
    enabled = true,
    multi_workspace = true,
  },

  # Server: High-performance, multi-replica
  server = {
    host = "0.0.0.0",
    port = 9090,
    workers = 16,
    keep_alive = 30,
    max_connections = 4096,
  },

  # Storage: SurrealDB cluster (3 nodes for HA)
  storage = {
    backend = "surrealdb_cluster",
    # Comma-separated list of the three cluster nodes inside `namespace`.
    surrealdb_url = "surrealdb://surrealdb-1.%{namespace}.svc.cluster.local:8000,surrealdb-2.%{namespace}.svc.cluster.local:8000,surrealdb-3.%{namespace}.svc.cluster.local:8000",
    surrealdb_namespace = "provisioning",
    surrealdb_database = "orchestrator-%{environment}",
  },

  # Queue: Maximum throughput
  queue = {
    max_concurrent_tasks = 100,
    retry_attempts = 5,
    retry_delay = 2000,
    task_timeout = 7200000, # 2 hours
    deadletter_queue = {
      enabled = true,
      max_messages = 10000,
      retention_period = 604800, # 7 days
    },
    priority_levels = ["low", "normal", "high", "critical"],
    default_priority = "normal",
  },

  # Batch: Optimized for large workflows
  batch = {
    parallel_limit = 50,
    operation_timeout = 3600000,
    checkpoint = {
      enabled = true,
      interval = 1000,
      auto_cleanup = true,
      max_checkpoints = 100,
    },
    rollback = {
      strategy = "automatic",
      retain_logs = true,
    },
  },

  # Monitoring: Full observability
  monitoring = {
    enabled = true,
    metrics = {
      enabled = true,
      interval = 10,
      export_format = "prometheus",
    },
    health_check = {
      enabled = true,
      interval = 30,
      timeout = 5,
    },
    resources = {
      track_cpu = true,
      track_memory = true,
      track_disk = true,
      alert_threshold_cpu = 85,
      alert_threshold_memory = 90,
      alert_threshold_disk = 95,
    },
    profiling = {
      enabled = true,
      sample_rate = 0.1,
    },
  },

  # Logging: Comprehensive audit trail
  logging = {
    level = "info",
    format = "json",
    outputs = [
      {
        destination = "stdout",
        level = "warn",
      },
      # Main service log
      {
        destination = "file",
        path = "/var/log/provisioning/orchestrator/orchestrator.log",
        level = "info",
        rotation = {
          max_size = "500MB",
          max_backups = 30,
          max_age = 90,
        },
      },
      # Audit log, kept with a longer rotation window than the service log
      {
        destination = "file",
        path = "/var/log/provisioning/orchestrator/audit.log",
        level = "info",
        rotation = {
          max_size = "200MB",
          max_backups = 365,
          max_age = 365,
        },
      },
    ],
  },

  # Security: Full production hardening
  security = {
    auth = {
      enabled = true,
      method = "jwt",
      # Literal placeholder, not a Nickel interpolation.
      jwt_secret = "${JWT_SECRET}",
      jwt_issuer = "provisioning.%{environment}",
      jwt_audience = "orchestrator-%{environment}",
      token_expiration = 3600,
    },
    cors = {
      enabled = true,
      allowed_origins = [
        "https://orchestrator.%{domain}",
        "https://control-center.%{domain}",
        "https://mcp.%{domain}",
      ],
      allowed_methods = ["GET", "POST", "PUT"],
      allowed_headers = ["Content-Type", "Authorization"],
    },
    tls = {
      enabled = true,
      cert_path = "/etc/provisioning/certs/orchestrator.crt",
      key_path = "/etc/provisioning/certs/orchestrator.key",
      min_version = "TLSv1.3",
    },
    rate_limit = {
      enabled = true,
      requests_per_second = 10000,
      burst_size = 1000,
    },
  },

  # Extensions: Production-ready
  extensions = {
    auto_load = true,
    oci_registry_url = "registry.%{domain}:5000",
    oci_namespace = "provisioning/extensions",
    refresh_interval = 24,
    max_concurrent_init = 10,
  },

  # Database: High-performance pooling
  database = {
    pool = {
      min_size = 20,
      max_size = 100,
      connection_timeout = 10,
      idle_timeout = 600,
      max_lifetime = 3600,
    },
    retry = {
      max_attempts = 5,
      initial_backoff = 100,
      max_backoff = 30000,
    },
  },

  # Features: Full production suite
  features = {
    enable_audit_logging = true,
    enable_task_history = true,
    enable_performance_tracking = true,
    enable_experimental_features = false,
  },
} in
# ============================================================================
# 2. CONTROL CENTER CONFIGURATION
# ============================================================================
# Control Center service configuration (PostgreSQL-backed).
#
# Reads the outer `namespace`, `environment` and `domain` bindings through
# Nickel string interpolation (`%{name}`). Strings written as `${NAME}` are NOT
# interpolated by Nickel and are emitted literally; presumably they are secret
# placeholders substituted at deploy time (TODO confirm).
let control_center = {
  server = {
    host = "0.0.0.0",
    port = 8080,
    workers = 4,
    keep_alive = 75,
    max_connections = 512,
  },

  database = {
    backend = "postgres",
    postgres = {
      host = "postgres.%{namespace}.svc.cluster.local",
      port = 5432,
      database = "provisioning",
      user = "provisioning",
      # Literal placeholder, not a Nickel interpolation.
      password = "${DB_PASSWORD}",
      ssl_mode = "require",
      pool = {
        min_size = 10,
        max_size = 50,
        idle_timeout = 300,
      },
    },
  },

  auth = {
    enabled = true,
    jwt = {
      issuer = "provisioning.%{environment}",
      audience = "control-center",
      # Literal placeholder, not a Nickel interpolation.
      secret = "${JWT_SECRET}",
      algorithm = "HS256",
      expiration = 3600,
      refresh_token_expiration = 604800, # 7 days
    },
    oauth2 = {
      enabled = false,
    },
    ldap = {
      enabled = false,
    },
  },

  rbac = {
    enabled = true,
    # NOTE(review): no "viewer" role is declared in `roles` below; confirm it
    # is provided elsewhere (e.g. as a built-in role).
    default_role = "viewer",
    roles = {
      admin = {
        description = "Infrastructure administrator",
        permissions = ["*"],
      },
      operator = {
        description = "Operations team member",
        permissions = [
          "orchestrator.view",
          "orchestrator.execute",
          "orchestrator.manage",
          "policies.view",
        ],
      },
      developer = {
        description = "Developer with read-only access",
        permissions = [
          "orchestrator.view",
          "policies.view",
        ],
      },
    },
  },

  mfa = {
    required = true, # Required in enterprise
    methods = ["totp", "email"],
    totp = {
      enabled = true,
      issuer = "Provisioning Enterprise",
      algorithm = "SHA1",
      digits = 6,
      period = 30,
    },
    email = {
      enabled = true,
      expiration = 300,
    },
  },

  policies = {
    password = {
      min_length = 16, # Stronger in production
      require_uppercase = true,
      require_lowercase = true,
      require_digits = true,
      require_special_chars = true, # Required in production
      expiration_days = 90,
      history_count = 5,
    },
    session = {
      max_duration = 28800, # 8 hours
      idle_timeout = 1800, # 30 minutes (stricter)
      max_concurrent = 2, # Limit concurrent sessions
    },
    audit = {
      enabled = true,
      log_all_api_calls = true,
      log_user_actions = true,
      log_rbac_changes = true,
      retention_days = 365, # 1 year
    },
    compliance = {
      soc2 = {
        enabled = true,
        log_all_access = true,
        require_mfa = true,
      },
      hipaa = {
        enabled = false, # Unless required
      },
    },
  },

  rate_limit = {
    enabled = true,
    global = {
      requests_per_second = 1000,
      burst_size = 100,
    },
    per_user = {
      requests_per_second = 100,
      burst_size = 20,
    },
  },

  cors = {
    enabled = true,
    allowed_origins = [
      "https://control-center.%{domain}",
      "https://orchestrator.%{domain}",
    ],
    allowed_methods = ["GET", "POST", "PUT", "DELETE", "OPTIONS"],
    allowed_headers = ["Content-Type", "Authorization"],
    max_age = 86400,
  },

  tls = {
    enabled = true, # Required in production
    cert_path = "/etc/provisioning/certs/control-center.crt",
    key_path = "/etc/provisioning/certs/control-center.key",
    min_version = "TLSv1.3",
  },

  monitoring = {
    enabled = true,
    metrics = {
      enabled = true,
      interval = 30,
      export_format = "prometheus",
    },
    health_check = {
      enabled = true,
      interval = 30,
      timeout = 10,
    },
    tracing = {
      enabled = true,
      sample_rate = 0.1,
    },
  },

  logging = {
    level = "info",
    format = "json",
    outputs = [
      {
        destination = "stdout",
        level = "warn",
      },
      {
        destination = "file",
        path = "/var/log/provisioning/control-center/control-center.log",
        level = "info",
        rotation = {
          max_size = "500MB",
          max_backups = 30,
          max_age = 90,
        },
      },
    ],
  },

  # Connection settings for calling the orchestrator service.
  # NOTE(review): the orchestrator record in this file enables TLS on port
  # 9090, while this URL uses plain http - confirm the intended scheme.
  orchestrator = {
    url = "http://orchestrator:9090",
    timeout = 30,
    retry = {
      max_attempts = 3,
      initial_backoff = 100,
      max_backoff = 30000,
    },
  },

  features = {
    enable_audit_logging = true,
    enable_policy_enforcement = true,
    enable_experimental_ui = false,
  },
} in
# ============================================================================
# 3. MCP SERVER CONFIGURATION
# ============================================================================
# MCP Server configuration (AI/LLM integration).
#
# Strings written as `${NAME}` are not Nickel interpolations (Nickel uses
# `%{...}`); they are emitted literally. Presumably they are secret
# placeholders substituted at deploy time (TODO confirm).
let mcp_server = {
  server = {
    host = "0.0.0.0",
    port = 8888,
    protocol = "http", # HTTP in Kubernetes (UNIX socket elsewhere)
    workers = 4,
    keep_alive = 75,
  },

  # Capability toggles and limits
  capabilities = {
    tools = {
      enabled = true,
      max_concurrent = 20,
      timeout = 30000,
      categories = [
        "orchestrator",
        "provisioning",
        "workspace",
        "configuration",
        "system",
      ],
    },
    resources = {
      enabled = true,
      max_size = 1073741824, # 1GB
      caching = {
        enabled = true,
        ttl = 3600,
        max_entries = 1000,
      },
    },
    prompts = {
      enabled = true,
      max_length = 10000,
      template_engine = "jinja2",
    },
    sampling = {
      enabled = true, # Enabled in enterprise for advanced use
      models = ["claude-3-opus-20240229"],
      default_model = "claude-3-opus-20240229",
      temperature = 0.7,
      max_tokens = 8192,
    },
  },

  # Per-tool descriptions and timeouts, grouped by category
  tools = {
    orchestrator = {
      enabled = true,
      submit_workflow = {
        description = "Submit workflows to orchestrator",
        timeout = 60000,
      },
      list_workflows = {
        description = "List all workflows",
        timeout = 10000,
      },
      get_workflow = {
        description = "Get workflow details",
        timeout = 10000,
      },
      cancel_workflow = {
        description = "Cancel running workflow",
        timeout = 30000,
      },
    },
    provisioning = {
      enabled = true,
      deploy_infrastructure = {
        description = "Deploy infrastructure",
        timeout = 300000,
      },
      validate_config = {
        description = "Validate configuration",
        timeout = 30000,
      },
    },
  },

  # Resource providers exposed by the server
  resources = {
    file_system = {
      enabled = true,
      root_path = "/var/lib/provisioning",
      allow_write = false, # Read-only in production
      allowed_extensions = ["ncl", "toml", "yaml", "json"],
      max_file_size = 10485760, # 10MB
    },
    database = {
      enabled = true,
      connections = {
        orchestrator = {
          type = "http",
          url = "http://orchestrator:9090/api",
        },
        control_center = {
          type = "http",
          url = "http://control-center:8080/api",
        },
      },
    },
  },

  # Upstream services this server talks to.
  # NOTE(review): the orchestrator and control-center records in this file
  # enable TLS, while these URLs use plain http - confirm the intended scheme.
  integration = {
    orchestrator = {
      enabled = true,
      url = "http://orchestrator:9090",
      timeout = 30,
      auth = {
        method = "jwt",
        token = "${ORCHESTRATOR_TOKEN}",
      },
    },
    control_center = {
      enabled = true,
      url = "http://control-center:8080",
      timeout = 30,
      auth = {
        method = "jwt",
        token = "${CONTROL_CENTER_TOKEN}",
      },
    },
    claude = {
      enabled = true,
      api_key = "${CLAUDE_API_KEY}",
      model = "claude-3-opus-20240229",
      max_tokens = 8192,
    },
  },

  security = {
    auth = {
      enabled = true,
      method = "jwt",
      jwt_secret = "${JWT_SECRET}",
    },
    authorization = {
      enabled = true,
      role_based = true,
      default_role = "viewer",
    },
    rate_limit = {
      enabled = true,
      requests_per_second = 100,
      burst_size = 20,
    },
    input_validation = {
      enabled = true,
      max_input_size = 1000000,
      sanitize_inputs = true,
    },
  },

  monitoring = {
    enabled = true,
    metrics = {
      enabled = true,
      interval = 30,
      export_format = "prometheus",
    },
    health_check = {
      enabled = true,
      interval = 30,
      timeout = 10,
    },
    audit = {
      enabled = true,
      log_all_requests = true,
      log_sensitive_data = false,
      retention_days = 90,
    },
  },

  logging = {
    level = "info",
    format = "json",
    outputs = [
      {
        destination = "stdout",
        level = "warn",
      },
      {
        destination = "file",
        path = "/var/log/provisioning/mcp-server/mcp-server.log",
        level = "info",
        rotation = {
          max_size = "200MB",
          max_backups = 15,
          max_age = 30,
        },
      },
    ],
  },

  features = {
    enable_audit_logging = true,
    enable_caching = true,
    enable_sampling = true,
    enable_experimental_tools = false,
  },

  performance = {
    worker_threads = 4,
    blocking_threads = 2,
    default_timeout = 30000,
    max_timeout = 300000,
    request_buffer_size = 1000,
    response_buffer_size = 1000,
    cache_enabled = true,
    cache_size = "256MB",
    cache_ttl = 3600,
  },
} in
# ============================================================================
# Export all configurations
# ============================================================================
# Final value of the file: one record holding all three service configurations.
#
# Nickel records are recursive, so writing `orchestrator = orchestrator` inside
# the record would make the field refer to itself (infinite recursion) rather
# than to the outer `let` binding. The bindings are therefore aliased under
# different names before the record is built.
let orchestrator_config = orchestrator in
let control_center_config = control_center in
let mcp_server_config = mcp_server in
{
  orchestrator = orchestrator_config,
  control_center = control_center_config,
  mcp_server = mcp_server_config,
}