# Info: KCL batch workflow examples for provisioning (Provisioning)
# Author: JesusPerezLorenzo
# Release: 0.0.1
# Date: 25-09-2025
# Description: Usage examples for batch workflows and operations
import .workflows
import .batch
import .settings

# Example 1: Mixed Provider Infrastructure Deployment
mixed_provider_workflow: workflows.BatchWorkflow = workflows.BatchWorkflow {
|
|
workflow_id: "mixed_infra_deploy_001"
|
|
name: "Mixed Provider Infrastructure Deployment"
|
|
description: "Deploy infrastructure across UpCloud and AWS with cross-provider networking"
|
|
operations: [
|
|
workflows.BatchOperation {
|
|
operation_id: "create_upcloud_servers"
|
|
name: "Create UpCloud Web Servers"
|
|
operation_type: "server"
|
|
provider: "upcloud"
|
|
action: "create"
|
|
parameters: {
|
|
"server_count": "3"
|
|
"server_type": "web"
|
|
"zone": "fi-hel2"
|
|
"plan": "1xCPU-2GB"
|
|
}
|
|
allow_parallel: True
|
|
priority: 10
|
|
}
|
|
workflows.BatchOperation {
|
|
operation_id: "create_aws_database"
|
|
name: "Create AWS RDS Database"
|
|
operation_type: "server"
|
|
provider: "aws"
|
|
action: "create"
|
|
parameters: {
|
|
"service": "rds"
|
|
"instance_class": "db.t3.micro"
|
|
"engine": "postgresql"
|
|
"region": "eu-west-1"
|
|
}
|
|
dependencies: [
|
|
workflows.DependencyDef {
|
|
target_operation_id: "create_upcloud_servers"
|
|
dependency_type: "sequential"
|
|
timeout: 600
|
|
}
|
|
]
|
|
priority: 5
|
|
}
|
|
workflows.BatchOperation {
|
|
operation_id: "install_kubernetes"
|
|
name: "Install Kubernetes on UpCloud servers"
|
|
operation_type: "taskserv"
|
|
provider: "upcloud"
|
|
action: "create"
|
|
parameters: {
|
|
"taskserv": "kubernetes"
|
|
"version": "v1.28.0"
|
|
"cluster_name": "prod-cluster"
|
|
}
|
|
dependencies: [
|
|
workflows.DependencyDef {
|
|
target_operation_id: "create_upcloud_servers"
|
|
dependency_type: "sequential"
|
|
timeout: 1200
|
|
}
|
|
]
|
|
# 1 hour for K8s installation
|
|
timeout: 3600
|
|
priority: 8
|
|
}
|
|
workflows.BatchOperation {
|
|
operation_id: "setup_monitoring"
|
|
name: "Setup Prometheus monitoring"
|
|
operation_type: "taskserv"
|
|
action: "create"
|
|
parameters: {
|
|
"taskserv": "prometheus"
|
|
"namespace": "monitoring"
|
|
"retention": "30d"
|
|
}
|
|
dependencies: [
|
|
workflows.DependencyDef {
|
|
target_operation_id: "install_kubernetes"
|
|
dependency_type: "sequential"
|
|
timeout: 600
|
|
}
|
|
]
|
|
priority: 3
|
|
}
|
|
]
|
|
max_parallel_operations: 3
|
|
fail_fast: False
|
|
storage: workflows.StorageConfig {
|
|
backend: "surrealdb"
|
|
connection_config: {
|
|
"url": "ws://localhost:8000"
|
|
"namespace": "provisioning"
|
|
"database": "batch_workflows"
|
|
}
|
|
enable_persistence: True
|
|
# 30 days
|
|
retention_hours: 720
|
|
}
|
|
monitoring: workflows.MonitoringConfig {
|
|
enabled: True
|
|
backend: "prometheus"
|
|
enable_tracing: True
|
|
enable_notifications: True
|
|
notification_channels: ["webhook:slack://ops-channel"]
|
|
}
|
|
default_retry_policy: workflows.RetryPolicy {
|
|
max_attempts: 3
|
|
initial_delay: 10
|
|
backoff_multiplier: 2
|
|
retry_on_errors: ["connection_error", "timeout", "rate_limit", "resource_unavailable"]
|
|
}
|
|
execution_context: {
|
|
"environment": "production"
|
|
"cost_center": "infrastructure"
|
|
"owner": "devops-team"
|
|
}
|
|
}

# Example 2: Server Scaling Workflow with SurrealDB Backend
server_scaling_workflow: workflows.BatchWorkflow = workflows.BatchWorkflow {
|
|
workflow_id: "server_scaling_002"
|
|
name: "Auto-scaling Server Workflow"
|
|
description: "Scale servers based on load with automatic rollback on failure"
|
|
operations: [
|
|
workflows.BatchOperation {
|
|
operation_id: "scale_web_servers"
|
|
name: "Scale web servers up"
|
|
operation_type: "server"
|
|
action: "scale"
|
|
parameters: {
|
|
"target_count": "6"
|
|
"current_count": "3"
|
|
"server_group": "web-tier"
|
|
}
|
|
retry_policy: workflows.RetryPolicy {
|
|
max_attempts: 2
|
|
initial_delay: 30
|
|
retry_on_errors: ["resource_limit", "quota_exceeded"]
|
|
}
|
|
rollback_strategy: workflows.RollbackStrategy {
|
|
enabled: True
|
|
strategy: "immediate"
|
|
custom_rollback_operations: ["scale_down_to_original"]
|
|
}
|
|
}
|
|
workflows.BatchOperation {
|
|
operation_id: "update_load_balancer"
|
|
name: "Update load balancer configuration"
|
|
operation_type: "custom"
|
|
action: "configure"
|
|
parameters: {
|
|
"service": "haproxy"
|
|
"config_template": "web_tier_6_servers"
|
|
}
|
|
dependencies: [
|
|
workflows.DependencyDef {
|
|
target_operation_id: "scale_web_servers"
|
|
dependency_type: "conditional"
|
|
conditions: ["servers_ready", "health_check_passed"]
|
|
timeout: 300
|
|
}
|
|
]
|
|
}
|
|
]
|
|
storage: workflows.StorageConfig {
|
|
backend: "surrealdb"
|
|
connection_config: {
|
|
"url": "ws://surrealdb.local:8000"
|
|
"namespace": "scaling"
|
|
"database": "operations"
|
|
}
|
|
}
|
|
fail_fast: True
|
|
}

# Example 3: Maintenance Workflow with Filesystem Backend
maintenance_workflow: workflows.BatchWorkflow = workflows.BatchWorkflow {
|
|
workflow_id: "maintenance_003"
|
|
name: "System Maintenance Workflow"
|
|
description: "Perform scheduled maintenance across multiple providers"
|
|
operations: [
|
|
workflows.BatchOperation {
|
|
operation_id: "backup_databases"
|
|
name: "Backup all databases"
|
|
operation_type: "custom"
|
|
action: "create"
|
|
parameters: {
|
|
"backup_type": "full"
|
|
"compression": "gzip"
|
|
"retention_days": "30"
|
|
}
|
|
# 2 hours
|
|
timeout: 7200
|
|
}
|
|
workflows.BatchOperation {
|
|
operation_id: "update_taskservs"
|
|
name: "Update all taskservs to latest versions"
|
|
operation_type: "taskserv"
|
|
action: "update"
|
|
parameters: {
|
|
"update_strategy": "rolling"
|
|
"max_unavailable": "1"
|
|
}
|
|
dependencies: [
|
|
workflows.DependencyDef {
|
|
target_operation_id: "backup_databases"
|
|
dependency_type: "sequential"
|
|
}
|
|
]
|
|
# Sequential updates for safety
|
|
allow_parallel: False
|
|
}
|
|
workflows.BatchOperation {
|
|
operation_id: "verify_services"
|
|
name: "Verify all services are healthy"
|
|
operation_type: "custom"
|
|
action: "configure"
|
|
parameters: {
|
|
"verification_type": "health_check"
|
|
"timeout_per_service": "30"
|
|
}
|
|
dependencies: [
|
|
workflows.DependencyDef {
|
|
target_operation_id: "update_taskservs"
|
|
dependency_type: "sequential"
|
|
}
|
|
]
|
|
}
|
|
]
|
|
storage: workflows.StorageConfig {
|
|
backend: "filesystem"
|
|
base_path: "./maintenance_workflows"
|
|
enable_persistence: True
|
|
enable_compression: True
|
|
}
|
|
pre_workflow_hooks: ["notify_maintenance_start", "set_maintenance_mode"]
|
|
post_workflow_hooks: ["unset_maintenance_mode", "notify_maintenance_complete"]
|
|
}

# Example 4: Comprehensive Batch Executor Configuration
production_batch_executor: batch.BatchExecutor = batch.BatchExecutor {
|
|
executor_id: "prod_batch_executor"
|
|
name: "Production Batch Executor"
|
|
description: "Production-ready batch executor with full observability and mixed provider support"
|
|
scheduler: batch.BatchScheduler {
|
|
strategy: "resource_aware"
|
|
resource_limits: {
|
|
"max_cpu_cores": 16
|
|
"max_memory_mb": 32768
|
|
# 10Gbps
|
|
"max_network_bandwidth": 10000
|
|
}
|
|
scheduling_interval: 5
|
|
enable_preemption: True
|
|
}
|
|
queues: [
|
|
batch.BatchQueue {
|
|
queue_id: "high_priority"
|
|
queue_type: "priority"
|
|
max_size: 100
|
|
# 1 day
|
|
retention_period: 86400
|
|
max_delivery_attempts: 5
|
|
}
|
|
batch.BatchQueue {
|
|
queue_id: "standard"
|
|
queue_type: "standard"
|
|
max_size: 500
|
|
dead_letter_queue: "failed_operations"
|
|
}
|
|
batch.BatchQueue {
|
|
queue_id: "failed_operations"
|
|
queue_type: "dead_letter"
|
|
# 7 days
|
|
retention_period: 604800
|
|
}
|
|
]
|
|
resource_constraints: [
|
|
batch.ResourceConstraint {
|
|
resource_type: "cpu"
|
|
resource_name: "total_cpu_cores"
|
|
max_units: 16
|
|
units_per_operation: 1
|
|
hard_constraint: True
|
|
}
|
|
batch.ResourceConstraint {
|
|
resource_type: "memory"
|
|
resource_name: "total_memory_gb"
|
|
max_units: 32
|
|
units_per_operation: 2
|
|
hard_constraint: True
|
|
}
|
|
]
|
|
provider_config: batch.ProviderMixConfig {
|
|
primary_provider: "upcloud"
|
|
secondary_providers: ["aws"]
|
|
provider_selection: "cost_optimize"
|
|
cross_provider_networking: {
|
|
"vpn_enabled": "True"
|
|
"mesh_networking": "wireguard"
|
|
}
|
|
provider_limits: {
|
|
"upcloud": {"max_servers": 20, "max_storage_gb": 1000}
|
|
"aws": {"max_instances": 10, "max_ebs_gb": 500}
|
|
}
|
|
}
|
|
health_check: batch.BatchHealthCheck {
|
|
enabled: True
|
|
check_interval: 30
|
|
failure_threshold: 2
|
|
health_checks: ["http://localhost:8080/health", "check_disk_space", "check_memory_usage"]
|
|
failure_actions: ["alert", "scale_down", "rollback"]
|
|
}
|
|
autoscaling: batch.BatchAutoscaling {
|
|
enabled: True
|
|
min_parallel: 2
|
|
max_parallel: 12
|
|
scale_up_threshold: 0.85
|
|
scale_down_threshold: 0.15
|
|
target_utilization: 0.65
|
|
# 3 minutes
|
|
cooldown_period: 180
|
|
}
|
|
metrics: batch.BatchMetrics {
|
|
detailed_metrics: True
|
|
# 30 days
|
|
retention_hours: 720
|
|
# 1min, 5min, 30min, 1hour
|
|
aggregation_intervals: [60, 300, 1800, 3600]
|
|
enable_export: True
|
|
export_config: {
|
|
"prometheus_endpoint": "http://prometheus.local:9090"
|
|
"grafana_dashboard": "batch_operations_dashboard"
|
|
}
|
|
}
|
|
storage: workflows.StorageConfig {
|
|
backend: "surrealdb"
|
|
connection_config: {
|
|
"url": "ws://surrealdb.prod:8000"
|
|
"namespace": "production"
|
|
"database": "batch_workflows"
|
|
"user": "batch_executor"
|
|
"auth_token": "{{env.SURREALDB_TOKEN}}"
|
|
}
|
|
enable_persistence: True
|
|
# 90 days
|
|
retention_hours: 2160
|
|
enable_compression: True
|
|
encryption: settings.SecretProvider {
|
|
provider: "sops"
|
|
sops_config: settings.SopsConfig {
|
|
config_path: "./.sops.yaml"
|
|
age_key_file: "{{env.HOME}}/.config/sops/age/keys.txt"
|
|
use_age: True
|
|
}
|
|
}
|
|
}
|
|
security_config: {
|
|
"tls_enabled": "True"
|
|
"auth_required": "True"
|
|
"rbac_enabled": "True"
|
|
"audit_level": "full"
|
|
}
|
|
webhook_endpoints: [
|
|
"https://hooks.slack.com/services/ops-notifications"
|
|
"https://api.pagerduty.com/generic/incidents"
|
|
]
|
|
performance_config: {
|
|
"io_threads": "8"
|
|
"worker_threads": "16"
|
|
"batch_size": "50"
|
|
"connection_pool_size": "20"
|
|
}
|
|
}

# Example 5: Template for Common Infrastructure Deployment
infra_deployment_template: workflows.WorkflowTemplate = workflows.WorkflowTemplate {
|
|
template_id: "standard_infra_deployment"
|
|
name: "Standard Infrastructure Deployment Template"
|
|
description: "Template for deploying standard infrastructure with customizable parameters"
|
|
category: "infrastructure"
|
|
workflow_template: workflows.BatchWorkflow {
|
|
# Template parameter: {{template.workflow_id}}
|
|
workflow_id: "custom_deployment"
|
|
# Template parameter: {{template.workflow_name}}
|
|
name: "Custom Deployment"
|
|
operations: [
|
|
workflows.BatchOperation {
|
|
operation_id: "create_servers"
|
|
# Template: Create {{template.server_count}} servers
|
|
name: "Create servers"
|
|
operation_type: "server"
|
|
# Template parameter: {{template.provider}}
|
|
provider: "upcloud"
|
|
action: "create"
|
|
parameters: {
|
|
# Template parameter: {{template.server_count}}
|
|
"count": "3"
|
|
# Template parameter: {{template.server_type}}
|
|
"type": "web"
|
|
# Template parameter: {{template.zone}}
|
|
"zone": "fi-hel2"
|
|
}
|
|
}
|
|
workflows.BatchOperation {
|
|
operation_id: "install_base_taskservs"
|
|
name: "Install base taskservs"
|
|
operation_type: "taskserv"
|
|
action: "create"
|
|
parameters: {
|
|
# Template parameter: {{template.base_taskservs}}
|
|
"taskservs": "kubernetes,prometheus,grafana"
|
|
}
|
|
dependencies: [
|
|
workflows.DependencyDef {
|
|
target_operation_id: "create_servers"
|
|
dependency_type: "sequential"
|
|
}
|
|
]
|
|
}
|
|
]
|
|
storage: workflows.StorageConfig {
|
|
# Template parameter: {{template.storage_backend}}
|
|
backend: "filesystem"
|
|
# Template parameter: {{template.storage_path}}
|
|
base_path: "./deployments"
|
|
}
|
|
}
|
|
parameters: {
|
|
"workflow_id": "custom_deployment"
|
|
"workflow_name": "Custom Deployment"
|
|
"server_count": "3"
|
|
"server_type": "web"
|
|
"provider": "upcloud"
|
|
"zone": "fi-hel2"
|
|
"base_taskservs": "kubernetes,prometheus,grafana"
|
|
"storage_backend": "filesystem"
|
|
"storage_path": "./deployments"
|
|
}
|
|
required_parameters: [
|
|
"workflow_id"
|
|
"server_count"
|
|
"provider"
|
|
]
|
|
examples: [
|
|
"Small deployment: server_count=2, server_type=micro"
|
|
"Production deployment: server_count=6, server_type=standard, provider=upcloud"
|
|
]
|
|
}