# prvng_kcl/examples_batch.k
# Info: KCL batch workflow examples for the Provisioning project
# Author: JesusPerezLorenzo
# Release: 0.0.1
# Date: 25-09-2025
# Description: Usage examples for batch workflows and operations
import .workflows
import .batch
import .settings

# Example 1: Mixed Provider Infrastructure Deployment
mixed_provider_workflow: workflows.BatchWorkflow = workflows.BatchWorkflow {
    workflow_id: "mixed_infra_deploy_001"
    name: "Mixed Provider Infrastructure Deployment"
    description: "Deploy infrastructure across UpCloud and AWS with cross-provider networking"
    operations: [
        workflows.BatchOperation {
            operation_id: "create_upcloud_servers"
            name: "Create UpCloud Web Servers"
            operation_type: "server"
            provider: "upcloud"
            action: "create"
            parameters: {
                "server_count": "3"
                "server_type": "web"
                "zone": "fi-hel2"
                "plan": "1xCPU-2GB"
            }
            allow_parallel: True
            priority: 10
        }
        workflows.BatchOperation {
            operation_id: "create_aws_database"
            name: "Create AWS RDS Database"
            operation_type: "server"
            provider: "aws"
            action: "create"
            parameters: {
                "service": "rds"
                "instance_class": "db.t3.micro"
                "engine": "postgresql"
                "region": "eu-west-1"
            }
            dependencies: [
                workflows.DependencyDef {
                    target_operation_id: "create_upcloud_servers"
                    dependency_type: "sequential"
                    timeout: 600
                }
            ]
            priority: 5
        }
        workflows.BatchOperation {
            operation_id: "install_kubernetes"
            name: "Install Kubernetes on UpCloud servers"
            operation_type: "taskserv"
            provider: "upcloud"
            action: "create"
            parameters: {
                "taskserv": "kubernetes"
                "version": "v1.28.0"
                "cluster_name": "prod-cluster"
            }
            dependencies: [
                workflows.DependencyDef {
                    target_operation_id: "create_upcloud_servers"
                    dependency_type: "sequential"
                    timeout: 1200
                }
            ]
            # 1 hour for K8s installation
            timeout: 3600
            priority: 8
        }
        workflows.BatchOperation {
            operation_id: "setup_monitoring"
            name: "Setup Prometheus monitoring"
            operation_type: "taskserv"
            action: "create"
            parameters: {
                "taskserv": "prometheus"
                "namespace": "monitoring"
                "retention": "30d"
            }
            dependencies: [
                workflows.DependencyDef {
                    target_operation_id: "install_kubernetes"
                    dependency_type: "sequential"
                    timeout: 600
                }
            ]
            priority: 3
        }
    ]
    max_parallel_operations: 3
    fail_fast: False
    storage: workflows.StorageConfig {
        backend: "surrealdb"
        connection_config: {
            "url": "ws://localhost:8000"
            "namespace": "provisioning"
            "database": "batch_workflows"
        }
        enable_persistence: True
        # 30 days
        retention_hours: 720
    }
    monitoring: workflows.MonitoringConfig {
        enabled: True
        backend: "prometheus"
        enable_tracing: True
        enable_notifications: True
        notification_channels: ["webhook:slack://ops-channel"]
    }
    default_retry_policy: workflows.RetryPolicy {
        max_attempts: 3
        initial_delay: 10
        backoff_multiplier: 2
        retry_on_errors: ["connection_error", "timeout", "rate_limit", "resource_unavailable"]
    }
    execution_context: {
        "environment": "production"
        "cost_center": "infrastructure"
        "owner": "devops-team"
    }
}
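
# Hedged sketch (not part of the original examples): the smallest BatchWorkflow
# implied by the field usage in Example 1 -- one operation, no dependencies,
# storage, or monitoring. Field names are taken verbatim from above; that every
# omitted field has a schema default is an assumption about the workflows module.
minimal_workflow: workflows.BatchWorkflow = workflows.BatchWorkflow {
    workflow_id: "minimal_demo_000"
    name: "Minimal Single-Operation Workflow"
    description: "Smallest viable workflow implied by Example 1"
    operations: [
        workflows.BatchOperation {
            operation_id: "create_single_server"
            name: "Create one UpCloud server"
            operation_type: "server"
            provider: "upcloud"
            action: "create"
            parameters: {
                "server_count": "1"
                "zone": "fi-hel2"
            }
        }
    ]
}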

# Example 2: Server Scaling Workflow with SurrealDB Backend
server_scaling_workflow: workflows.BatchWorkflow = workflows.BatchWorkflow {
    workflow_id: "server_scaling_002"
    name: "Auto-scaling Server Workflow"
    description: "Scale servers based on load with automatic rollback on failure"
    operations: [
        workflows.BatchOperation {
            operation_id: "scale_web_servers"
            name: "Scale web servers up"
            operation_type: "server"
            action: "scale"
            parameters: {
                "target_count": "6"
                "current_count": "3"
                "server_group": "web-tier"
            }
            retry_policy: workflows.RetryPolicy {
                max_attempts: 2
                initial_delay: 30
                retry_on_errors: ["resource_limit", "quota_exceeded"]
            }
            rollback_strategy: workflows.RollbackStrategy {
                enabled: True
                strategy: "immediate"
                custom_rollback_operations: ["scale_down_to_original"]
            }
        }
        workflows.BatchOperation {
            operation_id: "update_load_balancer"
            name: "Update load balancer configuration"
            operation_type: "custom"
            action: "configure"
            parameters: {
                "service": "haproxy"
                "config_template": "web_tier_6_servers"
            }
            dependencies: [
                workflows.DependencyDef {
                    target_operation_id: "scale_web_servers"
                    dependency_type: "conditional"
                    conditions: ["servers_ready", "health_check_passed"]
                    timeout: 300
                }
            ]
        }
    ]
    storage: workflows.StorageConfig {
        backend: "surrealdb"
        connection_config: {
            "url": "ws://surrealdb.local:8000"
            "namespace": "scaling"
            "database": "operations"
        }
    }
    fail_fast: True
}
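
# Hedged sketch: KCL allows binding a schema instance to a top-level name and
# reusing it, so a retry policy shared by several operations only needs to be
# declared once. The fields mirror the RetryPolicy usage in Examples 1-2;
# treating the instance as freely reusable is an assumption about the schema.
shared_scaling_retry: workflows.RetryPolicy = workflows.RetryPolicy {
    max_attempts: 2
    initial_delay: 30
    retry_on_errors: ["resource_limit", "quota_exceeded"]
}
# An operation can then reference it directly, e.g.
#   retry_policy: shared_scaling_retry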

# Example 3: Maintenance Workflow with Filesystem Backend
maintenance_workflow: workflows.BatchWorkflow = workflows.BatchWorkflow {
    workflow_id: "maintenance_003"
    name: "System Maintenance Workflow"
    description: "Perform scheduled maintenance across multiple providers"
    operations: [
        workflows.BatchOperation {
            operation_id: "backup_databases"
            name: "Backup all databases"
            operation_type: "custom"
            action: "create"
            parameters: {
                "backup_type": "full"
                "compression": "gzip"
                "retention_days": "30"
            }
            # 2 hours
            timeout: 7200
        }
        workflows.BatchOperation {
            operation_id: "update_taskservs"
            name: "Update all taskservs to latest versions"
            operation_type: "taskserv"
            action: "update"
            parameters: {
                "update_strategy": "rolling"
                "max_unavailable": "1"
            }
            dependencies: [
                workflows.DependencyDef {
                    target_operation_id: "backup_databases"
                    dependency_type: "sequential"
                }
            ]
            # Sequential updates for safety
            allow_parallel: False
        }
        workflows.BatchOperation {
            operation_id: "verify_services"
            name: "Verify all services are healthy"
            operation_type: "custom"
            action: "configure"
            parameters: {
                "verification_type": "health_check"
                "timeout_per_service": "30"
            }
            dependencies: [
                workflows.DependencyDef {
                    target_operation_id: "update_taskservs"
                    dependency_type: "sequential"
                }
            ]
        }
    ]
    storage: workflows.StorageConfig {
        backend: "filesystem"
        base_path: "./maintenance_workflows"
        enable_persistence: True
        enable_compression: True
    }
    pre_workflow_hooks: ["notify_maintenance_start", "set_maintenance_mode"]
    post_workflow_hooks: ["unset_maintenance_mode", "notify_maintenance_complete"]
}
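
# Hedged sketch: the filesystem StorageConfig above can likewise be factored out
# so maintenance-style workflows share one definition instead of repeating it.
# Only fields already shown in Example 3 are used.
maintenance_storage: workflows.StorageConfig = workflows.StorageConfig {
    backend: "filesystem"
    base_path: "./maintenance_workflows"
    enable_persistence: True
    enable_compression: True
}
# A workflow would then reference it as:
#   storage: maintenance_storage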

# Example 4: Comprehensive Batch Executor Configuration
production_batch_executor: batch.BatchExecutor = batch.BatchExecutor {
    executor_id: "prod_batch_executor"
    name: "Production Batch Executor"
    description: "Production-ready batch executor with full observability and mixed provider support"
    scheduler: batch.BatchScheduler {
        strategy: "resource_aware"
        resource_limits: {
            "max_cpu_cores": 16
            "max_memory_mb": 32768
            # 10 Gbps
            "max_network_bandwidth": 10000
        }
        scheduling_interval: 5
        enable_preemption: True
    }
    queues: [
        batch.BatchQueue {
            queue_id: "high_priority"
            queue_type: "priority"
            max_size: 100
            # 1 day
            retention_period: 86400
            max_delivery_attempts: 5
        }
        batch.BatchQueue {
            queue_id: "standard"
            queue_type: "standard"
            max_size: 500
            dead_letter_queue: "failed_operations"
        }
        batch.BatchQueue {
            queue_id: "failed_operations"
            queue_type: "dead_letter"
            # 7 days
            retention_period: 604800
        }
    ]
    resource_constraints: [
        batch.ResourceConstraint {
            resource_type: "cpu"
            resource_name: "total_cpu_cores"
            max_units: 16
            units_per_operation: 1
            hard_constraint: True
        }
        batch.ResourceConstraint {
            resource_type: "memory"
            resource_name: "total_memory_gb"
            max_units: 32
            units_per_operation: 2
            hard_constraint: True
        }
    ]
    provider_config: batch.ProviderMixConfig {
        primary_provider: "upcloud"
        secondary_providers: ["aws"]
        provider_selection: "cost_optimize"
        cross_provider_networking: {
            "vpn_enabled": "True"
            "mesh_networking": "wireguard"
        }
        provider_limits: {
            "upcloud": {"max_servers": 20, "max_storage_gb": 1000}
            "aws": {"max_instances": 10, "max_ebs_gb": 500}
        }
    }
    health_check: batch.BatchHealthCheck {
        enabled: True
        check_interval: 30
        failure_threshold: 2
        health_checks: ["http://localhost:8080/health", "check_disk_space", "check_memory_usage"]
        failure_actions: ["alert", "scale_down", "rollback"]
    }
    autoscaling: batch.BatchAutoscaling {
        enabled: True
        min_parallel: 2
        max_parallel: 12
        scale_up_threshold: 0.85
        scale_down_threshold: 0.15
        target_utilization: 0.65
        # 3 minutes
        cooldown_period: 180
    }
    metrics: batch.BatchMetrics {
        detailed_metrics: True
        # 30 days
        retention_hours: 720
        # 1 min, 5 min, 30 min, 1 hour
        aggregation_intervals: [60, 300, 1800, 3600]
        enable_export: True
        export_config: {
            "prometheus_endpoint": "http://prometheus.local:9090"
            "grafana_dashboard": "batch_operations_dashboard"
        }
    }
    storage: workflows.StorageConfig {
        backend: "surrealdb"
        connection_config: {
            "url": "ws://surrealdb.prod:8000"
            "namespace": "production"
            "database": "batch_workflows"
            "user": "batch_executor"
            "auth_token": "{{env.SURREALDB_TOKEN}}"
        }
        enable_persistence: True
        # 90 days
        retention_hours: 2160
        enable_compression: True
        encryption: settings.SecretProvider {
            provider: "sops"
            sops_config: settings.SopsConfig {
                config_path: "./.sops.yaml"
                age_key_file: "{{env.HOME}}/.config/sops/age/keys.txt"
                use_age: True
            }
        }
    }
    security_config: {
        "tls_enabled": "True"
        "auth_required": "True"
        "rbac_enabled": "True"
        "audit_level": "full"
    }
    webhook_endpoints: [
        "https://hooks.slack.com/services/ops-notifications"
        "https://api.pagerduty.com/generic/incidents"
    ]
    performance_config: {
        "io_threads": "8"
        "worker_threads": "16"
        "batch_size": "50"
        "connection_pool_size": "20"
    }
}
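
# Hedged sketch: a scaled-down executor for local or non-production use, built
# only from fields demonstrated in Example 4. That the omitted sections (queues,
# provider mix, health checks, autoscaling, metrics, ...) are all optional is an
# assumption about the batch schema, not something the examples above confirm.
dev_batch_executor: batch.BatchExecutor = batch.BatchExecutor {
    executor_id: "dev_batch_executor"
    name: "Development Batch Executor"
    description: "Minimal executor for local testing"
    scheduler: batch.BatchScheduler {
        strategy: "resource_aware"
        resource_limits: {
            "max_cpu_cores": 4
            "max_memory_mb": 8192
        }
        scheduling_interval: 10
    }
    storage: workflows.StorageConfig {
        backend: "filesystem"
        base_path: "./dev_batch"
    }
}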

# Example 5: Template for Common Infrastructure Deployment
infra_deployment_template: workflows.WorkflowTemplate = workflows.WorkflowTemplate {
    template_id: "standard_infra_deployment"
    name: "Standard Infrastructure Deployment Template"
    description: "Template for deploying standard infrastructure with customizable parameters"
    category: "infrastructure"
    workflow_template: workflows.BatchWorkflow {
        # Template parameter: {{template.workflow_id}}
        workflow_id: "custom_deployment"
        # Template parameter: {{template.workflow_name}}
        name: "Custom Deployment"
        operations: [
            workflows.BatchOperation {
                operation_id: "create_servers"
                # Template: Create {{template.server_count}} servers
                name: "Create servers"
                operation_type: "server"
                # Template parameter: {{template.provider}}
                provider: "upcloud"
                action: "create"
                parameters: {
                    # Template parameter: {{template.server_count}}
                    "count": "3"
                    # Template parameter: {{template.server_type}}
                    "type": "web"
                    # Template parameter: {{template.zone}}
                    "zone": "fi-hel2"
                }
            }
            workflows.BatchOperation {
                operation_id: "install_base_taskservs"
                name: "Install base taskservs"
                operation_type: "taskserv"
                action: "create"
                parameters: {
                    # Template parameter: {{template.base_taskservs}}
                    "taskservs": "kubernetes,prometheus,grafana"
                }
                dependencies: [
                    workflows.DependencyDef {
                        target_operation_id: "create_servers"
                        dependency_type: "sequential"
                    }
                ]
            }
        ]
        storage: workflows.StorageConfig {
            # Template parameter: {{template.storage_backend}}
            backend: "filesystem"
            # Template parameter: {{template.storage_path}}
            base_path: "./deployments"
        }
    }
    parameters: {
        "workflow_id": "custom_deployment"
        "workflow_name": "Custom Deployment"
        "server_count": "3"
        "server_type": "web"
        "provider": "upcloud"
        "zone": "fi-hel2"
        "base_taskservs": "kubernetes,prometheus,grafana"
        "storage_backend": "filesystem"
        "storage_path": "./deployments"
    }
    required_parameters: [
        "workflow_id"
        "server_count"
        "provider"
    ]
    examples: [
        "Small deployment: server_count=2, server_type=micro"
        "Production deployment: server_count=6, server_type=standard, provider=upcloud"
    ]
}
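
# Hedged sketch: a parameter set for the "Small deployment" case listed in the
# template's examples (server_count=2, server_type=micro). The keys come straight
# from the template's parameters map; how such a map is applied to the template
# at render time is an assumption about the surrounding tooling.
small_deployment_parameters: {str:str} = {
    "workflow_id": "small_deployment_001"
    "workflow_name": "Small Deployment"
    "server_count": "2"
    "server_type": "micro"
    "provider": "upcloud"
    "zone": "fi-hel2"
}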