# Mixed Provider Workflow Example
# Demonstrates deployment across multiple cloud providers with advanced features

import ..main

# Production deployment across UpCloud and AWS
production_deployment: main.BatchWorkflow = main.BatchWorkflow {
    workflow_id: "prod_multicloud_001"
    name: "Production Multi-Cloud Deployment"
    description: "Deploy production infrastructure across UpCloud (compute) and AWS (managed services)"

    operations: [
        # Phase 1: Create UpCloud infrastructure
        main.BatchOperation {
            operation_id: "upcloud_network"
            name: "Create UpCloud Network Infrastructure"
            operation_type: "custom"
            provider: "upcloud"
            action: "create"
            parameters: {
                "resource_type": "network"
                "vpc_cidr": "10.0.0.0/16"
                "subnets": "public,private"
                "zones": "fi-hel2,de-fra1"
            }
            priority: 10
            timeout: 300
        }

        main.BatchOperation {
            operation_id: "upcloud_compute"
            name: "Create UpCloud Compute Instances"
            operation_type: "server"
            provider: "upcloud"
            action: "create"
            parameters: {
                "server_count": "6"
                "plan": "2xCPU-4GB"
                "zones": "fi-hel2,de-fra1"
                "distribution": "even" # Spread across zones
                "server_type": "k8s-worker"
            }
            dependencies: [
                main.DependencyDef {
                    target_operation_id: "upcloud_network"
                    dependency_type: "sequential"
                    timeout: 300
                }
            ]
            priority: 9
            timeout: 900
            allow_parallel: True
        }

        # Phase 2: Create AWS managed services
        main.BatchOperation {
            operation_id: "aws_database"
            name: "Create AWS RDS PostgreSQL"
            operation_type: "server"
            provider: "aws"
            action: "create"
            parameters: {
                "service": "rds"
                "engine": "postgresql"
                "version": "15.4"
                "instance_class": "db.t3.medium"
                "allocated_storage": "100"
                "multi_az": "true"
                "region": "eu-west-1"
                "vpc_security_groups": "prod-db-sg"
            }
            priority: 9
            timeout: 1800 # RDS can take time
            allow_parallel: True
        }

        main.BatchOperation {
            operation_id: "aws_redis"
            name: "Create AWS ElastiCache Redis"
            operation_type: "server"
            provider: "aws"
            action: "create"
            parameters: {
                "service": "elasticache"
                "engine": "redis"
                "node_type": "cache.t3.micro"
                "num_cache_nodes": "2"
                "region": "eu-west-1"
                "parameter_group": "default.redis7"
            }
            priority: 9
            timeout: 1200
            allow_parallel: True
        }

        # Phase 3: Set up Kubernetes cluster on UpCloud
        main.BatchOperation {
            operation_id: "install_k8s_control"
            name: "Install Kubernetes Control Plane"
            operation_type: "taskserv"
            provider: "upcloud"
            action: "create"
            parameters: {
                "taskserv": "kubernetes"
                "role": "control-plane"
                "version": "v1.31.0"
                "target_count": "3"
                "ha_mode": "true"
                "container_runtime": "containerd"
            }
            dependencies: [
                main.DependencyDef {
                    target_operation_id: "upcloud_compute"
                    dependency_type: "sequential"
                    timeout: 600
                }
            ]
            priority: 8
            timeout: 1800
        }

        main.BatchOperation {
            operation_id: "install_k8s_workers"
            name: "Install Kubernetes Worker Nodes"
            operation_type: "taskserv"
            provider: "upcloud"
            action: "create"
            parameters: {
                "taskserv": "kubernetes"
                "role": "worker"
                "target_count": "3"
                "container_runtime": "containerd"
                "join_existing": "true"
            }
            dependencies: [
                main.DependencyDef {
                    target_operation_id: "install_k8s_control"
                    dependency_type: "sequential"
                    timeout: 300
                }
            ]
            priority: 7
            timeout: 1200
        }

        # Phase 4: Install cluster services
        main.BatchOperation {
            operation_id: "install_networking"
            name: "Install Cluster Networking"
            operation_type: "taskserv"
            action: "create"
            parameters: {
                "taskserv": "cilium"
                "version": "1.14.2"
                "mode": "tunnel"
                "enable_hubble": "true"
            }
            dependencies: [
                main.DependencyDef {
                    target_operation_id: "install_k8s_workers"
                    dependency_type: "sequential"
                    timeout: 300
                }
            ]
            priority: 6
            timeout: 600
        }

        main.BatchOperation {
            operation_id: "install_monitoring"
            name: "Install Monitoring Stack"
            operation_type: "taskserv"
            action: "create"
            parameters: {
                "taskserv": "prometheus-stack"
                "grafana_enabled": "true"
                "alertmanager_enabled": "true"
                "node_exporter_enabled": "true"
            }
            dependencies: [
                main.DependencyDef {
                    target_operation_id: "install_networking"
                    dependency_type: "sequential"
                    timeout: 180
                }
            ]
            priority: 5
            timeout: 900
        }

        # Phase 5: Configure cross-cloud connectivity
        main.BatchOperation {
            operation_id: "configure_connectivity"
            name: "Configure Cross-Cloud Connectivity"
            operation_type: "custom"
            action: "configure"
            parameters: {
                "connectivity_type": "vpn"
                "upcloud_endpoint": "dynamic"
                "aws_vpc_id": "auto-detect"
                "encryption": "wireguard"
                "routing": "bgp"
            }
            dependencies: [
                main.DependencyDef {
                    target_operation_id: "aws_database"
                    dependency_type: "sequential"
                    timeout: 60
                },
                main.DependencyDef {
                    target_operation_id: "install_monitoring"
                    dependency_type: "sequential"
                    timeout: 60
                }
            ]
            priority: 4
            timeout: 600
        }

        # Phase 6: Final validation
        main.BatchOperation {
            operation_id: "validate_deployment"
            name: "Validate Complete Deployment"
            operation_type: "custom"
            action: "configure"
            parameters: {
                "validation_type": "end_to_end"
                "test_database_connectivity": "true"
                "test_redis_connectivity": "true"
                "test_k8s_cluster": "true"
                "test_monitoring": "true"
            }
            dependencies: [
                main.DependencyDef {
                    target_operation_id: "configure_connectivity"
                    dependency_type: "sequential"
                    timeout: 300
                }
            ]
            priority: 1
            timeout: 600
        }
    ]

    # Advanced workflow configuration
    max_parallel_operations: 4
    global_timeout: 7200 # 2 hours
    fail_fast: False # Continue on non-critical failures

    # SurrealDB for persistent state
    storage: main.StorageConfig {
        backend: "surrealdb"
        connection_config: {
            "url": "ws://surrealdb.internal:8000"
            "namespace": "production"
            "database": "multicloud_workflows"
            "user": "workflow_executor"
            "auth_token": "{{env.SURREALDB_TOKEN}}"
        }
        enable_persistence: True
        retention_hours: 2160 # 90 days for production
        enable_compression: True
        encryption: main.SecretProvider {
            provider: "sops"
            sops_config: main.SopsConfig {
                config_path: "./.sops.yaml"
                age_key_file: "{{env.HOME}}/.config/sops/age/keys.txt"
                use_age: True
            }
        }
    }

    # Comprehensive monitoring
    monitoring: main.MonitoringConfig {
        enabled: True
        backend: "prometheus"
        enable_tracing: True
        enable_notifications: True
        notification_channels: [
            "webhook:slack://prod-ops-alerts",
            "webhook:pagerduty://high-priority",
            "email:devops-team@company.com"
        ]
        log_level: "info"
        collection_interval: 30
    }

    # Production-grade retry policy
    default_retry_policy: main.RetryPolicy {
        max_attempts: 3
        initial_delay: 60
        max_delay: 600
        backoff_multiplier: 2
        retry_on_errors: [
            "timeout",
            "connection_error",
            "rate_limit",
            "resource_unavailable",
            "quota_exceeded"
        ]
        retry_on_any_error: False
    }

    # Conservative rollback strategy
    default_rollback_strategy: main.RollbackStrategy {
        enabled: True
        strategy: "manual" # Manual approval for production rollbacks
        preserve_partial_state: True
        rollback_timeout: 1800
        custom_rollback_operations: [
            "backup_state",
            "notify_team",
            "create_incident"
        ]
    }

    # Execution context for tracking
    execution_context: {
        "environment": "production"
        "deployment_type": "multi_cloud"
        "cost_center": "infrastructure"
        "owner": "platform-team"
        "change_request": "CHG-2025-001"
        "approval": "approved"
    }

    # Hooks for integration
    pre_workflow_hooks: [
        "validate_prerequisites",
        "check_maintenance_windows",
        "notify_deployment_start"
    ]

    post_workflow_hooks: [
        "run_smoke_tests",
        "update_monitoring_dashboards",
        "notify_deployment_complete",
        "update_documentation"
    ]
}
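
# Usage sketch (hedged): assuming this example lives in a kcl.mod package alongside the
# `main` module it imports and the standard `kcl` CLI is installed, the file can be
# evaluated directly; the path below is illustrative, not part of this repository.
#
#   kcl run examples/mixed_provider_workflow.k
#
# Evaluating the file prints the resolved `production_deployment` (and the executor
# defined below) as YAML, which is a quick way to check field values and dependency
# wiring before handing the workflow to a batch executor.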

# Advanced batch executor configuration for this workflow
multicloud_executor: main.BatchExecutor = main.BatchExecutor {
    executor_id: "multicloud_prod_executor"
    name: "Multi-Cloud Production Executor"
    description: "Production-ready executor for multi-cloud deployments"

    scheduler: main.BatchScheduler {
        strategy: "resource_aware"
        resource_limits: {
            "max_cpu_cores": 32
            "max_memory_mb": 65536
            "max_network_bandwidth": 10000
            "max_concurrent_api_calls": 100
        }
        scheduling_interval: 15
        enable_preemption: True
    }

    # Multiple queues for different priorities
    queues: [
        main.BatchQueue {
            queue_id: "critical"
            queue_type: "priority"
            max_size: 50
            retention_period: 86400
            max_delivery_attempts: 5
        },
        main.BatchQueue {
            queue_id: "standard"
            queue_type: "standard"
            max_size: 200
            retention_period: 604800
            dead_letter_queue: "failed_operations"
            max_delivery_attempts: 3
        }
    ]

    # Mixed provider configuration
    provider_config: main.ProviderMixConfig {
        primary_provider: "upcloud"
        secondary_providers: ["aws"]
        provider_selection: "cost_optimize"
        cross_provider_networking: {
            "vpn_enabled": "true"
            "mesh_networking": "wireguard"
            "encryption": "aes256"
        }
        provider_limits: {
            "upcloud": {
                "max_servers": 50
                "max_storage_gb": 10000
                "api_rate_limit": "100/min"
            }
            "aws": {
                "max_instances": 20
                "max_ebs_gb": 5000
                "api_rate_limit": "1000/min"
            }
        }
    }

    # Production health monitoring
    health_check: main.BatchHealthCheck {
        enabled: True
        check_interval: 30
        check_timeout: 15
        failure_threshold: 2
        success_threshold: 3
        health_checks: [
            "http://localhost:8080/health",
            "check_provider_apis",
            "check_storage_backend",
            "check_monitoring_systems"
        ]
        failure_actions: [
            "alert",
            "graceful_degradation",
            "escalate"
        ]
    }

    # Intelligent autoscaling
    autoscaling: main.BatchAutoscaling {
        enabled: True
        min_parallel: 3
        max_parallel: 15
        scale_up_threshold: 0.75
        scale_down_threshold: 0.25
        cooldown_period: 300
        target_utilization: 0.60
    }

    # Comprehensive metrics
    metrics: main.BatchMetrics {
        detailed_metrics: True
        retention_hours: 2160 # 90 days
        aggregation_intervals: [60, 300, 1800, 3600, 86400]
        enable_export: True
        export_config: {
            "prometheus_endpoint": "http://prometheus.monitoring:9090"
            "grafana_dashboard": "multicloud_operations"
            "datadog_api_key": "{{env.DATADOG_API_KEY}}"
        }
        custom_metrics: [
            "provider_api_latency",
            "cross_cloud_bandwidth",
            "cost_tracking",
            "sla_compliance"
        ]
    }
}
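
# Illustrative sketch only: a scaled-down executor for rehearsing the same workflow
# outside production. It reuses the `main` schemas and field names shown above; the
# values are assumptions, and fields omitted here (provider_config, health_check,
# autoscaling, metrics) may be required by the schema and need to be supplied.
rehearsal_executor: main.BatchExecutor = main.BatchExecutor {
    executor_id: "multicloud_rehearsal_executor"
    name: "Multi-Cloud Rehearsal Executor"
    description: "Low-concurrency executor for dry-running the multi-cloud workflow"

    scheduler: main.BatchScheduler {
        strategy: "resource_aware"
        resource_limits: {
            "max_cpu_cores": 4
            "max_memory_mb": 8192
        }
        scheduling_interval: 30
        enable_preemption: False
    }

    queues: [
        main.BatchQueue {
            queue_id: "standard"
            queue_type: "standard"
            max_size: 20
            retention_period: 86400
            max_delivery_attempts: 3
        }
    ]
}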