prvng_kcl/examples/mixed_provider_workflow.k

# Mixed Provider Workflow Example
# Demonstrates deployment across multiple cloud providers with advanced features
import ..main
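# Usage note (not part of the original example): one way to render this
# workflow to YAML is with the kcl CLI (assuming it is installed), run from
# the package root so the relative import above resolves, e.g.:
#   kcl run examples/mixed_provider_workflow.k
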
# Production deployment across UpCloud and AWS
production_deployment: main.BatchWorkflow = main.BatchWorkflow {
    workflow_id: "prod_multicloud_001"
    name: "Production Multi-Cloud Deployment"
    description: "Deploy production infrastructure across UpCloud (compute) and AWS (managed services)"
    operations: [
        # Phase 1: Create UpCloud infrastructure
        main.BatchOperation {
            operation_id: "upcloud_network"
            name: "Create UpCloud Network Infrastructure"
            operation_type: "custom"
            provider: "upcloud"
            action: "create"
            parameters: {
                "resource_type": "network"
                "vpc_cidr": "10.0.0.0/16"
                "subnets": "public,private"
                "zones": "fi-hel2,de-fra1"
            }
            priority: 10
            timeout: 300
        }
        main.BatchOperation {
            operation_id: "upcloud_compute"
            name: "Create UpCloud Compute Instances"
            operation_type: "server"
            provider: "upcloud"
            action: "create"
            parameters: {
                "server_count": "6"
                "plan": "2xCPU-4GB"
                "zones": "fi-hel2,de-fra1"
                "distribution": "even" # Spread across zones
                "server_type": "k8s-worker"
            }
            dependencies: [
                main.DependencyDef {
                    target_operation_id: "upcloud_network"
                    dependency_type: "sequential"
                    timeout: 300
                }
            ]
            priority: 9
            timeout: 900
            allow_parallel: True
        }
        # Phase 2: Create AWS managed services
        main.BatchOperation {
            operation_id: "aws_database"
            name: "Create AWS RDS PostgreSQL"
            operation_type: "server"
            provider: "aws"
            action: "create"
            parameters: {
                "service": "rds"
                "engine": "postgresql"
                "version": "15.4"
                "instance_class": "db.t3.medium"
                "allocated_storage": "100"
                "multi_az": "true"
                "region": "eu-west-1"
                "vpc_security_groups": "prod-db-sg"
            }
            priority: 9
            timeout: 1800 # RDS can take time
            allow_parallel: True
        }
        main.BatchOperation {
            operation_id: "aws_redis"
            name: "Create AWS ElastiCache Redis"
            operation_type: "server"
            provider: "aws"
            action: "create"
            parameters: {
                "service": "elasticache"
                "engine": "redis"
                "node_type": "cache.t3.micro"
                "num_cache_nodes": "2"
                "region": "eu-west-1"
                "parameter_group": "default.redis7"
            }
            priority: 9
            timeout: 1200
            allow_parallel: True
        }
        # Phase 3: Set up Kubernetes cluster on UpCloud
        main.BatchOperation {
            operation_id: "install_k8s_control"
            name: "Install Kubernetes Control Plane"
            operation_type: "taskserv"
            provider: "upcloud"
            action: "create"
            parameters: {
                "taskserv": "kubernetes"
                "role": "control-plane"
                "version": "v1.31.0"
                "target_count": "3"
                "ha_mode": "true"
                "container_runtime": "containerd"
            }
            dependencies: [
                main.DependencyDef {
                    target_operation_id: "upcloud_compute"
                    dependency_type: "sequential"
                    timeout: 600
                }
            ]
            priority: 8
            timeout: 1800
        }
        main.BatchOperation {
            operation_id: "install_k8s_workers"
            name: "Install Kubernetes Worker Nodes"
            operation_type: "taskserv"
            provider: "upcloud"
            action: "create"
            parameters: {
                "taskserv": "kubernetes"
                "role": "worker"
                "target_count": "3"
                "container_runtime": "containerd"
                "join_existing": "true"
            }
            dependencies: [
                main.DependencyDef {
                    target_operation_id: "install_k8s_control"
                    dependency_type: "sequential"
                    timeout: 300
                }
            ]
            priority: 7
            timeout: 1200
        }
        # Phase 4: Install cluster services
        main.BatchOperation {
            operation_id: "install_networking"
            name: "Install Cluster Networking"
            operation_type: "taskserv"
            action: "create"
            parameters: {
                "taskserv": "cilium"
                "version": "1.14.2"
                "mode": "tunnel"
                "enable_hubble": "true"
            }
            dependencies: [
                main.DependencyDef {
                    target_operation_id: "install_k8s_workers"
                    dependency_type: "sequential"
                    timeout: 300
                }
            ]
            priority: 6
            timeout: 600
        }
        main.BatchOperation {
            operation_id: "install_monitoring"
            name: "Install Monitoring Stack"
            operation_type: "taskserv"
            action: "create"
            parameters: {
                "taskserv": "prometheus-stack"
                "grafana_enabled": "true"
                "alertmanager_enabled": "true"
                "node_exporter_enabled": "true"
            }
            dependencies: [
                main.DependencyDef {
                    target_operation_id: "install_networking"
                    dependency_type: "sequential"
                    timeout: 180
                }
            ]
            priority: 5
            timeout: 900
        }
        # Phase 5: Configure cross-cloud connectivity
        main.BatchOperation {
            operation_id: "configure_connectivity"
            name: "Configure Cross-Cloud Connectivity"
            operation_type: "custom"
            action: "configure"
            parameters: {
                "connectivity_type": "vpn"
                "upcloud_endpoint": "dynamic"
                "aws_vpc_id": "auto-detect"
                "encryption": "wireguard"
                "routing": "bgp"
            }
            dependencies: [
                main.DependencyDef {
                    target_operation_id: "aws_database"
                    dependency_type: "sequential"
                    timeout: 60
                },
                main.DependencyDef {
                    target_operation_id: "install_monitoring"
                    dependency_type: "sequential"
                    timeout: 60
                }
            ]
            priority: 4
            timeout: 600
        }
        # Phase 6: Final validation
        main.BatchOperation {
            operation_id: "validate_deployment"
            name: "Validate Complete Deployment"
            operation_type: "custom"
            action: "configure"
            parameters: {
                "validation_type": "end_to_end"
                "test_database_connectivity": "true"
                "test_redis_connectivity": "true"
                "test_k8s_cluster": "true"
                "test_monitoring": "true"
            }
            dependencies: [
                main.DependencyDef {
                    target_operation_id: "configure_connectivity"
                    dependency_type: "sequential"
                    timeout: 300
                }
            ]
            priority: 1
            timeout: 600
        }
    ]
    # Advanced workflow configuration
    max_parallel_operations: 4
    global_timeout: 7200 # 2 hours
    fail_fast: False # Continue on non-critical failures
    # SurrealDB for persistent state
    storage: main.StorageConfig {
        backend: "surrealdb"
        connection_config: {
            "url": "ws://surrealdb.internal:8000"
            "namespace": "production"
            "database": "multicloud_workflows"
            "user": "workflow_executor"
            "auth_token": "{{env.SURREALDB_TOKEN}}"
        }
        enable_persistence: True
        retention_hours: 2160 # 90 days for production
        enable_compression: True
        encryption: main.SecretProvider {
            provider: "sops"
            sops_config: main.SopsConfig {
                config_path: "./.sops.yaml"
                age_key_file: "{{env.HOME}}/.config/sops/age/keys.txt"
                use_age: True
            }
        }
    }
    # Comprehensive monitoring
    monitoring: main.MonitoringConfig {
        enabled: True
        backend: "prometheus"
        enable_tracing: True
        enable_notifications: True
        notification_channels: [
            "webhook:slack://prod-ops-alerts",
            "webhook:pagerduty://high-priority",
            "email:devops-team@company.com"
        ]
        log_level: "info"
        collection_interval: 30
    }
    # Production-grade retry policy
    default_retry_policy: main.RetryPolicy {
        max_attempts: 3
        initial_delay: 60
        max_delay: 600
        backoff_multiplier: 2
        retry_on_errors: [
            "timeout",
            "connection_error",
            "rate_limit",
            "resource_unavailable",
            "quota_exceeded"
        ]
        retry_on_any_error: False
    }
    # Conservative rollback strategy
    default_rollback_strategy: main.RollbackStrategy {
        enabled: True
        strategy: "manual" # Manual approval for production rollbacks
        preserve_partial_state: True
        rollback_timeout: 1800
        custom_rollback_operations: [
            "backup_state",
            "notify_team",
            "create_incident"
        ]
    }
    # Execution context for tracking
    execution_context: {
        "environment": "production"
        "deployment_type": "multi_cloud"
        "cost_center": "infrastructure"
        "owner": "platform-team"
        "change_request": "CHG-2025-001"
        "approval": "approved"
    }
    # Hooks for integration
    pre_workflow_hooks: [
        "validate_prerequisites",
        "check_maintenance_windows",
        "notify_deployment_start"
    ]
    post_workflow_hooks: [
        "run_smoke_tests",
        "update_monitoring_dashboards",
        "notify_deployment_complete",
        "update_documentation"
    ]
}
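
# Optional sanity checks (illustrative additions, not part of the original
# example): KCL evaluates module-level assert statements, so basic invariants
# of the workflow above can be checked directly against its fields.
assert len(production_deployment.operations) > 0, "workflow must define at least one operation"
assert production_deployment.max_parallel_operations > 0, "max_parallel_operations must be positive"
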
# Advanced batch executor configuration for this workflow
multicloud_executor: main.BatchExecutor = main.BatchExecutor {
    executor_id: "multicloud_prod_executor"
    name: "Multi-Cloud Production Executor"
    description: "Production-ready executor for multi-cloud deployments"
    scheduler: main.BatchScheduler {
        strategy: "resource_aware"
        resource_limits: {
            "max_cpu_cores": 32
            "max_memory_mb": 65536
            "max_network_bandwidth": 10000
            "max_concurrent_api_calls": 100
        }
        scheduling_interval: 15
        enable_preemption: True
    }
    # Multiple queues for different priorities
    queues: [
        main.BatchQueue {
            queue_id: "critical"
            queue_type: "priority"
            max_size: 50
            retention_period: 86400
            max_delivery_attempts: 5
        },
        main.BatchQueue {
            queue_id: "standard"
            queue_type: "standard"
            max_size: 200
            retention_period: 604800
            dead_letter_queue: "failed_operations"
            max_delivery_attempts: 3
        }
    ]
    # Mixed provider configuration
    provider_config: main.ProviderMixConfig {
        primary_provider: "upcloud"
        secondary_providers: ["aws"]
        provider_selection: "cost_optimize"
        cross_provider_networking: {
            "vpn_enabled": "true"
            "mesh_networking": "wireguard"
            "encryption": "aes256"
        }
        provider_limits: {
            "upcloud": {
                "max_servers": 50
                "max_storage_gb": 10000
                "api_rate_limit": "100/min"
            }
            "aws": {
                "max_instances": 20
                "max_ebs_gb": 5000
                "api_rate_limit": "1000/min"
            }
        }
    }
    # Production health monitoring
    health_check: main.BatchHealthCheck {
        enabled: True
        check_interval: 30
        check_timeout: 15
        failure_threshold: 2
        success_threshold: 3
        health_checks: [
            "http://localhost:8080/health",
            "check_provider_apis",
            "check_storage_backend",
            "check_monitoring_systems"
        ]
        failure_actions: [
            "alert",
            "graceful_degradation",
            "escalate"
        ]
    }
    # Intelligent autoscaling
    autoscaling: main.BatchAutoscaling {
        enabled: True
        min_parallel: 3
        max_parallel: 15
        scale_up_threshold: 0.75
        scale_down_threshold: 0.25
        cooldown_period: 300
        target_utilization: 0.60
    }
    # Comprehensive metrics
    metrics: main.BatchMetrics {
        detailed_metrics: True
        retention_hours: 2160 # 90 days
        aggregation_intervals: [60, 300, 1800, 3600, 86400]
        enable_export: True
        export_config: {
            "prometheus_endpoint": "http://prometheus.monitoring:9090"
            "grafana_dashboard": "multicloud_operations"
            "datadog_api_key": "{{env.DATADOG_API_KEY}}"
        }
        custom_metrics: [
            "provider_api_latency",
            "cross_cloud_bandwidth",
            "cost_tracking",
            "sla_compliance"
        ]
    }
}
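
# Illustrative cross-check (an assumed invariant, not part of the original
# example): keep the workflow's parallelism ceiling within the executor's
# autoscaling bounds.
assert production_deployment.max_parallel_operations <= multicloud_executor.autoscaling.max_parallel, "workflow parallelism exceeds the executor autoscaling ceiling"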