- Remove KCL ecosystem (~220 files deleted) - Migrate all infrastructure to Nickel schema system - Consolidate documentation: legacy docs → provisioning/docs/src/ - Add CI/CD workflows (.github/) and Rust build config (.cargo/) - Update core system for Nickel schema parsing - Update README.md and CHANGES.md for v5.0.0 release - Fix pre-commit hooks: end-of-file, trailing-whitespace - Breaking changes: KCL workspaces require migration - Migration bridge available in docs/src/development/
490 lines
13 KiB
Plaintext
490 lines
13 KiB
Plaintext
# Multi-Region High Availability Workspace
|
|
# Global deployment across 3 providers and 3 geographic regions
|
|
#
|
|
# Architecture:
|
|
# - US East (DigitalOcean NYC): Primary region, active serving
|
|
# - EU Central (Hetzner Germany): Secondary region, active serving + read replicas
|
|
# - Asia Pacific (AWS Singapore): Tertiary region, active serving + read replicas
|
|
#
|
|
# Features:
|
|
# - Geographic load balancing with Route53
|
|
# - Primary database (US East) with cross-region read replicas
|
|
# - Automatic failover
|
|
# - Zero downtime deployment
|
|
# - Cost: ~$311/month across all regions (see cost_estimate)
|
|
|
|
let hetzner = import "../../../extensions/providers/hetzner/nickel/main.ncl" in
|
|
let aws = import "../../../extensions/providers/aws/nickel/main.ncl" in
|
|
let digitalocean = import "../../../extensions/providers/digitalocean/nickel/main.ncl" in
|
|
|
|
{
|
|
# Workspace identity and ownership metadata.
workspace_name = "multi-region-ha",
description = "High availability deployment across 3 geographic regions",
version = "1.0",

environment = "production",
owner = "platform-team",

# Workspace-level tags.
tags = {
  "project" = "global-app",
  "deployment" = "multi-region-ha",
  "environment" = "production"
},
|
|
|
|
# =============================================================================
# Global Configuration
# =============================================================================

# Settings shared by all regions.
# NOTE(review): units are not stated in this file; the numeric values are
# presumably seconds (30 s checks, 60 s TTL, 5 min lag/RPO, 15 min RTO) —
# confirm against the consuming schema.
global_config = {
  dns_provider = "route53",
  health_check_interval = 30,
  failover_ttl = 60,
  replication_lag_tolerance = 300,
  rpo_target = 300,
  rto_target = 900
},
|
|
|
|
# =============================================================================
# US EAST REGION (DigitalOcean NYC) - PRIMARY
# =============================================================================

# Primary region: VPC, three application droplets, an HTTPS load balancer and
# the primary PostgreSQL cluster, all in nyc3.
region_us_east = {
  name = "us-east",
  provider = "digitalocean",
  region_code = "nyc3",
  primary = true,
  description = "Primary region: serves North America",

  # Private Network
  vpc = digitalocean.VPC & {
    name = "us-east-vpc",
    region = "nyc3",
    ip_range = "10.0.0.0/16"
  },

  # Application Servers (3 replicas for HA)
  app_servers = digitalocean.Droplet & {
    name = "us-app",
    region = "nyc3",
    size = "s-2vcpu-4gb",
    image = "ubuntu-22-04-x64",
    count = 3,
    # "{{ ... }}" is a plain string placeholder, not a Nickel reference;
    # presumably resolved later by the provisioning tooling — TODO confirm.
    vpc_uuid = "{{ region_us_east.vpc.id }}",
    backups = true,
    monitoring = true,
    tags = ["us-east", "primary", "production"]
  },

  # Load Balancer: accepts HTTPS on 443 and forwards plain HTTP to port 8080.
  # NOTE(review): no field here selects the backend droplets; confirm whether
  # the LoadBalancer contract or the tooling wires them up.
  load_balancer = digitalocean.LoadBalancer & {
    name = "us-lb",
    algorithm = "round_robin",
    region = "nyc3",
    forwarding_rules = [
      {
        entry_protocol = "https",
        entry_port = 443,
        target_protocol = "http",
        target_port = 8080,
        certificate_id = "cert-id"
      }
    ],
    health_check = {
      protocol = "http",
      port = 8080,
      path = "/health",
      interval = 10,
      timeout = 5,
      healthy_threshold = 3,
      unhealthy_threshold = 3
    },
    sticky_sessions = {
      type = "cookies",
      cookie_name = "REGION_SESSION",
      cookie_ttl_seconds = 300
    }
  },

  # Primary Database (PostgreSQL 14, 3 nodes)
  # NOTE(review): `multi_az` is set here, but it is not evident from this file
  # that the DigitalOcean Database contract accepts it — confirm.
  database = digitalocean.Database & {
    name = "us-db-primary",
    engine = "pg",
    version = "14",
    size = "db-s-2vcpu-4gb",
    region = "nyc3",
    num_nodes = 3,
    multi_az = true,
    backup_retention_days = 30
  }
},
|
|
|
|
# =============================================================================
# EU CENTRAL REGION (Hetzner Germany) - SECONDARY
# =============================================================================

# Secondary region: private network, three application servers, a load
# balancer and a read-only database replica, all in nbg1.
region_eu_central = {
  name = "eu-central",
  provider = "hetzner",
  location = "nbg1",
  description = "Secondary region: serves Europe, read replicas",

  # Private Network
  network = hetzner.Network & {
    name = "eu-central-network",
    ip_range = "10.1.0.0/16"
  },

  # Application Servers (3 replicas for HA)
  app_servers = hetzner.Server & {
    name = "eu-app",
    server_type = "cpx21",
    image = "ubuntu-22.04",
    location = "nbg1",
    count = 3,
    # NOTE(review): a single fixed address is declared while count = 3;
    # confirm how the tooling assigns addresses to the other replicas.
    networks = [
      {
        network_name = "eu-central-network",
        ip = "10.1.1.10"
      }
    ],
    labels = {
      "region" = "eu-central",
      "tier" = "application"
    }
  },

  # Load Balancer
  # NOTE(review): the service listens on 443 with protocol "http" and no
  # certificate, unlike the US East balancer which terminates HTTPS; confirm
  # whether "https" plus a certificate is intended. The health check also
  # omits the "/health" path used elsewhere in this file.
  load_balancer = hetzner.LoadBalancer & {
    name = "eu-lb",
    load_balancer_type = "lb21",
    location = "nbg1",
    services = [
      {
        protocol = "http",
        listen_port = 443,
        destination_port = 8080,
        health_check = {
          protocol = "http",
          port = 8080,
          interval = 10,
          timeout = 5
        }
      }
    ]
  },

  # Read Replica Database (from US primary)
  # Plain record: no provider contract is applied to this field.
  database_replica = {
    type = "read-replica",
    source = "region_us_east.database",
    replication_lag_seconds = 300,
    read_only = true,
    description = "Read-only replica of US primary database"
  }
},
|
|
|
|
# =============================================================================
# ASIA PACIFIC REGION (AWS Singapore) - TERTIARY
# =============================================================================

# Tertiary region: VPC, one subnet, three EC2 instances, an application load
# balancer and an RDS read replica, all in ap-southeast-1.
region_asia_southeast = {
  name = "asia-southeast",
  provider = "aws",
  region_code = "ap-southeast-1",
  description = "Tertiary region: serves Asia Pacific, read replicas",

  # VPC for region
  vpc = aws.VPC & {
    cidr_block = "10.2.0.0/16",
    enable_dns_hostnames = true,
    enable_dns_support = true,
    tags = [
      { key = "Name", value = "asia-vpc" },
      { key = "Region", value = "ap-southeast-1" }
    ]
  },

  # Private Subnet
  # "{{ ... }}" values in this region are plain string placeholders, not Nickel
  # references; presumably resolved by the provisioning tooling — TODO confirm.
  private_subnet = aws.Subnet & {
    vpc_id = "{{ region_asia_southeast.vpc.id }}",
    cidr_block = "10.2.1.0/24",
    availability_zone = "ap-southeast-1a",
    tags = [
      { key = "Name", value = "asia-private-subnet" }
    ]
  },

  # Application Servers (EC2 instances)
  app_servers = aws.EC2 & {
    instance_type = "t3.medium",
    # Placeholder AMI id; must be replaced with a real image before deployment.
    image_id = "ami-xxxxxxxx",
    region = "ap-southeast-1",
    subnet_id = "{{ region_asia_southeast.private_subnet.id }}",
    count = 3,
    tags = [
      { key = "Name", value = "asia-app" },
      { key = "Region", value = "ap-southeast-1" }
    ]
  },

  # Load Balancer (ALB)
  # NOTE(review): the balancer is public (internal = false) but is attached to
  # one subnet labelled private; AWS application load balancers normally need
  # subnets in at least two availability zones — confirm.
  load_balancer = aws.ALB & {
    name = "asia-lb",
    internal = false,
    load_balancer_type = "application",
    region = "ap-southeast-1",
    subnets = ["{{ region_asia_southeast.private_subnet.id }}"],
    tags = [
      { key = "Name", value = "asia-lb" }
    ]
  },

  # Read Replica Database (from US primary)
  # NOTE(review): the source is the DigitalOcean database in region_us_east,
  # which declares `name` but no `identifier` in this file; confirm that the
  # placeholder resolves and that an RDS replica of a non-RDS source is
  # supported by the tooling.
  database_replica = aws.RDS & {
    identifier = "asia-db-replica",
    replicate_source_db = "{{ region_us_east.database.identifier }}",
    instance_class = "db.t3.small",
    region = "ap-southeast-1",
    multi_az = true,
    tags = [
      { key = "Name", value = "asia-db-replica" }
    ]
  }
},
|
|
|
|
# =============================================================================
# VPN Tunnels Between Regions
# =============================================================================

# Three IPsec tunnels forming a ring: US -> EU -> Asia -> US.
# The network ranges match the per-region networks declared in this file
# (10.0.0.0/16 US East, 10.1.0.0/16 EU Central, 10.2.0.0/16 Asia Southeast).
vpn_tunnels = {
  us_to_eu = {
    name = "us-eu-vpn",
    source_region = "us-east",
    destination_region = "eu-central",
    protocol = "ipsec",
    encryption = "aes-256",
    source_network = "10.0.0.0/16",
    destination_network = "10.1.0.0/16"
  },

  eu_to_asia = {
    name = "eu-asia-vpn",
    source_region = "eu-central",
    destination_region = "asia-southeast",
    protocol = "ipsec",
    encryption = "aes-256",
    source_network = "10.1.0.0/16",
    destination_network = "10.2.0.0/16"
  },

  asia_to_us = {
    name = "asia-us-vpn",
    source_region = "asia-southeast",
    destination_region = "us-east",
    protocol = "ipsec",
    encryption = "aes-256",
    source_network = "10.2.0.0/16",
    destination_network = "10.0.0.0/16"
  }
},
|
|
|
|
# =============================================================================
# Global DNS and Load Balancing (Route53)
# =============================================================================

# DNS entry point for api.example.com: per-geography routing, one health check
# per region, and the failover order.
global_dns = {
  domain = "api.example.com",
  dns_provider = "route53",

  # Geographic routing policy
  # NOTE(review): these endpoint names ("us-east-lb", ...) differ from the
  # load balancer names declared in the regions ("us-lb", "eu-lb", "asia-lb");
  # confirm how the tooling maps one to the other.
  geolocation_rules = [
    {
      location = "North America",
      endpoint = "us-east-lb",
      weight = 100
    },
    {
      location = "Europe",
      endpoint = "eu-central-lb",
      weight = 100
    },
    {
      location = "Asia Pacific",
      endpoint = "asia-southeast-lb",
      weight = 100
    }
  ],

  # Health checks for each region
  health_checks = [
    {
      region = "us-east",
      endpoint = "us.api.example.com",
      protocol = "HTTPS",
      port = 443,
      path = "/health",
      interval = 30,
      failure_threshold = 3
    },
    {
      region = "eu-central",
      endpoint = "eu.api.example.com",
      protocol = "HTTPS",
      port = 443,
      path = "/health",
      interval = 30,
      failure_threshold = 3
    },
    {
      region = "asia-southeast",
      endpoint = "asia.api.example.com",
      protocol = "HTTPS",
      port = 443,
      path = "/health",
      interval = 30,
      failure_threshold = 3
    }
  ],

  # Failover configuration
  failover_policy = {
    strategy = "geographic",
    primary = "us-east",
    secondary = "eu-central",
    tertiary = "asia-southeast",
    health_check_grace_period = 60
  }
},
|
|
|
|
# =============================================================================
# Database Replication Configuration
# =============================================================================

# Replication topology: one primary region plus two read-only replicas.
database_replication = {
  # NOTE(review): "multi-master" contradicts the rest of this block, which
  # names a single primary_region and marks every replica read_only. The
  # value is left unchanged because the accepted mode names are not visible
  # here — confirm and correct.
  mode = "multi-master",
  primary_region = "us-east",
  read_replicas = [
    {
      region = "eu-central",
      lag_tolerance = 300,
      read_only = true
    },
    {
      region = "asia-southeast",
      lag_tolerance = 300,
      read_only = true
    }
  ],

  # Setting names follow PostgreSQL's replication parameters
  # (wal_level, max_wal_senders, max_replication_slots).
  replication_settings = {
    method = "logical",
    wal_level = "logical",
    max_wal_senders = 5,
    max_replication_slots = 5,
    backup_retention_days = 30
  }
},
|
|
|
|
# =============================================================================
# Monitoring and Alerting
# =============================================================================

# Per-region metric sets and the alert rules evaluated against them.
monitoring = {
  enabled = true,

  # The primary region tracks database connections; the replica regions
  # track replication lag instead.
  regional_metrics = [
    {
      region = "us-east",
      metrics = ["cpu", "memory", "disk", "network", "database_connections"]
    },
    {
      region = "eu-central",
      metrics = ["cpu", "memory", "disk", "network", "replication_lag"]
    },
    {
      region = "asia-southeast",
      metrics = ["cpu", "memory", "disk", "network", "replication_lag"]
    }
  ],

  alerts = [
    {
      name = "high-cpu",
      condition = "cpu > 80%",
      action = "scale-up",
      severity = "warning"
    },
    {
      name = "replication-lag",
      # Threshold aligned with the 300 used for replication_lag_tolerance,
      # rpo_target and each replica's lag_tolerance elsewhere in this file;
      # the previous 600s only fired after lag was already double the RPO.
      condition = "replication_lag > 300s",
      action = "alert",
      severity = "critical"
    },
    {
      name = "region-down",
      condition = "health_check_failed",
      action = "failover",
      severity = "critical"
    }
  ]
},
|
|
|
|
# =============================================================================
# Backup and Disaster Recovery
# =============================================================================

# Daily backups kept for 30 days; the primary region uses automated backups,
# the replica regions back up from their replicas.
backup_strategy = {
  frequency = "daily",
  retention_days = 30,

  # Each location repeats the retention as a display string ("30 days");
  # keep it in sync with retention_days above.
  backup_locations = [
    {
      region = "us-east",
      strategy = "automated",
      retention = "30 days"
    },
    {
      region = "eu-central",
      strategy = "replica-backup",
      retention = "30 days"
    },
    {
      region = "asia-southeast",
      strategy = "replica-backup",
      retention = "30 days"
    }
  ]
},
|
|
|
|
# =============================================================================
# Cost Estimation
# =============================================================================

# Informational monthly cost breakdown per region. All values are display
# strings. Totals: $102 + ~$79 + $130 = $311/month, x 12 = $3,732/year.
cost_estimate = {
  monthly_breakdown = {
    # NOTE(review): the load balancer is listed at "$0"; confirm against
    # current provider pricing.
    us_east = {
      droplets = "$72",  # 3 x $24
      load_balancer = "$0",
      database = "$30",
      subtotal = "$102"
    },

    # Line items are euro amounts (3 x €20.90 = €62.70; €62.70 + €10 = €72.70),
    # so they carry the euro sign to agree with the subtotal.
    eu_central = {
      servers = "€62.70",  # 3 x CPX21 @ €20.90
      load_balancer = "€10",
      subtotal = "€72.70 (~$79)"
    },

    asia_southeast = {
      ec2 = "$80",  # 3 x t3.medium
      alb = "$20",
      database = "$30",
      subtotal = "$130"
    },

    total_monthly = "$311",
    total_annual = "$3,732"
  }
}
|
|
}
|