provisioning/scripts/deploy-librecloud-hetzner.nu

623 lines
21 KiB
Text
Raw Normal View History

#!/usr/bin/env nu
# Deploy LibreCloud Kubernetes cluster to Hetzner Cloud + NixOS
# Complete infrastructure orchestration in 8 phases
use std log
# Build a success result record: the payload is stored under `ok` and `err` is null.
def result_ok [value: any] {
    {
        ok: $value,
        err: null
    }
}
# Build a failure result record: the message is stored under `err` and `ok` is null.
def result_err [message: string] {
    {
        ok: null,
        err: $message
    }
}
# Report whether a result record represents success, i.e. its `err` field is null.
def is_ok [result: record] {
    ($result | get err) == null
}
# ============================================================================
# PHASE 1: Environment Validation
# ============================================================================
# Phase 1: check that the required CLI tools are on PATH and that the workspace
# directory and its Nickel entry files exist.
#
# Returns a result record: on success `ok` is {workspace_dir: <path>}; on failure
# `err` names the first missing tool or path.
def validate_environment [] {
    log info "Phase 1: Validating environment and prerequisites..."

    # Tool name paired with the error reported when it cannot be found.
    let required_tools = [
        [tool msg];
        ["nickel" "Nickel not found"]
        ["nix" "Nix not found"]
        ["nixos-anywhere" "nixos-anywhere not found"]
        ["hcloud" "hcloud CLI not found"]
        ["ssh" "ssh not found"]
        ["curl" "curl not found"]
    ]
    let missing_tools = ($required_tools | where {|entry| which $entry.tool | is-empty })
    if not ($missing_tools | is-empty) {
        # Only the first missing tool (in declaration order) is reported.
        return (result_err ($missing_tools | first | get msg))
    }

    let workspace_dir = "workspaces/librecloud_hetzner"

    # Paths are checked in order; the first one that is absent ends validation.
    let required_paths = [
        {label: "Workspace not found", path: $workspace_dir}
        {label: "Main Nickel config missing", path: $"($workspace_dir)/infra/main/main.ncl"}
        {label: "Servers config missing", path: $"($workspace_dir)/infra/main/servers.ncl"}
    ]
    for item in $required_paths {
        if not ($item.path | path exists) {
            return (result_err $"($item.label): ($item.path)")
        }
    }

    log info "✓ Environment validated"
    result_ok {workspace_dir: $workspace_dir}
}
# ============================================================================
# PHASE 2: Export Nickel Configuration
# ============================================================================
# Phase 2: run `nickel export` on the workspace's main.ncl and parse the output as JSON.
#
# Parameters:
#   workspace_dir - workspace root containing infra/main/main.ncl
#
# Returns a result record: `ok` holds the parsed configuration; `err` describes a
# failed export, an empty configuration, a missing `infrastructure` or `servers`
# key, or any exception raised while exporting/parsing.
def export_nickel_config [workspace_dir: string] {
    log info "Phase 2: Exporting Nickel infrastructure configuration..."
    try {
        let main_ncl = $"($workspace_dir)/infra/main/main.ncl"
        let export_result = (^nickel export $main_ncl | complete)
        if $export_result.exit_code != 0 {
            return (result_err $"Nickel export failed: ($export_result.stderr)")
        }

        let config = ($export_result.stdout | from json)
        if ($config | is-empty) {
            return (result_err "Nickel export produced empty configuration")
        }

        # Key membership is tested with `not-in` against the record's columns;
        # piping into `has` does not work because `has` is not a command.
        if "infrastructure" not-in ($config | columns) {
            return (result_err "Configuration missing 'infrastructure' key")
        }
        let infra = $config.infrastructure
        if "servers" not-in ($infra | columns) {
            return (result_err "Configuration missing 'servers' list")
        }

        log info $"✓ Exported Nickel config with ($infra.servers | length) servers"
        result_ok $config
    } catch { |err|
        result_err $"Nickel export exception: ($err.msg)"
    }
}
# ============================================================================
# PHASE 3: Generate NixOS Flakes
# ============================================================================
# Phase 3: invoke the flake generator script on the workspace's servers.ncl and
# confirm that a flake.nix was written for each of the four expected hosts.
#
# Parameters:
#   workspace_dir - workspace root; flakes are written under <workspace_dir>/nixos
#
# Returns a result record: `ok` is "OK" on success, otherwise `err` explains why.
def generate_nixos_flakes [workspace_dir: string] {
    log info "Phase 3: Generating NixOS flakes from Nickel configuration..."

    let generator = "provisioning/scripts/nixos/generate-hetzner-nixos-flake.nu"
    if ($generator | path exists) == false {
        return (result_err $"Flake generation script not found: ($generator)")
    }

    try {
        let flake_root = $"($workspace_dir)/nixos"
        let servers_file = $"($workspace_dir)/infra/main/servers.ncl"
        let run = (^nu $generator $servers_file --output-dir $flake_root | complete)
        if $run.exit_code != 0 {
            return (result_err $"Flake generation failed: ($run.stderr)")
        }

        # Every expected host must have produced <flake_root>/<host>/flake.nix.
        let expected_hosts = ["wuji-cp-0", "wuji-strg-0", "wuji-wrkr-0", "sgoyol-0"]
        let any_missing = ($expected_hosts | any { |host|
            not ($"($flake_root)/($host)/flake.nix" | path exists)
        })
        if $any_missing {
            return (result_err "Flake generation incomplete")
        }

        log info "✓ Generated NixOS flakes for all servers"
        result_ok "OK"
    } catch { |failure|
        result_err $"Flake generation exception: ($failure.msg)"
    }
}
# ============================================================================
# PHASE 4: Validate NixOS Flakes
# ============================================================================
# Validate a single host's flake by running `nix flake show` on its directory.
#
# Parameters:
#   workspace_dir - workspace root; the flake lives in <workspace_dir>/nixos/<hostname>
#   hostname      - host whose flake is checked
#
# Returns {hostname, valid, error}: `error` is null when valid, otherwise the
# reason (missing directory, missing flake.nix, nix stderr, or an exception).
def validate_one_flake [workspace_dir: string, hostname: string] {
    let flake_dir = $"($workspace_dir)/nixos/($hostname)"

    # Shared constructor for the failure shape.
    let fail = { |reason| {hostname: $hostname, valid: false, error: $reason} }

    if not ($flake_dir | path exists) {
        return (do $fail "Directory not found")
    }
    if not ($"($flake_dir)/flake.nix" | path exists) {
        return (do $fail "flake.nix not found")
    }

    try {
        let shown = (^nix flake show $flake_dir | complete)
        if $shown.exit_code != 0 {
            do $fail $shown.stderr
        } else {
            {hostname: $hostname, valid: true, error: null}
        }
    } catch { |failure|
        do $fail $"Exception: ($failure.msg)"
    }
}
# Phase 4: validate the flake of each of the four expected hosts.
#
# Parameters:
#   workspace_dir - workspace root passed through to validate_one_flake
#
# Returns a result record: `ok` is the list of per-host validation records when
# all are valid; otherwise each invalid host is logged and `err` is set.
def validate_nixos_flakes [workspace_dir: string] {
    log info "Phase 4: Validating NixOS flakes..."

    let expected_hosts = ["wuji-cp-0", "wuji-strg-0", "wuji-wrkr-0", "sgoyol-0"]
    let reports = ($expected_hosts | each { |host| validate_one_flake $workspace_dir $host })

    let broken = ($reports | where { |report| $report.valid == false })
    if ($broken | is-empty) {
        log info "✓ All NixOS flakes validated"
        return (result_ok $reports)
    }

    log error "Flake validation failed:"
    for entry in $broken {
        log error $" • ($entry.hostname): ($entry.error)"
    }
    result_err "Flake validation failed"
}
# ============================================================================
# PHASE 5: Create Hetzner Infrastructure
# ============================================================================
# Normalise a decoded JSON value to a list: lists/tables pass through, null
# becomes an empty list, and any other value is wrapped in a one-element list.
def hcloud_as_list [value: any] {
    if $value == null {
        []
    } else {
        let shape = ($value | describe)
        if ($shape | str starts-with "list") or ($shape | str starts-with "table") {
            $value
        } else {
            [$value]
        }
    }
}

# Make sure the `librecloud-private` network (10.11.0.0/16) exists, creating it
# when `hcloud network list` does not report it. Returns a result record.
def ensure_private_network [] {
    log info " Checking private network: librecloud-private..."
    try {
        let networks = (hcloud_as_list (^hcloud network list --output json | from json))
        if ($networks | any { |n| $n.name == "librecloud-private" }) {
            result_ok null
        } else {
            log info " Creating private network: 10.11.0.0/16..."
            let created = (^hcloud network create --name "librecloud-private" --ip-range "10.11.0.0/16" | complete)
            if $created.exit_code != 0 {
                result_err $"Network creation failed: ($created.stderr)"
            } else {
                result_ok null
            }
        }
    } catch { |err|
        result_err $"Network error: ($err.msg)"
    }
}

# Look up one server by name and create it when absent. Returns a result record
# whose `ok` is {hostname, server_id, public_ip, private_ip, status}.
def ensure_hetzner_server [server: record] {
    let hostname = $server.hostname
    let server_type = $server.server_type
    let location = $server.location
    # `\(` keeps the parenthesis literal; an unescaped group would be evaluated.
    log info $" Creating server: ($hostname) \(($server_type) in ($location))..."
    try {
        let matches = (hcloud_as_list (^hcloud server list --output json | from json) | where { |s| $s.name == $hostname })
        if not ($matches | is-empty) {
            log debug $" ✓ Server already exists"
            let found = ($matches | first)
            result_ok {
                hostname: $hostname,
                server_id: $found.id,
                # Optional access yields null when no public IPv4 is reported.
                public_ip: $found.public_net?.ipv4?.ip?,
                private_ip: $server.networking.private_ip,
                status: $found.status
            }
        } else {
            # Kept on one line: Nushell has no `\` line continuation, so a trailing
            # backslash would be handed to hcloud as a literal argument.
            # NOTE(review): no --ssh-key is passed; confirm how root SSH access is provisioned.
            let created = (^hcloud server create --type $server_type --location $location --image "ubuntu-24.04" --network "librecloud-private" --name $hostname | complete)
            if $created.exit_code != 0 {
                result_err $"Failed to create server ($hostname): ($created.stderr)"
            } else {
                let described = (^hcloud server describe $hostname --output json | from json)
                log info $" ✓ Server created \(ID: ($described.id))"
                result_ok {
                    hostname: $hostname,
                    server_id: $described.id,
                    public_ip: $described.public_net?.ipv4?.ip?,
                    private_ip: $server.networking.private_ip,
                    status: $described.status
                }
            }
        }
    } catch { |err|
        result_err $"Server creation error for ($hostname): ($err.msg)"
    }
}

# Phase 5: ensure the private network and every server listed in
# `infra_config.infrastructure.servers` exist in Hetzner Cloud.
#
# Parameters:
#   infra_config - exported configuration; each server entry is read for
#                  hostname, server_type, location and networking.private_ip
#
# Returns a result record: `ok` is the list of server records (existing or newly
# created); `err` is set when HCLOUD_TOKEN is missing, the network or any server
# cannot be ensured, or the resulting list is empty.
def create_hetzner_servers [infra_config: record] {
    log info "Phase 5: Creating Hetzner Cloud infrastructure..."

    # `?` makes the lookup return null when the variable is unset.
    if ($env.HCLOUD_TOKEN? | is-empty) {
        log warning " HCLOUD_TOKEN not set. Cannot create servers."
        return (result_err "HCLOUD_TOKEN environment variable not set")
    }

    let network = (ensure_private_network)
    if $network.err != null {
        return $network
    }

    # Must be `mut` and assigned (not re-declared) so entries survive each iteration.
    mut servers_created = []
    for server in $infra_config.infrastructure.servers {
        let outcome = (ensure_hetzner_server $server)
        if $outcome.err != null {
            return $outcome
        }
        $servers_created = ($servers_created | append $outcome.ok)
    }

    if ($servers_created | is-empty) {
        return (result_err "No servers were created or found")
    }
    log info $"✓ Infrastructure ready: ($servers_created | length) servers"
    result_ok $servers_created
}
# ============================================================================
# PHASE 6: Setup SSH Connectivity
# ============================================================================
# Run one command as root on `public_ip` over SSH and return the `complete`
# record (exit_code, stdout, stderr). Host keys are accepted on first contact
# and not persisted (known-hosts file is /dev/null).
def ssh_root_exec [public_ip: string, remote_cmd: string] {
    # The destination must be an interpolated string: a bare `root@$public_ip`
    # word is passed to ssh literally, without variable expansion.
    ^ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=accept-new -o UserKnownHostsFile=/dev/null $"root@($public_ip)" $remote_cmd | complete
}

# Phase 6: wait for SSH on each server and set its hostname.
#
# Parameters:
#   servers_info - list of server records; `hostname` and `public_ip` are read
#
# For each server, SSH is probed up to 30 times with a 10 second pause between
# attempts. Servers without a public IP, or that never answer, are skipped and
# reported. A failed hostname change only produces a warning.
#
# Returns a result record: `ok` is null when every server was reachable,
# otherwise `err` lists the unreachable hostnames.
def setup_ssh_connectivity [servers_info: list] {
    log info "Phase 6: Setting up SSH connectivity..."
    mut unreachable = []

    for server in $servers_info {
        let hostname = $server.hostname
        let public_ip = $server.public_ip
        log info $" Configuring SSH for ($hostname)..."

        if ($public_ip | is-empty) {
            log warning $" ⚠ No public IP known for ($hostname); skipping"
            $unreachable = ($unreachable | append $hostname)
            continue
        }

        # Poll until the host answers or the attempt budget is used up.
        mut reachable = false
        mut attempts = 0
        while (not $reachable) and ($attempts < 30) {
            let probe = (try {
                ssh_root_exec $public_ip "echo OK"
            } catch {
                {exit_code: 1}
            })
            if $probe.exit_code == 0 {
                $reachable = true
            } else {
                $attempts = $attempts + 1
                # No pause after the final failed attempt.
                if $attempts < 30 {
                    sleep 10sec
                }
            }
        }

        if not $reachable {
            log warning $" ⚠ SSH did not become reachable on ($hostname)"
            $unreachable = ($unreachable | append $hostname)
            continue
        }

        let renamed = (try {
            ssh_root_exec $public_ip $"hostnamectl set-hostname ($hostname)"
        } catch { |err|
            {exit_code: 1, stderr: $err.msg}
        })
        if $renamed.exit_code == 0 {
            log debug $" ✓ Hostname set"
        } else {
            log warning $" ⚠ Could not set hostname: ($renamed.stderr)"
        }
    }

    if not ($unreachable | is-empty) {
        return (result_err $"SSH unreachable: ($unreachable | str join ', ')")
    }
    log info "✓ SSH connectivity configured"
    result_ok null
}
# ============================================================================
# PHASE 7: Deploy NixOS via nixos-anywhere
# ============================================================================
# Install NixOS on one server by running nixos-anywhere with the host's flake.
#
# Parameters:
#   workspace_dir - workspace root; the flake is <workspace_dir>/nixos/<hostname>
#   server        - server record; `hostname` and `public_ip` are read
#
# Returns {hostname, success, error}: `error` is null on success, otherwise the
# reason (missing flake, missing public IP, nixos-anywhere stderr, or exception).
def deploy_one_server [workspace_dir: string, server: record] {
    let hostname = $server.hostname
    let public_ip = $server.public_ip
    let flake_path = $"($workspace_dir)/nixos/($hostname)"
    log info $" Deploying NixOS to ($hostname)..."

    if not ($flake_path | path exists) {
        return {hostname: $hostname, success: false, error: "Flake not found"}
    }
    if ($public_ip | is-empty) {
        return {hostname: $hostname, success: false, error: "No public IP"}
    }

    try {
        # Single line with an interpolated target: Nushell has no `\` continuation
        # and does not expand variables inside the bare word `root@$public_ip`.
        let deploy_result = (^nixos-anywhere --flake $"($flake_path)#($hostname)" $"root@($public_ip)" | complete)
        if $deploy_result.exit_code == 0 {
            log info $" ✓ NixOS deployment complete"
            {hostname: $hostname, success: true, error: null}
        } else {
            log warning $" ✗ Deployment failed"
            {hostname: $hostname, success: false, error: $deploy_result.stderr}
        }
    } catch { |err|
        log warning $" ✗ Deployment exception: ($err.msg)"
        {hostname: $hostname, success: false, error: $err.msg}
    }
}
# Phase 7: deploy NixOS to every server, one after another.
#
# Parameters:
#   workspace_dir - workspace root passed through to deploy_one_server
#   servers_info  - list of server records to deploy
#
# Returns a result record whose `ok` is the list of per-server deployment
# records. Partial failure is logged as a warning but still returned as ok.
def deploy_nixos [workspace_dir: string, servers_info: list] {
    log info "Phase 7: Deploying NixOS via nixos-anywhere..."

    let deployments = ($servers_info | each { |node| deploy_one_server $workspace_dir $node })
    let failure_count = ($deployments | where { |entry| not $entry.success } | length)

    if $failure_count == 0 {
        log info "✓ All servers deployed successfully"
    } else {
        log warning $"⚠ Some deployments failed: ($failure_count) of ($deployments | length)"
    }
    result_ok $deployments
}
# ============================================================================
# PHASE 8: Post-Deployment Validation
# ============================================================================
# Phase 8: check over SSH whether each server answers and reports NixOS.
#
# Parameters:
#   servers_info - list of server records; `hostname` and `public_ip` are read
#
# Returns a result record whose `ok` is a list of
# {hostname, public_ip, is_nixos, is_reachable}. `is_reachable` reflects whether
# the SSH command exited with 0; `is_nixos` whether /etc/os-release mentions "NixOS".
def validate_deployment [servers_info: list] {
    log info "Phase 8: Validating post-deployment status..."
    mut validation_results = []

    for server in $servers_info {
        let hostname = $server.hostname
        let public_ip = $server.public_ip
        log info $" Validating ($hostname)..."

        # Any failure to run ssh is treated the same as an unreachable host.
        let unreachable = {exit_code: 1, stdout: ""}
        let os_check = (if ($public_ip | is-empty) {
            $unreachable
        } else {
            try {
                # Connect to the known public IP with an interpolated target (a bare
                # `root@$hostname` word is passed literally), using the same timeout
                # and host-key options as the Phase 6 SSH calls. The whole file is
                # read so the check does not depend on which line comes first.
                ^ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=accept-new -o UserKnownHostsFile=/dev/null $"root@($public_ip)" "cat /etc/os-release" | complete
            } catch {
                $unreachable
            }
        })

        let is_reachable = ($os_check.exit_code == 0)
        let is_nixos = ($os_check.stdout | str contains "NixOS")
        if $is_nixos {
            log debug $" ✓ NixOS confirmed"
        }

        $validation_results = ($validation_results | append {
            hostname: $hostname,
            public_ip: $public_ip,
            is_nixos: $is_nixos,
            is_reachable: $is_reachable
        })
    }

    log info "✓ Post-deployment validation complete"
    result_ok $validation_results
}
# ============================================================================
# CLEANUP: Destroy Infrastructure
# ============================================================================
# Cleanup: delete each listed server with `hcloud server delete`.
#
# Parameters:
#   servers_info - list of server records; only `hostname` is read
#
# Returns a result record: `err` is set when HCLOUD_TOKEN is missing; otherwise
# `ok` is a list of {hostname, success}. Individual delete failures are logged
# as warnings and do not turn the overall result into an error.
def destroy_infrastructure [servers_info: list] {
    log info "Phase X: Destroying Hetzner infrastructure (cleanup)..."

    # `?` makes the lookup return null when the variable is unset; piping `$env`
    # into `has` does not work because `has` is not a command.
    if ($env.HCLOUD_TOKEN? | is-empty) {
        return (result_err "HCLOUD_TOKEN not set")
    }

    mut destroyed = []
    for server in $servers_info {
        let hostname = $server.hostname
        log info $" Deleting server: ($hostname)..."
        let del_result = (try {
            ^hcloud server delete $hostname --force | complete
        } catch { |err|
            # Keep the exception text so the warning below can show the cause.
            {exit_code: 1, stderr: $err.msg}
        })
        if $del_result.exit_code == 0 {
            log debug $" ✓ Server deleted"
            $destroyed = ($destroyed | append {hostname: $hostname, success: true})
        } else {
            log warning $" ⚠ Failed to delete: ($del_result.stderr)"
            $destroyed = ($destroyed | append {hostname: $hostname, success: false})
        }
    }

    log info $"✓ Cleanup complete"
    result_ok $destroyed
}
# ============================================================================
# UTILITY: Status Check
# ============================================================================
# Utility: log, for each of the four expected hosts, whether its flake.nix exists,
# followed by a hint on the next command to run.
#
# Parameters:
#   workspace_dir - workspace root; flakes are looked up under <workspace_dir>/nixos
def show_deployment_status [workspace_dir: string] {
    log info "Checking deployment status..."

    let expected_hosts = ["wuji-cp-0", "wuji-strg-0", "wuji-wrkr-0", "sgoyol-0"]
    let flakes_status = ($expected_hosts | each { |host|
        {
            hostname: $host,
            flake_exists: ($"($workspace_dir)/nixos/($host)/flake.nix" | path exists)
        }
    })

    log info ""
    for entry in $flakes_status {
        let mark = (if $entry.flake_exists { "✓" } else { "✗" })
        let state = (if $entry.flake_exists { "flake generated" } else { "flake missing" })
        log info $" ($mark) ($entry.hostname): ($state)"
    }

    # The blank separator line is emitted in both outcomes.
    log info ""
    let any_missing = ($flakes_status | any { |entry| not $entry.flake_exists })
    if $any_missing {
        log info "⚠ Some flakes missing. Run: nu provisioning/scripts/deploy-librecloud-hetzner.nu --generate-only"
    } else {
        log info "✓ All flakes ready. Deploy with: HCLOUD_TOKEN=xxx nu provisioning/scripts/deploy-librecloud-hetzner.nu"
    }
}
# ============================================================================
# SUMMARY
# ============================================================================
# Log the final summary: server addresses, per-server deployment status (when
# provided) and suggested next steps.
#
# Parameters:
#   servers_info       - list of server records; hostname, public_ip and
#                        private_ip are printed
#   deployment_results - record; when it has a `deployments` key, each entry's
#                        hostname and success flag are printed
def show_deployment_summary [servers_info: list, deployment_results: record] {
    log info ""
    log info "=========================================="
    log info "LibreCloud Hetzner Deployment Complete"
    log info "=========================================="
    log info ""
    log info "Servers:"
    for server in $servers_info {
        # `\(` keeps the parenthesis literal; an unescaped group would be
        # evaluated as an expression inside the interpolated string.
        log info $" • ($server.hostname): ($server.public_ip) \(private: ($server.private_ip))"
    }
    log info ""

    # Key membership is tested with `in` against the record's columns;
    # piping into `has` does not work because `has` is not a command.
    if "deployments" in ($deployment_results | columns) {
        log info "Deployment Status:"
        # A null `deployments` value is treated as an empty list.
        for deploy in ($deployment_results.deployments | default []) {
            if $deploy.success {
                log info $" ✓ ($deploy.hostname): deployed"
            } else {
                log info $" ✗ ($deploy.hostname): failed"
            }
        }
        log info ""
    }

    log info "Next Steps:"
    log info " 1. Verify cluster: kubectl --kubeconfig=<path> get nodes"
    log info " 2. Check node status: ssh root@<public_ip> systemctl status kubelet"
    log info " 3. View logs: ssh root@<public_ip> journalctl -u kubelet -f"
    log info ""
    log info "Documentation: workspaces/librecloud_hetzner/README.md"
    log info "=========================================="
}
# ============================================================================
# MAIN
# ============================================================================
# Entry point: run the deployment phases in order, honouring the mode flags.
#
# Flag precedence is --status, then --generate-only, then --dry-run, then
# --destroy; without flags the full deployment (phases 1-8) runs.
# Failures in phases 1-5 exit with status 1; failures in phases 6-8 are logged
# as warnings and the run continues to the summary.
#
# No explicit `main` call follows this definition: `nu <script>` invokes `main`
# itself with the command-line flags, and an extra bare call would first run a
# complete flagless deployment.
def main [
    --dry-run        # Run phases 1-4 and stop before touching infrastructure
    --generate-only  # Generate and validate the NixOS flakes, then stop
    --destroy        # Delete the servers listed in the configuration, then stop
    --status         # Show which flakes exist, then stop
] {
    # Phase 1: Validate environment
    let env_check = (validate_environment)
    if not (is_ok $env_check) {
        log error $env_check.err
        exit 1
    }
    let workspace_dir = $env_check.ok.workspace_dir

    # If --status flag, show status and exit
    if $status {
        show_deployment_status $workspace_dir
        exit 0
    }

    # Phase 2: Export Nickel configuration
    let nickel_config = (export_nickel_config $workspace_dir)
    if not (is_ok $nickel_config) {
        log error $nickel_config.err
        exit 1
    }

    # Phase 3: Generate NixOS flakes
    let flakes_result = (generate_nixos_flakes $workspace_dir)
    if not (is_ok $flakes_result) {
        log error $flakes_result.err
        exit 1
    }

    # Phase 4: Validate flakes
    let validation_result = (validate_nixos_flakes $workspace_dir)
    if not (is_ok $validation_result) {
        log error $validation_result.err
        exit 1
    }

    # If --generate-only, stop here
    if $generate_only {
        log info "✓ NixOS flakes generated and validated"
        exit 0
    }

    # If --dry-run, stop here
    if $dry_run {
        log info "✓ Dry-run completed (no infrastructure changes)"
        exit 0
    }

    # If --destroy, delete the configured servers WITHOUT running Phase 5 first:
    # creating servers only to delete them again is never wanted. The cleanup
    # routine only needs each entry's hostname, which the configuration provides.
    if $destroy {
        let cleanup_result = (destroy_infrastructure $nickel_config.ok.infrastructure.servers)
        if not (is_ok $cleanup_result) {
            log error $cleanup_result.err
            exit 1
        }
        exit 0
    }

    # Phase 5: Create Hetzner infrastructure
    let infra_result = (create_hetzner_servers $nickel_config.ok)
    if not (is_ok $infra_result) {
        log error $infra_result.err
        exit 1
    }
    let servers_info = $infra_result.ok

    # Phase 6: Setup SSH connectivity
    let ssh_result = (setup_ssh_connectivity $servers_info)
    if not (is_ok $ssh_result) {
        log warning $ssh_result.err
    }

    # Phase 7: Deploy NixOS
    let deploy_result = (deploy_nixos $workspace_dir $servers_info)
    if not (is_ok $deploy_result) {
        log warning $deploy_result.err
    }
    let deployments = $deploy_result.ok

    # Phase 8: Validate deployment
    let validation = (validate_deployment $servers_info)
    if not (is_ok $validation) {
        log warning $validation.err
    }

    # Show final summary
    show_deployment_summary $servers_info {
        deployments: $deployments,
        validation: $validation.ok
    }
    log info ""
    log info "Deployment complete! IPs for reference:"
    for server in $servers_info {
        log info $" ($server.hostname) = ($server.public_ip)"
    }
}