use lib_provisioning *
use utils.nu *
use ../lib_provisioning/config/accessor.nu *

# > Server upgrade — detect server_type drift and apply changes via provider API.
#
# Compares servers.ncl (desired server_type) against the live provider state.
# If a mismatch is found, executes: shutdown → change_type → start.
#
# Servers whose desired server_type is empty, or whose live type is
# "not found" / "unknown", are never treated as drifted.
#
# NOTE(review): the apply phase always drives the `hcloud` CLI, even though a
# `provider` field is collected per server — confirm that only Hetzner servers
# can reach it.
#
# Usage:
#   provisioning server upgrade sgoyol-cp -i sgoyol          # upgrade one server
#   provisioning server upgrade -i sgoyol                    # check all, upgrade drifted
#   provisioning server upgrade sgoyol-cp -i sgoyol --check  # dry-run, show drift only
export def "main upgrade" [
    name?: string            # Server hostname (optional, all servers if omitted)
    --infra (-i): string     # Infra directory
    --settings (-s): string  # Settings path
    --check (-c)             # Dry-run: show drift without applying
    --yes (-y)               # Skip confirmation prompt
    --debug (-x)             # Debug mode
    --helpinfo (-h)          # Help
] {
    if $helpinfo {
        _print "Usage: provisioning server upgrade [hostname] -i [--check] [--yes]"
        _print ""
        _print " Detects server_type drift between servers.ncl and provider."
        _print " If drift found: shutdown → change_type → start."
        _print ""
        _print " --check Show drift without applying"
        _print " --yes Skip confirmation"
        return
    }
    if $debug { set-debug-enabled true }

    # Discover infras: explicit -i, or scan all infra dirs with settings.ncl
    let infra_list = if ($infra | is-not-empty) {
        [$infra]
    } else {
        let ws_path = ($env.PROVISIONING_WORKSPACE_PATH? | default $env.PWD)
        let infra_dir = ($ws_path | path join "infra")
        if not ($infra_dir | path exists) {
            _print "No infra/ directory found. Use -i or run from a workspace."
            return
        }
        ls $infra_dir
        | where type == "dir"
        | where { ($in.name | path join "settings.ncl" | path exists) }
        | each {|d| $d.name | path basename }
    }
    if ($infra_list | is-empty) {
        _print "No infras with settings.ncl found."
        return
    }

    # Collect drift across all infras
    mut all_drift = []
    mut all_settings = []
    for infra_name in $infra_list {
        # `try` (not `do`) is required here: `do { } catch { }` does not catch.
        let curr_settings = (try {
            find_get_settings --infra $infra_name --settings $settings
        } catch {
            null
        })
        if ($curr_settings == null) {
            _print $"⚠ ($infra_name): cannot load settings — skipping"
            continue
        }
        let servers = $curr_settings.data.servers
        let live_data = (do { mw_query_servers $curr_settings "" "" } | default [])

        let drift = (
            $servers
            # Restrict to the requested hostname when one was given.
            | where {|srv| ($name | is-empty) or $srv.hostname == $name }
            | each {|srv|
                let desired_type = ($srv.server_type? | default "")
                let live = ($live_data | where {|l| $l.name == $srv.hostname } | get 0? | default null)
                let actual_type = if $live != null {
                    $live.server_type?.name? | default "unknown"
                } else {
                    "not found"
                }
                let status = if $live != null {
                    $live.status? | default "unknown"
                } else {
                    "not found"
                }
                # A server without a declared server_type has nothing to
                # converge to; without this guard it would be shut down and
                # `change-type` called with an empty type.
                let needs_upgrade = (
                    ($desired_type | is-not-empty)
                    and $desired_type != $actual_type
                    and $actual_type != "not found"
                    and $actual_type != "unknown"
                )
                {
                    infra: $infra_name,
                    hostname: $srv.hostname,
                    desired_type: $desired_type,
                    actual_type: $actual_type,
                    status: $status,
                    drift: (if $needs_upgrade { "upgrade" } else { "ok" }),
                    provider: ($srv.provider? | default "hetzner"),
                }
            }
            | where {|it| $it != null }
        )
        $all_drift = ($all_drift | append $drift)
        $all_settings = ($all_settings | append { infra: $infra_name, settings: $curr_settings })
    }

    print ($all_drift | select infra hostname desired_type actual_type status drift | table)

    let to_upgrade = ($all_drift | where drift == "upgrade")
    if ($to_upgrade | is-empty) {
        _print "\n✅ No server type drift — all servers match settings"
        return
    }

    _print $"\n($to_upgrade | length) server\(s\) need upgrade:"
    for srv in $to_upgrade {
        _print $" ($srv.infra)/($srv.hostname): ($srv.actual_type) → ($srv.desired_type)"
    }

    if $check {
        _print "\n(--check: no changes applied)"
        return
    }

    if not $yes {
        _print $"\nUpgrade requires shutdown → change_type → start. Continue? Type yes: "
        let answer = (input --numchar 3)
        if $answer != "yes" and $answer != "YES" {
            _print "Aborted."
            return
        }
    }

    # Execute upgrades
    for srv_drift in $to_upgrade {
        let infra_settings = ($all_settings | where infra == $srv_drift.infra | get 0?).settings
        let srv = ($infra_settings.data.servers | where hostname == $srv_drift.hostname | get 0?)
        if ($srv | is-empty) { continue }
        let hn = $srv_drift.hostname

        _print $"\n── ($srv_drift.infra)/($hn): ($srv_drift.actual_type) → ($srv_drift.desired_type) ──"

        # 1. Shutdown
        _print " ⏹ Shutting down ..."
        let res_shutdown = (do { ^hcloud server shutdown $hn } | complete)
        if $res_shutdown.exit_code != 0 {
            _print $" 🛑 shutdown failed: ($res_shutdown.stderr)"
            continue
        }

        # 2. Wait for server to be off
        _print " ⏳ Waiting for server to stop ..."
        if not (wait_for_hcloud_status $hn "off") {
            _print $" 🛑 ($hn) did not stop — skipping"
            continue
        }

        # 3. Change type
        _print $" 🔄 Changing type to ($srv_drift.desired_type) ..."
        let res_change = (do { ^hcloud server change-type $hn $srv_drift.desired_type } | complete)
        if $res_change.exit_code != 0 {
            _print $" 🛑 change-type failed: ($res_change.stderr)"
            _print " ▶ Restarting server with original type ..."
            # Capture the result so a failed rollback is reported and cannot
            # abort the remaining upgrades.
            let res_rollback = (do { ^hcloud server poweron $hn } | complete)
            if $res_rollback.exit_code != 0 {
                _print $" 🛑 poweron failed: ($res_rollback.stderr) — ($hn) is still off, check manually"
            }
            continue
        }

        # 4. Start
        _print " ▶ Starting ..."
        let res_start = (do { ^hcloud server poweron $hn } | complete)
        if $res_start.exit_code != 0 {
            _print $" 🛑 poweron failed: ($res_start.stderr)"
            continue
        }

        # 5. Wait for running
        _print " ⏳ Waiting for server to start ..."
        if (wait_for_hcloud_status $hn "running") {
            # Post-upgrade: ensure critical services are running after reboot.
            # The shutdown → change-type → poweron cycle can leave services in
            # bad/inactive state if systemd symlinks were disrupted.
            _print " 🔧 Ensuring services are active ..."
            let ip = (try { mw_get_ip $infra_settings $srv "public" false } catch { "" })
            if ($ip | is-not-empty) {
                let svc_cmd = "for svc in containerd kubelet etcd coredns; do systemctl is-enabled $svc 2>/dev/null | grep -q enabled && systemctl start $svc 2>/dev/null; done; sleep 2; systemctl is-active containerd kubelet 2>&1"
                ssh_cmd $infra_settings $srv false $svc_cmd $ip
            }
            _print $" ✅ ($hn) upgraded to ($srv_drift.desired_type)"
        } else {
            _print $" ⚠ ($hn) changed but not yet running — check manually"
        }
    }

    _print $"\n✅ Upgrade complete"
}

# Poll `hcloud server describe` until the server reports the wanted status.
#
# Returns true as soon as the reported status equals `wanted`, false once
# `attempts` polls have been made without a match. Any failure while querying
# or parsing the status counts as "unknown" for that poll instead of aborting.
def wait_for_hcloud_status [
    hostname: string             # Server name as known to hcloud
    wanted: string               # Status to wait for, e.g. "off" or "running"
    --attempts: int = 30         # Maximum number of polls
    --interval: duration = 5sec  # Pause after each non-matching poll
] {
    for _ in 1..$attempts {
        let status = (try {
            ^hcloud server describe $hostname -o json | from json | get status
        } catch {
            "unknown"
        })
        if $status == $wanted { return true }
        sleep $interval
    }
    false
}