202 lines
8.2 KiB
Text
202 lines
8.2 KiB
Text
# Selective imports replacing `use lib_provisioning *` (ADR-025 Phase 4).
|
|
use lib_provisioning/result.nu [ok]
|
|
use lib_provisioning/utils/interface.nu [_print]
|
|
use lib_provisioning/utils/logging.nu [set-debug-enabled]
|
|
use lib_provisioning/utils/settings.nu [find_get_settings load]
|
|
use lib_provisioning/utils/ssh.nu [ssh_cmd]
|
|
use utils.nu *
|
|
|
|
# > Server upgrade — detect server_type drift and apply changes via provider API.
#
# Compares servers.ncl (desired server_type) against the live provider state.
# If a mismatch is found, executes: shutdown → change_type → start.
#
# A server is only considered drifted when it declares a non-empty server_type
# AND the live type could be determined; "unknown" / "not found" servers are
# listed in the table but never touched.
#
# Usage:
#   provisioning server upgrade sgoyol-cp -i sgoyol           # upgrade one server
#   provisioning server upgrade -i sgoyol                     # check all, upgrade drifted
#   provisioning server upgrade sgoyol-cp -i sgoyol --check   # dry-run, show drift only
export def "main upgrade" [
    name?: string              # Server hostname (optional, all servers if omitted)
    --infra (-i): string       # Infra directory
    --settings (-s): string    # Settings path
    --check (-c)               # Dry-run: show drift without applying
    --yes (-y)                 # Skip confirmation prompt
    --debug (-x)               # Debug mode
    --helpinfo (-h)            # Help
] {
    if $helpinfo {
        _print "Usage: provisioning server upgrade [hostname] -i <infra> [--check] [--yes]"
        _print ""
        _print " Detects server_type drift between servers.ncl and provider."
        _print " If drift found: shutdown → change_type → start."
        _print ""
        _print " --check Show drift without applying"
        _print " --yes Skip confirmation"
        return
    }

    if $debug { set-debug-enabled true }

    # Discover infras: explicit -i, or scan all infra dirs with settings.ncl
    let infra_list = if ($infra | is-not-empty) {
        [$infra]
    } else {
        let ws_path = ($env.PROVISIONING_WORKSPACE_PATH? | default $env.PWD)
        let infra_dir = ($ws_path | path join "infra")
        if not ($infra_dir | path exists) {
            _print "No infra/ directory found. Use -i <infra> or run from a workspace."
            return
        }
        ls $infra_dir
        | where type == "dir"
        | where { ($in.name | path join "settings.ncl" | path exists) }
        | each {|d| $d.name | path basename }
    }

    if ($infra_list | is-empty) {
        _print "No infras with settings.ncl found."
        return
    }

    # Collect drift across all infras
    mut all_drift = []
    mut all_settings = []

    for infra_name in $infra_list {
        # `try` (not `do … catch`) so a broken infra is skipped instead of aborting the run.
        let curr_settings = (try { find_get_settings --infra $infra_name --settings $settings } catch { null })
        if ($curr_settings == null) {
            _print $"⚠ ($infra_name): cannot load settings — skipping"
            continue
        }
        let servers = $curr_settings.data.servers
        let live_data = (do { mw_query_servers $curr_settings "" "" } | default [])

        # Restrict to the requested hostname when one was given.
        let targets = if ($name | is-not-empty) {
            $servers | where {|s| $s.hostname == $name }
        } else {
            $servers
        }

        let drift = ($targets | each {|srv|
            let desired_type = ($srv.server_type? | default "")
            let live = ($live_data | where {|l| $l.name? == $srv.hostname } | get 0?)
            let actual_type = if $live != null { $live.server_type?.name? | default "unknown" } else { "not found" }
            let status = if $live != null { $live.status? | default "unknown" } else { "not found" }
            # An empty desired type means "not declared": never treat that as drift,
            # otherwise change-type would be invoked with an empty target type.
            let needs_upgrade = (
                ($desired_type | is-not-empty)
                and $desired_type != $actual_type
                and $actual_type != "not found"
                and $actual_type != "unknown"
            )
            {
                infra: $infra_name,
                hostname: $srv.hostname,
                desired_type: $desired_type,
                actual_type: $actual_type,
                status: $status,
                drift: (if $needs_upgrade { "upgrade" } else { "ok" }),
                provider: ($srv.provider? | default "hetzner"),
            }
        })

        $all_drift = ($all_drift | append $drift)
        $all_settings = ($all_settings | append { infra: $infra_name, settings: $curr_settings })
    }

    # Nothing matched (e.g. unknown hostname): say so rather than claiming "no drift".
    if ($all_drift | is-empty) {
        if ($name | is-not-empty) {
            _print $"No server named '($name)' found in the selected infra settings."
        } else {
            _print "No servers found in the selected infra settings."
        }
        return
    }

    print ($all_drift | select infra hostname desired_type actual_type status drift | table)

    let to_upgrade = ($all_drift | where drift == "upgrade")
    if ($to_upgrade | is-empty) {
        _print "\n✅ No server type drift — all servers match settings"
        return
    }

    _print $"\n($to_upgrade | length) server\(s\) need upgrade:"
    for srv in $to_upgrade {
        _print $" ($srv.infra)/($srv.hostname): ($srv.actual_type) → ($srv.desired_type)"
    }

    if $check {
        _print "\n(--check: no changes applied)"
        return
    }

    if not $yes {
        _print $"\nUpgrade requires shutdown → change_type → start. Continue? Type yes: "
        let answer = (input --numchar 3)
        if ($answer | str downcase) != "yes" {
            _print "Aborted."
            return
        }
    }

    # Execute upgrades.
    # NOTE(review): every step shells out to `hcloud` regardless of the record's
    # `provider` field — confirm non-Hetzner servers can never reach this point.
    mut needs_attention = []

    for srv_drift in $to_upgrade {
        let entry = ($all_settings | where infra == $srv_drift.infra | get 0?)
        if ($entry | is-empty) { continue }
        let infra_settings = $entry.settings
        let srv = ($infra_settings.data.servers | where hostname == $srv_drift.hostname | get 0?)
        if ($srv | is-empty) { continue }

        let hn = $srv_drift.hostname
        let label = $"($srv_drift.infra)/($hn)"
        _print $"\n── ($label): ($srv_drift.actual_type) → ($srv_drift.desired_type) ──"

        # 1. Shutdown
        _print " ⏹ Shutting down ..."
        let res_shutdown = (do { ^hcloud server shutdown $hn } | complete)
        if $res_shutdown.exit_code != 0 {
            _print $" 🛑 shutdown failed: ($res_shutdown.stderr)"
            $needs_attention = ($needs_attention | append $label)
            continue
        }

        # 2. Wait for server to be off
        _print " ⏳ Waiting for server to stop ..."
        if not (upgrade-wait-status $hn "off") {
            _print $" 🛑 ($hn) did not stop — skipping"
            $needs_attention = ($needs_attention | append $label)
            continue
        }

        # 3. Change type
        # NOTE(review): without --keep-disk hcloud also grows the disk, which
        # presumably prevents a later downgrade — confirm this is intended.
        _print $" 🔄 Changing type to ($srv_drift.desired_type) ..."
        let res_change = (do { ^hcloud server change-type $hn $srv_drift.desired_type } | complete)
        if $res_change.exit_code != 0 {
            _print $" 🛑 change-type failed: ($res_change.stderr)"
            _print " ▶ Restarting server with original type ..."
            # Capture the rollback result so a failure here is reported and
            # cannot abort the remaining upgrades.
            let res_rollback = (do { ^hcloud server poweron $hn } | complete)
            if $res_rollback.exit_code != 0 {
                _print $" 🛑 poweron failed: ($res_rollback.stderr)"
            }
            $needs_attention = ($needs_attention | append $label)
            continue
        }

        # 4. Start
        _print " ▶ Starting ..."
        let res_start = (do { ^hcloud server poweron $hn } | complete)
        if $res_start.exit_code != 0 {
            _print $" 🛑 poweron failed: ($res_start.stderr)"
            $needs_attention = ($needs_attention | append $label)
            continue
        }

        # 5. Wait for running
        _print " ⏳ Waiting for server to start ..."
        if (upgrade-wait-status $hn "running") {
            # Post-upgrade: ensure critical services are running after reboot.
            # The shutdown → change-type → poweron cycle can leave services in
            # bad/inactive state if systemd symlinks were disrupted.
            _print " 🔧 Ensuring services are active ..."
            let ip = (try { mw_get_ip $infra_settings $srv "public" false } catch { "" })
            if ($ip | is-not-empty) {
                let svc_cmd = "for svc in containerd kubelet etcd coredns; do systemctl is-enabled $svc 2>/dev/null | grep -q enabled && systemctl start $svc 2>/dev/null; done; sleep 2; systemctl is-active containerd kubelet 2>&1"
                ssh_cmd $infra_settings $srv false $svc_cmd $ip
            }
            _print $" ✅ ($hn) upgraded to ($srv_drift.desired_type)"
        } else {
            _print $" ⚠ ($hn) changed but not yet running — check manually"
            $needs_attention = ($needs_attention | append $label)
        }
    }

    if ($needs_attention | is-empty) {
        _print $"\n✅ Upgrade complete"
    } else {
        _print $"\n⚠ Upgrade finished — ($needs_attention | length) server\(s\) need attention: ($needs_attention | str join ', ')"
    }
}

# Poll `hcloud server describe` until the server reports the `target` status.
#
# Returns true as soon as the reported status equals `target`, or false after
# `attempts` polls (5 seconds apart). A failed or unparsable describe call is
# treated as status "unknown" and simply retried.
def upgrade-wait-status [
    hostname: string     # Server name as known to hcloud
    target: string       # Status to wait for, e.g. "off" or "running"
    attempts: int = 30   # Maximum number of polls before giving up
] {
    for _ in 1..$attempts {
        let status = (try { ^hcloud server describe $hostname -o json | from json | get status } catch { "unknown" })
        if $status == $target { return true }
        sleep 5sec
    }
    false
}
|