150 lines
7 KiB
Text
150 lines
7 KiB
Text
# provisioning sync — reconcile .provisioning-state.ncl against external APIs.
|
|
# Sources: Hetzner API (server existence/status), K8s API (pod/deploy readiness), SSH probe.
|
|
# Never marks a taskserv 'completed without positive confirmation.
|
|
# Ambiguous or timed-out probes write 'unknown.
|
|
|
|
use state.nu *
|
|
# Selective imports replacing `use ../lib_provisioning *` (ADR-025 Phase 4).
|
|
use lib_provisioning/utils/interface.nu [_print]
|
|
use lib_provisioning/result.nu [err]
|
|
|
|
# ─── Provider probe ───────────────────────────────────────────────────────────
|
|
|
|
# Query the Hetzner API for a server and return { provider_id, provider_state }.
#
# Parameters:
#   settings - provisioning settings record (not read here; kept so call sites stay unchanged)
#   server   - server record handed to `mw_server_info`
#
# Returns { provider_id: "", provider_state: "unknown" } when the lookup raises,
# exits non-zero, prints nothing, or prints something that is not a JSON object.
# provider_state is always one of "running", "off" or "unknown".
def probe-hetzner [settings: record, server: record]: nothing -> record {
    let unknown = { provider_id: "", provider_state: "unknown" }

    # NOTE(review): `complete` only captures external-command output; this assumes
    # mw_server_info ends in an external call — confirm against its definition.
    # `try` keeps a failing lookup from aborting the whole sync run.
    let info = (try { do { mw_server_info $server true } | complete } catch { null })
    if ($info | is-empty) {
        return $unknown
    }
    if $info.exit_code != 0 or ($info.stdout | is-empty) {
        return $unknown
    }

    # `do { ... } catch { ... }` does not trap errors in Nushell; `try` is required
    # so that malformed JSON degrades to "unknown".
    let parsed = (try { $info.stdout | from json } catch { null })
    if ($parsed | is-empty) {
        return $unknown
    }
    # Anything other than a JSON object has no `status` / `id` fields to read.
    if not ($parsed | describe | str starts-with "record") {
        return $unknown
    }

    let raw_state = (
        try { $parsed.status? | default "unknown" | into string | str downcase } catch { "unknown" }
    )
    # Only states we positively recognise are passed through.
    let mapped = match $raw_state {
        "running" => "running",
        "off" => "off",
        _ => "unknown",
    }

    {
        provider_id: (try { $parsed.id? | default "" | into string } catch { "" }),
        provider_state: $mapped,
    }
}
|
|
|
|
# ─── K8s probe ────────────────────────────────────────────────────────────────
|
|
|
|
# Check whether a K8s deployment or daemonset reports at least one ready replica/pod.
#
# Parameters:
#   kubeconfig    - path passed to `kubectl --kubeconfig`
#   resource_type - "deployment" or "daemonset"
#   name          - resource name
#   namespace     - namespace to query (default "kube-system")
#
# Returns true only when kubectl exits 0 and prints a ready count greater than zero.
# A missing kubectl binary, a non-zero exit, an absent status field, or unparsable
# output all yield false.
def probe-k8s-ready [
    kubeconfig: string
    resource_type: string   # deployment | daemonset
    name: string
    namespace: string = "kube-system"
]: nothing -> bool {
    # DaemonSets publish `.status.numberReady`; replica-based workloads such as
    # Deployments publish `.status.readyReplicas`.
    let ready_field = if ($resource_type | str downcase) in ["daemonset" "daemonsets" "ds"] {
        "numberReady"
    } else {
        "readyReplicas"
    }
    let output_spec = $"jsonpath={.status.($ready_field)}"

    # `complete` captures stderr, so no explicit redirect is needed; `try` covers the
    # case where kubectl itself cannot be launched.
    let result = (try {
        ^kubectl --kubeconfig $kubeconfig -n $namespace get $resource_type $name -o $output_spec | complete
    } catch { null })
    if ($result | is-empty) { return false }
    if $result.exit_code != 0 { return false }

    # The status field is omitted when nothing is ready, so jsonpath prints an empty string.
    let raw = ($result.stdout | str trim)
    if ($raw | is-empty) { return false }

    let ready = (try { $raw | into int } catch { 0 })
    $ready > 0
}
|
|
|
|
# Map a taskserv name to the K8s resource used for its readiness probe.
#
# Parameters:
#   taskserv - taskserv name as stored in the provisioning state
#
# Returns a record { type, name, ns } for known taskservs, or null when the
# taskserv has no K8s resource to probe. The output type is declared `any`
# because the null fallback is not a record.
def taskserv-k8s-resource [taskserv: string]: nothing -> any {
    match $taskserv {
        "cilium" => { type: "daemonset", name: "cilium", ns: "kube-system" },
        "hetzner_csi" => { type: "deployment", name: "hcloud-csi-controller", ns: "kube-system" },
        "democratic_csi" => { type: "deployment", name: "democratic-csi-controller", ns: "democratic-csi" },
        "coredns" => { type: "deployment", name: "coredns", ns: "kube-system" },
        # Anything else is not probed via K8s.
        _ => null,
    }
}
|
|
|
|
# ─── SSH probe ────────────────────────────────────────────────────────────────
|
|
|
|
# TCP liveness check against the SSH port.
#
# Runs `nc -z -w 5 <ip> 22` (zero-I/O scan with a 5 second timeout) and reports
# whether it exited successfully. stderr from nc is discarded.
def probe-ssh [ip: string]: nothing -> bool {
    let nc_outcome = (
        do { ^nc -z -w 5 $ip 22 err> /dev/null }
        | complete
    )
    ($nc_outcome | get exit_code) == 0
}
|
|
|
|
# ─── Main sync ────────────────────────────────────────────────────────────────
|
|
|
|
# Persist a sync verdict for one taskserv node: sets its state and appends a
# sync event (trimmed via log-trim) to the node's existing log.
def sync-record-taskserv [
    workspace_path: string
    hostname: string
    taskserv: string
    node: any               # current node entry from the state file (may lack `log`)
    new_state: string       # "completed" | "unknown"
    event: string           # "sync-confirmed" | "sync-unknown"
    ts: string              # timestamp recorded on the log entry
]: nothing -> nothing {
    state-node-set $workspace_path $hostname $taskserv {
        state: $new_state,
        actor: { identity: "system", source: "sync" },
        log: (log-trim ($node.log? | default [] | append {
            ts: $ts,
            event: $event,
            source: "sync",
        })),
    } | ignore
}

# Reconcile the provisioning state of every server in `settings.data.servers`
# against external sources, in this order per server:
#   1. provider probe  - writes provider id/state via state-server-sync; an
#                        "unknown" result stops further probing for that server
#   2. SSH probe       - skipped with --skip-ssh or when no public IP is resolved;
#                        an unreachable node stops further probing for that server
#   3. K8s probes      - only when --kubeconfig points at an existing file and the
#                        provider reports "running"; each taskserv with a known K8s
#                        resource is written as "completed" (ready) or "unknown"
#
# Parameters:
#   workspace_path - workspace whose state file is read and updated
#   settings       - provisioning settings record
export def state-sync [
    workspace_path: string
    settings: record
    --kubeconfig: string = ""   # Path to kubeconfig for K8s probes (skipped if empty)
    --skip-ssh                  # Skip SSH liveness probes
    --infra: string = ""        # Filter to specific infra name
]: nothing -> nothing {
    _print "Syncing provisioning state against external APIs ..."

    # The format string hard-codes a trailing "Z", so convert to UTC before formatting.
    let ts = (date now | date to-timezone "UTC" | format date "%Y-%m-%dT%H:%M:%SZ")

    # NOTE(review): --infra is accepted but nothing below filters on it — implement or drop.

    # Loop-invariant: K8s probing needs a kubeconfig that actually exists on disk.
    let k8s_enabled = ($kubeconfig | is-not-empty) and ($kubeconfig | path exists)

    for srv in ($settings.data.servers? | default []) {
        let hostname = $srv.hostname
        _print $" → ($hostname)"

        # 1. Hetzner API — provider existence and state
        let htz = (probe-hetzner $settings $srv)
        state-server-sync $workspace_path $hostname --provider-id $htz.provider_id --provider-state $htz.provider_state

        if $htz.provider_state == "unknown" {
            # Plain string on purpose: inside $"..." the parenthesised text would be
            # evaluated as a subexpression.
            _print " provider: unknown (API timeout or server not found)"
            continue
        }
        _print $" provider: ($htz.provider_state) id=($htz.provider_id)"

        # 2. SSH liveness
        if not $skip_ssh {
            # `try` (not `do ... catch`) so a failing IP lookup degrades to "no IP".
            let raw_ip = (try { mw_get_ip $settings $srv "public" false } catch { "" })
            let ip = ($raw_ip | default "" | into string | str trim)
            if ($ip | is-not-empty) {
                let ssh_ok = (probe-ssh $ip)
                let ssh_label = if $ssh_ok { "reachable" } else { "unreachable" }
                _print $" ssh ($ip): ($ssh_label)"
                if not $ssh_ok {
                    _print " skipping K8s probes — node unreachable"
                    continue
                }
            }
        }

        # 3. K8s readiness probes (only when kubeconfig provided and server is running)
        if $k8s_enabled and $htz.provider_state == "running" {
            # Re-read per server so earlier writes in this run are reflected.
            let st = (state-read $workspace_path)
            let taskservs = ($st.servers? | default {} | get -o $hostname | default {} | get -o taskservs | default {})

            for ts_entry in ($taskservs | transpose taskserv node) {
                let res = (taskserv-k8s-resource $ts_entry.taskserv)
                if ($res | is-empty) { continue }

                let ready = (probe-k8s-ready $kubeconfig $res.type $res.name $res.ns)
                if $ready {
                    _print $" ($ts_entry.taskserv): K8s ready → confirmed completed"
                    sync-record-taskserv $workspace_path $hostname $ts_entry.taskserv $ts_entry.node "completed" "sync-confirmed" $ts
                } else {
                    # No positive confirmation: record "unknown", never "completed".
                    _print $" ($ts_entry.taskserv): K8s not ready → unknown"
                    sync-record-taskserv $workspace_path $hostname $ts_entry.taskserv $ts_entry.node "unknown" "sync-unknown" $ts
                }
            }
        }
    }

    _print "Sync complete."
}
|