# provisioning sync — reconcile .provisioning-state.ncl against external APIs.
# Sources: Hetzner API (server existence/status), K8s API (pod/deploy readiness), SSH probe.
# Never marks a taskserv 'completed without positive confirmation.
# Ambiguous or timed-out probes write 'unknown.

use state.nu *
# Selective imports replacing `use ../lib_provisioning *` (ADR-025 Phase 4).
use lib_provisioning/utils/interface.nu [_print]
use lib_provisioning/result.nu [err]

# ─── Provider probe ───────────────────────────────────────────────────────────

# Query Hetzner API for a server and return { provider_id, provider_state }.
# Returns { provider_id: "", provider_state: "unknown" } on any error.
#
# provider_state is one of "running", "off" or "unknown"; every other status
# string reported by the provider is folded into "unknown".
def probe-hetzner [settings: record, server: record]: nothing -> record {
    # Single fallback value shared by every failure path below.
    let unknown = { provider_id: "", provider_state: "unknown" }

    # NOTE(review): `complete` captures the output of an external command;
    # confirm that mw_server_info ends in one, otherwise this pipeline may fail.
    let info = (do { mw_server_info $server true } | complete)
    if $info.exit_code != 0 or ($info.stdout | is-empty) {
        return $unknown
    }

    # `try`, not `do`: `catch` is only an error handler when paired with `try`.
    # After `do` it is passed along as an extra closure argument, so a JSON
    # parse failure would escape instead of producing the "unknown" result.
    let parsed = (try { $info.stdout | from json } catch { null })
    if ($parsed | is-empty) {
        return $unknown
    }
    # Valid JSON that is not an object (list, number, string) has no
    # id/status fields to read, so treat it as an unusable answer.
    if not ($parsed | describe | str starts-with "record") {
        return $unknown
    }

    let raw_state = ($parsed.status? | default "unknown" | str downcase)
    let mapped = match $raw_state {
        "running" => "running",
        "off" => "off",
        _ => "unknown",
    }

    {
        provider_id: ($parsed.id? | default "" | into string),
        provider_state: $mapped,
    }
}

# ─── K8s probe ────────────────────────────────────────────────────────────────

# Check if a K8s deployment or daemonset is ready via kubectl.
# Returns true only on explicit "available" status confirmation.
# Readiness is the resource's ready count being greater than zero:
# `.status.numberReady` for daemonsets, `.status.readyReplicas` otherwise.
# A non-zero kubectl exit code, an empty answer or a non-numeric answer all
# yield false, so a failed probe is never reported as ready.
def probe-k8s-ready [
    kubeconfig: string
    resource_type: string  # deployment | daemonset
    name: string
    namespace: string = "kube-system"
]: nothing -> bool {
    # DaemonSet status has no readyReplicas field; its ready count lives in
    # numberReady. Querying the wrong field returns an empty string.
    let kind = ($resource_type | str downcase)
    let is_daemonset = ($kind == "ds") or ($kind | str starts-with "daemonset")
    let jsonpath = if $is_daemonset {
        "{.status.numberReady}"
    } else {
        "{.status.readyReplicas}"
    }

    let result = (do {
        ^kubectl --kubeconfig $kubeconfig -n $namespace get $resource_type $name -o $"jsonpath=($jsonpath)" err> /dev/null
    } | complete)
    if $result.exit_code != 0 {
        return false
    }

    # kubectl prints nothing when the field is absent (no ready replicas yet).
    # `"" | into int` raises, and `default` only covers null, so handle the
    # empty and non-numeric cases explicitly.
    let raw = ($result.stdout | str trim)
    if ($raw | is-empty) {
        return false
    }
    let ready = (try { $raw | into int } catch { 0 })
    $ready > 0
}

# Map taskserv name to K8s resource for readiness probing.
# Returns a record { type, name, ns } for known taskservs.
# Returns null if the taskserv has no K8s resource to probe.
def taskserv-k8s-resource [taskserv: string]: nothing -> record {
    match $taskserv {
        "cilium" => { type: "daemonset", name: "cilium", ns: "kube-system" },
        "hetzner_csi" => { type: "deployment", name: "hcloud-csi-controller", ns: "kube-system" },
        "democratic_csi" => { type: "deployment", name: "democratic-csi-controller", ns: "democratic-csi" },
        "coredns" => { type: "deployment", name: "coredns", ns: "kube-system" },
        _ => null,
    }
}

# ─── SSH probe ────────────────────────────────────────────────────────────────

# Returns true if the server responds to SSH on port 22 within 5 seconds.
# Only checks that the TCP port accepts a connection (`nc -z`); no login is attempted.
def probe-ssh [ip: string]: nothing -> bool {
    let result = (do { ^nc -z -w 5 $ip 22 err> /dev/null } | complete)
    $result.exit_code == 0
}

# ─── Main sync ────────────────────────────────────────────────────────────────

# Reconcile the stored provisioning state with what the external systems report.
#
# For every server in `settings.data.servers`:
#   1. record the provider id/state returned by probe-hetzner; stop here for
#      this server when the provider state is "unknown";
#   2. unless --skip-ssh, probe port 22 on the public IP and stop here for this
#      server when it is unreachable;
#   3. when a kubeconfig file exists and the server is "running", probe each
#      taskserv that maps to a K8s resource and write "completed" (ready) or
#      "unknown" (not ready) together with a log entry.
export def state-sync [
    workspace_path: string
    settings: record
    --kubeconfig: string = ""   # Path to kubeconfig for K8s probes (skipped if empty)
    --skip-ssh                  # Skip SSH liveness probes
    --infra: string = ""        # Filter to specific infra name
]: nothing -> nothing {
    # NOTE(review): --infra is accepted but not consulted anywhere in this body.
    _print "Syncing provisioning state against external APIs ..."

    # The format string ends in a literal "Z", so the instant must really be
    # UTC; `date now` alone yields local time.
    let ts = (date now | date to-timezone UTC | format date "%Y-%m-%dT%H:%M:%SZ")

    for srv in ($settings.data.servers? | default []) {
        let hostname = $srv.hostname
        _print $" → ($hostname)"

        # 1. Hetzner API — provider existence and state
        let htz = (probe-hetzner $settings $srv)
        state-server-sync $workspace_path $hostname --provider-id $htz.provider_id --provider-state $htz.provider_state

        if $htz.provider_state == "unknown" {
            # Plain string on purpose: inside $"..." the parenthesised text
            # would be evaluated as a command instead of being printed.
            _print " provider: unknown (API timeout or server not found)"
            continue
        }
        _print $" provider: ($htz.provider_state) id=($htz.provider_id)"

        # 2. SSH liveness
        if not $skip_ssh {
            # `try`, not `do`: only `try` pairs with `catch`, so a failing
            # lookup degrades to "no IP" instead of aborting the whole sync.
            let raw_ip = (try { mw_get_ip $settings $srv "public" false } catch { "" })
            let ip = ($raw_ip | default "" | str trim)
            if ($ip | is-not-empty) {
                let ssh_ok = (probe-ssh $ip)
                let ssh_label = if $ssh_ok { "reachable" } else { "unreachable" }
                _print $" ssh ($ip): ($ssh_label)"
                if not $ssh_ok {
                    _print $" skipping K8s probes — node unreachable"
                    continue
                }
            }
        }

        # 3. K8s readiness probes (only when kubeconfig provided and server is running)
        let k8s_enabled = (
            ($kubeconfig | is-not-empty)
            and ($kubeconfig | path exists)
            and $htz.provider_state == "running"
        )
        if not $k8s_enabled {
            continue
        }

        let st = (state-read $workspace_path)
        let taskservs = (
            $st.servers?
            | default {}
            | get -o $hostname
            | default {}
            | get -o taskservs
            | default {}
        )

        for ts_entry in ($taskservs | transpose taskserv node) {
            let res = (taskserv-k8s-resource $ts_entry.taskserv)
            if ($res | is-empty) {
                # No K8s resource is mapped for this taskserv; leave its state untouched.
                continue
            }

            let ready = (probe-k8s-ready $kubeconfig $res.type $res.name $res.ns)
            # Both outcomes write the same node shape; only the state, the
            # log event and the printed note differ.
            let outcome = if $ready {
                { state: "completed", event: "sync-confirmed", note: "K8s ready → confirmed completed" }
            } else {
                { state: "unknown", event: "sync-unknown", note: "K8s not ready → unknown" }
            }

            _print $" ($ts_entry.taskserv): ($outcome.note)"
            state-node-set $workspace_path $hostname $ts_entry.taskserv {
                state: $outcome.state,
                actor: { identity: "system", source: "sync" },
                log: (log-trim ($ts_entry.node.log? | default [] | append {
                    ts: $ts,
                    event: $outcome.event,
                    source: "sync",
                })),
            }
        }
    }

    _print "Sync complete."
}