- DAG architecture: `dag show/validate/export` (nulib/main_provisioning/dag.nu),
config loader (lib_provisioning/config/loader/dag.nu), taskserv dag-executor.
Backed by schemas/lib/dag/*.ncl; orchestrator emits NATS events via
WorkspaceComposition::into_workflow. See ADR-020, ADR-021.
- Unified Component Architecture: components/mod.nu, main_provisioning/
{components,workflow,extensions,ontoref-queries}.nu. Full workflow engine with
topological sort and NATS subject emission. Blocks A-H complete (libre-daoshi).
- Commands-registry: nulib/commands-registry.ncl (Nickel source, 314 lines) +
JSON cache at ~/.cache/provisioning/commands-registry.json rebuilt on source
change. cli/provisioning fast-path alias expansion avoids cold Nu startup.
ADDING_COMMANDS.md documents new-command workflow.
- Platform service manager: service-manager.nu (+573), startup.nu (+611),
service-check.nu (+255); autostart/bootstrap/health/target refactored.
- Nushell 0.112.2 migration: removed all try/catch and bash redirections;
external commands prefixed with ^; type signatures enforced. Driven by
scripts/refactor-try-catch{,-simplified}.nu.
- TTY stack: removed shlib/*-tty.sh; replaced by cli/tty-dispatch.sh,
tty-filter.sh, tty-commands.conf.
- New domain modules: images/ (golden image lifecycle), workspace/{state,sync}.nu,
main_provisioning/{bootstrap,cluster-deploy,fip,state}.nu, commands/{state,
build,integrations/auth,utilities/alias}.nu, platform.nu expanded (+874).
- Config loader overhaul: loader/core.nu slimmed (-759), cache/core.nu
refactored (-454), removed legacy loaders/file_loader.nu (-330).
- Thirteen new provisioning-<domain>.nu top-level modules for bash dispatcher.
- Tests: test_workspace_state.nu (+351); updates to test_oci_registry,
test_services.
- README + CHANGELOG updated.
357 lines
16 KiB
Text
357 lines
16 KiB
Text
use ../lib_provisioning/workspace *
|
||
use ../lib_provisioning/user/config.nu [get-workspace-path, get-active-workspace-details]
|
||
use ../lib_provisioning/utils/nickel_processor.nu [ncl-eval]
|
||
|
||
# Decrypt a SOPS-encrypted dotenv file and return its contents as a record.
#
# The file must be in dotenv format (KEY=VALUE lines). SOPS is called with
# --output-type=dotenv so the decrypted output is in the same format.
# Comment lines (first non-blank character is #), blank lines and any line
# without "=" are ignored. Each remaining line is split at its first "=".
# When a key occurs more than once, the last occurrence wins.
#
# No auto-discovery happens here: secrets_path must name an existing file.
# Raises an error when the file does not exist or when sops exits non-zero
# (the sops stderr is included in the message).
def cd-load-secrets [secrets_path: string]: nothing -> record {
    if not ($secrets_path | path exists) {
        error make { msg: $"Secrets file not found: ($secrets_path)" }
    }
    let result = (do { ^sops --decrypt --output-type=dotenv $secrets_path } | complete)
    if $result.exit_code != 0 {
        error make { msg: $"SOPS decrypt failed for ($secrets_path):\n($result.stderr)" }
    }
    # upsert rather than insert: a repeated key must not abort the whole load.
    $result.stdout
    | lines
    | where {|line| not ($line | str trim --left | str starts-with "#") }
    | where {|line| $line | str contains "=" }
    | parse "{key}={value}"
    | reduce --fold {} {|row, acc| $acc | upsert $row.key $row.value }
}
|
||
|
||
# Evaluate the Nickel file at <ws_root>/<rel_path> via ncl-eval and return the result.
# The workspace root and the provisioning root are handed to ncl-eval as import paths;
# the provisioning root is $env.PROVISIONING, or /usr/local/provisioning when unset.
def cd-ncl-export [ws_root: string, rel_path: string]: nothing -> record {
    let import_paths = [$ws_root ($env.PROVISIONING? | default "/usr/local/provisioning")]
    ncl-eval ($ws_root | path join $rel_path) $import_paths
}
|
||
|
||
# Build the FIP env var record from <ws_root>/.provisioning-state.json.
#
# Returns FIP_A_IP, FIP_A_ID, FIP_B_IP, FIP_B_ID, FIP_C_IP, FIP_C_ID (in that order).
# A floating IP, or an ip/id field, that is absent from the state yields "".
# Errors when the state file does not exist.
#
# Slot mapping (keys under bootstrap.floating_ips; per the bootstrap.nu naming
# convention: "librecloud-fip-" prefix stripped, dashes replaced by underscores):
#   smtp           → FIP_A (Stalwart SMTP, sgoyol-1)
#   sgoyol_ingress → FIP_B (sgoyol Cilium ingress)
#   wuji           → FIP_C (wuji K8s API + ingress)
def cd-load-fip-env [ws_root: string]: nothing -> record {
    let state_file = ($ws_root | path join ".provisioning-state.json")
    if not ($state_file | path exists) {
        error make { msg: ".provisioning-state.json not found — run: provisioning bootstrap first" }
    }
    let floating = (open --raw $state_file | from json | get bootstrap | get floating_ips)
    let slots = [
        {slot: "A", key: "smtp"}
        {slot: "B", key: "sgoyol_ingress"}
        {slot: "C", key: "wuji"}
    ]
    $slots | reduce --fold {} {|s, vars|
        let fip = ($floating | get -o $s.key | default {})
        $vars
        | insert $"FIP_($s.slot)_IP" ($fip | get -o ip | default "")
        | insert $"FIP_($s.slot)_ID" ($fip | get -o id | default "")
    }
}
|
||
|
||
# Assemble the environment record handed to an extension install script.
#
# <PREFIX> is ext_name upper-cased with "-" and "." replaced by "_"; field names are
# normalised the same way. When cfg is a record, each field becomes:
#   string / int / float / bool → `<PREFIX>_<FIELD>` (stringified)
#   anything else               → `<PREFIX>_<FIELD>_JSON` (compact JSON)
# A non-record cfg contributes no per-field variables.
# `<PREFIX>_CONFIG_JSON` (the whole cfg as compact JSON), every entry of fip_env and
# KUBECONFIG are always added on top.
def cd-ext-env [ext_name: string, cfg: any, fip_env: record, kubeconfig: string]: nothing -> record {
    let scalar_types = ["string", "int", "float", "bool"]
    let prefix = ($ext_name | str upcase | str replace --all "-" "_" | str replace --all "." "_")
    let cfg_is_record = ($cfg | describe | str starts-with "record")

    let field_vars = if not $cfg_is_record {
        {}
    } else {
        $cfg | transpose key val | reduce --fold {} {|field, vars|
            let suffix = ($field.key | str upcase | str replace --all "-" "_" | str replace --all "." "_")
            let kind = ($field.val | describe)
            if $kind == "string" {
                $vars | insert $"($prefix)_($suffix)" $field.val
            } else if $kind in $scalar_types {
                $vars | insert $"($prefix)_($suffix)" ($field.val | into string)
            } else {
                $vars | insert $"($prefix)_($suffix)_JSON" ($field.val | to json --raw)
            }
        }
    }

    let with_full_cfg = ($field_vars | insert $"($prefix)_CONFIG_JSON" ($cfg | to json --raw))
    $with_full_cfg | merge $fip_env | insert KUBECONFIG $kubeconfig
}
|
||
|
||
# Locate the install script for an extension under extensions/clusters/.
#
# Extensions have inconsistent naming: some dirs use underscores (cert_manager, hcloud_floater)
# while scripts use dashes (install-cert-manager.sh, install-hcloud-floater.sh). Others are
# all-dash (oci-reg) or all-same (metallb, git, woodpecker, stalwart).
#
# The search itself lives in cd-find-script-opt, which tries all 4 combinations of
# (dir: _ or -) × (script: _ or -); this wrapper only turns that function's
# "<not found>" result into an error, so both variants always agree on the lookup.
def cd-find-script [prov_root: string, ext_name: string]: nothing -> string {
    let script = (cd-find-script-opt $prov_root $ext_name)
    if $script == "<not found>" {
        error make { msg: $"No install script for extension '($ext_name)' in ($prov_root)/extensions/clusters/ (tried all _/- variants)" }
    }
    $script
}
|
||
|
||
# Locate the install script for a component under extensions/components/.
#
# Components are structured as extensions/components/{comp_name}/{mode}/install-{comp_name}.sh.
#
# The search itself lives in cd-find-component-script-opt, which tries all 4 combinations
# of dir/script name with dashes and underscores; this wrapper only turns that function's
# "<not found>" result into an error, so both variants always agree on the lookup.
def cd-find-component-script [prov_root: string, comp_name: string, mode: string]: nothing -> string {
    let script = (cd-find-component-script-opt $prov_root $comp_name $mode)
    if $script == "<not found>" {
        error make { msg: $"No install script for component '($comp_name)' mode '($mode)' in ($prov_root)/extensions/components/ (tried all _/- variants)" }
    }
    $script
}
|
||
|
||
# Dry-run friendly component script lookup: never errors.
#
# Checks <prov_root>/extensions/components/<dir>/<mode>/install-<script>.sh for the four
# dir/script spellings of comp_name (underscored or dashed, in the order listed below)
# and returns the first path that exists, or "<not found>" when none does.
def cd-find-component-script-opt [prov_root: string, comp_name: string, mode: string]: nothing -> string {
    let dashed = ($comp_name | str replace --all "_" "-")
    let underscored = ($comp_name | str replace --all "-" "_")
    let spellings = [
        {dir: $underscored, script: $underscored}
        {dir: $underscored, script: $dashed}
        {dir: $dashed, script: $dashed}
        {dir: $dashed, script: $underscored}
    ]
    let hits = (
        $spellings
        | each {|s| $prov_root | path join "extensions/components" $s.dir $mode $"install-($s.script).sh" }
        | where {|candidate| $candidate | path exists }
    )
    if ($hits | is-not-empty) { $hits | first } else { "<not found>" }
}
|
||
|
||
# Dry-run friendly extension script lookup: never errors.
#
# Checks <prov_root>/extensions/clusters/<dir>/default/install-<script>.sh for the four
# dir/script spellings of ext_name (underscored or dashed, in the order listed below)
# and returns the first path that exists, or "<not found>" when none does.
def cd-find-script-opt [prov_root: string, ext_name: string]: nothing -> string {
    let dashed = ($ext_name | str replace --all "_" "-")
    let underscored = ($ext_name | str replace --all "-" "_")
    let spellings = [
        {dir: $underscored, script: $underscored}
        {dir: $underscored, script: $dashed}
        {dir: $dashed, script: $dashed}
        {dir: $dashed, script: $underscored}
    ]
    let hits = (
        $spellings
        | each {|s| $prov_root | path join "extensions/clusters" $s.dir "default" $"install-($s.script).sh" }
        | where {|candidate| $candidate | path exists }
    )
    if ($hits | is-not-empty) { $hits | first } else { "<not found>" }
}
|
||
|
||
# Execute the health gate for an extension, retrying on transient failures.
#
# gate.check_cmd is run through `bash -c`; exit status 0 means the gate passed.
# At most gate.retries attempts are made, with a fixed 10 second pause after every
# failed attempt except the last. The pause uses Nushell's builtin `sleep`, so no
# external `sleep` binary is required.
# Raises an error naming the command when no attempt succeeds; with retries <= 0 the
# command is never run and the error is raised immediately.
def cd-health-gate [ext_id: string, gate: record]: nothing -> nothing {
    let max_attempts = $gate.retries
    mut attempt = 0
    mut passed = false
    while (not $passed) and ($attempt < $max_attempts) {
        $attempt += 1
        let res = (do { ^bash -c $gate.check_cmd } | complete)
        if $res.exit_code == 0 {
            $passed = true
            print $" [($ext_id)] health gate OK"
        } else if $attempt < $max_attempts {
            # Only announce and wait when another attempt will actually follow.
            print $" [($ext_id)] gate ($attempt)/($max_attempts) failed — retry in 10s"
            sleep 10sec
        }
    }
    if not $passed {
        error make { msg: $"[($ext_id)] health gate failed after ($gate.retries) attempts.\nCmd: ($gate.check_cmd)" }
    }
}
|
||
|
||
# Deploy cluster extensions — L3 platform or L4 application services.
#
# Inputs, all relative to the workspace root:
#   cluster/<cluster>/<layer>-dag.ncl   — deployment DAG (`extensions` array)
#   cluster/<cluster>/<layer>.ncl       — extension / component configs
#   .provisioning-state.json            — FIP IPs and IDs written by `provisioning bootstrap`
#   cluster/<cluster>/secrets.sops.env  — optional SOPS secrets (or --secrets-file)
#
# DAG entries are processed in array order. Before an entry runs, every id in its
# `depends_on` must already have been processed, otherwise the deploy aborts with a
# DAG-ordering error. Entries carrying a non-null `component` use scripts under
# extensions/components/; all others use extensions/clusters/. An entry's `health_gate`,
# when present, is run after its install script.
#
# Each install script receives:
#   <EXT>_<FIELD>        — scalar config values (namespace, version, host, …)
#   <EXT>_<FIELD>_JSON   — complex config values (ip_pools, node_selector, …)
#   <EXT>_CONFIG_JSON    — full extension config as JSON
#   FIP_A_IP / FIP_A_ID  — FIP-A (Stalwart SMTP)
#   FIP_B_IP / FIP_B_ID  — FIP-B (sgoyol ingress)
#   FIP_C_IP / FIP_C_ID  — FIP-C (wuji)
#   KUBECONFIG           — path to kubeconfig
# plus the decrypted secrets, which are merged last and win on key collisions.
#
# Usage:
#   provisioning cluster deploy platform sgoyol --workspace librecloud_renew
#   provisioning cluster deploy apps sgoyol --workspace librecloud_renew
export def "main cluster deploy" [
    layer: string                # Deployment layer: platform | apps
    cluster: string              # Cluster name (e.g. sgoyol, wuji)
    --workspace (-w): string     # Workspace name (default: active workspace)
    --dry-run (-n)               # Print the execution plan without running install scripts
    --kubeconfig (-k): string    # Override KUBECONFIG path for kubectl calls
    --secrets-file (-s): string  # SOPS-encrypted dotenv file with install script secrets.
                                 # Auto-discovered at cluster/<cluster>/secrets.sops.env if omitted.
] : nothing -> nothing {
    if $layer not-in ["platform", "apps"] {
        error make { msg: $"layer must be 'platform' or 'apps', got: ($layer)" }
    }

    # Explicit --workspace wins; otherwise fall back to the active workspace.
    let ws_name = if ($workspace | is-empty) {
        let active = (get-active-workspace-details)
        if ($active == null) {
            error make { msg: "No active workspace — pass --workspace or activate one first" }
        }
        $active.name
    } else {
        $workspace
    }

    let ws_root = (get-workspace-path $ws_name)
    let prov_root = ($env.PROVISIONING? | default "/usr/local/provisioning")
    let dag_rel = $"cluster/($cluster)/($layer)-dag.ncl"
    let cfg_rel = $"cluster/($cluster)/($layer).ncl"
    let kube_cfg = if ($kubeconfig | is-empty) {
        $env.KUBECONFIG? | default "/etc/kubernetes/admin.conf"
    } else {
        $kubeconfig
    }

    print $"Cluster deploy | workspace: ($ws_name) | cluster: ($cluster) | layer: ($layer)"
    if $dry_run { print "DRY RUN — install scripts will not execute" }
    if ($secrets_file | is-not-empty) { print $" secrets: ($secrets_file)" }
    print ""

    let dag = (cd-ncl-export $ws_root $dag_rel)
    let cfg = (cd-ncl-export $ws_root $cfg_rel)
    let fip_env = (cd-load-fip-env $ws_root)
    let ext_cfgs = ($cfg | get extensions)

    # SOPS secrets: explicit path > auto-discovered cluster/<cluster>/secrets.sops.env > empty.
    let auto_path = ($ws_root | path join $"cluster/($cluster)/secrets.sops.env")
    let secrets_env = if ($secrets_file | is-not-empty) {
        cd-load-secrets $secrets_file
    } else if ($auto_path | path exists) {
        print $" secrets: ($auto_path)"
        cd-load-secrets $auto_path
    } else {
        {}
    }

    # Fold over the DAG in array order; the accumulator is the list of ids processed so far.
    let _completed = ($dag.extensions | reduce --fold [] {|entry, completed|
        let ext_id = $entry.id

        # Dependency guard — catches DAG authoring errors.
        let unsatisfied = ($entry.depends_on | where {|dep| $dep not-in $completed })
        if ($unsatisfied | is-not-empty) {
            error make { msg: $"[($ext_id)] depends on [($unsatisfied | str join ', ')] not yet deployed — fix DAG ordering in ($dag_rel)" }
        }

        # Component nodes resolve under extensions/components/, extension nodes under extensions/clusters/.
        let is_component = ("component" in $entry) and (($entry | get -o component | default null) != null)
        let target = if $is_component {
            let comp = $entry.component
            {
                name: $comp.name
                mode: ($comp | get -o mode | default "cluster")
                cfg: ($cfg | get -o components | default {} | get -o $ext_id | default {})
            }
        } else {
            {
                name: $entry.extension
                mode: null
                cfg: ($ext_cfgs | get -o $ext_id | default {})
            }
        }

        # Secrets go last so their values override any NCL-derived variable with the same key.
        let env_vars = (cd-ext-env $target.name $target.cfg $fip_env $kube_cfg | merge $secrets_env)
        let gate = ($entry | get -o health_gate | default null)

        if $is_component {
            print $"[($ext_id)] component: ($target.name) mode=($target.mode)"
        } else {
            print $"[($ext_id)] extension: ($target.name)"
        }
        if ($entry | get -o parallel | default false) { print " note: parallel=true (sequential execution)" }

        if $dry_run {
            let script_display = if $is_component {
                cd-find-component-script-opt $prov_root $target.name $target.mode
            } else {
                cd-find-script-opt $prov_root $target.name
            }
            print $" script: ($script_display)"
            print $" env keys: ($env_vars | columns | sort | str join ', ')"
            if $gate != null {
                print $" gate: ($gate.check_cmd | str substring 0..80)..."
            }
        } else {
            let script = if $is_component {
                cd-find-component-script $prov_root $target.name $target.mode
            } else {
                cd-find-script $prov_root $target.name
            }
            print $" script: ($script)"
            print ""
            with-env $env_vars { ^bash $script }
            let exit_code = $env.LAST_EXIT_CODE
            if $exit_code != 0 {
                if $is_component {
                    error make { msg: $"[($ext_id)] component install script exited ($exit_code)" }
                } else {
                    error make { msg: $"[($ext_id)] install script exited ($exit_code)" }
                }
            }
            if $gate != null {
                cd-health-gate $ext_id $gate
            }
        }

        print ""
        $completed | append $ext_id
    })

    print $"Cluster deploy complete: ($layer) on ($cluster)"
}
|