# provisioning/adrs/adr-034-workspace-justfile-recipe-pattern.ncl

let d = import "adr-defaults.ncl" in
d.make_adr {
id = "adr-034",
title = "Workspace Justfile Recipe Pattern: thin-wrapper dispatch + op governance contract",
status = 'Accepted,
date = "2026-04-24",
context = "Workspace justfiles in `workspaces/{ws}/justfiles/` are the operator-facing command surface for all cluster and infrastructure operations. Before this decision, no formal authoring contract existed for justfile modules: logic appeared inline (conditionals, loops), cache was not cleared on write paths, op governance wiring (preflight before op start) was inconsistent, and intent parameters were not quoted — allowing spaces in intent strings to break positional argument parsing. The `op.just` deploy/redeploy/purge recipes and the `mail.just` component-specific pattern emerged as the reference implementations during the libre-wuji postgresql deployment cycle, but the constraints were tribal knowledge. This ADR formalises the contract so any new justfile module can be validated by inspection without reading the reference implementations.",
decision = "Workspace justfile modules follow a four-part contract. (1) Module structure: each `.just` file covers exactly one functional domain, declares a module-level variable for paths/script refs (never hardcoded inline — `infra` must be defined once as `infra := \"infra/{ws}\"` and used as `{{infra}}/ops/` throughout the module), and provides a `{module}-help` recipe that uses `awk` to extract the group's recipes from `just --list`. (2) Thin-wrapper rule: recipe bodies contain zero branching logic — all logic lives in `provisioning` CLI subcommands or `nu scripts/`. The first allowed exception is multi-step shell composition (`#!/usr/bin/env bash` + `set -euo pipefail`) when the composition itself is the value (e.g. sequencing preflight → op start → deploy → op finish). A second allowed exception is `PROVISIONING_DEBUG` passthrough: multi-step recipes may check `${PROVISIONING_DEBUG:-false}` and set a `DBG_FLAG` variable to propagate debug mode to all `provisioning` calls in the recipe body — this is inline logic that cannot be pushed to the CLI because the flag must reach both the `--check` and the deploy invocations. (3) Write-path invariants: any recipe that mutates cluster state must `export PROVISIONING_NO_CACHE=true` before the first `provisioning` call, preventing stale Nickel config from reaching the remote node. (4) Op governance wiring: write recipes that span multiple `provisioning` calls must follow the preflight-first sequence — `provisioning component {op} {component} --check` runs and must succeed before `provisioning op start` is called; `OP_ID` is captured from `ls -t {{infra}}/ops/ | head -1` immediately after `op start`; `provisioning op finish $OP_ID success|failed` is called unconditionally in both branches. Intent parameters must be passed quoted (`\"{{intent}}\"`) in all delegate calls to preserve spaces.",
rationale = [
{
claim = "Inline logic in justfiles silently diverges from provisioning CLI semantics",
detail = "Just is a task runner, not a shell — variables, quoting, and flow-control behaviour differ subtly from bash. Any conditional or loop written inline in a recipe body must duplicate decisions already encoded in the provisioning CLI or nu scripts, and will drift independently. The thin-wrapper rule prevents this divergence class: the justfile remains a dispatch table, not an implementation.",
},
{
claim = "Stale Nickel config reaching the remote is undetectable at deploy time",
detail = "The provisioning CLI caches rendered Nickel config across invocations. Without `PROVISIONING_NO_CACHE=true`, a write recipe may bundle a config that was rendered before the current edit, sending outdated field values to the orchestrator. This class of bug is invisible in the local preflight because the preflight runs against the cached bundle. Exporting the flag at recipe scope ensures every build in that recipe execution is fresh.",
},
{
claim = "Op record creation before preflight failure leaves an orphaned op in failed state",
detail = "If `provisioning op start` runs before `provisioning component {op} --check`, and the preflight then fails, an op record exists in `infra/{ws}/ops/` with no matching deploy attempt. The op log shows a failed op with no cause. The preflight-first sequence guarantees that no op record is created for a configuration that was known-bad at submission time.",
},
{
claim = "Unquoted intent parameters silently truncate multi-word intent strings",
detail = "Just passes positional parameters to shell recipes as separate words. `provisioning op start {{component}} {{operation}} {{intent}}` receives 'initial' as intent when the caller wrote 'initial mail server setup'. The quoted form `\"{{intent}}\"` preserves the full string through the shell word-splitting boundary. This is observable only when reviewing op log entries — the intent stored in the op record will be truncated without error.",
},
{
claim = "awk-based help recipes provide self-consistent documentation without maintenance overhead",
detail = "A `{module}-help` recipe that runs `just --list | awk '/^ \\[{group}\\]/{p=1;next} p && /^ \\[/{exit} p && NF && !/-help/{print}'` extracts group recipes from the live justfile — the help output is always current. A hand-maintained help block diverges from reality as recipes are added or removed. The awk pattern is copy-exact across modules; only the group name and description line change.",
},
{
claim = "PROVISIONING_DEBUG passthrough is the only legitimate inline conditional in multi-step recipes",
detail = "The `PROVISIONING_DEBUG=true just deploy ...` invocation pattern requires a `DBG_FLAG` variable that is passed to both the `--check` preflight and the deploy invocation. If the flag only reached the deploy but not the preflight, debug output would be incomplete. The flag cannot be pushed to a provisioning CLI subcommand because the shell expansion happens at recipe body scope. This is a narrow, named exception to the thin-wrapper rule — not a precedent for arbitrary inline logic.",
},
],
consequences = {
positive = [
"New module authors have a verifiable contract — a module is conformant if `nu scripts/validate-justfile.nu` produces no violations",
"Contract is machine-validated: `validate-justfile.nu` checks no-cache, preflight ordering, intent quoting, and bash strict mode across all modules",
"Op log integrity preserved: orphaned ops from failed preflights cannot occur under the contract — including secret prerequisites (missing SOPS file, uncovered `_require_env` variables) which are caught by the preflight gate before `op start`",
"Help recipes are self-maintaining — adding a recipe to the group makes it appear in `{module}-help` automatically",
"Intent strings with spaces work correctly in all contexts (op log, audit trail, status display)",
"PROVISIONING_DEBUG propagates to both preflight and deploy — debug output is complete, with no step exempt from the flag passthrough pattern",
],
negative = [
"Multi-step bash composition (deploy/redeploy/purge pattern) is explicitly allowed but must be justified — this weakens the thin-wrapper rule at the margin; authors must recognise the boundary",
"The `OP_ID=$(ls -t infra/{ws}/ops/ | head -1)` capture is a side-effect convention, not a typed return value — it breaks silently if `ops/` is on a filesystem where mtime ordering is unreliable (not a concern for git-tracked directories, but worth documenting)",
],
},
alternatives_considered = [
{
option = "Encode op governance logic in a provisioning subcommand that wraps preflight+start+deploy+finish",
why_rejected = "The deploy recipe wrapping already exists for the common case. But purge, redeploy, and future multi-phase operations require different sequencing (e.g. purge requires interactive confirmation between op start and the destructive action). A single CLI wrapper would need flags for every variant, reintroducing the branching the thin-wrapper rule eliminates. The composition value of justfile multi-step recipes is precisely this per-operation sequencing.",
},
{
option = "Use just variables instead of bash for PROVISIONING_NO_CACHE",
why_rejected = "Just `export` only works for simple assignments and does not compose with multi-step bash recipe bodies. The `export PROVISIONING_NO_CACHE=true` pattern inside a `#!/usr/bin/env bash` recipe is the only form that reliably propagates the environment variable to all `provisioning` subprocess calls in that recipe body, including those in conditionals.",
},
{
option = "Generate justfile modules from provisioning component metadata",
why_rejected = "Component-specific modules (mail.just, and future postgresql.just) contain operational domain knowledge — emergency procedures, non-standard flags, guard rails — that cannot be derived from component metadata alone. Auto-generation would produce thin scaffolding without the operational value. The module contract is an authoring guide, not a codegen target.",
},
],
constraints = [
{
id = "write-recipe-no-cache",
claim = "Every write recipe (deploy, redeploy, purge, and any recipe that calls provisioning component {op}) must export PROVISIONING_NO_CACHE=true before the first provisioning call",
scope = "workspaces/*/justfiles/*.just",
severity = 'Hard,
# Automated check: runs the validator script and greps its combined
# stdout/stderr for the 'write recipe missing' message.
# grep exits 1 when no line matches, so `expect_exit = 1` presumably encodes
# "the validator reported no such violation" — TODO confirm against the runner.
# NOTE(review): if `cmd` is executed as a plain shell pipeline, its exit status
# is grep's. A validator that fails to start (missing script, nu error) also
# yields no matching line and exit 1, which would look like a pass — verify.
# NOTE(review): the script path is pinned to workspaces/libre-wuji, while this
# constraint's scope is workspaces/*/justfiles/*.just — confirm the validator
# covers every workspace.
check = {
tag = 'NuCmd,
cmd = "nu workspaces/libre-wuji/scripts/validate-justfile.nu 2>&1 | grep 'write recipe missing'",
expect_exit = 1,
},
rationale = "Stale Nickel config silently reaches the remote node when the cache is not cleared. Without PROVISIONING_NO_CACHE=true, the bundle builder may reuse a pre-edit render for the current operation. The flag must be set before any provisioning invocation in the recipe so that even preflight runs against a fresh render.",
},
{
id = "op-governance-preflight-first",
claim = "In any multi-step recipe that calls provisioning op start, a provisioning component {op} {component} --check must appear before it and gate on its exit code",
scope = "workspaces/*/justfiles/*.just",
severity = 'Hard,
# Automated check: runs the validator script and greps its combined
# stdout/stderr for any line containing 'op start'.
# grep exits 1 when no line matches, so `expect_exit = 1` presumably encodes
# "no preflight-ordering violation reported" — TODO confirm against the runner.
# NOTE(review): 'op start' is a broad match; any validator output line that
# mentions it (including non-violation messages) would fail the check — verify
# against the validator's actual message text.
# NOTE(review): if `cmd` is executed as a plain shell pipeline, a validator
# that fails to run also ends in grep exit 1 and would look like a pass.
check = {
tag = 'NuCmd,
cmd = "nu workspaces/libre-wuji/scripts/validate-justfile.nu 2>&1 | grep 'op start'",
expect_exit = 1,
},
rationale = "Op records created before a known-bad preflight produce orphaned failed ops in the audit log with no associated deploy attempt. The preflight-first sequence ensures that op start is called only when the configuration has passed structural validation. Single-line op-start delegates are exempt — they are building blocks, not deploy owners. Purge recipes are exempt — they use interactive namespace confirmation as the gate, not bundle preflight.",
},
{
id = "intent-parameter-quoted",
claim = "Every delegate call that passes {{intent}} must quote it: \"{{intent}}\" — never bare {{intent}}",
scope = "workspaces/*/justfiles/*.just",
severity = 'Hard,
# Automated check: searches under workspaces/ for a `provisioning op start`
# call where `{{intent}}` is followed by a character other than a double quote.
# `must_be_empty = true` presumably means any match is a violation — TODO
# confirm against the runner.
# NOTE(review): the trailing `[^"]` needs one more character on the line, so a
# bare `{{intent}}` that is the last token on its line is NOT matched.
# NOTE(review): only `provisioning op start` lines are inspected, while the
# claim covers every delegate call that passes {{intent}}.
# NOTE(review): whether `{{` is taken literally depends on the regex dialect
# the Grep runner uses — confirm the pattern compiles as intended.
check = {
tag = 'Grep,
pattern = "provisioning op start.*{{intent}}[^\"]",
paths = ["workspaces/"],
must_be_empty = true,
},
rationale = "Just passes positional parameters as shell words. An unquoted {{intent}} is split on whitespace by the shell, truncating multi-word intent strings silently. The op record stores only the first word. This is undetectable at recipe invocation time — it fails only at op log review when the intent field is wrong.",
},
{
id = "multi-line-recipe-bash-strict",
claim = "Any recipe with a #!/usr/bin/env bash shebang must have set -euo pipefail as the second line",
scope = "workspaces/*/justfiles/*.just",
severity = 'Hard,
# Automated check: searches under workspaces/ for the bash shebang line.
# `must_be_empty = false` presumably means the check passes as soon as at least
# one match exists — TODO confirm against the runner.
# NOTE(review): this pattern only detects that bash shebang recipes exist. It
# never looks for `set -euo pipefail`, so as written it cannot detect a recipe
# that violates this constraint's claim.
check = {
tag = 'Grep,
pattern = "#!/usr/bin/env bash",
paths = ["workspaces/"],
must_be_empty = false,
},
rationale = "Just runs a shebang recipe as a single script and only sees its final exit status; bash itself does not stop on a failing command unless `set -e` is active. Without set -euo pipefail, execution continues past a failing provisioning call mid-recipe — subsequent steps run against a broken cluster state. The `set -e` part is the critical one: it ensures that op finish is not called with 'success' after a deploy failure.",
},
{
id = "module-help-recipe-required",
claim = "Every .just module must have a {module}-help recipe using the awk group-extraction pattern",
scope = "workspaces/*/justfiles/*.just",
severity = 'Soft,
# Automated check: searches under workspaces/ for the text `-help:`.
# `must_be_empty = false` presumably means the check passes as soon as at least
# one match exists — TODO confirm against the runner.
# NOTE(review): a single match anywhere satisfies this, so it does not show
# that every .just module defines its own {module}-help recipe, nor that the
# recipe uses the awk group-extraction pattern named in the claim.
# NOTE(review): the pattern starts with `-`; confirm the runner passes it as a
# pattern and not as a command-line option.
check = {
tag = 'Grep,
pattern = "-help:",
paths = ["workspaces/"],
must_be_empty = false,
},
rationale = "Without a {module}-help recipe, the module is invisible from the root default recipe. Operators discover available operations via 'just' (default recipe) → 'just {module}-help' → recipe detail. A module without help breaks the discovery chain. The awk pattern is self-maintaining — no manual synchronisation required as recipes are added.",
},
],
ontology_check = {
decision_string = "Workspace justfile modules: thin-wrapper dispatch (no inline logic) + PROVISIONING_NO_CACHE=true on write paths + preflight-first op governance sequence + quoted intent parameters + awk-based self-documenting help recipes",
invariants_at_risk = ["config-driven-always", "type-safety-nickel"],
verdict = 'Safe,
},
related_adrs = ["adr-031-unified-component-cli", "adr-033-cluster-component-extension-pattern"],
}