# ADR-016 — Workspace Taskserv Execution as Typed DAGs (Formula Pattern).
#
# The record below is handed to `make_adr` from `adr-defaults.ncl`.
# All prose fields are ordinary double-quoted strings; each one is kept on a
# single physical line on purpose, because a raw line break inside such a
# literal becomes part of the string value.
let d = import "adr-defaults.ncl" in

d.make_adr {
  id = "adr-016",
  title = "Workspace Taskserv Execution as Typed DAGs (Formula Pattern)",
  status = 'Accepted,
  date = "2026-03-14",

  # Problem statement: why the positional `taskservs` array is insufficient.
  context = "Workspace server definitions declare taskservs as positional arrays — e.g. `taskservs = [etcd, kubernetes, containerd, cilium]`. The provisioning platform executes these in strict linear order regardless of actual dependencies between tasks. This model has three problems: (1) It cannot express parallelism — `containerd` and `coredns` are independent of each other but are serialized behind `kubernetes`. (2) It cannot express conditional edges — a failed `etcd` should halt `kubernetes` but a failed `coredns` should not. (3) The execution intent is implicit — there is no machine-readable artifact that declares which tasks depend on which, so no validation is possible at schema time. The Orchestrator already implements a full `DependencyGraph` with topological sort and `max_parallel_tasks` in `workflow.rs`, but `batch.rs` was building a linear chain from the positional array, ignoring the graph entirely.",

  # The decision itself: schema-side Formula record plus runtime conversion path.
  decision = "Workspace infrastructure definitions declare taskserv execution order as typed DAGs via a `Formula` Nickel record exported from `schemas/lib/formula.ncl`. Each `FormulaNode` carries: `id`, a `TaskServDef` (name, profile, target_save_path), `depends_on: Array FormulaDep` (referential edges by node_id + DepKind), `parallel: Bool`, `on_error: [| Stop | Continue | Retry |]`, and `max_retries: u8`. The Formula is validated at schema time by a custom Nickel contract that checks: no duplicate node IDs, every `depends_on.node_id` references a declared node, every `edges.{from,to}` references a declared node. At runtime, `Formula::from_json` in `formula.rs` deserializes the JSON export and `Formula::into_workflow(FormulaWorkflowConfig)` converts it into a `WorkflowDefinition` fed directly to `BatchWorkflowEngine::execute_workflow`, which runs the existing `DependencyGraph` topological sort with `max_parallel_tasks`. Positional `taskservs` arrays remain valid — they are the per-server composition definition and are retrocompatible. Formulas are an additive artifact in the same `servers.ncl` file.",

  # Each entry pairs a short claim with its supporting detail.
  rationale = [
    {
      claim = "Schema-time referential integrity catches broken DAGs before deployment",
      detail = "The `_Formula` custom Nickel contract validates all `depends_on.node_id` and edge endpoints against the declared `nodes` array. A missing node ID is a typecheck error, not a runtime panic. This enforces the type-safety-nickel axiom on execution topology.",
    },
    {
      claim = "Parallelism is now explicit and governed",
      detail = "Nodes marked `parallel = true` with no shared dependency run concurrently up to `max_parallel`. The control plane formula runs etcd first, then kubernetes, containerd, and coredns in parallel (3 workers), then cilium after k8s+containerd. This halved the estimated provisioning time for a 5-node cluster compared to the linear chain.",
    },
    {
      claim = "on_error semantics are declarative, not implicit",
      detail = "`on_error = 'Stop` halts the entire workflow on node failure (required for etcd, kubernetes). `on_error = 'Continue` allows the workflow to proceed past a non-critical failure (coredns can fail without blocking cilium). `on_error = 'Retry` retries up to max_retries times before propagating. Previously all failures were treated as Stop with no way to express Continue.",
    },
    {
      claim = "Retrocompatible — zero migration cost for existing servers",
      # NOTE(review): this detail says TaskServDef gained `depends_on`/`on_error`/
      # `max_retries`, while `alternatives_considered` rejects extending TaskServDef
      # with execution metadata — confirm which statement reflects the schema.
      detail = "TaskServDef now has `depends_on`, `on_error`, `max_retries` fields with defaults. Existing `servers.ncl` files typecheck unchanged. Formulas are an opt-in additive array alongside the existing `servers` array. Batch.rs preserves the linear execution path when no formula is supplied.",
    },
    {
      claim = "Single runtime path — the existing DependencyGraph is reused",
      detail = "No new execution engine was written. `Formula::into_workflow` produces a standard `WorkflowDefinition` consumed by the existing `BatchWorkflowEngine::execute_workflow`. The DependencyGraph topological sort and parallel dispatch already existed in workflow.rs and were simply never reached via the batch coordinator.",
    },
  ],

  consequences = {
    positive = [
      "Parallel taskserv execution is now possible and schema-validated",
      "DAG structure is a first-class artifact — diffable, auditable, versionable in git",
      "on+re reflection mode `provisioning-validate-formula` provides cross-validation (taskserv existence, ConflictsWith, cycle detection)",
      "FormulaWorkflowConfig<'a> groups conversion parameters — batch.rs call sites are explicit and lint-clean",
      "Ontology node `formula-dag-execution` registers this pattern for on+re governance",
    ],
    negative = [
      "Two parallel models exist: positional `taskservs` arrays (per-server composition) and `formulas` (execution DAGs). Authors must understand the distinction.",
      "Formula node IDs are a new namespace within a server definition — ID collisions across formulas in the same file are not currently detected at parse time (only within a single formula).",
      "Nickel's custom contract for referential integrity runs at export time, not at typecheck time — `nickel typecheck` alone is insufficient; `nickel export` is required for full validation.",
    ],
  },

  alternatives_considered = [
    {
      option = "Positional array with dependency annotations as comments",
      why_rejected = "Comments are not machine-readable. Cannot be validated, cannot drive runtime parallelism, cannot be consumed by on+re modes. Violates the type-safety-nickel axiom.",
    },
    {
      option = "Separate formula file per server (e.g. wuji-formula.ncl)",
      why_rejected = "Separates declaration from context. The `servers.ncl` file already owns the server definition including its taskservs — the formula belongs alongside it. Import proliferation adds no structural benefit.",
    },
    {
      option = "Encode DAG as a TOML/YAML file consumed by the Orchestrator",
      why_rejected = "Breaks the type-safety-nickel axiom. TOML/YAML have no contracts, no referential integrity, no schema composition. The Formula pattern allows the Nickel schema to own the execution topology, which is where it belongs.",
    },
    {
      option = "Extend TaskServDef directly with execution metadata (depends_on, on_error) and derive the DAG implicitly",
      why_rejected = "Conflates composition (which taskservs a server needs) with orchestration (in what order and how). The Formula is a separate, named artifact that can be versioned, validated, and governed independently from the taskserv list.",
    },
  ],

  # Hard constraints, each with a machine-runnable `check`.
  constraints = [
    {
      id = "formula-node-ids-unique-within-formula",
      claim = "Node IDs must be unique within a single Formula — the custom Nickel contract enforces this at export time",
      scope = "schemas/lib/formula.ncl (_Formula contract), workspaces/*/infra/*/servers.ncl",
      severity = 'Hard,
      # NOTE(review): the command uses POSIX-shell redirection (`2>/dev/null`)
      # under a 'NuCmd tag — confirm the runner for this tag accepts that syntax.
      check = {
        tag = 'NuCmd,
        cmd = "nickel export --format json examples/workspaces/basic/servers.ncl 2>/dev/null | jq '[.formulas[].nodes[].id] | group_by(.) | map(select(length > 1)) | length == 0' | grep -q true",
        expect_exit = 0
      },
      rationale = "Duplicate node IDs produce ambiguous depends_on resolution. The contract catches this before the JSON reaches formula.rs.",
    },
    {
      id = "formula-depends-on-declared-nodes-only",
      claim = "Every depends_on.node_id and edge endpoint must reference a declared node in the same formula",
      scope = "schemas/lib/formula.ncl (_Formula contract)",
      severity = 'Hard,
      # NOTE(review): this check appears to assert only that the schema file is
      # present; it does not exercise the referential-integrity contract named in
      # `claim` — confirm whether a stronger check is intended.
      check = {
        tag = 'FileExists,
        path = "schemas/lib/formula.ncl",
        present = true
      },
      rationale = "A reference to a non-existent node_id would silently drop the dependency at runtime, producing an incorrect execution order with no error.",
    },
    {
      id = "formula-runtime-conversion-via-formula-rs-only",
      claim = "All Formula-to-WorkflowDefinition conversion must go through Formula::into_workflow — no ad-hoc JSON parsing in batch.rs or elsewhere",
      scope = "platform/crates/orchestrator/src/batch.rs, platform/crates/orchestrator/src/formula.rs",
      severity = 'Hard,
      # NOTE(review): the pattern searched is "nickel export", not `into_workflow`
      # usage — confirm this is the intended proxy for the claim above.
      check = {
        tag = 'Grep,
        pattern = "nickel export",
        paths = ["platform/crates/"],
        must_be_empty = false
      },
      rationale = "The FormulaWorkflowConfig struct and into_workflow carry the semantic mapping (task names, arg construction, metadata injection). Bypassing it risks silent divergence between schema intent and runtime behavior.",
    },
  ],

  ontology_check = {
    decision_string = "Workspace taskserv execution topology as typed DAGs via Formula Nickel pattern, converted to WorkflowDefinition at runtime by formula.rs",
    invariants_at_risk = ["type-safety-nickel", "config-driven-always"],
    verdict = 'Safe,
  },

  related_adrs = ["adr-014-solid-enforcement", "adr-015-solo-mode-architecture"],
}