# ADR-024 — ncl-sync: event-driven cache invalidation via NATS.
# Nickel record built with `make_adr` from adr-defaults.ncl.
let d = import "adr-defaults.ncl" in
|
|
|
|
d.make_adr {
|
|
id = "adr-024",
|
|
title = "ncl-sync: Event-driven cache invalidation via NATS",
|
|
status = 'Accepted,
|
|
date = "2026-04-17",
|
|
|
|
context = "ADR-022 established the ncl-sync daemon with a file watcher (notify) as the automatic invalidation mechanism. ADR-023 added an explicit sync-request sidecar written by Nu processes (state-write). Both mechanisms have limitations: the file watcher has a debounce window (~100ms) during which the cache can be momentarily stale, and sync-request polling adds ~500ms of latency. The orchestrator (Rust) writes state files from a separate process — it cannot easily participate in the file-watcher's same-process events, and requiring it to write sync-request sidecars would couple it to ncl-sync's internal protocol. NATS is already used by the orchestrator for DAG events (`provisioning.dag.*`) — extending it for cache invalidation is a natural fit.",
|
|
|
|
decision = "ncl-sync gains an optional NATS subscriber behind the `nats` Cargo feature (compiled in by default). The subscriber listens on two subjects: `provisioning.workspace.ncl.changed` (file modified) and `provisioning.workspace.ncl.removed` (file deleted). Payload is a JSON object `{workspace, path, import_paths, source}`. On receipt, the subscriber validates that `workspace` matches its watched workspace, then calls `export_ncl` or `evict` directly — bypassing the file-watcher debounce and the sync-request poll. Cache is refreshed in <15ms vs ~100ms (watcher) or ~500ms (sidecar). At runtime the subscriber is opt-in via `ncl_sync.nats.enabled = true` in the config — without NATS, the daemon runs identically to before (watcher + sidecar fallback).",
|
|
|
|
rationale = [
|
|
{
|
|
claim = "NATS subscriber complements rather than replaces the file watcher",
|
|
detail = "Three invalidation mechanisms now exist with different failure characteristics: (1) file watcher — always active, catches any write including manual edits, ~100ms latency; (2) sync-request sidecar — written by Nu state-write, catches Nu-originated writes, ~500ms latency; (3) NATS events — emitted by any publisher, zero coupling to filesystem, <15ms latency. Each covers a different failure mode: watcher catches untracked writers, sidecar catches Nu writers, NATS catches Rust writers. Redundancy is intentional — duplicate events are idempotent (same cache_key, same content).",
|
|
},
|
|
{
|
|
claim = "Workspace validation prevents cross-daemon interference",
|
|
detail = "Multiple ncl-sync daemons may run (one per workspace). All subscribe to the same subject hierarchy. The subscriber canonicalizes both its watched workspace path and the event's workspace path; only events matching its workspace are processed. This allows NATS events to fan out to all relevant daemons without coordination.",
|
|
},
|
|
{
|
|
claim = "Subject hierarchy matches the workspace event model, not the orchestrator DAG model",
|
|
detail = "`provisioning.dag.*` subjects are about workflow execution. `provisioning.workspace.ncl.*` subjects are about configuration state. Keeping them separate lets ncl-sync subscribe narrowly (two subjects) without parsing unrelated events. Future publishers (installer, backup restore, etc.) use the same namespace.",
|
|
},
|
|
{
|
|
claim = "Cargo feature flag keeps NATS optional",
|
|
detail = "`default = [\"nats\"]` enables NATS in release builds. `cargo build --no-default-features` produces a binary without async-nats linkage — useful for minimal containers, air-gapped environments, or testing. The config field `ncl_sync.nats.enabled` is an additional runtime gate independent of the compile-time feature.",
|
|
},
|
|
],
|
|
|
|
consequences = {
|
|
positive = [
|
|
"Orchestrator-driven state mutations invalidate cache in <15ms (vs ~100ms via file watcher)",
|
|
"Zero coupling between orchestrator and ncl-sync — only the subject contract is shared",
|
|
"Other subscribers (dashboard UI, audit log) can watch the same subjects without touching ncl-sync",
|
|
"Redundant with watcher+sidecar — graceful degradation if NATS is down",
|
|
],
|
|
negative = [
|
|
"Adds ~6MB to ncl-sync binary size (async-nats + dependencies)",
|
|
"NATS must be running when ncl-sync connects (connection failure is non-fatal — the daemon falls back to watcher + sidecar)",
|
|
"Publishers (orchestrator, etc.) must be updated to emit the new subjects — until then, the NATS layer has no effect",
|
|
],
|
|
},
|
|
|
|
alternatives_considered = [
|
|
{
|
|
option = "Single mechanism: file watcher only",
|
|
why_rejected = "Leaves the cache stale during the ~100ms debounce window. For interactive CLI use this is fine; for rapid orchestrator-driven state changes (a deploy with many state updates), the cache can lag.",
|
|
},
|
|
{
|
|
option = "Single mechanism: NATS only",
|
|
why_rejected = "Hard dependency on NATS — ncl-sync fails if NATS isn't running. Manual NCL edits (user opens editor) wouldn't be caught. File watcher must remain as baseline.",
|
|
},
|
|
{
|
|
option = "HTTP endpoint on ncl-sync for invalidation",
|
|
why_rejected = "Requires every publisher to know the daemon's Unix socket or HTTP port. NATS decouples publishers from subscribers.",
|
|
},
|
|
{
|
|
option = "Reuse provisioning.dag.* subjects",
|
|
why_rejected = "DAG events are about workflow state, not config state. Overloading the subject hierarchy would force ncl-sync to filter noisy events it doesn't care about.",
|
|
},
|
|
],
|
|
|
|
ontology_check = {
|
|
decision_string = "ncl-sync adds opt-in NATS subscriber on provisioning.workspace.ncl.{changed,removed} for event-driven cache invalidation; watcher + sidecar remain as fallback",
|
|
invariants_at_risk = ["config-driven-always"],
|
|
verdict = 'Safe,
|
|
},
|
|
|
|
related_adrs = ["adr-022-ncl-sync-daemon", "adr-023-ncl-export-wrapper"],
|
|
|
|
constraints = [
|
|
{
|
|
id = "ncl-sync-nats-optional",
|
|
claim = "NATS subscriber must be an optional Cargo feature, and runtime-gated by config",
|
|
scope = "provisioning/platform/crates/ncl-sync/",
|
|
severity = 'Hard,
|
|
check = { tag = 'Grep, pattern = "cfg\\(feature = \"nats\"\\)|#\\[cfg\\(feature = \"nats\"\\)\\]", paths = ["provisioning/platform/crates/ncl-sync/src/"], must_be_empty = false },
|
|
rationale = "Air-gapped environments, minimal containers, and testing scenarios require ncl-sync to build and run without NATS. Removing the feature flag would violate this.",
|
|
},
|
|
{
|
|
id = "ncl-sync-nats-fallback",
|
|
claim = "NATS connection failure must be non-fatal — daemon continues with watcher + sidecar",
|
|
scope = "provisioning/platform/crates/ncl-sync/src/main.rs",
|
|
severity = 'Hard,
|
|
check = { tag = 'Grep, pattern = "tracing::warn", paths = ["provisioning/platform/crates/ncl-sync/src/main.rs"], must_be_empty = false },
|
|
rationale = "Hard dependency on NATS would break the workspace-local, zero-platform-service guarantee from ADR-022.",
|
|
},
|
|
{
|
|
id = "ncl-sync-workspace-scope",
|
|
claim = "Subscriber must filter events by workspace — only process events matching its watched workspace",
|
|
scope = "provisioning/platform/crates/ncl-sync/src/nats_subscriber.rs",
|
|
severity = 'Hard,
|
|
check = { tag = 'Grep, pattern = "workspace_matches", paths = ["provisioning/platform/crates/ncl-sync/src/nats_subscriber.rs"], must_be_empty = false },
|
|
rationale = "Multiple ncl-sync daemons share the subject namespace. Without filtering, daemon A would process events for workspace B's cache.",
|
|
},
|
|
],
|
|
}
|