92 lines
6.2 KiB
Text
92 lines
6.2 KiB
Text
# ADR-012 — NATS JetStream as the exclusive inter-service event broker.
#
# The record below is passed to `make_adr` from adr-defaults.ncl. That file is
# not visible here; presumably it applies the shared ADR contract and defaults
# (TODO confirm against adr-defaults.ncl).
let d = import "adr-defaults.ncl" in

d.make_adr {
  # Identity and lifecycle metadata. `status` is an enum tag, not a string.
  id = "adr-012",
  title = "NATS JetStream as Exclusive Inter-Service Event Broker",
  status = 'Accepted,
  date = "2026-02-17",

  # Problem statement: what was wrong with the previous communication model.
  context = "The provisioning platform has four runtime execution contexts — CLI, platform services (Orchestrator, Control Center, Vault, Extension Registry), remote taskservs, and AI/MCP — that must coordinate without leaking credentials or state into transient channels. Prior to this decision, services communicated via direct HTTP polling, shared filesystem state, and environment variables. This created audit gaps (no durable record of which service triggered which operation), credential leakage (provider tokens passed as env vars or written to disk by the CLI process), race conditions (multiple CLI invocations racing over shared config files with no delivery guarantee), and no backpressure (a slow consumer could starve or block a fast producer with no visibility).",

  # The decision itself, including the two HTTPS exceptions and the six streams.
  decision = "NATS with JetStream is the exclusive inter-service event broker. All inter-service communication that is not synchronous credential retrieval (Vault HTTPS) or session validation (Control Center HTTPS) must use NATS subjects under the `provisioning.>` hierarchy. Six JetStream streams are defined at startup by Orchestrator: TASKS (work queue), VAULT (interest), AUTH (interest), WORKSPACE (7-day limits), AUDIT (90-day limits), HEALTH (interest). Credentials never travel over NATS — only identifiers (lease_id, task_id, session_id) are published. Solo mode: nats-server -js as child process. Multi-user: external NATS cluster.",

  # Supporting arguments; each entry pairs a short claim with its detail.
  rationale = [
    {
      claim = "At-least-once delivery with durable persistence",
      detail = "JetStream provides durable message persistence for task log replay and audit trail reconstruction. Pull consumers ack explicitly; unacknowledged messages are redelivered.",
    },
    {
      claim = "Work-queue semantics enforce SOLID — CLI cannot call providers directly",
      detail = "CLI submits to provisioning.tasks.submitted only. It cannot call provider APIs directly. This is the primary structural enforcement of the SOLID boundary between CLI and Orchestrator.",
    },
    {
      claim = "Push semantics for real-time status streaming without polling",
      detail = "Control Center streams task status to browser via WebSocket without polling Orchestrator. NATS push consumers bridge the event stream to the WebSocket layer.",
    },
    {
      claim = "Multi-tenant subject namespacing maps to bounded contexts",
      detail = "The provisioning.> hierarchy with six streams maps each stream to its bounded context (tasks, vault, auth, workspace, audit, health). Each service subscribes only to its own subjects.",
    },
  ],

  # Expected outcomes of adopting the decision, split into gains and costs.
  consequences = {
    positive = [
      "Full audit trail: every state transition is a durable NATS message consumed by AuditCollector",
      "No polling: Control Center streams task status to browser via WebSocket",
      "Backpressure: JetStream consumers ack explicitly; unacknowledged messages are redelivered",
      "SOLID enforcement: CLI can only submit to provisioning.tasks.submitted; cannot call provider APIs directly",
    ],
    negative = [
      "nats-server is a required external process in solo mode, adding a startup step",
      "Message ordering within a subject is guaranteed but cross-subject ordering is not",
      "JetStream persistence requires disk space for AUDIT stream (90-day retention)",
      "Pull consumers in VAULT stream add one round-trip vs direct HTTP for lease issuance",
    ],
  },

  # Options that were evaluated and discarded, with the reason for each.
  alternatives_considered = [
    {
      option = "Direct HTTP polling between services",
      why_rejected = "Creates coupling between services and requires each service to know the addresses of others. No delivery guarantee, no audit trail, polling adds latency.",
    },
    {
      option = "Redis Pub/Sub for event distribution",
      why_rejected = "Redis Pub/Sub has no persistence — messages are lost if no subscriber is listening. No work-queue semantics, no backpressure, no durable audit trail.",
    },
  ],

  # Machine-checkable constraints derived from the decision.
  #
  # Every `check` is a command pipeline paired with `expect_exit = 1`. grep and
  # rg exit with status 1 when they select no lines, so exit 1 is used here to
  # mean "no offending match was found". How the 'NuCmd runner derives the exit
  # status of a pipeline is not visible in this file — TODO confirm.
  constraints = [
    {
      id = "credentials-never-in-nats",
      claim = "Actual credentials (tokens, secrets, keys) must never be published to any NATS subject",
      scope = "platform/crates/orchestrator/src/, platform/crates/platform-nats/",
      severity = 'Hard,
      # NOTE(review): the last stage is xargs, not rg. GNU xargs exits 123 (not
      # 1) when the wrapped command exits 1, so `expect_exit = 1` may be
      # unreachable on Linux. The command also searches all of platform/crates/
      # rather than the two directories in `scope`. Left unchanged — verify
      # against the check runner before altering.
      check = { tag = 'NuCmd, cmd = "rg 'publish|nats' platform/crates/ -l | xargs rg -l 'token|secret|password|key'", expect_exit = 1 },
      rationale = "Credentials in NATS messages would be visible to all subscribers on the subject. Only lease_id, task_id, and session_id travel over NATS; actual secrets are fetched over HTTPS from Vault.",
    },
    {
      id = "six-streams-defined-by-orchestrator",
      claim = "JetStream stream definitions (TASKS, VAULT, AUTH, WORKSPACE, AUDIT, HEALTH) are created by Orchestrator on startup and must not be redefined by other services",
      scope = "platform/crates/orchestrator/src/nats.rs",
      severity = 'Hard,
      # File filtering uses rg's `--glob`. ripgrep has no `--include` option
      # (that flag belongs to GNU grep); with it rg aborts before searching,
      # grep receives empty input and exits 1, and the check can never fail.
      check = { tag = 'NuCmd, cmd = "rg 'create_stream|add_stream' platform/crates/ --glob='*.rs' -l | grep -v orchestrator", expect_exit = 1 },
      rationale = "Single point of stream definition prevents conflicting stream configurations. Other services are consumers only.",
    },
    {
      id = "nats-subject-hierarchy",
      claim = "All NATS subjects must be under the provisioning.> hierarchy with the stream-to-subject mapping documented in schemas/platform/common/nats.ncl",
      scope = "platform/crates/platform-nats/",
      severity = 'Soft,
      # Uses `--glob` for the same reason as the check above.
      # NOTE(review): the rg pattern matches any string literal that starts with
      # a lowercase letter, not only NATS subjects, and the search root is wider
      # than `scope`. Now that rg actually runs, this check is likely to report
      # unrelated literals — confirm and narrow the pattern if so.
      check = { tag = 'NuCmd, cmd = "rg '\"[a-z]' platform/crates/ --glob='*.rs' | grep -v 'provisioning\\.'", expect_exit = 1 },
      rationale = "Consistent subject hierarchy enables subject-level access control and prevents cross-context pollution.",
    },
  ],

  # Cross-references to other ADRs by identifier.
  related_adrs = ["adr-014-solid-enforcement", "adr-015-solo-mode-architecture"],

  # Summary of the decision for the ontology check, with the invariants it
  # touches and the resulting verdict (an enum tag).
  ontology_check = {
    decision_string = "NATS JetStream is the exclusive inter-service event broker; credentials never travel over NATS; six streams defined by Orchestrator",
    invariants_at_risk = ["solid-boundaries"],
    verdict = 'Safe,
  },
}