provisioning/adrs/adr-015-solo-mode-architecture.ncl

91 lines
6.7 KiB
XML

# ADR-015 — Solo Mode: full architecture with relaxed auth.
#
# This file evaluates to the record below passed through `make_adr` from
# adr-defaults.ncl. The record carries the decision text, its rationale,
# consequences, rejected alternatives, and three constraints, each with a
# `check` describing how the constraint is verified.
let d = import "adr-defaults.ncl" in
d.make_adr {
  id = "adr-015",
  title = "Solo Mode — Full Architecture with Relaxed Auth",
  status = 'Accepted,
  date = "2026-02-17",

  context = "The platform must run on a single operator's laptop for local development, testing, and single-operator production deployments. Two options were available: (1) Simplified mode — stripped-down binary bypassing services, writing directly to disk/files, skipping NATS and SurrealDB; (2) Full architecture with relaxed auth — same services, same NATS subjects, same SurrealDB schema, but auth middleware replaced with a no-op that auto-creates an admin session. Option 1 creates two separate code paths: solo vs multi-user. Scripts, integrations, and the CLI behave differently per mode. Testing in solo mode cannot validate multi-user behavior. Option 2 preserves a single code path with auth as the only runtime difference.",

  decision = "Solo mode uses the full architecture with relaxed auth. Every service (Orchestrator, Control Center, Vault, Extension Registry, AI/MCP) runs as the same binary with the same NATS subjects and the same SurrealDB schema. Runtime differences: SurrealDB uses embedded RocksDB in solo vs WebSocket server in multi-user; NATS uses nats-server -js child process in solo vs external cluster; auth middleware is solo_auth_middleware (auto-session, no JWT) in solo vs auth_middleware (JWT + Cedar) in multi-user; Vault auto-unseals with local age key in solo vs Shamir threshold or KMS; Cedar default-permits local user in solo vs full policy evaluation. solo_auth_middleware injects fixed UserContext { roles: [admin], mfa_verified: true, user_id: Uuid::nil() } and is gated behind --mode solo runtime flag.",

  # Each entry pairs a short claim with the supporting detail.
  rationale = [
    {
      claim = "Single code path eliminates solo/multi-user behavioral divergence",
      detail = "Any script or integration written for solo mode works in multi-user without modification — only the connection strings change. This makes solo mode a valid staging environment for multi-user behavior.",
    },
    {
      claim = "solo_auth_middleware is isolated and auditable",
      detail = "The auth bypass is in one function, gated behind a runtime flag, explicitly tested. Auditing solo mode auth is a grep away: rg 'solo_auth_middleware'. This is safer than multiple ad-hoc bypasses scattered across services.",
    },
    {
      claim = "SurrealDB and NATS data persist across restarts in solo mode",
      detail = "RocksDB + JetStream storage persist to disk. Solo mode is not ephemeral — state survives service restarts, enabling realistic local testing of long-running task scenarios.",
    },
    {
      claim = "CI can run integration tests against the solo mode harness without external infrastructure",
      detail = "The solo mode harness (embedded RocksDB, child nats-server) runs in CI without network or external service dependencies. Full integration test coverage without infrastructure overhead.",
    },
  ],

  consequences = {
    positive = [
      "Any script or integration written for solo mode works in multi-user without modification — only connection strings change",
      "The auth bypass is isolated to one function (solo_auth_middleware) — auditing solo mode auth is a grep away",
      "SurrealDB and NATS data persist across restarts in solo mode (RocksDB + JetStream storage to disk)",
      "CI can run the full integration test suite against the solo mode harness without external infrastructure",
    ],
    negative = [
      # NOTE(review): `decision` lists five services (Orchestrator, Control
      # Center, Vault, Extension Registry, AI/MCP) but this says "three service
      # binaries" — confirm which count is correct.
      "Solo mode requires starting three service binaries (vs one monolith) — managed by service-manager.nu",
      "The age key on disk is the only credential that bypasses Vault — its path must be chmod 600",
      "nats-server must be in $PATH for solo mode startup",
    ],
  },

  alternatives_considered = [
    {
      option = "Simplified mono-binary for solo, full services for multi-user",
      why_rejected = "Creates two code paths. Testing in solo mode does not validate multi-user behavior. Scripts written for solo mode require adaptation for multi-user. Doubles the maintenance surface.",
    },
    {
      option = "Feature flags at compile time (cfg(solo)) to disable auth",
      why_rejected = "Compile-time flags prevent running the same binary in both modes. Deployment would require two separate builds. A runtime flag (--mode solo) is more operationally flexible.",
    },
  ],

  constraints = [
    {
      id = "solo-mode-runtime-flag-only",
      claim = "solo_auth_middleware must only be activated via --mode solo runtime flag, never via environment variable or compile-time feature",
      scope = "platform/crates/control-center/src/lib.rs",
      severity = 'Hard,
      # ripgrep restricts files with --glob; --include is a grep option that rg
      # does not accept. grep -v exits 0 only when at least one rg hit does not
      # contain `#[cfg(test)`.
      # NOTE(review): assumes the check runner compares expect_exit with the
      # exit status of the last pipeline stage — confirm.
      check = { tag = 'NuCmd, cmd = "rg 'solo_auth_middleware' --glob '*.rs' platform/ | grep -v '#\\[cfg(test)'", expect_exit = 0 },
      rationale = "A runtime flag is explicit and auditable in process listings. An environment variable or compile-time flag creates an invisible bypass that cannot be detected without reading code or config.",
    },
    {
      id = "age-key-file-permissions",
      claim = "The age key at ${data_dir}/vault/master.age must be created with mode 0600 and must be the only file-based secret in the platform",
      scope = "platform/secretumvault/src/solo.rs",
      severity = 'Hard,
      # --glob replaces the grep-only --include flag, and grep needs -E for `|`
      # to act as alternation (in basic regex it is a literal character).
      # grep -Ev exits 1 when every rg hit mentions chmod, 0o600 or 0600, so
      # expect_exit = 1 means no key reference is left without a permission
      # mention on the same line.
      check = { tag = 'NuCmd, cmd = "rg 'master.age|vault.*key' --glob '*.rs' platform/ | grep -Ev 'chmod|0o600|0600'", expect_exit = 1 },
      rationale = "The age key is the bootstrap secret — the only credential that bypasses Vault. Strict file permissions are the only protection. Any additional file-based secrets would violate the single-secret constraint.",
    },
    {
      id = "nats-server-child-lifecycle",
      claim = "Orchestrator must start nats-server -js as a managed child process with TCP availability wait (10s timeout) and SIGTERM on shutdown",
      scope = "platform/crates/orchestrator/src/nats.rs",
      severity = 'Soft,
      check = { tag = 'Grep, pattern = "nats-server|nats_server", paths = ["platform/crates/orchestrator/"], must_be_empty = false },
      rationale = "Unmanaged nats-server processes leak across service restarts and leave stale JetStream state. The 10s TCP wait prevents race conditions between Orchestrator and the NATS server on startup.",
    },
  ],

  related_adrs = ["adr-012-nats-event-broker", "adr-013-surrealdb-global-store", "adr-014-solid-enforcement"],

  ontology_check = {
    decision_string = "Solo mode uses full architecture with solo_auth_middleware as the only auth bypass, gated behind --mode solo runtime flag",
    invariants_at_risk = ["solid-boundaries"],
    verdict = 'Safe,
  },
}