92 lines
6.7 KiB
Text
92 lines
6.7 KiB
Text
|
|
# ADR-015 — Solo Mode: full architecture with relaxed auth.
#
# The record is built through `make_adr` from adr-defaults.ncl; the contract
# and defaults it applies are defined there and are not visible in this file.
let d = import "adr-defaults.ncl" in

d.make_adr {
  id = "adr-015",
  title = "Solo Mode — Full Architecture with Relaxed Auth",
  status = 'Accepted,
  date = "2026-02-17",

  # Problem statement and the two options that were weighed.
  context = "The platform must run on a single operator's laptop for local development, testing, and single-operator production deployments. Two options were available: (1) Simplified mode — stripped-down binary bypassing services, writing directly to disk/files, skipping NATS and SurrealDB; (2) Full architecture with relaxed auth — same services, same NATS subjects, same SurrealDB schema, but auth middleware replaced with a no-op that auto-creates an admin session. Option 1 creates two separate code paths: solo vs multi-user. Scripts, integrations, and the CLI behave differently per mode. Testing in solo mode cannot validate multi-user behavior. Option 2 preserves a single code path with auth as the only runtime difference.",

  # The chosen option, including the per-component runtime differences.
  decision = "Solo mode uses the full architecture with relaxed auth. Every service (Orchestrator, Control Center, Vault, Extension Registry, AI/MCP) runs as the same binary with the same NATS subjects and the same SurrealDB schema. Runtime differences: SurrealDB uses embedded RocksDB in solo vs WebSocket server in multi-user; NATS uses nats-server -js child process in solo vs external cluster; auth middleware is solo_auth_middleware (auto-session, no JWT) in solo vs auth_middleware (JWT + Cedar) in multi-user; Vault auto-unseals with local age key in solo vs Shamir threshold or KMS; Cedar default-permits local user in solo vs full policy evaluation. solo_auth_middleware injects fixed UserContext { roles: [admin], mfa_verified: true, user_id: Uuid::nil() } and is gated behind --mode solo runtime flag.",

  # Each entry pairs a short claim with its supporting detail.
  rationale = [
    {
      claim = "Single code path eliminates solo/multi-user behavioral divergence",
      detail = "Any script or integration written for solo mode works in multi-user without modification — only the connection strings change. This makes solo mode a valid staging environment for multi-user behavior.",
    },
    {
      claim = "solo_auth_middleware is isolated and auditable",
      detail = "The auth bypass is in one function, gated behind a runtime flag, explicitly tested. Auditing solo mode auth is a grep away: rg 'solo_auth_middleware'. This is safer than multiple ad-hoc bypasses scattered across services.",
    },
    {
      claim = "SurrealDB and NATS data persist across restarts in solo mode",
      detail = "RocksDB + JetStream storage persist to disk. Solo mode is not ephemeral — state survives service restarts, enabling realistic local testing of long-running task scenarios.",
    },
    {
      claim = "CI can run integration tests against the solo mode harness without external infrastructure",
      detail = "The solo mode harness (embedded RocksDB, child nats-server) runs in CI without network or external service dependencies. Full integration test coverage without infrastructure overhead.",
    },
  ],

  consequences = {
    positive = [
      "Any script or integration written for solo mode works in multi-user without modification — only connection strings change",
      "The auth bypass is isolated to one function (solo_auth_middleware) — auditing solo mode auth is a grep away",
      "SurrealDB and NATS data persist across restarts in solo mode (RocksDB + JetStream storage to disk)",
      "CI can run the full integration test suite against the solo mode harness without external infrastructure",
    ],
    negative = [
      # NOTE(review): `decision` above lists five services (Orchestrator,
      # Control Center, Vault, Extension Registry, AI/MCP) while this entry
      # says three binaries — confirm which count is intended.
      "Solo mode requires starting three service binaries (vs one monolith) — managed by service-manager.nu",
      "The age key on disk is the only credential that bypasses Vault — its path must be chmod 600",
      "nats-server must be in $PATH for solo mode startup",
    ],
  },

  alternatives_considered = [
    {
      option = "Simplified mono-binary for solo, full services for multi-user",
      why_rejected = "Creates two code paths. Testing in solo mode does not validate multi-user behavior. Scripts written for solo mode require adaptation for multi-user. Doubles the maintenance surface.",
    },
    {
      option = "Feature flags at compile time (cfg(solo)) to disable auth",
      why_rejected = "Compile-time flags prevent running the same binary in both modes. Deployment would require two separate builds. A runtime flag (--mode solo) is more operationally flexible.",
    },
  ],

  # Constraints carry a machine-runnable `check` next to the human-readable claim.
  constraints = [
    {
      id = "solo-mode-runtime-flag-only",
      claim = "solo_auth_middleware must only be activated via --mode solo runtime flag, never via environment variable or compile-time feature",
      scope = "platform/crates/control-center/src/lib.rs",
      severity = 'Hard,
      # ripgrep selects files with --glob; it has no grep-style --include flag.
      # An unknown flag makes rg abort, which leaves grep with empty input and
      # a non-zero exit, so the expected exit 0 could never be reached.
      # The pipeline exits 0 when the symbol appears on at least one line that
      # is not a #[cfg(test)] line.
      # NOTE(review): this only shows the symbol exists outside test-gated
      # lines; it does not by itself verify the "--mode solo only" rule.
      check = { tag = 'NuCmd, cmd = "rg 'solo_auth_middleware' --glob '*.rs' platform/ | grep -v '#\\[cfg(test)'", expect_exit = 0 },
      rationale = "A runtime flag is explicit and auditable in process listings. An environment variable or compile-time flag creates an invisible bypass that cannot be detected without reading code or config.",
    },
    {
      id = "age-key-file-permissions",
      claim = "The age key at ${data_dir}/vault/master.age must be created with mode 0600 and must be the only file-based secret in the platform",
      scope = "platform/secretumvault/src/solo.rs",
      severity = 'Hard,
      # grep needs -E for `|` alternation: in basic regular expressions `|` is
      # a literal character, so no line would ever be filtered out.
      # The pipeline exits 1 (nothing left after filtering) when every line
      # that mentions the key also mentions chmod/0o600/0600, or when rg finds
      # no match at all.
      check = { tag = 'NuCmd, cmd = "rg 'master.age|vault.*key' --glob '*.rs' platform/ | grep -Ev 'chmod|0o600|0600'", expect_exit = 1 },
      rationale = "The age key is the bootstrap secret — the only credential that bypasses Vault. Strict file permissions are the only protection. Any additional file-based secrets would violate the single-secret constraint.",
    },
    {
      id = "nats-server-child-lifecycle",
      claim = "Orchestrator must start nats-server -js as a managed child process with TCP availability wait (10s timeout) and SIGTERM on shutdown",
      scope = "platform/crates/orchestrator/src/nats.rs",
      severity = 'Soft,
      check = { tag = 'Grep, pattern = "nats-server|nats_server", paths = ["platform/crates/orchestrator/"], must_be_empty = false },
      rationale = "Unmanaged nats-server processes leak across service restarts and leave stale JetStream state. The 10s TCP wait prevents race conditions between Orchestrator and the NATS server on startup.",
    },
  ],

  related_adrs = ["adr-012-nats-event-broker", "adr-013-surrealdb-global-store", "adr-014-solid-enforcement"],

  ontology_check = {
    decision_string = "Solo mode uses full architecture with solo_auth_middleware as the only auth bypass, gated behind --mode solo runtime flag",
    invariants_at_risk = ["solid-boundaries"],
    verdict = 'Safe,
  },
}
|