provisioning/adrs/adr-013-surrealdb-global-store.ncl

96 lines
6.6 KiB
Text
Raw Permalink Normal View History

# ADR-013 — SurrealDB as the Global Persistent Store.
#
# The record below is handed to `make_adr` from adr-defaults.ncl
# (presumably applying shared defaults/contracts — confirm in that file).
# Each entry in `constraints` pairs a human-readable claim with a `check`
# record describing how the claim is meant to be verified.
let d = import "adr-defaults.ncl" in
d.make_adr {
  id = "adr-013",
  title = "SurrealDB as the Global Persistent Store",
  status = 'Accepted,
  date = "2026-02-17",

  context = "The platform needs a single persistent data store that can operate embedded (RocksDB, zero external process) in solo mode, run as an external WebSocket server in multi-user deployments without schema changes, support five distinct service namespaces with well-typed schemas (orchestrator, vault, control_center, audit, workspace), store heterogeneous data (task logs append-only, secrets as encrypted blobs, Cedar policies as documents, audit events as time-series, git sync state as mutable), and be queryable by the AI/MCP service for context gathering without a separate analytics database. PostgreSQL requires a server process, SQLite has no native namespacing, Redis has no real persistence. SurrealDB is the only option supporting all five requirements simultaneously.",

  decision = "SurrealDB is the exclusive persistent store for all platform state. No service reads raw files or environment variables for credentials at runtime — all reads go through SurrealDB (secrets via Vault Service, which stores ciphertext in SurrealDB). Namespace layout under `provisioning` database: orchestrator (tasks, task_events, execution_logs, config_hashes, provider_cache), vault (secrets, keys, leases, secret_versions, audit_trail), control_center (users, sessions, cedar_policies, policy_evaluations), audit (events, metrics), workspace (registrations, deployments, git_sync_state, extensions). Mode selection via DbConfig: Memory (tests), Embedded (solo, RocksDB), Server (multi-user, WebSocket). Schema initialization via DEFINE TABLE IF NOT EXISTS DDL — no migration framework for additive changes.",

  # Arguments supporting the decision; each claim is mirrored as a one-line
  # summary in `consequences.positive`.
  rationale = [
    {
      claim = "Single storage abstraction across solo and multi-user modes",
      detail = "SurrealPool is Clone (Arc<Surreal<Any>> internally), shareable across tokio tasks. The same codebase connects to embedded RocksDB in solo mode and WebSocket server in multi-user — only the DbConfig changes.",
    },
    {
      claim = "No external process in solo mode",
      detail = "Embedded RocksDB starts with the service binary. Solo mode requires no external database process, reducing startup dependencies and enabling CI runs without infrastructure.",
    },
    {
      claim = "AI/MCP context without ETL pipelines",
      detail = "The AI service queries audit:events and orchestrator:tasks directly for context. SurrealDB's document+relational model handles heterogeneous schemas without separate analytics infrastructure.",
    },
    {
      claim = "Test isolation via DbConfig::Memory",
      detail = "In-process Surreal<Mem> requires no external binary — every cargo test run gets a fresh, isolated database. Integration tests run without external infrastructure.",
    },
  ],

  consequences = {
    positive = [
      "Single storage abstraction: SurrealPool is Clone, shareable across tokio tasks",
      "No external process in solo mode: embedded RocksDB starts with the service binary",
      "AI/MCP context: AI service queries audit:events and orchestrator:tasks directly without ETL pipelines",
      "Test isolation: DbConfig::Memory (in-process Surreal<Mem>) requires no external binary",
    ],
    negative = [
      "SurrealDB v2 API uses snake_case builtins; bind() requires owned values; ID fields in structs needed to avoid RecordId parsing issues",
      "MVCC conflicts under concurrent write load require retry_on_conflict with exponential backoff + jitter on store_secret, store_key, and lease operations",
      "Full-text search and graph queries are available but deferred to avoid over-engineering",
    ],
  },

  alternatives_considered = [
    {
      option = "PostgreSQL",
      why_rejected = "Requires an external server process — no embedded mode for solo deployment. Schema evolution requires explicit migration tooling. No native document storage for Cedar policies.",
    },
    {
      option = "SQLite",
      why_rejected = "No native namespace/tenant isolation. No document model. Concurrent write performance under multiple async tasks is constrained. No WebSocket server mode for multi-user.",
    },
    {
      option = "Redis",
      why_rejected = "No real persistence guarantees (AOF is not the same as durable embedded storage). Key-value only — no document or relational queries for audit trail or task history.",
    },
  ],

  constraints = [
    {
      id = "cli-no-surrealdb-direct",
      claim = "CLI code (.nu files) must NOT access SurrealDB directly — all reads/writes from CLI go through service HTTP APIs",
      # NOTE(review): `scope` names extensions/ while the grep `paths` below
      # list catalog/ — confirm which directory is intended and align the two.
      scope = "core/nulib/, extensions/",
      severity = 'Hard,
      check = {
        tag = 'Grep,
        pattern = "surrealdb|surreal|SurrealDB",
        paths = ["core/nulib/", "catalog/"],
        must_be_empty = true,
      },
      rationale = "Direct SurrealDB access from CLI violates the SOLID boundary (ADR-014). All state mutations must go through the service layer to maintain audit trail and authorization.",
    },
    {
      id = "namespace-layout-fixed",
      claim = "The five namespaces (orchestrator, vault, control_center, audit, workspace) under the `provisioning` database must not be changed without an ADR",
      scope = "platform/crates/platform-db/",
      severity = 'Hard,
      # This check only asserts that the crate directory exists; it does not
      # inspect the namespace names themselves.
      check = { tag = 'FileExists, path = "platform/crates/platform-db/", present = true },
      rationale = "Namespace layout is the boundary contract between services. Changing it without an ADR risks data loss and cross-service coupling.",
    },
    {
      id = "retry-on-mvcc-conflict",
      claim = "Operations on store_secret, store_key, and lease operations must use retry_on_conflict with exponential backoff + jitter",
      scope = "platform/crates/platform-db/src/retry.rs, platform/secretumvault/",
      severity = 'Soft,
      # ripgrep restricts file types with --type/--glob; `--include` is a GNU
      # grep option that rg rejects, which left `grep -v` with empty input and
      # made the expected exit code 1 trivially true. `--type rust` limits the
      # search to *.rs files. `grep -v retry` exits 1 only when every matching
      # line also mentions "retry".
      check = {
        tag = 'NuCmd,
        cmd = "rg 'store_secret|store_key|create_lease' platform/ --type rust | grep -v retry",
        expect_exit = 1,
      },
      rationale = "SurrealDB MVCC conflicts are expected under concurrent write load. Without retry, concurrent task executions silently fail on lease creation.",
    },
  ],

  related_adrs = ["adr-012-nats-event-broker", "adr-014-solid-enforcement", "adr-015-solo-mode-architecture"],

  ontology_check = {
    decision_string = "SurrealDB is the exclusive persistent store; CLI accesses state only via service HTTP APIs; five fixed namespaces under provisioning database",
    invariants_at_risk = ["solid-boundaries", "config-driven-always"],
    verdict = 'Safe,
  },
}