provisioning/adrs/adr-036-db-operation-abstraction.ncl

97 lines
10 KiB
XML

let d = import "adr-defaults.ncl" in
d.make_adr {
id = "adr-036",
title = "db-* operation abstraction: standard manifest_plan actions for database lifecycle across MySQL, PostgreSQL, and SurrealDB",
status = 'Accepted,
date = "2026-04-24",
context = "Database components (postgresql, and future mysql, surrealdb) each implement ad-hoc backup, restore, and health-check methods in their lib.sh files with no shared naming contract. `provisioning component backup postgresql` works today because ManifestPlan supports arbitrary plan section keys and the component CLI dispatches any op name to the bundle builder. However, each engine invents its own method names (`_method_backup`, `_method_dump`, `_method_db-dump`, etc.) and parameter conventions. There is no standard for: (a) how backup artifacts are named, (b) whether Object Storage is involved, (c) what 'state' means (connection count? replication lag? table sizes?), or (d) how restore locates its source. Additionally, Object Storage integration (e.g., Hetzner Object Storage, Backblaze B2) for archival is not modeled. This ADR establishes seven standard db-* operation names as the cross-engine contract.",
decision = "Define seven standard manifest_plan action names for database lifecycle operations. Each action maps to a `_method_{action}` implementation in the engine's lib.sh. The operations are: `db-init` (create databases, roles, and initial schema — idempotent), `db-backup` (full consistent backup, compressed, optionally pushed to object storage — artifact named `{component}-{timestamp}.dump.gz` or `{component}-{timestamp}.tar.gz`), `db-restore` (restore from artifact path or object storage key, passed via params.src, with the BACKUP_SRC env var as fallback), `db-dump` (plain SQL export to stdout or local path — lighter than db-backup, no binary format), `db-state` (query operational state: database sizes, connection counts, replication lag, bloat — output to stdout as human-readable text in an engine-specific layout), `db-query` (run ad-hoc SQL from params.sql or QUERY env — read-only by default), `db-snap` (engine-native point-in-time snapshot — e.g., pg_basebackup for PostgreSQL, file-level copy for SurrealDB). These seven names become the convention: any database component that declares `operations.backup = true` in its Nickel config must implement `_method_db-backup`. The corresponding justfile module (`justfiles/db.just`) provides generic recipes that work for any database component. Engine-specific modules (e.g., `justfiles/postgresql.just`) are thin wrappers around the generic db.just recipes for their component.",
rationale = [
{
claim = "Seven operations cover the complete database lifecycle without engine-specific command surface",
detail = "db-init handles first-time setup (idempotent). db-backup and db-restore are the data safety pair. db-dump complements backup for portability — pg_dump output is readable, binary backup formats are not. db-state is the operational health surface: sizes, connections, lag — enough to answer 'is the database healthy' without custom dashboards. db-query enables one-off queries from the operator without exec-ing into the pod. db-snap provides near-zero-RPO backup using engine-native mechanisms when available. No other operations have emerged across the postgresql and docker_mailserver deployment cycles.",
},
{
claim = "Object Storage integration lives in db-backup and db-snap, not in a separate operation",
detail = "Adding a separate 'archive' operation would require sequencing: backup → archive → verify. This three-step sequence is exactly what db-backup's params.dest collapses into one step: if params.dest (or the BACKUP_DEST env fallback) is set to an S3 URI (s3://bucket/prefix), the backup method uploads the artifact directly. The method retains the local copy for BACKUP_KEEP_LOCAL hours before deleting it. This single-operation model means `just pg-backup` and `just pg-backup dest=s3://mybucket/pg` are the same code path with different params, avoiding a separate archive stage and its op governance overhead.",
},
{
claim = "Naming convention db-{verb} avoids collision with existing component op names",
detail = "Existing component ops (install, update, delete, restart, backup, restore) are generic and dispatched by the component CLI. The db-* prefix is reserved for database-semantic operations that require SQL engine awareness. This avoids ambiguity: 'backup' as a component op is 'snapshot the entire component state', while 'db-backup' is 'dump database contents'. Both can coexist in manifest_plan.ncl without naming conflict because they are distinct section keys.",
},
{
claim = "params.src and params.dest are the standard interface for artifact location, not env vars",
detail = "ManifestEntry.params is a `{ _ | String }` record — arbitrary string key-value pairs passed to _method_* implementations. Using params.src (restore source) and params.dest (backup destination) is self-documenting in manifest_plan.ncl and in the justfile recipe: `just pg-backup dest=s3://bucket/pg`. Environment variables (BACKUP_SRC, BACKUP_DEST) are the fallback when params is absent — the method checks params first, env second. This two-tier resolution allows interactive override without modifying manifest_plan.ncl.",
},
],
consequences = {
positive = [
"Cross-engine tooling: a `justfiles/db.just` with generic recipes works for postgresql, mysql, surrealdb without modification",
"Object Storage backup path is a convention (s3://bucket/prefix), not per-engine config — backup tooling is uniform",
"db-state provides a standard operational query without exec into pod — consistent with no-SSH-for-observability principle",
"db-init idempotency means reprovisioning a database component doesn't require manual schema recreation",
"Seven operations cover backup, restore, observability, and ad-hoc queries — no further operations expected for standard OLTP databases",
],
negative = [
"db-snap is engine-specific: pg_basebackup for PostgreSQL, file-level copy for SurrealDB, xtrabackup for MySQL — method implementations are not portable across engines",
"params.dest S3 URI handling requires credentials (S3 access key, secret) in the component SOPS file — operators must add S3 credentials alongside DB credentials before using db-backup with object storage",
"db-state output format is unstructured text per engine — there is no typed structured output contract, which limits automated parsing",
],
},
alternatives_considered = [
{
option = "Add database operations to the component manifest_plan operations field as boolean flags",
why_rejected = "The operations record (`operations.backup = true`) already controls whether the component supports an op. Adding db-specific booleans (operations.db_backup, operations.db_restore) would double the operations field without adding new information — the presence of a db-backup section in manifest_plan is the declaration. The operations field is for CLI feature gating, not for naming.",
},
{
option = "Implement a separate 'db-operator' component that manages databases across engines",
why_rejected = "A cross-engine db-operator requires a running sidecar or separate deployment with access to all database pods. This adds infrastructure complexity and a failure mode (operator pod down → no backup). The lib.sh-in-bundle pattern keeps operations self-contained: the run-db-backup.sh script carries everything it needs, runs on the control plane node, and requires only kubectl + the database client binary. No additional components.",
},
{
option = "Use Velero for backup instead of engine-native methods",
why_rejected = "Velero provides crash-consistent volume snapshots (application-consistent snapshots require hooks) and operates at the volume/CSI level, not the database level. It cannot produce a pg_dump or mysqldump — only a filesystem snapshot. For PostgreSQL, a consistent SQL dump is more portable and restorable than a volume snapshot across different PostgreSQL versions. Velero is complementary (infrastructure-level DR), not a replacement for db-backup.",
},
],
constraints = [
{
# Hard constraint: every component whose defaults.ncl enables backup must ship
# a `_method_db-backup` implementation in its cluster lib.sh.
id = "db-backup-method-required-if-operations-backup",
claim = "Any database component with operations.backup = true in its Nickel config must implement _method_db-backup in its lib.sh",
scope = "provisioning/extensions/components/*/cluster/*-lib.sh",
severity = 'Hard,
check = {
tag = 'NuCmd,
# Nushell pipeline, step by step:
#   1. `grep -l` prints one matching defaults.ncl path per line; `lines` turns
#      that output into a list so `each` sees exactly one path per iteration
#      (without it, several matches arrive as one multi-line string and the
#      `path dirname` chain produces a bogus component name).
#   2. The closure derives the component name from the path and yields a
#      message only when the lib.sh exists but lacks `_method_db-backup`;
#      iterations that yield nothing are dropped by `each`.
#   3. Any collected message is printed and the command exits 1, so a
#      violation is visible in the exit status rather than only on stdout.
# NOTE(review): assumes the ADR checker passes this check only when the exit
# status equals `expect_exit` — confirm against the NuCmd runner.
cmd = "let missing = (grep -l 'backup.*=.*true' provisioning/extensions/components/*/nickel/defaults.ncl | lines | each { |f| let comp = ($f | path dirname | path dirname | path basename); let lib = $'provisioning/extensions/components/($comp)/cluster/($comp)-lib.sh'; if ($lib | path exists) and (not (open $lib | str contains '_method_db-backup')) { $'($comp): missing _method_db-backup' } }); if not ($missing | is-empty) { print ($missing | str join (char nl)); exit 1 }",
expect_exit = 0,
},
rationale = "The component CLI dispatches 'backup' to the bundle builder which extracts the manifest_plan.backup section. If the plan has a db-backup step but lib.sh does not implement _method_db-backup, the run script fails mid-execution on the remote node. The preflight method coverage check catches this — the constraint here documents the naming convention.",
},
{
# Soft constraint: backup artifacts should follow the
# {component}-{timestamp}.dump.gz / .tar.gz naming scheme.
id = "db-backup-artifact-naming",
claim = "db-backup method implementations must produce artifacts named {component}-{timestamp}.dump.gz or {component}-{timestamp}.tar.gz",
scope = "provisioning/extensions/components/*/cluster/*-lib.sh",
severity = 'Soft,
check = {
tag = 'Grep,
# After Nickel unescaping the pattern is `dump\.gz\|tar\.gz`: a literal
# "dump.gz" or "tar.gz" suffix. It does not look at the
# {component}-{timestamp} prefix, and `paths` covers the whole components
# tree rather than only the lib.sh files named in `scope`.
# NOTE(review): `\|` is alternation in GNU grep basic regex syntax only —
# confirm the 'Grep checker uses that dialect; in ERE/PCRE-style engines it
# matches a literal `|`.
pattern = "dump\\.gz\\|tar\\.gz",
paths = ["provisioning/extensions/components/"],
# presumably: the check passes when at least one match exists — TODO confirm
# against the 'Grep runner.
must_be_empty = false,
},
rationale = "A consistent artifact naming scheme allows automated retention policies and object storage lifecycle rules to match on prefix. Without it, each engine invents its own format (pg-backup-20260424.sql, dump_2026-04-24.tar.bz2) and rotation scripts must be per-engine. The soft severity reflects that existing backup implementations predate this ADR.",
},
],
ontology_check = {
decision_string = "db-* operation abstraction: seven standard manifest_plan action names (db-init, db-backup, db-restore, db-dump, db-state, db-query, db-snap) as cross-engine database lifecycle contract + params.src/params.dest for artifact location + Object Storage integration via BACKUP_DEST s3:// URI",
invariants_at_risk = ["config-driven-always"],
verdict = 'Safe,
},
related_adrs = ["adr-033-cluster-component-extension-pattern", "adr-035-storage-config-schema"],
}