# ADR-017 — Registry credential vault model: src-vault, multi-recipient sops,
# and actor-scoped access.
#
# The record below is handed to `make_adr` from defaults.ncl. What `make_adr`
# validates or fills in is not visible from this file; see defaults.ncl.
#
# Field order: metadata, context, decision, rationale, consequences,
# alternatives_considered, constraints, related_adrs, ontology_check.
#
# The long prose fields are kept as single-line strings on purpose: a line
# break inside a "..." literal becomes part of the string value.
#
# NOTE(review): several paths/refs in the prose ("vaults//", "src-vault/:lock",
# "src-vault::latest", "src-vault/:latest", "scopes/.ncl") look like a
# placeholder segment was dropped. They are left as found; confirm the intended
# text before publishing.
let d = import "defaults.ncl" in

d.make_adr {
  # --- Metadata -------------------------------------------------------------
  id = "adr-017",
  title = "Registry Credential Vault Model: src-vault, Multi-Recipient sops, and Actor-Scoped Access",
  status = 'Accepted,
  date = "2026-05-01",

  # --- Context: why registry credentials need a dedicated model --------------
  context = "The provisioning registry (zot OCI) became the primary coordination hub for domain and mode artifacts (ADR-016 consequences, migration 0015). This introduced registry credentials as a first-class concern: every project that pushes or pulls artifacts needs credentials, and those credentials must be distributed across actors (developers, CI pipelines, the ontoref daemon, AI agents, ops tooling) with different access levels. The naive model — environment variables or ambient ~/.docker/config.json — has three compounding failure modes: (1) credentials are ambient and unscoped, so an operation against project A can use credentials that belong to project B; (2) there is no distribution mechanism — adding a new team member or CI key requires manual redistribution of a shared secret; (3) there is no audit trail for credential access or changes. A supply-chain attack via misconfigured registry namespace endpoints is the concrete risk: if the daemon proxied registry calls, any actor with MCP access could redirect them to an arbitrary endpoint using ambient credentials.",

  # --- Decision --------------------------------------------------------------
  # NOTE(review): the log file is called access.jsonl here but audit.jsonl in
  # the "Audit log co-located with vault" rationale entry; confirm which name
  # is correct.
  decision = "Registry credentials are managed through a per-project src-vault stored as an OCI artifact in the same ZOT registry it protects. The src-vault uses sops with age multi-recipient encryption: each actor role has its own age keypair, and credential files are encrypted for the exact set of recipients that need access. The vault backend (restic or kopia) handles versioning and local copies; the ZOT registry handles distribution. Each project has a local access.sops.yaml in ~/.config/ontoref/vaults// containing three fields: zot_username, zot_password, and vault_key. All three are encrypted by the actor's master age private key (.kage), which lives at an external path the actor controls (hardware key, encrypted disk, or declared path in config.ncl) — never inside the vault directory. At operation time, sops decrypts access.sops.yaml in memory: zot_username and zot_password are used to pull the src-vault OCI artifact via a DOCKER_CONFIG tmpdir that is deleted immediately after; vault_key is passed as RESTIC_PASSWORD or KOPIA_PASSWORD env var for the duration of the vault operation and never written to disk. No plaintext credential of any kind persists beyond the operation scope. Credential resolution runs exclusively in the ontoref CLI — the daemon is structurally excluded. All oras invocations use an isolated DOCKER_CONFIG tmpdir; no ambient ~/.docker/config.json is consulted. Access logs are appended to a jsonl file stored as a layer in the same src-vault OCI artifact and mirrored locally in ~/.config/ontoref/vaults//logs/access.jsonl.",

  # --- Rationale: one { claim, detail } record per argument ------------------
  rationale = [
    {
      claim = "src-vault in ZOT closes the distribution gap structurally",
      detail = "Storing the vault as an OCI artifact in the same registry that it protects means distribution is the same operation as registry access. Any actor with registry RO credentials can pull the vault; only admin can push. No separate key distribution channel is needed. When a developer joins, their public key is added to src-vault and they can immediately pull their copy — no coordinator required.",
    },
    {
      # NOTE(review): `sops updatekeys` re-encrypts the existing data key for
      # the new recipient set. Confirm whether revocation also needs a data-key
      # rotation (and rotation of the credentials the removed actor could
      # already read) for the claim below to hold as written.
      claim = "Multi-recipient sops eliminates shared secrets",
      detail = "Each actor has its own age keypair. The sops DEK is encrypted separately for each recipient public key. Removing a recipient requires only updating .sops.yaml and running sops updatekeys — no key redistribution. The private key of the removed actor becomes useless for future vault operations the moment updatekeys completes. This is the only model that supports revocation without rotation of all other credentials.",
    },
    {
      claim = "Daemon structural exclusion is not a policy, it is an architectural property",
      detail = "The daemon process has no age private key and no access to project .kage files. It cannot decrypt sops files even if it reads them. The daemon surfaces only the declarative topology from manifest.ncl (registry_provides, credential_sops paths as strings). Credential resolution happens in the CLI process of the authenticated user. This cannot be bypassed without modifying the daemon to acquire keys — which would be an explicit, reviewable change, not a configuration drift.",
    },
    {
      claim = "Lock state in ZOT with admin-only write ACL makes the lock structural",
      detail = "The src-vault namespace has write ACL restricted to admin credentials in ZOT config. The lock artifact (src-vault/:lock) can only be pushed or deleted with admin credentials. Other roles can read the lock state but cannot modify it. This converts a cooperative lock into a structurally enforced one — bypassing the lock requires admin credentials, which is an auditable action, not an accident.",
    },
    {
      claim = "Audit log co-located with vault enables tamper-evident access history",
      detail = "The audit.jsonl layer is part of the same OCI artifact as the vault content. Every push of src-vault::latest includes the updated log. Since only admin can push, and the log is append-only within a vault session, the log cannot be modified without admin credentials and a vault open/close cycle — both of which are themselves logged. This does not prevent a malicious admin from editing the log, but it makes any edit detectable via the OCI manifest digest history.",
    },
    {
      claim = "Role scope enforcement at two independent layers",
      detail = "Scope is enforced first by the CLI (pre-check against scopes/.ncl before calling oras) and second by the ZOT ACL (the token itself carries the permissions issued by the registry). These layers are independent: a misconfigured scope file does not grant registry permissions, and a misconfigured registry ACL does not bypass the CLI pre-check. The two layers must be kept in sync via secrets-audit in CI.",
    },
    {
      claim = "credential_env is excluded by design, not by convention",
      detail = "Environment variables are visible in ps aux, inherited by all child processes, logged by CI systems unless explicitly masked, and cannot be scoped to a specific operation. Excluding credential_env from the schema means it is impossible to accidentally use it — there is no field to populate. The only credential reference fields are credential_sops (current) and credential_oidc (declared for future OIDC workload identity support).",
    },
    {
      claim = "vault_key in sops ensures the restic/kopia key is never plaintext at rest",
      detail = "The restic/kopia encryption key (vault_key) lives inside access.sops.yaml, encrypted by the actor's age master key. It is decrypted into RESTIC_PASSWORD or KOPIA_PASSWORD env var only for the duration of the vault operation — never written to a key file on disk. This means the only plaintext secrets on disk are the actor's .kage (which the actor is responsible for protecting) and the sops-encrypted access.sops.yaml (unreadable without the .kage). The pattern is: one master key protects everything else; everything else is encrypted at rest.",
    },
    {
      claim = "restic and kopia are equivalent vault backends behind a thin abstraction",
      detail = "Both restic and kopia provide encrypted, versioned snapshots with content-addressed storage. The choice between them depends on operational preference (kopia has a richer UI and faster incremental backups; restic has broader ecosystem support). The vault-backend.nu abstraction wraps init, backup, restore, and snapshot-list — switching backends requires changing vault_backend.tool in config.ncl and re-initializing the local repo. The OCI artifact in ZOT is backend-agnostic.",
    },
  ],

  # --- Consequences ----------------------------------------------------------
  consequences = {
    positive = [
      "Credential distribution is the same operation as registry access — no separate channel",
      "Revoking a developer's access requires one command (secrets-remove-key) with no coordination",
      "The daemon cannot be used as a credential amplifier regardless of MCP actor permissions",
      "Every vault open, edit, and close is logged in the artifact that contains the credentials",
      "Adding a new role (e.g. backup-agent) requires only a new keypair and a .sops.yaml update",
      "Local copy in ~/.config/ontoref/vaults// provides offline fallback for admin",
      "Bootstrap creates the first consistent vault state before any live registry interaction",
      "impact analysis on secrets-close identifies services affected by credential changes before confirming",
    ],
    negative = [
      "Bootstrap requires admin to collect all public keys before first vault push — cannot be fully automated",
      "sops updatekeys must run after every recipient change — forgetting it leaves old recipients in files",
      "Zot ACL and scope NCL files must be kept in sync manually — drift produces security theater",
      "Loss of .kage means access.sops.yaml cannot be decrypted — vault_key and ZOT credentials are unrecoverable without the master key; admin must keep the .kage backed up independently",
      "ore secrets open/close adds a ritual to any secrets change session — acceptable overhead, but real",
      "Direct sops invocations bypass the lock and audit log — CI audit detects but does not prevent",
    ],
  },

  # --- Alternatives considered: one { option, why_rejected } record each -----
  alternatives_considered = [
    {
      option = "Environment variables (REGISTRY_TOKEN, DOCKER_CONFIG) per CI job",
      why_rejected = "Visible in process list, inherited by subprocesses, logged by CI systems, cannot be scoped to a specific registry endpoint. No revocation without rotating all consumers. The primary attack surface for supply-chain credential leakage.",
    },
    {
      option = "Shared age keyring — one private key distributed to all developers",
      why_rejected = "Revocation requires rotating the shared key and redistributing to all remaining members — coordination cost scales with team size. No per-actor audit trail. A leaked key compromises all actors simultaneously.",
    },
    {
      option = "HashiCorp Vault or similar external secrets manager",
      why_rejected = "Introduces a network dependency for every credential resolution. Requires operating a separate service with its own HA, backup, and auth model. The OCI registry is already the coordination hub — using it as the vault distribution backend reuses existing infrastructure and auth.",
    },
    {
      option = "Daemon resolves credentials on behalf of CLI actors",
      why_rejected = "The daemon is a long-lived process accessible to multiple actors (developer, agent, CI via MCP). Giving it credential resolution capability means any actor with daemon access can trigger registry operations using credentials they do not personally hold. The structural exclusion of the daemon from credential resolution is a load-bearing architectural property.",
    },
    {
      option = "Single credential file per registry (no RO/RW split)",
      why_rejected = "A single credential with RW access distributed to read-only actors (cdci, ontoref, agent) violates least-privilege. A compromised CI pipeline with RW credentials can push malicious artifacts. The RO/RW split means a compromised read-only credential cannot alter the artifact namespace.",
    },
  ],

  # --- Constraints -----------------------------------------------------------
  # Each constraint carries an id, claim, scope, severity ('Hard or 'Soft),
  # a machine check, and a rationale. Two check shapes appear in this file:
  #   { tag = 'NuCmd, cmd }                          — a command line to run
  #   { tag = 'Grep, pattern, paths, must_be_empty } — a text search
  # How the checker interprets these fields is defined outside this file.
  # In the grep patterns, "\\" is the Nickel escape for one backslash, so the
  # checker receives `\|` and `\s`.
  constraints = [
    {
      # NOTE(review): the claim says the credentials are "held in the actor's
      # local .kage", while `decision` stores them in access.sops.yaml
      # encrypted to the .kage key — confirm the intended wording.
      id = "vault-access-credentials-independent",
      claim = "The credentials required to pull the src-vault OCI artifact from ZOT must be stored outside the vault itself — each src-vault has its own access credentials, held in the actor's local .kage, not inside any vault it protects",
      scope = "all projects with src-vault in ZOT",
      severity = 'Hard,
      check = {
        tag = 'NuCmd,
        cmd = "ore secrets audit --check bootstrap-credentials",
      },
      rationale = "A vault whose access credential is inside itself cannot be opened. The bootstrap credential (registry RO for the src-vault namespace) must preexist in the actor's local .kage. Two projects on the same ZOT instance have independent src-vault namespaces with independent access credentials — access to one does not imply access to the other.",
    },
    {
      id = "no-credential-env",
      claim = "RegistryEntry must not use credential_env — only credential_sops or credential_oidc are valid credential reference fields",
      scope = "all manifest.ncl files declaring registry_provides",
      severity = 'Hard,
      check = {
        tag = 'Grep,
        pattern = "credential_env",
        paths = [".ontology/manifest.ncl"],
        must_be_empty = true,
      },
      rationale = "credential_env is excluded from the schema. Its presence indicates a manual edit bypassing the contract.",
    },
    {
      id = "multi-recipient-mandatory",
      claim = "Every *.sops.yaml credential file must include at minimum the admin and the bound_actor recipients for its access class",
      scope = "all projects with registry_provides.registries[].credential_sops declared",
      severity = 'Hard,
      check = {
        tag = 'NuCmd,
        cmd = "ore secrets audit --check recipients",
      },
      rationale = "A credential file encrypted for a single recipient is indistinguishable from a shared secret. Multi-recipient is the mechanism that enables revocation without rotation.",
    },
    {
      id = "uses-registry-declared",
      claim = "Any domain or mode that pushes or pulls from a registry must declare uses_registry referencing the RegistryEntry id in manifest.ncl",
      scope = "domain and mode NCL declarations",
      severity = 'Hard,
      check = {
        tag = 'NuCmd,
        cmd = "ore secrets audit --check registry-deps",
      },
      rationale = "Without the explicit dependency declaration, secrets-close cannot compute the impact of credential changes. Undeclared dependencies produce silent breakage after a credential rotation.",
    },
    {
      id = "ore-secrets-exclusive-wrapper",
      claim = "All sops operations on registry credential files must go through ore secrets — direct sops invocations bypass lock state and audit log",
      scope = "CI pipeline and developer workflow",
      severity = 'Soft,
      check = {
        tag = 'NuCmd,
        cmd = "ore secrets audit --check lock-compliance",
      },
      rationale = "ore secrets enforces lock state verification before any sops edit. Direct sops invocations are detectable in CI audit but not preventable at the filesystem level. The constraint is soft because enforcement is post-hoc.",
    },
    {
      # NOTE(review): with must_be_empty = false this check presumably only
      # requires that DOCKER_CONFIG appears somewhere under the path; it would
      # not by itself show that every oras invocation is isolated — confirm.
      id = "docker-config-isolation",
      claim = "Every oras invocation must use DOCKER_CONFIG pointing to a tmpdir containing only the credential for the target registry endpoint — no ambient ~/.docker/config.json",
      scope = "nulib/cli/integration.nu and any code calling oras",
      severity = 'Hard,
      check = {
        tag = 'Grep,
        pattern = "DOCKER_CONFIG",
        paths = ["provisioning/core/nulib/"],
        must_be_empty = false,
      },
      rationale = "Ambient credentials allow operations against project A to use credentials that belong to project B if both resolve to the same registry hostname. Isolation is enforced by constructing the config at call time and deleting it immediately after.",
    },
    {
      id = "vault-key-never-plaintext",
      claim = "vault_key must never be written to disk in plaintext — it is decrypted from access.sops.yaml into RESTIC_PASSWORD or KOPIA_PASSWORD for the duration of the operation only",
      scope = "nulib/platform/vault-backend.nu and all ore secrets callers",
      severity = 'Hard,
      check = {
        tag = 'Grep,
        pattern = "vault_key.*save\\|vault_key.*write\\|restic.*--key-file",
        paths = ["provisioning/core/nulib/"],
        must_be_empty = true,
      },
      rationale = "Writing vault_key to a file defeats the purpose of encrypting it in sops. The env var approach (RESTIC_PASSWORD, KOPIA_PASSWORD) keeps the key in process memory only. The .kage master key is the single plaintext secret the actor manages — everything derived from it must be ephemeral.",
    },
    {
      # NOTE(review): the searched paths appear to include the directory that
      # holds vault-backend.nu itself; confirm the wrapper's own restic/kopia
      # calls cannot match this pattern.
      id = "vault-backend-abstracted",
      claim = "All vault snapshot operations must use vault-backend.nu — direct restic or kopia invocations are not permitted in recipes or modules",
      scope = "justfiles/secrets.just and nulib/platform/vault-backend.nu callers",
      severity = 'Soft,
      check = {
        tag = 'Grep,
        pattern = "^\\s*restic\\|^\\s*kopia",
        paths = ["justfiles/", "provisioning/core/nulib/"],
        must_be_empty = true,
      },
      rationale = "Switching between restic and kopia must require only changing vault_backend.tool in config — not hunting direct invocations across recipes and modules.",
    },
    {
      # NOTE(review): the rationale says the src-vault artifact contains
      # access.sops.yaml, while `decision` describes access.sops.yaml as a
      # local file used to pull the artifact — confirm which is intended.
      id = "src-vault-cosign-signed",
      claim = "Every push of src-vault/:latest to ZOT must produce a cosign signature; every pull must verify the signature before the artifact is trusted",
      scope = "justfiles/secrets.just secrets-push and secrets-sync recipes",
      severity = 'Hard,
      check = {
        tag = 'NuCmd,
        cmd = "ore secrets audit --check cosign-signature",
      },
      rationale = "The src-vault artifact contains access.sops.yaml with encrypted registry credentials. An unsigned artifact can be substituted without detection — a malicious actor with write access to the registry could replace it with a version encrypted for attacker-controlled recipients. COSIGN provides tamper evidence independent of ZOT ACLs: even if ACLs are misconfigured, the signature check prevents a substituted vault from being consumed. cosign is now a Hard prerequisite for ontoref (declared in requirements) for this reason.",
    },
  ],

  # --- Cross-references ------------------------------------------------------
  related_adrs = [
    "adr-005-unified-auth-session-model",
    "adr-012-domain-extension-system",
    "adr-016-component-lift-out-pattern",
  ],

  # --- Ontology check --------------------------------------------------------
  # NOTE(review): decision_string mentions actor-role binding in project.ncl,
  # which the `decision` text above does not describe — confirm it belongs here.
  ontology_check = {
    decision_string = "registry credentials are managed via per-project sops multi-recipient vaults stored as OCI artifacts in ZOT; the daemon is structurally excluded from credential resolution; actor-role binding is declared in project.ncl; vault backend is restic or kopia behind a thin abstraction; access logs are co-located with the vault artifact",
    invariants_at_risk = ["protocol-not-runtime", "no-enforcement"],
    verdict = 'Safe,
  },
}