provisioning/adrs/adr-019-ai-rag-integration.ncl

# ADR-019 — Schema-aware AI and RAG.
# Records the decision to ground AI config generation in the project's Nickel
# schemas (via RAG) and to gate the ai-service through Cedar policies.
#
# `d` is the module imported from adr-defaults.ncl; it supplies `make_adr`,
# which is applied to the record below. The contract/defaults `make_adr`
# enforces are defined in that file and are not visible here — TODO confirm
# the accepted field set and enum tags against adr-defaults.ncl.
let d = import "adr-defaults.ncl" in

d.make_adr {
  # --- Identity and lifecycle ---
  id     = "adr-019",
  title  = "Schema-Aware AI and RAG — Nickel Contracts Constrain AI Config Generation",
  status = 'Accepted,
  date   = "2026-01-08",

  # Problem statement: ungrounded LLM output (hallucination) and unrestricted
  # agent access (security) are the two risks this ADR addresses.
  context = "Infrastructure configuration generation via LLM is unreliable without grounding: generic AI produces plausible but structurally invalid configs (wrong field names, invalid enum values, incompatible option combinations). Two risks: (1) hallucination — AI generates configs that fail schema validation; (2) security — AI agents with unrestricted access to secrets and deployment operations create unaudited paths. The platform has Nickel schemas for all configuration surfaces and Cedar for authorization — both can be used to constrain AI behavior.",

  # The decision itself: schemas constrain generation, Cedar constrains
  # authorization, ai-service is the single HTTP entry point, and mcp-server
  # exposes the validation/query tools to agents.
  decision = "AI config generation is constrained by Nickel schemas at generation time and by Cedar policies at authorization time. The ai-service is the HTTP entry point for all AI operations. RAG indexes Nickel schemas, documentation, and past deployments as retrieval context — AI generates WITH schema context, making hallucination structurally harder. Cedar policy forbids ai-service from accessing any secret and requires `context.human_approved == true` before any deployment operation. The mcp-server exposes tool calling (nickel_validate, schema_query, best_practices) to LLM agents.",

  # Supporting arguments. Each entry pairs a one-line `claim` with the
  # `detail` that backs it.
  rationale = [
    {
      # NOTE(review): "eliminates" is stronger than the decision text above,
      # which only says hallucination becomes "structurally harder"; the
      # detail also relies on post-generation validation. Consider aligning
      # the wording.
      claim  = "Schema-constrained generation eliminates invalid config hallucination",
      detail = "Generic LLMs generate `engine = 'postgresql'` when the contract says `engine | [| 'postgres, 'mysql |]`. Providing the schema as RAG context gives the model the exact valid values. Post-generation nickel export validates the output against the same contract.",
    },
    {
      # NOTE(review): the Cedar snippet in `detail` is illustrative. Cedar
      # entity IDs are literal strings, so `Secret::"*"` is presumably not a
      # wildcard — a type test such as `resource is Secret` may be what the
      # deployed policy uses. It also names only Action::"read". Confirm
      # against the real policy file before quoting this text elsewhere.
      claim  = "Cedar is the enforcement layer — not prompt engineering",
      detail = "Prompting AI to 'not access secrets' is not a security boundary. Cedar policy `forbid(principal == Service::\"ai-service\", action == Action::\"read\", resource in Secret::\"*\")` is enforced at the platform layer regardless of what the LLM requests.",
    },
    {
      claim  = "RAG over project artifacts is more accurate than generic LLM for project-specific configs",
      detail = "Indexing `schemas/`, `docs/`, and past successful deployments means AI answers are grounded in actual project patterns — not generic infrastructure knowledge that may conflict with project constraints.",
    },
  ],

  # Expected outcomes, split into benefits and costs.
  consequences = {
    positive = [
      "AI cannot generate configs that fail Nickel schema validation — structural correctness enforced",
      "Cedar prevents AI from accessing secrets or deploying without human approval",
      "RAG over project artifacts reduces hallucination on project-specific options",
      "MCP tool calling (nickel_validate, schema_query) enables LLM agents to self-correct",
    ],
    negative = [
      "RAG index must be kept current as schemas and docs evolve — stale index degrades answer quality",
      "ai-service adds a service dependency for all AI-assisted operations",
      "Cost tracking required: rate limiting at 60 req/min, 1M tokens/day, $100/day",
    ],
  },

  # Options that were evaluated and rejected, each with the reason.
  alternatives_considered = [
    {
      option       = "Generic LLM without schema grounding (GitHub Copilot style)",
      why_rejected = "Generates syntactically valid but semantically wrong configs — wrong enum values, missing required fields, invalid option combinations. Schema validation must happen after generation and frequently fails.",
    },
    {
      option       = "Fine-tuned model on project schemas",
      why_rejected = "Fine-tuning is expensive, requires retraining on every schema change, and does not generalize across projects. RAG is dynamic and always reflects the current schema state.",
    },
  ],

  # Machine-checkable constraints. Every entry is 'Hard severity and carries a
  # 'Grep check made of a regex `pattern`, the `paths` to search, and
  # `must_be_empty`. With `must_be_empty = false` the check presumably passes
  # when the pattern matches at least once — TODO confirm in the checker.
  constraints = [
    {
      id        = "ai-cannot-access-secrets",
      claim     = "ai-service must have a Cedar policy explicitly forbidding access to any Secret resource",
      scope     = "platform/crates/control-center/src/policies/",
      severity  = 'Hard,
      # NOTE(review): `paths` covers all of platform/, which is wider than
      # `scope`. The pattern only requires "ai-service" and "Secret" on one
      # line, so it cannot tell a forbid rule from any other mention.
      check     = { tag = 'Grep, pattern = "ai-service.*Secret|Secret.*ai-service", paths = ["platform/"], must_be_empty = false },
      rationale = "AI agents with secret access create unaudited credential exposure. The constraint must be at the authorization layer, not in the LLM prompt.",
    },
    {
      id        = "ai-deployment-requires-human-approval",
      claim     = "Any deployment action triggered by ai-service must have context.human_approved == true in the Cedar evaluation context",
      scope     = "platform/crates/orchestrator/src/",
      severity  = 'Hard,
      # NOTE(review): searches all of platform/ although `scope` names the
      # orchestrator crate; any occurrence of "human_approved" satisfies it.
      check     = { tag = 'Grep, pattern = "human_approved", paths = ["platform/"], must_be_empty = false },
      rationale = "Autonomous deployment without human review is an unacceptable risk for production infrastructure. The approval gate is enforced by Cedar, not by AI self-restraint.",
    },
    {
      id        = "ai-generation-validates-against-schema",
      claim     = "All AI-generated Nickel configs must be validated via nickel export before being presented to the user or submitted to the orchestrator",
      scope     = "platform/crates/ai-service/src/",
      severity  = 'Hard,
      # Matches either a "nickel ... export" invocation or the
      # nickel_validate tool name inside the ai-service crate.
      check     = { tag = 'Grep, pattern = "nickel.*export|nickel_validate", paths = ["platform/crates/ai-service/"], must_be_empty = false },
      rationale = "Post-generation validation closes the loop — if the LLM generates an invalid config despite schema grounding, the user sees a validation error, not a deployment failure.",
    },
  ],

  # Cross-references to other ADRs by slug.
  related_adrs = ["adr-014-solid-enforcement", "adr-017-typedialog-web-ui", "adr-018-secretumvault-integration"],

  # Ontology self-assessment: a one-line restatement of the decision, the
  # named invariants it could affect, and the resulting verdict.
  ontology_check = {
    decision_string    = "AI config generation is constrained by Nickel schemas (RAG grounding) and Cedar policies (secret isolation, human approval gate)",
    invariants_at_risk = ["solid-boundaries", "type-safety-nickel"],
    verdict            = 'Safe,
  },
}
docs: update README and CHANGELOG for nickel branch (2026-05-12) 2026-05-12 02:23:01 +01:00

			`let d = import "adr-defaults.ncl" in`

			`d.make_adr {`
			`id = "adr-019",`
			`title = "Schema-Aware AI and RAG — Nickel Contracts Constrain AI Config Generation",`
			`status = 'Accepted,`
			`date = "2026-01-08",`

			context = "Infrastructure configuration generation via LLM is unreliable without grounding: generic AI produces plausible but structurally invalid configs (wrong field names, invalid enum values, incompatible option combinations). Two risks: (1) hallucination — AI generates configs that fail schema validation; (2) security — AI agents with unrestricted access to secrets and deployment operations create unaudited paths. The platform has Nickel schemas for all configuration surfaces and Cedar for authorization — both can be used to constrain AI behavior.",

			decision = "AI config generation is constrained by Nickel schemas at generation time and by Cedar policies at authorization time. The ai-service is the HTTP entry point for all AI operations. RAG indexes Nickel schemas, documentation, and past deployments as retrieval context — AI generates WITH schema context, making hallucination structurally harder. Cedar policy forbids ai-service from accessing any secret and requires `context.human_approved == true` before any deployment operation. The mcp-server exposes tool calling (nickel_validate, schema_query, best_practices) to LLM agents.",

			`rationale = [`
			`{`
			`claim = "Schema-constrained generation eliminates invalid config hallucination",`
			detail = "Generic LLMs generate `engine = 'postgresql'` when the contract says `engine | [| 'postgres, 'mysql |]`. Providing the schema as RAG context gives the model the exact valid values. Post-generation nickel export validates the output against the same contract.",
			`},`
			`{`
			`claim = "Cedar is the enforcement layer — not prompt engineering",`
			detail = "Prompting AI to 'not access secrets' is not a security boundary. Cedar policy `forbid(principal == Service::\"ai-service\", action == Action::\"read\", resource in Secret::\"*\")` is enforced at the platform layer regardless of what the LLM requests.",
			`},`
			`{`
			`claim = "RAG over project artifacts is more accurate than generic LLM for project-specific configs",`
			detail = "Indexing `schemas/`, `docs/`, and past successful deployments means AI answers are grounded in actual project patterns — not generic infrastructure knowledge that may conflict with project constraints.",
			`},`
			`],`

			`consequences = {`
			`positive = [`
			`"AI cannot generate configs that fail Nickel schema validation — structural correctness enforced",`
			`"Cedar prevents AI from accessing secrets or deploying without human approval",`
			`"RAG over project artifacts reduces hallucination on project-specific options",`
			`"MCP tool calling (nickel_validate, schema_query) enables LLM agents to self-correct",`
			`],`
			`negative = [`
			`"RAG index must be kept current as schemas and docs evolve — stale index degrades answer quality",`
			`"ai-service adds a service dependency for all AI-assisted operations",`
			`"Cost tracking required: rate limiting at 60 req/min, 1M tokens/day, $100/day",`
			`],`
			`},`

			`alternatives_considered = [`
			`{`
			`option = "Generic LLM without schema grounding (GitHub Copilot style)",`
			`why_rejected = "Generates syntactically valid but semantically wrong configs — wrong enum values, missing required fields, invalid option combinations. Schema validation must happen after generation and frequently fails.",`
			`},`
			`{`
			`option = "Fine-tuned model on project schemas",`
			`why_rejected = "Fine-tuning is expensive, requires retraining on every schema change, and does not generalize across projects. RAG is dynamic and always reflects the current schema state.",`
			`},`
			`],`

			`constraints = [`
			`{`
			`id = "ai-cannot-access-secrets",`
			`claim = "ai-service must have a Cedar policy explicitly forbidding access to any Secret resource",`
			`scope = "platform/crates/control-center/src/policies/",`
			`severity = 'Hard,`
			`check = { tag = 'Grep, pattern = "ai-service.*Secret|Secret.*ai-service", paths = ["platform/"], must_be_empty = false },`
			`rationale = "AI agents with secret access create unaudited credential exposure. The constraint must be at the authorization layer, not in the LLM prompt.",`
			`},`
			`{`
			`id = "ai-deployment-requires-human-approval",`
			`claim = "Any deployment action triggered by ai-service must have context.human_approved == true in the Cedar evaluation context",`
			`scope = "platform/crates/orchestrator/src/",`
			`severity = 'Hard,`
			`check = { tag = 'Grep, pattern = "human_approved", paths = ["platform/"], must_be_empty = false },`
			`rationale = "Autonomous deployment without human review is an unacceptable risk for production infrastructure. The approval gate is enforced by Cedar, not by AI self-restraint.",`
			`},`
			`{`
			`id = "ai-generation-validates-against-schema",`
			`claim = "All AI-generated Nickel configs must be validated via nickel export before being presented to the user or submitted to the orchestrator",`
			`scope = "platform/crates/ai-service/src/",`
			`severity = 'Hard,`
			`check = { tag = 'Grep, pattern = "nickel.*export|nickel_validate", paths = ["platform/crates/ai-service/"], must_be_empty = false },`
			`rationale = "Post-generation validation closes the loop — if the LLM generates an invalid config despite schema grounding, the user sees a validation error, not a deployment failure.",`
			`},`
			`],`

			`related_adrs = ["adr-014-solid-enforcement", "adr-017-typedialog-web-ui", "adr-018-secretumvault-integration"],`

			`ontology_check = {`
			`decision_string = "AI config generation is constrained by Nickel schemas (RAG grounding) and Cedar policies (secret isolation, human approval gate)",`
			`invariants_at_risk = ["solid-boundaries", "type-safety-nickel"],`
			`verdict = 'Safe,`
			`},`
			`}`