# Vapora/adrs/adr-006-rig-framework.ncl

# ADR-006: rig-core as the LLM agent framework, with the LLMClient trait in
# vapora-llm-router as the sole abstraction boundary for all provider calls.
# Built through the shared make_adr constructor so this record follows the
# common ADR schema (field names/enum tags presumably validated by
# adr-defaults.ncl — TODO confirm; that file is not visible here).
let d = import "adr-defaults.ncl" in
d.make_adr {
id = "adr-006",
title = "rig-core as LLM Agent Framework with LLMClient Abstraction Boundary",
# 'Accepted is a Nickel enum tag; allowed statuses are defined in adr-defaults.ncl.
status = 'Accepted,
# NOTE(review): date is the original acceptance date; the context below says
# "As of 2026-03-27" — presumably a later amendment. Confirm whether the schema
# has a separate "last_updated" field this should move to.
date = "2024-11-01",
context = "Vapora needs to call Claude, OpenAI, Gemini, and Ollama with tool calling and streaming support from Rust. As of 2026-03-27, rig-core 0.30 is in use (the markdown ADR references 0.15, which is stale). The critical architectural decision is not just which crate is used, but that the LLMClient trait in vapora-llm-router is the sole abstraction boundary — no crate calls provider APIs directly.",
decision = "rig-core is the LLM agent framework for tool calling, streaming, and provider adapters. The LLMClient trait in vapora-llm-router is the only permitted interface through which any vapora crate invokes LLM providers. No crate may call Anthropic, OpenAI, Gemini, or Ollama APIs directly — all calls go through LLMClient implementations backed by rig-core provider adapters.",
# Each rationale entry pairs a one-line claim with supporting detail.
rationale = [
{
claim = "LLMClient trait decouples callers from rig-core's API surface",
detail = "rig-core 0.15 to 0.30 introduced breaking API changes. Because all callers depend on LLMClient (not rig-core directly), the upgrade required changes only in vapora-llm-router's provider adapters, not in the 5 crates that invoke LLM providers.",
},
{
claim = "rig-core provides Rust-native tool calling without a Python bridge",
detail = "LangChain Python bridge requires IPC, a Python runtime in the container, and serialization overhead for every tool call. rig-core compiles tool schemas to JSON at build time via Rust proc macros — zero runtime overhead.",
},
{
claim = "Cost tracking is only possible when all calls funnel through a single interface",
detail = "BudgetEnforcer and CostTracker in vapora-llm-router can only count every token if every LLM call passes through LLMClient. Direct provider calls bypass cost tracking and invalidate budget enforcement.",
},
],
consequences = {
positive = [
"Provider substitution (swap Claude for Gemini for a role) is a config change in llm-router.toml, not a code change",
"BudgetEnforcer sees every LLM call — no calls escape cost tracking",
"Streaming and tool calling are abstracted — callers are provider-agnostic",
"rig-core version upgrades affect only vapora-llm-router provider adapters",
],
negative = [
"rig-core is a relatively young crate — breaking API changes between minor versions have occurred",
"The LLMClient abstraction hides provider-specific capabilities (e.g. Claude extended thinking, OpenAI structured outputs) that can only be accessed by downgrading to rig-core directly",
],
},
alternatives_considered = [
{
option = "LangChain via Python bridge",
why_rejected = "Python runtime in container, IPC overhead, serialization for every call. Eliminates the full-stack Rust advantage and introduces a Python/Rust type boundary.",
},
{
option = "Direct provider SDKs (async-anthropic, openai-rust, etc.)",
why_rejected = "Each provider SDK has a different async interface, error type, and streaming API. Implementing cost tracking, fallback chains, and budget enforcement across N different APIs multiplies maintenance burden by N.",
},
],
# Machine-checkable constraints; the `check` record's shape depends on its
# `tag` ('Cargo = dependency check, 'Grep = source-pattern check).
constraints = [
{
id = "no-direct-provider-calls",
claim = "No crate outside vapora-llm-router may import anthropic, openai, or gemini client crates directly",
scope = "vapora (all crates except vapora-llm-router)",
severity = 'Hard,
# NOTE(review): scope says "all crates except vapora-llm-router" but this
# check only inspects vapora-backend's dependencies — other crates are
# unchecked. Verify whether the 'Cargo check schema accepts a crate list,
# or add one constraint per crate.
check = { tag = 'Cargo, crate = "vapora-backend", forbidden_deps = ["async-anthropic", "openai", "google-generativeai"] },
rationale = "Direct provider calls bypass BudgetEnforcer and CostTracker, making cost enforcement impossible. They also bypass fallback chains, causing provider failures to surface as hard errors instead of automatic fallback.",
},
{
id = "llm-client-trait-boundary",
claim = "All LLM invocations in vapora-agents must go through the LLMClient trait",
scope = "vapora-agents (all executor and coordinator code)",
severity = 'Hard,
# NOTE(review): must_be_empty = false means the grep must find at least one
# "LLMClient" match — this proves the trait is referenced, not that every
# invocation routes through it. A bypass would still pass this check;
# consider an additional 'Grep with a forbidden provider-API pattern.
check = { tag = 'Grep, pattern = "LLMClient", paths = ["crates/vapora-agents/src/"], must_be_empty = false },
rationale = "The trait is the enforcement point for routing rules, budget limits, and provider abstraction. Code that bypasses it defeats all three.",
},
],
# Cross-references to related decisions (presumably routing and budget ADRs —
# confirm against adr-002 / adr-009 titles).
related_adrs = ["adr-002", "adr-009"],
ontology_check = {
decision_string = "rig-core 0.30 for LLM agent framework; LLMClient trait is the sole abstraction boundary; no direct provider API calls outside vapora-llm-router",
invariants_at_risk = ["provider-abstraction", "cost-aware-routing"],
verdict = 'Safe,
},
}