let d = import "adr-defaults.ncl" in

d.make_adr {
  id = "adr-009",
  title = "Per-Task-Type Learning Profiles with Recency Bias for Agent Selection",
  status = 'Accepted,
  date = "2024-11-01",

  context = "The swarm coordinator assigns tasks to agents. Naive round-robin wastes budget on agents that have poor track records for a specific task type. The agent selection scoring formula must balance current load (avoid overloading), expertise (prefer agents with high success rates for this task type), and confidence (discount scores based on small sample sizes). The recency bias addresses the reality that agent performance changes: a model update or config change can quickly improve or degrade an agent's capability on a specific task type.",

  decision = "Each agent maintains a per-task-type LearningProfile in vapora-agents/src/learning_profile.rs. The swarm scoring formula is: `score = 0.3*load_factor + 0.5*expertise_score + 0.2*confidence_weight`. The last 7 days of execution history are weighted 3x relative to older executions. Confidence weighting is applied when an agent has fewer than 20 executions for a task type: `confidence_weight = min(executions / 20, 1.0)`. Profiles are stored in SurrealDB and survive agent restarts.",

  rationale = [
    {
      claim = "Recency bias reflects that agent performance changes over time",
      detail = "An all-time average treats a task that succeeded six months ago the same as one that succeeded yesterday. If an agent's model was updated or its config was tuned last week, the all-time average undersells current capability. A 7-day window with 3x weighting surfaces recent performance changes within days.",
    },
    {
      claim = "Confidence weighting prevents the cold-start exploitation problem",
      detail = "A new agent with 2 successful executions would score 100% expertise without confidence weighting, outranking a veteran with 200 executions and a 90% success rate. The min(n/20, 1.0) factor ensures new agents are not over-promoted until their sample size is statistically meaningful.",
    },
    {
      claim = "The 0.3/0.5/0.2 weight distribution prioritizes expertise over load",
      detail = "An agent at 80% load but with 95% expertise beats an idle agent with 50% expertise. This reflects the reality that getting the task done correctly is more valuable than perfect load distribution — especially for expensive LLM tasks where failure costs money.",
    },
  ],

  consequences = {
    positive = [
      "Agent selection improves automatically over the first few weeks as profiles accumulate data",
      "Agents that consistently fail specific task types are automatically deprioritized without manual configuration",
      "The scoring formula is explicit and observable — selection decisions can be explained from profile data",
    ],
    negative = [
      "Cold-start period: new agents are ranked conservatively until 20 executions per task type accumulate",
      "7-day recency window means a one-week outage resets expertise scores for that period",
      "Profile data accumulation requires SurrealDB persistence — in-memory-only deployments lose learning across restarts",
    ],
  },

  alternatives_considered = [
    {
      option = "All-time average success rate",
      why_rejected = "Does not adapt to recent performance changes. An agent that improved last week still carries the drag of its earlier poor performance for months.",
    },
    {
      option = "Last-N sliding window",
      why_rejected = "Artificial cutoff — performance from execution N+1 onward is ignored entirely. The 7-day 3x recency weighting still counts older executions at reduced weight, a smoother approximation of 'recent performance matters more'.",
    },
  ],

  constraints = [
    {
      id = "scoring-formula-in-swarm",
      claim = "The agent selection scoring formula (0.3*load + 0.5*expertise + 0.2*confidence) must be implemented in vapora-swarm — not duplicated across multiple crates",
      scope = "vapora-swarm/src/coordinator.rs",
      severity = 'Hard,
      check = { tag = 'Grep, pattern = "0\\.3|0\\.5|expertise", paths = ["crates/vapora-swarm/src/"], must_be_empty = false },
      rationale = "If the scoring formula is duplicated, the two copies will diverge. Selection decisions will be inconsistent depending on which codepath selected the agent.",
    },
    {
      id = "profiles-persisted-to-surrealdb",
      claim = "LearningProfile data must be persisted to SurrealDB — no in-memory-only profile storage",
      scope = "vapora-agents/src/learning_profile.rs",
      severity = 'Hard,
      check = { tag = 'Grep, pattern = "db|surreal|persist", paths = ["crates/vapora-agents/src/learning_profile.rs"], must_be_empty = false },
      rationale = "In-memory profiles are lost on agent restart, resetting all accumulated expertise data. This effectively resets the learning system on every deployment.",
    },
    {
      id = "confidence-threshold-twenty",
      claim = "Confidence weighting must apply until an agent reaches 20 executions per task type",
      scope = "vapora-agents/src/learning_profile.rs, vapora-swarm/src/",
      severity = 'Soft,
      check = { tag = 'Grep, pattern = "20\\.0|min_executions|confidence", paths = ["crates/vapora-agents/src/", "crates/vapora-swarm/src/"], must_be_empty = false },
      rationale = "The threshold of 20 executions was tuned to balance cold-start speed against exploitation of new agents with small samples. Changing it without analysis risks either slow ramp-up or premature promotion.",
    },
  ],

  related_adrs = ["adr-006", "adr-008"],

  ontology_check = {
    decision_string = "per-task-type LearningProfile; scoring formula 0.3*load+0.5*expertise+0.2*confidence; 7-day recency bias 3x; confidence ramp to 20 executions; SurrealDB persistence",
    invariants_at_risk = ["learning-based-selection"],
    verdict = 'Safe,
  },
}
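The scoring formula in the decision can be sketched in Rust. This is a minimal illustration, not the actual vapora-swarm implementation: the names (`Execution`, `expertise_score`, `score_agent`) are hypothetical, and `load_factor` is assumed to mean available capacity (1.0 = idle), since the ADR does not define its direction.

```rust
// Illustrative sketch of ADR-009 scoring. Names and the load_factor
// convention (1.0 = idle) are assumptions, not the vapora-swarm API.

const RECENCY_WINDOW_SECS: u64 = 7 * 24 * 3600; // 7-day recency window
const RECENCY_MULTIPLIER: f64 = 3.0; // recent executions count 3x
const CONFIDENCE_THRESHOLD: f64 = 20.0; // full confidence at 20 executions

struct Execution {
    succeeded: bool,
    age_secs: u64, // seconds since the execution finished
}

/// Recency-weighted success rate: executions from the last 7 days
/// are weighted 3x relative to older ones.
fn expertise_score(history: &[Execution]) -> f64 {
    let (mut weighted_success, mut total_weight) = (0.0, 0.0);
    for e in history {
        let w = if e.age_secs <= RECENCY_WINDOW_SECS {
            RECENCY_MULTIPLIER
        } else {
            1.0
        };
        total_weight += w;
        if e.succeeded {
            weighted_success += w;
        }
    }
    if total_weight == 0.0 { 0.0 } else { weighted_success / total_weight }
}

/// min(n / 20, 1.0): ramps confidence over the first 20 executions.
fn confidence_weight(executions: usize) -> f64 {
    (executions as f64 / CONFIDENCE_THRESHOLD).min(1.0)
}

/// score = 0.3*load_factor + 0.5*expertise_score + 0.2*confidence_weight
fn score_agent(load_factor: f64, history: &[Execution]) -> f64 {
    0.3 * load_factor
        + 0.5 * expertise_score(history)
        + 0.2 * confidence_weight(history.len())
}

fn main() {
    // Veteran: 200 executions, 90% success, all older than 7 days.
    let veteran: Vec<Execution> = (0..200)
        .map(|i| Execution { succeeded: i % 10 != 0, age_secs: RECENCY_WINDOW_SECS + 1 })
        .collect();
    // Newcomer: only 2 recent successes (raw expertise 100%).
    let newcomer = vec![
        Execution { succeeded: true, age_secs: 100 },
        Execution { succeeded: true, age_secs: 200 },
    ];
    // Both idle: veteran 0.3 + 0.45 + 0.2 = 0.95 beats newcomer
    // 0.3 + 0.5 + 0.02 = 0.82, matching the cold-start rationale.
    println!("veteran:  {:.3}", score_agent(1.0, &veteran));
    println!("newcomer: {:.3}", score_agent(1.0, &newcomer));
}
```

With both agents idle, the confidence ramp keeps the 2-execution newcomer (0.82) below the 200-execution veteran (0.95), which is exactly the cold-start behavior the rationale describes.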