Vapora/docs/adrs/0017-confidence-weighting.html

459 lines
20 KiB
HTML
Raw Normal View History

<!DOCTYPE HTML>
<html lang="en" class="light sidebar-visible" dir="ltr">
<head>
<!-- Book generated using mdBook -->
<meta charset="UTF-8">
<title>0017: Confidence Weighting - VAPORA Platform Documentation</title>
<!-- Custom HTML head -->
<meta name="description" content="Comprehensive documentation for VAPORA, an intelligent development orchestration platform built entirely in Rust.">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="theme-color" content="#ffffff">
<link rel="icon" href="../favicon.svg">
<link rel="shortcut icon" href="../favicon.png">
<link rel="stylesheet" href="../css/variables.css">
<link rel="stylesheet" href="../css/general.css">
<link rel="stylesheet" href="../css/chrome.css">
<link rel="stylesheet" href="../css/print.css" media="print">
<!-- Fonts -->
<link rel="stylesheet" href="../FontAwesome/css/font-awesome.css">
<link rel="stylesheet" href="../fonts/fonts.css">
<!-- Highlight.js Stylesheets -->
<link rel="stylesheet" id="highlight-css" href="../highlight.css">
<link rel="stylesheet" id="tomorrow-night-css" href="../tomorrow-night.css">
<link rel="stylesheet" id="ayu-highlight-css" href="../ayu-highlight.css">
<!-- Custom theme stylesheets -->
<!-- Provide site root and default themes to javascript -->
<script>
// Relative path prefix from this page to the book's root directory.
const path_to_root = "../";
// Theme names used as the fallback when no theme is stored: one for a light
// and one for a dark colour-scheme preference.
const default_light_theme = "light";
const default_dark_theme = "dark";
</script>
<!-- Start loading toc.js asap -->
<script src="../toc.js"></script>
</head>
<body>
<div id="mdbook-help-container">
<div id="mdbook-help-popup">
<h2 class="mdbook-help-title">Keyboard shortcuts</h2>
<div>
<p>Press <kbd>←</kbd> or <kbd>→</kbd> to navigate between chapters</p>
<p>Press <kbd>S</kbd> or <kbd>/</kbd> to search in the book</p>
<p>Press <kbd>?</kbd> to show this help</p>
<p>Press <kbd>Esc</kbd> to hide this help</p>
</div>
</div>
</div>
<div id="body-container">
<!-- Work around some values being stored in localStorage wrapped in quotes -->
<script>
try {
    // Some values end up stored wrapped in double quotes; strip them.
    // Each key is handled on its own so that a missing theme entry (null on a
    // first visit) no longer raises a TypeError that skips the sidebar entry.
    for (const key of ['mdbook-theme', 'mdbook-sidebar']) {
        const stored = localStorage.getItem(key);
        // Require two characters so a lone `"` is not treated as a quoted
        // value and reduced to an empty string.
        if (stored !== null && stored.length >= 2 && stored.startsWith('"') && stored.endsWith('"')) {
            localStorage.setItem(key, stored.slice(1, -1));
        }
    }
} catch (e) {
    // Accessing localStorage can throw; this clean-up is best-effort only.
}
</script>
<!-- Set the theme before any content is loaded, prevents flash -->
<script>
// Choose the fallback theme from the colour-scheme media query.
const default_theme = window.matchMedia("(prefers-color-scheme: dark)").matches ? default_dark_theme : default_light_theme;
let theme;
// Reading localStorage can throw; on failure `theme` stays undefined.
try { theme = localStorage.getItem('mdbook-theme'); } catch(e) { }
// No stored theme: use the fallback chosen above.
if (theme === null || theme === undefined) { theme = default_theme; }
const html = document.documentElement;
// Swap the `light` class hard-coded on the <html> element for the resolved theme.
html.classList.remove('light')
html.classList.add(theme);
// NOTE(review): the `js` class presumably lets stylesheets detect that scripts ran — confirm in the CSS.
html.classList.add("js");
</script>
<input type="checkbox" id="sidebar-toggle-anchor" class="hidden">
<!-- Hide / unhide sidebar before it is displayed -->
<script>
// Decide the sidebar's initial state: at a body width of 1080px or more use
// the stored preference (defaulting to 'visible'); below that start hidden.
let sidebar = null;
const sidebar_toggle = document.getElementById("sidebar-toggle-anchor");
if (document.body.clientWidth >= 1080) {
// A failed or empty read leaves `sidebar` falsy, which falls back to 'visible' below.
try { sidebar = localStorage.getItem('mdbook-sidebar'); } catch(e) { }
sidebar = sidebar || 'visible';
} else {
sidebar = 'hidden';
}
// Keep the toggle checkbox in step with the chosen state.
sidebar_toggle.checked = sidebar === 'visible';
// Replace the `sidebar-visible` class hard-coded on the <html> element with `sidebar-<state>`.
html.classList.remove('sidebar-visible');
html.classList.add("sidebar-" + sidebar);
</script>
<nav id="sidebar" class="sidebar" aria-label="Table of contents">
<!-- populated by js -->
<mdbook-sidebar-scrollbox class="sidebar-scrollbox"></mdbook-sidebar-scrollbox>
<noscript>
<iframe class="sidebar-iframe-outer" src="../toc.html" title="Table of contents"></iframe>
</noscript>
<div id="sidebar-resize-handle" class="sidebar-resize-handle">
<div class="sidebar-resize-indicator"></div>
</div>
</nav>
<div id="page-wrapper" class="page-wrapper">
<div class="page">
<div id="menu-bar-hover-placeholder"></div>
<div id="menu-bar" class="menu-bar sticky">
<div class="left-buttons">
<label id="sidebar-toggle" class="icon-button" for="sidebar-toggle-anchor" title="Toggle Table of Contents" aria-label="Toggle Table of Contents" aria-controls="sidebar">
<i class="fa fa-bars"></i>
</label>
<button id="theme-toggle" class="icon-button" type="button" title="Change theme" aria-label="Change theme" aria-haspopup="true" aria-expanded="false" aria-controls="theme-list">
<i class="fa fa-paint-brush"></i>
</button>
<ul id="theme-list" class="theme-popup" aria-label="Themes" role="menu">
<li role="none"><button role="menuitem" class="theme" id="default_theme">Auto</button></li>
<li role="none"><button role="menuitem" class="theme" id="light">Light</button></li>
<li role="none"><button role="menuitem" class="theme" id="rust">Rust</button></li>
<li role="none"><button role="menuitem" class="theme" id="coal">Coal</button></li>
<li role="none"><button role="menuitem" class="theme" id="navy">Navy</button></li>
<li role="none"><button role="menuitem" class="theme" id="ayu">Ayu</button></li>
</ul>
<button id="search-toggle" class="icon-button" type="button" title="Search (`/`)" aria-label="Toggle Searchbar" aria-expanded="false" aria-keyshortcuts="/ s" aria-controls="searchbar">
<i class="fa fa-search"></i>
</button>
</div>
<h1 class="menu-title">VAPORA Platform Documentation</h1>
<div class="right-buttons">
<a href="../print.html" title="Print this book" aria-label="Print this book">
<i id="print-button" class="fa fa-print"></i>
</a>
<a href="https://github.com/vapora-platform/vapora" title="Git repository" aria-label="Git repository">
<i id="git-repository-button" class="fa fa-github"></i>
</a>
<a href="https://github.com/vapora-platform/vapora/edit/main/docs/src/../adrs/0017-confidence-weighting.md" title="Suggest an edit" aria-label="Suggest an edit">
<i id="git-edit-button" class="fa fa-edit"></i>
</a>
</div>
</div>
<div id="search-wrapper" class="hidden">
<form id="searchbar-outer" class="searchbar-outer">
<input type="search" id="searchbar" name="searchbar" placeholder="Search this book ..." aria-controls="searchresults-outer" aria-describedby="searchresults-header">
</form>
<div id="searchresults-outer" class="searchresults-outer hidden">
<div id="searchresults-header" class="searchresults-header"></div>
<ul id="searchresults">
</ul>
</div>
</div>
<!-- Apply ARIA attributes after the sidebar and the sidebar toggle button are added to the DOM -->
<script>
{
    // Mirror the sidebar's initial state into ARIA attributes now that the
    // sidebar and its toggle are both in the DOM. The braces keep the helper
    // names out of the global scope shared with the other scripts.
    const isVisible = sidebar === 'visible';
    document.getElementById('sidebar-toggle').setAttribute('aria-expanded', isVisible);
    document.getElementById('sidebar').setAttribute('aria-hidden', !isVisible);
    // Links in a hidden sidebar are taken out of the tab order.
    const linkTabIndex = isVisible ? 0 : -1;
    for (const link of document.querySelectorAll('#sidebar a')) {
        link.setAttribute('tabIndex', linkTabIndex);
    }
}
</script>
<div id="content" class="content">
<main>
<h1 id="adr-017-confidence-weighting-en-learning-profiles"><a class="header" href="#adr-017-confidence-weighting-en-learning-profiles">ADR-017: Confidence Weighting in Learning Profiles</a></h1>
<p><strong>Status</strong>: Accepted | Implemented<br>
<strong>Date</strong>: 2024-11-01<br>
<strong>Deciders</strong>: Agent Architecture Team<br>
<strong>Technical Story</strong>: Preventing new agents from being preferred on lucky first runs</p>
<hr />
<h2 id="decision"><a class="header" href="#decision">Decision</a></h2>
<p>Implement <strong>Confidence Weighting</strong> with the formula <code>confidence = min(1.0, total_executions / 20)</code>.</p>
<hr />
<h2 id="rationale"><a class="header" href="#rationale">Rationale</a></h2>
<ol>
<li><strong>Prevents Overfitting</strong>: New agents with a single success should not be preferred</li>
<li><strong>Statistical Significance</strong>: 20 executions provide statistical confidence</li>
<li><strong>Gradual Increase</strong>: Confidence rises as the agent executes more tasks</li>
<li><strong>Prevents Lucky Streaks</strong>: Requires evidence before an agent is preferred</li>
</ol>
<hr />
<h2 id="alternatives-considered"><a class="header" href="#alternatives-considered">Alternatives Considered</a></h2>
<h3 id="-no-confidence-weighting"><a class="header" href="#-no-confidence-weighting">❌ No Confidence Weighting</a></h3>
<ul>
<li><strong>Pros</strong>: Simple</li>
<li><strong>Cons</strong>: New agent with 1 success could be selected</li>
</ul>
<h3 id="-higher-threshold-eg-50-executions"><a class="header" href="#-higher-threshold-eg-50-executions">❌ Higher Threshold (e.g., 50 executions)</a></h3>
<ul>
<li><strong>Pros</strong>: More statistical rigor</li>
<li><strong>Cons</strong>: Cold-start problem worse, new agents never selected</li>
</ul>
<h3 id="-confidence--min10-executions20-chosen"><a class="header" href="#-confidence--min10-executions20-chosen">✅ Confidence = min(1.0, executions/20) (CHOSEN)</a></h3>
<ul>
<li>Reasonable threshold, balances learning and avoiding lucky streaks</li>
</ul>
<hr />
<h2 id="trade-offs"><a class="header" href="#trade-offs">Trade-offs</a></h2>
<p><strong>Pros</strong>:</p>
<ul>
<li>✅ Prevents overfitting on single success</li>
<li>✅ Reasonable learning curve (20 executions)</li>
<li>✅ Simple formula</li>
<li>✅ Transparent and explainable</li>
</ul>
<p><strong>Cons</strong>:</p>
<ul>
<li>⚠️ Cold-start: new agents take 20 runs to full confidence</li>
<li>⚠️ Not adaptive (same threshold for all task types)</li>
<li>⚠️ May still allow lucky streaks (before 20 runs)</li>
</ul>
<hr />
<h2 id="implementation"><a class="header" href="#implementation">Implementation</a></h2>
<p><strong>Confidence Model</strong>:</p>
<pre><pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>// crates/vapora-agents/src/learning_profile.rs
impl TaskTypeLearning {
    /// Confidence score: how much to trust this agent's score.
    ///
    /// Grows linearly with `executions_total` and is capped at 1.0:
    /// min(1.0, executions / 20) = 0.05 at 1 execution, 1.0 at 20+.
    ///
    /// Progression examples:
    /// - 1 exec: confidence = 0.05 (5%)
    /// - 5 exec: confidence = 0.25 (25%)
    /// - 10 exec: confidence = 0.50 (50%)
    /// - 20 exec: confidence = 1.0 (100%)
    pub fn confidence(&amp;self) -&gt; f32 {
        // `std::cmp::min` requires `Ord`, which `f32` does not implement,
        // so the cap uses the float-specific `f32::min` instead.
        (self.executions_total as f32 / 20.0).min(1.0)
    }

    /// Adjusted score: expertise * confidence.
    ///
    /// Even with perfect expertise, low confidence reduces the score.
    pub fn adjusted_score(&amp;self) -&gt; f32 {
        self.expertise_score() * self.confidence()
    }
}
<span class="boring">}</span></code></pre></pre>
<p><strong>Agent Selection with Confidence</strong>:</p>
<pre><pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>/// Selects the agent with the highest confidence-adjusted score for a task type.
///
/// Returns the winning agent's id, or `Error::NoAgentsAvailable` when the
/// query yields no learning profile for `task_type`.
pub async fn select_best_agent_with_confidence(
    db: &amp;Surreal&lt;Ws&gt;,
    task_type: &amp;str,
) -&gt; Result&lt;String&gt; {
    // Fetch every profile for this task type. SurrealQL parameters are named
    // (`$task_type`) and bound as key/value pairs rather than positionally.
    let mut response = db
        .query("SELECT * FROM task_type_learning WHERE task_type = $task_type")
        .bind(("task_type", task_type.to_owned()))
        .await?;

    // `take` needs a mutable response and deserializes the rows of
    // statement 0 into a vector.
    let profiles: Vec&lt;TaskTypeLearning&gt; = response.take(0)?;

    // Rank in Rust: `expertise_score()` and `confidence()` are methods on
    // `TaskTypeLearning`, so the ordering cannot be expressed in the query.
    let best = profiles
        .iter()
        .max_by(|a, b| a.adjusted_score().total_cmp(&amp;b.adjusted_score()))
        .ok_or(Error::NoAgentsAvailable)?;

    // Log selection with confidence for debugging
    tracing::info!(
        "Selected agent {} with confidence {:.2}% (after {} executions)",
        best.agent_id,
        best.confidence() * 100.0,
        best.executions_total
    );

    Ok(best.agent_id.clone())
}
<span class="boring">}</span></code></pre></pre>
<p><strong>Preventing Lucky Streaks</strong>:</p>
<pre><pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>// Example: Agent with 1 success but 5% confidence
let agent_1_success = TaskTypeLearning {
agent_id: "new-agent-1".to_string(),
task_type: "code_generation".to_string(),
executions_total: 1,
executions_successful: 1,
avg_quality_score: 0.95, // Near-perfect quality on the very first run
records: vec![ExecutionRecord { /* ... */ }],
};
// Expertise would be 0.95, but with a single execution confidence is only 1 / 20 = 0.05
let score = agent_1_success.adjusted_score(); // 0.95 * 0.05 = 0.0475
// This agent scores much lower than an established agent with 0.80 expertise
// and 0.50 confidence (10 executions):
// 0.80 * 0.50 = 0.40 &gt; 0.0475
// Confidence is driven by executions_total, so ~20 executions (successful or not)
// are needed before reaching full confidence
let agent_20_success = TaskTypeLearning {
executions_total: 20,
executions_successful: 20,
avg_quality_score: 0.95,
/* ... */
};
let score = agent_20_success.adjusted_score(); // 0.95 * 1.0 = 0.95
<span class="boring">}</span></code></pre></pre>
<p><strong>Confidence Visualization</strong>:</p>
<pre><pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>/// Tabulates the confidence ramp for 0 through 40 executions.
///
/// Returns one `(executions, confidence)` pair per execution count, where
/// confidence is `executions / 20` capped at 1.0.
pub fn confidence_ramp() -&gt; Vec&lt;(u32, f32)&gt; {
    (0..=40)
        // `f32::min` is used because `std::cmp::min` requires `Ord`,
        // which `f32` does not implement.
        .map(|execs| (execs, (execs as f32 / 20.0).min(1.0)))
        .collect()
}
// Output:
// 0 execs: 0.00
// 1 exec: 0.05
// 2 execs: 0.10
// 5 execs: 0.25
// 10 execs: 0.50
// 20 execs: 1.00 ← Full confidence reached
// 30 execs: 1.00 ← Capped at 1.0
// 40 execs: 1.00 ← Still capped
<span class="boring">}</span></code></pre></pre>
<p><strong>Key Files</strong>:</p>
<ul>
<li><code>/crates/vapora-agents/src/learning_profile.rs</code> (confidence calculation)</li>
<li><code>/crates/vapora-agents/src/selector.rs</code> (agent selection logic)</li>
<li><code>/crates/vapora-agents/src/scoring.rs</code> (score calculations)</li>
</ul>
<hr />
<h2 id="verification"><a class="header" href="#verification">Verification</a></h2>
<pre><code class="language-bash"># Test confidence calculation at key milestones
cargo test -p vapora-agents test_confidence_at_1_exec
cargo test -p vapora-agents test_confidence_at_5_execs
cargo test -p vapora-agents test_confidence_at_20_execs
cargo test -p vapora-agents test_confidence_cap_at_1
# Test lucky streak prevention
cargo test -p vapora-agents test_lucky_streak_prevention
# Test adjusted score (expertise * confidence)
cargo test -p vapora-agents test_adjusted_score_calculation
# Integration: new agent vs established agent selection
cargo test -p vapora-agents test_agent_selection_with_confidence
</code></pre>
<p><strong>Expected Output</strong>:</p>
<ul>
<li>1 execution: confidence = 0.05 (5%)</li>
<li>5 executions: confidence = 0.25 (25%)</li>
<li>10 executions: confidence = 0.50 (50%)</li>
<li>20 executions: confidence = 1.0 (100%)</li>
<li>New agent with 1 success not selected over established agent</li>
<li>Confidence gradually increases as agent executes more</li>
<li>Adjusted score properly combines expertise and confidence</li>
</ul>
<hr />
<h2 id="consequences"><a class="header" href="#consequences">Consequences</a></h2>
<h3 id="agent-cold-start"><a class="header" href="#agent-cold-start">Agent Cold-Start</a></h3>
<ul>
<li>New agents require ~20 executions (successful or not) before their score is no longer discounted by confidence</li>
<li>Longer ramp-up but prevents bad deployments</li>
<li>Users understand why new agents aren't immediately selected</li>
</ul>
<h3 id="agent-ranking"><a class="header" href="#agent-ranking">Agent Ranking</a></h3>
<ul>
<li>Established agents (20+ executions) ranked by expertise only</li>
<li>Developing agents (&lt; 20 executions) ranked by expertise * confidence</li>
<li>Creates natural progression for agent improvement</li>
</ul>
<h3 id="learning-curve"><a class="header" href="#learning-curve">Learning Curve</a></h3>
<ul>
<li>First 20 executions critical for agent adoption</li>
<li>After 20, confidence no longer a limiting factor</li>
<li>Encourages testing new agents early</li>
</ul>
<h3 id="monitoring"><a class="header" href="#monitoring">Monitoring</a></h3>
<ul>
<li>Track which agents reach 20 executions</li>
<li>Identify agents stuck below 20 (poor performance)</li>
<li>Celebrate agents reaching full confidence</li>
</ul>
<hr />
<h2 id="references"><a class="header" href="#references">References</a></h2>
<ul>
<li><code>/crates/vapora-agents/src/learning_profile.rs</code> (implementation)</li>
<li><code>/crates/vapora-agents/src/selector.rs</code> (usage)</li>
<li>ADR-014 (Learning Profiles)</li>
<li>ADR-018 (Swarm Load Balancing)</li>
</ul>
<hr />
<p><strong>Related ADRs</strong>: ADR-014 (Learning Profiles), ADR-018 (Load Balancing), ADR-019 (Temporal History)</p>
</main>
<nav class="nav-wrapper" aria-label="Page navigation">
<!-- Mobile navigation buttons -->
<a rel="prev" href="../../adrs/0016-cost-efficiency-ranking.html" class="mobile-nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
<i class="fa fa-angle-left"></i>
</a>
<a rel="next prefetch" href="../../adrs/0018-swarm-load-balancing.html" class="mobile-nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
<i class="fa fa-angle-right"></i>
</a>
<div style="clear: both"></div>
</nav>
</div>
</div>
<nav class="nav-wide-wrapper" aria-label="Page navigation">
<a rel="prev" href="../../adrs/0016-cost-efficiency-ranking.html" class="nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
<i class="fa fa-angle-left"></i>
</a>
<a rel="next prefetch" href="../../adrs/0018-swarm-load-balancing.html" class="nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
<i class="fa fa-angle-right"></i>
</a>
</nav>
</div>
<script>
// Global flag set before the book scripts below are loaded.
// NOTE(review): presumably enables copy buttons on playground code blocks — confirm in book.js.
window.playground_copyable = true;
</script>
<script src="../elasticlunr.min.js"></script>
<script src="../mark.min.js"></script>
<script src="../searcher.js"></script>
<script src="../clipboard.min.js"></script>
<script src="../highlight.js"></script>
<script src="../book.js"></script>
<!-- Custom JS scripts -->
</div>
</body>
</html>