Vapora/docs/adrs/0018-swarm-load-balancing.html
Jesús Pérez 7110ffeea2
Some checks failed
Rust CI / Security Audit (push) Has been cancelled
Rust CI / Check + Test + Lint (nightly) (push) Has been cancelled
Rust CI / Check + Test + Lint (stable) (push) Has been cancelled
chore: extend doc: adr, tutorials, operations, etc
2026-01-12 03:32:47 +00:00

475 lines
20 KiB
HTML

<!DOCTYPE HTML>
<html lang="en" class="light sidebar-visible" dir="ltr">
<head>
<!-- Book generated using mdBook -->
<meta charset="UTF-8">
<title>0018: Swarm Load Balancing - VAPORA Platform Documentation</title>
<!-- Custom HTML head -->
<meta name="description" content="Comprehensive documentation for VAPORA, an intelligent development orchestration platform built entirely in Rust.">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="theme-color" content="#ffffff">
<link rel="icon" href="../favicon.svg">
<link rel="shortcut icon" href="../favicon.png">
<link rel="stylesheet" href="../css/variables.css">
<link rel="stylesheet" href="../css/general.css">
<link rel="stylesheet" href="../css/chrome.css">
<link rel="stylesheet" href="../css/print.css" media="print">
<!-- Fonts -->
<link rel="stylesheet" href="../FontAwesome/css/font-awesome.css">
<link rel="stylesheet" href="../fonts/fonts.css">
<!-- Highlight.js Stylesheets -->
<link rel="stylesheet" id="highlight-css" href="../highlight.css">
<link rel="stylesheet" id="tomorrow-night-css" href="../tomorrow-night.css">
<link rel="stylesheet" id="ayu-highlight-css" href="../ayu-highlight.css">
<!-- Custom theme stylesheets -->
<!-- Provide site root and default themes to javascript -->
<script>
// Relative prefix from this page up to the book root; the stylesheet and script
// URLs on this page use the same "../" prefix. Presumably read by the external
// scripts loaded later (toc.js, book.js) - not visible here, confirm before renaming.
const path_to_root = "../";
// Fallback theme when nothing is stored and the OS does not prefer a dark colour scheme.
const default_light_theme = "light";
// Fallback theme when nothing is stored and the OS prefers a dark colour scheme.
const default_dark_theme = "dark";
</script>
<!-- Start loading toc.js asap -->
<script src="../toc.js"></script>
</head>
<body>
<div id="mdbook-help-container">
<div id="mdbook-help-popup">
<h2 class="mdbook-help-title">Keyboard shortcuts</h2>
<div>
<p>Press <kbd>←</kbd> or <kbd>→</kbd> to navigate between chapters</p>
<p>Press <kbd>S</kbd> or <kbd>/</kbd> to search in the book</p>
<p>Press <kbd>?</kbd> to show this help</p>
<p>Press <kbd>Esc</kbd> to hide this help</p>
</div>
</div>
</div>
<div id="body-container">
<!-- Work around some values being stored in localStorage wrapped in quotes -->
<script>
// Older values may have been stored wrapped in double quotes; strip one
// leading and one trailing quote from each setting.
// Each key is checked on its own: getItem returns null for a key that was
// never set, and a missing theme must not prevent the sidebar value from
// being cleaned up (or vice versa).
try {
    for (const key of ['mdbook-theme', 'mdbook-sidebar']) {
        const stored = localStorage.getItem(key);
        if (stored !== null && stored.startsWith('"') && stored.endsWith('"')) {
            localStorage.setItem(key, stored.slice(1, stored.length - 1));
        }
    }
} catch (e) {
    // Storage can be unavailable (access throws); this clean-up is best-effort.
}
</script>
<!-- Set the theme before any content is loaded, prevents flash -->
<script>
// Fallback theme follows the OS colour-scheme preference.
const default_theme = window.matchMedia("(prefers-color-scheme: dark)").matches
    ? default_dark_theme
    : default_light_theme;
// A theme stored under 'mdbook-theme' wins over the fallback.
let theme;
try {
    theme = localStorage.getItem('mdbook-theme');
} catch (e) {
    // Storage unavailable: theme stays undefined and the fallback applies.
}
if (theme == null) {
    // Covers both null (key not set) and undefined (read failed).
    theme = default_theme;
}
// Swap the statically rendered 'light' class for the resolved theme and
// flag that JavaScript is running.
const html = document.documentElement;
html.classList.remove('light');
html.classList.add(theme, "js");
</script>
<input type="checkbox" id="sidebar-toggle-anchor" class="hidden">
<!-- Hide / unhide sidebar before it is displayed -->
<script>
// Checkbox whose checked state drives sidebar visibility.
const sidebar_toggle = document.getElementById("sidebar-toggle-anchor");
// Narrow viewports always start with the sidebar hidden; wide ones use the
// stored preference and default to visible.
let sidebar = 'hidden';
if (document.body.clientWidth >= 1080) {
    let stored = null;
    try {
        stored = localStorage.getItem('mdbook-sidebar');
    } catch (e) {
        // Storage unavailable: fall back to the default below.
    }
    sidebar = stored || 'visible';
}
sidebar_toggle.checked = (sidebar === 'visible');
// Replace the statically rendered class with the resolved state.
html.classList.remove('sidebar-visible');
html.classList.add("sidebar-" + sidebar);
</script>
<nav id="sidebar" class="sidebar" aria-label="Table of contents">
<!-- populated by js -->
<mdbook-sidebar-scrollbox class="sidebar-scrollbox"></mdbook-sidebar-scrollbox>
<noscript>
<iframe class="sidebar-iframe-outer" src="../toc.html" title="Table of contents"></iframe>
</noscript>
<div id="sidebar-resize-handle" class="sidebar-resize-handle">
<div class="sidebar-resize-indicator"></div>
</div>
</nav>
<div id="page-wrapper" class="page-wrapper">
<div class="page">
<div id="menu-bar-hover-placeholder"></div>
<div id="menu-bar" class="menu-bar sticky">
<div class="left-buttons">
<label id="sidebar-toggle" class="icon-button" for="sidebar-toggle-anchor" title="Toggle Table of Contents" aria-label="Toggle Table of Contents" aria-controls="sidebar">
<i class="fa fa-bars"></i>
</label>
<button id="theme-toggle" class="icon-button" type="button" title="Change theme" aria-label="Change theme" aria-haspopup="true" aria-expanded="false" aria-controls="theme-list">
<i class="fa fa-paint-brush"></i>
</button>
<ul id="theme-list" class="theme-popup" aria-label="Themes" role="menu">
<li role="none"><button role="menuitem" class="theme" id="default_theme">Auto</button></li>
<li role="none"><button role="menuitem" class="theme" id="light">Light</button></li>
<li role="none"><button role="menuitem" class="theme" id="rust">Rust</button></li>
<li role="none"><button role="menuitem" class="theme" id="coal">Coal</button></li>
<li role="none"><button role="menuitem" class="theme" id="navy">Navy</button></li>
<li role="none"><button role="menuitem" class="theme" id="ayu">Ayu</button></li>
</ul>
<button id="search-toggle" class="icon-button" type="button" title="Search (`/`)" aria-label="Toggle Searchbar" aria-expanded="false" aria-keyshortcuts="/ s" aria-controls="searchbar">
<i class="fa fa-search"></i>
</button>
</div>
<h1 class="menu-title">VAPORA Platform Documentation</h1>
<div class="right-buttons">
<a href="../print.html" title="Print this book" aria-label="Print this book">
<i id="print-button" class="fa fa-print"></i>
</a>
<a href="https://github.com/vapora-platform/vapora" title="Git repository" aria-label="Git repository">
<i id="git-repository-button" class="fa fa-github"></i>
</a>
<a href="https://github.com/vapora-platform/vapora/edit/main/docs/src/../adrs/0018-swarm-load-balancing.md" title="Suggest an edit" aria-label="Suggest an edit">
<i id="git-edit-button" class="fa fa-edit"></i>
</a>
</div>
</div>
<div id="search-wrapper" class="hidden">
<form id="searchbar-outer" class="searchbar-outer">
<input type="search" id="searchbar" name="searchbar" placeholder="Search this book ..." aria-controls="searchresults-outer" aria-describedby="searchresults-header">
</form>
<div id="searchresults-outer" class="searchresults-outer hidden">
<div id="searchresults-header" class="searchresults-header"></div>
<ul id="searchresults">
</ul>
</div>
</div>
<!-- Apply ARIA attributes after the sidebar and the sidebar toggle button are added to the DOM -->
<script>
{
    // Mirror the resolved sidebar state into ARIA attributes and tab order.
    // The block scope keeps the helper constant out of the global scope.
    const sidebar_is_visible = sidebar === 'visible';
    document.getElementById('sidebar-toggle').setAttribute('aria-expanded', sidebar_is_visible);
    document.getElementById('sidebar').setAttribute('aria-hidden', !sidebar_is_visible);
    // Links inside a hidden sidebar are taken out of the tab order.
    for (const link of document.querySelectorAll('#sidebar a')) {
        link.setAttribute('tabIndex', sidebar_is_visible ? 0 : -1);
    }
}
</script>
<div id="content" class="content">
<main>
<h1 id="adr-018-swarm-load-balanced-task-assignment"><a class="header" href="#adr-018-swarm-load-balanced-task-assignment">ADR-018: Swarm Load-Balanced Task Assignment</a></h1>
<p><strong>Status</strong>: Accepted | Implemented<br>
<strong>Date</strong>: 2024-11-01<br>
<strong>Deciders</strong>: Swarm Coordination Team<br>
<strong>Technical Story</strong>: Distributing tasks across agents considering both capability and current load</p>
<hr />
<h2 id="decision"><a class="header" href="#decision">Decision</a></h2>
<p>Implement <strong>load-balanced task assignment</strong> using the formula <code>assignment_score = success_rate / (1 + load)</code>.</p>
<hr />
<h2 id="rationale"><a class="header" href="#rationale">Rationale</a></h2>
<ol>
<li><strong>Success Rate</strong>: Select agents that have succeeded on similar tasks</li>
<li><strong>Load Factor</strong>: Balance expertise against availability (do not overload agents)</li>
<li><strong>Single Formula</strong>: Combines both dimensions into a single comparable metric</li>
<li><strong>Prevents Concentration</strong>: Keeps all tasks from going to a single agent</li>
</ol>
<hr />
<h2 id="alternatives-considered"><a class="header" href="#alternatives-considered">Alternatives Considered</a></h2>
<h3 id="-success-rate-only"><a class="header" href="#-success-rate-only">❌ Success Rate Only</a></h3>
<ul>
<li><strong>Pros</strong>: Selects the best performer</li>
<li><strong>Cons</strong>: Concentrates all tasks on one agent, which becomes overloaded</li>
</ul>
<h3 id="-round-robin-equal-distribution"><a class="header" href="#-round-robin-equal-distribution">❌ Round-Robin (Equal Distribution)</a></h3>
<ul>
<li><strong>Pros</strong>: Simple, fair distribution</li>
<li><strong>Cons</strong>: Ignores capability; bad agents get the same load</li>
</ul>
<h3 id="-success-rate--1--load-chosen"><a class="header" href="#-success-rate--1--load-chosen">✅ Success Rate / (1 + Load) (CHOSEN)</a></h3>
<ul>
<li>Balances expertise with availability</li>
</ul>
<hr />
<h2 id="trade-offs"><a class="header" href="#trade-offs">Trade-offs</a></h2>
<p><strong>Pros</strong>:</p>
<ul>
<li>✅ Considers both capability and availability</li>
<li>✅ Simple, single metric for comparison</li>
<li>✅ Prevents overloading high-performing agents</li>
<li>✅ Encourages fair distribution</li>
</ul>
<p><strong>Cons</strong>:</p>
<ul>
<li>⚠️ Formula is simplified (linear load penalty)</li>
<li>⚠️ May sacrifice quality for load balance</li>
<li>⚠️ Requires real-time load tracking</li>
</ul>
<hr />
<h2 id="implementation"><a class="header" href="#implementation">Implementation</a></h2>
<p><strong>Agent Load Tracking</strong>:</p>
<pre><pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>// crates/vapora-swarm/src/coordinator.rs
/// Per-agent state consulted when scoring candidates for task assignment.
pub struct AgentState {
/// Identifier returned to the caller when this agent is selected.
pub id: String,
pub role: AgentRole,
pub status: AgentStatus, // Ready, Busy, Offline
/// Tasks currently counted against this agent; numerator of the load ratio.
pub in_flight_tasks: u32,
/// Denominator of the load ratio (in_flight_tasks / max_concurrent).
pub max_concurrent: u32,
pub success_rate: f32, // [0.0, 1.0]
pub avg_latency_ms: u32,
}
impl AgentState {
    /// Current load as the ratio of in-flight tasks to capacity
    /// (0.0 = idle, 1.0 = at capacity).
    ///
    /// An agent with `max_concurrent == 0` has no capacity, so it is reported
    /// as fully loaded. Dividing by zero would yield NaN or infinity, which
    /// breaks score comparisons and averaged metrics.
    pub fn current_load(&amp;self) -&gt; f32 {
        if self.max_concurrent == 0 {
            return 1.0;
        }
        (self.in_flight_tasks as f32) / (self.max_concurrent as f32)
    }

    /// Assignment score: success_rate / (1 + load)
    /// Higher = better candidate for task
    pub fn assignment_score(&amp;self) -&gt; f32 {
        self.success_rate / (1.0 + self.current_load())
    }
}
<span class="boring">}</span></code></pre></pre>
<p><strong>Task Assignment Logic</strong>:</p>
<pre><pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>/// Selects the online agent with the highest assignment score, counts the
/// new task against it and returns its id.
///
/// `agents` is borrowed mutably because the selected agent's
/// `in_flight_tasks` counter is incremented here.
///
/// Returns `Error::NoAgentsAvailable` when no agent is `Ready` or `Busy`.
pub async fn assign_task_to_best_agent(
    task: &amp;Task,
    agents: &amp;mut [AgentState],
) -&gt; Result&lt;String&gt; {
    // Eligible agents are the online ones (Ready or Busy).
    // NOTE(review): `task` is not consulted, so no role matching happens here.
    let mut best: Option&lt;(usize, f32)&gt; = None;
    for (idx, agent) in agents.iter().enumerate() {
        let online = agent.status == AgentStatus::Ready || agent.status == AgentStatus::Busy;
        if !online {
            continue;
        }
        let score = agent.assignment_score();
        // Strict comparison keeps the earliest agent on ties, the same pick
        // a stable descending sort would make.
        let is_better = match best {
            Some((_, top)) =&gt; score &gt; top,
            None =&gt; true,
        };
        if is_better {
            best = Some((idx, score));
        }
    }

    let best_idx = match best {
        Some((idx, _)) =&gt; idx,
        None =&gt; return Err(Error::NoAgentsAvailable),
    };

    // Increment in-flight counter so the next assignment sees the new load
    let selected = &amp;mut agents[best_idx];
    selected.in_flight_tasks += 1;
    Ok(selected.id.clone())
}
<span class="boring">}</span></code></pre></pre>
<p><strong>Load Calculation Examples</strong>:</p>
<pre><code>Agent A: success_rate = 0.95, in_flight = 2, max_concurrent = 5
load = 2/5 = 0.4
score = 0.95 / (1 + 0.4) = 0.95 / 1.4 = 0.68
Agent B: success_rate = 0.85, in_flight = 0, max_concurrent = 5
load = 0/5 = 0.0
score = 0.85 / (1 + 0.0) = 0.85 / 1.0 = 0.85 ← Selected
Agent C: success_rate = 0.90, in_flight = 5, max_concurrent = 5
load = 5/5 = 1.0
score = 0.90 / (1 + 1.0) = 0.90 / 2.0 = 0.45
</code></pre>
<p><strong>Real-Time Metrics</strong>:</p>
<pre><pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>/// Aggregates per-agent state into swarm-wide counters and averages.
///
/// For an empty `agents` slice every counter is 0 and both averages are 0.0.
pub async fn collect_swarm_metrics(
    agents: &amp;[AgentState],
) -&gt; SwarmMetrics {
    // Clamp the divisor to 1 so an empty slice gives 0.0 / 1.0 = 0.0
    // rather than 0.0 / 0.0 = NaN.
    let divisor = agents.len().max(1) as f32;
    SwarmMetrics {
        total_agents: agents.len(),
        // NOTE(review): idle/busy are derived from in_flight_tasks only, so an
        // Offline agent with no tasks is also counted as idle - confirm intent.
        idle_agents: agents.iter().filter(|a| a.in_flight_tasks == 0).count(),
        busy_agents: agents.iter().filter(|a| a.in_flight_tasks &gt; 0).count(),
        offline_agents: agents.iter().filter(|a| a.status == AgentStatus::Offline).count(),
        total_in_flight: agents.iter().map(|a| a.in_flight_tasks).sum::&lt;u32&gt;(),
        avg_success_rate: agents.iter().map(|a| a.success_rate).sum::&lt;f32&gt;() / divisor,
        avg_load: agents.iter().map(|a| a.current_load()).sum::&lt;f32&gt;() / divisor,
    }
}
<span class="boring">}</span></code></pre></pre>
<p><strong>Prometheus Metrics</strong>:</p>
<pre><pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>// Register metrics
lazy_static::lazy_static! {
    // Count of task assignments performed.
    static ref TASK_ASSIGNMENTS: Counter = Counter::new(
        "vapora_task_assignments_total",
        "Total task assignments"
    ).unwrap();
    // Load of the most recently selected agent.
    static ref AGENT_LOAD: Gauge = Gauge::new(
        "vapora_agent_current_load",
        "Current agent load (0-1)"
    ).unwrap();
    // Histogram has no (name, help) constructor; it is built from
    // HistogramOpts, which here keeps the default bucket boundaries.
    static ref ASSIGNMENT_SCORE: Histogram = Histogram::with_opts(
        prometheus::HistogramOpts::new(
            "vapora_assignment_score",
            "Assignment score distribution"
        )
    ).unwrap();
}
// Record metrics
TASK_ASSIGNMENTS.inc();
// Gauge::set and Histogram::observe take f64, while the agent methods
// return f32; f64::from widens without loss.
AGENT_LOAD.set(f64::from(best_agent.current_load()));
ASSIGNMENT_SCORE.observe(f64::from(best_agent.assignment_score()));
<span class="boring">}</span></code></pre></pre>
<p><strong>Key Files</strong>:</p>
<ul>
<li><code>/crates/vapora-swarm/src/coordinator.rs</code> (assignment logic)</li>
<li><code>/crates/vapora-swarm/src/metrics.rs</code> (Prometheus metrics)</li>
<li><code>/crates/vapora-backend/src/api/</code> (task creation triggers assignment)</li>
</ul>
<hr />
<h2 id="verification"><a class="header" href="#verification">Verification</a></h2>
<pre><code class="language-bash"># Test assignment score calculation
cargo test -p vapora-swarm test_assignment_score_calculation
# Test load factor impact
cargo test -p vapora-swarm test_load_factor_impact
# Test best agent selection
cargo test -p vapora-swarm test_select_best_agent
# Test fair distribution (no concentration)
cargo test -p vapora-swarm test_fair_distribution
# Integration: assign multiple tasks sequentially
cargo test -p vapora-swarm test_assignment_sequence
# Load balancing under stress
cargo test -p vapora-swarm test_load_balancing_stress
</code></pre>
<p><strong>Expected Output</strong>:</p>
<ul>
<li>Agents with high success_rate + low load selected first</li>
<li>Load increases after each assignment</li>
<li>Fair distribution across agents</li>
<li>No single agent receiving all tasks</li>
<li>Metrics tracked accurately</li>
<li>Scores properly reflect trade-off</li>
</ul>
<hr />
<h2 id="consequences"><a class="header" href="#consequences">Consequences</a></h2>
<h3 id="fairness"><a class="header" href="#fairness">Fairness</a></h3>
<ul>
<li>High-performing agents get more tasks (deserved)</li>
<li>Overloaded agents get fewer tasks (protection)</li>
<li>Fair distribution emerges automatically</li>
</ul>
<h3 id="performance"><a class="header" href="#performance">Performance</a></h3>
<ul>
<li>Task latency depends on agent load (may queue)</li>
<li>Peak throughput = sum of all agent max_concurrent</li>
<li>SLA contracts respect per-agent limits</li>
</ul>
<h3 id="scaling"><a class="header" href="#scaling">Scaling</a></h3>
<ul>
<li>Adding agents increases total capacity</li>
<li>Load automatically redistributes</li>
<li>Horizontal scaling works naturally</li>
</ul>
<h3 id="monitoring"><a class="header" href="#monitoring">Monitoring</a></h3>
<ul>
<li>Track assignment distribution</li>
<li>Alert if concentration detected</li>
<li>Identify bottleneck agents</li>
</ul>
<hr />
<h2 id="references"><a class="header" href="#references">References</a></h2>
<ul>
<li><code>/crates/vapora-swarm/src/coordinator.rs</code> (implementation)</li>
<li><code>/crates/vapora-swarm/src/metrics.rs</code> (metrics collection)</li>
<li>ADR-014 (Learning Profiles)</li>
<li>ADR-018 (This ADR)</li>
</ul>
<hr />
<p><strong>Related ADRs</strong>: ADR-014 (Learning Profiles), ADR-020 (Audit Trail)</p>
</main>
<nav class="nav-wrapper" aria-label="Page navigation">
<!-- Mobile navigation buttons -->
<a rel="prev" href="../../adrs/0017-confidence-weighting.html" class="mobile-nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
<i class="fa fa-angle-left"></i>
</a>
<a rel="next prefetch" href="../../adrs/0019-temporal-execution-history.html" class="mobile-nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
<i class="fa fa-angle-right"></i>
</a>
<div style="clear: both"></div>
</nav>
</div>
</div>
<nav class="nav-wide-wrapper" aria-label="Page navigation">
<a rel="prev" href="../../adrs/0017-confidence-weighting.html" class="nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
<i class="fa fa-angle-left"></i>
</a>
<a rel="next prefetch" href="../../adrs/0019-temporal-execution-history.html" class="nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
<i class="fa fa-angle-right"></i>
</a>
</nav>
</div>
<script>
window.playground_copyable = true;
</script>
<script src="../elasticlunr.min.js"></script>
<script src="../mark.min.js"></script>
<script src="../searcher.js"></script>
<script src="../clipboard.min.js"></script>
<script src="../highlight.js"></script>
<script src="../book.js"></script>
<!-- Custom JS scripts -->
</div>
</body>
</html>