492 lines
21 KiB
HTML
492 lines
21 KiB
HTML
|
|
<!DOCTYPE HTML>
|
||
|
|
<html lang="en" class="light sidebar-visible" dir="ltr">
|
||
|
|
<head>
|
||
|
|
<!-- Book generated using mdBook -->
|
||
|
|
<meta charset="UTF-8">
|
||
|
|
<title>0016: Cost Efficiency Ranking - VAPORA Platform Documentation</title>
|
||
|
|
|
||
|
|
|
||
|
|
<!-- Custom HTML head -->
|
||
|
|
|
||
|
|
<meta name="description" content="Comprehensive documentation for VAPORA, an intelligent development orchestration platform built entirely in Rust.">
|
||
|
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||
|
|
<meta name="theme-color" content="#ffffff">
|
||
|
|
|
||
|
|
<link rel="icon" href="../favicon.svg">
|
||
|
|
<link rel="shortcut icon" href="../favicon.png">
|
||
|
|
<link rel="stylesheet" href="../css/variables.css">
|
||
|
|
<link rel="stylesheet" href="../css/general.css">
|
||
|
|
<link rel="stylesheet" href="../css/chrome.css">
|
||
|
|
<link rel="stylesheet" href="../css/print.css" media="print">
|
||
|
|
|
||
|
|
<!-- Fonts -->
|
||
|
|
<link rel="stylesheet" href="../FontAwesome/css/font-awesome.css">
|
||
|
|
<link rel="stylesheet" href="../fonts/fonts.css">
|
||
|
|
|
||
|
|
<!-- Highlight.js Stylesheets -->
|
||
|
|
<link rel="stylesheet" id="highlight-css" href="../highlight.css">
|
||
|
|
<link rel="stylesheet" id="tomorrow-night-css" href="../tomorrow-night.css">
|
||
|
|
<link rel="stylesheet" id="ayu-highlight-css" href="../ayu-highlight.css">
|
||
|
|
|
||
|
|
<!-- Custom theme stylesheets -->
|
||
|
|
|
||
|
|
|
||
|
|
<!-- Provide site root and default themes to javascript -->
|
||
|
|
<script>
|
||
|
|
const path_to_root = "../";
|
||
|
|
const default_light_theme = "light";
|
||
|
|
const default_dark_theme = "dark";
|
||
|
|
</script>
|
||
|
|
<!-- Start loading toc.js asap -->
|
||
|
|
<script src="../toc.js"></script>
|
||
|
|
</head>
|
||
|
|
<body>
|
||
|
|
<div id="mdbook-help-container">
|
||
|
|
<div id="mdbook-help-popup">
|
||
|
|
<h2 class="mdbook-help-title">Keyboard shortcuts</h2>
|
||
|
|
<div>
|
||
|
|
<p>Press <kbd>←</kbd> or <kbd>→</kbd> to navigate between chapters</p>
|
||
|
|
<p>Press <kbd>S</kbd> or <kbd>/</kbd> to search in the book</p>
|
||
|
|
<p>Press <kbd>?</kbd> to show this help</p>
|
||
|
|
<p>Press <kbd>Esc</kbd> to hide this help</p>
|
||
|
|
</div>
|
||
|
|
</div>
|
||
|
|
</div>
|
||
|
|
<div id="body-container">
|
||
|
|
<!-- Work around some values being stored in localStorage wrapped in quotes -->
|
||
|
|
<script>
|
||
|
|
// Work around values that were stored in localStorage wrapped in double
// quotes: strip one leading and one trailing quote so later scripts can
// compare the stored value as a plain string.
//
// Each key is handled independently. A key that is absent (getItem returns
// null) is skipped instead of throwing, so a missing theme no longer stops
// the sidebar value from being normalized.
try {
    for (const key of ['mdbook-theme', 'mdbook-sidebar']) {
        const stored = localStorage.getItem(key);
        if (stored !== null && stored.startsWith('"') && stored.endsWith('"')) {
            localStorage.setItem(key, stored.slice(1, stored.length - 1));
        }
    }
} catch (e) {
    // Best effort only: if storage access throws, the page continues with
    // whatever values the later scripts can read.
}
|
||
|
|
</script>
|
||
|
|
|
||
|
|
<!-- Set the theme before any content is loaded, prevents flash -->
|
||
|
|
<script>
|
||
|
|
// Pick the fallback theme from the OS colour-scheme preference.
const default_theme = window.matchMedia("(prefers-color-scheme: dark)").matches
    ? default_dark_theme
    : default_light_theme;

// A stored preference wins; if storage is unreadable or empty, use the fallback.
let theme;
try {
    theme = localStorage.getItem('mdbook-theme');
} catch(e) {
    // theme stays undefined and is replaced below
}
if (theme == null) {
    theme = default_theme;
}

// Swap the pre-rendered 'light' class for the resolved theme and mark the
// document as script-enabled.
const html = document.documentElement;
html.classList.remove('light');
html.classList.add(theme);
html.classList.add("js");
|
||
|
|
</script>
|
||
|
|
|
||
|
|
<input type="checkbox" id="sidebar-toggle-anchor" class="hidden">
|
||
|
|
|
||
|
|
<!-- Hide / unhide sidebar before it is displayed -->
|
||
|
|
<script>
|
||
|
|
// Decide whether the sidebar starts open. Below 1080px of body width it is
// always hidden; otherwise the stored preference is used, defaulting to
// 'visible' when nothing usable is stored.
let sidebar = null;
const sidebar_toggle = document.getElementById("sidebar-toggle-anchor");
if (document.body.clientWidth < 1080) {
    sidebar = 'hidden';
} else {
    try {
        sidebar = localStorage.getItem('mdbook-sidebar');
    } catch(e) {
        // sidebar stays null and falls through to the default
    }
    if (!sidebar) {
        sidebar = 'visible';
    }
}

// Mirror the decision onto the toggle checkbox and the root element's class.
sidebar_toggle.checked = sidebar === 'visible';
html.classList.remove('sidebar-visible');
html.classList.add("sidebar-" + sidebar);
|
||
|
|
</script>
|
||
|
|
|
||
|
|
<nav id="sidebar" class="sidebar" aria-label="Table of contents">
    <!-- populated by js -->
    <mdbook-sidebar-scrollbox class="sidebar-scrollbox"></mdbook-sidebar-scrollbox>
    <noscript>
        <!-- Fallback when scripts are disabled; the title gives the frame an accessible name -->
        <iframe class="sidebar-iframe-outer" src="../toc.html" title="Table of contents"></iframe>
    </noscript>
    <div id="sidebar-resize-handle" class="sidebar-resize-handle">
        <div class="sidebar-resize-indicator"></div>
    </div>
</nav>
|
||
|
|
|
||
|
|
<div id="page-wrapper" class="page-wrapper">
|
||
|
|
|
||
|
|
<div class="page">
|
||
|
|
<div id="menu-bar-hover-placeholder"></div>
|
||
|
|
<div id="menu-bar" class="menu-bar sticky">
|
||
|
|
<div class="left-buttons">
|
||
|
|
<label id="sidebar-toggle" class="icon-button" for="sidebar-toggle-anchor" title="Toggle Table of Contents" aria-label="Toggle Table of Contents" aria-controls="sidebar">
|
||
|
|
<i class="fa fa-bars"></i>
|
||
|
|
</label>
|
||
|
|
<button id="theme-toggle" class="icon-button" type="button" title="Change theme" aria-label="Change theme" aria-haspopup="true" aria-expanded="false" aria-controls="theme-list">
|
||
|
|
<i class="fa fa-paint-brush"></i>
|
||
|
|
</button>
|
||
|
|
<ul id="theme-list" class="theme-popup" aria-label="Themes" role="menu">
|
||
|
|
<li role="none"><button role="menuitem" class="theme" id="default_theme">Auto</button></li>
|
||
|
|
<li role="none"><button role="menuitem" class="theme" id="light">Light</button></li>
|
||
|
|
<li role="none"><button role="menuitem" class="theme" id="rust">Rust</button></li>
|
||
|
|
<li role="none"><button role="menuitem" class="theme" id="coal">Coal</button></li>
|
||
|
|
<li role="none"><button role="menuitem" class="theme" id="navy">Navy</button></li>
|
||
|
|
<li role="none"><button role="menuitem" class="theme" id="ayu">Ayu</button></li>
|
||
|
|
</ul>
|
||
|
|
<button id="search-toggle" class="icon-button" type="button" title="Search (`/`)" aria-label="Toggle Searchbar" aria-expanded="false" aria-keyshortcuts="/ s" aria-controls="searchbar">
|
||
|
|
<i class="fa fa-search"></i>
|
||
|
|
</button>
|
||
|
|
</div>
|
||
|
|
|
||
|
|
<h1 class="menu-title">VAPORA Platform Documentation</h1>
|
||
|
|
|
||
|
|
<div class="right-buttons">
|
||
|
|
<a href="../print.html" title="Print this book" aria-label="Print this book">
|
||
|
|
<i id="print-button" class="fa fa-print"></i>
|
||
|
|
</a>
|
||
|
|
<a href="https://github.com/vapora-platform/vapora" title="Git repository" aria-label="Git repository">
|
||
|
|
<i id="git-repository-button" class="fa fa-github"></i>
|
||
|
|
</a>
|
||
|
|
<a href="https://github.com/vapora-platform/vapora/edit/main/docs/src/../adrs/0016-cost-efficiency-ranking.md" title="Suggest an edit" aria-label="Suggest an edit">
|
||
|
|
<i id="git-edit-button" class="fa fa-edit"></i>
|
||
|
|
</a>
|
||
|
|
|
||
|
|
</div>
|
||
|
|
</div>
|
||
|
|
|
||
|
|
<div id="search-wrapper" class="hidden">
    <form id="searchbar-outer" class="searchbar-outer">
        <!-- aria-label supplies the accessible name; a placeholder alone is not a label -->
        <input type="search" id="searchbar" name="searchbar" placeholder="Search this book ..." aria-label="Search this book" aria-controls="searchresults-outer" aria-describedby="searchresults-header">
    </form>
    <div id="searchresults-outer" class="searchresults-outer hidden">
        <div id="searchresults-header" class="searchresults-header"></div>
        <ul id="searchresults">
        </ul>
    </div>
</div>
|
||
|
|
|
||
|
|
<!-- Apply ARIA attributes after the sidebar and the sidebar toggle button are added to the DOM -->
|
||
|
|
<script>
|
||
|
|
// Reflect the sidebar state (decided by the earlier script) in ARIA
// attributes, and take sidebar links out of the tab order while it is hidden.
// The braces keep the helper constant out of the global scope.
{
    const sidebarShown = sidebar === 'visible';
    document.getElementById('sidebar-toggle').setAttribute('aria-expanded', sidebarShown);
    document.getElementById('sidebar').setAttribute('aria-hidden', !sidebarShown);
    for (const link of document.querySelectorAll('#sidebar a')) {
        link.setAttribute('tabIndex', sidebarShown ? 0 : -1);
    }
}
|
||
|
|
</script>
|
||
|
|
|
||
|
|
<div id="content" class="content">
|
||
|
|
<main>
|
||
|
|
<h1 id="adr-016-cost-efficiency-ranking-algorithm"><a class="header" href="#adr-016-cost-efficiency-ranking-algorithm">ADR-016: Cost Efficiency Ranking Algorithm</a></h1>
|
||
|
|
<p><strong>Status</strong>: Accepted | Implemented
|
||
|
|
<strong>Date</strong>: 2024-11-01
|
||
|
|
<strong>Deciders</strong>: Cost Architecture Team
|
||
|
|
<strong>Technical Story</strong>: Ranking LLM providers by quality-to-cost ratio to prevent cost overfitting</p>
|
||
|
|
<hr />
|
||
|
|
<h2 id="decision"><a class="header" href="#decision">Decision</a></h2>
|
||
|
|
<p>Implement <strong>Cost Efficiency Ranking</strong> using the formula <code>efficiency = (quality_score * 100) / (cost_cents + 1)</code>.</p>
|
||
|
|
<hr />
|
||
|
|
<h2 id="rationale"><a class="header" href="#rationale">Rationale</a></h2>
|
||
|
|
<ol>
|
||
|
|
<li><strong>Prevents Cost Overfitting</strong>: Does not always prefer the cheapest provider (quality matters)</li>
|
||
|
|
<li><strong>Balances Quality and Cost</strong>: An explicit formula that combines both dimensions</li>
|
||
|
|
<li><strong>Handles Zero-Cost</strong>: The <code>+ 1</code> avoids division by zero for Ollama ($0)</li>
|
||
|
|
<li><strong>Normalized Scale</strong>: Scores are comparable across providers</li>
|
||
|
|
</ol>
|
||
|
|
<hr />
|
||
|
|
<h2 id="alternatives-considered"><a class="header" href="#alternatives-considered">Alternatives Considered</a></h2>
|
||
|
|
<h3 id="-quality-only-ignore-cost"><a class="header" href="#-quality-only-ignore-cost">❌ Quality Only (Ignore Cost)</a></h3>
|
||
|
|
<ul>
|
||
|
|
<li><strong>Pros</strong>: Highest quality</li>
|
||
|
|
<li><strong>Cons</strong>: Unbounded costs</li>
|
||
|
|
</ul>
|
||
|
|
<h3 id="-cost-only-ignore-quality"><a class="header" href="#-cost-only-ignore-quality">❌ Cost Only (Ignore Quality)</a></h3>
|
||
|
|
<ul>
|
||
|
|
<li><strong>Pros</strong>: Lowest cost</li>
|
||
|
|
<li><strong>Cons</strong>: Poor quality results</li>
|
||
|
|
</ul>
|
||
|
|
<h3 id="-qualitycost-ratio-chosen"><a class="header" href="#-qualitycost-ratio-chosen">✅ Quality/Cost Ratio (CHOSEN)</a></h3>
|
||
|
|
<ul>
|
||
|
|
<li>Balances both dimensions mathematically</li>
|
||
|
|
</ul>
|
||
|
|
<hr />
|
||
|
|
<h2 id="trade-offs"><a class="header" href="#trade-offs">Trade-offs</a></h2>
|
||
|
|
<p><strong>Pros</strong>:</p>
|
||
|
|
<ul>
|
||
|
|
<li>✅ Single metric for comparison</li>
|
||
|
|
<li>✅ Prevents cost overfitting</li>
|
||
|
|
<li>✅ Prevents quality overfitting</li>
|
||
|
|
<li>✅ Handles zero-cost providers</li>
|
||
|
|
<li>✅ Easy to understand and explain</li>
|
||
|
|
</ul>
|
||
|
|
<p><strong>Cons</strong>:</p>
|
||
|
|
<ul>
|
||
|
|
<li>⚠️ Formula is simplified (assumes linear quality/cost)</li>
|
||
|
|
<li>⚠️ Quality scores must be comparable across providers</li>
|
||
|
|
<li>⚠️ May not capture all cost factors (latency, tokens)</li>
|
||
|
|
</ul>
|
||
|
|
<hr />
|
||
|
|
<h2 id="implementation"><a class="header" href="#implementation">Implementation</a></h2>
|
||
|
|
<p><strong>Quality Scores (Baseline)</strong>:</p>
|
||
|
|
<pre><pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
|
||
|
|
</span><span class="boring">fn main() {
|
||
|
|
</span>// crates/vapora-llm-router/src/cost_ranker.rs
|
||
|
|
|
||
|
|
/// Baseline quality rating for one provider/model pair.
///
/// The text fields are `&'static str` rather than `String` so that entries can
/// be written as literals inside the `const` table below; a `String` field
/// cannot be initialised from a string literal.
pub struct ProviderQuality {
    /// Provider identifier, e.g. `"claude"`.
    provider: &'static str,
    /// Model name for that provider.
    model: &'static str,
    /// Quality rating, 0.0 - 1.0.
    quality_score: f32,
}

/// Baseline quality table, one entry per supported provider/model.
pub const QUALITY_SCORES: &[ProviderQuality] = &[
    ProviderQuality {
        provider: "claude",
        model: "claude-opus",
        quality_score: 0.95, // Best reasoning
    },
    ProviderQuality {
        provider: "openai",
        model: "gpt-4",
        quality_score: 0.92, // Excellent code generation
    },
    ProviderQuality {
        provider: "gemini",
        model: "gemini-2.0-flash",
        quality_score: 0.88, // Good balance
    },
    ProviderQuality {
        provider: "ollama",
        model: "llama2",
        quality_score: 0.75, // Lower quality (local)
    },
];
|
||
|
|
<span class="boring">}</span></code></pre></pre>
|
||
|
|
<p><strong>Cost Efficiency Calculation</strong>:</p>
|
||
|
|
<pre><pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
|
||
|
|
</span><span class="boring">fn main() {
|
||
|
|
</span>/// Efficiency rating of one provider for a given quality and cost.
pub struct CostEfficiency {
    /// Provider the rating belongs to.
    provider: String,
    /// Quality input used for the rating.
    quality_score: f32,
    /// Cost input, in whole cents.
    cost_cents: u32,
    /// Value produced by `calculate` for the two inputs above.
    efficiency_score: f32,
}

impl CostEfficiency {
    /// Returns `(quality * 100) / (cost_cents + 1)`.
    ///
    /// The `+ 1` keeps the divisor non-zero when `cost_cents` is 0.
    /// `provider` is accepted but does not affect the result.
    pub fn calculate(
        provider: &str,
        quality: f32,
        cost_cents: u32,
    ) -> f32 {
        let scaled_quality = quality * 100.0;
        let divisor = cost_cents as f32 + 1.0;
        scaled_quality / divisor
    }

    /// Builds a `CostEfficiency` record, filling `efficiency_score` from
    /// `calculate` with the same arguments.
    pub fn from_provider(
        provider: &str,
        quality: f32,
        cost_cents: u32,
    ) -> Self {
        Self {
            provider: provider.to_owned(),
            quality_score: quality,
            cost_cents,
            efficiency_score: Self::calculate(provider, quality, cost_cents),
        }
    }
}
|
||
|
|
|
||
|
|
// Examples:
|
||
|
|
// Claude Opus: quality=0.95, cost=50¢ → efficiency = (0.95*100)/(50+1) = 1.86
|
||
|
|
// GPT-4: quality=0.92, cost=30¢ → efficiency = (0.92*100)/(30+1) = 2.97
|
||
|
|
// Gemini: quality=0.88, cost=5¢ → efficiency = (0.88*100)/(5+1) = 14.67
|
||
|
|
// Ollama: quality=0.75, cost=0¢ → efficiency = (0.75*100)/(0+1) = 75.0
|
||
|
|
<span class="boring">}</span></code></pre></pre>
|
||
|
|
<p><strong>Ranking by Efficiency</strong>:</p>
|
||
|
|
<pre><pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
|
||
|
|
</span><span class="boring">fn main() {
|
||
|
|
</span>/// Scores every provider for `task_type` and returns `(provider id, efficiency)`
/// pairs ordered from highest to lowest efficiency.
///
/// Stops at, and returns, the first error produced by `get_quality_for_task`.
pub async fn rank_providers_by_efficiency(
    providers: &[LLMClient],
    task_type: &str,
) -> Result<Vec<(String, f32)>> {
    let mut ranked = providers
        .iter()
        .map(|client| -> Result<(String, f32)> {
            let quality = get_quality_for_task(&client.id, task_type)?;
            let per_token = client.cost_per_token();
            let token_estimate = estimate_tokens_for_task(task_type);
            // The float-to-integer cast drops any fractional cent.
            let cents = (per_token * token_estimate as f64) as u32;

            Ok((
                client.id.clone(),
                CostEfficiency::calculate(&client.id, quality, cents),
            ))
        })
        .collect::<Result<Vec<_>>>()?;

    // Highest efficiency first; pairs that cannot be compared are treated as equal.
    ranked.sort_by(|lhs, rhs| match rhs.1.partial_cmp(&lhs.1) {
        Some(order) => order,
        None => std::cmp::Ordering::Equal,
    });

    Ok(ranked)
}
|
||
|
|
<span class="boring">}</span></code></pre></pre>
|
||
|
|
<p><strong>Provider Selection with Efficiency</strong>:</p>
|
||
|
|
<pre><pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
|
||
|
|
</span><span class="boring">fn main() {
|
||
|
|
</span>/// Returns the entry of `available_providers` with the highest efficiency for `task`.
///
/// The returned reference borrows from `available_providers`. With two
/// reference parameters the compiler cannot elide that lifetime, so it is
/// spelled out as `'a`.
///
/// Errors: propagates any error from `rank_providers_by_efficiency`, and
/// returns `Error::NoProvidersAvailable` when the ranking is empty or its
/// top id is not found in `available_providers`.
pub async fn select_best_provider_by_efficiency<'a>(
    task: &Task,
    available_providers: &'a [LLMClient],
) -> Result<&'a LLMClient> {
    let ranked = rank_providers_by_efficiency(available_providers, &task.task_type).await?;

    // The ranking is sorted best-first, so its first entry names the winner.
    let (best_id, _) = ranked.first().ok_or(Error::NoProvidersAvailable)?;

    available_providers
        .iter()
        .find(|candidate| candidate.id == *best_id)
        .ok_or(Error::NoProvidersAvailable)
}
|
||
|
|
<span class="boring">}</span></code></pre></pre>
|
||
|
|
<p><strong>Efficiency Metrics</strong>:</p>
|
||
|
|
<pre><pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
|
||
|
|
</span><span class="boring">fn main() {
|
||
|
|
</span>pub async fn report_efficiency(
|
||
|
|
db: &Surreal<Ws>,
|
||
|
|
) -> Result<String> {
|
||
|
|
// Runs one aggregate query over `executions`, grouped by provider, and hands
// the raw result to `format_efficiency_report` to produce the returned String.
// The query's efficiency column applies the ranking formula to the per-provider
// averages: (avg quality * 100) / (avg cost + 1).
// NOTE(review): `avg()` and `now()` may not be valid function names in the
// target query language (SurrealQL uses `math::mean()` / `time::now()`) —
// confirm the query actually executes.
|
||
|
|
let query = r#"
|
||
|
|
SELECT
|
||
|
|
provider,
|
||
|
|
avg(quality_score) as avg_quality,
|
||
|
|
avg(cost_cents) as avg_cost,
|
||
|
|
(avg(quality_score) * 100) / (avg(cost_cents) + 1) as avg_efficiency
|
||
|
|
FROM executions
|
||
|
|
WHERE timestamp > now() - 1d -- Last 24 hours
|
||
|
|
GROUP BY provider
|
||
|
|
ORDER BY avg_efficiency DESC
|
||
|
|
"#;
|
||
|
|
|
||
|
|
// A failed query is returned to the caller through `?`.
let results = db.query(query).await?;
|
||
|
|
Ok(format_efficiency_report(results))
|
||
|
|
}
|
||
|
|
<span class="boring">}</span></code></pre></pre>
|
||
|
|
<p><strong>Key Files</strong>:</p>
|
||
|
|
<ul>
|
||
|
|
<li><code>/crates/vapora-llm-router/src/cost_ranker.rs</code> (efficiency calculations)</li>
|
||
|
|
<li><code>/crates/vapora-llm-router/src/router.rs</code> (provider selection)</li>
|
||
|
|
<li><code>/crates/vapora-backend/src/services/</code> (cost analysis)</li>
|
||
|
|
</ul>
|
||
|
|
<hr />
|
||
|
|
<h2 id="verification"><a class="header" href="#verification">Verification</a></h2>
|
||
|
|
<pre><code class="language-bash"># Test efficiency calculation with various costs
|
||
|
|
cargo test -p vapora-llm-router test_cost_efficiency_calculation
|
||
|
|
|
||
|
|
# Test zero-cost handling (Ollama)
|
||
|
|
cargo test -p vapora-llm-router test_zero_cost_efficiency
|
||
|
|
|
||
|
|
# Test provider ranking by efficiency
|
||
|
|
cargo test -p vapora-llm-router test_provider_ranking_efficiency
|
||
|
|
|
||
|
|
# Test efficiency comparison across providers
|
||
|
|
cargo test -p vapora-llm-router test_efficiency_comparison
|
||
|
|
|
||
|
|
# Integration: select best provider by efficiency
|
||
|
|
cargo test -p vapora-llm-router test_select_by_efficiency
|
||
|
|
</code></pre>
|
||
|
|
<p><strong>Expected Output</strong>:</p>
|
||
|
|
<ul>
|
||
|
|
<li>Claude Opus ranked lowest on raw efficiency (its quality lead does not offset the higher cost: 1.86 in the example above)</li>
|
||
|
|
<li>Ollama ranked very high (zero cost, decent quality)</li>
|
||
|
|
<li>Gemini ranked between (good efficiency)</li>
|
||
|
|
<li>GPT-4 ranked based on balanced cost/quality</li>
|
||
|
|
<li>Rankings consistent across multiple runs</li>
|
||
|
|
</ul>
|
||
|
|
<hr />
|
||
|
|
<h2 id="consequences"><a class="header" href="#consequences">Consequences</a></h2>
|
||
|
|
<h3 id="cost-optimization"><a class="header" href="#cost-optimization">Cost Optimization</a></h3>
|
||
|
|
<ul>
|
||
|
|
<li>Prevents pure cost minimization (quality matters)</li>
|
||
|
|
<li>Prevents pure quality maximization (cost matters)</li>
|
||
|
|
<li>Balanced strategy emerges</li>
|
||
|
|
</ul>
|
||
|
|
<h3 id="provider-selection"><a class="header" href="#provider-selection">Provider Selection</a></h3>
|
||
|
|
<ul>
|
||
|
|
<li>No single provider always selected (depends on task)</li>
|
||
|
|
<li>Ollama used frequently (high efficiency)</li>
|
||
|
|
<li>Premium providers used for high-quality tasks only</li>
|
||
|
|
</ul>
|
||
|
|
<h3 id="reporting"><a class="header" href="#reporting">Reporting</a></h3>
|
||
|
|
<ul>
|
||
|
|
<li>Efficiency metrics tracked over time</li>
|
||
|
|
<li>Identify providers underperforming cost-wise</li>
|
||
|
|
<li>Guide budget allocation</li>
|
||
|
|
</ul>
|
||
|
|
<h3 id="monitoring"><a class="header" href="#monitoring">Monitoring</a></h3>
|
||
|
|
<ul>
|
||
|
|
<li>Alert if efficiency drops for any provider</li>
|
||
|
|
<li>Track efficiency trends</li>
|
||
|
|
<li>Recommend provider switches if efficiency improves</li>
|
||
|
|
</ul>
|
||
|
|
<hr />
|
||
|
|
<h2 id="references"><a class="header" href="#references">References</a></h2>
|
||
|
|
<ul>
|
||
|
|
<li><code>/crates/vapora-llm-router/src/cost_ranker.rs</code> (implementation)</li>
|
||
|
|
<li><code>/crates/vapora-llm-router/src/router.rs</code> (usage)</li>
|
||
|
|
<li>ADR-007 (Multi-Provider LLM)</li>
|
||
|
|
<li>ADR-015 (Budget Enforcement)</li>
|
||
|
|
</ul>
|
||
|
|
<hr />
|
||
|
|
<p><strong>Related ADRs</strong>: ADR-007 (Multi-Provider), ADR-015 (Budget), ADR-012 (Routing Tiers)</p>
|
||
|
|
|
||
|
|
</main>
|
||
|
|
|
||
|
|
<nav class="nav-wrapper" aria-label="Page navigation">
|
||
|
|
<!-- Mobile navigation buttons -->
|
||
|
|
<a rel="prev" href="../../adrs/0015-budget-enforcement.html" class="mobile-nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
|
||
|
|
<i class="fa fa-angle-left"></i>
|
||
|
|
</a>
|
||
|
|
|
||
|
|
<a rel="next prefetch" href="../../adrs/0017-confidence-weighting.html" class="mobile-nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
|
||
|
|
<i class="fa fa-angle-right"></i>
|
||
|
|
</a>
|
||
|
|
|
||
|
|
<div style="clear: both"></div>
|
||
|
|
</nav>
|
||
|
|
</div>
|
||
|
|
</div>
|
||
|
|
|
||
|
|
<nav class="nav-wide-wrapper" aria-label="Page navigation">
|
||
|
|
<a rel="prev" href="../../adrs/0015-budget-enforcement.html" class="nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
|
||
|
|
<i class="fa fa-angle-left"></i>
|
||
|
|
</a>
|
||
|
|
|
||
|
|
<a rel="next prefetch" href="../../adrs/0017-confidence-weighting.html" class="nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
|
||
|
|
<i class="fa fa-angle-right"></i>
|
||
|
|
</a>
|
||
|
|
</nav>
|
||
|
|
|
||
|
|
</div>
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
<script>
|
||
|
|
window.playground_copyable = true;
|
||
|
|
</script>
|
||
|
|
|
||
|
|
|
||
|
|
<script src="../elasticlunr.min.js"></script>
|
||
|
|
<script src="../mark.min.js"></script>
|
||
|
|
<script src="../searcher.js"></script>
|
||
|
|
|
||
|
|
<script src="../clipboard.min.js"></script>
|
||
|
|
<script src="../highlight.js"></script>
|
||
|
|
<script src="../book.js"></script>
|
||
|
|
|
||
|
|
<!-- Custom JS scripts -->
|
||
|
|
|
||
|
|
|
||
|
|
</div>
|
||
|
|
</body>
|
||
|
|
</html>
|