567 lines
25 KiB
HTML
567 lines
25 KiB
HTML
|
|
<!DOCTYPE HTML>
|
||
|
|
<html lang="en" class="rust sidebar-visible" dir="ltr">
|
||
|
|
<head>
|
||
|
|
<!-- Book generated using mdBook -->
|
||
|
|
<meta charset="UTF-8">
|
||
|
|
<title>ADR-002: FastEmbed via AI Providers - KOGRAL Documentation</title>
|
||
|
|
|
||
|
|
|
||
|
|
<!-- Custom HTML head -->
|
||
|
|
|
||
|
|
<meta name="description" content="Complete documentation for KOGRAL - Git-native knowledge graphs for developer teams">
|
||
|
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||
|
|
<meta name="theme-color" content="#ffffff">
|
||
|
|
|
||
|
|
<link rel="icon" href="../../favicon.svg">
|
||
|
|
<link rel="shortcut icon" href="../../favicon.png">
|
||
|
|
<link rel="stylesheet" href="../../css/variables.css">
|
||
|
|
<link rel="stylesheet" href="../../css/general.css">
|
||
|
|
<link rel="stylesheet" href="../../css/chrome.css">
|
||
|
|
<link rel="stylesheet" href="../../css/print.css" media="print">
|
||
|
|
|
||
|
|
<!-- Fonts -->
|
||
|
|
<link rel="stylesheet" href="../../FontAwesome/css/font-awesome.css">
|
||
|
|
<link rel="stylesheet" href="../../fonts/fonts.css">
|
||
|
|
|
||
|
|
<!-- Highlight.js Stylesheets -->
|
||
|
|
<link rel="stylesheet" id="highlight-css" href="../../highlight.css">
|
||
|
|
<link rel="stylesheet" id="tomorrow-night-css" href="../../tomorrow-night.css">
|
||
|
|
<link rel="stylesheet" id="ayu-highlight-css" href="../../ayu-highlight.css">
|
||
|
|
|
||
|
|
<!-- Custom theme stylesheets -->
|
||
|
|
|
||
|
|
|
||
|
|
<!-- Provide site root and default themes to javascript -->
|
||
|
|
<script>
|
||
|
|
// Relative path from this page to the book root; the same "../../" prefix is
// used by every stylesheet and script URL on this page.
const path_to_root = "../../";
|
||
|
|
// Theme applied when no theme is stored and the OS does not prefer a dark scheme.
const default_light_theme = "rust";
|
||
|
|
// Theme applied when no theme is stored and the OS prefers a dark scheme.
const default_dark_theme = "navy";
|
||
|
|
</script>
|
||
|
|
<!-- Start loading toc.js asap -->
|
||
|
|
<script src="../../toc.js"></script>
|
||
|
|
</head>
|
||
|
|
<body>
|
||
|
|
<div id="mdbook-help-container">
|
||
|
|
<div id="mdbook-help-popup">
|
||
|
|
<h2 class="mdbook-help-title">Keyboard shortcuts</h2>
|
||
|
|
<div>
|
||
|
|
<p>Press <kbd>←</kbd> or <kbd>→</kbd> to navigate between chapters</p>
|
||
|
|
<p>Press <kbd>S</kbd> or <kbd>/</kbd> to search in the book</p>
|
||
|
|
<p>Press <kbd>?</kbd> to show this help</p>
|
||
|
|
<p>Press <kbd>Esc</kbd> to hide this help</p>
|
||
|
|
</div>
|
||
|
|
</div>
|
||
|
|
</div>
|
||
|
|
<div id="body-container">
|
||
|
|
<!-- Work around some values being stored in localStorage wrapped in quotes -->
|
||
|
|
<script>
|
||
|
|
// Normalise preferences that were stored wrapped in literal double quotes
// (e.g. '"navy"' instead of 'navy') so later scripts can use the values as-is.
//
// Each key is handled independently: a missing 'mdbook-theme' entry must not
// prevent 'mdbook-sidebar' from being cleaned up.
try {
    for (const key of ['mdbook-theme', 'mdbook-sidebar']) {
        const stored = localStorage.getItem(key);
        // getItem() yields null for a key that was never written; skip it
        // instead of calling string methods on null.
        if (stored === null) { continue; }
        // Require at least two characters so a lone '"' is not treated as a
        // quoted (and then emptied) value.
        if (stored.length >= 2 && stored.startsWith('"') && stored.endsWith('"')) {
            localStorage.setItem(key, stored.slice(1, -1));
        }
    }
} catch (e) {
    // Best effort only: if storage access throws, leave stored values untouched.
}
|
||
|
|
</script>
|
||
|
|
|
||
|
|
<!-- Set the theme before any content is loaded, prevents flash -->
|
||
|
|
<script>
|
||
|
|
// Resolve the theme to show: a stored preference wins, otherwise follow the
// OS-level light/dark preference.
const default_theme = window.matchMedia("(prefers-color-scheme: dark)").matches
    ? default_dark_theme
    : default_light_theme;

let theme;
try {
    theme = localStorage.getItem('mdbook-theme');
} catch (e) { }

// Loose equality matches both null (key absent) and undefined (read failed).
if (theme == null) {
    theme = default_theme;
}

// Replace the theme class baked into the <html> tag with the resolved one,
// and mark the document as script-enabled.
const html = document.documentElement;
html.classList.remove('rust');
html.classList.add(theme, "js");
|
||
|
|
</script>
|
||
|
|
|
||
|
|
<input type="checkbox" id="sidebar-toggle-anchor" class="hidden">
|
||
|
|
|
||
|
|
<!-- Hide / unhide sidebar before it is displayed -->
|
||
|
|
<script>
|
||
|
|
// Decide whether the sidebar starts open. Narrow viewports always start with
// it hidden; wider ones use the stored preference and fall back to 'visible'.
const sidebar_toggle = document.getElementById("sidebar-toggle-anchor");
let sidebar = null;

if (document.body.clientWidth < 1080) {
    sidebar = 'hidden';
} else {
    try {
        sidebar = localStorage.getItem('mdbook-sidebar');
    } catch (e) { }
    if (!sidebar) {
        sidebar = 'visible';
    }
}

// Mirror the decision onto the checkbox and the <html> class list.
sidebar_toggle.checked = (sidebar === 'visible');
html.classList.remove('sidebar-visible');
html.classList.add(`sidebar-${sidebar}`);
|
||
|
|
</script>
|
||
|
|
|
||
|
|
<nav id="sidebar" class="sidebar" aria-label="Table of contents">
|
||
|
|
<!-- populated by js -->
|
||
|
|
<mdbook-sidebar-scrollbox class="sidebar-scrollbox"></mdbook-sidebar-scrollbox>
|
||
|
|
<noscript>
|
||
|
|
<iframe class="sidebar-iframe-outer" src="../../toc.html" title="Table of contents"></iframe>
|
||
|
|
</noscript>
|
||
|
|
<div id="sidebar-resize-handle" class="sidebar-resize-handle">
|
||
|
|
<div class="sidebar-resize-indicator"></div>
|
||
|
|
</div>
|
||
|
|
</nav>
|
||
|
|
|
||
|
|
<div id="page-wrapper" class="page-wrapper">
|
||
|
|
|
||
|
|
<div class="page">
|
||
|
|
<div id="menu-bar-hover-placeholder"></div>
|
||
|
|
<div id="menu-bar" class="menu-bar sticky">
|
||
|
|
<div class="left-buttons">
|
||
|
|
<label id="sidebar-toggle" class="icon-button" for="sidebar-toggle-anchor" title="Toggle Table of Contents" aria-label="Toggle Table of Contents" aria-controls="sidebar">
|
||
|
|
<i class="fa fa-bars"></i>
|
||
|
|
</label>
|
||
|
|
<button id="theme-toggle" class="icon-button" type="button" title="Change theme" aria-label="Change theme" aria-haspopup="true" aria-expanded="false" aria-controls="theme-list">
|
||
|
|
<i class="fa fa-paint-brush"></i>
|
||
|
|
</button>
|
||
|
|
<ul id="theme-list" class="theme-popup" aria-label="Themes" role="menu">
|
||
|
|
<li role="none"><button role="menuitem" class="theme" id="default_theme">Auto</button></li>
|
||
|
|
<li role="none"><button role="menuitem" class="theme" id="light">Light</button></li>
|
||
|
|
<li role="none"><button role="menuitem" class="theme" id="rust">Rust</button></li>
|
||
|
|
<li role="none"><button role="menuitem" class="theme" id="coal">Coal</button></li>
|
||
|
|
<li role="none"><button role="menuitem" class="theme" id="navy">Navy</button></li>
|
||
|
|
<li role="none"><button role="menuitem" class="theme" id="ayu">Ayu</button></li>
|
||
|
|
</ul>
|
||
|
|
<button id="search-toggle" class="icon-button" type="button" title="Search (`/`)" aria-label="Toggle Searchbar" aria-expanded="false" aria-keyshortcuts="/ s" aria-controls="searchbar">
|
||
|
|
<i class="fa fa-search"></i>
|
||
|
|
</button>
|
||
|
|
</div>
|
||
|
|
|
||
|
|
<h1 class="menu-title">KOGRAL Documentation</h1>
|
||
|
|
|
||
|
|
<div class="right-buttons">
|
||
|
|
<a href="../../print.html" title="Print this book" aria-label="Print this book">
|
||
|
|
<i id="print-button" class="fa fa-print"></i>
|
||
|
|
</a>
|
||
|
|
<a href="https://github.com/your-org/knowledge-base" title="Git repository" aria-label="Git repository">
|
||
|
|
<i id="git-repository-button" class="fa fa-github"></i>
|
||
|
|
</a>
|
||
|
|
<a href="https://github.com/your-org/knowledge-base/edit/main/docs/./architecture/adrs/002-fastembed-ai-providers.md" title="Suggest an edit" aria-label="Suggest an edit">
|
||
|
|
<i id="git-edit-button" class="fa fa-edit"></i>
|
||
|
|
</a>
|
||
|
|
|
||
|
|
</div>
|
||
|
|
</div>
|
||
|
|
|
||
|
|
<div id="search-wrapper" class="hidden">
|
||
|
|
<form id="searchbar-outer" class="searchbar-outer">
|
||
|
|
<input type="search" id="searchbar" name="searchbar" placeholder="Search this book ..." aria-controls="searchresults-outer" aria-describedby="searchresults-header">
|
||
|
|
</form>
|
||
|
|
<div id="searchresults-outer" class="searchresults-outer hidden">
|
||
|
|
<div id="searchresults-header" class="searchresults-header"></div>
|
||
|
|
<ul id="searchresults">
|
||
|
|
</ul>
|
||
|
|
</div>
|
||
|
|
</div>
|
||
|
|
|
||
|
|
<!-- Apply ARIA attributes after the sidebar and the sidebar toggle button are added to the DOM -->
|
||
|
|
<script>
|
||
|
|
// Reflect the initial sidebar state in ARIA attributes and in the tab order
// of any links currently inside the sidebar.
{
    const isVisible = sidebar === 'visible';

    document.getElementById('sidebar-toggle').setAttribute('aria-expanded', isVisible);
    document.getElementById('sidebar').setAttribute('aria-hidden', !isVisible);

    // Links in a hidden sidebar are taken out of the tab order.
    for (const link of document.querySelectorAll('#sidebar a')) {
        link.setAttribute('tabIndex', isVisible ? 0 : -1);
    }
}
|
||
|
|
</script>
|
||
|
|
|
||
|
|
<div id="content" class="content">
|
||
|
|
<main>
|
||
|
|
<h1 id="adr-002-fastembed-via-ai-providers-for-embeddings"><a class="header" href="#adr-002-fastembed-via-ai-providers-for-embeddings">ADR-002: FastEmbed via AI Providers for Embeddings</a></h1>
|
||
|
|
<p><strong>Status</strong>: Accepted</p>
|
||
|
|
<p><strong>Date</strong>: 2026-01-17</p>
|
||
|
|
<p><strong>Deciders</strong>: Architecture Team</p>
|
||
|
|
<p><strong>Context</strong>: Embedding Strategy for Semantic Search</p>
|
||
|
|
<hr />
|
||
|
|
<h2 id="context"><a class="header" href="#context">Context</a></h2>
|
||
|
|
<p>KOGRAL requires embedding generation for semantic search capabilities. Embeddings convert text into numerical vectors that capture semantic meaning, enabling "find concepts" rather than just "find keywords".</p>
|
||
|
|
<p><strong>Requirements</strong>:</p>
|
||
|
|
<ol>
|
||
|
|
<li><strong>Local-First Option</strong>: Must work offline without external API dependencies</li>
|
||
|
|
<li><strong>Production Scalability</strong>: Support cloud AI providers for large-scale deployments</li>
|
||
|
|
<li><strong>Multiple Providers</strong>: Flexibility to choose based on cost, quality, privacy</li>
|
||
|
|
<li><strong>Cost-Effective Development</strong>: Free local embeddings for development and testing</li>
|
||
|
|
<li><strong>Quality</strong>: Good enough embeddings for finding related concepts</li>
|
||
|
|
</ol>
|
||
|
|
<p><strong>Options Evaluated</strong>:</p>
|
||
|
|
<h3 id="option-1-only-local-embeddings-fastembed"><a class="header" href="#option-1-only-local-embeddings-fastembed">Option 1: Only Local Embeddings (fastembed)</a></h3>
|
||
|
|
<p><strong>Pros</strong>:</p>
|
||
|
|
<ul>
|
||
|
|
<li>No API costs</li>
|
||
|
|
<li>Works offline</li>
|
||
|
|
<li>Privacy-preserving (no data leaves machine)</li>
|
||
|
|
<li>Fast (local GPU acceleration possible)</li>
|
||
|
|
</ul>
|
||
|
|
<p><strong>Cons</strong>:</p>
|
||
|
|
<ul>
|
||
|
|
<li>Limited model quality compared to cloud providers</li>
|
||
|
|
<li>Resource-intensive (requires downloading ~100MB models)</li>
|
||
|
|
<li>Single provider lock-in (fastembed library)</li>
|
||
|
|
</ul>
|
||
|
|
<p><strong>Example</strong>:</p>
|
||
|
|
<pre><pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
|
||
|
|
</span><span class="boring">fn main() {
|
||
|
|
</span>use fastembed::{TextEmbedding, InitOptions};
|
||
|
|
|
||
|
|
let model = TextEmbedding::try_new(InitOptions {
|
||
|
|
model_name: "BAAI/bge-small-en-v1.5",
|
||
|
|
..Default::default()
|
||
|
|
})?;
|
||
|
|
|
||
|
|
let embeddings = model.embed(vec!["Hello world"], None)?;
|
||
|
|
// Output: Vec&lt;Vec&lt;f32&gt;&gt; with 384 dimensions
|
||
|
|
<span class="boring">}</span></code></pre></pre>
|
||
|
|
<h3 id="option-2-only-cloud-ai-providers-openai-claude-etc"><a class="header" href="#option-2-only-cloud-ai-providers-openai-claude-etc">Option 2: Only Cloud AI Providers (OpenAI, Claude, etc.)</a></h3>
|
||
|
|
<p><strong>Pros</strong>:</p>
|
||
|
|
<ul>
|
||
|
|
<li>State-of-the-art embedding quality</li>
|
||
|
|
<li>No local resource usage</li>
|
||
|
|
<li>Latest models available</li>
|
||
|
|
<li>Scalable to millions of documents</li>
|
||
|
|
</ul>
|
||
|
|
<p><strong>Cons</strong>:</p>
|
||
|
|
<ul>
|
||
|
|
<li>Requires API keys (cost per embedding)</li>
|
||
|
|
<li>Network dependency (no offline mode)</li>
|
||
|
|
<li>Privacy concerns (data sent to third parties)</li>
|
||
|
|
<li>Vendor lock-in risk</li>
|
||
|
|
</ul>
|
||
|
|
<p><strong>Example</strong>:</p>
|
||
|
|
<pre><pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
|
||
|
|
</span><span class="boring">fn main() {
|
||
|
|
</span>use rig::providers::openai;
|
||
|
|
|
||
|
|
let client = openai::Client::new("sk-...");
|
||
|
|
let embeddings = client.embeddings("text-embedding-3-small")
|
||
|
|
.embed_documents(vec!["Hello world"]).await?;
|
||
|
|
// Output: Vec&lt;Vec&lt;f32&gt;&gt; with 1536 dimensions
|
||
|
|
<span class="boring">}</span></code></pre></pre>
|
||
|
|
<h3 id="option-3-hybrid-strategy-fastembed--ai-providers-via-rig-core"><a class="header" href="#option-3-hybrid-strategy-fastembed--ai-providers-via-rig-core">Option 3: Hybrid Strategy (fastembed + AI providers via rig-core)</a></h3>
|
||
|
|
<p><strong>Pros</strong>:</p>
|
||
|
|
<ul>
|
||
|
|
<li>✅ Best of both worlds: local dev, cloud production</li>
|
||
|
|
<li>✅ User choice: privacy-first or quality-first</li>
|
||
|
|
<li>✅ Cost flexibility: free for small projects, paid for scale</li>
|
||
|
|
<li>✅ Unified interface via <code>rig-core</code> library</li>
|
||
|
|
<li>✅ Easy provider switching (config-driven)</li>
|
||
|
|
</ul>
|
||
|
|
<p><strong>Cons</strong>:</p>
|
||
|
|
<ul>
|
||
|
|
<li>❌ More complex implementation (multiple providers)</li>
|
||
|
|
<li>❌ Dimension mismatch between providers (384 vs 1536)</li>
|
||
|
|
<li>❌ Additional dependencies (<code>rig-core</code>, <code>fastembed</code>)</li>
|
||
|
|
</ul>
|
||
|
|
<hr />
|
||
|
|
<h2 id="decision"><a class="header" href="#decision">Decision</a></h2>
|
||
|
|
<p><strong>We will use a hybrid strategy: fastembed (local) + AI providers (via rig-core).</strong></p>
|
||
|
|
<p><strong>Implementation</strong>:</p>
|
||
|
|
<ol>
|
||
|
|
<li><strong>Default</strong>: <code>fastembed</code> with <code>BAAI/bge-small-en-v1.5</code> (384 dimensions)</li>
|
||
|
|
<li><strong>Optional</strong>: OpenAI, Claude, Ollama via <code>rig-core</code> (configurable)</li>
|
||
|
|
<li><strong>Interface</strong>: <code>EmbeddingProvider</code> trait abstracts provider details</li>
|
||
|
|
<li><strong>Config-Driven</strong>: Provider selection via Nickel configuration</li>
|
||
|
|
</ol>
|
||
|
|
<p><strong>Architecture</strong>:</p>
|
||
|
|
<pre><pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
|
||
|
|
</span><span class="boring">fn main() {
|
||
|
|
</span>#[async_trait]
|
||
|
|
pub trait EmbeddingProvider: Send + Sync {
|
||
|
|
async fn embed(&amp;self, texts: Vec&lt;String&gt;) -&gt; Result&lt;Vec&lt;Vec&lt;f32&gt;&gt;&gt;;
|
||
|
|
fn dimensions(&self) -> usize;
|
||
|
|
fn model_name(&self) -> &str;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Local implementation
|
||
|
|
pub struct FastEmbedProvider {
|
||
|
|
model: TextEmbedding,
|
||
|
|
}
|
||
|
|
|
||
|
|
impl FastEmbedProvider {
|
||
|
|
pub fn new(model_name: &amp;str) -&gt; Result&lt;Self&gt; {
|
||
|
|
let model = TextEmbedding::try_new(InitOptions {
|
||
|
|
model_name: model_name.into(),
|
||
|
|
..Default::default()
|
||
|
|
})?;
|
||
|
|
Ok(Self { model })
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
#[async_trait]
|
||
|
|
impl EmbeddingProvider for FastEmbedProvider {
|
||
|
|
async fn embed(&amp;self, texts: Vec&lt;String&gt;) -&gt; Result&lt;Vec&lt;Vec&lt;f32&gt;&gt;&gt; {
|
||
|
|
Ok(self.model.embed(texts, None)?)
|
||
|
|
}
|
||
|
|
|
||
|
|
fn dimensions(&self) -> usize { 384 }
|
||
|
|
fn model_name(&self) -> &str { "BAAI/bge-small-en-v1.5" }
|
||
|
|
}
|
||
|
|
|
||
|
|
// Cloud provider implementation (via rig-core)
|
||
|
|
pub struct RigEmbeddingProvider {
|
||
|
|
client: rig::Client,
|
||
|
|
model: String,
|
||
|
|
dimensions: usize,
|
||
|
|
}
|
||
|
|
|
||
|
|
#[async_trait]
|
||
|
|
impl EmbeddingProvider for RigEmbeddingProvider {
|
||
|
|
async fn embed(&amp;self, texts: Vec&lt;String&gt;) -&gt; Result&lt;Vec&lt;Vec&lt;f32&gt;&gt;&gt; {
|
||
|
|
let embeddings = self.client
|
||
|
|
.embeddings(&self.model)
|
||
|
|
.embed_documents(texts)
|
||
|
|
.await?;
|
||
|
|
Ok(embeddings)
|
||
|
|
}
|
||
|
|
|
||
|
|
fn dimensions(&self) -> usize { self.dimensions }
|
||
|
|
fn model_name(&self) -> &str { &self.model }
|
||
|
|
}
|
||
|
|
<span class="boring">}</span></code></pre></pre>
|
||
|
|
<p><strong>Configuration</strong> (Nickel):</p>
|
||
|
|
<pre><code class="language-nickel"># Local development (default)
|
||
|
|
{
|
||
|
|
embeddings = {
|
||
|
|
enabled = true,
|
||
|
|
provider = 'fastembed,
|
||
|
|
model = "BAAI/bge-small-en-v1.5",
|
||
|
|
dimensions = 384,
|
||
|
|
},
|
||
|
|
}
|
||
|
|
|
||
|
|
# Production with OpenAI
|
||
|
|
{
|
||
|
|
embeddings = {
|
||
|
|
enabled = true,
|
||
|
|
provider = 'openai,
|
||
|
|
model = "text-embedding-3-small",
|
||
|
|
dimensions = 1536,
|
||
|
|
api_key_env = "OPENAI_API_KEY",
|
||
|
|
},
|
||
|
|
}
|
||
|
|
|
||
|
|
# Self-hosted with Ollama
|
||
|
|
{
|
||
|
|
embeddings = {
|
||
|
|
enabled = true,
|
||
|
|
provider = 'ollama,
|
||
|
|
model = "nomic-embed-text",
|
||
|
|
dimensions = 768,
|
||
|
|
},
|
||
|
|
}
|
||
|
|
</code></pre>
|
||
|
|
<p><strong>Provider Selection</strong> (<code>kb-core/src/embeddings/mod.rs</code>):</p>
|
||
|
|
<pre><pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
|
||
|
|
</span><span class="boring">fn main() {
|
||
|
|
</span>pub fn create_provider(config: &amp;EmbeddingConfig) -&gt; Result&lt;Box&lt;dyn EmbeddingProvider&gt;&gt; {
|
||
|
|
match config.provider {
|
||
|
|
EmbeddingProviderType::FastEmbed => {
|
||
|
|
Ok(Box::new(FastEmbedProvider::new(&config.model)?))
|
||
|
|
}
|
||
|
|
EmbeddingProviderType::OpenAI => {
|
||
|
|
let api_key = std::env::var(&config.api_key_env)?;
|
||
|
|
Ok(Box::new(RigEmbeddingProvider::new_openai(api_key, &config.model)?))
|
||
|
|
}
|
||
|
|
EmbeddingProviderType::Claude => {
|
||
|
|
let api_key = std::env::var(&config.api_key_env)?;
|
||
|
|
Ok(Box::new(RigEmbeddingProvider::new_claude(api_key, &config.model)?))
|
||
|
|
}
|
||
|
|
EmbeddingProviderType::Ollama => {
|
||
|
|
Ok(Box::new(RigEmbeddingProvider::new_ollama(&config.model)?))
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
<span class="boring">}</span></code></pre></pre>
|
||
|
|
<hr />
|
||
|
|
<h2 id="consequences"><a class="header" href="#consequences">Consequences</a></h2>
|
||
|
|
<h3 id="positive"><a class="header" href="#positive">Positive</a></h3>
|
||
|
|
<p>✅ <strong>Development Flexibility</strong>:</p>
|
||
|
|
<ul>
|
||
|
|
<li>Developers can use <code>fastembed</code> without API keys</li>
|
||
|
|
<li>Fast feedback loop (local embeddings, no network calls)</li>
|
||
|
|
<li>Works offline (train trips, flights)</li>
|
||
|
|
</ul>
|
||
|
|
<p>✅ <strong>Production Quality</strong>:</p>
|
||
|
|
<ul>
|
||
|
|
<li>Production deployments can use OpenAI/Claude for better quality</li>
|
||
|
|
<li>Latest embedding models available</li>
|
||
|
|
<li>Scalable to millions of documents</li>
|
||
|
|
</ul>
|
||
|
|
<p>✅ <strong>Privacy Control</strong>:</p>
|
||
|
|
<ul>
|
||
|
|
<li>Privacy-sensitive projects use local embeddings</li>
|
||
|
|
<li>Public projects can use cloud providers</li>
|
||
|
|
<li>User choice via configuration</li>
|
||
|
|
</ul>
|
||
|
|
<p>✅ <strong>Cost Optimization</strong>:</p>
|
||
|
|
<ul>
|
||
|
|
<li>Small projects: free (fastembed)</li>
|
||
|
|
<li>Large projects: pay for quality (cloud providers)</li>
|
||
|
|
<li>Hybrid: important docs via cloud, bulk via local</li>
|
||
|
|
</ul>
|
||
|
|
<p>✅ <strong>Unified Interface</strong>:</p>
|
||
|
|
<ul>
|
||
|
|
<li><code>EmbeddingProvider</code> trait abstracts provider details</li>
|
||
|
|
<li>Query code doesn't know/care about provider</li>
|
||
|
|
<li>Easy to add new providers</li>
|
||
|
|
</ul>
|
||
|
|
<h3 id="negative"><a class="header" href="#negative">Negative</a></h3>
|
||
|
|
<p>❌ <strong>Dimension Mismatch</strong>:</p>
|
||
|
|
<ul>
|
||
|
|
<li>fastembed: 384 dimensions</li>
|
||
|
|
<li>OpenAI: 1536 dimensions</li>
|
||
|
|
<li>Cannot mix in same index</li>
|
||
|
|
</ul>
|
||
|
|
<p><strong>Mitigation</strong>:</p>
|
||
|
|
<ul>
|
||
|
|
<li>Store provider + dimensions in node metadata</li>
|
||
|
|
<li>Rebuild index when changing providers</li>
|
||
|
|
<li>Document dimension constraints</li>
|
||
|
|
</ul>
|
||
|
|
<p>❌ <strong>Model Download</strong>:</p>
|
||
|
|
<ul>
|
||
|
|
<li>First use of fastembed downloads ~100MB model</li>
|
||
|
|
<li>Slow initial startup</li>
|
||
|
|
</ul>
|
||
|
|
<p><strong>Mitigation</strong>:</p>
|
||
|
|
<ul>
|
||
|
|
<li>Pre-download in Docker images</li>
|
||
|
|
<li>Document model download in setup guide</li>
|
||
|
|
<li>Cache models in <code>~/.cache/fastembed</code></li>
|
||
|
|
</ul>
|
||
|
|
<p>❌ <strong>Complex Configuration</strong>:</p>
|
||
|
|
<ul>
|
||
|
|
<li>Multiple provider options may confuse users</li>
|
||
|
|
</ul>
|
||
|
|
<p><strong>Mitigation</strong>:</p>
|
||
|
|
<ul>
|
||
|
|
<li>Sane default (fastembed)</li>
|
||
|
|
<li>Clear examples for each provider</li>
|
||
|
|
<li>Validation errors explain misconfigurations</li>
|
||
|
|
</ul>
|
||
|
|
<h3 id="neutral"><a class="header" href="#neutral">Neutral</a></h3>
|
||
|
|
<p>⚪ <strong>Dependency Trade-off</strong>:</p>
|
||
|
|
<ul>
|
||
|
|
<li><code>fastembed</code> adds ~5MB to binary</li>
|
||
|
|
<li><code>rig-core</code> adds ~2MB</li>
|
||
|
|
<li>Total: ~7MB overhead</li>
|
||
|
|
</ul>
|
||
|
|
<p>Not a concern for CLI/MCP server use case.</p>
|
||
|
|
<hr />
|
||
|
|
<h2 id="provider-comparison"><a class="header" href="#provider-comparison">Provider Comparison</a></h2>
|
||
|
|
<div class="table-wrapper"><table><thead><tr><th>Provider</th><th>Dimensions</th><th>Quality</th><th>Cost</th><th>Privacy</th><th>Offline</th></tr></thead><tbody>
|
||
|
|
<tr><td><strong>fastembed</strong></td><td>384</td><td>Good</td><td>Free</td><td>✅ Local</td><td>✅ Yes</td></tr>
|
||
|
|
<tr><td><strong>OpenAI</strong></td><td>1536</td><td>Excellent</td><td>$0.0001/1K</td><td>❌ Cloud</td><td>❌ No</td></tr>
|
||
|
|
<tr><td><strong>Claude</strong></td><td>1024</td><td>Excellent</td><td>$0.00025/1K</td><td>❌ Cloud</td><td>❌ No</td></tr>
|
||
|
|
<tr><td><strong>Ollama</strong></td><td>768</td><td>Very Good</td><td>Free</td><td>✅ Local</td><td>✅ Yes</td></tr>
|
||
|
|
</tbody></table>
|
||
|
|
</div>
|
||
|
|
<p><strong>Recommendation by Use Case</strong>:</p>
|
||
|
|
<ul>
|
||
|
|
<li><strong>Development</strong>: fastembed (fast, free, offline)</li>
|
||
|
|
<li><strong>Small Teams</strong>: fastembed or Ollama (privacy, no costs)</li>
|
||
|
|
<li><strong>Enterprise</strong>: OpenAI or Claude (best quality, scalable)</li>
|
||
|
|
<li><strong>Self-Hosted</strong>: Ollama (good quality, local control)</li>
|
||
|
|
</ul>
|
||
|
|
<hr />
|
||
|
|
<h2 id="implementation-timeline"><a class="header" href="#implementation-timeline">Implementation Timeline</a></h2>
|
||
|
|
<ol>
|
||
|
|
<li>✅ Define <code>EmbeddingProvider</code> trait</li>
|
||
|
|
<li>✅ Implement FastEmbedProvider (stub, feature-gated)</li>
|
||
|
|
<li>✅ Implement RigEmbeddingProvider (stub, feature-gated)</li>
|
||
|
|
<li>⏳ Complete FastEmbed integration with model download</li>
|
||
|
|
<li>⏳ Complete rig-core integration (OpenAI, Claude, Ollama)</li>
|
||
|
|
<li>⏳ Add query engine with similarity search</li>
|
||
|
|
<li>⏳ Document provider selection and trade-offs</li>
|
||
|
|
</ol>
|
||
|
|
<hr />
|
||
|
|
<h2 id="monitoring"><a class="header" href="#monitoring">Monitoring</a></h2>
|
||
|
|
<p><strong>Success Criteria</strong>:</p>
|
||
|
|
<ul>
|
||
|
|
<li>Users can switch providers via config change</li>
|
||
|
|
<li>Local embeddings work without API keys</li>
|
||
|
|
<li>Production deployments use cloud providers successfully</li>
|
||
|
|
<li>Query quality acceptable for both local and cloud embeddings</li>
|
||
|
|
</ul>
|
||
|
|
<p><strong>Metrics</strong>:</p>
|
||
|
|
<ul>
|
||
|
|
<li>Embedding generation latency (local vs cloud)</li>
|
||
|
|
<li>Query accuracy (precision@10 for semantic search)</li>
|
||
|
|
<li>API costs (cloud providers)</li>
|
||
|
|
<li>User satisfaction (feedback on search quality)</li>
|
||
|
|
</ul>
|
||
|
|
<hr />
|
||
|
|
<h2 id="references"><a class="header" href="#references">References</a></h2>
|
||
|
|
<ul>
|
||
|
|
<li><a href="https://github.com/Anush008/fastembed-rs">fastembed Documentation</a></li>
|
||
|
|
<li><a href="https://github.com/0xPlaygrounds/rig">rig-core Documentation</a></li>
|
||
|
|
<li><a href="https://platform.openai.com/docs/guides/embeddings">OpenAI Embeddings API</a></li>
|
||
|
|
<li><a href="https://huggingface.co/BAAI/bge-small-en-v1.5">BAAI/bge Models</a></li>
|
||
|
|
<li><a href="https://ollama.com/blog/embedding-models">Ollama Embeddings</a></li>
|
||
|
|
</ul>
|
||
|
|
<hr />
|
||
|
|
<h2 id="revision-history"><a class="header" href="#revision-history">Revision History</a></h2>
|
||
|
|
<div class="table-wrapper"><table><thead><tr><th>Date</th><th>Author</th><th>Change</th></tr></thead><tbody>
|
||
|
|
<tr><td>2026-01-17</td><td>Architecture Team</td><td>Initial decision</td></tr>
|
||
|
|
</tbody></table>
|
||
|
|
</div>
|
||
|
|
<hr />
|
||
|
|
<p><strong>Previous ADR</strong>: <a href="001-nickel-vs-toml.html">ADR-001: Nickel vs TOML</a>
|
||
|
|
<strong>Next ADR</strong>: <a href="003-hybrid-storage.html">ADR-003: Hybrid Storage Strategy</a></p>
|
||
|
|
|
||
|
|
</main>
|
||
|
|
|
||
|
|
<nav class="nav-wrapper" aria-label="Page navigation">
|
||
|
|
<!-- Mobile navigation buttons -->
|
||
|
|
<a rel="prev" href="../../architecture/adrs/001-nickel-vs-toml.html" class="mobile-nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
|
||
|
|
<i class="fa fa-angle-left"></i>
|
||
|
|
</a>
|
||
|
|
|
||
|
|
<a rel="next prefetch" href="../../architecture/adrs/003-hybrid-storage.html" class="mobile-nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
|
||
|
|
<i class="fa fa-angle-right"></i>
|
||
|
|
</a>
|
||
|
|
|
||
|
|
<div style="clear: both"></div>
|
||
|
|
</nav>
|
||
|
|
</div>
|
||
|
|
</div>
|
||
|
|
|
||
|
|
<nav class="nav-wide-wrapper" aria-label="Page navigation">
|
||
|
|
<a rel="prev" href="../../architecture/adrs/001-nickel-vs-toml.html" class="nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
|
||
|
|
<i class="fa fa-angle-left"></i>
|
||
|
|
</a>
|
||
|
|
|
||
|
|
<a rel="next prefetch" href="../../architecture/adrs/003-hybrid-storage.html" class="nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
|
||
|
|
<i class="fa fa-angle-right"></i>
|
||
|
|
</a>
|
||
|
|
</nav>
|
||
|
|
|
||
|
|
</div>
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
<script>
|
||
|
|
// Global flag set before the book scripts below are loaded.
// NOTE(review): presumably read by book.js to enable copy buttons on code blocks — confirm.
window.playground_copyable = true;
|
||
|
|
</script>
|
||
|
|
|
||
|
|
|
||
|
|
<script src="../../elasticlunr.min.js"></script>
|
||
|
|
<script src="../../mark.min.js"></script>
|
||
|
|
<script src="../../searcher.js"></script>
|
||
|
|
|
||
|
|
<script src="../../clipboard.min.js"></script>
|
||
|
|
<script src="../../highlight.js"></script>
|
||
|
|
<script src="../../book.js"></script>
|
||
|
|
|
||
|
|
<!-- Custom JS scripts -->
|
||
|
|
|
||
|
|
|
||
|
|
</div>
|
||
|
|
</body>
|
||
|
|
</html>
|