Jesús Pérez 0ae853c2fa
Some checks failed
Rust CI / Security Audit (push) Has been cancelled
Rust CI / Check + Test + Lint (nightly) (push) Has been cancelled
Rust CI / Check + Test + Lint (stable) (push) Has been cancelled
chore: create stratum-embeddings and stratum-llm crates, docs
2026-01-24 02:03:12 +00:00

68 lines
2.2 KiB
Rust

use std::time::Duration;
use stratum_embeddings::{
EmbeddingOptions, EmbeddingService, FastEmbedProvider, LanceDbStore, MemoryCache, VectorStore,
VectorStoreConfig,
};
use tempfile::tempdir;
use tracing::info;
/// End-to-end demo: embed a handful of documents, store them in a LanceDB-backed
/// vector store, then run a semantic similarity query against them.
///
/// Steps, in order:
/// 1. Install the default `tracing` formatter so the `info!` lines are printed.
/// 2. Build an [`EmbeddingService`] from a [`FastEmbedProvider`] with a [`MemoryCache`].
/// 3. Create a [`LanceDbStore`] table named `"embeddings"` inside a temporary directory.
/// 4. Embed each demo document and upsert it together with its JSON metadata.
/// 5. Embed a query, search the store, and log the hits and the stored document count.
///
/// # Errors
///
/// Returns the first error produced by provider construction, temporary-directory
/// creation, store creation, embedding, upsert, search or count. Also returns an
/// error (instead of panicking) when the temporary directory path is not valid UTF-8.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    tracing_subscriber::fmt::init();

    info!("Initializing embedding service...");
    let provider = FastEmbedProvider::small()?;
    // NOTE(review): presumably 1000 is the cache capacity and 300 s the entry TTL —
    // confirm against `MemoryCache::new`.
    let cache = MemoryCache::new(1000, Duration::from_secs(300));
    let service = EmbeddingService::new(provider).with_cache(cache);

    // `dir` has to stay alive for as long as the store is used: tempfile's `TempDir`
    // removes the directory when it is dropped.
    let dir = tempdir()?;
    // The store takes the path as `&str`, and a temp path is not guaranteed to be
    // valid UTF-8, so surface that case as an error rather than unwrapping.
    let db_path = dir
        .path()
        .to_str()
        .ok_or("temporary directory path is not valid UTF-8")?;
    info!("Creating LanceDB store at: {}", db_path);

    // NOTE(review): 384 is presumably the vector dimension and must match what the
    // provider from `FastEmbedProvider::small()` emits — confirm.
    let config = VectorStoreConfig::new(384);
    let store = LanceDbStore::new(db_path, "embeddings", config).await?;

    // (id, text) pairs used as the demo corpus.
    let documents = vec![
        (
            "doc1",
            "Rust provides memory safety without garbage collection",
        ),
        ("doc2", "Knowledge graphs represent structured information"),
        ("doc3", "Vector databases enable semantic similarity search"),
        ("doc4", "Machine learning models learn from data patterns"),
        ("doc5", "Embeddings capture semantic meaning in vectors"),
    ];

    info!("Embedding and storing {} documents...", documents.len());
    let options = EmbeddingOptions::default_with_cache();
    for (id, text) in &documents {
        let embedding = service.embed(text, &options).await?;
        // The original text is kept in the metadata so that search hits can be
        // printed without a second lookup.
        let metadata = serde_json::json!({
            "text": text,
            "source": "demo"
        });
        store.upsert(id, &embedding, metadata).await?;
    }
    info!("Documents stored successfully");

    info!("Performing semantic search...");
    let query = "How do databases support similarity matching?";
    let query_embedding = service.embed(query, &options).await?;
    // NOTE(review): presumably `3` is the maximum number of hits and `None` means
    // "no filter" — confirm against `VectorStore::search`.
    let results = store.search(&query_embedding, 3, None).await?;

    info!("Search results for: '{}'", query);
    for (i, result) in results.iter().enumerate() {
        // Fall back to "N/A" when the hit carries no string "text" field.
        let text = result.metadata["text"].as_str().unwrap_or("N/A");
        info!("  {}. [score: {:.4}] {}", i + 1, result.score, text);
    }

    let count = store.count().await?;
    info!("Total documents in store: {}", count);

    Ok(())
}