68 lines
2.2 KiB
Rust
68 lines
2.2 KiB
Rust
|
|
use std::time::Duration;
|
||
|
|
|
||
|
|
use stratum_embeddings::{
|
||
|
|
EmbeddingOptions, EmbeddingService, FastEmbedProvider, LanceDbStore, MemoryCache, VectorStore,
|
||
|
|
VectorStoreConfig,
|
||
|
|
};
|
||
|
|
use tempfile::tempdir;
|
||
|
|
use tracing::info;
|
||
|
|
|
||
|
|
#[tokio::main]
|
||
|
|
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||
|
|
tracing_subscriber::fmt::init();
|
||
|
|
|
||
|
|
info!("Initializing embedding service...");
|
||
|
|
let provider = FastEmbedProvider::small()?;
|
||
|
|
let cache = MemoryCache::new(1000, Duration::from_secs(300));
|
||
|
|
let service = EmbeddingService::new(provider).with_cache(cache);
|
||
|
|
|
||
|
|
let dir = tempdir()?;
|
||
|
|
let db_path = dir.path().to_str().unwrap();
|
||
|
|
|
||
|
|
info!("Creating LanceDB store at: {}", db_path);
|
||
|
|
let config = VectorStoreConfig::new(384);
|
||
|
|
let store = LanceDbStore::new(db_path, "embeddings", config).await?;
|
||
|
|
|
||
|
|
let documents = vec![
|
||
|
|
(
|
||
|
|
"doc1",
|
||
|
|
"Rust provides memory safety without garbage collection",
|
||
|
|
),
|
||
|
|
("doc2", "Knowledge graphs represent structured information"),
|
||
|
|
("doc3", "Vector databases enable semantic similarity search"),
|
||
|
|
("doc4", "Machine learning models learn from data patterns"),
|
||
|
|
("doc5", "Embeddings capture semantic meaning in vectors"),
|
||
|
|
];
|
||
|
|
|
||
|
|
info!("Embedding and storing {} documents...", documents.len());
|
||
|
|
let options = EmbeddingOptions::default_with_cache();
|
||
|
|
|
||
|
|
for (id, text) in &documents {
|
||
|
|
let embedding = service.embed(text, &options).await?;
|
||
|
|
let metadata = serde_json::json!({
|
||
|
|
"text": text,
|
||
|
|
"source": "demo"
|
||
|
|
});
|
||
|
|
store.upsert(id, &embedding, metadata).await?;
|
||
|
|
}
|
||
|
|
|
||
|
|
info!("Documents stored successfully");
|
||
|
|
|
||
|
|
info!("Performing semantic search...");
|
||
|
|
let query = "How do databases support similarity matching?";
|
||
|
|
let query_embedding = service.embed(query, &options).await?;
|
||
|
|
|
||
|
|
let results = store.search(&query_embedding, 3, None).await?;
|
||
|
|
|
||
|
|
info!("Search results for: '{}'", query);
|
||
|
|
for (i, result) in results.iter().enumerate() {
|
||
|
|
let text = result.metadata["text"].as_str().unwrap_or("N/A");
|
||
|
|
info!(" {}. [score: {:.4}] {}", i + 1, result.score, text);
|
||
|
|
}
|
||
|
|
|
||
|
|
let count = store.count().await?;
|
||
|
|
info!("Total documents in store: {}", count);
|
||
|
|
|
||
|
|
Ok(())
|
||
|
|
}
|