use std::time::Duration; use stratum_embeddings::{ EmbeddingOptions, EmbeddingService, FastEmbedProvider, LanceDbStore, MemoryCache, VectorStore, VectorStoreConfig, }; use tempfile::tempdir; use tracing::info; #[tokio::main] async fn main() -> Result<(), Box> { tracing_subscriber::fmt::init(); info!("Initializing embedding service..."); let provider = FastEmbedProvider::small()?; let cache = MemoryCache::new(1000, Duration::from_secs(300)); let service = EmbeddingService::new(provider).with_cache(cache); let dir = tempdir()?; let db_path = dir.path().to_str().unwrap(); info!("Creating LanceDB store at: {}", db_path); let config = VectorStoreConfig::new(384); let store = LanceDbStore::new(db_path, "embeddings", config).await?; let documents = vec![ ( "doc1", "Rust provides memory safety without garbage collection", ), ("doc2", "Knowledge graphs represent structured information"), ("doc3", "Vector databases enable semantic similarity search"), ("doc4", "Machine learning models learn from data patterns"), ("doc5", "Embeddings capture semantic meaning in vectors"), ]; info!("Embedding and storing {} documents...", documents.len()); let options = EmbeddingOptions::default_with_cache(); for (id, text) in &documents { let embedding = service.embed(text, &options).await?; let metadata = serde_json::json!({ "text": text, "source": "demo" }); store.upsert(id, &embedding, metadata).await?; } info!("Documents stored successfully"); info!("Performing semantic search..."); let query = "How do databases support similarity matching?"; let query_embedding = service.embed(query, &options).await?; let results = store.search(&query_embedding, 3, None).await?; info!("Search results for: '{}'", query); for (i, result) in results.iter().enumerate() { let text = result.metadata["text"].as_str().unwrap_or("N/A"); info!(" {}. [score: {:.4}] {}", i + 1, result.score, text); } let count = store.count().await?; info!("Total documents in store: {}", count); Ok(()) }