//! AI Backend Example: RAG System with Batch Operations
//!
//! Demonstrates:
//! - Creating a RAG (Retrieval-Augmented Generation) system
//! - Adding documents using batch operations (efficient)
//! - Retrieving relevant documents for queries
//! - Removing documents using batch operations
//! - Performance comparison: batch vs sequential operations
//!
//! Run with: cargo run --example main --features ai_backend
//! Or via just: just build::ai && cargo run --example main --features ai_backend

/// Demo entry point: builds a RAG system, exercises batch add / retrieve /
/// batch remove, and times batch vs sequential insertion.
///
/// Returns any error surfaced by the `typedialog_core` RAG API.
#[cfg(feature = "ai_backend")]
fn main() -> Result<(), Box<dyn std::error::Error>> {
    use std::time::Instant;
    use typedialog_core::ai::rag::{RagConfig, RagSystem};

    // Print a boxed section banner. `title_lines` are the pre-padded
    // `║ … ║` rows placed between the top and bottom borders; extracting
    // this removes six copies of the same three-line box-drawing sequence.
    fn banner(title_lines: &[&str]) {
        println!("╔════════════════════════════════════════════════════════╗");
        for line in title_lines {
            println!("{}", line);
        }
        println!("╚════════════════════════════════════════════════════════╝\n");
    }

    // Leading blank line matches the original "\n╔…" output exactly.
    println!();
    banner(&[
        "║ TypeDialog AI Backend - RAG System Example ║",
        "║ Batch Operations Performance Demonstration ║",
    ]);

    // Create RAG system with default configuration.
    let mut rag = RagSystem::new(RagConfig::default())?;
    println!("✓ Created RAG system with default config");
    // NOTE(review): these figures are assumed to mirror RagConfig::default();
    // confirm they stay in sync if the default config ever changes.
    println!(" - Semantic weight: 0.6");
    println!(" - Keyword weight: 0.4");
    println!(" - Max results: 5\n");

    // === BATCH ADD DOCUMENTS ===
    banner(&["║ 1. BATCH ADD DOCUMENTS (Efficient) ║"]);

    // (id, content) pairs consumed by the batch insert below.
    let documents = vec![
        (
            "doc1".to_string(),
            "Rust is a systems programming language with memory safety guarantees."
                .to_string(),
        ),
        (
            "doc2".to_string(),
            "TypeDialog is an interactive forms library for multiple backends."
                .to_string(),
        ),
        (
            "doc3".to_string(),
            "RAG combines retrieval with generation for better LLM responses.".to_string(),
        ),
        (
            "doc4".to_string(),
            "Knowledge graphs model relationships between entities in data."
                .to_string(),
        ),
        (
            "doc5".to_string(),
            "Embeddings transform text into dense vector representations."
                .to_string(),
        ),
    ];
    // Capture the count before `documents` is moved into the batch call, so
    // the report below cannot drift from the actual list size.
    let doc_total = documents.len();

    let start = Instant::now();
    rag.add_documents_batch(documents)?;
    let batch_duration = start.elapsed();

    println!("✓ Added {} documents using batch operation", doc_total);
    println!(" Duration: {:.2}ms", batch_duration.as_secs_f64() * 1000.0);
    println!(" Document count: {}\n", rag.doc_count());

    // === RETRIEVE DOCUMENTS ===
    banner(&["║ 2. RETRIEVE RELEVANT DOCUMENTS ║"]);

    let query = "programming language memory safety";
    println!("Query: \"{}\"\n", query);

    let results = rag.retrieve(query)?;
    println!("Retrieved {} document(s):\n", results.len());

    // Each hit carries a combined score plus its semantic/keyword components.
    for (idx, result) in results.iter().enumerate() {
        println!(
            "{}. [{}] Score: {:.3} (Semantic: {:.3}, Keyword: {:.3})",
            idx + 1,
            result.doc_id,
            result.combined_score,
            result.semantic_score,
            result.keyword_score
        );
        println!(" Content: {}\n", result.content);
    }

    // === BATCH REMOVE DOCUMENTS ===
    banner(&["║ 3. BATCH REMOVE DOCUMENTS (Efficient) ║"]);

    let to_remove = vec!["doc2", "doc4"];
    println!("Removing: {:?}", to_remove);

    // remove_documents_batch returns how many ids were actually removed.
    let removed = rag.remove_documents_batch(&to_remove);
    println!("✓ Removed {} document(s)", removed);
    println!(" Remaining documents: {}\n", rag.doc_count());

    // === PERFORMANCE COMPARISON ===
    banner(&["║ 4. PERFORMANCE: BATCH vs SEQUENTIAL (20 docs) ║"]);

    let large_docs: Vec<_> = (0..20)
        .map(|i| {
            (
                format!("perf_doc{}", i),
                format!("Performance test document number {}", i),
            )
        })
        .collect();
    // Captured before the vec is cloned/moved so the printouts below stay
    // correct if the generator range changes.
    let n_docs = large_docs.len();

    // Sequential: one insert per document.
    let mut rag_seq = RagSystem::new(RagConfig::default())?;
    let start = Instant::now();
    rag_seq.add_documents(large_docs.clone())?;
    let seq_duration = start.elapsed();

    // Batch: single bulk insert.
    let mut rag_batch = RagSystem::new(RagConfig::default())?;
    let start = Instant::now();
    rag_batch.add_documents_batch(large_docs)?;
    let batch_duration = start.elapsed();

    // `.max(1)` guards against a zero-nanosecond batch duration, which would
    // otherwise divide by zero and print "inf"/"NaN" for the ratio.
    let speedup = seq_duration.as_nanos() as f64 / batch_duration.as_nanos().max(1) as f64;

    println!(
        "Sequential add ({} docs): {:.2}ms",
        n_docs,
        seq_duration.as_secs_f64() * 1000.0
    );
    println!(
        "Batch add ({} docs): {:.2}ms",
        n_docs,
        batch_duration.as_secs_f64() * 1000.0
    );
    println!("Speedup: {:.2}x\n", speedup);

    println!("💡 Tip: Use batch operations for large document sets (100+)");
    println!(" Batch avoids multiple HNSW index rebuilds!\n");

    // === SUMMARY ===
    banner(&["║ SUMMARY ║"]);

    println!("RAG System Features:");
    println!(" ✓ Semantic search (vector embeddings)");
    println!(" ✓ Keyword search (full-text indexing)");
    println!(" ✓ Hybrid retrieval (combines both)");
    println!(" ✓ Batch operations (efficient bulk processing)");
    println!(" ✓ Configurable weights and thresholds");
    println!(" ✓ Persistence (save/load to disk)\n");

    println!("Next Steps:");
    println!(" • Build with Knowledge Graph integration");
    println!(" • Use with different embedding models");
    println!(" • Integrate with LLM backends");
    println!(" • Add custom similarity metrics\n");

    Ok(())
}
/// Fallback entry point when the example is compiled without the
/// `ai_backend` feature: print usage guidance to stderr and exit non-zero.
#[cfg(not(feature = "ai_backend"))]
fn main() {
    let notes = [
        "This example requires the 'ai_backend' feature.",
        "Run with: cargo run --example main --features ai_backend",
    ];
    for note in &notes {
        eprintln!("{}", note);
    }
    std::process::exit(1);
}