// Snapshot metadata (not part of the source): 2025-12-24 03:22:48 +00:00 — 171 lines, 7.8 KiB, Rust.
//! AI Backend Example: RAG System with Batch Operations
//!
//! Demonstrates:
//! - Creating a RAG (Retrieval-Augmented Generation) system
//! - Adding documents using batch operations (efficient)
//! - Retrieving relevant documents for queries
//! - Removing documents using batch operations
//! - Performance comparison: batch vs sequential operations
//!
//! Run with: cargo run --example main --features ai_backend
//! Or via just: just build::ai && cargo run --example main --features ai_backend
#[cfg(feature = "ai_backend")]
fn main() -> Result<(), Box<dyn std::error::Error>> {
    use std::time::Instant;
    use typedialog_core::ai::rag::{RagConfig, RagSystem};

    /// Prints a boxed section banner: top border, each caption line
    /// verbatim, then the bottom border followed by a blank line.
    fn banner(captions: &[&str]) {
        const TOP: &str = "╔════════════════════════════════════════════════════════╗";
        const BOTTOM: &str = "╚════════════════════════════════════════════════════════╝";
        println!("{}", TOP);
        for caption in captions {
            println!("{}", caption);
        }
        println!("{}\n", BOTTOM);
    }

    /// Converts a duration to fractional milliseconds for display.
    fn ms(duration: std::time::Duration) -> f64 {
        duration.as_secs_f64() * 1000.0
    }

    println!();
    banner(&[
        "║ TypeDialog AI Backend - RAG System Example ║",
        "║ Batch Operations Performance Demonstration ║",
    ]);

    // Create RAG system with default configuration.
    let mut rag = RagSystem::new(RagConfig::default())?;
    println!("✓ Created RAG system with default config");
    println!(" - Semantic weight: 0.6");
    println!(" - Keyword weight: 0.4");
    println!(" - Max results: 5\n");

    // === 1. BATCH ADD DOCUMENTS ===
    banner(&["║ 1. BATCH ADD DOCUMENTS (Efficient) ║"]);

    // (id, content) pairs, built data-first to avoid repeating the
    // `.to_string()` boilerplate per document.
    let documents: Vec<(String, String)> = [
        ("doc1", "Rust is a systems programming language with memory safety guarantees."),
        ("doc2", "TypeDialog is an interactive forms library for multiple backends."),
        ("doc3", "RAG combines retrieval with generation for better LLM responses."),
        ("doc4", "Knowledge graphs model relationships between entities in data."),
        ("doc5", "Embeddings transform text into dense vector representations."),
    ]
    .into_iter()
    .map(|(id, content)| (id.to_string(), content.to_string()))
    .collect();

    let start = Instant::now();
    rag.add_documents_batch(documents)?;
    let batch_duration = start.elapsed();
    println!("✓ Added 5 documents using batch operation");
    println!(" Duration: {:.2}ms", ms(batch_duration));
    println!(" Document count: {}\n", rag.doc_count());

    // === 2. RETRIEVE DOCUMENTS ===
    banner(&["║ 2. RETRIEVE RELEVANT DOCUMENTS ║"]);
    let query = "programming language memory safety";
    println!("Query: \"{}\"\n", query);
    let results = rag.retrieve(query)?;
    println!("Retrieved {} document(s):\n", results.len());
    for (idx, result) in results.iter().enumerate() {
        println!(
            "{}. [{}] Score: {:.3} (Semantic: {:.3}, Keyword: {:.3})",
            idx + 1,
            result.doc_id,
            result.combined_score,
            result.semantic_score,
            result.keyword_score
        );
        println!(" Content: {}\n", result.content);
    }

    // === 3. BATCH REMOVE DOCUMENTS ===
    banner(&["║ 3. BATCH REMOVE DOCUMENTS (Efficient) ║"]);
    let to_remove = vec!["doc2", "doc4"];
    println!("Removing: {:?}", to_remove);
    let removed = rag.remove_documents_batch(&to_remove);
    println!("✓ Removed {} document(s)", removed);
    println!(" Remaining documents: {}\n", rag.doc_count());

    // === 4. PERFORMANCE COMPARISON ===
    banner(&["║ 4. PERFORMANCE: BATCH vs SEQUENTIAL (20 docs) ║"]);
    let large_docs: Vec<_> = (0..20)
        .map(|i| {
            (
                format!("perf_doc{}", i),
                format!("Performance test document number {}", i),
            )
        })
        .collect();

    // Sequential baseline: one add (and index update) per document.
    let mut rag_seq = RagSystem::new(RagConfig::default())?;
    let start = Instant::now();
    rag_seq.add_documents(large_docs.clone())?;
    let seq_duration = start.elapsed();

    // Batch: single call over the same document set.
    let mut rag_batch = RagSystem::new(RagConfig::default())?;
    let start = Instant::now();
    rag_batch.add_documents_batch(large_docs)?;
    let batch_duration = start.elapsed();

    // Clamp the denominator to 1ns so a sub-resolution batch timing
    // can't produce an `inf`/`NaN` speedup in the printed report.
    let speedup = seq_duration.as_nanos() as f64 / batch_duration.as_nanos().max(1) as f64;
    println!("Sequential add (20 docs): {:.2}ms", ms(seq_duration));
    println!("Batch add (20 docs): {:.2}ms", ms(batch_duration));
    println!("Speedup: {:.2}x\n", speedup);
    println!("💡 Tip: Use batch operations for large document sets (100+)");
    println!(" Batch avoids multiple HNSW index rebuilds!\n");

    // === SUMMARY ===
    banner(&["║ SUMMARY ║"]);
    println!("RAG System Features:");
    println!(" ✓ Semantic search (vector embeddings)");
    println!(" ✓ Keyword search (full-text indexing)");
    println!(" ✓ Hybrid retrieval (combines both)");
    println!(" ✓ Batch operations (efficient bulk processing)");
    println!(" ✓ Configurable weights and thresholds");
    println!(" ✓ Persistence (save/load to disk)\n");
    println!("Next Steps:");
    println!(" • Build with Knowledge Graph integration");
    println!(" • Use with different embedding models");
    println!(" • Integrate with LLM backends");
    println!(" • Add custom similarity metrics\n");
    Ok(())
}
#[cfg(not(feature = "ai_backend"))]
fn main() {
    // Fallback entry point when the `ai_backend` feature is disabled:
    // print usage guidance to stderr and exit with a failure status.
    let guidance = [
        "This example requires the 'ai_backend' feature.",
        "Run with: cargo run --example main --features ai_backend",
    ];
    for line in guidance {
        eprintln!("{}", line);
    }
    std::process::exit(1);
}