//! Basic RAG system example: Document ingestion and embedding //! //! This example demonstrates: //! 1. Creating a chunking engine //! 2. Creating an embedding engine //! 3. Creating a document ingester //! 4. Chunking and embedding documents //! //! Run with: `cargo run --example basic_ingestion` #![allow(clippy::field_reassign_with_default)] use provisioning_rag::{ chunking::ChunkingEngine, config::{EmbeddingConfig, IngestionConfig}, embeddings::EmbeddingEngine, ingestion::DocumentIngester, }; #[tokio::main] async fn main() -> Result<(), Box> { // Initialize tracing for logging tracing_subscriber::fmt::init(); println!("🚀 Provisioning RAG System - Basic Ingestion Example\n"); // Step 1: Create embedding engine println!("Step 1: Creating embedding engine..."); let mut embedding_config = EmbeddingConfig::default(); embedding_config.provider = "local".to_string(); // Use local to avoid API costs let embedding_engine = EmbeddingEngine::new(embedding_config)?; println!("✓ Embedding engine created (local model)\n"); // Step 2: Create document ingester println!("Step 2: Creating document ingester..."); let ingestion_config = IngestionConfig::default(); let ingester = DocumentIngester::new(ingestion_config, embedding_engine); println!("✓ Document ingester created\n"); // Step 3: Chunk a sample markdown document println!("Step 3: Chunking sample markdown document..."); let markdown_content = r#" # Provisioning Platform ## Overview The provisioning platform is a unified infrastructure automation system built with Rust and Nushell for managing cloud resources across multiple providers. ### Key Features - Multi-cloud support (AWS, UpCloud, local) - KCL configuration language integration - Kubernetes cluster management - Comprehensive security system - REST API and MCP integration ## Architecture ### Core Components 1. **Orchestrator**: Central task coordination service 2. **Control Center**: Web-based management interface 3. **MCP Server**: Model Context Protocol integration 4. **Platform Services**: Additional ecosystem services ### Data Flow Documents flow through: 1. Configuration (KCL schemas) 2. Validation (type checking) 3. Execution (provider operations) 4. Monitoring (health checks) "#; let chunking_engine = ChunkingEngine::new(1024, 100); let chunks = chunking_engine.chunk_markdown(markdown_content, "README.md")?; println!("✓ Document chunked into {} chunks\n", chunks.len()); for (i, chunk) in chunks.iter().enumerate() { println!("Chunk {}:", i + 1); println!(" ID: {}", chunk.id); println!(" Size: {} chars", chunk.content.len()); if let Some(heading) = chunk.metadata.get("heading_path") { println!(" Path: {}", heading); } println!(); } // Step 4: Embed the chunks println!("Step 4: Embedding chunks..."); let embedded_docs = ingester.embedding_engine().embed_batch(&chunks).await?; println!("✓ Embedded {} documents\n", embedded_docs.len()); for (i, doc) in embedded_docs.iter().enumerate() { println!("Embedded document {}:", i + 1); println!(" ID: {}", doc.id); println!(" Vector dimension: {}", doc.embedding.len()); println!( " Content preview: {}...", &doc.content[..doc.content.len().min(50)] ); println!(); } println!("✅ Example completed successfully!"); println!("\nNext steps:"); println!("1. Store embedded documents in SurrealDB"); println!("2. Implement RAG agent for semantic search"); println!("3. Add LLM integration for response generation"); Ok(()) }