//! Complete RAG workflow: Chunking -> Embedding -> Storage -> Retrieval //! //! This example demonstrates the full RAG pipeline including document //! persistence in SurrealDB and similarity-based retrieval. //! //! Run with: `cargo run --example storage_integration` #![allow(clippy::field_reassign_with_default)] use provisioning_rag::{ config::{EmbeddingConfig, IngestionConfig, VectorDbConfig}, db::DbConnection, embeddings::EmbeddingEngine, ingestion::DocumentIngester, }; #[tokio::main] async fn main() -> Result<(), Box> { // Initialize logging tracing_subscriber::fmt::init(); println!("šŸš€ Provisioning RAG System - Storage Integration Example\n"); // Step 1: Setup database connection println!("Step 1: Connecting to SurrealDB..."); let mut db_config = VectorDbConfig::default(); db_config.url = "memory".to_string(); // Use in-memory for demo db_config.database = "rag_demo".to_string(); let db = DbConnection::new(db_config).await?; db.initialize_schema().await?; println!("āœ“ Connected and schema initialized\n"); // Step 2: Create embedding engine (local for demo) println!("Step 2: Creating embedding engine..."); let mut embedding_config = EmbeddingConfig::default(); embedding_config.provider = "local".to_string(); let embedding_engine = EmbeddingEngine::new(embedding_config)?; println!("āœ“ Embedding engine ready (local mode)\n"); // Step 3: Create document ingester println!("Step 3: Creating document ingester..."); let ingestion_config = IngestionConfig::default(); let ingester = DocumentIngester::new(ingestion_config, embedding_engine); println!("āœ“ Document ingester created\n"); // Step 4: Prepare sample documents println!("Step 4: Processing sample documents..."); let sample_markdown = r#" # Provisioning Platform Architecture ## Overview The provisioning platform provides unified infrastructure automation across multiple cloud providers. ## Key Components ### Orchestrator Central task coordination service that manages all infrastructure operations. ### Control Center Web-based management interface for monitoring and control. ### MCP Server Model Context Protocol integration for AI-powered operations. ## Security Enterprise-grade security with JWT authentication, Cedar authorization, and MFA support. "#; // Chunk the document let chunks = ingester .chunking_engine() .chunk_markdown(sample_markdown, "architecture.md")?; println!("āœ“ Document chunked into {} chunks\n", chunks.len()); // Step 5: Embed and store documents println!("Step 5: Embedding and storing documents..."); let embedded_docs = ingester.embedding_engine().embed_batch(&chunks).await?; let stored_count = db.store_documents(&embedded_docs).await?; println!("āœ“ Stored {} documents\n", stored_count); // Step 6: Store deployment event println!("Step 6: Recording deployment event..."); db.store_deployment_event( "librecloud", // workspace "aws-prod", // infrastructure "taskserv_create", // event_type "success", // status "kubernetes", // resource_name "aws", // provider ) .await?; println!("āœ“ Deployment event recorded\n"); // Step 7: Get system statistics println!("Step 7: Retrieving system statistics..."); let stats = db.get_statistics().await?; println!( "šŸ“Š RAG Statistics:\n Documents: {}\n Deployments: {}\n", stats.total_documents, stats.total_deployments ); // Step 8: Demonstrate retrieval (mock similarity search) println!("Step 8: Performing similarity search..."); if let Some(first_doc) = embedded_docs.first() { // Search for documents similar to the first document let similar_docs = db.search_similar(&first_doc.embedding, 5, 0.5).await?; println!("āœ“ Found {} similar documents", similar_docs.len()); if let Some(result) = similar_docs.first() { println!( "\nšŸ“„ Top Result:\n ID: {}\n Type: {}\n Size: {} chars\n", result.id, result.doc_type, result.content.len() ); } } // Step 9: Demonstrate retrieval by ID println!("\nStep 9: Retrieving document by ID..."); if let Some(first_doc) = embedded_docs.first() { if let Some(retrieved) = db.get_document(&first_doc.id).await? { println!( "āœ“ Retrieved: {} ({} bytes)", retrieved.id, retrieved.content.len() ); } } println!("\nāœ… Complete integration example finished!\n"); println!("Key features demonstrated:"); println!(" • Document chunking (heading-aware)"); println!(" • Embedding generation"); println!(" • Batch document storage"); println!(" • Deployment event tracking"); println!(" • Vector similarity search"); println!(" • Document retrieval by ID"); println!(" • System statistics"); Ok(()) }