// prvng_platform/crates/rag/examples/rag_batch_processing.rs

//! Example: RAG Batch Processing
//!
//! Demonstrates parallel query processing with:
//! - Priority-based execution
//! - Concurrent query processing
//! - Progress tracking
//! - Error handling and retries
//! - Result aggregation and statistics
#![allow(clippy::useless_vec)]
use provisioning_rag::{BatchJob, BatchQuery};
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // Initialize logging so library tracing output is visible at INFO level.
    tracing_subscriber::fmt()
        .with_max_level(tracing::Level::INFO)
        .init();
    println!("=== RAG Batch Processing Example ===\n");

    // 1. Create batch queries as (text, priority) pairs.
    //    Higher priority value = executed earlier after sorting.
    println!("1. Creating Batch Queries\n");
    let queries = [
        ("What is Kubernetes?", 90),           // High priority
        ("How do I deploy applications?", 50), // Medium priority
        ("Explain networking concepts", 70),   // Higher priority
        ("What about persistence?", 40),       // Lower priority
        ("Compare orchestration tools", 80),   // Higher priority
    ];
    let batch_queries: Vec<BatchQuery> = queries
        .into_iter()
        .map(|(query, priority)| {
            BatchQuery::new(query.into())
                .with_priority(priority)
                .with_retries(2) // allow 2 retries for transient failures
        })
        .collect();
    println!("Created {} queries", batch_queries.len());
    for query in &batch_queries {
        println!(" - ID: {}", query.query_id);
        println!(" Query: \"{}\"", query.query);
        println!(" Priority: {}", query.priority);
        println!(" Retries: {}\n", query.retry_count);
    }

    // 2. Create batch job and show its default configuration.
    println!("\n2. Creating Batch Job Configuration\n");
    let mut job = BatchJob::new(batch_queries);
    println!("Job ID: {}", job.job_id);
    println!("Total queries: {}", job.total_queries());
    println!("Default max_concurrent: {}", job.max_concurrent);
    println!("Default timeout: {}s\n", job.timeout_secs);

    // 3. Configure job: 2 queries in parallel, 30s per-query timeout.
    println!("\n3. Configuring Batch Job\n");
    job = job.with_max_concurrent(2).with_timeout(30);
    println!(
        "Updated max_concurrent: {} (process 2 queries in parallel)",
        job.max_concurrent
    );
    println!("Updated timeout: {}s per query\n", job.timeout_secs);

    // 4. Demonstrate priority sorting (highest priority first).
    println!("\n4. Priority-Based Query Ordering\n");
    println!("Before sorting:");
    for (i, q) in job.queries.iter().enumerate() {
        println!(" {}. \"{}\" (priority: {})", i + 1, q.query, q.priority);
    }
    job.sort_by_priority();
    println!("\nAfter sorting by priority (highest first):");
    for (i, q) in job.queries.iter().enumerate() {
        println!(" {}. \"{}\" (priority: {})", i + 1, q.query, q.priority);
    }
    println!();

    // 5. Demonstrate the parallel execution strategy: queries are grouped
    //    into chunks of `max_concurrent` that run concurrently.
    println!("\n5. Parallel Execution Strategy\n");
    println!("With max_concurrent=2, queries execute in batches:\n");
    let max_concurrent = job.max_concurrent;
    for (batch_num, chunk) in job.queries.chunks(max_concurrent).enumerate() {
        println!("Batch {} (parallel execution):", batch_num + 1);
        for (i, q) in chunk.iter().enumerate() {
            println!(" {}. \"{}\" (p:{})", i + 1, q.query, q.priority);
        }
        println!();
    }

    // 6. Explain per-query timeout and retry handling.
    println!("\n6. Timeout and Retry Strategy\n");
    println!("Per-query configuration:");
    for q in &job.queries {
        println!(" Query: \"{}\"", q.query);
        println!(" Timeout: {}s", job.timeout_secs);
        println!(" Max retries: {}", q.retry_count);
        println!(" Logic:");
        println!(" 1. Try to execute query");
        if q.retry_count > 0 {
            println!(" 2. If timeout, retry up to {} times", q.retry_count);
        } else {
            println!(" 2. If timeout, fail immediately");
        }
        println!(" 3. If max retries exceeded, store error result\n");
    }

    // 7. Expected progress tracking. Progress counts are derived from the
    //    actual simulated batch sizes (not hard-coded) so the printed
    //    numbers stay correct if this list is ever edited.
    println!("\n7. Expected Progress Tracking During Execution\n");
    println!("Simulated execution timeline:\n");
    let simulated_batches = [
        vec![
            "Q1: What is Kubernetes? (p:90)",
            "Q2: Explain networking (p:70)",
        ],
        vec!["Q3: Compare tools (p:80)", "Q4: How to deploy? (p:50)"],
        vec!["Q5: What about persistence? (p:40)"],
    ];
    let total_queries: usize = simulated_batches.iter().map(|b| b.len()).sum();
    let mut completed = 0usize;
    for (batch_num, batch) in simulated_batches.iter().enumerate() {
        println!(
            "Time: {}ms - Executing Batch {}:",
            batch_num * 500,
            batch_num + 1
        );
        for query in batch {
            println!("{}", query);
        }
        completed += batch.len();
        // Truncating cast is safe: percent is always in 0..=100 here.
        let percent = (completed as f32 / total_queries as f32 * 100.0) as u8;
        println!(
            "Progress: {}/{} completed ({}%)\n",
            completed, total_queries, percent
        );
    }

    // 8. Result aggregation and summary statistics.
    println!("\n8. Result Aggregation and Statistics\n");
    println!("Expected batch results:");
    println!(" ✅ Q1: What is Kubernetes? → Success (520ms)");
    println!(" ✅ Q2: Explain networking → Success (480ms)");
    println!(" ✅ Q3: Compare tools → Success (610ms)");
    println!(" ✅ Q4: How to deploy? → Success (450ms)");
    println!(" ✅ Q5: What about persistence? → Success (490ms)\n");
    println!("Aggregated Statistics:");
    println!(" Total queries: 5");
    println!(" Successful: 5");
    println!(" Failed: 0");
    println!(" Success rate: 100.0%");
    println!(" Total duration: 2550ms");
    println!(" Average duration: 510ms\n");

    // 9. Error handling scenarios the batch processor covers.
    println!("\n9. Error Handling Scenarios\n");
    println!("Scenario 1: Query Timeout");
    println!(" Query exceeds 30s timeout");
    println!(" Action: Retry up to N times");
    println!(" Result: Error result stored\n");
    println!("Scenario 2: Query Fails");
    println!(" Query processing error occurs");
    println!(" Action: Retry based on query config");
    println!(" Result: Error with details stored\n");
    println!("Scenario 3: Partial Success");
    println!(" Some queries succeed, some fail");
    println!(" Result: Mixed results in output");
    println!(" Statistics: Partial success rate\n");

    // 10. Illustrate the speedup from parallel vs sequential execution.
    println!("\n10. Performance Comparison\n");
    println!("Processing 5 queries:");
    println!(" Sequential (1 at a time):");
    println!(" 5 × 500ms = 2500ms total");
    println!(" Parallel (2 concurrent):");
    println!(" [500ms batch, 500ms batch, 500ms batch] = 1500ms total");
    println!(" Speedup: 1.67x faster\n");
    println!("Processing 20 queries:");
    println!(" Sequential: 20 × 500ms = 10000ms total");
    println!(" Parallel (max_concurrent=5):");
    println!(" [500ms] × 4 batches = 2000ms total");
    println!(" Speedup: 5x faster!\n");

    // 11. Configuration recommendations:
    //     (scenario, max_concurrent, timeout_secs, rationale).
    println!("\n11. Configuration Recommendations\n");
    let configs = [
        (
            "Quick response required",
            1,
            5,
            "Sequential, minimize overhead",
        ),
        ("Balanced (default)", 5, 30, "Good balance of parallelism"),
        ("High throughput", 10, 60, "Aggressive parallelism"),
        ("Batch processing", 20, 120, "Maximum parallelism"),
    ];
    for (scenario, max_concurrent, timeout_secs, reason) in configs {
        println!("Scenario: {}", scenario);
        println!(" max_concurrent: {}", max_concurrent);
        println!(" timeout: {}s", timeout_secs);
        println!(" Reason: {}\n", reason);
    }

    // 12. Typical end-to-end integration workflow.
    println!("\n12. Typical Integration Workflow\n");
    println!("1. User submits multiple questions");
    println!("2. Create BatchQuery for each (with priority)");
    println!("3. Create BatchJob with configuration");
    println!("4. Call batch_agent.process_batch(job)");
    println!("5. Queries execute in parallel by priority");
    println!("6. Progress tracked in real-time");
    println!("7. Results collected with statistics");
    println!("8. Display results to user\n");

    // 13. Best practices summary.
    println!("\n13. Best Practices for Batch Processing\n");
    println!("✓ Set realistic priorities based on importance");
    println!("✓ Adjust max_concurrent based on system load");
    println!("✓ Set appropriate timeouts for query type");
    println!("✓ Use retries for transient failures");
    println!("✓ Monitor success rates and adjust config");
    println!("✓ Log results for analytics");
    println!("✓ Combine with query optimization");
    println!("✓ Cache frequent batch patterns\n");
    println!("✅ Batch processing example complete!\n");
    Ok(())
}