// Source: prvng_platform/crates/rag/examples/rag_rest_api.rs

//! Example: RAG REST API Server
//!
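//! Prints a walkthrough of the REST API for the RAG Phase 7 features:
//! endpoints, example request/response payloads, and operational notes.
//! This example does not start an HTTP server; it only writes the
//! description to stdout.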
//!
//! # Features
//! - Query endpoint with optional conversation context
//! - Batch processing endpoint
//! - Conversation management
//! - Cache statistics and control
//! - Tool execution
//! - Health and status checks
#![allow(unused_imports)]
use std::sync::Arc;
use provisioning_rag::{
create_router, ApiState, BatchAgent, BatchQueryRequest, ConversationAgent, ConversationRequest,
QueryOptimizer, QueryRequest, RagAgent, ResponseCache,
};
use tokio::sync::RwLock;
/// Entry point for the RAG REST API walkthrough example.
///
/// Installs a `tracing` subscriber capped at `INFO`, then prints a numbered,
/// fourteen-section description of the REST API to stdout. Every section is
/// static text: no HTTP server is started and no RAG component is constructed
/// here. Each section lives in its own private helper so it can be read and
/// edited in isolation; the helpers are called in their printed order.
///
/// # Errors
///
/// The body contains no fallible step and always returns `Ok(())`.
///
/// # Panics
///
/// `init()` panics if a global tracing subscriber has already been installed
/// (see the `tracing_subscriber` documentation).
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // Initialize logging
    tracing_subscriber::fmt()
        .with_max_level(tracing::Level::INFO)
        .init();

    println!("=== RAG REST API Example ===\n");

    print_components();
    print_api_state();
    print_endpoints();
    print_example_requests();
    print_response_format();
    print_batch_response();
    print_health_check();
    print_integration_patterns();
    print_performance();
    print_error_handling();
    print_security();
    print_deployment();
    print_monitoring();
    print_checklist();

    println!("\n✅ REST API example complete!\n");
    Ok(())
}

/// Section 1: lists the RAG system components by name.
///
/// Nothing is constructed here; in a real implementation these components
/// would be properly initialized.
fn print_components() {
    println!("1. Initializing RAG System Components\n");
    println!(" • RAG Agent");
    println!(" • Response Cache (LRU, 1000 items)");
    println!(" • Query Optimizer");
    println!(" • Conversation Agent");
    println!(" • Batch Agent");
}

/// Section 2: describes what the API state is meant to contain.
fn print_api_state() {
    println!("\n2. Creating API State\n");
    println!(" State will contain:");
    println!(" • Active RAG agent instance");
    println!(" • Response cache for hit rate optimization");
    println!(" • Query optimizer for intent detection");
    println!(" • Conversation context manager");
    println!(" • Batch processing coordinator");
}

/// Section 3: prints the endpoint table, grouped by feature area.
fn print_endpoints() {
    println!("\n3. API Endpoints Configuration\n");
    println!(" Health & Info:");
    println!(" GET /health - Service health check");
    println!(" GET /info - API information");
    println!("\n Query Endpoints:");
    println!(" POST /query - Single query processing");
    println!(" POST /query/stream - Streaming response");
    println!("\n Batch Processing:");
    println!(" POST /batch - Submit batch job");
    println!(" GET /batch/:job_id - Get batch status");
    println!("\n Conversation Management:");
    println!(" POST /conversation - Send message");
    println!(" GET /conversation/:conv_id - Get history");
    println!("\n Cache Management:");
    println!(" GET /cache/stats - Cache statistics");
    println!(" POST /cache/clear - Clear cache");
    println!("\n Tool Execution:");
    println!(" GET /tools - List available tools");
    println!(" POST /tools/:id/execute - Execute tool");
}

/// Section 4: prints sample request bodies (a–e) as JSON-like text.
///
/// `{{` / `}}` are format-string escapes and print as single braces.
fn print_example_requests() {
    println!("\n4. Example API Requests\n");
    println!(" a) Simple Query Request:");
    println!(" POST /query");
    println!(" {{");
    println!(" \"query\": \"What is Kubernetes?\",");
    println!(" \"conversation_context\": null,");
    println!(" \"use_hybrid_search\": true,");
    println!(" \"num_results\": 5");
    println!(" }}");
    println!("\n b) Query with Context (Follow-up):");
    println!(" POST /query");
    println!(" {{");
    println!(" \"query\": \"Tell me more about services\",");
    println!(" \"conversation_context\": \"We discussed Kubernetes deployment\",");
    println!(" \"use_hybrid_search\": true,");
    println!(" \"num_results\": 5");
    println!(" }}");
    println!("\n c) Batch Processing Request:");
    println!(" POST /batch");
    println!(" {{");
    println!(" \"queries\": [");
    println!(" \"What is Docker?\",");
    println!(" \"How to use volumes?\",");
    println!(" \"Networking best practices\"");
    println!(" ],");
    println!(" \"max_concurrent\": 3,");
    println!(" \"timeout_secs\": 30");
    println!(" }}");
    println!("\n d) Conversation Request:");
    println!(" POST /conversation");
    println!(" {{");
    println!(" \"message\": \"How do I deploy?\",");
    println!(" \"conversation_id\": \"conv-12345\"");
    println!(" }}");
    println!("\n e) Cache Statistics:");
    println!(" GET /cache/stats");
    println!(" Response: {{");
    println!(" \"items_in_cache\": 125,");
    println!(" \"hits\": 4523,");
    println!(" \"misses\": 1456,");
    println!(" \"hit_rate\": 0.756");
    println!(" }}");
}

/// Section 5: prints the sample success and error response shapes.
fn print_response_format() {
    println!("\n5. Response Format\n");
    println!(" Success Response (200 OK):");
    println!(" {{");
    println!(" \"answer\": \"Kubernetes is...\",");
    println!(" \"sources\": [");
    println!(" {{");
    println!(" \"doc_id\": \"doc-1\",");
    println!(" \"source_path\": \"/docs/kubernetes.md\",");
    println!(" \"doc_type\": \"markdown\",");
    println!(" \"content\": \"...\",");
    println!(" \"similarity\": 0.95,");
    println!(" \"metadata\": {{}}");
    println!(" }}");
    println!(" ],");
    println!(" \"confidence\": 0.92,");
    println!(" \"context\": \"...\"");
    println!(" }}");
    println!("\n Error Response (4xx/5xx):");
    println!(" {{");
    println!(" \"error\": \"Invalid input provided\",");
    println!(" \"code\": \"INVALID_INPUT\",");
    println!(" \"status\": 400");
    println!(" }}");
}

/// Section 6: prints a sample batch-processing response with its stats block.
fn print_batch_response() {
    println!("\n6. Batch Processing Response\n");
    println!(" POST /batch Response (200 OK):");
    println!(" {{");
    println!(" \"job_id\": \"batch-xyz789\",");
    println!(" \"results\": [");
    println!(" {{ \"answer\": \"...\", \"sources\": [...], \"confidence\": 0.9, ... }},");
    println!(" {{ \"answer\": \"...\", \"sources\": [...], \"confidence\": 0.85, ... }},");
    println!(" {{ \"answer\": \"...\", \"sources\": [...], \"confidence\": 0.88, ... }}");
    println!(" ],");
    println!(" \"stats\": {{");
    println!(" \"total_queries\": 3,");
    println!(" \"successful_queries\": 3,");
    println!(" \"failed_queries\": 0,");
    println!(" \"success_rate\": 1.0,");
    println!(" \"total_duration_ms\": 1500");
    println!(" }}");
    println!(" }}");
}

/// Section 7: prints a sample health-check response.
fn print_health_check() {
    println!("\n7. Health Check Response\n");
    println!(" GET /health");
    println!(" {{");
    println!(" \"status\": \"healthy\",");
    println!(" \"version\": \"0.1.0\",");
    println!(" \"components\": {{");
    println!(" \"agent\": \"operational\",");
    println!(" \"cache\": \"operational\",");
    println!(" \"database\": \"operational\"");
    println!(" }}");
    println!(" }}");
}

/// Section 8: prints the four client integration patterns as step lists.
fn print_integration_patterns() {
    println!("\n8. Integration Patterns\n");
    println!(" Pattern 1: Simple Query");
    println!(" 1. POST /query with question");
    println!(" 2. Parse JSON response");
    println!(" 3. Display answer and sources");
    println!("\n Pattern 2: Multi-turn Conversation");
    println!(" 1. POST /conversation with first message");
    println!(" 2. POST /conversation with follow-up (system maintains context)");
    println!(" 3. System detects follow-ups and injects context");
    println!("\n Pattern 3: Batch Processing");
    println!(" 1. POST /batch with multiple queries");
    println!(" 2. Poll GET /batch/:job_id for progress");
    println!(" 3. Receive aggregated results");
    println!("\n Pattern 4: Cache Optimization");
    println!(" 1. Monitor GET /cache/stats");
    println!(" 2. Cache hit rate >70% indicates good effectiveness");
    println!(" 3. POST /cache/clear if needed");
}

/// Section 9: prints the latency, throughput and resource-usage figures.
///
/// The numbers are hard-coded text; nothing is measured here.
fn print_performance() {
    println!("\n9. Performance Characteristics\n");
    println!(" Response Latency:");
    println!(" Cache hit: <5ms");
    println!(" Cache miss: 500-1000ms");
    println!(" Batch job: 500ms per query (parallel)");
    println!(" Conversation: <20ms additional overhead");
    println!("\n Throughput:");
    println!(" Single query: 2-3 requests/second");
    println!(" Batch (5 concurrent): 10-15 queries/second");
    println!(" With caching: 100+ cache hits/second");
    println!("\n Resource Usage:");
    println!(" Memory per query: ~1-5 MB");
    println!(" Cache storage: ~100 KB per cached response");
    println!(" Batch job overhead: <10 MB for 1000 queries");
}

/// Section 10: prints the HTTP status codes and application error codes.
fn print_error_handling() {
    println!("\n10. Error Handling\n");
    println!(" HTTP Status Codes:");
    println!(" 200 OK - Successful query");
    println!(" 400 Bad Request - Invalid query or parameters");
    println!(" 404 Not Found - Resource not found");
    println!(" 500 Internal Server Error - System error");
    println!("\n Error Codes:");
    println!(" INVALID_CONFIG - Configuration problem");
    println!(" INVALID_INPUT - Bad request data");
    println!(" EMBEDDING_ERROR - Vector generation failed");
    println!(" RETRIEVAL_ERROR - Document search failed");
    println!(" LLM_ERROR - Language model error");
    println!(" DB_ERROR - Database connection error");
    println!(" TOOL_ERROR - Tool execution failed");
}

/// Section 11: prints the security considerations list.
fn print_security() {
    println!("\n11. Security Considerations\n");
    println!(" ✓ Request validation on all endpoints");
    println!(" ✓ Error messages don't expose internal details");
    println!(" ✓ Tool execution requires authorization");
    println!(" ✓ Rate limiting per endpoint");
    println!(" ✓ Query complexity limits");
    println!(" ✓ Audit logging of all operations");
}

/// Section 12: prints the development command and production deployment notes.
fn print_deployment() {
    println!("\n12. Deployment\n");
    println!(" Development:");
    println!(" PORT=3000 cargo run --example rag_rest_api");
    println!("\n Production:");
    println!(" - Docker containerization");
    println!(" - Kubernetes deployment");
    println!(" - Load balancing across instances");
    println!(" - Health check endpoints");
    println!(" - Graceful shutdown handling");
    println!(" - TLS/HTTPS enforcement");
}

/// Section 13: prints the suggested metrics and logging items.
fn print_monitoring() {
    println!("\n13. Monitoring & Observability\n");
    println!(" Metrics to track:");
    println!(" - Request latency (P50, P95, P99)");
    println!(" - Error rate by endpoint");
    println!(" - Cache hit rate");
    println!(" - Batch processing throughput");
    println!(" - Tool execution success rate");
    println!(" - Database query performance");
    println!("\n Logging:");
    println!(" - All requests with metadata");
    println!(" - Errors with full context");
    println!(" - Performance metrics");
    println!(" - Cache statistics");
}

/// Section 14: prints the unchecked implementation checklist.
fn print_checklist() {
    println!("\n14. Implementation Checklist\n");
    println!(" ☐ Set up Axum HTTP server");
    println!(" ☐ Implement streaming responses");
    println!(" ☐ Add request validation middleware");
    println!(" ☐ Implement batch job persistence");
    println!(" ☐ Add CORS support");
    println!(" ☐ Create OpenAPI/Swagger documentation");
    println!(" ☐ Set up request logging");
    println!(" ☐ Implement graceful shutdown");
    println!(" ☐ Add rate limiting middleware");
    println!(" ☐ Create deployment manifests");
}