310 lines
12 KiB
Rust
310 lines
12 KiB
Rust
|
|
//! Example: RAG REST API Server
|
||
|
|
//!
|
||
|
|
//! Walks through the REST API for all RAG Phase 7 features by printing its endpoints,
//! example payloads, and operational notes. No HTTP server is started by this example.
|
||
|
|
//!
|
||
|
|
//! # Features
|
||
|
|
//! - Query endpoint with optional conversation context
|
||
|
|
//! - Batch processing endpoint
|
||
|
|
//! - Conversation management
|
||
|
|
//! - Cache statistics and control
|
||
|
|
//! - Tool execution
|
||
|
|
//! - Health and status checks
|
||
|
|
|
||
|
|
#![allow(unused_imports)]
|
||
|
|
|
||
|
|
use std::sync::Arc;
|
||
|
|
|
||
|
|
use provisioning_rag::{
|
||
|
|
create_router, ApiState, BatchAgent, BatchQueryRequest, ConversationAgent, ConversationRequest,
|
||
|
|
QueryOptimizer, QueryRequest, RagAgent, ResponseCache,
|
||
|
|
};
|
||
|
|
use tokio::sync::RwLock;
|
||
|
|
|
||
|
|
/// Example REST API Server Setup
|
||
|
|
#[tokio::main]
|
||
|
|
async fn main() -> anyhow::Result<()> {
|
||
|
|
// Initialize logging
|
||
|
|
tracing_subscriber::fmt()
|
||
|
|
.with_max_level(tracing::Level::INFO)
|
||
|
|
.init();
|
||
|
|
|
||
|
|
println!("=== RAG REST API Example ===\n");
|
||
|
|
|
||
|
|
// 1. Initialize RAG system components
|
||
|
|
println!("1. Initializing RAG System Components\n");
|
||
|
|
|
||
|
|
// Create core components (in real implementation, these would be properly
|
||
|
|
// initialized)
|
||
|
|
println!(" • RAG Agent");
|
||
|
|
println!(" • Response Cache (LRU, 1000 items)");
|
||
|
|
println!(" • Query Optimizer");
|
||
|
|
println!(" • Conversation Agent");
|
||
|
|
println!(" • Batch Agent");
|
||
|
|
|
||
|
|
// 2. Create API state
|
||
|
|
println!("\n2. Creating API State\n");
|
||
|
|
|
||
|
|
println!(" State will contain:");
|
||
|
|
println!(" • Active RAG agent instance");
|
||
|
|
println!(" • Response cache for hit rate optimization");
|
||
|
|
println!(" • Query optimizer for intent detection");
|
||
|
|
println!(" • Conversation context manager");
|
||
|
|
println!(" • Batch processing coordinator");
|
||
|
|
|
||
|
|
// 3. Set up routes
|
||
|
|
println!("\n3. API Endpoints Configuration\n");
|
||
|
|
|
||
|
|
println!(" Health & Info:");
|
||
|
|
println!(" GET /health - Service health check");
|
||
|
|
println!(" GET /info - API information");
|
||
|
|
|
||
|
|
println!("\n Query Endpoints:");
|
||
|
|
println!(" POST /query - Single query processing");
|
||
|
|
println!(" POST /query/stream - Streaming response");
|
||
|
|
|
||
|
|
println!("\n Batch Processing:");
|
||
|
|
println!(" POST /batch - Submit batch job");
|
||
|
|
println!(" GET /batch/:job_id - Get batch status");
|
||
|
|
|
||
|
|
println!("\n Conversation Management:");
|
||
|
|
println!(" POST /conversation - Send message");
|
||
|
|
println!(" GET /conversation/:conv_id - Get history");
|
||
|
|
|
||
|
|
println!("\n Cache Management:");
|
||
|
|
println!(" GET /cache/stats - Cache statistics");
|
||
|
|
println!(" POST /cache/clear - Clear cache");
|
||
|
|
|
||
|
|
println!("\n Tool Execution:");
|
||
|
|
println!(" GET /tools - List available tools");
|
||
|
|
println!(" POST /tools/:id/execute - Execute tool");
|
||
|
|
|
||
|
|
// 4. Example requests
|
||
|
|
println!("\n4. Example API Requests\n");
|
||
|
|
|
||
|
|
println!(" a) Simple Query Request:");
|
||
|
|
println!(" POST /query");
|
||
|
|
println!(" {{");
|
||
|
|
println!(" \"query\": \"What is Kubernetes?\",");
|
||
|
|
println!(" \"conversation_context\": null,");
|
||
|
|
println!(" \"use_hybrid_search\": true,");
|
||
|
|
println!(" \"num_results\": 5");
|
||
|
|
println!(" }}");
|
||
|
|
|
||
|
|
println!("\n b) Query with Context (Follow-up):");
|
||
|
|
println!(" POST /query");
|
||
|
|
println!(" {{");
|
||
|
|
println!(" \"query\": \"Tell me more about services\",");
|
||
|
|
println!(" \"conversation_context\": \"We discussed Kubernetes deployment\",");
|
||
|
|
println!(" \"use_hybrid_search\": true,");
|
||
|
|
println!(" \"num_results\": 5");
|
||
|
|
println!(" }}");
|
||
|
|
|
||
|
|
println!("\n c) Batch Processing Request:");
|
||
|
|
println!(" POST /batch");
|
||
|
|
println!(" {{");
|
||
|
|
println!(" \"queries\": [");
|
||
|
|
println!(" \"What is Docker?\",");
|
||
|
|
println!(" \"How to use volumes?\",");
|
||
|
|
println!(" \"Networking best practices\"");
|
||
|
|
println!(" ],");
|
||
|
|
println!(" \"max_concurrent\": 3,");
|
||
|
|
println!(" \"timeout_secs\": 30");
|
||
|
|
println!(" }}");
|
||
|
|
|
||
|
|
println!("\n d) Conversation Request:");
|
||
|
|
println!(" POST /conversation");
|
||
|
|
println!(" {{");
|
||
|
|
println!(" \"message\": \"How do I deploy?\",");
|
||
|
|
println!(" \"conversation_id\": \"conv-12345\"");
|
||
|
|
println!(" }}");
|
||
|
|
|
||
|
|
println!("\n e) Cache Statistics:");
|
||
|
|
println!(" GET /cache/stats");
|
||
|
|
println!(" Response: {{");
|
||
|
|
println!(" \"items_in_cache\": 125,");
|
||
|
|
println!(" \"hits\": 4523,");
|
||
|
|
println!(" \"misses\": 1456,");
|
||
|
|
println!(" \"hit_rate\": 0.756");
|
||
|
|
println!(" }}");
|
||
|
|
|
||
|
|
// 5. Response format
|
||
|
|
println!("\n5. Response Format\n");
|
||
|
|
|
||
|
|
println!(" Success Response (200 OK):");
|
||
|
|
println!(" {{");
|
||
|
|
println!(" \"answer\": \"Kubernetes is...\",");
|
||
|
|
println!(" \"sources\": [");
|
||
|
|
println!(" {{");
|
||
|
|
println!(" \"doc_id\": \"doc-1\",");
|
||
|
|
println!(" \"source_path\": \"/docs/kubernetes.md\",");
|
||
|
|
println!(" \"doc_type\": \"markdown\",");
|
||
|
|
println!(" \"content\": \"...\",");
|
||
|
|
println!(" \"similarity\": 0.95,");
|
||
|
|
println!(" \"metadata\": {{}}");
|
||
|
|
println!(" }}");
|
||
|
|
println!(" ],");
|
||
|
|
println!(" \"confidence\": 0.92,");
|
||
|
|
println!(" \"context\": \"...\"");
|
||
|
|
println!(" }}");
|
||
|
|
|
||
|
|
println!("\n Error Response (4xx/5xx):");
|
||
|
|
println!(" {{");
|
||
|
|
println!(" \"error\": \"Invalid input provided\",");
|
||
|
|
println!(" \"code\": \"INVALID_INPUT\",");
|
||
|
|
println!(" \"status\": 400");
|
||
|
|
println!(" }}");
|
||
|
|
|
||
|
|
// 6. Batch response
|
||
|
|
println!("\n6. Batch Processing Response\n");
|
||
|
|
|
||
|
|
println!(" POST /batch Response (200 OK):");
|
||
|
|
println!(" {{");
|
||
|
|
println!(" \"job_id\": \"batch-xyz789\",");
|
||
|
|
println!(" \"results\": [");
|
||
|
|
println!(" {{ \"answer\": \"...\", \"sources\": [...], \"confidence\": 0.9, ... }},");
|
||
|
|
println!(" {{ \"answer\": \"...\", \"sources\": [...], \"confidence\": 0.85, ... }},");
|
||
|
|
println!(" {{ \"answer\": \"...\", \"sources\": [...], \"confidence\": 0.88, ... }}");
|
||
|
|
println!(" ],");
|
||
|
|
println!(" \"stats\": {{");
|
||
|
|
println!(" \"total_queries\": 3,");
|
||
|
|
println!(" \"successful_queries\": 3,");
|
||
|
|
println!(" \"failed_queries\": 0,");
|
||
|
|
println!(" \"success_rate\": 1.0,");
|
||
|
|
println!(" \"total_duration_ms\": 1500");
|
||
|
|
println!(" }}");
|
||
|
|
println!(" }}");
|
||
|
|
|
||
|
|
// 7. Health check
|
||
|
|
println!("\n7. Health Check Response\n");
|
||
|
|
|
||
|
|
println!(" GET /health");
|
||
|
|
println!(" {{");
|
||
|
|
println!(" \"status\": \"healthy\",");
|
||
|
|
println!(" \"version\": \"0.1.0\",");
|
||
|
|
println!(" \"components\": {{");
|
||
|
|
println!(" \"agent\": \"operational\",");
|
||
|
|
println!(" \"cache\": \"operational\",");
|
||
|
|
println!(" \"database\": \"operational\"");
|
||
|
|
println!(" }}");
|
||
|
|
println!(" }}");
|
||
|
|
|
||
|
|
// 8. Integration patterns
|
||
|
|
println!("\n8. Integration Patterns\n");
|
||
|
|
|
||
|
|
println!(" Pattern 1: Simple Query");
|
||
|
|
println!(" 1. POST /query with question");
|
||
|
|
println!(" 2. Parse JSON response");
|
||
|
|
println!(" 3. Display answer and sources");
|
||
|
|
|
||
|
|
println!("\n Pattern 2: Multi-turn Conversation");
|
||
|
|
println!(" 1. POST /conversation with first message");
|
||
|
|
println!(" 2. POST /conversation with follow-up (system maintains context)");
|
||
|
|
println!(" 3. System detects follow-ups and injects context");
|
||
|
|
|
||
|
|
println!("\n Pattern 3: Batch Processing");
|
||
|
|
println!(" 1. POST /batch with multiple queries");
|
||
|
|
println!(" 2. Poll GET /batch/:job_id for progress");
|
||
|
|
println!(" 3. Receive aggregated results");
|
||
|
|
|
||
|
|
println!("\n Pattern 4: Cache Optimization");
|
||
|
|
println!(" 1. Monitor GET /cache/stats");
|
||
|
|
println!(" 2. Cache hit rate >70% indicates good effectiveness");
|
||
|
|
println!(" 3. POST /cache/clear if needed");
|
||
|
|
|
||
|
|
// 9. Performance expectations
|
||
|
|
println!("\n9. Performance Characteristics\n");
|
||
|
|
|
||
|
|
println!(" Response Latency:");
|
||
|
|
println!(" Cache hit: <5ms");
|
||
|
|
println!(" Cache miss: 500-1000ms");
|
||
|
|
println!(" Batch job: 500ms per query (parallel)");
|
||
|
|
println!(" Conversation: <20ms additional overhead");
|
||
|
|
|
||
|
|
println!("\n Throughput:");
|
||
|
|
println!(" Single query: 2-3 requests/second");
|
||
|
|
println!(" Batch (5 concurrent): 10-15 queries/second");
|
||
|
|
println!(" With caching: 100+ cache hits/second");
|
||
|
|
|
||
|
|
println!("\n Resource Usage:");
|
||
|
|
println!(" Memory per query: ~1-5 MB");
|
||
|
|
println!(" Cache storage: ~100 KB per cached response");
|
||
|
|
println!(" Batch job overhead: <10 MB for 1000 queries");
|
||
|
|
|
||
|
|
// 10. Error handling
|
||
|
|
println!("\n10. Error Handling\n");
|
||
|
|
|
||
|
|
println!(" HTTP Status Codes:");
|
||
|
|
println!(" 200 OK - Successful query");
|
||
|
|
println!(" 400 Bad Request - Invalid query or parameters");
|
||
|
|
println!(" 404 Not Found - Resource not found");
|
||
|
|
println!(" 500 Internal Server Error - System error");
|
||
|
|
|
||
|
|
println!("\n Error Codes:");
|
||
|
|
println!(" INVALID_CONFIG - Configuration problem");
|
||
|
|
println!(" INVALID_INPUT - Bad request data");
|
||
|
|
println!(" EMBEDDING_ERROR - Vector generation failed");
|
||
|
|
println!(" RETRIEVAL_ERROR - Document search failed");
|
||
|
|
println!(" LLM_ERROR - Language model error");
|
||
|
|
println!(" DB_ERROR - Database connection error");
|
||
|
|
println!(" TOOL_ERROR - Tool execution failed");
|
||
|
|
|
||
|
|
// 11. Security considerations
|
||
|
|
println!("\n11. Security Considerations\n");
|
||
|
|
|
||
|
|
println!(" ✓ Request validation on all endpoints");
|
||
|
|
println!(" ✓ Error messages don't expose internal details");
|
||
|
|
println!(" ✓ Tool execution requires authorization");
|
||
|
|
println!(" ✓ Rate limiting per endpoint");
|
||
|
|
println!(" ✓ Query complexity limits");
|
||
|
|
println!(" ✓ Audit logging of all operations");
|
||
|
|
|
||
|
|
// 12. Deployment
|
||
|
|
println!("\n12. Deployment\n");
|
||
|
|
|
||
|
|
println!(" Development:");
|
||
|
|
println!(" PORT=3000 cargo run --example rag_rest_api");
|
||
|
|
|
||
|
|
println!("\n Production:");
|
||
|
|
println!(" - Docker containerization");
|
||
|
|
println!(" - Kubernetes deployment");
|
||
|
|
println!(" - Load balancing across instances");
|
||
|
|
println!(" - Health check endpoints");
|
||
|
|
println!(" - Graceful shutdown handling");
|
||
|
|
println!(" - TLS/HTTPS enforcement");
|
||
|
|
|
||
|
|
// 13. Monitoring
|
||
|
|
println!("\n13. Monitoring & Observability\n");
|
||
|
|
|
||
|
|
println!(" Metrics to track:");
|
||
|
|
println!(" - Request latency (P50, P95, P99)");
|
||
|
|
println!(" - Error rate by endpoint");
|
||
|
|
println!(" - Cache hit rate");
|
||
|
|
println!(" - Batch processing throughput");
|
||
|
|
println!(" - Tool execution success rate");
|
||
|
|
println!(" - Database query performance");
|
||
|
|
|
||
|
|
println!("\n Logging:");
|
||
|
|
println!(" - All requests with metadata");
|
||
|
|
println!(" - Errors with full context");
|
||
|
|
println!(" - Performance metrics");
|
||
|
|
println!(" - Cache statistics");
|
||
|
|
|
||
|
|
// 14. Next steps
|
||
|
|
println!("\n14. Implementation Checklist\n");
|
||
|
|
|
||
|
|
println!(" ☐ Set up Axum HTTP server");
|
||
|
|
println!(" ☐ Implement streaming responses");
|
||
|
|
println!(" ☐ Add request validation middleware");
|
||
|
|
println!(" ☐ Implement batch job persistence");
|
||
|
|
println!(" ☐ Add CORS support");
|
||
|
|
println!(" ☐ Create OpenAPI/Swagger documentation");
|
||
|
|
println!(" ☐ Set up request logging");
|
||
|
|
println!(" ☐ Implement graceful shutdown");
|
||
|
|
println!(" ☐ Add rate limiting middleware");
|
||
|
|
println!(" ☐ Create deployment manifests");
|
||
|
|
|
||
|
|
println!("\n✅ REST API example complete!\n");
|
||
|
|
|
||
|
|
Ok(())
|
||
|
|
}
|