use anyhow::{Context, Result}; use axum::{ extract::{Path, State}, http::StatusCode, response::Json, routing::{get, post}, Router, }; use clap::Parser; use serde::{Deserialize, Serialize}; use std::{ process::Stdio, sync::Arc, }; use tokio::process::Command; use tower_http::cors::CorsLayer; use tracing::{error, info, warn}; use uuid::Uuid; // Use types from the library use provisioning_orchestrator::{ Args, WorkflowTask, TaskStatus, CreateServerWorkflow, TaskservWorkflow, ClusterWorkflow, storage::{create_storage_from_args, TaskStorage}, workflow::{WorkflowExecutionState}, batch::{BatchCoordinator, BatchCoordinatorFactory, BatchOperationRequest, BatchOperationResult}, dependency::{DependencyResolver}, state::{WorkflowStateManager, StateConfig, ProgressTracker, ProgressInfo, StateSnapshot, SystemMetrics, StateManagerStatistics}, monitor::{MonitoringSystem, MonitoringConfig, SystemResourceHealthCheck, MonitoringEvent, MonitoringEventType, SystemHealthStatus}, rollback::{RollbackSystem, RollbackConfig, RollbackResult, Checkpoint, RollbackStatistics, RollbackStrategy, providers::*}, test_environment::{TestEnvironment, CreateTestEnvironmentRequest, RunTestRequest, TestEnvironmentResponse, TestResult}, test_orchestrator::TestOrchestrator, dns::DnsManager, extensions::ExtensionManager, oci::OciManager, services::ServiceOrchestrator, }; // Application state for the orchestrator pub type SharedState = Arc; pub struct AppState { pub task_storage: Arc, pub batch_coordinator: BatchCoordinator, pub dependency_resolver: DependencyResolver, pub state_manager: Arc, pub monitoring_system: Arc, pub progress_tracker: Arc, pub rollback_system: Arc, pub test_orchestrator: Arc, pub dns_manager: Arc, pub extension_manager: Arc, pub oci_manager: Arc, pub service_orchestrator: Arc, pub args: Args, } impl AppState { pub async fn new(args: Args) -> Result { // Create storage using the factory pattern let task_storage = create_storage_from_args(&args).await .context("Failed to create storage backend")?; info!("Successfully initialized {} storage backend", args.storage_type); // Create batch coordinator let batch_coordinator = BatchCoordinatorFactory::create_from_args( task_storage.clone(), args.clone(), ).await.context("Failed to create batch coordinator")?; info!("Successfully initialized batch coordinator"); // Create dependency resolver with default configuration let dependency_resolver = DependencyResolver::new(); info!("Successfully initialized dependency resolver"); // Create state manager with default configuration let state_config = StateConfig::default(); let state_manager = Arc::new(WorkflowStateManager::new(task_storage.clone(), state_config)); // Initialize state manager state_manager.init().await .context("Failed to initialize state manager")?; info!("Successfully initialized state manager"); // Create monitoring system with default configuration let monitoring_config = MonitoringConfig::default(); let monitoring_system = Arc::new(MonitoringSystem::new( monitoring_config, task_storage.clone(), state_manager.clone(), )); // Initialize monitoring system monitoring_system.init().await .context("Failed to initialize monitoring system")?; info!("Successfully initialized monitoring system"); // Register system resource health check let health_monitor = monitoring_system.health_monitor(); health_monitor.register_health_check( "system_resources".to_string(), SystemResourceHealthCheck::new(1024, 80.0), // 1GB memory, 80% CPU thresholds ).await; // Create progress tracker let progress_tracker = Arc::new(ProgressTracker::new(state_manager.clone())); info!("Successfully initialized progress tracker"); // Create rollback system with configuration-driven strategy let rollback_config = RollbackConfig { checkpoint_interval_seconds: 300, // 5 minutes auto_checkpoint_enabled: true, strategy: RollbackStrategy::ConfigDriven, ..RollbackConfig::default() }; let mut rollback_system = RollbackSystem::new(task_storage.clone(), rollback_config); // Register provider-specific rollback handlers rollback_system.resource_tracker.register_provider_handler( Arc::new(UpCloudRollbackHandler {}) ).await; rollback_system.resource_tracker.register_provider_handler( Arc::new(AwsRollbackHandler {}) ).await; rollback_system.resource_tracker.register_provider_handler( Arc::new(LocalRollbackHandler {}) ).await; // Initialize rollback system rollback_system.init().await .context("Failed to initialize rollback system")?; info!("Successfully initialized rollback system with {} providers", 3); let rollback_system = Arc::new(rollback_system); // Create test orchestrator let test_orchestrator = Arc::new(TestOrchestrator::new() .context("Failed to initialize test orchestrator")?); info!("Successfully initialized test orchestrator"); // Create DNS manager let dns_manager = Arc::new(DnsManager::new( "http://localhost:53".to_string(), // TODO: Read from config true, // auto_register 300, // default_ttl )); info!("Successfully initialized DNS manager"); // Create extension manager let extension_manager = Arc::new(ExtensionManager::new( args.nu_path.clone(), args.provisioning_path.clone(), )); info!("Successfully initialized extension manager"); // Create OCI manager let oci_manager = Arc::new(OciManager::new( "http://localhost:5000".to_string(), // TODO: Read from config "provisioning-extensions".to_string(), // namespace std::path::PathBuf::from(&args.data_dir).join("oci-cache"), )); info!("Successfully initialized OCI manager"); // Create service orchestrator let service_orchestrator = Arc::new(ServiceOrchestrator::new( args.nu_path.clone(), args.provisioning_path.clone(), true, // auto_start_dependencies )); info!("Successfully initialized service orchestrator"); Ok(Self { task_storage, batch_coordinator, dependency_resolver, state_manager, monitoring_system, progress_tracker, rollback_system, test_orchestrator, dns_manager, extension_manager, oci_manager, service_orchestrator, args, }) } pub async fn execute_nushell_command(&self, command: &str, args: &[String]) -> Result { let mut cmd = Command::new(&self.args.nu_path); cmd.arg("-c") .arg(format!("{} {}", command, args.join(" "))) .stdout(Stdio::piped()) .stderr(Stdio::piped()); let output = cmd.output().await .context("Failed to execute Nushell command")?; if output.status.success() { Ok(String::from_utf8_lossy(&output.stdout).to_string()) } else { let error = String::from_utf8_lossy(&output.stderr); Err(anyhow::anyhow!("Nushell command failed: {}", error)) } } } #[derive(Serialize)] struct ApiResponse { success: bool, data: Option, error: Option, } impl ApiResponse { fn success(data: T) -> Self { Self { success: true, data: Some(data), error: None, } } fn error(message: String) -> Self { Self { success: false, data: None, error: Some(message), } } } // REST API Routes async fn create_server_workflow( State(state): State, Json(workflow): Json, ) -> Result>, StatusCode> { let task_id = Uuid::new_v4().to_string(); let task = WorkflowTask { id: task_id.clone(), name: "create_servers".to_string(), command: format!("{} servers create", state.args.provisioning_path), args: vec![ "--infra".to_string(), workflow.infra.clone(), "--settings".to_string(), workflow.settings.clone(), if workflow.check_mode { "--check".to_string() } else { "".to_string() }, if workflow.wait { "--wait".to_string() } else { "".to_string() }, ].into_iter().filter(|s| !s.is_empty()).collect(), dependencies: vec![], status: TaskStatus::Pending, created_at: chrono::Utc::now(), started_at: None, completed_at: None, output: None, error: None, }; match state.task_storage.enqueue(task, 5).await { Ok(()) => { info!("Enqueued server creation workflow: {}", task_id); Ok(Json(ApiResponse::success(task_id))) } Err(e) => { error!("Failed to enqueue task: {}", e); Err(StatusCode::INTERNAL_SERVER_ERROR) } } } async fn create_taskserv_workflow( State(state): State, Json(workflow): Json, ) -> Result>, StatusCode> { let task_id = Uuid::new_v4().to_string(); let task = WorkflowTask { id: task_id.clone(), name: format!("{}_taskserv", workflow.operation), command: format!("{} taskserv {}", state.args.provisioning_path, workflow.operation), args: vec![ workflow.taskserv.clone(), "--infra".to_string(), workflow.infra.clone(), "--settings".to_string(), workflow.settings.clone(), if workflow.check_mode { "--check".to_string() } else { "".to_string() }, if workflow.wait { "--wait".to_string() } else { "".to_string() }, ].into_iter().filter(|s| !s.is_empty()).collect(), dependencies: vec![], status: TaskStatus::Pending, created_at: chrono::Utc::now(), started_at: None, completed_at: None, output: None, error: None, }; match state.task_storage.enqueue(task, 6).await { Ok(()) => { info!("Enqueued taskserv {} workflow: {}", workflow.operation, task_id); Ok(Json(ApiResponse::success(task_id))) } Err(e) => { error!("Failed to enqueue task: {}", e); Err(StatusCode::INTERNAL_SERVER_ERROR) } } } async fn create_cluster_workflow( State(state): State, Json(workflow): Json, ) -> Result>, StatusCode> { let task_id = Uuid::new_v4().to_string(); let task = WorkflowTask { id: task_id.clone(), name: format!("{}_cluster", workflow.operation), command: format!("{} cluster {}", state.args.provisioning_path, workflow.operation), args: vec![ workflow.cluster_type.clone(), "--infra".to_string(), workflow.infra.clone(), "--settings".to_string(), workflow.settings.clone(), if workflow.check_mode { "--check".to_string() } else { "".to_string() }, if workflow.wait { "--wait".to_string() } else { "".to_string() }, ].into_iter().filter(|s| !s.is_empty()).collect(), dependencies: vec![], status: TaskStatus::Pending, created_at: chrono::Utc::now(), started_at: None, completed_at: None, output: None, error: None, }; match state.task_storage.enqueue(task, 7).await { Ok(()) => { info!("Enqueued cluster {} workflow: {}", workflow.operation, task_id); Ok(Json(ApiResponse::success(task_id))) } Err(e) => { error!("Failed to enqueue task: {}", e); Err(StatusCode::INTERNAL_SERVER_ERROR) } } } async fn get_task_status( State(state): State, Path(task_id): Path, ) -> Result>, StatusCode> { match state.task_storage.get_task(&task_id).await { Ok(Some(task)) => Ok(Json(ApiResponse::success(task))), Ok(None) => Err(StatusCode::NOT_FOUND), Err(e) => { error!("Failed to get task {}: {}", task_id, e); Err(StatusCode::INTERNAL_SERVER_ERROR) } } } async fn list_tasks( State(state): State, ) -> Result>>, StatusCode> { match state.task_storage.list_tasks(None).await { Ok(task_list) => Ok(Json(ApiResponse::success(task_list))), Err(e) => { error!("Failed to list tasks: {}", e); Err(StatusCode::INTERNAL_SERVER_ERROR) } } } async fn health_check() -> Json> { Json(ApiResponse::success("Orchestrator is healthy".to_string())) } // Batch operation routes async fn execute_batch_operation( State(state): State, Json(request): Json, ) -> Result>, StatusCode> { match state.batch_coordinator.execute_batch_operation(request).await { Ok(result) => Ok(Json(ApiResponse::success(result))), Err(e) => { error!("Failed to execute batch operation: {}", e); Err(StatusCode::INTERNAL_SERVER_ERROR) } } } async fn get_batch_operation_status( State(state): State, Path(operation_id): Path, ) -> Result>, StatusCode> { match state.batch_coordinator.get_operation_status(&operation_id).await { Some(state) => Ok(Json(ApiResponse::success(state))), None => Err(StatusCode::NOT_FOUND), } } async fn list_batch_operations( State(state): State, ) -> Result>>, StatusCode> { let operations = state.batch_coordinator.list_operations().await; Ok(Json(ApiResponse::success(operations))) } async fn cancel_batch_operation( State(state): State, Path(operation_id): Path, ) -> Result>, StatusCode> { match state.batch_coordinator.cancel_operation(&operation_id).await { Ok(_) => Ok(Json(ApiResponse::success(format!("Operation {} cancelled", operation_id)))), Err(e) => { error!("Failed to cancel operation {}: {}", operation_id, e); Err(StatusCode::INTERNAL_SERVER_ERROR) } } } // Rollback and recovery routes #[derive(Deserialize)] struct CreateCheckpointRequest { name: String, description: Option, } #[derive(Deserialize)] struct RollbackRequest { checkpoint_id: Option, operation_ids: Option>, } async fn create_checkpoint( State(state): State, Json(request): Json, ) -> Result>, StatusCode> { match state.rollback_system.checkpoint_manager.create_checkpoint( request.name, request.description, ).await { Ok(checkpoint_id) => { info!("Created checkpoint: {}", checkpoint_id); Ok(Json(ApiResponse::success(checkpoint_id))) } Err(e) => { error!("Failed to create checkpoint: {}", e); Err(StatusCode::INTERNAL_SERVER_ERROR) } } } async fn list_checkpoints( State(state): State, ) -> Result>>, StatusCode> { let checkpoints = state.rollback_system.checkpoint_manager.list_checkpoints().await; Ok(Json(ApiResponse::success(checkpoints))) } async fn get_checkpoint( State(state): State, Path(checkpoint_id): Path, ) -> Result>, StatusCode> { match state.rollback_system.checkpoint_manager.get_checkpoint(&checkpoint_id).await { Some(checkpoint) => Ok(Json(ApiResponse::success(checkpoint))), None => Err(StatusCode::NOT_FOUND), } } async fn execute_rollback( State(state): State, Json(request): Json, ) -> Result>, StatusCode> { let result = if let Some(checkpoint_id) = request.checkpoint_id { // Rollback to specific checkpoint match state.rollback_system.rollback_executor.rollback_to_checkpoint(&checkpoint_id).await { Ok(result) => result, Err(e) => { error!("Failed to execute rollback to checkpoint {}: {}", checkpoint_id, e); return Err(StatusCode::INTERNAL_SERVER_ERROR); } } } else if let Some(operation_ids) = request.operation_ids { // Partial rollback of specific operations match state.rollback_system.rollback_executor.rollback_operations(operation_ids).await { Ok(result) => result, Err(e) => { error!("Failed to execute partial rollback: {}", e); return Err(StatusCode::INTERNAL_SERVER_ERROR); } } } else { return Err(StatusCode::BAD_REQUEST); }; if result.success { info!("Rollback executed successfully: {} operations", result.operations_executed); } else { warn!("Rollback completed with errors: {}/{} operations failed", result.operations_failed, result.operations_executed + result.operations_failed); } Ok(Json(ApiResponse::success(result))) } async fn get_rollback_statistics( State(state): State, ) -> Result>, StatusCode> { let stats = state.rollback_system.get_statistics().await; Ok(Json(ApiResponse::success(stats))) } async fn restore_from_checkpoint( State(state): State, Path(checkpoint_id): Path, ) -> Result>, StatusCode> { match state.rollback_system.state_restorer.restore_from_checkpoint(&checkpoint_id).await { Ok(_) => { info!("Successfully restored state from checkpoint: {}", checkpoint_id); Ok(Json(ApiResponse::success(format!("State restored from checkpoint {}", checkpoint_id)))) } Err(e) => { error!("Failed to restore from checkpoint {}: {}", checkpoint_id, e); Err(StatusCode::INTERNAL_SERVER_ERROR) } } } // State management and monitoring routes async fn get_workflow_progress( State(state): State, Path(workflow_id): Path, ) -> Result>, StatusCode> { match state.progress_tracker.get_real_time_progress(&workflow_id).await { Some(progress) => Ok(Json(ApiResponse::success(progress))), None => Err(StatusCode::NOT_FOUND), } } async fn get_workflow_snapshots( State(state): State, Path(workflow_id): Path, ) -> Result>>, StatusCode> { let snapshots = state.state_manager.get_workflow_snapshots(&workflow_id).await; Ok(Json(ApiResponse::success(snapshots))) } async fn get_system_metrics( State(state): State, ) -> Result>, StatusCode> { let metrics = state.state_manager.get_system_metrics().await; Ok(Json(ApiResponse::success(metrics))) } async fn get_system_health( State(state): State, ) -> Result>, StatusCode> { let health_status = state.monitoring_system.health_monitor().get_system_health().await; Ok(Json(ApiResponse::success(health_status))) } async fn get_state_statistics( State(state): State, ) -> Result>, StatusCode> { match state.state_manager.get_statistics().await { Ok(stats) => Ok(Json(ApiResponse::success(stats))), Err(e) => { error!("Failed to get state statistics: {}", e); Err(StatusCode::INTERNAL_SERVER_ERROR) } } } // Test environment routes async fn create_test_environment( State(state): State, Json(request): Json, ) -> Result>, StatusCode> { match state.test_orchestrator.create_environment(request).await { Ok(response) => Ok(Json(ApiResponse::success(response))), Err(e) => { error!("Failed to create test environment: {}", e); Err(StatusCode::INTERNAL_SERVER_ERROR) } } } async fn get_test_environment( State(state): State, Path(env_id): Path, ) -> Result>, StatusCode> { match state.test_orchestrator.get_environment(&env_id).await { Some(env) => Ok(Json(ApiResponse::success(env))), None => Err(StatusCode::NOT_FOUND), } } async fn list_test_environments( State(state): State, ) -> Result>>, StatusCode> { let environments = state.test_orchestrator.list_environments().await; Ok(Json(ApiResponse::success(environments))) } async fn run_environment_tests( State(state): State, Path(env_id): Path, Json(request): Json, ) -> Result>>, StatusCode> { match state.test_orchestrator.run_tests(&env_id, request).await { Ok(results) => Ok(Json(ApiResponse::success(results))), Err(e) => { error!("Failed to run tests: {}", e); Err(StatusCode::INTERNAL_SERVER_ERROR) } } } async fn cleanup_test_environment( State(state): State, Path(env_id): Path, ) -> Result>, StatusCode> { match state.test_orchestrator.cleanup_environment(&env_id, false).await { Ok(_) => Ok(Json(ApiResponse::success(format!("Environment {} cleaned up", env_id)))), Err(e) => { error!("Failed to cleanup environment: {}", e); Err(StatusCode::INTERNAL_SERVER_ERROR) } } } async fn get_environment_logs( State(state): State, Path(env_id): Path, ) -> Result>>, StatusCode> { match state.test_orchestrator.get_environment_logs(&env_id).await { Ok(logs) => Ok(Json(ApiResponse::success(logs))), Err(e) => { error!("Failed to get environment logs: {}", e); Err(StatusCode::INTERNAL_SERVER_ERROR) } } } // DNS Management Routes async fn list_dns_records( State(state): State, ) -> Result>>, StatusCode> { match state.dns_manager.list_records().await { Ok(records) => Ok(Json(ApiResponse::success(records))), Err(e) => { error!("Failed to list DNS records: {}", e); Err(StatusCode::INTERNAL_SERVER_ERROR) } } } // Extension Management Routes async fn list_loaded_extensions( State(state): State, ) -> Result>>, StatusCode> { let extensions = state.extension_manager.list_loaded_extensions().await; Ok(Json(ApiResponse::success(extensions))) } #[derive(Deserialize)] struct ReloadExtensionRequest { extension_type: String, name: String, } async fn reload_extension( State(state): State, Json(request): Json, ) -> Result>, StatusCode> { use provisioning_orchestrator::extensions::ExtensionType; let ext_type = match request.extension_type.as_str() { "provider" => ExtensionType::Provider, "taskserv" => ExtensionType::Taskserv, "cluster" => ExtensionType::Cluster, _ => return Err(StatusCode::BAD_REQUEST), }; match state.extension_manager.reload_extension(ext_type, request.name.clone()).await { Ok(_) => Ok(Json(ApiResponse::success(format!("Extension {} reloaded", request.name)))), Err(e) => { error!("Failed to reload extension: {}", e); Err(StatusCode::INTERNAL_SERVER_ERROR) } } } // OCI Registry Routes #[derive(Deserialize)] struct ListOciArtifactsRequest { namespace: String, } async fn list_oci_artifacts( State(state): State, Json(request): Json, ) -> Result>>, StatusCode> { match state.oci_manager.list_oci_artifacts(&request.namespace).await { Ok(artifacts) => Ok(Json(ApiResponse::success(artifacts))), Err(e) => { error!("Failed to list OCI artifacts: {}", e); Err(StatusCode::INTERNAL_SERVER_ERROR) } } } // Service Orchestration Routes async fn list_services( State(state): State, ) -> Result>>, StatusCode> { let services = state.service_orchestrator.list_services().await; Ok(Json(ApiResponse::success(services))) } #[derive(Serialize)] struct ServiceStatusResponse { name: String, status: provisioning_orchestrator::services::ServiceStatus, } async fn get_services_status( State(state): State, ) -> Result>>, StatusCode> { let services = state.service_orchestrator.list_services().await; let mut statuses = Vec::new(); for service in services { if let Ok(status) = state.service_orchestrator.get_service_status(&service.name).await { statuses.push(ServiceStatusResponse { name: service.name, status, }); } } Ok(Json(ApiResponse::success(statuses))) } // Task processor - runs tasks from the queue async fn process_tasks(state: SharedState) { info!("Starting task processor"); let metrics_collector = state.monitoring_system.metrics_collector(); loop { match state.task_storage.dequeue().await { Ok(Some(mut task)) => { // Track task dequeue metrics_collector.increment_task_counter(); metrics_collector.record_storage_operation(true); if let Err(e) = state.task_storage.update_task_status(&task.id, TaskStatus::Running).await { error!("Failed to update task status: {}", e); metrics_collector.record_storage_operation(false); continue; } info!("Processing task: {} ({})", task.id, task.name); let task_start = std::time::Instant::now(); let result = state.execute_nushell_command(&task.command, &task.args).await; let task_duration = task_start.elapsed(); match result { Ok(output) => { info!("Task {} completed successfully", task.id); task.output = Some(output); task.status = TaskStatus::Completed; task.completed_at = Some(chrono::Utc::now()); // Record metrics metrics_collector.record_task_completion(task_duration.as_millis() as u64); // Publish monitoring event let event = MonitoringEvent { event_type: MonitoringEventType::TaskStatusChanged, timestamp: chrono::Utc::now(), data: serde_json::to_value(&task).unwrap_or_default(), metadata: { let mut meta = std::collections::HashMap::new(); meta.insert("task_id".to_string(), task.id.clone()); meta.insert("status".to_string(), "completed".to_string()); meta.insert("duration_ms".to_string(), task_duration.as_millis().to_string()); meta }, }; if let Err(e) = state.monitoring_system.publish_event(event).await { error!("Failed to publish monitoring event: {}", e); } if let Err(e) = state.task_storage.update_task(task).await { error!("Failed to update task: {}", e); metrics_collector.record_storage_operation(false); } else { metrics_collector.record_storage_operation(true); } } Err(e) => { error!("Task {} failed: {}", task.id, e); task.error = Some(e.to_string()); task.status = TaskStatus::Failed; task.completed_at = Some(chrono::Utc::now()); // Record metrics metrics_collector.record_task_failure(); // Publish monitoring event let event = MonitoringEvent { event_type: MonitoringEventType::TaskStatusChanged, timestamp: chrono::Utc::now(), data: serde_json::to_value(&task).unwrap_or_default(), metadata: { let mut meta = std::collections::HashMap::new(); meta.insert("task_id".to_string(), task.id.clone()); meta.insert("status".to_string(), "failed".to_string()); meta.insert("error".to_string(), e.to_string()); meta }, }; if let Err(e) = state.monitoring_system.publish_event(event).await { error!("Failed to publish monitoring event: {}", e); } if let Err(e) = state.task_storage.update_task(task.clone()).await { error!("Failed to update task: {}", e); metrics_collector.record_storage_operation(false); } else { metrics_collector.record_storage_operation(true); } // Try to requeue for retry if let Err(e) = state.task_storage.requeue_failed_task(&task.id).await { error!("Failed to requeue task: {}", e); metrics_collector.record_storage_operation(false); } } } } Ok(None) => { // No tasks in queue, sleep tokio::time::sleep(tokio::time::Duration::from_secs(1)).await; } Err(e) => { error!("Error dequeuing task: {}", e); metrics_collector.record_storage_operation(false); tokio::time::sleep(tokio::time::Duration::from_secs(5)).await; } } } } #[tokio::main] async fn main() -> Result<()> { tracing_subscriber::fmt::init(); let args = Args::parse(); let port = args.port; info!("Starting provisioning orchestrator on port {}", port); let state = Arc::new(AppState::new(args).await?); // Start task processor let processor_state = state.clone(); tokio::spawn(async move { process_tasks(processor_state).await; }); let app = Router::new() .route("/health", get(health_check)) .route("/tasks", get(list_tasks)) .route("/tasks/{id}", get(get_task_status)) .route("/workflows/servers/create", post(create_server_workflow)) .route("/workflows/taskserv/create", post(create_taskserv_workflow)) .route("/workflows/cluster/create", post(create_cluster_workflow)) // Batch operation routes .route("/batch/execute", post(execute_batch_operation)) .route("/batch/operations", get(list_batch_operations)) .route("/batch/operations/{id}", get(get_batch_operation_status)) .route("/batch/operations/{id}/cancel", post(cancel_batch_operation)) // State management routes .route("/state/workflows/{id}/progress", get(get_workflow_progress)) .route("/state/workflows/{id}/snapshots", get(get_workflow_snapshots)) .route("/state/system/metrics", get(get_system_metrics)) .route("/state/system/health", get(get_system_health)) .route("/state/statistics", get(get_state_statistics)) // Rollback and recovery routes .route("/rollback/checkpoints", post(create_checkpoint)) .route("/rollback/checkpoints", get(list_checkpoints)) .route("/rollback/checkpoints/{id}", get(get_checkpoint)) .route("/rollback/execute", post(execute_rollback)) .route("/rollback/restore/{id}", post(restore_from_checkpoint)) .route("/rollback/statistics", get(get_rollback_statistics)) // Test environment routes .route("/test/environments/create", post(create_test_environment)) .route("/test/environments", get(list_test_environments)) .route("/test/environments/{id}", get(get_test_environment)) .route("/test/environments/{id}/run", post(run_environment_tests)) .route("/test/environments/{id}", axum::routing::delete(cleanup_test_environment)) .route("/test/environments/{id}/logs", get(get_environment_logs)) // DNS integration routes .route("/api/v1/dns/records", get(list_dns_records)) // Extension loading routes .route("/api/v1/extensions/loaded", get(list_loaded_extensions)) .route("/api/v1/extensions/reload", post(reload_extension)) // OCI registry routes .route("/api/v1/oci/artifacts", post(list_oci_artifacts)) // Service orchestration routes .route("/api/v1/services/list", get(list_services)) .route("/api/v1/services/status", get(get_services_status)) // Merge monitoring routes (includes /metrics, /ws, /events) .merge(state.monitoring_system.create_routes()) .layer(CorsLayer::permissive()) .with_state(state); let listener = tokio::net::TcpListener::bind(format!("0.0.0.0:{}", port)) .await .context("Failed to bind to address")?; info!("Orchestrator listening on port {}", port); axum::serve(listener, app) .await .context("Server error")?; Ok(()) }