Jesús Pérez bb55c80d2b
feat(workflow-engine): autonomous scheduling with timezone and distributed lock
Add cron-based autonomous workflow firing with two hardening layers:

  - Timezone-aware scheduling via chrono-tz: ScheduledWorkflow.timezone
    (IANA identifier), compute_next_fire_at/after_tz, validate_timezone;
    DST-safe, UTC fallback when absent; validated at config load and REST API

  - Distributed fire-lock via SurrealDB conditional UPDATE (locked_by/locked_at
    fields, 120 s TTL); WorkflowScheduler gains instance_id (UUID) as lock owner;
    prevents double-fires across multi-instance deployments without extra infra

  - ScheduleStore: try_acquire_fire_lock, release_fire_lock (own-instance guard),
    full CRUD (load_one/all, full_upsert, patch, delete, load_runs)

  - REST: 7 endpoints (GET/PUT/PATCH/DELETE schedules, runs history, manual fire)
    with timezone field in all request/response types

  - Migrations 010 (schedule tables) + 011 (timezone + lock columns)
  - Tests: 48 passing (was 26); ADR-0034; changelog; feature docs updated
2026-02-26 11:34:44 +00:00

373 lines
12 KiB
Rust

// vapora-backend: REST API server for VAPORA v1.0
// Phase 1: Complete backend with SurrealDB integration
mod api;
mod audit;
mod config;
mod services;
mod workflow;
use std::net::SocketAddr;
use std::sync::Arc;
use anyhow::Result;
use axum::{
routing::{delete, get, post, put},
Extension, Router,
};
use clap::Parser;
use tower_http::cors::{Any, CorsLayer};
use tracing::{info, Level};
use vapora_swarm::{SwarmCoordinator, SwarmMetrics};
use vapora_workflow_engine::ScheduleStore;
use crate::api::AppState;
use crate::config::Config;
use crate::services::{
AgentService, ProjectService, ProposalService, ProviderAnalyticsService, TaskService,
};
// Command-line arguments for the backend binary, parsed with clap's derive API.
//
// NOTE: regular `//` comments are used here on purpose; the explicit `about` /
// `long_about` attributes below carry the help text shown to users.
#[derive(Parser, Debug)]
#[command(
name = "vapora-backend",
version,
about = "VAPORA Backend - REST API server for multi-agent orchestration",
long_about = "REST API server providing endpoints for project management, task coordination, \
and agent orchestration.\n\nConfiguration can be provided via:\n - CLI \
arguments (highest priority)\n - Environment variables (via config \
interpolation)\n - Config file (default: config/vapora.toml)"
)]
struct Args {
/// Path to configuration file
// Exposed as a short and a long flag; falls back to the `VAPORA_CONFIG`
// environment variable and finally to "config/vapora.toml".
#[arg(
short,
long,
default_value = "config/vapora.toml",
env = "VAPORA_CONFIG"
)]
config: String,
}
/// Entry point of the VAPORA backend.
///
/// Startup sequence:
/// 1. load `.env`, parse CLI arguments and initialise logging,
/// 2. load the configuration file and connect to SurrealDB,
/// 3. construct services, the RLM engine, the schedule store and the shared
///    application state,
/// 4. set up swarm coordination and Prometheus metrics,
/// 5. build the axum router and serve it until the server stops.
///
/// # Errors
///
/// Returns an error if the configuration cannot be loaded, the database
/// connection / sign-in / namespace selection fails, the RLM engine cannot be
/// built, the listen address cannot be bound, or the server terminates with
/// an error.
#[tokio::main]
async fn main() -> Result<()> {
    // Load environment variables from .env file if present.
    // This must happen BEFORE argument parsing: `Args::config` declares
    // `env = "VAPORA_CONFIG"`, and clap reads the environment while parsing,
    // so a value defined only in `.env` would otherwise be ignored.
    dotenv::dotenv().ok();

    // Parse CLI arguments
    let args = Args::parse();

    // Initialize logging
    tracing_subscriber::fmt()
        .with_max_level(Level::INFO)
        .with_target(false)
        .compact()
        .init();

    info!("VAPORA Backend v{}", env!("CARGO_PKG_VERSION"));
    info!("Phase 1: Backend Core + SurrealDB");

    // Load configuration from specified path
    let config = Config::load(&args.config)?;
    info!("Configuration loaded successfully");

    // Connect to SurrealDB via WebSocket
    info!("Connecting to SurrealDB at {}", config.database.url);
    let db =
        surrealdb::Surreal::new::<surrealdb::engine::remote::ws::Ws>(&config.database.url).await?;

    // Sign in to database
    // NOTE(review): root credentials are hard-coded here; they should come
    // from configuration or the environment before this is deployed anywhere
    // other than a local development setup.
    db.signin(surrealdb::opt::auth::Root {
        username: "root".to_string(),
        password: "root".to_string(),
    })
    .await?;

    // Use namespace and database
    db.use_ns("vapora").use_db("main").await?;
    info!("Connected to SurrealDB");

    // Initialize services
    let project_service = ProjectService::new(db.clone());
    let task_service = TaskService::new(db.clone());
    let agent_service = AgentService::new(db.clone());
    let proposal_service = ProposalService::new(db.clone());
    let provider_analytics_service = ProviderAnalyticsService::new(db.clone());

    // Create KG Persistence for analytics
    let kg_persistence = Arc::new(vapora_knowledge_graph::KGPersistence::new(db.clone()));

    // Create RLM engine for distributed reasoning (Phase 8)
    let rlm_storage = vapora_rlm::storage::SurrealDBStorage::new(db.clone());
    let rlm_bm25_index = Arc::new(vapora_rlm::search::bm25::BM25Index::new()?);
    let rlm_engine = Arc::new(vapora_rlm::RLMEngine::new(
        Arc::new(rlm_storage),
        rlm_bm25_index,
    )?);
    info!("RLM engine initialized for Phase 8");

    // Initialize schedule store (backed by the same SurrealDB connection)
    let schedule_store = Arc::new(ScheduleStore::new(Arc::new(db.clone())));
    info!("ScheduleStore initialized for autonomous scheduling");

    // Create application state
    let app_state = AppState::new(
        project_service,
        task_service,
        agent_service,
        proposal_service,
        provider_analytics_service,
    )
    .with_rlm_engine(rlm_engine)
    .with_schedule_store(schedule_store);

    // Create SwarmMetrics for Prometheus monitoring.
    // On failure we really do continue without metrics: the previous fallback
    // simply repeated the same constructor call and unwrapped it, which
    // panics if the constructor fails a second time.
    let swarm_metrics = match SwarmMetrics::new() {
        Ok(m) => {
            info!("SwarmMetrics initialized for Prometheus monitoring");
            Some(m)
        }
        Err(e) => {
            tracing::warn!(
                "Failed to initialize SwarmMetrics: {:?}, continuing without metrics",
                e
            );
            None
        }
    };

    // Create SwarmCoordinator for multi-agent coordination
    let mut swarm_coordinator = SwarmCoordinator::new();
    if let Some(metrics) = swarm_metrics {
        swarm_coordinator.set_metrics(metrics);
    }
    let swarm_coordinator = Arc::new(swarm_coordinator);
    info!("SwarmCoordinator initialized for Phase 5.2");

    // Initialize analytics metrics (Phase 6)
    api::analytics_metrics::register_analytics_metrics();
    info!("Analytics metrics registered for Prometheus");

    // Initialize provider metrics (Phase 7)
    api::provider_metrics::register_provider_metrics();
    info!("Provider metrics registered for Prometheus");

    // Start metrics collector background task (Phase 6)
    let metrics_collector =
        api::metrics_collector::MetricsCollector::new(kg_persistence.clone(), 60);
    // The handle is kept in a named binding for the rest of `main`.
    let _collector_handle = metrics_collector.start();
    info!("Metrics collector started (60s interval)");

    // Configure CORS
    // NOTE(review): this allows any origin, method and header; tighten it
    // before exposing the API beyond trusted clients.
    let cors = CorsLayer::new()
        .allow_origin(Any)
        .allow_methods(Any)
        .allow_headers(Any);

    // Build router
    let app = Router::new()
        // Health endpoint
        .route("/health", get(api::health::health))
        // Metrics endpoint (Prometheus)
        .route("/metrics", get(api::metrics::metrics_handler))
        // Project endpoints
        .route(
            "/api/v1/projects",
            get(api::projects::list_projects).post(api::projects::create_project),
        )
        .route(
            "/api/v1/projects/:id",
            get(api::projects::get_project)
                .put(api::projects::update_project)
                .delete(api::projects::delete_project),
        )
        .route(
            "/api/v1/projects/:id/features",
            post(api::projects::add_feature),
        )
        .route(
            "/api/v1/projects/:id/features/:feature",
            delete(api::projects::remove_feature),
        )
        .route(
            "/api/v1/projects/:id/archive",
            post(api::projects::archive_project),
        )
        // Task endpoints
        .route(
            "/api/v1/tasks",
            get(api::tasks::list_tasks).post(api::tasks::create_task),
        )
        .route(
            "/api/v1/tasks/:id",
            get(api::tasks::get_task)
                .put(api::tasks::update_task)
                .delete(api::tasks::delete_task),
        )
        .route("/api/v1/tasks/:id/reorder", put(api::tasks::reorder_task))
        .route(
            "/api/v1/tasks/:id/status",
            put(api::tasks::update_task_status),
        )
        .route("/api/v1/tasks/:id/assign", put(api::tasks::assign_task))
        .route(
            "/api/v1/tasks/:id/priority",
            put(api::tasks::update_priority),
        )
        // Agent endpoints (specific routes before parameterized routes)
        .route(
            "/api/v1/agents",
            get(api::agents::list_agents).post(api::agents::register_agent),
        )
        .route(
            "/api/v1/agents/available",
            get(api::agents::get_available_agents),
        )
        .route(
            "/api/v1/agents/:id",
            get(api::agents::get_agent)
                .put(api::agents::update_agent)
                .delete(api::agents::deregister_agent),
        )
        .route(
            "/api/v1/agents/:id/health",
            get(api::agents::check_agent_health),
        )
        .route(
            "/api/v1/agents/:id/status",
            put(api::agents::update_agent_status),
        )
        .route(
            "/api/v1/agents/:id/capabilities",
            post(api::agents::add_capability),
        )
        .route(
            "/api/v1/agents/:id/capabilities/:capability",
            delete(api::agents::remove_capability),
        )
        .route("/api/v1/agents/:id/skills", post(api::agents::add_skill))
        // Proposal endpoints (Approval Gates)
        .route(
            "/api/v1/proposals",
            get(api::proposals::list_proposals).post(api::proposals::create_proposal),
        )
        .route(
            "/api/v1/proposals/:id",
            get(api::proposals::get_proposal)
                .put(api::proposals::update_proposal)
                .delete(api::proposals::delete_proposal),
        )
        .route(
            "/api/v1/proposals/:id/submit",
            put(api::proposals::submit_proposal),
        )
        .route(
            "/api/v1/proposals/:id/approve",
            put(api::proposals::approve_proposal),
        )
        .route(
            "/api/v1/proposals/:id/reject",
            put(api::proposals::reject_proposal),
        )
        .route(
            "/api/v1/proposals/:id/executed",
            put(api::proposals::mark_executed),
        )
        .route(
            "/api/v1/proposals/:id/reviews",
            get(api::proposals::list_reviews).post(api::proposals::add_review),
        )
        // Tracking endpoints
        .route(
            "/api/v1/tracking/entries",
            get(api::tracking::list_tracking_entries),
        )
        .route(
            "/api/v1/tracking/summary",
            get(api::tracking::get_tracking_summary),
        )
        .route(
            "/api/v1/tracking/health",
            get(api::tracking::tracking_health),
        )
        // Swarm endpoints (Phase 5.2)
        .route("/api/v1/swarm/stats", get(api::swarm::swarm_statistics))
        .route("/api/v1/swarm/health", get(api::swarm::swarm_health))
        // Analytics endpoints (Phase 6)
        .route(
            "/api/v1/analytics/agent/:id",
            get(api::analytics::get_agent_performance),
        )
        .route(
            "/api/v1/analytics/task-types/:task_type",
            get(api::analytics::get_task_type_analytics),
        )
        .route(
            "/api/v1/analytics/dashboard",
            get(api::analytics::get_dashboard_metrics),
        )
        .route(
            "/api/v1/analytics/cost-report",
            get(api::analytics::get_cost_report),
        )
        .route(
            "/api/v1/analytics/summary",
            get(api::analytics::get_analytics_summary),
        )
        // Provider analytics endpoints (Phase 7)
        .route(
            "/api/v1/analytics/providers",
            get(api::provider_analytics::get_provider_cost_breakdown),
        )
        .route(
            "/api/v1/analytics/providers/efficiency",
            get(api::provider_analytics::get_provider_efficiency),
        )
        .route(
            "/api/v1/analytics/providers/:provider",
            get(api::provider_analytics::get_provider_analytics),
        )
        .route(
            "/api/v1/analytics/providers/:provider/forecast",
            get(api::provider_analytics::get_provider_forecast),
        )
        .route(
            "/api/v1/analytics/providers/:provider/tasks/:task_type",
            get(api::provider_analytics::get_provider_task_type_metrics),
        )
        // RLM endpoints (Phase 8)
        .route("/api/v1/rlm/documents", post(api::rlm::load_document))
        .route("/api/v1/rlm/query", post(api::rlm::query_document))
        .route("/api/v1/rlm/analyze", post(api::rlm::analyze_document))
        // Schedule endpoints
        .route("/api/v1/schedules", get(api::schedules::list_schedules))
        .route(
            "/api/v1/schedules/:id",
            get(api::schedules::get_schedule)
                .put(api::schedules::put_schedule)
                .patch(api::schedules::patch_schedule)
                .delete(api::schedules::delete_schedule),
        )
        .route(
            "/api/v1/schedules/:id/runs",
            get(api::schedules::list_schedule_runs),
        )
        .route(
            "/api/v1/schedules/:id/fire",
            post(api::schedules::fire_schedule),
        )
        // Apply CORS, state, and extensions
        .layer(Extension(swarm_coordinator))
        .layer(cors)
        .with_state(app_state);

    // Start server
    // NOTE(review): the listener is bound to the loopback address only, so
    // the API is not reachable from other hosts; only the port is read from
    // the configuration here.
    let addr = SocketAddr::from(([127, 0, 0, 1], config.server.port));
    info!("Server listening on {}", addr);
    info!("Health check: http://{}/health", addr);
    info!("API documentation: http://{}/api/v1", addr);

    let listener = tokio::net::TcpListener::bind(addr).await?;
    axum::serve(listener, app).await?;
    Ok(())
}