Vapora/crates/vapora-agents/src/runtime/state_machine.rs
Jesús Pérez c5f4caa2ab
feat(agents): stable identity + hot-reload for zero learning loss on config change
Introduce stable_id = role on AgentMetadata so learning profiles and KG
  execution records survive process restarts and hot-reloads. Previously
  every Uuid::new_v4() rotation orphaned accumulated expertise.

  - registry: add stable_id field (serde default, backward-compatible),
    stable_id_or_role() fallback helper, drain_role(), list_roles()
  - coordinator: profile lookup and KG writes use stable_id_or_role()
    instead of the ephemeral UUID; drain_role() drops Sender to close
    mpsc channels after in-flight messages drain; registry_arc() accessor
  - executor: agent_id written to KG now uses stable_id_or_role()
  - server: reload_agents() drain-and-respawn function; SIGHUP handler
    via while sighup.recv().await.is_some(); POST /reload endpoint;
    AppState extended with config_path, router, cap_registry
  - fix: SIGHUP recv() spin-loop guard (is_some())
  - fix: io_other_error clippy lint in vapora-agents, vapora-llm-router,
    vapora-workflow-engine (std::io::Error::other instead of Error::new)
  - docs: ADR-0040, CHANGELOG entry, README hot-reload section
2026-03-02 22:54:28 +00:00

244 lines
6.5 KiB
Rust

// Type-state machine for agent lifecycle
// Ensures safe state transitions at compile time
use std::marker::PhantomData;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use crate::messages::TaskAssignment;
use crate::registry::AgentMetadata;
// Agent states - compile-time enforced state machine.
// Each state below is a distinct type used as the `S` parameter of `Agent<S>`.

/// Initial state: the agent is idle and has no task.
///
/// This is a zero-sized unit struct that exists only to tag `Agent<Idle>` at
/// the type level. The common traits are derived so the marker can be
/// printed, copied and compared in diagnostics and tests.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Idle;
/// Task assigned state.
///
/// Used as the `S` type parameter of `Agent<S>` to mark an agent that has
/// been given a task but has not started executing it yet.
///
/// NOTE(review): `Agent<S>` only holds a `PhantomData<S>`, so in the code
/// visible in this file no `Assigned` value is ever constructed and the
/// `task` field is never populated. Confirm whether other modules build
/// this struct directly.
pub struct Assigned {
/// The assignment associated with this state.
pub task: TaskAssignment,
}
/// Executing state.
///
/// Used as the `S` type parameter of `Agent<S>` to mark an agent that is
/// currently running a task.
///
/// NOTE(review): `Agent<S>` only holds a `PhantomData<S>`, so in the code
/// visible in this file no `Executing` value is ever constructed and neither
/// field is populated. Confirm whether other modules build this struct
/// directly.
pub struct Executing {
/// The assignment being executed.
pub task: TaskAssignment,
/// UTC timestamp; presumably the moment execution began (nothing in this
/// file sets it, so verify against the code that constructs the value).
pub started_at: DateTime<Utc>,
}
/// Completed state: execution finished successfully.
///
/// This is a zero-sized unit struct that exists only to tag
/// `Agent<Completed>` at the type level. The common traits are derived so
/// the marker can be printed, copied and compared in diagnostics and tests.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Completed;
/// Failed state: execution ended with an error.
///
/// Used as the `S` type parameter of `Agent<S>` to mark an agent whose task
/// did not complete. `Debug`, `Clone`, `PartialEq` and `Eq` are derived so a
/// value can be logged, duplicated and compared.
///
/// NOTE(review): `Agent<S>` only holds a `PhantomData<S>`, so in the code
/// visible in this file no `Failed` value is ever constructed. Confirm
/// whether other modules build this struct directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Failed {
    /// Human-readable description of the failure.
    pub error: String,
}
/// Execution result containing outcome data.
///
/// Handed to `Agent<Executing>::complete` and read back through
/// `Agent<Completed>::result`. The derived `Default` yields an empty
/// `output` and zero for every counter.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ExecutionResult {
/// Text produced by the execution.
pub output: String,
/// Input token count (presumably LLM prompt tokens; confirm with the
/// code that fills this in).
pub input_tokens: u64,
/// Output token count (presumably LLM completion tokens; confirm with
/// the code that fills this in).
pub output_tokens: u64,
/// Execution duration; the `_ms` suffix suggests milliseconds (verify
/// against the producer).
pub duration_ms: u64,
}
/// Agent with compile-time state tracking.
///
/// The type parameter `S` names the current lifecycle state (`Idle`,
/// `Assigned`, `Executing`, `Completed` or `Failed`). Each state has its own
/// `impl` block, so only the transitions defined for that state can be
/// called, and every transition consumes `self` and returns the agent under
/// its new state type.
pub struct Agent<S> {
/// Descriptive data for the agent; moved unchanged through every transition.
pub metadata: AgentMetadata,
// Zero-sized marker tying this value to state `S`; no state value is stored.
state: PhantomData<S>,
// Outcome of the last execution. Set to `Some` by `complete`; `new`,
// `assign_task`, `start_execution` and `reset` all leave it as `None`.
result: Option<ExecutionResult>,
}
/// Construction and transitions available while the agent is `Idle`.
impl Agent<Idle> {
    /// Builds a new agent in the `Idle` state around `metadata`.
    ///
    /// The agent starts without any recorded execution result.
    pub fn new(metadata: AgentMetadata) -> Self {
        let state = PhantomData;
        let result = None;
        Self {
            metadata,
            state,
            result,
        }
    }

    /// Moves the agent from `Idle` to `Assigned`.
    ///
    /// Only the metadata is carried over. The `_task` argument is accepted
    /// but not stored anywhere on the returned agent; it is dropped when
    /// this function returns.
    pub fn assign_task(self, _task: TaskAssignment) -> Agent<Assigned> {
        let Self { metadata, .. } = self;
        Agent {
            metadata,
            state: PhantomData,
            result: None,
        }
    }
}
/// Transitions available while the agent is `Assigned`.
impl Agent<Assigned> {
    /// Moves the agent from `Assigned` to `Executing`.
    ///
    /// The metadata is carried over and no execution result is recorded yet.
    pub fn start_execution(self) -> Agent<Executing> {
        let metadata = self.metadata;
        Agent {
            metadata,
            state: PhantomData,
            result: None,
        }
    }
}
/// Transitions from Executing state
impl Agent<Executing> {
/// Complete execution successfully
pub fn complete(self, result: ExecutionResult) -> Agent<Completed> {
Agent {
metadata: self.metadata,
state: PhantomData,
result: Some(result),
}
}
/// Fail execution
pub fn fail(self, _error: String) -> Agent<Failed> {
Agent {
metadata: self.metadata,
state: PhantomData,
result: None,
}
}
}
/// Accessors and transitions available while the agent is `Completed`.
impl Agent<Completed> {
    /// Borrows the stored execution result, if one is recorded.
    pub fn result(&self) -> Option<&ExecutionResult> {
        Option::as_ref(&self.result)
    }

    /// Returns the agent to the `Idle` state.
    ///
    /// The metadata is carried over; the stored result is discarded.
    pub fn reset(self) -> Agent<Idle> {
        let Self { metadata, .. } = self;
        Agent {
            metadata,
            state: PhantomData,
            result: None,
        }
    }
}
/// Accessors and transitions available while the agent is `Failed`.
impl Agent<Failed> {
    /// Produces a description of the failure.
    ///
    /// Returns `"Error: <output>"` when an execution result is recorded on
    /// the agent, using that result's `output` field, and `"Unknown error"`
    /// when none is recorded.
    pub fn error(&self) -> String {
        self.result.as_ref().map_or_else(
            || "Unknown error".to_string(),
            |outcome| format!("Error: {}", outcome.output),
        )
    }

    /// Returns the agent to the `Idle` state.
    ///
    /// The metadata is carried over; any stored result is discarded.
    pub fn reset(self) -> Agent<Idle> {
        let Self { metadata, .. } = self;
        Agent {
            metadata,
            state: PhantomData,
            result: None,
        }
    }
}
#[cfg(test)]
mod tests {
    use chrono::Utc;

    use super::*;

    /// Builds the agent metadata fixture shared by every test in this module.
    fn sample_metadata() -> AgentMetadata {
        AgentMetadata {
            id: "test-agent".to_string(),
            stable_id: "developer".to_string(),
            role: "developer".to_string(),
            name: "Test Developer".to_string(),
            version: "0.1.0".to_string(),
            status: crate::registry::AgentStatus::Active,
            capabilities: vec!["coding".to_string()],
            llm_provider: "claude".to_string(),
            llm_model: "claude-sonnet-4".to_string(),
            max_concurrent_tasks: 5,
            current_tasks: 0,
            created_at: Utc::now(),
            last_heartbeat: Utc::now(),
            uptime_percentage: 100.0,
            total_tasks_completed: 0,
            system_prompt: None,
        }
    }

    /// Builds the task assignment fixture shared by every test in this module.
    fn sample_task() -> TaskAssignment {
        TaskAssignment {
            id: "task-1".to_string(),
            agent_id: "test-agent".to_string(),
            required_role: "developer".to_string(),
            title: "Test task".to_string(),
            description: "Test description".to_string(),
            context: "{}".to_string(),
            priority: 1,
            deadline: None,
            assigned_at: Utc::now(),
        }
    }

    #[test]
    fn test_type_state_transitions() {
        // Type-state chain: Idle → Assigned → Executing → Completed → Idle
        let agent = Agent::new(sample_metadata());
        let agent = agent.assign_task(sample_task());
        let agent = agent.start_execution();

        let result = ExecutionResult {
            output: "Success".to_string(),
            input_tokens: 100,
            output_tokens: 50,
            duration_ms: 1000,
        };
        let agent = agent.complete(result);

        // The result handed to `complete` must come back unchanged.
        let stored = agent
            .result()
            .expect("a completed agent must expose its result");
        assert_eq!(stored.output, "Success");
        assert_eq!(stored.input_tokens, 100);
        assert_eq!(stored.output_tokens, 50);
        assert_eq!(stored.duration_ms, 1000);

        // Back to Idle - the type system ensures this; metadata must survive.
        let agent = agent.reset();
        assert_eq!(agent.metadata.id, "test-agent");
        assert_eq!(agent.metadata.role, "developer");
    }

    #[test]
    fn test_failed_state_transition() {
        // Type-state chain: Idle → Assigned → Executing → Failed → Idle
        let agent = Agent::new(sample_metadata());
        let agent = agent.assign_task(sample_task());
        let agent = agent.start_execution();
        let agent = agent.fail("API timeout".to_string());

        // A failed agent must always yield some description of the failure.
        let error = agent.error();
        assert!(!error.is_empty());

        // Back to Idle; metadata must survive the failure path as well.
        let agent = agent.reset();
        assert_eq!(agent.metadata.id, "test-agent");
        assert_eq!(agent.metadata.role, "developer");
    }
}