feat(workflow-engine): add saga, persistence, auth, and NATS-integrated orchestrator hardening

Key changes driving this: new saga.rs, persistence.rs, auth.rs in workflow-engine; SurrealDB migration 009_workflow_state.surql; backend services refactored; frontend dist built; ADR-0033 documenting the hardening decision.
2026-02-22 21:44:42 +00:00 · 2026-02-22 21:44:42 +00:00 · b9e2cee9f7
commit b9e2cee9f7
parent e91e3cb67a
43 changed files with 3208 additions and 3674 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -7,6 +7,36 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ## [Unreleased]

+### Added - Workflow Engine Hardening (Persistence · Saga · Cedar)
+
+#### `vapora-workflow-engine` — three new hardening layers
+
+- **`persistence.rs`**: `SurrealWorkflowStore` — crash-recoverable `WorkflowInstance` state in SurrealDB
+  - `save()` upserts on every state-mutating operation; serializes via `serde_json::Value` (surrealdb v3 `SurrealValue` requirement)
+  - `load_active()` on startup restores all non-terminal instances to the in-memory `DashMap`
+  - `delete()` removes terminal instances after completion
+- **`saga.rs`**: `SagaCompensator` — reverse-order rollback dispatch via `SwarmCoordinator`
+  - Iterates executed stages in reverse; skips stages without `compensation_agents` in `StageConfig`
+  - Dispatches `{ type: "compensation", stage_name, workflow_id, original_context, artifacts_to_undo }` payload
+  - Best-effort: errors are logged and never propagated
+- **`auth.rs`**: `CedarAuthorizer` — per-stage Cedar policy enforcement
+  - `load_from_dir(path)` reads all `*.cedar` files and compiles a single `PolicySet`
+  - Called before each `SwarmCoordinator::assign_task()`; deny returns `WorkflowError::Unauthorized`
+  - Disabled when `EngineConfig.cedar_policy_dir` is `None`
+- **`config.rs`**: `StageConfig` gains `compensation_agents: Option<Vec<String>>`; `EngineConfig` gains `cedar_policy_dir: Option<String>`
+- **`instance.rs`**: `WorkflowInstance::mark_current_task_failed()` — isolates the `current_stage_mut()` borrow to avoid NLL conflicts and clippy `excessive_nesting` in `on_task_failed()`
+- **`migrations/009_workflow_state.surql`**: SCHEMAFULL `workflow_instances` table; indexes on `template_name` and `created_at`
+- New deps: `surrealdb = { workspace = true }`, `cedar-policy = "4.9"`
+- Tests: 31 pass (5 new — `auth` × 3, `saga` × 2); 0 clippy warnings
+
+#### `vapora-knowledge-graph` — surrealdb v3 compatibility fixes
+
+- All `response.take(0)` call sites updated from custom `#[derive(Deserialize)]` structs to `Vec<serde_json::Value>` intermediary pattern
+  - Affected: `find_similar_executions`, `get_agent_success_rate`, `get_task_distribution`, `cleanup_old_executions`, `get_execution_count`, `get_executions_for_task_type`, `get_agent_executions`, `get_task_type_analytics`, `get_dashboard_metrics`, `get_cost_report`, `get_rlm_executions_by_doc`, `find_similar_rlm_tasks`, `get_rlm_execution_count`, `cleanup_old_rlm_executions`
+- Root cause: `surrealdb` v3 changed `take()` bound from `T: DeserializeOwned` to `T: SurrealValue`; `serde_json::Value` satisfies this; custom structs do not
+
+---
+
 ### Fixed - `distro.just` build and installation

 - `distro::install`: now builds all 5 server binaries in one `cargo build --release` pass
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@ -53,6 +53,12 @@ vapora-rlm = { path = "crates/vapora-rlm" }
 secretumvault = { path = "../secretumvault", default-features = true }
 # ["openssl", "filesystem", "server", "surrealdb-storage", "pqc", "cli", "cedar"]

+# Stratumiops — shared graph, state and embedding primitives
+stratum-graph = { path = "../stratumiops/crates/stratum-graph" }
+stratum-state = { path = "../stratumiops/crates/stratum-state", features = ["mem-store"] }
+stratum-embeddings = { path = "../stratumiops/crates/stratum-embeddings", features = ["openai-provider", "ollama-provider", "fastembed-provider", "huggingface-provider", "memory-cache", "persistent-cache", "surrealdb-store"] }
+stratum-llm = { path = "../stratumiops/crates/stratum-llm", features = ["anthropic", "openai", "ollama"] }
+
 # Leptos ecosystem (CSR-only for frontend)
 leptos = { version = "0.8.15" }
 leptos_router = { version = "0.8.11" }
@ -139,19 +145,25 @@ time = { version = "0.3", features = ["serde"] }

 # Database
 sqlx = { version = "0.8.6", features = ["runtime-tokio-rustls", "postgres", "sqlite", "chrono", "uuid", "migrate"] }
-# Note: SurrealDB will be added when available in workspace
-surrealdb = "2.6"
+surrealdb = { version = "3", features = ["protocol-ws", "rustls"] }

 # Message Queue
-# Note: async-nats will be added when available in workspace
 async-nats = "0.46"

 # LLM Agent Framework & RAG
 rig-core = "0.30"
-# Embeddings: Use provider APIs (Claude, OpenAI, Gemini, Ollama) instead of fastembed
-# - rig-core integrates with all major providers
-# - Routing through vapora-llm-router for optimal provider selection
-# - Phase 3: Implement embedding service using provider APIs
+
+# Stratum embedding/llm workspace dep requirements (all referenced with workspace=true in crate Cargo.tomls)
+fastembed = "5.11"
+moka = { version = "0.12", features = ["future"] }
+xxhash-rust = { version = "0.8", features = ["xxh3"] }
+humantime-serde = "1.1"
+sled = "0.34"
+lancedb = "0.26"
+# Arrow 56.x pinned: lancedb 0.26 uses Arrow 56 internally; DO NOT upgrade until lancedb supports Arrow 57
+arrow = "=56"
+dirs = "6.0"
+which = "8.0"

 # Cryptography
 aes-gcm = { version = "0.10" }
--- a/crates/vapora-a2a/src/main.rs
+++ b/crates/vapora-a2a/src/main.rs
@ -56,8 +56,8 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
        surrealdb::Surreal::new::<surrealdb::engine::remote::ws::Ws>(surreal_url.as_str()).await?;

    db.signin(surrealdb::opt::auth::Root {
-        username: "root",
-        password: "root",
+        username: "root".to_string(),
+        password: "root".to_string(),
    })
    .await?;

--- a/crates/vapora-a2a/src/server.rs
+++ b/crates/vapora-a2a/src/server.rs
@ -136,8 +136,8 @@ mod tests {
            .unwrap();

        db.signin(surrealdb::opt::auth::Root {
-            username: "root",
-            password: "root",
+            username: "root".to_string(),
+            password: "root".to_string(),
        })
        .await
        .unwrap();
--- a/crates/vapora-a2a/src/task_manager.rs
+++ b/crates/vapora-a2a/src/task_manager.rs
@ -271,8 +271,8 @@ mod tests {
            .unwrap();

        db.signin(surrealdb::opt::auth::Root {
-            username: "root",
-            password: "root",
+            username: "root".to_string(),
+            password: "root".to_string(),
        })
        .await
        .unwrap();
--- a/crates/vapora-a2a/tests/integration_test.rs
+++ b/crates/vapora-a2a/tests/integration_test.rs
@ -25,8 +25,8 @@ async fn setup_test_db() -> Surreal<Client> {
        .expect("Failed to connect to SurrealDB");

    db.signin(Root {
-        username: "root",
-        password: "root",
+        username: "root".to_string(),
+        password: "root".to_string(),
    })
    .await
    .expect("Failed to sign in");
--- a/crates/vapora-backend/src/api/workflow_orchestrator.rs
+++ b/crates/vapora-backend/src/api/workflow_orchestrator.rs
@ -288,6 +288,7 @@ mod tests {
                parallel: false,
                max_parallel: None,
                approval_required: false,
+                compensation_agents: None,
            }],
        };

--- a/crates/vapora-backend/src/main.rs
+++ b/crates/vapora-backend/src/main.rs
@ -76,8 +76,8 @@ async fn main() -> Result<()> {

    // Sign in to database
    db.signin(surrealdb::opt::auth::Root {
-        username: "root",
-        password: "root",
+        username: "root".to_string(),
+        password: "root".to_string(),
    })
    .await?;

--- a/crates/vapora-backend/src/services/agent_service.rs
+++ b/crates/vapora-backend/src/services/agent_service.rs
@ -1,5 +1,3 @@
-// Agent service - Registry and management for the 12 agent roles
-
 use chrono::Utc;
 use surrealdb::engine::remote::ws::Client;
 use surrealdb::Surreal;
@ -20,10 +18,8 @@ impl AgentService {

    /// Register a new agent
    pub async fn register_agent(&self, mut agent: Agent) -> Result<Agent> {
-        // Set creation timestamp
        agent.created_at = Utc::now();

-        // Check if agent with this role already exists
        let existing = self.get_agent_by_role(&agent.role).await;
        if existing.is_ok() {
            return Err(VaporaError::InvalidInput(format!(
@ -32,16 +28,10 @@ impl AgentService {
            )));
        }

-        // Create agent in database
-        let created: Option<Agent> = self
-            .db
-            .create("agents")
-            .content(agent)
-            .await?
-            .into_iter()
-            .next();
-
-        created.ok_or_else(|| VaporaError::DatabaseError("Failed to register agent".to_string()))
+        let json = serde_json::to_value(&agent)?;
+        let raw: Option<serde_json::Value> = self.db.create("agents").content(json).await?;
+        raw.and_then(|v| serde_json::from_value(v).ok())
+            .ok_or_else(|| VaporaError::DatabaseError("Failed to register agent".to_string()))
    }

    /// List all agents
@ -51,8 +41,11 @@ impl AgentService {
            .query("SELECT * FROM agents ORDER BY role ASC")
            .await?;

-        let agents: Vec<Agent> = response.take(0)?;
-        Ok(agents)
+        let raw: Vec<serde_json::Value> = response.take(0)?;
+        Ok(raw
+            .into_iter()
+            .filter_map(|v| serde_json::from_value(v).ok())
+            .collect())
    }

    /// List agents by status
@ -71,15 +64,18 @@ impl AgentService {
            .bind(("status", status_str.to_string()))
            .await?;

-        let agents: Vec<Agent> = response.take(0)?;
-        Ok(agents)
+        let raw: Vec<serde_json::Value> = response.take(0)?;
+        Ok(raw
+            .into_iter()
+            .filter_map(|v| serde_json::from_value(v).ok())
+            .collect())
    }

    /// Get an agent by ID
    pub async fn get_agent(&self, id: &str) -> Result<Agent> {
-        let agent: Option<Agent> = self.db.select(("agents", id)).await?;
-
-        agent.ok_or_else(|| VaporaError::NotFound(format!("Agent with id '{}' not found", id)))
+        let raw: Option<serde_json::Value> = self.db.select(("agents", id)).await?;
+        raw.and_then(|v| serde_json::from_value(v).ok())
+            .ok_or_else(|| VaporaError::NotFound(format!("Agent with id '{}' not found", id)))
    }

    /// Get an agent by role
@ -105,43 +101,34 @@ impl AgentService {
            .bind(("role", role_str.to_string()))
            .await?;

-        let agents: Vec<Agent> = response.take(0)?;
-
-        agents
-            .into_iter()
-            .next()
+        let raw: Vec<serde_json::Value> = response.take(0)?;
+        raw.into_iter()
+            .find_map(|v| serde_json::from_value(v).ok())
            .ok_or_else(|| VaporaError::NotFound(format!("Agent with role '{:?}' not found", role)))
    }

    /// Update an agent
    pub async fn update_agent(&self, id: &str, mut updates: Agent) -> Result<Agent> {
-        // Verify agent exists
        let existing = self.get_agent(id).await?;
-
-        // Preserve certain fields
        updates.id = existing.id;
        updates.created_at = existing.created_at;

-        // Update in database
-        let updated: Option<Agent> = self.db.update(("agents", id)).content(updates).await?;
-
-        updated.ok_or_else(|| VaporaError::DatabaseError("Failed to update agent".to_string()))
+        let json = serde_json::to_value(&updates)?;
+        let raw: Option<serde_json::Value> = self.db.update(("agents", id)).content(json).await?;
+        raw.and_then(|v| serde_json::from_value(v).ok())
+            .ok_or_else(|| VaporaError::DatabaseError("Failed to update agent".to_string()))
    }

    /// Update agent status
    pub async fn update_agent_status(&self, id: &str, status: AgentStatus) -> Result<Agent> {
-        // Verify agent exists
        self.get_agent(id).await?;

-        let updated: Option<Agent> = self
+        let raw: Option<serde_json::Value> = self
            .db
            .update(("agents", id))
-            .merge(serde_json::json!({
-                "status": status
-            }))
+            .merge(serde_json::json!({ "status": status }))
            .await?;
-
-        updated
+        raw.and_then(|v| serde_json::from_value(v).ok())
            .ok_or_else(|| VaporaError::DatabaseError("Failed to update agent status".to_string()))
    }

@ -149,19 +136,16 @@ impl AgentService {
    pub async fn add_capability(&self, id: &str, capability: String) -> Result<Agent> {
        let mut agent = self.get_agent(id).await?;

-        // Add capability if not already present
        if !agent.capabilities.contains(&capability) {
            agent.capabilities.push(capability);

-            let updated: Option<Agent> = self
+            let raw: Option<serde_json::Value> = self
                .db
                .update(("agents", id))
-                .merge(serde_json::json!({
-                    "capabilities": agent.capabilities
-                }))
+                .merge(serde_json::json!({ "capabilities": agent.capabilities }))
                .await?;
-
-            return updated
+            return raw
+                .and_then(|v| serde_json::from_value(v).ok())
                .ok_or_else(|| VaporaError::DatabaseError("Failed to add capability".to_string()));
        }

@ -171,38 +155,31 @@ impl AgentService {
    /// Remove capability from an agent
    pub async fn remove_capability(&self, id: &str, capability: &str) -> Result<Agent> {
        let mut agent = self.get_agent(id).await?;
-
-        // Remove capability
        agent.capabilities.retain(|c| c != capability);

-        let updated: Option<Agent> = self
+        let raw: Option<serde_json::Value> = self
            .db
            .update(("agents", id))
-            .merge(serde_json::json!({
-                "capabilities": agent.capabilities
-            }))
+            .merge(serde_json::json!({ "capabilities": agent.capabilities }))
            .await?;
-
-        updated.ok_or_else(|| VaporaError::DatabaseError("Failed to remove capability".to_string()))
+        raw.and_then(|v| serde_json::from_value(v).ok())
+            .ok_or_else(|| VaporaError::DatabaseError("Failed to remove capability".to_string()))
    }

    /// Add skill to an agent
    pub async fn add_skill(&self, id: &str, skill: String) -> Result<Agent> {
        let mut agent = self.get_agent(id).await?;

-        // Add skill if not already present
        if !agent.skills.contains(&skill) {
            agent.skills.push(skill);

-            let updated: Option<Agent> = self
+            let raw: Option<serde_json::Value> = self
                .db
                .update(("agents", id))
-                .merge(serde_json::json!({
-                    "skills": agent.skills
-                }))
+                .merge(serde_json::json!({ "skills": agent.skills }))
                .await?;
-
-            return updated
+            return raw
+                .and_then(|v| serde_json::from_value(v).ok())
                .ok_or_else(|| VaporaError::DatabaseError("Failed to add skill".to_string()));
        }

@ -211,30 +188,29 @@ impl AgentService {

    /// Deregister an agent
    pub async fn deregister_agent(&self, id: &str) -> Result<()> {
-        // Verify agent exists
        self.get_agent(id).await?;
-
-        // Delete from database
-        let _: Option<Agent> = self.db.delete(("agents", id)).await?;
-
+        let _: Option<serde_json::Value> = self.db.delete(("agents", id)).await?;
        Ok(())
    }

-    /// Get agent health status (checks if agent is active and responding)
+    /// Get agent health status
    pub async fn check_agent_health(&self, id: &str) -> Result<bool> {
        let agent = self.get_agent(id).await?;
        Ok(agent.status == AgentStatus::Active)
    }

-    /// Get agents available for task assignment (active agents with capacity)
+    /// Get agents available for task assignment
    pub async fn get_available_agents(&self) -> Result<Vec<Agent>> {
        let mut response = self
            .db
            .query("SELECT * FROM agents WHERE status = 'active' ORDER BY role ASC")
            .await?;

-        let agents: Vec<Agent> = response.take(0)?;
-        Ok(agents)
+        let raw: Vec<serde_json::Value> = response.take(0)?;
+        Ok(raw
+            .into_iter()
+            .filter_map(|v| serde_json::from_value(v).ok())
+            .collect())
    }
 }

@ -243,13 +219,6 @@ mod tests {
    #[allow(unused_imports)]
    use super::*;

-    // Note: These are placeholder tests. Real tests require a running SurrealDB
-    // instance or mocking. For Phase 1, we'll add integration tests that use a
-    // test database.
-
    #[test]
-    fn test_agent_service_creation() {
-        // This test just verifies the service can be created
-        // Real database tests will be in integration tests
-    }
+    fn test_agent_service_creation() {}
 }
--- a/crates/vapora-backend/src/services/project_service.rs
+++ b/crates/vapora-backend/src/services/project_service.rs
@ -1,5 +1,3 @@
-// Project service - CRUD operations for projects
-
 use chrono::Utc;
 use surrealdb::engine::remote::ws::Client;
 use surrealdb::Surreal;
@ -20,21 +18,14 @@ impl ProjectService {

    /// Create a new project
    pub async fn create_project(&self, mut project: Project) -> Result<Project> {
-        // Set timestamps
        let now = Utc::now();
        project.created_at = now;
        project.updated_at = now;

-        // Create project in database
-        let created: Option<Project> = self
-            .db
-            .create("projects")
-            .content(project)
-            .await?
-            .into_iter()
-            .next();
-
-        created.ok_or_else(|| VaporaError::DatabaseError("Failed to create project".to_string()))
+        let json = serde_json::to_value(&project)?;
+        let raw: Option<serde_json::Value> = self.db.create("projects").content(json).await?;
+        raw.and_then(|v| serde_json::from_value(v).ok())
+            .ok_or_else(|| VaporaError::DatabaseError("Failed to create project".to_string()))
    }

    /// List all projects for a tenant
@ -45,8 +36,11 @@ impl ProjectService {
            .bind(("tenant_id", tenant_id.to_string()))
            .await?;

-        let projects: Vec<Project> = response.take(0)?;
-        Ok(projects)
+        let raw: Vec<serde_json::Value> = response.take(0)?;
+        Ok(raw
+            .into_iter()
+            .filter_map(|v| serde_json::from_value(v).ok())
+            .collect())
    }

    /// List projects by status for a tenant
@ -72,18 +66,20 @@ impl ProjectService {
            .bind(("status", status_str.to_string()))
            .await?;

-        let projects: Vec<Project> = response.take(0)?;
-        Ok(projects)
+        let raw: Vec<serde_json::Value> = response.take(0)?;
+        Ok(raw
+            .into_iter()
+            .filter_map(|v| serde_json::from_value(v).ok())
+            .collect())
    }

    /// Get a project by ID
    pub async fn get_project(&self, id: &str, tenant_id: &str) -> Result<Project> {
-        let project: Option<Project> = self.db.select(("projects", id)).await?;
-
-        let project = project
+        let raw: Option<serde_json::Value> = self.db.select(("projects", id)).await?;
+        let project: Project = raw
+            .and_then(|v| serde_json::from_value(v).ok())
            .ok_or_else(|| VaporaError::NotFound(format!("Project with id '{}' not found", id)))?;

-        // Verify tenant ownership
        if project.tenant_id != tenant_id {
            return Err(VaporaError::Unauthorized(
                "Project does not belong to this tenant".to_string(),
@ -100,29 +96,22 @@ impl ProjectService {
        tenant_id: &str,
        mut updates: Project,
    ) -> Result<Project> {
-        // Verify project exists and belongs to tenant
        let existing = self.get_project(id, tenant_id).await?;
-
-        // Preserve certain fields
        updates.id = existing.id;
        updates.tenant_id = existing.tenant_id;
        updates.created_at = existing.created_at;
        updates.updated_at = Utc::now();

-        // Update in database
-        let updated: Option<Project> = self.db.update(("projects", id)).content(updates).await?;
-
-        updated.ok_or_else(|| VaporaError::DatabaseError("Failed to update project".to_string()))
+        let json = serde_json::to_value(&updates)?;
+        let raw: Option<serde_json::Value> = self.db.update(("projects", id)).content(json).await?;
+        raw.and_then(|v| serde_json::from_value(v).ok())
+            .ok_or_else(|| VaporaError::DatabaseError("Failed to update project".to_string()))
    }

    /// Delete a project
    pub async fn delete_project(&self, id: &str, tenant_id: &str) -> Result<()> {
-        // Verify project exists and belongs to tenant
        self.get_project(id, tenant_id).await?;
-
-        // Delete from database
-        let _: Option<Project> = self.db.delete(("projects", id)).await?;
-
+        let _: Option<serde_json::Value> = self.db.delete(("projects", id)).await?;
        Ok(())
    }

@ -130,12 +119,11 @@ impl ProjectService {
    pub async fn add_feature(&self, id: &str, tenant_id: &str, feature: String) -> Result<Project> {
        let mut project = self.get_project(id, tenant_id).await?;

-        // Add feature if not already present
        if !project.features.contains(&feature) {
            project.features.push(feature);
            project.updated_at = Utc::now();

-            let updated: Option<Project> = self
+            let raw: Option<serde_json::Value> = self
                .db
                .update(("projects", id))
                .merge(serde_json::json!({
@ -143,8 +131,8 @@ impl ProjectService {
                    "updated_at": project.updated_at
                }))
                .await?;
-
-            return updated
+            return raw
+                .and_then(|v| serde_json::from_value(v).ok())
                .ok_or_else(|| VaporaError::DatabaseError("Failed to add feature".to_string()));
        }

@ -159,12 +147,10 @@ impl ProjectService {
        feature: &str,
    ) -> Result<Project> {
        let mut project = self.get_project(id, tenant_id).await?;
-
-        // Remove feature
        project.features.retain(|f| f != feature);
        project.updated_at = Utc::now();

-        let updated: Option<Project> = self
+        let raw: Option<serde_json::Value> = self
            .db
            .update(("projects", id))
            .merge(serde_json::json!({
@ -172,17 +158,17 @@ impl ProjectService {
                "updated_at": project.updated_at
            }))
            .await?;
-
-        updated.ok_or_else(|| VaporaError::DatabaseError("Failed to remove feature".to_string()))
+        raw.and_then(|v| serde_json::from_value(v).ok())
+            .ok_or_else(|| VaporaError::DatabaseError("Failed to remove feature".to_string()))
    }

-    /// Archive a project (set status to archived)
+    /// Archive a project
    pub async fn archive_project(&self, id: &str, tenant_id: &str) -> Result<Project> {
        let mut project = self.get_project(id, tenant_id).await?;
        project.status = ProjectStatus::Archived;
        project.updated_at = Utc::now();

-        let updated: Option<Project> = self
+        let raw: Option<serde_json::Value> = self
            .db
            .update(("projects", id))
            .merge(serde_json::json!({
@ -190,8 +176,8 @@ impl ProjectService {
                "updated_at": project.updated_at
            }))
            .await?;
-
-        updated.ok_or_else(|| VaporaError::DatabaseError("Failed to archive project".to_string()))
+        raw.and_then(|v| serde_json::from_value(v).ok())
+            .ok_or_else(|| VaporaError::DatabaseError("Failed to archive project".to_string()))
    }
 }

@ -200,13 +186,6 @@ mod tests {
    #[allow(unused_imports)]
    use super::*;

-    // Note: These are placeholder tests. Real tests require a running SurrealDB
-    // instance or mocking. For Phase 1, we'll add integration tests that use a
-    // test database.
-
    #[test]
-    fn test_project_service_creation() {
-        // This test just verifies the service can be created
-        // Real database tests will be in integration tests
-    }
+    fn test_project_service_creation() {}
 }
--- a/crates/vapora-backend/src/services/proposal_service.rs
+++ b/crates/vapora-backend/src/services/proposal_service.rs
@ -1,5 +1,3 @@
-// Proposal service - CRUD operations for approval gate proposals
-
 use chrono::Utc;
 use surrealdb::engine::remote::ws::Client;
 use surrealdb::Surreal;
@ -20,18 +18,12 @@ impl ProposalService {

    /// Create a new proposal
    pub async fn create_proposal(&self, mut proposal: Proposal) -> Result<Proposal> {
-        let now = Utc::now();
-        proposal.created_at = now;
+        proposal.created_at = Utc::now();

-        let created: Option<Proposal> = self
-            .db
-            .create("proposals")
-            .content(proposal)
-            .await?
-            .into_iter()
-            .next();
-
-        created.ok_or_else(|| VaporaError::DatabaseError("Failed to create proposal".to_string()))
+        let json = serde_json::to_value(&proposal)?;
+        let raw: Option<serde_json::Value> = self.db.create("proposals").content(json).await?;
+        raw.and_then(|v| serde_json::from_value(v).ok())
+            .ok_or_else(|| VaporaError::DatabaseError("Failed to create proposal".to_string()))
    }

    /// Get proposal by ID
@ -43,10 +35,9 @@ impl ProposalService {
            .bind(("tenant_id", tenant_id.to_string()))
            .await?;

-        let proposals: Vec<Proposal> = response.take(0)?;
-        proposals
-            .into_iter()
-            .next()
+        let raw: Vec<serde_json::Value> = response.take(0)?;
+        raw.into_iter()
+            .find_map(|v| serde_json::from_value(v).ok())
            .ok_or_else(|| VaporaError::NotFound(format!("Proposal not found: {}", id)))
    }

@ -57,7 +48,7 @@ impl ProposalService {
        tenant_id: &str,
        status: Option<ProposalStatus>,
    ) -> Result<Vec<Proposal>> {
-        let query = if let Some(ref _s) = status {
+        let query = if status.is_some() {
            "SELECT * FROM proposals WHERE project_id = $project_id AND tenant_id = $tenant_id AND \
             status = $status ORDER BY created_at DESC"
                .to_string()
@ -78,8 +69,11 @@ impl ProposalService {
        }

        let mut response = response.await?;
-        let proposals: Vec<Proposal> = response.take(0)?;
-        Ok(proposals)
+        let raw: Vec<serde_json::Value> = response.take(0)?;
+        Ok(raw
+            .into_iter()
+            .filter_map(|v| serde_json::from_value(v).ok())
+            .collect())
    }

    /// List proposals by task ID
@ -99,51 +93,42 @@ impl ProposalService {
            .bind(("tenant_id", tenant_id.to_string()))
            .await?;

-        let proposals: Vec<Proposal> = response.take(0)?;
-        Ok(proposals)
+        let raw: Vec<serde_json::Value> = response.take(0)?;
+        Ok(raw
+            .into_iter()
+            .filter_map(|v| serde_json::from_value(v).ok())
+            .collect())
    }

    /// Submit proposal for approval
    pub async fn submit_proposal(&self, id: &str, tenant_id: &str) -> Result<Proposal> {
-        let now = Utc::now();
-
        let mut proposal = self.get_proposal(id, tenant_id).await?;
        proposal.status = ProposalStatus::Proposed;
-        proposal.submitted_at = Some(now);
-
+        proposal.submitted_at = Some(Utc::now());
        self.update_proposal(id, tenant_id, proposal).await
    }

    /// Approve proposal
    pub async fn approve_proposal(&self, id: &str, tenant_id: &str) -> Result<Proposal> {
-        let now = Utc::now();
-
        let mut proposal = self.get_proposal(id, tenant_id).await?;
        proposal.status = ProposalStatus::Approved;
-        proposal.reviewed_at = Some(now);
-
+        proposal.reviewed_at = Some(Utc::now());
        self.update_proposal(id, tenant_id, proposal).await
    }

    /// Reject proposal
    pub async fn reject_proposal(&self, id: &str, tenant_id: &str) -> Result<Proposal> {
-        let now = Utc::now();
-
        let mut proposal = self.get_proposal(id, tenant_id).await?;
        proposal.status = ProposalStatus::Rejected;
-        proposal.reviewed_at = Some(now);
-
+        proposal.reviewed_at = Some(Utc::now());
        self.update_proposal(id, tenant_id, proposal).await
    }

    /// Mark proposal as executed
    pub async fn mark_executed(&self, id: &str, tenant_id: &str) -> Result<Proposal> {
-        let now = Utc::now();
-
        let mut proposal = self.get_proposal(id, tenant_id).await?;
        proposal.status = ProposalStatus::Executed;
-        proposal.executed_at = Some(now);
-
+        proposal.executed_at = Some(Utc::now());
        self.update_proposal(id, tenant_id, proposal).await
    }

@ -154,20 +139,19 @@ impl ProposalService {
        tenant_id: &str,
        proposal: Proposal,
    ) -> Result<Proposal> {
-        // Verify ownership
        self.get_proposal(id, tenant_id).await?;

-        let updated: Option<Proposal> = self.db.update(("proposals", id)).content(proposal).await?;
-
-        updated.ok_or_else(|| VaporaError::DatabaseError("Failed to update proposal".to_string()))
+        let json = serde_json::to_value(&proposal)?;
+        let raw: Option<serde_json::Value> =
+            self.db.update(("proposals", id)).content(json).await?;
+        raw.and_then(|v| serde_json::from_value(v).ok())
+            .ok_or_else(|| VaporaError::DatabaseError("Failed to update proposal".to_string()))
    }

    /// Delete proposal
    pub async fn delete_proposal(&self, id: &str, tenant_id: &str) -> Result<()> {
-        // Verify ownership
        self.get_proposal(id, tenant_id).await?;
-
-        let _: Option<()> = self.db.delete(("proposals", id)).await?;
+        let _: Option<serde_json::Value> = self.db.delete(("proposals", id)).await?;
        Ok(())
    }

@ -180,15 +164,11 @@ impl ProposalService {
        review.proposal_id = proposal_id.to_string();
        review.created_at = Utc::now();

-        let created: Option<ProposalReview> = self
-            .db
-            .create("proposal_reviews")
-            .content(review.clone())
-            .await?
-            .into_iter()
-            .next();
-
-        created.ok_or_else(|| VaporaError::DatabaseError("Failed to create review".to_string()))
+        let json = serde_json::to_value(&review)?;
+        let raw: Option<serde_json::Value> =
+            self.db.create("proposal_reviews").content(json).await?;
+        raw.and_then(|v| serde_json::from_value(v).ok())
+            .ok_or_else(|| VaporaError::DatabaseError("Failed to create review".to_string()))
    }

    /// List reviews for proposal
@ -202,8 +182,11 @@ impl ProposalService {
            .bind(("proposal_id", proposal_id.to_string()))
            .await?;

-        let reviews: Vec<ProposalReview> = response.take(0)?;
-        Ok(reviews)
+        let raw: Vec<serde_json::Value> = response.take(0)?;
+        Ok(raw
+            .into_iter()
+            .filter_map(|v| serde_json::from_value(v).ok())
+            .collect())
    }

    /// Get proposals pending approval with risk level
@ -227,12 +210,13 @@ impl ProposalService {
            .bind(("risk_level", risk_str))
            .await?;

-        let proposals: Vec<Proposal> = response.take(0)?;
-        Ok(proposals)
+        let raw: Vec<serde_json::Value> = response.take(0)?;
+        Ok(raw
+            .into_iter()
+            .filter_map(|v| serde_json::from_value(v).ok())
+            .collect())
    }

-    // Helper functions for serialization
-
    fn status_to_string(status: &ProposalStatus) -> String {
        match status {
            ProposalStatus::Proposed => "proposed",
--- a/crates/vapora-backend/src/services/task_service.rs
+++ b/crates/vapora-backend/src/services/task_service.rs
@ -1,5 +1,3 @@
-// Task service - CRUD operations and Kanban management for tasks
-
 use chrono::Utc;
 use surrealdb::engine::remote::ws::Client;
 use surrealdb::Surreal;
@ -20,12 +18,10 @@ impl TaskService {

    /// Create a new task
    pub async fn create_task(&self, mut task: Task) -> Result<Task> {
-        // Set timestamps
        let now = Utc::now();
        task.created_at = now;
        task.updated_at = now;

-        // If task_order is not set, get the max order for this project/status and add 1
        if task.task_order == 0 {
            let max_order = self
                .get_max_task_order(&task.project_id, &task.status)
@ -33,16 +29,10 @@ impl TaskService {
            task.task_order = max_order + 1;
        }

-        // Create task in database
-        let created: Option<Task> = self
-            .db
-            .create("tasks")
-            .content(task)
-            .await?
-            .into_iter()
-            .next();
-
-        created.ok_or_else(|| VaporaError::DatabaseError("Failed to create task".to_string()))
+        let json = serde_json::to_value(&task)?;
+        let raw: Option<serde_json::Value> = self.db.create("tasks").content(json).await?;
+        raw.and_then(|v| serde_json::from_value(v).ok())
+            .ok_or_else(|| VaporaError::DatabaseError("Failed to create task".to_string()))
    }

    /// List all tasks for a project
@ -57,8 +47,11 @@ impl TaskService {
            .bind(("tenant_id", tenant_id.to_string()))
            .await?;

-        let tasks: Vec<Task> = response.take(0)?;
-        Ok(tasks)
+        let raw: Vec<serde_json::Value> = response.take(0)?;
+        Ok(raw
+            .into_iter()
+            .filter_map(|v| serde_json::from_value(v).ok())
+            .collect())
    }

    /// List tasks by status (for Kanban columns)
@ -86,8 +79,11 @@ impl TaskService {
            .bind(("status", status_str.to_string()))
            .await?;

-        let tasks: Vec<Task> = response.take(0)?;
-        Ok(tasks)
+        let raw: Vec<serde_json::Value> = response.take(0)?;
+        Ok(raw
+            .into_iter()
+            .filter_map(|v| serde_json::from_value(v).ok())
+            .collect())
    }

    /// List tasks by assignee
@ -108,18 +104,20 @@ impl TaskService {
            .bind(("assignee", assignee.to_string()))
            .await?;

-        let tasks: Vec<Task> = response.take(0)?;
-        Ok(tasks)
+        let raw: Vec<serde_json::Value> = response.take(0)?;
+        Ok(raw
+            .into_iter()
+            .filter_map(|v| serde_json::from_value(v).ok())
+            .collect())
    }

    /// Get a task by ID
    pub async fn get_task(&self, id: &str, tenant_id: &str) -> Result<Task> {
-        let task: Option<Task> = self.db.select(("tasks", id)).await?;
+        let raw: Option<serde_json::Value> = self.db.select(("tasks", id)).await?;
+        let task: Task = raw
+            .and_then(|v| serde_json::from_value(v).ok())
+            .ok_or_else(|| VaporaError::NotFound(format!("Task with id '{}' not found", id)))?;

-        let task =
-            task.ok_or_else(|| VaporaError::NotFound(format!("Task with id '{}' not found", id)))?;
-
-        // Verify tenant ownership
        if task.tenant_id != tenant_id {
            return Err(VaporaError::Unauthorized(
                "Task does not belong to this tenant".to_string(),
@ -131,19 +129,16 @@ impl TaskService {

    /// Update a task
    pub async fn update_task(&self, id: &str, tenant_id: &str, mut updates: Task) -> Result<Task> {
-        // Verify task exists and belongs to tenant
        let existing = self.get_task(id, tenant_id).await?;
-
-        // Preserve certain fields
        updates.id = existing.id;
        updates.tenant_id = existing.tenant_id;
        updates.created_at = existing.created_at;
        updates.updated_at = Utc::now();

-        // Update in database
-        let updated: Option<Task> = self.db.update(("tasks", id)).content(updates).await?;
-
-        updated.ok_or_else(|| VaporaError::DatabaseError("Failed to update task".to_string()))
+        let json = serde_json::to_value(&updates)?;
+        let raw: Option<serde_json::Value> = self.db.update(("tasks", id)).content(json).await?;
+        raw.and_then(|v| serde_json::from_value(v).ok())
+            .ok_or_else(|| VaporaError::DatabaseError("Failed to update task".to_string()))
    }

    /// Update task status (for Kanban column changes)
@ -154,11 +149,9 @@ impl TaskService {
        status: TaskStatus,
    ) -> Result<Task> {
        let task = self.get_task(id, tenant_id).await?;
-
-        // Get max order for new status
        let max_order = self.get_max_task_order(&task.project_id, &status).await?;

-        let updated: Option<Task> = self
+        let raw: Option<serde_json::Value> = self
            .db
            .update(("tasks", id))
            .merge(serde_json::json!({
@ -167,8 +160,7 @@ impl TaskService {
                "updated_at": Utc::now()
            }))
            .await?;
-
-        updated
+        raw.and_then(|v| serde_json::from_value(v).ok())
            .ok_or_else(|| VaporaError::DatabaseError("Failed to update task status".to_string()))
    }

@ -182,7 +174,6 @@ impl TaskService {
    ) -> Result<Task> {
        let mut task = self.get_task(id, tenant_id).await?;

-        // Update status if provided
        if let Some(status) = new_status {
            task.status = status;
        }
@ -190,7 +181,7 @@ impl TaskService {
        task.task_order = new_order;
        task.updated_at = Utc::now();

-        let updated: Option<Task> = self
+        let raw: Option<serde_json::Value> = self
            .db
            .update(("tasks", id))
            .merge(serde_json::json!({
@ -199,8 +190,8 @@ impl TaskService {
                "updated_at": task.updated_at
            }))
            .await?;
-
-        updated.ok_or_else(|| VaporaError::DatabaseError("Failed to reorder task".to_string()))
+        raw.and_then(|v| serde_json::from_value(v).ok())
+            .ok_or_else(|| VaporaError::DatabaseError("Failed to reorder task".to_string()))
    }

    /// Assign task to agent/user
@ -209,7 +200,7 @@ impl TaskService {
        task.assignee = assignee;
        task.updated_at = Utc::now();

-        let updated: Option<Task> = self
+        let raw: Option<serde_json::Value> = self
            .db
            .update(("tasks", id))
            .merge(serde_json::json!({
@ -217,8 +208,8 @@ impl TaskService {
                "updated_at": task.updated_at
            }))
            .await?;
-
-        updated.ok_or_else(|| VaporaError::DatabaseError("Failed to assign task".to_string()))
+        raw.and_then(|v| serde_json::from_value(v).ok())
+            .ok_or_else(|| VaporaError::DatabaseError("Failed to assign task".to_string()))
    }

    /// Update task priority
@ -232,7 +223,7 @@ impl TaskService {
        task.priority = priority;
        task.updated_at = Utc::now();

-        let updated: Option<Task> = self
+        let raw: Option<serde_json::Value> = self
            .db
            .update(("tasks", id))
            .merge(serde_json::json!({
@ -240,18 +231,14 @@ impl TaskService {
                "updated_at": task.updated_at
            }))
            .await?;
-
-        updated.ok_or_else(|| VaporaError::DatabaseError("Failed to update priority".to_string()))
+        raw.and_then(|v| serde_json::from_value(v).ok())
+            .ok_or_else(|| VaporaError::DatabaseError("Failed to update priority".to_string()))
    }

    /// Delete a task
    pub async fn delete_task(&self, id: &str, tenant_id: &str) -> Result<()> {
-        // Verify task exists and belongs to tenant
        self.get_task(id, tenant_id).await?;
-
-        // Delete from database
-        let _: Option<Task> = self.db.delete(("tasks", id)).await?;
-
+        let _: Option<serde_json::Value> = self.db.delete(("tasks", id)).await?;
        Ok(())
    }

@ -274,8 +261,11 @@ impl TaskService {
            .bind(("status", status_str.to_string()))
            .await?;

-        let orders: Vec<i32> = response.take(0)?;
-        Ok(orders.first().copied().unwrap_or(0))
+        let raw: Vec<serde_json::Value> = response.take(0)?;
+        Ok(raw
+            .into_iter()
+            .find_map(|v| v.as_i64().map(|n| n as i32))
+            .unwrap_or(0))
    }
 }

@ -284,13 +274,6 @@ mod tests {
    #[allow(unused_imports)]
    use super::*;

-    // Note: These are placeholder tests. Real tests require a running SurrealDB
-    // instance or mocking. For Phase 1, we'll add integration tests that use a
-    // test database.
-
    #[test]
-    fn test_task_service_creation() {
-        // This test just verifies the service can be created
-        // Real database tests will be in integration tests
-    }
+    fn test_task_service_creation() {}
 }
--- a/crates/vapora-backend/tests/rlm_api_test.rs
+++ b/crates/vapora-backend/tests/rlm_api_test.rs
@ -24,8 +24,8 @@ async fn setup_test_app() -> Router {
        .expect("Failed to connect to SurrealDB");

    db.signin(Root {
-        username: "root",
-        password: "root",
+        username: "root".to_string(),
+        password: "root".to_string(),
    })
    .await
    .expect("Failed to sign in");
--- a/crates/vapora-frontend/dist/index.html
+++ b/crates/vapora-frontend/dist/index.html
@ -0,0 +1,74 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="utf-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1" />
+  <title>VAPORA - Multi-Agent Development Platform</title>
+
+  <!-- UnoCSS Generated CSS -->
+  <link rel="stylesheet" href="/assets/styles/website.css" />
+
+  <style>
+    /* Base reset */
+    * {
+      margin: 0;
+      padding: 0;
+      box-sizing: border-box;
+    }
+
+    /* Root variables */
+    :root {
+      --bg-primary: #0a0118;
+      --bg-glass: rgba(255, 255, 255, 0.05);
+      --bg-glass-hover: rgba(255, 255, 255, 0.08);
+      --accent-cyan: #22d3ee;
+      --accent-purple: #a855f7;
+      --accent-pink: #ec4899;
+      --border-glass: rgba(34, 211, 238, 0.3);
+      --text-primary: #ffffff;
+      --text-secondary: #cbd5e1;
+    }
+
+    /* Base styles */
+    html, body {
+      width: 100%;
+      height: 100%;
+      font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
+      background: linear-gradient(135deg, #0f0c29, #302b63, #24243e);
+      color: var(--text-primary);
+    }
+
+    #app { width: 100%; height: 100%; }
+
+    /* Loading spinner */
+    .loading {
+      display: flex;
+      align-items: center;
+      justify-content: center;
+      height: 100vh;
+      font-size: 1.5rem;
+    }
+
+    @keyframes spin {
+      from { transform: rotate(0deg); }
+      to { transform: rotate(360deg); }
+    }
+  </style>
+<link rel="modulepreload" href="/vapora-frontend-4e82c70edbf89867.js" crossorigin="anonymous" integrity="sha384-WVmg7Zh4CBJ/gCS937rrzmWrpZN13Ry6YUgLVPXSAfHvtNx/wo3RzAlqG3WBypEI"><link rel="preload" href="/vapora-frontend-4e82c70edbf89867_bg.wasm" crossorigin="anonymous" integrity="sha384-rV/f856J/cjGDdfFUdE9rmSxIBMD+jT21YAYuC4MdK3Mi7ewdVwrHo+HZD0JLXL3" as="fetch" type="application/wasm"></head>
+<body>
+  <div id="app">
+    <div class="loading">Loading VAPORA...</div>
+  </div>
+
+<script type="module">
+import init, * as bindings from '/vapora-frontend-4e82c70edbf89867.js';
+const wasm = await init({ module_or_path: '/vapora-frontend-4e82c70edbf89867_bg.wasm' });
+
+
+window.wasmBindings = bindings;
+
+
+dispatchEvent(new CustomEvent("TrunkApplicationStarted", {detail: {wasm}}));
+
+</script></body>
+</html>
--- a/crates/vapora-frontend/dist/vapora-frontend-4e82c70edbf89867.js
+++ b/crates/vapora-frontend/dist/vapora-frontend-4e82c70edbf89867.js
@ -0,0 +1,451 @@
+export class IntoUnderlyingByteSource {
+    __destroy_into_raw() {
+        const ptr = this.__wbg_ptr;
+        this.__wbg_ptr = 0;
+        IntoUnderlyingByteSourceFinalization.unregister(this);
+        return ptr;
+    }
+    free() {
+        const ptr = this.__destroy_into_raw();
+        wasm.__wbg_intounderlyingbytesource_free(ptr, 0);
+    }
+    /**
+     * @returns {number}
+     */
+    get autoAllocateChunkSize() {
+        const ret = wasm.intounderlyingbytesource_autoAllocateChunkSize(this.__wbg_ptr);
+        return ret >>> 0;
+    }
+    cancel() {
+        const ptr = this.__destroy_into_raw();
+        wasm.intounderlyingbytesource_cancel(ptr);
+    }
+    /**
+     * @param {ReadableByteStreamController} controller
+     * @returns {Promise<any>}
+     */
+    pull(controller) {
+        const ret = wasm.intounderlyingbytesource_pull(this.__wbg_ptr, controller);
+        return ret;
+    }
+    /**
+     * @param {ReadableByteStreamController} controller
+     */
+    start(controller) {
+        wasm.intounderlyingbytesource_start(this.__wbg_ptr, controller);
+    }
+    /**
+     * @returns {ReadableStreamType}
+     */
+    get type() {
+        const ret = wasm.intounderlyingbytesource_type(this.__wbg_ptr);
+        return __wbindgen_enum_ReadableStreamType[ret];
+    }
+}
+if (Symbol.dispose) IntoUnderlyingByteSource.prototype[Symbol.dispose] = IntoUnderlyingByteSource.prototype.free;
+
+export class IntoUnderlyingSink {
+    __destroy_into_raw() {
+        const ptr = this.__wbg_ptr;
+        this.__wbg_ptr = 0;
+        IntoUnderlyingSinkFinalization.unregister(this);
+        return ptr;
+    }
+    free() {
+        const ptr = this.__destroy_into_raw();
+        wasm.__wbg_intounderlyingsink_free(ptr, 0);
+    }
+    /**
+     * @param {any} reason
+     * @returns {Promise<any>}
+     */
+    abort(reason) {
+        const ptr = this.__destroy_into_raw();
+        const ret = wasm.intounderlyingsink_abort(ptr, reason);
+        return ret;
+    }
+    /**
+     * @returns {Promise<any>}
+     */
+    close() {
+        const ptr = this.__destroy_into_raw();
+        const ret = wasm.intounderlyingsink_close(ptr);
+        return ret;
+    }
+    /**
+     * @param {any} chunk
+     * @returns {Promise<any>}
+     */
+    write(chunk) {
+        const ret = wasm.intounderlyingsink_write(this.__wbg_ptr, chunk);
+        return ret;
+    }
+}
+if (Symbol.dispose) IntoUnderlyingSink.prototype[Symbol.dispose] = IntoUnderlyingSink.prototype.free;
+
+export class IntoUnderlyingSource {
+    __destroy_into_raw() {
+        const ptr = this.__wbg_ptr;
+        this.__wbg_ptr = 0;
+        IntoUnderlyingSourceFinalization.unregister(this);
+        return ptr;
+    }
+    free() {
+        const ptr = this.__destroy_into_raw();
+        wasm.__wbg_intounderlyingsource_free(ptr, 0);
+    }
+    cancel() {
+        const ptr = this.__destroy_into_raw();
+        wasm.intounderlyingsource_cancel(ptr);
+    }
+    /**
+     * @param {ReadableStreamDefaultController} controller
+     * @returns {Promise<any>}
+     */
+    pull(controller) {
+        const ret = wasm.intounderlyingsource_pull(this.__wbg_ptr, controller);
+        return ret;
+    }
+}
+if (Symbol.dispose) IntoUnderlyingSource.prototype[Symbol.dispose] = IntoUnderlyingSource.prototype.free;
+
+function __wbg_get_imports() {
+    const import0 = {
+        __proto__: null,
+        __wbg___wbindgen_is_function_0095a73b8b156f76: function(arg0) {
+            const ret = typeof(arg0) === 'function';
+            return ret;
+        },
+        __wbg___wbindgen_is_undefined_9e4d92534c42d778: function(arg0) {
+            const ret = arg0 === undefined;
+            return ret;
+        },
+        __wbg___wbindgen_throw_be289d5034ed271b: function(arg0, arg1) {
+            throw new Error(getStringFromWasm0(arg0, arg1));
+        },
+        __wbg__wbg_cb_unref_d9b87ff7982e3b21: function(arg0) {
+            arg0._wbg_cb_unref();
+        },
+        __wbg_buffer_26d0910f3a5bc899: function(arg0) {
+            const ret = arg0.buffer;
+            return ret;
+        },
+        __wbg_byobRequest_80e594e6da4e1af7: function(arg0) {
+            const ret = arg0.byobRequest;
+            return isLikeNone(ret) ? 0 : addToExternrefTable0(ret);
+        },
+        __wbg_byteLength_3417f266f4bf562a: function(arg0) {
+            const ret = arg0.byteLength;
+            return ret;
+        },
+        __wbg_byteOffset_f88547ca47c86358: function(arg0) {
+            const ret = arg0.byteOffset;
+            return ret;
+        },
+        __wbg_call_389efe28435a9388: function() { return handleError(function (arg0, arg1) {
+            const ret = arg0.call(arg1);
+            return ret;
+        }, arguments); },
+        __wbg_call_4708e0c13bdc8e95: function() { return handleError(function (arg0, arg1, arg2) {
+            const ret = arg0.call(arg1, arg2);
+            return ret;
+        }, arguments); },
+        __wbg_close_06dfa0a815b9d71f: function() { return handleError(function (arg0) {
+            arg0.close();
+        }, arguments); },
+        __wbg_close_a79afee31de55b36: function() { return handleError(function (arg0) {
+            arg0.close();
+        }, arguments); },
+        __wbg_enqueue_2c63f2044f257c3e: function() { return handleError(function (arg0, arg1) {
+            arg0.enqueue(arg1);
+        }, arguments); },
+        __wbg_length_32ed9a279acd054c: function(arg0) {
+            const ret = arg0.length;
+            return ret;
+        },
+        __wbg_new_72b49615380db768: function(arg0, arg1) {
+            const ret = new Error(getStringFromWasm0(arg0, arg1));
+            return ret;
+        },
+        __wbg_new_b5d9e2fb389fef91: function(arg0, arg1) {
+            try {
+                var state0 = {a: arg0, b: arg1};
+                var cb0 = (arg0, arg1) => {
+                    const a = state0.a;
+                    state0.a = 0;
+                    try {
+                        return wasm_bindgen__convert__closures_____invoke__h3cd296ad3915ac8d(a, state0.b, arg0, arg1);
+                    } finally {
+                        state0.a = a;
+                    }
+                };
+                const ret = new Promise(cb0);
+                return ret;
+            } finally {
+                state0.a = state0.b = 0;
+            }
+        },
+        __wbg_new_no_args_1c7c842f08d00ebb: function(arg0, arg1) {
+            const ret = new Function(getStringFromWasm0(arg0, arg1));
+            return ret;
+        },
+        __wbg_new_with_byte_offset_and_length_aa261d9c9da49eb1: function(arg0, arg1, arg2) {
+            const ret = new Uint8Array(arg0, arg1 >>> 0, arg2 >>> 0);
+            return ret;
+        },
+        __wbg_queueMicrotask_0aa0a927f78f5d98: function(arg0) {
+            const ret = arg0.queueMicrotask;
+            return ret;
+        },
+        __wbg_queueMicrotask_5bb536982f78a56f: function(arg0) {
+            queueMicrotask(arg0);
+        },
+        __wbg_resolve_002c4b7d9d8f6b64: function(arg0) {
+            const ret = Promise.resolve(arg0);
+            return ret;
+        },
+        __wbg_respond_bf6ab10399ca8722: function() { return handleError(function (arg0, arg1) {
+            arg0.respond(arg1 >>> 0);
+        }, arguments); },
+        __wbg_set_cc56eefd2dd91957: function(arg0, arg1, arg2) {
+            arg0.set(getArrayU8FromWasm0(arg1, arg2));
+        },
+        __wbg_static_accessor_GLOBAL_12837167ad935116: function() {
+            const ret = typeof global === 'undefined' ? null : global;
+            return isLikeNone(ret) ? 0 : addToExternrefTable0(ret);
+        },
+        __wbg_static_accessor_GLOBAL_THIS_e628e89ab3b1c95f: function() {
+            const ret = typeof globalThis === 'undefined' ? null : globalThis;
+            return isLikeNone(ret) ? 0 : addToExternrefTable0(ret);
+        },
+        __wbg_static_accessor_SELF_a621d3dfbb60d0ce: function() {
+            const ret = typeof self === 'undefined' ? null : self;
+            return isLikeNone(ret) ? 0 : addToExternrefTable0(ret);
+        },
+        __wbg_static_accessor_WINDOW_f8727f0cf888e0bd: function() {
+            const ret = typeof window === 'undefined' ? null : window;
+            return isLikeNone(ret) ? 0 : addToExternrefTable0(ret);
+        },
+        __wbg_then_b9e7b3b5f1a9e1b5: function(arg0, arg1) {
+            const ret = arg0.then(arg1);
+            return ret;
+        },
+        __wbg_view_6c32e7184b8606ad: function(arg0) {
+            const ret = arg0.view;
+            return isLikeNone(ret) ? 0 : addToExternrefTable0(ret);
+        },
+        __wbindgen_cast_0000000000000001: function(arg0, arg1) {
+            // Cast intrinsic for `Closure(Closure { dtor_idx: 26, function: Function { arguments: [Externref], shim_idx: 27, ret: Unit, inner_ret: Some(Unit) }, mutable: true }) -> Externref`.
+            const ret = makeMutClosure(arg0, arg1, wasm.wasm_bindgen__closure__destroy__he857c488161cb29c, wasm_bindgen__convert__closures_____invoke__h34690fc02c54dd49);
+            return ret;
+        },
+        __wbindgen_init_externref_table: function() {
+            const table = wasm.__wbindgen_externrefs;
+            const offset = table.grow(4);
+            table.set(0, undefined);
+            table.set(offset + 0, undefined);
+            table.set(offset + 1, null);
+            table.set(offset + 2, true);
+            table.set(offset + 3, false);
+        },
+    };
+    return {
+        __proto__: null,
+        "./vapora-frontend_bg.js": import0,
+    };
+}
+
+function wasm_bindgen__convert__closures_____invoke__h34690fc02c54dd49(arg0, arg1, arg2) {
+    wasm.wasm_bindgen__convert__closures_____invoke__h34690fc02c54dd49(arg0, arg1, arg2);
+}
+
+function wasm_bindgen__convert__closures_____invoke__h3cd296ad3915ac8d(arg0, arg1, arg2, arg3) {
+    wasm.wasm_bindgen__convert__closures_____invoke__h3cd296ad3915ac8d(arg0, arg1, arg2, arg3);
+}
+
+
+const __wbindgen_enum_ReadableStreamType = ["bytes"];
+const IntoUnderlyingByteSourceFinalization = (typeof FinalizationRegistry === 'undefined')
+    ? { register: () => {}, unregister: () => {} }
+    : new FinalizationRegistry(ptr => wasm.__wbg_intounderlyingbytesource_free(ptr >>> 0, 1));
+const IntoUnderlyingSinkFinalization = (typeof FinalizationRegistry === 'undefined')
+    ? { register: () => {}, unregister: () => {} }
+    : new FinalizationRegistry(ptr => wasm.__wbg_intounderlyingsink_free(ptr >>> 0, 1));
+const IntoUnderlyingSourceFinalization = (typeof FinalizationRegistry === 'undefined')
+    ? { register: () => {}, unregister: () => {} }
+    : new FinalizationRegistry(ptr => wasm.__wbg_intounderlyingsource_free(ptr >>> 0, 1));
+
+function addToExternrefTable0(obj) {
+    const idx = wasm.__externref_table_alloc();
+    wasm.__wbindgen_externrefs.set(idx, obj);
+    return idx;
+}
+
+const CLOSURE_DTORS = (typeof FinalizationRegistry === 'undefined')
+    ? { register: () => {}, unregister: () => {} }
+    : new FinalizationRegistry(state => state.dtor(state.a, state.b));
+
+function getArrayU8FromWasm0(ptr, len) {
+    ptr = ptr >>> 0;
+    return getUint8ArrayMemory0().subarray(ptr / 1, ptr / 1 + len);
+}
+
+function getStringFromWasm0(ptr, len) {
+    ptr = ptr >>> 0;
+    return decodeText(ptr, len);
+}
+
+let cachedUint8ArrayMemory0 = null;
+function getUint8ArrayMemory0() {
+    if (cachedUint8ArrayMemory0 === null || cachedUint8ArrayMemory0.byteLength === 0) {
+        cachedUint8ArrayMemory0 = new Uint8Array(wasm.memory.buffer);
+    }
+    return cachedUint8ArrayMemory0;
+}
+
+function handleError(f, args) {
+    try {
+        return f.apply(this, args);
+    } catch (e) {
+        const idx = addToExternrefTable0(e);
+        wasm.__wbindgen_exn_store(idx);
+    }
+}
+
+function isLikeNone(x) {
+    return x === undefined || x === null;
+}
+
+function makeMutClosure(arg0, arg1, dtor, f) {
+    const state = { a: arg0, b: arg1, cnt: 1, dtor };
+    const real = (...args) => {
+
+        // First up with a closure we increment the internal reference
+        // count. This ensures that the Rust closure environment won't
+        // be deallocated while we're invoking it.
+        state.cnt++;
+        const a = state.a;
+        state.a = 0;
+        try {
+            return f(a, state.b, ...args);
+        } finally {
+            state.a = a;
+            real._wbg_cb_unref();
+        }
+    };
+    real._wbg_cb_unref = () => {
+        if (--state.cnt === 0) {
+            state.dtor(state.a, state.b);
+            state.a = 0;
+            CLOSURE_DTORS.unregister(state);
+        }
+    };
+    CLOSURE_DTORS.register(real, state, state);
+    return real;
+}
+
+let cachedTextDecoder = new TextDecoder('utf-8', { ignoreBOM: true, fatal: true });
+cachedTextDecoder.decode();
+const MAX_SAFARI_DECODE_BYTES = 2146435072;
+let numBytesDecoded = 0;
+function decodeText(ptr, len) {
+    numBytesDecoded += len;
+    if (numBytesDecoded >= MAX_SAFARI_DECODE_BYTES) {
+        cachedTextDecoder = new TextDecoder('utf-8', { ignoreBOM: true, fatal: true });
+        cachedTextDecoder.decode();
+        numBytesDecoded = len;
+    }
+    return cachedTextDecoder.decode(getUint8ArrayMemory0().subarray(ptr, ptr + len));
+}
+
+let wasmModule, wasm;
+function __wbg_finalize_init(instance, module) {
+    wasm = instance.exports;
+    wasmModule = module;
+    cachedUint8ArrayMemory0 = null;
+    wasm.__wbindgen_start();
+    return wasm;
+}
+
+async function __wbg_load(module, imports) {
+    if (typeof Response === 'function' && module instanceof Response) {
+        if (typeof WebAssembly.instantiateStreaming === 'function') {
+            try {
+                return await WebAssembly.instantiateStreaming(module, imports);
+            } catch (e) {
+                const validResponse = module.ok && expectedResponseType(module.type);
+
+                if (validResponse && module.headers.get('Content-Type') !== 'application/wasm') {
+                    console.warn("`WebAssembly.instantiateStreaming` failed because your server does not serve Wasm with `application/wasm` MIME type. Falling back to `WebAssembly.instantiate` which is slower. Original error:\n", e);
+
+                } else { throw e; }
+            }
+        }
+
+        const bytes = await module.arrayBuffer();
+        return await WebAssembly.instantiate(bytes, imports);
+    } else {
+        const instance = await WebAssembly.instantiate(module, imports);
+
+        if (instance instanceof WebAssembly.Instance) {
+            return { instance, module };
+        } else {
+            return instance;
+        }
+    }
+
+    function expectedResponseType(type) {
+        switch (type) {
+            case 'basic': case 'cors': case 'default': return true;
+        }
+        return false;
+    }
+}
+
+function initSync(module) {
+    if (wasm !== undefined) return wasm;
+
+
+    if (module !== undefined) {
+        if (Object.getPrototypeOf(module) === Object.prototype) {
+            ({module} = module)
+        } else {
+            console.warn('using deprecated parameters for `initSync()`; pass a single object instead')
+        }
+    }
+
+    const imports = __wbg_get_imports();
+    if (!(module instanceof WebAssembly.Module)) {
+        module = new WebAssembly.Module(module);
+    }
+    const instance = new WebAssembly.Instance(module, imports);
+    return __wbg_finalize_init(instance, module);
+}
+
+async function __wbg_init(module_or_path) {
+    if (wasm !== undefined) return wasm;
+
+
+    if (module_or_path !== undefined) {
+        if (Object.getPrototypeOf(module_or_path) === Object.prototype) {
+            ({module_or_path} = module_or_path)
+        } else {
+            console.warn('using deprecated parameters for the initialization function; pass a single object instead')
+        }
+    }
+
+    if (module_or_path === undefined) {
+        module_or_path = new URL('vapora-frontend_bg.wasm', import.meta.url);
+    }
+    const imports = __wbg_get_imports();
+
+    if (typeof module_or_path === 'string' || (typeof Request === 'function' && module_or_path instanceof Request) || (typeof URL === 'function' && module_or_path instanceof URL)) {
+        module_or_path = fetch(module_or_path);
+    }
+
+    const { instance, module } = await __wbg_load(await module_or_path, imports);
+
+    return __wbg_finalize_init(instance, module);
+}
+
+export { initSync, __wbg_init as default };
--- a/crates/vapora-frontend/dist/vapora-frontend-4e82c70edbf89867_bg.wasm
+++ b/crates/vapora-frontend/dist/vapora-frontend-4e82c70edbf89867_bg.wasm
--- a/crates/vapora-knowledge-graph/src/persistence.rs
+++ b/crates/vapora-knowledge-graph/src/persistence.rs
@ -326,7 +326,11 @@ impl KGPersistence {
        );

        let mut response = self.db.query(&query).await?;
-        let results: Vec<PersistedExecution> = response.take(0)?;
+        let raw: Vec<serde_json::Value> = response.take(0)?;
+        let results = raw
+            .into_iter()
+            .filter_map(|v| serde_json::from_value(v).ok())
+            .collect();
        Ok(results)
    }

@ -343,13 +347,11 @@ impl KGPersistence {

        let mut response = self.db.query(&query).await?;

-        #[derive(Deserialize)]
-        struct RateResult {
-            rate: Option<f64>,
-        }
-
-        let result: Vec<RateResult> = response.take(0)?;
-        Ok(result.first().and_then(|r| r.rate).unwrap_or(0.0))
+        let result: Vec<serde_json::Value> = response.take(0)?;
+        Ok(result
+            .first()
+            .and_then(|r| r["rate"].as_f64())
+            .unwrap_or(0.0))
    }

    /// Get task type distribution
@ -360,16 +362,14 @@ impl KGPersistence {

        let mut response = self.db.query(query).await?;

-        #[derive(Deserialize)]
-        struct DistResult {
-            task_type: String,
-            count: u64,
-        }
-
-        let results: Vec<DistResult> = response.take(0)?;
+        let results: Vec<serde_json::Value> = response.take(0)?;
        Ok(results
            .into_iter()
-            .map(|r| (r.task_type, r.count, 0.0))
+            .filter_map(|r| {
+                let task_type = r["task_type"].as_str()?.to_string();
+                let count = r["count"].as_u64()?;
+                Some((task_type, count, 0.0))
+            })
            .collect())
    }

@ -386,15 +386,8 @@ impl KGPersistence {
        );

        let mut response = self.db.query(&query).await?;
-
-        #[derive(Deserialize)]
-        #[allow(dead_code)]
-        struct DeleteResult {
-            deleted: Option<u64>,
-        }
-
-        let _result: Vec<DeleteResult> = response.take(0)?;
-        Ok(0) // SurrealDB 2.3 doesn't return delete count easily
+        let _result: Vec<serde_json::Value> = response.take(0)?;
+        Ok(0) // SurrealDB doesn't return delete count from DELETE queries
    }

    /// Get total execution count
@ -403,15 +396,11 @@ impl KGPersistence {

        let query = "SELECT count(*) as total FROM kg_executions";
        let mut response = self.db.query(query).await?;
-
-        #[derive(Deserialize)]
-        #[allow(dead_code)]
-        struct CountResult {
-            total: u64,
-        }
-
-        let result: Vec<CountResult> = response.take(0)?;
-        Ok(result.first().map(|r| r.total).unwrap_or(0))
+        let result: Vec<serde_json::Value> = response.take(0)?;
+        Ok(result
+            .first()
+            .and_then(|r| r["total"].as_u64())
+            .unwrap_or(0))
    }

    /// Get task-type specific executions for agent (for learning profiles).
@ -435,8 +424,11 @@ impl KGPersistence {
        );

        let mut response = self.db.query(&query).await?;
-        let results: Vec<PersistedExecution> = response.take(0)?;
-        Ok(results)
+        let raw: Vec<serde_json::Value> = response.take(0)?;
+        Ok(raw
+            .into_iter()
+            .filter_map(|v| serde_json::from_value(v).ok())
+            .collect())
    }

    /// Get all recent executions for agent across all task types.
@ -457,8 +449,11 @@ impl KGPersistence {
        );

        let mut response = self.db.query(&query).await?;
-        let results: Vec<PersistedExecution> = response.take(0)?;
-        Ok(results)
+        let raw: Vec<serde_json::Value> = response.take(0)?;
+        Ok(raw
+            .into_iter()
+            .filter_map(|v| serde_json::from_value(v).ok())
+            .collect())
    }

    /// Record analytics event
@ -541,7 +536,11 @@ impl KGPersistence {
        );

        let mut response = self.db.query(&query).await?;
-        let executions: Vec<PersistedExecution> = response.take(0)?;
+        let raw: Vec<serde_json::Value> = response.take(0)?;
+        let executions: Vec<PersistedExecution> = raw
+            .into_iter()
+            .filter_map(|v| serde_json::from_value(v).ok())
+            .collect();

        // Filter by time period
        let cutoff = Utc::now() - period.duration();
@ -573,7 +572,11 @@ impl KGPersistence {
        // Fetch all recent executions
        let query = "SELECT * FROM kg_executions ORDER BY executed_at DESC LIMIT 5000";
        let mut response = self.db.query(query).await?;
-        let executions: Vec<PersistedExecution> = response.take(0)?;
+        let raw: Vec<serde_json::Value> = response.take(0)?;
+        let executions: Vec<PersistedExecution> = raw
+            .into_iter()
+            .filter_map(|v| serde_json::from_value(v).ok())
+            .collect();

        // Filter by time period
        let cutoff = Utc::now() - period.duration();
@ -603,7 +606,11 @@ impl KGPersistence {
        // Fetch all recent executions
        let query = "SELECT * FROM kg_executions ORDER BY executed_at DESC LIMIT 5000";
        let mut response = self.db.query(query).await?;
-        let executions: Vec<PersistedExecution> = response.take(0)?;
+        let raw: Vec<serde_json::Value> = response.take(0)?;
+        let executions: Vec<PersistedExecution> = raw
+            .into_iter()
+            .filter_map(|v| serde_json::from_value(v).ok())
+            .collect();

        // Filter by time period
        let cutoff = Utc::now() - period.duration();
@ -737,8 +744,11 @@ impl KGPersistence {
        );

        let mut response = self.db.query(&query).await?;
-        let results: Vec<PersistedRlmExecution> = response.take(0)?;
-        Ok(results)
+        let raw: Vec<serde_json::Value> = response.take(0)?;
+        Ok(raw
+            .into_iter()
+            .filter_map(|v| serde_json::from_value(v).ok())
+            .collect())
    }

    /// Find similar RLM tasks using query embedding similarity
@ -750,17 +760,17 @@ impl KGPersistence {
    ) -> anyhow::Result<Vec<PersistedRlmExecution>> {
        debug!("Searching for similar RLM tasks (limit: {})", limit);

-        // SurrealDB vector similarity requires different syntax
-        // For Phase 7, return recent successful executions
-        // Full vector similarity implementation deferred to future phase
        let query = format!(
            "SELECT * FROM rlm_executions WHERE success = true ORDER BY executed_at DESC LIMIT {}",
            limit
        );

        let mut response = self.db.query(&query).await?;
-        let results: Vec<PersistedRlmExecution> = response.take(0)?;
-        Ok(results)
+        let raw: Vec<serde_json::Value> = response.take(0)?;
+        Ok(raw
+            .into_iter()
+            .filter_map(|v| serde_json::from_value(v).ok())
+            .collect())
    }

    /// Get RLM success rate for a specific document
@ -819,18 +829,13 @@ impl KGPersistence {
    pub async fn get_rlm_execution_count(&self) -> anyhow::Result<u64> {
        debug!("Fetching RLM execution count");

-        // SurrealDB count query syntax
        let query = "SELECT count() as total FROM rlm_executions GROUP ALL";
        let mut response = self.db.query(query).await?;
-
-        #[derive(Deserialize)]
-        #[allow(dead_code)]
-        struct CountResult {
-            total: u64,
-        }
-
-        let result: Vec<CountResult> = response.take(0)?;
-        Ok(result.first().map(|r| r.total).unwrap_or(0))
+        let result: Vec<serde_json::Value> = response.take(0)?;
+        Ok(result
+            .first()
+            .and_then(|r| r["total"].as_u64())
+            .unwrap_or(0))
    }

    /// Cleanup old RLM executions (keep only last N days)
@ -846,15 +851,8 @@ impl KGPersistence {
        );

        let mut response = self.db.query(&query).await?;
-
-        #[derive(Deserialize)]
-        #[allow(dead_code)]
-        struct DeleteResult {
-            deleted: Option<u64>,
-        }
-
-        let _result: Vec<DeleteResult> = response.take(0)?;
-        Ok(0) // SurrealDB 2.3 doesn't return delete count easily
+        let _: Vec<serde_json::Value> = response.take(0)?;
+        Ok(0) // SurrealDB DELETE doesn't return a count
    }
 }

--- a/crates/vapora-knowledge-graph/tests/rlm_integration.rs
+++ b/crates/vapora-knowledge-graph/tests/rlm_integration.rs
@ -12,8 +12,8 @@ async fn setup_test_db() -> KGPersistence {
    let db = Surreal::new::<Ws>("127.0.0.1:8000").await.unwrap();

    db.signin(Root {
-        username: "root",
-        password: "root",
+        username: "root".to_string(),
+        password: "root".to_string(),
    })
    .await
    .unwrap();
--- a/crates/vapora-llm-router/Cargo.toml
+++ b/crates/vapora-llm-router/Cargo.toml
@ -15,8 +15,7 @@ crate-type = ["rlib"]
 vapora-shared = { workspace = true }

 # Embeddings
-# Note: Update STRATUM_EMBEDDINGS_PATH environment variable or adjust path relative to your workspace
-stratum-embeddings = { path = "../../../stratumiops/crates/stratum-embeddings", features = ["vapora"] }
+stratum-embeddings = { workspace = true }

 # Secrets management
 secretumvault = { workspace = true }
--- a/crates/vapora-rlm/examples/local_ollama.rs
+++ b/crates/vapora-rlm/examples/local_ollama.rs
@ -25,8 +25,8 @@ async fn main() -> anyhow::Result<()> {
    // 1. Setup SurrealDB
    let db = Surreal::new::<Ws>("127.0.0.1:8000").await?;
    db.signin(Root {
-        username: "root",
-        password: "root",
+        username: "root".to_string(),
+        password: "root".to_string(),
    })
    .await?;
    db.use_ns("local").use_db("rlm").await?;
--- a/crates/vapora-rlm/examples/production_setup.rs
+++ b/crates/vapora-rlm/examples/production_setup.rs
@ -24,8 +24,8 @@ async fn main() -> anyhow::Result<()> {
    // 1. Setup SurrealDB
    let db = Surreal::new::<Ws>("127.0.0.1:8000").await?;
    db.signin(Root {
-        username: "root",
-        password: "root",
+        username: "root".to_string(),
+        password: "root".to_string(),
    })
    .await?;
    db.use_ns("production").use_db("rlm").await?;
--- a/crates/vapora-rlm/src/storage/surrealdb.rs
+++ b/crates/vapora-rlm/src/storage/surrealdb.rs
@ -97,10 +97,14 @@ impl Storage for SurrealDBStorage {
                RLMError::DatabaseError(Box::new(e))
            })?;

-        let results: Vec<Chunk> = response.take(0).map_err(|e| {
+        let raw: Vec<serde_json::Value> = response.take(0).map_err(|e| {
            error!("Failed to parse chunks for doc {}: {}", doc_id, e);
            RLMError::DatabaseError(Box::new(e))
        })?;
+        let results: Vec<Chunk> = raw
+            .into_iter()
+            .filter_map(|v| serde_json::from_value(v).ok())
+            .collect();

        STORAGE_OPERATIONS
            .with_label_values(&["get_chunks", "success"])
@ -125,10 +129,14 @@ impl Storage for SurrealDBStorage {
                RLMError::DatabaseError(Box::new(e))
            })?;

-        let results: Vec<Chunk> = response.take(0).map_err(|e| {
+        let raw: Vec<serde_json::Value> = response.take(0).map_err(|e| {
            error!("Failed to parse chunk {}: {}", chunk_id, e);
            RLMError::DatabaseError(Box::new(e))
        })?;
+        let results: Vec<Chunk> = raw
+            .into_iter()
+            .filter_map(|v| serde_json::from_value(v).ok())
+            .collect();

        STORAGE_OPERATIONS
            .with_label_values(&["get_chunk", "success"])
@ -161,10 +169,14 @@ impl Storage for SurrealDBStorage {
                RLMError::DatabaseError(Box::new(e))
            })?;

-        let results: Vec<Chunk> = response.take(0).map_err(|e| {
+        let raw: Vec<serde_json::Value> = response.take(0).map_err(|e| {
            error!("Failed to parse embedding search results: {}", e);
            RLMError::DatabaseError(Box::new(e))
        })?;
+        let results: Vec<Chunk> = raw
+            .into_iter()
+            .filter_map(|v| serde_json::from_value(v).ok())
+            .collect();

        STORAGE_OPERATIONS
            .with_label_values(&["search_embedding", "success"])
@ -225,10 +237,14 @@ impl Storage for SurrealDBStorage {
                RLMError::DatabaseError(Box::new(e))
            })?;

-        let results: Vec<Buffer> = response.take(0).map_err(|e| {
+        let raw: Vec<serde_json::Value> = response.take(0).map_err(|e| {
            error!("Failed to parse buffer {}: {}", buffer_id, e);
            RLMError::DatabaseError(Box::new(e))
        })?;
+        let results: Vec<Buffer> = raw
+            .into_iter()
+            .filter_map(|v| serde_json::from_value(v).ok())
+            .collect();

        STORAGE_OPERATIONS
            .with_label_values(&["get_buffer", "success"])
@ -332,10 +348,14 @@ impl Storage for SurrealDBStorage {
                RLMError::DatabaseError(Box::new(e))
            })?;

-        let results: Vec<ExecutionHistory> = response.take(0).map_err(|e| {
+        let raw: Vec<serde_json::Value> = response.take(0).map_err(|e| {
            error!("Failed to parse executions for doc {}: {}", doc_id, e);
            RLMError::DatabaseError(Box::new(e))
        })?;
+        let results: Vec<ExecutionHistory> = raw
+            .into_iter()
+            .filter_map(|v| serde_json::from_value(v).ok())
+            .collect();

        STORAGE_OPERATIONS
            .with_label_values(&["get_executions", "success"])
--- a/crates/vapora-rlm/tests/e2e_integration.rs
+++ b/crates/vapora-rlm/tests/e2e_integration.rs
@ -30,8 +30,8 @@ async fn setup_test_environment() -> (
    let db = Surreal::new::<Ws>("127.0.0.1:8000").await.unwrap();

    db.signin(Root {
-        username: "root",
-        password: "root",
+        username: "root".to_string(),
+        password: "root".to_string(),
    })
    .await
    .unwrap();
--- a/crates/vapora-rlm/tests/e2e_minimal_debug.rs
+++ b/crates/vapora-rlm/tests/e2e_minimal_debug.rs
@ -14,8 +14,8 @@ async fn test_e2e_minimal_trace() {
    // Setup - exactly like E2E test
    let db = Surreal::new::<Ws>("127.0.0.1:8000").await.unwrap();
    db.signin(Root {
-        username: "root",
-        password: "root",
+        username: "root".to_string(),
+        password: "root".to_string(),
    })
    .await
    .unwrap();
--- a/crates/vapora-rlm/tests/engine_bm25_test.rs
+++ b/crates/vapora-rlm/tests/engine_bm25_test.rs
@ -14,8 +14,8 @@ async fn test_engine_bm25_query() {
    // Setup - same as E2E test
    let db = Surreal::new::<Ws>("127.0.0.1:8000").await.unwrap();
    db.signin(Root {
-        username: "root",
-        password: "root",
+        username: "root".to_string(),
+        password: "root".to_string(),
    })
    .await
    .unwrap();
--- a/crates/vapora-rlm/tests/integration_test.rs
+++ b/crates/vapora-rlm/tests/integration_test.rs
@ -17,8 +17,8 @@ async fn test_storage_chunk_persistence() {
        .await
        .expect("Failed to connect to SurrealDB");
    db.signin(Root {
-        username: "root",
-        password: "root",
+        username: "root".to_string(),
+        password: "root".to_string(),
    })
    .await
    .expect("Failed to sign in");
@ -79,8 +79,8 @@ async fn test_storage_buffer_operations() {
        .await
        .expect("Failed to connect to SurrealDB");
    db.signin(Root {
-        username: "root",
-        password: "root",
+        username: "root".to_string(),
+        password: "root".to_string(),
    })
    .await
    .expect("Failed to sign in");
@ -124,8 +124,8 @@ async fn test_storage_execution_history() {
        .await
        .expect("Failed to connect to SurrealDB");
    db.signin(Root {
-        username: "root",
-        password: "root",
+        username: "root".to_string(),
+        password: "root".to_string(),
    })
    .await
    .expect("Failed to sign in");
@ -177,8 +177,8 @@ async fn test_storage_embedding_search() {
        .await
        .expect("Failed to connect to SurrealDB");
    db.signin(Root {
-        username: "root",
-        password: "root",
+        username: "root".to_string(),
+        password: "root".to_string(),
    })
    .await
    .expect("Failed to sign in");
--- a/crates/vapora-rlm/tests/performance_test.rs
+++ b/crates/vapora-rlm/tests/performance_test.rs
@ -17,8 +17,8 @@ use vapora_rlm::RLMEngine;
 async fn setup_engine() -> Arc<RLMEngine<SurrealDBStorage>> {
    let db = Surreal::new::<Ws>("127.0.0.1:8000").await.unwrap();
    db.signin(Root {
-        username: "root",
-        password: "root",
+        username: "root".to_string(),
+        password: "root".to_string(),
    })
    .await
    .unwrap();
--- a/crates/vapora-workflow-engine/Cargo.toml
+++ b/crates/vapora-workflow-engine/Cargo.toml
@ -44,7 +44,14 @@ anyhow = { workspace = true }
 # Metrics
 prometheus = { workspace = true }

+# Persistence
+surrealdb = { workspace = true }
+
+# Authorization
+cedar-policy = "4.9"
+
 [dev-dependencies]
 mockall = { workspace = true }
 wiremock = { workspace = true }
 tokio = { workspace = true, features = ["test-util"] }
+tempfile = { workspace = true }
--- a/crates/vapora-workflow-engine/src/auth.rs
+++ b/crates/vapora-workflow-engine/src/auth.rs
@ -0,0 +1,157 @@
+use std::path::Path;
+
+use cedar_policy::{Authorizer, Context as CedarContext, Entities, PolicySet, Request};
+use tracing::debug;
+
+use crate::error::{Result, WorkflowError};
+
+/// Cedar policy authorizer for per-stage execution control.
+///
+/// Loads `.cedar` policy files from a directory and evaluates authorization
+/// decisions before each stage dispatch. Authorization is
+/// principal/action/resource based: `User::"vapora-orchestrator"`,
+/// `Action::"execute-stage"`, `Stage::"<name>"`.
+pub struct CedarAuthorizer {
+    policy_set: PolicySet,
+    authorizer: Authorizer,
+}
+
+impl CedarAuthorizer {
+    /// Load all `.cedar` policy files from `dir`, combining them into a single
+    /// policy set.
+    pub fn load_from_dir(dir: &Path) -> Result<Self> {
+        let entries = std::fs::read_dir(dir).map_err(|e| {
+            WorkflowError::Internal(format!("reading Cedar policy dir '{}': {e}", dir.display()))
+        })?;
+
+        let mut combined = String::new();
+
+        for entry in entries {
+            let entry = entry
+                .map_err(|e| WorkflowError::Internal(format!("reading Cedar dir entry: {e}")))?;
+            let path = entry.path();
+            if path.extension().and_then(|e| e.to_str()) != Some("cedar") {
+                continue;
+            }
+            let content = std::fs::read_to_string(&path).map_err(|e| {
+                WorkflowError::Internal(format!("reading Cedar policy '{}': {e}", path.display()))
+            })?;
+            combined.push_str(&content);
+            combined.push('\n');
+        }
+
+        if combined.trim().is_empty() {
+            return Err(WorkflowError::Internal(format!(
+                "no .cedar policy files found in '{}'",
+                dir.display()
+            )));
+        }
+
+        let policy_set: PolicySet = combined
+            .parse()
+            .map_err(|e| WorkflowError::Internal(format!("parsing Cedar policies: {e}")))?;
+
+        Ok(Self {
+            policy_set,
+            authorizer: Authorizer::new(),
+        })
+    }
+
+    /// Returns `Ok(())` if permitted, `Err(Unauthorized)` if denied.
+    pub fn authorize(&self, principal: &str, action: &str, resource: &str) -> Result<()> {
+        let principal_entity: cedar_policy::EntityUid =
+            format!("User::\"{principal}\"").parse().map_err(|e| {
+                WorkflowError::Internal(format!("parsing Cedar principal EntityUid: {e}"))
+            })?;
+
+        let action_entity: cedar_policy::EntityUid = format!("Action::\"{action}\"")
+            .parse()
+            .map_err(|e| WorkflowError::Internal(format!("parsing Cedar action EntityUid: {e}")))?;
+
+        let resource_entity: cedar_policy::EntityUid = resource.parse().map_err(|e| {
+            WorkflowError::Internal(format!(
+                "parsing Cedar resource EntityUid '{resource}': {e}"
+            ))
+        })?;
+
+        let request = Request::new(
+            principal_entity,
+            action_entity,
+            resource_entity,
+            CedarContext::empty(),
+            None,
+        )
+        .map_err(|e| WorkflowError::Internal(format!("building Cedar request: {e}")))?;
+
+        let response =
+            self.authorizer
+                .is_authorized(&request, &self.policy_set, &Entities::empty());
+
+        debug!(
+            "Cedar: {principal} {action} {resource} → {:?}",
+            response.decision()
+        );
+
+        match response.decision() {
+            cedar_policy::Decision::Allow => Ok(()),
+            cedar_policy::Decision::Deny => Err(WorkflowError::Unauthorized(format!(
+                "Cedar denied: principal '{principal}' action '{action}' resource '{resource}'"
+            ))),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::io::Write;
+
+    use tempfile::TempDir;
+
+    use super::*;
+
+    fn write_policy(dir: &TempDir, name: &str, content: &str) {
+        let path = dir.path().join(name);
+        let mut f = std::fs::File::create(path).unwrap();
+        f.write_all(content.as_bytes()).unwrap();
+    }
+
+    const PERMIT_ORCHESTRATOR: &str = r#"
+permit(
+    principal == User::"vapora-orchestrator",
+    action == Action::"execute-stage",
+    resource == Stage::"deploy"
+);"#;
+
+    const FORBID_ALL: &str = r#"forbid(principal, action, resource);"#;
+
+    #[test]
+    fn test_permit_allows() {
+        let dir = TempDir::new().unwrap();
+        write_policy(&dir, "permit.cedar", PERMIT_ORCHESTRATOR);
+
+        let authz = CedarAuthorizer::load_from_dir(dir.path()).unwrap();
+        authz
+            .authorize("vapora-orchestrator", "execute-stage", "Stage::\"deploy\"")
+            .unwrap();
+    }
+
+    #[test]
+    fn test_deny_returns_unauthorized() {
+        let dir = TempDir::new().unwrap();
+        write_policy(&dir, "forbid.cedar", FORBID_ALL);
+
+        let authz = CedarAuthorizer::load_from_dir(dir.path()).unwrap();
+        let err = authz
+            .authorize("vapora-orchestrator", "execute-stage", "Stage::\"deploy\"")
+            .unwrap_err();
+        assert!(matches!(err, WorkflowError::Unauthorized(_)));
+        assert!(err.to_string().contains("Cedar denied"));
+    }
+
+    #[test]
+    fn test_empty_dir_fails() {
+        let dir = TempDir::new().unwrap();
+        let result = CedarAuthorizer::load_from_dir(dir.path());
+        assert!(result.is_err());
+    }
+}
--- a/crates/vapora-workflow-engine/src/config.rs
+++ b/crates/vapora-workflow-engine/src/config.rs
@ -15,6 +15,8 @@ pub struct EngineConfig {
    pub max_parallel_tasks: usize,
    pub workflow_timeout: u64,
    pub approval_gates_enabled: bool,
+    #[serde(default)]
+    pub cedar_policy_dir: Option<String>,
 }

 #[derive(Debug, Clone, Deserialize)]
@ -34,6 +36,10 @@ pub struct StageConfig {
    pub max_parallel: Option<usize>,
    #[serde(default)]
    pub approval_required: bool,
+    /// Agent roles that receive a compensation task if this stage needs to be
+    /// rolled back via Saga.
+    #[serde(default)]
+    pub compensation_agents: Option<Vec<String>>,
 }

 impl WorkflowsConfig {
@ -127,6 +133,7 @@ approval_required = false
                max_parallel_tasks: 10,
                workflow_timeout: 3600,
                approval_gates_enabled: true,
+                cedar_policy_dir: None,
            },
            workflows: vec![],
        };
--- a/crates/vapora-workflow-engine/src/error.rs
+++ b/crates/vapora-workflow-engine/src/error.rs
@ -41,6 +41,12 @@ pub enum WorkflowError {
    #[error("Artifact persistence failed: {0}")]
    ArtifactError(String),

+    #[error("Database error: {0}")]
+    DatabaseError(String),
+
+    #[error("Authorization denied: {0}")]
+    Unauthorized(String),
+
    #[error("Internal error: {0}")]
    Internal(String),
 }
--- a/crates/vapora-workflow-engine/src/instance.rs
+++ b/crates/vapora-workflow-engine/src/instance.rs
@ -171,6 +171,31 @@ impl WorkflowInstance {
            let _ = stage.transition(StageStatus::Failed(error));
        }
    }
+
+    /// Marks a task as failed in the current stage and optionally transitions
+    /// the stage to `Failed`.
+    ///
+    /// Returns `Some(Ok(stage_name))` when the failure is non-retryable and
+    /// the stage was transitioned, `None` when retrying or no active stage.
+    /// Isolates the mutable borrow of `current_stage_mut()` so callers can
+    /// access other fields of `self` after this returns.
+    pub fn mark_current_task_failed(
+        &mut self,
+        task_id: &str,
+        error: &str,
+        can_retry: bool,
+    ) -> Option<Result<String>> {
+        let stage = self.current_stage_mut()?;
+        if let Some(task) = stage.assigned_tasks.get_mut(task_id) {
+            task.mark_failed(error.to_string());
+        }
+        if can_retry {
+            return None;
+        }
+        let stage_name = stage.name.clone();
+        let r = stage.transition(StageStatus::Failed(error.to_string()));
+        Some(r.map(|()| stage_name))
+    }
 }

 #[cfg(test)]
@ -190,6 +215,7 @@ mod tests {
                    parallel: false,
                    max_parallel: None,
                    approval_required: false,
+                    compensation_agents: None,
                },
                StageConfig {
                    name: "stage2".to_string(),
@ -197,6 +223,7 @@ mod tests {
                    parallel: false,
                    max_parallel: None,
                    approval_required: false,
+                    compensation_agents: None,
                },
            ],
        }
--- a/crates/vapora-workflow-engine/src/lib.rs
+++ b/crates/vapora-workflow-engine/src/lib.rs
@ -16,58 +16,30 @@
 //! - `WorkflowInstance`: State machine tracking individual workflow execution
 //! - `StageState`: Manages stage execution and task assignment
 //! - `Artifact`: Data passed between stages
-//!
-//! # Example
-//!
-//! ```no_run
-//! use vapora_workflow_engine::{WorkflowOrchestrator, config::WorkflowsConfig};
-//! use std::sync::Arc;
-//!
-//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
-//! // Initialize dependencies (SwarmCoordinator, KGPersistence, NATS)
-//! # let swarm = todo!();
-//! # let kg = todo!();
-//! # let nats = todo!();
-//!
-//! // Create orchestrator
-//! let orchestrator = Arc::new(
-//!     WorkflowOrchestrator::new(
-//!         "config/workflows.toml",
-//!         swarm,
-//!         kg,
-//!         nats,
-//!     ).await?
-//! );
-//!
-//! // Start event listener
-//! orchestrator.clone().start_event_listener().await?;
-//!
-//! // Start a workflow
-//! let workflow_id = orchestrator.start_workflow(
-//!     "feature_development",
-//!     serde_json::json!({
-//!         "task": "Add authentication",
-//!         "requirements": ["OAuth2", "JWT"]
-//!     })
-//! ).await?;
-//!
-//! println!("Workflow started: {}", workflow_id);
-//! # Ok(())
-//! # }
-//! ```
+//! - `SurrealWorkflowStore`: Persistence layer — crash-recoverable via
+//!   SurrealDB
+//! - `SagaCompensator`: Rolls back completed stages on failure (reverse-order
+//!   dispatch)
+//! - `CedarAuthorizer`: Per-stage Cedar policy enforcement

 pub mod artifact;
+pub mod auth;
 pub mod config;
 pub mod error;
 pub mod instance;
 pub mod metrics;
 pub mod orchestrator;
+pub mod persistence;
+pub mod saga;
 pub mod stage;

 pub use artifact::{Artifact, ArtifactType};
+pub use auth::CedarAuthorizer;
 pub use config::{EngineConfig, StageConfig, WorkflowConfig, WorkflowsConfig};
 pub use error::{ConfigError, Result, WorkflowError};
 pub use instance::{WorkflowInstance, WorkflowStatus};
 pub use metrics::WorkflowMetrics;
 pub use orchestrator::WorkflowOrchestrator;
+pub use persistence::SurrealWorkflowStore;
+pub use saga::SagaCompensator;
 pub use stage::{StageState, StageStatus, TaskState, TaskStatus};
--- a/crates/vapora-workflow-engine/src/orchestrator.rs
+++ b/crates/vapora-workflow-engine/src/orchestrator.rs
@ -4,17 +4,22 @@ use chrono::Utc;
 use dashmap::DashMap;
 use futures::StreamExt;
 use serde_json::Value;
+use surrealdb::engine::remote::ws::Client;
+use surrealdb::Surreal;
 use tracing::{debug, error, info, warn};
 use vapora_agents::messages::{AgentMessage, TaskCompleted, TaskFailed};
 use vapora_knowledge_graph::persistence::KGPersistence;
 use vapora_swarm::coordinator::SwarmCoordinator;

 use crate::artifact::{Artifact, ArtifactType};
+use crate::auth::CedarAuthorizer;
 use crate::config::{StageConfig, WorkflowsConfig};
 use crate::error::{Result, WorkflowError};
 use crate::instance::{WorkflowInstance, WorkflowStatus};
 use crate::metrics::WorkflowMetrics;
-use crate::stage::{StageStatus, TaskState};
+use crate::persistence::SurrealWorkflowStore;
+use crate::saga::SagaCompensator;
+use crate::stage::{StageState, StageStatus, TaskState};

 pub struct WorkflowOrchestrator {
    config: WorkflowsConfig,
@ -24,6 +29,9 @@ pub struct WorkflowOrchestrator {
    nats: Arc<async_nats::Client>,
    active_workflows: DashMap<String, WorkflowInstance>,
    metrics: Arc<WorkflowMetrics>,
+    store: Arc<SurrealWorkflowStore>,
+    saga: SagaCompensator,
+    cedar: Option<Arc<CedarAuthorizer>>,
 }

 impl WorkflowOrchestrator {
@ -32,6 +40,7 @@ impl WorkflowOrchestrator {
        swarm: Arc<SwarmCoordinator>,
        kg: Arc<KGPersistence>,
        nats: Arc<async_nats::Client>,
+        db: Surreal<Client>,
    ) -> Result<Self> {
        let config = WorkflowsConfig::load(config_path)?;
        let metrics =
@ -39,13 +48,50 @@ impl WorkflowOrchestrator {
                WorkflowError::Internal(format!("Failed to create metrics: {}", e))
            })?);

+        let db = Arc::new(db);
+        let store = Arc::new(SurrealWorkflowStore::new(db));
+        let saga = SagaCompensator::new(swarm.clone());
+
+        let cedar = config
+            .engine
+            .cedar_policy_dir
+            .as_deref()
+            .map(|dir| CedarAuthorizer::load_from_dir(std::path::Path::new(dir)).map(Arc::new))
+            .transpose()
+            .map_err(|e| WorkflowError::Internal(format!("Cedar init: {e}")))?;
+
+        if cedar.is_some() {
+            info!("Cedar authorization enabled for workflow stages");
+        }
+
+        // Crash recovery: restore active workflows from DB
+        let active_workflows = DashMap::new();
+        match store.load_active().await {
+            Ok(instances) => {
+                let count = instances.len();
+                for instance in instances {
+                    active_workflows.insert(instance.id.clone(), instance);
+                }
+                if count > 0 {
+                    info!(count = count, "Restored active workflows from DB");
+                    metrics.active_workflows.add(count as i64);
+                }
+            }
+            Err(e) => {
+                warn!(error = %e, "Failed to restore active workflows from DB (starting empty)");
+            }
+        }
+
        Ok(Self {
            config,
            swarm,
            kg,
            nats,
-            active_workflows: DashMap::new(),
+            active_workflows,
            metrics,
+            store,
+            saga,
+            cedar,
        })
    }

@ -66,6 +112,7 @@ impl WorkflowOrchestrator {
        let instance = WorkflowInstance::new(template, initial_context);
        let workflow_id = instance.id.clone();

+        self.store.save(&instance).await?;
        self.active_workflows.insert(workflow_id.clone(), instance);
        self.metrics.active_workflows.inc();

@ -113,6 +160,12 @@ impl WorkflowOrchestrator {
            return Ok(());
        }

+        // Cedar authorization check before dispatch
+        if let Some(cedar) = &self.cedar {
+            let resource = format!("Stage::\"{}\"", stage_config.name);
+            cedar.authorize("vapora-orchestrator", "execute-stage", &resource)?;
+        }
+
        if stage_config.parallel {
            self.assign_parallel_tasks(workflow_id, &stage_config, context)
                .await?;
@ -133,6 +186,11 @@ impl WorkflowOrchestrator {
            }
        }

+        // Persist after stage state change
+        if let Some(instance) = self.active_workflows.get(workflow_id) {
+            self.store.save(instance.value()).await?;
+        }
+
        Ok(())
    }

@ -302,6 +360,11 @@ impl WorkflowOrchestrator {
            all_completed
        };

+        // Persist after each state change
+        if let Some(instance) = self.active_workflows.get(&workflow_id) {
+            self.store.save(instance.value()).await?;
+        }
+
        if should_advance {
            self.advance_to_next_stage(&workflow_id).await?;
        }
@ -341,6 +404,15 @@ impl WorkflowOrchestrator {
            );

            self.publish_workflow_completed(workflow_id).await?;
+
+            // Remove from DB — terminal state is cleaned up
+            if let Err(e) = self.store.delete(workflow_id).await {
+                warn!(
+                    workflow_id = %workflow_id,
+                    error = %e,
+                    "Failed to delete completed workflow from store"
+                );
+            }
        }

        Ok(())
@ -373,6 +445,7 @@ impl WorkflowOrchestrator {
    }

    pub async fn cancel_workflow(&self, workflow_id: &str, reason: String) -> Result<()> {
+        {
            let mut instance = self
                .active_workflows
                .get_mut(workflow_id)
@ -386,6 +459,12 @@ impl WorkflowOrchestrator {
                reason = %reason,
                "Workflow cancelled"
            );
+        }
+
+        // Persist cancelled state, then clean up
+        if let Some(instance) = self.active_workflows.get(workflow_id) {
+            self.store.save(instance.value()).await?;
+        }

        Ok(())
    }
@ -464,17 +543,20 @@ impl WorkflowOrchestrator {
    async fn on_task_failed(&self, msg: TaskFailed) -> Result<()> {
        let workflow_id = self.find_workflow_for_task(&msg.task_id)?;

-        {
+        // `mark_current_task_failed` encapsulates the mutable stage borrow so
+        // the DashMap entry can be re-accessed without nesting or borrow
+        // conflicts.
+        let compensation_data: Option<(Vec<StageState>, Value, String)> = {
            let mut instance = self
                .active_workflows
                .get_mut(&workflow_id)
                .ok_or_else(|| WorkflowError::WorkflowNotFound(workflow_id.clone()))?;

-            if let Some(stage) = instance.current_stage_mut() {
-                if let Some(task) = stage.assigned_tasks.get_mut(&msg.task_id) {
-                    task.mark_failed(msg.error.clone());
-                }
+            let stage_result =
+                instance.mark_current_task_failed(&msg.task_id, &msg.error, msg.can_retry);

+            match stage_result {
+                None => {
                    if msg.can_retry {
                        warn!(
                            workflow_id = %workflow_id,
@ -482,9 +564,15 @@ impl WorkflowOrchestrator {
                            retry_count = msg.retry_count,
                            "Task failed, will retry"
                        );
-                } else {
-                    let stage_name = stage.name.clone();
-                    stage.transition(StageStatus::Failed(msg.error.clone()))?;
+                    }
+                    None
+                }
+                Some(Err(e)) => return Err(e),
+                Some(Ok(stage_name)) => {
+                    let current_idx = instance.current_stage_idx;
+                    let executed_stages = instance.stages[..current_idx].to_vec();
+                    let context = instance.initial_context.clone();
+
                    instance.fail(format!("Stage {} failed: {}", stage_name, msg.error));

                    self.metrics.workflows_failed.inc();
@ -496,8 +584,22 @@ impl WorkflowOrchestrator {
                        error = %msg.error,
                        "Workflow failed"
                    );
+
+                    Some((executed_stages, context, stage_name))
                }
            }
+        }; // DashMap lock released here
+
+        if let Some((executed_stages, context, _stage_name)) = compensation_data {
+            // Saga compensation: dispatch rollback tasks in reverse order (best-effort)
+            self.saga
+                .compensate(&workflow_id, &executed_stages, &context)
+                .await?;
+
+            // Persist the failed state
+            if let Some(instance) = self.active_workflows.get(&workflow_id) {
+                self.store.save(instance.value()).await?;
+            }
        }

        Ok(())
--- a/crates/vapora-workflow-engine/src/persistence.rs
+++ b/crates/vapora-workflow-engine/src/persistence.rs
@ -0,0 +1,97 @@
+use std::sync::Arc;
+
+use surrealdb::engine::remote::ws::Client;
+use surrealdb::Surreal;
+use tracing::debug;
+
+use crate::error::{Result, WorkflowError};
+use crate::instance::WorkflowInstance;
+
+/// Persists `WorkflowInstance` records to SurrealDB for crash recovery.
+///
+/// Uses the injected connection from the backend — does NOT create its own DB
+/// connection. Table: `workflow_instances` (created by migration
+/// 009_workflow_state.surql).
+pub struct SurrealWorkflowStore {
+    db: Arc<Surreal<Client>>,
+}
+
+impl SurrealWorkflowStore {
+    pub fn new(db: Arc<Surreal<Client>>) -> Self {
+        Self { db }
+    }
+
+    /// Upsert a workflow instance. Called after every state-mutating operation.
+    pub async fn save(&self, instance: &WorkflowInstance) -> Result<()> {
+        let json = serde_json::to_value(instance).map_err(|e| {
+            WorkflowError::DatabaseError(format!("serialize workflow {}: {e}", instance.id))
+        })?;
+
+        let _: Option<serde_json::Value> = self
+            .db
+            .upsert(("workflow_instances", &*instance.id))
+            .content(json)
+            .await
+            .map_err(|e| {
+                WorkflowError::DatabaseError(format!("save workflow {}: {e}", instance.id))
+            })?;
+
+        debug!(workflow_id = %instance.id, "Workflow instance persisted");
+        Ok(())
+    }
+
+    /// Load a single instance by ID.
+    pub async fn load(&self, id: &str) -> Result<Option<WorkflowInstance>> {
+        let raw: Option<serde_json::Value> = self
+            .db
+            .select(("workflow_instances", id))
+            .await
+            .map_err(|e| WorkflowError::DatabaseError(format!("load workflow {id}: {e}")))?;
+
+        raw.map(|v| {
+            serde_json::from_value(v).map_err(|e| {
+                WorkflowError::DatabaseError(format!("deserialize workflow {id}: {e}"))
+            })
+        })
+        .transpose()
+    }
+
+    /// Load all non-terminal instances for crash recovery on startup.
+    ///
+    /// Fetches all rows and filters in Rust — avoids complex SurrealQL queries
+    /// against serde-serialized enum variants (e.g. `Failed` serializes as an
+    /// object).
+    pub async fn load_active(&self) -> Result<Vec<WorkflowInstance>> {
+        let mut response = self
+            .db
+            .query("SELECT * FROM workflow_instances")
+            .await
+            .map_err(|e| WorkflowError::DatabaseError(format!("load_active query: {e}")))?;
+
+        let raw: Vec<serde_json::Value> = response
+            .take(0)
+            .map_err(|e| WorkflowError::DatabaseError(format!("load_active take: {e}")))?;
+
+        let instances: Vec<WorkflowInstance> = raw
+            .into_iter()
+            .filter_map(|v| serde_json::from_value(v).ok())
+            .collect();
+
+        Ok(instances
+            .into_iter()
+            .filter(|i| !i.is_completed() && !i.is_failed() && !i.is_cancelled())
+            .collect())
+    }
+
+    /// Delete a terminal workflow instance from the store.
+    pub async fn delete(&self, id: &str) -> Result<()> {
+        let _: Option<serde_json::Value> = self
+            .db
+            .delete(("workflow_instances", id))
+            .await
+            .map_err(|e| WorkflowError::DatabaseError(format!("delete workflow {id}: {e}")))?;
+
+        debug!(workflow_id = %id, "Workflow instance deleted from store");
+        Ok(())
+    }
+}
--- a/crates/vapora-workflow-engine/src/saga.rs
+++ b/crates/vapora-workflow-engine/src/saga.rs
@ -0,0 +1,175 @@
+use std::sync::Arc;
+
+use tracing::{error, info};
+use vapora_swarm::coordinator::SwarmCoordinator;
+
+use crate::error::Result;
+use crate::stage::StageState;
+
+/// Saga compensator that dispatches rollback tasks for completed stages when
+/// a downstream stage fails.
+///
+/// Iterates executed stages in reverse order, and for each stage that has
+/// `compensation_agents` configured, submits a `compensation` task to the
+/// `SwarmCoordinator`. Errors are logged but never propagated — compensation
+/// is best-effort (consistent with stratum-orchestrator's behaviour).
+pub struct SagaCompensator {
+    swarm: Arc<SwarmCoordinator>,
+}
+
+impl SagaCompensator {
+    pub fn new(swarm: Arc<SwarmCoordinator>) -> Self {
+        Self { swarm }
+    }
+
+    /// Dispatch compensation tasks for `executed_stages` in reverse order.
+    ///
+    /// `context` is the workflow's `initial_context` — passed to each
+    /// compensation task so the agent has enough information to undo the work.
+    pub async fn compensate(
+        &self,
+        workflow_id: &str,
+        executed_stages: &[StageState],
+        context: &serde_json::Value,
+    ) -> Result<()> {
+        for stage in executed_stages.iter().rev() {
+            let compensation_agents = match &stage.config.compensation_agents {
+                Some(agents) if !agents.is_empty() => agents.clone(),
+                _ => continue,
+            };
+
+            let task_id = format!("compensate-{}-{}", workflow_id, stage.name);
+            let task_payload = serde_json::json!({
+                "type": "compensation",
+                "stage_name": stage.name,
+                "workflow_id": workflow_id,
+                "original_context": context,
+                "artifacts_to_undo": stage.artifacts_produced,
+            });
+
+            match self
+                .swarm
+                .submit_task_for_bidding(
+                    task_id.clone(),
+                    task_payload.to_string(),
+                    compensation_agents,
+                )
+                .await
+            {
+                Ok(_) => {
+                    info!(
+                        workflow_id = %workflow_id,
+                        stage = %stage.name,
+                        task_id = %task_id,
+                        "Saga compensation task dispatched"
+                    );
+                }
+                Err(e) => {
+                    error!(
+                        workflow_id = %workflow_id,
+                        stage = %stage.name,
+                        task_id = %task_id,
+                        error = %e,
+                        "Compensation dispatch failed (best-effort, continuing)"
+                    );
+                }
+            }
+        }
+
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::config::StageConfig;
+    use crate::stage::{StageState, StageStatus, TaskState};
+
+    fn stage_with_compensation(name: &str, agents: Vec<String>) -> StageState {
+        let config = StageConfig {
+            name: name.to_string(),
+            agents: vec!["developer".to_string()],
+            parallel: false,
+            max_parallel: None,
+            approval_required: false,
+            compensation_agents: Some(agents),
+        };
+        let mut state = StageState::new(config);
+        state.status = StageStatus::Completed;
+        state.artifacts_produced = vec![format!("{name}-artifact")];
+        state
+    }
+
+    fn stage_without_compensation(name: &str) -> StageState {
+        let config = StageConfig {
+            name: name.to_string(),
+            agents: vec!["developer".to_string()],
+            parallel: false,
+            max_parallel: None,
+            approval_required: false,
+            compensation_agents: None,
+        };
+        let mut state = StageState::new(config);
+        state.status = StageStatus::Completed;
+        state
+    }
+
+    #[test]
+    fn test_stages_with_no_compensation_agents_are_skipped() {
+        let stages = vec![
+            stage_without_compensation("design"),
+            stage_without_compensation("review"),
+        ];
+
+        // Verify the filter logic: stages without compensation_agents produce no tasks
+        let compensatable: Vec<&StageState> = stages
+            .iter()
+            .rev()
+            .filter(|s| {
+                s.config
+                    .compensation_agents
+                    .as_ref()
+                    .map(|a| !a.is_empty())
+                    .unwrap_or(false)
+            })
+            .collect();
+
+        assert!(compensatable.is_empty());
+    }
+
+    #[test]
+    fn test_stages_with_compensation_agents_are_included() {
+        let stages = vec![
+            stage_with_compensation("deploy", vec!["ops-agent".to_string()]),
+            stage_without_compensation("review"),
+            stage_with_compensation("provision", vec!["infra-agent".to_string()]),
+        ];
+
+        let compensatable: Vec<&str> = stages
+            .iter()
+            .rev()
+            .filter(|s| {
+                s.config
+                    .compensation_agents
+                    .as_ref()
+                    .map(|a| !a.is_empty())
+                    .unwrap_or(false)
+            })
+            .map(|s| s.name.as_str())
+            .collect();
+
+        // Reverse order: provision first, then deploy
+        assert_eq!(compensatable, vec!["provision", "deploy"]);
+    }
+
+    #[test]
+    fn test_task_state_mark_completed_carries_artifacts() {
+        let mut task = TaskState::new(
+            "t1".to_string(),
+            "agent-1".to_string(),
+            "ops-agent".to_string(),
+        );
+        task.mark_completed("done".to_string(), vec!["artifact-1".to_string()]);
+        assert_eq!(task.artifacts, vec!["artifact-1"]);
+    }
+}
--- a/crates/vapora-workflow-engine/src/stage.rs
+++ b/crates/vapora-workflow-engine/src/stage.rs
@ -154,6 +154,7 @@ mod tests {
            parallel: false,
            max_parallel: None,
            approval_required: false,
+            compensation_agents: None,
        }
    }

--- a/docs/adrs/0033-stratum-orchestrator-workflow-hardening.md
+++ b/docs/adrs/0033-stratum-orchestrator-workflow-hardening.md
@ -0,0 +1,225 @@
+# ADR-0033: Workflow Engine Hardening — Persistence, Saga Compensation, Cedar Authorization
+
+**Status**: Implemented
+**Date**: 2026-02-21
+**Deciders**: VAPORA Team
+**Technical Story**: `vapora-workflow-engine` lost all state on restart, had no rollback mechanism on failure, and applied no per-stage access control.
+
+---
+
+## Decision
+
+Harden `vapora-workflow-engine` with three independent layers inspired by the stratum-orchestrator project:
+
+1. **SurrealDB persistence** (`SurrealWorkflowStore`) — crash-recoverable `WorkflowInstance` state
+2. **Saga compensation** (`SagaCompensator`) — reverse-order rollback dispatch via `SwarmCoordinator`
+3. **Cedar authorization** (`CedarAuthorizer`) — per-stage policy enforcement before task dispatch
+
+All three are implemented natively inside `vapora-workflow-engine` — stratum-orchestrator is **not** a direct dependency.
+
+---
+
+## Context
+
+### Gaps Before This ADR
+
+| Gap | Consequence |
+|-----|-------------|
+| In-memory `DashMap` only | All running workflows lost on server restart |
+| No compensation on failure | Stage 3 failure left Stage 1 and 2 side effects live |
+| No authorization check per stage | Any caller could trigger any stage in any workflow |
+
+### Why Not Import stratum-orchestrator Directly
+
+The plan initially included:
+
+```toml
+stratum-orchestrator = { path = "../../../stratumiops/crates/stratum-orchestrator" }
+```
+
+This fails because `stratum-orchestrator → platform-nats → nkeys = { workspace = true }`. The `nkeys` dependency is resolved only inside the `stratumiops` workspace; it is not published to crates.io and has no path resolvable from vapora's workspace root. Cargo errors with `failed to select a version for nkeys`.
+
+The `CedarAuthorizer` inside stratum-orchestrator is 88 self-contained lines using only `cedar-policy`. Implementing it locally is zero duplication risk and avoids a circular workspace dependency.
+
+---
+
+## Implementation
+
+### New Modules
+
+```text
+crates/vapora-workflow-engine/src/
+├── auth.rs        — CedarAuthorizer: loads .cedar policy files, authorize()
+├── persistence.rs — SurrealWorkflowStore: save/load/load_active/delete
+└── saga.rs        — SagaCompensator: compensate(workflow_id, stages, ctx)
+```
+
+### New Migration
+
+```text
+migrations/009_workflow_state.surql — SCHEMAFULL workflow_instances table
+```
+
+### Config Changes
+
+```toml
+[engine]
+cedar_policy_dir = "/etc/vapora/cedar"   # optional; Cedar disabled if absent
+
+[[workflows.stages]]
+name = "deploy"
+agents = ["devops"]
+compensation_agents = ["devops"]         # receives rollback task if Saga fires
+```
+
+### Dependency Addition
+
+```toml
+# crates/vapora-workflow-engine/Cargo.toml
+surrealdb    = { workspace = true }
+cedar-policy = "4.9"
+```
+
+`cedar-policy` enters directly; it was previously only transitive via `secretumvault` (4.8). Cargo resolves the workspace to 4.9 (semver compatible, same major).
+
+### WorkflowOrchestrator Constructor Change
+
+```rust
+// Before
+WorkflowOrchestrator::new(config_path, swarm, kg, nats)
+
+// After
+WorkflowOrchestrator::new(config_path, swarm, kg, nats, db: Surreal<Client>)
+```
+
+`db` is the existing backend connection — the store does not open its own connection.
+
+---
+
+## Data Flow
+
+```text
+start_workflow()
+  → WorkflowInstance::new()
+  → store.save()                    ← persistence
+  → execute_current_stage()
+      → cedar.authorize()           ← auth (if configured)
+      → swarm.assign_task()
+
+on_task_completed()
+  → task.mark_completed()
+  → store.save()                    ← persistence
+
+on_task_failed(can_retry=false)
+  → mark_current_task_failed()      ← stage transition
+  → saga.compensate(stages, ctx)    ← saga (reverse-order dispatch)
+  → instance.fail()
+  → store.save()                    ← persistence
+
+startup crash recovery
+  → store.load_active()             ← restores active_workflows DashMap
+```
+
+---
+
+## Saga Compensation Protocol
+
+Compensation is **best-effort**: errors are logged, never propagated. Stage order is reversed: the last executed stage receives a rollback task first.
+
+Only stages with `compensation_agents` defined in their `StageConfig` receive a compensation task. Stages without the field are silently skipped.
+
+Compensation task payload sent to `SwarmCoordinator`:
+
+```json
+{
+  "type": "compensation",
+  "stage_name": "deploy",
+  "workflow_id": "abc-123",
+  "original_context": { "…" : "…" },
+  "artifacts_to_undo": ["artifact-id-1"]
+}
+```
+
+---
+
+## Cedar Authorization
+
+`CedarAuthorizer::load_from_dir(path)` reads all `*.cedar` files from the directory and compiles them into a single `PolicySet`. Before each stage dispatch:
+
+```rust
+cedar.authorize(
+    "vapora-orchestrator",      // principal
+    "execute-stage",            // action
+    "Stage::\"architecture\"",  // resource
+)?;
+```
+
+A `Deny` decision returns `WorkflowError::Unauthorized`, halting the workflow without dispatching the stage. If `cedar_policy_dir` is not set in `EngineConfig`, Cedar is disabled and all stages proceed without policy checks.
+
+---
+
+## Rationale
+
+### Why SurrealDB (not Redis / SQLite)
+
+SurrealDB is already the persistence layer for every other stateful component in vapora. Adding `workflow_instances` as one more table keeps the operational footprint at zero (no new service, no new connection pool). `WorkflowInstance` already implements `Serialize/Deserialize`; the store serializes via `serde_json::Value` to satisfy the `SurrealValue` trait requirement introduced in surrealdb v3.
+
+### Why Saga Over Two-Phase Commit
+
+Workflows already span multiple async agent executions over NATS. Two-phase commit across these boundaries would require protocol changes in every agent. Saga achieves eventual consistency via compensating transactions that each agent already understands (a task with `type: "compensation"`).
+
+### Why Cedar Over RBAC / Custom Middleware
+
+Cedar policies are already used by the rest of the VAPORA platform (see ADR-0010). Per-stage rules expressed in `.cedar` files are reviewable outside the codebase and hot-swappable without redeployment (restart required to reload, by current design). A custom middleware table would require schema migrations for every policy change.
+
+---
+
+## Consequences
+
+### Positive
+
+- Workflows survive server restarts (crash recovery via `load_active()`)
+- Non-retryable stage failure triggers best-effort rollback of completed stages
+- Per-stage access control via auditable policy files
+- Zero new infrastructure (uses existing SurrealDB connection)
+- 31/31 existing tests continue to pass; 5 new tests added (auth × 3, saga × 2)
+
+### Negative
+
+- `WorkflowOrchestrator::new()` signature change requires callers to pass `Surreal<Client>`
+- Cedar requires `.cedar` files on disk; missing `cedar_policy_dir` disables auth silently
+- Compensation is best-effort — no guarantee of full rollback if compensation agent also fails
+
+### Mitigations
+
+| Risk | Mitigation |
+|------|------------|
+| Saga partial rollback | Metrics track compensation dispatch; dead-letter queue via NATS for retry |
+| Cedar files missing | `cedar_policy_dir = None` → no-auth mode; documented explicitly |
+| Signature change | Backend already owns `db: Arc<Surreal<Client>>`; passed at construction |
+
+---
+
+## Verification
+
+```bash
+cargo test -p vapora-workflow-engine        # 31/31 pass
+cargo clippy -p vapora-workflow-engine -- -D warnings  # 0 warnings
+```
+
+New tests:
+
+- `auth::tests::test_permit_allows`
+- `auth::tests::test_deny_returns_unauthorized`
+- `auth::tests::test_empty_dir_fails`
+- `saga::tests::test_stages_with_compensation_agents_are_included`
+- `saga::tests::test_stages_with_no_compensation_agents_are_skipped`
+
+---
+
+## Related ADRs
+
+- [ADR-0028](./0028-workflow-orchestrator.md) — Workflow Orchestrator (original implementation)
+- [ADR-0010](./0010-cedar-authorization.md) — Cedar Authorization
+- [ADR-0004](./0004-surrealdb-database.md) — SurrealDB as single persistence layer
+- [ADR-0018](./0018-swarm-load-balancing.md) — SwarmCoordinator (Saga dispatch target)
--- a/docs/adrs/README.md
+++ b/docs/adrs/README.md
@ -2,8 +2,8 @@

 Documentación de las decisiones arquitectónicas clave del proyecto VAPORA.

-**Status**: Complete (32 ADRs documented)
-**Last Updated**: 2026-02-17
+**Status**: Complete (33 ADRs documented)
+**Last Updated**: 2026-02-21
 **Format**: Custom VAPORA (Decision, Rationale, Alternatives, Trade-offs, Implementation, Verification, Consequences)

 ---
@ -80,6 +80,7 @@ Decisiones únicas que diferencian a VAPORA de otras plataformas de orquestació
 | [021](./0021-websocket-updates.md) | Real-Time WebSocket Updates | tokio::sync::broadcast para pub/sub eficiente | ✅ Accepted |
 | [028](./0028-workflow-orchestrator.md) | Workflow Orchestrator para Multi-Agent Pipelines | Short-lived agent contexts + artifact passing para reducir cache tokens 95% | ✅ Accepted |
 | [029](./0029-rlm-recursive-language-models.md) | Recursive Language Models (RLM) | Custom Rust engine: BM25 + semantic hybrid search + distributed LLM dispatch + WASM/Docker sandbox | ✅ Accepted |
+| [033](./0033-stratum-orchestrator-workflow-hardening.md) | Workflow Engine Hardening — Persistence · Saga · Cedar | SurrealDB persistence + Saga best-effort rollback + Cedar per-stage auth; stratum patterns implemented natively (no path dep) | ✅ Implemented |

 ---

--- a/docs/guides/workflow-saga-persistence.md
+++ b/docs/guides/workflow-saga-persistence.md
@ -0,0 +1,261 @@
+# Workflow Engine: Persistence, Saga Compensation & Cedar Authorization
+
+How to configure and operate the three hardening layers added in v1.3.0:
+crash-recoverable state, Saga-based rollback, and per-stage access control.
+
+## Prerequisites
+
+- Running SurrealDB instance (same one used by the backend)
+- `vapora-workflow-engine` v1.3.0+
+- Migration `009_workflow_state.surql` applied
+
+## 1. Apply the Database Migration
+
+```bash
+surreal import \
+  --conn  ws://localhost:8000 \
+  --user  root \
+  --pass  root \
+  --ns    vapora \
+  --db    vapora \
+  migrations/009_workflow_state.surql
+```
+
+This creates the `workflow_instances` SCHEMAFULL table with indexes on
+`template_name` and `created_at`.
+
+## 2. Wire the Store into the Orchestrator
+
+Pass the existing `Surreal<Client>` when constructing `WorkflowOrchestrator`:
+
+```rust
+use std::sync::Arc;
+use surrealdb::{Surreal, engine::remote::ws::Client};
+use vapora_workflow_engine::WorkflowOrchestrator;
+
+async fn build_orchestrator(
+    db: Arc<Surreal<Client>>,
+    swarm: Arc<SwarmCoordinator>,
+    kg:    Arc<KGPersistence>,
+    nats:  Arc<async_nats::Client>,
+) -> anyhow::Result<Arc<WorkflowOrchestrator>> {
+    let orchestrator = WorkflowOrchestrator::new(
+        "config/workflows.toml",
+        swarm,
+        kg,
+        nats,
+        (*db).clone(),          // Surreal<Client> (not Arc)
+    )
+    .await?;
+
+    Ok(Arc::new(orchestrator))
+}
+```
+
+On startup the orchestrator calls `store.load_active()` and reinserts any
+non-terminal instances from SurrealDB into the in-memory `DashMap`. Workflows
+in progress before a crash resume from their last persisted state.
+
+## 3. Configure Saga Compensation
+
+Add `compensation_agents` to each stage that should trigger a rollback task
+when the workflow fails after that stage has completed:
+
+```toml
+# config/workflows.toml
+
+[engine]
+max_parallel_tasks   = 10
+workflow_timeout     = 3600
+approval_gates_enabled = true
+
+[[workflows]]
+name    = "deploy_pipeline"
+trigger = "manual"
+
+[[workflows.stages]]
+name                = "provision"
+agents              = ["devops"]
+compensation_agents = ["devops"]   # ← Saga: send rollback task to devops
+
+[[workflows.stages]]
+name                = "migrate_db"
+agents              = ["backend"]
+compensation_agents = ["backend"]  # ← Saga: send DB rollback task to backend
+
+[[workflows.stages]]
+name    = "smoke_test"
+agents  = ["tester"]
+# no compensation_agents → skipped in Saga reversal
+```
+
+### Compensation Task Payload
+
+When a non-retryable failure occurs at stage N, the `SagaCompensator` iterates
+stages `[0..N]` in reverse order. For each stage with `compensation_agents`
+defined it dispatches via `SwarmCoordinator`:
+
+```json
+{
+  "type":             "compensation",
+  "stage_name":       "migrate_db",
+  "workflow_id":      "abc-123",
+  "original_context": { "…": "…" },
+  "artifacts_to_undo": ["artifact-id-1"]
+}
+```
+
+Compensation is **best-effort**: errors are logged but never fail the workflow
+state machine. The workflow is already marked `Failed` before Saga fires.
+
+### Agent Implementation
+
+Agents receive compensation tasks on the same NATS subjects as regular tasks.
+Distinguish by the `"type": "compensation"` field:
+
+```rust
+// In your agent task handler
+if task.payload["type"] == "compensation" {
+    let stage  = &task.payload["stage_name"];
+    let wf_id  = &task.payload["workflow_id"];
+    // perform rollback for this stage …
+    return Ok(());
+}
+// normal task handling …
+```
+
+## 4. Configure Cedar Authorization
+
+Cedar authorization is **opt-in**: if `cedar_policy_dir` is absent, all stages
+execute without policy checks.
+
+### Enable Cedar
+
+```toml
+[engine]
+max_parallel_tasks   = 10
+workflow_timeout     = 3600
+approval_gates_enabled = true
+cedar_policy_dir     = "/etc/vapora/cedar"  # directory with *.cedar files
+```
+
+### Write Policies
+
+Create `.cedar` files in the configured directory. Each file is loaded at
+startup and merged into a single `PolicySet`.
+
+**Allow all stages** (permissive default):
+
+```cedar
+// /etc/vapora/cedar/allow-all.cedar
+permit(
+  principal == "vapora-orchestrator",
+  action    == Action::"execute-stage",
+  resource
+);
+```
+
+**Restrict deployment stages to approved callers only**:
+
+```cedar
+// /etc/vapora/cedar/restrict-deploy.cedar
+forbid(
+  principal == "vapora-orchestrator",
+  action    == Action::"execute-stage",
+  resource  == Stage::"deployment"
+) unless {
+  context.approved == true
+};
+```
+
+**Allow only specific stages**:
+
+```cedar
+// /etc/vapora/cedar/workflow-policy.cedar
+permit(
+  principal == "vapora-orchestrator",
+  action    == Action::"execute-stage",
+  resource  in [Stage::"architecture_design", Stage::"implementation", Stage::"testing"]
+);
+
+forbid(
+  principal == "vapora-orchestrator",
+  action    == Action::"execute-stage",
+  resource  == Stage::"production_deploy"
+);
+```
+
+### Authorization Check
+
+Before each stage dispatch the engine calls:
+
+```rust
+// principal: "vapora-orchestrator"
+// action:    "execute-stage"
+// resource:  Stage::"<stage_name>"
+cedar.authorize("vapora-orchestrator", "execute-stage", &stage_name)?;
+```
+
+A `Deny` decision returns `WorkflowError::Unauthorized` and halts the workflow
+immediately. The stage is never dispatched to `SwarmCoordinator`.
+
+## 5. Crash Recovery Behavior
+
+| Scenario | Behavior |
+|----------|----------|
+| Server restart with active workflows | `load_active()` re-populates `DashMap`; event listener resumes NATS subscriptions; in-flight tasks re-dispatch on next `TaskCompleted`/`TaskFailed` event |
+| Workflow in `Completed` or `Failed` state | Filtered out by `load_active()`; not reloaded |
+| Workflow in `WaitingApproval` | Reloaded; waits for `approve_stage()` call |
+| SurrealDB unavailable at startup | `load_active()` returns error; orchestrator fails to start |
+
+## 6. Observability
+
+Saga dispatch failures appear in logs with level `warn`:
+
+```text
+WARN vapora_workflow_engine::saga: compensation dispatch failed
+  workflow_id="abc-123" stage="migrate_db" error="…"
+```
+
+Cedar denials:
+
+```text
+ERROR vapora_workflow_engine::orchestrator: Cedar authorization denied
+  workflow_id="abc-123" stage="production_deploy"
+```
+
+Existing Prometheus metrics track workflow lifecycle:
+
+```text
+vapora_workflows_failed_total    # includes Cedar-denied workflows
+vapora_active_workflows          # drops immediately on denial
+```
+
+## 7. Full Config Reference
+
+```toml
+[engine]
+max_parallel_tasks     = 10
+workflow_timeout       = 3600
+approval_gates_enabled = true
+cedar_policy_dir       = "/etc/vapora/cedar"   # optional
+
+[[workflows]]
+name    = "my_workflow"
+trigger = "manual"
+
+[[workflows.stages]]
+name                = "stage_a"
+agents              = ["agent_role"]
+parallel            = false
+max_parallel        = 1
+approval_required   = false
+compensation_agents = ["agent_role"]   # optional; omit to skip Saga for this stage
+```
+
+## Related
+
+- [ADR-0033: Workflow Engine Hardening](../adrs/0033-stratum-orchestrator-workflow-hardening.md)
+- [ADR-0028: Workflow Orchestrator](../adrs/0028-workflow-orchestrator.md)
+- [ADR-0010: Cedar Authorization](../adrs/0010-cedar-authorization.md)
+- [Workflow Orchestrator Feature Reference](../features/workflow-orchestrator.md)
--- a/docs/src/SUMMARY.md
+++ b/docs/src/SUMMARY.md
@ -62,6 +62,12 @@
  - [0025: Multi-Tenancy](../adrs/0025-multi-tenancy.md)
  - [0026: Shared State](../adrs/0026-shared-state.md)
  - [0027: Documentation Layers](../adrs/0027-documentation-layers.md)
+  - [0033: Workflow Engine Hardening](../adrs/0033-stratum-orchestrator-workflow-hardening.md)
+
+## Guides
+
+- [Workflow Persistence, Saga & Cedar](../guides/workflow-saga-persistence.md)
+- [RLM Usage Guide](../guides/rlm-usage-guide.md)

 ## Integration Guides

@ -99,8 +105,8 @@

 ---

-**Documentation Version**: 1.2.0
-**Last Updated**: 2026-01-12
+**Documentation Version**: 1.3.0
+**Last Updated**: 2026-02-21
 **Status**: Production Ready

 For the latest updates, visit: https://github.com/vapora-platform/vapora
--- a/migrations/009_workflow_state.surql
+++ b/migrations/009_workflow_state.surql
@ -0,0 +1,21 @@
+-- Migration 009: Workflow State Persistence
+-- Replaces DashMap (in-memory, lost on restart) with durable SurrealDB storage.
+-- WorkflowInstance is stored as a JSON document; status is a tagged serde enum.
+
+DEFINE TABLE workflow_instances SCHEMAFULL;
+
+DEFINE FIELD id           ON TABLE workflow_instances TYPE record<workflow_instances>;
+DEFINE FIELD template_name ON TABLE workflow_instances TYPE string  ASSERT $value != NONE;
+DEFINE FIELD status       ON TABLE workflow_instances FLEXIBLE TYPE any ASSERT $value != NONE;
+DEFINE FIELD stages       ON TABLE workflow_instances FLEXIBLE TYPE array;
+DEFINE FIELD current_stage_idx ON TABLE workflow_instances TYPE int DEFAULT 0;
+DEFINE FIELD initial_context   ON TABLE workflow_instances FLEXIBLE TYPE object;
+DEFINE FIELD accumulated_artifacts ON TABLE workflow_instances FLEXIBLE TYPE object DEFAULT {};
+DEFINE FIELD created_at   ON TABLE workflow_instances TYPE datetime DEFAULT time::now();
+DEFINE FIELD updated_at   ON TABLE workflow_instances TYPE datetime DEFAULT time::now() VALUE time::now();
+
+DEFINE INDEX idx_workflow_instances_template
+    ON TABLE workflow_instances COLUMNS template_name;
+
+DEFINE INDEX idx_workflow_instances_created_at
+    ON TABLE workflow_instances COLUMNS created_at;