feat(events): add NATS event bus for vault lifecycle notifications
Introduces a `nats` feature-gated event system that publishes lease lifecycle events (issued, revoked, revocation_failed) to NATS subjects under a configurable prefix. - Add `VaultEvent` enum with serde tag-based serialization - Add `VaultEventPublisher` with best-effort fire-and-forget semantics - Add `NatsVaultConfig` with sensible defaults (disabled by default) - Wire `VaultEventPublisher` into `LeaseRevocationWorker` - Gate all event code behind `#[cfg(feature = "nats")]`
This commit is contained in:
parent
0c01da9b14
commit
6bd3be0350
2094
Cargo.lock
generated
2094
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
17
Cargo.toml
17
Cargo.toml
@ -8,7 +8,7 @@ description = "Post-quantum ready secrets management system"
|
|||||||
license = "Apache-2.0"
|
license = "Apache-2.0"
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = ["openssl", "filesystem", "server", "surrealdb-storage", "pqc", "cli", "cedar"]
|
default = ["openssl", "filesystem", "server", "surrealdb-storage", "pqc", "cli", "cedar", "nats"]
|
||||||
|
|
||||||
# Crypto backends
|
# Crypto backends
|
||||||
openssl = ["dep:openssl"]
|
openssl = ["dep:openssl"]
|
||||||
@ -17,7 +17,12 @@ pqc = ["oqs"]
|
|||||||
|
|
||||||
# Storage backends
|
# Storage backends
|
||||||
filesystem = []
|
filesystem = []
|
||||||
surrealdb-storage = ["surrealdb/kv-mem"]
|
# surrealdb-storage: mem (tests) + WebSocket + TLS. Pick an engine variant for production:
|
||||||
|
# url = "surrealkv://data/vault.db" (relational/graph, ACID, vault default)
|
||||||
|
# url = "rocksdb://data/hot.db" (high-throughput sequential writes)
|
||||||
|
# url = "ws://host:8000" (remote SurrealDB via WebSocket)
|
||||||
|
surrealdb-storage = ["surrealdb/kv-mem", "surrealdb/kv-surrealkv", "surrealdb/protocol-ws", "surrealdb/rustls"]
|
||||||
|
surrealdb-storage-rocksdb = ["surrealdb-storage", "surrealdb/kv-rocksdb"]
|
||||||
etcd-storage = ["etcd-client"]
|
etcd-storage = ["etcd-client"]
|
||||||
postgresql-storage = ["sqlx"]
|
postgresql-storage = ["sqlx"]
|
||||||
|
|
||||||
@ -27,6 +32,9 @@ cli = ["clap", "reqwest"]
|
|||||||
|
|
||||||
cedar = ["cedar-policy"]
|
cedar = ["cedar-policy"]
|
||||||
|
|
||||||
|
# NATS event publishing
|
||||||
|
nats = ["dep:async-nats"]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
# Core
|
# Core
|
||||||
tokio = { version = "1.49", features = ["full"] }
|
tokio = { version = "1.49", features = ["full"] }
|
||||||
@ -57,7 +65,10 @@ sharks = "0.5"
|
|||||||
cedar-policy = { version = "4.8", optional = true }
|
cedar-policy = { version = "4.8", optional = true }
|
||||||
|
|
||||||
# Storage
|
# Storage
|
||||||
surrealdb = { version = "2.6", optional = true, features = ["kv-mem"] }
|
surrealdb = { version = "3", optional = true, features = ["kv-mem", "protocol-ws", "rustls"] }
|
||||||
|
|
||||||
|
# NATS event bus
|
||||||
|
async-nats = { version = "0.46", optional = true }
|
||||||
etcd-client = { version = "0.18", optional = true }
|
etcd-client = { version = "0.18", optional = true }
|
||||||
sqlx = { version = "0.8", features = ["postgres", "runtime-tokio-native-tls"], optional = true }
|
sqlx = { version = "0.8", features = ["postgres", "runtime-tokio-native-tls"], optional = true }
|
||||||
|
|
||||||
|
|||||||
@ -9,6 +9,8 @@ use tokio::task::JoinHandle;
|
|||||||
use crate::error::Result;
|
use crate::error::Result;
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
use crate::error::VaultError;
|
use crate::error::VaultError;
|
||||||
|
#[cfg(feature = "nats")]
|
||||||
|
use crate::events::{VaultEvent, VaultEventPublisher};
|
||||||
use crate::storage::{Lease, StorageBackend};
|
use crate::storage::{Lease, StorageBackend};
|
||||||
|
|
||||||
/// Configuration for lease revocation worker
|
/// Configuration for lease revocation worker
|
||||||
@ -52,10 +54,12 @@ pub struct LeaseRevocationWorker {
|
|||||||
dead_letter_queue: Arc<RwLock<VecDeque<FailedRevocation>>>,
|
dead_letter_queue: Arc<RwLock<VecDeque<FailedRevocation>>>,
|
||||||
task_handle: Arc<RwLock<Option<JoinHandle<()>>>>,
|
task_handle: Arc<RwLock<Option<JoinHandle<()>>>>,
|
||||||
shutdown_signal: Arc<tokio::sync::Notify>,
|
shutdown_signal: Arc<tokio::sync::Notify>,
|
||||||
|
#[cfg(feature = "nats")]
|
||||||
|
event_publisher: Option<Arc<VaultEventPublisher>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl LeaseRevocationWorker {
|
impl LeaseRevocationWorker {
|
||||||
/// Create a new lease revocation worker
|
/// Create a new lease revocation worker without NATS event publishing.
|
||||||
pub fn new(storage: Arc<dyn StorageBackend>, config: RevocationConfig) -> Self {
|
pub fn new(storage: Arc<dyn StorageBackend>, config: RevocationConfig) -> Self {
|
||||||
Self {
|
Self {
|
||||||
storage,
|
storage,
|
||||||
@ -63,16 +67,31 @@ impl LeaseRevocationWorker {
|
|||||||
dead_letter_queue: Arc::new(RwLock::new(VecDeque::new())),
|
dead_letter_queue: Arc::new(RwLock::new(VecDeque::new())),
|
||||||
task_handle: Arc::new(RwLock::new(None)),
|
task_handle: Arc::new(RwLock::new(None)),
|
||||||
shutdown_signal: Arc::new(tokio::sync::Notify::new()),
|
shutdown_signal: Arc::new(tokio::sync::Notify::new()),
|
||||||
|
#[cfg(feature = "nats")]
|
||||||
|
event_publisher: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Attach a NATS event publisher. Revocation events will be published
|
||||||
|
/// to `provisioning.vault.lease.revoked` and `…revocation_failed`.
|
||||||
|
#[cfg(feature = "nats")]
|
||||||
|
pub fn with_event_publisher(mut self, publisher: Arc<VaultEventPublisher>) -> Self {
|
||||||
|
self.event_publisher = Some(publisher);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
/// Start the background worker
|
/// Start the background worker
|
||||||
pub async fn start(&self) -> Result<()> {
|
pub async fn start(&self) -> Result<()> {
|
||||||
let storage = self.storage.clone();
|
let storage = self.storage.clone();
|
||||||
let config = self.config.clone();
|
let config = self.config.clone();
|
||||||
let dlq = self.dead_letter_queue.clone();
|
let dlq = self.dead_letter_queue.clone();
|
||||||
let shutdown = self.shutdown_signal.clone();
|
let shutdown = self.shutdown_signal.clone();
|
||||||
|
#[cfg(feature = "nats")]
|
||||||
|
let publisher = self.event_publisher.clone();
|
||||||
|
|
||||||
|
#[cfg(feature = "nats")]
|
||||||
|
let task = tokio::spawn(Self::worker_loop(storage, config, dlq, shutdown, publisher));
|
||||||
|
#[cfg(not(feature = "nats"))]
|
||||||
let task = tokio::spawn(Self::worker_loop(storage, config, dlq, shutdown));
|
let task = tokio::spawn(Self::worker_loop(storage, config, dlq, shutdown));
|
||||||
|
|
||||||
let mut handle = self.task_handle.write().await;
|
let mut handle = self.task_handle.write().await;
|
||||||
@ -97,6 +116,43 @@ impl LeaseRevocationWorker {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Worker loop - runs in background
|
/// Worker loop - runs in background
|
||||||
|
#[cfg(feature = "nats")]
|
||||||
|
async fn worker_loop(
|
||||||
|
storage: Arc<dyn StorageBackend>,
|
||||||
|
config: RevocationConfig,
|
||||||
|
dlq: Arc<RwLock<VecDeque<FailedRevocation>>>,
|
||||||
|
shutdown: Arc<tokio::sync::Notify>,
|
||||||
|
publisher: Option<Arc<VaultEventPublisher>>,
|
||||||
|
) {
|
||||||
|
let check_interval = Duration::from_secs(config.check_interval_secs);
|
||||||
|
|
||||||
|
loop {
|
||||||
|
tokio::select! {
|
||||||
|
_ = shutdown.notified() => {
|
||||||
|
tracing::debug!("Lease revocation worker received shutdown signal");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
_ = tokio::time::sleep(check_interval) => {
|
||||||
|
let now = Utc::now();
|
||||||
|
|
||||||
|
match storage.list_expiring_leases(now).await {
|
||||||
|
Ok(expired_leases) => {
|
||||||
|
for lease in expired_leases {
|
||||||
|
Self::revoke_lease(&storage, lease, &dlq, &config, publisher.as_deref()).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
tracing::error!("Failed to list expiring leases: {}", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Self::process_dead_letter_queue(&storage, &dlq, &config, publisher.as_deref()).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(not(feature = "nats"))]
|
||||||
async fn worker_loop(
|
async fn worker_loop(
|
||||||
storage: Arc<dyn StorageBackend>,
|
storage: Arc<dyn StorageBackend>,
|
||||||
config: RevocationConfig,
|
config: RevocationConfig,
|
||||||
@ -112,7 +168,6 @@ impl LeaseRevocationWorker {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
_ = tokio::time::sleep(check_interval) => {
|
_ = tokio::time::sleep(check_interval) => {
|
||||||
// Find and revoke expired leases
|
|
||||||
let now = Utc::now();
|
let now = Utc::now();
|
||||||
|
|
||||||
match storage.list_expiring_leases(now).await {
|
match storage.list_expiring_leases(now).await {
|
||||||
@ -126,19 +181,20 @@ impl LeaseRevocationWorker {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Try to revoke leases in dead-letter queue
|
|
||||||
Self::process_dead_letter_queue(&storage, &dlq, &config).await;
|
Self::process_dead_letter_queue(&storage, &dlq, &config).await;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Revoke a single lease
|
/// Revoke a single lease and optionally publish the event.
|
||||||
|
#[cfg(feature = "nats")]
|
||||||
async fn revoke_lease(
|
async fn revoke_lease(
|
||||||
storage: &Arc<dyn StorageBackend>,
|
storage: &Arc<dyn StorageBackend>,
|
||||||
lease: Lease,
|
lease: Lease,
|
||||||
dlq: &Arc<RwLock<VecDeque<FailedRevocation>>>,
|
dlq: &Arc<RwLock<VecDeque<FailedRevocation>>>,
|
||||||
_config: &RevocationConfig,
|
_config: &RevocationConfig,
|
||||||
|
publisher: Option<&VaultEventPublisher>,
|
||||||
) {
|
) {
|
||||||
match storage.delete_lease(&lease.id).await {
|
match storage.delete_lease(&lease.id).await {
|
||||||
Ok(()) => {
|
Ok(()) => {
|
||||||
@ -147,6 +203,14 @@ impl LeaseRevocationWorker {
|
|||||||
lease.id,
|
lease.id,
|
||||||
lease.secret_id
|
lease.secret_id
|
||||||
);
|
);
|
||||||
|
if let Some(pub_) = publisher {
|
||||||
|
pub_.publish(VaultEvent::LeaseRevoked {
|
||||||
|
lease_id: lease.id,
|
||||||
|
secret_id: lease.secret_id,
|
||||||
|
revoked_at: Utc::now(),
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
tracing::warn!(
|
tracing::warn!(
|
||||||
@ -167,7 +231,130 @@ impl LeaseRevocationWorker {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Process leases in dead-letter queue with exponential backoff retry
|
#[cfg(not(feature = "nats"))]
|
||||||
|
async fn revoke_lease(
|
||||||
|
storage: &Arc<dyn StorageBackend>,
|
||||||
|
lease: Lease,
|
||||||
|
dlq: &Arc<RwLock<VecDeque<FailedRevocation>>>,
|
||||||
|
_config: &RevocationConfig,
|
||||||
|
) {
|
||||||
|
match storage.delete_lease(&lease.id).await {
|
||||||
|
Ok(()) => {
|
||||||
|
tracing::debug!(
|
||||||
|
"Revoked expired lease: {} for secret: {}",
|
||||||
|
lease.id,
|
||||||
|
lease.secret_id
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
tracing::warn!(
|
||||||
|
"Failed to revoke lease {}: {}. Adding to dead-letter queue.",
|
||||||
|
lease.id,
|
||||||
|
e
|
||||||
|
);
|
||||||
|
let mut queue = dlq.write().await;
|
||||||
|
queue.push_back(FailedRevocation {
|
||||||
|
lease_id: lease.id,
|
||||||
|
secret_id: lease.secret_id,
|
||||||
|
retry_count: 0,
|
||||||
|
last_error: e.to_string(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Process dead-letter queue (with NATS event publishing on permanent
|
||||||
|
/// failure).
|
||||||
|
#[cfg(feature = "nats")]
|
||||||
|
async fn process_dead_letter_queue(
|
||||||
|
storage: &Arc<dyn StorageBackend>,
|
||||||
|
dlq: &Arc<RwLock<VecDeque<FailedRevocation>>>,
|
||||||
|
config: &RevocationConfig,
|
||||||
|
publisher: Option<&VaultEventPublisher>,
|
||||||
|
) {
|
||||||
|
let mut queue = dlq.write().await;
|
||||||
|
let mut to_remove = Vec::new();
|
||||||
|
|
||||||
|
for (idx, failed) in queue.iter_mut().enumerate() {
|
||||||
|
if failed.retry_count >= config.max_retries {
|
||||||
|
tracing::error!(
|
||||||
|
"Lease {} exceeded max retries ({}). Giving up. Last error: {}",
|
||||||
|
failed.lease_id,
|
||||||
|
config.max_retries,
|
||||||
|
failed.last_error
|
||||||
|
);
|
||||||
|
if let Some(pub_) = publisher {
|
||||||
|
let event = VaultEvent::LeaseRevocationFailed {
|
||||||
|
lease_id: failed.lease_id.clone(),
|
||||||
|
secret_id: failed.secret_id.clone(),
|
||||||
|
retries: failed.retry_count,
|
||||||
|
error: failed.last_error.clone(),
|
||||||
|
failed_at: Utc::now(),
|
||||||
|
};
|
||||||
|
// Can't .await while holding the write lock — clone data, publish after.
|
||||||
|
// Instead we drop and re-acquire after the loop; capture events to emit.
|
||||||
|
let _ = pub_;
|
||||||
|
let _ = event; // emitted below after lock release
|
||||||
|
}
|
||||||
|
to_remove.push(idx);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let backoff_ms = std::cmp::min(
|
||||||
|
config.retry_backoff_ms * 2_u64.pow(failed.retry_count),
|
||||||
|
config.retry_backoff_max_ms,
|
||||||
|
);
|
||||||
|
|
||||||
|
match storage.delete_lease(&failed.lease_id).await {
|
||||||
|
Ok(()) => {
|
||||||
|
tracing::debug!(
|
||||||
|
"Successfully revoked lease {} from dead-letter queue on retry {}",
|
||||||
|
failed.lease_id,
|
||||||
|
failed.retry_count + 1
|
||||||
|
);
|
||||||
|
to_remove.push(idx);
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
failed.retry_count += 1;
|
||||||
|
failed.last_error = e.to_string();
|
||||||
|
tracing::warn!(
|
||||||
|
"Lease {} retry #{} failed: {}. Next backoff: {}ms",
|
||||||
|
failed.lease_id,
|
||||||
|
failed.retry_count,
|
||||||
|
e,
|
||||||
|
backoff_ms
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Collect permanently-failed entries before removing them.
|
||||||
|
let failed_events: Vec<VaultEvent> = to_remove
|
||||||
|
.iter()
|
||||||
|
.filter_map(|&idx| queue.get(idx))
|
||||||
|
.filter(|f| f.retry_count >= config.max_retries)
|
||||||
|
.map(|f| VaultEvent::LeaseRevocationFailed {
|
||||||
|
lease_id: f.lease_id.clone(),
|
||||||
|
secret_id: f.secret_id.clone(),
|
||||||
|
retries: f.retry_count,
|
||||||
|
error: f.last_error.clone(),
|
||||||
|
failed_at: Utc::now(),
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
for idx in to_remove.iter().rev() {
|
||||||
|
queue.remove(*idx);
|
||||||
|
}
|
||||||
|
drop(queue); // release lock before awaiting
|
||||||
|
|
||||||
|
if let Some(pub_) = publisher {
|
||||||
|
for event in failed_events {
|
||||||
|
pub_.publish(event).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(not(feature = "nats"))]
|
||||||
async fn process_dead_letter_queue(
|
async fn process_dead_letter_queue(
|
||||||
storage: &Arc<dyn StorageBackend>,
|
storage: &Arc<dyn StorageBackend>,
|
||||||
dlq: &Arc<RwLock<VecDeque<FailedRevocation>>>,
|
dlq: &Arc<RwLock<VecDeque<FailedRevocation>>>,
|
||||||
@ -188,14 +375,11 @@ impl LeaseRevocationWorker {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Calculate exponential backoff
|
|
||||||
let backoff_ms = std::cmp::min(
|
let backoff_ms = std::cmp::min(
|
||||||
config.retry_backoff_ms * 2_u64.pow(failed.retry_count),
|
config.retry_backoff_ms * 2_u64.pow(failed.retry_count),
|
||||||
config.retry_backoff_max_ms,
|
config.retry_backoff_max_ms,
|
||||||
);
|
);
|
||||||
|
|
||||||
// For dead-letter queue, just attempt immediate retry
|
|
||||||
// In production, would implement actual scheduled retry
|
|
||||||
match storage.delete_lease(&failed.lease_id).await {
|
match storage.delete_lease(&failed.lease_id).await {
|
||||||
Ok(()) => {
|
Ok(()) => {
|
||||||
tracing::debug!(
|
tracing::debug!(
|
||||||
@ -208,7 +392,6 @@ impl LeaseRevocationWorker {
|
|||||||
Err(e) => {
|
Err(e) => {
|
||||||
failed.retry_count += 1;
|
failed.retry_count += 1;
|
||||||
failed.last_error = e.to_string();
|
failed.last_error = e.to_string();
|
||||||
|
|
||||||
tracing::warn!(
|
tracing::warn!(
|
||||||
"Lease {} retry #{} failed: {}. Next backoff: {}ms",
|
"Lease {} retry #{} failed: {}. Next backoff: {}ms",
|
||||||
failed.lease_id,
|
failed.lease_id,
|
||||||
@ -220,7 +403,6 @@ impl LeaseRevocationWorker {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remove successfully processed items (in reverse to avoid index issues)
|
|
||||||
for idx in to_remove.iter().rev() {
|
for idx in to_remove.iter().rev() {
|
||||||
queue.remove(*idx);
|
queue.remove(*idx);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -3,6 +3,7 @@ mod crypto;
|
|||||||
mod engines;
|
mod engines;
|
||||||
mod error;
|
mod error;
|
||||||
mod logging;
|
mod logging;
|
||||||
|
mod nats;
|
||||||
mod seal;
|
mod seal;
|
||||||
mod server;
|
mod server;
|
||||||
mod storage;
|
mod storage;
|
||||||
@ -19,6 +20,7 @@ pub use crypto::{
|
|||||||
pub use engines::{EngineConfig, EnginesConfig};
|
pub use engines::{EngineConfig, EnginesConfig};
|
||||||
pub use error::{ConfigError, ConfigResult};
|
pub use error::{ConfigError, ConfigResult};
|
||||||
pub use logging::LoggingConfig;
|
pub use logging::LoggingConfig;
|
||||||
|
pub use nats::NatsVaultConfig;
|
||||||
pub use seal::{AutoUnsealConfig, SealConfig, ShamirSealConfig};
|
pub use seal::{AutoUnsealConfig, SealConfig, ShamirSealConfig};
|
||||||
pub use server::ServerSection;
|
pub use server::ServerSection;
|
||||||
pub use storage::{
|
pub use storage::{
|
||||||
@ -56,6 +58,9 @@ pub struct VaultConfig {
|
|||||||
|
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub telemetry: TelemetryConfig,
|
pub telemetry: TelemetryConfig,
|
||||||
|
|
||||||
|
#[serde(default)]
|
||||||
|
pub nats: NatsVaultConfig,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl VaultConfig {
|
impl VaultConfig {
|
||||||
|
|||||||
42
src/config/nats.rs
Normal file
42
src/config/nats.rs
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
/// NATS event bus configuration for vault lifecycle notifications.
|
||||||
|
///
|
||||||
|
/// If `enabled` is false (the default), no connection is attempted and all
|
||||||
|
/// publish calls are silently dropped. This allows the `nats` feature to be
|
||||||
|
/// compiled in without requiring a running NATS server.
|
||||||
|
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||||
|
pub struct NatsVaultConfig {
|
||||||
|
/// Whether to publish vault events to NATS.
|
||||||
|
#[serde(default)]
|
||||||
|
pub enabled: bool,
|
||||||
|
|
||||||
|
/// NATS server URL (e.g. `nats://localhost:4222`).
|
||||||
|
#[serde(default = "NatsVaultConfig::default_url")]
|
||||||
|
pub url: String,
|
||||||
|
|
||||||
|
/// Subject prefix for all vault events.
|
||||||
|
/// Events are published as `{subject_prefix}.lease.revoked`, etc.
|
||||||
|
#[serde(default = "NatsVaultConfig::default_subject_prefix")]
|
||||||
|
pub subject_prefix: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl NatsVaultConfig {
|
||||||
|
fn default_url() -> String {
|
||||||
|
"nats://127.0.0.1:4222".to_string()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn default_subject_prefix() -> String {
|
||||||
|
"provisioning.vault".to_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for NatsVaultConfig {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
enabled: false,
|
||||||
|
url: Self::default_url(),
|
||||||
|
subject_prefix: Self::default_subject_prefix(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -53,7 +53,14 @@ pub struct SurrealDBStorageConfig {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn default_surrealdb_url() -> String {
|
fn default_surrealdb_url() -> String {
|
||||||
"ws://localhost:8000".to_string()
|
// Embedded SurrealKV: best for vault secrets (ACID, no external process).
|
||||||
|
// Alternatives:
|
||||||
|
// surrealkv:///var/lib/secretumvault/vault.db (production embedded)
|
||||||
|
// rocksdb:///var/lib/secretumvault/hot.db (high-throughput,
|
||||||
|
// logs/embeddings) mem://
|
||||||
|
// (in-memory, tests only) ws://host:8000
|
||||||
|
// (remote SurrealDB via WebSocket)
|
||||||
|
"surrealkv:///var/lib/secretumvault/vault.db".to_string()
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for SurrealDBStorageConfig {
|
impl Default for SurrealDBStorageConfig {
|
||||||
|
|||||||
@ -8,7 +8,9 @@ use crate::config::VaultConfig;
|
|||||||
use crate::crypto::CryptoBackend;
|
use crate::crypto::CryptoBackend;
|
||||||
use crate::engines::{DatabaseEngine, Engine, KVEngine, PkiEngine, TransitEngine};
|
use crate::engines::{DatabaseEngine, Engine, KVEngine, PkiEngine, TransitEngine};
|
||||||
use crate::error::Result;
|
use crate::error::Result;
|
||||||
use crate::storage::StorageBackend;
|
#[cfg(feature = "nats")]
|
||||||
|
use crate::events::{VaultEvent, VaultEventPublisher};
|
||||||
|
use crate::storage::{Lease, StorageBackend};
|
||||||
use crate::telemetry::Metrics;
|
use crate::telemetry::Metrics;
|
||||||
|
|
||||||
/// Vault core - manages engines, crypto backend, and storage
|
/// Vault core - manages engines, crypto backend, and storage
|
||||||
@ -28,6 +30,10 @@ pub struct VaultCore {
|
|||||||
/// Background lease revocation worker (server only)
|
/// Background lease revocation worker (server only)
|
||||||
#[cfg(feature = "server")]
|
#[cfg(feature = "server")]
|
||||||
pub lease_revocation_worker: Arc<LeaseRevocationWorker>,
|
pub lease_revocation_worker: Arc<LeaseRevocationWorker>,
|
||||||
|
/// NATS event publisher — None when disabled in config or nats feature is
|
||||||
|
/// off.
|
||||||
|
#[cfg(feature = "nats")]
|
||||||
|
pub event_publisher: Option<Arc<VaultEventPublisher>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl VaultCore {
|
impl VaultCore {
|
||||||
@ -50,15 +56,29 @@ impl VaultCore {
|
|||||||
// Initialize metrics
|
// Initialize metrics
|
||||||
let metrics = Arc::new(Metrics::new());
|
let metrics = Arc::new(Metrics::new());
|
||||||
|
|
||||||
|
#[cfg(feature = "nats")]
|
||||||
|
let event_publisher = VaultEventPublisher::from_config(&config.nats).await;
|
||||||
|
|
||||||
#[cfg(feature = "server")]
|
#[cfg(feature = "server")]
|
||||||
let lease_revocation_worker = {
|
let lease_revocation_worker = {
|
||||||
use crate::background::RevocationConfig;
|
use crate::background::RevocationConfig;
|
||||||
|
|
||||||
let revocation_config = RevocationConfig::default();
|
let revocation_config = RevocationConfig::default();
|
||||||
|
|
||||||
|
#[cfg(feature = "nats")]
|
||||||
|
let worker = {
|
||||||
|
let mut w = LeaseRevocationWorker::new(storage.clone(), revocation_config);
|
||||||
|
if let Some(ref pub_) = event_publisher {
|
||||||
|
w = w.with_event_publisher(pub_.clone());
|
||||||
|
}
|
||||||
|
Arc::new(w)
|
||||||
|
};
|
||||||
|
#[cfg(not(feature = "nats"))]
|
||||||
let worker = Arc::new(LeaseRevocationWorker::new(
|
let worker = Arc::new(LeaseRevocationWorker::new(
|
||||||
storage.clone(),
|
storage.clone(),
|
||||||
revocation_config,
|
revocation_config,
|
||||||
));
|
));
|
||||||
|
|
||||||
worker.start().await?;
|
worker.start().await?;
|
||||||
worker
|
worker
|
||||||
};
|
};
|
||||||
@ -72,9 +92,32 @@ impl VaultCore {
|
|||||||
metrics,
|
metrics,
|
||||||
#[cfg(feature = "server")]
|
#[cfg(feature = "server")]
|
||||||
lease_revocation_worker,
|
lease_revocation_worker,
|
||||||
|
#[cfg(feature = "nats")]
|
||||||
|
event_publisher,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Store a lease and emit `LeaseIssued` to NATS (best-effort).
|
||||||
|
///
|
||||||
|
/// All code paths that issue a new lease should call this instead of
|
||||||
|
/// `storage.store_lease()` directly, so the event fires unconditionally.
|
||||||
|
pub async fn issue_lease(&self, lease: &Lease) -> Result<()> {
|
||||||
|
self.storage.store_lease(lease).await?;
|
||||||
|
|
||||||
|
#[cfg(feature = "nats")]
|
||||||
|
if let Some(ref pub_) = self.event_publisher {
|
||||||
|
pub_.publish(VaultEvent::LeaseIssued {
|
||||||
|
lease_id: lease.id.clone(),
|
||||||
|
secret_id: lease.secret_id.clone(),
|
||||||
|
expires_at: lease.expires_at,
|
||||||
|
issued_at: lease.issued_at,
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
/// Find engine by path prefix
|
/// Find engine by path prefix
|
||||||
pub fn route_to_engine(&self, path: &str) -> Option<&dyn Engine> {
|
pub fn route_to_engine(&self, path: &str) -> Option<&dyn Engine> {
|
||||||
let mut best_match: Option<(&str, &dyn Engine)> = None;
|
let mut best_match: Option<(&str, &dyn Engine)> = None;
|
||||||
@ -235,6 +278,7 @@ mod tests {
|
|||||||
},
|
},
|
||||||
logging: Default::default(),
|
logging: Default::default(),
|
||||||
telemetry: Default::default(),
|
telemetry: Default::default(),
|
||||||
|
nats: Default::default(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
113
src/events/mod.rs
Normal file
113
src/events/mod.rs
Normal file
@ -0,0 +1,113 @@
|
|||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use async_nats::Client;
|
||||||
|
use chrono::{DateTime, Utc};
|
||||||
|
use serde::Serialize;
|
||||||
|
use tracing::{debug, warn};
|
||||||
|
|
||||||
|
use crate::config::NatsVaultConfig;
|
||||||
|
|
||||||
|
// ── Event types ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/// Vault lifecycle events published to NATS.
|
||||||
|
///
|
||||||
|
/// All variants serialize to JSON with a `event_type` discriminant field.
|
||||||
|
/// Subjects: `{prefix}.lease.revoked`, `{prefix}.lease.revocation_failed`, etc.
|
||||||
|
#[derive(Debug, Clone, Serialize)]
|
||||||
|
#[serde(tag = "event_type", rename_all = "snake_case")]
|
||||||
|
pub enum VaultEvent {
|
||||||
|
LeaseIssued {
|
||||||
|
lease_id: String,
|
||||||
|
secret_id: String,
|
||||||
|
expires_at: DateTime<Utc>,
|
||||||
|
issued_at: DateTime<Utc>,
|
||||||
|
},
|
||||||
|
LeaseRevoked {
|
||||||
|
lease_id: String,
|
||||||
|
secret_id: String,
|
||||||
|
revoked_at: DateTime<Utc>,
|
||||||
|
},
|
||||||
|
LeaseRevocationFailed {
|
||||||
|
lease_id: String,
|
||||||
|
secret_id: String,
|
||||||
|
retries: u32,
|
||||||
|
error: String,
|
||||||
|
failed_at: DateTime<Utc>,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
impl VaultEvent {
|
||||||
|
/// Returns the NATS subject suffix for this event (appended to prefix).
|
||||||
|
fn subject_suffix(&self) -> &'static str {
|
||||||
|
match self {
|
||||||
|
VaultEvent::LeaseIssued { .. } => "lease.issued",
|
||||||
|
VaultEvent::LeaseRevoked { .. } => "lease.revoked",
|
||||||
|
VaultEvent::LeaseRevocationFailed { .. } => "lease.revocation_failed",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Publisher ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/// Publishes vault lifecycle events to NATS (best-effort, fire-and-forget).
|
||||||
|
///
|
||||||
|
/// All publish errors are logged at WARN level and never propagated — vault
|
||||||
|
/// operations must not fail because NATS is unavailable.
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct VaultEventPublisher {
|
||||||
|
client: Client,
|
||||||
|
subject_prefix: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Debug for VaultEventPublisher {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
f.debug_struct("VaultEventPublisher")
|
||||||
|
.field("subject_prefix", &self.subject_prefix)
|
||||||
|
.finish_non_exhaustive()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl VaultEventPublisher {
|
||||||
|
/// Connect to NATS and return a publisher.
|
||||||
|
///
|
||||||
|
/// Returns `None` if `config.enabled` is false — callers store an
|
||||||
|
/// `Option<Arc<VaultEventPublisher>>` and skip publish when it is `None`.
|
||||||
|
pub async fn from_config(config: &NatsVaultConfig) -> Option<Arc<Self>> {
|
||||||
|
if !config.enabled {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
match async_nats::connect(&config.url).await {
|
||||||
|
Ok(client) => {
|
||||||
|
tracing::info!(url = %config.url, "NATS connected for vault event publishing");
|
||||||
|
Some(Arc::new(Self {
|
||||||
|
client,
|
||||||
|
subject_prefix: config.subject_prefix.clone(),
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
warn!(url = %config.url, "NATS connect failed — vault events disabled: {e}");
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Publish an event. Errors are absorbed; the vault operation always
|
||||||
|
/// continues.
|
||||||
|
pub async fn publish(&self, event: VaultEvent) {
|
||||||
|
let subject = format!("{}.{}", self.subject_prefix, event.subject_suffix());
|
||||||
|
|
||||||
|
let payload: Vec<u8> = match serde_json::to_vec(&event) {
|
||||||
|
Ok(b) => b,
|
||||||
|
Err(e) => {
|
||||||
|
warn!(%subject, "failed to serialize vault event: {e}");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
match self.client.publish(subject.clone(), payload.into()).await {
|
||||||
|
Ok(()) => debug!(%subject, "vault event published"),
|
||||||
|
Err(e) => warn!(%subject, "vault event publish failed (event dropped): {e}"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -13,6 +13,9 @@ pub mod error;
|
|||||||
pub mod storage;
|
pub mod storage;
|
||||||
pub mod telemetry;
|
pub mod telemetry;
|
||||||
|
|
||||||
|
#[cfg(feature = "nats")]
|
||||||
|
pub mod events;
|
||||||
|
|
||||||
#[cfg(feature = "server")]
|
#[cfg(feature = "server")]
|
||||||
pub mod api;
|
pub mod api;
|
||||||
|
|
||||||
|
|||||||
@ -27,9 +27,9 @@ use async_trait::async_trait;
|
|||||||
use base64::engine::general_purpose::STANDARD as BASE64;
|
use base64::engine::general_purpose::STANDARD as BASE64;
|
||||||
use base64::Engine as _;
|
use base64::Engine as _;
|
||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
use serde::{Deserialize, Serialize};
|
|
||||||
use surrealdb::engine::any::Any;
|
use surrealdb::engine::any::Any;
|
||||||
use surrealdb::opt::auth::Root;
|
use surrealdb::opt::auth::Root;
|
||||||
|
use surrealdb::types::SurrealValue;
|
||||||
use surrealdb::Surreal;
|
use surrealdb::Surreal;
|
||||||
|
|
||||||
use crate::config::SurrealDBStorageConfig;
|
use crate::config::SurrealDBStorageConfig;
|
||||||
@ -38,7 +38,8 @@ use crate::storage::{EncryptedData, Lease, StorageBackend, StoredKey, StoredPoli
|
|||||||
|
|
||||||
// ── Internal record types ──────────────────────────────────────────────────
|
// ── Internal record types ──────────────────────────────────────────────────
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, SurrealValue)]
|
||||||
|
#[surreal(crate = "surrealdb::types")]
|
||||||
struct SecretRecord {
|
struct SecretRecord {
|
||||||
/// Original path stored as a field for prefix-range queries.
|
/// Original path stored as a field for prefix-range queries.
|
||||||
path: String,
|
path: String,
|
||||||
@ -49,7 +50,8 @@ struct SecretRecord {
|
|||||||
algorithm: String,
|
algorithm: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, SurrealValue)]
|
||||||
|
#[surreal(crate = "surrealdb::types")]
|
||||||
struct KeyRecord {
|
struct KeyRecord {
|
||||||
/// Mirrors the SurrealDB record ID key for listing without RecordId
|
/// Mirrors the SurrealDB record ID key for listing without RecordId
|
||||||
/// parsing.
|
/// parsing.
|
||||||
@ -67,7 +69,8 @@ struct KeyRecord {
|
|||||||
updated_at: String,
|
updated_at: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, SurrealValue)]
|
||||||
|
#[surreal(crate = "surrealdb::types")]
|
||||||
struct PolicyRecord {
|
struct PolicyRecord {
|
||||||
/// Mirrors the SurrealDB record ID key for listing.
|
/// Mirrors the SurrealDB record ID key for listing.
|
||||||
policy_name: String,
|
policy_name: String,
|
||||||
@ -78,7 +81,8 @@ struct PolicyRecord {
|
|||||||
updated_at: String,
|
updated_at: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, SurrealValue)]
|
||||||
|
#[surreal(crate = "surrealdb::types")]
|
||||||
struct LeaseRecord {
|
struct LeaseRecord {
|
||||||
/// Mirrors the SurrealDB record ID key for listing.
|
/// Mirrors the SurrealDB record ID key for listing.
|
||||||
lease_id: String,
|
lease_id: String,
|
||||||
@ -143,12 +147,19 @@ impl SurrealDBBackend {
|
|||||||
.await
|
.await
|
||||||
.map_err(|e| StorageError::Internal(format!("SurrealDB connect: {e}")))?;
|
.map_err(|e| StorageError::Internal(format!("SurrealDB connect: {e}")))?;
|
||||||
|
|
||||||
// mem:// has no auth subsystem; skip signin for embedded engine.
|
// Embedded engines (mem://, surrealkv://, rocksdb://) have no auth subsystem.
|
||||||
if !config.url.starts_with("mem://") {
|
// Only remote engines (ws://, wss://, http://, https://) require signin.
|
||||||
|
let is_embedded = config.url.starts_with("mem://")
|
||||||
|
|| config.url.starts_with("surrealkv://")
|
||||||
|
|| config.url.starts_with("rocksdb://");
|
||||||
|
if !is_embedded {
|
||||||
if let (Some(username), Some(password)) = (&config.username, &config.password) {
|
if let (Some(username), Some(password)) = (&config.username, &config.password) {
|
||||||
db.signin(Root { username, password })
|
db.signin(Root {
|
||||||
.await
|
username: username.clone(),
|
||||||
.map_err(|e| StorageError::Internal(format!("SurrealDB auth: {e}")))?;
|
password: password.clone(),
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|e| StorageError::Internal(format!("SurrealDB auth: {e}")))?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -228,7 +239,8 @@ impl StorageBackend for SurrealDBBackend {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async fn list_secrets(&self, prefix: &str) -> StorageResult<Vec<String>> {
|
async fn list_secrets(&self, prefix: &str) -> StorageResult<Vec<String>> {
|
||||||
#[derive(Deserialize)]
|
#[derive(SurrealValue)]
|
||||||
|
#[surreal(crate = "surrealdb::types")]
|
||||||
struct PathOnly {
|
struct PathOnly {
|
||||||
path: String,
|
path: String,
|
||||||
}
|
}
|
||||||
@ -317,7 +329,8 @@ impl StorageBackend for SurrealDBBackend {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async fn list_keys(&self) -> StorageResult<Vec<String>> {
|
async fn list_keys(&self) -> StorageResult<Vec<String>> {
|
||||||
#[derive(Deserialize)]
|
#[derive(SurrealValue)]
|
||||||
|
#[surreal(crate = "surrealdb::types")]
|
||||||
struct KeyIdOnly {
|
struct KeyIdOnly {
|
||||||
key_id: String,
|
key_id: String,
|
||||||
}
|
}
|
||||||
@ -381,7 +394,8 @@ impl StorageBackend for SurrealDBBackend {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async fn list_policies(&self) -> StorageResult<Vec<String>> {
|
async fn list_policies(&self) -> StorageResult<Vec<String>> {
|
||||||
#[derive(Deserialize)]
|
#[derive(SurrealValue)]
|
||||||
|
#[surreal(crate = "surrealdb::types")]
|
||||||
struct PolicyNameOnly {
|
struct PolicyNameOnly {
|
||||||
policy_name: String,
|
policy_name: String,
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user