prvng_platform/crates/orchestrator/src/monitor.rs

828 lines
28 KiB
Rust
Raw Normal View History

2025-10-07 10:59:52 +01:00
//! Monitoring and metrics system for workflow orchestrator
//!
//! This module provides comprehensive monitoring, metrics collection, and
//! health checking functionality with Prometheus-compatible metrics export
//! and real-time updates via WebSocket/SSE.
use std::{
collections::HashMap,
sync::{
atomic::{AtomicU64, AtomicUsize, Ordering},
Arc,
},
time::{Duration, Instant},
};
use anyhow::Result;
use async_trait::async_trait;
use axum::{
extract::{ws::WebSocket, WebSocketUpgrade},
routing::get,
Router,
};
use serde::{Deserialize, Serialize};
use tokio::sync::{broadcast, Mutex, RwLock};
2025-10-07 10:59:52 +01:00
use tracing::{debug, error, info, warn};
use crate::{
state::{ComponentHealth, HealthStatus, WorkflowStateManager},
storage::{TaskEvent, TaskStorage},
2025-10-07 10:59:52 +01:00
};
/// Configuration for monitoring system
///
/// Controls how often the background tasks run and which HTTP endpoints the
/// monitoring router exposes. Baseline values come from the `Default` impl.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MonitoringConfig {
/// Metrics collection interval in seconds (period of the snapshot task)
pub metrics_interval_seconds: u64,
/// Health check interval in seconds (period of the health-check task)
pub health_check_interval_seconds: u64,
/// Maximum number of metrics to retain in memory
// NOTE(review): not read by the code visible in this file; the snapshot
// history uses its own fixed cap. Confirm whether this should be wired in.
pub max_metrics_in_memory: usize,
/// Enable Prometheus metrics export
pub enable_prometheus: bool,
/// Prometheus metrics path
pub prometheus_path: String,
/// Enable WebSocket real-time updates
pub enable_websocket: bool,
/// WebSocket endpoint path
pub websocket_path: String,
/// Enable Server-Sent Events
// NOTE(review): no SSE route is registered by the code visible in this file.
pub enable_sse: bool,
/// SSE endpoint path
pub sse_path: String,
}
impl Default for MonitoringConfig {
fn default() -> Self {
Self {
metrics_interval_seconds: 30,
health_check_interval_seconds: 60,
max_metrics_in_memory: 1000,
enable_prometheus: true,
prometheus_path: "/metrics".to_string(),
enable_websocket: true,
websocket_path: "/ws".to_string(),
enable_sse: true,
sse_path: "/events".to_string(),
}
}
}
/// Real-time monitoring event for WebSocket/SSE
///
/// This is the message type carried by the monitoring broadcast channel; the
/// WebSocket handler serializes each one to JSON before sending it to a client.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MonitoringEvent {
/// Category of the event.
pub event_type: MonitoringEventType,
/// UTC time attached by whoever built the event.
pub timestamp: chrono::DateTime<chrono::Utc>,
/// Free-form JSON payload; its shape depends on `event_type`.
pub data: serde_json::Value,
/// Additional string key/value annotations (e.g. a `"component"` name).
pub metadata: HashMap<String, String>,
}
/// Category tag carried by every `MonitoringEvent`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum MonitoringEventType {
/// A task changed status. Events of this type are additionally forwarded to
/// storage when their payload deserializes as a `TaskEvent`.
TaskStatusChanged,
// NOTE(review): not emitted by the code visible in this file.
WorkflowCompleted,
/// A component health result; the periodic health task emits one for each
/// component that is not healthy.
HealthStatusChanged,
// NOTE(review): not emitted by the code visible in this file.
MetricsUpdated,
// NOTE(review): not emitted by the code visible in this file.
SystemAlert,
// NOTE(review): not emitted by the code visible in this file.
PerformanceWarning,
}
/// Performance metrics collector
///
/// Scalar counters and gauges are plain atomics so they can be updated through
/// a shared reference; the custom-metric map and the snapshot history sit
/// behind async read/write locks.
#[derive(Debug)]
pub struct MetricsCollector {
/// Task execution metrics
task_counter: AtomicUsize,
completed_tasks: AtomicUsize,
failed_tasks: AtomicUsize,
/// Running mean of completed-task durations, in whole milliseconds.
average_task_duration_ms: AtomicU64,
/// System performance metrics
memory_usage_mb: AtomicU64,
cpu_usage_percent: AtomicU64, // Store as integer (percent * 100)
/// Workflow metrics
active_workflows: AtomicUsize,
completed_workflows: AtomicUsize,
/// Storage metrics
storage_operations: AtomicUsize,
storage_errors: AtomicUsize,
/// Custom metrics storage
custom_metrics: Arc<RwLock<HashMap<String, f64>>>,
/// Metrics history for trends
metrics_history: Arc<RwLock<Vec<MetricsSnapshot>>>,
/// Construction time; used to report uptime in snapshots.
start_time: Instant,
}
impl Default for MetricsCollector {
fn default() -> Self {
Self::new()
}
}
2025-10-07 10:59:52 +01:00
impl MetricsCollector {
/// Create new metrics collector
///
/// Every counter and gauge starts at zero, the custom-metric map and the
/// snapshot history start empty, and the uptime clock starts at the moment of
/// construction.
pub fn new() -> Self {
    let zero_count = || AtomicUsize::new(0);
    let zero_gauge = || AtomicU64::new(0);

    Self {
        task_counter: zero_count(),
        completed_tasks: zero_count(),
        failed_tasks: zero_count(),
        average_task_duration_ms: zero_gauge(),
        memory_usage_mb: zero_gauge(),
        cpu_usage_percent: zero_gauge(),
        active_workflows: zero_count(),
        completed_workflows: zero_count(),
        storage_operations: zero_count(),
        storage_errors: zero_count(),
        custom_metrics: Arc::new(RwLock::new(HashMap::new())),
        metrics_history: Arc::new(RwLock::new(Vec::new())),
        start_time: Instant::now(),
    }
}
/// Increment task counter
///
/// Adds one to the total-task counter.
pub fn increment_task_counter(&self) {
    let _previous = self.task_counter.fetch_add(1, Ordering::Relaxed);
}
/// Record task completion
pub fn record_task_completion(&self, duration_ms: u64) {
self.completed_tasks.fetch_add(1, Ordering::Relaxed);
// Update average duration (simple moving average)
let current_avg = self.average_task_duration_ms.load(Ordering::Relaxed);
let completed = self.completed_tasks.load(Ordering::Relaxed);
if completed > 0 {
let new_avg = ((current_avg * (completed - 1) as u64) + duration_ms) / completed as u64;
self.average_task_duration_ms
.store(new_avg, Ordering::Relaxed);
2025-10-07 10:59:52 +01:00
}
}
/// Record task failure
///
/// Adds one to the failed-task counter.
pub fn record_task_failure(&self) {
    let _previous = self.failed_tasks.fetch_add(1, Ordering::Relaxed);
}
/// Update system metrics
pub fn update_system_metrics(&self, memory_mb: u64, cpu_percent: f64) {
self.memory_usage_mb.store(memory_mb, Ordering::Relaxed);
self.cpu_usage_percent
.store((cpu_percent * 100.0) as u64, Ordering::Relaxed);
2025-10-07 10:59:52 +01:00
}
/// Update workflow metrics
///
/// Overwrites (does not add to) the active and completed workflow gauges.
pub fn update_workflow_metrics(&self, active: usize, completed: usize) {
    let gauges = [
        (&self.active_workflows, active),
        (&self.completed_workflows, completed),
    ];
    for (gauge, value) in gauges.iter() {
        gauge.store(*value, Ordering::Relaxed);
    }
}
/// Record storage operation
///
/// Counts every operation; a failed one (`success == false`) is also counted
/// as a storage error.
pub fn record_storage_operation(&self, success: bool) {
    self.storage_operations.fetch_add(1, Ordering::Relaxed);
    if success {
        return;
    }
    self.storage_errors.fetch_add(1, Ordering::Relaxed);
}
/// Set custom metric
///
/// Inserts `value` under `name`, replacing any value stored there before.
pub async fn set_custom_metric(&self, name: String, value: f64) {
    self.custom_metrics.write().await.insert(name, value);
}
/// Get custom metric
///
/// Returns the stored value for `name`, or `None` when no such metric exists.
pub async fn get_custom_metric(&self, name: &str) -> Option<f64> {
    self.custom_metrics.read().await.get(name).copied()
}
/// Get all current metrics
pub async fn get_current_metrics(&self) -> MetricsSnapshot {
let custom_metrics = {
let metrics = self.custom_metrics.read().await;
metrics.clone()
};
MetricsSnapshot {
timestamp: chrono::Utc::now(),
total_tasks: self.task_counter.load(Ordering::Relaxed),
completed_tasks: self.completed_tasks.load(Ordering::Relaxed),
failed_tasks: self.failed_tasks.load(Ordering::Relaxed),
average_task_duration_ms: self.average_task_duration_ms.load(Ordering::Relaxed),
memory_usage_mb: self.memory_usage_mb.load(Ordering::Relaxed),
cpu_usage_percent: self.cpu_usage_percent.load(Ordering::Relaxed) as f64 / 100.0,
active_workflows: self.active_workflows.load(Ordering::Relaxed),
completed_workflows: self.completed_workflows.load(Ordering::Relaxed),
storage_operations: self.storage_operations.load(Ordering::Relaxed),
storage_errors: self.storage_errors.load(Ordering::Relaxed),
uptime_seconds: self.start_time.elapsed().as_secs(),
custom_metrics,
}
}
/// Take snapshot of current metrics
///
/// Appends the current metrics to the history and discards the oldest entries
/// so that at most 100 snapshots remain. Always returns `Ok(())`.
pub async fn take_snapshot(&self) -> Result<()> {
    const MAX_SNAPSHOTS: usize = 100;

    // Build the snapshot before taking the write lock.
    let latest = self.get_current_metrics().await;

    let mut history = self.metrics_history.write().await;
    history.push(latest);

    let excess = history.len().saturating_sub(MAX_SNAPSHOTS);
    if excess > 0 {
        history.drain(..excess);
    }
    Ok(())
}
/// Get metrics history
///
/// Returns a copy of the retained snapshots, oldest first.
pub async fn get_metrics_history(&self) -> Vec<MetricsSnapshot> {
    self.metrics_history.read().await.to_vec()
}
/// Generate Prometheus-compatible metrics string
pub async fn generate_prometheus_metrics(&self) -> String {
let metrics = self.get_current_metrics().await;
let mut output = String::new();
// Basic metrics
output.push_str("# HELP orchestrator_tasks_total Total number of tasks processed\n");
output.push_str("# TYPE orchestrator_tasks_total counter\n");
output.push_str(&format!(
"orchestrator_tasks_total {}\n",
metrics.total_tasks
));
output.push_str("# HELP orchestrator_tasks_completed Total number of completed tasks\n");
output.push_str("# TYPE orchestrator_tasks_completed counter\n");
output.push_str(&format!(
"orchestrator_tasks_completed {}\n",
metrics.completed_tasks
));
output.push_str("# HELP orchestrator_tasks_failed Total number of failed tasks\n");
output.push_str("# TYPE orchestrator_tasks_failed counter\n");
output.push_str(&format!(
"orchestrator_tasks_failed {}\n",
metrics.failed_tasks
));
output.push_str(
"# HELP orchestrator_task_duration_ms Average task duration in milliseconds\n",
);
output.push_str("# TYPE orchestrator_task_duration_ms gauge\n");
output.push_str(&format!(
"orchestrator_task_duration_ms {}\n",
metrics.average_task_duration_ms
));
2025-10-07 10:59:52 +01:00
// System metrics
output.push_str("# HELP orchestrator_memory_usage_mb Current memory usage in MB\n");
output.push_str("# TYPE orchestrator_memory_usage_mb gauge\n");
output.push_str(&format!(
"orchestrator_memory_usage_mb {}\n",
metrics.memory_usage_mb
));
output.push_str("# HELP orchestrator_cpu_usage_percent Current CPU usage percentage\n");
output.push_str("# TYPE orchestrator_cpu_usage_percent gauge\n");
output.push_str(&format!(
"orchestrator_cpu_usage_percent {}\n",
metrics.cpu_usage_percent
));
2025-10-07 10:59:52 +01:00
// Workflow metrics
output.push_str("# HELP orchestrator_workflows_active Currently active workflows\n");
output.push_str("# TYPE orchestrator_workflows_active gauge\n");
output.push_str(&format!(
"orchestrator_workflows_active {}\n",
metrics.active_workflows
));
output.push_str("# HELP orchestrator_workflows_completed Total completed workflows\n");
output.push_str("# TYPE orchestrator_workflows_completed counter\n");
output.push_str(&format!(
"orchestrator_workflows_completed {}\n",
metrics.completed_workflows
));
2025-10-07 10:59:52 +01:00
// Storage metrics
output.push_str("# HELP orchestrator_storage_operations_total Total storage operations\n");
output.push_str("# TYPE orchestrator_storage_operations_total counter\n");
output.push_str(&format!(
"orchestrator_storage_operations_total {}\n",
metrics.storage_operations
));
output.push_str("# HELP orchestrator_storage_errors_total Total storage errors\n");
output.push_str("# TYPE orchestrator_storage_errors_total counter\n");
output.push_str(&format!(
"orchestrator_storage_errors_total {}\n",
metrics.storage_errors
));
2025-10-07 10:59:52 +01:00
// Uptime
output.push_str("# HELP orchestrator_uptime_seconds System uptime in seconds\n");
output.push_str("# TYPE orchestrator_uptime_seconds gauge\n");
output.push_str(&format!(
"orchestrator_uptime_seconds {}\n",
metrics.uptime_seconds
));
2025-10-07 10:59:52 +01:00
// Custom metrics
for (name, value) in &metrics.custom_metrics {
let metric_name = format!("orchestrator_custom_{}", name.replace("-", "_"));
output.push_str(&format!("# HELP {} Custom metric: {}\n", metric_name, name));
output.push_str(&format!("# TYPE {} gauge\n", metric_name));
output.push_str(&format!("{} {}\n", metric_name, value));
}
output
}
}
/// Snapshot of metrics at a point in time
///
/// A plain-data copy of every value tracked by `MetricsCollector`, suitable
/// for serialization.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MetricsSnapshot {
/// UTC time at which the snapshot was assembled.
pub timestamp: chrono::DateTime<chrono::Utc>,
pub total_tasks: usize,
pub completed_tasks: usize,
pub failed_tasks: usize,
/// Running mean of completed-task durations, in whole milliseconds.
pub average_task_duration_ms: u64,
pub memory_usage_mb: u64,
/// CPU usage as a percentage (already converted back from the collector's
/// integer hundredths-of-a-percent representation).
pub cpu_usage_percent: f64,
pub active_workflows: usize,
pub completed_workflows: usize,
pub storage_operations: usize,
pub storage_errors: usize,
/// Whole seconds elapsed since the collector was created.
pub uptime_seconds: u64,
/// Copy of the custom-metric map at snapshot time.
pub custom_metrics: HashMap<String, f64>,
}
/// System health monitor
///
/// Runs a built-in storage check plus any registered `HealthCheck`s and
/// reports each result to the workflow state manager.
pub struct HealthMonitor {
/// Storage backend probed by the built-in `"storage"` check.
storage: Arc<dyn TaskStorage>,
/// Receives every component's health result after a run.
state_manager: Arc<WorkflowStateManager>,
/// Registered checks, keyed by component name.
health_checks: Arc<RwLock<HashMap<String, Box<dyn HealthCheck + Send + Sync>>>>,
/// Updated at the end of every run.
// NOTE(review): written but never read in the code visible in this file.
last_health_check: Arc<Mutex<Instant>>,
}
impl HealthMonitor {
/// Create new health monitor
pub fn new(storage: Arc<dyn TaskStorage>, state_manager: Arc<WorkflowStateManager>) -> Self {
2025-10-07 10:59:52 +01:00
Self {
storage,
state_manager,
health_checks: Arc::new(RwLock::new(HashMap::new())),
last_health_check: Arc::new(Mutex::new(Instant::now())),
}
}
/// Register a health check
///
/// Stores `health_check` under `name`; a check previously registered under
/// the same name is replaced.
pub async fn register_health_check<H>(&self, name: String, health_check: H)
where
    H: HealthCheck + Send + Sync + 'static,
{
    let mut registry = self.health_checks.write().await;
    let boxed: Box<dyn HealthCheck + Send + Sync> = Box::new(health_check);
    registry.insert(name, boxed);
}
/// Run all health checks
///
/// Runs the built-in storage check followed by every registered check, pushes
/// each result to the state manager, records the time of this run, and returns
/// the results keyed by component name. A check that returns an error is
/// reported as `Unhealthy` with the error text attached.
pub async fn run_health_checks(&self) -> HashMap<String, HealthStatus> {
    let mut results = HashMap::new();
    // Held for the rest of the function, exactly like the registry it guards.
    let registered = self.health_checks.read().await;

    // Built-in storage check.
    results.insert("storage".to_string(), self.check_storage_health().await);

    // Registered checks; a result stored under an existing key replaces it.
    for (name, check) in registered.iter() {
        let outcome = match check.check().await {
            Ok(reported) => reported,
            Err(e) => HealthStatus {
                component: name.clone(),
                status: ComponentHealth::Unhealthy,
                last_check: chrono::Utc::now(),
                details: HashMap::new(),
                error: Some(e.to_string()),
            },
        };
        results.insert(name.clone(), outcome);
    }

    // Mirror every result into the state manager.
    for (component, health) in &results {
        self.state_manager
            .update_health_status(
                component,
                health.status.clone(),
                health.details.clone(),
                health.error.clone(),
            )
            .await;
    }

    // Record when this run finished.
    let mut last_run = self.last_health_check.lock().await;
    *last_run = Instant::now();
    drop(last_run);

    results
}
/// Check storage backend health
///
/// Maps the storage probe onto a component status: `Ok(true)` is healthy (and
/// storage statistics are attached when they can be fetched), `Ok(false)` is
/// degraded, and an error is unhealthy with the error text attached.
async fn check_storage_health(&self) -> HealthStatus {
    let mut details = HashMap::new();

    let (status, error) = match self.storage.health_check().await {
        Ok(true) => {
            details.insert("status".to_string(), "operational".to_string());
            // Statistics are optional extras; a failure to fetch them is ignored.
            if let Ok(stats) = self.storage.get_statistics().await {
                details.extend([
                    ("total_tasks".to_string(), stats.total_tasks.to_string()),
                    ("pending_tasks".to_string(), stats.pending_tasks.to_string()),
                    (
                        "storage_size".to_string(),
                        stats.total_storage_size.to_string(),
                    ),
                ]);
            }
            (ComponentHealth::Healthy, None)
        }
        Ok(false) => (
            ComponentHealth::Degraded,
            Some("Storage health check returned false".to_string()),
        ),
        Err(e) => (ComponentHealth::Unhealthy, Some(e.to_string())),
    };

    HealthStatus {
        component: "storage".to_string(),
        status,
        last_check: chrono::Utc::now(),
        details,
        error,
    }
}
/// Get overall system health
pub async fn get_system_health(&self) -> SystemHealthStatus {
let health_results = self.run_health_checks().await;
let total_components = health_results.len();
let healthy_components = health_results
.values()
2025-10-07 10:59:52 +01:00
.filter(|status| status.status == ComponentHealth::Healthy)
.count();
let degraded_components = health_results
.values()
2025-10-07 10:59:52 +01:00
.filter(|status| status.status == ComponentHealth::Degraded)
.count();
let unhealthy_components = health_results
.values()
2025-10-07 10:59:52 +01:00
.filter(|status| status.status == ComponentHealth::Unhealthy)
.count();
let overall_status = if unhealthy_components > 0 {
ComponentHealth::Unhealthy
} else if degraded_components > 0 {
ComponentHealth::Degraded
} else {
ComponentHealth::Healthy
};
SystemHealthStatus {
overall_status,
total_components,
healthy_components,
degraded_components,
unhealthy_components,
component_details: health_results,
last_check: chrono::Utc::now(),
}
}
}
/// Overall system health status
///
/// Aggregate view over one run of all health checks.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SystemHealthStatus {
/// Worst status among all components.
pub overall_status: ComponentHealth,
/// Number of components that were checked.
pub total_components: usize,
pub healthy_components: usize,
pub degraded_components: usize,
pub unhealthy_components: usize,
/// Per-component results, keyed by component name.
pub component_details: HashMap<String, HealthStatus>,
/// UTC time at which this summary was assembled.
pub last_check: chrono::DateTime<chrono::Utc>,
}
/// Health check trait for components
///
/// Implementors probe one component and report its status. When `check`
/// returns an error, the health monitor records the component as unhealthy
/// with the error text attached.
#[async_trait]
pub trait HealthCheck {
/// Probe the component once and describe its current health.
async fn check(&self) -> Result<HealthStatus>;
}
/// Main monitoring system that coordinates all monitoring components
///
/// Owns the metrics collector, the health monitor and the broadcast channel
/// that feeds real-time subscribers.
pub struct MonitoringSystem {
/// Intervals and endpoint switches.
config: MonitoringConfig,
metrics_collector: Arc<MetricsCollector>,
health_monitor: Arc<HealthMonitor>,
// NOTE(review): stored but not used directly by the methods visible in this
// file; the health monitor holds its own handle.
state_manager: Arc<WorkflowStateManager>,
/// Sending side of the event channel; each WebSocket connection subscribes
/// its own receiver.
event_broadcaster: broadcast::Sender<MonitoringEvent>,
/// Receives task events forwarded by `publish_event`.
storage: Arc<dyn TaskStorage>,
}
impl MonitoringSystem {
/// Create new monitoring system
pub fn new(
config: MonitoringConfig,
storage: Arc<dyn TaskStorage>,
state_manager: Arc<WorkflowStateManager>,
) -> Self {
let metrics_collector = Arc::new(MetricsCollector::new());
let health_monitor = Arc::new(HealthMonitor::new(storage.clone(), state_manager.clone()));
let (event_broadcaster, _) = broadcast::channel(1000);
Self {
config,
metrics_collector,
health_monitor,
state_manager,
event_broadcaster,
storage,
}
}
/// Initialize monitoring system
///
/// Launches the background monitoring tasks and logs before and after. Any
/// error from starting the tasks is returned to the caller.
pub async fn init(&self) -> Result<()> {
    info!("Initializing monitoring system");
    // Start background monitoring tasks; only log success when that worked.
    self.start_monitoring_tasks()
        .await
        .map(|()| info!("Monitoring system initialized successfully"))
}
/// Get metrics collector
///
/// Returns another shared handle to the collector.
pub fn metrics_collector(&self) -> Arc<MetricsCollector> {
    Arc::clone(&self.metrics_collector)
}
/// Get health monitor
///
/// Returns another shared handle to the health monitor.
pub fn health_monitor(&self) -> Arc<HealthMonitor> {
    Arc::clone(&self.health_monitor)
}
/// Create monitoring routes for web server
pub fn create_routes<S>(&self) -> Router<S>
where
S: Clone + Send + Sync + 'static,
{
2025-10-07 10:59:52 +01:00
let mut router = Router::new();
if self.config.enable_prometheus {
let metrics_collector = self.metrics_collector.clone();
router = router.route(
&self.config.prometheus_path,
get({
let metrics_collector = metrics_collector.clone();
move || async move { metrics_collector.generate_prometheus_metrics().await }
2025-10-07 10:59:52 +01:00
}),
);
}
if self.config.enable_websocket {
let event_broadcaster = self.event_broadcaster.clone();
let websocket_handler = |ws: WebSocketUpgrade| async move {
ws.on_upgrade(move |socket| {
handle_websocket_connection(socket, event_broadcaster.clone())
})
};
router = router.route(&self.config.websocket_path, get(websocket_handler));
2025-10-07 10:59:52 +01:00
}
router
}
/// Publish monitoring event
pub async fn publish_event(&self, event: MonitoringEvent) -> Result<()> {
match self.event_broadcaster.send(event.clone()) {
Ok(subscriber_count) => {
debug!(
"Published monitoring event to {} subscribers",
subscriber_count
);
2025-10-07 10:59:52 +01:00
}
Err(_) => {
// No subscribers, which is fine
}
}
// Also store in storage if it's a task event
if let MonitoringEventType::TaskStatusChanged = event.event_type {
if let Ok(task_event) = serde_json::from_value::<TaskEvent>(event.data) {
let _ = self.storage.publish_event(task_event).await;
2025-10-07 10:59:52 +01:00
}
}
Ok(())
}
/// Start background monitoring tasks
async fn start_monitoring_tasks(&self) -> Result<()> {
// Metrics collection task
let metrics_collector = self.metrics_collector.clone();
let metrics_interval = Duration::from_secs(self.config.metrics_interval_seconds);
tokio::spawn(async move {
let mut interval = tokio::time::interval(metrics_interval);
loop {
interval.tick().await;
if let Err(e) = metrics_collector.take_snapshot().await {
error!("Failed to take metrics snapshot: {}", e);
}
}
});
// Health monitoring task
let health_monitor = self.health_monitor.clone();
let health_interval = Duration::from_secs(self.config.health_check_interval_seconds);
let event_broadcaster = self.event_broadcaster.clone();
tokio::spawn(async move {
let mut interval = tokio::time::interval(health_interval);
loop {
interval.tick().await;
Self::process_health_checks(&health_monitor, &event_broadcaster).await;
2025-10-07 10:59:52 +01:00
}
});
Ok(())
}
async fn process_health_checks(
health_monitor: &Arc<HealthMonitor>,
event_broadcaster: &tokio::sync::broadcast::Sender<MonitoringEvent>,
) {
let health_results = health_monitor.run_health_checks().await;
// Broadcast health status changes
for (component, status) in health_results {
if status.status == ComponentHealth::Healthy {
continue;
}
let metadata = HashMap::from([("component".to_string(), component)]);
let event = MonitoringEvent {
event_type: MonitoringEventType::HealthStatusChanged,
timestamp: chrono::Utc::now(),
data: serde_json::to_value(&status).unwrap_or_default(),
metadata,
};
let _ = event_broadcaster.send(event);
}
}
2025-10-07 10:59:52 +01:00
}
/// Handle WebSocket connection for real-time monitoring
async fn handle_websocket_connection(
mut socket: WebSocket,
event_broadcaster: broadcast::Sender<MonitoringEvent>,
) {
let mut event_receiver = event_broadcaster.subscribe();
loop {
tokio::select! {
event_result = event_receiver.recv() => {
match event_result {
Ok(event) => {
if let Ok(json) = serde_json::to_string(&event) {
if socket.send(axum::extract::ws::Message::Text(json.into())).await.is_err() {
break;
}
}
}
Err(broadcast::error::RecvError::Lagged(_)) => {
warn!("WebSocket client lagged behind, skipping events");
continue;
}
Err(broadcast::error::RecvError::Closed) => {
break;
}
}
}
// Handle incoming WebSocket messages (ping/pong, client requests)
msg_result = socket.recv() => {
match msg_result {
Some(Ok(msg)) => {
match msg {
axum::extract::ws::Message::Close(_) => break,
axum::extract::ws::Message::Pong(_) => {
// Handle pong response
}
_ => {
// Handle other message types if needed
}
}
}
Some(Err(_)) | None => break,
}
}
}
}
debug!("WebSocket connection closed");
}
/// Example health check implementation for system resources
///
/// Compares memory and CPU usage against the configured thresholds: above a
/// threshold the component is degraded, above twice a threshold it is
/// unhealthy.
pub struct SystemResourceHealthCheck {
/// Memory usage, in MB, above which the component is no longer healthy.
memory_threshold_mb: u64,
/// CPU usage, in percent, above which the component is no longer healthy.
cpu_threshold_percent: f64,
}
impl SystemResourceHealthCheck {
pub fn new(memory_threshold_mb: u64, cpu_threshold_percent: f64) -> Self {
Self {
memory_threshold_mb,
cpu_threshold_percent,
}
}
async fn get_system_info(&self) -> Result<(u64, f64)> {
// In a real implementation, this would collect actual system metrics
// For now, return mock values
let memory_mb = 512; // Mock memory usage
let cpu_percent = 25.0; // Mock CPU usage
Ok((memory_mb, cpu_percent))
}
}
#[async_trait]
impl HealthCheck for SystemResourceHealthCheck {
async fn check(&self) -> Result<HealthStatus> {
let (memory_mb, cpu_percent) = self.get_system_info().await?;
let mut details = HashMap::new();
details.insert("memory_usage_mb".to_string(), memory_mb.to_string());
details.insert("cpu_usage_percent".to_string(), cpu_percent.to_string());
details.insert(
"memory_threshold_mb".to_string(),
self.memory_threshold_mb.to_string(),
);
details.insert(
"cpu_threshold_percent".to_string(),
self.cpu_threshold_percent.to_string(),
);
let status =
if memory_mb > self.memory_threshold_mb || cpu_percent > self.cpu_threshold_percent {
if memory_mb > self.memory_threshold_mb * 2
|| cpu_percent > self.cpu_threshold_percent * 2.0
{
ComponentHealth::Unhealthy
} else {
ComponentHealth::Degraded
}
2025-10-07 10:59:52 +01:00
} else {
ComponentHealth::Healthy
};
2025-10-07 10:59:52 +01:00
let error = if status != ComponentHealth::Healthy {
Some(format!(
"Resource usage exceeds thresholds - Memory: {}MB (max: {}MB), CPU: {:.1}% (max: \
{:.1}%)",
memory_mb, self.memory_threshold_mb, cpu_percent, self.cpu_threshold_percent
))
2025-10-07 10:59:52 +01:00
} else {
None
};
Ok(HealthStatus {
component: "system_resources".to_string(),
status,
last_check: chrono::Utc::now(),
details,
error,
})
}
}