---
# ConfigMap that ships the VAPORA Prometheus alerting rules.
# The single data key `vapora-alerts.yml` is a literal block scalar whose
# content is a Prometheus rule file; the `#` lines inside it are comments of
# that embedded rule file.
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-vapora-alerts
  namespace: monitoring
data:
  vapora-alerts.yml: |
    groups:
      - name: vapora_analytics
        # Rules in this group are evaluated every 30 seconds.
        interval: 30s
        rules:
          # Performance Alerts
          # Warning tier: success rate below 0.8 sustained for 5 minutes.
          - alert: LowAgentSuccessRate
            expr: vapora_overall_success_rate < 0.8
            for: 5m
            labels:
              severity: warning
              component: analytics
            annotations:
              summary: "Low agent success rate: {{ $value | humanizePercentage }}"
              description: "Overall agent success rate is below 80% (current: {{ $value | humanizePercentage }})"

          # Critical tier: lower threshold (0.6) and shorter hold time (2m).
          - alert: CriticalAgentSuccessRate
            expr: vapora_overall_success_rate < 0.6
            for: 2m
            labels:
              severity: critical
              component: analytics
            annotations:
              summary: "Critical agent success rate: {{ $value | humanizePercentage }}"
              description: "Overall agent success rate is below 60% (current: {{ $value | humanizePercentage }})"

          # Cost Alerts
          - alert: HighExecutionCost
            expr: vapora_cost_per_task_cents > 100
            for: 10m
            labels:
              severity: warning
              component: cost
            annotations:
              summary: "High average cost per task: {{ $value | humanize }} cents"
              description: "Average cost per task has exceeded 100 cents (current: {{ $value | humanize }} cents)"

          # NOTE(review): the `_total` suffix suggests a monotonically
          # increasing counter. If so, this comparison stays true after the
          # first increment until the counter resets; consider
          # `increase(...[<window>]) > 0` once the intended window is known.
          - alert: BudgetThresholdExceeded
            expr: vapora_budget_threshold_alerts_total > 0
            for: 1m
            labels:
              severity: warning
              component: budget
            annotations:
              summary: "Budget threshold alerts detected"
              description: "Budget threshold has been exceeded {{ $value | humanize }} times"

          # System Health Alerts
          # NOTE(review): `== 0` only matches when the series exists; if the
          # metric disappears entirely this rule does not fire. Confirm
          # whether an `absent()` companion rule is wanted.
          - alert: NoActiveAgents
            expr: vapora_active_agents == 0
            for: 1m
            labels:
              severity: critical
              component: agents
            annotations:
              summary: "No active agents"
              description: "No active agents detected. System cannot process tasks."

          # Uses increase() over 5m so the expression matches the description
          # ("in the last 5 minutes") instead of comparing the lifetime value
          # of the `_total` series.
          - alert: HighAnalyticsQueryErrors
            expr: increase(vapora_analytics_errors_total[5m]) > 10
            for: 5m
            labels:
              severity: warning
              component: analytics
            annotations:
              summary: "High analytics query errors: {{ $value | humanize }} errors"
              description: "More than 10 analytics query errors detected in the last 5 minutes"

          # Per-second execution rate averaged over 5m, held for 10m.
          - alert: TaskExecutionStalled
            expr: rate(vapora_total_tasks_executed[5m]) < 0.1
            for: 10m
            labels:
              severity: warning
              component: execution
            annotations:
              summary: "Task execution rate is very low"
              description: "Less than 0.1 tasks/second being executed. System may be stalled."

          # Analytics Query Performance
          # histogram_quantile() needs the per-bucket series (with the `le`
          # label), rated over a window and aggregated by `le`.
          # NOTE(review): assumes the metric is exported as a classic
          # histogram with `_bucket` series - confirm against the exporter.
          - alert: SlowAnalyticsQueries
            expr: histogram_quantile(0.95, sum by (le) (rate(vapora_analytics_query_duration_ms_bucket[5m]))) > 5000
            for: 5m
            labels:
              severity: warning
              component: analytics
            annotations:
              summary: "Slow analytics queries detected"
              description: "95th percentile query duration exceeds 5 seconds (current: {{ $value | humanize }}ms)"

          # Budget Enforcement
          # Critical tier of BudgetThresholdExceeded (same metric, threshold 5).
          # NOTE(review): same raw-counter caveat as BudgetThresholdExceeded.
          - alert: BudgetExceeded
            expr: vapora_budget_threshold_alerts_total > 5
            for: 2m
            labels:
              severity: critical
              component: budget
            annotations:
              summary: "Multiple budget threshold violations"
              description: "Budget has been exceeded multiple times. Cost control measures may be needed."