Vapora/provisioning/scripts/health-check.nu

226 lines
6.8 KiB
Plaintext
Raw Normal View History

2026-01-12 03:36:55 +00:00
#!/usr/bin/env nu
# VAPORA Health Check and Monitoring Script
# Monitors deployment health across Docker and Kubernetes platforms
# Version: 1.0.0
# Entry point: run the health check for one platform, either forever or a
# fixed number of times, printing a report after every check.
#
# Raises an error if --target is neither "docker" nor "kubernetes".
def main [
    --target: string = "docker"  # Platform to check: "docker" or "kubernetes"
    --interval: int = 30         # Seconds to wait between consecutive checks
    --count: int = 0             # Number of checks to run; 0 or less means run until interrupted
] {
    print "🏥 VAPORA Health Check Monitor"
    print $"Target: ($target) | Interval: ($interval)s"
    print ""

    # Single dispatch point shared by both modes, so the target handling
    # cannot drift between the continuous and the counted branch.
    let run_check = {||
        match $target {
            "docker" => { check-docker-health }
            "kubernetes" => { check-kubernetes-health }
            _ => {
                error make {msg: $"Unknown target: ($target)"}
            }
        }
    }

    # int * duration yields a duration without relying on `into duration` flags.
    let pause = ($interval * 1sec)

    if $count <= 0 {
        print "⚠️ Running continuous monitoring (Press Ctrl+C to stop)"
        print ""
        loop {
            let status = (do $run_check)
            if not $status.healthy {
                print "❌ Unhealthy services detected!"
                for issue in $status.issues {
                    print $" • ($issue)"
                }
            } else {
                print "✅ All services healthy"
            }
            print ""
            sleep $pause
        }
    } else {
        # Run N times. A `for` loop is a statement, so nothing is returned from
        # `main`; the previous `each` pipeline returned an empty list that got
        # rendered after the last check.
        for iteration in 1..$count {
            print $"Check ($iteration)/($count):"
            let status = (do $run_check)
            if not $status.healthy {
                print "❌ Unhealthy"
                for issue in $status.issues {
                    print $" • ($issue)"
                }
            } else {
                print "✅ Healthy"
            }
            # No trailing pause after the final check.
            if $iteration < $count {
                print ""
                sleep $pause
            }
        }
    }
}
# Check the VAPORA Docker deployment.
#
# For every expected container, reads its status from `docker ps`, then probes
# each service's HTTP endpoint with curl. Progress is printed as it goes.
#
# Returns a record:
#   healthy: bool         - true when no issue was recorded
#   issues:  list<string> - one human-readable message per detected problem
def check-docker-health []: nothing -> record {
    let services = ["vapora-backend", "vapora-agents", "vapora-llm-router", "vapora-frontend"]
    # Must be `mut` and filled from `for` loops: `$issues | append ...` on an
    # immutable list inside an `each` closure throws the result away, which
    # made this check report "healthy" unconditionally.
    mut issues = []

    print "🐳 Checking Docker services..."
    for service in $services {
        let result = (do {
            ^docker ps --filter $"name=($service)" --format "{{.Status}}"
        } | complete)

        if $result.exit_code == 0 {
            let status = ($result.stdout | str trim)
            if ($status | is-empty) {
                # `docker ps` only lists running containers, so no output means not running.
                print $" ✗ ($service): not running"
                $issues = ($issues | append $"($service) not running")
            } else if not ($status | str contains "Up") {
                print $" ⚠️ ($service): ($status)"
                $issues = ($issues | append $"($service) in state: ($status)")
            } else if ($status | str contains "(unhealthy)") {
                # Container is up but its Docker HEALTHCHECK is failing.
                print $" ⚠️ ($service): ($status)"
                $issues = ($issues | append $"($service) reports unhealthy: ($status)")
            } else {
                print $" ✓ ($service): ($status)"
            }
        } else {
            print $" ✗ ($service): error checking status"
            $issues = ($issues | append $"Failed to check ($service)")
        }
    }

    print ""
    print "📊 Checking service endpoints..."
    # Each entry is [display name, URL].
    let endpoints = [
        ["backend", "http://localhost:8001/health"]
        ["agents", "http://localhost:8002/health"]
        ["llm-router", "http://localhost:8003/health"]
        ["frontend", "http://localhost:3000/"]
    ]
    for endpoint in $endpoints {
        let name = $endpoint.0
        let url = $endpoint.1
        # --max-time bounds the probe so a hung endpoint cannot stall the monitor;
        # on timeout curl exits non-zero and the endpoint is reported unreachable.
        let result = (do {
            ^curl -s -o /dev/null --max-time 10 -w "%{http_code}" $url
        } | complete)

        if $result.exit_code == 0 {
            let status_code = ($result.stdout | str trim)
            # Only 2xx responses count as healthy.
            if ($status_code | str starts-with "2") {
                print $" ✓ ($name): HTTP ($status_code)"
            } else {
                print $" ⚠️ ($name): HTTP ($status_code)"
                $issues = ($issues | append $"($name) returned HTTP ($status_code)")
            }
        } else {
            print $" ✗ ($name): unreachable"
            $issues = ($issues | append $"($name) endpoint unreachable")
        }
    }

    {
        healthy: ($issues | is-empty)
        issues: $issues
    }
}
# Check the VAPORA Kubernetes deployment in the `vapora` namespace.
#
# Inspects, via kubectl: replica readiness of each expected deployment, the
# phase/readiness of every pod, and the cluster IP of every service. Progress
# is printed as it goes.
#
# Returns a record:
#   healthy: bool         - true when no issue was recorded
#   issues:  list<string> - one human-readable message per detected problem
def check-kubernetes-health []: nothing -> record {
    let deployments = ["vapora-backend", "vapora-agents", "vapora-llm-router"]
    # Must be `mut` and filled from `for` loops: `$issues | append ...` on an
    # immutable list inside an `each` closure throws the result away, which
    # made this check report "healthy" unconditionally.
    mut issues = []

    print "☸️ Checking Kubernetes deployments..."
    for deployment in $deployments {
        let result = (do {
            ^kubectl get deployment $deployment -n vapora -o json
        } | complete)

        if $result.exit_code == 0 {
            let deploy_json = ($result.stdout | from json)
            let desired = ($deploy_json.spec?.replicas? | default 1)
            # Kubernetes omits these status fields when their value is zero, so
            # they must be read optionally or the check crashes on exactly the
            # deployments that are unhealthy.
            let ready = ($deploy_json.status?.readyReplicas? | default 0)
            let updated = ($deploy_json.status?.updatedReplicas? | default 0)

            if ($desired == $ready) and ($desired == $updated) {
                print $" ✓ ($deployment): ($ready)/($desired) replicas ready"
            } else {
                print $" ⚠️ ($deployment): ($ready)/($desired) replicas ready"
                $issues = ($issues | append $"($deployment) replicas not ready: ($ready)/($desired)")
            }
        } else {
            print $" ✗ ($deployment): not found"
            $issues = ($issues | append $"($deployment) deployment not found")
        }
    }

    print ""
    print "📊 Checking pod health..."
    let pods_result = (do {
        ^kubectl get pods -n vapora -o json
    } | complete)

    if $pods_result.exit_code == 0 {
        let pods_json = ($pods_result.stdout | from json)
        for pod in ($pods_json.items? | default []) {
            let name = $pod.metadata.name
            let phase = ($pod.status?.phase? | default "Unknown")
            # A pod may have no conditions yet, or no "Ready" condition at all;
            # `any` treats both cases as "not ready" instead of erroring.
            let is_ready = (
                $pod.status?.conditions?
                | default []
                | any {|cond| $cond.type == "Ready" and $cond.status == "True" }
            )

            if ($phase == "Running") and $is_ready {
                print $" ✓ ($name): Running"
            } else if $phase == "Succeeded" {
                # Pods that ran to completion (e.g. finished Jobs) are not failures.
                print $" ✓ ($name): Succeeded"
            } else {
                print $" ⚠️ ($name): ($phase)"
                $issues = ($issues | append $"Pod ($name) in phase: ($phase)")
            }
        }
    } else {
        print " ✗ Could not get pod status"
        $issues = ($issues | append "Failed to query pods")
    }

    print ""
    print "📊 Checking services..."
    let svc_result = (do {
        ^kubectl get svc -n vapora -o json
    } | complete)

    if $svc_result.exit_code == 0 {
        let svc_json = ($svc_result.stdout | from json)
        for service in ($svc_json.items? | default []) {
            let name = $service.metadata.name
            let svc_type = ($service.spec?.type? | default "Unknown")
            # Some service types carry no clusterIP field; treat a missing
            # value the same as the explicit "None".
            let cluster_ip = ($service.spec?.clusterIP? | default "None")

            if $cluster_ip != "None" {
                print $" ✓ ($name): ($svc_type) - ($cluster_ip)"
            } else {
                print $" ⚠️ ($name): no cluster IP assigned"
                $issues = ($issues | append $"Service ($name) has no cluster IP")
            }
        }
    } else {
        # Previously a failed query was silently ignored; report it like the pod query.
        print " ✗ Could not get service status"
        $issues = ($issues | append "Failed to query services")
    }

    {
        healthy: ($issues | is-empty)
        issues: $issues
    }
}
# No explicit `main` call here: when this file is executed as a script,
# Nushell invokes `main` itself and passes it the parsed command-line flags.
# Calling it manually would first run it with default flags, and with the
# default --count of 0 that run never returns.