prvng_platform/scripts/health-check.nu

172 lines
5.1 KiB
Plaintext
Raw Permalink Normal View History

2025-10-07 10:59:52 +01:00
#!/usr/bin/env nu
# Health Check Script for Provisioning Platform
# Verifies all services are healthy and accessible
# Entry point: probe every platform service and report the outcome.
#
# Core services (Orchestrator, Control Center, CoreDNS, OCI Registry,
# Extension Registry) are always checked. Gitea, the API server and
# PostgreSQL are checked only when `docker ps` lists a matching container.
#
# Output is a JSON array of result records with --json, otherwise the
# human-readable report produced by display_results.
# The process exits with status 1 when at least one result is "unhealthy".
def main [
    --json # Output in JSON format
    --quiet # Only show errors
    --timeout: int = 30 # Timeout for each health check (seconds)
] {
    # The banner is suppressed in JSON mode so stdout stays parseable.
    if not ($quiet or $json) {
        print $"(ansi green_bold)Provisioning Platform Health Check(ansi reset)"
        print ""
    }

    # Core services
    mut results = [
        (check_service "Orchestrator" "http://localhost:8080/health" $timeout)
        (check_service "Control Center" "http://localhost:8081/health" $timeout)
        (check_dns "CoreDNS" "localhost" 5353 $timeout)
        (check_service "OCI Registry" "http://localhost:5000/v2/" $timeout)
        (check_service "Extension Registry" "http://localhost:8082/api/v1/health" $timeout)
    ]

    # Optional services: only probed when their container is running
    if (is_container_running "provisioning-gitea") {
        $results = ($results | append (check_service "Gitea" "http://localhost:3000/api/healthz" $timeout))
    }
    if (is_container_running "provisioning-api-server") {
        $results = ($results | append (check_service "API Server" "http://localhost:8083/health" $timeout))
    }
    if (is_container_running "provisioning-postgres") {
        $results = ($results | append (check_postgres "PostgreSQL" "localhost" 5432 $timeout))
    }

    # Output results. `print` is required: the value of a non-final
    # expression in a block is discarded, so a bare `to json` shows nothing.
    if $json {
        print ($results | to json)
    } else {
        display_results $results $quiet
    }

    # `return N` from main would only emit N as output; `exit` is what
    # sets the process status that callers (CI, shell scripts) inspect.
    let failed = ($results | where status == "unhealthy" | length)
    if $failed > 0 {
        exit 1
    }
}

# True when `docker ps` lists at least one running container matching `name`.
# Any failure to run docker (binary missing, daemon unreachable, non-zero
# exit) is treated as "not running" so optional checks are simply skipped.
def is_container_running [name: string] {
    try {
        let listing = (^docker ps --filter $"name=($name)" --format "{{.Names}}" | complete)
        ($listing.exit_code == 0) and ($listing.stdout | lines | is-not-empty)
    } catch {
        false
    }
}
# Check HTTP service health
#
# Issues an HTTP GET against `url` and reports the outcome as a record
# with the fields service, status, url and message.
#
# name:    label stored in the `service` field
# url:     endpoint to request
# timeout: maximum time for the request, in seconds
#
# Returns status "healthy" when the request completes without raising,
# and "unhealthy" when `http get` raises any error.
def check_service [name: string, url: string, timeout: int] {
    try {
        # --max-time expects a duration; convert the seconds count explicitly.
        # The response body is irrelevant here, only success matters.
        http get --max-time ($timeout * 1sec) $url | ignore
        {
            service: $name,
            status: "healthy",
            url: $url,
            message: "OK"
        }
    } catch {
        {
            service: $name,
            status: "unhealthy",
            url: $url,
            message: $"Failed to connect to ($url)"
        }
    }
}
# Check DNS service
#
# Runs `dig` against the given server and reports the outcome as a record
# with the fields service, status, url and message.
#
# name:    label stored in the `service` field
# host:    DNS server to query
# port:    port the DNS server listens on
# timeout: value passed to dig's +time option (seconds)
#
# Returns status "healthy" when dig exits with code 0, "unhealthy" when it
# exits non-zero or when running dig raises an error.
def check_dns [name: string, host: string, port: int, timeout: int] {
    let target = $"dns://($host):($port)"
    try {
        # Capture the `complete` record in a variable: `$in` on a following
        # statement does NOT hold the previous statement's output.
        let probe = (^dig +short $"+time=($timeout)" $"@($host)" -p $port health.check | complete)
        if $probe.exit_code == 0 {
            {
                service: $name,
                status: "healthy",
                url: $target,
                message: "DNS responding"
            }
        } else {
            {
                service: $name,
                status: "unhealthy",
                url: $target,
                message: "DNS not responding"
            }
        }
    } catch {
        {
            service: $name,
            status: "unhealthy",
            url: $target,
            message: "Failed to query DNS"
        }
    }
}
# Check PostgreSQL
#
# Runs `pg_isready` inside the `provisioning-postgres` container and
# reports the outcome as a record with the fields service, status, url
# and message.
#
# name:    label stored in the `service` field
# host:    host shown in the reported url
# port:    port shown in the reported url
# timeout: value passed to pg_isready's -t option (seconds)
#
# The probe itself always targets localhost:5432 from inside the container;
# `host` and `port` only shape the `url` field of the result.
#
# Returns status "healthy" when pg_isready exits with code 0, "unhealthy"
# when it exits non-zero or when running docker raises an error.
def check_postgres [name: string, host: string, port: int, timeout: int] {
    let target = $"postgres://($host):($port)"
    try {
        # Capture the `complete` record in a variable: `$in` on a following
        # statement does NOT hold the previous statement's output.
        let probe = (^docker exec provisioning-postgres pg_isready -h localhost -p 5432 -t $timeout | complete)
        if $probe.exit_code == 0 {
            {
                service: $name,
                status: "healthy",
                url: $target,
                message: "PostgreSQL accepting connections"
            }
        } else {
            {
                service: $name,
                status: "unhealthy",
                url: $target,
                message: "PostgreSQL not ready"
            }
        }
    } catch {
        {
            service: $name,
            status: "unhealthy",
            url: $target,
            message: "Failed to check PostgreSQL"
        }
    }
}
# Display results in table format
#
# results: list of records carrying service, status, url and message fields
# quiet:   when true, the table, the per-service failure list and the
#          all-healthy line are suppressed; the "Some services unhealthy"
#          summary is still printed
def display_results [results: list, quiet: bool] {
    if not $quiet {
        print ($results | table -e)
        print ""
    }

    let unhealthy = ($results | where status == "unhealthy")
    let healthy = ($results | where status == "healthy" | length)
    let total = ($results | length)

    # A pipeline used as an `if` condition must be parenthesised; otherwise
    # the `|` ends the `if` command before its block is reached.
    if (not $quiet) and ($unhealthy | is-not-empty) {
        print $"(ansi red_bold)Unhealthy Services:(ansi reset)"
        for service in $unhealthy {
            print $"  (ansi red)✗(ansi reset) ($service.service): ($service.message)"
        }
        print ""
    }

    if $healthy == $total {
        if not $quiet {
            print $"(ansi green_bold)✓ All services healthy ($healthy)/($total)(ansi reset)"
        }
    } else {
        # Printed even in quiet mode: this is the error summary.
        print $"(ansi red_bold)✗ Some services unhealthy: ($healthy)/($total) healthy(ansi reset)"
    }
}