prvng_core/nulib/lib_provisioning/utils/service-check.nu

256 lines
9.6 KiB
Text
Raw Normal View History

feat(core): three-layer DAG, unified component arch, commands-registry cache, Nushell 0.112.2 migration - DAG architecture: `dag show/validate/export` (nulib/main_provisioning/dag.nu), config loader (lib_provisioning/config/loader/dag.nu), taskserv dag-executor. Backed by schemas/lib/dag/*.ncl; orchestrator emits NATS events via WorkspaceComposition::into_workflow. See ADR-020, ADR-021. - Unified Component Architecture: components/mod.nu, main_provisioning/ {components,workflow,extensions,ontoref-queries}.nu. Full workflow engine with topological sort and NATS subject emission. Blocks A-H complete (libre-daoshi). - Commands-registry: nulib/commands-registry.ncl (Nickel source, 314 lines) + JSON cache at ~/.cache/provisioning/commands-registry.json rebuilt on source change. cli/provisioning fast-path alias expansion avoids cold Nu startup. ADDING_COMMANDS.md documents new-command workflow. - Platform service manager: service-manager.nu (+573), startup.nu (+611), service-check.nu (+255); autostart/bootstrap/health/target refactored. - Nushell 0.112.2 migration: removed all try/catch and bash redirections; external commands prefixed with ^; type signatures enforced. Driven by scripts/refactor-try-catch{,-simplified}.nu. - TTY stack: removed shlib/*-tty.sh; replaced by cli/tty-dispatch.sh, tty-filter.sh, tty-commands.conf. - New domain modules: images/ (golden image lifecycle), workspace/{state,sync}.nu, main_provisioning/{bootstrap,cluster-deploy,fip,state}.nu, commands/{state, build,integrations/auth,utilities/alias}.nu, platform.nu expanded (+874). - Config loader overhaul: loader/core.nu slimmed (-759), cache/core.nu refactored (-454), removed legacy loaders/file_loader.nu (-330). - Thirteen new provisioning-<domain>.nu top-level modules for bash dispatcher. - Tests: test_workspace_state.nu (+351); updates to test_oci_registry, test_services. - README + CHANGELOG updated.
2026-04-17 04:27:33 +01:00
# Module: Service Availability Check Utilities
# Purpose: Reusable patterns for checking service availability before making requests
# Guidelines: Follows .claude/guidelines/provisioning.md - Service Check Pattern
#
# Features:
# - Check individual service availability
# - Check all essential services (cascade failure detection)
# - Check external dependencies (database, OCI registries, Git sources)
# - Clean error messages with short aliases
# - No stack traces (uses print + return, not error make)
use ../platform/target.nu *
use ../platform/health.nu *
use ../platform/service-manager.nu *
# Check external services locally (avoiding startup.nu import due to syntax errors in that file).
#
# Only the `database` entry of `external_config` is inspected, and only when its
# backend is "filesystem" (the default when `database` or `backend` is absent).
# Any other backend yields an empty list.
#
# Returns: list with at most one record
#   { service: "database", backend: string, status: "✓" | "✗", message: string }
def check-external-services-internal [external_config: record]: nothing -> list {
    let db = ($external_config.database? | default {backend: "filesystem"})
    # Resolve the backend once. A database record without a `backend` key is
    # treated as "filesystem"; reading `$db.backend` directly would fail on it.
    let backend = ($db.backend? | default "filesystem")

    if $backend != "filesystem" {
        return []
    }

    # Manual tilde expansion: the leading "~" is replaced by $env.HOME.
    let path = ($db.path? | default "~/.provisioning/data")
    let expanded_path = if ($path | str starts-with "~") {
        $"($env.HOME)/($path | str substring 1..)"
    } else {
        $path
    }

    let storage_present = ($expanded_path | path exists)
    [{
        service: "database"
        backend: $backend
        status: (if $storage_present { "✓" } else { "✗" })
        message: (if $storage_present {
            $"Filesystem storage available at ($expanded_path)"
        } else {
            $"Path does not exist: ($expanded_path)"
        })
    }]
}
# Check if a service is available by verifying that its port is listening.
#
# The port is read from the authority part of `service_url`. Any scheme
# (http, https, ...) is accepted and a trailing path, query or fragment is
# ignored. When the URL carries no explicit numeric port, the port is reported
# as "unknown" and the service is reported as not available without probing.
#
# The probe runs `lsof -i :<port> -P -n | grep LISTEN`; the service counts as
# available when that pipeline exits with code 0.
# Uses the do { } | complete pattern per Nushell guidelines (NO try-catch).
#
# Returns: { available: bool, port: string, message: string }
export def check-service-available [
    service_url: string   # Service URL (e.g., "http://localhost:9011")
    service_name: string  # Human-readable service name (e.g., "Orchestrator")
]: nothing -> record {
    # scheme://authority:PORT, followed by end of string or the start of a
    # path/query/fragment. The greedy authority match makes the LAST ":<digits>"
    # win, so userinfo ("user:pass@host:9011") and bracketed IPv6 hosts work.
    let parsed = (
        $service_url
        | parse --regex '^[A-Za-z][A-Za-z0-9+.-]*://[^/?#]*:(?P<port>[0-9]+)(?:[/?#]|$)'
    )
    let port = if ($parsed | is-empty) {
        "unknown"
    } else {
        $parsed | get port.0
    }

    # Nothing to probe without a port; skip spawning lsof/grep.
    let is_listening = if $port == "unknown" {
        false
    } else {
        let probe = (do { ^lsof -i :($port) -P -n | ^grep LISTEN } | complete)
        $probe.exit_code == 0
    }

    let state = if $is_listening { "is available" } else { "is not available" }
    {
        available: $is_listening,
        port: $port,
        message: $"($service_name) ($state) on port ($port)"
    }
}
# Report the status of the configured external services
# (database, OCI registries, Git sources).
#
# Every entry returned by `get-external-services` is mapped to a status record.
# No connectivity probe is performed here: the status is always "✓" and the
# message only echoes the configured name and URL.
#
# Returns: list of { service, backend, status, message }; empty list when
# `get-external-services` returns nothing.
export def check-external-services-status []: nothing -> list {
    let configured = (get-external-services)

    if ($configured | is-not-empty) {
        $configured | each {|entry|
            {
                service: $entry.name
                # `srvc` is optional on an entry; fall back to a generic label.
                backend: ($entry.srvc? | default "external")
                status: "✓"
                message: $"External service: ($entry.name) at ($entry.url)"
            }
        }
    } else {
        []
    }
}
# Collect the health of every enabled platform service.
#
# Calls `check-service-health` once per entry from `get-enabled-services` and
# labels the result "healthy" or "unhealthy".
#
# Returns: list of { name, status, priority }, where `name` and `priority`
# are copied from the service entry.
export def check-platform-services-status []: nothing -> list {
    get-enabled-services | each {|entry|
        let state = if (check-service-health $entry.name) { "healthy" } else { "unhealthy" }
        {
            name: $entry.name,
            status: $state,
            priority: $entry.priority
        }
    }
}
# Show cascade failure report for `failed_service`.
#
# Prints static help only (how to start the platform and how to check its
# status); no service is scanned or probed here.
export def show-cascade-failure-report [failed_service: string]: nothing -> nothing {
    let report = [
        ""
        $"❌ ($failed_service) is not running."
        ""
        "Start all platform services:"
        " provisioning platform start"
        " prvng plat start # short alias"
        ""
        "Check service status:"
        " provisioning platform status"
        " prvng plat st # short alias"
        ""
    ]

    # One `print` per entry, in order, so the output matches line for line.
    for line in $report {
        print $line
    }
}
# Verify that a service is reachable and report a clean error when it is not.
#
# Intended to be called BEFORE making HTTP requests to a service. On failure
# the diagnostics are printed with `print` and an error record is returned, so
# no stack trace is produced (NO error make).
#
# Returns: { status: "ok" | "error", message: string }
export def verify-service-or-fail [
    service_url: string           # Service URL (e.g., "http://localhost:9011")
    service_name: string          # Human-readable service name (e.g., "Orchestrator")
    --check-command: string = ""  # Full command to check status
    --check-alias: string = ""    # Short alias for check (e.g., "prvng ps")
    --start-command: string = ""  # Full command to start service
    --start-alias: string = ""    # Short alias for start (e.g., "prvng start orchestrator")
]: nothing -> record {
    let probe = (check-service-available $service_url $service_name)

    # Happy path first: nothing to print when the service answers.
    if $probe.available {
        return {status: "ok", message: $"($service_name) is available"}
    }

    print $"❌ ($service_name) not available at ($service_url)"
    print ""
    print $"Connection refused - ($service_name) is not running on port ($probe.port)."
    print ""

    # Static help text on starting / inspecting the platform services.
    show-cascade-failure-report $service_name

    # Optional hints; each section appears only when its command was supplied,
    # and an alias is only shown together with its full command.
    if not ($check_command | is-empty) {
        print "To check service status:"
        print $" ($check_command)"
        if not ($check_alias | is-empty) {
            print $" ($check_alias) # short alias"
        }
        print ""
    }

    if not ($start_command | is-empty) {
        print "To start service:"
        print $" ($start_command)"
        if not ($start_alias | is-empty) {
            print $" ($start_alias) # short alias"
        }
        print ""
    }

    print $"Current endpoint: ($service_url)"
    print "If using a custom endpoint, verify it with: --orchestrator <url>"

    {status: "error", message: $"($service_name) not available"}
}
# Lightweight availability check: returns only the boolean and prints nothing.
#
# Delegates to `check-service-available` and extracts its `available` field.
export def is-service-available [
    service_url: string   # Service URL
    service_name: string  # Service name
]: nothing -> bool {
    check-service-available $service_url $service_name | get available
}
# Check if provisioning_daemon is available (CRITICAL - required for ALL operations).
#
# The port comes from `server.port` in the "provisioning_daemon" deployment
# service config, falling back to 9095 when it is not set. The daemon counts as
# available when `lsof -i :<port> -P -n | grep LISTEN` exits with code 0.
#
# Returns: { available: bool, port: <configured port or 9095> }
export def check-daemon-availability []: nothing -> record {
    let cfg = (get-deployment-service-config "provisioning_daemon")
    let port = ($cfg.server?.port? | default 9095)

    # do { } | complete captures the exit code instead of raising on failure.
    let probe_exit = (do { ^lsof -i :($port) -P -n | ^grep LISTEN } | complete | get exit_code)

    {
        available: ($probe_exit == 0)
        port: $port
    }
}
# Verify daemon is available - CRITICAL prerequisite for ALL operations.
#
# When the daemon port is not listening, a banner with recovery instructions is
# printed and an error record is returned; the caller decides whether to block.
# The banner lists the operations that are allowed without the daemon
# (help, platform, setup).
#
# Returns: { status: "ok" | "error", message: string }
export def verify-daemon-or-block [
    operation: string  # Operation being attempted (for error message)
]: nothing -> record {
    let daemon = (check-daemon-availability)

    if $daemon.available {
        return {status: "ok", message: "provisioning_daemon is available"}
    }

    let banner = [
        ""
        "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
        "❌ CRITICAL: provisioning_daemon not available"
        "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
        ""
        $"The provisioning daemon is required for operation: ($operation)"
        $"Daemon is not listening on port ($daemon.port)"
        ""
        "The daemon is a CRITICAL component - all operations require it."
        ""
        "To check daemon status:"
        " provisioning platform status"
        " prvng plat st # short alias"
        ""
        "To start the daemon:"
        " provisioning platform start provisioning_daemon"
        " prvng plat start provisioning_daemon # short alias"
        ""
        "Allowed operations without daemon:"
        " • help / -h / --help - View help"
        " • platform <cmd> - Manage platform services"
        " • setup - Initial setup"
        ""
    ]

    # One `print` per entry keeps the output identical line for line.
    for line in $banner {
        print $line
    }

    {status: "error", message: "provisioning_daemon not available"}
}