- DAG architecture: `dag show/validate/export` (nulib/main_provisioning/dag.nu),
config loader (lib_provisioning/config/loader/dag.nu), taskserv dag-executor.
Backed by schemas/lib/dag/*.ncl; orchestrator emits NATS events via
WorkspaceComposition::into_workflow. See ADR-020, ADR-021.
- Unified Component Architecture: components/mod.nu, main_provisioning/
{components,workflow,extensions,ontoref-queries}.nu. Full workflow engine with
topological sort and NATS subject emission. Blocks A-H complete (libre-daoshi).
- Commands-registry: nulib/commands-registry.ncl (Nickel source, 314 lines) +
JSON cache at ~/.cache/provisioning/commands-registry.json rebuilt on source
change. cli/provisioning fast-path alias expansion avoids cold Nu startup.
ADDING_COMMANDS.md documents new-command workflow.
- Platform service manager: service-manager.nu (+573), startup.nu (+611),
service-check.nu (+255); autostart/bootstrap/health/target refactored.
- Nushell 0.112.2 migration: removed all try/catch and bash redirections;
external commands prefixed with ^; type signatures enforced. Driven by
scripts/refactor-try-catch{,-simplified}.nu.
- TTY stack: removed shlib/*-tty.sh; replaced by cli/tty-dispatch.sh,
tty-filter.sh, tty-commands.conf.
- New domain modules: images/ (golden image lifecycle), workspace/{state,sync}.nu,
main_provisioning/{bootstrap,cluster-deploy,fip,state}.nu, commands/{state,
build,integrations/auth,utilities/alias}.nu, platform.nu expanded (+874).
- Config loader overhaul: loader/core.nu slimmed (-759), cache/core.nu
refactored (-454), removed legacy loaders/file_loader.nu (-330).
- Thirteen new provisioning-<domain>.nu top-level modules for bash dispatcher.
- Tests: test_workspace_state.nu (+351); updates to test_oci_registry,
test_services.
- README + CHANGELOG updated.
255 lines
9.6 KiB
Text
255 lines
9.6 KiB
Text
# Module: Service Availability Check Utilities
|
|
# Purpose: Reusable patterns for checking service availability before making requests
|
|
# Guidelines: Follows .claude/guidelines/provisioning.md - Service Check Pattern
|
|
#
|
|
# Features:
|
|
# - Check individual service availability
|
|
# - Check all essential services (cascade failure detection)
|
|
# - Check external dependencies (database, OCI registries, Git sources)
|
|
# - Clean error messages with short aliases
|
|
# - No stack traces (uses print + return, not error make)
|
|
|
|
use ../platform/target.nu *
|
|
use ../platform/health.nu *
|
|
use ../platform/service-manager.nu *
|
|
|
|
# Check locally configured external dependencies without importing startup.nu
# (that import is avoided because of syntax errors in that file).
#
# Only the `database` entry is inspected, and only for the "filesystem"
# backend: the storage path must exist on disk. Any other backend produces an
# empty list.
#
# Parameters:
#   external_config - record that may contain a `database` record with the
#                     optional fields `backend` and `path`
# Returns: list of {service, backend, status, message} records
def check-external-services-internal [external_config: record]: nothing -> list {
    let db = ($external_config.database? | default {backend: "filesystem"})

    # Resolve the backend once so that a `database` record without a `backend`
    # field falls back to "filesystem" both in the condition and in the
    # reported record (a plain `$db.backend` access would fail on such input).
    let backend = ($db.backend? | default "filesystem")

    if $backend != "filesystem" {
        return []
    }

    let path = ($db.path? | default "~/.provisioning/data")
    # Manual tilde expansion: the leading "~" is swapped for $env.HOME.
    let expanded_path = if ($path | str starts-with "~") {
        $"($env.HOME)/($path | str substring 1..)"
    } else {
        $path
    }

    let found = ($expanded_path | path exists)
    let message = if $found {
        $"Filesystem storage available at ($expanded_path)"
    } else {
        $"Path does not exist: ($expanded_path)"
    }

    [{
        service: "database"
        backend: $backend
        status: (if $found { "✓" } else { "✗" })
        message: $message
    }]
}
|
|
|
|
# Check if a service is available by verifying that its port is listening.
#
# The port is taken from the URL authority, so any scheme (http, https, ...)
# and any trailing path/query are accepted. When the URL carries no explicit
# numeric port, the port is reported as "unknown" and the service is
# considered unavailable.
#
# Returns: { available: bool, port: string, message: string }
export def check-service-available [
    service_url: string   # Service URL (e.g., "http://localhost:9011")
    service_name: string  # Human-readable service name (e.g., "Orchestrator")
]: nothing -> record {
    # scheme://[userinfo@]host:port - only the digits after the host are
    # captured, so "https://host:9011/api" yields "9011" rather than "9011/api".
    let parsed = (
        $service_url
        | parse --regex '^[A-Za-z][A-Za-z0-9+.-]*://(?:[^/?#@]*@)?(?:\[[^\]]*\]|[^:/?#]*):(?P<port>[0-9]+)'
    )
    let port = if ($parsed | is-empty) {
        "unknown"
    } else {
        ($parsed | get port.0)
    }

    # Check if the port is listening via lsof (no netstat fallback is implemented).
    # Using do { } | complete pattern per Nushell guidelines (NO try-catch).
    # Without a parsed port there is nothing to probe, so lsof is not spawned.
    let is_listening = if $port == "unknown" {
        false
    } else {
        let port_check = (do { ^lsof -i $":($port)" -P -n | ^grep LISTEN } | complete)
        ($port_check.exit_code == 0)
    }

    let message = if $is_listening {
        $"($service_name) is available on port ($port)"
    } else {
        $"($service_name) is not available on port ($port)"
    }

    {
        available: $is_listening,
        port: $port,
        message: $message
    }
}
|
|
|
|
# Report the status of the configured external services.
#
# The entries come from get-external-services. No probing is performed yet:
# every entry is reported with status "✓" together with its name and URL.
#
# Returns: list of {service, backend, status, message} records
# (empty list when nothing is configured)
export def check-external-services-status []: nothing -> list {
    let configured = (get-external-services)

    if ($configured | is-not-empty) {
        # One status record per configured entry; `srvc` is optional and
        # falls back to "external".
        $configured | each {|entry|
            let backend = ($entry.srvc? | default "external")
            {
                service: $entry.name
                backend: $backend
                status: "✓"
                message: $"External service: ($entry.name) at ($entry.url)"
            }
        }
    } else {
        []
    }
}
|
|
|
|
# Collect the health of every enabled platform service.
#
# Each service returned by get-enabled-services is passed to
# check-service-health and mapped to "healthy" or "unhealthy".
#
# Returns: list of {name: string, status: string, priority: int}
export def check-platform-services-status []: nothing -> list {
    get-enabled-services | each {|entry|
        let state = if (check-service-health $entry.name) {
            "healthy"
        } else {
            "unhealthy"
        }

        {name: $entry.name, status: $state, priority: $entry.priority}
    }
}
|
|
|
|
# Print the cascade failure help for a service that is not running.
#
# The report is static text (start and status commands with their short
# aliases); no services are scanned.
export def show-cascade-failure-report [failed_service: string]: nothing -> nothing {
    let report = [
        ""
        $"❌ ($failed_service) is not running."
        ""
        "Start all platform services:"
        " provisioning platform start"
        " prvng plat start # short alias"
        ""
        "Check service status:"
        " provisioning platform status"
        " prvng plat st # short alias"
        ""
    ]

    for line in $report {
        print $line
    }
}
|
|
|
|
# Verify that a service is reachable and report a clean error when it is not.
#
# On failure the diagnostics are printed (no `error make`, hence no stack
# trace) and an error record is returned; on success an ok record is returned.
# Usage: call this BEFORE making HTTP requests to services.
#
# Returns: { status: "ok" | "error", message: string }
export def verify-service-or-fail [
    service_url: string            # Service URL (e.g., "http://localhost:9011")
    service_name: string           # Human-readable service name (e.g., "Orchestrator")
    --check-command: string = ""   # Full command to check status
    --check-alias: string = ""     # Short alias for check (e.g., "prvng ps")
    --start-command: string = ""   # Full command to start service
    --start-alias: string = ""     # Short alias for start (e.g., "prvng start orchestrator")
]: nothing -> record {
    let probe = (check-service-available $service_url $service_name)

    # Happy path first: nothing to print when the service answers.
    if $probe.available {
        return {status: "ok", message: $"($service_name) is available"}
    }

    print $"❌ ($service_name) not available at ($service_url)"
    print ""
    print $"Connection refused - ($service_name) is not running on port ($probe.port)."
    print ""

    # Static help on how to start platform services and check their status
    show-cascade-failure-report $service_name

    # Prints a heading, the full command and (when given) its short alias;
    # prints nothing at all when the command is empty.
    let print_hint = {|heading: string, command: string, alias: string|
        if ($command | is-not-empty) {
            print $heading
            print $" ($command)"
            if ($alias | is-not-empty) {
                print $" ($alias) # short alias"
            }
            print ""
        }
    }

    do $print_hint "To check service status:" $check_command $check_alias
    do $print_hint "To start service:" $start_command $start_alias

    print $"Current endpoint: ($service_url)"
    print "If using a custom endpoint, verify it with: --orchestrator <url>"

    # Error status is returned as data, not raised
    {status: "error", message: $"($service_name) not available"}
}
|
|
|
|
# Quiet availability probe: yields only the `available` flag computed by
# check-service-available and prints nothing itself.
export def is-service-available [
    service_url: string   # Service URL
    service_name: string  # Service name
]: nothing -> bool {
    check-service-available $service_url $service_name | get available
}
|
|
|
|
# Check if provisioning_daemon is available (CRITICAL - required for ALL operations).
#
# The port is read from the "provisioning_daemon" deployment config
# (`server.port`), falling back to 9095 when it is not set.
#
# Returns: { available: bool, port: <configured port or 9095> }
export def check-daemon-availability []: nothing -> record {
    let cfg = (get-deployment-service-config "provisioning_daemon")
    let daemon_port = ($cfg.server?.port? | default 9095)

    # The daemon counts as available when lsof reports a LISTEN entry on its port
    let probe = (do { ^lsof -i :($daemon_port) -P -n | ^grep LISTEN } | complete)

    {
        available: ($probe.exit_code == 0)
        port: $daemon_port
    }
}
|
|
|
|
# Verify daemon is available - CRITICAL prerequisite for ALL operations.
#
# When the daemon is not listening, a help banner is printed (status/start
# commands and the operations that are allowed without the daemon: help,
# platform, setup) and an error record is returned. Otherwise an ok record
# is returned and nothing is printed.
#
# Returns: { status: "ok" | "error", message: string }
export def verify-daemon-or-block [
    operation: string  # Operation being attempted (for error message)
]: nothing -> record {
    let daemon = (check-daemon-availability)

    # Daemon is up: nothing to report
    if $daemon.available {
        return {status: "ok", message: "provisioning_daemon is available"}
    }

    let banner = [
        ""
        "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
        "❌ CRITICAL: provisioning_daemon not available"
        "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
        ""
        $"The provisioning daemon is required for operation: ($operation)"
        $"Daemon is not listening on port ($daemon.port)"
        ""
        "The daemon is a CRITICAL component - all operations require it."
        ""
        "To check daemon status:"
        " provisioning platform status"
        " prvng plat st # short alias"
        ""
        "To start the daemon:"
        " provisioning platform start provisioning_daemon"
        " prvng plat start provisioning_daemon # short alias"
        ""
        "Allowed operations without daemon:"
        " • help / -h / --help - View help"
        " • platform <cmd> - Manage platform services"
        " • setup - Initial setup"
        ""
    ]

    for line in $banner {
        print $line
    }

    {status: "error", message: "provisioning_daemon not available"}
}
|