#!/usr/bin/env nu

# Service Manager Core
# Manages platform service lifecycle, registry, and health checks
|
||
|
|
|
||
|
|
use ../config/loader.nu get-config
|
||
|
|
|
||
|
|
# Per-service runtime directories under ~/.provisioning/services.
#
# `const` values are evaluated at parse time, when `$env` is not yet
# available, so the home directory is resolved by tilde expansion instead of
# reading `$env.HOME`. `--no-symlink` keeps the paths as written rather than
# resolving symlinks.
const SERVICE_STATE_DIR = ("~/.provisioning/services/state" | path expand --no-symlink)
const SERVICE_PID_DIR = ("~/.provisioning/services/pids" | path expand --no-symlink)
const SERVICE_LOG_DIR = ("~/.provisioning/services/logs" | path expand --no-symlink)
|
||
|
|
|
||
|
|
# Load service registry from configuration
#
# Reads `<config.paths.base>/config/services.toml` (base path taken from
# `get-config`) and returns the value stored under its `services` key.
# Raises an error naming the expected path when the file does not exist.
export def load-service-registry []: nothing -> record {
    let config = (get-config)

    # Load services from config file
    let services_config = $"($config.paths.base)/config/services.toml"

    if not ($services_config | path exists) {
        error make {
            msg: "Service registry not found"
            label: {
                text: $"Missing: ($services_config)"
                span: (metadata $services_config).span
            }
        }
    }

    open $services_config | get services
}
|
||
|
|
|
||
|
|
# Get service definition by name
#
# Looks the service up in the registry returned by `load-service-registry`
# and returns its entry. Raises an error that lists the registered service
# names when `service_name` is not one of them.
export def get-service-definition [
    service_name: string  # Key of the service in the registry
]: nothing -> record {
    let registry = (load-service-registry)
    let known_services = ($registry | columns)

    if $service_name not-in $known_services {
        # The label text is built with interpolation: a record value has to be
        # a single expression, so a bare `"..." + (...)` is not usable here.
        error make {
            msg: $"Service '($service_name)' not found in registry"
            label: {
                text: $"Available services: ($known_services | str join ', ')"
                span: (metadata $service_name).span
            }
        }
    }

    $registry | get $service_name
}
|
||
|
|
|
||
|
|
# Check if service is running
#
# The probe depends on `deployment.mode` in the service definition:
#   binary          the PID recorded in the service's PID file appears in `ps`
#   docker          a running container has exactly the configured name
#   docker-compose  a running container carries the compose project/service labels
#   kubernetes      the deployment reports at least one ready replica
#   remote          `check-service-health` reports `healthy`
# Any other mode is reported as not running. A probe whose external tool
# fails (docker, kubectl) is treated as "not running" rather than an error.
export def is-service-running [
    service_name: string  # Name of the service in the registry
]: nothing -> bool {
    let service_def = (get-service-definition $service_name)

    match $service_def.deployment.mode {
        "binary" => {
            # get-service-pid returns 0 for a missing or unparsable PID file,
            # so a corrupt file no longer aborts the status check.
            let pid = (get-service-pid $service_name)
            if $pid == 0 {
                return false
            }

            (ps | where pid == $pid | length) > 0
        }
        "docker" => {
            let container_name = $service_def.deployment.docker.container_name
            try {
                # `--filter name=` matches substrings, hence the exact check below
                let containers = (^docker ps --filter $"name=($container_name)" --format "{{.Names}}" | lines)
                $container_name in $containers
            } catch {
                false
            }
        }
        "docker-compose" => {
            let compose_service = $service_def.deployment.docker_compose.service_name
            let project = ($service_def.deployment.docker_compose.project_name? | default "provisioning")
            try {
                let containers = (^docker ps --filter $"label=com.docker.compose.project=($project)" --filter $"label=com.docker.compose.service=($compose_service)" --format "{{.Names}}" | lines)
                ($containers | length) > 0
            } catch {
                false
            }
        }
        "kubernetes" => {
            # Check if deployment exists and has ready pods
            let namespace = $service_def.deployment.kubernetes.namespace
            let deployment = $service_def.deployment.kubernetes.deployment_name
            try {
                let replicas = (^kubectl get deployment $deployment -n $namespace -o json | from json | get status.readyReplicas? | default 0)
                $replicas > 0
            } catch {
                false
            }
        }
        "remote" => {
            # For remote services, check health endpoint
            check-service-health $service_name | get healthy
        }
        _ => {
            false
        }
    }
}
|
||
|
|
|
||
|
|
# Get service status
#
# Combines the live running probe, the registry definition and the persisted
# state file (`<state dir>/<service>.json`) into one status record.
# `health_status` and `restart_count` come from the state file and fall back
# to "unknown" / 0 when the file or the field is absent.
export def get-service-status [
    service_name: string  # Name of the service in the registry
]: nothing -> record {
    let is_running = (is-service-running $service_name)
    let service_def = (get-service-definition $service_name)

    let state_file = $"($SERVICE_STATE_DIR)/($service_name).json"
    let state = if ($state_file | path exists) {
        open $state_file
    } else {
        {
            name: $service_name
            status: "unknown"
            health_status: "unknown"
            restart_count: 0
        }
    }

    {
        name: $service_name
        type: $service_def.type
        category: $service_def.category
        status: (if $is_running { "running" } else { "stopped" })
        deployment_mode: $service_def.deployment.mode
        pid: (get-service-pid $service_name)
        # The state file keeps `started_at` after a stop, so uptime is only
        # meaningful while the service is actually running.
        uptime: (if $is_running { get-service-uptime $service_name } else { 0 })
        # Optional access: an older or partial state file must not break status
        health_status: ($state.health_status? | default "unknown")
        restart_count: ($state.restart_count? | default 0)
        auto_start: $service_def.startup.auto_start
        dependencies: $service_def.dependencies
    }
}
|
||
|
|
|
||
|
|
# Get service PID
#
# Reads `<pid dir>/<service>.pid` and returns its content as an integer.
# Returns 0 when the file does not exist or cannot be parsed as an integer.
def get-service-pid [
    service_name: string  # Name of the service whose PID file is read
]: nothing -> int {
    let pid_file = $"($SERVICE_PID_DIR)/($service_name).pid"

    if not ($pid_file | path exists) {
        return 0
    }

    try {
        open $pid_file | str trim | into int
    } catch {
        0
    }
}
|
||
|
|
|
||
|
|
# Get service uptime in seconds
#
# Computes `now - started_at` from the service's state file. Returns 0 when
# the state file is missing, unreadable, has no `started_at` field, or the
# timestamp cannot be parsed.
def get-service-uptime [
    service_name: string  # Name of the service whose state file is read
]: nothing -> int {
    let state_file = $"($SERVICE_STATE_DIR)/($service_name).json"

    if not ($state_file | path exists) {
        return 0
    }

    try {
        let state = (open $state_file)

        if "started_at" in $state {
            let started = ($state.started_at | into datetime)
            let elapsed_ns = (((date now) - $started) | into int)
            # Floor division: `/` would yield a float whenever the nanosecond
            # count is not an exact multiple of one second.
            $elapsed_ns // 1_000_000_000
        } else {
            0
        }
    } catch {
        0
    }
}
|
||
|
|
|
||
|
|
# Start service
#
# Starts any dependencies that are not running, delegates the actual start to
# `start-service-by-mode` (lifecycle.nu), writes a fresh state file and then
# waits for the service via `wait-for-service` (health.nu), whose result is
# returned. Returns true without doing anything when the service is already
# running and `--force` is not given. Returns false when a forced stop, a
# dependency start, or the start itself fails.
#
# Note: the state file is rewritten with `restart_count: 0` on every start.
export def start-service [
    service_name: string  # Name of the service in the registry
    --force (-f)          # Stop a running instance first, then start again
]: nothing -> bool {
    # Ensure state directories exist
    init-service-state

    # Probe once: each check re-reads the registry and may run external tools
    let already_running = (is-service-running $service_name)

    if $already_running and not $force {
        print $"Service '($service_name)' is already running"
        return true
    }

    if $already_running {
        print $"Stopping existing instance of '($service_name)'..."
        # Starting on top of an instance that refused to stop would leave two
        # copies around, so a failed stop aborts the forced start.
        if not (stop-service $service_name --force) {
            print $"Failed to start service: ($service_name)"
            return false
        }
        sleep 2sec
    }

    let service_def = (get-service-definition $service_name)

    # Check dependencies
    for dep in ($service_def.dependencies? | default []) {
        if not (is-service-running $dep) {
            print $"Starting dependency: ($dep)"
            if not (start-service $dep) {
                print $"Failed to start dependency '($dep)' of service '($service_name)'"
                return false
            }
        }
    }

    print $"Starting service: ($service_name)"

    # Delegate to lifecycle module
    use lifecycle.nu start-service-by-mode
    let result = (start-service-by-mode $service_def $service_name)

    if $result {
        # Update state
        let state = {
            name: $service_name
            status: "running"
            started_at: (date now | format date "%Y-%m-%dT%H:%M:%S%z")
            health_status: "unknown"
            restart_count: 0
        }

        $state | to json | save -f $"($SERVICE_STATE_DIR)/($service_name).json"

        # Wait for health check
        print $"Waiting for ($service_name) to become healthy..."
        use health.nu wait-for-service
        wait-for-service $service_name $service_def.startup.start_timeout
    } else {
        print $"Failed to start service: ($service_name)"
        false
    }
}
|
||
|
|
|
||
|
|
# Stop service
#
# Delegates the stop to `stop-service-by-mode` (lifecycle.nu) and returns its
# result. On success the state file, if present, is updated with
# `status: "stopped"` and a `stopped_at` timestamp, and the PID file is
# removed. Returns true without doing anything when the service is not
# running.
export def stop-service [
    service_name: string  # Name of the service in the registry
    --force (-f)          # Forwarded to stop-service-by-mode
]: nothing -> bool {
    if not (is-service-running $service_name) {
        print $"Service '($service_name)' is not running"
        return true
    }

    print $"Stopping service: ($service_name)"

    let service_def = (get-service-definition $service_name)

    # Delegate to lifecycle module
    use lifecycle.nu stop-service-by-mode
    let result = (stop-service-by-mode $service_name $service_def $force)

    if $result {
        # Update state
        let state_file = $"($SERVICE_STATE_DIR)/($service_name).json"
        if ($state_file | path exists) {
            let state = (open $state_file)
            # `merge` overwrites the existing `status` field. Spreading
            # `$state` into a literal that also sets `status` would define
            # that column twice.
            let updated = ($state | merge {
                status: "stopped"
                stopped_at: (date now | format date "%Y-%m-%dT%H:%M:%S%z")
            })
            $updated | to json | save -f $state_file
        }

        # Remove PID file
        let pid_file = $"($SERVICE_PID_DIR)/($service_name).pid"
        if ($pid_file | path exists) {
            rm $pid_file
        }
    }

    $result
}
|
||
|
|
|
||
|
|
# Restart service
#
# Force-stops the service when it is running, waits two seconds, then starts
# it and returns the result of `start-service`. Returns false when the stop
# fails, because `start-service` would otherwise see the old instance, report
# "already running" and return true without restarting anything.
export def restart-service [
    service_name: string  # Name of the service in the registry
]: nothing -> bool {
    print $"Restarting service: ($service_name)"

    if (is-service-running $service_name) {
        if not (stop-service $service_name --force) {
            print $"Failed to stop service: ($service_name)"
            return false
        }
        sleep 2sec
    }

    start-service $service_name
}
|
||
|
|
|
||
|
|
# Check service health
#
# Passes the service's `health_check` definition from the registry to
# `perform-health-check` (health.nu) and returns that command's result.
# NOTE(review): `is-service-running` reads a `healthy` field from this
# result; the rest of its shape is defined in health.nu.
export def check-service-health [
    service_name: string  # Name of the service in the registry
]: nothing -> record {
    let service_def = (get-service-definition $service_name)

    use health.nu perform-health-check
    perform-health-check $service_name $service_def.health_check
}
|
||
|
|
|
||
|
|
# Wait for service to be healthy
#
# Thin public wrapper around `wait-for-service` from health.nu; returns
# whatever that command returns.
export def wait-for-service [
    service_name: string  # Name of the service to wait for
    timeout: int = 60     # Forwarded unchanged; presumably seconds — TODO confirm in health.nu
]: nothing -> bool {
    # Imported inside the body: the call below is meant to reach the
    # health.nu implementation rather than recurse into this wrapper.
    use health.nu wait-for-service
    wait-for-service $service_name $timeout
}
|
||
|
|
|
||
|
|
# Get all services
#
# Returns one status record (see `get-service-status`) for every service
# name found in the registry.
export def list-all-services []: nothing -> list {
    load-service-registry
    | columns
    | each { |service| get-service-status $service }
}
|
||
|
|
|
||
|
|
# Get running services
#
# Filters the output of `list-all-services` down to the entries whose
# `status` field is "running".
export def list-running-services []: nothing -> list {
    list-all-services | where status == "running"
}
|
||
|
|
|
||
|
|
# Get service logs
#
# Returns the tail of `<log dir>/<service>.log`, or a "No logs found"
# message when that file does not exist. Relies on the external `tail`
# program being available on PATH.
export def get-service-logs [
    service_name: string  # Name of the service whose log file is read
    --lines: int = 50     # Number of trailing lines to show
    --follow (-f)         # Keep streaming new log lines (`tail -f`)
]: nothing -> string {
    let log_file = $"($SERVICE_LOG_DIR)/($service_name).log"

    if not ($log_file | path exists) {
        return $"No logs found for ($service_name)"
    }

    # `^` forces the external program so a custom `tail` command or alias
    # cannot shadow it.
    if $follow {
        ^tail -f -n $lines $log_file
    } else {
        ^tail -n $lines $log_file
    }
}
|
||
|
|
|
||
|
|
# Initialize service state directories
#
# Creates the state, PID and log directories used by this module.
export def init-service-state [] {
    mkdir $SERVICE_STATE_DIR $SERVICE_PID_DIR $SERVICE_LOG_DIR
}
|