prvng_core/nulib/lib_provisioning/services/manager.nu
Jesús Pérez e5ffc55104
refactor(23 files): selective imports + dangling/broken cleanup (ADR-025 L2/L3)
Large combined batch of 23 files refactored from star-imports to selective imports.
Grouped because two sub-batches accumulated in staging without an intermediate
commit.

=== Orchestrator facades (Layer 3) ===
  ai/mod.nu              [12 symbols from ai/lib.nu]
  config/loader.nu       [14 symbols from loader/mod.nu]
  config/accessor/mod.nu [15 symbols from accessor/functions.nu]
  sops/mod.nu            [11 symbols from sops/lib.nu]
  user/mod.nu            [16 symbols from user/config.nu]

=== Selective imports ===
  defs/lists.nu                      utils/on_select (kept, selective)
  services/manager.nu                (all dead dropped)
  webhook/ai_webhook.nu              ai/lib [4] + settings/lib
  kms/lib.nu                         utils/error + utils/interface + plugins/kms
  gitea/locking.nu                   api_client [8]
  gitea/workspace_git.nu             api_client [3]
  gitea/extension_publish.nu         api_client [8] + config/loader
  infra_validator/rules_engine.nu    config_loader [3]
  plugins/kms.nu                     config/accessor/core [config-get]
  coredns/api_client.nu              config/loader [get-config]

=== Dangling imports removed (target file does not exist) ===
  coredns/docker.nu                  ../utils/log.nu → deleted (uses corefile.nu [2])
  coredns/zones.nu                   ../utils/log.nu → deleted (uses corefile.nu [1])
  coredns/service.nu                 ../utils/log.nu → deleted (uses corefile.nu [2])
  coredns/corefile.nu                ../utils/log.nu → deleted

=== Broken paths cleaned up ===
  project/detect.nu   Former `use ../../../lib_provisioning *` resolved to
    non-existent path (core/lib_provisioning). Silent no-op at runtime.
    Removed. Error count went 19 -> 17.

=== Dead imports dropped ===
  utils/ssh.nu           config/accessor DROPPED (dead)
  utils/init.nu          config/accessor DROPPED (dead)
  infra_validator/agent_interface.nu   report_generator DROPPED (dead)

=== Dynamic imports preserved ===
  providers/loader.nu   line 179 `use ($provider_entry.entry_point) *` is
    intentional runtime dispatch — not convertible to selective.

Validation: all files match pre-existing baseline. Gitea subsystem has
known pre-existing 50-error noise (transitive); independent of this work.

Refs: ADR-025
2026-04-17 12:13:13 +01:00

373 lines
10 KiB
Text

#!/usr/bin/env nu
# Service Manager Core
# Manages platform service lifecycle, registry, and health checks
# config/loader star-import was dead — dropped (ADR-025 Phase 3 Layer 2).
# Directory holding the per-service JSON state files, rooted at $env.HOME.
def get-service-state-dir [] {
    [$env.HOME "/.provisioning/services/state"] | str join
}
# Directory holding the per-service PID files, rooted at $env.HOME.
def get-service-pid-dir [] {
    [$env.HOME "/.provisioning/services/pids"] | str join
}
# Directory holding the per-service log files, rooted at $env.HOME.
def get-service-log-dir [] {
    [$env.HOME "/.provisioning/services/logs"] | str join
}
# Load the service registry from the provisioning configuration.
#
# Opens `<paths.base>/config/services.toml` (base path taken from the loaded
# provisioning config) and returns its `services` entry.
# Raises "Service registry not found" when that file does not exist.
#
# NOTE(review): `load-provisioning-config` is neither defined nor imported in
# the visible part of this file — confirm it is still in scope now that the
# config/loader import has been dropped.
export def load-service-registry [] {
    let base_dir = (load-provisioning-config | get paths.base)
    let registry_path = $"($base_dir)/config/services.toml"

    # Happy path first: the registry file is present.
    if ($registry_path | path exists) {
        return (open $registry_path | get services)
    }

    error make {
        msg: "Service registry not found"
        label: {
            text: $"Missing: ($registry_path)"
            span: (metadata $registry_path).span
        }
    }
}
# Look up one service's definition in the registry.
#
# Parameters:
#   service_name - key of the service inside the registry record
# Returns the registry entry stored under that key.
# Raises an error listing the available service names when the key is absent.
export def get-service-definition [
    service_name: string
] {
    let registry = (load-service-registry)

    if $service_name in ($registry | columns) {
        return ($registry | get $service_name)
    }

    # Unknown service: report every name the registry does contain.
    let available = (($registry | columns) | str join ", ")
    error make {
        msg: $"Service '($service_name)' not found in registry"
        label: {
            text: $"Available services: ($available)"
            span: (metadata $service_name).span
        }
    }
}
# Check whether a service is currently running.
#
# The check depends on `deployment.mode` in the service definition:
#   binary         - a PID file exists and `ps` lists a process with that PID
#   docker         - `docker ps` lists a container with the configured name
#   docker-compose - `docker ps` lists a container carrying the compose
#                    project/service labels (project defaults to "provisioning")
#   kubernetes     - the deployment reports more than zero ready replicas
#   remote         - the health check reports `healthy`
# Any other mode yields false.
#
# Parameters:
#   service_name - key of the service inside the registry
# Returns a boolean.
export def is-service-running [
    service_name: string
] {
    let service_def = (get-service-definition $service_name)

    match $service_def.deployment.mode {
        "binary" => {
            let pid_file = $"(get-service-pid-dir)/($service_name).pid"
            if not ($pid_file | path exists) {
                return false
            }
            # An empty or corrupt PID file means "not running" rather than a
            # crash, matching get-service-pid's fallback to 0.
            let pid = (try { open $pid_file | str trim | into int } catch { 0 })
            if $pid <= 0 {
                return false
            }
            (ps | where pid == $pid | length) > 0
        }
        "docker" => {
            let container_name = $service_def.deployment.docker.container_name
            let containers = (docker ps --filter $"name=($container_name)" --format "{{.Names}}" | lines)
            # The name filter is only a filter; require an exact name match.
            $container_name in $containers
        }
        "docker-compose" => {
            let compose_service = $service_def.deployment.docker_compose.service_name
            let project = $service_def.deployment.docker_compose.project_name? | default "provisioning"
            let containers = (docker ps --filter $"label=com.docker.compose.project=($project)" --filter $"label=com.docker.compose.service=($compose_service)" --format "{{.Names}}" | lines)
            ($containers | length) > 0
        }
        "kubernetes" => {
            # Check if deployment exists and has ready pods.
            let namespace = $service_def.deployment.kubernetes.namespace
            let deployment = $service_def.deployment.kubernetes.deployment_name
            # `complete` only accepts external-command output, and this
            # pipeline ends in internal commands, so failures (kubectl missing,
            # deployment absent, unparsable output) are handled with try/catch
            # and count as zero ready replicas.
            let ready = (try {
                kubectl get deployment $deployment -n $namespace -o json | from json | get status.readyReplicas? | default 0 | into int
            } catch {
                0
            })
            $ready > 0
        }
        "remote" => {
            # For remote services, rely on the health endpoint.
            check-service-health $service_name | get healthy
        }
        _ => {
            false
        }
    }
}
# Build a status record for one service.
#
# Combines the live running check, the registry definition and the persisted
# state file `<state-dir>/<service_name>.json`. When no state file exists,
# health_status falls back to "unknown" and restart_count to 0.
#
# Parameters:
#   service_name - key of the service inside the registry
# Returns a record with: name, type, category, status, deployment_mode, pid,
# uptime, health_status, restart_count, auto_start, dependencies.
export def get-service-status [
    service_name: string
] {
    let running_label = if (is-service-running $service_name) { "running" } else { "stopped" }
    let definition = (get-service-definition $service_name)

    let state_path = $"(get-service-state-dir)/($service_name).json"
    let fallback_state = {
        name: $service_name
        status: "unknown"
        health_status: "unknown"
        restart_count: 0
    }
    let persisted = if ($state_path | path exists) { open $state_path } else { $fallback_state }

    {
        name: $service_name
        type: $definition.type
        category: $definition.category
        status: $running_label
        deployment_mode: $definition.deployment.mode
        pid: (get-service-pid $service_name)
        uptime: (get-service-uptime $service_name)
        health_status: $persisted.health_status
        restart_count: $persisted.restart_count
        auto_start: $definition.startup.auto_start
        dependencies: $definition.dependencies
    }
}
# Get the PID recorded for a service.
#
# Reads `<pid-dir>/<service_name>.pid` and parses its trimmed content as an
# integer.
#
# Parameters:
#   service_name - key of the service inside the registry
# Returns the PID as an int, or 0 when the PID file is missing or its content
# cannot be parsed.
def get-service-pid [
    service_name: string
] {
    # Only `(...)` is interpolated inside $"..."; the previous
    # `[$service_name]` was literal text, so the file was never found.
    let pid_file = $"(get-service-pid-dir)/($service_name).pid"
    if not ($pid_file | path exists) {
        return 0
    }

    # try/catch replaces `do { ... } | complete`: `complete` only works on
    # external-command output, not on internal pipelines.
    try {
        open $pid_file | str trim | into int
    } catch {
        0
    }
}
# Get service uptime in whole seconds.
#
# Reads `started_at` from `<state-dir>/<service_name>.json` and measures the
# time elapsed since then.
#
# Parameters:
#   service_name - key of the service inside the registry
# Returns the uptime as an int, or 0 when the state file is missing, has no
# `started_at` field, or the timestamp cannot be parsed.
def get-service-uptime [
    service_name: string
] {
    # Only `(...)` is interpolated inside $"..."; the previous
    # `[$service_name]` was literal text, so the file was never found.
    let state_file = $"(get-service-state-dir)/($service_name).json"
    if not ($state_file | path exists) {
        return 0
    }

    let state = (open $state_file)
    if "started_at" not-in $state {
        return 0
    }

    # try/catch replaces `do { ... } | complete`: `complete` only works on
    # external-command output, not on internal pipelines.
    try {
        let started = ($state.started_at | into datetime)
        # A duration converts to an int number of nanoseconds.
        let elapsed_ns = (((date now) - $started) | into int)
        ($elapsed_ns / 1_000_000_000) | into int
    } catch {
        0
    }
}
# Start a service, starting any non-running dependencies first.
#
# Parameters:
#   service_name - key of the service inside the registry
#   --force (-f) - stop an already-running instance and start it again
# Returns true when the service is already running (without --force), the
# result of the health wait after a successful start, or false when the
# lifecycle module reports that the start failed.
#
# NOTE(review): the result of starting a dependency is not checked; the
# service start proceeds regardless.
export def start-service [
    service_name: string
    --force (-f)
] {
    # Delegated implementations live in sibling modules.
    use ./lifecycle.nu start-service-by-mode
    use ./health.nu wait-for-service

    # Make sure the state, PID and log directories exist.
    for dir in [(get-service-state-dir) (get-service-pid-dir) (get-service-log-dir)] {
        mkdir $dir
    }

    let already_running = (is-service-running $service_name)
    if $already_running and not $force {
        print $"Service '($service_name)' is already running"
        return true
    }

    # With --force, re-check and replace the existing instance.
    if $force and (is-service-running $service_name) {
        print $"Stopping existing instance of '($service_name)'..."
        stop-service $service_name --force
        sleep 2sec
    }

    let definition = (get-service-definition $service_name)

    # Bring up every dependency that is not running yet.
    for dep in $definition.dependencies {
        if not (is-service-running $dep) {
            print $"Starting dependency: ($dep)"
            start-service $dep
        }
    }

    print $"Starting service: ($service_name)"
    let started = (start-service-by-mode $definition $service_name)

    if not $started {
        print $"Failed to start service: ($service_name)"
        return false
    }

    # Persist a fresh state record for the newly started service.
    let state_path = $"(get-service-state-dir)/($service_name).json"
    {
        name: $service_name
        status: "running"
        started_at: (date now | format date "%Y-%m-%dT%H:%M:%S%z")
        health_status: "unknown"
        restart_count: 0
    } | to json | save -f $state_path

    # The health wait result is the function's return value.
    print $"Waiting for ($service_name) to become healthy..."
    wait-for-service $service_name $definition.startup.start_timeout
}
# Stop a running service.
#
# Parameters:
#   service_name - key of the service inside the registry
#   --force (-f) - forwarded to the lifecycle module's stop routine
# Returns true when the service was not running; otherwise the result reported
# by the lifecycle module. On success the persisted state (if any) is marked
# "stopped" with a `stopped_at` timestamp and the PID file is removed.
export def stop-service [
    service_name: string
    --force (-f)
] {
    if not (is-service-running $service_name) {
        print $"Service '($service_name)' is not running"
        return true
    }

    print $"Stopping service: ($service_name)"
    let service_def = (get-service-definition $service_name)

    # Delegate to lifecycle module
    use ./lifecycle.nu stop-service-by-mode
    let result = (stop-service-by-mode $service_name $service_def $force)

    if $result {
        # Update state
        let state_file = $"(get-service-state-dir)/($service_name).json"
        if ($state_file | path exists) {
            # `merge` overrides the existing `status` key. The state written by
            # start-service always has `status`, and a record literal that
            # spreads $state next to an explicit `status:` field defines that
            # column twice.
            open $state_file
            | merge {
                status: "stopped"
                stopped_at: (date now | format date "%Y-%m-%dT%H:%M:%S%z")
            }
            | to json
            | save -f $state_file
        }

        # Remove PID file
        let pid_file = $"(get-service-pid-dir)/($service_name).pid"
        if ($pid_file | path exists) {
            rm $pid_file
        }
    }

    $result
}
# Restart a service: force-stop it when it is running, then start it.
#
# Parameters:
#   service_name - key of the service inside the registry
# Returns whatever start-service returns.
export def restart-service [
    service_name: string
] {
    print $"Restarting service: ($service_name)"

    let running = (is-service-running $service_name)
    if $running {
        stop-service $service_name --force
        # Pause between the stop and the start.
        sleep 2sec
    }

    start-service $service_name
}
# Run the health check configured for a service.
#
# Parameters:
#   service_name - key of the service inside the registry
# Returns the result of `perform-health-check` from ./health.nu, called with
# the service's `health_check` definition.
export def check-service-health [
    service_name: string
] {
    use ./health.nu perform-health-check

    let health_cfg = (get-service-definition $service_name | get health_check)
    perform-health-check $service_name $health_cfg
}
# Wait for a service to become healthy.
#
# Parameters:
#   service_name - key of the service inside the registry
#   timeout      - passed through to `wait-for-service` (default 60)
# Returns the result of `wait-for-service` from ./health.nu.
export def wait-for-service-health [
    service_name: string
    timeout: int = 60
] {
    use ./health.nu wait-for-service

    wait-for-service $service_name $timeout
}
# List the status record of every service in the registry.
#
# Returns one get-service-status record per registry key.
export def list-all-services [] {
    let service_names = (load-service-registry | columns)

    $service_names | each { |svc| get-service-status $svc }
}
# List the status records of services whose status is "running".
export def list-running-services [] {
    let everything = (list-all-services)

    $everything | where { |svc| $svc.status == "running" }
}
# Show the tail of a service's log file.
#
# Parameters:
#   service_name - service whose `<log-dir>/<service_name>.log` is read
#   --lines      - number of trailing lines passed to `tail -n` (default 50)
#   --follow (-f)- keep following the file (`tail -f`)
# Returns the `tail` output, or the string "No logs found for <service_name>"
# when the log file does not exist.
export def get-service-logs [
    service_name: string
    --lines: int = 50
    --follow (-f)
] {
    let log_path = $"(get-service-log-dir)/($service_name).log"

    if ($log_path | path exists) {
        if $follow {
            tail -f -n $lines $log_path
        } else {
            tail -n $lines $log_path
        }
    } else {
        $"No logs found for ($service_name)"
    }
}
# Create the state, PID and log directories used by the service manager.
export def init-service-state [] {
    for dir in [(get-service-state-dir) (get-service-pid-dir) (get-service-log-dir)] {
        mkdir $dir
    }
}