prvng_core/nulib/workflows/server_create.nu

389 lines
15 KiB
Text
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

use std
# Selective imports replacing fat-path (ADR-025 Phase 4).
use lib_provisioning/config/accessor/core.nu [config-get]
use lib_provisioning/platform/target.nu [detect-platform-mode]
use lib_provisioning/utils/interface.nu [_print]
use lib_provisioning/utils/script-compression.nu [compress-workflow]
use lib_provisioning/utils/service-check.nu [verify-daemon-or-block verify-service-or-fail]
use lib_provisioning/utils/simple_validation.nu [check-command]
use ../servers/delete.nu [sync-servers-state-post-op]
use ../servers/utils.nu *
# Build the compressed creation script that is sent to the orchestrator.
#
# Reads the script captured in $env.LAST_RENDERED_SCRIPT and passes it to
# `compress-workflow`. There is deliberately no fallback: a missing rendered
# script, or an empty compression result, aborts via `error make`.
# NOTE: `settings` and `servers_list` are not read by this function.
def prepare-server-creation-script [settings: record, servers_list: list<string>] {
    let script_text = ($env.LAST_RENDERED_SCRIPT? | default "")

    # No rendered script means template processing did not produce one - abort.
    if ($script_text | is-empty) {
        error make {
            msg: "FATAL: No rendered script captured from template processing
The orchestrator REQUIRES a complete, rendered script to execute.
Template rendering FAILED - check provider configuration and template paths.
This is NOT a fallback situation. Aborting."
        }
    }

    # Compress the rendered script for transmission to the orchestrator.
    let packed = (compress-workflow "" {} $script_text)
    if ($packed | is-not-empty) {
        $packed
    } else {
        error make { msg: "FATAL: Script compression failed" }
    }
}
# Resolve the orchestrator URL to use.
# Precedence: the explicit --orchestrator flag, then
# $env.PROVISIONING_ORCHESTRATOR_URL, then `config-get` for
# "platform.orchestrator.url" (called with "http://localhost:9011" as its
# second argument, presumably the fallback value).
def get-orchestrator-url [--orchestrator: string = ""] {
    let from_env = ($env.PROVISIONING_ORCHESTRATOR_URL? | default "")
    if ($orchestrator | is-not-empty) {
        $orchestrator
    } else if ($from_env | is-not-empty) {
        $from_env
    } else {
        config-get "platform.orchestrator.url" "http://localhost:9011"
    }
}
# Report whether `detect-platform-mode` classifies the given orchestrator URL
# as "local"; callers use this to decide whether to take the plugin path.
def use-local-plugin [orchestrator_url: string] {
    let mode = (detect-platform-mode $orchestrator_url)
    $mode == "local"
}
# Submit a server-creation workflow to the orchestrator.
#
# Steps: verify the provisioning daemon, resolve the orchestrator URL, build
# the payload (adding the compressed-script fields when --script-compressed is
# given), verify the orchestrator service, POST the payload to
# /workflows/servers/create and, with --wait, poll until the task finishes.
#
# Returns a record:
#   { status: "submitted", task_id }             when --wait is not set
#   the result of wait_for_workflow_completion   when --wait is set
#   { status: "error", message }                 on daemon or submission failure
#   the record from verify-service-or-fail       when that check reports "error"
export def server_create_workflow [
    infra: string                            # Infrastructure target
    settings?: string                        # Settings file path
    servers?: list                           # Specific servers to create (empty = all)
    --check (-c)                             # Check mode only
    --wait (-w)                              # Wait for completion
    --orchestrator: string = ""              # Orchestrator URL (optional, uses platform config if not provided)
    --script-compressed: string = ""         # Compressed script (gzip+base64 encoded)
    --template-path: string = ""             # Path to template used
    --template-vars-compressed: string = ""  # Compressed template variables
    --compression-ratio: float = 0.0         # Compression ratio for monitoring
    --original-size: int = 0                 # Original script size
    --compressed-size: int = 0               # Compressed script size
] {
    # CRITICAL: Verify daemon availability FIRST (required for ALL operations)
    let daemon_check = (verify-daemon-or-block "create server")
    if $daemon_check.status == "error" {
        return {status: "error", message: "provisioning_daemon not available"}
    }

    let orch_url = (get-orchestrator-url --orchestrator=$orchestrator)

    # Build base workflow data
    let base_data = {
        infra: $infra,
        settings: ($settings | default ""),
        servers: ($servers | default []),
        check_mode: $check,
        wait: $wait
    }

    # Add compression data if provided (complete auditable unit)
    let workflow_data = if ($script_compressed | is-not-empty) {
        $base_data | merge {
            template_path: $template_path,
            template_vars_compressed: $template_vars_compressed,
            script_compressed: $script_compressed,
            script_encoding: "tar+gzip+base64",
            compression_ratio: $compression_ratio,
            original_size: $original_size,
            compressed_size: $compressed_size
        }
    } else {
        $base_data
    }

    # Verify orchestrator availability BEFORE attempting submission
    # Using reusable service check pattern (see .claude/guidelines/provisioning.md)
    # Shows cascade failure report (external services + platform services)
    let check_result = (verify-service-or-fail $orch_url "Orchestrator"
        --check-command "provisioning platform status"
        --check-alias "prvng plat st"
        --start-command "provisioning platform start orchestrator"
        --start-alias "prvng plat start orchestrator"
    )
    if $check_result.status == "error" {
        return $check_result
    }

    # Submit to orchestrator. The port was verified above, so a failure here is
    # a request failure; it is converted into the same { success, error }
    # envelope the orchestrator uses, so callers always receive a status record
    # instead of an uncaught error.
    let response = (try {
        http post --max-time 30sec $"($orch_url)/workflows/servers/create" --content-type "application/json" ($workflow_data | to json)
    } catch {|err|
        { success: false, error: $"Workflow submission to ($orch_url) failed: ($err.msg)" }
    })

    # Optional access: a reply without `success`/`error` is treated as a failure.
    if not ($response | get success? | default false) {
        return { status: "error", message: ($response | get error? | default "orchestrator returned failure") }
    }

    let task_id = ($response | get data)
    _print $"Server creation workflow submitted: ($task_id)"

    if $wait {
        let result = (wait_for_workflow_completion $orch_url $task_id)
        # After a successful run, sync the servers state when the workspace
        # path and the infra name are both known.
        if ($result | get status) == "completed" {
            let ws_root = ($env.PROVISIONING_WORKSPACE_PATH? | default "")
            let infra_name = ($infra | path basename)
            if ($ws_root | is-not-empty) and ($infra_name | is-not-empty) {
                print "\n[state sync]"
                sync-servers-state-post-op $ws_root $infra_name
            }
        }
        $result
    } else {
        { status: "submitted", task_id: $task_id }
    }
}
# Poll the orchestrator until the given task reaches a terminal state.
#
# Parameters:
#   orchestrator - base URL of the orchestrator
#   task_id      - id of the task to poll at /tasks/<task_id>
#
# Returns a record:
#   { status: "completed", task }   task status was "Completed"
#   { status: "failed", task }      task status was "Failed"
#   { status: "error", message }    too many poll failures, an unsuccessful
#                                   envelope, or the iteration cap was hit
def wait_for_workflow_completion [orchestrator: string, task_id: string] {
    _print "Waiting for workflow completion..."
    mut result = { status: "pending" }
    mut poll_errors = 0
    mut iteration = 0
    let max_poll_errors = 8
    # Hard cap on polling rounds. Each round sleeps 2s (3s after a failed poll)
    # in addition to the request time, so the cap is a few minutes, not a fixed
    # wall-clock duration.
    let max_iterations = 120

    while true {
        $iteration = $iteration + 1
        if $iteration > $max_iterations {
            return { status: "error", message: $"Workflow timed out after ($max_iterations) polling iterations" }
        }

        # Always use HTTP — plugin proved unreliable for tasks created via HTTP API
        # --full gives {status, headers, body}; --allow-errors prevents throw on 4xx/5xx.
        # Transport failures (refused connection, timeout) still raise, so they
        # are caught and mapped to status 0 to reach the retry branch below.
        let http_resp = (try {
            http get --max-time 10sec --full --allow-errors $"($orchestrator)/tasks/($task_id)"
        } catch {|err|
            { status: 0 }
        })
        let http_status = ($http_resp | get status? | default 0)

        if $http_status == 0 or $http_status >= 500 {
            $poll_errors = $poll_errors + 1
            _print $"⚠️ Poll ($iteration): HTTP ($http_status), retry ($poll_errors)/($max_poll_errors)..."
            if $poll_errors >= $max_poll_errors {
                return { status: "error", message: $"Task ($task_id) unreachable after ($max_poll_errors) retries" }
            }
            sleep 3sec
            continue
        }

        if $http_status == 404 {
            $poll_errors = $poll_errors + 1
            _print $"⚠️ Poll ($iteration): task not found (404), retry ($poll_errors)/($max_poll_errors)..."
            if $poll_errors >= $max_poll_errors {
                return { status: "error", message: $"Task ($task_id) not found after ($max_poll_errors) retries" }
            }
            sleep 3sec
            continue
        }

        # A usable response resets the consecutive-failure counter.
        $poll_errors = 0
        let resp = ($http_resp | get body)
        if not ($resp | get success? | default false) {
            return { status: "error", message: ($resp | get error? | default "orchestrator returned failure") }
        }

        let task = ($resp | get data)
        let task_status = ($task | get status)

        match $task_status {
            "Completed" => {
                _print $"✅ Workflow completed successfully"
                # `output` is optional on the task record; skip it when absent.
                let task_output = ($task | get output?)
                if ($task_output | is-not-empty) {
                    _print "Output:"
                    _print $task_output
                }
                $result = { status: "completed", task: $task }
                break
            },
            "Failed" => {
                _print $"❌ Workflow failed"
                # `error` is optional on the task record; skip it when absent.
                let task_error = ($task | get error?)
                if ($task_error | is-not-empty) {
                    _print "Error:"
                    _print $task_error
                }
                $result = { status: "failed", task: $task }
                break
            },
            "Running" => {
                _print $"🔄 Workflow is running..."
            },
            _ => {
                _print $"⏳ Workflow status: ($task_status)"
            }
        }

        sleep 2sec
    }

    return $result
}
# Bridge function to convert legacy server create calls to workflow.
#
# Translates the legacy positional arguments into a `server_create_workflow`
# call and maps its result back to the legacy `{ status: bool, error: string }`
# shape (plus `task_id` when the workflow was only submitted).
# `outfile` is accepted but not read by this function.
#
# NOTE(review): the --orchestrator default here is "http://localhost:8080" and
# is always forwarded, so the env/config resolution in get-orchestrator-url
# (which falls back to port 9011) is never reached via this bridge — confirm
# this is intended.
# NOTE(review): an out-of-range `serverpos` yields an empty server list, which
# server_create_workflow documents as "empty = all" — confirm this is intended.
export def on_create_servers_workflow [
    settings: record                                  # Settings record
    check: bool                                       # Only check mode no servers will be created
    wait: bool                                        # Wait for creation
    outfile?: string                                  # Out file for creation
    hostname?: string                                 # Server hostname in settings
    serverpos?: int                                   # Server position in settings
    --orchestrator: string = "http://localhost:8080"  # Orchestrator URL
    --script-compressed: string = ""                  # Pre-rendered compressed script (skip local render)
    --template-path: string = ""                      # Template path for auditing
    --compression-ratio: float = 0.0                  # Compression ratio for monitoring
    --original-size: int = 0                          # Original script size in bytes
    --compressed-size: int = 0                        # Compressed script size in bytes
] {
    # Convert legacy parameters to workflow format.
    # hostname wins over serverpos; serverpos is 1-based.
    let servers_list = if $hostname != null {
        [$hostname]
    } else if $serverpos != null {
        let total = ($settings.data.servers | length)
        if $serverpos <= $total and $serverpos > 0 {
            let target_server = ($settings.data.servers | get ($serverpos - 1))
            [$target_server.hostname]
        } else {
            []
        }
    } else {
        []
    }

    # Extract infra and settings paths from settings record
    let infra_path = ($settings | get infra_path? | default "")
    let settings_path = ($settings | get src? | default "")

    # Prepare compression data — use pre-rendered script when caller already compressed it,
    # otherwise fall back to rendering from $env.LAST_RENDERED_SCRIPT (single-server path)
    let compression_params = if ($script_compressed | is-not-empty) {
        {
            script_compressed: $script_compressed,
            template_path: $template_path,
            template_vars_compressed: "",
            compression_ratio: $compression_ratio,
            original_size: $original_size,
            compressed_size: $compressed_size
        }
    } else if not $check and ($servers_list | length) >= 1 {
        prepare-server-creation-script $settings $servers_list
    } else {
        {}
    }

    # Submit workflow to orchestrator with compression data if available
    let workflow_result = if ($compression_params | is-empty) {
        server_create_workflow $infra_path $settings_path $servers_list --check=$check --wait=$wait --orchestrator $orchestrator
    } else {
        server_create_workflow $infra_path $settings_path $servers_list --check=$check --wait=$wait --orchestrator $orchestrator --script-compressed ($compression_params | get script_compressed? | default "") --template-path ($compression_params | get template_path? | default "") --template-vars-compressed ($compression_params | get template_vars_compressed? | default "") --compression-ratio ($compression_params | get compression_ratio? | default 0.0) --original-size ($compression_params | get original_size? | default 0) --compressed-size ($compression_params | get compressed_size? | default 0)
    }

    # A "failed" result carries the task record but no `message`; surface the
    # task's own error text when present instead of only the generic fallback.
    let task_error = ($workflow_result | get task?.error?)
    let failure_fallback = if ($task_error | is-empty) {
        "Workflow failed"
    } else {
        $"($task_error)"
    }
    let failure_message = ($workflow_result | get message? | default $failure_fallback)

    match ($workflow_result | get status) {
        "completed" => { status: true, error: "" },
        "submitted" => {
            status: true,
            error: "",
            task_id: ($workflow_result | get task_id)
        },
        "error" | "failed" => {
            status: false,
            error: $failure_message
        },
        _ => { status: false, error: "Unknown workflow status" }
    }
}
# Workflow status check command.
#
# For an orchestrator URL classified as local, the task is looked up in the
# `orch tasks` plugin output; otherwise it is fetched from /tasks/<task_id>
# over HTTP. Returns a summary record of the task, or { error: <message> }
# when the task is not found or the orchestrator reports failure.
# Note that the two paths return different field sets.
export def "workflow status" [
    task_id: string                                   # Task ID to check
    --orchestrator: string = "http://localhost:8080"  # Orchestrator URL
] {
    # Use plugin for local orchestrator (~5ms vs ~50ms with HTTP)
    if (use-local-plugin $orchestrator) {
        let matches = (orch tasks | where id == $task_id)
        # Check emptiness BEFORE `first`: `first` raises on an empty list, which
        # would make the "not found" result unreachable.
        if ($matches | is-empty) {
            return { error: $"Task ($task_id) not found" }
        }
        let task = ($matches | first)
        return {
            id: ($task | get id),
            status: ($task | get status),
            priority: ($task | get priority),
            created_at: ($task | get created_at),
            workflow_id: ($task | get workflow_id)
        }
    }

    # Fall back to HTTP for remote orchestrators
    let response = (http get $"($orchestrator)/tasks/($task_id)")
    # Optional access: a reply without `success`/`error` is treated as a failure.
    if not ($response | get success? | default false) {
        return { error: ($response | get error? | default "orchestrator returned failure") }
    }

    let task = ($response | get data)
    {
        id: ($task | get id),
        name: ($task | get name),
        status: ($task | get status),
        created_at: ($task | get created_at),
        started_at: ($task | get started_at? | default null),
        completed_at: ($task | get completed_at? | default null),
        output: ($task | get output? | default null),
        error: ($task | get error? | default null)
    }
}
# List all workflow tasks known to the orchestrator.
#
# A URL classified as local is served by the `orch tasks` plugin call; any
# other URL is queried over HTTP at /tasks. When the HTTP reply is not
# successful, its error is printed and an empty list is returned.
export def "workflow list" [
    --orchestrator: string = "http://localhost:8080"  # Orchestrator URL
] {
    # Plugin path for a local orchestrator (<10ms vs ~50ms with HTTP)
    if (use-local-plugin $orchestrator) {
        return (orch tasks)
    }

    # HTTP path for remote orchestrators
    let reply = (http get $"($orchestrator)/tasks")
    if ($reply | get success) {
        $reply | get data
    } else {
        let reason = ($reply | get error)
        _print $"Error: ($reason)"
        []
    }
}
# Workflow health check.
#
# For an orchestrator URL classified as local, health comes from the
# `orch status` plugin call; otherwise /health is queried over HTTP.
# Returns { status, message } (plus plugin_mode: true on the plugin path),
# where status is "healthy", "stopped" (plugin path only) or "unhealthy".
export def "workflow health" [
    --orchestrator: string = "http://localhost:8080"  # Orchestrator URL
] {
    # Use plugin for local orchestrator (<5ms vs ~50ms with HTTP)
    if (use-local-plugin $orchestrator) {
        let status = (orch status)
        return {
            status: (if $status.running { "healthy" } else { "stopped" }),
            message: $"Orchestrator running: ($status.running)",
            plugin_mode: true
        }
    }

    # Fall back to HTTP for remote orchestrators.
    # A health probe must report an unreachable service rather than abort, so
    # request failures are caught and turned into an "unhealthy" result.
    let probe = (try {
        { reachable: true, body: (http get $"($orchestrator)/health") }
    } catch {|err|
        { reachable: false, body: $err.msg }
    })
    if not $probe.reachable {
        return { status: "unhealthy", message: $"Orchestrator unreachable: ($probe.body)" }
    }

    let response = $probe.body
    # Optional access: a reply without `success` is treated as unhealthy.
    if ($response | get success? | default false) {
        { status: "healthy", message: ($response | get data) }
    } else {
        { status: "unhealthy", message: "Orchestrator returned error" }
    }
}