# prvng_core/nulib/workflows/server_create.nu

use std
# Selective imports replacing fat-path (ADR-025 Phase 4).
use lib_provisioning/config/accessor/core.nu [config-get]
use lib_provisioning/platform/target.nu [detect-platform-mode]
use lib_provisioning/utils/interface.nu [_print]
use lib_provisioning/utils/script-compression.nu [compress-workflow]
use lib_provisioning/utils/service-check.nu [verify-daemon-or-block verify-service-or-fail]
use lib_provisioning/utils/simple_validation.nu [check-command]
use ../servers/delete.nu [sync-servers-state-post-op]
use ../servers/utils.nu *

# Compress the script captured in $env.LAST_RENDERED_SCRIPT for submission.
#
# Reads the rendered script from the environment, hands it to `compress-workflow`
# and returns whatever that helper produces. There is deliberately no fallback:
# a missing script or an empty compression result raises an error.
#
# `settings` and `servers_list` are accepted but not read by this body.
def prepare-server-creation-script [settings: record, servers_list: list<string>] {
    let script_text = ($env.LAST_RENDERED_SCRIPT? | default "")

    if ($script_text | is-not-empty) {
        # A rendered script exists - pack it for transmission to the orchestrator
        let packed = (compress-workflow "" {} $script_text)

        if ($packed | is-empty) {
            error make {
                msg: "FATAL: Script compression failed"
            }
        }

        return $packed
    }

    # Nothing was captured during template processing: abort, never improvise a script
    error make {
        msg: "FATAL: No rendered script captured from template processing

The orchestrator REQUIRES a complete, rendered script to execute.
Template rendering FAILED - check provider configuration and template paths.

This is NOT a fallback situation. Aborting."
    }
}

# Workflow definition for server creation

# Resolve the orchestrator base URL.
#
# Precedence:
#   1. the --orchestrator flag, when non-empty
#   2. $env.PROVISIONING_ORCHESTRATOR_URL, when set and non-empty
#   3. `config-get "platform.orchestrator.url"`, called with
#      "http://localhost:9011" as its second argument (presumably the fallback
#      value - confirm against config-get)
def get-orchestrator-url [--orchestrator: string = ""] {
    let from_env = $env.PROVISIONING_ORCHESTRATOR_URL?

    if ($orchestrator | is-not-empty) {
        $orchestrator
    } else if ($from_env | is-not-empty) {
        $from_env
    } else {
        config-get "platform.orchestrator.url" "http://localhost:9011"
    }
}

# Decide whether the plugin path may be used for this orchestrator URL.
# Returns true exactly when `detect-platform-mode` reports "local" for it.
def use-local-plugin [orchestrator_url: string] {
    let platform_mode = (detect-platform-mode $orchestrator_url)
    $platform_mode == "local"
}


# Submit a server-creation workflow to the orchestrator and optionally wait for it.
#
# Steps: verify the provisioning daemon, resolve the orchestrator URL, build the
# JSON payload (compression fields are attached only when --script-compressed is
# non-empty), verify the orchestrator service, then POST to
# `/workflows/servers/create`.
#
# Returns a record:
#   - {status: "error", message: ...} when the daemon check, the request, or the
#     orchestrator's response reports a failure
#   - the record from `verify-service-or-fail` when that check reports "error"
#   - {status: "submitted", task_id: ...} when --wait is not set
#   - the record from `wait_for_workflow_completion` when --wait is set
export def server_create_workflow [
    infra: string       # Infrastructure target
    settings?: string   # Settings file path
    servers?: list  # Specific servers to create (empty = all)
    --check (-c)        # Check mode only
    --wait (-w)         # Wait for completion
    --orchestrator: string = ""  # Orchestrator URL (optional, uses platform config if not provided)
    --script-compressed: string = ""  # Compressed script (gzip+base64 encoded)
    --template-path: string = ""  # Path to template used
    --template-vars-compressed: string = ""  # Compressed template variables
    --compression-ratio: float = 0.0  # Compression ratio for monitoring
    --original-size: int = 0  # Original script size
    --compressed-size: int = 0  # Compressed script size
] {
    # CRITICAL: Verify daemon availability FIRST (required for ALL operations)
    let daemon_check = (verify-daemon-or-block "create server")
    if $daemon_check.status == "error" {
        return {status: "error", message: "provisioning_daemon not available"}
    }

    let orch_url = (get-orchestrator-url --orchestrator=$orchestrator)

    # Build base workflow data
    let base_data = {
        infra: $infra,
        settings: ($settings | default ""),
        servers: ($servers | default []),
        check_mode: $check,
        wait: $wait
    }

    # Add compression data if provided (complete auditable unit)
    let workflow_data = if ($script_compressed | is-not-empty) {
        $base_data | merge {
            template_path: $template_path,
            template_vars_compressed: $template_vars_compressed,
            script_compressed: $script_compressed,
            script_encoding: "tar+gzip+base64",
            compression_ratio: $compression_ratio,
            original_size: $original_size,
            compressed_size: $compressed_size
        }
    } else {
        $base_data
    }

    # Verify orchestrator availability BEFORE attempting submission
    # Using reusable service check pattern (see .claude/guidelines/provisioning.md)
    # Shows cascade failure report (external services + platform services)
    let check_result = (verify-service-or-fail $orch_url "Orchestrator"
        --check-command "provisioning platform status"
        --check-alias "prvng plat st"
        --start-command "provisioning platform start orchestrator"
        --start-alias "prvng plat start orchestrator"
    )

    if $check_result.status == "error" {
        return $check_result
    }

    # Submit to the orchestrator. `http post` raises on transport failures
    # (timeout, dropped connection) and on non-2xx replies, so trap the error and
    # report it through the same {status: "error"} record used everywhere else
    # in this function instead of letting it escape to the caller.
    let submission = (try {
        {
            ok: true,
            body: (http post --max-time 30sec $"($orch_url)/workflows/servers/create" --content-type "application/json" ($workflow_data | to json)),
            message: ""
        }
    } catch {|err|
        { ok: false, body: null, message: $err.msg }
    })

    if not $submission.ok {
        return { status: "error", message: $"Orchestrator request failed: ($submission.message)" }
    }

    # Optional access: a reply without `success`/`error` is treated as a failure
    # rather than raising a column-not-found error.
    let response = $submission.body
    if not ($response | get success? | default false) {
        return { status: "error", message: ($response | get error? | default "orchestrator returned failure") }
    }

    let task_id = ($response | get data)
    _print $"Server creation workflow submitted: ($task_id)"

    if $wait {
        let result = (wait_for_workflow_completion $orch_url $task_id)
        if ($result | get status) == "completed" {
            # Refresh the local server state only after a successful run and only
            # when both the workspace root and the infra name are known.
            let ws_root = ($env.PROVISIONING_WORKSPACE_PATH? | default "")
            let infra_name = ($infra | path basename)
            if ($ws_root | is-not-empty) and ($infra_name | is-not-empty) {
                print "\n[state sync]"
                sync-servers-state-post-op $ws_root $infra_name
            }
        }
        $result
    } else {
        { status: "submitted", task_id: $task_id }
    }
}

# Poll `<orchestrator>/tasks/<task_id>` until the task completes, fails, or
# polling gives up.
#
# Returns a record:
#   - {status: "completed", task: ...} when the task status is "Completed"
#   - {status: "failed", task: ...} when the task status is "Failed"
#   - {status: "error", message: ...} on timeout, after too many consecutive
#     unreachable/5xx/404 polls, or when the response envelope reports failure
def wait_for_workflow_completion [orchestrator: string, task_id: string] {
    _print "Waiting for workflow completion..."

    mut result = { status: "pending" }
    mut poll_errors = 0
    mut iteration = 0
    let max_poll_errors = 8
    # Hard cap on polling rounds. A normal round sleeps 2s (a retry round 3s), so
    # 120 rounds is about 4 minutes of sleeping plus request latency; each
    # request is itself capped at 10s.
    let max_iterations = 120

    while true {
        $iteration = $iteration + 1
        if $iteration > $max_iterations {
            return { status: "error", message: $"Workflow timed out after ($max_iterations) polling iterations" }
        }

        # Always use HTTP — plugin proved unreliable for tasks created via HTTP API
        # --full gives {status, headers, body}; --allow-errors prevents throw on 4xx/5xx.
        # Transport failures (refused connection, timeout) still raise, so map them
        # to status 0 here; otherwise the retry branch below could never run for them.
        let http_resp = (try {
            http get --max-time 10sec --full --allow-errors $"($orchestrator)/tasks/($task_id)"
        } catch {
            { status: 0 }
        })
        let http_status = ($http_resp | get status? | default 0)

        if $http_status == 0 or $http_status >= 500 {
            $poll_errors = $poll_errors + 1
            _print $"⚠️  Poll ($iteration): HTTP ($http_status), retry ($poll_errors)/($max_poll_errors)..."
            if $poll_errors >= $max_poll_errors {
                return { status: "error", message: $"Task ($task_id) unreachable after ($max_poll_errors) retries" }
            }
            sleep 3sec
            continue
        }

        if $http_status == 404 {
            $poll_errors = $poll_errors + 1
            _print $"⚠️  Poll ($iteration): task not found (404), retry ($poll_errors)/($max_poll_errors)..."
            if $poll_errors >= $max_poll_errors {
                return { status: "error", message: $"Task ($task_id) not found after ($max_poll_errors) retries" }
            }
            sleep 3sec
            continue
        }

        # A usable response arrived: the error budget counts consecutive failures only
        $poll_errors = 0
        let resp = ($http_resp | get body)
        if not ($resp | get success? | default false) {
            return { status: "error", message: ($resp | get error? | default "orchestrator returned failure") }
        }

        let task = ($resp | get data)

        let task_status = ($task | get status)

        match $task_status {
            "Completed" => {
                _print $"✅ Workflow completed successfully"
                # `output` may be absent from the task record; optional access
                # keeps a successful run from ending in a column-not-found error
                let task_output = ($task | get output?)
                if ($task_output | is-not-empty) {
                    _print "Output:"
                    _print $task_output
                }
                $result = { status: "completed", task: $task }
                break
            },
            "Failed" => {
                _print $"❌ Workflow failed"
                # Same reasoning as `output` above: `error` may be absent
                let task_error = ($task | get error?)
                if ($task_error | is-not-empty) {
                    _print "Error:"
                    _print $task_error
                }
                $result = { status: "failed", task: $task }
                break
            },
            "Running" => {
                _print $"🔄 Workflow is running..."
            },
            _ => {
                _print $"⏳ Workflow status: ($task_status)"
            }
        }

        sleep 2sec
    }

    return $result
}

# Bridge from the legacy server-create call shape to `server_create_workflow`.
#
# Maps `hostname` / `serverpos` to a list of hostnames, gathers the compression
# fields (from the flags when --script-compressed is given, otherwise from
# `prepare-server-creation-script` when not in check mode and a server was
# selected), submits the workflow and converts its result into the legacy
# {status: bool, error: string} record (plus `task_id` when only submitted).
#
# `outfile` is accepted but not read by this body.
# NOTE(review): --orchestrator defaults to a non-empty URL here, so the
# env/config lookup in get-orchestrator-url is only reached when a caller
# passes an empty string explicitly - confirm this is intended.
export def on_create_servers_workflow [
    settings: record   # Settings record
    check: bool        # Only check mode no servers will be created
    wait: bool         # Wait for creation
    outfile?: string   # Out file for creation
    hostname?: string  # Server hostname in settings
    serverpos?: int    # Server position in settings
    --orchestrator: string = "http://localhost:8080"  # Orchestrator URL
    --script-compressed: string = ""    # Pre-rendered compressed script (skip local render)
    --template-path: string = ""        # Template path for auditing
    --compression-ratio: float = 0.0    # Compression ratio for monitoring
    --original-size: int = 0            # Original script size in bytes
    --compressed-size: int = 0          # Compressed script size in bytes
] {

    # Legacy selectors -> hostname list. An explicit hostname wins; otherwise a
    # 1-based position is looked up in settings.data.servers.
    # NOTE(review): an out-of-range position yields an empty list, the same value
    # as "no selector given" - confirm callers expect that.
    let servers_list = if $hostname != null {
        [$hostname]
    } else if $serverpos == null {
        []
    } else {
        let known_servers = $settings.data.servers
        let server_count = ($known_servers | length)
        if $serverpos > 0 and $serverpos <= $server_count {
            [($known_servers | get ($serverpos - 1) | get hostname)]
        } else {
            []
        }
    }

    # Paths carried inside the settings record (empty string when missing)
    let settings_path = ($settings | get src? | default "")
    let infra_path = ($settings | get infra_path? | default "")

    # Compression fields: prefer what the caller already compressed, otherwise
    # compress $env.LAST_RENDERED_SCRIPT for a real (non-check) run that selected
    # at least one server, otherwise send none.
    let compression_params = if ($script_compressed | is-not-empty) {
        {
            script_compressed: $script_compressed,
            template_path: $template_path,
            template_vars_compressed: "",
            compression_ratio: $compression_ratio,
            original_size: $original_size,
            compressed_size: $compressed_size
        }
    } else if not $check and ($servers_list | length) >= 1 {
        prepare-server-creation-script $settings $servers_list
    } else {
        {}
    }

    # Each field falls back to the value server_create_workflow already uses as
    # that flag's default, so an empty parameter record submits exactly the same
    # request as omitting the flags would.
    let packed_script = ($compression_params | get script_compressed? | default "")
    let packed_template = ($compression_params | get template_path? | default "")
    let packed_vars = ($compression_params | get template_vars_compressed? | default "")
    let packed_ratio = ($compression_params | get compression_ratio? | default 0.0)
    let size_before = ($compression_params | get original_size? | default 0)
    let size_after = ($compression_params | get compressed_size? | default 0)

    let workflow_result = (
        server_create_workflow $infra_path $settings_path $servers_list
            --check=$check
            --wait=$wait
            --orchestrator $orchestrator
            --script-compressed $packed_script
            --template-path $packed_template
            --template-vars-compressed $packed_vars
            --compression-ratio $packed_ratio
            --original-size $size_before
            --compressed-size $size_after
    )

    # Translate the workflow status into the legacy result record
    let outcome = ($workflow_result | get status)
    if $outcome == "completed" {
        { status: true, error: "" }
    } else if $outcome == "submitted" {
        {
            status: true,
            error: "",
            task_id: ($workflow_result | get task_id)
        }
    } else if $outcome == "error" or $outcome == "failed" {
        {
            status: false,
            error: ($workflow_result | get message? | default "Workflow failed")
        }
    } else {
        { status: false, error: "Unknown workflow status" }
    }
}

# Look up one orchestrator task by id.
#
# When `use-local-plugin` reports a local orchestrator, the task is searched in
# the output of the `orch tasks` plugin command; otherwise it is fetched from
# `<orchestrator>/tasks/<task_id>` over HTTP.
#
# Returns a record describing the task (the field set differs between the plugin
# and HTTP paths), or {error: ...} when the task is not found or the
# orchestrator reports a failure.
export def "workflow status" [
    task_id: string     # Task ID to check
    --orchestrator: string = "http://localhost:8080"  # Orchestrator URL
] {
    # Use plugin for local orchestrator (~5ms vs ~50ms with HTTP)
    if (use-local-plugin $orchestrator) {
        let all_tasks = (orch tasks)
        let matching = ($all_tasks | where id == $task_id)

        # Test for "no match" BEFORE taking the first row: `first` on an empty
        # list can raise, which would make the not-found result unreachable.
        if ($matching | is-empty) {
            return { error: $"Task ($task_id) not found" }
        }

        let task = ($matching | first)

        return {
            id: ($task | get id),
            status: ($task | get status),
            priority: ($task | get priority),
            created_at: ($task | get created_at),
            workflow_id: ($task | get workflow_id)
        }
    }

    # Fall back to HTTP for remote orchestrators
    let response = (http get $"($orchestrator)/tasks/($task_id)")

    # Optional access: a reply without `success`/`error` counts as a failure
    # instead of raising a column-not-found error.
    if not ($response | get success? | default false) {
        return { error: ($response | get error? | default "orchestrator returned failure") }
    }

    let task = ($response | get data)
    {
        id: ($task | get id),
        name: ($task | get name),
        status: ($task | get status),
        created_at: ($task | get created_at),
        started_at: ($task | get started_at? | default null),
        completed_at: ($task | get completed_at? | default null),
        output: ($task | get output? | default null),
        error: ($task | get error? | default null)
    }
}

# List the orchestrator's tasks.
#
# A local orchestrator (per `use-local-plugin`) is queried through the
# `orch tasks` plugin command; any other URL is queried at `<orchestrator>/tasks`.
# On the HTTP path a reply whose `success` is false prints the reported error
# and yields an empty list.
export def "workflow list" [
    --orchestrator: string = "http://localhost:8080"  # Orchestrator URL
] {
    if (use-local-plugin $orchestrator) {
        # Plugin path for local orchestrator (<10ms vs ~50ms with HTTP)
        orch tasks
    } else {
        # HTTP path for remote orchestrators
        let reply = (http get $"($orchestrator)/tasks")

        if ($reply | get success) {
            $reply | get data
        } else {
            _print $"Error: (($reply | get error))"
            []
        }
    }
}

# Report orchestrator health as a {status, message} record.
#
# Remote orchestrators are probed at `<orchestrator>/health`: "healthy" with the
# reply's `data` as message when `success` is true, otherwise "unhealthy".
# Local orchestrators (per `use-local-plugin`) are probed through the
# `orch status` plugin command: "healthy" or "stopped" depending on its
# `running` field, with `plugin_mode: true` added to the record.
export def "workflow health" [
    --orchestrator: string = "http://localhost:8080"  # Orchestrator URL
] {
    # Remote orchestrators: HTTP probe
    if not (use-local-plugin $orchestrator) {
        let reply = (http get $"($orchestrator)/health")
        return (
            if ($reply | get success) {
                { status: "healthy", message: ($reply | get data) }
            } else {
                { status: "unhealthy", message: "Orchestrator returned error" }
            }
        )
    }

    # Local orchestrator: plugin probe (<5ms vs ~50ms with HTTP)
    let plugin_state = (orch status)
    let health_label = if $plugin_state.running { "healthy" } else { "stopped" }
    {
        status: $health_label,
        message: $"Orchestrator running: ($plugin_state.running)",
        plugin_mode: true
    }
}