provisioning/docs/book/user/SERVICE_MANAGEMENT_GUIDE.html

991 lines
42 KiB
HTML
Raw Normal View History

<!DOCTYPE HTML>
<html lang="en" class="ayu sidebar-visible" dir="ltr">
<head>
<!-- Book generated using mdBook -->
<meta charset="UTF-8">
<title>Service Management Guide - Provisioning Platform Documentation</title>
<!-- Custom HTML head -->
<meta name="description" content="Complete documentation for the Provisioning Platform - Infrastructure automation with Nushell, KCL, and Rust">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="theme-color" content="#ffffff">
<link rel="icon" href="../favicon.svg">
<link rel="shortcut icon" href="../favicon.png">
<link rel="stylesheet" href="../css/variables.css">
<link rel="stylesheet" href="../css/general.css">
<link rel="stylesheet" href="../css/chrome.css">
<link rel="stylesheet" href="../css/print.css" media="print">
<!-- Fonts -->
<link rel="stylesheet" href="../FontAwesome/css/font-awesome.css">
<link rel="stylesheet" href="../fonts/fonts.css">
<!-- Highlight.js Stylesheets -->
<link rel="stylesheet" id="highlight-css" href="../highlight.css">
<link rel="stylesheet" id="tomorrow-night-css" href="../tomorrow-night.css">
<link rel="stylesheet" id="ayu-highlight-css" href="../ayu-highlight.css">
<!-- Custom theme stylesheets -->
<!-- Provide site root and default themes to javascript -->
<script>
// Relative prefix from this page up to the book's root directory; it is the same
// "../" prefix used by the stylesheet and script links in this page's <head>.
// NOTE(review): presumably read by toc.js and the other book scripts — confirm.
const path_to_root = "../";
// Theme names the inline theme-selection script in <body> chooses between,
// based on the (prefers-color-scheme: dark) media query.
const default_light_theme = "ayu";
const default_dark_theme = "navy";
</script>
<!-- Start loading toc.js asap -->
<script src="../toc.js"></script>
</head>
<body>
<div id="mdbook-help-container">
<div id="mdbook-help-popup">
<h2 class="mdbook-help-title">Keyboard shortcuts</h2>
<div>
<p>Press <kbd>←</kbd> or <kbd>→</kbd> to navigate between chapters</p>
<p>Press <kbd>S</kbd> or <kbd>/</kbd> to search in the book</p>
<p>Press <kbd>?</kbd> to show this help</p>
<p>Press <kbd>Esc</kbd> to hide this help</p>
</div>
</div>
</div>
<div id="body-container">
<!-- Work around some values being stored in localStorage wrapped in quotes -->
<script>
// Some values end up in localStorage wrapped in literal double quotes; strip one
// surrounding pair so later reads see the bare value.
//
// Each key is processed inside its own try/catch. getItem() returns null for a
// key that was never set, so the value is null-checked before calling string
// methods, and a failure on one key (for example storage being blocked) no
// longer prevents the other key from being normalized.
for (const key of ['mdbook-theme', 'mdbook-sidebar']) {
    try {
        const stored = localStorage.getItem(key);
        if (stored !== null && stored.startsWith('"') && stored.endsWith('"')) {
            localStorage.setItem(key, stored.slice(1, stored.length - 1));
        }
    } catch (e) {
        // Best effort only: storage may be unavailable; the page still works.
    }
}
</script>
<!-- Set the theme before any content is loaded, prevents flash -->
<script>
// Pick the fallback theme from the OS colour-scheme preference.
const default_theme = window.matchMedia("(prefers-color-scheme: dark)").matches
    ? default_dark_theme
    : default_light_theme;

// Prefer the theme saved in localStorage; storage access may throw, in which
// case `theme` stays undefined and the fallback below applies.
let theme;
try {
    theme = localStorage.getItem('mdbook-theme');
} catch (e) { }
if (theme == null) {
    // `== null` matches both null (key absent) and undefined (read failed).
    theme = default_theme;
}

// Swap the class baked into the <html> tag for the chosen theme and flag that
// JavaScript is running.
const html = document.documentElement;
html.classList.remove('ayu');
html.classList.add(theme, "js");
</script>
<input type="checkbox" id="sidebar-toggle-anchor" class="hidden">
<!-- Hide / unhide sidebar before it is displayed -->
<script>
// Work out whether the sidebar starts 'visible' or 'hidden'.
let sidebar = null;
const sidebar_toggle = document.getElementById("sidebar-toggle-anchor");
if (document.body.clientWidth < 1080) {
    // Narrow viewport: always start with the sidebar hidden.
    sidebar = 'hidden';
} else {
    // Wide viewport: use the saved preference, defaulting to visible when
    // nothing is stored or storage cannot be read.
    try {
        sidebar = localStorage.getItem('mdbook-sidebar');
    } catch (e) { }
    if (!sidebar) {
        sidebar = 'visible';
    }
}
// Mirror the state on the toggle checkbox and on the <html> element's class.
sidebar_toggle.checked = (sidebar === 'visible');
html.classList.remove('sidebar-visible');
html.classList.add(`sidebar-${sidebar}`);
</script>
<nav id="sidebar" class="sidebar" aria-label="Table of contents">
<!-- populated by js -->
<mdbook-sidebar-scrollbox class="sidebar-scrollbox"></mdbook-sidebar-scrollbox>
<noscript>
<iframe class="sidebar-iframe-outer" src="../toc.html" title="Table of contents"></iframe>
</noscript>
<div id="sidebar-resize-handle" class="sidebar-resize-handle">
<div class="sidebar-resize-indicator"></div>
</div>
</nav>
<div id="page-wrapper" class="page-wrapper">
<div class="page">
<div id="menu-bar-hover-placeholder"></div>
<div id="menu-bar" class="menu-bar sticky">
<div class="left-buttons">
<label id="sidebar-toggle" class="icon-button" for="sidebar-toggle-anchor" title="Toggle Table of Contents" aria-label="Toggle Table of Contents" aria-controls="sidebar">
<i class="fa fa-bars"></i>
</label>
<button id="theme-toggle" class="icon-button" type="button" title="Change theme" aria-label="Change theme" aria-haspopup="true" aria-expanded="false" aria-controls="theme-list">
<i class="fa fa-paint-brush"></i>
</button>
<ul id="theme-list" class="theme-popup" aria-label="Themes" role="menu">
<li role="none"><button role="menuitem" class="theme" id="default_theme">Auto</button></li>
<li role="none"><button role="menuitem" class="theme" id="light">Light</button></li>
<li role="none"><button role="menuitem" class="theme" id="rust">Rust</button></li>
<li role="none"><button role="menuitem" class="theme" id="coal">Coal</button></li>
<li role="none"><button role="menuitem" class="theme" id="navy">Navy</button></li>
<li role="none"><button role="menuitem" class="theme" id="ayu">Ayu</button></li>
</ul>
<button id="search-toggle" class="icon-button" type="button" title="Search (`/`)" aria-label="Toggle Searchbar" aria-expanded="false" aria-keyshortcuts="/ s" aria-controls="searchbar">
<i class="fa fa-search"></i>
</button>
</div>
<h1 class="menu-title">Provisioning Platform Documentation</h1>
<div class="right-buttons">
<a href="../print.html" title="Print this book" aria-label="Print this book">
<i id="print-button" class="fa fa-print"></i>
</a>
<a href="https://github.com/provisioning/provisioning-platform" title="Git repository" aria-label="Git repository">
<i id="git-repository-button" class="fa fa-github"></i>
</a>
<a href="https://github.com/provisioning/provisioning-platform/edit/main/provisioning/docs/src/user/SERVICE_MANAGEMENT_GUIDE.md" title="Suggest an edit" aria-label="Suggest an edit">
<i id="git-edit-button" class="fa fa-edit"></i>
</a>
</div>
</div>
<div id="search-wrapper" class="hidden">
<form id="searchbar-outer" class="searchbar-outer">
<input type="search" id="searchbar" name="searchbar" placeholder="Search this book ..." aria-controls="searchresults-outer" aria-describedby="searchresults-header">
</form>
<div id="searchresults-outer" class="searchresults-outer hidden">
<div id="searchresults-header" class="searchresults-header"></div>
<ul id="searchresults">
</ul>
</div>
</div>
<!-- Apply ARIA attributes after the sidebar and the sidebar toggle button are added to the DOM -->
<script>
{
    // Expose the sidebar's visibility to assistive technology, and take its
    // links out of the tab order while it is hidden.
    const is_visible = sidebar === 'visible';
    const link_tab_index = is_visible ? 0 : -1;
    document.getElementById('sidebar-toggle').setAttribute('aria-expanded', is_visible);
    document.getElementById('sidebar').setAttribute('aria-hidden', !is_visible);
    for (const link of Array.from(document.querySelectorAll('#sidebar a'))) {
        link.setAttribute('tabIndex', link_tab_index);
    }
}
</script>
<div id="content" class="content">
<main>
<h1 id="service-management-guide"><a class="header" href="#service-management-guide">Service Management Guide</a></h1>
<p><strong>Version</strong>: 1.0.0<br>
<strong>Last Updated</strong>: 2025-10-06</p>
<h2 id="table-of-contents"><a class="header" href="#table-of-contents">Table of Contents</a></h2>
<ol>
<li><a href="#overview">Overview</a></li>
<li><a href="#service-architecture">Service Architecture</a></li>
<li><a href="#service-registry">Service Registry</a></li>
<li><a href="#platform-commands">Platform Commands</a></li>
<li><a href="#service-commands">Service Commands</a></li>
<li><a href="#deployment-modes">Deployment Modes</a></li>
<li><a href="#health-monitoring">Health Monitoring</a></li>
<li><a href="#dependency-management">Dependency Management</a></li>
<li><a href="#pre-flight-checks">Pre-flight Checks</a></li>
<li><a href="#troubleshooting">Troubleshooting</a></li>
</ol>
<hr />
<h2 id="overview"><a class="header" href="#overview">Overview</a></h2>
<p>The Service Management System provides comprehensive lifecycle management for all platform services (orchestrator, control-center, CoreDNS, Gitea, OCI registry, MCP server, API gateway).</p>
<h3 id="key-features"><a class="header" href="#key-features">Key Features</a></h3>
<ul>
<li><strong>Unified Service Management</strong>: Single interface for all services</li>
<li><strong>Automatic Dependency Resolution</strong>: Start services in correct order</li>
<li><strong>Health Monitoring</strong>: Continuous health checks with automatic recovery</li>
<li><strong>Multiple Deployment Modes</strong>: Binary, Docker, Docker Compose, Kubernetes, Remote</li>
<li><strong>Pre-flight Checks</strong>: Validate prerequisites before operations</li>
<li><strong>Service Registry</strong>: Centralized service configuration</li>
</ul>
<h3 id="supported-services"><a class="header" href="#supported-services">Supported Services</a></h3>
<div class="table-wrapper"><table><thead><tr><th>Service</th><th>Type</th><th>Category</th><th>Description</th></tr></thead><tbody>
<tr><td>orchestrator</td><td>Platform</td><td>Orchestration</td><td>Rust-based workflow coordinator</td></tr>
<tr><td>control-center</td><td>Platform</td><td>UI</td><td>Web-based management interface</td></tr>
<tr><td>coredns</td><td>Infrastructure</td><td>DNS</td><td>Local DNS resolution</td></tr>
<tr><td>gitea</td><td>Infrastructure</td><td>Git</td><td>Self-hosted Git service</td></tr>
<tr><td>oci-registry</td><td>Infrastructure</td><td>Registry</td><td>OCI-compliant container registry</td></tr>
<tr><td>mcp-server</td><td>Platform</td><td>API</td><td>Model Context Protocol server</td></tr>
<tr><td>api-gateway</td><td>Platform</td><td>API</td><td>Unified REST API gateway</td></tr>
</tbody></table>
</div>
<hr />
<h2 id="service-architecture"><a class="header" href="#service-architecture">Service Architecture</a></h2>
<h3 id="system-architecture"><a class="header" href="#system-architecture">System Architecture</a></h3>
<pre><code>┌─────────────────────────────────────────┐
│         Service Management CLI          │
│      (platform/services commands)       │
└─────────────────┬───────────────────────┘
       ┌──────────┴──────────┐
       │                     │
       ▼                     ▼
┌──────────────┐     ┌───────────────┐
│   Manager    │     │   Lifecycle   │
│   (Core)     │     │   (Start/Stop)│
└──────┬───────┘     └───────┬───────┘
       │                     │
       ▼                     ▼
┌──────────────┐     ┌───────────────┐
│   Health     │     │  Dependencies │
│   (Checks)   │     │  (Resolution) │
└──────────────┘     └───────────────┘
       │                     │
       └────────┬────────────┘
        ┌────────────────┐
        │   Pre-flight   │
        │  (Validation)  │
        └────────────────┘
</code></pre>
<h3 id="component-responsibilities"><a class="header" href="#component-responsibilities">Component Responsibilities</a></h3>
<p><strong>Manager</strong> (<code>manager.nu</code>)</p>
<ul>
<li>Service registry loading</li>
<li>Service status tracking</li>
<li>State persistence</li>
</ul>
<p><strong>Lifecycle</strong> (<code>lifecycle.nu</code>)</p>
<ul>
<li>Service start/stop operations</li>
<li>Deployment mode handling</li>
<li>Process management</li>
</ul>
<p><strong>Health</strong> (<code>health.nu</code>)</p>
<ul>
<li>Health check execution</li>
<li>HTTP/TCP/Command/File checks</li>
<li>Continuous monitoring</li>
</ul>
<p><strong>Dependencies</strong> (<code>dependencies.nu</code>)</p>
<ul>
<li>Dependency graph analysis</li>
<li>Topological sorting</li>
<li>Startup order calculation</li>
</ul>
<p><strong>Pre-flight</strong> (<code>preflight.nu</code>)</p>
<ul>
<li>Prerequisite validation</li>
<li>Conflict detection</li>
<li>Auto-start orchestration</li>
</ul>
<hr />
<h2 id="service-registry"><a class="header" href="#service-registry">Service Registry</a></h2>
<h3 id="configuration-file"><a class="header" href="#configuration-file">Configuration File</a></h3>
<p><strong>Location</strong>: <code>provisioning/config/services.toml</code></p>
<h3 id="service-definition-structure"><a class="header" href="#service-definition-structure">Service Definition Structure</a></h3>
<pre><code class="language-toml">[services.&lt;service-name&gt;]
name = "&lt;service-name&gt;"
type = "platform" | "infrastructure" | "utility"
category = "orchestration" | "auth" | "dns" | "git" | "registry" | "api" | "ui"
description = "Service description"
required_for = ["operation1", "operation2"]
dependencies = ["dependency1", "dependency2"]
conflicts = ["conflicting-service"]
[services.&lt;service-name&gt;.deployment]
mode = "binary" | "docker" | "docker-compose" | "kubernetes" | "remote"
# Mode-specific configuration
[services.&lt;service-name&gt;.deployment.binary]
binary_path = "/path/to/binary"
args = ["--arg1", "value1"]
working_dir = "/working/directory"
env = { KEY = "value" }
[services.&lt;service-name&gt;.health_check]
type = "http" | "tcp" | "command" | "file" | "none"
interval = 10
retries = 3
timeout = 5
[services.&lt;service-name&gt;.health_check.http]
endpoint = "http://localhost:9090/health"
expected_status = 200
method = "GET"
[services.&lt;service-name&gt;.startup]
auto_start = true
start_timeout = 30
start_order = 10
restart_on_failure = true
max_restarts = 3
</code></pre>
<h3 id="example-orchestrator-service"><a class="header" href="#example-orchestrator-service">Example: Orchestrator Service</a></h3>
<pre><code class="language-toml">[services.orchestrator]
name = "orchestrator"
type = "platform"
category = "orchestration"
description = "Rust-based orchestrator for workflow coordination"
required_for = ["server", "taskserv", "cluster", "workflow", "batch"]
[services.orchestrator.deployment]
mode = "binary"
[services.orchestrator.deployment.binary]
binary_path = "${HOME}/.provisioning/bin/provisioning-orchestrator"
args = ["--port", "8080", "--data-dir", "${HOME}/.provisioning/orchestrator/data"]
[services.orchestrator.health_check]
type = "http"
[services.orchestrator.health_check.http]
endpoint = "http://localhost:9090/health"
expected_status = 200
[services.orchestrator.startup]
auto_start = true
start_timeout = 30
start_order = 10
</code></pre>
<hr />
<h2 id="platform-commands"><a class="header" href="#platform-commands">Platform Commands</a></h2>
<p>Platform commands manage all services as a cohesive system.</p>
<h3 id="start-platform"><a class="header" href="#start-platform">Start Platform</a></h3>
<p>Start all auto-start services or specific services:</p>
<pre><code class="language-bash"># Start all auto-start services
provisioning platform start
# Start specific services (with dependencies)
provisioning platform start orchestrator control-center
# Force restart if already running
provisioning platform start --force orchestrator
</code></pre>
<p><strong>Behavior</strong>:</p>
<ol>
<li>Resolves dependencies</li>
<li>Calculates startup order (topological sort)</li>
<li>Starts services in correct order</li>
<li>Waits for health checks</li>
<li>Reports success/failure</li>
</ol>
<h3 id="stop-platform"><a class="header" href="#stop-platform">Stop Platform</a></h3>
<p>Stop all running services or specific services:</p>
<pre><code class="language-bash"># Stop all running services
provisioning platform stop
# Stop specific services
provisioning platform stop orchestrator control-center
# Force stop (kill -9)
provisioning platform stop --force orchestrator
</code></pre>
<p><strong>Behavior</strong>:</p>
<ol>
<li>Checks for dependent services</li>
<li>Stops in reverse dependency order</li>
<li>Updates service state</li>
<li>Cleans up PID files</li>
</ol>
<h3 id="restart-platform"><a class="header" href="#restart-platform">Restart Platform</a></h3>
<p>Restart running services:</p>
<pre><code class="language-bash"># Restart all running services
provisioning platform restart
# Restart specific services
provisioning platform restart orchestrator
</code></pre>
<h3 id="platform-status"><a class="header" href="#platform-status">Platform Status</a></h3>
<p>Show status of all services:</p>
<pre><code class="language-bash">provisioning platform status
</code></pre>
<p><strong>Output</strong>:</p>
<pre><code>Platform Services Status
Running: 3/7
=== ORCHESTRATION ===
🟢 orchestrator - running (uptime: 3600s) ✅
=== UI ===
🟢 control-center - running (uptime: 3550s) ✅
=== DNS ===
⚪ coredns - stopped ❓
=== GIT ===
⚪ gitea - stopped ❓
=== REGISTRY ===
⚪ oci-registry - stopped ❓
=== API ===
🟢 mcp-server - running (uptime: 3540s) ✅
⚪ api-gateway - stopped ❓
</code></pre>
<h3 id="platform-health"><a class="header" href="#platform-health">Platform Health</a></h3>
<p>Check health of all running services:</p>
<pre><code class="language-bash">provisioning platform health
</code></pre>
<p><strong>Output</strong>:</p>
<pre><code>Platform Health Check
✅ orchestrator: Healthy - HTTP health check passed
✅ control-center: Healthy - HTTP status 200 matches expected
⚪ coredns: Not running
✅ mcp-server: Healthy - HTTP health check passed
Summary: 3 healthy, 0 unhealthy, 4 not running
</code></pre>
<h3 id="platform-logs"><a class="header" href="#platform-logs">Platform Logs</a></h3>
<p>View service logs:</p>
<pre><code class="language-bash"># View last 50 lines
provisioning platform logs orchestrator
# View last 100 lines
provisioning platform logs orchestrator --lines 100
# Follow logs in real-time
provisioning platform logs orchestrator --follow
</code></pre>
<hr />
<h2 id="service-commands"><a class="header" href="#service-commands">Service Commands</a></h2>
<p>Individual service management commands.</p>
<h3 id="list-services"><a class="header" href="#list-services">List Services</a></h3>
<pre><code class="language-bash"># List all services
provisioning services list
# List only running services
provisioning services list --running
# Filter by category
provisioning services list --category orchestration
</code></pre>
<p><strong>Output</strong>:</p>
<pre><code>name             type            category       status   deployment_mode  auto_start
orchestrator     platform        orchestration  running  binary           true
control-center   platform        ui             stopped  binary           false
coredns          infrastructure  dns            stopped  docker           false
</code></pre>
<h3 id="service-status"><a class="header" href="#service-status">Service Status</a></h3>
<p>Get detailed status of a service:</p>
<pre><code class="language-bash">provisioning services status orchestrator
</code></pre>
<p><strong>Output</strong>:</p>
<pre><code>Service: orchestrator
Type: platform
Category: orchestration
Status: running
Deployment: binary
Health: healthy
Auto-start: true
PID: 12345
Uptime: 3600s
Dependencies: []
</code></pre>
<h3 id="start-service"><a class="header" href="#start-service">Start Service</a></h3>
<pre><code class="language-bash"># Start service (with pre-flight checks)
provisioning services start orchestrator
# Force start (skip checks)
provisioning services start orchestrator --force
</code></pre>
<p><strong>Pre-flight Checks</strong>:</p>
<ol>
<li>Validate prerequisites (binary exists, Docker running, etc.)</li>
<li>Check for conflicts</li>
<li>Verify dependencies are running</li>
<li>Auto-start dependencies if needed</li>
</ol>
<h3 id="stop-service"><a class="header" href="#stop-service">Stop Service</a></h3>
<pre><code class="language-bash"># Stop service (with dependency check)
provisioning services stop orchestrator
# Force stop (ignore dependents)
provisioning services stop orchestrator --force
</code></pre>
<h3 id="restart-service"><a class="header" href="#restart-service">Restart Service</a></h3>
<pre><code class="language-bash">provisioning services restart orchestrator
</code></pre>
<h3 id="service-health"><a class="header" href="#service-health">Service Health</a></h3>
<p>Check service health:</p>
<pre><code class="language-bash">provisioning services health orchestrator
</code></pre>
<p><strong>Output</strong>:</p>
<pre><code>Service: orchestrator
Status: healthy
Healthy: true
Message: HTTP health check passed
Check type: http
Check duration: 15ms
</code></pre>
<h3 id="service-logs"><a class="header" href="#service-logs">Service Logs</a></h3>
<pre><code class="language-bash"># View logs
provisioning services logs orchestrator
# Follow logs
provisioning services logs orchestrator --follow
# Custom line count
provisioning services logs orchestrator --lines 200
</code></pre>
<h3 id="check-required-services"><a class="header" href="#check-required-services">Check Required Services</a></h3>
<p>Check which services are required for an operation:</p>
<pre><code class="language-bash">provisioning services check server
</code></pre>
<p><strong>Output</strong>:</p>
<pre><code>Operation: server
Required services: orchestrator
All running: true
</code></pre>
<h3 id="service-dependencies"><a class="header" href="#service-dependencies">Service Dependencies</a></h3>
<p>View dependency graph:</p>
<pre><code class="language-bash"># View all dependencies
provisioning services dependencies
# View specific service dependencies
provisioning services dependencies control-center
</code></pre>
<h3 id="validate-services"><a class="header" href="#validate-services">Validate Services</a></h3>
<p>Validate all service configurations:</p>
<pre><code class="language-bash">provisioning services validate
</code></pre>
<p><strong>Output</strong>:</p>
<pre><code>Total services: 7
Valid: 6
Invalid: 1
Invalid services:
❌ coredns:
- Docker is not installed or not running
</code></pre>
<h3 id="readiness-report"><a class="header" href="#readiness-report">Readiness Report</a></h3>
<p>Get platform readiness report:</p>
<pre><code class="language-bash">provisioning services readiness
</code></pre>
<p><strong>Output</strong>:</p>
<pre><code>Platform Readiness Report
Total services: 7
Running: 3
Ready to start: 6
Services:
🟢 orchestrator - platform - orchestration
🟢 control-center - platform - ui
🔴 coredns - infrastructure - dns
Issues: 1
🟡 gitea - infrastructure - git
</code></pre>
<h3 id="monitor-service"><a class="header" href="#monitor-service">Monitor Service</a></h3>
<p>Continuous health monitoring:</p>
<pre><code class="language-bash"># Monitor with default interval (30s)
provisioning services monitor orchestrator
# Custom interval
provisioning services monitor orchestrator --interval 10
</code></pre>
<hr />
<h2 id="deployment-modes"><a class="header" href="#deployment-modes">Deployment Modes</a></h2>
<h3 id="binary-deployment"><a class="header" href="#binary-deployment">Binary Deployment</a></h3>
<p>Run services as native binaries.</p>
<p><strong>Configuration</strong>:</p>
<pre><code class="language-toml">[services.orchestrator.deployment]
mode = "binary"
[services.orchestrator.deployment.binary]
binary_path = "${HOME}/.provisioning/bin/provisioning-orchestrator"
args = ["--port", "8080"]
working_dir = "${HOME}/.provisioning/orchestrator"
env = { RUST_LOG = "info" }
</code></pre>
<p><strong>Process Management</strong>:</p>
<ul>
<li>PID tracking in <code>~/.provisioning/services/pids/</code></li>
<li>Log output to <code>~/.provisioning/services/logs/</code></li>
<li>State tracking in <code>~/.provisioning/services/state/</code></li>
</ul>
<h3 id="docker-deployment"><a class="header" href="#docker-deployment">Docker Deployment</a></h3>
<p>Run services as Docker containers.</p>
<p><strong>Configuration</strong>:</p>
<pre><code class="language-toml">[services.coredns.deployment]
mode = "docker"
[services.coredns.deployment.docker]
image = "coredns/coredns:1.11.1"
container_name = "provisioning-coredns"
ports = ["5353:53/udp"]
volumes = ["${HOME}/.provisioning/coredns/Corefile:/Corefile:ro"]
restart_policy = "unless-stopped"
</code></pre>
<p><strong>Prerequisites</strong>:</p>
<ul>
<li>Docker daemon running</li>
<li>Docker CLI installed</li>
</ul>
<h3 id="docker-compose-deployment"><a class="header" href="#docker-compose-deployment">Docker Compose Deployment</a></h3>
<p>Run services via Docker Compose.</p>
<p><strong>Configuration</strong>:</p>
<pre><code class="language-toml">[services.platform.deployment]
mode = "docker-compose"
[services.platform.deployment.docker_compose]
compose_file = "${HOME}/.provisioning/platform/docker-compose.yaml"
service_name = "orchestrator"
project_name = "provisioning"
</code></pre>
<p><strong>File</strong>: <code>provisioning/platform/docker-compose.yaml</code></p>
<h3 id="kubernetes-deployment"><a class="header" href="#kubernetes-deployment">Kubernetes Deployment</a></h3>
<p>Run services on Kubernetes.</p>
<p><strong>Configuration</strong>:</p>
<pre><code class="language-toml">[services.orchestrator.deployment]
mode = "kubernetes"
[services.orchestrator.deployment.kubernetes]
namespace = "provisioning"
deployment_name = "orchestrator"
manifests_path = "${HOME}/.provisioning/k8s/orchestrator/"
</code></pre>
<p><strong>Prerequisites</strong>:</p>
<ul>
<li>kubectl installed and configured</li>
<li>Kubernetes cluster accessible</li>
</ul>
<h3 id="remote-deployment"><a class="header" href="#remote-deployment">Remote Deployment</a></h3>
<p>Connect to remotely-running services.</p>
<p><strong>Configuration</strong>:</p>
<pre><code class="language-toml">[services.orchestrator.deployment]
mode = "remote"
[services.orchestrator.deployment.remote]
endpoint = "https://orchestrator.example.com"
tls_enabled = true
auth_token_path = "${HOME}/.provisioning/tokens/orchestrator.token"
</code></pre>
<hr />
<h2 id="health-monitoring"><a class="header" href="#health-monitoring">Health Monitoring</a></h2>
<h3 id="health-check-types"><a class="header" href="#health-check-types">Health Check Types</a></h3>
<h4 id="http-health-check"><a class="header" href="#http-health-check">HTTP Health Check</a></h4>
<pre><code class="language-toml">[services.orchestrator.health_check]
type = "http"
[services.orchestrator.health_check.http]
endpoint = "http://localhost:9090/health"
expected_status = 200
method = "GET"
</code></pre>
<h4 id="tcp-health-check"><a class="header" href="#tcp-health-check">TCP Health Check</a></h4>
<pre><code class="language-toml">[services.coredns.health_check]
type = "tcp"
[services.coredns.health_check.tcp]
host = "localhost"
port = 5353
</code></pre>
<h4 id="command-health-check"><a class="header" href="#command-health-check">Command Health Check</a></h4>
<pre><code class="language-toml">[services.custom.health_check]
type = "command"
[services.custom.health_check.command]
command = "systemctl is-active myservice"
expected_exit_code = 0
</code></pre>
<h4 id="file-health-check"><a class="header" href="#file-health-check">File Health Check</a></h4>
<pre><code class="language-toml">[services.custom.health_check]
type = "file"
[services.custom.health_check.file]
path = "/var/run/myservice.pid"
must_exist = true
</code></pre>
<h3 id="health-check-configuration"><a class="header" href="#health-check-configuration">Health Check Configuration</a></h3>
<ul>
<li><code>interval</code>: Seconds between checks (default: 10)</li>
<li><code>retries</code>: Max retry attempts (default: 3)</li>
<li><code>timeout</code>: Check timeout in seconds (default: 5)</li>
</ul>
<h3 id="continuous-monitoring"><a class="header" href="#continuous-monitoring">Continuous Monitoring</a></h3>
<pre><code class="language-bash">provisioning services monitor orchestrator --interval 30
</code></pre>
<p><strong>Output</strong>:</p>
<pre><code>Starting health monitoring for orchestrator (interval: 30s)
Press Ctrl+C to stop
2025-10-06 14:30:00 ✅ orchestrator: HTTP health check passed
2025-10-06 14:30:30 ✅ orchestrator: HTTP health check passed
2025-10-06 14:31:00 ✅ orchestrator: HTTP health check passed
</code></pre>
<hr />
<h2 id="dependency-management"><a class="header" href="#dependency-management">Dependency Management</a></h2>
<h3 id="dependency-graph"><a class="header" href="#dependency-graph">Dependency Graph</a></h3>
<p>Services can depend on other services:</p>
<pre><code class="language-toml">[services.control-center]
dependencies = ["orchestrator"]
[services.api-gateway]
dependencies = ["orchestrator", "control-center", "mcp-server"]
</code></pre>
<h3 id="startup-order"><a class="header" href="#startup-order">Startup Order</a></h3>
<p>Services start in topological order:</p>
<pre><code>orchestrator (order: 10)
└─&gt; control-center (order: 20)
    └─&gt; api-gateway (order: 45)
</code></pre>
<h3 id="dependency-resolution"><a class="header" href="#dependency-resolution">Dependency Resolution</a></h3>
<p>Automatic dependency resolution when starting services:</p>
<pre><code class="language-bash"># Starting control-center automatically starts orchestrator first
provisioning services start control-center
</code></pre>
<p><strong>Output</strong>:</p>
<pre><code>Starting dependency: orchestrator
✅ Started orchestrator with PID 12345
Waiting for orchestrator to become healthy...
✅ Service orchestrator is healthy
Starting service: control-center
✅ Started control-center with PID 12346
✅ Service control-center is healthy
</code></pre>
<h3 id="conflicts"><a class="header" href="#conflicts">Conflicts</a></h3>
<p>Services can conflict with each other:</p>
<pre><code class="language-toml">[services.coredns]
conflicts = ["dnsmasq", "systemd-resolved"]
</code></pre>
<p>Attempting to start a conflicting service will fail:</p>
<pre><code class="language-bash">provisioning services start coredns
</code></pre>
<p><strong>Output</strong>:</p>
<pre><code>❌ Pre-flight check failed: conflicts
Conflicting services running: dnsmasq
</code></pre>
<h3 id="reverse-dependencies"><a class="header" href="#reverse-dependencies">Reverse Dependencies</a></h3>
<p>Check which services depend on a service:</p>
<pre><code class="language-bash">provisioning services dependencies orchestrator
</code></pre>
<p><strong>Output</strong>:</p>
<pre><code>## orchestrator
- Type: platform
- Category: orchestration
- Required by:
  - control-center
  - mcp-server
  - api-gateway
</code></pre>
<h3 id="safe-stop"><a class="header" href="#safe-stop">Safe Stop</a></h3>
<p>System prevents stopping services with running dependents:</p>
<pre><code class="language-bash">provisioning services stop orchestrator
</code></pre>
<p><strong>Output</strong>:</p>
<pre><code>❌ Cannot stop orchestrator:
Dependent services running: control-center, mcp-server, api-gateway
Use --force to stop anyway
</code></pre>
<hr />
<h2 id="pre-flight-checks"><a class="header" href="#pre-flight-checks">Pre-flight Checks</a></h2>
<h3 id="purpose"><a class="header" href="#purpose">Purpose</a></h3>
<p>Pre-flight checks ensure services can start successfully before attempting to start them.</p>
<h3 id="check-types"><a class="header" href="#check-types">Check Types</a></h3>
<ol>
<li><strong>Prerequisites</strong>: Binary exists, Docker running, etc.</li>
<li><strong>Conflicts</strong>: No conflicting services running</li>
<li><strong>Dependencies</strong>: All dependencies available</li>
</ol>
<h3 id="automatic-checks"><a class="header" href="#automatic-checks">Automatic Checks</a></h3>
<p>Pre-flight checks run automatically when starting services:</p>
<pre><code class="language-bash">provisioning services start orchestrator
</code></pre>
<p><strong>Check Process</strong>:</p>
<pre><code>Running pre-flight checks for orchestrator...
✅ Binary found: /Users/user/.provisioning/bin/provisioning-orchestrator
✅ No conflicts detected
✅ All dependencies available
Starting service: orchestrator
</code></pre>
<h3 id="manual-validation"><a class="header" href="#manual-validation">Manual Validation</a></h3>
<p>Validate all services:</p>
<pre><code class="language-bash">provisioning services validate
</code></pre>
<p>Validate specific service:</p>
<pre><code class="language-bash">provisioning services status orchestrator
</code></pre>
<h3 id="auto-start"><a class="header" href="#auto-start">Auto-Start</a></h3>
<p>Services with <code>auto_start = true</code> can be started automatically when needed:</p>
<pre><code class="language-bash"># Orchestrator auto-starts if needed for server operations
provisioning server create
</code></pre>
<p><strong>Output</strong>:</p>
<pre><code>Starting required services...
✅ Orchestrator started
Creating server...
</code></pre>
<hr />
<h2 id="troubleshooting"><a class="header" href="#troubleshooting">Troubleshooting</a></h2>
<h3 id="service-wont-start"><a class="header" href="#service-wont-start">Service Won't Start</a></h3>
<p><strong>Check prerequisites</strong>:</p>
<pre><code class="language-bash">provisioning services validate
provisioning services status &lt;service&gt;
</code></pre>
<p><strong>Common issues</strong>:</p>
<ul>
<li>Binary not found: Check <code>binary_path</code> in config</li>
<li>Docker not running: Start Docker daemon</li>
<li>Port already in use: Check for conflicting processes</li>
<li>Dependencies not running: Start dependencies first</li>
</ul>
<h3 id="service-health-check-failing"><a class="header" href="#service-health-check-failing">Service Health Check Failing</a></h3>
<p><strong>View health status</strong>:</p>
<pre><code class="language-bash">provisioning services health &lt;service&gt;
</code></pre>
<p><strong>Check logs</strong>:</p>
<pre><code class="language-bash">provisioning services logs &lt;service&gt; --follow
</code></pre>
<p><strong>Common issues</strong>:</p>
<ul>
<li>Service not fully initialized: Wait longer or increase <code>start_timeout</code></li>
<li>Wrong health check endpoint: Verify endpoint in config</li>
<li>Network issues: Check firewall, port bindings</li>
</ul>
<h3 id="dependency-issues"><a class="header" href="#dependency-issues">Dependency Issues</a></h3>
<p><strong>View dependency tree</strong>:</p>
<pre><code class="language-bash">provisioning services dependencies &lt;service&gt;
</code></pre>
<p><strong>Check dependency status</strong>:</p>
<pre><code class="language-bash">provisioning services status &lt;dependency&gt;
</code></pre>
<p><strong>Start with dependencies</strong>:</p>
<pre><code class="language-bash">provisioning platform start &lt;service&gt;
</code></pre>
<h3 id="circular-dependencies"><a class="header" href="#circular-dependencies">Circular Dependencies</a></h3>
<p><strong>Validate dependency graph</strong>:</p>
<pre><code class="language-bash"># This is done automatically but you can check manually
nu -c "use lib_provisioning/services/mod.nu *; validate-dependency-graph"
</code></pre>
<h3 id="pid-file-stale"><a class="header" href="#pid-file-stale">PID File Stale</a></h3>
<p>If a service reports running but isn't:</p>
<pre><code class="language-bash"># Manual cleanup
rm ~/.provisioning/services/pids/&lt;service&gt;.pid
# Force restart
provisioning services restart &lt;service&gt;
</code></pre>
<h3 id="port-conflicts"><a class="header" href="#port-conflicts">Port Conflicts</a></h3>
<p><strong>Find process using port</strong>:</p>
<pre><code class="language-bash">lsof -i :9090
</code></pre>
<p><strong>Kill conflicting process</strong>:</p>
<pre><code class="language-bash">kill &lt;PID&gt;
</code></pre>
<h3 id="docker-issues"><a class="header" href="#docker-issues">Docker Issues</a></h3>
<p><strong>Check Docker status</strong>:</p>
<pre><code class="language-bash">docker ps
docker info
</code></pre>
<p><strong>View container logs</strong>:</p>
<pre><code class="language-bash">docker logs provisioning-&lt;service&gt;
</code></pre>
<p><strong>Restart Docker daemon</strong>:</p>
<pre><code class="language-bash"># macOS
killall Docker &amp;&amp; open /Applications/Docker.app
# Linux
systemctl restart docker
</code></pre>
<h3 id="service-logs-1"><a class="header" href="#service-logs-1">Service Logs</a></h3>
<p><strong>View recent logs</strong>:</p>
<pre><code class="language-bash">tail -f ~/.provisioning/services/logs/&lt;service&gt;.log
</code></pre>
<p><strong>Search logs</strong>:</p>
<pre><code class="language-bash">grep "ERROR" ~/.provisioning/services/logs/&lt;service&gt;.log
</code></pre>
<hr />
<h2 id="advanced-usage"><a class="header" href="#advanced-usage">Advanced Usage</a></h2>
<h3 id="custom-service-registration"><a class="header" href="#custom-service-registration">Custom Service Registration</a></h3>
<p>Add custom services by editing <code>provisioning/config/services.toml</code>.</p>
<h3 id="integration-with-workflows"><a class="header" href="#integration-with-workflows">Integration with Workflows</a></h3>
<p>Services automatically start when required by workflows:</p>
<pre><code class="language-bash"># Orchestrator starts automatically if not running
provisioning workflow submit my-workflow
</code></pre>
<h3 id="cicd-integration"><a class="header" href="#cicd-integration">CI/CD Integration</a></h3>
<pre><code class="language-yaml"># GitLab CI
before_script:
- provisioning platform start orchestrator
- provisioning services health orchestrator
test:
script:
- provisioning test quick kubernetes
</code></pre>
<h3 id="monitoring-integration"><a class="header" href="#monitoring-integration">Monitoring Integration</a></h3>
<p>Services can integrate with monitoring systems via health endpoints.</p>
<hr />
<h2 id="related-documentation"><a class="header" href="#related-documentation">Related Documentation</a></h2>
<ul>
<li><a href="../../provisioning/platform/orchestrator/README.html">Orchestrator README</a></li>
<li><a href="./test-environment-guide.html">Test Environment Guide</a></li>
<li><a href="./workflow-management.html">Workflow Management</a></li>
</ul>
<hr />
<p><strong>Maintained By</strong>: Platform Team<br />
<strong>Support</strong>: <a href="https://github.com/your-org/provisioning/issues">GitHub Issues</a></p>
</main>
<nav class="nav-wrapper" aria-label="Page navigation">
<!-- Mobile navigation buttons -->
<a rel="prev" href="../user/COREDNS_GUIDE.html" class="mobile-nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
<i class="fa fa-angle-left"></i>
</a>
<a rel="next prefetch" href="../user/SERVICE_MANAGEMENT_QUICKREF.html" class="mobile-nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
<i class="fa fa-angle-right"></i>
</a>
<div style="clear: both"></div>
</nav>
</div>
</div>
<nav class="nav-wide-wrapper" aria-label="Page navigation">
<a rel="prev" href="../user/COREDNS_GUIDE.html" class="nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
<i class="fa fa-angle-left"></i>
</a>
<a rel="next prefetch" href="../user/SERVICE_MANAGEMENT_QUICKREF.html" class="nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
<i class="fa fa-angle-right"></i>
</a>
</nav>
</div>
<!-- Livereload script (if served using the cli tool) -->
<script>
// Live-reload client, used when the book is served by the CLI tool.
// Connects to the "__livereload" WebSocket endpoint on the current host,
// matching the page's security level (wss: for https:, ws: otherwise).
const wsProtocol = location.protocol !== 'https:' ? 'ws:' : 'wss:';
const wsAddress = [wsProtocol, "//", location.host, "/", "__livereload"].join("");
const socket = new WebSocket(wsAddress);

// Only a "reload" message triggers a refresh; any other payload is ignored.
socket.onmessage = (event) => {
    if (event.data !== "reload") {
        return;
    }
    // Close the connection first, then reload the page.
    socket.close();
    location.reload();
};

// Close the socket when the page is about to be unloaded.
window.onbeforeunload = () => {
    socket.close();
};
</script>
<script>
// Global flag set before the book scripts below are loaded.
// NOTE(review): presumably read by book.js to enable copy buttons on code blocks — confirm.
window.playground_copyable = true;
</script>
<script src="../elasticlunr.min.js"></script>
<script src="../mark.min.js"></script>
<script src="../searcher.js"></script>
<script src="../clipboard.min.js"></script>
<script src="../highlight.js"></script>
<script src="../book.js"></script>
<!-- Custom JS scripts -->
</div>
</body>
</html>