provisioning/docs/book/user/SERVICE_MANAGEMENT_GUIDE.html
Jesús Pérez 6a59d34bb1
chore: update provisioning configuration and documentation
Update configuration files, templates, and internal documentation
for the provisioning repository system.

Configuration Updates:
- KMS configuration modernization
- Plugin system settings
- Service port mappings
- Test cluster topologies
- Installation configuration examples
- VM configuration defaults
- Cedar authorization policies

Documentation Updates:
- Library module documentation
- Extension API guides
- AI system documentation
- Service management guides
- Test environment setup
- Plugin usage guides
- Validator configuration documentation

All changes are backward compatible.
2025-12-11 21:50:42 +00:00

991 lines
42 KiB
HTML
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!DOCTYPE HTML>
<html lang="en" class="ayu sidebar-visible" dir="ltr">
<head>
<!-- Book generated using mdBook -->
<meta charset="UTF-8">
<title>Service Management Guide - Provisioning Platform Documentation</title>
<!-- Custom HTML head -->
<meta name="description" content="Complete documentation for the Provisioning Platform - Infrastructure automation with Nushell, KCL, and Rust">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="theme-color" content="#ffffff">
<link rel="icon" href="../favicon.svg">
<link rel="shortcut icon" href="../favicon.png">
<link rel="stylesheet" href="../css/variables.css">
<link rel="stylesheet" href="../css/general.css">
<link rel="stylesheet" href="../css/chrome.css">
<link rel="stylesheet" href="../css/print.css" media="print">
<!-- Fonts -->
<link rel="stylesheet" href="../FontAwesome/css/font-awesome.css">
<link rel="stylesheet" href="../fonts/fonts.css">
<!-- Highlight.js Stylesheets -->
<link rel="stylesheet" id="highlight-css" href="../highlight.css">
<link rel="stylesheet" id="tomorrow-night-css" href="../tomorrow-night.css">
<link rel="stylesheet" id="ayu-highlight-css" href="../ayu-highlight.css">
<!-- Custom theme stylesheets -->
<!-- Provide site root and default themes to javascript -->
<script>
// Relative prefix from this page to the book root; the asset links in this
// document's head use the same "../" prefix.
const path_to_root = "../";
// Theme class names for the light and dark colour schemes. A later inline
// script picks one of them via the prefers-color-scheme media query when no
// theme has been stored in localStorage.
const default_light_theme = "ayu";
const default_dark_theme = "navy";
</script>
<!-- Start loading toc.js asap -->
<script src="../toc.js"></script>
</head>
<body>
<div id="mdbook-help-container">
<div id="mdbook-help-popup">
<h2 class="mdbook-help-title">Keyboard shortcuts</h2>
<div>
<p>Press <kbd>←</kbd> or <kbd>→</kbd> to navigate between chapters</p>
<p>Press <kbd>S</kbd> or <kbd>/</kbd> to search in the book</p>
<p>Press <kbd>?</kbd> to show this help</p>
<p>Press <kbd>Esc</kbd> to hide this help</p>
</div>
</div>
</div>
<div id="body-container">
<!-- Work around some values being stored in localStorage wrapped in quotes -->
<script>
// Some values were stored in localStorage wrapped in double quotes
// (for example '"ayu"'). Strip one leading and one trailing quote so that
// later reads see the bare value.
//
// Each key is processed on its own: getItem() returns null for a key that
// was never written, and a missing theme must not prevent the sidebar value
// from being repaired (or the other way round).
for (const key of ['mdbook-theme', 'mdbook-sidebar']) {
    try {
        const stored = localStorage.getItem(key);
        if (stored !== null && stored.startsWith('"') && stored.endsWith('"')) {
            localStorage.setItem(key, stored.slice(1, stored.length - 1));
        }
    } catch (e) {
        // Best effort only: if storage access throws, leave the value as is.
    }
}
</script>
<!-- Set the theme before any content is loaded, prevents flash -->
<script>
// Fallback theme: follow the colour-scheme media query.
const default_theme = window.matchMedia("(prefers-color-scheme: dark)").matches
    ? default_dark_theme
    : default_light_theme;

// A theme saved in localStorage takes precedence over the fallback.
let theme;
try {
    theme = localStorage.getItem('mdbook-theme');
} catch (e) {
    // Reading storage threw; theme stays undefined and the fallback is used.
}
if (theme == null) {
    theme = default_theme;
}

// Replace the statically rendered 'ayu' class on the root element with the
// resolved theme, and mark the document as script-enabled.
const html = document.documentElement;
html.classList.remove('ayu');
html.classList.add(theme, "js");
</script>
<input type="checkbox" id="sidebar-toggle-anchor" class="hidden">
<!-- Hide / unhide sidebar before it is displayed -->
<script>
// Decide whether the sidebar starts 'visible' or 'hidden' and reflect that
// on the toggle checkbox and the root element's class list.
let sidebar = null;
const sidebar_toggle = document.getElementById("sidebar-toggle-anchor");

if (document.body.clientWidth < 1080) {
    // Below 1080px the sidebar always starts hidden.
    sidebar = 'hidden';
} else {
    // Otherwise use the stored preference, defaulting to visible when
    // nothing usable is stored or storage cannot be read.
    try {
        sidebar = localStorage.getItem('mdbook-sidebar');
    } catch (e) { }
    if (!sidebar) {
        sidebar = 'visible';
    }
}

sidebar_toggle.checked = (sidebar === 'visible');
// Drop the statically rendered state class before adding the resolved one.
html.classList.remove('sidebar-visible');
html.classList.add(`sidebar-${sidebar}`);
</script>
<nav id="sidebar" class="sidebar" aria-label="Table of contents">
<!-- populated by js -->
<mdbook-sidebar-scrollbox class="sidebar-scrollbox"></mdbook-sidebar-scrollbox>
<noscript>
<iframe class="sidebar-iframe-outer" src="../toc.html" title="Table of contents"></iframe>
</noscript>
<div id="sidebar-resize-handle" class="sidebar-resize-handle">
<div class="sidebar-resize-indicator"></div>
</div>
</nav>
<div id="page-wrapper" class="page-wrapper">
<div class="page">
<div id="menu-bar-hover-placeholder"></div>
<div id="menu-bar" class="menu-bar sticky">
<div class="left-buttons">
<label id="sidebar-toggle" class="icon-button" for="sidebar-toggle-anchor" title="Toggle Table of Contents" aria-label="Toggle Table of Contents" aria-controls="sidebar">
<i class="fa fa-bars"></i>
</label>
<button id="theme-toggle" class="icon-button" type="button" title="Change theme" aria-label="Change theme" aria-haspopup="true" aria-expanded="false" aria-controls="theme-list">
<i class="fa fa-paint-brush"></i>
</button>
<ul id="theme-list" class="theme-popup" aria-label="Themes" role="menu">
<li role="none"><button role="menuitem" class="theme" id="default_theme">Auto</button></li>
<li role="none"><button role="menuitem" class="theme" id="light">Light</button></li>
<li role="none"><button role="menuitem" class="theme" id="rust">Rust</button></li>
<li role="none"><button role="menuitem" class="theme" id="coal">Coal</button></li>
<li role="none"><button role="menuitem" class="theme" id="navy">Navy</button></li>
<li role="none"><button role="menuitem" class="theme" id="ayu">Ayu</button></li>
</ul>
<button id="search-toggle" class="icon-button" type="button" title="Search (`/`)" aria-label="Toggle Searchbar" aria-expanded="false" aria-keyshortcuts="/ s" aria-controls="searchbar">
<i class="fa fa-search"></i>
</button>
</div>
<h1 class="menu-title">Provisioning Platform Documentation</h1>
<div class="right-buttons">
<a href="../print.html" title="Print this book" aria-label="Print this book">
<i id="print-button" class="fa fa-print"></i>
</a>
<a href="https://github.com/provisioning/provisioning-platform" title="Git repository" aria-label="Git repository">
<i id="git-repository-button" class="fa fa-github"></i>
</a>
<a href="https://github.com/provisioning/provisioning-platform/edit/main/provisioning/docs/src/user/SERVICE_MANAGEMENT_GUIDE.md" title="Suggest an edit" aria-label="Suggest an edit">
<i id="git-edit-button" class="fa fa-edit"></i>
</a>
</div>
</div>
<div id="search-wrapper" class="hidden">
<form id="searchbar-outer" class="searchbar-outer">
<input type="search" id="searchbar" name="searchbar" placeholder="Search this book ..." aria-controls="searchresults-outer" aria-describedby="searchresults-header">
</form>
<div id="searchresults-outer" class="searchresults-outer hidden">
<div id="searchresults-header" class="searchresults-header"></div>
<ul id="searchresults">
</ul>
</div>
</div>
<!-- Apply ARIA attributes after the sidebar and the sidebar toggle button are added to the DOM -->
<script>
{
    // Mirror the resolved sidebar state into ARIA attributes and into the
    // tab order of whatever links currently exist inside the sidebar.
    const isVisible = sidebar === 'visible';
    const toggleLabel = document.getElementById('sidebar-toggle');
    const sidebarNav = document.getElementById('sidebar');

    toggleLabel.setAttribute('aria-expanded', isVisible);
    sidebarNav.setAttribute('aria-hidden', !isVisible);

    for (const link of document.querySelectorAll('#sidebar a')) {
        // Links are tabbable only while the sidebar is shown.
        link.setAttribute('tabIndex', isVisible ? 0 : -1);
    }
}
</script>
<div id="content" class="content">
<main>
<h1 id="service-management-guide"><a class="header" href="#service-management-guide">Service Management Guide</a></h1>
<p><strong>Version</strong>: 1.0.0
<strong>Last Updated</strong>: 2025-10-06</p>
<h2 id="table-of-contents"><a class="header" href="#table-of-contents">Table of Contents</a></h2>
<ol>
<li><a href="#overview">Overview</a></li>
<li><a href="#service-architecture">Service Architecture</a></li>
<li><a href="#service-registry">Service Registry</a></li>
<li><a href="#platform-commands">Platform Commands</a></li>
<li><a href="#service-commands">Service Commands</a></li>
<li><a href="#deployment-modes">Deployment Modes</a></li>
<li><a href="#health-monitoring">Health Monitoring</a></li>
<li><a href="#dependency-management">Dependency Management</a></li>
<li><a href="#pre-flight-checks">Pre-flight Checks</a></li>
<li><a href="#troubleshooting">Troubleshooting</a></li>
</ol>
<hr />
<h2 id="overview"><a class="header" href="#overview">Overview</a></h2>
<p>The Service Management System provides comprehensive lifecycle management for all platform services (orchestrator, control-center, CoreDNS, Gitea, OCI registry, MCP server, API gateway).</p>
<h3 id="key-features"><a class="header" href="#key-features">Key Features</a></h3>
<ul>
<li><strong>Unified Service Management</strong>: Single interface for all services</li>
<li><strong>Automatic Dependency Resolution</strong>: Start services in correct order</li>
<li><strong>Health Monitoring</strong>: Continuous health checks with automatic recovery</li>
<li><strong>Multiple Deployment Modes</strong>: Binary, Docker, Docker Compose, Kubernetes, Remote</li>
<li><strong>Pre-flight Checks</strong>: Validate prerequisites before operations</li>
<li><strong>Service Registry</strong>: Centralized service configuration</li>
</ul>
<h3 id="supported-services"><a class="header" href="#supported-services">Supported Services</a></h3>
<div class="table-wrapper"><table><thead><tr><th>Service</th><th>Type</th><th>Category</th><th>Description</th></tr></thead><tbody>
<tr><td>orchestrator</td><td>Platform</td><td>Orchestration</td><td>Rust-based workflow coordinator</td></tr>
<tr><td>control-center</td><td>Platform</td><td>UI</td><td>Web-based management interface</td></tr>
<tr><td>coredns</td><td>Infrastructure</td><td>DNS</td><td>Local DNS resolution</td></tr>
<tr><td>gitea</td><td>Infrastructure</td><td>Git</td><td>Self-hosted Git service</td></tr>
<tr><td>oci-registry</td><td>Infrastructure</td><td>Registry</td><td>OCI-compliant container registry</td></tr>
<tr><td>mcp-server</td><td>Platform</td><td>API</td><td>Model Context Protocol server</td></tr>
<tr><td>api-gateway</td><td>Platform</td><td>API</td><td>Unified REST API gateway</td></tr>
</tbody></table>
</div>
<hr />
<h2 id="service-architecture"><a class="header" href="#service-architecture">Service Architecture</a></h2>
<h3 id="system-architecture"><a class="header" href="#system-architecture">System Architecture</a></h3>
<pre><code>┌─────────────────────────────────────────┐
│         Service Management CLI          │
│      (platform/services commands)       │
└─────────────────┬───────────────────────┘
       ┌──────────┴──────────┐
       │                     │
       ▼                     ▼
┌──────────────┐    ┌───────────────┐
│   Manager    │    │   Lifecycle   │
│   (Core)     │    │   (Start/Stop)│
└──────┬───────┘    └───────┬───────┘
       │                    │
       ▼                    ▼
┌──────────────┐    ┌───────────────┐
│   Health     │    │  Dependencies │
│   (Checks)   │    │  (Resolution) │
└──────────────┘    └───────────────┘
       │                    │
       └────────┬───────────┘
        ┌────────────────┐
        │   Pre-flight   │
        │  (Validation)  │
        └────────────────┘
</code></pre>
<h3 id="component-responsibilities"><a class="header" href="#component-responsibilities">Component Responsibilities</a></h3>
<p><strong>Manager</strong> (<code>manager.nu</code>)</p>
<ul>
<li>Service registry loading</li>
<li>Service status tracking</li>
<li>State persistence</li>
</ul>
<p><strong>Lifecycle</strong> (<code>lifecycle.nu</code>)</p>
<ul>
<li>Service start/stop operations</li>
<li>Deployment mode handling</li>
<li>Process management</li>
</ul>
<p><strong>Health</strong> (<code>health.nu</code>)</p>
<ul>
<li>Health check execution</li>
<li>HTTP/TCP/Command/File checks</li>
<li>Continuous monitoring</li>
</ul>
<p><strong>Dependencies</strong> (<code>dependencies.nu</code>)</p>
<ul>
<li>Dependency graph analysis</li>
<li>Topological sorting</li>
<li>Startup order calculation</li>
</ul>
<p><strong>Pre-flight</strong> (<code>preflight.nu</code>)</p>
<ul>
<li>Prerequisite validation</li>
<li>Conflict detection</li>
<li>Auto-start orchestration</li>
</ul>
<hr />
<h2 id="service-registry"><a class="header" href="#service-registry">Service Registry</a></h2>
<h3 id="configuration-file"><a class="header" href="#configuration-file">Configuration File</a></h3>
<p><strong>Location</strong>: <code>provisioning/config/services.toml</code></p>
<h3 id="service-definition-structure"><a class="header" href="#service-definition-structure">Service Definition Structure</a></h3>
<pre><code class="language-toml">[services.&lt;service-name&gt;]
name = "&lt;service-name&gt;"
type = "platform" | "infrastructure" | "utility"
category = "orchestration" | "auth" | "dns" | "git" | "registry" | "api" | "ui"
description = "Service description"
required_for = ["operation1", "operation2"]
dependencies = ["dependency1", "dependency2"]
conflicts = ["conflicting-service"]
[services.&lt;service-name&gt;.deployment]
mode = "binary" | "docker" | "docker-compose" | "kubernetes" | "remote"
# Mode-specific configuration
[services.&lt;service-name&gt;.deployment.binary]
binary_path = "/path/to/binary"
args = ["--arg1", "value1"]
working_dir = "/working/directory"
env = { KEY = "value" }
[services.&lt;service-name&gt;.health_check]
type = "http" | "tcp" | "command" | "file" | "none"
interval = 10
retries = 3
timeout = 5
[services.&lt;service-name&gt;.health_check.http]
endpoint = "http://localhost:9090/health"
expected_status = 200
method = "GET"
[services.&lt;service-name&gt;.startup]
auto_start = true
start_timeout = 30
start_order = 10
restart_on_failure = true
max_restarts = 3
</code></pre>
<h3 id="example-orchestrator-service"><a class="header" href="#example-orchestrator-service">Example: Orchestrator Service</a></h3>
<pre><code class="language-toml">[services.orchestrator]
name = "orchestrator"
type = "platform"
category = "orchestration"
description = "Rust-based orchestrator for workflow coordination"
required_for = ["server", "taskserv", "cluster", "workflow", "batch"]
[services.orchestrator.deployment]
mode = "binary"
[services.orchestrator.deployment.binary]
binary_path = "${HOME}/.provisioning/bin/provisioning-orchestrator"
args = ["--port", "9090", "--data-dir", "${HOME}/.provisioning/orchestrator/data"]
[services.orchestrator.health_check]
type = "http"
[services.orchestrator.health_check.http]
endpoint = "http://localhost:9090/health"
expected_status = 200
[services.orchestrator.startup]
auto_start = true
start_timeout = 30
start_order = 10
</code></pre>
<hr />
<h2 id="platform-commands"><a class="header" href="#platform-commands">Platform Commands</a></h2>
<p>Platform commands manage all services as a cohesive system.</p>
<h3 id="start-platform"><a class="header" href="#start-platform">Start Platform</a></h3>
<p>Start all auto-start services or specific services:</p>
<pre><code class="language-bash"># Start all auto-start services
provisioning platform start
# Start specific services (with dependencies)
provisioning platform start orchestrator control-center
# Force restart if already running
provisioning platform start --force orchestrator
</code></pre>
<p><strong>Behavior</strong>:</p>
<ol>
<li>Resolves dependencies</li>
<li>Calculates startup order (topological sort)</li>
<li>Starts services in correct order</li>
<li>Waits for health checks</li>
<li>Reports success/failure</li>
</ol>
<h3 id="stop-platform"><a class="header" href="#stop-platform">Stop Platform</a></h3>
<p>Stop all running services or specific services:</p>
<pre><code class="language-bash"># Stop all running services
provisioning platform stop
# Stop specific services
provisioning platform stop orchestrator control-center
# Force stop (kill -9)
provisioning platform stop --force orchestrator
</code></pre>
<p><strong>Behavior</strong>:</p>
<ol>
<li>Checks for dependent services</li>
<li>Stops in reverse dependency order</li>
<li>Updates service state</li>
<li>Cleans up PID files</li>
</ol>
<h3 id="restart-platform"><a class="header" href="#restart-platform">Restart Platform</a></h3>
<p>Restart running services:</p>
<pre><code class="language-bash"># Restart all running services
provisioning platform restart
# Restart specific services
provisioning platform restart orchestrator
</code></pre>
<h3 id="platform-status"><a class="header" href="#platform-status">Platform Status</a></h3>
<p>Show status of all services:</p>
<pre><code class="language-bash">provisioning platform status
</code></pre>
<p><strong>Output</strong>:</p>
<pre><code>Platform Services Status
Running: 3/7
=== ORCHESTRATION ===
🟢 orchestrator - running (uptime: 3600s) ✅
=== UI ===
🟢 control-center - running (uptime: 3550s) ✅
=== DNS ===
⚪ coredns - stopped ❓
=== GIT ===
⚪ gitea - stopped ❓
=== REGISTRY ===
⚪ oci-registry - stopped ❓
=== API ===
🟢 mcp-server - running (uptime: 3540s) ✅
⚪ api-gateway - stopped ❓
</code></pre>
<h3 id="platform-health"><a class="header" href="#platform-health">Platform Health</a></h3>
<p>Check health of all running services:</p>
<pre><code class="language-bash">provisioning platform health
</code></pre>
<p><strong>Output</strong>:</p>
<pre><code>Platform Health Check
✅ orchestrator: Healthy - HTTP health check passed
✅ control-center: Healthy - HTTP status 200 matches expected
⚪ coredns: Not running
⚪ gitea: Not running
⚪ oci-registry: Not running
✅ mcp-server: Healthy - HTTP health check passed
⚪ api-gateway: Not running
Summary: 3 healthy, 0 unhealthy, 4 not running
</code></pre>
<h3 id="platform-logs"><a class="header" href="#platform-logs">Platform Logs</a></h3>
<p>View service logs:</p>
<pre><code class="language-bash"># View last 50 lines
provisioning platform logs orchestrator
# View last 100 lines
provisioning platform logs orchestrator --lines 100
# Follow logs in real-time
provisioning platform logs orchestrator --follow
</code></pre>
<hr />
<h2 id="service-commands"><a class="header" href="#service-commands">Service Commands</a></h2>
<p>Individual service management commands.</p>
<h3 id="list-services"><a class="header" href="#list-services">List Services</a></h3>
<pre><code class="language-bash"># List all services
provisioning services list
# List only running services
provisioning services list --running
# Filter by category
provisioning services list --category orchestration
</code></pre>
<p><strong>Output</strong>:</p>
<pre><code>name             type            category       status   deployment_mode  auto_start
orchestrator     platform        orchestration  running  binary           true
control-center   platform        ui             stopped  binary           false
coredns          infrastructure  dns            stopped  docker           false
</code></pre>
<h3 id="service-status"><a class="header" href="#service-status">Service Status</a></h3>
<p>Get detailed status of a service:</p>
<pre><code class="language-bash">provisioning services status orchestrator
</code></pre>
<p><strong>Output</strong>:</p>
<pre><code>Service: orchestrator
Type: platform
Category: orchestration
Status: running
Deployment: binary
Health: healthy
Auto-start: true
PID: 12345
Uptime: 3600s
Dependencies: []
</code></pre>
<h3 id="start-service"><a class="header" href="#start-service">Start Service</a></h3>
<pre><code class="language-bash"># Start service (with pre-flight checks)
provisioning services start orchestrator
# Force start (skip checks)
provisioning services start orchestrator --force
</code></pre>
<p><strong>Pre-flight Checks</strong>:</p>
<ol>
<li>Validate prerequisites (binary exists, Docker running, etc.)</li>
<li>Check for conflicts</li>
<li>Verify dependencies are running</li>
<li>Auto-start dependencies if needed</li>
</ol>
<h3 id="stop-service"><a class="header" href="#stop-service">Stop Service</a></h3>
<pre><code class="language-bash"># Stop service (with dependency check)
provisioning services stop orchestrator
# Force stop (ignore dependents)
provisioning services stop orchestrator --force
</code></pre>
<h3 id="restart-service"><a class="header" href="#restart-service">Restart Service</a></h3>
<pre><code class="language-bash">provisioning services restart orchestrator
</code></pre>
<h3 id="service-health"><a class="header" href="#service-health">Service Health</a></h3>
<p>Check service health:</p>
<pre><code class="language-bash">provisioning services health orchestrator
</code></pre>
<p><strong>Output</strong>:</p>
<pre><code>Service: orchestrator
Status: healthy
Healthy: true
Message: HTTP health check passed
Check type: http
Check duration: 15ms
</code></pre>
<h3 id="service-logs"><a class="header" href="#service-logs">Service Logs</a></h3>
<pre><code class="language-bash"># View logs
provisioning services logs orchestrator
# Follow logs
provisioning services logs orchestrator --follow
# Custom line count
provisioning services logs orchestrator --lines 200
</code></pre>
<h3 id="check-required-services"><a class="header" href="#check-required-services">Check Required Services</a></h3>
<p>Check which services are required for an operation:</p>
<pre><code class="language-bash">provisioning services check server
</code></pre>
<p><strong>Output</strong>:</p>
<pre><code>Operation: server
Required services: orchestrator
All running: true
</code></pre>
<h3 id="service-dependencies"><a class="header" href="#service-dependencies">Service Dependencies</a></h3>
<p>View dependency graph:</p>
<pre><code class="language-bash"># View all dependencies
provisioning services dependencies
# View specific service dependencies
provisioning services dependencies control-center
</code></pre>
<h3 id="validate-services"><a class="header" href="#validate-services">Validate Services</a></h3>
<p>Validate all service configurations:</p>
<pre><code class="language-bash">provisioning services validate
</code></pre>
<p><strong>Output</strong>:</p>
<pre><code>Total services: 7
Valid: 6
Invalid: 1
Invalid services:
❌ coredns:
- Docker is not installed or not running
</code></pre>
<h3 id="readiness-report"><a class="header" href="#readiness-report">Readiness Report</a></h3>
<p>Get platform readiness report:</p>
<pre><code class="language-bash">provisioning services readiness
</code></pre>
<p><strong>Output</strong>:</p>
<pre><code>Platform Readiness Report
Total services: 7
Running: 3
Ready to start: 6
Services:
🟢 orchestrator - platform - orchestration
🟢 control-center - platform - ui
🔴 coredns - infrastructure - dns
Issues: 1
🟡 gitea - infrastructure - git
</code></pre>
<h3 id="monitor-service"><a class="header" href="#monitor-service">Monitor Service</a></h3>
<p>Continuous health monitoring:</p>
<pre><code class="language-bash"># Monitor with default interval (30s)
provisioning services monitor orchestrator
# Custom interval
provisioning services monitor orchestrator --interval 10
</code></pre>
<hr />
<h2 id="deployment-modes"><a class="header" href="#deployment-modes">Deployment Modes</a></h2>
<h3 id="binary-deployment"><a class="header" href="#binary-deployment">Binary Deployment</a></h3>
<p>Run services as native binaries.</p>
<p><strong>Configuration</strong>:</p>
<pre><code class="language-toml">[services.orchestrator.deployment]
mode = "binary"
[services.orchestrator.deployment.binary]
binary_path = "${HOME}/.provisioning/bin/provisioning-orchestrator"
args = ["--port", "9090"]
working_dir = "${HOME}/.provisioning/orchestrator"
env = { RUST_LOG = "info" }
</code></pre>
<p><strong>Process Management</strong>:</p>
<ul>
<li>PID tracking in <code>~/.provisioning/services/pids/</code></li>
<li>Log output to <code>~/.provisioning/services/logs/</code></li>
<li>State tracking in <code>~/.provisioning/services/state/</code></li>
</ul>
<h3 id="docker-deployment"><a class="header" href="#docker-deployment">Docker Deployment</a></h3>
<p>Run services as Docker containers.</p>
<p><strong>Configuration</strong>:</p>
<pre><code class="language-toml">[services.coredns.deployment]
mode = "docker"
[services.coredns.deployment.docker]
image = "coredns/coredns:1.11.1"
container_name = "provisioning-coredns"
ports = ["5353:53/udp"]
volumes = ["${HOME}/.provisioning/coredns/Corefile:/Corefile:ro"]
restart_policy = "unless-stopped"
</code></pre>
<p><strong>Prerequisites</strong>:</p>
<ul>
<li>Docker daemon running</li>
<li>Docker CLI installed</li>
</ul>
<h3 id="docker-compose-deployment"><a class="header" href="#docker-compose-deployment">Docker Compose Deployment</a></h3>
<p>Run services via Docker Compose.</p>
<p><strong>Configuration</strong>:</p>
<pre><code class="language-toml">[services.platform.deployment]
mode = "docker-compose"
[services.platform.deployment.docker_compose]
compose_file = "${HOME}/.provisioning/platform/docker-compose.yaml"
service_name = "orchestrator"
project_name = "provisioning"
</code></pre>
<p><strong>File</strong>: <code>provisioning/platform/docker-compose.yaml</code></p>
<h3 id="kubernetes-deployment"><a class="header" href="#kubernetes-deployment">Kubernetes Deployment</a></h3>
<p>Run services on Kubernetes.</p>
<p><strong>Configuration</strong>:</p>
<pre><code class="language-toml">[services.orchestrator.deployment]
mode = "kubernetes"
[services.orchestrator.deployment.kubernetes]
namespace = "provisioning"
deployment_name = "orchestrator"
manifests_path = "${HOME}/.provisioning/k8s/orchestrator/"
</code></pre>
<p><strong>Prerequisites</strong>:</p>
<ul>
<li>kubectl installed and configured</li>
<li>Kubernetes cluster accessible</li>
</ul>
<h3 id="remote-deployment"><a class="header" href="#remote-deployment">Remote Deployment</a></h3>
<p>Connect to remotely-running services.</p>
<p><strong>Configuration</strong>:</p>
<pre><code class="language-toml">[services.orchestrator.deployment]
mode = "remote"
[services.orchestrator.deployment.remote]
endpoint = "https://orchestrator.example.com"
tls_enabled = true
auth_token_path = "${HOME}/.provisioning/tokens/orchestrator.token"
</code></pre>
<hr />
<h2 id="health-monitoring"><a class="header" href="#health-monitoring">Health Monitoring</a></h2>
<h3 id="health-check-types"><a class="header" href="#health-check-types">Health Check Types</a></h3>
<h4 id="http-health-check"><a class="header" href="#http-health-check">HTTP Health Check</a></h4>
<pre><code class="language-toml">[services.orchestrator.health_check]
type = "http"
[services.orchestrator.health_check.http]
endpoint = "http://localhost:9090/health"
expected_status = 200
method = "GET"
</code></pre>
<h4 id="tcp-health-check"><a class="header" href="#tcp-health-check">TCP Health Check</a></h4>
<pre><code class="language-toml">[services.coredns.health_check]
type = "tcp"
[services.coredns.health_check.tcp]
host = "localhost"
port = 5353
</code></pre>
<h4 id="command-health-check"><a class="header" href="#command-health-check">Command Health Check</a></h4>
<pre><code class="language-toml">[services.custom.health_check]
type = "command"
[services.custom.health_check.command]
command = "systemctl is-active myservice"
expected_exit_code = 0
</code></pre>
<h4 id="file-health-check"><a class="header" href="#file-health-check">File Health Check</a></h4>
<pre><code class="language-toml">[services.custom.health_check]
type = "file"
[services.custom.health_check.file]
path = "/var/run/myservice.pid"
must_exist = true
</code></pre>
<h3 id="health-check-configuration"><a class="header" href="#health-check-configuration">Health Check Configuration</a></h3>
<ul>
<li><code>interval</code>: Seconds between checks (default: 10)</li>
<li><code>retries</code>: Max retry attempts (default: 3)</li>
<li><code>timeout</code>: Check timeout in seconds (default: 5)</li>
</ul>
<h3 id="continuous-monitoring"><a class="header" href="#continuous-monitoring">Continuous Monitoring</a></h3>
<pre><code class="language-bash">provisioning services monitor orchestrator --interval 30
</code></pre>
<p><strong>Output</strong>:</p>
<pre><code>Starting health monitoring for orchestrator (interval: 30s)
Press Ctrl+C to stop
2025-10-06 14:30:00 ✅ orchestrator: HTTP health check passed
2025-10-06 14:30:30 ✅ orchestrator: HTTP health check passed
2025-10-06 14:31:00 ✅ orchestrator: HTTP health check passed
</code></pre>
<hr />
<h2 id="dependency-management"><a class="header" href="#dependency-management">Dependency Management</a></h2>
<h3 id="dependency-graph"><a class="header" href="#dependency-graph">Dependency Graph</a></h3>
<p>Services can depend on other services:</p>
<pre><code class="language-toml">[services.control-center]
dependencies = ["orchestrator"]
[services.api-gateway]
dependencies = ["orchestrator", "control-center", "mcp-server"]
</code></pre>
<h3 id="startup-order"><a class="header" href="#startup-order">Startup Order</a></h3>
<p>Services start in topological order:</p>
<pre><code>orchestrator (order: 10)
└─&gt; control-center (order: 20)
└─&gt; api-gateway (order: 45)
</code></pre>
<h3 id="dependency-resolution"><a class="header" href="#dependency-resolution">Dependency Resolution</a></h3>
<p>Automatic dependency resolution when starting services:</p>
<pre><code class="language-bash"># Starting control-center automatically starts orchestrator first
provisioning services start control-center
</code></pre>
<p><strong>Output</strong>:</p>
<pre><code>Starting dependency: orchestrator
✅ Started orchestrator with PID 12345
Waiting for orchestrator to become healthy...
✅ Service orchestrator is healthy
Starting service: control-center
✅ Started control-center with PID 12346
✅ Service control-center is healthy
</code></pre>
<h3 id="conflicts"><a class="header" href="#conflicts">Conflicts</a></h3>
<p>Services can conflict with each other:</p>
<pre><code class="language-toml">[services.coredns]
conflicts = ["dnsmasq", "systemd-resolved"]
</code></pre>
<p>Attempting to start a conflicting service will fail:</p>
<pre><code class="language-bash">provisioning services start coredns
</code></pre>
<p><strong>Output</strong>:</p>
<pre><code>❌ Pre-flight check failed: conflicts
Conflicting services running: dnsmasq
</code></pre>
<h3 id="reverse-dependencies"><a class="header" href="#reverse-dependencies">Reverse Dependencies</a></h3>
<p>Check which services depend on a service:</p>
<pre><code class="language-bash">provisioning services dependencies orchestrator
</code></pre>
<p><strong>Output</strong>:</p>
<pre><code>## orchestrator
- Type: platform
- Category: orchestration
- Required by:
- control-center
- mcp-server
- api-gateway
</code></pre>
<h3 id="safe-stop"><a class="header" href="#safe-stop">Safe Stop</a></h3>
<p>The system prevents stopping a service that has running dependents:</p>
<pre><code class="language-bash">provisioning services stop orchestrator
</code></pre>
<p><strong>Output</strong>:</p>
<pre><code>❌ Cannot stop orchestrator:
Dependent services running: control-center, mcp-server, api-gateway
Use --force to stop anyway
</code></pre>
<hr />
<h2 id="pre-flight-checks"><a class="header" href="#pre-flight-checks">Pre-flight Checks</a></h2>
<h3 id="purpose"><a class="header" href="#purpose">Purpose</a></h3>
<p>Pre-flight checks ensure services can start successfully before attempting to start them.</p>
<h3 id="check-types"><a class="header" href="#check-types">Check Types</a></h3>
<ol>
<li><strong>Prerequisites</strong>: Binary exists, Docker running, etc.</li>
<li><strong>Conflicts</strong>: No conflicting services running</li>
<li><strong>Dependencies</strong>: All dependencies available</li>
</ol>
<h3 id="automatic-checks"><a class="header" href="#automatic-checks">Automatic Checks</a></h3>
<p>Pre-flight checks run automatically when starting services:</p>
<pre><code class="language-bash">provisioning services start orchestrator
</code></pre>
<p><strong>Check Process</strong>:</p>
<pre><code>Running pre-flight checks for orchestrator...
✅ Binary found: /Users/user/.provisioning/bin/provisioning-orchestrator
✅ No conflicts detected
✅ All dependencies available
Starting service: orchestrator
</code></pre>
<h3 id="manual-validation"><a class="header" href="#manual-validation">Manual Validation</a></h3>
<p>Validate all services:</p>
<pre><code class="language-bash">provisioning services validate
</code></pre>
<p>Check a specific service:</p>
<pre><code class="language-bash">provisioning services status orchestrator
</code></pre>
<h3 id="auto-start"><a class="header" href="#auto-start">Auto-Start</a></h3>
<p>Services with <code>auto_start = true</code> can be started automatically when needed:</p>
<pre><code class="language-bash"># Orchestrator auto-starts if needed for server operations
provisioning server create
</code></pre>
<p><strong>Output</strong>:</p>
<pre><code>Starting required services...
✅ Orchestrator started
Creating server...
</code></pre>
<hr />
<h2 id="troubleshooting"><a class="header" href="#troubleshooting">Troubleshooting</a></h2>
<h3 id="service-wont-start"><a class="header" href="#service-wont-start">Service Won't Start</a></h3>
<p><strong>Check prerequisites</strong>:</p>
<pre><code class="language-bash">provisioning services validate
provisioning services status &lt;service&gt;
</code></pre>
<p><strong>Common issues</strong>:</p>
<ul>
<li>Binary not found: Check <code>binary_path</code> in config</li>
<li>Docker not running: Start Docker daemon</li>
<li>Port already in use: Check for conflicting processes</li>
<li>Dependencies not running: Start dependencies first</li>
</ul>
<h3 id="service-health-check-failing"><a class="header" href="#service-health-check-failing">Service Health Check Failing</a></h3>
<p><strong>View health status</strong>:</p>
<pre><code class="language-bash">provisioning services health &lt;service&gt;
</code></pre>
<p><strong>Check logs</strong>:</p>
<pre><code class="language-bash">provisioning services logs &lt;service&gt; --follow
</code></pre>
<p><strong>Common issues</strong>:</p>
<ul>
<li>Service not fully initialized: Wait longer or increase <code>start_timeout</code></li>
<li>Wrong health check endpoint: Verify endpoint in config</li>
<li>Network issues: Check firewall, port bindings</li>
</ul>
<h3 id="dependency-issues"><a class="header" href="#dependency-issues">Dependency Issues</a></h3>
<p><strong>View dependency tree</strong>:</p>
<pre><code class="language-bash">provisioning services dependencies &lt;service&gt;
</code></pre>
<p><strong>Check dependency status</strong>:</p>
<pre><code class="language-bash">provisioning services status &lt;dependency&gt;
</code></pre>
<p><strong>Start with dependencies</strong>:</p>
<pre><code class="language-bash">provisioning platform start &lt;service&gt;
</code></pre>
<h3 id="circular-dependencies"><a class="header" href="#circular-dependencies">Circular Dependencies</a></h3>
<p><strong>Validate dependency graph</strong>:</p>
<pre><code class="language-bash"># This is done automatically but you can check manually
nu -c "use lib_provisioning/services/mod.nu *; validate-dependency-graph"
</code></pre>
<h3 id="pid-file-stale"><a class="header" href="#pid-file-stale">PID File Stale</a></h3>
<p>If a service reports running but isn't:</p>
<pre><code class="language-bash"># Manual cleanup
rm ~/.provisioning/services/pids/&lt;service&gt;.pid
# Force restart
provisioning services restart &lt;service&gt;
</code></pre>
<h3 id="port-conflicts"><a class="header" href="#port-conflicts">Port Conflicts</a></h3>
<p><strong>Find process using port</strong>:</p>
<pre><code class="language-bash">lsof -i :9090
</code></pre>
<p><strong>Kill conflicting process</strong>:</p>
<pre><code class="language-bash">kill &lt;PID&gt;
</code></pre>
<h3 id="docker-issues"><a class="header" href="#docker-issues">Docker Issues</a></h3>
<p><strong>Check Docker status</strong>:</p>
<pre><code class="language-bash">docker ps
docker info
</code></pre>
<p><strong>View container logs</strong>:</p>
<pre><code class="language-bash">docker logs provisioning-&lt;service&gt;
</code></pre>
<p><strong>Restart Docker daemon</strong>:</p>
<pre><code class="language-bash"># macOS
killall Docker &amp;&amp; open /Applications/Docker.app
# Linux
systemctl restart docker
</code></pre>
<h3 id="service-logs-1"><a class="header" href="#service-logs-1">Service Logs</a></h3>
<p><strong>View recent logs</strong>:</p>
<pre><code class="language-bash">tail -f ~/.provisioning/services/logs/&lt;service&gt;.log
</code></pre>
<p><strong>Search logs</strong>:</p>
<pre><code class="language-bash">grep "ERROR" ~/.provisioning/services/logs/&lt;service&gt;.log
</code></pre>
<hr />
<h2 id="advanced-usage"><a class="header" href="#advanced-usage">Advanced Usage</a></h2>
<h3 id="custom-service-registration"><a class="header" href="#custom-service-registration">Custom Service Registration</a></h3>
<p>Add custom services by editing <code>provisioning/config/services.toml</code>.</p>
<h3 id="integration-with-workflows"><a class="header" href="#integration-with-workflows">Integration with Workflows</a></h3>
<p>Services automatically start when required by workflows:</p>
<pre><code class="language-bash"># Orchestrator starts automatically if not running
provisioning workflow submit my-workflow
</code></pre>
<h3 id="cicd-integration"><a class="header" href="#cicd-integration">CI/CD Integration</a></h3>
<pre><code class="language-yaml"># GitLab CI
before_script:
  - provisioning platform start orchestrator
  - provisioning services health orchestrator
test:
  script:
    - provisioning test quick kubernetes
</code></pre>
<h3 id="monitoring-integration"><a class="header" href="#monitoring-integration">Monitoring Integration</a></h3>
<p>Services can integrate with monitoring systems via health endpoints.</p>
<hr />
<h2 id="related-documentation"><a class="header" href="#related-documentation">Related Documentation</a></h2>
<ul>
<li><a href="../../provisioning/platform/orchestrator/README.html">Orchestrator README</a></li>
<li><a href="./test-environment-guide.html">Test Environment Guide</a></li>
<li><a href="./workflow-management.html">Workflow Management</a></li>
</ul>
<hr />
<p><strong>Maintained By</strong>: Platform Team<br />
<strong>Support</strong>: <a href="https://github.com/your-org/provisioning/issues">GitHub Issues</a></p>
</main>
<nav class="nav-wrapper" aria-label="Page navigation">
<!-- Mobile navigation buttons -->
<a rel="prev" href="../user/COREDNS_GUIDE.html" class="mobile-nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
<i class="fa fa-angle-left"></i>
</a>
<a rel="next prefetch" href="../user/SERVICE_MANAGEMENT_QUICKREF.html" class="mobile-nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
<i class="fa fa-angle-right"></i>
</a>
<div style="clear: both"></div>
</nav>
</div>
</div>
<nav class="nav-wide-wrapper" aria-label="Page navigation">
<a rel="prev" href="../user/COREDNS_GUIDE.html" class="nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
<i class="fa fa-angle-left"></i>
</a>
<a rel="next prefetch" href="../user/SERVICE_MANAGEMENT_QUICKREF.html" class="nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
<i class="fa fa-angle-right"></i>
</a>
</nav>
</div>
<!-- Livereload script (if served using the cli tool) -->
<script>
// Livereload client: opens a WebSocket to the serving host's /__livereload
// endpoint and reloads the page when a "reload" message arrives.
// A secure socket (wss:) is used only when the page itself was loaded over https:.
const wsProtocol = location.protocol !== 'https:' ? 'ws:' : 'wss:';
const wsAddress = `${wsProtocol}//${location.host}/__livereload`;
const socket = new WebSocket(wsAddress);

// Ignore every message except "reload"; on "reload", close the socket first
// and then reload the page.
socket.onmessage = ({ data }) => {
    if (data !== "reload") {
        return;
    }
    socket.close();
    location.reload();
};

// Close the socket when the page is about to unload.
window.onbeforeunload = () => {
    socket.close();
};
</script>
<script>
// Global flag set before the book scripts load.
// NOTE(review): presumably read by book.js to enable copy buttons on code blocks — confirm.
window.playground_copyable = true;
</script>
<script src="../elasticlunr.min.js"></script>
<script src="../mark.min.js"></script>
<script src="../searcher.js"></script>
<script src="../clipboard.min.js"></script>
<script src="../highlight.js"></script>
<script src="../book.js"></script>
<!-- Custom JS scripts -->
</div>
</body>
</html>