2026-01-14 04:53:21 +00:00
|
|
|
# Service Management Guide
|
|
|
|
|
|
|
|
|
|
**Version**: 1.0.0
|
|
|
|
|
**Last Updated**: 2025-10-06
|
|
|
|
|
|
|
|
|
|
## Table of Contents
|
|
|
|
|
|
|
|
|
|
1. [Overview](#overview)
|
|
|
|
|
2. [Service Architecture](#service-architecture)
|
|
|
|
|
3. [Service Registry](#service-registry)
|
|
|
|
|
4. [Platform Commands](#platform-commands)
|
|
|
|
|
5. [Service Commands](#service-commands)
|
|
|
|
|
6. [Deployment Modes](#deployment-modes)
|
|
|
|
|
7. [Health Monitoring](#health-monitoring)
|
|
|
|
|
8. [Dependency Management](#dependency-management)
|
|
|
|
|
9. [Pre-flight Checks](#pre-flight-checks)
|
|
|
|
|
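10. [Troubleshooting](#troubleshooting)
11. [Advanced Usage](#advanced-usage)
12. [Related Documentation](#related-documentation)
13. [Quick Reference](#quick-reference)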
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
## Overview
|
|
|
|
|
|
|
|
|
|
The Service Management System provides comprehensive lifecycle management for all platform services (orchestrator, control-center, CoreDNS, Gitea, OCI
|
|
|
|
|
registry, MCP server, API gateway).
|
|
|
|
|
|
|
|
|
|
### Key Features
|
|
|
|
|
|
|
|
|
|
- **Unified Service Management**: Single interface for all services
|
|
|
|
|
- **Automatic Dependency Resolution**: Start services in correct order
|
|
|
|
|
- **Health Monitoring**: Continuous health checks with automatic recovery
|
|
|
|
|
- **Multiple Deployment Modes**: Binary, Docker, Docker Compose, Kubernetes, Remote
|
|
|
|
|
- **Pre-flight Checks**: Validate prerequisites before operations
|
|
|
|
|
- **Service Registry**: Centralized service configuration
|
|
|
|
|
|
|
|
|
|
### Supported Services
|
|
|
|
|
|
|
|
|
|
| Service | Type | Category | Description |
|
|
|
|
|
| --------- | ------ | ---------- | ------------- |
|
|
|
|
|
| orchestrator | Platform | Orchestration | Rust-based workflow coordinator |
|
|
|
|
|
| control-center | Platform | UI | Web-based management interface |
|
|
|
|
|
| coredns | Infrastructure | DNS | Local DNS resolution |
|
|
|
|
|
| gitea | Infrastructure | Git | Self-hosted Git service |
|
|
|
|
|
| oci-registry | Infrastructure | Registry | OCI-compliant container registry |
|
|
|
|
|
| mcp-server | Platform | API | Model Context Protocol server |
|
|
|
|
|
| api-gateway | Platform | API | Unified REST API gateway |
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
## Service Architecture
|
|
|
|
|
|
|
|
|
|
### System Architecture
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
┌─────────────────────────────────────────┐
|
|
|
|
|
│ Service Management CLI │
|
|
|
|
|
│ (platform/services commands) │
|
|
|
|
|
└─────────────────┬───────────────────────┘
|
|
|
|
|
│
|
|
|
|
|
┌──────────┴──────────┐
|
|
|
|
|
│ │
|
|
|
|
|
▼ ▼
|
|
|
|
|
┌──────────────┐ ┌───────────────┐
|
|
|
|
|
│ Manager │ │ Lifecycle │
|
|
|
|
|
│ (Core) │ │ (Start/Stop)│
|
|
|
|
|
└──────┬───────┘ └───────┬───────┘
|
|
|
|
|
│ │
|
|
|
|
|
▼ ▼
|
|
|
|
|
┌──────────────┐ ┌───────────────┐
|
|
|
|
|
│ Health │ │ Dependencies │
|
|
|
|
|
│ (Checks) │ │ (Resolution) │
|
|
|
|
|
└──────────────┘ └───────────────┘
|
|
|
|
|
│ │
|
|
|
|
|
└────────┬───────────┘
|
|
|
|
|
│
|
|
|
|
|
▼
|
|
|
|
|
┌────────────────┐
|
|
|
|
|
│ Pre-flight │
|
|
|
|
|
│ (Validation) │
|
|
|
|
|
└────────────────┘
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Component Responsibilities
|
|
|
|
|
|
|
|
|
|
**Manager** (`manager.nu`)
|
|
|
|
|
|
|
|
|
|
- Service registry loading
|
|
|
|
|
- Service status tracking
|
|
|
|
|
- State persistence
|
|
|
|
|
|
|
|
|
|
**Lifecycle** (`lifecycle.nu`)
|
|
|
|
|
|
|
|
|
|
- Service start/stop operations
|
|
|
|
|
- Deployment mode handling
|
|
|
|
|
- Process management
|
|
|
|
|
|
|
|
|
|
**Health** (`health.nu`)
|
|
|
|
|
|
|
|
|
|
- Health check execution
|
|
|
|
|
- HTTP/TCP/Command/File checks
|
|
|
|
|
- Continuous monitoring
|
|
|
|
|
|
|
|
|
|
**Dependencies** (`dependencies.nu`)
|
|
|
|
|
|
|
|
|
|
- Dependency graph analysis
|
|
|
|
|
- Topological sorting
|
|
|
|
|
- Startup order calculation
|
|
|
|
|
|
|
|
|
|
**Pre-flight** (`preflight.nu`)
|
|
|
|
|
|
|
|
|
|
- Prerequisite validation
|
|
|
|
|
- Conflict detection
|
|
|
|
|
- Auto-start orchestration
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
## Service Registry
|
|
|
|
|
|
|
|
|
|
### Configuration File
|
|
|
|
|
|
|
|
|
|
**Location**: `provisioning/config/services.toml`
|
|
|
|
|
|
|
|
|
|
### Service Definition Structure
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
[services.<service-name>]
|
|
|
|
|
name = "<service-name>"
|
|
|
|
|
type = "platform" | "infrastructure" | "utility"
|
|
|
|
|
category = "orchestration" | "auth" | "dns" | "git" | "registry" | "api" | "ui"
|
|
|
|
|
description = "Service description"
|
|
|
|
|
required_for = ["operation1", "operation2"]
|
|
|
|
|
dependencies = ["dependency1", "dependency2"]
|
|
|
|
|
conflicts = ["conflicting-service"]
|
|
|
|
|
|
|
|
|
|
[services.<service-name>.deployment]
|
|
|
|
|
mode = "binary" | "docker" | "docker-compose" | "kubernetes" | "remote"
|
|
|
|
|
|
|
|
|
|
# Mode-specific configuration
|
|
|
|
|
[services.<service-name>.deployment.binary]
|
|
|
|
|
binary_path = "/path/to/binary"
|
|
|
|
|
args = ["--arg1", "value1"]
|
|
|
|
|
working_dir = "/working/directory"
|
|
|
|
|
env = { KEY = "value" }
|
|
|
|
|
|
|
|
|
|
[services.<service-name>.health_check]
|
|
|
|
|
type = "http" | "tcp" | "command" | "file" | "none"
|
|
|
|
|
interval = 10
|
|
|
|
|
retries = 3
|
|
|
|
|
timeout = 5
|
|
|
|
|
|
|
|
|
|
[services.<service-name>.health_check.http]
|
|
|
|
|
endpoint = "http://localhost:9090/health"
|
|
|
|
|
expected_status = 200
|
|
|
|
|
method = "GET"
|
|
|
|
|
|
|
|
|
|
[services.<service-name>.startup]
|
|
|
|
|
auto_start = true
|
|
|
|
|
start_timeout = 30
|
|
|
|
|
start_order = 10
|
|
|
|
|
restart_on_failure = true
|
|
|
|
|
max_restarts = 3
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Example: Orchestrator Service
|
|
|
|
|
|
|
|
|
|
```toml
|
|
|
|
|
[services.orchestrator]
|
|
|
|
|
name = "orchestrator"
|
|
|
|
|
type = "platform"
|
|
|
|
|
category = "orchestration"
|
|
|
|
|
description = "Rust-based orchestrator for workflow coordination"
|
|
|
|
|
required_for = ["server", "taskserv", "cluster", "workflow", "batch"]
|
|
|
|
|
|
|
|
|
|
[services.orchestrator.deployment]
|
|
|
|
|
mode = "binary"
|
|
|
|
|
|
|
|
|
|
[services.orchestrator.deployment.binary]
|
|
|
|
|
binary_path = "${HOME}/.provisioning/bin/provisioning-orchestrator"
|
|
|
|
|
args = ["--port", "8080", "--data-dir", "${HOME}/.provisioning/orchestrator/data"]
|
|
|
|
|
|
|
|
|
|
[services.orchestrator.health_check]
|
|
|
|
|
type = "http"
|
|
|
|
|
|
|
|
|
|
[services.orchestrator.health_check.http]
|
|
|
|
|
endpoint = "http://localhost:9090/health"
|
|
|
|
|
expected_status = 200
|
|
|
|
|
|
|
|
|
|
[services.orchestrator.startup]
|
|
|
|
|
auto_start = true
|
|
|
|
|
start_timeout = 30
|
|
|
|
|
start_order = 10
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
## Platform Commands
|
|
|
|
|
|
|
|
|
|
Platform commands manage all services as a cohesive system.
|
|
|
|
|
|
|
|
|
|
### Start Platform
|
|
|
|
|
|
|
|
|
|
Start all auto-start services or specific services:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# Start all auto-start services
|
|
|
|
|
provisioning platform start
|
|
|
|
|
|
|
|
|
|
# Start specific services (with dependencies)
|
|
|
|
|
provisioning platform start orchestrator control-center
|
|
|
|
|
|
|
|
|
|
# Force restart if already running
|
|
|
|
|
provisioning platform start --force orchestrator
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
**Behavior**:
|
|
|
|
|
|
|
|
|
|
1. Resolves dependencies
|
|
|
|
|
2. Calculates startup order (topological sort)
|
|
|
|
|
3. Starts services in correct order
|
|
|
|
|
4. Waits for health checks
|
|
|
|
|
5. Reports success/failure
|
|
|
|
|
|
|
|
|
|
### Stop Platform
|
|
|
|
|
|
|
|
|
|
Stop all running services or specific services:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# Stop all running services
|
|
|
|
|
provisioning platform stop
|
|
|
|
|
|
|
|
|
|
# Stop specific services
|
|
|
|
|
provisioning platform stop orchestrator control-center
|
|
|
|
|
|
|
|
|
|
# Force stop (kill -9)
|
|
|
|
|
provisioning platform stop --force orchestrator
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
**Behavior**:
|
|
|
|
|
|
|
|
|
|
1. Checks for dependent services
|
|
|
|
|
2. Stops in reverse dependency order
|
|
|
|
|
3. Updates service state
|
|
|
|
|
4. Cleans up PID files
|
|
|
|
|
|
|
|
|
|
### Restart Platform
|
|
|
|
|
|
|
|
|
|
Restart running services:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# Restart all running services
|
|
|
|
|
provisioning platform restart
|
|
|
|
|
|
|
|
|
|
# Restart specific services
|
|
|
|
|
provisioning platform restart orchestrator
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Platform Status
|
|
|
|
|
|
|
|
|
|
Show status of all services:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
provisioning platform status
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
**Output**:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
Platform Services Status
|
|
|
|
|
|
|
|
|
|
Running: 3/7
|
|
|
|
|
|
|
|
|
|
=== ORCHESTRATION ===
|
|
|
|
|
🟢 orchestrator - running (uptime: 3600s) ✅
|
|
|
|
|
|
|
|
|
|
=== UI ===
|
|
|
|
|
🟢 control-center - running (uptime: 3550s) ✅
|
|
|
|
|
|
|
|
|
|
=== DNS ===
|
|
|
|
|
⚪ coredns - stopped ❓
|
|
|
|
|
|
|
|
|
|
=== GIT ===
|
|
|
|
|
⚪ gitea - stopped ❓
|
|
|
|
|
|
|
|
|
|
=== REGISTRY ===
|
|
|
|
|
⚪ oci-registry - stopped ❓
|
|
|
|
|
|
|
|
|
|
=== API ===
|
|
|
|
|
🟢 mcp-server - running (uptime: 3540s) ✅
|
|
|
|
|
⚪ api-gateway - stopped ❓
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Platform Health
|
|
|
|
|
|
|
|
|
|
Check health of all running services:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
provisioning platform health
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
**Output**:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
Platform Health Check
|
|
|
|
|
|
|
|
|
|
✅ orchestrator: Healthy - HTTP health check passed
|
|
|
|
|
✅ control-center: Healthy - HTTP status 200 matches expected
|
|
|
|
|
⚪ coredns: Not running
|
|
|
|
|
✅ mcp-server: Healthy - HTTP health check passed
|
|
|
|
|
|
|
|
|
|
Summary: 3 healthy, 0 unhealthy, 4 not running
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Platform Logs
|
|
|
|
|
|
|
|
|
|
View service logs:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# View last 50 lines
|
|
|
|
|
provisioning platform logs orchestrator
|
|
|
|
|
|
|
|
|
|
# View last 100 lines
|
|
|
|
|
provisioning platform logs orchestrator --lines 100
|
|
|
|
|
|
|
|
|
|
# Follow logs in real-time
|
|
|
|
|
provisioning platform logs orchestrator --follow
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
## Service Commands
|
|
|
|
|
|
|
|
|
|
Individual service management commands.
|
|
|
|
|
|
|
|
|
|
### List Services
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# List all services
|
|
|
|
|
provisioning services list
|
|
|
|
|
|
|
|
|
|
# List only running services
|
|
|
|
|
provisioning services list --running
|
|
|
|
|
|
|
|
|
|
# Filter by category
|
|
|
|
|
provisioning services list --category orchestration
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
**Output**:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
name type category status deployment_mode auto_start
|
|
|
|
|
orchestrator platform orchestration running binary true
|
|
|
|
|
control-center platform ui stopped binary false
|
|
|
|
|
coredns infrastructure dns stopped docker false
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Service Status
|
|
|
|
|
|
|
|
|
|
Get detailed status of a service:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
provisioning services status orchestrator
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
**Output**:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
Service: orchestrator
|
|
|
|
|
Type: platform
|
|
|
|
|
Category: orchestration
|
|
|
|
|
Status: running
|
|
|
|
|
Deployment: binary
|
|
|
|
|
Health: healthy
|
|
|
|
|
Auto-start: true
|
|
|
|
|
PID: 12345
|
|
|
|
|
Uptime: 3600s
|
|
|
|
|
Dependencies: []
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Start Service
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# Start service (with pre-flight checks)
|
|
|
|
|
provisioning services start orchestrator
|
|
|
|
|
|
|
|
|
|
# Force start (skip checks)
|
|
|
|
|
provisioning services start orchestrator --force
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
**Pre-flight Checks**:
|
|
|
|
|
|
|
|
|
|
1. Validate prerequisites (binary exists, Docker running, etc.)
|
|
|
|
|
2. Check for conflicts
|
|
|
|
|
3. Verify dependencies are running
|
|
|
|
|
4. Auto-start dependencies if needed
|
|
|
|
|
|
|
|
|
|
### Stop Service
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# Stop service (with dependency check)
|
|
|
|
|
provisioning services stop orchestrator
|
|
|
|
|
|
|
|
|
|
# Force stop (ignore dependents)
|
|
|
|
|
provisioning services stop orchestrator --force
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Restart Service
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
provisioning services restart orchestrator
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Service Health
|
|
|
|
|
|
|
|
|
|
Check service health:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
provisioning services health orchestrator
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
**Output**:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
Service: orchestrator
|
|
|
|
|
Status: healthy
|
|
|
|
|
Healthy: true
|
|
|
|
|
Message: HTTP health check passed
|
|
|
|
|
Check type: http
|
|
|
|
|
Check duration: 15 ms
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Service Logs
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# View logs
|
|
|
|
|
provisioning services logs orchestrator
|
|
|
|
|
|
|
|
|
|
# Follow logs
|
|
|
|
|
provisioning services logs orchestrator --follow
|
|
|
|
|
|
|
|
|
|
# Custom line count
|
|
|
|
|
provisioning services logs orchestrator --lines 200
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Check Required Services
|
|
|
|
|
|
|
|
|
|
Check which services are required for an operation:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
provisioning services check server
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
**Output**:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
Operation: server
|
|
|
|
|
Required services: orchestrator
|
|
|
|
|
All running: true
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Service Dependencies
|
|
|
|
|
|
|
|
|
|
View dependency graph:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# View all dependencies
|
|
|
|
|
provisioning services dependencies
|
|
|
|
|
|
|
|
|
|
# View specific service dependencies
|
|
|
|
|
provisioning services dependencies control-center
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Validate Services
|
|
|
|
|
|
|
|
|
|
Validate all service configurations:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
provisioning services validate
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
**Output**:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
Total services: 7
|
|
|
|
|
Valid: 6
|
|
|
|
|
Invalid: 1
|
|
|
|
|
|
|
|
|
|
Invalid services:
|
|
|
|
|
❌ coredns:
|
|
|
|
|
- Docker is not installed or not running
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Readiness Report
|
|
|
|
|
|
|
|
|
|
Get platform readiness report:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
provisioning services readiness
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
**Output**:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
Platform Readiness Report
|
|
|
|
|
|
|
|
|
|
Total services: 7
|
|
|
|
|
Running: 3
|
|
|
|
|
Ready to start: 6
|
|
|
|
|
|
|
|
|
|
Services:
|
|
|
|
|
🟢 orchestrator - platform - orchestration
|
|
|
|
|
🟢 control-center - platform - ui
|
|
|
|
|
🔴 coredns - infrastructure - dns
|
|
|
|
|
Issues: 1
|
|
|
|
|
🟡 gitea - infrastructure - git
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Monitor Service
|
|
|
|
|
|
|
|
|
|
Continuous health monitoring:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# Monitor with default interval (30s)
|
|
|
|
|
provisioning services monitor orchestrator
|
|
|
|
|
|
|
|
|
|
# Custom interval
|
|
|
|
|
provisioning services monitor orchestrator --interval 10
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
## Deployment Modes
|
|
|
|
|
|
|
|
|
|
### Binary Deployment
|
|
|
|
|
|
|
|
|
|
Run services as native binaries.
|
|
|
|
|
|
|
|
|
|
**Configuration**:
|
|
|
|
|
|
|
|
|
|
```toml
|
|
|
|
|
[services.orchestrator.deployment]
|
|
|
|
|
mode = "binary"
|
|
|
|
|
|
|
|
|
|
[services.orchestrator.deployment.binary]
|
|
|
|
|
binary_path = "${HOME}/.provisioning/bin/provisioning-orchestrator"
|
|
|
|
|
args = ["--port", "8080"]
|
|
|
|
|
working_dir = "${HOME}/.provisioning/orchestrator"
|
|
|
|
|
env = { RUST_LOG = "info" }
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
**Process Management**:
|
|
|
|
|
|
|
|
|
|
- PID tracking in `~/.provisioning/services/pids/`
|
|
|
|
|
- Log output to `~/.provisioning/services/logs/`
|
|
|
|
|
- State tracking in `~/.provisioning/services/state/`
|
|
|
|
|
|
|
|
|
|
### Docker Deployment
|
|
|
|
|
|
|
|
|
|
Run services as Docker containers.
|
|
|
|
|
|
|
|
|
|
**Configuration**:
|
|
|
|
|
|
|
|
|
|
```toml
|
|
|
|
|
[services.coredns.deployment]
|
|
|
|
|
mode = "docker"
|
|
|
|
|
|
|
|
|
|
[services.coredns.deployment.docker]
|
|
|
|
|
image = "coredns/coredns:1.11.1"
|
|
|
|
|
container_name = "provisioning-coredns"
|
|
|
|
|
ports = ["5353:53/udp"]
|
|
|
|
|
volumes = ["${HOME}/.provisioning/coredns/Corefile:/Corefile:ro"]
|
|
|
|
|
restart_policy = "unless-stopped"
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
**Prerequisites**:
|
|
|
|
|
|
|
|
|
|
- Docker daemon running
|
|
|
|
|
- Docker CLI installed
|
|
|
|
|
|
|
|
|
|
### Docker Compose Deployment
|
|
|
|
|
|
|
|
|
|
Run services via Docker Compose.
|
|
|
|
|
|
|
|
|
|
**Configuration**:
|
|
|
|
|
|
|
|
|
|
```toml
|
|
|
|
|
[services.platform.deployment]
|
|
|
|
|
mode = "docker-compose"
|
|
|
|
|
|
|
|
|
|
[services.platform.deployment.docker_compose]
|
|
|
|
|
compose_file = "${HOME}/.provisioning/platform/docker-compose.yaml"
|
|
|
|
|
service_name = "orchestrator"
|
|
|
|
|
project_name = "provisioning"
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
**File**: `provisioning/platform/docker-compose.yaml`
|
|
|
|
|
|
|
|
|
|
### Kubernetes Deployment
|
|
|
|
|
|
|
|
|
|
Run services on Kubernetes.
|
|
|
|
|
|
|
|
|
|
**Configuration**:
|
|
|
|
|
|
|
|
|
|
```toml
|
|
|
|
|
[services.orchestrator.deployment]
|
|
|
|
|
mode = "kubernetes"
|
|
|
|
|
|
|
|
|
|
[services.orchestrator.deployment.kubernetes]
|
|
|
|
|
namespace = "provisioning"
|
|
|
|
|
deployment_name = "orchestrator"
|
|
|
|
|
manifests_path = "${HOME}/.provisioning/k8s/orchestrator/"
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
**Prerequisites**:
|
|
|
|
|
|
|
|
|
|
- kubectl installed and configured
|
|
|
|
|
- Kubernetes cluster accessible
|
|
|
|
|
|
|
|
|
|
### Remote Deployment
|
|
|
|
|
|
|
|
|
|
Connect to services that are already running on a remote host.
|
|
|
|
|
|
|
|
|
|
**Configuration**:
|
|
|
|
|
|
|
|
|
|
```toml
|
|
|
|
|
[services.orchestrator.deployment]
|
|
|
|
|
mode = "remote"
|
|
|
|
|
|
|
|
|
|
[services.orchestrator.deployment.remote]
|
|
|
|
|
endpoint = "https://orchestrator.example.com"
|
|
|
|
|
tls_enabled = true
|
|
|
|
|
auth_token_path = "${HOME}/.provisioning/tokens/orchestrator.token"
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
## Health Monitoring
|
|
|
|
|
|
|
|
|
|
### Health Check Types
|
|
|
|
|
|
|
|
|
|
#### HTTP Health Check
|
|
|
|
|
|
|
|
|
|
```toml
|
|
|
|
|
[services.orchestrator.health_check]
|
|
|
|
|
type = "http"
|
|
|
|
|
|
|
|
|
|
[services.orchestrator.health_check.http]
|
|
|
|
|
endpoint = "http://localhost:9090/health"
|
|
|
|
|
expected_status = 200
|
|
|
|
|
method = "GET"
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
#### TCP Health Check
|
|
|
|
|
|
|
|
|
|
```toml
|
|
|
|
|
[services.coredns.health_check]
|
|
|
|
|
type = "tcp"
|
|
|
|
|
|
|
|
|
|
[services.coredns.health_check.tcp]
|
|
|
|
|
host = "localhost"
|
|
|
|
|
port = 5353
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
#### Command Health Check
|
|
|
|
|
|
|
|
|
|
```toml
|
|
|
|
|
[services.custom.health_check]
|
|
|
|
|
type = "command"
|
|
|
|
|
|
|
|
|
|
[services.custom.health_check.command]
|
|
|
|
|
command = "systemctl is-active myservice"
|
|
|
|
|
expected_exit_code = 0
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
#### File Health Check
|
|
|
|
|
|
|
|
|
|
```toml
|
|
|
|
|
[services.custom.health_check]
|
|
|
|
|
type = "file"
|
|
|
|
|
|
|
|
|
|
[services.custom.health_check.file]
|
|
|
|
|
path = "/var/run/myservice.pid"
|
|
|
|
|
must_exist = true
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Health Check Configuration
|
|
|
|
|
|
|
|
|
|
- `interval`: Seconds between checks (default: 10)
|
|
|
|
|
- `retries`: Max retry attempts (default: 3)
|
|
|
|
|
- `timeout`: Check timeout in seconds (default: 5)
|
|
|
|
|
|
|
|
|
|
### Continuous Monitoring
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
provisioning services monitor orchestrator --interval 30
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
**Output**:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
Starting health monitoring for orchestrator (interval: 30s)
|
|
|
|
|
Press Ctrl+C to stop
|
|
|
|
|
2025-10-06 14:30:00 ✅ orchestrator: HTTP health check passed
|
|
|
|
|
2025-10-06 14:30:30 ✅ orchestrator: HTTP health check passed
|
|
|
|
|
2025-10-06 14:31:00 ✅ orchestrator: HTTP health check passed
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
## Dependency Management
|
|
|
|
|
|
|
|
|
|
### Dependency Graph
|
|
|
|
|
|
|
|
|
|
Services can depend on other services:
|
|
|
|
|
|
|
|
|
|
```toml
|
|
|
|
|
[services.control-center]
|
|
|
|
|
dependencies = ["orchestrator"]
|
|
|
|
|
|
|
|
|
|
[services.api-gateway]
|
|
|
|
|
dependencies = ["orchestrator", "control-center", "mcp-server"]
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Startup Order
|
|
|
|
|
|
|
|
|
|
Services start in topological order:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
orchestrator (order: 10)
|
|
|
|
|
└─> control-center (order: 20)
|
|
|
|
|
└─> api-gateway (order: 45)
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Dependency Resolution
|
|
|
|
|
|
|
|
|
|
Automatic dependency resolution when starting services:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# Starting control-center automatically starts orchestrator first
|
|
|
|
|
provisioning services start control-center
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
**Output**:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
Starting dependency: orchestrator
|
|
|
|
|
✅ Started orchestrator with PID 12345
|
|
|
|
|
Waiting for orchestrator to become healthy...
|
|
|
|
|
✅ Service orchestrator is healthy
|
|
|
|
|
Starting service: control-center
|
|
|
|
|
✅ Started control-center with PID 12346
|
|
|
|
|
✅ Service control-center is healthy
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Conflicts
|
|
|
|
|
|
|
|
|
|
Services can conflict with each other:
|
|
|
|
|
|
|
|
|
|
```toml
|
|
|
|
|
[services.coredns]
|
|
|
|
|
conflicts = ["dnsmasq", "systemd-resolved"]
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
Attempting to start a conflicting service will fail:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
provisioning services start coredns
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
**Output**:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
❌ Pre-flight check failed: conflicts
|
|
|
|
|
Conflicting services running: dnsmasq
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Reverse Dependencies
|
|
|
|
|
|
|
|
|
|
Check which services depend on a service:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
provisioning services dependencies orchestrator
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
**Output**:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
## orchestrator
|
|
|
|
|
- Type: platform
|
|
|
|
|
- Category: orchestration
|
|
|
|
|
- Required by:
|
|
|
|
|
- control-center
|
|
|
|
|
- mcp-server
|
|
|
|
|
- api-gateway
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Safe Stop
|
|
|
|
|
|
|
|
|
|
The system refuses to stop a service while other running services depend on it:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
provisioning services stop orchestrator
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
**Output**:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
❌ Cannot stop orchestrator:
|
|
|
|
|
Dependent services running: control-center, mcp-server, api-gateway
|
|
|
|
|
Use --force to stop anyway
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
## Pre-flight Checks
|
|
|
|
|
|
|
|
|
|
### Purpose
|
|
|
|
|
|
|
|
|
|
Pre-flight checks ensure services can start successfully before attempting to start them.
|
|
|
|
|
|
|
|
|
|
### Check Types
|
|
|
|
|
|
|
|
|
|
1. **Prerequisites**: Binary exists, Docker running, etc.
|
|
|
|
|
2. **Conflicts**: No conflicting services running
|
|
|
|
|
3. **Dependencies**: All dependencies available
|
|
|
|
|
|
|
|
|
|
### Automatic Checks
|
|
|
|
|
|
|
|
|
|
Pre-flight checks run automatically when starting services:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
provisioning services start orchestrator
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
**Check Process**:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
Running pre-flight checks for orchestrator...
|
|
|
|
|
✅ Binary found: /Users/user/.provisioning/bin/provisioning-orchestrator
|
|
|
|
|
✅ No conflicts detected
|
|
|
|
|
✅ All dependencies available
|
|
|
|
|
Starting service: orchestrator
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Manual Validation
|
|
|
|
|
|
|
|
|
|
Validate all services:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
provisioning services validate
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
Inspect a specific service:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
provisioning services status orchestrator
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Auto-Start
|
|
|
|
|
|
|
|
|
|
Services with `auto_start = true` can be started automatically when needed:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# Orchestrator auto-starts if needed for server operations
|
|
|
|
|
provisioning server create
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
**Output**:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
Starting required services...
|
|
|
|
|
✅ Orchestrator started
|
|
|
|
|
Creating server...
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
## Troubleshooting
|
|
|
|
|
|
|
|
|
|
### Service Won't Start
|
|
|
|
|
|
|
|
|
|
**Check prerequisites**:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
provisioning services validate
|
|
|
|
|
provisioning services status <service>
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
**Common issues**:
|
|
|
|
|
|
|
|
|
|
- Binary not found: Check `binary_path` in config
|
|
|
|
|
- Docker not running: Start Docker daemon
|
|
|
|
|
- Port already in use: Check for conflicting processes
|
|
|
|
|
- Dependencies not running: Start dependencies first
|
|
|
|
|
|
|
|
|
|
### Service Health Check Failing
|
|
|
|
|
|
|
|
|
|
**View health status**:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
provisioning services health <service>
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
**Check logs**:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
provisioning services logs <service> --follow
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
**Common issues**:
|
|
|
|
|
|
|
|
|
|
- Service not fully initialized: Wait longer or increase `start_timeout`
|
|
|
|
|
- Wrong health check endpoint: Verify endpoint in config
|
|
|
|
|
- Network issues: Check firewall, port bindings
|
|
|
|
|
|
|
|
|
|
### Dependency Issues
|
|
|
|
|
|
|
|
|
|
**View dependency tree**:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
provisioning services dependencies <service>
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
**Check dependency status**:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
provisioning services status <dependency>
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
**Start with dependencies**:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
provisioning platform start <service>
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Circular Dependencies
|
|
|
|
|
|
|
|
|
|
**Validate dependency graph**:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# This check runs automatically, but you can also run it manually
|
|
|
|
|
nu -c "use lib_provisioning/services/mod.nu *; validate-dependency-graph"
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### PID File Stale
|
|
|
|
|
|
|
|
|
|
If a service is reported as running but its process is no longer alive:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# Manual cleanup
|
|
|
|
|
rm ~/.provisioning/services/pids/<service>.pid
|
|
|
|
|
|
|
|
|
|
# Force restart
|
|
|
|
|
provisioning services restart <service>
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Port Conflicts
|
|
|
|
|
|
|
|
|
|
**Find process using port**:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
lsof -i :9090
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
**Kill conflicting process**:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
kill <PID>
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Docker Issues
|
|
|
|
|
|
|
|
|
|
**Check Docker status**:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
docker ps
|
|
|
|
|
docker info
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
**View container logs**:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
docker logs provisioning-<service>
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
**Restart Docker daemon**:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# macOS
|
|
|
|
|
killall Docker && open /Applications/Docker.app
|
|
|
|
|
|
|
|
|
|
# Linux
|
|
|
|
|
systemctl restart docker
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Service Logs
|
|
|
|
|
|
|
|
|
|
**View recent logs**:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
tail -f ~/.provisioning/services/logs/<service>.log
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
**Search logs**:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
grep "ERROR" ~/.provisioning/services/logs/<service>.log
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
## Advanced Usage
|
|
|
|
|
|
|
|
|
|
### Custom Service Registration
|
|
|
|
|
|
|
|
|
|
Add custom services by editing `provisioning/config/services.toml`.
|
|
|
|
|
|
|
|
|
|
### Integration with Workflows
|
|
|
|
|
|
|
|
|
|
Services automatically start when required by workflows:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# Orchestrator starts automatically if not running
|
|
|
|
|
provisioning workflow submit my-workflow
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### CI/CD Integration
|
|
|
|
|
|
|
|
|
|
```yaml
|
|
|
|
|
# GitLab CI
|
|
|
|
|
before_script:
|
|
|
|
|
- provisioning platform start orchestrator
|
|
|
|
|
- provisioning services health orchestrator
|
|
|
|
|
|
|
|
|
|
test:
|
|
|
|
|
script:
|
|
|
|
|
- provisioning test quick kubernetes
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Monitoring Integration
|
|
|
|
|
|
|
|
|
|
Services can integrate with monitoring systems via health endpoints.
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
## Related Documentation
|
|
|
|
|
|
|
|
|
|
- Orchestrator README
|
|
|
|
|
- [Test Environment Guide](test-environment-guide.md)
|
|
|
|
|
- [Workflow Management](workflow-management.md)
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
## Quick Reference
|
|
|
|
|
|
|
|
|
|
**Version**: 1.0.0
|
|
|
|
|
|
|
|
|
|
### Platform Commands (Manage All Services)
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# Start all auto-start services
|
|
|
|
|
provisioning platform start
|
|
|
|
|
|
|
|
|
|
# Start specific services with dependencies
|
|
|
|
|
provisioning platform start control-center mcp-server
|
|
|
|
|
|
|
|
|
|
# Stop all running services
|
|
|
|
|
provisioning platform stop
|
|
|
|
|
|
|
|
|
|
# Stop specific services
|
|
|
|
|
provisioning platform stop orchestrator
|
|
|
|
|
|
|
|
|
|
# Restart services
|
|
|
|
|
provisioning platform restart
|
|
|
|
|
|
|
|
|
|
# Show platform status
|
|
|
|
|
provisioning platform status
|
|
|
|
|
|
|
|
|
|
# Check platform health
|
|
|
|
|
provisioning platform health
|
|
|
|
|
|
|
|
|
|
# View service logs
|
|
|
|
|
provisioning platform logs orchestrator --follow
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
### Service Commands (Individual Services)
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# List all services
|
|
|
|
|
provisioning services list
|
|
|
|
|
|
|
|
|
|
# List only running services
|
|
|
|
|
provisioning services list --running
|
|
|
|
|
|
|
|
|
|
# Filter by category
|
|
|
|
|
provisioning services list --category orchestration
|
|
|
|
|
|
|
|
|
|
# Service status
|
|
|
|
|
provisioning services status orchestrator
|
|
|
|
|
|
|
|
|
|
# Start service (with pre-flight checks)
|
|
|
|
|
provisioning services start orchestrator
|
|
|
|
|
|
|
|
|
|
# Force start (skip checks)
|
|
|
|
|
provisioning services start orchestrator --force
|
|
|
|
|
|
|
|
|
|
# Stop service
|
|
|
|
|
provisioning services stop orchestrator
|
|
|
|
|
|
|
|
|
|
# Force stop (ignore dependents)
|
|
|
|
|
provisioning services stop orchestrator --force
|
|
|
|
|
|
|
|
|
|
# Restart service
|
|
|
|
|
provisioning services restart orchestrator
|
|
|
|
|
|
|
|
|
|
# Check health
|
|
|
|
|
provisioning services health orchestrator
|
|
|
|
|
|
|
|
|
|
# View logs
|
|
|
|
|
provisioning services logs orchestrator --follow --lines 100
|
|
|
|
|
|
|
|
|
|
# Monitor health continuously
|
|
|
|
|
provisioning services monitor orchestrator --interval 30
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
### Dependency & Validation
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# View dependency graph
|
|
|
|
|
provisioning services dependencies
|
|
|
|
|
|
|
|
|
|
# View specific service dependencies
|
|
|
|
|
provisioning services dependencies control-center
|
|
|
|
|
|
|
|
|
|
# Validate all services
|
|
|
|
|
provisioning services validate
|
|
|
|
|
|
|
|
|
|
# Check readiness
|
|
|
|
|
provisioning services readiness
|
|
|
|
|
|
|
|
|
|
# Check the services required for a given operation (here: server)
|
|
|
|
|
provisioning services check server
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
### Registered Services
|
|
|
|
|
|
|
|
|
|
| Service | Port | Type | Auto-Start | Dependencies |
|
|
|
|
|
| --------- | ------ | ------ | ------------ | -------------- |
|
|
|
|
|
| orchestrator | 8080 | Platform | Yes | - |
|
|
|
|
|
| control-center | 8081 | Platform | No | orchestrator |
|
|
|
|
|
| coredns | 5353 | Infrastructure | No | - |
|
|
|
|
|
| gitea | 3000, 222 | Infrastructure | No | - |
|
|
|
|
|
| oci-registry | 5000 | Infrastructure | No | - |
|
|
|
|
|
| mcp-server | 8082 | Platform | No | orchestrator |
|
|
|
|
|
| api-gateway | 8083 | Platform | No | orchestrator, control-center, mcp-server |
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
### Docker Compose
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# Start all services
|
|
|
|
|
cd provisioning/platform
|
|
|
|
|
docker-compose up -d
|
|
|
|
|
|
|
|
|
|
# Start specific services
|
|
|
|
|
docker-compose up -d orchestrator control-center
|
|
|
|
|
|
|
|
|
|
# Check status
|
|
|
|
|
docker-compose ps
|
|
|
|
|
|
|
|
|
|
# View logs
|
|
|
|
|
docker-compose logs -f orchestrator
|
|
|
|
|
|
|
|
|
|
# Stop all services
|
|
|
|
|
docker-compose down
|
|
|
|
|
|
|
|
|
|
# Stop and remove volumes
|
|
|
|
|
docker-compose down -v
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
### Service State Directories
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
~/.provisioning/services/
|
|
|
|
|
├── pids/ # Process ID files
|
|
|
|
|
├── state/ # Service state (JSON)
|
|
|
|
|
└── logs/ # Service logs
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
### Health Check Endpoints
|
|
|
|
|
|
|
|
|
|
| Service | Endpoint | Type |
|
|
|
|
|
| --------- | ---------- | ------ |
|
|
|
|
|
| orchestrator | <http://localhost:9090/health> | HTTP |
|
|
|
|
|
| control-center | <http://localhost:9080/health> | HTTP |
|
|
|
|
|
| coredns | localhost:5353 | TCP |
|
|
|
|
|
| gitea | <http://localhost:3000/api/healthz> | HTTP |
|
|
|
|
|
| oci-registry | <http://localhost:5000/v2/> | HTTP |
|
|
|
|
|
| mcp-server | <http://localhost:8082/health> | HTTP |
|
|
|
|
|
| api-gateway | <http://localhost:8083/health> | HTTP |
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
### Common Workflows
|
|
|
|
|
|
|
|
|
|
#### Start Platform for Development
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# Start core services
|
|
|
|
|
provisioning platform start orchestrator
|
|
|
|
|
|
|
|
|
|
# Check status
|
|
|
|
|
provisioning platform status
|
|
|
|
|
|
|
|
|
|
# Check health
|
|
|
|
|
provisioning platform health
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
#### Start Full Platform Stack
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# Use Docker Compose
|
|
|
|
|
cd provisioning/platform
|
|
|
|
|
docker-compose up -d
|
|
|
|
|
|
|
|
|
|
# Verify
|
|
|
|
|
docker-compose ps
|
|
|
|
|
provisioning platform health
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
#### Debug Service Issues
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# Check service status
|
|
|
|
|
provisioning services status <service>
|
|
|
|
|
|
|
|
|
|
# View logs
|
|
|
|
|
provisioning services logs <service> --follow
|
|
|
|
|
|
|
|
|
|
# Check health
|
|
|
|
|
provisioning services health <service>
|
|
|
|
|
|
|
|
|
|
# Validate prerequisites
|
|
|
|
|
provisioning services validate
|
|
|
|
|
|
|
|
|
|
# Restart service
|
|
|
|
|
provisioning services restart <service>
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
#### Safe Service Shutdown
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# Check dependents
|
|
|
|
|
nu -c "use lib_provisioning/services/mod.nu *; can-stop-service orchestrator"
|
|
|
|
|
|
|
|
|
|
# Stop with dependency check
|
|
|
|
|
provisioning services stop orchestrator
|
|
|
|
|
|
|
|
|
|
# Force stop if needed
|
|
|
|
|
provisioning services stop orchestrator --force
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
### Troubleshooting
|
|
|
|
|
|
|
|
|
|
#### Service Won't Start
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# 1. Check prerequisites
|
|
|
|
|
provisioning services validate
|
|
|
|
|
|
|
|
|
|
# 2. View detailed status
|
|
|
|
|
provisioning services status <service>
|
|
|
|
|
|
|
|
|
|
# 3. Check logs
|
|
|
|
|
provisioning services logs <service>
|
|
|
|
|
|
|
|
|
|
# 4. Verify binary/image exists
|
|
|
|
|
ls ~/.provisioning/bin/<service>
|
|
|
|
|
docker images | grep <service>
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
#### Health Check Failing
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# Check endpoint manually (orchestrator endpoint shown; substitute your service's endpoint)
|
|
|
|
|
curl http://localhost:9090/health
|
|
|
|
|
|
|
|
|
|
# View health details
|
|
|
|
|
provisioning services health <service>
|
|
|
|
|
|
|
|
|
|
# Monitor continuously
|
|
|
|
|
provisioning services monitor <service> --interval 10
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
#### PID File Stale
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# Remove stale PID file
|
|
|
|
|
rm ~/.provisioning/services/pids/<service>.pid
|
|
|
|
|
|
|
|
|
|
# Restart service
|
|
|
|
|
provisioning services restart <service>
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
#### Port Already in Use
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# Find the process using the port (9090 shown as an example)
|
|
|
|
|
lsof -i :9090
|
|
|
|
|
|
|
|
|
|
# Kill process
|
|
|
|
|
kill <PID>
|
|
|
|
|
|
|
|
|
|
# Start the service again
|
|
|
|
|
provisioning services start <service>
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
### Integration with Operations
|
|
|
|
|
|
|
|
|
|
#### Server Operations
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# Orchestrator auto-starts if needed
|
|
|
|
|
provisioning server create
|
|
|
|
|
|
|
|
|
|
# Manual check
|
|
|
|
|
provisioning services check server
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
#### Workflow Operations
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# Orchestrator auto-starts if needed
|
|
|
|
|
provisioning workflow submit my-workflow
|
|
|
|
|
|
|
|
|
|
# Check status
|
|
|
|
|
provisioning services status orchestrator
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
#### Test Operations
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# Orchestrator required for test environments
|
|
|
|
|
provisioning test quick kubernetes
|
|
|
|
|
|
|
|
|
|
# Pre-flight check
|
|
|
|
|
provisioning services check test-env
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
### Advanced Usage
|
|
|
|
|
|
|
|
|
|
#### Custom Service Startup Order
|
|
|
|
|
|
|
|
|
|
Services start based on:
|
|
|
|
|
|
|
|
|
|
1. Dependency order (topological sort)
|
|
|
|
|
2. `start_order` field (lower = earlier)
|
|
|
|
|
|
|
|
|
|
#### Auto-Start Configuration
|
|
|
|
|
|
|
|
|
|
Edit `provisioning/config/services.toml`:
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
[services.<service>.startup]
|
|
|
|
|
auto_start = true # Enable auto-start
|
|
|
|
|
start_timeout = 30 # Timeout in seconds
|
|
|
|
|
start_order = 10 # Startup priority (lower = earlier)
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
#### Health Check Configuration
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
[services.<service>.health_check]
|
|
|
|
|
type = "http" # http, tcp, command, file
|
|
|
|
|
interval = 10 # Seconds between checks
|
|
|
|
|
retries = 3 # Max retry attempts
|
|
|
|
|
timeout = 5 # Check timeout
|
|
|
|
|
|
|
|
|
|
[services.<service>.health_check.http]
|
|
|
|
|
endpoint = "http://localhost:9090/health"
|
|
|
|
|
expected_status = 200
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
### Key Files
|
|
|
|
|
|
|
|
|
|
- **Service Registry**: `provisioning/config/services.toml`
|
|
|
|
|
- **KCL Schema**: `provisioning/kcl/services.k`
|
|
|
|
|
- **Docker Compose**: `provisioning/platform/docker-compose.yaml`
|
|
|
|
|
- **User Guide**: `docs/user/SERVICE_MANAGEMENT_GUIDE.md`
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
### Getting Help
|
|
|
|
|
|
|
|
|
|
```text
|
|
|
|
|
# View documentation
|
|
|
|
|
less docs/user/SERVICE_MANAGEMENT_GUIDE.md
|
|
|
|
|
|
|
|
|
|
# Run verification
|
|
|
|
|
nu provisioning/core/nulib/tests/verify_services.nu
|
|
|
|
|
|
|
|
|
|
# Check readiness
|
|
|
|
|
provisioning services readiness
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
**Quick Tip**: Use the `--help` flag with any command for detailed usage information.
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
**Maintained By**: Platform Team
|
|
|
|
|
**Support**: [GitHub Issues](https://github.com/your-org/provisioning/issues)
|