Jesús Pérez 6a59d34bb1
chore: update provisioning configuration and documentation
Update configuration files, templates, and internal documentation
for the provisioning repository system.

Configuration Updates:
- KMS configuration modernization
- Plugin system settings
- Service port mappings
- Test cluster topologies
- Installation configuration examples
- VM configuration defaults
- Cedar authorization policies

Documentation Updates:
- Library module documentation
- Extension API guides
- AI system documentation
- Service management guides
- Test environment setup
- Plugin usage guides
- Validator configuration documentation

All changes are backward compatible.
2025-12-11 21:50:42 +00:00

1157 lines
29 KiB
YAML

# Specification version and API metadata (title, version, contact, license).
openapi: '3.0.3'
info:
  title: Provisioning API
  description: |
    Comprehensive API for provisioning, including workflow management,
    batch operations, monitoring, and configuration management.
  # Quoted so the version is always read as a string.
  version: '2.0.0'
  contact:
    name: Provisioning
    url: https://provisioning.systems
    email: support@provisioning.systems
  license:
    name: MIT
    url: https://opensource.org/licenses/MIT
# Base URLs for the API: two local services and the production host.
servers:
  - url: http://localhost:8080
    description: Local Orchestrator API
  - url: http://localhost:8081
    description: Local Control Center API
  - url: https://api.provisioning.systems
    description: Production API
# Document-wide default: operations require bearerAuth unless they declare
# their own `security`.
security:
  - bearerAuth: []
paths:
# Health Check
# Health probe. `security: []` overrides the document-wide bearerAuth
# requirement for this operation.
/health:
  get:
    summary: Health check
    description: Check the health status of the orchestrator
    tags:
      - Health
    security: []
    responses:
      '200':
        description: Service is healthy
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ApiResponse'
            example:
              success: true
              data: 'Orchestrator is healthy'
# Task Management
# Task listing with an optional status filter and limit/offset pagination.
/tasks:
  get:
    summary: List tasks
    description: Retrieve a list of all workflow tasks
    tags:
      - Tasks
    parameters:
      - name: status
        in: query
        description: Filter tasks by status
        schema:
          $ref: '#/components/schemas/TaskStatus'
      # Page size: 1..1000, 50 when omitted.
      - name: limit
        in: query
        description: Maximum number of results
        schema:
          type: integer
          minimum: 1
          maximum: 1000
          default: 50
      # Zero-based start position, 0 when omitted.
      - name: offset
        in: query
        description: Pagination offset
        schema:
          type: integer
          minimum: 0
          default: 0
    responses:
      '200':
        description: List of tasks
        content:
          application/json:
            schema:
              # ApiResponse envelope with `data` narrowed to a task array.
              allOf:
                - $ref: '#/components/schemas/ApiResponse'
                - type: object
                  properties:
                    data:
                      type: array
                      items:
                        $ref: '#/components/schemas/WorkflowTask'
# Single-task lookup by UUID; a 404 carries an ErrorResponse body.
/tasks/{taskId}:
  get:
    summary: Get task status
    description: Retrieve the status and details of a specific task
    tags:
      - Tasks
    parameters:
      - name: taskId
        in: path
        required: true
        description: Task ID
        schema:
          type: string
          format: uuid
    responses:
      '200':
        description: Task details
        content:
          application/json:
            schema:
              # ApiResponse envelope with `data` narrowed to one task.
              allOf:
                - $ref: '#/components/schemas/ApiResponse'
                - type: object
                  properties:
                    data:
                      $ref: '#/components/schemas/WorkflowTask'
      '404':
        description: Task not found
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ErrorResponse'
# Workflow Submission
# Submits a CreateServerWorkflow; the response `data` is the task UUID.
/workflows/servers/create:
  post:
    summary: Create server workflow
    description: Submit a workflow to create servers
    tags:
      - Workflows
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/CreateServerWorkflow'
    responses:
      '200':
        description: Workflow created successfully
        content:
          application/json:
            schema:
              allOf:
                - $ref: '#/components/schemas/ApiResponse'
                - type: object
                  properties:
                    data:
                      type: string
                      format: uuid
                      description: Task ID of the created workflow
# Submits a TaskservWorkflow; the response `data` is a UUID string.
/workflows/taskserv/create:
  post:
    summary: Create task service workflow
    description: Submit a workflow to manage task services
    tags:
      - Workflows
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/TaskservWorkflow'
    responses:
      '200':
        description: Workflow created successfully
        content:
          application/json:
            schema:
              allOf:
                - $ref: '#/components/schemas/ApiResponse'
                - type: object
                  properties:
                    data:
                      type: string
                      format: uuid
# Submits a ClusterWorkflow; the response `data` is a UUID string.
/workflows/cluster/create:
  post:
    summary: Create cluster workflow
    description: Submit a workflow to manage clusters
    tags:
      - Workflows
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ClusterWorkflow'
    responses:
      '200':
        description: Workflow created successfully
        content:
          application/json:
            schema:
              allOf:
                - $ref: '#/components/schemas/ApiResponse'
                - type: object
                  properties:
                    data:
                      type: string
                      format: uuid
# Batch Operations
# Starts a batch from a BatchOperationRequest and returns a
# BatchOperationResult inside the ApiResponse envelope.
/batch/execute:
  post:
    summary: Execute batch operation
    description: Submit a batch operation with multiple workflows
    tags:
      - Batch Operations
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/BatchOperationRequest'
    responses:
      '200':
        description: Batch operation started
        content:
          application/json:
            schema:
              allOf:
                - $ref: '#/components/schemas/ApiResponse'
                - type: object
                  properties:
                    data:
                      $ref: '#/components/schemas/BatchOperationResult'
# Lists batch operations as an array of WorkflowExecutionState objects.
/batch/operations:
  get:
    summary: List batch operations
    description: Retrieve a list of all batch operations
    tags:
      - Batch Operations
    responses:
      '200':
        description: List of batch operations
        content:
          application/json:
            schema:
              allOf:
                - $ref: '#/components/schemas/ApiResponse'
                - type: object
                  properties:
                    data:
                      type: array
                      items:
                        $ref: '#/components/schemas/WorkflowExecutionState'
# Status of one batch operation, addressed by its UUID.
/batch/operations/{batchId}:
  get:
    summary: Get batch operation status
    description: Retrieve the status of a specific batch operation
    tags:
      - Batch Operations
    parameters:
      - name: batchId
        in: path
        required: true
        description: Batch operation ID
        schema:
          type: string
          format: uuid
    responses:
      '200':
        description: Batch operation status
        content:
          application/json:
            schema:
              allOf:
                - $ref: '#/components/schemas/ApiResponse'
                - type: object
                  properties:
                    data:
                      $ref: '#/components/schemas/WorkflowExecutionState'
# Cancels the batch operation identified by the path UUID; the response
# `data` is a plain string.
/batch/operations/{batchId}/cancel:
  post:
    summary: Cancel batch operation
    description: Cancel a running batch operation
    tags:
      - Batch Operations
    parameters:
      - name: batchId
        in: path
        required: true
        description: Batch operation ID
        schema:
          type: string
          format: uuid
    responses:
      '200':
        description: Batch operation cancelled
        content:
          application/json:
            schema:
              allOf:
                - $ref: '#/components/schemas/ApiResponse'
                - type: object
                  properties:
                    data:
                      type: string
                      example: 'Operation cancelled'
# State Management
# Progress of one workflow (by UUID), returned as ProgressInfo.
/state/workflows/{workflowId}/progress:
  get:
    summary: Get workflow progress
    description: Get real-time progress information for a workflow
    tags:
      - State Management
    parameters:
      - name: workflowId
        in: path
        required: true
        description: Workflow ID
        schema:
          type: string
          format: uuid
    responses:
      '200':
        description: Workflow progress information
        content:
          application/json:
            schema:
              allOf:
                - $ref: '#/components/schemas/ApiResponse'
                - type: object
                  properties:
                    data:
                      $ref: '#/components/schemas/ProgressInfo'
# State snapshots of one workflow (by UUID), returned as an array of
# StateSnapshot objects.
/state/workflows/{workflowId}/snapshots:
  get:
    summary: Get workflow snapshots
    description: Get state snapshots for a workflow
    tags:
      - State Management
    parameters:
      - name: workflowId
        in: path
        required: true
        description: Workflow ID
        schema:
          type: string
          format: uuid
    responses:
      '200':
        description: Workflow snapshots
        content:
          application/json:
            schema:
              allOf:
                - $ref: '#/components/schemas/ApiResponse'
                - type: object
                  properties:
                    data:
                      type: array
                      items:
                        $ref: '#/components/schemas/StateSnapshot'
# System-wide metrics, returned as SystemMetrics.
/state/system/metrics:
  get:
    summary: Get system metrics
    description: Get system-wide metrics and statistics
    tags:
      - State Management
    responses:
      '200':
        description: System metrics
        content:
          application/json:
            schema:
              allOf:
                - $ref: '#/components/schemas/ApiResponse'
                - type: object
                  properties:
                    data:
                      $ref: '#/components/schemas/SystemMetrics'
# System health report, returned as SystemHealthStatus. Unlike /health this
# operation declares no `security` override.
/state/system/health:
  get:
    summary: Get system health
    description: Get system health status
    tags:
      - State Management
    responses:
      '200':
        description: System health status
        content:
          application/json:
            schema:
              allOf:
                - $ref: '#/components/schemas/ApiResponse'
                - type: object
                  properties:
                    data:
                      $ref: '#/components/schemas/SystemHealthStatus'
# Rollback and Recovery
# Checkpoint collection: POST creates one, GET lists them.
/rollback/checkpoints:
  # Creation returns the new checkpoint's UUID in `data`.
  post:
    summary: Create checkpoint
    description: Create a new checkpoint for rollback purposes
    tags:
      - Rollback
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/CreateCheckpointRequest'
    responses:
      '200':
        description: Checkpoint created
        content:
          application/json:
            schema:
              allOf:
                - $ref: '#/components/schemas/ApiResponse'
                - type: object
                  properties:
                    data:
                      type: string
                      format: uuid
                      description: Checkpoint ID
  # Listing returns an array of Checkpoint objects in `data`.
  get:
    summary: List checkpoints
    description: List all available checkpoints
    tags:
      - Rollback
    responses:
      '200':
        description: List of checkpoints
        content:
          application/json:
            schema:
              allOf:
                - $ref: '#/components/schemas/ApiResponse'
                - type: object
                  properties:
                    data:
                      type: array
                      items:
                        $ref: '#/components/schemas/Checkpoint'
# Runs a rollback described by RollbackRequest and returns a RollbackResult.
/rollback/execute:
  post:
    summary: Execute rollback
    description: Execute a rollback operation
    tags:
      - Rollback
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/RollbackRequest'
    responses:
      '200':
        description: Rollback executed
        content:
          application/json:
            schema:
              allOf:
                - $ref: '#/components/schemas/ApiResponse'
                - type: object
                  properties:
                    data:
                      $ref: '#/components/schemas/RollbackResult'
# Authentication (Control Center)
# Login exchange: LoginRequest in, LoginResponse out. `security: []` lifts
# the document-wide bearerAuth requirement for this operation.
/auth/login:
  post:
    summary: User login
    description: Authenticate user and get JWT token
    tags:
      - Authentication
    security: []
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/LoginRequest'
    responses:
      '200':
        description: Login successful
        content:
          application/json:
            schema:
              allOf:
                - $ref: '#/components/schemas/ApiResponse'
                - type: object
                  properties:
                    data:
                      $ref: '#/components/schemas/LoginResponse'
# Token refresh: the current JWT goes in the request body; the response
# `data` holds the new token and its expiry timestamp.
/auth/refresh:
  post:
    summary: Refresh token
    description: Refresh JWT token
    tags:
      - Authentication
    requestBody:
      required: true
      content:
        application/json:
          schema:
            type: object
            properties:
              token:
                type: string
                description: Current JWT token
            required:
              - token
    responses:
      '200':
        description: Token refreshed
        content:
          application/json:
            schema:
              allOf:
                - $ref: '#/components/schemas/ApiResponse'
                - type: object
                  properties:
                    data:
                      type: object
                      properties:
                        token:
                          type: string
                        expires_at:
                          type: string
                          format: date-time
# WebSocket endpoint (documented for reference)
# WebSocket upgrade endpoint; listed for reference rather than as a regular
# HTTP call.
# NOTE(review): no operation-level `security` appears here, so the
# document-wide bearerAuth requirement presumably still applies alongside the
# `token` query parameter — confirm that this is intended.
/ws:
get:
summary: WebSocket connection
description: |
Establish WebSocket connection for real-time events.
This is not a traditional HTTP endpoint but a WebSocket upgrade.
tags:
- WebSocket
parameters:
# Required JWT, passed in the query string.
- name: token
in: query
required: true
description: JWT authentication token
schema:
type: string
# Optional subscription filter: comma-separated event type names.
- name: events
in: query
description: Comma-separated list of event types to subscribe to
schema:
type: string
example: "TaskStatusChanged,WorkflowProgressUpdate"
responses:
# 101 reports the established connection; 401 reports failed authentication.
'101':
description: WebSocket connection established
'401':
description: Authentication failed
components:
securitySchemes:
# HTTP bearer scheme carrying a JWT; referenced by the document-wide
# `security` requirement.
bearerAuth:
  type: http
  scheme: bearer
  bearerFormat: JWT
schemas:
# Common response schemas
# Common response envelope. Only `success` is required; operations narrow
# `data` to a concrete schema by combining this schema through `allOf`.
ApiResponse:
  type: object
  properties:
    success:
      type: boolean
    # Payload: a string, an object, or an array, depending on the operation.
    data:
      oneOf:
        - type: string
        - type: object
        - type: array
          # OpenAPI 3.0 requires `items` whenever `type` is `array`; the
          # empty schema accepts elements of any type.
          items: {}
    error:
      type: string
  required:
    - success
# Failure envelope: `success` is pinned to false and `error` is mandatory.
ErrorResponse:
  type: object
  properties:
    success:
      type: boolean
      enum:
        - false
    error:
      type: string
  required:
    - success
    - error
# Task and workflow schemas
# Task status values; also used as the `status` filter of GET /tasks.
TaskStatus:
  type: string
  enum:
    - Pending
    - Running
    - Completed
    - Failed
    - Cancelled
# One workflow task: identity, command line, dependencies, status and
# timing. The fields marked nullable are not in the `required` list.
WorkflowTask:
  type: object
  properties:
    id:
      type: string
      format: uuid
    name:
      type: string
    command:
      type: string
    args:
      type: array
      items:
        type: string
    dependencies:
      type: array
      items:
        type: string
    status:
      $ref: '#/components/schemas/TaskStatus'
    created_at:
      type: string
      format: date-time
    started_at:
      type: string
      format: date-time
      nullable: true
    completed_at:
      type: string
      format: date-time
      nullable: true
    output:
      type: string
      nullable: true
    error:
      type: string
      nullable: true
    # Bounded to 0..100.
    progress:
      type: number
      format: float
      minimum: 0
      maximum: 100
      nullable: true
  required:
    - id
    - name
    - command
    - args
    - dependencies
    - status
    - created_at
# Request body for POST /workflows/servers/create; only `infra` is required.
CreateServerWorkflow:
  type: object
  properties:
    infra:
      type: string
      description: Infrastructure target
    settings:
      type: string
      description: Settings file path
      default: 'config.k'
    check_mode:
      type: boolean
      description: Enable check mode only
      default: false
    wait:
      type: boolean
      description: Wait for completion
      default: false
  required:
    - infra
# Request body for POST /workflows/taskserv/create. `operation`, `taskserv`
# and `infra` are required; the remaining fields have defaults.
TaskservWorkflow:
  type: object
  properties:
    operation:
      type: string
      enum:
        - create
        - delete
        - restart
        - configure
    taskserv:
      type: string
      description: Task service name
    infra:
      type: string
      description: Infrastructure target
    settings:
      type: string
      description: Settings file path
      default: 'config.k'
    check_mode:
      type: boolean
      default: false
    wait:
      type: boolean
      default: false
  required:
    - operation
    - taskserv
    - infra
# Request body for POST /workflows/cluster/create. `operation`,
# `cluster_type` and `infra` are required; the remaining fields have defaults.
ClusterWorkflow:
  type: object
  properties:
    operation:
      type: string
      enum:
        - create
        - delete
        - scale
        - upgrade
    cluster_type:
      type: string
      description: Cluster type
    infra:
      type: string
      description: Infrastructure target
    settings:
      type: string
      description: Settings file path
      default: 'config.k'
    check_mode:
      type: boolean
      default: false
    wait:
      type: boolean
      default: false
  required:
    - operation
    - cluster_type
    - infra
# Batch operation schemas
# Request body for POST /batch/execute. `name` and `operations` are
# required; every other field carries a default.
BatchOperationRequest:
  type: object
  properties:
    name:
      type: string
      description: Batch operation name
    version:
      type: string
      description: Batch configuration version
      default: '1.0.0'
    storage_backend:
      type: string
      enum:
        - filesystem
        - surrealdb
      default: 'filesystem'
    # Bounded to 1..100, 5 when omitted.
    parallel_limit:
      type: integer
      minimum: 1
      maximum: 100
      default: 5
    rollback_enabled:
      type: boolean
      default: true
    operations:
      type: array
      items:
        $ref: '#/components/schemas/BatchOperation'
  required:
    - name
    - operations
# One entry of BatchOperationRequest.operations. `config` is the only
# optional field.
BatchOperation:
  type: object
  properties:
    id:
      type: string
      description: Operation ID
    type:
      type: string
      enum:
        - server_batch
        - taskserv_batch
        - cluster_batch
    provider:
      type: string
      description: Provider name
    dependencies:
      type: array
      items:
        type: string
      description: List of operation IDs this depends on
    config:
      type: object
      description: Operation-specific configuration
  required:
    - id
    - type
    - provider
    - dependencies
# Response payload of POST /batch/execute: batch UUID, overall status and a
# per-operation summary.
BatchOperationResult:
  type: object
  properties:
    batch_id:
      type: string
      format: uuid
    status:
      type: string
      enum:
        - Running
        - Completed
        - Failed
        - Cancelled
    operations:
      type: array
      items:
        type: object
        properties:
          id:
            type: string
          status:
            type: string
          progress:
            type: number
            format: float
  required:
    - batch_id
    - status
    - operations
# State of a batch operation as returned by the /batch/operations endpoints.
# `status` is a free-form string here (no enum is declared).
WorkflowExecutionState:
  type: object
  properties:
    batch_id:
      type: string
      format: uuid
    name:
      type: string
    status:
      type: string
    created_at:
      type: string
      format: date-time
    operations:
      type: array
      items:
        type: object
        properties:
          id:
            type: string
          status:
            type: string
          progress:
            type: number
            format: float
  required:
    - batch_id
    - name
    - status
# State management schemas
# Progress report for one workflow; only `workflow_id` and `progress` are
# required.
ProgressInfo:
  type: object
  properties:
    workflow_id:
      type: string
      format: uuid
    # Bounded to 0..100.
    progress:
      type: number
      format: float
      minimum: 0
      maximum: 100
    current_step:
      type: string
    total_steps:
      type: integer
      minimum: 1
    completed_steps:
      type: integer
      minimum: 0
    estimated_time_remaining:
      type: integer
      minimum: 0
      description: Estimated time remaining in seconds
  required:
    - workflow_id
    - progress
# One workflow state snapshot; `details` is the only optional field.
StateSnapshot:
  type: object
  properties:
    snapshot_id:
      type: string
      format: uuid
    timestamp:
      type: string
      format: date-time
    state:
      type: string
    details:
      type: object
  required:
    - snapshot_id
    - timestamp
    - state
# Workflow counters plus a `system_load` object; only the total and active
# counters are required.
SystemMetrics:
  type: object
  properties:
    total_workflows:
      type: integer
      minimum: 0
    active_workflows:
      type: integer
      minimum: 0
    completed_workflows:
      type: integer
      minimum: 0
    failed_workflows:
      type: integer
      minimum: 0
    # cpu_usage and disk_usage are bounded to 0..100; memory_usage is in MB.
    system_load:
      type: object
      properties:
        cpu_usage:
          type: number
          format: float
          minimum: 0
          maximum: 100
        memory_usage:
          type: integer
          minimum: 0
          description: Memory usage in MB
        disk_usage:
          type: number
          format: float
          minimum: 0
          maximum: 100
  required:
    - total_workflows
    - active_workflows
# Health report: a required overall status plus an optional map of string
# values under `components` and the time of the last check.
SystemHealthStatus:
  type: object
  properties:
    overall_status:
      type: string
      enum:
        - Healthy
        - Warning
        - Critical
    components:
      type: object
      additionalProperties:
        type: string
    last_check:
      type: string
      format: date-time
  required:
    - overall_status
# Rollback schemas
# Request body for POST /rollback/checkpoints; only `name` is required.
CreateCheckpointRequest:
  type: object
  properties:
    name:
      type: string
      description: Checkpoint name
    # A property that is itself called `description`.
    description:
      type: string
      description: Checkpoint description
  required:
    - name
# A stored checkpoint as listed by GET /rollback/checkpoints.
Checkpoint:
  type: object
  properties:
    id:
      type: string
      format: uuid
    name:
      type: string
    # A property that is itself called `description`.
    description:
      type: string
    created_at:
      type: string
      format: date-time
    # A string rather than a number.
    size:
      type: string
      description: Checkpoint size (e.g., "150MB")
  required:
    - id
    - name
    - created_at
# Request body for POST /rollback/execute. The `oneOf` accepts a body that
# carries exactly one of `checkpoint_id` or `operation_ids`.
RollbackRequest:
  type: object
  properties:
    checkpoint_id:
      type: string
      format: uuid
      description: Checkpoint ID for full rollback
    operation_ids:
      type: array
      items:
        type: string
      description: Operation IDs for partial rollback
  oneOf:
    - required:
        - checkpoint_id
    - required:
        - operation_ids
# Outcome of a rollback; `duration` is the only optional field.
RollbackResult:
  type: object
  properties:
    rollback_id:
      type: string
      format: uuid
    success:
      type: boolean
    operations_executed:
      type: integer
      minimum: 0
    operations_failed:
      type: integer
      minimum: 0
    duration:
      type: number
      format: float
      description: Duration in seconds
  required:
    - rollback_id
    - success
    - operations_executed
    - operations_failed
# Authentication schemas
# Credentials for POST /auth/login; `mfa_code` is optional.
LoginRequest:
  type: object
  properties:
    username:
      type: string
    password:
      type: string
      format: password
    mfa_code:
      type: string
      description: Multi-factor authentication code
  required:
    - username
    - password
# Payload of a successful login: the JWT, its expiry and the user record.
# All three top-level fields are required.
LoginResponse:
  type: object
  properties:
    token:
      type: string
      description: JWT token
    expires_at:
      type: string
      format: date-time
    user:
      type: object
      properties:
        id:
          type: string
          format: uuid
        username:
          type: string
        email:
          type: string
          format: email
        roles:
          type: array
          items:
            type: string
  required:
    - token
    - expires_at
    - user
# WebSocket event schema
# Event message schema: a typed event with a timestamp, a payload object and
# an optional string-to-string metadata map.
WebSocketEvent:
  type: object
  properties:
    event_type:
      type: string
      enum:
        - TaskStatusChanged
        - WorkflowProgressUpdate
        - SystemHealthUpdate
        - BatchOperationUpdate
        - LogEntry
        - MetricUpdate
    timestamp:
      type: string
      format: date-time
    data:
      type: object
      description: Event-specific data
    metadata:
      type: object
      additionalProperties:
        type: string
      description: Additional event metadata
  required:
    - event_type
    - timestamp
    - data
examples:
# Sample event with event_type TaskStatusChanged: a task moving from Pending
# to Running.
TaskStatusChangedEvent:
  summary: Task status changed event
  value:
    event_type: 'TaskStatusChanged'
    # Kept quoted so the timestamp stays a string.
    timestamp: '2025-09-26T10:00:00Z'
    data:
      task_id: 'uuid-string'
      name: 'create_servers'
      status: 'Running'
      previous_status: 'Pending'
      progress: 45.5
    metadata:
      task_id: 'uuid-string'
      workflow_type: 'server_creation'
# Sample batch request with two operations; aws_taskservs lists
# upcloud_servers as its dependency.
BatchOperationRequest:
  summary: Multi-cloud deployment batch
  value:
    name: 'multi_cloud_deployment'
    version: '1.0.0'
    storage_backend: 'surrealdb'
    parallel_limit: 5
    rollback_enabled: true
    operations:
      - id: 'upcloud_servers'
        type: 'server_batch'
        provider: 'upcloud'
        dependencies: []
        config:
          server_configs:
            - name: 'web-01'
              plan: '1xCPU-2GB'
              zone: 'de-fra1'
      - id: 'aws_taskservs'
        type: 'taskserv_batch'
        provider: 'aws'
        dependencies:
          - 'upcloud_servers'
        config:
          taskservs:
            - 'kubernetes'
            - 'cilium'
# Tag catalogue; each name matches a `tags` entry used by the operations.
tags:
  - name: Health
    description: Health check endpoints
  - name: Tasks
    description: Task management and monitoring
  - name: Workflows
    description: Workflow submission and management
  - name: Batch Operations
    description: Batch workflow operations
  - name: State Management
    description: System state and progress monitoring
  - name: Rollback
    description: Rollback and recovery operations
  - name: Authentication
    description: User authentication and authorization
  - name: WebSocket
    description: Real-time event streaming
# Pointer to the documentation hosted outside this file.
externalDocs:
  description: Full API Documentation
  url: https://docs.provisioning.systems/api