init repo and codebase

Jesús Pérez 2025-10-07 11:17:54 +01:00
commit f02b12cef3
Signed by: jesus
GPG Key ID: 9F243E355E0BC939
31 changed files with 8979 additions and 0 deletions

110
.gitignore vendored Normal file

@@ -0,0 +1,110 @@
.p
.claude
.vscode
.shellcheckrc
.coder
.migration
.zed
ai_demo.nu
CLAUDE.md
.cache
.coder
wrks
ROOT
OLD
# Generated by Cargo
# will have compiled files and executables
debug/
target/
# Encryption keys and related files (CRITICAL - NEVER COMMIT)
.k.backup
*.key.backup
config.*.toml
config.*back
# where book is written
_book
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
Cargo.lock
# These are backup files generated by rustfmt
**/*.rs.bk
# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb
node_modules/
**/output.css
**/input.css
# Environment files
.env
.env.local
.env.production
.env.development
.env.staging
# Keep example files
!.env.example
# Configuration files (may contain sensitive data)
config.prod.toml
config.production.toml
config.local.toml
config.*.local.toml
# Keep example configuration files
!config.toml
!config.dev.toml
!config.example.toml
# Log files
logs/
*.log
# TLS certificates and keys
certs/
*.pem
*.crt
*.key
*.p12
*.pfx
# Database files
*.db
*.sqlite
*.sqlite3
# Backup files
*.bak
*.backup
*.tmp
*~
# Encryption and security related files
*.encrypted
*.enc
secrets/
private/
security/
# Configuration backups that may contain secrets
config.*.backup
config.backup.*
# OS generated files
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db
# Documentation build output
book-output/
# Generated setup report
SETUP_COMPLETE.md

412
README.md Normal file

@@ -0,0 +1,412 @@
<p align="center">
<img src="https://repo.jesusperez.pro/jesus/provisioning/media/branch/main/resources/provisioning_logo.svg" alt="Provisioning Logo" width="300"/>
</p>
<p align="center">
<img src="https://repo.jesusperez.pro/jesus/provisioning/media/branch/main/resources/logo-text.svg" alt="Provisioning" width="500"/>
</p>
# Provisioning KCL Package
A comprehensive KCL (KusionStack Configuration Language) package providing type-safe schemas for the [Provisioning project](https://repo.jesusperez.pro/jesus/provisioning): server provisioning, batch workflows, and Kubernetes deployments.
## Overview
This package contains production-ready KCL schemas with configuration-driven, provider-agnostic infrastructure automation capabilities.
### Package Structure
```
provisioning/kcl/
├── main.k # Main entry point - import this
├── settings.k # Core system settings
├── lib.k # Common schemas and utilities
├── server.k # Server configuration schemas
├── cluster.k # Cluster management schemas
├── workflows.k # Batch workflow schemas
├── batch.k # Advanced batch operation utilities
├── dependencies.k # Taskserv dependency management
├── version.k # Version management schemas
├── k8s_deploy.k # Kubernetes deployment schemas
├── defaults.k # Default configurations
├── examples_batch.k # Comprehensive examples
└── docs/ # Documentation
```
## Quick Start
### Import the Package
```kcl
# Import the main entry point for access to all schemas
import provisioning.main
# Or import from a relative path if working within the same project
import .main
```
### Basic Server Configuration
```kcl
import .main
# Define a simple server
web_server: main.Server = main.Server {
hostname: "web-01"
title: "Production Web Server"
labels: "env: prod, tier: web"
user: "admin"
# Optional: Add taskservs to install
taskservs: [
main.TaskServDef {
name: "nginx"
install_mode: "library"
profile: "production"
}
]
}
```
### Batch Workflow Example
```kcl
import .main
# Define a multi-provider infrastructure deployment
deployment_workflow: main.BatchWorkflow = main.BatchWorkflow {
workflow_id: "prod_deploy_001"
name: "Production Infrastructure Deployment"
description: "Deploy web tier across UpCloud and AWS"
operations: [
# Create UpCloud servers
main.BatchOperation {
operation_id: "create_web_servers"
name: "Create Web Servers"
operation_type: "server"
provider: "upcloud"
action: "create"
parameters: {
"server_count": "3"
"server_type": "web"
"zone": "fi-hel2"
"plan": "2xCPU-4GB"
}
priority: 10
}
# Install Kubernetes after servers are ready
main.BatchOperation {
operation_id: "install_k8s"
name: "Install Kubernetes Cluster"
operation_type: "taskserv"
action: "create"
parameters: {
"taskserv": "kubernetes"
"version": "v1.31.0"
"cluster_name": "prod-cluster"
}
dependencies: [
main.DependencyDef {
target_operation_id: "create_web_servers"
dependency_type: "sequential"
timeout: 600
}
]
priority: 8
}
]
# Global workflow settings
max_parallel_operations: 3
fail_fast: False
# Use SurrealDB for state persistence
storage: main.StorageConfig {
backend: "surrealdb"
connection_config: {
"url": "ws://localhost:8000"
"namespace": "provisioning"
"database": "workflows"
}
enable_persistence: True
retention_hours: 720 # 30 days
}
}
```
### Kubernetes Deployment
```kcl
import .main
# Define a complete Kubernetes deployment
nginx_deployment: main.K8sDeploy = main.K8sDeploy {
name: "nginx-web"
namespace: "production"
create_ns: True
spec: main.K8sDeploySpec {
replicas: 3
containers: [
main.K8sContainers {
name: "nginx"
image: "nginx:1.21"
ports: [
main.K8sPort {
name: "http"
container: 80
target: 8080
}
]
resources_requests: main.K8sResources {
memory: "128Mi"
cpu: "100m"
}
resources_limits: main.K8sResources {
memory: "256Mi"
cpu: "200m"
}
}
]
}
# Expose via service
service: main.K8sService {
name: "nginx-service"
typ: "LoadBalancer"
ports: [
main.K8sPort {
name: "http"
target: 80
nodePort: 30080
}
]
}
}
```
## Core Schemas
### Server Management
- **`Server`**: Complete server configuration with defaults inheritance
- **`ServerDefaults`**: Default settings for server provisioning
- **`Storage`**, **`StorageVol`**: Storage configuration and partitioning
### Workflow & Batch Operations
- **`BatchWorkflow`**: Multi-operation workflow with dependencies
- **`BatchOperation`**: Individual operation within workflows
- **`DependencyDef`**: Define sequential or conditional dependencies
- **`RetryPolicy`**: Configure retry behavior and backoff
- **`RollbackStrategy`**: Automatic rollback on failures (both shown in the example below)
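For example, a retry policy and a rollback strategy can be attached to an individual operation. The sketch below is illustrative: the `retry_policy` and `rollback_strategy` field names and their sub-fields follow the usage shown in `docs/VALIDATION.md`, and `workflows.k` remains the authoritative definition.
```kcl
import .main

# Operation with explicit retry and rollback behaviour (illustrative values)
resilient_op: main.BatchOperation = main.BatchOperation {
    operation_id: "install_nginx"
    name: "Install Nginx"
    operation_type: "taskserv"
    action: "create"
    parameters: {"taskserv": "nginx"}
    retry_policy: main.RetryPolicy {
        max_attempts: 3
        initial_delay: 10
        backoff_multiplier: 2
    }
    rollback_strategy: main.RollbackStrategy {
        enabled: True
        strategy: "immediate"
    }
}
```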
### Taskserv Management
- **`TaskServDef`**: Infrastructure service definitions
- **`TaskservDependencies`**: Dependency management for taskservs (see the example below)
- **`HealthCheck`**: Health monitoring configuration
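A minimal dependency declaration for a taskserv looks like this. Field names come from `dependencies.k`; the `etcd` and `containerd` names and the health-check command are only illustrative.
```kcl
import .main

# Declare what a taskserv needs, provides, and how to verify it is healthy
etcd_deps: main.TaskservDependencies = main.TaskservDependencies {
    name: "etcd"
    requires: ["containerd"]
    provides: ["etcd"]
    resources: main.ResourceRequirement {
        cpu: "200m"
        memory: "256Mi"
        disk: "2Gi"
    }
    health_checks: [
        main.HealthCheck {
            command: "etcdctl endpoint health"
            interval: 30
            timeout: 10
        }
    ]
}
```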
### Kubernetes Deployments
- **`K8sDeploy`**: Complete Kubernetes deployment specification
- **`K8sService`**: Service definitions with load balancing
- **`K8sVolume`**: Persistent storage configuration
- **`K8sResources`**: Resource limits and requests
### Configuration & Settings
- **`Settings`**: System-wide configuration
- **`SecretProvider`**: SOPS/KMS secret management
- **`AIProvider`**: AI integration configuration
## Advanced Features
### Mixed Provider Support
Deploy across multiple cloud providers in a single workflow:
```kcl
mixed_deployment: main.BatchWorkflow = main.BatchWorkflow {
workflow_id: "multi_cloud_001"
name: "Multi-Cloud Deployment"
operations: [
# UpCloud servers for web tier
main.BatchOperation {
operation_id: "upcloud_web"
provider: "upcloud"
parameters: {"zone": "fi-hel2", "count": "3"}
}
# AWS RDS for database
main.BatchOperation {
operation_id: "aws_database"
provider: "aws"
parameters: {"region": "eu-west-1", "engine": "postgresql"}
dependencies: [
main.DependencyDef {
target_operation_id: "upcloud_web"
dependency_type: "sequential"
}
]
}
]
}
```
### Resource Constraints & Autoscaling
Configure intelligent resource management:
```kcl
batch_executor: main.BatchExecutor = main.BatchExecutor {
executor_id: "production_executor"
name: "Production Batch Executor"
# Resource limits
resource_constraints: [
main.ResourceConstraint {
resource_type: "cpu"
resource_name: "total_cores"
max_units: 16
units_per_operation: 2
hard_constraint: True
}
]
# Auto-scaling configuration
autoscaling: main.BatchAutoscaling {
enabled: True
min_parallel: 2
max_parallel: 10
scale_up_threshold: 0.8
target_utilization: 0.65
}
}
```
### Monitoring & Observability
```kcl
monitoring_config: main.MonitoringConfig = main.MonitoringConfig {
enabled: True
backend: "prometheus"
enable_tracing: True
enable_notifications: True
notification_channels: [
"webhook:slack://ops-alerts",
"webhook:pagerduty://incidents"
]
log_level: "info"
}
```
## Validation & Testing
### Schema Validation
```bash
# Validate individual files
kcl run server_config.k
# Validate entire workflow
kcl run workflow_definition.k
# Output as JSON for integration
kcl run workflow_definition.k --format json
```
### Built-in Constraints
All schemas include comprehensive validation:
```kcl
# Server hostnames must be non-empty
server: main.Server = main.Server {
hostname: "web-01" # ✅ Valid
# hostname: "" # ❌ Validation error
}
# Resource constraints are enforced
resources: main.K8sResources = main.K8sResources {
memory: "128Mi" # ✅ Valid K8s format
# memory: "invalid" # ❌ Validation error
}
# Dependency cycles are prevented
operation: main.BatchOperation = main.BatchOperation {
operation_id: "op1"
dependencies: [
main.DependencyDef {
target_operation_id: "op2" # ✅ Valid dependency
# target_operation_id: "op1" # ❌ Self-reference prevented
}
]
}
```
## Integration Examples
### With Nushell Scripts
```nushell
# Generate workflow from KCL
let workflow = (kcl run deployment.k --format json | from json)
# Submit to batch executor
$workflow | to json | http post http://localhost:8080/workflows/batch/submit
# Monitor progress
while true {
let status = (http get $"http://localhost:8080/workflows/batch/($workflow.workflow_id)")
if $status.status == "completed" { break }
sleep 5sec
}
```
### With Rust Orchestrator
```rust
// Deserialize KCL output into Rust structs
let workflow: BatchWorkflow = serde_json::from_str(&kcl_output)?;
// Execute via orchestrator
let executor = BatchExecutor::new(workflow);
executor.execute().await?;
```
## Package Metadata
- **Version**: 0.1.0
- **API Version**: v1
- **KCL Compatibility**: 0.11.0 - 0.12.0
- **Build Date**: 2025-09-28
### Features
- ✅ Server Management
- ✅ Cluster Orchestration
- ✅ Provider Abstraction
- ✅ Workflow Automation
- ✅ Batch Operations
## Best Practices
1. **Always import via main.k** for stability
2. **Use descriptive operation_id values** for dependency tracking
3. **Set appropriate timeouts** based on operation complexity
4. **Enable monitoring** for production workflows
5. **Test workflows** with small counts before production
6. **Use retry policies** for transient failures
7. **Configure rollback strategies** for critical operations (see the sketch below)
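Practices 4, 6, and 7 translate into workflow-level defaults. The sketch below uses the `monitoring`, `default_retry_policy`, and `default_rollback_strategy` fields as they appear in the examples in `docs/VALIDATION.md`; all values are illustrative.
```kcl
import .main

# Production workflow with monitoring, retries, and rollback enabled by default
prod_rollout: main.BatchWorkflow = main.BatchWorkflow {
    workflow_id: "prod_rollout_001"
    name: "Production Rollout"
    operations: [
        main.BatchOperation {
            operation_id: "rollout_web"
            name: "Roll Out Web Tier"
            operation_type: "server"
            action: "create"
            parameters: {}
        }
    ]
    monitoring: main.MonitoringConfig {
        enabled: True
        backend: "prometheus"
        enable_notifications: True
    }
    default_retry_policy: main.RetryPolicy {
        max_attempts: 3
    }
    default_rollback_strategy: main.RollbackStrategy {
        enabled: True
        strategy: "immediate"
    }
}
```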
## Contributing
When adding new schemas:
1. Follow existing naming conventions
2. Add comprehensive validation rules
3. Include documentation strings
4. Export from `main.k`
5. Add examples to `examples_batch.k`
6. Update this README
## License
This package is part of the Provisioning project and follows the same license terms.

56
REFERENCE.md Normal file

@@ -0,0 +1,56 @@
# KCL Schemas Reference
This directory contains references to existing KCL schema implementations.
## Current Implementation Locations
### Settings Schema
- **Current**: `/Users/Akasha/repo-cnz/src/provisioning/kcl/settings.k`
- **New Reference**: `settings.k` (placeholder created)
### Provider Schemas
- **Current**: Various files in `/Users/Akasha/repo-cnz/src/provisioning/providers/*/kcl/`
- **New Reference**: `providers.k` (to be created)
### Workflow Schemas
- **Current**: Distributed across workflow implementations
- **New Reference**: `workflows.k` (to be created)
## Migration Strategy
### Phase 1: Analysis
- Inventory all existing KCL schemas
- Identify common patterns and duplications
- Document schema relationships
### Phase 2: Consolidation
- Create unified schema files
- Preserve backward compatibility
- Update import paths gradually
### Phase 3: Optimization
- Improve type safety
- Add comprehensive validation
- Enhance documentation
## Benefits of Consolidation
1. **Single Source of Truth**: Unified schema definitions
2. **Type Safety**: Compile-time validation across entire system
3. **Consistency**: Standardized configuration patterns
4. **Maintainability**: Easier schema evolution and updates
## Current Status
- **Reference Files**: Created with placeholders
- **Original Schemas**: Fully functional in existing locations
- **Migration**: Planned for future phase
## Integration
Consolidated schemas will be used by:
- Core provisioning engine
- Code generators
- Configuration validators
- Documentation systems
- IDE integrations (syntax highlighting, autocompletion)

287
batch.k Normal file

@@ -0,0 +1,287 @@
# Info: KCL batch operation utilities for provisioning (Provisioning)
# Author: JesusPerezLorenzo
# Release: 0.0.1
# Date: 25-09-2025
# Description: Batch operation utilities and helper schemas following PAP principles
import .workflows
schema BatchScheduler:
"""
Scheduler configuration for batch operations
Supports various scheduling strategies and resource management
"""
# Scheduling strategy: 'fifo', 'priority', 'dependency_first', 'resource_aware'
strategy: "fifo" | "priority" | "dependency_first" | "resource_aware" = "dependency_first"
# Resource constraints for scheduling
resource_limits: {str:int} = {
# 0 = no limit
"max_cpu_cores": 0
# 0 = no limit
"max_memory_mb": 0
# 0 = no limit
"max_network_bandwidth": 0
}
# Scheduling interval in seconds
scheduling_interval: int = 10
# Whether to enable preemptive scheduling
enable_preemption: bool = False
check:
scheduling_interval > 0, "Scheduling interval must be positive"
schema BatchQueue:
"""
Queue configuration for batch operations
Supports priority queues and resource-based queuing
"""
# Queue name/identifier
queue_id: str
# Queue type: 'standard', 'priority', 'delay', 'dead_letter'
queue_type: "standard" | "priority" | "delay" | "dead_letter" = "standard"
# Maximum queue size (0 = unlimited)
max_size: int = 0
# Message retention period in seconds
# 7 days default
retention_period: int = 604800
# Dead letter queue configuration
dead_letter_queue?: str
# Maximum delivery attempts before moving to dead letter
max_delivery_attempts: int = 3
check:
len(queue_id) > 0, "Queue ID cannot be empty"
max_size >= 0, "Max size cannot be negative"
retention_period > 0, "Retention period must be positive"
max_delivery_attempts > 0, "Max delivery attempts must be positive"
schema ResourceConstraint:
"""
Resource constraint definition for batch operations
Ensures operations don't exceed available resources
"""
# Resource type: cpu, memory, network, storage, custom
resource_type: "cpu" | "memory" | "network" | "storage" | "custom"
# Resource name/identifier
resource_name: str
# Maximum units available
max_units: int
# Current units in use
current_units: int = 0
# Units per operation (for estimation)
units_per_operation: int = 1
# Whether this constraint is hard (fails operation) or soft (warns only)
hard_constraint: bool = True
check:
len(resource_name) > 0, "Resource name cannot be empty"
max_units > 0, "Max units must be positive"
current_units >= 0, "Current units cannot be negative"
units_per_operation > 0, "Units per operation must be positive"
current_units <= max_units, "Current units cannot exceed max units"
schema BatchMetrics:
"""
Metrics collection configuration for batch operations
Tracks performance, success rates, and resource utilization
"""
# Whether to collect detailed metrics
detailed_metrics: bool = True
# Metrics retention period in hours
# 1 week
retention_hours: int = 168
# Metrics aggregation intervals
# 1min, 5min, 1hour
aggregation_intervals: [int] = [60, 300, 3600]
# Custom metrics to collect
custom_metrics?: [str] = []
# Whether to export metrics to external systems
enable_export: bool = False
# Export configuration
export_config?: {str:str} = {}
check:
retention_hours > 0, "Retention hours must be positive"
len(aggregation_intervals) > 0, "Must have at least one aggregation interval"
schema ProviderMixConfig:
"""
Configuration for mixed provider batch operations
Handles cross-provider dependencies and resource coordination
"""
# Primary provider for the batch workflow
primary_provider: str = "upcloud"
# Secondary providers available
secondary_providers: [str] = []
# Provider selection strategy for new resources
provider_selection: "primary_first" | "load_balance" | "cost_optimize" | "latency_optimize" = "primary_first"
# Cross-provider networking configuration
cross_provider_networking?: {str:str} = {}
# Shared storage configuration across providers
shared_storage?: workflows.StorageConfig
# Provider-specific resource limits
provider_limits: {str:{str:int}} = {}
check:
len(primary_provider) > 0, "Primary provider cannot be empty"
schema BatchHealthCheck:
"""
Health check configuration for batch operations
Monitors operation health and triggers recovery actions
"""
# Whether health checks are enabled
enabled: bool = True
# Health check interval in seconds
check_interval: int = 60
# Health check timeout in seconds
check_timeout: int = 30
# Failure threshold before marking as unhealthy
failure_threshold: int = 3
# Success threshold to mark as healthy again
success_threshold: int = 2
# Health check endpoints/commands
health_checks: [str] = []
# Actions to take on health check failure
failure_actions: [str] = ["retry", "rollback"]
check:
check_interval > 0, "Check interval must be positive"
check_timeout > 0, "Check timeout must be positive"
failure_threshold > 0, "Failure threshold must be positive"
success_threshold > 0, "Success threshold must be positive"
schema BatchAutoscaling:
"""
Autoscaling configuration for batch operations
Dynamically adjusts resources based on load and performance
"""
# Whether autoscaling is enabled
enabled: bool = False
# Minimum number of parallel operations
min_parallel: int = 1
# Maximum number of parallel operations
max_parallel: int = 10
# Scaling triggers based on metrics
# CPU/resource utilization
scale_up_threshold: float = 0.8
scale_down_threshold: float = 0.2
# Scaling cooldown period in seconds
cooldown_period: int = 300
# Scaling step size
scale_step: int = 1
# Target resource utilization
target_utilization: float = 0.6
check:
min_parallel > 0, "Min parallel must be positive"
max_parallel >= min_parallel, "Max parallel must be >= min parallel"
scale_up_threshold > scale_down_threshold, "Scale up threshold must be > scale down threshold"
0 < target_utilization and target_utilization < 1, "Target utilization must be between 0 and 1"
cooldown_period > 0, "Cooldown period must be positive"
schema BatchExecutor:
"""
Batch executor configuration combining all batch operation aspects
Main configuration schema for batch workflow execution engine
"""
# Executor identifier
executor_id: str
# Executor name and description
name: str
description?: str = ""
# Core scheduling configuration
scheduler: BatchScheduler = BatchScheduler {}
# Queue management
queues: [BatchQueue] = [BatchQueue {queue_id: "default"}]
# Resource constraints
resource_constraints: [ResourceConstraint] = []
# Mixed provider configuration
provider_config: ProviderMixConfig = ProviderMixConfig {}
# Health monitoring
health_check: BatchHealthCheck = BatchHealthCheck {}
# Autoscaling settings
autoscaling: BatchAutoscaling = BatchAutoscaling {}
# Metrics and monitoring
metrics: BatchMetrics = BatchMetrics {}
# Storage configuration for execution state
storage: workflows.StorageConfig = workflows.StorageConfig {}
# Security and access control
security_config: {str:str} = {}
# Audit logging configuration
audit_logging: bool = True
audit_log_path: str = "./logs/batch_audit.log"
# Integration settings
webhook_endpoints: [str] = []
api_endpoints: [str] = []
# Performance tuning
performance_config: {str:str} = {
"io_threads": "4"
"worker_threads": "8"
"batch_size": "100"
}
check:
len(executor_id) > 0, "Executor ID cannot be empty"
len(name) > 0, "Executor name cannot be empty"
len(queues) > 0, "Must have at least one queue configured"
# Utility constants for batch operations
BatchOperationTypes: [str] = [
"server_create"
"server_delete"
"server_scale"
"server_update"
"taskserv_install"
"taskserv_remove"
"taskserv_update"
"taskserv_configure"
"cluster_create"
"cluster_delete"
"cluster_scale"
"cluster_upgrade"
"custom_command"
"custom_script"
"custom_api_call"
]
BatchProviders: [str] = [
"upcloud"
"aws"
"local"
"mixed"
"custom"
]
DefaultBatchConfig: BatchExecutor = BatchExecutor {
executor_id: "default_batch_executor"
name: "Default Batch Executor"
description: "Default configuration-driven batch executor for provisioning operations"
scheduler: BatchScheduler {
strategy: "dependency_first"
resource_limits: {
"max_cpu_cores": 8
"max_memory_mb": 16384
"max_network_bandwidth": 1000
}
}
provider_config: ProviderMixConfig {
primary_provider: "upcloud"
secondary_providers: ["aws", "local"]
provider_selection: "primary_first"
}
autoscaling: BatchAutoscaling {
enabled: True
min_parallel: 2
max_parallel: 8
target_utilization: 0.7
}
}

33
cluster.k Normal file

@@ -0,0 +1,33 @@
# Info: KCL core lib cluster schemas for provisioning (Provisioning)
# Author: JesusPerezLorenzo
# Release: 0.0.4
# Date: 15-12-2023
import .lib
schema Cluster:
"""
cluster settings
"""
not_use: bool = False
name: str
version: str
# Template deployment path in $PROVISIONING/templates
template?: "k8s-deploy" | ""
# Schema definition values
def: "K8sDeploy" | "" = ""
# Save path for cluster services; if unset, the main settings value is used
clusters_save_path?: str
# Profile to use
profile?: str
# Host used to administer the cluster
admin_host?: str
# SSH port for connecting to the cluster admin host
admin_port?: int
# SSH user for connecting to the cluster admin host
admin_user?: str
ssh_key_path?: str
# Local directory for this cluster's definition files
local_def_path: str = "./clusters/${name}"
# Scale mode settings (lib.ScaleResource)
scale?: lib.ScaleResource

484
coredns.k Normal file

@@ -0,0 +1,484 @@
# Info: KCL CoreDNS configuration schemas for provisioning system
# Author: CoreDNS Integration Agent
# Release: 1.0.0
# Date: 2025-10-06
# Purpose: Define CoreDNS service configuration, zones, and DNS management
import regex
schema CoreDNSConfig:
"""
CoreDNS service configuration
Defines how CoreDNS is deployed and managed within the provisioning system.
Supports local binary, Docker, remote, and hybrid deployment modes.
Examples:
# Local mode with auto-start
CoreDNSConfig {
mode = "local"
local = LocalCoreDNS {
enabled = True
auto_start = True
zones = ["provisioning.local", "workspace.local"]
}
}
# Remote mode
CoreDNSConfig {
mode = "remote"
remote = RemoteCoreDNS {
enabled = True
endpoints = ["https://dns1.example.com", "https://dns2.example.com"]
zones = ["production.local"]
}
}
"""
# Deployment mode: local, remote, hybrid, or disabled
mode: "local" | "remote" | "hybrid" | "disabled" = "local"
# Local CoreDNS configuration
local?: LocalCoreDNS
# Remote CoreDNS configuration
remote?: RemoteCoreDNS
# Dynamic DNS update configuration
dynamic_updates: DynamicDNS = DynamicDNS {}
# Upstream DNS servers for forwarding
upstream: [str] = ["8.8.8.8", "1.1.1.1"]
# Global TTL for DNS records (seconds)
default_ttl: int = 300
# Enable DNS query logging
enable_logging: bool = True
# Enable metrics endpoint
enable_metrics: bool = True
# Metrics port
metrics_port: int = 9153
check:
len(upstream) > 0, "At least one upstream DNS server required"
default_ttl > 0 and default_ttl <= 86400, "TTL must be 1-86400 seconds"
metrics_port >= 1024 and metrics_port <= 65535, "Metrics port must be 1024-65535"
mode != "local" or local != Undefined, "Local config required when mode is 'local'"
mode != "remote" or remote != Undefined, "Remote config required when mode is 'remote'"
mode != "hybrid" or (local != Undefined and remote != Undefined), \
"Both local and remote config required when mode is 'hybrid'"
schema LocalCoreDNS:
"""
Local CoreDNS binary configuration
Manages CoreDNS running as a local binary or Docker container.
"""
# Enable local CoreDNS
enabled: bool = True
# Deployment type: binary or docker
deployment_type: "binary" | "docker" = "binary"
# Path to CoreDNS binary
binary_path: str = "~/.provisioning/bin/coredns"
# Path to Corefile
config_path: str = "~/.provisioning/coredns/Corefile"
# Path to zone files directory
zones_path: str = "~/.provisioning/coredns/zones"
# DNS listening port
port: int = 5353
# Auto-start CoreDNS on system startup
auto_start: bool = True
# Auto-restart on failure
auto_restart: bool = True
# Managed DNS zones
zones: [str] = ["provisioning.local", "workspace.local"]
# PID file path (for binary mode)
pid_file?: str = "~/.provisioning/coredns/coredns.pid"
# Log file path
log_file?: str = "~/.provisioning/coredns/coredns.log"
# Docker configuration (for docker mode)
docker?: DockerCoreDNS
check:
port >= 1024 and port <= 65535, "Port must be 1024-65535"
len(zones) > 0, "At least one zone required"
deployment_type != "docker" or docker != Undefined, \
"Docker config required when deployment_type is 'docker'"
schema DockerCoreDNS:
"""
Docker-based CoreDNS deployment configuration
"""
# Docker image
image: str = "coredns/coredns:1.11.1"
# Container name
container_name: str = "provisioning-coredns"
# Restart policy
restart_policy: "no" | "always" | "unless-stopped" | "on-failure" = "unless-stopped"
# Network mode
network_mode: str = "bridge"
# Publish DNS port
publish_port: bool = True
# Volume mounts (host:container)
volumes: [str] = []
check:
len(image) > 0, "Docker image required"
len(container_name) > 0, "Container name required"
schema RemoteCoreDNS:
"""
Remote CoreDNS service configuration
Connect to external CoreDNS instances for DNS management.
"""
# Enable remote CoreDNS
enabled: bool = True
# Remote CoreDNS API endpoints
endpoints: [str]
# Managed zones on remote servers
zones: [str]
# Authentication token file path
auth_token_path?: str
# TLS verification
verify_tls: bool = True
# Connection timeout (seconds)
timeout: int = 30
# Health check interval (seconds)
health_check_interval: int = 60
check:
len(endpoints) > 0, "At least one remote endpoint required"
len(zones) > 0, "At least one zone required"
timeout > 0 and timeout <= 300, "Timeout must be 1-300 seconds"
health_check_interval >= 10, "Health check interval must be >= 10 seconds"
schema DynamicDNS:
"""
Dynamic DNS update configuration
Enables automatic DNS updates when infrastructure changes.
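Examples:
    # Batch DNS updates every 60 seconds instead of applying each change immediately (illustrative values)
    DynamicDNS {
        enabled = True
        update_strategy = "batched"
        batch_interval = 60
    }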
"""
# Enable dynamic DNS updates
enabled: bool = True
# Orchestrator DNS API endpoint
api_endpoint: str = "http://localhost:8080/dns"
# Automatically register servers on creation
auto_register_servers: bool = True
# Automatically unregister servers on deletion
auto_unregister_servers: bool = True
# Default TTL for dynamic records (seconds)
ttl: int = 300
# Update strategy: immediate, batched, or scheduled
update_strategy: "immediate" | "batched" | "scheduled" = "immediate"
# Batch interval (seconds, for batched strategy)
batch_interval?: int = 60
# Retry configuration
retry_policy: RetryPolicy = RetryPolicy {}
check:
ttl > 0 and ttl <= 86400, "TTL must be 1-86400 seconds"
update_strategy != "batched" or batch_interval != Undefined, \
"Batch interval required for batched strategy"
batch_interval == Undefined or batch_interval >= 10, \
"Batch interval must be >= 10 seconds"
schema RetryPolicy:
"""
Retry policy for DNS update failures
"""
# Maximum retry attempts
max_attempts: int = 3
# Initial delay before first retry (seconds)
initial_delay: int = 5
# Backoff multiplier for subsequent retries
backoff_multiplier: float = 2.0
# Maximum delay between retries (seconds)
max_delay: int = 60
check:
max_attempts > 0 and max_attempts <= 10, "Max attempts must be 1-10"
initial_delay > 0, "Initial delay must be positive"
backoff_multiplier >= 1.0, "Backoff multiplier must be >= 1.0"
max_delay >= initial_delay, "Max delay must be >= initial delay"
schema DNSZone:
"""
DNS zone configuration
Defines a DNS zone with SOA, NS, and other records.
Examples:
DNSZone {
name = "provisioning.local"
admin_email = "admin.provisioning.local"
nameservers = ["ns1.provisioning.local"]
records = [
DNSRecord {
name = "server-01"
type = "A"
value = "10.0.1.10"
}
]
}
"""
# Zone name (must be FQDN with trailing dot in zone file)
name: str
# Zone file path
file_path?: str
# SOA record configuration
soa: SOARecord = SOARecord {}
# Nameserver hostnames
nameservers: [str]
# Admin email, written in zone-file form (the @ is represented by a dot)
admin_email: str = "admin.${name}"
# DNS records
records: [DNSRecord] = []
# Default TTL for zone (seconds)
ttl: int = 3600
check:
len(name) > 0, "Zone name required"
regex.match(name, r"^[a-z0-9]([a-z0-9-\.]{0,253}[a-z0-9])?$"), \
"Zone name must be valid domain name"
len(nameservers) > 0, "At least one nameserver required"
ttl > 0, "TTL must be positive"
schema SOARecord:
"""
SOA (Start of Authority) record
"""
# Serial number (auto-incremented on updates)
serial: int = 1
# Refresh interval (seconds)
refresh: int = 3600
# Retry interval (seconds)
retry: int = 1800
# Expire time (seconds)
expire: int = 604800
# Minimum TTL (seconds)
minimum: int = 86400
check:
serial > 0, "Serial must be positive"
refresh > 0, "Refresh must be positive"
retry > 0, "Retry must be positive"
expire > refresh, "Expire must be > refresh"
minimum > 0, "Minimum must be positive"
schema DNSRecord:
"""
DNS resource record
Supports A, AAAA, CNAME, MX, TXT, NS, SRV, PTR records.
Examples:
# A record
DNSRecord {
name = "server-01"
type = "A"
value = "10.0.1.10"
}
# CNAME record
DNSRecord {
name = "web"
type = "CNAME"
value = "server-01.provisioning.local"
}
# MX record
DNSRecord {
name = "@"
type = "MX"
priority = 10
value = "mail.provisioning.local"
}
"""
# Record name (hostname or @)
name: str
# Record type
type: "A" | "AAAA" | "CNAME" | "MX" | "TXT" | "NS" | "SOA" | "SRV" | "PTR"
# Record value (IP address, hostname, or text)
value: str
# TTL in seconds (optional, uses zone default)
ttl?: int
# Priority (for MX and SRV records)
priority?: int
# Weight (for SRV records)
weight?: int
# Port (for SRV records)
port?: int
# Comment
comment?: str
check:
len(name) > 0, "Record name required"
len(value) > 0, "Record value required"
# A record validation
type != "A" or regex.match(value, \
r"^((25[0-5]|2[0-4][0-9]|[0-1]?[0-9]?[0-9])\.){3}(25[0-5]|2[0-4][0-9]|[0-1]?[0-9]?[0-9])$"), \
"A record value must be valid IPv4 address"
# AAAA record validation
type != "AAAA" or regex.match(value, r"^([0-9a-fA-F]{0,4}:){7}[0-9a-fA-F]{0,4}$"), \
"AAAA record value must be valid IPv6 address"
# MX/SRV priority validation
type not in ["MX", "SRV"] or priority != Undefined, \
"Priority required for MX and SRV records"
# SRV weight and port validation
type != "SRV" or (weight != Undefined and port != Undefined), \
"Weight and port required for SRV records"
# TTL validation
ttl == Undefined or (ttl > 0 and ttl <= 86400), \
"TTL must be 1-86400 seconds"
schema CorefilePlugin:
"""
Corefile plugin configuration
Defines a plugin block in Corefile.
"""
# Plugin name (file, forward, cache, etc.)
name: str
# Plugin arguments
args: [str] = []
# Plugin options (key-value pairs)
options: {str: str} = {}
check:
len(name) > 0, "Plugin name required"
schema CorefileZoneBlock:
"""
Corefile zone block configuration
Defines a zone block with plugins in Corefile.
"""
# Zone name (e.g., "provisioning.local:5353")
zone: str
# Port number
port: int = 5353
# Plugins in this zone
plugins: [CorefilePlugin]
check:
len(zone) > 0, "Zone required"
port >= 1024 and port <= 65535, "Port must be 1024-65535"
len(plugins) > 0, "At least one plugin required"
schema DNSQueryLog:
"""
DNS query logging configuration
"""
# Enable query logging
enabled: bool = True
# Log file path
log_file: str = "~/.provisioning/coredns/queries.log"
# Log format: text or json
log_format: "text" | "json" = "text"
# Log level: debug, info, warn, error
log_level: "debug" | "info" | "warn" | "error" = "info"
# Rotate log files
rotate_enabled: bool = True
# Max log file size (MB)
max_size_mb: int = 100
# Max number of rotated files
max_backups: int = 5
check:
max_size_mb > 0 and max_size_mb <= 1024, "Max size must be 1-1024 MB"
max_backups >= 0 and max_backups <= 100, "Max backups must be 0-100"
schema DNSHealthCheck:
"""
CoreDNS health check configuration
"""
# Enable health checks
enabled: bool = True
# Health check endpoint
endpoint: str = "http://localhost:8080/health"
# Health check interval (seconds)
interval: int = 30
# Timeout for health check (seconds)
timeout: int = 5
# Unhealthy threshold (consecutive failures)
unhealthy_threshold: int = 3
# Healthy threshold (consecutive successes)
healthy_threshold: int = 2
check:
interval > 0, "Interval must be positive"
timeout > 0 and timeout < interval, "Timeout must be < interval"
unhealthy_threshold > 0, "Unhealthy threshold must be positive"
healthy_threshold > 0, "Healthy threshold must be positive"

75
defaults.k Normal file

@@ -0,0 +1,75 @@
# Info: KCL core lib defaults schemas for provisioning (Provisioning)
# Author: JesusPerezLorenzo
# Release: 0.0.4
# Date: 15-12-2023
import regex
import .lib
schema ServerDefaults:
"""
Server Defaults settings
"""
lock: bool = False
# To use private network, IPs will be set in servers items
priv_cidr_block?: str
time_zone: str = "UTC"
#zone?: str
# Seconds to wait between checks for the running state
running_wait: int = 10
# Total seconds to wait for running state before timeout
running_timeout: int = 200
# Specific AMIs can be used with their ID
# If 'storage_os: find' is set, storage_os_find will be used to find an image in the zone (region)
# Expected format: "name=debian-12 | arch=x86_64" or "name: debian-12 | arch: x86_64"; it is parsed to find the latest available image
storage_os_find: str = "name: debian-12 | arch: x86_64"
#storage_os?: str
#storage_os: ami-0eb11ab33f229b26c
# If not Storage size, Plan Storage size will be used
# storages is defined in Provider defaults
#storages?: [Storage]
# Add one or more SSH keys to the admin account. Accepted values are SSH public keys or filenames from
# where to read the keys.
# ssh public key to be included in /root/.ssh/authorized_keys
ssh_key_path?: str
# Public certificate must be created or imported as a key_name
# use: providers/aws/bin/on-ssh.sh (add -h to get info)
ssh_key_name?: str
# Use it to rewrite or update ssh_key
# ssh_key_mode: rewrite
# AWS does not use a utility network; if no value is set, the utility IP will not be assigned
# Public network; if no value is set, the public IP will not be assigned
network_utility_ipv4: bool = True
network_utility_ipv6: bool = False
network_public_ipv4?: bool = True
network_public_ipv6?: bool = False
network_public_ip?: str
#TODO settings for Elastic IPs or instance without public IP
# To use a private network, a VPC + Subnet + NetworkInterface has to be created; IPs will be set in server items
# In AWS this is only a name
network_private_name?: str
network_private_id?: str
primary_dns?: str
secondary_dns?: str
main_domain?: str
domains_search?: str
# Labels to describe the server in `key: value` format, multiple can be declared.
# Usage: env: dev
labels: str
# Main user (default Debian user is admin)
user: str
user_ssh_key_path?: str
user_home?: str = "/home/${user}"
user_ssh_port?: int = 22
# If is not empty it will add servers entries to /etc/hosts and $HOME/.ssh/config
fix_local_hosts: bool = True
installer_user?: str = "${user}"
scale?: lib.ScaleResource
check:
user == Undefined or len(user) > 0, "Check user value"
#len(ssh_key_path) > 0, "Check ssh_key_path"
priv_cidr_block == Undefined or regex.match(priv_cidr_block, "^(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(?:\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}\/(?:3[0-2]|[0-2]?[0-9])$"), "'priv_cidr_block = ${priv_cidr_block}' check value definition"

281
dependencies.k Normal file

@@ -0,0 +1,281 @@
"""
KCL Dependency Management Schema for Provisioning System
Provides type-safe dependency declarations with resource requirements and health checks
"""
schema ResourceRequirement:
"""Resource requirements for taskserv installation and operation"""
# CPU requirement (K8s format)
cpu?: str = "100m"
# Memory requirement (K8s format)
memory?: str = "128Mi"
# Disk space requirement
disk?: str = "1Gi"
# Requires network connectivity
network?: bool = True
# Requires privileged access
privileged?: bool = False
check:
len(cpu) > 0, "CPU requirement cannot be empty"
len(memory) > 0, "Memory requirement cannot be empty"
len(disk) > 0, "Disk requirement cannot be empty"
schema HealthCheck:
"""Health check definition for taskserv validation"""
# Command to execute for health check
command: str
# Check interval in seconds
interval?: int = 30
# Command timeout in seconds
timeout?: int = 10
# Number of retry attempts
retries?: int = 3
# Consecutive successes needed
success_threshold?: int = 1
# Consecutive failures to mark unhealthy
failure_threshold?: int = 3
check:
len(command) > 0, "Health check command cannot be empty"
interval > 0, "Health check interval must be positive"
timeout > 0, "Health check timeout must be positive"
retries >= 0, "Health check retries cannot be negative"
schema InstallationPhase:
"""Installation phase definition for ordered deployment"""
# Phase name (e.g., "pre-install", "install", "post-install")
name: str
# Execution order within phase (lower first)
order: int
# Can run in parallel with same order
parallel?: bool = False
# Phase is required for successful installation
required?: bool = True
check:
len(name) > 0, "Installation phase name cannot be empty"
order >= 0, "Installation phase order cannot be negative"
name in ["pre-install", "install", "post-install", "validate", "cleanup"], "Phase name must be one of: pre-install, install, post-install, validate, cleanup"
schema TaskservDependencies:
"""Complete dependency configuration for a taskserv"""
# Taskserv name (must match directory)
name: str
# Dependency relationships
# Required taskservs (must be installed first)
requires?: [str]
# Conflicting taskservs (cannot coexist)
conflicts?: [str]
# Optional taskservs (install if available)
optional?: [str]
# Services this taskserv provides
provides?: [str]
# Resource requirements
# Resource requirements for installation
resources: ResourceRequirement
# Health and validation
# Health check definitions
health_checks?: [HealthCheck]
# Readiness check for installation completion
readiness_probe?: HealthCheck
# Installation control
# Installation phase definitions
phases?: [InstallationPhase]
# Installation timeout in seconds
timeout?: int = 600
# Number of installation retry attempts
retry_count?: int = 3
# Compatibility
# Supported operating systems
os_support?: [str] = ["linux"]
# Supported CPU architectures
arch_support?: [str] = ["amd64"]
# Compatible Kubernetes versions
k8s_versions?: [str]
check:
len(name) > 0, "Taskserv name cannot be empty"
name == name.lower(), "Taskserv name must be lowercase"
timeout > 0, "Installation timeout must be positive"
retry_count >= 0, "Retry count cannot be negative"
len(os_support) > 0, "Must specify at least one supported OS"
len(arch_support) > 0, "Must specify at least one supported architecture"
# Re-export for taskserv use
schema TaskservDependency(TaskservDependencies):
"""Alias for TaskservDependencies - provides the same functionality"""
# OCI Registry Integration Schemas
schema OCISource:
"""OCI registry configuration for extension distribution"""
# OCI registry endpoint (localhost:5000, harbor.company.com)
registry: str
# Namespace in registry (provisioning-extensions, provisioning-platform)
namespace: str
# Path to authentication token file
auth_token_path?: str
# Enable TLS for registry connection
tls_enabled: bool = False
# Skip TLS certificate verification (insecure, dev only)
insecure_skip_verify: bool = False
# OCI platform architecture
platform: str = "linux/amd64"
# Media type for KCL packages
media_type: str = "application/vnd.kcl.package.v1+tar"
check:
len(registry) > 0, "OCI registry endpoint required"
len(namespace) > 0, "OCI namespace required"
not (insecure_skip_verify and tls_enabled), \
"insecure_skip_verify should only be used without TLS"
platform in ["linux/amd64", "linux/arm64", "darwin/amd64", "darwin/arm64"], \
"Platform must be one of: linux/amd64, linux/arm64, darwin/amd64, darwin/arm64"
schema GiteaSource:
"""Gitea repository configuration for extension distribution"""
# Gitea server URL
url: str
# Organization/namespace containing repositories
organization: str
# Path to authentication token file
auth_token_path?: str
# Use SSH instead of HTTPS
use_ssh: bool = False
# Branch to use for extensions
branch: str = "main"
check:
len(url) > 0, "Gitea URL required"
len(organization) > 0, "Gitea organization required"
url.startswith("http://") or url.startswith("https://"), \
"Gitea URL must start with http:// or https://"
schema LocalSource:
"""Local filesystem configuration for extension distribution"""
# Absolute path to extensions directory
path: str
# Watch for changes and auto-reload
watch: bool = False
check:
len(path) > 0, "Local source path required"
path.startswith("/") or path.startswith("~"), \
"Local source path must be absolute"
schema HTTPSource:
"""Generic HTTP/HTTPS configuration for extension distribution"""
# HTTP/HTTPS URL
url: str
# Authentication header (e.g., "Bearer token123")
auth_header?: str
# Use HTTP basic auth
basic_auth?: bool = False
# Username for basic auth
username?: str
# Password for basic auth
password?: str
check:
len(url) > 0, "HTTP URL required"
url.startswith("http://") or url.startswith("https://"), \
"URL must start with http:// or https://"
not basic_auth or (username and password), \
"Basic auth requires username and password"
schema ExtensionSource:
"""Extension source configuration with multi-backend support"""
# Source type
type: "oci" | "gitea" | "local" | "http"
# OCI registry source configuration
oci?: OCISource
# Gitea source configuration
gitea?: GiteaSource
# Local filesystem source configuration
local?: LocalSource
# HTTP source configuration
http?: HTTPSource
check:
(type == "oci" and oci != None) or \
(type == "gitea" and gitea != None) or \
(type == "local" and local != None) or \
(type == "http" and http != None), \
"Source configuration must match selected type"
schema ExtensionManifest:
"""Extension package manifest for OCI distribution"""
# Extension name (must match directory name)
name: str
# Extension type
type: "provider" | "taskserv" | "cluster"
# Semantic version
version: str
# Extension description
description?: str
# Extension author/maintainer
author?: str
# License identifier (SPDX)
license?: str = "MIT"
# Extension homepage URL
homepage?: str
# Repository URL
repository?: str
# Extension dependencies
dependencies?: {str: str}
# Extension tags/keywords
tags?: [str]
# Supported platforms
platforms?: [str] = ["linux/amd64"]
# Minimum provisioning core version
min_provisioning_version?: str
check:
len(name) > 0, "Extension name required"
name == name.lower(), "Extension name must be lowercase"
len(version) > 0, "Extension version required"
# Semantic version format (basic check)
version.count(".") >= 2, "Version must be semantic (x.y.z)"
schema RepositoryConfig:
"""Multi-repository configuration for dependency management"""
# Repository name
name: str
# Repository type
type: "core" | "extensions" | "platform" | "workspace"
# Source configuration
source: ExtensionSource
# Repository version/tag
version?: str
# Enable repository
enabled: bool = True
# Repository priority (higher = more priority)
priority: int = 100
# Cache TTL in seconds
cache_ttl: int = 3600
check:
len(name) > 0, "Repository name required"
priority >= 0, "Repository priority cannot be negative"
cache_ttl > 0, "Cache TTL must be positive"
schema DependencyResolution:
"""Dependency resolution configuration"""
# Resolution strategy
strategy: "strict" | "latest" | "minimal"
# Allow pre-release versions
allow_prerelease: bool = False
# Enable version pinning
pin_versions: bool = True
# Maximum dependency depth
max_depth: int = 10
# Conflict resolution strategy
conflict_strategy: "error" | "latest" | "highest_priority"
check:
max_depth > 0 and max_depth <= 100, \
"Max depth must be between 1 and 100"

1200
docs/BEST_PRACTICES.md Normal file

File diff suppressed because it is too large

800
docs/VALIDATION.md Normal file

@@ -0,0 +1,800 @@
# Schema Validation and Best Practices
This document provides comprehensive guidance on validating KCL schemas and following best practices for the provisioning package.
## Table of Contents
- [Schema Validation](#schema-validation)
- [Built-in Constraints](#built-in-constraints)
- [Custom Validation](#custom-validation)
- [Best Practices](#best-practices)
- [Common Patterns](#common-patterns)
- [Troubleshooting](#troubleshooting)
## Schema Validation
### Basic Validation
```bash
# Validate syntax and run schema checks
kcl run config.k
# Format and validate all files
kcl fmt *.k
# Validate with verbose output
kcl run config.k --debug
# Validate against specific schema
kcl vet config.k --schema main.Server
```
### JSON Output Validation
```bash
# Generate and validate JSON output
kcl run config.k --format json | jq '.'
# Validate JSON schema structure
kcl run config.k --format json | jq '.workflow_id // error("Missing workflow_id")'
# Pretty print for inspection
kcl run config.k --format json | jq '.operations[] | {operation_id, name, provider}'
```
### Validation in CI/CD
```yaml
# GitHub Actions example
- name: Validate KCL Schemas
run: |
find . -name "*.k" -exec kcl fmt {} \;
find . -name "*.k" -exec kcl run {} \;
# Check for schema changes
- name: Check Schema Compatibility
run: |
kcl run main.k --format json > current_schema.json
diff expected_schema.json current_schema.json
```
## Built-in Constraints
### Server Schema Constraints
```kcl
import .main
# ✅ Valid server configuration
valid_server: main.Server = main.Server {
hostname: "web-01" # ✅ Non-empty string required
title: "Web Server" # ✅ Non-empty string required
labels: "env: prod" # ✅ Required field
user: "admin" # ✅ Required field
# Optional but validated fields
user_ssh_port: 22 # ✅ Valid port number
running_timeout: 300 # ✅ Positive integer
time_zone: "UTC" # ✅ Valid timezone string
}
# ❌ Invalid configurations that will fail validation
invalid_examples: {
# hostname: "" # ❌ Empty hostname not allowed
# title: "" # ❌ Empty title not allowed
# user_ssh_port: -1 # ❌ Negative port not allowed
# running_timeout: 0 # ❌ Zero timeout not allowed
}
```
### Workflow Schema Constraints
```kcl
import .main
# ✅ Valid workflow with proper constraints
valid_workflow: main.BatchWorkflow = main.BatchWorkflow {
workflow_id: "deploy_001" # ✅ Non-empty ID required
name: "Production Deployment" # ✅ Non-empty name required
operations: [ # ✅ At least one operation required
main.BatchOperation {
operation_id: "create_servers" # ✅ Unique operation ID
name: "Create Servers"
operation_type: "server"
action: "create"
parameters: {}
timeout: 600 # ✅ Positive timeout
priority: 5 # ✅ Valid priority
}
]
max_parallel_operations: 3 # ✅ Non-negative number
global_timeout: 3600 # ✅ Positive global timeout
}
# ❌ Constraint violations
constraint_violations: {
# workflow_id: "" # ❌ Empty workflow ID
# operations: [] # ❌ Empty operations list
# max_parallel_operations: -1 # ❌ Negative parallel limit
# global_timeout: 0 # ❌ Zero global timeout
}
```
### Kubernetes Schema Constraints
```kcl
import .main
# ✅ Valid Kubernetes deployment with constraints
valid_k8s: main.K8sDeploy = main.K8sDeploy {
name: "webapp" # ✅ Non-empty name
namespace: "production" # ✅ Valid namespace
spec: main.K8sDeploySpec {
replicas: 3 # ✅ Positive replica count
containers: [ # ✅ At least one container required
main.K8sContainers {
name: "app" # ✅ Non-empty container name
image: "nginx:1.21" # ✅ Valid image reference
resources_requests: main.K8sResources {
memory: "128Mi" # ✅ Valid K8s memory format
cpu: "100m" # ✅ Valid K8s CPU format
}
resources_limits: main.K8sResources {
memory: "256Mi" # ✅ Limits >= requests (enforced)
cpu: "200m"
}
}
]
}
}
```
### Dependency Schema Constraints
```kcl
import .main
# ✅ Valid dependency definitions
valid_dependencies: main.TaskservDependencies = main.TaskservDependencies {
name: "kubernetes" # ✅ Lowercase name required
requires: ["containerd", "cni"] # ✅ Valid dependency list
conflicts: ["docker"] # ✅ Cannot coexist with docker
resources: main.ResourceRequirement {
cpu: "100m" # ✅ Non-empty CPU requirement
memory: "128Mi" # ✅ Non-empty memory requirement
disk: "1Gi" # ✅ Non-empty disk requirement
}
timeout: 600 # ✅ Positive timeout
retry_count: 3 # ✅ Non-negative retry count
os_support: ["linux"] # ✅ At least one OS required
arch_support: ["amd64", "arm64"] # ✅ At least one arch required
}
# ❌ Constraint violations
dependency_violations: {
# name: "Kubernetes" # ❌ Must be lowercase
# name: "" # ❌ Cannot be empty
# timeout: 0 # ❌ Must be positive
# retry_count: -1 # ❌ Cannot be negative
# os_support: [] # ❌ Must specify at least one OS
}
```
## Custom Validation
### Adding Custom Constraints
```kcl
import .main
import regex
# Custom server schema with additional validation
schema CustomServer(main.Server):
"""Custom server with additional business rules"""
# Additional custom fields
environment: "dev" | "staging" | "prod"
cost_center: str
check:
# Business rule: production servers must have specific naming
environment == "prod" and regex.match(hostname, "^prod-[a-z0-9-]+$"),
"Production servers must start with 'prod-'"
# Business rule: staging servers have resource limits
environment == "staging" and len(taskservs or []) <= 3,
"Staging servers limited to 3 taskservs"
# Business rule: cost center must be valid
cost_center in ["engineering", "operations", "security"],
"Invalid cost center: ${cost_center}"
# Usage with validation
prod_server: CustomServer = CustomServer {
hostname: "prod-web-01" # ✅ Matches production naming
title: "Production Web Server"
labels: "env: prod"
user: "admin"
environment: "prod" # ✅ Valid environment
cost_center: "engineering" # ✅ Valid cost center
}
```
### Conditional Validation
```kcl
import .main
# Workflow with conditional validation based on environment
schema EnvironmentWorkflow(main.BatchWorkflow):
"""Workflow with environment-specific validation"""
environment: "dev" | "staging" | "prod"
check:
# Production workflows must have monitoring
environment == "prod" and monitoring.enabled == True,
"Production workflows must enable monitoring"
# Production workflows must have rollback enabled
environment == "prod" and default_rollback_strategy.enabled == True,
"Production workflows must enable rollback"
# Development can have shorter timeouts
environment == "dev" and global_timeout <= 1800, # 30 minutes
"Development workflows should complete within 30 minutes"
# Staging must have retry policies
environment == "staging" and default_retry_policy.max_attempts >= 2,
"Staging workflows must have retry policies"
# Valid production workflow
prod_workflow: EnvironmentWorkflow = EnvironmentWorkflow {
workflow_id: "prod_deploy_001"
name: "Production Deployment"
environment: "prod" # ✅ Production environment
operations: [
main.BatchOperation {
operation_id: "deploy"
name: "Deploy Application"
operation_type: "server"
action: "create"
parameters: {}
}
]
# ✅ Required for production
monitoring: main.MonitoringConfig {
enabled: True
backend: "prometheus"
}
# ✅ Required for production
default_rollback_strategy: main.RollbackStrategy {
enabled: True
strategy: "immediate"
}
}
```
### Cross-Field Validation
```kcl
import .main
# Validate relationships between fields
schema ValidatedBatchOperation(main.BatchOperation):
"""Batch operation with cross-field validation"""
check:
# Timeout should be reasonable for operation type
operation_type == "server" and timeout >= 300,
"Server operations need at least 5 minutes timeout"
operation_type == "taskserv" and timeout >= 600,
"Taskserv operations need at least 10 minutes timeout"
# High priority operations should have retry policies
priority < 8 or retry_policy.max_attempts >= 2,
"High priority operations should have retry policies"
# Parallel operations should have lower priority
allow_parallel == False or priority <= 7,
"Parallel operations should have lower priority for scheduling"
# Validate workflow operation consistency
schema ConsistentWorkflow(main.BatchWorkflow):
"""Workflow with consistent operation validation"""
check:
# All operation IDs must be unique
isunique([op.operation_id for op in operations]),
"All operation IDs must be unique"
# Dependencies must reference existing operations
all_true([
dep.target_operation_id in [op.operation_id for op in operations]
for op in operations
for dep in op.dependencies or []
]),
"All dependencies must reference existing operations"
# No circular dependencies (simplified check)
len(operations) > 0,
"Workflow must have at least one operation"
```
## Best Practices
### 1. Schema Design Principles
```kcl
# ✅ Good: Descriptive field names and documentation
schema WellDocumentedServer:
"""
Server configuration for production workloads
Follows company security and operational standards
"""
# Core identification
hostname: str # DNS-compliant hostname
fqdn?: str # Fully qualified domain name
# Environment classification
environment: "dev" | "staging" | "prod"
classification: "public" | "internal" | "confidential"
# Operational metadata
owner_team: str # Team responsible for maintenance
cost_center: str # Billing allocation
backup_required: bool # Whether automated backups are needed
check:
len(hostname) > 0 and len(hostname) <= 63, "Hostname must be 1-63 characters"
len(owner_team) > 0, "Owner team must be specified"
len(cost_center) > 0, "Cost center must be specified"
# ❌ Avoid: Unclear field names and missing validation
schema PoorlyDocumentedServer:
name: str # ❌ Ambiguous - hostname? title? display name?
env: str # ❌ No constraints - any string allowed
data: {str: str} # ❌ Unstructured data without validation
```
### 2. Validation Strategy
```kcl
# ✅ Good: Layered validation with clear error messages
schema ProductionWorkflow(main.BatchWorkflow):
"""Production workflow with comprehensive validation"""
# Business metadata
change_request_id: str
approver: str
maintenance_window?: str
check:
# Business process validation
regex.match(change_request_id, "^CHG-[0-9]{4}-[0-9]{3}$"),
"Change request ID must match format CHG-YYYY-NNN"
# Operational validation
global_timeout <= 14400, # 4 hours max
"Production workflows must complete within 4 hours"
# Safety validation
default_rollback_strategy.enabled == True,
"Production workflows must enable rollback"
# Monitoring validation
monitoring.enabled == True and monitoring.enable_notifications == True,
"Production workflows must enable monitoring and notifications"
# ✅ Good: Environment-specific defaults with validation
schema EnvironmentDefaults:
"""Environment-specific default configurations"""
environment: "dev" | "staging" | "prod"
# Default timeouts by environment
default_timeout: int = 1800 if environment == "prod" else (1200 if environment == "staging" else 600)
# Default retry attempts by environment
default_retries: int = 3 if environment == "prod" else (2 if environment == "staging" else 1)
# Default monitoring settings
monitoring_enabled: bool = environment == "prod"
check:
default_timeout > 0, "Timeout must be positive"
default_retries >= 0, "Retries cannot be negative"
```
### 3. Schema Composition Patterns
```kcl
# ✅ Good: Composable schema design
schema BaseResource:
"""Common fields for all resources"""
name: str
tags: {str: str} = {}
created_at?: str
updated_at?: str
check:
len(name) > 0, "Name cannot be empty"
regex.match(name, "^[a-z0-9-]+$"), "Name must be lowercase alphanumeric with hyphens"
schema MonitoredResource(BaseResource):
"""Resource with monitoring capabilities"""
monitoring_enabled: bool = True
alert_thresholds: {str: float} = {}
check:
monitoring_enabled == False or len(alert_thresholds) > 0,
"Monitored resources must define alert thresholds"
schema SecureResource(BaseResource):
"""Resource with security requirements"""
encryption_enabled: bool = True
access_policy: str
compliance_tags: [str] = []
check:
encryption_enabled == True, "Security-sensitive resources must enable encryption"
len(access_policy) > 0, "Access policy must be defined"
"pci" in compliance_tags or "sox" in compliance_tags or "hipaa" in compliance_tags,
"Must specify compliance requirements"
# Composed schema inheriting multiple patterns
schema ProductionDatabase(MonitoredResource, SecureResource):
"""Production database with full operational requirements"""
backup_retention_days: int = 30
high_availability: bool = True
check:
backup_retention_days >= 7, "Production databases need minimum 7 days backup retention"
high_availability == True, "Production databases must be highly available"
```
### 4. Error Handling Patterns
```kcl
# ✅ Good: Comprehensive error scenarios with specific handling
schema RobustBatchOperation(main.BatchOperation):
"""Batch operation with robust error handling"""
# Error classification
critical_operation: bool = False
max_failure_rate: float = 0.1
# Enhanced retry configuration
retry_policy: main.RetryPolicy = main.RetryPolicy {
max_attempts: 5 if critical_operation else 3
initial_delay: 30 if critical_operation else 10
max_delay: 600 if critical_operation else 300
backoff_multiplier: 2
retry_on_errors: [
"connection_error",
"timeout",
"rate_limit",
"resource_unavailable"
]
}
# Enhanced rollback strategy
rollback_strategy: main.RollbackStrategy = main.RollbackStrategy {
enabled: True
strategy: "manual" if critical_operation else "immediate"
preserve_partial_state: critical_operation
custom_rollback_operations: [
"create_incident_ticket",
"notify_on_call_engineer",
"preserve_logs"
] if critical_operation else []
}
check:
0 <= max_failure_rate and max_failure_rate <= 1,
"Failure rate must be between 0 and 1"
not critical_operation or timeout >= 1800,
"Critical operations need extended timeout"
```
## Common Patterns
### 1. Multi-Environment Configuration
```kcl
# Configuration that adapts to environment
schema EnvironmentAwareConfig:
environment: "dev" | "staging" | "prod"
# Computed values based on environment
replica_count: int = (
3 if environment == "prod" else (
2 if environment == "staging" else 1)
)
resource_requests: main.K8sResources = main.K8sResources {
memory: "512Mi" if environment == "prod" else "256Mi"
cpu: "200m" if environment == "prod" else "100m"
}
monitoring_enabled: bool = environment != "dev"
backup_enabled: bool = environment == "prod"
# Usage pattern
prod_config: EnvironmentAwareConfig = EnvironmentAwareConfig {
environment: "prod"
# replica_count automatically becomes 3
# monitoring_enabled automatically becomes True
# backup_enabled automatically becomes True
}
```
### 2. Provider Abstraction
```kcl
# Provider-agnostic resource definition
schema AbstractServer:
"""Provider-agnostic server specification"""
# Common specification
cpu_cores: int
memory_gb: int
storage_gb: int
network_performance: "low" | "moderate" | "high"
# Provider-specific mapping
provider: "upcloud" | "aws" | "gcp"
# Computed provider-specific values
instance_type: str = (
"${cpu_cores}xCPU-${memory_gb}GB" if provider == "upcloud" else (
("m5." + ("large" if cpu_cores == 1 else "xlarge")) if provider == "aws" else (
"n2-standard-${cpu_cores}" if provider == "gcp" else "unknown"
))
)
storage_type: str = (
"MaxIOPS" if provider == "upcloud" else (
"gp3" if provider == "aws" else (
"pd-ssd" if provider == "gcp" else "standard"
))
)
# Multi-provider workflow using abstraction
mixed_deployment: main.BatchWorkflow = main.BatchWorkflow {
workflow_id: "mixed_deploy_001"
name: "Multi-Provider Deployment"
operations: [
# UpCloud servers
main.BatchOperation {
operation_id: "upcloud_servers"
provider: "upcloud"
parameters: {
"instance_type": "2xCPU-4GB" # UpCloud format
"storage_type": "MaxIOPS"
}
},
# AWS servers
main.BatchOperation {
operation_id: "aws_servers"
provider: "aws"
parameters: {
"instance_type": "m5.large" # AWS format
"storage_type": "gp3"
}
}
]
}
```
### 3. Dependency Management
```kcl
# Complex dependency patterns
schema DependencyAwareWorkflow(main.BatchWorkflow):
"""Workflow with intelligent dependency management"""
# Categorize operations by type
infrastructure_ops: [str] = [
op.operation_id for op in operations
if op.operation_type == "server"
]
service_ops: [str] = [
op.operation_id for op in operations
if op.operation_type == "taskserv"
]
validation_ops: [str] = [
op.operation_id for op in operations
if op.operation_type == "custom" and "validate" in op.name.lower()
]
check:
# Infrastructure must come before services
all([
len([dep for dep in op.dependencies or []
if dep.target_operation_id in infrastructure_ops]) > 0
for op in operations
if op.operation_id in service_ops
]) or len(service_ops) == 0,
"Service operations must depend on infrastructure operations"
# Validation must come last
all([
len([dep for dep in op.dependencies or []
if dep.target_operation_id in service_ops or dep.target_operation_id in infrastructure_ops]) > 0
for op in operations
if op.operation_id in validation_ops
]) or len(validation_ops) == 0,
"Validation operations must depend on other operations"
```
## Troubleshooting
### Common Validation Errors
#### 1. Missing Required Fields
```kcl
# Error: attribute 'labels' of Server is required
# ❌ Incomplete server definition
server: main.Server = main.Server {
hostname: "web-01"
title: "Web Server"
# Missing: labels, user
}
# ✅ Complete server definition
server: main.Server = main.Server {
hostname: "web-01"
title: "Web Server"
labels: "env: prod" # ✅ Required field
user: "admin" # ✅ Required field
}
```
#### 2. Type Mismatches
```kcl
# Error: expect int, got str
# ❌ Wrong type
workflow: main.BatchWorkflow = main.BatchWorkflow {
max_parallel_operations: "3" # ❌ String instead of int
}
# ✅ Correct type
workflow: main.BatchWorkflow = main.BatchWorkflow {
max_parallel_operations: 3 # ✅ Integer
}
```
#### 3. Constraint Violations
```kcl
# Error: Check failed: hostname cannot be empty
# ❌ Constraint violation
server: main.Server = main.Server {
hostname: "" # ❌ Empty string violates constraint
title: "Server"
labels: "env: prod"
user: "admin"
}
# ✅ Valid constraint
server: main.Server = main.Server {
hostname: "web-01" # ✅ Non-empty string
title: "Server"
labels: "env: prod"
user: "admin"
}
```
### Debugging Techniques
#### 1. Step-by-step Validation
```bash
# Validate incrementally
kcl run basic_config.k # Start with minimal config
kcl run enhanced_config.k # Add features gradually
kcl run complete_config.k # Full configuration
```
#### 2. Schema Introspection
```bash
# Check what fields are available
kcl run -c 'import .main; main.Server' --format json
# Validate against specific schema
kcl vet config.k --schema main.Server
# Debug with verbose output
kcl run config.k --debug --verbose
```
#### 3. Constraint Testing
```kcl
# Test constraint behavior
test_constraints: {
# Test minimum values
min_timeout: main.BatchOperation {
operation_id: "test"
name: "Test"
operation_type: "server"
action: "create"
parameters: {}
timeout: 1 # Test minimum allowed
}
# Test maximum values
max_parallel: main.BatchWorkflow {
workflow_id: "test"
name: "Test"
operations: [min_timeout]
max_parallel_operations: 100 # Test upper limits
}
}
```
### Performance Considerations
#### 1. Schema Complexity
```kcl
# ✅ Good: Simple, focused schemas
schema SimpleServer:
hostname: str
user: str
labels: str
check:
len(hostname) > 0, "Hostname required"
# ❌ Avoid: Overly complex schemas with many computed fields
schema OverlyComplexServer:
# ... many fields with complex interdependencies
# ... computationally expensive check conditions
# ... deep nested validations
```
#### 2. Validation Efficiency
```kcl
# ✅ Good: Efficient validation
schema EfficientValidation:
name: str
tags: {str: str}
check:
len(name) > 0, "Name required" # ✅ Simple check
len(tags) <= 10, "Maximum 10 tags allowed" # ✅ Simple count check
# ❌ Avoid: Expensive validation
schema ExpensiveValidation:
items: [str]
check:
# ❌ Expensive nested operations
all([regex.match(item, "^[a-z0-9-]+$") for item in items]),
"All items must match pattern"
```
This validation guide provides the foundation for creating robust, maintainable KCL schemas with proper error handling and validation strategies.

589
docs/provisioning.md Normal file

@ -0,0 +1,589 @@
# provisioning
## Index
- [Cluster](#cluster)
- [ClusterDef](#clusterdef)
- [K8sAffinity](#k8saffinity)
- [K8sAffinityLabelSelector](#k8saffinitylabelselector)
- [K8sAffinityMatch](#k8saffinitymatch)
- [K8sAntyAffinityLabelSelector](#k8santyaffinitylabelselector)
- [K8sBackup](#k8sbackup)
- [K8sConfigMap](#k8sconfigmap)
- [K8sContainers](#k8scontainers)
- [K8sDefs](#k8sdefs)
- [K8sDeploy](#k8sdeploy)
- [K8sDeploySpec](#k8sdeployspec)
- [K8sKeyPath](#k8skeypath)
- [K8sKeyVal](#k8skeyval)
- [K8sPort](#k8sport)
- [K8sPrxyGatewayServer](#k8sprxygatewayserver)
- [K8sPrxyPort](#k8sprxyport)
- [K8sPrxyTLS](#k8sprxytls)
- [K8sPrxyVirtualService](#k8sprxyvirtualservice)
- [K8sPrxyVirtualServiceMatch](#k8sprxyvirtualservicematch)
- [K8sPrxyVirtualServiceMatchURL](#k8sprxyvirtualservicematchurl)
- [K8sPrxyVirtualServiceRoute](#k8sprxyvirtualserviceroute)
- [K8sResources](#k8sresources)
- [K8sSecret](#k8ssecret)
- [K8sService](#k8sservice)
- [K8sVolume](#k8svolume)
- [K8sVolumeClaim](#k8svolumeclaim)
- [K8sVolumeMount](#k8svolumemount)
- [RunSet](#runset)
- [ScaleData](#scaledata)
- [ScaleResource](#scaleresource)
- [Server](#server)
- [ServerDefaults](#serverdefaults)
- [Settings](#settings)
- [Storage](#storage)
- [StorageVol](#storagevol)
- [TaskServDef](#taskservdef)
## Schemas
### Cluster
cluster settings
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**admin_host**|str|||
|**admin_port**|int|||
|**admin_user**|str|||
|**clusters_save_path**|str|||
|**def** `required`|"K8sDeploy" | ""||""|
|**local_def_path** `required`|str||"./clusters/${name}"|
|**name** `required`|str|||
|**not_use** `required`|bool||False|
|**profile**|str|||
|**scale**|[ScaleResource](#scaleresource)|||
|**ssh_key_path**|str|||
|**template**|"k8s-deploy" | ""|||
|**version** `required`|str|||
### ClusterDef
ClusterDef settings
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**name** `required`|str|||
|**profile** `required`|str||"default"|
### K8sAffinity
K8S Deployment Affinity settings
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**affinity**|[K8sAffinityLabelSelector](#k8saffinitylabelselector)|||
|**antiAffinity**|[K8sAntyAffinityLabelSelector](#k8santyaffinitylabelselector)|||
### K8sAffinityLabelSelector
K8S Deployment Affinity Label Selector settings
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**labelSelector** `required`|[[K8sAffinityMatch](#k8saffinitymatch)]|||
|**matchLabelKeys**|[str]|||
|**topologyKey**|str|||
|**typ** `required`|"requiredDuringSchedulingIgnoredDuringExecution" | "preferredDuringSchedulingIgnoredDuringExecution"||"requiredDuringSchedulingIgnoredDuringExecution"|
### K8sAffinityMatch
K8S Deployment Affinity Match settings
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**key** `required`|str|||
|**operator** `required`|"In" | "NotIn" | "Exists" | "DoesNotExist"|||
|**values** `required`|[str]|||
### K8sAntyAffinityLabelSelector
K8S Deployment AntyAffinity Label Selector settings
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**labelSelector** `required`|[[K8sAffinityMatch](#k8saffinitymatch)]|||
|**matchLabelKeys**|[str]|||
|**topologyKey**|str|||
|**typ** `required`|"requiredDuringSchedulingIgnoredDuringExecution" | "preferredDuringSchedulingIgnoredDuringExecution"||"requiredDuringSchedulingIgnoredDuringExecution"|
|**weight** `required`|int||100|
### K8sBackup
K8S Backup settings
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**mount_path** `required`|str|||
|**name** `required`|str|||
|**typ** `required`|str|||
### K8sConfigMap
K8S Volume ConfigMap settings
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**name** `required`|str|||
### K8sContainers
K8S Container settings
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**cmd**|str|||
|**env**|[[K8sKeyVal](#k8skeyval)]|||
|**image** `required`|str|||
|**imagePull** `required`|"IfNotPresent" | "Always" | "Never"||"IfNotPresent"|
|**name** `required`|str||"main"|
|**ports**|[[K8sPort](#k8sport)]|||
|**resources_limits**|[K8sResources](#k8sresources)|||
|**resources_requests**|[K8sResources](#k8sresources)|||
|**volumeMounts**|[[K8sVolumeMount](#k8svolumemount)]|||
### K8sDefs
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**cluster_domain** `required`|str|||
|**domain** `required`|str|||
|**full_domain** `required`|str|||
|**name** `required`|str|||
|**ns** `required`|str|||
|**primary_dom** `required`|str|||
### K8sDeploy
K8S Deployment settings
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**backups**|[[K8sBackup](#k8sbackup)]|||
|**bin_apply** `required`|bool||True|
|**create_ns** `required`|bool||False|
|**full_domain**|str|||
|**labels** `required`|[[K8sKeyVal](#k8skeyval)]||[K8sKeyVal {key: "${name}", value: "${name}"}]|
|**name** `required`|str|||
|**name_in_files** `required`|str||"${name}"|
|**namespace** `required`|str | "default"|||
|**prxy** `readOnly`|"istio"||"istio"|
|**prxyGatewayServers**|[[K8sPrxyGatewayServer](#k8sprxygatewayserver)]|||
|**prxyVirtualService**|[K8sPrxyVirtualService](#k8sprxyvirtualservice)|||
|**prxy_ns**|str||"istio-system"|
|**sel_labels** `required`|[[K8sKeyVal](#k8skeyval)]||labels|
|**service**|[K8sService](#k8sservice)|||
|**spec** `required`|[K8sDeploySpec](#k8sdeployspec)|||
|**tls_path**|str||"ssl"|
|**tpl_labels** `required`|[[K8sKeyVal](#k8skeyval)]||labels|
### K8sDeploySpec
K8S Deployment Spec settings
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**affinity**|[K8sAffinity](#k8saffinity)|||
|**containers** `required`|[[K8sContainers](#k8scontainers)]|||
|**hostUsers**|bool||True|
|**imagePullSecret**|str|||
|**nodeName**|str|||
|**nodeSelector**|[[K8sKeyVal](#k8skeyval)]|||
|**replicas** `required`|int||1|
|**secrets**|[[K8sSecret](#k8ssecret)]|||
|**volumes**|[[K8sVolume](#k8svolume)]|||
### K8sKeyPath
K8S key,path settings
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**key** `required`|str|||
|**path** `required`|str|||
### K8sKeyVal
K8S label,selector,env settings
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**key** `required`|str|||
|**value** `required`|str|||
### K8sPort
K8S Port settings
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**container**|int|||
|**name** `required`|str|||
|**nodePort**|int|||
|**target**|int|||
|**typ**|str||"TCP"|
### K8sPrxyGatewayServer
K8S Deployment Proxy Gateway Server settings
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**hosts**|[str]|||
|**port** `required`|[K8sPrxyPort](#k8sprxyport)|||
|**tls**|[K8sPrxyTLS](#k8sprxytls)|||
### K8sPrxyPort
K8S Proxy Port settings
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**name** `required`|str|||
|**number**|int|||
|**proto** `required`|"HTTP" | "HTTPS" | "TCP"||"HTTPS"|
### K8sPrxyTLS
K8S Deployment Proxy TLS settings
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**credentialName**|str|||
|**httpsRedirect**|bool||False|
|**mode**|"SIMPLE" | "PASSTHROUGH" | "MULTI" | ""||"SIMPLE"|
### K8sPrxyVirtualService
K8S Deployment Proxy Virtual Service settings
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**gateways** `required`|[str]|||
|**hosts** `required`|[str]|||
|**matches**|[[K8sPrxyVirtualServiceMatch](#k8sprxyvirtualservicematch)]|||
### K8sPrxyVirtualServiceMatch
K8S Deployment Proxy Virtual Service Match settings
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**location**|[[K8sPrxyVirtualServiceMatchURL](#k8sprxyvirtualservicematchurl)]|||
|**route_destination**|[[K8sPrxyVirtualServiceRoute](#k8sprxyvirtualserviceroute)]|||
|**typ** `required`|"tcp" | "http" | "tls"|||
### K8sPrxyVirtualServiceMatchURL
K8S Deployment Proxy Virtual Service Match URL settings
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**port**|int|||
|**sniHost**|[str]|||
### K8sPrxyVirtualServiceRoute
K8S Deployment Proxy Virtual Service Route settings
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**host** `required`|str|||
|**port_number** `required`|int|||
### K8sResources
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**cpu** `required`|str|||
|**memory** `required`|str|||
### K8sSecret
K8S Volume Secret settings
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**items** `required`|[[K8sKeyPath](#k8skeypath)]|||
|**name** `required`|str|||
### K8sService
K8S Service settings
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**externaIPs**|[str]|||
|**externalName**|str|||
|**name** `required`|str|||
|**ports** `required`|[[K8sPort](#k8sport)]|||
|**proto** `required` `readOnly`|"TCP"||"TCP"|
|**selector**|[[K8sKeyVal](#k8skeyval)]|||
|**typ** `required`|"ClusterIP" | "NodePort" | "LoadBalancer" | "ExternalName" | "Headless" | "None"||"ClusterIP"|
### K8sVolume
K8S Volume settings
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**configMap**|[K8sConfigMap](#k8sconfigmap)|||
|**items**|[[K8sKeyPath](#k8skeypath)]|||
|**name** `required`|str|||
|**persitentVolumeClaim**|[K8sVolumeClaim](#k8svolumeclaim)|||
|**secret**|[K8sSecret](#k8ssecret)|||
|**typ** `required`|"volumeClaim" | "configMap" | "secret"||"volumeClaim"|
### K8sVolumeClaim
K8S VolumeClaim settings
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**abbrev_mode**|["RWO" | "ROX" | "RWX" | "RWOP"]||["RWO"]|
|**hostPath**|str|||
|**modes** `required`|["ReadWriteOnce" | "ReadOnlyMain" | "ReadWriteMany" | "ReadWriteOncePod"]||["ReadWriteOnce"]|
|**name** `required`|str|||
|**pvMode**|"unspecified" | "Filesystem" | "Block"|||
|**pvcMode**|"unspecified" | "Filesystem" | "Block"|||
|**reclaimPolicy**|"Recycle" | "Retain" | "Delete"||"Retain"|
|**storage**|str|||
|**storageClassName** `required`|"manual" | "nfs-client" | "rook-cephfs"||"manual"|
|**typ** `required`|"volumeClaim" | "configMap" | "secret" | ""||""|
### K8sVolumeMount
K8S VolumeMounts settings
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**mountPath** `required`|str|||
|**name** `required`|str|||
|**readOnly** `required`|bool||False|
|**subPath**|str|||
### RunSet
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**inventory_file** `required`|str||"./inventory.yaml"|
|**output_format** `required`|"human" | "yaml" | "json"||"human"|
|**output_path** `required`|str||"tmp/NOW-deploy"|
|**use_time** `required`|bool||True|
|**wait** `required`|bool||True|
### ScaleData
scale data
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**def** `required`|str|||
|**disabled** `required`|bool||False|
|**expire**|str|||
|**from**|str|||
|**mode** `required`|"auto" | "manual" | "ondemand"||"manual"|
|**to**|str|||
### ScaleResource
scale server settings
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**default** `required`|[ScaleData](#scaledata)|||
|**down**|[ScaleData](#scaledata)|||
|**fallback**|[ScaleData](#scaledata)|||
|**max**|[ScaleData](#scaledata)|||
|**min**|[ScaleData](#scaledata)|||
|**path** `required`|str||"/etc/scale_provisioning"|
|**up**|[ScaleData](#scaledata)|||
### Server
server settings
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**cluster**|[[ClusterDef](#clusterdef)]|||
|**delete_lock** `required`|bool||False|
|**domains_search**|str|||
|**extra_hostnames**|[str]|||
|**fix_local_hosts** `required`|bool||True|
|**hostname** `required`|str|||
|**installer_user**|str||"${user}"|
|**labels** `required`|str|||
|**lock** `required`|bool||False|
|**main_domain**|str|||
|**network_private_id**|str|||
|**network_private_name**|str|||
|**network_public_ip**|str|||
|**network_public_ipv4**|bool||True|
|**network_public_ipv6**|bool||False|
|**network_utility_ipv4** `required`|bool||True|
|**network_utility_ipv6** `required`|bool||False|
|**not_use** `required`|bool||False|
|**primary_dns**|str|||
|**priv_cidr_block**|str|||
|**running_timeout** `required`|int||200|
|**running_wait** `required`|int||10|
|**scale**|[ScaleResource](#scaleresource)|||
|**secondary_dns**|str|||
|**ssh_key_name**|str|||
|**ssh_key_path**|str|||
|**storage_os**|str|||
|**storage_os_find** `required`|str||"name: debian-12 \| arch: x86_64"|
|**taskservs**|[[TaskServDef](#taskservdef)]|||
|**time_zone** `required`|str||"UTC"|
|**title** `required`|str|||
|**user** `required`|str|||
|**user_home**|str||"/home/${user}"|
|**user_ssh_key_path**|str|||
|**user_ssh_port**|int||22|
|**zone**|str|||
### ServerDefaults
Server Defaults settings
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**domains_search**|str|||
|**fix_local_hosts** `required`|bool||True|
|**installer_user**|str||"${user}"|
|**labels** `required`|str|||
|**lock** `required`|bool||False|
|**main_domain**|str|||
|**network_private_id**|str|||
|**network_private_name**|str|||
|**network_public_ip**|str|||
|**network_public_ipv4**|bool||True|
|**network_public_ipv6**|bool||False|
|**network_utility_ipv4** `required`|bool||True|
|**network_utility_ipv6** `required`|bool||False|
|**primary_dns**|str|||
|**priv_cidr_block**|str|||
|**running_timeout** `required`|int||200|
|**running_wait** `required`|int||10|
|**scale**|[ScaleResource](#scaleresource)|||
|**secondary_dns**|str|||
|**ssh_key_name**|str|||
|**ssh_key_path**|str|||
|**storage_os**|str|||
|**storage_os_find** `required`|str||"name: debian-12 \| arch: x86_64"|
|**time_zone** `required`|str||"UTC"|
|**user** `required`|str|||
|**user_home**|str||"/home/${user}"|
|**user_ssh_key_path**|str|||
|**user_ssh_port**|int||22|
|**zone**|str|||
### Settings
Settings
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**cluster_admin_host** `required`|str|||
|**cluster_admin_port** `required`|int||22|
|**cluster_admin_user** `required`|str||"root"|
|**clusters_paths** `required`|[str]||["clusters"]|
|**clusters_save_path** `required`|str||"/${main_name}/clusters"|
|**created_clusters_dirpath** `required`|str||"./tmp/NOW_clusters"|
|**created_taskservs_dirpath** `required`|str||"./tmp/NOW_deployment"|
|**defaults_provs_dirpath** `required`|str||"./defs"|
|**defaults_provs_suffix** `required`|str||"_defaults.k"|
|**main_name** `required`|str|||
|**main_title** `required`|str||main_name|
|**prov_clusters_path** `required`|str||"./clusters"|
|**prov_data_dirpath** `required`|str||"./data"|
|**prov_data_suffix** `required`|str||"_settings.k"|
|**prov_local_bin_path** `required`|str||"./bin"|
|**prov_resources_path** `required`|str||"./resources"|
|**runset** `required`|[RunSet](#runset)|||
|**servers_paths** `required`|[str]||["servers"]|
|**servers_wait_started** `required`|int||27|
|**settings_path** `required`|str||"./settings.yaml"|
### Storage
Storage settings
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**fstab** `required`|bool||True|
|**mount** `required`|bool||True|
|**mount_path**|str|||
|**name** `required`|str|||
|**parts**|[[StorageVol](#storagevol)]||[]|
|**size** `required`|int||0|
|**total** `required`|int||size|
|**type** `required`|"ext4" | "xfs" | "btrfs" | "raw" | "zfs"||"ext4"|
### StorageVol
StorageVol settings
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**fstab** `required`|bool||True|
|**mount** `required`|bool||True|
|**mount_path**|str|||
|**name** `required`|str|||
|**size** `required`|int||0|
|**total** `required`|int||size|
|**type** `required`|"ext4" | "xfs" | "btrfs" | "raw" | "zfs"||"ext4"|
### TaskServDef
TaskServDef settings
#### Attributes
| name | type | description | default value |
| --- | --- | --- | --- |
|**install_mode** `required`|"getfile" | "library" | "server" | "library-server" | "server-library"||"library"|
|**name** `required`|str|||
|**profile** `required`|str||"default"|
<!-- Auto generated by kcl-doc tool, please do not edit. -->

40
docs/why_main.md Normal file

@ -0,0 +1,40 @@
✅ main.k
Purpose:
- Entry Point: Main entry point for the provisioning KCL package
- Schema Exports: Exports all schemas from sub-modules for external consumption
- API Interface: Provides a clean, organized API for external projects
Key Features:
- Comprehensive Exports: All 70+ schemas properly exported with organized categories:
- Core configuration schemas (Settings, SecretProvider, etc.)
- Server and infrastructure schemas
- Workflow and batch operation schemas
- Kubernetes deployment schemas
- Version management schemas
- Package Metadata: Exports package information and constants
- Validated: Successfully tested with external imports and schema instantiation
Benefits vs NO directory approach:
- Current Structure: Clean, organized exports with proper categorization
- NO Directory: Contains outdated files with broken imports and missing references
- Maintainability: Single source of truth for package exports
- Usage: External projects can simply import .main and access all schemas via main.SchemaName
Example Usage:
import provisioning.main
server: main.Server = main.Server {
hostname: "web-server"
title: "Web Server"
labels: "env: prod"
user: "admin"
}
workflow: main.BatchWorkflow = main.BatchWorkflow {
workflow_id: "deploy_001"
name: "Production Deployment"
operations: [...]
}
The main.k file is now the recommended entry point for the provisioning KCL package.

278
examples/README.md Normal file

@ -0,0 +1,278 @@
# KCL Package Examples
This directory contains practical examples demonstrating how to use the provisioning KCL package schemas.
## Example Files
### 📄 `basic_server.k`
**Simple server configurations for different use cases**
- Web server with nginx and monitoring
- Database server with PostgreSQL
- Development server with Docker
```bash
# Validate and run
kcl run basic_server.k
# Export to JSON
kcl run basic_server.k --format json
```
### 📄 `simple_workflow.k`
**Basic workflow example with sequential operations**
- Database server creation
- Web server deployment
- Application configuration with dependencies
Demonstrates:
- Sequential dependencies
- Retry policies
- Basic monitoring
- Filesystem storage
### 📄 `kubernetes_deployment.k`
**Complete Kubernetes deployment examples**
- Web application with resources and affinity
- PostgreSQL with persistent storage
- Prometheus monitoring stack
Features:
- Resource limits and requests
- Persistent volumes
- Services and networking
- ConfigMaps and secrets
- Anti-affinity rules
### 📄 `mixed_provider_workflow.k`
**Advanced multi-cloud deployment**
- UpCloud compute infrastructure
- AWS managed services (RDS, ElastiCache)
- Kubernetes cluster setup
- Cross-cloud connectivity
Advanced features:
- Multiple providers in single workflow
- Complex dependency chains
- Production-grade monitoring
- Encrypted state storage
- Comprehensive retry and rollback strategies
## Running Examples
### Basic Validation
```bash
# Check syntax and validate schemas
kcl fmt examples/*.k
kcl run examples/basic_server.k
```
### JSON Output for Integration
```bash
# Generate JSON for Nushell/Rust integration
kcl run examples/simple_workflow.k --format json > workflow.json
# Use with Nushell
let workflow = (open workflow.json)
echo $"Workflow: ($workflow.name)"
echo $"Operations: ($workflow.operations | length)"
```
### Validation with Different Configurations
```bash
# Test with custom values
kcl run examples/basic_server.k -D hostname="my-server" -D user="ubuntu"
# Override workflow settings
kcl run examples/simple_workflow.k -D max_parallel_operations=5
```
## Integration Patterns
### With Nushell Scripts
```nushell
# Load and submit workflow
def submit-workflow [file: string] {
let workflow = (kcl run $file --format json | from json)
$workflow | to json | http post http://localhost:8080/workflows/batch/submit
}
# Monitor workflow progress
def monitor-workflow [workflow_id: string] {
while true {
let status = (http get $"http://localhost:8080/workflows/batch/($workflow_id)")
print $"Status: ($status.status)"
if $status.status in ["completed", "failed"] { break }
sleep 10sec
}
}
```
### With Rust Orchestrator
```rust
use std::process::Command;

// `BatchWorkflow` and `BatchExecutor` are assumed to be provided by the Rust orchestrator.
async fn run_workflow() -> Result<(), Box<dyn std::error::Error>> {
    // Generate workflow JSON from KCL
    let output = Command::new("kcl")
        .args(["run", "examples/simple_workflow.k", "--format", "json"])
        .output()?;

    // Parse and execute the workflow
    let workflow: BatchWorkflow = serde_json::from_slice(&output.stdout)?;
    let executor = BatchExecutor::new(workflow);
    executor.execute().await?;
    Ok(())
}
```
## Customization Examples
### Server Configuration Variants
```kcl
import ..main
# High-performance server
performance_server: main.Server = main.Server {
hostname: "perf-01"
title: "High Performance Server"
labels: "env: prod, tier: compute, performance: high"
user: "performance"
# Override defaults for high-performance needs
running_timeout: 300
taskservs: [
main.TaskServDef {
name: "tuned"
install_mode: "server"
profile: "performance"
}
]
}
```
### Workflow Customization
```kcl
import ..main
# Custom retry policy for network operations
network_retry_policy: main.RetryPolicy = main.RetryPolicy {
max_attempts: 5
initial_delay: 10
max_delay: 120
backoff_multiplier: 1.5
retry_on_errors: ["connection_error", "dns_error", "timeout"]
}
# Workflow with custom settings
custom_workflow: main.BatchWorkflow = main.BatchWorkflow {
workflow_id: "custom_001"
name: "Custom Network Deployment"
# Use custom retry policy for all operations
default_retry_policy: network_retry_policy
operations: [
# ... your operations
]
}
```
## Best Practices Demonstrated
### 1. **Dependency Management**
- Use sequential dependencies for ordered operations
- Use conditional dependencies for health checks
- Set appropriate timeouts for each dependency (see the sketch after this list)
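A minimal sketch of both dependency styles, assuming the target operation IDs (`create_database`, `scale_web_servers`) are placeholders for operations defined elsewhere in your workflow:
```kcl
import ..main
# Sequential dependency: wait for the database server before installing PostgreSQL
db_ready: main.DependencyDef = main.DependencyDef {
    target_operation_id: "create_database"    # placeholder operation ID
    dependency_type: "sequential"
    timeout: 300                              # stop waiting after 5 minutes
}
# Conditional dependency: proceed only once the scaled servers pass health checks
servers_healthy: main.DependencyDef = main.DependencyDef {
    target_operation_id: "scale_web_servers"  # placeholder operation ID
    dependency_type: "conditional"
    conditions: ["servers_ready", "health_check_passed"]
    timeout: 300
}
```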
### 2. **Resource Configuration**
- Always set resource limits for Kubernetes deployments
- Use appropriate server plans based on workload
- Configure persistent storage for stateful services
### 3. **Monitoring & Observability**
- Enable monitoring for production workflows
- Configure appropriate log levels
- Set up notifications for critical operations (see the sketch after this list)
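A minimal monitoring sketch, assuming a Prometheus backend and a placeholder Slack webhook as the notification channel:
```kcl
import ..main
# Monitoring with notifications enabled for critical operations
prod_monitoring: main.MonitoringConfig = main.MonitoringConfig {
    enabled: True
    backend: "prometheus"
    enable_tracing: True
    enable_notifications: True
    notification_channels: ["webhook:slack://ops-alerts"]  # placeholder channel
    log_level: "info"
}
```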
### 4. **Error Handling**
- Configure retry policies based on operation type
- Use rollback strategies for critical deployments
- Set appropriate timeouts for different operations
### 5. **Security**
- Use encrypted storage for sensitive workflows
- Configure proper network isolation
- Use secrets management for credentials (see the sketch after this list)
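A minimal sketch of encrypted workflow state, assuming the SurrealDB endpoint, namespace, and key file paths are placeholders for your environment:
```kcl
import ..main
# Encrypted, persistent workflow state using SOPS with age keys
secure_storage: main.StorageConfig = main.StorageConfig {
    backend: "surrealdb"
    connection_config: {
        "url": "ws://surrealdb.internal:8000"  # placeholder endpoint
        "namespace": "production"
        "database": "workflows"
    }
    enable_persistence: True
    enable_compression: True
    encryption: main.SecretProvider {
        provider: "sops"
        sops_config: main.SopsConfig {
            config_path: "./.sops.yaml"
            age_key_file: "{{env.HOME}}/.config/sops/age/keys.txt"
            use_age: True
        }
    }
}
```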
## Troubleshooting
### Common Issues
**Schema Validation Errors**
```bash
# Check for typos in schema names
kcl run examples/basic_server.k --debug
# Validate against specific schema
kcl vet examples/basic_server.k --schema main.Server
```
**Missing Required Fields**
```kcl
# Error: attribute 'labels' of Server is required
# Solution: Always provide required fields
server: main.Server = main.Server {
hostname: "web-01"
title: "Web Server"
labels: "env: prod" # ✅ Required field
user: "admin" # ✅ Required field
}
```
**Import Errors**
```kcl
# Use relative imports within the package
import ..main # ✅ Correct
# Not absolute imports
import provisioning.main # ❌ May not work in examples
```
### Testing Examples
```bash
# Run all examples to verify they work
for file in examples/*.k; do
echo "Testing $file"
kcl run "$file" > /dev/null && echo "✅ PASS" || echo "❌ FAIL"
done
# Test JSON serialization
kcl run examples/simple_workflow.k --format json | jq '.workflow_id'
```
## Contributing Examples
When adding new examples:
1. **Follow naming convention**: `{purpose}_{type}.k`
2. **Include comprehensive comments**
3. **Demonstrate specific features**
4. **Test before committing**
5. **Update this README**
Example template:
```kcl
# {Purpose} Example
# Demonstrates {specific features}
import ..main
# Clear, descriptive variable names
example_resource: main.ResourceType = main.ResourceType {
# Well-commented configuration
required_field: "value"
# Explain non-obvious settings
optional_field: "explained_value" # Why this value
}
```

80
examples/basic_server.k Normal file

@ -0,0 +1,80 @@
# Basic Server Configuration Example
# Shows how to define a simple server with common settings
import ..main
# Simple web server configuration
web_server: main.Server = main.Server {
hostname: "web-01"
title: "Production Web Server"
labels: "env: prod, tier: web, role: frontend"
user: "deploy"
# Network configuration
network_public_ipv4: True
network_utility_ipv4: True
fix_local_hosts: True
# SSH configuration
user_ssh_port: 22
# Basic taskservs to install
taskservs: [
main.TaskServDef {
name: "nginx"
install_mode: "library"
profile: "production"
},
main.TaskServDef {
name: "prometheus-node-exporter"
install_mode: "server"
profile: "monitoring"
}
]
}
# Database server with different configuration
db_server: main.Server = main.Server {
hostname: "db-01"
title: "PostgreSQL Database Server"
labels: "env: prod, tier: data, role: database"
user: "postgres"
# Database servers typically don't need public IPs
network_public_ipv4: False
network_utility_ipv4: True
# Install database-related taskservs
taskservs: [
main.TaskServDef {
name: "postgresql"
install_mode: "server"
profile: "production"
},
main.TaskServDef {
name: "postgres-exporter"
install_mode: "library"
profile: "monitoring"
}
]
}
# Development server with minimal configuration
dev_server: main.Server = main.Server {
hostname: "dev-01"
title: "Development Server"
labels: "env: dev, tier: development"
user: "developer"
# Development servers can have relaxed settings
network_public_ipv4: True
delete_lock: False # Allow easy deletion for dev environments
taskservs: [
main.TaskServDef {
name: "docker"
install_mode: "library"
profile: "development"
}
]
}

325
examples/kubernetes_deployment.k Normal file

@ -0,0 +1,325 @@
# Kubernetes Deployment Example
# Shows complete K8s deployment with services, volumes, and monitoring
import ..main
# Web application deployment in Kubernetes
webapp_k8s: main.K8sDeploy = main.K8sDeploy {
name: "webapp"
namespace: "production"
create_ns: True
# Deployment specification
spec: main.K8sDeploySpec {
replicas: 3
containers: [
main.K8sContainers {
name: "webapp"
image: "nginx:1.21-alpine"
# Port configuration
ports: [
main.K8sPort {
name: "http"
container: 80
target: 8080
}
]
# Resource requirements
resources_requests: main.K8sResources {
memory: "64Mi"
cpu: "50m"
}
resources_limits: main.K8sResources {
memory: "128Mi"
cpu: "100m"
}
# Environment variables
env: [
main.K8sKeyVal {
key: "NODE_ENV"
value: "production"
},
main.K8sKeyVal {
key: "LOG_LEVEL"
value: "info"
}
]
# Mount configuration volume
volumeMounts: [
main.K8sVolumeMount {
name: "config"
mountPath: "/etc/nginx/conf.d"
readOnly: True
}
]
}
]
# Volume configuration
volumes: [
main.K8sVolume {
name: "config"
typ: "configMap"
configMap: main.K8sConfigMap {
name: "webapp-config"
}
}
]
# Node selection for production workloads
nodeSelector: [
main.K8sKeyVal {
key: "node-type"
value: "production"
}
]
# Anti-affinity to spread pods across nodes
affinity: main.K8sAffinity {
antiAffinity: main.K8sAntyAffinityLabelSelector {
typ: "preferredDuringSchedulingIgnoredDuringExecution"
weight: 100
labelSelector: [
main.K8sAffinityMatch {
key: "app"
operator: "In"
values: ["webapp"]
}
]
topologyKey: "kubernetes.io/hostname"
}
}
}
# Service configuration
service: main.K8sService {
name: "webapp-service"
typ: "ClusterIP"
ports: [
main.K8sPort {
name: "http"
target: 80
nodePort: 30080
}
]
selector: [
main.K8sKeyVal {
key: "app"
value: "webapp"
}
]
}
# Labels for the deployment
labels: [
main.K8sKeyVal {
key: "app"
value: "webapp"
},
main.K8sKeyVal {
key: "version"
value: "v1.0.0"
},
main.K8sKeyVal {
key: "environment"
value: "production"
}
]
}
# Database deployment with persistent storage
database_k8s: main.K8sDeploy = main.K8sDeploy {
name: "postgres"
namespace: "production"
spec: main.K8sDeploySpec {
replicas: 1 # Database typically runs single instance
containers: [
main.K8sContainers {
name: "postgres"
image: "postgres:15-alpine"
ports: [
main.K8sPort {
name: "postgres"
container: 5432
target: 5432
}
]
# Database needs more resources
resources_requests: main.K8sResources {
memory: "256Mi"
cpu: "100m"
}
resources_limits: main.K8sResources {
memory: "512Mi"
cpu: "500m"
}
# Database environment
env: [
main.K8sKeyVal {
key: "POSTGRES_DB"
value: "webapp"
},
main.K8sKeyVal {
key: "POSTGRES_USER"
value: "webapp"
},
main.K8sKeyVal {
key: "POSTGRES_PASSWORD"
value: "changeme" # Use secrets in production
}
]
# Persistent data volume
volumeMounts: [
main.K8sVolumeMount {
name: "postgres-data"
mountPath: "/var/lib/postgresql/data"
readOnly: False
}
]
}
]
# Persistent volume for database
volumes: [
main.K8sVolume {
name: "postgres-data"
typ: "volumeClaim"
persitentVolumeClaim: main.K8sVolumeClaim {
name: "postgres-pvc"
storageClassName: "manual"
storage: "10Gi"
modes: ["ReadWriteOnce"]
reclaimPolicy: "Retain"
}
}
]
}
# Internal service for database
service: main.K8sService {
name: "postgres-service"
typ: "ClusterIP"
ports: [
main.K8sPort {
name: "postgres"
target: 5432
}
]
selector: [
main.K8sKeyVal {
key: "app"
value: "postgres"
}
]
}
labels: [
main.K8sKeyVal {
key: "app"
value: "postgres"
},
main.K8sKeyVal {
key: "component"
value: "database"
}
]
}
# Monitoring deployment using Prometheus
monitoring_k8s: main.K8sDeploy = main.K8sDeploy {
name: "prometheus"
namespace: "monitoring"
create_ns: True
spec: main.K8sDeploySpec {
replicas: 1
containers: [
main.K8sContainers {
name: "prometheus"
image: "prom/prometheus:v2.40.0"
ports: [
main.K8sPort {
name: "web"
container: 9090
target: 9090
}
]
resources_requests: main.K8sResources {
memory: "512Mi"
cpu: "200m"
}
resources_limits: main.K8sResources {
memory: "1Gi"
cpu: "500m"
}
volumeMounts: [
main.K8sVolumeMount {
name: "prometheus-config"
mountPath: "/etc/prometheus"
readOnly: True
},
main.K8sVolumeMount {
name: "prometheus-data"
mountPath: "/prometheus"
readOnly: False
}
]
}
]
volumes: [
main.K8sVolume {
name: "prometheus-config"
typ: "configMap"
configMap: main.K8sConfigMap {
name: "prometheus-config"
}
},
main.K8sVolume {
name: "prometheus-data"
typ: "volumeClaim"
persitentVolumeClaim: main.K8sVolumeClaim {
name: "prometheus-pvc"
storage: "20Gi"
storageClassName: "manual"
modes: ["ReadWriteOnce"]
}
}
]
}
service: main.K8sService {
name: "prometheus-service"
typ: "NodePort"
ports: [
main.K8sPort {
name: "web"
target: 9090
nodePort: 30090
}
]
}
labels: [
main.K8sKeyVal {
key: "app"
value: "prometheus"
},
main.K8sKeyVal {
key: "component"
value: "monitoring"
}
]
}

452
examples/mixed_provider_workflow.k Normal file

@ -0,0 +1,452 @@
# Mixed Provider Workflow Example
# Demonstrates deployment across multiple cloud providers with advanced features
import ..main
# Production deployment across UpCloud and AWS
production_deployment: main.BatchWorkflow = main.BatchWorkflow {
workflow_id: "prod_multicloud_001"
name: "Production Multi-Cloud Deployment"
description: "Deploy production infrastructure across UpCloud (compute) and AWS (managed services)"
operations: [
# Phase 1: Create UpCloud infrastructure
main.BatchOperation {
operation_id: "upcloud_network"
name: "Create UpCloud Network Infrastructure"
operation_type: "custom"
provider: "upcloud"
action: "create"
parameters: {
"resource_type": "network"
"vpc_cidr": "10.0.0.0/16"
"subnets": "public,private"
"zones": "fi-hel2,de-fra1"
}
priority: 10
timeout: 300
}
main.BatchOperation {
operation_id: "upcloud_compute"
name: "Create UpCloud Compute Instances"
operation_type: "server"
provider: "upcloud"
action: "create"
parameters: {
"server_count": "6"
"plan": "2xCPU-4GB"
"zones": "fi-hel2,de-fra1"
"distribution": "even" # Spread across zones
"server_type": "k8s-worker"
}
dependencies: [
main.DependencyDef {
target_operation_id: "upcloud_network"
dependency_type: "sequential"
timeout: 300
}
]
priority: 9
timeout: 900
allow_parallel: True
}
# Phase 2: Create AWS managed services
main.BatchOperation {
operation_id: "aws_database"
name: "Create AWS RDS PostgreSQL"
operation_type: "server"
provider: "aws"
action: "create"
parameters: {
"service": "rds"
"engine": "postgresql"
"version": "15.4"
"instance_class": "db.t3.medium"
"allocated_storage": "100"
"multi_az": "true"
"region": "eu-west-1"
"vpc_security_groups": "prod-db-sg"
}
priority: 9
timeout: 1800 # RDS can take time
allow_parallel: True
}
main.BatchOperation {
operation_id: "aws_redis"
name: "Create AWS ElastiCache Redis"
operation_type: "server"
provider: "aws"
action: "create"
parameters: {
"service": "elasticache"
"engine": "redis"
"node_type": "cache.t3.micro"
"num_cache_nodes": "2"
"region": "eu-west-1"
"parameter_group": "default.redis7"
}
priority: 9
timeout: 1200
allow_parallel: True
}
# Phase 3: Set up Kubernetes cluster on UpCloud
main.BatchOperation {
operation_id: "install_k8s_control"
name: "Install Kubernetes Control Plane"
operation_type: "taskserv"
provider: "upcloud"
action: "create"
parameters: {
"taskserv": "kubernetes"
"role": "control-plane"
"version": "v1.31.0"
"target_count": "3"
"ha_mode": "true"
"container_runtime": "containerd"
}
dependencies: [
main.DependencyDef {
target_operation_id: "upcloud_compute"
dependency_type: "sequential"
timeout: 600
}
]
priority: 8
timeout: 1800
}
main.BatchOperation {
operation_id: "install_k8s_workers"
name: "Install Kubernetes Worker Nodes"
operation_type: "taskserv"
provider: "upcloud"
action: "create"
parameters: {
"taskserv": "kubernetes"
"role": "worker"
"target_count": "3"
"container_runtime": "containerd"
"join_existing": "true"
}
dependencies: [
main.DependencyDef {
target_operation_id: "install_k8s_control"
dependency_type: "sequential"
timeout: 300
}
]
priority: 7
timeout: 1200
}
# Phase 4: Install cluster services
main.BatchOperation {
operation_id: "install_networking"
name: "Install Cluster Networking"
operation_type: "taskserv"
action: "create"
parameters: {
"taskserv": "cilium"
"version": "1.14.2"
"mode": "tunnel"
"enable_hubble": "true"
}
dependencies: [
main.DependencyDef {
target_operation_id: "install_k8s_workers"
dependency_type: "sequential"
timeout: 300
}
]
priority: 6
timeout: 600
}
main.BatchOperation {
operation_id: "install_monitoring"
name: "Install Monitoring Stack"
operation_type: "taskserv"
action: "create"
parameters: {
"taskserv": "prometheus-stack"
"grafana_enabled": "true"
"alertmanager_enabled": "true"
"node_exporter_enabled": "true"
}
dependencies: [
main.DependencyDef {
target_operation_id: "install_networking"
dependency_type: "sequential"
timeout: 180
}
]
priority: 5
timeout: 900
}
# Phase 5: Configure cross-cloud connectivity
main.BatchOperation {
operation_id: "configure_connectivity"
name: "Configure Cross-Cloud Connectivity"
operation_type: "custom"
action: "configure"
parameters: {
"connectivity_type": "vpn"
"upcloud_endpoint": "dynamic"
"aws_vpc_id": "auto-detect"
"encryption": "wireguard"
"routing": "bgp"
}
dependencies: [
main.DependencyDef {
target_operation_id: "aws_database"
dependency_type: "sequential"
timeout: 60
},
main.DependencyDef {
target_operation_id: "install_monitoring"
dependency_type: "sequential"
timeout: 60
}
]
priority: 4
timeout: 600
}
# Phase 6: Final validation
main.BatchOperation {
operation_id: "validate_deployment"
name: "Validate Complete Deployment"
operation_type: "custom"
action: "configure"
parameters: {
"validation_type": "end_to_end"
"test_database_connectivity": "true"
"test_redis_connectivity": "true"
"test_k8s_cluster": "true"
"test_monitoring": "true"
}
dependencies: [
main.DependencyDef {
target_operation_id: "configure_connectivity"
dependency_type: "sequential"
timeout: 300
}
]
priority: 1
timeout: 600
}
]
# Advanced workflow configuration
max_parallel_operations: 4
global_timeout: 7200 # 2 hours
fail_fast: False # Continue on non-critical failures
# SurrealDB for persistent state
storage: main.StorageConfig {
backend: "surrealdb"
connection_config: {
"url": "ws://surrealdb.internal:8000"
"namespace": "production"
"database": "multicloud_workflows"
"user": "workflow_executor"
"auth_token": "{{env.SURREALDB_TOKEN}}"
}
enable_persistence: True
retention_hours: 2160 # 90 days for production
enable_compression: True
encryption: main.SecretProvider {
provider: "sops"
sops_config: main.SopsConfig {
config_path: "./.sops.yaml"
age_key_file: "{{env.HOME}}/.config/sops/age/keys.txt"
use_age: True
}
}
}
# Comprehensive monitoring
monitoring: main.MonitoringConfig {
enabled: True
backend: "prometheus"
enable_tracing: True
enable_notifications: True
notification_channels: [
"webhook:slack://prod-ops-alerts",
"webhook:pagerduty://high-priority",
"email:devops-team@company.com"
]
log_level: "info"
collection_interval: 30
}
# Production-grade retry policy
default_retry_policy: main.RetryPolicy {
max_attempts: 3
initial_delay: 60
max_delay: 600
backoff_multiplier: 2
retry_on_errors: [
"timeout",
"connection_error",
"rate_limit",
"resource_unavailable",
"quota_exceeded"
]
retry_on_any_error: False
}
# Conservative rollback strategy
default_rollback_strategy: main.RollbackStrategy {
enabled: True
strategy: "manual" # Manual approval for production rollbacks
preserve_partial_state: True
rollback_timeout: 1800
custom_rollback_operations: [
"backup_state",
"notify_team",
"create_incident"
]
}
# Execution context for tracking
execution_context: {
"environment": "production"
"deployment_type": "multi_cloud"
"cost_center": "infrastructure"
"owner": "platform-team"
"change_request": "CHG-2025-001"
"approval": "approved"
}
# Hooks for integration
pre_workflow_hooks: [
"validate_prerequisites",
"check_maintenance_windows",
"notify_deployment_start"
]
post_workflow_hooks: [
"run_smoke_tests",
"update_monitoring_dashboards",
"notify_deployment_complete",
"update_documentation"
]
}
# Advanced batch executor configuration for this workflow
multicloud_executor: main.BatchExecutor = main.BatchExecutor {
executor_id: "multicloud_prod_executor"
name: "Multi-Cloud Production Executor"
description: "Production-ready executor for multi-cloud deployments"
scheduler: main.BatchScheduler {
strategy: "resource_aware"
resource_limits: {
"max_cpu_cores": 32
"max_memory_mb": 65536
"max_network_bandwidth": 10000
"max_concurrent_api_calls": 100
}
scheduling_interval: 15
enable_preemption: True
}
# Multiple queues for different priorities
queues: [
main.BatchQueue {
queue_id: "critical"
queue_type: "priority"
max_size: 50
retention_period: 86400
max_delivery_attempts: 5
},
main.BatchQueue {
queue_id: "standard"
queue_type: "standard"
max_size: 200
retention_period: 604800
dead_letter_queue: "failed_operations"
max_delivery_attempts: 3
}
]
# Mixed provider configuration
provider_config: main.ProviderMixConfig {
primary_provider: "upcloud"
secondary_providers: ["aws"]
provider_selection: "cost_optimize"
cross_provider_networking: {
"vpn_enabled": "true"
"mesh_networking": "wireguard"
"encryption": "aes256"
}
provider_limits: {
"upcloud": {
"max_servers": 50
"max_storage_gb": 10000
"api_rate_limit": "100/min"
}
"aws": {
"max_instances": 20
"max_ebs_gb": 5000
"api_rate_limit": "1000/min"
}
}
}
# Production health monitoring
health_check: main.BatchHealthCheck {
enabled: True
check_interval: 30
check_timeout: 15
failure_threshold: 2
success_threshold: 3
health_checks: [
"http://localhost:8080/health",
"check_provider_apis",
"check_storage_backend",
"check_monitoring_systems"
]
failure_actions: [
"alert",
"graceful_degradation",
"escalate"
]
}
# Intelligent autoscaling
autoscaling: main.BatchAutoscaling {
enabled: True
min_parallel: 3
max_parallel: 15
scale_up_threshold: 0.75
scale_down_threshold: 0.25
cooldown_period: 300
target_utilization: 0.60
}
# Comprehensive metrics
metrics: main.BatchMetrics {
detailed_metrics: True
retention_hours: 2160 # 90 days
aggregation_intervals: [60, 300, 1800, 3600, 86400]
enable_export: True
export_config: {
"prometheus_endpoint": "http://prometheus.monitoring:9090"
"grafana_dashboard": "multicloud_operations"
"datadog_api_key": "{{env.DATADOG_API_KEY}}"
}
custom_metrics: [
"provider_api_latency",
"cross_cloud_bandwidth",
"cost_tracking",
"sla_compliance"
]
}
}

156
examples/simple_workflow.k Normal file

@ -0,0 +1,156 @@
# Simple Workflow Example
# Demonstrates basic workflow creation with sequential operations
import ..main
# Simple web application deployment workflow
web_app_deployment: main.BatchWorkflow = main.BatchWorkflow {
workflow_id: "webapp_deploy_001"
name: "Web Application Deployment"
description: "Deploy a simple web application with database backend"
operations: [
# Step 1: Create database server
main.BatchOperation {
operation_id: "create_database"
name: "Create Database Server"
operation_type: "server"
provider: "upcloud"
action: "create"
parameters: {
"hostname": "webapp-db"
"plan": "1xCPU-2GB"
"zone": "fi-hel2"
"server_type": "database"
}
priority: 10
timeout: 600 # 10 minutes
}
# Step 2: Create web servers (can run in parallel)
main.BatchOperation {
operation_id: "create_web_servers"
name: "Create Web Servers"
operation_type: "server"
provider: "upcloud"
action: "create"
parameters: {
"server_count": "2"
"hostname_prefix": "webapp-web"
"plan": "1xCPU-1GB"
"zone": "fi-hel2"
"server_type": "web"
}
priority: 10
timeout: 600
allow_parallel: True
}
# Step 3: Install database after database server is ready
main.BatchOperation {
operation_id: "install_database"
name: "Install PostgreSQL"
operation_type: "taskserv"
action: "create"
parameters: {
"taskserv": "postgresql"
"version": "15"
"target_servers": "webapp-db"
}
dependencies: [
main.DependencyDef {
target_operation_id: "create_database"
dependency_type: "sequential"
timeout: 300
}
]
priority: 8
timeout: 900 # 15 minutes for database installation
}
# Step 4: Install web stack after web servers are ready
main.BatchOperation {
operation_id: "install_web_stack"
name: "Install Web Stack"
operation_type: "taskserv"
action: "create"
parameters: {
"taskserv": "nginx"
"target_servers": "webapp-web-*"
"config_template": "reverse_proxy"
}
dependencies: [
main.DependencyDef {
target_operation_id: "create_web_servers"
dependency_type: "sequential"
timeout: 300
}
]
priority: 8
timeout: 600
}
# Step 5: Configure application after all components are ready
main.BatchOperation {
operation_id: "configure_application"
name: "Configure Web Application"
operation_type: "custom"
action: "configure"
parameters: {
"config_type": "application"
"database_url": "postgres://webapp-db:5432/webapp"
"web_servers": "webapp-web-01,webapp-web-02"
}
dependencies: [
main.DependencyDef {
target_operation_id: "install_database"
dependency_type: "sequential"
timeout: 60
},
main.DependencyDef {
target_operation_id: "install_web_stack"
dependency_type: "sequential"
timeout: 60
}
]
priority: 5
timeout: 300
}
]
# Workflow settings
max_parallel_operations: 3
global_timeout: 3600 # 1 hour total
fail_fast: True # Stop on first failure
# Simple filesystem storage for this example
storage: main.StorageConfig {
backend: "filesystem"
base_path: "./webapp_deployments"
enable_persistence: True
retention_hours: 168 # 1 week
}
# Basic monitoring
monitoring: main.MonitoringConfig {
enabled: True
backend: "prometheus"
enable_tracing: False # Simplified for example
log_level: "info"
}
# Conservative retry policy
default_retry_policy: main.RetryPolicy {
max_attempts: 2
initial_delay: 30
backoff_multiplier: 2
retry_on_errors: ["timeout", "connection_error"]
}
# Enable rollback for safety
default_rollback_strategy: main.RollbackStrategy {
enabled: True
strategy: "immediate"
preserve_partial_state: False
}
}

457
examples_batch.k Normal file

@ -0,0 +1,457 @@
# Info: KCL batch workflow examples for provisioning (Provisioning)
# Author: JesusPerezLorenzo
# Release: 0.0.1
# Date: 25-09-2025
# Description: Usage examples for batch workflows and operations
import .workflows
import .batch
import .settings
# Example 1: Mixed Provider Infrastructure Deployment
mixed_provider_workflow: workflows.BatchWorkflow = workflows.BatchWorkflow {
workflow_id: "mixed_infra_deploy_001"
name: "Mixed Provider Infrastructure Deployment"
description: "Deploy infrastructure across UpCloud and AWS with cross-provider networking"
operations: [
workflows.BatchOperation {
operation_id: "create_upcloud_servers"
name: "Create UpCloud Web Servers"
operation_type: "server"
provider: "upcloud"
action: "create"
parameters: {
"server_count": "3"
"server_type": "web"
"zone": "fi-hel2"
"plan": "1xCPU-2GB"
}
allow_parallel: True
priority: 10
}
workflows.BatchOperation {
operation_id: "create_aws_database"
name: "Create AWS RDS Database"
operation_type: "server"
provider: "aws"
action: "create"
parameters: {
"service": "rds"
"instance_class": "db.t3.micro"
"engine": "postgresql"
"region": "eu-west-1"
}
dependencies: [
workflows.DependencyDef {
target_operation_id: "create_upcloud_servers"
dependency_type: "sequential"
timeout: 600
}
]
priority: 5
}
workflows.BatchOperation {
operation_id: "install_kubernetes"
name: "Install Kubernetes on UpCloud servers"
operation_type: "taskserv"
provider: "upcloud"
action: "create"
parameters: {
"taskserv": "kubernetes"
"version": "v1.28.0"
"cluster_name": "prod-cluster"
}
dependencies: [
workflows.DependencyDef {
target_operation_id: "create_upcloud_servers"
dependency_type: "sequential"
timeout: 1200
}
]
# 1 hour for K8s installation
timeout: 3600
priority: 8
}
workflows.BatchOperation {
operation_id: "setup_monitoring"
name: "Setup Prometheus monitoring"
operation_type: "taskserv"
action: "create"
parameters: {
"taskserv": "prometheus"
"namespace": "monitoring"
"retention": "30d"
}
dependencies: [
workflows.DependencyDef {
target_operation_id: "install_kubernetes"
dependency_type: "sequential"
timeout: 600
}
]
priority: 3
}
]
max_parallel_operations: 3
fail_fast: False
storage: workflows.StorageConfig {
backend: "surrealdb"
connection_config: {
"url": "ws://localhost:8000"
"namespace": "provisioning"
"database": "batch_workflows"
}
enable_persistence: True
# 30 days
retention_hours: 720
}
monitoring: workflows.MonitoringConfig {
enabled: True
backend: "prometheus"
enable_tracing: True
enable_notifications: True
notification_channels: ["webhook:slack://ops-channel"]
}
default_retry_policy: workflows.RetryPolicy {
max_attempts: 3
initial_delay: 10
backoff_multiplier: 2
retry_on_errors: ["connection_error", "timeout", "rate_limit", "resource_unavailable"]
}
execution_context: {
"environment": "production"
"cost_center": "infrastructure"
"owner": "devops-team"
}
}
# Example 2: Server Scaling Workflow with SurrealDB Backend
server_scaling_workflow: workflows.BatchWorkflow = workflows.BatchWorkflow {
workflow_id: "server_scaling_002"
name: "Auto-scaling Server Workflow"
description: "Scale servers based on load with automatic rollback on failure"
operations: [
workflows.BatchOperation {
operation_id: "scale_web_servers"
name: "Scale web servers up"
operation_type: "server"
action: "scale"
parameters: {
"target_count": "6"
"current_count": "3"
"server_group": "web-tier"
}
retry_policy: workflows.RetryPolicy {
max_attempts: 2
initial_delay: 30
retry_on_errors: ["resource_limit", "quota_exceeded"]
}
rollback_strategy: workflows.RollbackStrategy {
enabled: True
strategy: "immediate"
custom_rollback_operations: ["scale_down_to_original"]
}
}
workflows.BatchOperation {
operation_id: "update_load_balancer"
name: "Update load balancer configuration"
operation_type: "custom"
action: "configure"
parameters: {
"service": "haproxy"
"config_template": "web_tier_6_servers"
}
dependencies: [
workflows.DependencyDef {
target_operation_id: "scale_web_servers"
dependency_type: "conditional"
conditions: ["servers_ready", "health_check_passed"]
timeout: 300
}
]
}
]
storage: workflows.StorageConfig {
backend: "surrealdb"
connection_config: {
"url": "ws://surrealdb.local:8000"
"namespace": "scaling"
"database": "operations"
}
}
fail_fast: True
}
# Example 3: Maintenance Workflow with Filesystem Backend
maintenance_workflow: workflows.BatchWorkflow = workflows.BatchWorkflow {
workflow_id: "maintenance_003"
name: "System Maintenance Workflow"
description: "Perform scheduled maintenance across multiple providers"
operations: [
workflows.BatchOperation {
operation_id: "backup_databases"
name: "Backup all databases"
operation_type: "custom"
action: "create"
parameters: {
"backup_type": "full"
"compression": "gzip"
"retention_days": "30"
}
# 2 hours
timeout: 7200
}
workflows.BatchOperation {
operation_id: "update_taskservs"
name: "Update all taskservs to latest versions"
operation_type: "taskserv"
action: "update"
parameters: {
"update_strategy": "rolling"
"max_unavailable": "1"
}
dependencies: [
workflows.DependencyDef {
target_operation_id: "backup_databases"
dependency_type: "sequential"
}
]
# Sequential updates for safety
allow_parallel: False
}
workflows.BatchOperation {
operation_id: "verify_services"
name: "Verify all services are healthy"
operation_type: "custom"
action: "configure"
parameters: {
"verification_type": "health_check"
"timeout_per_service": "30"
}
dependencies: [
workflows.DependencyDef {
target_operation_id: "update_taskservs"
dependency_type: "sequential"
}
]
}
]
storage: workflows.StorageConfig {
backend: "filesystem"
base_path: "./maintenance_workflows"
enable_persistence: True
enable_compression: True
}
pre_workflow_hooks: ["notify_maintenance_start", "set_maintenance_mode"]
post_workflow_hooks: ["unset_maintenance_mode", "notify_maintenance_complete"]
}
# Example 4: Comprehensive Batch Executor Configuration
production_batch_executor: batch.BatchExecutor = batch.BatchExecutor {
executor_id: "prod_batch_executor"
name: "Production Batch Executor"
description: "Production-ready batch executor with full observability and mixed provider support"
scheduler: batch.BatchScheduler {
strategy: "resource_aware"
resource_limits: {
"max_cpu_cores": 16
"max_memory_mb": 32768
# 10Gbps
"max_network_bandwidth": 10000
}
scheduling_interval: 5
enable_preemption: True
}
queues: [
batch.BatchQueue {
queue_id: "high_priority"
queue_type: "priority"
max_size: 100
# 1 day
retention_period: 86400
max_delivery_attempts: 5
}
batch.BatchQueue {
queue_id: "standard"
queue_type: "standard"
max_size: 500
dead_letter_queue: "failed_operations"
}
batch.BatchQueue {
queue_id: "failed_operations"
queue_type: "dead_letter"
# 7 days
retention_period: 604800
}
]
resource_constraints: [
batch.ResourceConstraint {
resource_type: "cpu"
resource_name: "total_cpu_cores"
max_units: 16
units_per_operation: 1
hard_constraint: True
}
batch.ResourceConstraint {
resource_type: "memory"
resource_name: "total_memory_gb"
max_units: 32
units_per_operation: 2
hard_constraint: True
}
]
provider_config: batch.ProviderMixConfig {
primary_provider: "upcloud"
secondary_providers: ["aws"]
provider_selection: "cost_optimize"
cross_provider_networking: {
"vpn_enabled": "True"
"mesh_networking": "wireguard"
}
provider_limits: {
"upcloud": {"max_servers": 20, "max_storage_gb": 1000}
"aws": {"max_instances": 10, "max_ebs_gb": 500}
}
}
health_check: batch.BatchHealthCheck {
enabled: True
check_interval: 30
failure_threshold: 2
health_checks: ["http://localhost:8080/health", "check_disk_space", "check_memory_usage"]
failure_actions: ["alert", "scale_down", "rollback"]
}
autoscaling: batch.BatchAutoscaling {
enabled: True
min_parallel: 2
max_parallel: 12
scale_up_threshold: 0.85
scale_down_threshold: 0.15
target_utilization: 0.65
# 3 minutes
cooldown_period: 180
}
metrics: batch.BatchMetrics {
detailed_metrics: True
# 30 days
retention_hours: 720
# 1min, 5min, 30min, 1hour
aggregation_intervals: [60, 300, 1800, 3600]
enable_export: True
export_config: {
"prometheus_endpoint": "http://prometheus.local:9090"
"grafana_dashboard": "batch_operations_dashboard"
}
}
storage: workflows.StorageConfig {
backend: "surrealdb"
connection_config: {
"url": "ws://surrealdb.prod:8000"
"namespace": "production"
"database": "batch_workflows"
"user": "batch_executor"
"auth_token": "{{env.SURREALDB_TOKEN}}"
}
enable_persistence: True
# 90 days
retention_hours: 2160
enable_compression: True
encryption: settings.SecretProvider {
provider: "sops"
sops_config: settings.SopsConfig {
config_path: "./.sops.yaml"
age_key_file: "{{env.HOME}}/.config/sops/age/keys.txt"
use_age: True
}
}
}
security_config: {
"tls_enabled": "True"
"auth_required": "True"
"rbac_enabled": "True"
"audit_level": "full"
}
webhook_endpoints: [
"https://hooks.slack.com/services/ops-notifications"
"https://api.pagerduty.com/generic/incidents"
]
performance_config: {
"io_threads": "8"
"worker_threads": "16"
"batch_size": "50"
"connection_pool_size": "20"
}
}
# Example 5: Template for Common Infrastructure Deployment
infra_deployment_template: workflows.WorkflowTemplate = workflows.WorkflowTemplate {
template_id: "standard_infra_deployment"
name: "Standard Infrastructure Deployment Template"
description: "Template for deploying standard infrastructure with customizable parameters"
category: "infrastructure"
workflow_template: workflows.BatchWorkflow {
# Template parameter: {{template.workflow_id}}
workflow_id: "custom_deployment"
# Template parameter: {{template.workflow_name}}
name: "Custom Deployment"
operations: [
workflows.BatchOperation {
operation_id: "create_servers"
# Template: Create {{template.server_count}} servers
name: "Create servers"
operation_type: "server"
# Template parameter: {{template.provider}}
provider: "upcloud"
action: "create"
parameters: {
# Template parameter: {{template.server_count}}
"count": "3"
# Template parameter: {{template.server_type}}
"type": "web"
# Template parameter: {{template.zone}}
"zone": "fi-hel2"
}
}
workflows.BatchOperation {
operation_id: "install_base_taskservs"
name: "Install base taskservs"
operation_type: "taskserv"
action: "create"
parameters: {
# Template parameter: {{template.base_taskservs}}
"taskservs": "kubernetes,prometheus,grafana"
}
dependencies: [
workflows.DependencyDef {
target_operation_id: "create_servers"
dependency_type: "sequential"
}
]
}
]
storage: workflows.StorageConfig {
# Template parameter: {{template.storage_backend}}
backend: "filesystem"
# Template parameter: {{template.storage_path}}
base_path: "./deployments"
}
}
parameters: {
"workflow_id": "custom_deployment"
"workflow_name": "Custom Deployment"
"server_count": "3"
"server_type": "web"
"provider": "upcloud"
"zone": "fi-hel2"
"base_taskservs": "kubernetes,prometheus,grafana"
"storage_backend": "filesystem"
"storage_path": "./deployments"
}
required_parameters: [
"workflow_id"
"server_count"
"provider"
]
examples: [
"Small deployment: server_count=2, server_type=micro"
"Production deployment: server_count=6, server_type=standard, provider=upcloud"
]
}
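# Example 5b (sketch): parameter set for the "Small deployment" case listed above.
# The keys mirror infra_deployment_template.parameters; how a runtime expands the
# "{{template.*}}" placeholders is assumed here, not defined by these schemas.
small_deployment_parameters: {str: str} = {
    "workflow_id": "small_deployment_001"
    "workflow_name": "Small Deployment"
    "server_count": "2"
    "server_type": "micro"
    "provider": "upcloud"
}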

325
gitea.k Normal file
View File

@ -0,0 +1,325 @@
"""
Gitea Integration Configuration Schemas
This module defines schemas for Gitea service configuration, including:
- Local and remote Gitea deployment options
- Repository management
- Workspace integration
- Extension publishing
- Locking mechanism
Version: 1.0.0
KCL Version: 0.11.3+
"""
schema GiteaConfig:
"""
Main Gitea service configuration
Supports both local (self-hosted) and remote Gitea instances.
Local mode can deploy via Docker or binary.
Examples:
# Local Docker deployment
GiteaConfig {
mode = "local"
local = LocalGitea {
enabled = True
deployment = "docker"
port = 3000
auto_start = True
}
}
# Remote Gitea instance
GiteaConfig {
mode = "remote"
remote = RemoteGitea {
enabled = True
url = "https://gitea.example.com"
api_url = "https://gitea.example.com/api/v1"
}
}
"""
mode: "local" | "remote"
local?: LocalGitea
remote?: RemoteGitea
auth: GiteaAuth
repositories: GiteaRepositories = GiteaRepositories {}
workspace_features: WorkspaceFeatures = WorkspaceFeatures {}
check:
mode == "local" and local != None or mode == "remote" and remote != None, \
"Must configure local or remote based on mode"
mode == "local" and local.enabled or mode == "remote" and remote.enabled, \
"Selected Gitea mode must be enabled"
schema LocalGitea:
"""
Local Gitea deployment configuration
Supports Docker container or binary deployment.
"""
enabled: bool = False
deployment: "docker" | "binary"
port: int = 3000
data_dir: str = "~/.provisioning/gitea"
auto_start: bool = False
docker?: DockerGitea
binary?: BinaryGitea
check:
enabled, "Local Gitea must be enabled if configured"
port > 0 and port < 65536, \
"Port must be between 1 and 65535"
len(data_dir) > 0, "Data directory required"
deployment == "docker" and docker != None or \
deployment == "binary" and binary != None, \
"Must configure docker or binary based on deployment type"
schema DockerGitea:
"""Docker-based Gitea deployment"""
image: str = "gitea/gitea:1.21"
container_name: str = "provisioning-gitea"
ssh_port: int = 222
environment: {str: str} = {
"USER_UID" = "1000"
"USER_GID" = "1000"
"GITEA__database__DB_TYPE" = "sqlite3"
}
volumes: [str] = [
"gitea-data:/data"
"/etc/timezone:/etc/timezone:ro"
"/etc/localtime:/etc/localtime:ro"
]
restart_policy: str = "unless-stopped"
check:
len(image) > 0, "Docker image required"
len(container_name) > 0, "Container name required"
ssh_port > 0 and ssh_port < 65536, "SSH port must be 1-65535"
schema BinaryGitea:
"""Binary-based Gitea deployment"""
binary_path: str
config_path: str
version: str = "1.21.0"
user: str = "git"
group: str = "git"
check:
len(binary_path) > 0, "Binary path required"
len(config_path) > 0, "Config path required"
schema RemoteGitea:
"""
Remote Gitea instance configuration
Points to existing Gitea server.
"""
enabled: bool = False
url: str
api_url: str
check:
enabled, "Remote Gitea must be enabled if configured"
len(url) > 0 and url.startswith("http"), \
"URL must start with http:// or https://"
len(api_url) > 0 and api_url.startswith("http"), \
"API URL must start with http:// or https://"
schema GiteaAuth:
"""
Gitea authentication configuration
Token-based authentication for API access.
Token should be stored in encrypted file (SOPS).
"""
token_path: str
username?: str
check:
len(token_path) > 0, "Token path required"
schema GiteaRepositories:
"""
Repository organization and naming configuration
Defines organization structure and repository names.
"""
organization: str = "provisioning"
core_repo: str = "provisioning-core"
extensions_repo: str = "provisioning-extensions"
platform_repo: str = "provisioning-platform"
workspaces_org: str = "workspaces"
check:
len(organization) > 0, "Organization name required"
len(core_repo) > 0, "Core repo name required"
len(extensions_repo) > 0, "Extensions repo name required"
len(platform_repo) > 0, "Platform repo name required"
len(workspaces_org) > 0, "Workspaces org name required"
schema WorkspaceFeatures:
"""
Workspace integration feature flags
Controls which Gitea features are enabled for workspaces.
"""
git_integration: bool = True
locking_enabled: bool = True
webhooks_enabled: bool = False
auto_sync: bool = False
branch_protection: bool = False
check:
git_integration or not locking_enabled, \
"Locking requires git integration"
schema GiteaRepository:
"""
Gitea repository metadata
Used for creating and managing repositories.
"""
name: str
owner: str
description?: str
private: bool = False
auto_init: bool = True
default_branch: str = "main"
gitignore?: str
license?: str
readme?: str
check:
len(name) > 0, "Repository name required"
len(owner) > 0, "Repository owner required"
schema GiteaRelease:
"""
Gitea release configuration
Used for publishing extensions and versioned artifacts.
"""
tag_name: str
release_name: str
body?: str
draft: bool = False
prerelease: bool = False
target_commitish: str = "main"
check:
len(tag_name) > 0, "Tag name required"
len(release_name) > 0, "Release name required"
schema GiteaIssue:
"""
Gitea issue configuration
Used for workspace locking mechanism.
"""
title: str
body: str
labels: [str] = []
assignee?: str
milestone?: int
check:
len(title) > 0, "Issue title required"
schema WorkspaceLock:
"""
Workspace lock metadata
Stored as Gitea issue for distributed locking.
"""
workspace_name: str
lock_type: "read" | "write" | "deploy"
user: str
timestamp: str
operation?: str
expiry?: str
force_unlock: bool = False
check:
len(workspace_name) > 0, "Workspace name required"
len(user) > 0, "User required"
len(timestamp) > 0, "Timestamp required"
schema ExtensionPublishConfig:
"""
Extension publishing configuration
Defines how extensions are packaged and published to Gitea.
"""
extension_path: str
version: str
release_notes?: str
include_patterns: [str] = ["*.nu", "*.k", "*.toml", "*.md"]
exclude_patterns: [str] = ["*.tmp", "*.log", ".git/*"]
compression: "tar.gz" | "zip" = "tar.gz"
check:
len(extension_path) > 0, "Extension path required"
len(version) > 0, "Version required"
schema GiteaWebhook:
"""
Gitea webhook configuration
For future integration with automated workflows.
"""
url: str
content_type: "json" | "form" = "json"
secret?: str
events: [str] = ["push", "pull_request", "release"]
active: bool = True
check:
len(url) > 0 and url.startswith("http"), \
"Webhook URL must start with http:// or https://"
# Example configurations
_local_docker_gitea = GiteaConfig {
mode = "local"
local = LocalGitea {
enabled = True
deployment = "docker"
port = 3000
data_dir = "~/.provisioning/gitea"
auto_start = True
docker = DockerGitea {
image = "gitea/gitea:1.21"
container_name = "provisioning-gitea"
}
}
auth = GiteaAuth {
token_path = "~/.provisioning/secrets/gitea-token.enc"
username = "provisioning"
}
}
_remote_gitea = GiteaConfig {
mode = "remote"
remote = RemoteGitea {
enabled = True
url = "https://gitea.example.com"
api_url = "https://gitea.example.com/api/v1"
}
auth = GiteaAuth {
token_path = "~/.provisioning/secrets/gitea-token.enc"
username = "provisioning"
}
}
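# Example (sketch): a write lock stored as a Gitea issue, following the WorkspaceLock
# schema above. All values are illustrative; lock creation and expiry handling live
# outside this schema.
_example_workspace_lock = WorkspaceLock {
    workspace_name = "team-alpha"
    lock_type = "write"
    user = "provisioning"
    timestamp = "2025-10-07T11:00:00Z"
    operation = "deploy web tier"
    expiry = "2025-10-07T12:00:00Z"
}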

259
k8s_deploy.k Normal file
View File

@ -0,0 +1,259 @@
# Info: KCL core lib service schemas for provisioning (Provisioning)
# Author: JesusPerezLorenzo
# Release: 0.0.4
# Date: 15-12-2023
schema K8sPort:
"""
K8S Port settings
"""
name: str
typ?: str = "TCP"
container?: int
nodePort?: int
target?: int
schema K8sKeyVal:
"""
K8S label,selector,env settings
"""
key: str
value: str
schema K8sKeyPath:
"""
K8S key,path settings
"""
key: str
path: str
schema K8sVolumeMount:
"""
K8S VolumeMounts settings
"""
name: str
readOnly: bool = False
mountPath: str
subPath?: str
schema K8sVolumeClaim:
"""
K8S VolumeClaim settings
"""
name: str
storageClassName: "manual" | "nfs-client" | "rook-cephfs" = "manual"
modes: ["ReadWriteOnce" | "ReadOnlyMain" | "ReadWriteMany" | "ReadWriteOncePod"] = ["ReadWriteOnce"]
abbrev_mode?: ["RWO" | "ROX" | "RWX" | "RWOP"] = ["RWO"]
reclaimPolicy?: "Recycle" | "Retain" | "Delete" = "Retain"
storage?: str
typ: "volumeClaim" | "configMap" | "secret" | "" = ""
pvMode?: "unspecified" | "Filesystem" | "Block"
pvcMode?: "unspecified" | "Filesystem" | "Block"
hostPath?: str
schema K8sConfigMap:
"""
K8S Volume ConfigMap settings
"""
name: str
schema K8sSecret:
"""
K8S Volume Secret settings
"""
name: str
items: [K8sKeyPath]
schema K8sVolume:
"""
K8S Volume settings
"""
name: str
typ: "volumeClaim" | "configMap" | "secret" = "volumeClaim"
persistentVolumeClaim?: K8sVolumeClaim
items?: [K8sKeyPath]
configMap?: K8sConfigMap
secret?: K8sSecret
schema K8sService:
"""
K8S Service settings
"""
name: str
typ: "ClusterIP" | "NodePort" | "LoadBalancer" | "ExternalName" | "Headless" | "None" = "ClusterIP"
externalName?: str
proto: "TCP" = "TCP"
ports: [K8sPort]
selector?: [K8sKeyVal]
externalIPs?: [str]
schema K8sContainers:
"""
K8S Container settings
"""
name: str = "main"
resources_requests?: K8sResources
resources_limits?: K8sResources
image: str
cmd?: str
imagePull: "IfNotPresent" | "Always" | "Never" = "IfNotPresent"
env?: [K8sKeyVal]
ports?: [K8sPort]
volumeMounts?: [K8sVolumeMount]
schema K8sBackup:
"""
K8S Backup settings
"""
name: str
typ: str
mount_path: str
schema K8sResources:
#"64Mi"
memory: str
#"250m"
cpu: str
schema K8sDeploySpec:
"""
K8S Deployment Spec settings
"""
# K8s Deploy replica
replicas: int = 1
hostUsers?: bool = True
# K8s Deploy containers
containers: [K8sContainers]
imagePullSecret?: str
nodeSelector?: [K8sKeyVal]
nodeName?: str
affinity?: K8sAffinity
# K8s Deploy Volumes
volumes?: [K8sVolume]
# K8s Secrets
secrets?: [K8sSecret]
schema K8sAffinityMatch:
"""
K8S Deployment Affinity Match settings
"""
key: str
operator: "In" | "NotIn" | "Exists" | "DoesNotExist"
values: [str]
schema K8sAffinityLabelSelector:
"""
K8S Deployment Affinity Label Selector settings
"""
typ: "requiredDuringSchedulingIgnoredDuringExecution" | "preferredDuringSchedulingIgnoredDuringExecution" = "requiredDuringSchedulingIgnoredDuringExecution"
labelSelector: [K8sAffinityMatch]
# example: topology.kubernetes.io/zone
topologyKey?: str
matchLabelKeys?: [str]
schema K8sPrxyTLS:
"""
K8S Deployment Proxy TLS settings
"""
httpsRedirect?: bool = False
mode?: "SIMPLE" | "PASSTHROUGH" | "MULTI" | "" = "SIMPLE"
credentialName?: str
schema K8sPrxyPort:
"""
K8S Proxy Port settings
"""
name: str
number?: int
proto: "HTTP" | "HTTPS" | "TCP" = "HTTPS"
schema K8sPrxyGatewayServer:
"""
K8S Deployment Proxy Gateway Server settings
"""
port: K8sPrxyPort
tls?: K8sPrxyTLS
hosts?: [str]
schema K8sPrxyVirtualServiceRoute:
"""
K8S Deployment Proxy Virtual Service Route settings
"""
port_number: int
host: str
schema K8sPrxyVirtualServiceMatchURL:
"""
K8S Deployment Proxy Virtual Service Match URL settings
"""
port?: int
sniHost?: [str]
schema K8sPrxyVirtualServiceMatch:
"""
K8S Deployment Proxy Virtual Service Match settings
"""
typ: "tcp" | "http" | "tls"
location?: [K8sPrxyVirtualServiceMatchURL]
route_destination?: [K8sPrxyVirtualServiceRoute]
schema K8sPrxyVirtualService:
"""
K8S Deployment Proxy Virtual Service settings
"""
hosts: [str]
gateways: [str]
matches?: [K8sPrxyVirtualServiceMatch]
schema K8sAntyAffinityLabelSelector(K8sAffinityLabelSelector):
"""
K8S Deployment AntiAffinity Label Selector settings
"""
weight: int = 100
schema K8sAffinity:
"""
K8S Deployment Affinity settings
"""
affinity?: K8sAffinityLabelSelector
antiAffinity?: K8sAntyAffinityLabelSelector
schema K8sDefs:
name: str
ns: str
domain: str
full_domain: str
primary_dom: str
cluster_domain: str
schema K8sDeploy:
"""
K8S Deployment settings
"""
# K8s Deploy Name
name: str
# K8s Deploy name-in-filenames
name_in_files: str = "${name}"
# K8s NameSpace
namespace: str | "default"
# K8s Create NameSpace
create_ns: bool = False
full_domain?: str
# K8s Deploy labels
labels: [K8sKeyVal] = [K8sKeyVal {key: "${name}", value: "${name}"}]
sel_labels: [K8sKeyVal] = labels
tpl_labels: [K8sKeyVal] = labels
spec: K8sDeploySpec
# Cluster Ingres Proxy to use
prxy?: "istio"
prxy_ns?: str = "istio-system"
prxyGatewayServers?: [K8sPrxyGatewayServer]
prxyVirtualService?: K8sPrxyVirtualService
# TLS certs path for service
tls_path?: str = "ssl"
# Create bin/apply.sh
bin_apply: bool = True
# K8s Service
service?: K8sService
# Service Backup K8s JOB
backups?: [K8sBackup]
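# Usage sketch (illustrative values only): a single-container deployment exposed
# through a ClusterIP service. Image, port and names are placeholders, not defaults
# shipped with the schemas above.
_example_web_deploy = K8sDeploy {
    name = "web"
    namespace = "default"
    spec = K8sDeploySpec {
        replicas = 2
        containers = [K8sContainers {
            name = "main"
            image = "nginx:1.27"
            ports = [K8sPort {name = "http", container = 80}]
        }]
    }
    service = K8sService {
        name = "web"
        ports = [K8sPort {name = "http", container = 80, target = 80}]
    }
}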

5
kcl.mod Normal file
View File

@ -0,0 +1,5 @@
[package]
name = "provisioning"
edition = "v0.11.3"
version = "0.0.1"

0
kcl.mod.lock Normal file
View File

70
lib.k Normal file
View File

@ -0,0 +1,70 @@
# Info: KCL core lib schemas for provisioning (Provisioning)
# Author: JesusPerezLorenzo
# Release: 0.0.4
# Date: 15-12-2023
schema StorageVol:
"""
StorageVol settings
"""
name: str
size: int = 0
total: int = size
type: "ext4" | "xfs" | "btrfs" | "raw" | "zfs" = "ext4"
mount: bool = True
mount_path?: str
fstab: bool = True
check:
len(name) > 0, "Check name value"
#mount == True and mount_path != Undefined
schema Storage(StorageVol):
"""
Storage settings
"""
parts?: [StorageVol] = []
check:
len(name) > 0, "Check name value"
total >= sum([p.size for p in parts]), "🛑 Total size of parts ${sum([p.size for p in parts])} exceeds total storage ${total}"
schema TaskServDef:
"""
TaskServDef settings
"""
name: str
install_mode: "getfile" | "library" | "server" | "library-server" | "server-library" = "library"
profile: str = "default"
target_save_path: str = ""
schema ClusterDef:
"""
ClusterDef settings
"""
name: str
profile: str = "default"
target_save_path: str = ""
schema ScaleData:
"""
scale data
"""
def: str
disabled: bool = False
mode: "auto" | "manual" | "ondemand" = "manual"
expire?: str
from?: str
to?: str
schema ScaleResource:
"""
scale server settings
"""
default: ScaleData
fallback?: ScaleData
up?: ScaleData
down?: ScaleData
min?: ScaleData
max?: ScaleData
path: str = "/etc/scale_provisioning"
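# Usage sketch (illustrative values): a 100 GB volume split into two parts plus a
# taskserv installed in "library" mode. Sizes follow the same unit-free integers the
# Storage check compares.
_example_storage = Storage {
    name = "data"
    size = 100
    total = 100
    mount_path = "/data"
    parts = [
        StorageVol {name = "logs", size = 40, mount_path = "/data/logs"}
        StorageVol {name = "db", size = 60, mount_path = "/data/db"}
    ]
}
_example_taskserv = TaskServDef {
    name = "kubernetes"
    install_mode = "library"
}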

56
main.k Normal file
View File

@ -0,0 +1,56 @@
# Main entry point for provisioning KCL module
# This file imports all schemas to make them discoverable as package submodules
# Author: JesusPerezLorenzo
# Release: 0.1.0
# Date: 29-09-2025
# ============================================================================
# IMPORTANT: KCL Import Pattern
# ============================================================================
# This module uses DIRECT SUBMODULE IMPORTS pattern (no re-exports).
#
# WHY NO RE-EXPORTS?
# Re-exports like "Settings = settings.Settings" create immutable variable
# assignments in KCL, causing ImmutableError (E1001) when extensions try to
# import them. KCL v0.11.3 doesn't support Python-style namespace re-exports.
#
# CORRECT USAGE IN EXTENSIONS:
# import provisioning.settings # For Settings, SecretProvider, SopsConfig
# import provisioning.defaults # For ServerDefaults schemas
# import provisioning.lib # For Storage, TaskServDef, ClusterDef
# import provisioning.server # For Server schema
# import provisioning.cluster # For Cluster schema
# import provisioning.dependencies # For TaskservDependencies, HealthCheck
# import provisioning.workflows # For BatchWorkflow, BatchOperation
# import provisioning.batch # For BatchScheduler, BatchExecutor
# import provisioning.version # For Version, TaskservVersion
# import provisioning.k8s_deploy # For K8s* schemas
# import provisioning.services # For ServiceRegistry, ServiceDefinition
#
# EXAMPLE:
# import provisioning.lib as lib
# import provisioning.settings as settings
#
# _storage = lib.Storage {
#       name = "sda"
# size = 100
# }
#
# ANTI-PATTERN (DO NOT USE):
# Settings = settings.Settings # ❌ Causes ImmutableError!
# Server = server.Server # ❌ Causes ImmutableError!
#
# ============================================================================
# Import core module schemas to make them part of the provisioning package
import .settings
import .defaults
import .lib
import .server
import .cluster
import .dependencies
import .workflows
import .batch
import .version
import .k8s_deploy
import .services

830
modes.k Normal file
View File

@ -0,0 +1,830 @@
# Info: KCL execution mode schemas for provisioning
# Author: Mode System Implementation
# Release: 1.0.0
# Date: 2025-10-06
"""
Execution mode schemas defining deployment patterns and service configurations
Modes:
- solo: Single developer, local development
- multi-user: Team collaboration with shared services
- cicd: CI/CD pipeline execution
- enterprise: Production enterprise deployment
"""
import provisioning.settings as cfg
import provisioning.oci_registry as oci
schema ExecutionMode:
"""
Base execution mode schema defining common configuration
All execution modes inherit from this base schema and must
specify service deployment strategy, authentication, and
workspace policies.
"""
# Mode identifier
mode_name: "solo" | "multi-user" | "cicd" | "enterprise"
# Human-readable description
description: str
# Authentication strategy
authentication: AuthenticationStrategy
# Service deployment configurations
services: ServiceDeployments
# Extension source configuration
extensions: ExtensionConfig
# Workspace management policies
workspaces: WorkspacePolicy
# Security configuration
security: SecurityConfig
# Resource limits (optional, for multi-user/enterprise)
resource_limits?: ResourceLimits
check:
len(description) > 0, "Mode description required"
schema AuthenticationStrategy:
"""Authentication configuration for mode"""
# Authentication type
auth_type: "none" | "token" | "mtls" | "oauth" | "kms"
# Token configuration (for token auth)
token_config?: TokenConfig
# mTLS configuration (for mtls auth)
mtls_config?: MTLSConfig
# OAuth configuration (for oauth auth)
oauth_config?: OAuthConfig
# SSH key storage location
ssh_key_storage: "local" | "kms" | "vault" = "local"
check:
auth_type == "none" or (
(auth_type == "token" and token_config != Undefined) or
(auth_type == "mtls" and mtls_config != Undefined) or
(auth_type == "oauth" and oauth_config != Undefined) or
(auth_type == "kms")
), "Auth config must match auth type"
schema TokenConfig:
"""Token-based authentication configuration"""
token_path: str
token_format: "jwt" | "opaque" = "jwt"
expiry_seconds: int = 86400 # 24 hours
refresh_enabled: bool = True
check:
len(token_path) > 0, "Token path required"
expiry_seconds > 0, "Expiry must be positive"
schema MTLSConfig:
"""Mutual TLS authentication configuration"""
client_cert_path: str
client_key_path: str
ca_cert_path: str
verify_server: bool = True
check:
len(client_cert_path) > 0, "Client cert path required"
len(client_key_path) > 0, "Client key path required"
len(ca_cert_path) > 0, "CA cert path required"
schema OAuthConfig:
"""OAuth 2.0 authentication configuration"""
provider_url: str
client_id: str
client_secret_path: str
scopes: [str] = ["read", "write"]
redirect_uri?: str
check:
len(provider_url) > 0, "Provider URL required"
len(client_id) > 0, "Client ID required"
schema ServiceDeployments:
"""Service deployment configuration"""
orchestrator: ServiceConfig
control_center?: ServiceConfig
coredns?: ServiceConfig
gitea?: ServiceConfig
oci_registry: oci.OCIRegistryConfig
# Custom services
custom_services?: {str: ServiceConfig}
schema ServiceConfig:
"""Individual service configuration"""
# Deployment location
deployment: "local" | "remote" | "k8s" | "disabled"
# For local deployment
local_config?: LocalServiceConfig
# For remote deployment
remote_config?: RemoteServiceConfig
# For Kubernetes deployment
k8s_config?: K8sServiceConfig
# Auto-start service
auto_start: bool = False
# Health check configuration
health_check?: HealthCheck
check:
deployment == "disabled" or (
(deployment == "local" and local_config != Undefined) or
(deployment == "remote" and remote_config != Undefined) or
(deployment == "k8s" and k8s_config != Undefined)
), "Service config must match deployment type"
schema LocalServiceConfig:
"""Local service deployment configuration"""
binary_path?: str
config_path?: str
data_dir: str
port: int
bind_address: str = "127.0.0.1"
tls_enabled: bool = False
check:
port > 0 and port < 65536, "Port must be 1-65535"
len(data_dir) > 0, "Data directory required"
schema RemoteServiceConfig:
"""Remote service configuration"""
endpoint: str
port?: int
tls_enabled: bool = True
verify_ssl: bool = True
timeout: int = 30
retries: int = 3
check:
len(endpoint) > 0, "Endpoint required"
timeout > 0, "Timeout must be positive"
schema K8sServiceConfig:
"""Kubernetes service deployment configuration"""
namespace: str = "provisioning"
deployment_name: str
service_name: str
replicas: int = 1
image: str
image_pull_policy: "Always" | "IfNotPresent" | "Never" = "IfNotPresent"
resources?: K8sResources
check:
len(namespace) > 0, "Namespace required"
len(deployment_name) > 0, "Deployment name required"
replicas > 0, "Replicas must be positive"
schema K8sResources:
"""Kubernetes resource requirements"""
cpu_request: str = "100m"
cpu_limit: str = "500m"
memory_request: str = "128Mi"
memory_limit: str = "512Mi"
schema HealthCheck:
"""Service health check configuration"""
enabled: bool = True
endpoint: str = "/health"
interval: int = 10 # seconds
timeout: int = 5
healthy_threshold: int = 2
unhealthy_threshold: int = 3
check:
interval > 0, "Interval must be positive"
timeout > 0 and timeout < interval, "Timeout must be less than interval"
schema ExtensionConfig:
"""Extension source and distribution configuration"""
# Extension source: local files, gitea, or OCI registry
source: "local" | "gitea" | "oci" | "mixed"
# Local path for extensions (for local source)
local_path?: str
# Gitea configuration (for gitea source)
gitea_config?: GiteaConfig
# OCI registry configuration (for oci source)
oci_registry?: OCIExtensionConfig
# Allow mixed sources
allow_mixed: bool = False
check:
source == "local" and local_path != Undefined or
source == "gitea" and gitea_config != Undefined or
source == "oci" and oci_registry != Undefined or
source == "mixed", "Extension config must match source type"
schema GiteaConfig:
"""Gitea extension repository configuration"""
url: str
organization: str = "provisioning"
username?: str
token_path?: str
verify_ssl: bool = True
check:
len(url) > 0, "Gitea URL required"
schema OCIExtensionConfig:
"""OCI registry extension configuration"""
enabled: bool = True
endpoint: str
namespace: str = "provisioning-extensions"
auth_token_path?: str
tls_enabled: bool = True
verify_ssl: bool = True
cache_dir: str = "~/.provisioning/oci-cache"
check:
len(endpoint) > 0, "OCI endpoint required"
len(namespace) > 0, "OCI namespace required"
schema WorkspacePolicy:
"""Workspace management policies"""
# Workspace locking
locking: "disabled" | "enabled" | "required"
# Lock provider (if locking enabled)
lock_provider?: "gitea" | "etcd" | "redis" | "filesystem"
# Git integration requirement
git_integration: "disabled" | "optional" | "required"
# Workspace isolation
isolation: "none" | "user" | "strict" = "user"
# Maximum concurrent workspaces per user
max_workspaces_per_user?: int
check:
locking == "disabled" or lock_provider != Undefined, \
"Lock provider required when locking enabled"
git_integration in ["disabled", "optional", "required"], \
"Invalid git integration setting"
schema SecurityConfig:
"""Security policies for mode"""
# Encryption requirements
encryption_at_rest: bool = False
encryption_in_transit: bool = False
# Secret management
secret_provider: cfg.SecretProvider = cfg.SecretProvider {}
# DNS modification policy
dns_modification: "none" | "coredns" | "system" = "none"
# Audit logging
audit_logging: bool = False
audit_log_path?: str
# Network policies
network_isolation: bool = False
check:
not audit_logging or audit_log_path != Undefined, \
"Audit log path required when audit logging enabled"
schema ResourceLimits:
"""Resource limits for multi-user/enterprise modes"""
# Per-user limits
max_servers_per_user: int = 10
max_cpu_cores_per_user: int = 32
max_memory_gb_per_user: int = 128
max_storage_gb_per_user: int = 500
# Global limits
max_total_servers?: int
max_total_cpu_cores?: int
max_total_memory_gb?: int
check:
max_servers_per_user > 0, "Max servers must be positive"
max_cpu_cores_per_user > 0, "Max CPU must be positive"
max_memory_gb_per_user > 0, "Max memory must be positive"
# ============================================================================
# Concrete Mode Schemas
# ============================================================================
schema SoloMode(ExecutionMode):
"""
Solo mode: Single developer local development
Characteristics:
- No authentication required
- Local service deployment
- Optional OCI registry for extension testing
- No workspace locking
- Minimal security constraints
Example:
SoloMode {
mode_name = "solo"
description = "Local development environment"
}
"""
mode_name: "solo" = "solo"
description: str = "Single developer local development mode"
authentication: AuthenticationStrategy = AuthenticationStrategy {
auth_type = "none"
ssh_key_storage = "local"
}
services: ServiceDeployments = ServiceDeployments {
orchestrator = ServiceConfig {
deployment = "local"
auto_start = True
local_config = LocalServiceConfig {
data_dir = "~/.provisioning/orchestrator"
port = 8080
}
}
control_center = ServiceConfig {
deployment = "disabled"
}
coredns = ServiceConfig {
deployment = "disabled"
}
gitea = ServiceConfig {
deployment = "disabled"
}
oci_registry = oci.OCIRegistryConfig {
deployment = "local"
type = "zot"
endpoint = "localhost"
port = 5000
tls_enabled = False
auth_required = False
local = oci.LocalOCIConfig {
data_dir = "~/.provisioning/oci-registry"
config_path = "~/.provisioning/oci-registry/config.json"
auto_start = False
}
namespaces = oci.OCINamespaces {
extensions = "dev-extensions"
kcl_packages = "dev-kcl"
platform_images = "dev-platform"
test_images = "dev-test"
}
}
}
extensions: ExtensionConfig = ExtensionConfig {
source = "local"
local_path = "./provisioning/extensions"
allow_mixed = True
}
workspaces: WorkspacePolicy = WorkspacePolicy {
locking = "disabled"
git_integration = "optional"
isolation = "none"
}
security: SecurityConfig = SecurityConfig {
encryption_at_rest = False
encryption_in_transit = False
dns_modification = "none"
audit_logging = False
network_isolation = False
}
schema MultiUserMode(ExecutionMode):
"""
Multi-user mode: Team collaboration with shared services
Characteristics:
- Token-based authentication
- Remote shared services
- OCI registry for extension distribution
- Workspace locking enabled
- Git integration required
- User resource limits
Example:
MultiUserMode {
mode_name = "multi-user"
description = "Team collaboration environment"
}
"""
mode_name: "multi-user" = "multi-user"
description: str = "Team collaboration with shared services"
authentication: AuthenticationStrategy = AuthenticationStrategy {
auth_type = "token"
token_config = TokenConfig {
token_path = "~/.provisioning/tokens/auth"
token_format = "jwt"
expiry_seconds = 86400
refresh_enabled = True
}
ssh_key_storage = "local"
}
services: ServiceDeployments = ServiceDeployments {
orchestrator = ServiceConfig {
deployment = "remote"
remote_config = RemoteServiceConfig {
endpoint = "orchestrator.company.local"
port = 8080
tls_enabled = True
verify_ssl = True
timeout = 30
retries = 3
}
}
control_center = ServiceConfig {
deployment = "remote"
remote_config = RemoteServiceConfig {
endpoint = "control.company.local"
port = 8081
tls_enabled = True
}
}
coredns = ServiceConfig {
deployment = "remote"
remote_config = RemoteServiceConfig {
endpoint = "dns.company.local"
port = 53
tls_enabled = False
}
}
gitea = ServiceConfig {
deployment = "remote"
remote_config = RemoteServiceConfig {
endpoint = "git.company.local"
port = 443
tls_enabled = True
}
}
oci_registry = oci.OCIRegistryConfig {
deployment = "remote"
type = "harbor"
endpoint = "harbor.company.local"
tls_enabled = True
auth_required = True
remote = oci.RemoteOCIConfig {
timeout = 30
retries = 3
verify_ssl = True
}
namespaces = oci.OCINamespaces {
extensions = "provisioning-extensions"
kcl_packages = "provisioning-kcl"
platform_images = "provisioning-platform"
test_images = "provisioning-test"
}
}
}
extensions: ExtensionConfig = ExtensionConfig {
source = "oci"
oci_registry = OCIExtensionConfig {
enabled = True
endpoint = "harbor.company.local"
namespace = "provisioning-extensions"
auth_token_path = "~/.provisioning/tokens/oci"
tls_enabled = True
verify_ssl = True
cache_dir = "~/.provisioning/oci-cache"
}
}
workspaces: WorkspacePolicy = WorkspacePolicy {
locking = "enabled"
lock_provider = "gitea"
git_integration = "required"
isolation = "user"
max_workspaces_per_user = 5
}
security: SecurityConfig = SecurityConfig {
encryption_at_rest = False
encryption_in_transit = True
dns_modification = "coredns"
audit_logging = True
audit_log_path = "/var/log/provisioning/audit.log"
network_isolation = False
}
resource_limits: ResourceLimits = ResourceLimits {
max_servers_per_user = 10
max_cpu_cores_per_user = 32
max_memory_gb_per_user = 128
max_storage_gb_per_user = 500
max_total_servers = 100
max_total_cpu_cores = 320
max_total_memory_gb = 1024
}
schema CICDMode(ExecutionMode):
"""
CI/CD mode: Automated pipeline execution
Characteristics:
- Token or mTLS authentication
- Remote service endpoints
- OCI registry for artifacts
- No workspace locking (stateless)
- Git integration required
- Ephemeral workspaces
Example:
CICDMode {
mode_name = "cicd"
description = "CI/CD pipeline environment"
}
"""
mode_name: "cicd" = "cicd"
description: str = "CI/CD pipeline automated execution"
authentication: AuthenticationStrategy = AuthenticationStrategy {
auth_type = "token"
token_config = TokenConfig {
token_path = "/var/run/secrets/provisioning/token"
token_format = "jwt"
expiry_seconds = 3600 # 1 hour
refresh_enabled = False
}
ssh_key_storage = "kms"
}
services: ServiceDeployments = ServiceDeployments {
orchestrator = ServiceConfig {
deployment = "remote"
remote_config = RemoteServiceConfig {
endpoint = "orchestrator.cicd.local"
port = 8080
tls_enabled = True
verify_ssl = True
timeout = 60
retries = 5
}
}
control_center = ServiceConfig {
deployment = "disabled"
}
coredns = ServiceConfig {
deployment = "remote"
remote_config = RemoteServiceConfig {
endpoint = "dns.cicd.local"
port = 53
}
}
gitea = ServiceConfig {
deployment = "remote"
remote_config = RemoteServiceConfig {
endpoint = "git.cicd.local"
port = 443
tls_enabled = True
}
}
oci_registry = oci.OCIRegistryConfig {
deployment = "remote"
type = "harbor"
endpoint = "registry.cicd.local"
tls_enabled = True
auth_required = True
remote = oci.RemoteOCIConfig {
timeout = 60
retries = 5
verify_ssl = True
}
namespaces = oci.OCINamespaces {
extensions = "cicd-extensions"
kcl_packages = "cicd-kcl"
platform_images = "cicd-platform"
test_images = "cicd-test"
}
}
}
extensions: ExtensionConfig = ExtensionConfig {
source = "oci"
oci_registry = OCIExtensionConfig {
enabled = True
endpoint = "registry.cicd.local"
namespace = "cicd-extensions"
auth_token_path = "/var/run/secrets/provisioning/oci-token"
tls_enabled = True
verify_ssl = True
cache_dir = "/tmp/provisioning-oci-cache"
}
}
workspaces: WorkspacePolicy = WorkspacePolicy {
locking = "disabled"
git_integration = "required"
isolation = "strict"
max_workspaces_per_user = 1
}
security: SecurityConfig = SecurityConfig {
encryption_at_rest = True
encryption_in_transit = True
dns_modification = "coredns"
audit_logging = True
audit_log_path = "/var/log/provisioning/cicd-audit.log"
network_isolation = True
}
resource_limits: ResourceLimits = ResourceLimits {
max_servers_per_user = 5
max_cpu_cores_per_user = 16
max_memory_gb_per_user = 64
max_storage_gb_per_user = 200
}
schema EnterpriseMode(ExecutionMode):
"""
Enterprise mode: Production enterprise deployment
Characteristics:
- mTLS or OAuth authentication
- Kubernetes-deployed services
- Enterprise OCI registry (Harbor HA)
- Workspace locking required
- Git integration required
- Full encryption and auditing
- Strict resource limits
Example:
EnterpriseMode {
mode_name = "enterprise"
description = "Production enterprise environment"
}
"""
mode_name: "enterprise" = "enterprise"
description: str = "Production enterprise deployment with full security"
authentication: AuthenticationStrategy = AuthenticationStrategy {
auth_type = "mtls"
mtls_config = MTLSConfig {
client_cert_path = "/etc/provisioning/certs/client.crt"
client_key_path = "/etc/provisioning/certs/client.key"
ca_cert_path = "/etc/provisioning/certs/ca.crt"
verify_server = True
}
ssh_key_storage = "kms"
}
services: ServiceDeployments = ServiceDeployments {
orchestrator = ServiceConfig {
deployment = "k8s"
k8s_config = K8sServiceConfig {
namespace = "provisioning-system"
deployment_name = "orchestrator"
service_name = "orchestrator-svc"
replicas = 3
image = "harbor.enterprise.local/provisioning/orchestrator:latest"
resources = K8sResources {
cpu_request = "500m"
cpu_limit = "2000m"
memory_request = "1Gi"
memory_limit = "4Gi"
}
}
}
control_center = ServiceConfig {
deployment = "k8s"
k8s_config = K8sServiceConfig {
namespace = "provisioning-system"
deployment_name = "control-center"
service_name = "control-center-svc"
replicas = 2
image = "harbor.enterprise.local/provisioning/control-center:latest"
}
}
coredns = ServiceConfig {
deployment = "k8s"
k8s_config = K8sServiceConfig {
namespace = "kube-system"
deployment_name = "coredns"
service_name = "kube-dns"
replicas = 2
image = "registry.k8s.io/coredns/coredns:latest"
}
}
gitea = ServiceConfig {
deployment = "k8s"
k8s_config = K8sServiceConfig {
namespace = "provisioning-system"
deployment_name = "gitea"
service_name = "gitea-svc"
replicas = 2
image = "gitea/gitea:latest"
}
}
oci_registry = oci.OCIRegistryConfig {
deployment = "remote"
type = "harbor"
endpoint = "harbor.enterprise.local"
tls_enabled = True
auth_required = True
remote = oci.RemoteOCIConfig {
timeout = 60
retries = 5
verify_ssl = True
}
namespaces = oci.OCINamespaces {
extensions = "prod-extensions"
kcl_packages = "prod-kcl"
platform_images = "prod-platform"
test_images = "test-images"
}
}
}
extensions: ExtensionConfig = ExtensionConfig {
source = "oci"
oci_registry = OCIExtensionConfig {
enabled = True
endpoint = "harbor.enterprise.local"
namespace = "prod-extensions"
auth_token_path = "/etc/provisioning/tokens/oci"
tls_enabled = True
verify_ssl = True
cache_dir = "/var/cache/provisioning/oci"
}
}
workspaces: WorkspacePolicy = WorkspacePolicy {
locking = "required"
lock_provider = "etcd"
git_integration = "required"
isolation = "strict"
max_workspaces_per_user = 3
}
security: SecurityConfig = SecurityConfig {
encryption_at_rest = True
encryption_in_transit = True
secret_provider = cfg.SecretProvider {
provider = "kms"
kms_config = cfg.KmsConfig {
server_url = "https://kms.enterprise.local"
auth_method = "certificate"
client_cert_path = "/etc/provisioning/certs/kms-client.crt"
client_key_path = "/etc/provisioning/certs/kms-client.key"
ca_cert_path = "/etc/provisioning/certs/kms-ca.crt"
verify_ssl = True
}
}
dns_modification = "system"
audit_logging = True
audit_log_path = "/var/log/provisioning/enterprise-audit.log"
network_isolation = True
}
resource_limits: ResourceLimits = ResourceLimits {
max_servers_per_user = 20
max_cpu_cores_per_user = 64
max_memory_gb_per_user = 256
max_storage_gb_per_user = 1000
max_total_servers = 500
max_total_cpu_cores = 2000
max_total_memory_gb = 8192
}
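# Usage sketch: the concrete mode schemas above carry complete defaults, so an
# instance only overrides what differs. Here only the description changes; the local
# orchestrator, zot registry and "no auth" settings all come from SoloMode itself.
_example_solo_mode = SoloMode {
    description = "Single developer laptop environment"
}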

487
oci_registry.k Normal file
View File

@ -0,0 +1,487 @@
# Info: KCL OCI registry schemas for provisioning
# Author: Mode System Implementation
# Release: 1.0.0
# Date: 2025-10-06
"""
OCI (Open Container Initiative) registry configuration schemas
Supports multiple registry implementations:
- distribution: Docker Registry v2 (lightweight)
- zot: Cloud-native OCI registry
- harbor: Enterprise-grade registry with security scanning
- artifactory: JFrog Artifactory with OCI support
Purpose:
- Extension distribution via OCI artifacts
- KCL package distribution
- Platform container images
- Test environment images
"""
import regex
schema OCIRegistryConfig:
"""
OCI registry configuration for artifact and image distribution
Examples:
# Local development registry
OCIRegistryConfig {
deployment = "local"
type = "zot"
endpoint = "localhost"
port = 5000
tls_enabled = False
local = LocalOCIConfig {
data_dir = "~/.provisioning/oci-registry"
config_path = "~/.provisioning/oci-registry/config.json"
}
}
# Remote enterprise registry
OCIRegistryConfig {
deployment = "remote"
type = "harbor"
endpoint = "harbor.company.local"
tls_enabled = True
auth_required = True
remote = RemoteOCIConfig {
verify_ssl = True
}
}
"""
# Deployment type
deployment: "local" | "remote" | "disabled"
# Registry implementation type
type: "distribution" | "zot" | "harbor" | "artifactory"
# Registry endpoint (hostname or IP)
endpoint: str
# Registry port (optional, defaults by type)
port?: int = 5000
# TLS/SSL configuration
tls_enabled: bool = False
tls_cert_path?: str
tls_key_path?: str
ca_cert_path?: str
# Authentication
auth_required: bool = False
username?: str
password_path?: str # Path to password file
auth_token_path?: str # Path to auth token
# Local deployment configuration
local?: LocalOCIConfig
# Remote connection configuration
remote?: RemoteOCIConfig
# Artifact namespaces/repositories
namespaces: OCINamespaces
# Registry-specific features
features?: OCIRegistryFeatures
check:
len(endpoint) > 0, "OCI registry endpoint required"
port == Undefined or (port > 0 and port < 65536), \
"Port must be 1-65535"
deployment == "disabled" or (
(deployment == "local" and local != Undefined) or
(deployment == "remote" and remote != Undefined)
), "Config must match deployment type"
not auth_required or (
username != Undefined or auth_token_path != Undefined
), "Authentication config required when auth enabled"
not tls_enabled or (
tls_cert_path != Undefined and tls_key_path != Undefined
) or deployment == "remote", "TLS cert/key required for local TLS"
schema LocalOCIConfig:
"""
Local OCI registry deployment configuration
Used for:
- Solo mode development
- Testing OCI artifact distribution
- Offline extension development
"""
# Data storage directory
data_dir: str
# Registry configuration file path
config_path: str
# Auto-start registry on provisioning startup
auto_start: bool = False
# Binary path (optional, uses PATH if not specified)
binary_path?: str
# Log file path
log_file?: str = "${data_dir}/registry.log"
# HTTP configuration
http_config?: LocalHTTPConfig
# Storage configuration
storage_config?: LocalStorageConfig
check:
len(data_dir) > 0, "Data directory required"
len(config_path) > 0, "Config path required"
regex.match(data_dir, r"^[~/]"), \
"Data dir must be absolute or home-relative path"
schema LocalHTTPConfig:
"""HTTP configuration for local registry"""
listen_address: str = "127.0.0.1"
listen_port: int = 5000
read_timeout: int = 60 # seconds
write_timeout: int = 60
idle_timeout: int = 120
check:
listen_port > 0 and listen_port < 65536, "Port must be 1-65535"
schema LocalStorageConfig:
"""Storage configuration for local registry"""
# Storage backend
backend: "filesystem" | "s3" | "azure" = "filesystem"
# Filesystem storage
rootdirectory?: str
# Garbage collection
gc_enabled: bool = True
gc_interval: int = 3600 # seconds
# Deduplication
dedupe_enabled: bool = True
schema RemoteOCIConfig:
"""
Remote OCI registry connection configuration
Used for:
- Multi-user shared registry
- CI/CD artifact registry
- Enterprise production registry
"""
# Connection timeout (seconds)
timeout: int = 30
# Retry configuration
retries: int = 3
retry_delay: int = 5 # seconds
retry_backoff: float = 2.0 # exponential backoff multiplier
# SSL/TLS verification
verify_ssl: bool = True
# Proxy configuration (optional)
http_proxy?: str
https_proxy?: str
no_proxy?: [str]
# Rate limiting
rate_limit?: RateLimitConfig
check:
timeout > 0, "Timeout must be positive"
retries >= 0, "Retries must be non-negative"
retry_backoff > 1.0, "Backoff multiplier must be > 1.0"
schema RateLimitConfig:
"""Rate limiting configuration for remote registry"""
# Requests per second
requests_per_second: int = 10
# Burst size
burst: int = 20
# Per-operation limits (optional)
pull_limit?: int
push_limit?: int
check:
requests_per_second > 0, "Rate limit must be positive"
burst > 0, "Burst size must be positive"
schema OCINamespaces:
"""
OCI registry namespaces for different artifact types
Namespaces organize artifacts by purpose and allow
different access control policies per namespace.
"""
# Extension artifacts (providers, taskservs, clusters)
extensions: str = "provisioning-extensions"
# KCL package artifacts
kcl_packages: str = "provisioning-kcl"
# Platform service images (orchestrator, control-center)
platform_images: str = "provisioning-platform"
# Test environment images
test_images: str = "provisioning-test"
# Custom user-defined namespaces
custom?: {str: str}
check:
len(extensions) > 0, "Extensions namespace required"
len(kcl_packages) > 0, "KCL packages namespace required"
len(platform_images) > 0, "Platform images namespace required"
len(test_images) > 0, "Test images namespace required"
# Validate namespace naming convention
regex.match(extensions, r"^[a-z0-9][a-z0-9-]*[a-z0-9]$"), \
"Extensions namespace must be lowercase alphanumeric with hyphens"
regex.match(kcl_packages, r"^[a-z0-9][a-z0-9-]*[a-z0-9]$"), \
"KCL packages namespace must be lowercase alphanumeric with hyphens"
schema OCIRegistryFeatures:
"""
Registry-specific feature configuration
Different registry implementations support different features.
This schema allows enabling/disabling features based on
registry capabilities.
"""
# Vulnerability scanning (Harbor, Artifactory)
vulnerability_scanning: bool = False
scanner_type?: "trivy" | "clair" | "anchore"
# Image signing/verification (Notary, Cosign)
image_signing: bool = False
signing_method?: "notary" | "cosign"
# Replication (Harbor)
replication_enabled: bool = False
replication_targets?: [ReplicationTarget]
# Quota management
quota_enabled: bool = False
quota_config?: QuotaConfig
# Webhook notifications
webhook_enabled: bool = False
webhook_endpoints?: [str]
# Garbage collection
gc_enabled: bool = True
gc_schedule?: str = "0 2 * * *" # Daily at 2 AM
check:
not vulnerability_scanning or scanner_type != Undefined, \
"Scanner type required when vulnerability scanning enabled"
not image_signing or signing_method != Undefined, \
"Signing method required when image signing enabled"
schema ReplicationTarget:
"""Harbor replication target configuration"""
name: str
type: "harbor" | "docker-hub" | "aws-ecr" | "azure-acr" | "google-gcr"
endpoint: str
credentials?: str # Path to credentials file
verify_ssl: bool = True
check:
len(name) > 0, "Replication target name required"
len(endpoint) > 0, "Replication endpoint required"
schema QuotaConfig:
"""Registry quota configuration"""
# Storage quota (GB)
storage_limit_gb: int = 100
# Artifact count limit
artifact_limit?: int = 10000
# Per-namespace quotas
namespace_quotas?: {str: NamespaceQuota}
check:
storage_limit_gb > 0, "Storage limit must be positive"
schema NamespaceQuota:
"""Per-namespace quota configuration"""
storage_limit_gb: int = 50
artifact_limit: int = 1000
check:
storage_limit_gb > 0, "Namespace storage limit must be positive"
artifact_limit > 0, "Namespace artifact limit must be positive"
# ============================================================================
# Helper Schemas
# ============================================================================
schema OCIArtifactReference:
"""
OCI artifact reference for pulling/pushing artifacts
Format: <registry>/<namespace>/<repository>:<tag>@<digest>
"""
# Registry endpoint
registry: str
# Namespace/project
namespace: str
# Repository name
repository: str
# Tag (optional, defaults to "latest")
tag: str = "latest"
# Digest (optional, for content-addressable pulls)
digest?: str
# Computed full reference
full_reference: str = "${registry}/${namespace}/${repository}:${tag}"
check:
len(registry) > 0, "Registry required"
len(namespace) > 0, "Namespace required"
len(repository) > 0, "Repository required"
len(tag) > 0, "Tag required"
regex.match(tag, r"^[a-zA-Z0-9_][a-zA-Z0-9._-]{0,127}$"), \
"Invalid tag format"
schema OCIPullPolicy:
"""
OCI artifact pull policy configuration
Defines caching and pull behavior for artifacts
"""
# Pull policy
policy: "always" | "if-not-present" | "never" = "if-not-present"
# Cache TTL (seconds)
cache_ttl: int = 3600
# Verify digest on cached artifacts
verify_cached: bool = True
# Allow insecure registries (development only)
allow_insecure: bool = False
check:
cache_ttl > 0, "Cache TTL must be positive"
policy in ["always", "if-not-present", "never"], \
"Invalid pull policy"
schema OCIPushPolicy:
"""
OCI artifact push policy configuration
Defines pushing behavior and constraints
"""
# Allow overwriting existing tags
allow_overwrite: bool = False
# Require tag signing before push
require_signing: bool = False
# Automatic tagging strategy
auto_tag: bool = True
tag_format?: str = "v{version}-{timestamp}"
# Compression
compression_enabled: bool = True
compression_level: int = 6 # 0-9
check:
compression_level >= 0 and compression_level <= 9, \
"Compression level must be 0-9"
# ============================================================================
# Registry-Specific Configuration
# ============================================================================
schema ZotRegistryConfig(OCIRegistryConfig):
"""
Zot registry specific configuration
Zot is a lightweight, cloud-native OCI registry focused on
minimal resource usage and developer experience.
"""
type: "zot" = "zot"
# Zot-specific features
zot_features?: ZotFeatures
schema ZotFeatures:
"""Zot-specific features"""
# Enable search API
search_enabled: bool = True
# Enable metrics endpoint
metrics_enabled: bool = True
metrics_port: int = 9090
# Enable sync (pull-through cache)
sync_enabled: bool = False
sync_registries?: [str]
# Enable scrub (background verification)
scrub_enabled: bool = True
scrub_interval: str = "24h"
schema HarborRegistryConfig(OCIRegistryConfig):
"""
Harbor registry specific configuration
Harbor is an enterprise-grade container registry with
security scanning, replication, and RBAC.
"""
type: "harbor" = "harbor"
# Harbor-specific configuration
harbor_config?: HarborConfig
schema HarborConfig:
"""Harbor-specific configuration"""
# Harbor project (namespace)
project: str
# Project visibility
public_project: bool = False
# Content trust (Notary)
content_trust: bool = False
# Auto-scan on push
auto_scan: bool = True
# Prevent vulnerable images
prevent_vulnerable: bool = True
severity_threshold: "critical" | "high" | "medium" | "low" = "high"
check:
len(project) > 0, "Harbor project required"
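# Usage sketch: referencing a packaged extension in a local registry. With these
# illustrative values the computed full_reference is
# "localhost:5000/dev-extensions/kubernetes-taskserv:1.2.0".
_example_artifact_ref = OCIArtifactReference {
    registry = "localhost:5000"
    namespace = "dev-extensions"
    repository = "kubernetes-taskserv"
    tag = "1.2.0"
}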

33
server.k Normal file
View File

@ -0,0 +1,33 @@
# Info: KCL core lib server schemas for provisioning (Provisioning)
# Author: JesusPerezLorenzo
# Release: 0.0.4
# Date: 15-12-2023
import regex
import .defaults
import .lib
schema Server(defaults.ServerDefaults):
"""
server settings
"""
not_use: bool = False
# Hostname as reference for resource if is changed later inside server, change will not be updated in resource inventory
hostname: str
title: str
network_private_id?: str
# extra hostnames for server local resolution
extra_hostnames?: [str]
delete_lock: bool = False
taskservs?: [lib.TaskServDef]
cluster?: [lib.ClusterDef]
check:
len(hostname) > 0, "Check hostname value"
len(title) > 0, "Check title value"
priv_cidr_block == Undefined or regex.match(priv_cidr_block, "^(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(?:\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}\/(?:3[0-2]|[0-2]?[0-9])$"), "'priv_cidr_block = ${priv_cidr_block}' check value definition"
#network_private_ip == Undefined or regex.match(network_private_ip,"^\$.*$") or regex.match(network_private_ip, "^((25[0-5]|2[0-4][0-9]|[0-1]?[0-9]?[0-9])\.){3}(25[0-5]|2[0-4][0-9]|[0-1]?[0-9]?[0-9])$"), "'network_private_ip = ${network_private_ip}' check value definition (use $value or xx.xx.xx.xx)"
#liveness_ip == Undefined or regex.match(liveness_ip,"^\$.*$") or regex.match(liveness_ip, "^((25[0-5]|2[0-4][0-9]|[0-1]?[0-9]?[0-9])\.){3}(25[0-5]|2[0-4][0-9]|[0-1]?[0-9]?[0-9])$"), "'liveness_ip = ${liveness_ip}' check value definition (use $value or xx.xx.xx.xx)"
# len(adm_user.password) > 0, "Check Admin User password 'adm_user.password'"
# len(adm_user.email) > 0, "Check Admin User email 'adm_user.email'"
# len(db.name) > 0, "Check DB name"

254
services.k Normal file
View File

@ -0,0 +1,254 @@
"""
Service Registry Schema for Provisioning Platform
Defines platform services (orchestrator, control-center, CoreDNS, Gitea, OCI registry, etc.)
and their lifecycle management configuration.
Version: 1.0.0
"""
schema ServiceRegistry:
"""Platform service registry configuration"""
services: {str: ServiceDefinition}
check:
len(services) > 0, "At least one service must be defined"
schema ServiceDefinition:
"""Individual service definition"""
name: str
type: "platform" | "infrastructure" | "utility"
category: "orchestration" | "auth" | "dns" | "git" | "registry" | "api" | "ui" | "monitoring"
description?: str
# Service requirements
required_for: [str] = [] # Operations requiring this service
dependencies: [str] = [] # Other services this depends on
conflicts: [str] = [] # Services that conflict
# Deployment configuration
deployment: ServiceDeployment
# Health check
health_check: HealthCheck
# Startup configuration
startup: StartupConfig = StartupConfig {}
# Resource limits
resources?: ResourceLimits
check:
len(name) > 0, "Service name cannot be empty"
not (name in dependencies), "Service cannot depend on itself"
len(set(dependencies) & set(conflicts)) == 0, \
"Service cannot both depend on and conflict with same service"
schema ServiceDeployment:
"""Service deployment configuration"""
mode: "binary" | "docker" | "docker-compose" | "kubernetes" | "remote"
binary?: BinaryDeployment
docker?: DockerDeployment
docker_compose?: DockerComposeDeployment
kubernetes?: KubernetesDeployment
remote?: RemoteDeployment
check:
(mode == "binary" and binary != Undefined) or \
(mode == "docker" and docker != Undefined) or \
(mode == "docker-compose" and docker_compose != Undefined) or \
(mode == "kubernetes" and kubernetes != Undefined) or \
(mode == "remote" and remote != Undefined), \
"Deployment configuration must match deployment mode"
schema BinaryDeployment:
"""Binary service deployment"""
binary_path: str
args: [str] = []
working_dir?: str
env: {str: str} = {}
user?: str
group?: str
check:
len(binary_path) > 0, "Binary path cannot be empty"
schema DockerDeployment:
"""Docker container deployment"""
image: str
container_name: str
ports: [str] = []
volumes: [str] = []
environment: {str: str} = {}
command?: [str]
networks: [str] = []
restart_policy: "no" | "always" | "on-failure" | "unless-stopped" = "unless-stopped"
check:
len(image) > 0, "Docker image cannot be empty"
len(container_name) > 0, "Container name cannot be empty"
schema DockerComposeDeployment:
"""Docker Compose deployment"""
compose_file: str
service_name: str
project_name?: str
env_file?: str
check:
len(compose_file) > 0, "Compose file path cannot be empty"
len(service_name) > 0, "Service name cannot be empty"
schema KubernetesDeployment:
"""Kubernetes deployment"""
namespace: str
deployment_name: str
kubeconfig?: str
manifests_path?: str
helm_chart?: HelmChart
check:
len(namespace) > 0, "Namespace cannot be empty"
len(deployment_name) > 0, "Deployment name cannot be empty"
schema HelmChart:
"""Helm chart configuration"""
chart: str
release_name: str
repo_url?: str
version?: str
values_file?: str
check:
len(chart) > 0, "Chart name cannot be empty"
len(release_name) > 0, "Release name cannot be empty"
schema RemoteDeployment:
"""Remote service deployment"""
endpoint: str
tls_enabled: bool = True
auth_token_path?: str
cert_path?: str
check:
len(endpoint) > 0, "Endpoint cannot be empty"
schema HealthCheck:
"""Service health check configuration"""
type: "http" | "tcp" | "command" | "file" | "none"
http?: HttpHealthCheck
tcp?: TcpHealthCheck
command?: CommandHealthCheck
file?: FileHealthCheck
interval: int = 10
retries: int = 3
timeout: int = 5
check:
(type == "http" and http != Undefined) or \
(type == "tcp" and tcp != Undefined) or \
(type == "command" and command != Undefined) or \
(type == "file" and file != Undefined) or \
(type == "none"), \
"Health check configuration must match health check type"
interval > 0, "Interval must be positive"
retries > 0, "Retries must be positive"
timeout > 0, "Timeout must be positive"
schema HttpHealthCheck:
"""HTTP health check"""
endpoint: str
expected_status: int = 200
method: "GET" | "POST" | "HEAD" = "GET"
headers: {str: str} = {}
check:
len(endpoint) > 0, "Endpoint cannot be empty"
expected_status >= 100 and expected_status < 600, \
"HTTP status must be valid (100-599)"
schema TcpHealthCheck:
"""TCP health check"""
host: str
port: int
check:
len(host) > 0, "Host cannot be empty"
port > 0 and port <= 65535, "Port must be 1-65535"
schema CommandHealthCheck:
"""Command-based health check"""
command: str
expected_exit_code: int = 0
check:
len(command) > 0, "Command cannot be empty"
schema FileHealthCheck:
"""File-based health check"""
path: str
must_exist: bool = True
check:
len(path) > 0, "Path cannot be empty"
schema StartupConfig:
"""Service startup configuration"""
auto_start: bool = False
start_timeout: int = 60
start_order: int = 100
restart_on_failure: bool = True
max_restarts: int = 3
check:
start_timeout > 0, "Start timeout must be positive"
start_order > 0, "Start order must be positive"
max_restarts >= 0, "Max restarts must be non-negative"
schema ResourceLimits:
"""Resource limits for service"""
cpu_limit?: str # e.g., "2", "500m"
memory_limit?: str # e.g., "1Gi", "512Mi"
disk_limit?: str # e.g., "10Gi"
schema ServiceState:
"""Service runtime state"""
name: str
status: "running" | "stopped" | "failed" | "starting" | "stopping" | "unknown"
pid?: int
started_at?: str
uptime?: int
health_status: "healthy" | "unhealthy" | "unknown" = "unknown"
last_health_check?: str
restart_count: int = 0
schema ServiceOperation:
"""Service operation request"""
service_name: str
operation: "start" | "stop" | "restart" | "reload" | "health-check"
force: bool = False
timeout?: int
check:
len(service_name) > 0, "Service name cannot be empty"
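
For reference, a minimal sketch of how these schemas might compose into a registry entry; the service name, image, ports, and health endpoint below are illustrative assumptions, not values shipped with the package.

```kcl
# Hypothetical registry entry: an orchestrator run as a Docker container (illustrative values only)
service_registry_example: ServiceRegistry = ServiceRegistry {
    services = {
        "orchestrator": ServiceDefinition {
            name = "orchestrator"
            type = "platform"
            category = "orchestration"
            description = "Workflow orchestrator (example values)"
            deployment = ServiceDeployment {
                mode = "docker"
                docker = DockerDeployment {
                    image = "example/orchestrator:latest"
                    container_name = "orchestrator"
                    ports = ["8080:8080"]
                }
            }
            health_check = HealthCheck {
                type = "http"
                http = HttpHealthCheck {
                    endpoint = "http://localhost:8080/health"
                }
            }
            startup = StartupConfig {
                auto_start = True
                start_order = 10
            }
        }
    }
}
```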

151
settings.k Normal file
View File

@ -0,0 +1,151 @@
# Info: KCL core lib settings schemas for provisioning (Provisioning)
# Author: JesusPerezLorenzo
# Release: 0.0.4
# Date: 15-12-2023
schema SecretProvider:
"""
Secret provider configuration for SOPS or KMS
"""
# Secret provider type: sops or kms
provider: "sops" | "kms" = "sops"
# Configuration specific to SOPS
sops_config?: SopsConfig
# Configuration specific to KMS
kms_config?: KmsConfig
schema SopsConfig:
"""
SOPS configuration settings
"""
# Path to SOPS configuration file
config_path?: str
# Path to Age key file for encryption
age_key_file?: str
# Age recipients for encryption
age_recipients?: str
# Use Age encryption (default) or other methods
use_age: bool = True
schema KmsConfig:
"""
KMS configuration settings for Cosmian KMS
"""
# KMS server URL
server_url: str
# Authentication method: certificate, token, or basic
auth_method: "certificate" | "token" | "basic" = "certificate"
# Client certificate path (for certificate auth)
client_cert_path?: str
# Client private key path (for certificate auth)
client_key_path?: str
# CA certificate path for server verification
ca_cert_path?: str
# API token (for token auth)
api_token?: str
# Username (for basic auth)
username?: str
# Password (for basic auth)
password?: str
# Timeout for requests in seconds
timeout: int = 30
# Verify SSL certificates
verify_ssl: bool = True
schema AIProvider:
"""
AI provider configuration for natural language processing
"""
# Enable AI capabilities
enabled: bool = False
# AI provider type: openai, claude, or generic
provider: "openai" | "claude" | "generic" = "openai"
# API endpoint URL (for generic provider or custom endpoints)
api_endpoint?: str
# API key for authentication
api_key?: str
# Model name to use
model?: str
# Maximum tokens for responses
max_tokens: int = 2048
# Temperature for response creativity (0.0-1.0)
temperature: float = 0.3
# Timeout for API requests in seconds
timeout: int = 30
# Enable AI for template generation
enable_template_ai: bool = True
# Enable AI for queries
enable_query_ai: bool = True
# Enable AI for webhooks/chat interfaces
enable_webhook_ai: bool = False
schema RunSet:
# Wait until requested taskserv is completed: true or false
wait: bool = True
# Format for output: human (default) | yaml | json
# Server info can be requested via the aws CLI by adding the option: --output yaml | json | text | table
output_format: "human" | "yaml" | "json" = "human"
# Output path to copy results
output_path: str = "tmp/NOW-deploy"
# Inventory file
inventory_file: str = "./inventory.yaml"
# Prefix commands with 'time' to collect timing info when enabled
use_time: bool = True
schema Settings:
"""
Settings
"""
# Main name for provisioning
main_name: str
main_title: str = main_name
# #provider: "local" | "upcloud" | "aws"
# # Settings from servers has priority over these defaults ones, if a value is not set in server item, defaults one will be used instead
# #defaults_path: str = "../defaults.yaml"
# Settings data is auto-generated, checked, and auto-filled during taskserv operations
# Path for automatically generated settings for VPC, subnets, SG, etc.
settings_path: str = "./settings.yaml"
# Directory path for providers default-settings
defaults_provs_dirpath: str = "./defs"
# Suffix for providers default-settings filenames with extension (example: aws_defaults.k)
defaults_provs_suffix: str = "_defaults.k"
# Provision data directory path to save providers specific settings (uuids, vpc, etc)
prov_data_dirpath: str = "./data"
# Suffix for providers data-settings filenames with extension (example: aws_settings.k)
prov_data_suffix: str = "_settings.k"
# Directory path to collect created infos, taskservs
created_taskservs_dirpath: str = "./tmp/NOW_deployment"
# Directory path to collect resources for provisioning created infos, taskservs
prov_resources_path: str = "./resources"
# Directory path to collect created clusters
created_clusters_dirpath: str = "./tmp/NOW_clusters"
# Directory path to collect clusters for provisioning
prov_clusters_path: str = "./clusters"
# Directory path for local bin on provisioning
prov_local_bin_path: str = "./bin"
# Secret management configuration
secrets: SecretProvider = SecretProvider {}
# AI provider configuration
ai: AIProvider = AIProvider {}
runset: RunSet
# Default values can be overwritten by cluster settings
# Cluster admin host to connect to via SSH
cluster_admin_host: str
# Cluster admin host port for SSH connections
cluster_admin_port: int = 22
# Time in seconds to wait for servers to reach the started state and accept SSH
servers_wait_started: int = 27
# Cluster admin user for SSH connections
cluster_admin_user: str = "root"
# Clusters save path, or use main settings
clusters_save_path: str = "/${main_name}/clusters"
# Server definition paths
servers_paths: [str] = ["servers"]
# Common cluster definition paths, mainly Cluster ones
clusters_paths: [str] = ["clusters"]
#clusters: [str] = [ "clusters" ]
#check:
# len(provider) > 0, "Check provider value"
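
A hedged sketch of a `Settings` instance combining the secret, AI, and runset schemas above; the workspace name, admin host, and key path are placeholder assumptions, not package defaults.

```kcl
# Illustrative settings instance; all values are placeholders
settings_example: Settings = Settings {
    main_name = "demo-infra"
    secrets = SecretProvider {
        provider = "sops"
        sops_config = SopsConfig {
            age_key_file = "~/.config/sops/age/keys.txt"
        }
    }
    ai = AIProvider {
        enabled = False
    }
    runset = RunSet {
        wait = True
        output_format = "yaml"
    }
    cluster_admin_host = "admin.example.internal"
}
```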

107
version.k Normal file
View File

@ -0,0 +1,107 @@
"""
KCL Version Management Schema for Provisioning System
Provides type-safe version definitions with GitHub release integration
"""
schema Version:
"""Version information for a component with optional GitHub integration"""
# Version number (e.g., "1.31.0") or "latest"
current: str
# GitHub releases URL for automated checking
source?: str
# GitHub tags URL (alternative source)
tags?: str
# Official project website
site?: str
# Enable automatic latest version checking
check_latest?: bool = False
# Cache duration in seconds (24h default)
grace_period?: int = 86400
check:
len(current) > 0, "Version current field cannot be empty"
current == "latest" or current == "" or len(current.split(".")) >= 1, "Version must be semantic (x.y.z), 'latest', or empty"
schema TaskservVersion:
"""Complete taskserv version configuration with dependency tracking"""
# Taskserv name (must match directory)
name: str
# Primary version configuration
version: Version
# Other taskservs this component depends on
dependencies?: [str]
# Profile-specific version overrides
profiles?: {str:Version}
check:
len(name) > 0, "Taskserv name cannot be empty"
name == name.lower(), "Taskserv name must be lowercase"
schema VersionCache:
"""Cache structure for latest version lookups"""
# Resolved version string
version: str
# ISO timestamp of last fetch
fetched_at: str
# Source URL used for resolution
source: str
# Time-to-live in seconds
ttl: int = 86400
check:
len(version) > 0, "Cached version cannot be empty"
len(source) > 0, "Cache source URL cannot be empty"
# Package metadata for core provisioning KCL module
schema PackageMetadata:
"""Core package metadata for distribution"""
# Package name
name: str
# Package version
version: str
# API compatibility version
api_version: str
# Build timestamp
build_date: str
# Minimum KCL version required
kcl_min_version: str
# Maximum KCL version supported
kcl_max_version: str
# External dependencies
dependencies: {str:str}
# Feature flags
features: {str:bool}
# Available schema exports
schema_exports: [str]
check:
len(name) > 0, "Package name cannot be empty"
len(version) > 0, "Package version cannot be empty"
# Default package metadata
package_metadata: PackageMetadata = {
name = "provisioning"
version = "0.1.0"
api_version = "v1"
build_date = "2025-09-28"
kcl_min_version = "0.11.0"
kcl_max_version = "0.12.0"
dependencies = {}
features = {
server_management = True
cluster_orchestration = True
provider_abstraction = True
workflow_automation = True
batch_operations = True
}
schema_exports = [
"Settings"
"Server"
"Cluster"
"Provider"
"Workflow"
"BatchWorkflow"
"Version"
"PackageMetadata"
]
}
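
As an illustration, a `TaskservVersion` entry for a hypothetical `kubernetes` taskserv might look like the sketch below; the URL, version numbers, and dependency names are assumptions.

```kcl
# Example taskserv version pin with GitHub release checking enabled (illustrative values)
kubernetes_version_example: TaskservVersion = TaskservVersion {
    name = "kubernetes"
    version = Version {
        current = "1.31.0"
        source = "https://github.com/kubernetes/kubernetes/releases"
        check_latest = True
    }
    dependencies = ["containerd", "etcd"]
    profiles = {
        "stable": Version {current = "1.30.5"}
    }
}
```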

287
workflows.k Normal file
View File

@ -0,0 +1,287 @@
# Info: KCL batch workflow schemas for provisioning (Provisioning)
# Author: JesusPerezLorenzo
# Release: 0.0.1
# Date: 25-09-2025
# Description: Core batch workflow schemas following PAP principles
import .settings
schema DependencyDef:
"""
Dependency definition between batch operations
Supports both sequential and conditional dependencies
"""
# Target operation ID that this dependency points to
target_operation_id: str
# Dependency type: 'sequential' waits for completion, 'conditional' waits for specific conditions
dependency_type: "sequential" | "conditional" = "sequential"
# For conditional dependencies, specify required conditions
conditions?: [str] = []
# Timeout in seconds to wait for dependency resolution
timeout: int = 300
# Whether failure of dependency should fail this operation
fail_on_dependency_error: bool = True
check:
len(target_operation_id) > 0, "Target operation ID cannot be empty"
timeout > 0, "Timeout must be positive"
schema RetryPolicy:
"""
Retry policy configuration for batch operations
Supports exponential backoff and custom retry conditions
"""
# Maximum number of retry attempts (0 = no retries)
max_attempts: int = 3
# Initial delay between retries in seconds
initial_delay: int = 5
# Maximum delay between retries in seconds
max_delay: int = 300
# Backoff multiplier (1.0 = linear, >1.0 = exponential)
backoff_multiplier: float = 2.0
# Specific error codes/conditions that should trigger retries
retry_on_errors?: [str] = ["connection_error", "timeout", "rate_limit"]
# Whether to retry on any error (if retry_on_errors is empty)
retry_on_any_error: bool = False
check:
max_attempts >= 0, "Max attempts cannot be negative"
initial_delay > 0, "Initial delay must be positive"
max_delay >= initial_delay, "Max delay must be >= initial delay"
backoff_multiplier >= 1, "Backoff multiplier must be >= 1.0"
schema RollbackStrategy:
"""
Rollback strategy configuration for failed batch operations
Supports different rollback approaches based on operation type
"""
# Whether rollback is enabled
enabled: bool = True
# Rollback strategy: 'none', 'immediate', 'batch_end', 'manual'
strategy: "none" | "immediate" | "batch_end" | "manual" = "immediate"
# Whether to preserve partial state for manual recovery
preserve_partial_state: bool = False
# Custom rollback commands/operations
custom_rollback_operations?: [str] = []
# Timeout for rollback operations
rollback_timeout: int = 600
check:
rollback_timeout > 0, "Rollback timeout must be positive"
schema MonitoringConfig:
"""
Monitoring and observability configuration for batch workflows
Integrates with various monitoring backends
"""
# Whether monitoring is enabled
enabled: bool = True
# Monitoring backend: 'prometheus', 'grafana', 'datadog', 'custom'
backend: "prometheus" | "grafana" | "datadog" | "custom" = "prometheus"
# Metrics endpoint URL (for custom backends)
endpoint?: str
# Metric collection interval in seconds
collection_interval: int = 30
# Whether to enable detailed operation tracing
enable_tracing: bool = True
# Log level for batch operations
log_level: "debug" | "info" | "warn" | "error" = "info"
# Whether to send notifications on workflow completion/failure
enable_notifications: bool = False
# Notification channels (webhooks, slack, email, etc.)
notification_channels?: [str] = []
check:
collection_interval > 0, "Collection interval must be positive"
schema StorageConfig:
"""
Storage backend configuration for batch workflow state and results
Supports multiple storage backends including SurrealDB and filesystem
"""
# Storage backend type
backend: "surrealdb" | "filesystem" | "redis" | "postgresql" = "filesystem"
# Connection configuration for database backends
connection_config?: {str:str} = {}
# Base path for filesystem backend
base_path: str = "./batch_workflows"
# Whether to enable state persistence
enable_persistence: bool = True
# State retention period in hours (0 = keep forever)
# 1 week default
retention_hours: int = 168
# Whether to compress stored data
enable_compression: bool = False
# Encryption settings for stored data
encryption?: settings.SecretProvider
check:
len(base_path) > 0, "Base path cannot be empty"
retention_hours >= 0, "Retention hours cannot be negative"
schema BatchOperation:
"""
Individual operation definition within a batch workflow
Supports various operation types with provider-agnostic configuration
"""
# Unique operation identifier within workflow
operation_id: str
# Human-readable operation name/description
name: str
# Operation type: server, taskserv, cluster, custom
operation_type: "server" | "taskserv" | "cluster" | "custom" = "server"
# Target provider (upcloud, aws, mixed, etc.)
provider?: str
# Specific action: create, delete, update, scale, etc.
action: "create" | "delete" | "update" | "scale" | "configure" = "create"
# Operation-specific parameters (flexible configuration)
parameters: {str:str} = {}
# Dependencies on other operations
dependencies?: [DependencyDef] = []
# Retry policy for this operation
retry_policy: RetryPolicy = RetryPolicy {}
# Rollback strategy for this operation
rollback_strategy: RollbackStrategy = RollbackStrategy {}
# Operation execution settings
# 30 minutes default
timeout: int = 1800
# Whether operation can run in parallel with others
allow_parallel: bool = True
# Priority for operation scheduling (higher = earlier execution)
priority: int = 0
# Validation rules for operation parameters
validation_rules?: [str] = []
# Expected outcomes/conditions for success
success_conditions?: [str] = []
check:
len(operation_id) > 0, "Operation ID cannot be empty"
len(name) > 0, "Operation name cannot be empty"
timeout > 0, "Timeout must be positive"
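
For illustration, a single server-creation operation with one dependency and a custom retry policy could be declared as follows; the provider name, parameter keys, and target operation ID are assumptions.

```kcl
# Example operation: create a server after the network operation completes (illustrative values)
create_web_server_op: BatchOperation = BatchOperation {
    operation_id = "create-web-01"
    name = "Create web server"
    operation_type = "server"
    provider = "upcloud"
    action = "create"
    parameters = {
        "plan": "2xCPU-4GB"
        "zone": "de-fra1"
    }
    dependencies = [DependencyDef {
        target_operation_id = "create-network"
        dependency_type = "sequential"
    }]
    retry_policy = RetryPolicy {
        max_attempts = 5
        backoff_multiplier = 2.0
    }
    timeout = 900
}
```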
schema BatchWorkflow:
"""
Main batch workflow definition supporting mixed provider operations
Follows PAP principles with configuration-driven architecture
"""
# Unique workflow identifier
workflow_id: str
# Human-readable workflow name
name: str
# Workflow description
description?: str = ""
# Workflow metadata
version: str = "1.0.0"
created_at?: str
modified_at?: str
# List of operations in this workflow
operations: [BatchOperation]
# Global workflow settings
# Maximum parallel operations (0 = unlimited)
max_parallel_operations: int = 5
# Global timeout for entire workflow in seconds
# 2 hours default
global_timeout: int = 7200
# Whether to stop workflow on first failure
fail_fast: bool = False
# Storage backend configuration
storage: StorageConfig = StorageConfig {}
# Monitoring configuration
monitoring: MonitoringConfig = MonitoringConfig {}
# Global retry policy (can be overridden per operation)
default_retry_policy: RetryPolicy = RetryPolicy {}
# Global rollback strategy
default_rollback_strategy: RollbackStrategy = RollbackStrategy {}
# Workflow execution context
execution_context: {str:str} = {}
# Pre and post workflow hooks
pre_workflow_hooks?: [str] = []
post_workflow_hooks?: [str] = []
# Notification settings
notifications?: MonitoringConfig
check:
len(workflow_id) > 0, "Workflow ID cannot be empty"
len(name) > 0, "Workflow name cannot be empty"
len(operations) > 0, "Workflow must contain at least one operation"
max_parallel_operations >= 0, "Max parallel operations cannot be negative"
global_timeout > 0, "Global timeout must be positive"
# Validate operation IDs are unique
isunique([op.operation_id for op in operations]), "Operation IDs must be unique within a workflow"
schema WorkflowExecution:
"""
Runtime execution state for batch workflows
Tracks progress, results, and state changes
"""
# Reference to the workflow being executed
workflow_id: str
# Unique execution instance identifier
execution_id: str
# Current execution status
status: "pending" | "running" | "paused" | "completed" | "failed" | "cancelled" = "pending"
# Execution timing
started_at?: str
completed_at?: str
# seconds
duration?: int
# Operation execution states
operation_states: {str:{str:str}} = {}
# Execution results and outputs
results: {str:str} = {}
# Error information
errors: [str] = []
# Resource usage tracking
resource_usage?: {str:str} = {}
# Rollback history
rollback_history: [str] = []
check:
len(workflow_id) > 0, "Workflow ID cannot be empty"
len(execution_id) > 0, "Execution ID cannot be empty"
schema WorkflowTemplate:
"""
Reusable workflow templates for common batch operations
Supports parameterization and customization
"""
# Template identifier
template_id: str
# Template name and description
name: str
description?: str = ""
# Template category
category: "infrastructure" | "deployment" | "maintenance" | "testing" | "custom" = "infrastructure"
# Base workflow definition
workflow_template: BatchWorkflow
# Template parameters that can be customized
parameters: {str:str} = {}
# Required parameters that must be provided
required_parameters: [str] = []
# Template versioning
version: str = "1.0.0"
# Compatibility information
min_provisioning_version?: str
# Usage examples and documentation
examples?: [str] = []
documentation_url?: str
check:
len(template_id) > 0, "Template ID cannot be empty"
len(name) > 0, "Template name cannot be empty"
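
Tying the workflow schemas together, a minimal two-operation sketch; the IDs, provider, and tuning values are assumptions for illustration, not a prescribed workflow.

```kcl
# Example workflow: create a server, then install a taskserv on it (illustrative values)
web_rollout_workflow: BatchWorkflow = BatchWorkflow {
    workflow_id = "web-rollout-001"
    name = "Provision web tier"
    description = "Create a server, then install a taskserv on it"
    operations = [
        BatchOperation {
            operation_id = "create-server"
            name = "Create web server"
            operation_type = "server"
            provider = "upcloud"
            action = "create"
        }
        BatchOperation {
            operation_id = "install-nginx"
            name = "Install nginx taskserv"
            operation_type = "taskserv"
            action = "create"
            dependencies = [DependencyDef {target_operation_id = "create-server"}]
            allow_parallel = False
        }
    ]
    max_parallel_operations = 2
    fail_fast = True
    storage = StorageConfig {backend = "filesystem"}
    monitoring = MonitoringConfig {log_level = "info"}
}
```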