commit f02b12cef31e245e9a1134948ec7e4fe91f8e8e8 Author: Jesús Pérez Date: Tue Oct 7 11:17:54 2025 +0100 init repo and codebase diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..366db47 --- /dev/null +++ b/.gitignore @@ -0,0 +1,110 @@ +.p +.claude +.vscode +.shellcheckrc +.coder +.migration +.zed +ai_demo.nu +CLAUDE.md +.cache +.coder +wrks +ROOT +OLD +# Generated by Cargo +# will have compiled files and executables +debug/ +target/ +# Encryption keys and related files (CRITICAL - NEVER COMMIT) +.k.backup +*.key.backup + +config.*.toml +config.*back + +# where book is written +_book + +# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries +# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html +Cargo.lock + +# These are backup files generated by rustfmt +**/*.rs.bk + +# MSVC Windows builds of rustc generate these, which store debugging information +*.pdb + +node_modules/ + +**/output.css +**/input.css + +# Environment files +.env +.env.local +.env.production +.env.development +.env.staging + +# Keep example files +!.env.example + +# Configuration files (may contain sensitive data) +config.prod.toml +config.production.toml +config.local.toml +config.*.local.toml + +# Keep example configuration files +!config.toml +!config.dev.toml +!config.example.toml + +# Log files +logs/ +*.log + +# TLS certificates and keys +certs/ +*.pem +*.crt +*.key +*.p12 +*.pfx + +# Database files +*.db +*.sqlite +*.sqlite3 + +# Backup files +*.bak +*.backup +*.tmp +*~ + +# Encryption and security related files +*.encrypted +*.enc +secrets/ +private/ +security/ + +# Configuration backups that may contain secrets +config.*.backup +config.backup.* + +# OS generated files +.DS_Store +.DS_Store? +._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db +# Documentation build output +book-output/ +# Generated setup report +SETUP_COMPLETE.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..47a6732 --- /dev/null +++ b/README.md @@ -0,0 +1,412 @@ +

+ Provisioning Logo +

+

+ Provisioning +

+ + + +# Provisioning KCL Package + +A comprehensive KCL (KusionStack Configuration Language) package providing type-safe schemas for [Provisioning project](https://repo.jesusperez.pro/jesus/provisioning) batch workflows, and Kubernetes deployments. + +## Overview + +This package contains production-ready KCL schemas with configuration-driven, provider-agnostic infrastructure automation capabilities. + +### Package Structure + +``` +provisioning/kcl/ +├── main.k # Main entry point - import this +├── settings.k # Core system settings +├── lib.k # Common schemas and utilities +├── server.k # Server configuration schemas +├── cluster.k # Cluster management schemas +├── workflows.k # Batch workflow schemas +├── batch.k # Advanced batch operation utilities +├── dependencies.k # Taskserv dependency management +├── version.k # Version management schemas +├── k8s_deploy.k # Kubernetes deployment schemas +├── defaults.k # Default configurations +├── examples_batch.k # Comprehensive examples +└── docs/ # Documentation +``` + +## Quick Start + +### Import the Package + +```kcl +# Import the main entry point for access to all schemas +import provisioning.main + +# Or import from a relative path if working within the same project +import .main +``` + +### Basic Server Configuration + +```kcl +import .main + +# Define a simple server +web_server: main.Server = main.Server { + hostname: "web-01" + title: "Production Web Server" + labels: "env: prod, tier: web" + user: "admin" + + # Optional: Add taskservs to install + taskservs: [ + main.TaskServDef { + name: "nginx" + install_mode: "library" + profile: "production" + } + ] +} +``` + +### Batch Workflow Example + +```kcl +import .main + +# Define a multi-provider infrastructure deployment +deployment_workflow: main.BatchWorkflow = main.BatchWorkflow { + workflow_id: "prod_deploy_001" + name: "Production Infrastructure Deployment" + description: "Deploy web tier across UpCloud and AWS" + + operations: [ + # Create UpCloud servers + main.BatchOperation { + operation_id: "create_web_servers" + name: "Create Web Servers" + operation_type: "server" + provider: "upcloud" + action: "create" + parameters: { + "server_count": "3" + "server_type": "web" + "zone": "fi-hel2" + "plan": "2xCPU-4GB" + } + priority: 10 + } + + # Install Kubernetes after servers are ready + main.BatchOperation { + operation_id: "install_k8s" + name: "Install Kubernetes Cluster" + operation_type: "taskserv" + action: "create" + parameters: { + "taskserv": "kubernetes" + "version": "v1.31.0" + "cluster_name": "prod-cluster" + } + dependencies: [ + main.DependencyDef { + target_operation_id: "create_web_servers" + dependency_type: "sequential" + timeout: 600 + } + ] + priority: 8 + } + ] + + # Global workflow settings + max_parallel_operations: 3 + fail_fast: False + + # Use SurrealDB for state persistence + storage: main.StorageConfig { + backend: "surrealdb" + connection_config: { + "url": "ws://localhost:8000" + "namespace": "provisioning" + "database": "workflows" + } + enable_persistence: True + retention_hours: 720 # 30 days + } +} +``` + +### Kubernetes Deployment + +```kcl +import .main + +# Define a complete Kubernetes deployment +nginx_deployment: main.K8sDeploy = main.K8sDeploy { + name: "nginx-web" + namespace: "production" + create_ns: True + + spec: main.K8sDeploySpec { + replicas: 3 + containers: [ + main.K8sContainers { + name: "nginx" + image: "nginx:1.21" + ports: [ + main.K8sPort { + name: "http" + container: 80 + target: 8080 + } + ] + resources_requests: 
main.K8sResources { + memory: "128Mi" + cpu: "100m" + } + resources_limits: main.K8sResources { + memory: "256Mi" + cpu: "200m" + } + } + ] + } + + # Expose via service + service: main.K8sService { + name: "nginx-service" + typ: "LoadBalancer" + ports: [ + main.K8sPort { + name: "http" + target: 80 + nodePort: 30080 + } + ] + } +} +``` + +## Core Schemas + +### Server Management +- **`Server`**: Complete server configuration with defaults inheritance +- **`ServerDefaults`**: Default settings for server provisioning +- **`Storage`**, **`StorageVol`**: Storage configuration and partitioning + +### Workflow & Batch Operations +- **`BatchWorkflow`**: Multi-operation workflow with dependencies +- **`BatchOperation`**: Individual operation within workflows +- **`DependencyDef`**: Define sequential or conditional dependencies +- **`RetryPolicy`**: Configure retry behavior and backoff +- **`RollbackStrategy`**: Automatic rollback on failures + +### Taskserv Management +- **`TaskServDef`**: Infrastructure service definitions +- **`TaskservDependencies`**: Dependency management for taskservs +- **`HealthCheck`**: Health monitoring configuration + +### Kubernetes Deployments +- **`K8sDeploy`**: Complete Kubernetes deployment specification +- **`K8sService`**: Service definitions with load balancing +- **`K8sVolume`**: Persistent storage configuration +- **`K8sResources`**: Resource limits and requests + +### Configuration & Settings +- **`Settings`**: System-wide configuration +- **`SecretProvider`**: SOPS/KMS secret management +- **`AIProvider`**: AI integration configuration + +## Advanced Features + +### Mixed Provider Support + +Deploy across multiple cloud providers in a single workflow: + +```kcl +mixed_deployment: main.BatchWorkflow = main.BatchWorkflow { + workflow_id: "multi_cloud_001" + name: "Multi-Cloud Deployment" + + operations: [ + # UpCloud servers for web tier + main.BatchOperation { + operation_id: "upcloud_web" + provider: "upcloud" + parameters: {"zone": "fi-hel2", "count": "3"} + } + # AWS RDS for database + main.BatchOperation { + operation_id: "aws_database" + provider: "aws" + parameters: {"region": "eu-west-1", "engine": "postgresql"} + dependencies: [ + main.DependencyDef { + target_operation_id: "upcloud_web" + dependency_type: "sequential" + } + ] + } + ] +} +``` + +### Resource Constraints & Autoscaling + +Configure intelligent resource management: + +```kcl +batch_executor: main.BatchExecutor = main.BatchExecutor { + executor_id: "production_executor" + name: "Production Batch Executor" + + # Resource limits + resource_constraints: [ + main.ResourceConstraint { + resource_type: "cpu" + resource_name: "total_cores" + max_units: 16 + units_per_operation: 2 + hard_constraint: True + } + ] + + # Auto-scaling configuration + autoscaling: main.BatchAutoscaling { + enabled: True + min_parallel: 2 + max_parallel: 10 + scale_up_threshold: 0.8 + target_utilization: 0.65 + } +} +``` + +### Monitoring & Observability + +```kcl +monitoring_config: main.MonitoringConfig = main.MonitoringConfig { + enabled: True + backend: "prometheus" + enable_tracing: True + enable_notifications: True + notification_channels: [ + "webhook:slack://ops-alerts", + "webhook:pagerduty://incidents" + ] + log_level: "info" +} +``` + +## Validation & Testing + +### Schema Validation + +```bash +# Validate individual files +kcl run server_config.k + +# Validate entire workflow +kcl run workflow_definition.k + +# Output as JSON for integration +kcl run workflow_definition.k --format json +``` + +### Built-in 
Constraints + +All schemas include comprehensive validation: + +```kcl +# Server hostnames must be non-empty +server: main.Server = main.Server { + hostname: "web-01" # ✅ Valid + # hostname: "" # ❌ Validation error +} + +# Resource constraints are enforced +resources: main.K8sResources = main.K8sResources { + memory: "128Mi" # ✅ Valid K8s format + # memory: "invalid" # ❌ Validation error +} + +# Dependency cycles are prevented +operation: main.BatchOperation = main.BatchOperation { + operation_id: "op1" + dependencies: [ + main.DependencyDef { + target_operation_id: "op2" # ✅ Valid dependency + # target_operation_id: "op1" # ❌ Self-reference prevented + } + ] +} +``` + +## Integration Examples + +### With Nushell Scripts + +```nushell +# Generate workflow from KCL +let workflow = (kcl run deployment.k --format json | from json) + +# Submit to batch executor +$workflow | to json | http post http://localhost:8080/workflows/batch/submit + +# Monitor progress +while true { + let status = (http get $"http://localhost:8080/workflows/batch/($workflow.workflow_id)") + if $status.status == "completed" { break } + sleep 5sec +} +``` + +### With Rust Orchestrator + +```rust +// Deserialize KCL output into Rust structs +let workflow: BatchWorkflow = serde_json::from_str(&kcl_output)?; + +// Execute via orchestrator +let executor = BatchExecutor::new(workflow); +executor.execute().await?; +``` + +## Package Metadata + +- **Version**: 0.1.0 +- **API Version**: v1 +- **KCL Compatibility**: 0.11.0 - 0.12.0 +- **Build Date**: 2025-09-28 + +### Features +- ✅ Server Management +- ✅ Cluster Orchestration +- ✅ Provider Abstraction +- ✅ Workflow Automation +- ✅ Batch Operations + +## Best Practices + +1. **Always import via main.k** for stability +2. **Use descriptive operation_id values** for dependency tracking +3. **Set appropriate timeouts** based on operation complexity +4. **Enable monitoring** for production workflows +5. **Test workflows** with small counts before production +6. **Use retry policies** for transient failures +7. **Configure rollback strategies** for critical operations + +## Contributing + +When adding new schemas: +1. Follow existing naming conventions +2. Add comprehensive validation rules +3. Include documentation strings +4. Export from `main.k` +5. Add examples to `examples_batch.k` +6. Update this README + +## License + +This package is part of the Provisioning project and follows the same license terms. diff --git a/REFERENCE.md b/REFERENCE.md new file mode 100644 index 0000000..fb64e73 --- /dev/null +++ b/REFERENCE.md @@ -0,0 +1,56 @@ +# KCL Schemas Reference + +This directory contains references to existing KCL schema implementations. 
+ +## Current Implementation Locations + +### Settings Schema +- **Current**: `/Users/Akasha/repo-cnz/src/provisioning/kcl/settings.k` +- **New Reference**: `settings.k` (placeholder created) + +### Provider Schemas +- **Current**: Various files in `/Users/Akasha/repo-cnz/src/provisioning/providers/*/kcl/` +- **New Reference**: `providers.k` (to be created) + +### Workflow Schemas +- **Current**: Distributed across workflow implementations +- **New Reference**: `workflows.k` (to be created) + +## Migration Strategy + +### Phase 1: Analysis +- Inventory all existing KCL schemas +- Identify common patterns and duplications +- Document schema relationships + +### Phase 2: Consolidation +- Create unified schema files +- Preserve backward compatibility +- Update import paths gradually + +### Phase 3: Optimization +- Improve type safety +- Add comprehensive validation +- Enhance documentation + +## Benefits of Consolidation + +1. **Single Source of Truth**: Unified schema definitions +2. **Type Safety**: Compile-time validation across entire system +3. **Consistency**: Standardized configuration patterns +4. **Maintainability**: Easier schema evolution and updates + +## Current Status + +- **Reference Files**: Created with placeholders +- **Original Schemas**: Fully functional in existing locations +- **Migration**: Planned for future phase + +## Integration + +Consolidated schemas will be used by: +- Core provisioning engine +- Code generators +- Configuration validators +- Documentation systems +- IDE integrations (syntax highlighting, autocompletion) \ No newline at end of file diff --git a/batch.k b/batch.k new file mode 100644 index 0000000..17dacba --- /dev/null +++ b/batch.k @@ -0,0 +1,287 @@ +# Info: KCL batch operation utilities for provisioning (Provisioning) +# Author: JesusPerezLorenzo +# Release: 0.0.1 +# Date: 25-09-2025 +# Description: Batch operation utilities and helper schemas following PAP principles +import .workflows + +schema BatchScheduler: + """ + Scheduler configuration for batch operations + Supports various scheduling strategies and resource management + """ + # Scheduling strategy: 'fifo', 'priority', 'dependency_first', 'resource_aware' + strategy: "fifo" | "priority" | "dependency_first" | "resource_aware" = "dependency_first" + # Resource constraints for scheduling + resource_limits: {str:int} = { + # 0 = no limit + "max_cpu_cores": 0 + # 0 = no limit + "max_memory_mb": 0 + # 0 = no limit + "max_network_bandwidth": 0 + } + # Scheduling interval in seconds + scheduling_interval: int = 10 + # Whether to enable preemptive scheduling + enable_preemption: bool = False + + check: + scheduling_interval > 0, "Scheduling interval must be positive" + +schema BatchQueue: + """ + Queue configuration for batch operations + Supports priority queues and resource-based queuing + """ + # Queue name/identifier + queue_id: str + # Queue type: 'standard', 'priority', 'delay', 'dead_letter' + queue_type: "standard" | "priority" | "delay" | "dead_letter" = "standard" + # Maximum queue size (0 = unlimited) + max_size: int = 0 + + # Message retention period in seconds + # 7 days default + retention_period: int = 604800 + # Dead letter queue configuration + dead_letter_queue?: str + # Maximum delivery attempts before moving to dead letter + max_delivery_attempts: int = 3 + + check: + len(queue_id) > 0, "Queue ID cannot be empty" + max_size >= 0, "Max size cannot be negative" + retention_period > 0, "Retention period must be positive" + max_delivery_attempts > 0, "Max delivery attempts must 
be positive" + +schema ResourceConstraint: + """ + Resource constraint definition for batch operations + Ensures operations don't exceed available resources + """ + # Resource type: cpu, memory, network, storage, custom + resource_type: "cpu" | "memory" | "network" | "storage" | "custom" + # Resource name/identifier + resource_name: str + # Maximum units available + max_units: int + # Current units in use + current_units: int = 0 + # Units per operation (for estimation) + units_per_operation: int = 1 + # Whether this constraint is hard (fails operation) or soft (warns only) + hard_constraint: bool = True + + check: + len(resource_name) > 0, "Resource name cannot be empty" + max_units > 0, "Max units must be positive" + current_units >= 0, "Current units cannot be negative" + units_per_operation > 0, "Units per operation must be positive" + current_units <= max_units, "Current units cannot exceed max units" + +schema BatchMetrics: + """ + Metrics collection configuration for batch operations + Tracks performance, success rates, and resource utilization + """ + # Whether to collect detailed metrics + detailed_metrics: bool = True + + # Metrics retention period in hours + # 1 week + retention_hours: int = 168 + + # Metrics aggregation intervals + # 1min, 5min, 1hour + aggregation_intervals: [int] = [60, 300, 3600] + # Custom metrics to collect + custom_metrics?: [str] = [] + # Whether to export metrics to external systems + enable_export: bool = False + # Export configuration + export_config?: {str:str} = {} + + check: + retention_hours > 0, "Retention hours must be positive" + len(aggregation_intervals) > 0, "Must have at least one aggregation interval" + +schema ProviderMixConfig: + """ + Configuration for mixed provider batch operations + Handles cross-provider dependencies and resource coordination + """ + # Primary provider for the batch workflow + primary_provider: str = "upcloud" + # Secondary providers available + secondary_providers: [str] = [] + # Provider selection strategy for new resources + provider_selection: "primary_first" | "load_balance" | "cost_optimize" | "latency_optimize" = "primary_first" + # Cross-provider networking configuration + cross_provider_networking?: {str:str} = {} + # Shared storage configuration across providers + shared_storage?: workflows.StorageConfig + # Provider-specific resource limits + provider_limits: {str:{str:int}} = {} + + check: + len(primary_provider) > 0, "Primary provider cannot be empty" + +schema BatchHealthCheck: + """ + Health check configuration for batch operations + Monitors operation health and triggers recovery actions + """ + # Whether health checks are enabled + enabled: bool = True + # Health check interval in seconds + check_interval: int = 60 + # Health check timeout in seconds + check_timeout: int = 30 + # Failure threshold before marking as unhealthy + failure_threshold: int = 3 + # Success threshold to mark as healthy again + success_threshold: int = 2 + # Health check endpoints/commands + health_checks: [str] = [] + # Actions to take on health check failure + failure_actions: [str] = ["retry", "rollback"] + + check: + check_interval > 0, "Check interval must be positive" + check_timeout > 0, "Check timeout must be positive" + failure_threshold > 0, "Failure threshold must be positive" + success_threshold > 0, "Success threshold must be positive" + +schema BatchAutoscaling: + """ + Autoscaling configuration for batch operations + Dynamically adjusts resources based on load and performance + """ + # Whether autoscaling is 
enabled + enabled: bool = False + # Minimum number of parallel operations + min_parallel: int = 1 + # Maximum number of parallel operations + max_parallel: int = 10 + + # Scaling triggers based on metrics + # CPU/resource utilization + scale_up_threshold: float = 0.8 + scale_down_threshold: float = 0.2 + # Scaling cooldown period in seconds + cooldown_period: int = 300 + # Scaling step size + scale_step: int = 1 + # Target resource utilization + target_utilization: float = 0.6 + + check: + min_parallel > 0, "Min parallel must be positive" + max_parallel >= min_parallel, "Max parallel must be >= min parallel" + scale_up_threshold > scale_down_threshold, "Scale up threshold must be > scale down threshold" + 0 < target_utilization and target_utilization < 1, "Target utilization must be between 0 and 1" + cooldown_period > 0, "Cooldown period must be positive" + +schema BatchExecutor: + """ + Batch executor configuration combining all batch operation aspects + Main configuration schema for batch workflow execution engine + """ + # Executor identifier + executor_id: str + # Executor name and description + name: str + description?: str = "" + # Core scheduling configuration + scheduler: BatchScheduler = BatchScheduler {} + # Queue management + queues: [BatchQueue] = [BatchQueue {queue_id: "default"}] + # Resource constraints + resource_constraints: [ResourceConstraint] = [] + + # Mixed provider configuration + provider_config: ProviderMixConfig = ProviderMixConfig {} + # Health monitoring + health_check: BatchHealthCheck = BatchHealthCheck {} + # Autoscaling settings + autoscaling: BatchAutoscaling = BatchAutoscaling {} + + # Metrics and monitoring + metrics: BatchMetrics = BatchMetrics {} + # Storage configuration for execution state + storage: workflows.StorageConfig = workflows.StorageConfig {} + + # Security and access control + security_config: {str:str} = {} + # Audit logging configuration + audit_logging: bool = True + audit_log_path: str = "./logs/batch_audit.log" + + # Integration settings + webhook_endpoints: [str] = [] + api_endpoints: [str] = [] + + # Performance tuning + performance_config: {str:str} = { + "io_threads": "4" + "worker_threads": "8" + "batch_size": "100" + } + + check: + len(executor_id) > 0, "Executor ID cannot be empty" + len(name) > 0, "Executor name cannot be empty" + len(queues) > 0, "Must have at least one queue configured" + +# Utility functions and constants for batch operations +BatchOperationTypes: [str] = [ + "server_create" + "server_delete" + "server_scale" + "server_update" + "taskserv_install" + "taskserv_remove" + "taskserv_update" + "taskserv_configure" + "cluster_create" + "cluster_delete" + "cluster_scale" + "cluster_upgrade" + "custom_command" + "custom_script" + "custom_api_call" +] + +BatchProviders: [str] = [ + "upcloud" + "aws" + "local" + "mixed" + "custom" +] + +DefaultBatchConfig: BatchExecutor = BatchExecutor { + executor_id: "default_batch_executor" + name: "Default Batch Executor" + description: "Default configuration-driven batch executor for provisioning operations" + scheduler: BatchScheduler { + strategy: "dependency_first" + resource_limits: { + "max_cpu_cores": 8 + "max_memory_mb": 16384 + "max_network_bandwidth": 1000 + } + } + provider_config: ProviderMixConfig { + primary_provider: "upcloud" + secondary_providers: ["aws", "local"] + provider_selection: "primary_first" + } + autoscaling: BatchAutoscaling { + enabled: True + min_parallel: 2 + max_parallel: 8 + target_utilization: 0.7 + } +} diff --git a/cluster.k b/cluster.k new 
file mode 100644 index 0000000..7f49a10 --- /dev/null +++ b/cluster.k @@ -0,0 +1,33 @@ +# Info: KCL core lib cluster schemas for provisioning (Provisioning) +# Author: JesusPerezLorenzo +# Release: 0.0.4 +# Date: 15-12-2023 +import .lib + +schema Cluster: + """ + cluster settings + """ + not_use: bool = False + name: str + version: str + # Template deployment path in $PROVISIONING/templates + template?: "k8s-deploy" | "" + # Schema definition values + def: "K8sDeploy" | "" = "" + # Services Save path or use main settings + clusters_save_path?: str + # Profile to use + profile?: str + # host to admin cluster + admin_host?: str + # Cluster clusters admin hosts port to connect via SSH + admin_port?: int + # Cluster clusters admin user connect via SSH + admin_user?: str + ssh_key_path?: str + # cluster local definition_path directory + local_def_path: str = "./clusters/${name}" + # Scale mode settings for lib-ScaleResource + scale?: lib.ScaleResource + diff --git a/coredns.k b/coredns.k new file mode 100644 index 0000000..f29b6d3 --- /dev/null +++ b/coredns.k @@ -0,0 +1,484 @@ +# Info: KCL CoreDNS configuration schemas for provisioning system +# Author: CoreDNS Integration Agent +# Release: 1.0.0 +# Date: 2025-10-06 +# Purpose: Define CoreDNS service configuration, zones, and DNS management + +import regex + +schema CoreDNSConfig: + """ + CoreDNS service configuration + + Defines how CoreDNS is deployed and managed within the provisioning system. + Supports local binary, Docker, remote, and hybrid deployment modes. + + Examples: + # Local mode with auto-start + CoreDNSConfig { + mode = "local" + local = LocalCoreDNS { + enabled = True + auto_start = True + zones = ["provisioning.local", "workspace.local"] + } + } + + # Remote mode + CoreDNSConfig { + mode = "remote" + remote = RemoteCoreDNS { + enabled = True + endpoints = ["https://dns1.example.com", "https://dns2.example.com"] + zones = ["production.local"] + } + } + """ + # Deployment mode: local, remote, hybrid, or disabled + mode: "local" | "remote" | "hybrid" | "disabled" = "local" + + # Local CoreDNS configuration + local?: LocalCoreDNS + + # Remote CoreDNS configuration + remote?: RemoteCoreDNS + + # Dynamic DNS update configuration + dynamic_updates: DynamicDNS = DynamicDNS {} + + # Upstream DNS servers for forwarding + upstream: [str] = ["8.8.8.8", "1.1.1.1"] + + # Global TTL for DNS records (seconds) + default_ttl: int = 300 + + # Enable DNS query logging + enable_logging: bool = True + + # Enable metrics endpoint + enable_metrics: bool = True + + # Metrics port + metrics_port: int = 9153 + + check: + len(upstream) > 0, "At least one upstream DNS server required" + default_ttl > 0 and default_ttl <= 86400, "TTL must be 1-86400 seconds" + metrics_port >= 1024 and metrics_port <= 65535, "Metrics port must be 1024-65535" + mode != "local" or local != Undefined, "Local config required when mode is 'local'" + mode != "remote" or remote != Undefined, "Remote config required when mode is 'remote'" + mode != "hybrid" or (local != Undefined and remote != Undefined), \ + "Both local and remote config required when mode is 'hybrid'" + +schema LocalCoreDNS: + """ + Local CoreDNS binary configuration + + Manages CoreDNS running as a local binary or Docker container. 
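+
+    Examples:
+        # Binary deployment on the default port (values mirror the schema defaults below)
+        LocalCoreDNS {
+            deployment_type = "binary"
+            port = 5353
+            auto_start = True
+            zones = ["provisioning.local", "workspace.local"]
+        }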
+ """ + # Enable local CoreDNS + enabled: bool = True + + # Deployment type: binary or docker + deployment_type: "binary" | "docker" = "binary" + + # Path to CoreDNS binary + binary_path: str = "~/.provisioning/bin/coredns" + + # Path to Corefile + config_path: str = "~/.provisioning/coredns/Corefile" + + # Path to zone files directory + zones_path: str = "~/.provisioning/coredns/zones" + + # DNS listening port + port: int = 5353 + + # Auto-start CoreDNS on system startup + auto_start: bool = True + + # Auto-restart on failure + auto_restart: bool = True + + # Managed DNS zones + zones: [str] = ["provisioning.local", "workspace.local"] + + # PID file path (for binary mode) + pid_file?: str = "~/.provisioning/coredns/coredns.pid" + + # Log file path + log_file?: str = "~/.provisioning/coredns/coredns.log" + + # Docker configuration (for docker mode) + docker?: DockerCoreDNS + + check: + port >= 1024 and port <= 65535, "Port must be 1024-65535" + len(zones) > 0, "At least one zone required" + deployment_type != "docker" or docker != Undefined, \ + "Docker config required when deployment_type is 'docker'" + +schema DockerCoreDNS: + """ + Docker-based CoreDNS deployment configuration + """ + # Docker image + image: str = "coredns/coredns:1.11.1" + + # Container name + container_name: str = "provisioning-coredns" + + # Restart policy + restart_policy: "no" | "always" | "unless-stopped" | "on-failure" = "unless-stopped" + + # Network mode + network_mode: str = "bridge" + + # Publish DNS port + publish_port: bool = True + + # Volume mounts (host:container) + volumes: [str] = [] + + check: + len(image) > 0, "Docker image required" + len(container_name) > 0, "Container name required" + +schema RemoteCoreDNS: + """ + Remote CoreDNS service configuration + + Connect to external CoreDNS instances for DNS management. + """ + # Enable remote CoreDNS + enabled: bool = True + + # Remote CoreDNS API endpoints + endpoints: [str] + + # Managed zones on remote servers + zones: [str] + + # Authentication token file path + auth_token_path?: str + + # TLS verification + verify_tls: bool = True + + # Connection timeout (seconds) + timeout: int = 30 + + # Health check interval (seconds) + health_check_interval: int = 60 + + check: + len(endpoints) > 0, "At least one remote endpoint required" + len(zones) > 0, "At least one zone required" + timeout > 0 and timeout <= 300, "Timeout must be 1-300 seconds" + health_check_interval >= 10, "Health check interval must be >= 10 seconds" + +schema DynamicDNS: + """ + Dynamic DNS update configuration + + Enables automatic DNS updates when infrastructure changes. 
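+
+    Examples:
+        # Batched updates (batch_interval is required by the check when strategy is "batched")
+        DynamicDNS {
+            enabled = True
+            update_strategy = "batched"
+            batch_interval = 60
+        }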
+ """ + # Enable dynamic DNS updates + enabled: bool = True + + # Orchestrator DNS API endpoint + api_endpoint: str = "http://localhost:8080/dns" + + # Automatically register servers on creation + auto_register_servers: bool = True + + # Automatically unregister servers on deletion + auto_unregister_servers: bool = True + + # Default TTL for dynamic records (seconds) + ttl: int = 300 + + # Update strategy: immediate, batched, or scheduled + update_strategy: "immediate" | "batched" | "scheduled" = "immediate" + + # Batch interval (seconds, for batched strategy) + batch_interval?: int = 60 + + # Retry configuration + retry_policy: RetryPolicy = RetryPolicy {} + + check: + ttl > 0 and ttl <= 86400, "TTL must be 1-86400 seconds" + update_strategy != "batched" or batch_interval != Undefined, \ + "Batch interval required for batched strategy" + batch_interval == Undefined or batch_interval >= 10, \ + "Batch interval must be >= 10 seconds" + +schema RetryPolicy: + """ + Retry policy for DNS update failures + """ + # Maximum retry attempts + max_attempts: int = 3 + + # Initial delay before first retry (seconds) + initial_delay: int = 5 + + # Backoff multiplier for subsequent retries + backoff_multiplier: float = 2.0 + + # Maximum delay between retries (seconds) + max_delay: int = 60 + + check: + max_attempts > 0 and max_attempts <= 10, "Max attempts must be 1-10" + initial_delay > 0, "Initial delay must be positive" + backoff_multiplier >= 1.0, "Backoff multiplier must be >= 1.0" + max_delay >= initial_delay, "Max delay must be >= initial delay" + +schema DNSZone: + """ + DNS zone configuration + + Defines a DNS zone with SOA, NS, and other records. + + Examples: + DNSZone { + name = "provisioning.local" + admin_email = "admin.provisioning.local" + nameservers = ["ns1.provisioning.local"] + records = [ + DNSRecord { + name = "server-01" + type = "A" + value = "10.0.1.10" + } + ] + } + """ + # Zone name (must be FQDN with trailing dot in zone file) + name: str + + # Zone file path + file_path?: str + + # SOA record configuration + soa: SOARecord = SOARecord {} + + # Nameserver hostnames + nameservers: [str] + + # Admin email (dots replaced with @ in zone file) + admin_email: str = $"admin.{name}" + + # DNS records + records: [DNSRecord] = [] + + # Default TTL for zone (seconds) + ttl: int = 3600 + + check: + len(name) > 0, "Zone name required" + regex.match(name, r"^[a-z0-9]([a-z0-9-\.]{0,253}[a-z0-9])?$"), \ + "Zone name must be valid domain name" + len(nameservers) > 0, "At least one nameserver required" + ttl > 0, "TTL must be positive" + +schema SOARecord: + """ + SOA (Start of Authority) record + """ + # Serial number (auto-incremented on updates) + serial: int = 1 + + # Refresh interval (seconds) + refresh: int = 3600 + + # Retry interval (seconds) + retry: int = 1800 + + # Expire time (seconds) + expire: int = 604800 + + # Minimum TTL (seconds) + minimum: int = 86400 + + check: + serial > 0, "Serial must be positive" + refresh > 0, "Refresh must be positive" + retry > 0, "Retry must be positive" + expire > refresh, "Expire must be > refresh" + minimum > 0, "Minimum must be positive" + +schema DNSRecord: + """ + DNS resource record + + Supports A, AAAA, CNAME, MX, TXT, NS, SRV, PTR records. 
+ + Examples: + # A record + DNSRecord { + name = "server-01" + type = "A" + value = "10.0.1.10" + } + + # CNAME record + DNSRecord { + name = "web" + type = "CNAME" + value = "server-01.provisioning.local" + } + + # MX record + DNSRecord { + name = "@" + type = "MX" + priority = 10 + value = "mail.provisioning.local" + } + """ + # Record name (hostname or @) + name: str + + # Record type + type: "A" | "AAAA" | "CNAME" | "MX" | "TXT" | "NS" | "SOA" | "SRV" | "PTR" + + # Record value (IP address, hostname, or text) + value: str + + # TTL in seconds (optional, uses zone default) + ttl?: int + + # Priority (for MX and SRV records) + priority?: int + + # Weight (for SRV records) + weight?: int + + # Port (for SRV records) + port?: int + + # Comment + comment?: str + + check: + len(name) > 0, "Record name required" + len(value) > 0, "Record value required" + + # A record validation + type != "A" or regex.match(value, \ + r"^((25[0-5]|2[0-4][0-9]|[0-1]?[0-9]?[0-9])\.){3}(25[0-5]|2[0-4][0-9]|[0-1]?[0-9]?[0-9])$"), \ + "A record value must be valid IPv4 address" + + # AAAA record validation + type != "AAAA" or regex.match(value, r"^([0-9a-fA-F]{0,4}:){7}[0-9a-fA-F]{0,4}$"), \ + "AAAA record value must be valid IPv6 address" + + # MX/SRV priority validation + type not in ["MX", "SRV"] or priority != Undefined, \ + "Priority required for MX and SRV records" + + # SRV weight and port validation + type != "SRV" or (weight != Undefined and port != Undefined), \ + "Weight and port required for SRV records" + + # TTL validation + ttl == Undefined or (ttl > 0 and ttl <= 86400), \ + "TTL must be 1-86400 seconds" + +schema CorefilePlugin: + """ + Corefile plugin configuration + + Defines a plugin block in Corefile. + """ + # Plugin name (file, forward, cache, etc.) + name: str + + # Plugin arguments + args: [str] = [] + + # Plugin options (key-value pairs) + options: {str: str} = {} + + check: + len(name) > 0, "Plugin name required" + +schema CorefileZoneBlock: + """ + Corefile zone block configuration + + Defines a zone block with plugins in Corefile. 
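+
+    Examples:
+        # Minimal sketch: serve a zone file and forward everything else upstream
+        # (plugin names and argument order follow standard CoreDNS Corefile usage)
+        CorefileZoneBlock {
+            zone = "provisioning.local"
+            port = 5353
+            plugins = [
+                CorefilePlugin {name = "file", args = ["zones/provisioning.local.zone"]}
+                CorefilePlugin {name = "forward", args = [".", "8.8.8.8"]}
+            ]
+        }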
+ """ + # Zone name (e.g., "provisioning.local:5353") + zone: str + + # Port number + port: int = 53 + + # Plugins in this zone + plugins: [CorefilePlugin] + + check: + len(zone) > 0, "Zone required" + port >= 1024 and port <= 65535, "Port must be 1024-65535" + len(plugins) > 0, "At least one plugin required" + +schema DNSQueryLog: + """ + DNS query logging configuration + """ + # Enable query logging + enabled: bool = True + + # Log file path + log_file: str = "~/.provisioning/coredns/queries.log" + + # Log format: text or json + log_format: "text" | "json" = "text" + + # Log level: debug, info, warn, error + log_level: "debug" | "info" | "warn" | "error" = "info" + + # Rotate log files + rotate_enabled: bool = True + + # Max log file size (MB) + max_size_mb: int = 100 + + # Max number of rotated files + max_backups: int = 5 + + check: + max_size_mb > 0 and max_size_mb <= 1024, "Max size must be 1-1024 MB" + max_backups >= 0 and max_backups <= 100, "Max backups must be 0-100" + +schema DNSHealthCheck: + """ + CoreDNS health check configuration + """ + # Enable health checks + enabled: bool = True + + # Health check endpoint + endpoint: str = "http://localhost:8080/health" + + # Health check interval (seconds) + interval: int = 30 + + # Timeout for health check (seconds) + timeout: int = 5 + + # Unhealthy threshold (consecutive failures) + unhealthy_threshold: int = 3 + + # Healthy threshold (consecutive successes) + healthy_threshold: int = 2 + + check: + interval > 0, "Interval must be positive" + timeout > 0 and timeout < interval, "Timeout must be < interval" + unhealthy_threshold > 0, "Unhealthy threshold must be positive" + healthy_threshold > 0, "Healthy threshold must be positive" diff --git a/defaults.k b/defaults.k new file mode 100644 index 0000000..3033dfe --- /dev/null +++ b/defaults.k @@ -0,0 +1,75 @@ +# Info: KCL core lib defaults schemas for provisioning (Provisioning) +# Author: JesusPerezLorenzo +# Release: 0.0.4 +# Date: 15-12-2023 +import regex +import .lib + +schema ServerDefaults: + """ + Server Defaults settings + """ + lock: bool = False + # To use private network, IPs will be set in servers items + priv_cidr_block?: str + time_zone: str = "UTC" + + #zone?: str + # Second to wait before check in for running state + running_wait: int = 10 + # Total seconds to wait for running state before timeout + running_timeout: int = 200 + + # Specific AMIs can be used with their ID + # If 'storage_os: find' storage_os_find will be used to find one in zone (region) + # expected something like: "name=debian-12 | arch=x86_64" or "name: debian-12 | arch: x86_64" will be parsed to find latest available + storage_os_find: str = "name: debian-12 | arch: x86_64" + + #storage_os?: str + #storage_os: ami-0eb11ab33f229b26c + # If not Storage size, Plan Storage size will be used + # storages is defined in Provider defaults + #storages?: [Storage] + # Add one or more SSH keys to the admin account. Accepted values are SSH public keys or filenames from + # where to read the keys. 
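+ # Example (illustrative path): ssh_key_path: "~/.ssh/id_ed25519.pub"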
+ # ssh public key to be included in /root/.ssh/authorized_keys
+ ssh_key_path?: str
+ # Public certificate must be created or imported as a key_name
+ # use: providers/aws/bin/on-ssh.sh (add -h to get info)
+ ssh_key_name?: str
+ # Use it to rewrite or update ssh_key
+ # ssh_key_mode: rewrite
+ # AWS does not use the utility network; if no value is set, the utility IP will not be set
+ # Public network; if no value is set, the public IP will not be set
+ network_utility_ipv4: bool = True
+ network_utility_ipv6: bool = False
+ network_public_ipv4?: bool = True
+ network_public_ipv6?: bool = False
+ network_public_ip?: str
+ # TODO: settings for Elastic IPs or instances without a public IP
+ # To use a private network, a VPC + Subnet + NetworkInterface has to be created; IPs will be set in server items
+ # In AWS this is only a name
+ network_private_name?: str
+ network_private_id?: str
+ primary_dns?: str
+ secondary_dns?: str
+ main_domain?: str
+ domains_search?: str
+ # Labels to describe the server in `key: value` format, multiple can be declared.
+ # Usage: env: dev
+ labels: str
+ # Main user (default Debian user is admin)
+ user: str
+ user_ssh_key_path?: str
+ user_home?: str = "/home/${user}"
+ user_ssh_port?: int = 22
+ # If enabled, server entries will be added to /etc/hosts and $HOME/.ssh/config
+ fix_local_hosts: bool = True
+ installer_user?: str = "${user}"
+ scale?: lib.ScaleResource
+
+ check:
+     user == Undefined or len(user) > 0, "Check user value"
+     #len(ssh_key_path) > 0, "Check ssh_key_path"
+     priv_cidr_block == Undefined or regex.match(priv_cidr_block, "^(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(?:\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}\/(?:3[0-2]|[0-2]?[0-9])$"), "'priv_cidr_block = ${priv_cidr_block}' check value definition"
+
diff --git a/dependencies.k b/dependencies.k
new file mode 100644
index 0000000..7562a4e
--- /dev/null
+++ b/dependencies.k
@@ -0,0 +1,281 @@
+"""
+KCL Dependency Management Schema for Provisioning System
+Provides type-safe dependency declarations with resource requirements and health checks
+"""
+
+schema ResourceRequirement:
+    """Resource requirements for taskserv installation and operation"""
+    # CPU requirement (K8s format)
+    cpu?: str = "100m"
+    # Memory requirement (K8s format)
+    memory?: str = "128Mi"
+    # Disk space requirement
+    disk?: str = "1Gi"
+    # Requires network connectivity
+    network?: bool = True
+    # Requires privileged access
+    privileged?: bool = False
+
+    check:
+        len(cpu) > 0, "CPU requirement cannot be empty"
+        len(memory) > 0, "Memory requirement cannot be empty"
+        len(disk) > 0, "Disk requirement cannot be empty"
+
+schema HealthCheck:
+    """Health check definition for taskserv validation"""
+    # Command to execute for health check
+    command: str
+    # Check interval in seconds
+    interval?: int = 30
+    # Command timeout in seconds
+    timeout?: int = 10
+    # Number of retry attempts
+    retries?: int = 3
+    # Consecutive successes needed
+    success_threshold?: int = 1
+    # Consecutive failures to mark unhealthy
+    failure_threshold?: int = 3
+
+    check:
+        len(command) > 0, "Health check command cannot be empty"
+        interval > 0, "Health check interval must be positive"
+        timeout > 0, "Health check timeout must be positive"
+        retries >= 0, "Health check retries cannot be negative"
+
+schema InstallationPhase:
+    """Installation phase definition for ordered deployment"""
+    # Phase name (e.g., "pre-install", "install", "post-install")
+    name: str
+    # Execution order within phase (lower first)
+    order: int
+    # Can run in
parallel with same order + parallel?: bool = False + # Phase is required for successful installation + required?: bool = True + + check: + len(name) > 0, "Installation phase name cannot be empty" + order >= 0, "Installation phase order cannot be negative" + name in ["pre-install", "install", "post-install", "validate", "cleanup"], "Phase name must be one of: pre-install, install, post-install, validate, cleanup" + +schema TaskservDependencies: + """Complete dependency configuration for a taskserv""" + # Taskserv name (must match directory) + name: str + + # Dependency relationships + # Required taskservs (must be installed first) + requires?: [str] + # Conflicting taskservs (cannot coexist) + conflicts?: [str] + # Optional taskservs (install if available) + optional?: [str] + # Services this taskserv provides + provides?: [str] + # Resource requirements + # Resource requirements for installation + resources: ResourceRequirement + + # Health and validation + # Health check definitions + health_checks?: [HealthCheck] + # Readiness check for installation completion + readiness_probe?: HealthCheck + # Installation control + # Installation phase definitions + phases?: [InstallationPhase] + # Installation timeout in seconds + timeout?: int = 600 + # Number of installation retry attempts + retry_count?: int = 3 + # Compatibility + # Supported operating systems + os_support?: [str] = ["linux"] + # Supported CPU architectures + arch_support?: [str] = ["amd64"] + # Compatible Kubernetes versions + k8s_versions?: [str] + + check: + len(name) > 0, "Taskserv name cannot be empty" + name == name.lower(), "Taskserv name must be lowercase" + timeout > 0, "Installation timeout must be positive" + retry_count >= 0, "Retry count cannot be negative" + len(os_support) > 0, "Must specify at least one supported OS" + len(arch_support) > 0, "Must specify at least one supported architecture" + +# Re-export for taskserv use +schema TaskservDependency(TaskservDependencies): + """Alias for TaskservDependencies - provides the same functionality""" + +# OCI Registry Integration Schemas + +schema OCISource: + """OCI registry configuration for extension distribution""" + # OCI registry endpoint (localhost:5000, harbor.company.com) + registry: str + # Namespace in registry (provisioning-extensions, provisioning-platform) + namespace: str + # Path to authentication token file + auth_token_path?: str + # Enable TLS for registry connection + tls_enabled: bool = False + # Skip TLS certificate verification (insecure, dev only) + insecure_skip_verify: bool = False + # OCI platform architecture + platform: str = "linux/amd64" + # Media type for KCL packages + media_type: str = "application/vnd.kcl.package.v1+tar" + + check: + len(registry) > 0, "OCI registry endpoint required" + len(namespace) > 0, "OCI namespace required" + not (insecure_skip_verify and tls_enabled), \ + "insecure_skip_verify should only be used without TLS" + platform in ["linux/amd64", "linux/arm64", "darwin/amd64", "darwin/arm64"], \ + "Platform must be one of: linux/amd64, linux/arm64, darwin/amd64, darwin/arm64" + +schema GiteaSource: + """Gitea repository configuration for extension distribution""" + # Gitea server URL + url: str + # Organization/namespace containing repositories + organization: str + # Path to authentication token file + auth_token_path?: str + # Use SSH instead of HTTPS + use_ssh: bool = False + # Branch to use for extensions + branch: str = "main" + + check: + len(url) > 0, "Gitea URL required" + len(organization) > 0, "Gitea 
organization required" + url.startswith("http://") or url.startswith("https://"), \ + "Gitea URL must start with http:// or https://" + +schema LocalSource: + """Local filesystem configuration for extension distribution""" + # Absolute path to extensions directory + path: str + # Watch for changes and auto-reload + watch: bool = False + + check: + len(path) > 0, "Local source path required" + path.startswith("/") or path.startswith("~"), \ + "Local source path must be absolute" + +schema HTTPSource: + """Generic HTTP/HTTPS configuration for extension distribution""" + # HTTP/HTTPS URL + url: str + # Authentication header (e.g., "Bearer token123") + auth_header?: str + # Use HTTP basic auth + basic_auth?: bool = False + # Username for basic auth + username?: str + # Password for basic auth + password?: str + + check: + len(url) > 0, "HTTP URL required" + url.startswith("http://") or url.startswith("https://"), \ + "URL must start with http:// or https://" + not basic_auth or (username and password), \ + "Basic auth requires username and password" + +schema ExtensionSource: + """Extension source configuration with multi-backend support""" + # Source type + type: "oci" | "gitea" | "local" | "http" + + # OCI registry source configuration + oci?: OCISource + # Gitea source configuration + gitea?: GiteaSource + # Local filesystem source configuration + local?: LocalSource + # HTTP source configuration + http?: HTTPSource + + check: + (type == "oci" and oci != None) or \ + (type == "gitea" and gitea != None) or \ + (type == "local" and local != None) or \ + (type == "http" and http != None), \ + "Source configuration must match selected type" + +schema ExtensionManifest: + """Extension package manifest for OCI distribution""" + # Extension name (must match directory name) + name: str + # Extension type + type: "provider" | "taskserv" | "cluster" + # Semantic version + version: str + # Extension description + description?: str + # Extension author/maintainer + author?: str + # License identifier (SPDX) + license?: str = "MIT" + # Extension homepage URL + homepage?: str + # Repository URL + repository?: str + # Extension dependencies + dependencies?: {str: str} + # Extension tags/keywords + tags?: [str] + # Supported platforms + platforms?: [str] = ["linux/amd64"] + # Minimum provisioning core version + min_provisioning_version?: str + + check: + len(name) > 0, "Extension name required" + name == name.lower(), "Extension name must be lowercase" + len(version) > 0, "Extension version required" + # Semantic version format (basic check) + version.count(".") >= 2, "Version must be semantic (x.y.z)" + +schema RepositoryConfig: + """Multi-repository configuration for dependency management""" + # Repository name + name: str + # Repository type + type: "core" | "extensions" | "platform" | "workspace" + # Source configuration + source: ExtensionSource + # Repository version/tag + version?: str + # Enable repository + enabled: bool = True + # Repository priority (higher = more priority) + priority: int = 100 + # Cache TTL in seconds + cache_ttl: int = 3600 + + check: + len(name) > 0, "Repository name required" + priority >= 0, "Repository priority cannot be negative" + cache_ttl > 0, "Cache TTL must be positive" + +schema DependencyResolution: + """Dependency resolution configuration""" + # Resolution strategy + strategy: "strict" | "latest" | "minimal" + # Allow pre-release versions + allow_prerelease: bool = False + # Enable version pinning + pin_versions: bool = True + # Maximum dependency depth + 
max_depth: int = 10 + # Conflict resolution strategy + conflict_strategy: "error" | "latest" | "highest_priority" + + check: + max_depth > 0 and max_depth <= 100, \ + "Max depth must be between 1 and 100" diff --git a/docs/BEST_PRACTICES.md b/docs/BEST_PRACTICES.md new file mode 100644 index 0000000..75c940a --- /dev/null +++ b/docs/BEST_PRACTICES.md @@ -0,0 +1,1200 @@ +# KCL Best Practices for Provisioning + +This document outlines best practices for using and developing with the provisioning KCL package, covering schema design, workflow patterns, and operational guidelines. + +## Table of Contents + +- [Schema Design](#schema-design) +- [Workflow Patterns](#workflow-patterns) +- [Error Handling](#error-handling) +- [Performance Optimization](#performance-optimization) +- [Security Considerations](#security-considerations) +- [Testing Strategies](#testing-strategies) +- [Maintenance Guidelines](#maintenance-guidelines) + +## Schema Design + +### 1. Clear Naming Conventions + +```kcl +# ✅ Good: Descriptive, consistent naming +schema ProductionWebServer: + """Web server optimized for production workloads""" + hostname: str # Clear, specific field names + fully_qualified_domain_name?: str + environment_classification: "dev" | "staging" | "prod" + cost_allocation_center: str + operational_team_owner: str + +# ✅ Good: Consistent prefixes for related schemas +schema K8sDeploymentSpec: + """Kubernetes deployment specification""" + replica_count: int + container_definitions: [K8sContainerSpec] + volume_mount_configs: [K8sVolumeMountSpec] + +schema K8sContainerSpec: + """Kubernetes container specification""" + image_reference: str + resource_requirements: K8sResourceRequirements + +# ❌ Avoid: Ambiguous or inconsistent naming +schema Server: # ❌ Too generic + name: str # ❌ Ambiguous - hostname? display name? + env: str # ❌ Unclear - environment? variables? + cfg: {str: str} # ❌ Cryptic abbreviations +``` + +### 2. Comprehensive Documentation + +```kcl +# ✅ Good: Detailed documentation with examples +schema ServerConfiguration: + """ + Production server configuration following company standards. + + This schema defines servers for multi-tier applications with + proper security, monitoring, and operational requirements. 
+ + Example: + web_server: ServerConfiguration = ServerConfiguration { + hostname: "prod-web-01" + server_role: "frontend" + environment: "production" + cost_center: "engineering" + } + """ + + # Core identification (required) + hostname: str # DNS-compliant hostname (RFC 1123) + server_role: "frontend" | "backend" | "database" | "cache" + + # Environment and operational metadata + environment: "development" | "staging" | "production" + cost_center: str # Billing allocation identifier + primary_contact_team: str # Team responsible for maintenance + + # Security and compliance + security_zone: "dmz" | "internal" | "restricted" + compliance_requirements: [str] # e.g., ["pci", "sox", "hipaa"] + + # Optional operational settings + backup_policy?: str # Backup schedule identifier + monitoring_profile?: str # Monitoring configuration profile + + check: + # Hostname validation (DNS RFC 1123) + regex.match(hostname, "^[a-z0-9]([a-z0-9-]{0,61}[a-z0-9])?$"), + "Hostname must be DNS-compliant (RFC 1123): ${hostname}" + + # Environment-specific validations + environment == "production" and len(primary_contact_team) > 0, + "Production servers must specify primary contact team" + + # Security requirements + security_zone == "restricted" and "encryption" in compliance_requirements, + "Restricted zone servers must have encryption compliance" + +# ❌ Avoid: Minimal or missing documentation +schema Srv: # ❌ No documentation + h: str # ❌ No field documentation + t: str # ❌ Cryptic field names +``` + +### 3. Hierarchical Schema Design + +```kcl +# ✅ Good: Base schemas with specialized extensions +schema BaseInfrastructureResource: + """Foundation schema for all infrastructure resources""" + + # Universal metadata + resource_name: str + creation_timestamp?: str + last_modified_timestamp?: str + created_by_user?: str + + # Organizational metadata + cost_center: str + project_identifier: str + environment: "dev" | "staging" | "prod" + + # Operational metadata + tags: {str: str} = {} + monitoring_enabled: bool = True + + check: + len(resource_name) > 0 and len(resource_name) <= 63, + "Resource name must be 1-63 characters" + + regex.match(resource_name, "^[a-z0-9]([a-z0-9-]*[a-z0-9])?$"), + "Resource name must be DNS-label compatible" + +schema ComputeResource(BaseInfrastructureResource): + """Compute resources with CPU/memory specifications""" + + # Hardware specifications + cpu_cores: int + memory_gigabytes: int + storage_gigabytes: int + + # Performance characteristics + cpu_architecture: "x86_64" | "arm64" + performance_tier: "burstable" | "standard" | "high_performance" + + check: + cpu_cores > 0 and cpu_cores <= 128, + "CPU cores must be between 1 and 128" + + memory_gigabytes > 0 and memory_gigabytes <= 1024, + "Memory must be between 1GB and 1TB" + +schema ManagedDatabaseResource(BaseInfrastructureResource): + """Managed database service configuration""" + + # Database specifications + database_engine: "postgresql" | "mysql" | "redis" | "mongodb" + engine_version: str + instance_class: str + + # High availability and backup + multi_availability_zone: bool = False + backup_retention_days: int = 7 + automated_backup_enabled: bool = True + + # Security + encryption_at_rest: bool = True + encryption_in_transit: bool = True + + check: + environment == "prod" and multi_availability_zone == True, + "Production databases must enable multi-AZ" + + environment == "prod" and backup_retention_days >= 30, + "Production databases need minimum 30 days backup retention" +``` + +### 4. 
Flexible Configuration Patterns + +```kcl +# ✅ Good: Environment-aware defaults +schema EnvironmentAdaptiveConfiguration: + """Configuration that adapts based on environment""" + + environment: "dev" | "staging" | "prod" + + # Computed defaults based on environment + default_timeout_seconds: int = ( + environment == "prod" ? 300 : ( + environment == "staging" ? 180 : 60 + ) + ) + + default_retry_attempts: int = ( + environment == "prod" ? 5 : ( + environment == "staging" ? 3 : 1 + ) + ) + + resource_allocation: ComputeResource = ComputeResource { + resource_name: "default-compute" + cost_center: "shared" + project_identifier: "infrastructure" + environment: environment + + # Environment-specific resource sizing + cpu_cores: environment == "prod" ? 4 : (environment == "staging" ? 2 : 1) + memory_gigabytes: environment == "prod" ? 8 : (environment == "staging" ? 4 : 2) + storage_gigabytes: environment == "prod" ? 100 : 50 + + cpu_architecture: "x86_64" + performance_tier: environment == "prod" ? "high_performance" : "standard" + } + + monitoring_configuration: MonitoringConfig = MonitoringConfig { + collection_interval_seconds: environment == "prod" ? 15 : 60 + retention_days: environment == "prod" ? 90 : 30 + alert_thresholds: environment == "prod" ? "strict" : "relaxed" + } + +# ✅ Good: Composable configuration with mixins +schema SecurityMixin: + """Security-related configuration that can be mixed into other schemas""" + + encryption_enabled: bool = True + access_logging_enabled: bool = True + security_scan_enabled: bool = True + + # Security-specific validations + check: + encryption_enabled == True, + "Encryption must be enabled for security compliance" + +schema ComplianceMixin: + """Compliance-related configuration""" + + compliance_frameworks: [str] = [] + audit_logging_enabled: bool = False + data_retention_policy?: str + + check: + len(compliance_frameworks) > 0 and audit_logging_enabled == True, + "Compliance frameworks require audit logging" + +schema SecureComputeResource(ComputeResource, SecurityMixin, ComplianceMixin): + """Compute resource with security and compliance requirements""" + + # Additional security requirements for compute + secure_boot_enabled: bool = True + encrypted_storage: bool = True + + check: + # Inherit all parent validations, plus additional ones + "pci" in compliance_frameworks and encrypted_storage == True, + "PCI compliance requires encrypted storage" +``` + +## Workflow Patterns + +### 1. Dependency Management + +```kcl +# ✅ Good: Clear dependency patterns with proper error handling +schema InfrastructureWorkflow(main.BatchWorkflow): + """Infrastructure deployment with proper dependency management""" + + # Categorize operations for dependency analysis + foundation_operations: [str] = [] # Network, security groups, etc. 
+ compute_operations: [str] = [] # Servers, instances + service_operations: [str] = [] # Applications, databases + validation_operations: [str] = [] # Testing, health checks + + check: + # Foundation must come first + all([ + len([dep for dep in op.dependencies or [] + if dep.target_operation_id in foundation_operations]) > 0 + for op in operations + if op.operation_id in compute_operations + ]) or len(compute_operations) == 0, + "Compute operations must depend on foundation operations" + + # Services depend on compute + all([ + len([dep for dep in op.dependencies or [] + if dep.target_operation_id in compute_operations]) > 0 + for op in operations + if op.operation_id in service_operations + ]) or len(service_operations) == 0, + "Service operations must depend on compute operations" + +# Example usage with proper dependency chains +production_deployment: InfrastructureWorkflow = InfrastructureWorkflow { + workflow_id: "prod-infra-2025-001" + name: "Production Infrastructure Deployment" + + foundation_operations: ["create_vpc", "setup_security_groups"] + compute_operations: ["create_web_servers", "create_db_servers"] + service_operations: ["install_applications", "configure_databases"] + validation_operations: ["run_health_checks", "validate_connectivity"] + + operations: [ + # Foundation layer + main.BatchOperation { + operation_id: "create_vpc" + name: "Create VPC and Networking" + operation_type: "custom" + action: "create" + parameters: {"cidr": "10.0.0.0/16"} + priority: 10 + timeout: 600 + }, + + # Compute layer (depends on foundation) + main.BatchOperation { + operation_id: "create_web_servers" + name: "Create Web Servers" + operation_type: "server" + action: "create" + parameters: {"count": "3", "type": "web"} + dependencies: [ + main.DependencyDef { + target_operation_id: "create_vpc" + dependency_type: "sequential" + timeout: 300 + fail_on_dependency_error: True + } + ] + priority: 8 + timeout: 900 + }, + + # Service layer (depends on compute) + main.BatchOperation { + operation_id: "install_applications" + name: "Install Web Applications" + operation_type: "taskserv" + action: "create" + parameters: {"apps": ["nginx", "prometheus"]} + dependencies: [ + main.DependencyDef { + target_operation_id: "create_web_servers" + dependency_type: "conditional" + conditions: ["servers_ready", "ssh_accessible"] + timeout: 600 + } + ] + priority: 6 + } + ] +} +``` + +### 2. Multi-Environment Workflows + +```kcl +# ✅ Good: Environment-specific workflow configurations +schema MultiEnvironmentWorkflow: + """Workflow that adapts to different environments""" + + base_workflow: main.BatchWorkflow + target_environment: "dev" | "staging" | "prod" + + # Environment-specific overrides + environment_config: EnvironmentConfig = EnvironmentConfig { + environment: target_environment + + # Adjust parallelism based on environment + max_parallel: target_environment == "prod" ? 3 : 5 + + # Adjust timeouts + operation_timeout_multiplier: target_environment == "prod" ? 1.5 : 1.0 + + # Monitoring intensity + monitoring_level: target_environment == "prod" ? 
"comprehensive" : "basic" + } + + # Generate final workflow with environment adaptations + final_workflow: main.BatchWorkflow = main.BatchWorkflow { + workflow_id: f"{base_workflow.workflow_id}-{target_environment}" + name: f"{base_workflow.name} ({target_environment})" + description: base_workflow.description + + operations: [ + main.BatchOperation { + operation_id: op.operation_id + name: op.name + operation_type: op.operation_type + provider: op.provider + action: op.action + parameters: op.parameters + dependencies: op.dependencies + + # Environment-adapted timeout + timeout: int(op.timeout * environment_config.operation_timeout_multiplier) + + # Environment-adapted priority + priority: op.priority + allow_parallel: op.allow_parallel + + # Environment-specific retry policy + retry_policy: main.RetryPolicy { + max_attempts: target_environment == "prod" ? 3 : 2 + initial_delay: target_environment == "prod" ? 30 : 10 + backoff_multiplier: 2 + } + } + for op in base_workflow.operations + ] + + max_parallel_operations: environment_config.max_parallel + global_timeout: base_workflow.global_timeout + fail_fast: target_environment == "prod" ? False : True + + # Environment-specific storage + storage: main.StorageConfig { + backend: target_environment == "prod" ? "surrealdb" : "filesystem" + base_path: f"./workflows/{target_environment}" + enable_persistence: target_environment != "dev" + retention_hours: target_environment == "prod" ? 2160 : 168 # 90 days vs 1 week + } + + # Environment-specific monitoring + monitoring: main.MonitoringConfig { + enabled: True + backend: "prometheus" + enable_tracing: target_environment == "prod" + enable_notifications: target_environment != "dev" + log_level: target_environment == "dev" ? "debug" : "info" + } + } + +# Usage for different environments +dev_deployment: MultiEnvironmentWorkflow = MultiEnvironmentWorkflow { + target_environment: "dev" + base_workflow: main.BatchWorkflow { + workflow_id: "webapp-deploy" + name: "Web Application Deployment" + operations: [ + # ... base operations + ] + } +} + +prod_deployment: MultiEnvironmentWorkflow = MultiEnvironmentWorkflow { + target_environment: "prod" + base_workflow: dev_deployment.base_workflow # Reuse same base workflow +} +``` + +### 3. Error Recovery Patterns + +```kcl +# ✅ Good: Comprehensive error recovery strategy +schema ResilientWorkflow(main.BatchWorkflow): + """Workflow with advanced error recovery capabilities""" + + # Error categorization + critical_operations: [str] = [] # Operations that cannot fail + optional_operations: [str] = [] # Operations that can be skipped + retry_operations: [str] = [] # Operations with custom retry logic + + # Recovery strategies + global_error_strategy: "fail_fast" | "continue_on_error" | "intelligent" = "intelligent" + + # Enhanced operations with error handling + enhanced_operations: [EnhancedBatchOperation] = [ + EnhancedBatchOperation { + base_operation: op + is_critical: op.operation_id in critical_operations + is_optional: op.operation_id in optional_operations + custom_retry: op.operation_id in retry_operations + + # Adaptive retry policy based on operation characteristics + adaptive_retry_policy: main.RetryPolicy { + max_attempts: ( + is_critical ? 5 : ( + is_optional ? 1 : 3 + ) + ) + initial_delay: is_critical ? 60 : 30 + max_delay: is_critical ? 900 : 300 + backoff_multiplier: 2 + retry_on_errors: [ + "timeout", + "connection_error", + "rate_limit" + ] + (is_critical ? 
[ + "resource_unavailable", + "quota_exceeded" + ] : []) + } + + # Adaptive rollback strategy + adaptive_rollback_strategy: main.RollbackStrategy { + enabled: True + strategy: is_critical ? "manual" : "immediate" + preserve_partial_state: is_critical + custom_rollback_operations: is_critical ? [ + "notify_engineering_team", + "create_incident_ticket", + "preserve_debug_info" + ] : [] + } + } + for op in operations + ] + +schema EnhancedBatchOperation: + """Batch operation with enhanced error handling""" + + base_operation: main.BatchOperation + is_critical: bool = False + is_optional: bool = False + custom_retry: bool = False + + adaptive_retry_policy: main.RetryPolicy + adaptive_rollback_strategy: main.RollbackStrategy + + # Circuit breaker pattern + failure_threshold: int = 3 + recovery_timeout_seconds: int = 300 + + check: + not (is_critical and is_optional), + "Operation cannot be both critical and optional" +``` + +## Error Handling + +### 1. Graceful Degradation + +```kcl +# ✅ Good: Graceful degradation for non-critical components +schema GracefulDegradationWorkflow(main.BatchWorkflow): + """Workflow that can degrade gracefully on partial failures""" + + # Categorize operations by importance + core_operations: [str] = [] # Must succeed + enhancement_operations: [str] = [] # Nice to have + monitoring_operations: [str] = [] # Can be skipped if needed + + # Minimum viable deployment definition + minimum_viable_operations: [str] = core_operations + + # Degradation strategy + degradation_policy: DegradationPolicy = DegradationPolicy { + allow_partial_deployment: True + minimum_success_percentage: 80.0 + + operation_priorities: { + # Core operations (must succeed) + op_id: 10 for op_id in core_operations + } | { + # Enhancement operations (should succeed) + op_id: 5 for op_id in enhancement_operations + } | { + # Monitoring operations (can fail) + op_id: 1 for op_id in monitoring_operations + } + } + + check: + # Ensure minimum viable deployment is achievable + len(minimum_viable_operations) > 0, + "Must specify at least one operation for minimum viable deployment" + + # Core operations should not depend on enhancement operations + all([ + all([ + dep.target_operation_id not in enhancement_operations + for dep in op.dependencies or [] + ]) + for op in operations + if op.operation_id in core_operations + ]), + "Core operations should not depend on enhancement operations" + +schema DegradationPolicy: + """Policy for graceful degradation""" + + allow_partial_deployment: bool = False + minimum_success_percentage: float = 100.0 + operation_priorities: {str: int} = {} + + # Fallback configurations + fallback_configurations: {str: str} = {} + emergency_contacts: [str] = [] + + check: + 0.0 <= minimum_success_percentage and minimum_success_percentage <= 100.0, + "Success percentage must be between 0 and 100" +``` + +### 2. 
Circuit Breaker Patterns

```kcl
# ✅ Good: Circuit breaker for external dependencies
schema CircuitBreakerOperation(main.BatchOperation):
    """Operation with circuit breaker pattern for external dependencies"""

    # Circuit breaker configuration
    circuit_breaker_enabled: bool = False
    failure_threshold: int = 5
    recovery_timeout_seconds: int = 300

    # Health check configuration
    health_check_endpoint?: str
    health_check_interval_seconds: int = 30

    # Fallback behavior
    fallback_enabled: bool = False
    fallback_operation?: main.BatchOperation

    check:
        # Written as implications so the constraints only bind when the feature is enabled
        not circuit_breaker_enabled or failure_threshold > 0, "Circuit breaker must have positive failure threshold"

        not circuit_breaker_enabled or recovery_timeout_seconds > 0, "Circuit breaker must have positive recovery timeout"

        not fallback_enabled or fallback_operation != Undefined, "Fallback requires fallback operation definition"

# Example: Database operation with circuit breaker
database_operation_with_circuit_breaker: CircuitBreakerOperation = CircuitBreakerOperation {
    # Base operation
    operation_id: "setup_database"
    name: "Setup Production Database"
    operation_type: "server"
    action: "create"
    parameters: {"service": "postgresql", "version": "15"}
    timeout: 1800

    # Circuit breaker settings
    circuit_breaker_enabled: True
    failure_threshold: 3
    recovery_timeout_seconds: 600

    # Health monitoring
    health_check_endpoint: "http://db-health.internal/health"
    health_check_interval_seconds: 60

    # Fallback to read replica
    fallback_enabled: True
    fallback_operation: main.BatchOperation {
        operation_id: "setup_database_readonly"
        name: "Setup Read-Only Database Fallback"
        operation_type: "server"
        action: "create"
        parameters: {"service": "postgresql", "mode": "readonly"}
        timeout: 900
    }
}
```
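The schema above only declares thresholds; the state machine that trips and resets the breaker lives in the executor, outside this package. As an illustrative sketch (the counter fields are hypothetical inputs supplied by the executor, not part of `main`), a runtime could derive the breaker state like this:

```kcl
# Sketch only: derive open/half_open/closed from executor-supplied counters
schema CircuitBreakerState:
    recent_failures: int = 0
    seconds_since_last_failure: int = 0
    failure_threshold: int = 5
    recovery_timeout_seconds: int = 300

    # "open" blocks calls, "half_open" allows a probe, "closed" is normal operation
    state: str = (
        "open" if recent_failures >= failure_threshold and seconds_since_last_failure < recovery_timeout_seconds else (
            "half_open" if recent_failures >= failure_threshold else "closed"
        )
    )
```

## Performance Optimization

### 1. 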
Parallel Execution Strategies + +```kcl +# ✅ Good: Intelligent parallelization +schema OptimizedParallelWorkflow(main.BatchWorkflow): + """Workflow optimized for parallel execution""" + + # Parallel execution groups + parallel_groups: [[str]] = [] # Groups of operations that can run in parallel + + # Resource-aware scheduling + resource_requirements: {str: ResourceRequirement} = {} + total_available_resources: ResourceCapacity = ResourceCapacity { + max_cpu_cores: 16 + max_memory_gb: 64 + max_network_bandwidth_mbps: 1000 + max_concurrent_operations: 10 + } + + # Computed optimal parallelism + optimal_parallel_limit: int = min([ + total_available_resources.max_concurrent_operations, + len(operations), + 8 # Reasonable default maximum + ]) + + # Generate workflow with optimized settings + optimized_workflow: main.BatchWorkflow = main.BatchWorkflow { + workflow_id: workflow_id + name: name + description: description + + operations: [ + OptimizedBatchOperation { + base_operation: op + resource_hint: resource_requirements[op.operation_id] or ResourceRequirement { + cpu_cores: 1 + memory_gb: 2 + estimated_duration_seconds: op.timeout / 2 + } + + # Enable parallelism for operations in parallel groups + computed_allow_parallel: any([ + op.operation_id in group and len(group) > 1 + for group in parallel_groups + ]) + } + for op in operations + ] + + max_parallel_operations: optimal_parallel_limit + global_timeout: global_timeout + fail_fast: fail_fast + + # Optimize storage for performance + storage: main.StorageConfig { + backend: "surrealdb" # Better for concurrent access + enable_compression: False # Trade space for speed + connection_config: { + "connection_pool_size": str(optimal_parallel_limit * 2) + "max_retries": "3" + "timeout": "30" + } + } + } + +schema OptimizedBatchOperation: + """Batch operation with performance optimizations""" + + base_operation: main.BatchOperation + resource_hint: ResourceRequirement + computed_allow_parallel: bool + + # Performance-optimized operation + optimized_operation: main.BatchOperation = main.BatchOperation { + operation_id: base_operation.operation_id + name: base_operation.name + operation_type: base_operation.operation_type + provider: base_operation.provider + action: base_operation.action + parameters: base_operation.parameters + dependencies: base_operation.dependencies + + # Optimized settings + timeout: max([base_operation.timeout, resource_hint.estimated_duration_seconds * 2]) + allow_parallel: computed_allow_parallel + priority: base_operation.priority + + # Performance-oriented retry policy + retry_policy: main.RetryPolicy { + max_attempts: 2 # Fewer retries for faster failure detection + initial_delay: 10 + max_delay: 60 + backoff_multiplier: 1.5 + retry_on_errors: ["timeout", "rate_limit"] # Only retry fast-failing errors + } + } + +schema ResourceRequirement: + """Resource requirements for performance planning""" + cpu_cores: int = 1 + memory_gb: int = 2 + estimated_duration_seconds: int = 300 + io_intensive: bool = False + network_intensive: bool = False + +schema ResourceCapacity: + """Available resource capacity""" + max_cpu_cores: int + max_memory_gb: int + max_network_bandwidth_mbps: int + max_concurrent_operations: int +``` + +### 2. 
Caching and Memoization

```kcl
# ✅ Good: Caching for expensive operations
schema CachedOperation(main.BatchOperation):
    """Operation with caching capabilities"""

    # Caching configuration
    cache_enabled: bool = False
    cache_key_template: str = "${operation_id}-${provider}-${action}"
    cache_ttl_seconds: int = 3600    # 1 hour default

    # Cache invalidation rules
    cache_invalidation_triggers: [str] = []
    force_cache_refresh: bool = False

    # Computed cache key
    computed_cache_key: str = "${operation_id}-${provider}-${action}"

    # Cache-aware timeout (shorter if cache hit expected)
    cache_aware_timeout: int = int(timeout / 2) if cache_enabled else timeout

    check:
        not cache_enabled or cache_ttl_seconds > 0, "Cache TTL must be positive when caching is enabled"

# Example: Cached provider operations
cached_server_creation: CachedOperation = CachedOperation {
    # Base operation
    operation_id: "create_standardized_servers"
    name: "Create Standardized Web Servers"
    operation_type: "server"
    provider: "upcloud"
    action: "create"
    parameters: {
        "plan": "2xCPU-4GB"
        "zone": "fi-hel2"
        "image": "ubuntu-22.04"
    }
    timeout: 900

    # Caching settings
    cache_enabled: True
    cache_key_template: "server-${plan}-${zone}-${image}"
    cache_ttl_seconds: 7200    # 2 hours

    # Cache invalidation
    cache_invalidation_triggers: ["image_updated", "plan_changed"]
}
```

## Security Considerations

### 1. Secure Configuration Management

```kcl
# ✅ Good: Secure configuration with proper secret handling
schema SecureConfiguration:
    """Security-first configuration management"""

    # Secret management
    secrets_provider: main.SecretProvider = main.SecretProvider {
        provider: "sops"
        sops_config: main.SopsConfig {
            config_path: "./.sops.yaml"
            age_key_file: "{{env.HOME}}/.config/sops/age/keys.txt"
            use_age: True
        }
    }

    # Security classifications
    data_classification: "public" | "internal" | "confidential" | "restricted"
    encryption_required: bool = data_classification != "public"
    audit_logging_required: bool = data_classification in ["confidential", "restricted"]

    # Audit sinks (must be non-empty when audit logging is required)
    audit_log_destinations: [str] = []

    # Access control
    allowed_environments: [str] = ["dev", "staging", "prod"]
    environment_access_matrix: {str: [str]} = {
        "dev": ["developers", "qa_team"]
        "staging": ["developers", "qa_team", "release_team"]
        "prod": ["release_team", "operations_team"]
    }

    # Network security
    network_isolation_required: bool = data_classification in ["confidential", "restricted"]
    vpc_isolation: bool = network_isolation_required
    private_subnets_only: bool = data_classification == "restricted"

    check:
        data_classification != "restricted" or encryption_required == True, "Restricted data must be encrypted"

        not audit_logging_required or len(audit_log_destinations) > 0, "Audit logging destinations must be specified for sensitive data"

# Example: Production security configuration
production_security: SecureConfiguration = SecureConfiguration {
    data_classification: "confidential"
    # encryption_required automatically becomes True
    # audit_logging_required automatically becomes True
    # network_isolation_required automatically becomes True

    allowed_environments: ["staging", "prod"]
    environment_access_matrix: {
        "staging": ["release_team", "security_team"]
        "prod": ["operations_team", "security_team"]
    }

    audit_log_destinations: [
        "siem://security.company.com",
        "s3://audit-logs-prod/workflows"
    ]
}
```
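Because the stricter flags are derived from `data_classification`, a quick sanity check is cheap. The instance below is illustrative only: a `public` classification leaves encryption and audit logging off, so the audit-destination check does not bind:

```kcl
# Derived flags follow the classification automatically
public_config: SecureConfiguration = SecureConfiguration {
    data_classification: "public"
    # encryption_required == False, audit_logging_required == False,
    # so no audit_log_destinations are demanded here
}
```

### 2. 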
Compliance and Auditing + +```kcl +# ✅ Good: Compliance-aware workflow design +schema ComplianceWorkflow(main.BatchWorkflow): + """Workflow with built-in compliance features""" + + # Compliance framework requirements + compliance_frameworks: [str] = [] + compliance_metadata: ComplianceMetadata = ComplianceMetadata { + frameworks: compliance_frameworks + audit_trail_required: "sox" in compliance_frameworks or "pci" in compliance_frameworks + data_residency_requirements: "gdpr" in compliance_frameworks ? ["eu"] : [] + retention_requirements: get_retention_requirements(compliance_frameworks) + } + + # Enhanced workflow with compliance features + compliant_workflow: main.BatchWorkflow = main.BatchWorkflow { + workflow_id: workflow_id + name: name + description: description + + operations: [ + ComplianceAwareBatchOperation { + base_operation: op + compliance_metadata: compliance_metadata + }.compliant_operation + for op in operations + ] + + # Compliance-aware storage + storage: main.StorageConfig { + backend: "surrealdb" + enable_persistence: True + retention_hours: compliance_metadata.retention_requirements.workflow_data_hours + enable_compression: False # For audit clarity + encryption: compliance_metadata.audit_trail_required ? main.SecretProvider { + provider: "sops" + sops_config: main.SopsConfig { + config_path: "./.sops.yaml" + age_key_file: "{{env.HOME}}/.config/sops/age/keys.txt" + use_age: True + } + } : Undefined + } + + # Compliance-aware monitoring + monitoring: main.MonitoringConfig { + enabled: True + backend: "prometheus" + enable_tracing: compliance_metadata.audit_trail_required + enable_notifications: True + log_level: "info" + collection_interval: compliance_metadata.audit_trail_required ? 15 : 30 + } + + # Audit trail in execution context + execution_context: execution_context | { + "compliance_frameworks": str(compliance_frameworks) + "audit_trail_enabled": str(compliance_metadata.audit_trail_required) + "data_classification": "confidential" + } + } + +schema ComplianceMetadata: + """Metadata for compliance requirements""" + frameworks: [str] + audit_trail_required: bool + data_residency_requirements: [str] + retention_requirements: RetentionRequirements + +schema RetentionRequirements: + """Data retention requirements based on compliance""" + workflow_data_hours: int = 8760 # 1 year default + audit_log_hours: int = 26280 # 3 years default + backup_retention_hours: int = 43800 # 5 years default + +schema ComplianceAwareBatchOperation: + """Batch operation with compliance awareness""" + base_operation: main.BatchOperation + compliance_metadata: ComplianceMetadata + + compliant_operation: main.BatchOperation = main.BatchOperation { + operation_id: base_operation.operation_id + name: base_operation.name + operation_type: base_operation.operation_type + provider: base_operation.provider + action: base_operation.action + parameters: base_operation.parameters | ( + compliance_metadata.audit_trail_required ? { + "audit_enabled": "true" + "compliance_mode": "strict" + } : {} + ) + dependencies: base_operation.dependencies + timeout: base_operation.timeout + allow_parallel: base_operation.allow_parallel + priority: base_operation.priority + + # Enhanced retry for compliance + retry_policy: main.RetryPolicy { + max_attempts: compliance_metadata.audit_trail_required ? 
5 : 3
            initial_delay: 30
            max_delay: 300
            backoff_multiplier: 2
            retry_on_errors: ["timeout", "connection_error", "rate_limit"]
        }

        # Conservative rollback for compliance
        rollback_strategy: main.RollbackStrategy {
            enabled: True
            strategy: "manual"    # Manual approval for compliance
            preserve_partial_state: True
            rollback_timeout: 1800
            custom_rollback_operations: [
                "create_audit_entry",
                "notify_compliance_team",
                "preserve_evidence"
            ]
        }
    }

# Helper for retention requirements (KCL has no `def`; use a lambda)
get_retention_requirements = lambda frameworks: [str] -> RetentionRequirements {
    if "sox" in frameworks:
        result = RetentionRequirements {
            workflow_data_hours: 43800      # 5 years
            audit_log_hours: 61320          # 7 years
            backup_retention_hours: 87600   # 10 years
        }
    elif "pci" in frameworks:
        result = RetentionRequirements {
            workflow_data_hours: 8760       # 1 year
            audit_log_hours: 26280          # 3 years
            backup_retention_hours: 43800   # 5 years
        }
    else:
        result = RetentionRequirements {
            workflow_data_hours: 8760       # 1 year default
            audit_log_hours: 26280          # 3 years default
            backup_retention_hours: 43800   # 5 years default
        }
    result
}
```

## Testing Strategies

### 1. Schema Testing

```bash
#!/bin/bash
# Schema testing script

# Test 1: Formatting (kcl fmt also fails on syntax errors)
echo "Testing schema syntax..."
find . -name "*.k" -exec kcl fmt {} \;

# Test 2: Schema compilation
echo "Testing schema compilation..."
for file in *.k; do
    echo "Testing $file"
    kcl run "$file" > /dev/null || echo "FAILED: $file"
done

# Test 3: Constraint validation
echo "Testing constraints..."
kcl run test_constraints.k

# Test 4: JSON serialization
echo "Testing JSON serialization..."
kcl run examples/simple_workflow.k --format json | jq '.' > /dev/null

# Test 5: Cross-schema compatibility
echo "Testing cross-schema compatibility..."
kcl run integration_test.k
```

### 2. Validation Testing

```kcl
# Test configuration for validation
test_validation_cases: {
    # Valid cases (schema instantiations as plain values)
    valid_server: main.Server {
        hostname: "test-01"
        title: "Test Server"
        labels: "env: test"
        user: "test"
    }

    # Edge cases
    minimal_workflow: main.BatchWorkflow {
        workflow_id: "minimal"
        name: "Minimal Test Workflow"
        operations: [
            main.BatchOperation {
                operation_id: "test_op"
                name: "Test Operation"
                operation_type: "custom"
                action: "test"
                parameters: {}
            }
        ]
    }

    # Boundary testing
    max_timeout_operation: main.BatchOperation {
        operation_id: "max_timeout"
        name: "Maximum Timeout Test"
        operation_type: "custom"
        action: "test"
        parameters: {}
        timeout: 86400    # 24 hours - test upper boundary
    }
}
```

## Maintenance Guidelines

### 1. 
Schema Evolution

```kcl
# ✅ Good: Backward-compatible schema evolution
schema ServerV2(main.Server):
    """Enhanced server schema with backward compatibility"""

    # New optional fields (backward compatible)
    performance_profile?: "standard" | "high_performance" | "burstable"
    auto_scaling_enabled?: bool = False

    # Deprecated fields (marked but still supported)
    deprecated_field?: str    # TODO: Remove in v3.0

    # Version metadata
    schema_version: str = "2.0"

    check:
        # Maintain existing validations
        len(hostname) > 0, "Hostname required"
        len(title) > 0, "Title required"

        # New validations for new fields
        not (auto_scaling_enabled == True and performance_profile == "burstable"), "Auto-scaling not compatible with burstable performance profile"

# Migration helper
schema ServerMigration:
    """Helper for migrating from ServerV1 to ServerV2"""

    v1_server: main.Server

    v2_server: ServerV2 = ServerV2 {
        # Copy all existing fields
        hostname: v1_server.hostname
        title: v1_server.title
        labels: v1_server.labels
        user: v1_server.user

        # Set defaults for new fields
        performance_profile: "standard"
        auto_scaling_enabled: False

        # Copy optional fields if they exist
        taskservs: v1_server.taskservs
        cluster: v1_server.cluster
    }
```

### 2. Documentation Updates

```kcl
# ✅ Good: Self-documenting schemas with examples
schema DocumentedWorkflow(main.BatchWorkflow):
    """
    Production workflow with comprehensive documentation

    This workflow follows company best practices for:
    - Multi-environment deployment
    - Error handling and recovery
    - Security and compliance
    - Performance optimization

    Example Usage:
        prod_workflow: DocumentedWorkflow = DocumentedWorkflow {
            environment: "prod"
            security_level: "high"
            base_workflow: main.BatchWorkflow {
                workflow_id: "webapp-deploy-001"
                name: "Web Application Deployment"
                operations: [...]
            }
        }

    See Also:
        - examples/production_workflow.k
        - docs/WORKFLOW_PATTERNS.md
        - docs/SECURITY_GUIDELINES.md
    """

    # Required metadata for documentation
    environment: "dev" | "staging" | "prod"
    security_level: "low" | "medium" | "high"
    base_workflow: main.BatchWorkflow

    # Auto-generated documentation fields
    documentation_generated_at: str = "{{now.date}}"
    schema_version: str = "1.0"

    check:
        # Implication: only production forces the high security level
        environment != "prod" or security_level == "high", "Production workflows must use high security level"
```

This comprehensive best practices guide provides the foundation for creating maintainable, secure, and performant KCL configurations for the provisioning system.
\ No newline at end of file
diff --git a/docs/VALIDATION.md b/docs/VALIDATION.md
new file mode 100644
index 0000000..fb3e8ed
--- /dev/null
+++ b/docs/VALIDATION.md
@@ -0,0 +1,800 @@
# Schema Validation and Best Practices

This document provides comprehensive guidance on validating KCL schemas and following best practices for the provisioning package. 
+ +## Table of Contents + +- [Schema Validation](#schema-validation) +- [Built-in Constraints](#built-in-constraints) +- [Custom Validation](#custom-validation) +- [Best Practices](#best-practices) +- [Common Patterns](#common-patterns) +- [Troubleshooting](#troubleshooting) + +## Schema Validation + +### Basic Validation + +```bash +# Validate syntax and run schema checks +kcl run config.k + +# Format and validate all files +kcl fmt *.k + +# Validate with verbose output +kcl run config.k --debug + +# Validate against specific schema +kcl vet config.k --schema main.Server +``` + +### JSON Output Validation + +```bash +# Generate and validate JSON output +kcl run config.k --format json | jq '.' + +# Validate JSON schema structure +kcl run config.k --format json | jq '.workflow_id // error("Missing workflow_id")' + +# Pretty print for inspection +kcl run config.k --format json | jq '.operations[] | {operation_id, name, provider}' +``` + +### Validation in CI/CD + +```yaml +# GitHub Actions example +- name: Validate KCL Schemas + run: | + find . -name "*.k" -exec kcl fmt {} \; + find . -name "*.k" -exec kcl run {} \; + +# Check for schema changes +- name: Check Schema Compatibility + run: | + kcl run main.k --format json > current_schema.json + diff expected_schema.json current_schema.json +``` + +## Built-in Constraints + +### Server Schema Constraints + +```kcl +import .main + +# ✅ Valid server configuration +valid_server: main.Server = main.Server { + hostname: "web-01" # ✅ Non-empty string required + title: "Web Server" # ✅ Non-empty string required + labels: "env: prod" # ✅ Required field + user: "admin" # ✅ Required field + + # Optional but validated fields + user_ssh_port: 22 # ✅ Valid port number + running_timeout: 300 # ✅ Positive integer + time_zone: "UTC" # ✅ Valid timezone string +} + +# ❌ Invalid configurations that will fail validation +invalid_examples: { + # hostname: "" # ❌ Empty hostname not allowed + # title: "" # ❌ Empty title not allowed + # user_ssh_port: -1 # ❌ Negative port not allowed + # running_timeout: 0 # ❌ Zero timeout not allowed +} +``` + +### Workflow Schema Constraints + +```kcl +import .main + +# ✅ Valid workflow with proper constraints +valid_workflow: main.BatchWorkflow = main.BatchWorkflow { + workflow_id: "deploy_001" # ✅ Non-empty ID required + name: "Production Deployment" # ✅ Non-empty name required + operations: [ # ✅ At least one operation required + main.BatchOperation { + operation_id: "create_servers" # ✅ Unique operation ID + name: "Create Servers" + operation_type: "server" + action: "create" + parameters: {} + timeout: 600 # ✅ Positive timeout + priority: 5 # ✅ Valid priority + } + ] + max_parallel_operations: 3 # ✅ Non-negative number + global_timeout: 3600 # ✅ Positive global timeout +} + +# ❌ Constraint violations +constraint_violations: { + # workflow_id: "" # ❌ Empty workflow ID + # operations: [] # ❌ Empty operations list + # max_parallel_operations: -1 # ❌ Negative parallel limit + # global_timeout: 0 # ❌ Zero global timeout +} +``` + +### Kubernetes Schema Constraints + +```kcl +import .main + +# ✅ Valid Kubernetes deployment with constraints +valid_k8s: main.K8sDeploy = main.K8sDeploy { + name: "webapp" # ✅ Non-empty name + namespace: "production" # ✅ Valid namespace + + spec: main.K8sDeploySpec { + replicas: 3 # ✅ Positive replica count + containers: [ # ✅ At least one container required + main.K8sContainers { + name: "app" # ✅ Non-empty container name + image: "nginx:1.21" # ✅ Valid image reference + + resources_requests: 
main.K8sResources {
                    memory: "128Mi"    # ✅ Valid K8s memory format
                    cpu: "100m"        # ✅ Valid K8s CPU format
                }

                resources_limits: main.K8sResources {
                    memory: "256Mi"    # ✅ Limits >= requests (enforced)
                    cpu: "200m"
                }
            }
        ]
    }
}
```

### Dependency Schema Constraints

```kcl
import .main

# ✅ Valid dependency definitions
valid_dependencies: main.TaskservDependencies = main.TaskservDependencies {
    name: "kubernetes"    # ✅ Lowercase name required

    requires: ["containerd", "cni"]    # ✅ Valid dependency list
    conflicts: ["docker"]              # ✅ Cannot coexist with docker

    resources: main.ResourceRequirement {
        cpu: "100m"        # ✅ Non-empty CPU requirement
        memory: "128Mi"    # ✅ Non-empty memory requirement
        disk: "1Gi"        # ✅ Non-empty disk requirement
    }

    timeout: 600       # ✅ Positive timeout
    retry_count: 3     # ✅ Non-negative retry count

    os_support: ["linux"]              # ✅ At least one OS required
    arch_support: ["amd64", "arm64"]   # ✅ At least one arch required
}

# ❌ Constraint violations
dependency_violations: {
    # name: "Kubernetes"    # ❌ Must be lowercase
    # name: ""              # ❌ Cannot be empty
    # timeout: 0            # ❌ Must be positive
    # retry_count: -1       # ❌ Cannot be negative
    # os_support: []        # ❌ Must specify at least one OS
}
```

## Custom Validation

### Adding Custom Constraints

```kcl
import .main
import regex

# Custom server schema with additional validation
schema CustomServer(main.Server):
    """Custom server with additional business rules"""

    # Additional custom fields
    environment: "dev" | "staging" | "prod"
    cost_center: str

    check:
        # Business rule: production servers must have specific naming
        environment != "prod" or regex.match(hostname, "^prod-[a-z0-9-]+$"), "Production servers must start with 'prod-'"

        # Business rule: staging servers have resource limits
        environment != "staging" or len(taskservs or []) <= 3, "Staging servers limited to 3 taskservs"

        # Business rule: cost center must be valid
        cost_center in ["engineering", "operations", "security"], "Invalid cost center: ${cost_center}"

# Usage with validation
prod_server: CustomServer = CustomServer {
    hostname: "prod-web-01"        # ✅ Matches production naming
    title: "Production Web Server"
    labels: "env: prod"
    user: "admin"
    environment: "prod"            # ✅ Valid environment
    cost_center: "engineering"     # ✅ Valid cost center
}
```

### Conditional Validation

```kcl
import .main

# Workflow with conditional validation based on environment
schema EnvironmentWorkflow(main.BatchWorkflow):
    """Workflow with environment-specific validation"""

    environment: "dev" | "staging" | "prod"

    check:
        # Production workflows must have monitoring
        environment != "prod" or monitoring.enabled == True, "Production workflows must enable monitoring"

        # Production workflows must have rollback enabled
        environment != "prod" or default_rollback_strategy.enabled == True, "Production workflows must enable rollback"

        # Development can have shorter timeouts (30 minutes)
        environment != "dev" or global_timeout <= 1800, "Development workflows should complete within 30 minutes"

        # Staging must have retry policies
        environment != "staging" or default_retry_policy.max_attempts >= 2, "Staging workflows must have retry policies"

# Valid production workflow
prod_workflow: EnvironmentWorkflow = EnvironmentWorkflow {
    workflow_id: "prod_deploy_001"
    name: "Production Deployment"
    environment: "prod"    # ✅ Production environment

    operations: [
        main.BatchOperation {
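            # Minimal placeholder operation; the prod-specific requirements
            # (monitoring, rollback) are satisfied further down.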
operation_id: "deploy" + name: "Deploy Application" + operation_type: "server" + action: "create" + parameters: {} + } + ] + + # ✅ Required for production + monitoring: main.MonitoringConfig { + enabled: True + backend: "prometheus" + } + + # ✅ Required for production + default_rollback_strategy: main.RollbackStrategy { + enabled: True + strategy: "immediate" + } +} +``` + +### Cross-Field Validation + +```kcl +import .main + +# Validate relationships between fields +schema ValidatedBatchOperation(main.BatchOperation): + """Batch operation with cross-field validation""" + + check: + # Timeout should be reasonable for operation type + operation_type == "server" and timeout >= 300, + "Server operations need at least 5 minutes timeout" + + operation_type == "taskserv" and timeout >= 600, + "Taskserv operations need at least 10 minutes timeout" + + # High priority operations should have retry policies + priority >= 8 and retry_policy.max_attempts >= 2, + "High priority operations should have retry policies" + + # Parallel operations should have lower priority + allow_parallel == True and priority <= 7, + "Parallel operations should have lower priority for scheduling" + +# Validate workflow operation consistency +schema ConsistentWorkflow(main.BatchWorkflow): + """Workflow with consistent operation validation""" + + check: + # All operation IDs must be unique + len(operations) == len([op.operation_id for op in operations] | unique), + "All operation IDs must be unique" + + # Dependencies must reference existing operations + all([ + dep.target_operation_id in [op.operation_id for op in operations] + for op in operations + for dep in op.dependencies or [] + ]), + "All dependencies must reference existing operations" + + # No circular dependencies (simplified check) + len(operations) > 0, + "Workflow must have at least one operation" +``` + +## Best Practices + +### 1. Schema Design Principles + +```kcl +# ✅ Good: Descriptive field names and documentation +schema WellDocumentedServer: + """ + Server configuration for production workloads + Follows company security and operational standards + """ + + # Core identification + hostname: str # DNS-compliant hostname + fqdn?: str # Fully qualified domain name + + # Environment classification + environment: "dev" | "staging" | "prod" + classification: "public" | "internal" | "confidential" + + # Operational metadata + owner_team: str # Team responsible for maintenance + cost_center: str # Billing allocation + backup_required: bool # Whether automated backups are needed + + check: + len(hostname) > 0 and len(hostname) <= 63, "Hostname must be 1-63 characters" + len(owner_team) > 0, "Owner team must be specified" + len(cost_center) > 0, "Cost center must be specified" + +# ❌ Avoid: Unclear field names and missing validation +schema PoorlyDocumentedServer: + name: str # ❌ Ambiguous - hostname? title? display name? + env: str # ❌ No constraints - any string allowed + data: {str: str} # ❌ Unstructured data without validation +``` + +### 2. 
Validation Strategy + +```kcl +# ✅ Good: Layered validation with clear error messages +schema ProductionWorkflow(main.BatchWorkflow): + """Production workflow with comprehensive validation""" + + # Business metadata + change_request_id: str + approver: str + maintenance_window?: str + + check: + # Business process validation + regex.match(change_request_id, "^CHG-[0-9]{4}-[0-9]{3}$"), + "Change request ID must match format CHG-YYYY-NNN" + + # Operational validation + global_timeout <= 14400, # 4 hours max + "Production workflows must complete within 4 hours" + + # Safety validation + default_rollback_strategy.enabled == True, + "Production workflows must enable rollback" + + # Monitoring validation + monitoring.enabled == True and monitoring.enable_notifications == True, + "Production workflows must enable monitoring and notifications" + +# ✅ Good: Environment-specific defaults with validation +schema EnvironmentDefaults: + """Environment-specific default configurations""" + + environment: "dev" | "staging" | "prod" + + # Default timeouts by environment + default_timeout: int = environment == "prod" ? 1800 : (environment == "staging" ? 1200 : 600) + + # Default retry attempts by environment + default_retries: int = environment == "prod" ? 3 : (environment == "staging" ? 2 : 1) + + # Default monitoring settings + monitoring_enabled: bool = environment == "prod" ? True : False + + check: + default_timeout > 0, "Timeout must be positive" + default_retries >= 0, "Retries cannot be negative" +``` + +### 3. Schema Composition Patterns + +```kcl +# ✅ Good: Composable schema design +schema BaseResource: + """Common fields for all resources""" + name: str + tags: {str: str} = {} + created_at?: str + updated_at?: str + + check: + len(name) > 0, "Name cannot be empty" + regex.match(name, "^[a-z0-9-]+$"), "Name must be lowercase alphanumeric with hyphens" + +schema MonitoredResource(BaseResource): + """Resource with monitoring capabilities""" + monitoring_enabled: bool = True + alert_thresholds: {str: float} = {} + + check: + monitoring_enabled == True and len(alert_thresholds) > 0, + "Monitored resources must define alert thresholds" + +schema SecureResource(BaseResource): + """Resource with security requirements""" + encryption_enabled: bool = True + access_policy: str + compliance_tags: [str] = [] + + check: + encryption_enabled == True, "Security-sensitive resources must enable encryption" + len(access_policy) > 0, "Access policy must be defined" + "pci" in compliance_tags or "sox" in compliance_tags or "hipaa" in compliance_tags, + "Must specify compliance requirements" + +# Composed schema inheriting multiple patterns +schema ProductionDatabase(MonitoredResource, SecureResource): + """Production database with full operational requirements""" + backup_retention_days: int = 30 + high_availability: bool = True + + check: + backup_retention_days >= 7, "Production databases need minimum 7 days backup retention" + high_availability == True, "Production databases must be highly available" +``` + +### 4. Error Handling Patterns + +```kcl +# ✅ Good: Comprehensive error scenarios with specific handling +schema RobustBatchOperation(main.BatchOperation): + """Batch operation with robust error handling""" + + # Error classification + critical_operation: bool = False + max_failure_rate: float = 0.1 + + # Enhanced retry configuration + retry_policy: main.RetryPolicy = main.RetryPolicy { + max_attempts: critical_operation ? 5 : 3 + initial_delay: critical_operation ? 30 : 10 + max_delay: critical_operation ? 
600 : 300 + backoff_multiplier: 2 + retry_on_errors: [ + "connection_error", + "timeout", + "rate_limit", + "resource_unavailable" + ] + } + + # Enhanced rollback strategy + rollback_strategy: main.RollbackStrategy = main.RollbackStrategy { + enabled: True + strategy: critical_operation ? "manual" : "immediate" + preserve_partial_state: critical_operation + custom_rollback_operations: critical_operation ? [ + "create_incident_ticket", + "notify_on_call_engineer", + "preserve_logs" + ] : [] + } + + check: + 0 <= max_failure_rate and max_failure_rate <= 1, + "Failure rate must be between 0 and 1" + + critical_operation == True and timeout >= 1800, + "Critical operations need extended timeout" +``` + +## Common Patterns + +### 1. Multi-Environment Configuration + +```kcl +# Configuration that adapts to environment +schema EnvironmentAwareConfig: + environment: "dev" | "staging" | "prod" + + # Computed values based on environment + replica_count: int = ( + environment == "prod" ? 3 : ( + environment == "staging" ? 2 : 1) + ) + + resource_requests: main.K8sResources = main.K8sResources { + memory: environment == "prod" ? "512Mi" : "256Mi" + cpu: environment == "prod" ? "200m" : "100m" + } + + monitoring_enabled: bool = environment != "dev" + + backup_enabled: bool = environment == "prod" + +# Usage pattern +prod_config: EnvironmentAwareConfig = EnvironmentAwareConfig { + environment: "prod" + # replica_count automatically becomes 3 + # monitoring_enabled automatically becomes True + # backup_enabled automatically becomes True +} +``` + +### 2. Provider Abstraction + +```kcl +# Provider-agnostic resource definition +schema AbstractServer: + """Provider-agnostic server specification""" + + # Common specification + cpu_cores: int + memory_gb: int + storage_gb: int + network_performance: "low" | "moderate" | "high" + + # Provider-specific mapping + provider: "upcloud" | "aws" | "gcp" + + # Computed provider-specific values + instance_type: str = ( + provider == "upcloud" ? f"{cpu_cores}xCPU-{memory_gb}GB" : ( + provider == "aws" ? f"m5.{cpu_cores == 1 ? 'large' : 'xlarge'}" : ( + provider == "gcp" ? f"n2-standard-{cpu_cores}" : "unknown" + )) + ) + + storage_type: str = ( + provider == "upcloud" ? "MaxIOPS" : ( + provider == "aws" ? "gp3" : ( + provider == "gcp" ? "pd-ssd" : "standard" + )) + ) + +# Multi-provider workflow using abstraction +mixed_deployment: main.BatchWorkflow = main.BatchWorkflow { + workflow_id: "mixed_deploy_001" + name: "Multi-Provider Deployment" + + operations: [ + # UpCloud servers + main.BatchOperation { + operation_id: "upcloud_servers" + provider: "upcloud" + parameters: { + "instance_type": "2xCPU-4GB" # UpCloud format + "storage_type": "MaxIOPS" + } + }, + # AWS servers + main.BatchOperation { + operation_id: "aws_servers" + provider: "aws" + parameters: { + "instance_type": "m5.large" # AWS format + "storage_type": "gp3" + } + } + ] +} +``` + +### 3. 
Dependency Management + +```kcl +# Complex dependency patterns +schema DependencyAwareWorkflow(main.BatchWorkflow): + """Workflow with intelligent dependency management""" + + # Categorize operations by type + infrastructure_ops: [str] = [ + op.operation_id for op in operations + if op.operation_type == "server" + ] + + service_ops: [str] = [ + op.operation_id for op in operations + if op.operation_type == "taskserv" + ] + + validation_ops: [str] = [ + op.operation_id for op in operations + if op.operation_type == "custom" and "validate" in op.name.lower() + ] + + check: + # Infrastructure must come before services + all([ + len([dep for dep in op.dependencies or [] + if dep.target_operation_id in infrastructure_ops]) > 0 + for op in operations + if op.operation_id in service_ops + ]) or len(service_ops) == 0, + "Service operations must depend on infrastructure operations" + + # Validation must come last + all([ + len([dep for dep in op.dependencies or [] + if dep.target_operation_id in service_ops or dep.target_operation_id in infrastructure_ops]) > 0 + for op in operations + if op.operation_id in validation_ops + ]) or len(validation_ops) == 0, + "Validation operations must depend on other operations" +``` + +## Troubleshooting + +### Common Validation Errors + +#### 1. Missing Required Fields + +```bash +# Error: attribute 'labels' of Server is required +# ❌ Incomplete server definition +server: main.Server = main.Server { + hostname: "web-01" + title: "Web Server" + # Missing: labels, user +} + +# ✅ Complete server definition +server: main.Server = main.Server { + hostname: "web-01" + title: "Web Server" + labels: "env: prod" # ✅ Required field + user: "admin" # ✅ Required field +} +``` + +#### 2. Type Mismatches + +```bash +# Error: expect int, got str +# ❌ Wrong type +workflow: main.BatchWorkflow = main.BatchWorkflow { + max_parallel_operations: "3" # ❌ String instead of int +} + +# ✅ Correct type +workflow: main.BatchWorkflow = main.BatchWorkflow { + max_parallel_operations: 3 # ✅ Integer +} +``` + +#### 3. Constraint Violations + +```bash +# Error: Check failed: hostname cannot be empty +# ❌ Constraint violation +server: main.Server = main.Server { + hostname: "" # ❌ Empty string violates constraint + title: "Server" + labels: "env: prod" + user: "admin" +} + +# ✅ Valid constraint +server: main.Server = main.Server { + hostname: "web-01" # ✅ Non-empty string + title: "Server" + labels: "env: prod" + user: "admin" +} +``` + +### Debugging Techniques + +#### 1. Step-by-step Validation + +```bash +# Validate incrementally +kcl run basic_config.k # Start with minimal config +kcl run enhanced_config.k # Add features gradually +kcl run complete_config.k # Full configuration +``` + +#### 2. Schema Introspection + +```bash +# Check what fields are available +kcl run -c 'import .main; main.Server' --format json + +# Validate against specific schema +kcl vet config.k --schema main.Server + +# Debug with verbose output +kcl run config.k --debug --verbose +``` + +#### 3. Constraint Testing + +```kcl +# Test constraint behavior +test_constraints: { + # Test minimum values + min_timeout: main.BatchOperation { + operation_id: "test" + name: "Test" + operation_type: "server" + action: "create" + parameters: {} + timeout: 1 # Test minimum allowed + } + + # Test maximum values + max_parallel: main.BatchWorkflow { + workflow_id: "test" + name: "Test" + operations: [min_timeout] + max_parallel_operations: 100 # Test upper limits + } +} +``` + +### Performance Considerations + +#### 1. 
Schema Complexity + +```kcl +# ✅ Good: Simple, focused schemas +schema SimpleServer: + hostname: str + user: str + labels: str + + check: + len(hostname) > 0, "Hostname required" + +# ❌ Avoid: Overly complex schemas with many computed fields +schema OverlyComplexServer: + # ... many fields with complex interdependencies + # ... computationally expensive check conditions + # ... deep nested validations +``` + +#### 2. Validation Efficiency + +```kcl +# ✅ Good: Efficient validation +schema EfficientValidation: + name: str + tags: {str: str} + + check: + len(name) > 0, "Name required" # ✅ Simple check + len(tags) <= 10, "Maximum 10 tags allowed" # ✅ Simple count check + +# ❌ Avoid: Expensive validation +schema ExpensiveValidation: + items: [str] + + check: + # ❌ Expensive nested operations + all([regex.match(item, "^[a-z0-9-]+$") for item in items]), + "All items must match pattern" +``` + +This validation guide provides the foundation for creating robust, maintainable KCL schemas with proper error handling and validation strategies. \ No newline at end of file diff --git a/docs/provisioning.md b/docs/provisioning.md new file mode 100644 index 0000000..6757ece --- /dev/null +++ b/docs/provisioning.md @@ -0,0 +1,589 @@ +# provisioning + +## Index + +- [Cluster](#cluster) +- [ClusterDef](#clusterdef) +- [K8sAffinity](#k8saffinity) +- [K8sAffinityLabelSelector](#k8saffinitylabelselector) +- [K8sAffinityMatch](#k8saffinitymatch) +- [K8sAntyAffinityLabelSelector](#k8santyaffinitylabelselector) +- [K8sBackup](#k8sbackup) +- [K8sConfigMap](#k8sconfigmap) +- [K8sContainers](#k8scontainers) +- [K8sDefs](#k8sdefs) +- [K8sDeploy](#k8sdeploy) +- [K8sDeploySpec](#k8sdeployspec) +- [K8sKeyPath](#k8skeypath) +- [K8sKeyVal](#k8skeyval) +- [K8sPort](#k8sport) +- [K8sPrxyGatewayServer](#k8sprxygatewayserver) +- [K8sPrxyPort](#k8sprxyport) +- [K8sPrxyTLS](#k8sprxytls) +- [K8sPrxyVirtualService](#k8sprxyvirtualservice) +- [K8sPrxyVirtualServiceMatch](#k8sprxyvirtualservicematch) +- [K8sPrxyVirtualServiceMatchURL](#k8sprxyvirtualservicematchurl) +- [K8sPrxyVirtualServiceRoute](#k8sprxyvirtualserviceroute) +- [K8sResources](#k8sresources) +- [K8sSecret](#k8ssecret) +- [K8sService](#k8sservice) +- [K8sVolume](#k8svolume) +- [K8sVolumeClaim](#k8svolumeclaim) +- [K8sVolumeMount](#k8svolumemount) +- [RunSet](#runset) +- [ScaleData](#scaledata) +- [ScaleResource](#scaleresource) +- [Server](#server) +- [ServerDefaults](#serverdefaults) +- [Settings](#settings) +- [Storage](#storage) +- [StorageVol](#storagevol) +- [TaskServDef](#taskservdef) + +## Schemas + +### Cluster + +cluster settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**admin_host**|str||| +|**admin_port**|int||| +|**admin_user**|str||| +|**clusters_save_path**|str||| +|**def** `required`|"K8sDeploy" | ""||""| +|**local_def_path** `required`|str||"./clusters/${name}"| +|**name** `required`|str||| +|**not_use** `required`|bool||False| +|**profile**|str||| +|**scale**|[ScaleResource](#scaleresource)||| +|**ssh_key_path**|str||| +|**template**|"k8s-deploy" | ""||| +|**version** `required`|str||| +### ClusterDef + +ClusterDef settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**name** `required`|str||| +|**profile** `required`|str||"default"| +### K8sAffinity + +K8S Deployment Affinity settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | 
+|**affinity**|[K8sAffinityLabelSelector](#k8saffinitylabelselector)||| +|**antiAffinity**|[K8sAntyAffinityLabelSelector](#k8santyaffinitylabelselector)||| +### K8sAffinityLabelSelector + +K8S Deployment Affinity Label Selector settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**labelSelector** `required`|[[K8sAffinityMatch](#k8saffinitymatch)]||| +|**matchLabelKeys**|[str]||| +|**topologyKey**|str||| +|**typ** `required`|"requiredDuringSchedulingIgnoredDuringExecution" | "preferredDuringSchedulingIgnoredDuringExecution"||"requiredDuringSchedulingIgnoredDuringExecution"| +### K8sAffinityMatch + +K8S Deployment Affinity Match settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**key** `required`|str||| +|**operator** `required`|"In" | "NotIn" | "Exists" | "DoesNotExist"||| +|**values** `required`|[str]||| +### K8sAntyAffinityLabelSelector + +K8S Deployment AntyAffinity Label Selector settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**labelSelector** `required`|[[K8sAffinityMatch](#k8saffinitymatch)]||| +|**matchLabelKeys**|[str]||| +|**topologyKey**|str||| +|**typ** `required`|"requiredDuringSchedulingIgnoredDuringExecution" | "preferredDuringSchedulingIgnoredDuringExecution"||"requiredDuringSchedulingIgnoredDuringExecution"| +|**weight** `required`|int||100| +### K8sBackup + +K8S Backup settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**mount_path** `required`|str||| +|**name** `required`|str||| +|**typ** `required`|str||| +### K8sConfigMap + +K8S Volume ConfigMap settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**name** `required`|str||| +### K8sContainers + +K8S Container settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**cmd**|str||| +|**env**|[[K8sKeyVal](#k8skeyval)]||| +|**image** `required`|str||| +|**imagePull** `required`|"IfNotPresent" | "Always" | "Never"||"IfNotPresent"| +|**name** `required`|str||"main"| +|**ports**|[[K8sPort](#k8sport)]||| +|**resources_limits**|[K8sResources](#k8sresources)||| +|**resources_requests**|[K8sResources](#k8sresources)||| +|**volumeMounts**|[[K8sVolumeMount](#k8svolumemount)]||| +### K8sDefs + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**cluster_domain** `required`|str||| +|**domain** `required`|str||| +|**full_domain** `required`|str||| +|**name** `required`|str||| +|**ns** `required`|str||| +|**primary_dom** `required`|str||| +### K8sDeploy + +K8S Deployment settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**backups**|[[K8sBackup](#k8sbackup)]||| +|**bin_apply** `required`|bool||True| +|**create_ns** `required`|bool||False| +|**full_domain**|str||| +|**labels** `required`|[[K8sKeyVal](#k8skeyval)]||[K8sKeyVal {key: "${name}", value: "${name}"}]| +|**name** `required`|str||| +|**name_in_files** `required`|str||"${name}"| +|**namespace** `required`|str | "default"||| +|**prxy** `readOnly`|"istio"||"istio"| +|**prxyGatewayServers**|[[K8sPrxyGatewayServer](#k8sprxygatewayserver)]||| +|**prxyVirtualService**|[K8sPrxyVirtualService](#k8sprxyvirtualservice)||| +|**prxy_ns**|str||"istio-system"| +|**sel_labels** `required`|[[K8sKeyVal](#k8skeyval)]||labels| +|**service**|[K8sService](#k8sservice)||| +|**spec** 
`required`|[K8sDeploySpec](#k8sdeployspec)||| +|**tls_path**|str||"ssl"| +|**tpl_labels** `required`|[[K8sKeyVal](#k8skeyval)]||labels| +### K8sDeploySpec + +K8S Deployment Spec settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**affinity**|[K8sAffinity](#k8saffinity)||| +|**containers** `required`|[[K8sContainers](#k8scontainers)]||| +|**hostUsers**|bool||True| +|**imagePullSecret**|str||| +|**nodeName**|str||| +|**nodeSelector**|[[K8sKeyVal](#k8skeyval)]||| +|**replicas** `required`|int||1| +|**secrets**|[[K8sSecret](#k8ssecret)]||| +|**volumes**|[[K8sVolume](#k8svolume)]||| +### K8sKeyPath + +K8S key,path settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**key** `required`|str||| +|**path** `required`|str||| +### K8sKeyVal + +K8S label,selector,env settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**key** `required`|str||| +|**value** `required`|str||| +### K8sPort + +K8S Port settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**container**|int||| +|**name** `required`|str||| +|**nodePort**|int||| +|**target**|int||| +|**typ**|str||"TCP"| +### K8sPrxyGatewayServer + +K8S Deployment Proxy Gateway Server settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**hosts**|[str]||| +|**port** `required`|[K8sPrxyPort](#k8sprxyport)||| +|**tls**|[K8sPrxyTLS](#k8sprxytls)||| +### K8sPrxyPort + +K8S Proxy Port settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**name** `required`|str||| +|**number**|int||| +|**proto** `required`|"HTTP" | "HTTPS" | "TCP"||"HTTPS"| +### K8sPrxyTLS + +K8S Deployment Proxy TLS settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**credentialName**|str||| +|**httpsRedirect**|bool||False| +|**mode**|"SIMPLE" | "PASSTHROUGH" | "MULTI" | ""||"SIMPLE"| +### K8sPrxyVirtualService + +K8S Deployment Proxy Virtual Service settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**gateways** `required`|[str]||| +|**hosts** `required`|[str]||| +|**matches**|[[K8sPrxyVirtualServiceMatch](#k8sprxyvirtualservicematch)]||| +### K8sPrxyVirtualServiceMatch + +K8S Deployment Proxy Virtual Service Match settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**location**|[[K8sPrxyVirtualServiceMatchURL](#k8sprxyvirtualservicematchurl)]||| +|**route_destination**|[[K8sPrxyVirtualServiceRoute](#k8sprxyvirtualserviceroute)]||| +|**typ** `required`|"tcp" | "http" | "tls"||| +### K8sPrxyVirtualServiceMatchURL + +K8S Deployment Proxy Virtual Service Match URL settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**port**|int||| +|**sniHost**|[str]||| +### K8sPrxyVirtualServiceRoute + +K8S Deployment Proxy Virtual Service Route settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**host** `required`|str||| +|**port_number** `required`|int||| +### K8sResources + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**cpu** `required`|str||| +|**memory** `required`|str||| +### K8sSecret + +K8S Volume Secret settings + +#### Attributes + +| name | type | description | default value 
| +| --- | --- | --- | --- | +|**items** `required`|[[K8sKeyPath](#k8skeypath)]||| +|**name** `required`|str||| +### K8sService + +K8S Service settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**externaIPs**|[str]||| +|**externalName**|str||| +|**name** `required`|str||| +|**ports** `required`|[[K8sPort](#k8sport)]||| +|**proto** `required` `readOnly`|"TCP"||"TCP"| +|**selector**|[[K8sKeyVal](#k8skeyval)]||| +|**typ** `required`|"ClusterIP" | "NodePort" | "LoadBalancer" | "ExternalName" | "Headless" | "None"||"ClusterIP"| +### K8sVolume + +K8S Volume settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**configMap**|[K8sConfigMap](#k8sconfigmap)||| +|**items**|[[K8sKeyPath](#k8skeypath)]||| +|**name** `required`|str||| +|**persitentVolumeClaim**|[K8sVolumeClaim](#k8svolumeclaim)||| +|**secret**|[K8sSecret](#k8ssecret)||| +|**typ** `required`|"volumeClaim" | "configMap" | "secret"||"volumeClaim"| +### K8sVolumeClaim + +K8S VolumeClaim settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**abbrev_mode**|["RWO" | "ROX" | "RWX" | "RWOP"]||["RWO"]| +|**hostPath**|str||| +|**modes** `required`|["ReadWriteOnce" | "ReadOnlyMain" | "ReadWriteMany" | "ReadWriteOncePod"]||["ReadWriteOnce"]| +|**name** `required`|str||| +|**pvMode**|"unspecified" | "Filesystem" | "Block"||| +|**pvcMode**|"unspecified" | "Filesystem" | "Block"||| +|**reclaimPolicy**|"Recycle" | "Retain" | "Delete"||"Retain"| +|**storage**|str||| +|**storageClassName** `required`|"manual" | "nfs-client" | "rook-cephfs"||"manual"| +|**typ** `required`|"volumeClaim" | "configMap" | "secret" | ""||""| +### K8sVolumeMount + +K8S VolumeMounts settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**mountPath** `required`|str||| +|**name** `required`|str||| +|**readOnly** `required`|bool||False| +|**subPath**|str||| +### RunSet + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**inventory_file** `required`|str||"./inventory.yaml"| +|**output_format** `required`|"human" | "yaml" | "json"||"human"| +|**output_path** `required`|str||"tmp/NOW-deploy"| +|**use_time** `required`|bool||True| +|**wait** `required`|bool||True| +### ScaleData + +scale data + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**def** `required`|str||| +|**disabled** `required`|bool||False| +|**expire**|str||| +|**from**|str||| +|**mode** `required`|"auto" | "manual" | "ondemand"||"manual"| +|**to**|str||| +### ScaleResource + +scale server settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**default** `required`|[ScaleData](#scaledata)||| +|**down**|[ScaleData](#scaledata)||| +|**fallback**|[ScaleData](#scaledata)||| +|**max**|[ScaleData](#scaledata)||| +|**min**|[ScaleData](#scaledata)||| +|**path** `required`|str||"/etc/scale_provisioning"| +|**up**|[ScaleData](#scaledata)||| +### Server + +server settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**cluster**|[[ClusterDef](#clusterdef)]||| +|**delete_lock** `required`|bool||False| +|**domains_search**|str||| +|**extra_hostnames**|[str]||| +|**fix_local_hosts** `required`|bool||True| +|**hostname** `required`|str||| +|**installer_user**|str||"${user}"| +|**labels** `required`|str||| +|**lock** 
`required`|bool||False| +|**main_domain**|str||| +|**network_private_id**|str||| +|**network_private_name**|str||| +|**network_public_ip**|str||| +|**network_public_ipv4**|bool||True| +|**network_public_ipv6**|bool||False| +|**network_utility_ipv4** `required`|bool||True| +|**network_utility_ipv6** `required`|bool||False| +|**not_use** `required`|bool||False| +|**primary_dns**|str||| +|**priv_cidr_block**|str||| +|**running_timeout** `required`|int||200| +|**running_wait** `required`|int||10| +|**scale**|[ScaleResource](#scaleresource)||| +|**secondary_dns**|str||| +|**ssh_key_name**|str||| +|**ssh_key_path**|str||| +|**storage_os**|str||| +|**storage_os_find** `required`|str||"name: debian-12 \| arch: x86_64"| +|**taskservs**|[[TaskServDef](#taskservdef)]||| +|**time_zone** `required`|str||"UTC"| +|**title** `required`|str||| +|**user** `required`|str||| +|**user_home**|str||"/home/${user}"| +|**user_ssh_key_path**|str||| +|**user_ssh_port**|int||22| +|**zone**|str||| +### ServerDefaults + +Server Defaults settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**domains_search**|str||| +|**fix_local_hosts** `required`|bool||True| +|**installer_user**|str||"${user}"| +|**labels** `required`|str||| +|**lock** `required`|bool||False| +|**main_domain**|str||| +|**network_private_id**|str||| +|**network_private_name**|str||| +|**network_public_ip**|str||| +|**network_public_ipv4**|bool||True| +|**network_public_ipv6**|bool||False| +|**network_utility_ipv4** `required`|bool||True| +|**network_utility_ipv6** `required`|bool||False| +|**primary_dns**|str||| +|**priv_cidr_block**|str||| +|**running_timeout** `required`|int||200| +|**running_wait** `required`|int||10| +|**scale**|[ScaleResource](#scaleresource)||| +|**secondary_dns**|str||| +|**ssh_key_name**|str||| +|**ssh_key_path**|str||| +|**storage_os**|str||| +|**storage_os_find** `required`|str||"name: debian-12 \| arch: x86_64"| +|**time_zone** `required`|str||"UTC"| +|**user** `required`|str||| +|**user_home**|str||"/home/${user}"| +|**user_ssh_key_path**|str||| +|**user_ssh_port**|int||22| +|**zone**|str||| +### Settings + +Settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**cluster_admin_host** `required`|str||| +|**cluster_admin_port** `required`|int||22| +|**cluster_admin_user** `required`|str||"root"| +|**clusters_paths** `required`|[str]||["clusters"]| +|**clusters_save_path** `required`|str||"/${main_name}/clusters"| +|**created_clusters_dirpath** `required`|str||"./tmp/NOW_clusters"| +|**created_taskservs_dirpath** `required`|str||"./tmp/NOW_deployment"| +|**defaults_provs_dirpath** `required`|str||"./defs"| +|**defaults_provs_suffix** `required`|str||"_defaults.k"| +|**main_name** `required`|str||| +|**main_title** `required`|str||main_name| +|**prov_clusters_path** `required`|str||"./clusters"| +|**prov_data_dirpath** `required`|str||"./data"| +|**prov_data_suffix** `required`|str||"_settings.k"| +|**prov_local_bin_path** `required`|str||"./bin"| +|**prov_resources_path** `required`|str||"./resources"| +|**runset** `required`|[RunSet](#runset)||| +|**servers_paths** `required`|[str]||["servers"]| +|**servers_wait_started** `required`|int||27| +|**settings_path** `required`|str||"./settings.yaml"| +### Storage + +Storage settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**fstab** `required`|bool||True| +|**mount** `required`|bool||True| +|**mount_path**|str||| +|**name** 
`required`|str||| +|**parts**|[[StorageVol](#storagevol)]||[]| +|**size** `required`|int||0| +|**total** `required`|int||size| +|**type** `required`|"ext4" | "xfs" | "btrfs" | "raw" | "zfs"||"ext4"| +### StorageVol + +StorageVol settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**fstab** `required`|bool||True| +|**mount** `required`|bool||True| +|**mount_path**|str||| +|**name** `required`|str||| +|**size** `required`|int||0| +|**total** `required`|int||size| +|**type** `required`|"ext4" | "xfs" | "btrfs" | "raw" | "zfs"||"ext4"| +### TaskServDef + +TaskServDef settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**install_mode** `required`|"getfile" | "library" | "server" | "library-server" | "server-library"||"library"| +|**name** `required`|str||| +|**profile** `required`|str||"default"| + diff --git a/docs/why_main.md b/docs/why_main.md new file mode 100644 index 0000000..05a8ace --- /dev/null +++ b/docs/why_main.md @@ -0,0 +1,40 @@ + ✅ main.k + + Purpose: + - Entry Point: Main entry point for the provisioning KCL package + - Schema Exports: Exports all schemas from sub-modules for external consumption + - API Interface: Provides a clean, organized API for external projects + + Key Features: + - Comprehensive Exports: All 70+ schemas properly exported with organized categories: + - Core configuration schemas (Settings, SecretProvider, etc.) + - Server and infrastructure schemas + - Workflow and batch operation schemas + - Kubernetes deployment schemas + - Version management schemas + - Package Metadata: Exports package information and constants + - Validated: Successfully tested with external imports and schema instantiation + + Benefits vs NO directory approach: + - Current Structure: Clean, organized exports with proper categorization + - NO Directory: Contains outdated files with broken imports and missing references + - Maintainability: Single source of truth for package exports + - Usage: External projects can simply import .main and access all schemas via main.SchemaName + + Example Usage: + import provisioning.main + + server: main.Server = main.Server { + hostname: "web-server" + title: "Web Server" + labels: "env: prod" + user: "admin" + } + + workflow: main.BatchWorkflow = main.BatchWorkflow { + workflow_id: "deploy_001" + name: "Production Deployment" + operations: [...] + } + + The main.k file is now the recommended entry point for the provisioning KCL package diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 0000000..987ad1b --- /dev/null +++ b/examples/README.md @@ -0,0 +1,278 @@ +# KCL Package Examples + +This directory contains practical examples demonstrating how to use the provisioning KCL package schemas. 
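+
+Every example imports the package entry point and instantiates the exported schemas. As a quick taste, here is a minimal sketch (assuming `TaskServDef` is exported through `main.k` as the package documentation describes):
+
+```kcl
+import ..main
+
+# Smallest useful building block: a taskserv attached to a server.
+# install_mode is one of: getfile | library | server | library-server | server-library
+node_exporter: main.TaskServDef = main.TaskServDef {
+    name: "prometheus-node-exporter"
+    install_mode: "server"
+    profile: "monitoring"
+}
+```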
+ +## Example Files + +### 📄 `basic_server.k` +**Simple server configurations for different use cases** +- Web server with nginx and monitoring +- Database server with PostgreSQL +- Development server with Docker + +```bash +# Validate and run +kcl run basic_server.k + +# Export to JSON +kcl run basic_server.k --format json +``` + +### 📄 `simple_workflow.k` +**Basic workflow example with sequential operations** +- Database server creation +- Web server deployment +- Application configuration with dependencies + +Demonstrates: +- Sequential dependencies +- Retry policies +- Basic monitoring +- Filesystem storage + +### 📄 `kubernetes_deployment.k` +**Complete Kubernetes deployment examples** +- Web application with resources and affinity +- PostgreSQL with persistent storage +- Prometheus monitoring stack + +Features: +- Resource limits and requests +- Persistent volumes +- Services and networking +- ConfigMaps and secrets +- Anti-affinity rules + +### 📄 `mixed_provider_workflow.k` +**Advanced multi-cloud deployment** +- UpCloud compute infrastructure +- AWS managed services (RDS, ElastiCache) +- Kubernetes cluster setup +- Cross-cloud connectivity + +Advanced features: +- Multiple providers in single workflow +- Complex dependency chains +- Production-grade monitoring +- Encrypted state storage +- Comprehensive retry and rollback strategies + +## Running Examples + +### Basic Validation +```bash +# Check syntax and validate schemas +kcl fmt examples/*.k +kcl run examples/basic_server.k +``` + +### JSON Output for Integration +```bash +# Generate JSON for Nushell/Rust integration +kcl run examples/simple_workflow.k --format json > workflow.json + +# Use with Nushell +let workflow = (open workflow.json) +echo $"Workflow: ($workflow.name)" +echo $"Operations: ($workflow.operations | length)" +``` + +### Validation with Different Configurations +```bash +# Test with custom values +kcl run examples/basic_server.k -D hostname="my-server" -D user="ubuntu" + +# Override workflow settings +kcl run examples/simple_workflow.k -D max_parallel_operations=5 +``` + +## Integration Patterns + +### With Nushell Scripts +```nushell +# Load and submit workflow +def submit-workflow [file: string] { + let workflow = (kcl run $file --format json | from json) + $workflow | to json | http post http://localhost:8080/workflows/batch/submit +} + +# Monitor workflow progress +def monitor-workflow [workflow_id: string] { + while true { + let status = (http get $"http://localhost:8080/workflows/batch/($workflow_id)") + print $"Status: ($status.status)" + if $status.status in ["completed", "failed"] { break } + sleep 10sec + } +} +``` + +### With Rust Orchestrator +```rust +use serde_json; +use std::process::Command; + +// Generate workflow from KCL +let output = Command::new("kcl") + .args(&["run", "examples/simple_workflow.k", "--format", "json"]) + .output()?; + +// Parse and execute +let workflow: BatchWorkflow = serde_json::from_slice(&output.stdout)?; +let executor = BatchExecutor::new(workflow); +executor.execute().await?; +``` + +## Customization Examples + +### Server Configuration Variants +```kcl +import ..main + +# High-performance server +performance_server: main.Server = main.Server { + hostname: "perf-01" + title: "High Performance Server" + labels: "env: prod, tier: compute, performance: high" + user: "performance" + + # Override defaults for high-performance needs + running_timeout: 300 + + taskservs: [ + main.TaskServDef { + name: "tuned" + install_mode: "server" + profile: "performance" + } + ] +} +``` 
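+
+### Scaling Configuration
+
+Servers can also carry a scaling policy through the `scale` attribute. A minimal sketch, assuming `ScaleResource` and `ScaleData` are re-exported through `main` like the other schemas; the `def` size strings are illustrative, since their exact format is provider-defined:
+
+```kcl
+import ..main
+
+scaled_server: main.Server = main.Server {
+    hostname: "web-02"
+    title: "Scalable Web Server"
+    labels: "env: prod, tier: web, scaling: enabled"
+    user: "deploy"
+
+    scale: main.ScaleResource {
+        # Baseline capacity; mode defaults to "manual"
+        default: main.ScaleData {
+            def: "2xCPU-4GB"
+        }
+        # Ceiling applied when scaling mode is "auto"
+        max: main.ScaleData {
+            def: "4xCPU-8GB"
+            mode: "auto"
+        }
+    }
+}
+```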
+ +### Workflow Customization +```kcl +import ..main + +# Custom retry policy for network operations +network_retry_policy: main.RetryPolicy = main.RetryPolicy { + max_attempts: 5 + initial_delay: 10 + max_delay: 120 + backoff_multiplier: 1.5 + retry_on_errors: ["connection_error", "dns_error", "timeout"] +} + +# Workflow with custom settings +custom_workflow: main.BatchWorkflow = main.BatchWorkflow { + workflow_id: "custom_001" + name: "Custom Network Deployment" + + # Use custom retry policy for all operations + default_retry_policy: network_retry_policy + + operations: [ + # ... your operations + ] +} +``` + +## Best Practices Demonstrated + +### 1. **Dependency Management** +- Use sequential dependencies for ordered operations +- Use conditional dependencies for health checks +- Set appropriate timeouts for each dependency + +### 2. **Resource Configuration** +- Always set resource limits for Kubernetes deployments +- Use appropriate server plans based on workload +- Configure persistent storage for stateful services + +### 3. **Monitoring & Observability** +- Enable monitoring for production workflows +- Configure appropriate log levels +- Set up notifications for critical operations + +### 4. **Error Handling** +- Configure retry policies based on operation type +- Use rollback strategies for critical deployments +- Set appropriate timeouts for different operations + +### 5. **Security** +- Use encrypted storage for sensitive workflows +- Configure proper network isolation +- Use secrets management for credentials + +## Troubleshooting + +### Common Issues + +**Schema Validation Errors** +```bash +# Check for typos in schema names +kcl run examples/basic_server.k --debug + +# Validate against specific schema +kcl vet examples/basic_server.k --schema main.Server +``` + +**Missing Required Fields** +```bash +# Error: attribute 'labels' of Server is required +# Solution: Always provide required fields +server: main.Server = main.Server { + hostname: "web-01" + title: "Web Server" + labels: "env: prod" # ✅ Required field + user: "admin" # ✅ Required field +} +``` + +**Import Errors** +```bash +# Use relative imports within the package +import ..main # ✅ Correct + +# Not absolute imports +import provisioning.main # ❌ May not work in examples +``` + +### Testing Examples + +```bash +# Run all examples to verify they work +for file in examples/*.k; do + echo "Testing $file" + kcl run "$file" > /dev/null && echo "✅ PASS" || echo "❌ FAIL" +done + +# Test JSON serialization +kcl run examples/simple_workflow.k --format json | jq '.workflow_id' +``` + +## Contributing Examples + +When adding new examples: + +1. **Follow naming convention**: `{purpose}_{type}.k` +2. **Include comprehensive comments** +3. **Demonstrate specific features** +4. **Test before committing** +5. 
**Update this README** + +Example template: +```kcl +# {Purpose} Example +# Demonstrates {specific features} + +import ..main + +# Clear, descriptive variable names +example_resource: main.ResourceType = main.ResourceType { + # Well-commented configuration + required_field: "value" + + # Explain non-obvious settings + optional_field: "explained_value" # Why this value +} +``` \ No newline at end of file diff --git a/examples/basic_server.k b/examples/basic_server.k new file mode 100644 index 0000000..c59f99f --- /dev/null +++ b/examples/basic_server.k @@ -0,0 +1,80 @@ +# Basic Server Configuration Example +# Shows how to define a simple server with common settings + +import ..main + +# Simple web server configuration +web_server: main.Server = main.Server { + hostname: "web-01" + title: "Production Web Server" + labels: "env: prod, tier: web, role: frontend" + user: "deploy" + + # Network configuration + network_public_ipv4: True + network_utility_ipv4: True + fix_local_hosts: True + + # SSH configuration + user_ssh_port: 22 + + # Basic taskservs to install + taskservs: [ + main.TaskServDef { + name: "nginx" + install_mode: "library" + profile: "production" + }, + main.TaskServDef { + name: "prometheus-node-exporter" + install_mode: "server" + profile: "monitoring" + } + ] +} + +# Database server with different configuration +db_server: main.Server = main.Server { + hostname: "db-01" + title: "PostgreSQL Database Server" + labels: "env: prod, tier: data, role: database" + user: "postgres" + + # Database servers typically don't need public IPs + network_public_ipv4: False + network_utility_ipv4: True + + # Install database-related taskservs + taskservs: [ + main.TaskServDef { + name: "postgresql" + install_mode: "server" + profile: "production" + }, + main.TaskServDef { + name: "postgres-exporter" + install_mode: "library" + profile: "monitoring" + } + ] +} + +# Development server with minimal configuration +dev_server: main.Server = main.Server { + hostname: "dev-01" + title: "Development Server" + labels: "env: dev, tier: development" + user: "developer" + + # Development servers can have relaxed settings + network_public_ipv4: True + delete_lock: False # Allow easy deletion for dev environments + + taskservs: [ + main.TaskServDef { + name: "docker" + install_mode: "library" + profile: "development" + } + ] +} \ No newline at end of file diff --git a/examples/kubernetes_deployment.k b/examples/kubernetes_deployment.k new file mode 100644 index 0000000..1673923 --- /dev/null +++ b/examples/kubernetes_deployment.k @@ -0,0 +1,325 @@ +# Kubernetes Deployment Example +# Shows complete K8s deployment with services, volumes, and monitoring + +import ..main + +# Web application deployment in Kubernetes +webapp_k8s: main.K8sDeploy = main.K8sDeploy { + name: "webapp" + namespace: "production" + create_ns: True + + # Deployment specification + spec: main.K8sDeploySpec { + replicas: 3 + containers: [ + main.K8sContainers { + name: "webapp" + image: "nginx:1.21-alpine" + + # Port configuration + ports: [ + main.K8sPort { + name: "http" + container: 80 + target: 8080 + } + ] + + # Resource requirements + resources_requests: main.K8sResources { + memory: "64Mi" + cpu: "50m" + } + resources_limits: main.K8sResources { + memory: "128Mi" + cpu: "100m" + } + + # Environment variables + env: [ + main.K8sKeyVal { + key: "NODE_ENV" + value: "production" + }, + main.K8sKeyVal { + key: "LOG_LEVEL" + value: "info" + } + ] + + # Mount configuration volume + volumeMounts: [ + main.K8sVolumeMount { + name: "config" 
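+                    # nginx vhost config rendered from the webapp-config ConfigMap (declared under volumes)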
+ mountPath: "/etc/nginx/conf.d" + readOnly: True + } + ] + } + ] + + # Volume configuration + volumes: [ + main.K8sVolume { + name: "config" + typ: "configMap" + configMap: main.K8sConfigMap { + name: "webapp-config" + } + } + ] + + # Node selection for production workloads + nodeSelector: [ + main.K8sKeyVal { + key: "node-type" + value: "production" + } + ] + + # Anti-affinity to spread pods across nodes + affinity: main.K8sAffinity { + antiAffinity: main.K8sAntyAffinityLabelSelector { + typ: "preferredDuringSchedulingIgnoredDuringExecution" + weight: 100 + labelSelector: [ + main.K8sAffinityMatch { + key: "app" + operator: "In" + values: ["webapp"] + } + ] + topologyKey: "kubernetes.io/hostname" + } + } + } + + # Service configuration + service: main.K8sService { + name: "webapp-service" + typ: "ClusterIP" + ports: [ + main.K8sPort { + name: "http" + target: 80 + nodePort: 30080 + } + ] + selector: [ + main.K8sKeyVal { + key: "app" + value: "webapp" + } + ] + } + + # Labels for the deployment + labels: [ + main.K8sKeyVal { + key: "app" + value: "webapp" + }, + main.K8sKeyVal { + key: "version" + value: "v1.0.0" + }, + main.K8sKeyVal { + key: "environment" + value: "production" + } + ] +} + +# Database deployment with persistent storage +database_k8s: main.K8sDeploy = main.K8sDeploy { + name: "postgres" + namespace: "production" + + spec: main.K8sDeploySpec { + replicas: 1 # Database typically runs single instance + containers: [ + main.K8sContainers { + name: "postgres" + image: "postgres:15-alpine" + + ports: [ + main.K8sPort { + name: "postgres" + container: 5432 + target: 5432 + } + ] + + # Database needs more resources + resources_requests: main.K8sResources { + memory: "256Mi" + cpu: "100m" + } + resources_limits: main.K8sResources { + memory: "512Mi" + cpu: "500m" + } + + # Database environment + env: [ + main.K8sKeyVal { + key: "POSTGRES_DB" + value: "webapp" + }, + main.K8sKeyVal { + key: "POSTGRES_USER" + value: "webapp" + }, + main.K8sKeyVal { + key: "POSTGRES_PASSWORD" + value: "changeme" # Use secrets in production + } + ] + + # Persistent data volume + volumeMounts: [ + main.K8sVolumeMount { + name: "postgres-data" + mountPath: "/var/lib/postgresql/data" + readOnly: False + } + ] + } + ] + + # Persistent volume for database + volumes: [ + main.K8sVolume { + name: "postgres-data" + typ: "volumeClaim" + persitentVolumeClaim: main.K8sVolumeClaim { + name: "postgres-pvc" + storageClassName: "manual" + storage: "10Gi" + modes: ["ReadWriteOnce"] + reclaimPolicy: "Retain" + } + } + ] + } + + # Internal service for database + service: main.K8sService { + name: "postgres-service" + typ: "ClusterIP" + ports: [ + main.K8sPort { + name: "postgres" + target: 5432 + } + ] + selector: [ + main.K8sKeyVal { + key: "app" + value: "postgres" + } + ] + } + + labels: [ + main.K8sKeyVal { + key: "app" + value: "postgres" + }, + main.K8sKeyVal { + key: "component" + value: "database" + } + ] +} + +# Monitoring deployment using Prometheus +monitoring_k8s: main.K8sDeploy = main.K8sDeploy { + name: "prometheus" + namespace: "monitoring" + create_ns: True + + spec: main.K8sDeploySpec { + replicas: 1 + containers: [ + main.K8sContainers { + name: "prometheus" + image: "prom/prometheus:v2.40.0" + + ports: [ + main.K8sPort { + name: "web" + container: 9090 + target: 9090 + } + ] + + resources_requests: main.K8sResources { + memory: "512Mi" + cpu: "200m" + } + resources_limits: main.K8sResources { + memory: "1Gi" + cpu: "500m" + } + + volumeMounts: [ + main.K8sVolumeMount { + name: "prometheus-config" + 
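+                    # main Prometheus configuration, sourced from the prometheus-config ConfigMap below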
mountPath: "/etc/prometheus" + readOnly: True + }, + main.K8sVolumeMount { + name: "prometheus-data" + mountPath: "/prometheus" + readOnly: False + } + ] + } + ] + + volumes: [ + main.K8sVolume { + name: "prometheus-config" + typ: "configMap" + configMap: main.K8sConfigMap { + name: "prometheus-config" + } + }, + main.K8sVolume { + name: "prometheus-data" + typ: "volumeClaim" + persitentVolumeClaim: main.K8sVolumeClaim { + name: "prometheus-pvc" + storage: "20Gi" + storageClassName: "manual" + modes: ["ReadWriteOnce"] + } + } + ] + } + + service: main.K8sService { + name: "prometheus-service" + typ: "NodePort" + ports: [ + main.K8sPort { + name: "web" + target: 9090 + nodePort: 30090 + } + ] + } + + labels: [ + main.K8sKeyVal { + key: "app" + value: "prometheus" + }, + main.K8sKeyVal { + key: "component" + value: "monitoring" + } + ] +} \ No newline at end of file diff --git a/examples/mixed_provider_workflow.k b/examples/mixed_provider_workflow.k new file mode 100644 index 0000000..d0f7b8d --- /dev/null +++ b/examples/mixed_provider_workflow.k @@ -0,0 +1,452 @@ +# Mixed Provider Workflow Example +# Demonstrates deployment across multiple cloud providers with advanced features + +import ..main + +# Production deployment across UpCloud and AWS +production_deployment: main.BatchWorkflow = main.BatchWorkflow { + workflow_id: "prod_multicloud_001" + name: "Production Multi-Cloud Deployment" + description: "Deploy production infrastructure across UpCloud (compute) and AWS (managed services)" + + operations: [ + # Phase 1: Create UpCloud infrastructure + main.BatchOperation { + operation_id: "upcloud_network" + name: "Create UpCloud Network Infrastructure" + operation_type: "custom" + provider: "upcloud" + action: "create" + parameters: { + "resource_type": "network" + "vpc_cidr": "10.0.0.0/16" + "subnets": "public,private" + "zones": "fi-hel2,de-fra1" + } + priority: 10 + timeout: 300 + } + + main.BatchOperation { + operation_id: "upcloud_compute" + name: "Create UpCloud Compute Instances" + operation_type: "server" + provider: "upcloud" + action: "create" + parameters: { + "server_count": "6" + "plan": "2xCPU-4GB" + "zones": "fi-hel2,de-fra1" + "distribution": "even" # Spread across zones + "server_type": "k8s-worker" + } + dependencies: [ + main.DependencyDef { + target_operation_id: "upcloud_network" + dependency_type: "sequential" + timeout: 300 + } + ] + priority: 9 + timeout: 900 + allow_parallel: True + } + + # Phase 2: Create AWS managed services + main.BatchOperation { + operation_id: "aws_database" + name: "Create AWS RDS PostgreSQL" + operation_type: "server" + provider: "aws" + action: "create" + parameters: { + "service": "rds" + "engine": "postgresql" + "version": "15.4" + "instance_class": "db.t3.medium" + "allocated_storage": "100" + "multi_az": "true" + "region": "eu-west-1" + "vpc_security_groups": "prod-db-sg" + } + priority: 9 + timeout: 1800 # RDS can take time + allow_parallel: True + } + + main.BatchOperation { + operation_id: "aws_redis" + name: "Create AWS ElastiCache Redis" + operation_type: "server" + provider: "aws" + action: "create" + parameters: { + "service": "elasticache" + "engine": "redis" + "node_type": "cache.t3.micro" + "num_cache_nodes": "2" + "region": "eu-west-1" + "parameter_group": "default.redis7" + } + priority: 9 + timeout: 1200 + allow_parallel: True + } + + # Phase 3: Set up Kubernetes cluster on UpCloud + main.BatchOperation { + operation_id: "install_k8s_control" + name: "Install Kubernetes Control Plane" + operation_type: "taskserv" + provider: 
"upcloud" + action: "create" + parameters: { + "taskserv": "kubernetes" + "role": "control-plane" + "version": "v1.31.0" + "target_count": "3" + "ha_mode": "true" + "container_runtime": "containerd" + } + dependencies: [ + main.DependencyDef { + target_operation_id: "upcloud_compute" + dependency_type: "sequential" + timeout: 600 + } + ] + priority: 8 + timeout: 1800 + } + + main.BatchOperation { + operation_id: "install_k8s_workers" + name: "Install Kubernetes Worker Nodes" + operation_type: "taskserv" + provider: "upcloud" + action: "create" + parameters: { + "taskserv": "kubernetes" + "role": "worker" + "target_count": "3" + "container_runtime": "containerd" + "join_existing": "true" + } + dependencies: [ + main.DependencyDef { + target_operation_id: "install_k8s_control" + dependency_type: "sequential" + timeout: 300 + } + ] + priority: 7 + timeout: 1200 + } + + # Phase 4: Install cluster services + main.BatchOperation { + operation_id: "install_networking" + name: "Install Cluster Networking" + operation_type: "taskserv" + action: "create" + parameters: { + "taskserv": "cilium" + "version": "1.14.2" + "mode": "tunnel" + "enable_hubble": "true" + } + dependencies: [ + main.DependencyDef { + target_operation_id: "install_k8s_workers" + dependency_type: "sequential" + timeout: 300 + } + ] + priority: 6 + timeout: 600 + } + + main.BatchOperation { + operation_id: "install_monitoring" + name: "Install Monitoring Stack" + operation_type: "taskserv" + action: "create" + parameters: { + "taskserv": "prometheus-stack" + "grafana_enabled": "true" + "alertmanager_enabled": "true" + "node_exporter_enabled": "true" + } + dependencies: [ + main.DependencyDef { + target_operation_id: "install_networking" + dependency_type: "sequential" + timeout: 180 + } + ] + priority: 5 + timeout: 900 + } + + # Phase 5: Configure cross-cloud connectivity + main.BatchOperation { + operation_id: "configure_connectivity" + name: "Configure Cross-Cloud Connectivity" + operation_type: "custom" + action: "configure" + parameters: { + "connectivity_type": "vpn" + "upcloud_endpoint": "dynamic" + "aws_vpc_id": "auto-detect" + "encryption": "wireguard" + "routing": "bgp" + } + dependencies: [ + main.DependencyDef { + target_operation_id: "aws_database" + dependency_type: "sequential" + timeout: 60 + }, + main.DependencyDef { + target_operation_id: "install_monitoring" + dependency_type: "sequential" + timeout: 60 + } + ] + priority: 4 + timeout: 600 + } + + # Phase 6: Final validation + main.BatchOperation { + operation_id: "validate_deployment" + name: "Validate Complete Deployment" + operation_type: "custom" + action: "configure" + parameters: { + "validation_type": "end_to_end" + "test_database_connectivity": "true" + "test_redis_connectivity": "true" + "test_k8s_cluster": "true" + "test_monitoring": "true" + } + dependencies: [ + main.DependencyDef { + target_operation_id: "configure_connectivity" + dependency_type: "sequential" + timeout: 300 + } + ] + priority: 1 + timeout: 600 + } + ] + + # Advanced workflow configuration + max_parallel_operations: 4 + global_timeout: 7200 # 2 hours + fail_fast: False # Continue on non-critical failures + + # SurrealDB for persistent state + storage: main.StorageConfig { + backend: "surrealdb" + connection_config: { + "url": "ws://surrealdb.internal:8000" + "namespace": "production" + "database": "multicloud_workflows" + "user": "workflow_executor" + "auth_token": "{{env.SURREALDB_TOKEN}}" + } + enable_persistence: True + retention_hours: 2160 # 90 days for production + 
enable_compression: True + encryption: main.SecretProvider { + provider: "sops" + sops_config: main.SopsConfig { + config_path: "./.sops.yaml" + age_key_file: "{{env.HOME}}/.config/sops/age/keys.txt" + use_age: True + } + } + } + + # Comprehensive monitoring + monitoring: main.MonitoringConfig { + enabled: True + backend: "prometheus" + enable_tracing: True + enable_notifications: True + notification_channels: [ + "webhook:slack://prod-ops-alerts", + "webhook:pagerduty://high-priority", + "email:devops-team@company.com" + ] + log_level: "info" + collection_interval: 30 + } + + # Production-grade retry policy + default_retry_policy: main.RetryPolicy { + max_attempts: 3 + initial_delay: 60 + max_delay: 600 + backoff_multiplier: 2 + retry_on_errors: [ + "timeout", + "connection_error", + "rate_limit", + "resource_unavailable", + "quota_exceeded" + ] + retry_on_any_error: False + } + + # Conservative rollback strategy + default_rollback_strategy: main.RollbackStrategy { + enabled: True + strategy: "manual" # Manual approval for production rollbacks + preserve_partial_state: True + rollback_timeout: 1800 + custom_rollback_operations: [ + "backup_state", + "notify_team", + "create_incident" + ] + } + + # Execution context for tracking + execution_context: { + "environment": "production" + "deployment_type": "multi_cloud" + "cost_center": "infrastructure" + "owner": "platform-team" + "change_request": "CHG-2025-001" + "approval": "approved" + } + + # Hooks for integration + pre_workflow_hooks: [ + "validate_prerequisites", + "check_maintenance_windows", + "notify_deployment_start" + ] + + post_workflow_hooks: [ + "run_smoke_tests", + "update_monitoring_dashboards", + "notify_deployment_complete", + "update_documentation" + ] +} + +# Advanced batch executor configuration for this workflow +multicloud_executor: main.BatchExecutor = main.BatchExecutor { + executor_id: "multicloud_prod_executor" + name: "Multi-Cloud Production Executor" + description: "Production-ready executor for multi-cloud deployments" + + scheduler: main.BatchScheduler { + strategy: "resource_aware" + resource_limits: { + "max_cpu_cores": 32 + "max_memory_mb": 65536 + "max_network_bandwidth": 10000 + "max_concurrent_api_calls": 100 + } + scheduling_interval: 15 + enable_preemption: True + } + + # Multiple queues for different priorities + queues: [ + main.BatchQueue { + queue_id: "critical" + queue_type: "priority" + max_size: 50 + retention_period: 86400 + max_delivery_attempts: 5 + }, + main.BatchQueue { + queue_id: "standard" + queue_type: "standard" + max_size: 200 + retention_period: 604800 + dead_letter_queue: "failed_operations" + max_delivery_attempts: 3 + } + ] + + # Mixed provider configuration + provider_config: main.ProviderMixConfig { + primary_provider: "upcloud" + secondary_providers: ["aws"] + provider_selection: "cost_optimize" + cross_provider_networking: { + "vpn_enabled": "true" + "mesh_networking": "wireguard" + "encryption": "aes256" + } + provider_limits: { + "upcloud": { + "max_servers": 50 + "max_storage_gb": 10000 + "api_rate_limit": "100/min" + } + "aws": { + "max_instances": 20 + "max_ebs_gb": 5000 + "api_rate_limit": "1000/min" + } + } + } + + # Production health monitoring + health_check: main.BatchHealthCheck { + enabled: True + check_interval: 30 + check_timeout: 15 + failure_threshold: 2 + success_threshold: 3 + health_checks: [ + "http://localhost:8080/health", + "check_provider_apis", + "check_storage_backend", + "check_monitoring_systems" + ] + failure_actions: [ + "alert", + 
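+            # ordered response: notify first, degrade gracefully, then escalate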
"graceful_degradation", + "escalate" + ] + } + + # Intelligent autoscaling + autoscaling: main.BatchAutoscaling { + enabled: True + min_parallel: 3 + max_parallel: 15 + scale_up_threshold: 0.75 + scale_down_threshold: 0.25 + cooldown_period: 300 + target_utilization: 0.60 + } + + # Comprehensive metrics + metrics: main.BatchMetrics { + detailed_metrics: True + retention_hours: 2160 # 90 days + aggregation_intervals: [60, 300, 1800, 3600, 86400] + enable_export: True + export_config: { + "prometheus_endpoint": "http://prometheus.monitoring:9090" + "grafana_dashboard": "multicloud_operations" + "datadog_api_key": "{{env.DATADOG_API_KEY}}" + } + custom_metrics: [ + "provider_api_latency", + "cross_cloud_bandwidth", + "cost_tracking", + "sla_compliance" + ] + } +} \ No newline at end of file diff --git a/examples/simple_workflow.k b/examples/simple_workflow.k new file mode 100644 index 0000000..05fb47e --- /dev/null +++ b/examples/simple_workflow.k @@ -0,0 +1,156 @@ +# Simple Workflow Example +# Demonstrates basic workflow creation with sequential operations + +import ..main + +# Simple web application deployment workflow +web_app_deployment: main.BatchWorkflow = main.BatchWorkflow { + workflow_id: "webapp_deploy_001" + name: "Web Application Deployment" + description: "Deploy a simple web application with database backend" + + operations: [ + # Step 1: Create database server + main.BatchOperation { + operation_id: "create_database" + name: "Create Database Server" + operation_type: "server" + provider: "upcloud" + action: "create" + parameters: { + "hostname": "webapp-db" + "plan": "1xCPU-2GB" + "zone": "fi-hel2" + "server_type": "database" + } + priority: 10 + timeout: 600 # 10 minutes + } + + # Step 2: Create web servers (can run in parallel) + main.BatchOperation { + operation_id: "create_web_servers" + name: "Create Web Servers" + operation_type: "server" + provider: "upcloud" + action: "create" + parameters: { + "server_count": "2" + "hostname_prefix": "webapp-web" + "plan": "1xCPU-1GB" + "zone": "fi-hel2" + "server_type": "web" + } + priority: 10 + timeout: 600 + allow_parallel: True + } + + # Step 3: Install database after database server is ready + main.BatchOperation { + operation_id: "install_database" + name: "Install PostgreSQL" + operation_type: "taskserv" + action: "create" + parameters: { + "taskserv": "postgresql" + "version": "15" + "target_servers": "webapp-db" + } + dependencies: [ + main.DependencyDef { + target_operation_id: "create_database" + dependency_type: "sequential" + timeout: 300 + } + ] + priority: 8 + timeout: 900 # 15 minutes for database installation + } + + # Step 4: Install web stack after web servers are ready + main.BatchOperation { + operation_id: "install_web_stack" + name: "Install Web Stack" + operation_type: "taskserv" + action: "create" + parameters: { + "taskserv": "nginx" + "target_servers": "webapp-web-*" + "config_template": "reverse_proxy" + } + dependencies: [ + main.DependencyDef { + target_operation_id: "create_web_servers" + dependency_type: "sequential" + timeout: 300 + } + ] + priority: 8 + timeout: 600 + } + + # Step 5: Configure application after all components are ready + main.BatchOperation { + operation_id: "configure_application" + name: "Configure Web Application" + operation_type: "custom" + action: "configure" + parameters: { + "config_type": "application" + "database_url": "postgres://webapp-db:5432/webapp" + "web_servers": "webapp-web-01,webapp-web-02" + } + dependencies: [ + main.DependencyDef { + target_operation_id: 
"install_database" + dependency_type: "sequential" + timeout: 60 + }, + main.DependencyDef { + target_operation_id: "install_web_stack" + dependency_type: "sequential" + timeout: 60 + } + ] + priority: 5 + timeout: 300 + } + ] + + # Workflow settings + max_parallel_operations: 3 + global_timeout: 3600 # 1 hour total + fail_fast: True # Stop on first failure + + # Simple filesystem storage for this example + storage: main.StorageConfig { + backend: "filesystem" + base_path: "./webapp_deployments" + enable_persistence: True + retention_hours: 168 # 1 week + } + + # Basic monitoring + monitoring: main.MonitoringConfig { + enabled: True + backend: "prometheus" + enable_tracing: False # Simplified for example + log_level: "info" + } + + # Conservative retry policy + default_retry_policy: main.RetryPolicy { + max_attempts: 2 + initial_delay: 30 + backoff_multiplier: 2 + retry_on_errors: ["timeout", "connection_error"] + } + + # Enable rollback for safety + default_rollback_strategy: main.RollbackStrategy { + enabled: True + strategy: "immediate" + preserve_partial_state: False + } +} \ No newline at end of file diff --git a/examples_batch.k b/examples_batch.k new file mode 100644 index 0000000..9b5c9c5 --- /dev/null +++ b/examples_batch.k @@ -0,0 +1,457 @@ +# Info: KCL batch workflow examples for provisioning (Provisioning) +# Author: JesusPerezLorenzo +# Release: 0.0.1 +# Date: 25-09-2025 +# Description: Usage examples for batch workflows and operations +import .workflows +import .batch +import .settings + +# Example 1: Mixed Provider Infrastructure Deployment +mixed_provider_workflow: workflows.BatchWorkflow = workflows.BatchWorkflow { + workflow_id: "mixed_infra_deploy_001" + name: "Mixed Provider Infrastructure Deployment" + description: "Deploy infrastructure across UpCloud and AWS with cross-provider networking" + operations: [ + workflows.BatchOperation { + operation_id: "create_upcloud_servers" + name: "Create UpCloud Web Servers" + operation_type: "server" + provider: "upcloud" + action: "create" + parameters: { + "server_count": "3" + "server_type": "web" + "zone": "fi-hel2" + "plan": "1xCPU-2GB" + } + allow_parallel: True + priority: 10 + } + workflows.BatchOperation { + operation_id: "create_aws_database" + name: "Create AWS RDS Database" + operation_type: "server" + provider: "aws" + action: "create" + parameters: { + "service": "rds" + "instance_class": "db.t3.micro" + "engine": "postgresql" + "region": "eu-west-1" + } + dependencies: [ + workflows.DependencyDef { + target_operation_id: "create_upcloud_servers" + dependency_type: "sequential" + timeout: 600 + } + ] + priority: 5 + } + workflows.BatchOperation { + operation_id: "install_kubernetes" + name: "Install Kubernetes on UpCloud servers" + operation_type: "taskserv" + provider: "upcloud" + action: "create" + parameters: { + "taskserv": "kubernetes" + "version": "v1.28.0" + "cluster_name": "prod-cluster" + } + dependencies: [ + workflows.DependencyDef { + target_operation_id: "create_upcloud_servers" + dependency_type: "sequential" + timeout: 1200 + } + ] + # 1 hour for K8s installation + timeout: 3600 + priority: 8 + } + workflows.BatchOperation { + operation_id: "setup_monitoring" + name: "Setup Prometheus monitoring" + operation_type: "taskserv" + action: "create" + parameters: { + "taskserv": "prometheus" + "namespace": "monitoring" + "retention": "30d" + } + dependencies: [ + workflows.DependencyDef { + target_operation_id: "install_kubernetes" + dependency_type: "sequential" + timeout: 600 + } + ] + priority: 3 + } + ] 
+ max_parallel_operations: 3 + fail_fast: False + storage: workflows.StorageConfig { + backend: "surrealdb" + connection_config: { + "url": "ws://localhost:8000" + "namespace": "provisioning" + "database": "batch_workflows" + } + enable_persistence: True + # 30 days + retention_hours: 720 + } + monitoring: workflows.MonitoringConfig { + enabled: True + backend: "prometheus" + enable_tracing: True + enable_notifications: True + notification_channels: ["webhook:slack://ops-channel"] + } + default_retry_policy: workflows.RetryPolicy { + max_attempts: 3 + initial_delay: 10 + backoff_multiplier: 2 + retry_on_errors: ["connection_error", "timeout", "rate_limit", "resource_unavailable"] + } + execution_context: { + "environment": "production" + "cost_center": "infrastructure" + "owner": "devops-team" + } +} + +# Example 2: Server Scaling Workflow with SurrealDB Backend +server_scaling_workflow: workflows.BatchWorkflow = workflows.BatchWorkflow { + workflow_id: "server_scaling_002" + name: "Auto-scaling Server Workflow" + description: "Scale servers based on load with automatic rollback on failure" + operations: [ + workflows.BatchOperation { + operation_id: "scale_web_servers" + name: "Scale web servers up" + operation_type: "server" + action: "scale" + parameters: { + "target_count": "6" + "current_count": "3" + "server_group": "web-tier" + } + retry_policy: workflows.RetryPolicy { + max_attempts: 2 + initial_delay: 30 + retry_on_errors: ["resource_limit", "quota_exceeded"] + } + rollback_strategy: workflows.RollbackStrategy { + enabled: True + strategy: "immediate" + custom_rollback_operations: ["scale_down_to_original"] + } + } + workflows.BatchOperation { + operation_id: "update_load_balancer" + name: "Update load balancer configuration" + operation_type: "custom" + action: "configure" + parameters: { + "service": "haproxy" + "config_template": "web_tier_6_servers" + } + dependencies: [ + workflows.DependencyDef { + target_operation_id: "scale_web_servers" + dependency_type: "conditional" + conditions: ["servers_ready", "health_check_passed"] + timeout: 300 + } + ] + } + ] + storage: workflows.StorageConfig { + backend: "surrealdb" + connection_config: { + "url": "ws://surrealdb.local:8000" + "namespace": "scaling" + "database": "operations" + } + } + fail_fast: True +} + +# Example 3: Maintenance Workflow with Filesystem Backend +maintenance_workflow: workflows.BatchWorkflow = workflows.BatchWorkflow { + workflow_id: "maintenance_003" + name: "System Maintenance Workflow" + description: "Perform scheduled maintenance across multiple providers" + operations: [ + workflows.BatchOperation { + operation_id: "backup_databases" + name: "Backup all databases" + operation_type: "custom" + action: "create" + parameters: { + "backup_type": "full" + "compression": "gzip" + "retention_days": "30" + } + # 2 hours + timeout: 7200 + } + workflows.BatchOperation { + operation_id: "update_taskservs" + name: "Update all taskservs to latest versions" + operation_type: "taskserv" + action: "update" + parameters: { + "update_strategy": "rolling" + "max_unavailable": "1" + } + dependencies: [ + workflows.DependencyDef { + target_operation_id: "backup_databases" + dependency_type: "sequential" + } + ] + # Sequential updates for safety + allow_parallel: False + } + workflows.BatchOperation { + operation_id: "verify_services" + name: "Verify all services are healthy" + operation_type: "custom" + action: "configure" + parameters: { + "verification_type": "health_check" + "timeout_per_service": "30" + } + 
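+            # Verification is gated on the rolling taskserv update completing first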
dependencies: [ + workflows.DependencyDef { + target_operation_id: "update_taskservs" + dependency_type: "sequential" + } + ] + } + ] + storage: workflows.StorageConfig { + backend: "filesystem" + base_path: "./maintenance_workflows" + enable_persistence: True + enable_compression: True + } + pre_workflow_hooks: ["notify_maintenance_start", "set_maintenance_mode"] + post_workflow_hooks: ["unset_maintenance_mode", "notify_maintenance_complete"] +} + +# Example 4: Comprehensive Batch Executor Configuration +production_batch_executor: batch.BatchExecutor = batch.BatchExecutor { + executor_id: "prod_batch_executor" + name: "Production Batch Executor" + description: "Production-ready batch executor with full observability and mixed provider support" + scheduler: batch.BatchScheduler { + strategy: "resource_aware" + resource_limits: { + "max_cpu_cores": 16 + "max_memory_mb": 32768 + # 10Gbps + "max_network_bandwidth": 10000 + } + scheduling_interval: 5 + enable_preemption: True + } + queues: [ + batch.BatchQueue { + queue_id: "high_priority" + queue_type: "priority" + max_size: 100 + # 1 day + retention_period: 86400 + max_delivery_attempts: 5 + } + batch.BatchQueue { + queue_id: "standard" + queue_type: "standard" + max_size: 500 + dead_letter_queue: "failed_operations" + } + batch.BatchQueue { + queue_id: "failed_operations" + queue_type: "dead_letter" + # 7 days + retention_period: 604800 + } + ] + resource_constraints: [ + batch.ResourceConstraint { + resource_type: "cpu" + resource_name: "total_cpu_cores" + max_units: 16 + units_per_operation: 1 + hard_constraint: True + } + batch.ResourceConstraint { + resource_type: "memory" + resource_name: "total_memory_gb" + max_units: 32 + units_per_operation: 2 + hard_constraint: True + } + ] + provider_config: batch.ProviderMixConfig { + primary_provider: "upcloud" + secondary_providers: ["aws"] + provider_selection: "cost_optimize" + cross_provider_networking: { + "vpn_enabled": "True" + "mesh_networking": "wireguard" + } + provider_limits: { + "upcloud": {"max_servers": 20, "max_storage_gb": 1000} + "aws": {"max_instances": 10, "max_ebs_gb": 500} + } + } + health_check: batch.BatchHealthCheck { + enabled: True + check_interval: 30 + failure_threshold: 2 + health_checks: ["http://localhost:8080/health", "check_disk_space", "check_memory_usage"] + failure_actions: ["alert", "scale_down", "rollback"] + } + autoscaling: batch.BatchAutoscaling { + enabled: True + min_parallel: 2 + max_parallel: 12 + scale_up_threshold: 0.85 + scale_down_threshold: 0.15 + target_utilization: 0.65 + # 3 minutes + cooldown_period: 180 + } + metrics: batch.BatchMetrics { + detailed_metrics: True + # 30 days + retention_hours: 720 + # 1min, 5min, 30min, 1hour + aggregation_intervals: [60, 300, 1800, 3600] + enable_export: True + export_config: { + "prometheus_endpoint": "http://prometheus.local:9090" + "grafana_dashboard": "batch_operations_dashboard" + } + } + storage: workflows.StorageConfig { + backend: "surrealdb" + connection_config: { + "url": "ws://surrealdb.prod:8000" + "namespace": "production" + "database": "batch_workflows" + "user": "batch_executor" + "auth_token": "{{env.SURREALDB_TOKEN}}" + } + enable_persistence: True + # 90 days + retention_hours: 2160 + enable_compression: True + encryption: settings.SecretProvider { + provider: "sops" + sops_config: settings.SopsConfig { + config_path: "./.sops.yaml" + age_key_file: "{{env.HOME}}/.config/sops/age/keys.txt" + use_age: True + } + } + } + security_config: { + "tls_enabled": "True" + "auth_required": "True" + 
"rbac_enabled": "True" + "audit_level": "full" + } + webhook_endpoints: [ + "https://hooks.slack.com/services/ops-notifications" + "https://api.pagerduty.com/generic/incidents" + ] + performance_config: { + "io_threads": "8" + "worker_threads": "16" + "batch_size": "50" + "connection_pool_size": "20" + } +} + +# Example 5: Template for Common Infrastructure Deployment +infra_deployment_template: workflows.WorkflowTemplate = workflows.WorkflowTemplate { + template_id: "standard_infra_deployment" + name: "Standard Infrastructure Deployment Template" + description: "Template for deploying standard infrastructure with customizable parameters" + category: "infrastructure" + workflow_template: workflows.BatchWorkflow { + # Template parameter: {{template.workflow_id}} + workflow_id: "custom_deployment" + # Template parameter: {{template.workflow_name}} + name: "Custom Deployment" + operations: [ + workflows.BatchOperation { + operation_id: "create_servers" + # Template: Create {{template.server_count}} servers + name: "Create servers" + operation_type: "server" + # Template parameter: {{template.provider}} + provider: "upcloud" + action: "create" + parameters: { + # Template parameter: {{template.server_count}} + "count": "3" + # Template parameter: {{template.server_type}} + "type": "web" + # Template parameter: {{template.zone}} + "zone": "fi-hel2" + } + } + workflows.BatchOperation { + operation_id: "install_base_taskservs" + name: "Install base taskservs" + operation_type: "taskserv" + action: "create" + parameters: { + # Template parameter: {{template.base_taskservs}} + "taskservs": "kubernetes,prometheus,grafana" + } + dependencies: [ + workflows.DependencyDef { + target_operation_id: "create_servers" + dependency_type: "sequential" + } + ] + } + ] + storage: workflows.StorageConfig { + # Template parameter: {{template.storage_backend}} + backend: "filesystem" + # Template parameter: {{template.storage_path}} + base_path: "./deployments" + } + } + parameters: { + "workflow_id": "custom_deployment" + "workflow_name": "Custom Deployment" + "server_count": "3" + "server_type": "web" + "provider": "upcloud" + "zone": "fi-hel2" + "base_taskservs": "kubernetes,prometheus,grafana" + "storage_backend": "filesystem" + "storage_path": "./deployments" + } + required_parameters: [ + "workflow_id" + "server_count" + "provider" + ] + examples: [ + "Small deployment: server_count=2, server_type=micro" + "Production deployment: server_count=6, server_type=standard, provider=upcloud" + ] +} diff --git a/gitea.k b/gitea.k new file mode 100644 index 0000000..689d4b6 --- /dev/null +++ b/gitea.k @@ -0,0 +1,325 @@ +""" +Gitea Integration Configuration Schemas + +This module defines schemas for Gitea service configuration, including: +- Local and remote Gitea deployment options +- Repository management +- Workspace integration +- Extension publishing +- Locking mechanism + +Version: 1.0.0 +KCL Version: 0.11.3+ +""" + +schema GiteaConfig: + """ + Main Gitea service configuration + + Supports both local (self-hosted) and remote Gitea instances. + Local mode can deploy via Docker or binary. 
+ + Examples: + # Local Docker deployment + GiteaConfig { + mode = "local" + local = LocalGitea { + enabled = True + deployment = "docker" + port = 3000 + auto_start = True + } + } + + # Remote Gitea instance + GiteaConfig { + mode = "remote" + remote = RemoteGitea { + enabled = True + url = "https://gitea.example.com" + api_url = "https://gitea.example.com/api/v1" + } + } + """ + mode: "local" | "remote" + + local?: LocalGitea + remote?: RemoteGitea + + auth: GiteaAuth + + repositories: GiteaRepositories = GiteaRepositories {} + + workspace_features: WorkspaceFeatures = WorkspaceFeatures {} + + check: + mode == "local" and local != None or mode == "remote" and remote != None, \ + "Must configure local or remote based on mode" + + mode == "local" and local.enabled or mode == "remote" and remote.enabled, \ + "Selected Gitea mode must be enabled" + +schema LocalGitea: + """ + Local Gitea deployment configuration + + Supports Docker container or binary deployment. + """ + enabled: bool = False + deployment: "docker" | "binary" + port: int = 3000 + data_dir: str = "~/.provisioning/gitea" + auto_start: bool = False + + docker?: DockerGitea + binary?: BinaryGitea + + check: + enabled, "Local Gitea must be enabled if configured" + + port > 0 and port < 65536, \ + "Port must be between 1 and 65535" + + len(data_dir) > 0, "Data directory required" + + deployment == "docker" and docker != None or \ + deployment == "binary" and binary != None, \ + "Must configure docker or binary based on deployment type" + +schema DockerGitea: + """Docker-based Gitea deployment""" + image: str = "gitea/gitea:1.21" + container_name: str = "provisioning-gitea" + ssh_port: int = 222 + environment: {str: str} = { + "USER_UID" = "1000" + "USER_GID" = "1000" + "GITEA__database__DB_TYPE" = "sqlite3" + } + volumes: [str] = [ + "gitea-data:/data" + "/etc/timezone:/etc/timezone:ro" + "/etc/localtime:/etc/localtime:ro" + ] + restart_policy: str = "unless-stopped" + + check: + len(image) > 0, "Docker image required" + len(container_name) > 0, "Container name required" + ssh_port > 0 and ssh_port < 65536, "SSH port must be 1-65535" + +schema BinaryGitea: + """Binary-based Gitea deployment""" + binary_path: str + config_path: str + version: str = "1.21.0" + user: str = "git" + group: str = "git" + + check: + len(binary_path) > 0, "Binary path required" + len(config_path) > 0, "Config path required" + +schema RemoteGitea: + """ + Remote Gitea instance configuration + + Points to existing Gitea server. + """ + enabled: bool = False + url: str + api_url: str + + check: + enabled, "Remote Gitea must be enabled if configured" + + len(url) > 0 and url.startswith("http"), \ + "URL must start with http:// or https://" + + len(api_url) > 0 and api_url.startswith("http"), \ + "API URL must start with http:// or https://" + +schema GiteaAuth: + """ + Gitea authentication configuration + + Token-based authentication for API access. + Token should be stored in encrypted file (SOPS). + """ + token_path: str + username?: str + + check: + len(token_path) > 0, "Token path required" + +schema GiteaRepositories: + """ + Repository organization and naming configuration + + Defines organization structure and repository names. 
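+    Workspace repositories are grouped under `workspaces_org`, separate from
+    the core organization.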
+ """ + organization: str = "provisioning" + core_repo: str = "provisioning-core" + extensions_repo: str = "provisioning-extensions" + platform_repo: str = "provisioning-platform" + workspaces_org: str = "workspaces" + + check: + len(organization) > 0, "Organization name required" + len(core_repo) > 0, "Core repo name required" + len(extensions_repo) > 0, "Extensions repo name required" + len(platform_repo) > 0, "Platform repo name required" + len(workspaces_org) > 0, "Workspaces org name required" + +schema WorkspaceFeatures: + """ + Workspace integration feature flags + + Controls which Gitea features are enabled for workspaces. + """ + git_integration: bool = True + locking_enabled: bool = True + webhooks_enabled: bool = False + auto_sync: bool = False + branch_protection: bool = False + + check: + git_integration or not locking_enabled, \ + "Locking requires git integration" + +schema GiteaRepository: + """ + Gitea repository metadata + + Used for creating and managing repositories. + """ + name: str + owner: str + description?: str + private: bool = False + auto_init: bool = True + default_branch: str = "main" + gitignore?: str + license?: str + readme?: str + + check: + len(name) > 0, "Repository name required" + len(owner) > 0, "Repository owner required" + +schema GiteaRelease: + """ + Gitea release configuration + + Used for publishing extensions and versioned artifacts. + """ + tag_name: str + release_name: str + body?: str + draft: bool = False + prerelease: bool = False + target_commitish: str = "main" + + check: + len(tag_name) > 0, "Tag name required" + len(release_name) > 0, "Release name required" + +schema GiteaIssue: + """ + Gitea issue configuration + + Used for workspace locking mechanism. + """ + title: str + body: str + labels: [str] = [] + assignee?: str + milestone?: int + + check: + len(title) > 0, "Issue title required" + +schema WorkspaceLock: + """ + Workspace lock metadata + + Stored as Gitea issue for distributed locking. + """ + workspace_name: str + lock_type: "read" | "write" | "deploy" + user: str + timestamp: str + operation?: str + expiry?: str + force_unlock: bool = False + + check: + len(workspace_name) > 0, "Workspace name required" + len(user) > 0, "User required" + len(timestamp) > 0, "Timestamp required" + +schema ExtensionPublishConfig: + """ + Extension publishing configuration + + Defines how extensions are packaged and published to Gitea. + """ + extension_path: str + version: str + release_notes?: str + include_patterns: [str] = ["*.nu", "*.k", "*.toml", "*.md"] + exclude_patterns: [str] = ["*.tmp", "*.log", ".git/*"] + compression: "tar.gz" | "zip" = "tar.gz" + + check: + len(extension_path) > 0, "Extension path required" + len(version) > 0, "Version required" + +schema GiteaWebhook: + """ + Gitea webhook configuration + + For future integration with automated workflows. 
+ """ + url: str + content_type: "json" | "form" = "json" + secret?: str + events: [str] = ["push", "pull_request", "release"] + active: bool = True + + check: + len(url) > 0 and url.startswith("http"), \ + "Webhook URL must start with http:// or https://" + +# Example configurations +_local_docker_gitea = GiteaConfig { + mode = "local" + local = LocalGitea { + enabled = True + deployment = "docker" + port = 3000 + data_dir = "~/.provisioning/gitea" + auto_start = True + docker = DockerGitea { + image = "gitea/gitea:1.21" + container_name = "provisioning-gitea" + } + } + auth = GiteaAuth { + token_path = "~/.provisioning/secrets/gitea-token.enc" + username = "provisioning" + } +} + +_remote_gitea = GiteaConfig { + mode = "remote" + remote = RemoteGitea { + enabled = True + url = "https://gitea.example.com" + api_url = "https://gitea.example.com/api/v1" + } + auth = GiteaAuth { + token_path = "~/.provisioning/secrets/gitea-token.enc" + username = "provisioning" + } +} diff --git a/k8s_deploy.k b/k8s_deploy.k new file mode 100644 index 0000000..d908a01 --- /dev/null +++ b/k8s_deploy.k @@ -0,0 +1,259 @@ +# Info: KCL core lib service schemas for provisioning (Provisioning) +# Author: JesusPerezLorenzo +# Release: 0.0.4 +# Date: 15-12-2023 +schema K8sPort: + """ + K8S Port settings + """ + name: str + typ?: str = "TCP" + container?: int + nodePort?: int + target?: int + +schema K8sKeyVal: + """ + K8S label,selector,env settings + """ + key: str + value: str + +schema K8sKeyPath: + """ + K8S key,path settings + """ + key: str + path: str + +schema K8sVolumeMount: + """ + K8S VolumeMounts settings + """ + name: str + readOnly: bool = False + mountPath: str + subPath?: str + +schema K8sVolumeClaim: + """ + K8S VolumeClaim settings + """ + name: str + storageClassName: "manual" | "nfs-client" | "rook-cephfs" = "manual" + modes: ["ReadWriteOnce" | "ReadOnlyMain" | "ReadWriteMany" | "ReadWriteOncePod"] = ["ReadWriteOnce"] + abbrev_mode?: ["RWO" | "ROX" | "RWX" | "RWOP"] = ["RWO"] + reclaimPolicy?: "Recycle" | "Retain" | "Delete" = "Retain" + storage?: str + typ: "volumeClaim" | "configMap" | "secret" | "" = "" + pvMode?: "unspecified" | "Filesystem" | "Block" + pvcMode?: "unspecified" | "Filesystem" | "Block" + hostPath?: str + +schema K8sConfigMap: + """ + K8S Volume ConfigMap settings + """ + name: str + +schema K8sSecret: + """ + K8S Volume Secret settings + """ + name: str + items: [K8sKeyPath] + +schema K8sVolume: + """ + K8S Volume settings + """ + name: str + typ: "volumeClaim" | "configMap" | "secret" = "volumeClaim" + persitentVolumeClaim?: K8sVolumeClaim + items?: [K8sKeyPath] + configMap?: K8sConfigMap + secret?: K8sSecret + +schema K8sService: + """ + K8S Service settings + """ + name: str + typ: "ClusterIP" | "NodePort" | "LoadBalancer" | "ExternalName" | "Headless" | "None" = "ClusterIP" + externalName?: str + proto: "TCP" = "TCP" + ports: [K8sPort] + selector?: [K8sKeyVal] + externaIPs?: [str] + +schema K8sContainers: + """ + K8S Container settings + """ + name: str = "main" + resources_requests?: K8sResources + resources_limits?: K8sResources + image: str + cmd?: str + imagePull: "IfNotPresent" | "Always" | "Never" = "IfNotPresent" + env?: [K8sKeyVal] + ports?: [K8sPort] + volumeMounts?: [K8sVolumeMount] + +schema K8sBackup: + """ + K8S Backup settings + """ + name: str + typ: str + mount_path: str + +schema K8sResources: + #"64Mi" + memory: str + #"250m" + cpu: str + +schema K8sDeploySpec: + """ + K8S Deployment Spec settings + """ + # K8s Deploy replica + replicas: int = 1 + 
+schema K8sResources:
+    """
+    K8S Resources settings
+    """
+    # e.g. "64Mi"
+    memory: str
+    # e.g. "250m"
+    cpu: str
+
+schema K8sDeploySpec:
+    """
+    K8S Deployment Spec settings
+    """
+    # K8s Deploy replicas
+    replicas: int = 1
+    # Use the host user namespace; set False to request a per-pod user namespace
+    hostUsers?: bool = True
+    # K8s Deploy containers
+    containers: [K8sContainers]
+    imagePullSecret?: str
+    nodeSelector?: [K8sKeyVal]
+    nodeName?: str
+    affinity?: K8sAffinity
+    # K8s Deploy Volumes
+    volumes?: [K8sVolume]
+    # K8s Secrets
+    secrets?: [K8sSecret]
+
+schema K8sAffinityMatch:
+    """
+    K8S Deployment Affinity Match settings
+    """
+    key: str
+    operator: "In" | "NotIn" | "Exists" | "DoesNotExist"
+    values: [str]
+
+schema K8sAffinityLabelSelector:
+    """
+    K8S Deployment Affinity Label Selector settings
+    """
+    typ: "requiredDuringSchedulingIgnoredDuringExecution" | "preferredDuringSchedulingIgnoredDuringExecution" = "requiredDuringSchedulingIgnoredDuringExecution"
+    labelSelector: [K8sAffinityMatch]
+    # example: topology.kubernetes.io/zone
+    topologyKey?: str
+    matchLabelKeys?: [str]
+
+schema K8sPrxyTLS:
+    """
+    K8S Deployment Proxy TLS settings
+    """
+    httpsRedirect?: bool = False
+    mode?: "SIMPLE" | "PASSTHROUGH" | "MULTI" | "" = "SIMPLE"
+    credentialName?: str
+
+schema K8sPrxyPort:
+    """
+    K8S Proxy Port settings
+    """
+    name: str
+    number?: int
+    proto: "HTTP" | "HTTPS" | "TCP" = "HTTPS"
+
+schema K8sPrxyGatewayServer:
+    """
+    K8S Deployment Proxy Gateway Server settings
+    """
+    port: K8sPrxyPort
+    tls?: K8sPrxyTLS
+    hosts?: [str]
+
+schema K8sPrxyVirtualServiceRoute:
+    """
+    K8S Deployment Proxy Virtual Service Route settings
+    """
+    port_number: int
+    host: str
+
+schema K8sPrxyVirtualServiceMatchURL:
+    """
+    K8S Deployment Proxy Virtual Service Match URL settings
+    """
+    port?: int
+    sniHost?: [str]
+
+schema K8sPrxyVirtualServiceMatch:
+    """
+    K8S Deployment Proxy Virtual Service Match settings
+    """
+    typ: "tcp" | "http" | "tls"
+    location?: [K8sPrxyVirtualServiceMatchURL]
+    route_destination?: [K8sPrxyVirtualServiceRoute]
+
+schema K8sPrxyVirtualService:
+    """
+    K8S Deployment Proxy Virtual Service settings
+    """
+    hosts: [str]
+    gateways: [str]
+    matches?: [K8sPrxyVirtualServiceMatch]
+
+schema K8sAntiAffinityLabelSelector(K8sAffinityLabelSelector):
+    """
+    K8S Deployment AntiAffinity Label Selector settings
+    """
+    weight: int = 100
+
+schema K8sAffinity:
+    """
+    K8S Deployment Affinity settings
+    """
+    affinity?: K8sAffinityLabelSelector
+    antiAffinity?: K8sAntiAffinityLabelSelector
+
+schema K8sDefs:
+    name: str
+    ns: str
+    domain: str
+    full_domain: str
+    primary_dom: str
+    cluster_domain: str
+
+schema K8sDeploy:
+    """
+    K8S Deployment settings
+    """
+    # K8s Deploy Name
+    name: str
+    # K8s Deploy name-in-filenames
+    name_in_files: str = "${name}"
+    # K8s NameSpace
+    namespace: str | "default"
+    # K8s Create NameSpace
+    create_ns: bool = False
+    full_domain?: str
+    # K8s Deploy labels
+    labels: [K8sKeyVal] = [K8sKeyVal {key: "${name}", value: "${name}"}]
+    sel_labels: [K8sKeyVal] = labels
+    tpl_labels: [K8sKeyVal] = labels
+    spec: K8sDeploySpec
+    # Cluster Ingress proxy to use
+    prxy?: "istio"
+    prxy_ns?: str = "istio-system"
+    prxyGatewayServers?: [K8sPrxyGatewayServer]
+    prxyVirtualService?: K8sPrxyVirtualService
+    # TLS certs path for service
+    tls_path?: str = "ssl"
+    # Create bin/apply.sh
+    bin_apply: bool = True
+    # K8s Service
+    service?: K8sService
+    # Service Backup K8s JOB
+    backups?: [K8sBackup]
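+
+# Illustrative sketch (editor's example; hosts and credential name are placeholders):
+# exposing HTTPS through the istio proxy schemas above.
+_example_gateway_server = K8sPrxyGatewayServer {
+    port = K8sPrxyPort {
+        name = "https"
+        number = 443
+        proto = "HTTPS"
+    }
+    tls = K8sPrxyTLS {
+        httpsRedirect = True
+        mode = "SIMPLE"
+        credentialName = "web-cert"
+    }
+    hosts = ["web.example.com"]
+}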
diff --git a/kcl.mod b/kcl.mod
new file mode 100644
index 0000000..03b4767
--- /dev/null
+++ b/kcl.mod
@@ -0,0 +1,5 @@
+[package]
+name = "provisioning"
+edition = "v0.11.3"
+version = "0.0.1"
+
diff --git a/kcl.mod.lock b/kcl.mod.lock
new file mode 100644
index 0000000..e69de29
diff --git a/lib.k b/lib.k
new file mode 100644
index 0000000..7c852c4
--- /dev/null
+++ b/lib.k
@@ -0,0 +1,70 @@
+# Info: KCL core lib schemas for provisioning (Provisioning)
+# Author: JesusPerezLorenzo
+# Release: 0.0.4
+# Date: 15-12-2023
+schema StorageVol:
+    """
+    StorageVol settings
+    """
+    name: str
+    size: int = 0
+    total: int = size
+    type: "ext4" | "xfs" | "btrfs" | "raw" | "zfs" = "ext4"
+    mount: bool = True
+    mount_path?: str
+    fstab: bool = True
+
+    check:
+        len(name) > 0, "Check name value"
+
+# Possible extra check (not enforced yet): mount == True implies mount_path != Undefined
+schema Storage(StorageVol):
+    """
+    Storage settings
+    """
+    parts?: [StorageVol] = []
+
+    check:
+        len(name) > 0, "Check name value"
+        total >= sum([p.size for p in parts]), "🛑 Sum of partition sizes ${sum([p.size for p in parts])} exceeds total storage ${total}"
+
+schema TaskServDef:
+    """
+    TaskServDef settings
+    """
+    name: str
+    install_mode: "getfile" | "library" | "server" | "library-server" | "server-library" = "library"
+    profile: str = "default"
+    target_save_path: str = ""
+
+schema ClusterDef:
+    """
+    ClusterDef settings
+    """
+    name: str
+    profile: str = "default"
+    target_save_path: str = ""
+
+schema ScaleData:
+    """
+    Scale data settings
+    """
+    def: str
+    disabled: bool = False
+    mode: "auto" | "manual" | "ondemand" = "manual"
+    expire?: str
+    from?: str
+    to?: str
+
+schema ScaleResource:
+    """
+    Scale server settings
+    """
+    default: ScaleData
+    fallback?: ScaleData
+    up?: ScaleData
+    down?: ScaleData
+    min?: ScaleData
+    max?: ScaleData
+    path: str = "/etc/scale_provisioning"
+
diff --git a/main.k b/main.k
new file mode 100644
index 0000000..64705bc
--- /dev/null
+++ b/main.k
@@ -0,0 +1,56 @@
+# Main entry point for provisioning KCL module
+# This file imports all schemas to make them discoverable as package submodules
+# Author: JesusPerezLorenzo
+# Release: 0.1.0
+# Date: 29-09-2025
+
+# ============================================================================
+# IMPORTANT: KCL Import Pattern
+# ============================================================================
+# This module uses the DIRECT SUBMODULE IMPORTS pattern (no re-exports).
+#
+# WHY NO RE-EXPORTS?
+# Re-exports like "Settings = settings.Settings" create immutable variable
+# assignments in KCL, causing ImmutableError (E1001) when extensions try to
+# import them. KCL v0.11.3 doesn't support Python-style namespace re-exports.
+#
+# CORRECT USAGE IN EXTENSIONS:
+#   import provisioning.settings      # For Settings, SecretProvider, SopsConfig
+#   import provisioning.defaults      # For ServerDefaults schemas
+#   import provisioning.lib           # For Storage, TaskServDef, ClusterDef
+#   import provisioning.server        # For Server schema
+#   import provisioning.cluster       # For Cluster schema
+#   import provisioning.dependencies  # For TaskservDependencies, HealthCheck
+#   import provisioning.workflows     # For BatchWorkflow, BatchOperation
+#   import provisioning.batch         # For BatchScheduler, BatchExecutor
+#   import provisioning.version       # For Version, TaskservVersion
+#   import provisioning.k8s_deploy    # For K8s* schemas
+#   import provisioning.services     # For ServiceRegistry, ServiceDefinition
+#
+# EXAMPLE:
+#   import provisioning.lib as lib
+#   import provisioning.settings as settings
+#
+#   _storage = lib.Storage {
+#       name = "sda"
+#       size = 100
+#   }
+#
+# ANTI-PATTERN (DO NOT USE):
+#   Settings = settings.Settings  # ❌ Causes ImmutableError!
+#   Server = server.Server        # ❌ Causes ImmutableError!
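+#
+# EXTENDED EXAMPLE (illustrative; names and sizes are placeholders):
+# a disk split into partitions, guarded by Storage's total-size check:
+#   _disk = lib.Storage {
+#       name = "sda"
+#       size = 100
+#       total = 100
+#       parts = [
+#           lib.StorageVol {name = "root", size = 40, mount_path = "/"}
+#           lib.StorageVol {name = "data", size = 60, mount_path = "/var/data"}
+#       ]
+#   }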
+# +# ============================================================================ + +# Import core module schemas to make them part of the provisioning package +import .settings +import .defaults +import .lib +import .server +import .cluster +import .dependencies +import .workflows +import .batch +import .version +import .k8s_deploy +import .services diff --git a/modes.k b/modes.k new file mode 100644 index 0000000..6a8766a --- /dev/null +++ b/modes.k @@ -0,0 +1,830 @@ +# Info: KCL execution mode schemas for provisioning +# Author: Mode System Implementation +# Release: 1.0.0 +# Date: 2025-10-06 + +""" +Execution mode schemas defining deployment patterns and service configurations + +Modes: + - solo: Single developer, local development + - multi-user: Team collaboration with shared services + - cicd: CI/CD pipeline execution + - enterprise: Production enterprise deployment +""" + +import provisioning.settings as cfg +import provisioning.kcl.oci_registry as oci + +schema ExecutionMode: + """ + Base execution mode schema defining common configuration + + All execution modes inherit from this base schema and must + specify service deployment strategy, authentication, and + workspace policies. + """ + + # Mode identifier + mode_name: "solo" | "multi-user" | "cicd" | "enterprise" + + # Human-readable description + description: str + + # Authentication strategy + authentication: AuthenticationStrategy + + # Service deployment configurations + services: ServiceDeployments + + # Extension source configuration + extensions: ExtensionConfig + + # Workspace management policies + workspaces: WorkspacePolicy + + # Security configuration + security: SecurityConfig + + # Resource limits (optional, for multi-user/enterprise) + resource_limits?: ResourceLimits + + check: + len(description) > 0, "Mode description required" + +schema AuthenticationStrategy: + """Authentication configuration for mode""" + + # Authentication type + auth_type: "none" | "token" | "mtls" | "oauth" | "kms" + + # Token configuration (for token auth) + token_config?: TokenConfig + + # mTLS configuration (for mtls auth) + mtls_config?: MTLSConfig + + # OAuth configuration (for oauth auth) + oauth_config?: OAuthConfig + + # SSH key storage location + ssh_key_storage: "local" | "kms" | "vault" = "local" + + check: + auth_type == "none" or ( + (auth_type == "token" and token_config != Undefined) or + (auth_type == "mtls" and mtls_config != Undefined) or + (auth_type == "oauth" and oauth_config != Undefined) or + (auth_type == "kms") + ), "Auth config must match auth type" + +schema TokenConfig: + """Token-based authentication configuration""" + + token_path: str + token_format: "jwt" | "opaque" = "jwt" + expiry_seconds: int = 86400 # 24 hours + refresh_enabled: bool = True + + check: + len(token_path) > 0, "Token path required" + expiry_seconds > 0, "Expiry must be positive" + +schema MTLSConfig: + """Mutual TLS authentication configuration""" + + client_cert_path: str + client_key_path: str + ca_cert_path: str + verify_server: bool = True + + check: + len(client_cert_path) > 0, "Client cert path required" + len(client_key_path) > 0, "Client key path required" + len(ca_cert_path) > 0, "CA cert path required" + +schema OAuthConfig: + """OAuth 2.0 authentication configuration""" + + provider_url: str + client_id: str + client_secret_path: str + scopes: [str] = ["read", "write"] + redirect_uri?: str + + check: + len(provider_url) > 0, "Provider URL required" + len(client_id) > 0, "Client ID required" + +schema ServiceDeployments: + 
"""Service deployment configuration""" + + orchestrator: ServiceConfig + control_center?: ServiceConfig + coredns?: ServiceConfig + gitea?: ServiceConfig + oci_registry: oci.OCIRegistryConfig + + # Custom services + custom_services?: {str: ServiceConfig} + +schema ServiceConfig: + """Individual service configuration""" + + # Deployment location + deployment: "local" | "remote" | "k8s" | "disabled" + + # For local deployment + local_config?: LocalServiceConfig + + # For remote deployment + remote_config?: RemoteServiceConfig + + # For Kubernetes deployment + k8s_config?: K8sServiceConfig + + # Auto-start service + auto_start: bool = False + + # Health check configuration + health_check?: HealthCheck + + check: + deployment == "disabled" or ( + (deployment == "local" and local_config != Undefined) or + (deployment == "remote" and remote_config != Undefined) or + (deployment == "k8s" and k8s_config != Undefined) + ), "Service config must match deployment type" + +schema LocalServiceConfig: + """Local service deployment configuration""" + + binary_path?: str + config_path?: str + data_dir: str + port: int + bind_address: str = "127.0.0.1" + tls_enabled: bool = False + + check: + port > 0 and port < 65536, "Port must be 1-65535" + len(data_dir) > 0, "Data directory required" + +schema RemoteServiceConfig: + """Remote service configuration""" + + endpoint: str + port?: int + tls_enabled: bool = True + verify_ssl: bool = True + timeout: int = 30 + retries: int = 3 + + check: + len(endpoint) > 0, "Endpoint required" + timeout > 0, "Timeout must be positive" + +schema K8sServiceConfig: + """Kubernetes service deployment configuration""" + + namespace: str = "provisioning" + deployment_name: str + service_name: str + replicas: int = 1 + image: str + image_pull_policy: "Always" | "IfNotPresent" | "Never" = "IfNotPresent" + resources?: K8sResources + + check: + len(namespace) > 0, "Namespace required" + len(deployment_name) > 0, "Deployment name required" + replicas > 0, "Replicas must be positive" + +schema K8sResources: + """Kubernetes resource requirements""" + + cpu_request: str = "100m" + cpu_limit: str = "500m" + memory_request: str = "128Mi" + memory_limit: str = "512Mi" + +schema HealthCheck: + """Service health check configuration""" + + enabled: bool = True + endpoint: str = "/health" + interval: int = 10 # seconds + timeout: int = 5 + healthy_threshold: int = 2 + unhealthy_threshold: int = 3 + + check: + interval > 0, "Interval must be positive" + timeout > 0 and timeout < interval, "Timeout must be less than interval" + +schema ExtensionConfig: + """Extension source and distribution configuration""" + + # Extension source: local files, gitea, or OCI registry + source: "local" | "gitea" | "oci" | "mixed" + + # Local path for extensions (for local source) + local_path?: str + + # Gitea configuration (for gitea source) + gitea_config?: GiteaConfig + + # OCI registry configuration (for oci source) + oci_registry?: OCIExtensionConfig + + # Allow mixed sources + allow_mixed: bool = False + + check: + source == "local" and local_path != Undefined or + source == "gitea" and gitea_config != Undefined or + source == "oci" and oci_registry != Undefined or + source == "mixed", "Extension config must match source type" + +schema GiteaConfig: + """Gitea extension repository configuration""" + + url: str + organization: str = "provisioning" + username?: str + token_path?: str + verify_ssl: bool = True + + check: + len(url) > 0, "Gitea URL required" + +schema OCIExtensionConfig: + """OCI registry 
extension configuration""" + + enabled: bool = True + endpoint: str + namespace: str = "provisioning-extensions" + auth_token_path?: str + tls_enabled: bool = True + verify_ssl: bool = True + cache_dir: str = "~/.provisioning/oci-cache" + + check: + len(endpoint) > 0, "OCI endpoint required" + len(namespace) > 0, "OCI namespace required" + +schema WorkspacePolicy: + """Workspace management policies""" + + # Workspace locking + locking: "disabled" | "enabled" | "required" + + # Lock provider (if locking enabled) + lock_provider?: "gitea" | "etcd" | "redis" | "filesystem" + + # Git integration requirement + git_integration: "disabled" | "optional" | "required" + + # Workspace isolation + isolation: "none" | "user" | "strict" = "user" + + # Maximum concurrent workspaces per user + max_workspaces_per_user?: int + + check: + locking == "disabled" or lock_provider != Undefined, \ + "Lock provider required when locking enabled" + git_integration in ["disabled", "optional", "required"], \ + "Invalid git integration setting" + +schema SecurityConfig: + """Security policies for mode""" + + # Encryption requirements + encryption_at_rest: bool = False + encryption_in_transit: bool = False + + # Secret management + secret_provider: cfg.SecretProvider = cfg.SecretProvider {} + + # DNS modification policy + dns_modification: "none" | "coredns" | "system" = "none" + + # Audit logging + audit_logging: bool = False + audit_log_path?: str + + # Network policies + network_isolation: bool = False + + check: + not audit_logging or audit_log_path != Undefined, \ + "Audit log path required when audit logging enabled" + +schema ResourceLimits: + """Resource limits for multi-user/enterprise modes""" + + # Per-user limits + max_servers_per_user: int = 10 + max_cpu_cores_per_user: int = 32 + max_memory_gb_per_user: int = 128 + max_storage_gb_per_user: int = 500 + + # Global limits + max_total_servers?: int + max_total_cpu_cores?: int + max_total_memory_gb?: int + + check: + max_servers_per_user > 0, "Max servers must be positive" + max_cpu_cores_per_user > 0, "Max CPU must be positive" + max_memory_gb_per_user > 0, "Max memory must be positive" + +# ============================================================================ +# Concrete Mode Schemas +# ============================================================================ + +schema SoloMode(ExecutionMode): + """ + Solo mode: Single developer local development + + Characteristics: + - No authentication required + - Local service deployment + - Optional OCI registry for extension testing + - No workspace locking + - Minimal security constraints + + Example: + SoloMode { + mode_name = "solo" + description = "Local development environment" + } + """ + + mode_name: "solo" = "solo" + description: str = "Single developer local development mode" + + authentication: AuthenticationStrategy = AuthenticationStrategy { + auth_type = "none" + ssh_key_storage = "local" + } + + services: ServiceDeployments = ServiceDeployments { + orchestrator = ServiceConfig { + deployment = "local" + auto_start = True + local_config = LocalServiceConfig { + data_dir = "~/.provisioning/orchestrator" + port = 8080 + } + } + control_center = ServiceConfig { + deployment = "disabled" + } + coredns = ServiceConfig { + deployment = "disabled" + } + gitea = ServiceConfig { + deployment = "disabled" + } + oci_registry = oci.OCIRegistryConfig { + deployment = "local" + type = "zot" + endpoint = "localhost" + port = 5000 + tls_enabled = False + auth_required = False + local = oci.LocalOCIConfig { + 
data_dir = "~/.provisioning/oci-registry" + config_path = "~/.provisioning/oci-registry/config.json" + auto_start = False + } + namespaces = oci.OCINamespaces { + extensions = "dev-extensions" + kcl_packages = "dev-kcl" + platform_images = "dev-platform" + test_images = "dev-test" + } + } + } + + extensions: ExtensionConfig = ExtensionConfig { + source = "local" + local_path = "./provisioning/extensions" + allow_mixed = True + } + + workspaces: WorkspacePolicy = WorkspacePolicy { + locking = "disabled" + git_integration = "optional" + isolation = "none" + } + + security: SecurityConfig = SecurityConfig { + encryption_at_rest = False + encryption_in_transit = False + dns_modification = "none" + audit_logging = False + network_isolation = False + } + +schema MultiUserMode(ExecutionMode): + """ + Multi-user mode: Team collaboration with shared services + + Characteristics: + - Token-based authentication + - Remote shared services + - OCI registry for extension distribution + - Workspace locking enabled + - Git integration required + - User resource limits + + Example: + MultiUserMode { + mode_name = "multi-user" + description = "Team collaboration environment" + } + """ + + mode_name: "multi-user" = "multi-user" + description: str = "Team collaboration with shared services" + + authentication: AuthenticationStrategy = AuthenticationStrategy { + auth_type = "token" + token_config = TokenConfig { + token_path = "~/.provisioning/tokens/auth" + token_format = "jwt" + expiry_seconds = 86400 + refresh_enabled = True + } + ssh_key_storage = "local" + } + + services: ServiceDeployments = ServiceDeployments { + orchestrator = ServiceConfig { + deployment = "remote" + remote_config = RemoteServiceConfig { + endpoint = "orchestrator.company.local" + port = 8080 + tls_enabled = True + verify_ssl = True + timeout = 30 + retries = 3 + } + } + control_center = ServiceConfig { + deployment = "remote" + remote_config = RemoteServiceConfig { + endpoint = "control.company.local" + port = 8081 + tls_enabled = True + } + } + coredns = ServiceConfig { + deployment = "remote" + remote_config = RemoteServiceConfig { + endpoint = "dns.company.local" + port = 53 + tls_enabled = False + } + } + gitea = ServiceConfig { + deployment = "remote" + remote_config = RemoteServiceConfig { + endpoint = "git.company.local" + port = 443 + tls_enabled = True + } + } + oci_registry = oci.OCIRegistryConfig { + deployment = "remote" + type = "harbor" + endpoint = "harbor.company.local" + tls_enabled = True + auth_required = True + remote = oci.RemoteOCIConfig { + timeout = 30 + retries = 3 + verify_ssl = True + } + namespaces = oci.OCINamespaces { + extensions = "provisioning-extensions" + kcl_packages = "provisioning-kcl" + platform_images = "provisioning-platform" + test_images = "provisioning-test" + } + } + } + + extensions: ExtensionConfig = ExtensionConfig { + source = "oci" + oci_registry = OCIExtensionConfig { + enabled = True + endpoint = "harbor.company.local" + namespace = "provisioning-extensions" + auth_token_path = "~/.provisioning/tokens/oci" + tls_enabled = True + verify_ssl = True + cache_dir = "~/.provisioning/oci-cache" + } + } + + workspaces: WorkspacePolicy = WorkspacePolicy { + locking = "enabled" + lock_provider = "gitea" + git_integration = "required" + isolation = "user" + max_workspaces_per_user = 5 + } + + security: SecurityConfig = SecurityConfig { + encryption_at_rest = False + encryption_in_transit = True + dns_modification = "coredns" + audit_logging = True + audit_log_path = 
"/var/log/provisioning/audit.log" + network_isolation = False + } + + resource_limits: ResourceLimits = ResourceLimits { + max_servers_per_user = 10 + max_cpu_cores_per_user = 32 + max_memory_gb_per_user = 128 + max_storage_gb_per_user = 500 + max_total_servers = 100 + max_total_cpu_cores = 320 + max_total_memory_gb = 1024 + } + +schema CICDMode(ExecutionMode): + """ + CI/CD mode: Automated pipeline execution + + Characteristics: + - Token or mTLS authentication + - Remote service endpoints + - OCI registry for artifacts + - No workspace locking (stateless) + - Git integration required + - Ephemeral workspaces + + Example: + CICDMode { + mode_name = "cicd" + description = "CI/CD pipeline environment" + } + """ + + mode_name: "cicd" = "cicd" + description: str = "CI/CD pipeline automated execution" + + authentication: AuthenticationStrategy = AuthenticationStrategy { + auth_type = "token" + token_config = TokenConfig { + token_path = "/var/run/secrets/provisioning/token" + token_format = "jwt" + expiry_seconds = 3600 # 1 hour + refresh_enabled = False + } + ssh_key_storage = "kms" + } + + services: ServiceDeployments = ServiceDeployments { + orchestrator = ServiceConfig { + deployment = "remote" + remote_config = RemoteServiceConfig { + endpoint = "orchestrator.cicd.local" + port = 8080 + tls_enabled = True + verify_ssl = True + timeout = 60 + retries = 5 + } + } + control_center = ServiceConfig { + deployment = "disabled" + } + coredns = ServiceConfig { + deployment = "remote" + remote_config = RemoteServiceConfig { + endpoint = "dns.cicd.local" + port = 53 + } + } + gitea = ServiceConfig { + deployment = "remote" + remote_config = RemoteServiceConfig { + endpoint = "git.cicd.local" + port = 443 + tls_enabled = True + } + } + oci_registry = oci.OCIRegistryConfig { + deployment = "remote" + type = "harbor" + endpoint = "registry.cicd.local" + tls_enabled = True + auth_required = True + remote = oci.RemoteOCIConfig { + timeout = 60 + retries = 5 + verify_ssl = True + } + namespaces = oci.OCINamespaces { + extensions = "cicd-extensions" + kcl_packages = "cicd-kcl" + platform_images = "cicd-platform" + test_images = "cicd-test" + } + } + } + + extensions: ExtensionConfig = ExtensionConfig { + source = "oci" + oci_registry = OCIExtensionConfig { + enabled = True + endpoint = "registry.cicd.local" + namespace = "cicd-extensions" + auth_token_path = "/var/run/secrets/provisioning/oci-token" + tls_enabled = True + verify_ssl = True + cache_dir = "/tmp/provisioning-oci-cache" + } + } + + workspaces: WorkspacePolicy = WorkspacePolicy { + locking = "disabled" + git_integration = "required" + isolation = "strict" + max_workspaces_per_user = 1 + } + + security: SecurityConfig = SecurityConfig { + encryption_at_rest = True + encryption_in_transit = True + dns_modification = "coredns" + audit_logging = True + audit_log_path = "/var/log/provisioning/cicd-audit.log" + network_isolation = True + } + + resource_limits: ResourceLimits = ResourceLimits { + max_servers_per_user = 5 + max_cpu_cores_per_user = 16 + max_memory_gb_per_user = 64 + max_storage_gb_per_user = 200 + } + +schema EnterpriseMode(ExecutionMode): + """ + Enterprise mode: Production enterprise deployment + + Characteristics: + - mTLS or OAuth authentication + - Kubernetes-deployed services + - Enterprise OCI registry (Harbor HA) + - Workspace locking required + - Git integration required + - Full encryption and auditing + - Strict resource limits + + Example: + EnterpriseMode { + mode_name = "enterprise" + description = "Production enterprise 
environment" + } + """ + + mode_name: "enterprise" = "enterprise" + description: str = "Production enterprise deployment with full security" + + authentication: AuthenticationStrategy = AuthenticationStrategy { + auth_type = "mtls" + mtls_config = MTLSConfig { + client_cert_path = "/etc/provisioning/certs/client.crt" + client_key_path = "/etc/provisioning/certs/client.key" + ca_cert_path = "/etc/provisioning/certs/ca.crt" + verify_server = True + } + ssh_key_storage = "kms" + } + + services: ServiceDeployments = ServiceDeployments { + orchestrator = ServiceConfig { + deployment = "k8s" + k8s_config = K8sServiceConfig { + namespace = "provisioning-system" + deployment_name = "orchestrator" + service_name = "orchestrator-svc" + replicas = 3 + image = "harbor.enterprise.local/provisioning/orchestrator:latest" + resources = K8sResources { + cpu_request = "500m" + cpu_limit = "2000m" + memory_request = "1Gi" + memory_limit = "4Gi" + } + } + } + control_center = ServiceConfig { + deployment = "k8s" + k8s_config = K8sServiceConfig { + namespace = "provisioning-system" + deployment_name = "control-center" + service_name = "control-center-svc" + replicas = 2 + image = "harbor.enterprise.local/provisioning/control-center:latest" + } + } + coredns = ServiceConfig { + deployment = "k8s" + k8s_config = K8sServiceConfig { + namespace = "kube-system" + deployment_name = "coredns" + service_name = "kube-dns" + replicas = 2 + image = "registry.k8s.io/coredns/coredns:latest" + } + } + gitea = ServiceConfig { + deployment = "k8s" + k8s_config = K8sServiceConfig { + namespace = "provisioning-system" + deployment_name = "gitea" + service_name = "gitea-svc" + replicas = 2 + image = "gitea/gitea:latest" + } + } + oci_registry = oci.OCIRegistryConfig { + deployment = "remote" + type = "harbor" + endpoint = "harbor.enterprise.local" + tls_enabled = True + auth_required = True + remote = oci.RemoteOCIConfig { + timeout = 60 + retries = 5 + verify_ssl = True + } + namespaces = oci.OCINamespaces { + extensions = "prod-extensions" + kcl_packages = "prod-kcl" + platform_images = "prod-platform" + test_images = "test-images" + } + } + } + + extensions: ExtensionConfig = ExtensionConfig { + source = "oci" + oci_registry = OCIExtensionConfig { + enabled = True + endpoint = "harbor.enterprise.local" + namespace = "prod-extensions" + auth_token_path = "/etc/provisioning/tokens/oci" + tls_enabled = True + verify_ssl = True + cache_dir = "/var/cache/provisioning/oci" + } + } + + workspaces: WorkspacePolicy = WorkspacePolicy { + locking = "required" + lock_provider = "etcd" + git_integration = "required" + isolation = "strict" + max_workspaces_per_user = 3 + } + + security: SecurityConfig = SecurityConfig { + encryption_at_rest = True + encryption_in_transit = True + secret_provider = cfg.SecretProvider { + provider = "kms" + kms_config = cfg.KmsConfig { + server_url = "https://kms.enterprise.local" + auth_method = "certificate" + client_cert_path = "/etc/provisioning/certs/kms-client.crt" + client_key_path = "/etc/provisioning/certs/kms-client.key" + ca_cert_path = "/etc/provisioning/certs/kms-ca.crt" + verify_ssl = True + } + } + dns_modification = "system" + audit_logging = True + audit_log_path = "/var/log/provisioning/enterprise-audit.log" + network_isolation = True + } + + resource_limits: ResourceLimits = ResourceLimits { + max_servers_per_user = 20 + max_cpu_cores_per_user = 64 + max_memory_gb_per_user = 256 + max_storage_gb_per_user = 1000 + max_total_servers = 500 + max_total_cpu_cores = 2000 + max_total_memory_gb = 
8192 + } diff --git a/oci_registry.k b/oci_registry.k new file mode 100644 index 0000000..12515ca --- /dev/null +++ b/oci_registry.k @@ -0,0 +1,487 @@ +# Info: KCL OCI registry schemas for provisioning +# Author: Mode System Implementation +# Release: 1.0.0 +# Date: 2025-10-06 + +""" +OCI (Open Container Initiative) registry configuration schemas + +Supports multiple registry implementations: + - distribution: Docker Registry v2 (lightweight) + - zot: Cloud-native OCI registry + - harbor: Enterprise-grade registry with security scanning + - artifactory: JFrog Artifactory with OCI support + +Purpose: + - Extension distribution via OCI artifacts + - KCL package distribution + - Platform container images + - Test environment images +""" + +import regex + +schema OCIRegistryConfig: + """ + OCI registry configuration for artifact and image distribution + + Examples: + # Local development registry + OCIRegistryConfig { + deployment = "local" + type = "zot" + endpoint = "localhost" + port = 5000 + tls_enabled = False + local = LocalOCIConfig { + data_dir = "~/.provisioning/oci-registry" + config_path = "~/.provisioning/oci-registry/config.json" + } + } + + # Remote enterprise registry + OCIRegistryConfig { + deployment = "remote" + type = "harbor" + endpoint = "harbor.company.local" + tls_enabled = True + auth_required = True + remote = RemoteOCIConfig { + verify_ssl = True + } + } + """ + + # Deployment type + deployment: "local" | "remote" | "disabled" + + # Registry implementation type + type: "distribution" | "zot" | "harbor" | "artifactory" + + # Registry endpoint (hostname or IP) + endpoint: str + + # Registry port (optional, defaults by type) + port?: int = 5000 + + # TLS/SSL configuration + tls_enabled: bool = False + tls_cert_path?: str + tls_key_path?: str + ca_cert_path?: str + + # Authentication + auth_required: bool = False + username?: str + password_path?: str # Path to password file + auth_token_path?: str # Path to auth token + + # Local deployment configuration + local?: LocalOCIConfig + + # Remote connection configuration + remote?: RemoteOCIConfig + + # Artifact namespaces/repositories + namespaces: OCINamespaces + + # Registry-specific features + features?: OCIRegistryFeatures + + check: + len(endpoint) > 0, "OCI registry endpoint required" + port == Undefined or (port > 0 and port < 65536), \ + "Port must be 1-65535" + deployment == "disabled" or ( + (deployment == "local" and local != Undefined) or + (deployment == "remote" and remote != Undefined) + ), "Config must match deployment type" + not auth_required or ( + username != Undefined or auth_token_path != Undefined + ), "Authentication config required when auth enabled" + not tls_enabled or ( + tls_cert_path != Undefined and tls_key_path != Undefined + ) or deployment == "remote", "TLS cert/key required for local TLS" + +schema LocalOCIConfig: + """ + Local OCI registry deployment configuration + + Used for: + - Solo mode development + - Testing OCI artifact distribution + - Offline extension development + """ + + # Data storage directory + data_dir: str + + # Registry configuration file path + config_path: str + + # Auto-start registry on provisioning startup + auto_start: bool = False + + # Binary path (optional, uses PATH if not specified) + binary_path?: str + + # Log file path + log_file?: str = "${data_dir}/registry.log" + + # HTTP configuration + http_config?: LocalHTTPConfig + + # Storage configuration + storage_config?: LocalStorageConfig + + check: + len(data_dir) > 0, "Data directory required" + 
len(config_path) > 0, "Config path required" + regex.match(data_dir, r"^[~/]"), \ + "Data dir must be absolute or home-relative path" + +schema LocalHTTPConfig: + """HTTP configuration for local registry""" + + listen_address: str = "127.0.0.1" + listen_port: int = 5000 + read_timeout: int = 60 # seconds + write_timeout: int = 60 + idle_timeout: int = 120 + + check: + listen_port > 0 and listen_port < 65536, "Port must be 1-65535" + +schema LocalStorageConfig: + """Storage configuration for local registry""" + + # Storage backend + backend: "filesystem" | "s3" | "azure" = "filesystem" + + # Filesystem storage + rootdirectory?: str + + # Garbage collection + gc_enabled: bool = True + gc_interval: int = 3600 # seconds + + # Deduplication + dedupe_enabled: bool = True + +schema RemoteOCIConfig: + """ + Remote OCI registry connection configuration + + Used for: + - Multi-user shared registry + - CI/CD artifact registry + - Enterprise production registry + """ + + # Connection timeout (seconds) + timeout: int = 30 + + # Retry configuration + retries: int = 3 + retry_delay: int = 5 # seconds + retry_backoff: float = 2.0 # exponential backoff multiplier + + # SSL/TLS verification + verify_ssl: bool = True + + # Proxy configuration (optional) + http_proxy?: str + https_proxy?: str + no_proxy?: [str] + + # Rate limiting + rate_limit?: RateLimitConfig + + check: + timeout > 0, "Timeout must be positive" + retries >= 0, "Retries must be non-negative" + retry_backoff > 1.0, "Backoff multiplier must be > 1.0" + +schema RateLimitConfig: + """Rate limiting configuration for remote registry""" + + # Requests per second + requests_per_second: int = 10 + + # Burst size + burst: int = 20 + + # Per-operation limits (optional) + pull_limit?: int + push_limit?: int + + check: + requests_per_second > 0, "Rate limit must be positive" + burst > 0, "Burst size must be positive" + +schema OCINamespaces: + """ + OCI registry namespaces for different artifact types + + Namespaces organize artifacts by purpose and allow + different access control policies per namespace. + """ + + # Extension artifacts (providers, taskservs, clusters) + extensions: str = "provisioning-extensions" + + # KCL package artifacts + kcl_packages: str = "provisioning-kcl" + + # Platform service images (orchestrator, control-center) + platform_images: str = "provisioning-platform" + + # Test environment images + test_images: str = "provisioning-test" + + # Custom user-defined namespaces + custom?: {str: str} + + check: + len(extensions) > 0, "Extensions namespace required" + len(kcl_packages) > 0, "KCL packages namespace required" + len(platform_images) > 0, "Platform images namespace required" + len(test_images) > 0, "Test images namespace required" + # Validate namespace naming convention + regex.match(extensions, r"^[a-z0-9][a-z0-9-]*[a-z0-9]$"), \ + "Extensions namespace must be lowercase alphanumeric with hyphens" + regex.match(kcl_packages, r"^[a-z0-9][a-z0-9-]*[a-z0-9]$"), \ + "KCL packages namespace must be lowercase alphanumeric with hyphens" + +schema OCIRegistryFeatures: + """ + Registry-specific feature configuration + + Different registry implementations support different features. + This schema allows enabling/disabling features based on + registry capabilities. 
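+
+    Example (illustrative):
+        OCIRegistryFeatures {
+            vulnerability_scanning = True
+            scanner_type = "trivy"
+        }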
+ """ + + # Vulnerability scanning (Harbor, Artifactory) + vulnerability_scanning: bool = False + scanner_type?: "trivy" | "clair" | "anchore" + + # Image signing/verification (Notary, Cosign) + image_signing: bool = False + signing_method?: "notary" | "cosign" + + # Replication (Harbor) + replication_enabled: bool = False + replication_targets?: [ReplicationTarget] + + # Quota management + quota_enabled: bool = False + quota_config?: QuotaConfig + + # Webhook notifications + webhook_enabled: bool = False + webhook_endpoints?: [str] + + # Garbage collection + gc_enabled: bool = True + gc_schedule?: str = "0 2 * * *" # Daily at 2 AM + + check: + not vulnerability_scanning or scanner_type != Undefined, \ + "Scanner type required when vulnerability scanning enabled" + not image_signing or signing_method != Undefined, \ + "Signing method required when image signing enabled" + +schema ReplicationTarget: + """Harbor replication target configuration""" + + name: str + type: "harbor" | "docker-hub" | "aws-ecr" | "azure-acr" | "google-gcr" + endpoint: str + credentials?: str # Path to credentials file + verify_ssl: bool = True + + check: + len(name) > 0, "Replication target name required" + len(endpoint) > 0, "Replication endpoint required" + +schema QuotaConfig: + """Registry quota configuration""" + + # Storage quota (GB) + storage_limit_gb: int = 100 + + # Artifact count limit + artifact_limit?: int = 10000 + + # Per-namespace quotas + namespace_quotas?: {str: NamespaceQuota} + + check: + storage_limit_gb > 0, "Storage limit must be positive" + +schema NamespaceQuota: + """Per-namespace quota configuration""" + + storage_limit_gb: int = 50 + artifact_limit: int = 1000 + + check: + storage_limit_gb > 0, "Namespace storage limit must be positive" + artifact_limit > 0, "Namespace artifact limit must be positive" + +# ============================================================================ +# Helper Schemas +# ============================================================================ + +schema OCIArtifactReference: + """ + OCI artifact reference for pulling/pushing artifacts + + Format: //:@ + """ + + # Registry endpoint + registry: str + + # Namespace/project + namespace: str + + # Repository name + repository: str + + # Tag (optional, defaults to "latest") + tag: str = "latest" + + # Digest (optional, for content-addressable pulls) + digest?: str + + # Computed full reference + full_reference: str = f"{registry}/{namespace}/{repository}:{tag}" + + check: + len(registry) > 0, "Registry required" + len(namespace) > 0, "Namespace required" + len(repository) > 0, "Repository required" + len(tag) > 0, "Tag required" + regex.match(tag, r"^[a-zA-Z0-9_][a-zA-Z0-9._-]{0,127}$"), \ + "Invalid tag format" + +schema OCIPullPolicy: + """ + OCI artifact pull policy configuration + + Defines caching and pull behavior for artifacts + """ + + # Pull policy + policy: "always" | "if-not-present" | "never" = "if-not-present" + + # Cache TTL (seconds) + cache_ttl: int = 3600 + + # Verify digest on cached artifacts + verify_cached: bool = True + + # Allow insecure registries (development only) + allow_insecure: bool = False + + check: + cache_ttl > 0, "Cache TTL must be positive" + policy in ["always", "if-not-present", "never"], \ + "Invalid pull policy" + +schema OCIPushPolicy: + """ + OCI artifact push policy configuration + + Defines pushing behavior and constraints + """ + + # Allow overwriting existing tags + allow_overwrite: bool = False + + # Require tag signing before push + require_signing: bool = False 
+
+schema OCIPullPolicy:
+    """
+    OCI artifact pull policy configuration
+
+    Defines caching and pull behavior for artifacts
+    """
+
+    # Pull policy
+    policy: "always" | "if-not-present" | "never" = "if-not-present"
+
+    # Cache TTL (seconds)
+    cache_ttl: int = 3600
+
+    # Verify digest on cached artifacts
+    verify_cached: bool = True
+
+    # Allow insecure registries (development only)
+    allow_insecure: bool = False
+
+    check:
+        cache_ttl > 0, "Cache TTL must be positive"
+        policy in ["always", "if-not-present", "never"], \
+            "Invalid pull policy"
+
+schema OCIPushPolicy:
+    """
+    OCI artifact push policy configuration
+
+    Defines pushing behavior and constraints
+    """
+
+    # Allow overwriting existing tags
+    allow_overwrite: bool = False
+
+    # Require tag signing before push
+    require_signing: bool = False
+
+    # Automatic tagging strategy
+    auto_tag: bool = True
+    tag_format?: str = "v{version}-{timestamp}"
+
+    # Compression
+    compression_enabled: bool = True
+    compression_level: int = 6  # 0-9
+
+    check:
+        compression_level >= 0 and compression_level <= 9, \
+            "Compression level must be 0-9"
+
+# ============================================================================
+# Registry-Specific Configuration
+# ============================================================================
+
+schema ZotRegistryConfig(OCIRegistryConfig):
+    """
+    Zot registry specific configuration
+
+    Zot is a lightweight, cloud-native OCI registry focused on
+    minimal resource usage and developer experience.
+    """
+
+    type: "zot" = "zot"
+
+    # Zot-specific features
+    zot_features?: ZotFeatures
+
+schema ZotFeatures:
+    """Zot-specific features"""
+
+    # Enable search API
+    search_enabled: bool = True
+
+    # Enable metrics endpoint
+    metrics_enabled: bool = True
+    metrics_port: int = 9090
+
+    # Enable sync (pull-through cache)
+    sync_enabled: bool = False
+    sync_registries?: [str]
+
+    # Enable scrub (background verification)
+    scrub_enabled: bool = True
+    scrub_interval: str = "24h"
+
+schema HarborRegistryConfig(OCIRegistryConfig):
+    """
+    Harbor registry specific configuration
+
+    Harbor is an enterprise-grade container registry with
+    security scanning, replication, and RBAC.
+    """
+
+    type: "harbor" = "harbor"
+
+    # Harbor-specific configuration
+    harbor_config?: HarborConfig
+
+schema HarborConfig:
+    """Harbor-specific configuration"""
+
+    # Harbor project (namespace)
+    project: str
+
+    # Project visibility
+    public_project: bool = False
+
+    # Content trust (Notary)
+    content_trust: bool = False
+
+    # Auto-scan on push
+    auto_scan: bool = True
+
+    # Prevent vulnerable images
+    prevent_vulnerable: bool = True
+    severity_threshold: "critical" | "high" | "medium" | "low" = "high"
+
+    check:
+        len(project) > 0, "Harbor project required"
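+
+# Illustrative sketch (editor's example; the project name is a placeholder):
+# Harbor settings that block images at or above the "high" severity threshold.
+_example_harbor = HarborConfig {
+    project = "provisioning-extensions"
+    auto_scan = True
+    prevent_vulnerable = True
+    severity_threshold = "high"
+}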
diff --git a/server.k b/server.k
new file mode 100644
index 0000000..370b7e9
--- /dev/null
+++ b/server.k
@@ -0,0 +1,33 @@
+# Info: KCL core lib server schemas for provisioning (Provisioning)
+# Author: JesusPerezLorenzo
+# Release: 0.0.4
+# Date: 15-12-2023
+import regex
+import .defaults
+import .lib
+
+schema Server(defaults.ServerDefaults):
+    """
+    server settings
+    """
+    not_use: bool = False
+    # Hostname is the reference for the resource; if it is changed later inside the server, the change will not be propagated to the resource inventory
+    hostname: str
+    title: str
+    network_private_id?: str
+    # extra hostnames for server local resolution
+    extra_hostnames?: [str]
+    delete_lock: bool = False
+    taskservs?: [lib.TaskServDef]
+    cluster?: [lib.ClusterDef]
+
+    check:
+        len(hostname) > 0, "Check hostname value"
+        len(title) > 0, "Check title value"
+        priv_cidr_block == Undefined or regex.match(priv_cidr_block, "^(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(?:\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}\/(?:3[0-2]|[0-2]?[0-9])$"), "'priv_cidr_block = ${priv_cidr_block}' check value definition"
+
+#network_private_ip == Undefined or regex.match(network_private_ip,"^\$.*$") or regex.match(network_private_ip, "^((25[0-5]|2[0-4][0-9]|[0-1]?[0-9]?[0-9])\.){3}(25[0-5]|2[0-4][0-9]|[0-1]?[0-9]?[0-9])$"), "'network_private_ip = ${network_private_ip}' check value definition (use $value or xx.xx.xx.xx)"
+#liveness_ip == Undefined or regex.match(liveness_ip,"^\$.*$") or regex.match(liveness_ip, "^((25[0-5]|2[0-4][0-9]|[0-1]?[0-9]?[0-9])\.){3}(25[0-5]|2[0-4][0-9]|[0-1]?[0-9]?[0-9])$"), "'liveness_ip = ${liveness_ip}' check value definition (use $value or xx.xx.xx.xx)"
+# len(adm_user.password) > 0, "Check Admin User password 'adm_user.password'"
+# len(adm_user.email) > 0, "Check Admin User email 'adm_user.email'"
+# len(db.name) > 0, "Check DB name"
diff --git a/services.k b/services.k
new file mode 100644
index 0000000..a33736b
--- /dev/null
+++ b/services.k
@@ -0,0 +1,254 @@
+"""
+Service Registry Schema for Provisioning Platform
+
+Defines platform services (orchestrator, control-center, CoreDNS, Gitea, OCI registry, etc.)
+and their lifecycle management configuration.
+
+Version: 1.0.0
+"""
+
+schema ServiceRegistry:
+    """Platform service registry configuration"""
+
+    services: {str: ServiceDefinition}
+
+    check:
+        len(services) > 0, "At least one service must be defined"
+
+schema ServiceDefinition:
+    """Individual service definition"""
+
+    name: str
+    type: "platform" | "infrastructure" | "utility"
+    category: "orchestration" | "auth" | "dns" | "git" | "registry" | "api" | "ui" | "monitoring"
+    description?: str
+
+    # Service requirements
+    required_for: [str] = []  # Operations requiring this service
+    dependencies: [str] = []  # Other services this depends on
+    conflicts: [str] = []  # Services that conflict
+
+    # Deployment configuration
+    deployment: ServiceDeployment
+
+    # Health check
+    health_check: HealthCheck
+
+    # Startup configuration
+    startup: StartupConfig = StartupConfig {}
+
+    # Resource limits
+    resources?: ResourceLimits
+
+    check:
+        len(name) > 0, "Service name cannot be empty"
+        not (name in dependencies), "Service cannot depend on itself"
+        len([d for d in dependencies if d in conflicts]) == 0, \
+            "Service cannot both depend on and conflict with same service"
+
+schema ServiceDeployment:
+    """Service deployment configuration"""
+
+    mode: "binary" | "docker" | "docker-compose" | "kubernetes" | "remote"
+
+    binary?: BinaryDeployment
+    docker?: DockerDeployment
+    docker_compose?: DockerComposeDeployment
+    kubernetes?: KubernetesDeployment
+    remote?: RemoteDeployment
+
+    check:
+        (mode == "binary" and binary != Undefined) or \
+        (mode == "docker" and docker != Undefined) or \
+        (mode == "docker-compose" and docker_compose != Undefined) or \
+        (mode == "kubernetes" and kubernetes != Undefined) or \
+        (mode == "remote" and remote != Undefined), \
+            "Deployment configuration must match deployment mode"
+
+schema BinaryDeployment:
+    """Binary service deployment"""
+
+    binary_path: str
+    args: [str] = []
+    working_dir?: str
+    env: {str: str} = {}
+    user?: str
+    group?: str
+
+    check:
+        len(binary_path) > 0, "Binary path cannot be empty"
+
+schema DockerDeployment:
+    """Docker container deployment"""
+
+    image: str
+    container_name: str
+    ports: [str] = []
+    volumes: [str] = []
+    environment: {str: str} = {}
+    command?: [str]
+    networks: [str] = []
+    restart_policy: "no" | "always" | "on-failure" | "unless-stopped" = "unless-stopped"
+
+    check:
+        len(image) > 0, "Docker image cannot be empty"
+        len(container_name) > 0, "Container name cannot be empty"
+
+schema DockerComposeDeployment:
+    """Docker Compose deployment"""
+
+    compose_file: str
+    service_name: str
+    project_name?: str
+    env_file?: str
+
+    check:
+        len(compose_file) > 0, "Compose file path cannot be empty"
+        len(service_name) > 0, "Service name cannot be empty"
+
+schema KubernetesDeployment:
+    """Kubernetes deployment"""
+
+    namespace: str
+    deployment_name: str
+    kubeconfig?: str
+    manifests_path?: str
+    helm_chart?: HelmChart
+
+    check:
+        len(namespace) > 0, "Namespace cannot be empty"
+        len(deployment_name) > 0, "Deployment name cannot 
be empty" + +schema HelmChart: + """Helm chart configuration""" + + chart: str + release_name: str + repo_url?: str + version?: str + values_file?: str + + check: + len(chart) > 0, "Chart name cannot be empty" + len(release_name) > 0, "Release name cannot be empty" + +schema RemoteDeployment: + """Remote service deployment""" + + endpoint: str + tls_enabled: bool = True + auth_token_path?: str + cert_path?: str + + check: + len(endpoint) > 0, "Endpoint cannot be empty" + +schema HealthCheck: + """Service health check configuration""" + + type: "http" | "tcp" | "command" | "file" | "none" + + http?: HttpHealthCheck + tcp?: TcpHealthCheck + command?: CommandHealthCheck + file?: FileHealthCheck + + interval: int = 10 + retries: int = 3 + timeout: int = 5 + + check: + (type == "http" and http != Undefined) or \ + (type == "tcp" and tcp != Undefined) or \ + (type == "command" and command != Undefined) or \ + (type == "file" and file != Undefined) or \ + (type == "none"), \ + "Health check configuration must match health check type" + interval > 0, "Interval must be positive" + retries > 0, "Retries must be positive" + timeout > 0, "Timeout must be positive" + +schema HttpHealthCheck: + """HTTP health check""" + + endpoint: str + expected_status: int = 200 + method: "GET" | "POST" | "HEAD" = "GET" + headers: {str: str} = {} + + check: + len(endpoint) > 0, "Endpoint cannot be empty" + expected_status >= 100 and expected_status < 600, \ + "HTTP status must be valid (100-599)" + +schema TcpHealthCheck: + """TCP health check""" + + host: str + port: int + + check: + len(host) > 0, "Host cannot be empty" + port > 0 and port <= 65535, "Port must be 1-65535" + +schema CommandHealthCheck: + """Command-based health check""" + + command: str + expected_exit_code: int = 0 + + check: + len(command) > 0, "Command cannot be empty" + +schema FileHealthCheck: + """File-based health check""" + + path: str + must_exist: bool = True + + check: + len(path) > 0, "Path cannot be empty" + +schema StartupConfig: + """Service startup configuration""" + + auto_start: bool = False + start_timeout: int = 60 + start_order: int = 100 + restart_on_failure: bool = True + max_restarts: int = 3 + + check: + start_timeout > 0, "Start timeout must be positive" + start_order > 0, "Start order must be positive" + max_restarts >= 0, "Max restarts must be non-negative" + +schema ResourceLimits: + """Resource limits for service""" + + cpu_limit?: str # e.g., "2", "500m" + memory_limit?: str # e.g., "1Gi", "512Mi" + disk_limit?: str # e.g., "10Gi" + +schema ServiceState: + """Service runtime state""" + + name: str + status: "running" | "stopped" | "failed" | "starting" | "stopping" | "unknown" + pid?: int + started_at?: str + uptime?: int + health_status: "healthy" | "unhealthy" | "unknown" = "unknown" + last_health_check?: str + restart_count: int = 0 + +schema ServiceOperation: + """Service operation request""" + + service_name: str + operation: "start" | "stop" | "restart" | "reload" | "health-check" + force: bool = False + timeout?: int + + check: + len(service_name) > 0, "Service name cannot be empty" diff --git a/settings.k b/settings.k new file mode 100644 index 0000000..e0587d3 --- /dev/null +++ b/settings.k @@ -0,0 +1,151 @@ +# Info: KCL core lib settings schemas for provisioning (Provisioning) +# Author: JesusPerezLorenzo +# Release: 0.0.4 +# Date: 15-12-2023 +schema SecretProvider: + """ + Secret provider configuration for SOPS or KMS + """ + # Secret provider type: sops or kms + provider: "sops" | "kms" = "sops" + # 
Configuration specific to SOPS
+    sops_config?: SopsConfig
+    # Configuration specific to KMS
+    kms_config?: KmsConfig
+
+schema SopsConfig:
+    """
+    SOPS configuration settings
+    """
+    # Path to SOPS configuration file
+    config_path?: str
+    # Path to Age key file for encryption
+    age_key_file?: str
+    # Age recipients for encryption
+    age_recipients?: str
+    # Use Age encryption (default) or other methods
+    use_age: bool = True
+
+schema KmsConfig:
+    """
+    KMS configuration settings for Cosmian KMS
+    """
+    # KMS server URL
+    server_url: str
+    # Authentication method: certificate, token, or basic
+    auth_method: "certificate" | "token" | "basic" = "certificate"
+    # Client certificate path (for certificate auth)
+    client_cert_path?: str
+    # Client private key path (for certificate auth)
+    client_key_path?: str
+    # CA certificate path for server verification
+    ca_cert_path?: str
+    # API token (for token auth)
+    api_token?: str
+    # Username (for basic auth)
+    username?: str
+    # Password (for basic auth)
+    password?: str
+    # Timeout for requests in seconds
+    timeout: int = 30
+    # Verify SSL certificates
+    verify_ssl: bool = True
+
+schema AIProvider:
+    """
+    AI provider configuration for natural language processing
+    """
+    # Enable AI capabilities
+    enabled: bool = False
+    # AI provider type: openai, claude, or generic
+    provider: "openai" | "claude" | "generic" = "openai"
+    # API endpoint URL (for generic provider or custom endpoints)
+    api_endpoint?: str
+    # API key for authentication
+    api_key?: str
+    # Model name to use
+    model?: str
+    # Maximum tokens for responses
+    max_tokens: int = 2048
+    # Temperature for response creativity (0.0-1.0)
+    temperature: float = 0.3
+    # Timeout for API requests in seconds
+    timeout: int = 30
+    # Enable AI for template generation
+    enable_template_ai: bool = True
+    # Enable AI for queries
+    enable_query_ai: bool = True
+    # Enable AI for webhooks/chat interfaces
+    enable_webhook_ai: bool = False
+
+schema RunSet:
+    # Wait until requested taskserv is completed: true or false
+    wait: bool = True
+
+    # Format for output: human (default) | yaml | json
+    # Server info can be requested via aws cli adding option: --out yaml | json | text | table
+    output_format: "human" | "yaml" | "json" = "human"
+    # Output path to copy results
+    output_path: str = "tmp/NOW-deploy"
+    # Inventory file
+    inventory_file: str = "./inventory.yaml"
+    # Prefix commands with 'time' to collect timing information
+    use_time: bool = True
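+
+# Illustrative sketch (editor's example; paths are placeholders):
+# SOPS with an Age key, the default secret provider.
+_example_secrets = SecretProvider {
+    provider = "sops"
+    sops_config = SopsConfig {
+        config_path = "./.sops.yaml"
+        age_key_file = "~/.config/sops/age/keys.txt"
+        use_age = True
+    }
+}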
+schema Settings:
+    """
+    Settings
+    """
+    # Main name for provisioning
+    main_name: str
+    main_title: str = main_name
+
+    # #provider: "local" | "upcloud" | "aws"
+    # # Settings from servers have priority over these defaults; if a value is not set in a server item, the default one is used instead
+    # #defaults_path: str = "../defaults.yaml"
+    # Settings data is AUTO generated, checked and AUTO filled during taskserv operations
+    # Path for automatically generated settings for VPC, Subnets, SG, etc.
+    settings_path: str = "./settings.yaml"
+    # Directory path for providers default-settings
+    defaults_provs_dirpath: str = "./defs"
+    # Suffix for providers default-settings filenames with extension (example: aws_defaults.k)
+    defaults_provs_suffix: str = "_defaults.k"
+    # Provision data directory path to save provider-specific settings (uuids, vpc, etc)
+    prov_data_dirpath: str = "./data"
+    # Suffix for providers data-settings filenames with extension (example: aws_settings.k)
+    prov_data_suffix: str = "_settings.k"
+    # Directory path to collect created infos, taskservs
+    created_taskservs_dirpath: str = "./tmp/NOW_deployment"
+    # Directory path to collect resources for provisioning created infos, taskservs
+    prov_resources_path: str = "./resources"
+    # Directory path to collect created clusters
+    created_clusters_dirpath: str = "./tmp/NOW_clusters"
+    # Directory path to collect clusters for provisioning
+    prov_clusters_path: str = "./clusters"
+    # Directory path for local bin on provisioning
+    prov_local_bin_path: str = "./bin"
+    # Secret management configuration
+    secrets: SecretProvider = SecretProvider {}
+    # AI provider configuration
+    ai: AIProvider = AIProvider {}
+    runset: RunSet
+
+    # Default values can be overwritten by cluster settings
+    # Cluster admin host to connect to via SSH
+    cluster_admin_host: str
+    # Cluster admin host port to connect to via SSH
+    cluster_admin_port: int = 22
+    # Time in seconds to wait for servers to reach started state and accept SSH
+    servers_wait_started: int = 27
+    # Cluster admin user to connect via SSH
+    cluster_admin_user: str = "root"
+    # Clusters save path (or use main settings)
+    clusters_save_path: str = "/${main_name}/clusters"
+    # Servers path
+    servers_paths: [str] = ["servers"]
+    # Common clusters definitions, mainly Cluster ones
+    clusters_paths: [str] = ["clusters"]
+
+#clusters: [str] = [ "clusters" ]
+#check:
+#    len(provider) > 0, "Check provider value"
diff --git a/version.k b/version.k
new file mode 100644
index 0000000..1328c10
--- /dev/null
+++ b/version.k
@@ -0,0 +1,107 @@
+"""
+KCL Version Management Schema for Provisioning System
+Provides type-safe version definitions with GitHub release integration
+"""
+
+schema Version:
+    """Version information for a component with optional GitHub integration"""
+    # Version number (e.g., "1.31.0") or "latest"
+    current: str
+    # GitHub releases URL for automated checking
+    source?: str
+    # GitHub tags URL (alternative source)
+    tags?: str
+    # Official project website
+    site?: str
+    # Enable automatic latest version checking
+    check_latest?: bool = False
+    # Cache duration in seconds (24h default)
+    grace_period?: int = 86400
+
+    check:
+        len(current) > 0, "Version current field cannot be empty"
+        current == "latest" or len(current.split(".")) >= 1, "Version must be semantic (x.y.z) or 'latest'"
+
+schema TaskservVersion:
+    """Complete taskserv version configuration with dependency tracking"""
+    # Taskserv name (must match directory)
+    name: str
+    # Primary version configuration
+    version: Version
+    # Other taskservs this component depends on
+    dependencies?: [str]
+    # Profile-specific version overrides
+    profiles?: {str:Version}
+
+    check:
+        len(name) > 0, "Taskserv name cannot be empty"
+        name == name.lower(), "Taskserv name must be lowercase"
+
+schema VersionCache:
+    """Cache structure for latest version lookups"""
+    # Resolved version string
+    version: str
+    # ISO timestamp of last fetch
+    fetched_at: str
+    # Source URL used for resolution
+    source: str
+    # 
Time-to-live in seconds + ttl: int = 86400 + + check: + len(version) > 0, "Cached version cannot be empty" + len(source) > 0, "Cache source URL cannot be empty" + +# Package metadata for core provisioning KCL module +schema PackageMetadata: + """Core package metadata for distribution""" + # Package name + name: str + # Package version + version: str + # API compatibility version + api_version: str + # Build timestamp + build_date: str + # Minimum KCL version required + kcl_min_version: str + # Maximum KCL version supported + kcl_max_version: str + # External dependencies + dependencies: {str:str} + # Feature flags + features: {str:bool} + # Available schema exports + schema_exports: [str] + + check: + len(name) > 0, "Package name cannot be empty" + len(version) > 0, "Package version cannot be empty" + +# Default package metadata +package_metadata: PackageMetadata = { + name = "provisioning" + version = "0.1.0" + api_version = "v1" + build_date = "2025-09-28" + kcl_min_version = "0.11.0" + kcl_max_version = "0.12.0" + dependencies = {} + features = { + server_management = True + cluster_orchestration = True + provider_abstraction = True + workflow_automation = True + batch_operations = True + } + schema_exports = [ + "Settings" + "Server" + "Cluster" + "Provider" + "Workflow" + "BatchWorkflow" + "Version" + "PackageMetadata" + ] +} diff --git a/workflows.k b/workflows.k new file mode 100644 index 0000000..a3e7ffc --- /dev/null +++ b/workflows.k @@ -0,0 +1,287 @@ +# Info: KCL batch workflow schemas for provisioning (Provisioning) +# Author: JesusPerezLorenzo +# Release: 0.0.1 +# Date: 25-09-2025 +# Description: Core batch workflow schemas following PAP principles +import .settings + +schema DependencyDef: + """ + Dependency definition between batch operations + Supports both sequential and conditional dependencies + """ + # Target operation ID that this dependency points to + target_operation_id: str + # Dependency type: 'sequential' waits for completion, 'conditional' waits for specific conditions + dependency_type: "sequential" | "conditional" = "sequential" + # For conditional dependencies, specify required conditions + conditions?: [str] = [] + # Timeout in seconds to wait for dependency resolution + timeout: int = 300 + # Whether failure of dependency should fail this operation + fail_on_dependency_error: bool = True + + check: + len(target_operation_id) > 0, "Target operation ID cannot be empty" + timeout > 0, "Timeout must be positive" + +schema RetryPolicy: + """ + Retry policy configuration for batch operations + Supports exponential backoff and custom retry conditions + """ + # Maximum number of retry attempts (0 = no retries) + max_attempts: int = 3 + # Initial delay between retries in seconds + initial_delay: int = 5 + # Maximum delay between retries in seconds + max_delay: int = 300 + # Backoff multiplier (1.0 = linear, >1.0 = exponential) + backoff_multiplier: float = 2 + # Specific error codes/conditions that should trigger retries + retry_on_errors?: [str] = ["connection_error", "timeout", "rate_limit"] + # Whether to retry on any error (if retry_on_errors is empty) + retry_on_any_error: bool = False + + check: + max_attempts >= 0, "Max attempts cannot be negative" + initial_delay > 0, "Initial delay must be positive" + max_delay >= initial_delay, "Max delay must be >= initial delay" + backoff_multiplier >= 1, "Backoff multiplier must be >= 1.0" + +schema RollbackStrategy: + """ + Rollback strategy configuration for failed batch operations + Supports different rollback 
+
+schema RollbackStrategy:
+    """
+    Rollback strategy configuration for failed batch operations
+    Supports different rollback approaches based on operation type
+    """
+    # Whether rollback is enabled
+    enabled: bool = True
+    # Rollback strategy: 'none', 'immediate', 'batch_end', 'manual'
+    strategy: "none" | "immediate" | "batch_end" | "manual" = "immediate"
+    # Whether to preserve partial state for manual recovery
+    preserve_partial_state: bool = False
+    # Custom rollback commands/operations
+    custom_rollback_operations?: [str] = []
+    # Timeout for rollback operations
+    rollback_timeout: int = 600
+
+    check:
+        rollback_timeout > 0, "Rollback timeout must be positive"
+
+schema MonitoringConfig:
+    """
+    Monitoring and observability configuration for batch workflows
+    Integrates with various monitoring backends
+    """
+    # Whether monitoring is enabled
+    enabled: bool = True
+    # Monitoring backend: 'prometheus', 'grafana', 'datadog', 'custom'
+    backend: "prometheus" | "grafana" | "datadog" | "custom" = "prometheus"
+    # Metrics endpoint URL (for custom backends)
+    endpoint?: str
+    # Metric collection interval in seconds
+    collection_interval: int = 30
+    # Whether to enable detailed operation tracing
+    enable_tracing: bool = True
+    # Log level for batch operations
+    log_level: "debug" | "info" | "warn" | "error" = "info"
+    # Whether to send notifications on workflow completion/failure
+    enable_notifications: bool = False
+    # Notification channels (webhooks, slack, email, etc.)
+    notification_channels?: [str] = []
+
+    check:
+        collection_interval > 0, "Collection interval must be positive"
+
+schema StorageConfig:
+    """
+    Storage backend configuration for batch workflow state and results
+    Supports multiple storage backends including SurrealDB and filesystem
+    """
+    # Storage backend type
+    backend: "surrealdb" | "filesystem" | "redis" | "postgresql" = "filesystem"
+    # Connection configuration for database backends
+    connection_config?: {str:str} = {}
+    # Base path for filesystem backend
+    base_path: str = "./batch_workflows"
+    # Whether to enable state persistence
+    enable_persistence: bool = True
+    # State retention period in hours (0 = keep forever; default is one week)
+    retention_hours: int = 168
+    # Whether to compress stored data
+    enable_compression: bool = False
+    # Encryption settings for stored data
+    encryption?: settings.SecretProvider
+
+    check:
+        len(base_path) > 0, "Base path cannot be empty"
+        retention_hours >= 0, "Retention hours cannot be negative"
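+
+# Illustrative filesystem-backed storage (paths are placeholders): persists
+# compressed workflow state under the default base path for the one-week
+# retention window. The leading underscore keeps the example private.
+_example_storage: StorageConfig = StorageConfig {
+    backend = "filesystem"
+    base_path = "./batch_workflows"
+    enable_persistence = True
+    enable_compression = True
+}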
+
+schema BatchOperation:
+    """
+    Individual operation definition within a batch workflow
+    Supports various operation types with provider-agnostic configuration
+    """
+    # Unique operation identifier within workflow
+    operation_id: str
+    # Human-readable operation name/description
+    name: str
+    # Operation type: server, taskserv, cluster, custom
+    operation_type: "server" | "taskserv" | "cluster" | "custom" = "server"
+    # Target provider (upcloud, aws, mixed, etc.)
+    provider?: str
+    # Specific action: create, delete, update, scale, etc.
+    action: "create" | "delete" | "update" | "scale" | "configure" = "create"
+
+    # Operation-specific parameters (flexible configuration)
+    parameters: {str:str} = {}
+
+    # Dependencies on other operations
+    dependencies?: [DependencyDef] = []
+    # Retry policy for this operation
+    retry_policy: RetryPolicy = RetryPolicy {}
+    # Rollback strategy for this operation
+    rollback_strategy: RollbackStrategy = RollbackStrategy {}
+
+    # Operation execution settings
+    # Timeout in seconds (30 minutes default)
+    timeout: int = 1800
+    # Whether operation can run in parallel with others
+    allow_parallel: bool = True
+    # Priority for operation scheduling (higher = earlier execution)
+    priority: int = 0
+
+    # Validation rules for operation parameters
+    validation_rules?: [str] = []
+    # Expected outcomes/conditions for success
+    success_conditions?: [str] = []
+
+    check:
+        len(operation_id) > 0, "Operation ID cannot be empty"
+        len(name) > 0, "Operation name cannot be empty"
+        timeout > 0, "Timeout must be positive"
+
+schema BatchWorkflow:
+    """
+    Main batch workflow definition supporting mixed provider operations
+    Follows PAP principles with configuration-driven architecture
+    """
+    # Unique workflow identifier
+    workflow_id: str
+    # Human-readable workflow name
+    name: str
+    # Workflow description
+    description?: str = ""
+    # Workflow metadata
+    version: str = "1.0.0"
+    created_at?: str
+    modified_at?: str
+    # List of operations in this workflow
+    operations: [BatchOperation]
+
+    # Global workflow settings
+    # Maximum parallel operations (0 = unlimited)
+    max_parallel_operations: int = 5
+    # Global timeout for the entire workflow in seconds (2 hours default)
+    global_timeout: int = 7200
+    # Whether to stop workflow on first failure
+    fail_fast: bool = False
+
+    # Storage backend configuration
+    storage: StorageConfig = StorageConfig {}
+    # Monitoring configuration
+    monitoring: MonitoringConfig = MonitoringConfig {}
+
+    # Global retry policy (can be overridden per operation)
+    default_retry_policy: RetryPolicy = RetryPolicy {}
+    # Global rollback strategy
+    default_rollback_strategy: RollbackStrategy = RollbackStrategy {}
+
+    # Workflow execution context
+    execution_context: {str:str} = {}
+
+    # Pre and post workflow hooks
+    pre_workflow_hooks?: [str] = []
+    post_workflow_hooks?: [str] = []
+    # Notification settings
+    notifications?: MonitoringConfig
+
+    check:
+        len(workflow_id) > 0, "Workflow ID cannot be empty"
+        len(name) > 0, "Workflow name cannot be empty"
+        len(operations) > 0, "Workflow must contain at least one operation"
+        max_parallel_operations >= 0, "Max parallel operations cannot be negative"
+        global_timeout > 0, "Global timeout must be positive"
+        # Validate operation IDs are unique within the workflow
+        len({op.operation_id: None for op in operations}) == len(operations), "Operation IDs must be unique"
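+
+# Minimal illustrative workflow (identifiers are placeholders): a single
+# maintenance operation that leans on the schema defaults declared above;
+# the leading underscore keeps the example private to this module.
+_example_workflow: BatchWorkflow = BatchWorkflow {
+    workflow_id = "wf_example_001"
+    name = "Example Maintenance Workflow"
+    operations = [
+        BatchOperation {
+            operation_id = "reconfigure_proxy"
+            name = "Reconfigure proxy taskserv"
+            operation_type = "taskserv"
+            action = "configure"
+            parameters = {"taskserv": "haproxy"}
+        }
+    ]
+}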
+
+schema WorkflowExecution:
+    """
+    Runtime execution state for batch workflows
+    Tracks progress, results, and state changes
+    """
+    # Reference to the workflow being executed
+    workflow_id: str
+    # Unique execution instance identifier
+    execution_id: str
+    # Current execution status
+    status: "pending" | "running" | "paused" | "completed" | "failed" | "cancelled" = "pending"
+
+    # Execution timing
+    started_at?: str
+    completed_at?: str
+    # Duration in seconds
+    duration?: int
+    # Operation execution states
+    operation_states: {str:{str:str}} = {}
+    # Execution results and outputs
+    results: {str:str} = {}
+    # Error information
+    errors: [str] = []
+
+    # Resource usage tracking
+    resource_usage?: {str:str} = {}
+    # Rollback history
+    rollback_history: [str] = []
+
+    check:
+        len(workflow_id) > 0, "Workflow ID cannot be empty"
+        len(execution_id) > 0, "Execution ID cannot be empty"
+
+schema WorkflowTemplate:
+    """
+    Reusable workflow templates for common batch operations
+    Supports parameterization and customization
+    """
+    # Template identifier
+    template_id: str
+    # Template name and description
+    name: str
+    description?: str = ""
+    # Template category
+    category: "infrastructure" | "deployment" | "maintenance" | "testing" | "custom" = "infrastructure"
+
+    # Base workflow definition
+    workflow_template: BatchWorkflow
+
+    # Template parameters that can be customized
+    parameters: {str:str} = {}
+    # Required parameters that must be provided
+    required_parameters: [str] = []
+
+    # Template versioning
+    version: str = "1.0.0"
+    # Compatibility information
+    min_provisioning_version?: str
+    # Usage examples and documentation
+    examples?: [str] = []
+    documentation_url?: str
+
+    check:
+        len(template_id) > 0, "Template ID cannot be empty"
+        len(name) > 0, "Template name cannot be empty"
+
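+# Illustrative template wrapper (names are placeholders): packages the example
+# workflow above as a reusable maintenance template with one required parameter.
+_example_template: WorkflowTemplate = WorkflowTemplate {
+    template_id = "maintenance_reconfigure"
+    name = "Taskserv Reconfigure Template"
+    category = "maintenance"
+    workflow_template = _example_workflow
+    parameters = {"taskserv": "haproxy"}
+    required_parameters = ["taskserv"]
+}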