From f02b12cef31e245e9a1134948ec7e4fe91f8e8e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jesu=CC=81s=20Pe=CC=81rez?= Date: Tue, 7 Oct 2025 11:17:54 +0100 Subject: [PATCH] init repo and codebase --- .gitignore | 110 +++ README.md | 412 ++++++++++ REFERENCE.md | 56 ++ batch.k | 287 +++++++ cluster.k | 33 + coredns.k | 484 +++++++++++ defaults.k | 75 ++ dependencies.k | 281 +++++++ docs/BEST_PRACTICES.md | 1200 ++++++++++++++++++++++++++++ docs/VALIDATION.md | 800 +++++++++++++++++++ docs/provisioning.md | 589 ++++++++++++++ docs/why_main.md | 40 + examples/README.md | 278 +++++++ examples/basic_server.k | 80 ++ examples/kubernetes_deployment.k | 325 ++++++++ examples/mixed_provider_workflow.k | 452 +++++++++++ examples/simple_workflow.k | 156 ++++ examples_batch.k | 457 +++++++++++ gitea.k | 325 ++++++++ k8s_deploy.k | 259 ++++++ kcl.mod | 5 + kcl.mod.lock | 0 lib.k | 70 ++ main.k | 56 ++ modes.k | 830 +++++++++++++++++++ oci_registry.k | 487 +++++++++++ server.k | 33 + services.k | 254 ++++++ settings.k | 151 ++++ version.k | 107 +++ workflows.k | 287 +++++++ 31 files changed, 8979 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 REFERENCE.md create mode 100644 batch.k create mode 100644 cluster.k create mode 100644 coredns.k create mode 100644 defaults.k create mode 100644 dependencies.k create mode 100644 docs/BEST_PRACTICES.md create mode 100644 docs/VALIDATION.md create mode 100644 docs/provisioning.md create mode 100644 docs/why_main.md create mode 100644 examples/README.md create mode 100644 examples/basic_server.k create mode 100644 examples/kubernetes_deployment.k create mode 100644 examples/mixed_provider_workflow.k create mode 100644 examples/simple_workflow.k create mode 100644 examples_batch.k create mode 100644 gitea.k create mode 100644 k8s_deploy.k create mode 100644 kcl.mod create mode 100644 kcl.mod.lock create mode 100644 lib.k create mode 100644 main.k create mode 100644 modes.k create mode 100644 oci_registry.k create mode 100644 server.k create mode 100644 services.k create mode 100644 settings.k create mode 100644 version.k create mode 100644 workflows.k diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..366db47 --- /dev/null +++ b/.gitignore @@ -0,0 +1,110 @@ +.p +.claude +.vscode +.shellcheckrc +.coder +.migration +.zed +ai_demo.nu +CLAUDE.md +.cache +.coder +wrks +ROOT +OLD +# Generated by Cargo +# will have compiled files and executables +debug/ +target/ +# Encryption keys and related files (CRITICAL - NEVER COMMIT) +.k.backup +*.key.backup + +config.*.toml +config.*back + +# where book is written +_book + +# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries +# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html +Cargo.lock + +# These are backup files generated by rustfmt +**/*.rs.bk + +# MSVC Windows builds of rustc generate these, which store debugging information +*.pdb + +node_modules/ + +**/output.css +**/input.css + +# Environment files +.env +.env.local +.env.production +.env.development +.env.staging + +# Keep example files +!.env.example + +# Configuration files (may contain sensitive data) +config.prod.toml +config.production.toml +config.local.toml +config.*.local.toml + +# Keep example configuration files +!config.toml +!config.dev.toml +!config.example.toml + +# Log files +logs/ +*.log + +# TLS certificates and keys +certs/ +*.pem +*.crt +*.key +*.p12 +*.pfx + +# Database files +*.db +*.sqlite +*.sqlite3 + +# Backup 
files +*.bak +*.backup +*.tmp +*~ + +# Encryption and security related files +*.encrypted +*.enc +secrets/ +private/ +security/ + +# Configuration backups that may contain secrets +config.*.backup +config.backup.* + +# OS generated files +.DS_Store +.DS_Store? +._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db +# Documentation build output +book-output/ +# Generated setup report +SETUP_COMPLETE.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..47a6732 --- /dev/null +++ b/README.md @@ -0,0 +1,412 @@ +
+Provisioning Logo
+
+Provisioning
+
+ + + +# Provisioning KCL Package + +A comprehensive KCL (KusionStack Configuration Language) package providing type-safe schemas for [Provisioning project](https://repo.jesusperez.pro/jesus/provisioning) batch workflows, and Kubernetes deployments. + +## Overview + +This package contains production-ready KCL schemas with configuration-driven, provider-agnostic infrastructure automation capabilities. + +### Package Structure + +``` +provisioning/kcl/ +├── main.k # Main entry point - import this +├── settings.k # Core system settings +├── lib.k # Common schemas and utilities +├── server.k # Server configuration schemas +├── cluster.k # Cluster management schemas +├── workflows.k # Batch workflow schemas +├── batch.k # Advanced batch operation utilities +├── dependencies.k # Taskserv dependency management +├── version.k # Version management schemas +├── k8s_deploy.k # Kubernetes deployment schemas +├── defaults.k # Default configurations +├── examples_batch.k # Comprehensive examples +└── docs/ # Documentation +``` + +## Quick Start + +### Import the Package + +```kcl +# Import the main entry point for access to all schemas +import provisioning.main + +# Or import from a relative path if working within the same project +import .main +``` + +### Basic Server Configuration + +```kcl +import .main + +# Define a simple server +web_server: main.Server = main.Server { + hostname: "web-01" + title: "Production Web Server" + labels: "env: prod, tier: web" + user: "admin" + + # Optional: Add taskservs to install + taskservs: [ + main.TaskServDef { + name: "nginx" + install_mode: "library" + profile: "production" + } + ] +} +``` + +### Batch Workflow Example + +```kcl +import .main + +# Define a multi-provider infrastructure deployment +deployment_workflow: main.BatchWorkflow = main.BatchWorkflow { + workflow_id: "prod_deploy_001" + name: "Production Infrastructure Deployment" + description: "Deploy web tier across UpCloud and AWS" + + operations: [ + # Create UpCloud servers + main.BatchOperation { + operation_id: "create_web_servers" + name: "Create Web Servers" + operation_type: "server" + provider: "upcloud" + action: "create" + parameters: { + "server_count": "3" + "server_type": "web" + "zone": "fi-hel2" + "plan": "2xCPU-4GB" + } + priority: 10 + } + + # Install Kubernetes after servers are ready + main.BatchOperation { + operation_id: "install_k8s" + name: "Install Kubernetes Cluster" + operation_type: "taskserv" + action: "create" + parameters: { + "taskserv": "kubernetes" + "version": "v1.31.0" + "cluster_name": "prod-cluster" + } + dependencies: [ + main.DependencyDef { + target_operation_id: "create_web_servers" + dependency_type: "sequential" + timeout: 600 + } + ] + priority: 8 + } + ] + + # Global workflow settings + max_parallel_operations: 3 + fail_fast: False + + # Use SurrealDB for state persistence + storage: main.StorageConfig { + backend: "surrealdb" + connection_config: { + "url": "ws://localhost:8000" + "namespace": "provisioning" + "database": "workflows" + } + enable_persistence: True + retention_hours: 720 # 30 days + } +} +``` + +### Kubernetes Deployment + +```kcl +import .main + +# Define a complete Kubernetes deployment +nginx_deployment: main.K8sDeploy = main.K8sDeploy { + name: "nginx-web" + namespace: "production" + create_ns: True + + spec: main.K8sDeploySpec { + replicas: 3 + containers: [ + main.K8sContainers { + name: "nginx" + image: "nginx:1.21" + ports: [ + main.K8sPort { + name: "http" + container: 80 + target: 8080 + } + ] + resources_requests: 
main.K8sResources { + memory: "128Mi" + cpu: "100m" + } + resources_limits: main.K8sResources { + memory: "256Mi" + cpu: "200m" + } + } + ] + } + + # Expose via service + service: main.K8sService { + name: "nginx-service" + typ: "LoadBalancer" + ports: [ + main.K8sPort { + name: "http" + target: 80 + nodePort: 30080 + } + ] + } +} +``` + +## Core Schemas + +### Server Management +- **`Server`**: Complete server configuration with defaults inheritance +- **`ServerDefaults`**: Default settings for server provisioning +- **`Storage`**, **`StorageVol`**: Storage configuration and partitioning + +### Workflow & Batch Operations +- **`BatchWorkflow`**: Multi-operation workflow with dependencies +- **`BatchOperation`**: Individual operation within workflows +- **`DependencyDef`**: Define sequential or conditional dependencies +- **`RetryPolicy`**: Configure retry behavior and backoff +- **`RollbackStrategy`**: Automatic rollback on failures + +### Taskserv Management +- **`TaskServDef`**: Infrastructure service definitions +- **`TaskservDependencies`**: Dependency management for taskservs +- **`HealthCheck`**: Health monitoring configuration + +### Kubernetes Deployments +- **`K8sDeploy`**: Complete Kubernetes deployment specification +- **`K8sService`**: Service definitions with load balancing +- **`K8sVolume`**: Persistent storage configuration +- **`K8sResources`**: Resource limits and requests + +### Configuration & Settings +- **`Settings`**: System-wide configuration +- **`SecretProvider`**: SOPS/KMS secret management +- **`AIProvider`**: AI integration configuration + +## Advanced Features + +### Mixed Provider Support + +Deploy across multiple cloud providers in a single workflow: + +```kcl +mixed_deployment: main.BatchWorkflow = main.BatchWorkflow { + workflow_id: "multi_cloud_001" + name: "Multi-Cloud Deployment" + + operations: [ + # UpCloud servers for web tier + main.BatchOperation { + operation_id: "upcloud_web" + provider: "upcloud" + parameters: {"zone": "fi-hel2", "count": "3"} + } + # AWS RDS for database + main.BatchOperation { + operation_id: "aws_database" + provider: "aws" + parameters: {"region": "eu-west-1", "engine": "postgresql"} + dependencies: [ + main.DependencyDef { + target_operation_id: "upcloud_web" + dependency_type: "sequential" + } + ] + } + ] +} +``` + +### Resource Constraints & Autoscaling + +Configure intelligent resource management: + +```kcl +batch_executor: main.BatchExecutor = main.BatchExecutor { + executor_id: "production_executor" + name: "Production Batch Executor" + + # Resource limits + resource_constraints: [ + main.ResourceConstraint { + resource_type: "cpu" + resource_name: "total_cores" + max_units: 16 + units_per_operation: 2 + hard_constraint: True + } + ] + + # Auto-scaling configuration + autoscaling: main.BatchAutoscaling { + enabled: True + min_parallel: 2 + max_parallel: 10 + scale_up_threshold: 0.8 + target_utilization: 0.65 + } +} +``` + +### Monitoring & Observability + +```kcl +monitoring_config: main.MonitoringConfig = main.MonitoringConfig { + enabled: True + backend: "prometheus" + enable_tracing: True + enable_notifications: True + notification_channels: [ + "webhook:slack://ops-alerts", + "webhook:pagerduty://incidents" + ] + log_level: "info" +} +``` + +## Validation & Testing + +### Schema Validation + +```bash +# Validate individual files +kcl run server_config.k + +# Validate entire workflow +kcl run workflow_definition.k + +# Output as JSON for integration +kcl run workflow_definition.k --format json +``` + +### Built-in 
Constraints + +All schemas include comprehensive validation: + +```kcl +# Server hostnames must be non-empty +server: main.Server = main.Server { + hostname: "web-01" # ✅ Valid + # hostname: "" # ❌ Validation error +} + +# Resource constraints are enforced +resources: main.K8sResources = main.K8sResources { + memory: "128Mi" # ✅ Valid K8s format + # memory: "invalid" # ❌ Validation error +} + +# Dependency cycles are prevented +operation: main.BatchOperation = main.BatchOperation { + operation_id: "op1" + dependencies: [ + main.DependencyDef { + target_operation_id: "op2" # ✅ Valid dependency + # target_operation_id: "op1" # ❌ Self-reference prevented + } + ] +} +``` + +## Integration Examples + +### With Nushell Scripts + +```nushell +# Generate workflow from KCL +let workflow = (kcl run deployment.k --format json | from json) + +# Submit to batch executor +$workflow | to json | http post http://localhost:8080/workflows/batch/submit + +# Monitor progress +while true { + let status = (http get $"http://localhost:8080/workflows/batch/($workflow.workflow_id)") + if $status.status == "completed" { break } + sleep 5sec +} +``` + +### With Rust Orchestrator + +```rust +// Deserialize KCL output into Rust structs +let workflow: BatchWorkflow = serde_json::from_str(&kcl_output)?; + +// Execute via orchestrator +let executor = BatchExecutor::new(workflow); +executor.execute().await?; +``` + +## Package Metadata + +- **Version**: 0.1.0 +- **API Version**: v1 +- **KCL Compatibility**: 0.11.0 - 0.12.0 +- **Build Date**: 2025-09-28 + +### Features +- ✅ Server Management +- ✅ Cluster Orchestration +- ✅ Provider Abstraction +- ✅ Workflow Automation +- ✅ Batch Operations + +## Best Practices + +1. **Always import via main.k** for stability +2. **Use descriptive operation_id values** for dependency tracking +3. **Set appropriate timeouts** based on operation complexity +4. **Enable monitoring** for production workflows +5. **Test workflows** with small counts before production +6. **Use retry policies** for transient failures +7. **Configure rollback strategies** for critical operations + +## Contributing + +When adding new schemas: +1. Follow existing naming conventions +2. Add comprehensive validation rules +3. Include documentation strings +4. Export from `main.k` +5. Add examples to `examples_batch.k` +6. Update this README + +## License + +This package is part of the Provisioning project and follows the same license terms. diff --git a/REFERENCE.md b/REFERENCE.md new file mode 100644 index 0000000..fb64e73 --- /dev/null +++ b/REFERENCE.md @@ -0,0 +1,56 @@ +# KCL Schemas Reference + +This directory contains references to existing KCL schema implementations. 
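+
+Once consolidation lands, consumers would import through the single entry point (a sketch; the final module path may differ):
+
+```kcl
+# Illustrative only - mirrors the import pattern documented in README.md
+import provisioning.main
+```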
+ +## Current Implementation Locations + +### Settings Schema +- **Current**: `/Users/Akasha/repo-cnz/src/provisioning/kcl/settings.k` +- **New Reference**: `settings.k` (placeholder created) + +### Provider Schemas +- **Current**: Various files in `/Users/Akasha/repo-cnz/src/provisioning/providers/*/kcl/` +- **New Reference**: `providers.k` (to be created) + +### Workflow Schemas +- **Current**: Distributed across workflow implementations +- **New Reference**: `workflows.k` (to be created) + +## Migration Strategy + +### Phase 1: Analysis +- Inventory all existing KCL schemas +- Identify common patterns and duplications +- Document schema relationships + +### Phase 2: Consolidation +- Create unified schema files +- Preserve backward compatibility +- Update import paths gradually + +### Phase 3: Optimization +- Improve type safety +- Add comprehensive validation +- Enhance documentation + +## Benefits of Consolidation + +1. **Single Source of Truth**: Unified schema definitions +2. **Type Safety**: Compile-time validation across entire system +3. **Consistency**: Standardized configuration patterns +4. **Maintainability**: Easier schema evolution and updates + +## Current Status + +- **Reference Files**: Created with placeholders +- **Original Schemas**: Fully functional in existing locations +- **Migration**: Planned for future phase + +## Integration + +Consolidated schemas will be used by: +- Core provisioning engine +- Code generators +- Configuration validators +- Documentation systems +- IDE integrations (syntax highlighting, autocompletion) \ No newline at end of file diff --git a/batch.k b/batch.k new file mode 100644 index 0000000..17dacba --- /dev/null +++ b/batch.k @@ -0,0 +1,287 @@ +# Info: KCL batch operation utilities for provisioning (Provisioning) +# Author: JesusPerezLorenzo +# Release: 0.0.1 +# Date: 25-09-2025 +# Description: Batch operation utilities and helper schemas following PAP principles +import .workflows + +schema BatchScheduler: + """ + Scheduler configuration for batch operations + Supports various scheduling strategies and resource management + """ + # Scheduling strategy: 'fifo', 'priority', 'dependency_first', 'resource_aware' + strategy: "fifo" | "priority" | "dependency_first" | "resource_aware" = "dependency_first" + # Resource constraints for scheduling + resource_limits: {str:int} = { + # 0 = no limit + "max_cpu_cores": 0 + # 0 = no limit + "max_memory_mb": 0 + # 0 = no limit + "max_network_bandwidth": 0 + } + # Scheduling interval in seconds + scheduling_interval: int = 10 + # Whether to enable preemptive scheduling + enable_preemption: bool = False + + check: + scheduling_interval > 0, "Scheduling interval must be positive" + +schema BatchQueue: + """ + Queue configuration for batch operations + Supports priority queues and resource-based queuing + """ + # Queue name/identifier + queue_id: str + # Queue type: 'standard', 'priority', 'delay', 'dead_letter' + queue_type: "standard" | "priority" | "delay" | "dead_letter" = "standard" + # Maximum queue size (0 = unlimited) + max_size: int = 0 + + # Message retention period in seconds + # 7 days default + retention_period: int = 604800 + # Dead letter queue configuration + dead_letter_queue?: str + # Maximum delivery attempts before moving to dead letter + max_delivery_attempts: int = 3 + + check: + len(queue_id) > 0, "Queue ID cannot be empty" + max_size >= 0, "Max size cannot be negative" + retention_period > 0, "Retention period must be positive" + max_delivery_attempts > 0, "Max delivery attempts must 
be positive" + +schema ResourceConstraint: + """ + Resource constraint definition for batch operations + Ensures operations don't exceed available resources + """ + # Resource type: cpu, memory, network, storage, custom + resource_type: "cpu" | "memory" | "network" | "storage" | "custom" + # Resource name/identifier + resource_name: str + # Maximum units available + max_units: int + # Current units in use + current_units: int = 0 + # Units per operation (for estimation) + units_per_operation: int = 1 + # Whether this constraint is hard (fails operation) or soft (warns only) + hard_constraint: bool = True + + check: + len(resource_name) > 0, "Resource name cannot be empty" + max_units > 0, "Max units must be positive" + current_units >= 0, "Current units cannot be negative" + units_per_operation > 0, "Units per operation must be positive" + current_units <= max_units, "Current units cannot exceed max units" + +schema BatchMetrics: + """ + Metrics collection configuration for batch operations + Tracks performance, success rates, and resource utilization + """ + # Whether to collect detailed metrics + detailed_metrics: bool = True + + # Metrics retention period in hours + # 1 week + retention_hours: int = 168 + + # Metrics aggregation intervals + # 1min, 5min, 1hour + aggregation_intervals: [int] = [60, 300, 3600] + # Custom metrics to collect + custom_metrics?: [str] = [] + # Whether to export metrics to external systems + enable_export: bool = False + # Export configuration + export_config?: {str:str} = {} + + check: + retention_hours > 0, "Retention hours must be positive" + len(aggregation_intervals) > 0, "Must have at least one aggregation interval" + +schema ProviderMixConfig: + """ + Configuration for mixed provider batch operations + Handles cross-provider dependencies and resource coordination + """ + # Primary provider for the batch workflow + primary_provider: str = "upcloud" + # Secondary providers available + secondary_providers: [str] = [] + # Provider selection strategy for new resources + provider_selection: "primary_first" | "load_balance" | "cost_optimize" | "latency_optimize" = "primary_first" + # Cross-provider networking configuration + cross_provider_networking?: {str:str} = {} + # Shared storage configuration across providers + shared_storage?: workflows.StorageConfig + # Provider-specific resource limits + provider_limits: {str:{str:int}} = {} + + check: + len(primary_provider) > 0, "Primary provider cannot be empty" + +schema BatchHealthCheck: + """ + Health check configuration for batch operations + Monitors operation health and triggers recovery actions + """ + # Whether health checks are enabled + enabled: bool = True + # Health check interval in seconds + check_interval: int = 60 + # Health check timeout in seconds + check_timeout: int = 30 + # Failure threshold before marking as unhealthy + failure_threshold: int = 3 + # Success threshold to mark as healthy again + success_threshold: int = 2 + # Health check endpoints/commands + health_checks: [str] = [] + # Actions to take on health check failure + failure_actions: [str] = ["retry", "rollback"] + + check: + check_interval > 0, "Check interval must be positive" + check_timeout > 0, "Check timeout must be positive" + failure_threshold > 0, "Failure threshold must be positive" + success_threshold > 0, "Success threshold must be positive" + +schema BatchAutoscaling: + """ + Autoscaling configuration for batch operations + Dynamically adjusts resources based on load and performance + """ + # Whether autoscaling is 
enabled + enabled: bool = False + # Minimum number of parallel operations + min_parallel: int = 1 + # Maximum number of parallel operations + max_parallel: int = 10 + + # Scaling triggers based on metrics + # CPU/resource utilization + scale_up_threshold: float = 0.8 + scale_down_threshold: float = 0.2 + # Scaling cooldown period in seconds + cooldown_period: int = 300 + # Scaling step size + scale_step: int = 1 + # Target resource utilization + target_utilization: float = 0.6 + + check: + min_parallel > 0, "Min parallel must be positive" + max_parallel >= min_parallel, "Max parallel must be >= min parallel" + scale_up_threshold > scale_down_threshold, "Scale up threshold must be > scale down threshold" + 0 < target_utilization and target_utilization < 1, "Target utilization must be between 0 and 1" + cooldown_period > 0, "Cooldown period must be positive" + +schema BatchExecutor: + """ + Batch executor configuration combining all batch operation aspects + Main configuration schema for batch workflow execution engine + """ + # Executor identifier + executor_id: str + # Executor name and description + name: str + description?: str = "" + # Core scheduling configuration + scheduler: BatchScheduler = BatchScheduler {} + # Queue management + queues: [BatchQueue] = [BatchQueue {queue_id: "default"}] + # Resource constraints + resource_constraints: [ResourceConstraint] = [] + + # Mixed provider configuration + provider_config: ProviderMixConfig = ProviderMixConfig {} + # Health monitoring + health_check: BatchHealthCheck = BatchHealthCheck {} + # Autoscaling settings + autoscaling: BatchAutoscaling = BatchAutoscaling {} + + # Metrics and monitoring + metrics: BatchMetrics = BatchMetrics {} + # Storage configuration for execution state + storage: workflows.StorageConfig = workflows.StorageConfig {} + + # Security and access control + security_config: {str:str} = {} + # Audit logging configuration + audit_logging: bool = True + audit_log_path: str = "./logs/batch_audit.log" + + # Integration settings + webhook_endpoints: [str] = [] + api_endpoints: [str] = [] + + # Performance tuning + performance_config: {str:str} = { + "io_threads": "4" + "worker_threads": "8" + "batch_size": "100" + } + + check: + len(executor_id) > 0, "Executor ID cannot be empty" + len(name) > 0, "Executor name cannot be empty" + len(queues) > 0, "Must have at least one queue configured" + +# Utility functions and constants for batch operations +BatchOperationTypes: [str] = [ + "server_create" + "server_delete" + "server_scale" + "server_update" + "taskserv_install" + "taskserv_remove" + "taskserv_update" + "taskserv_configure" + "cluster_create" + "cluster_delete" + "cluster_scale" + "cluster_upgrade" + "custom_command" + "custom_script" + "custom_api_call" +] + +BatchProviders: [str] = [ + "upcloud" + "aws" + "local" + "mixed" + "custom" +] + +DefaultBatchConfig: BatchExecutor = BatchExecutor { + executor_id: "default_batch_executor" + name: "Default Batch Executor" + description: "Default configuration-driven batch executor for provisioning operations" + scheduler: BatchScheduler { + strategy: "dependency_first" + resource_limits: { + "max_cpu_cores": 8 + "max_memory_mb": 16384 + "max_network_bandwidth": 1000 + } + } + provider_config: ProviderMixConfig { + primary_provider: "upcloud" + secondary_providers: ["aws", "local"] + provider_selection: "primary_first" + } + autoscaling: BatchAutoscaling { + enabled: True + min_parallel: 2 + max_parallel: 8 + target_utilization: 0.7 + } +} diff --git a/cluster.k b/cluster.k new 
file mode 100644
index 0000000..7f49a10
--- /dev/null
+++ b/cluster.k
@@ -0,0 +1,33 @@
+# Info: KCL core lib cluster schemas for provisioning (Provisioning)
+# Author: JesusPerezLorenzo
+# Release: 0.0.4
+# Date: 15-12-2023
+import .lib
+
+schema Cluster:
+    """
+    cluster settings
+    """
+    not_use: bool = False
+    name: str
+    version: str
+    # Template deployment path in $PROVISIONING/templates
+    template?: "k8s-deploy" | ""
+    # Schema definition values
+    def: "K8sDeploy" | "" = ""
+    # Path where cluster definitions are saved; falls back to main settings
+    clusters_save_path?: str
+    # Profile to use
+    profile?: str
+    # Host used to administer the cluster
+    admin_host?: str
+    # SSH port used to connect to the cluster admin host
+    admin_port?: int
+    # SSH user used to connect to the cluster admin host
+    admin_user?: str
+    ssh_key_path?: str
+    # Local cluster definition path directory
+    local_def_path: str = "./clusters/${name}"
+    # Scale mode settings for lib.ScaleResource
+    scale?: lib.ScaleResource
+
diff --git a/coredns.k b/coredns.k
new file mode 100644
index 0000000..f29b6d3
--- /dev/null
+++ b/coredns.k
@@ -0,0 +1,484 @@
+# Info: KCL CoreDNS configuration schemas for provisioning system
+# Author: CoreDNS Integration Agent
+# Release: 1.0.0
+# Date: 2025-10-06
+# Purpose: Define CoreDNS service configuration, zones, and DNS management
+
+import regex
+
+schema CoreDNSConfig:
+    """
+    CoreDNS service configuration
+
+    Defines how CoreDNS is deployed and managed within the provisioning system.
+    Supports local binary, Docker, remote, and hybrid deployment modes.
+
+    Examples:
+        # Local mode with auto-start
+        CoreDNSConfig {
+            mode = "local"
+            local = LocalCoreDNS {
+                enabled = True
+                auto_start = True
+                zones = ["provisioning.local", "workspace.local"]
+            }
+        }
+
+        # Remote mode
+        CoreDNSConfig {
+            mode = "remote"
+            remote = RemoteCoreDNS {
+                enabled = True
+                endpoints = ["https://dns1.example.com", "https://dns2.example.com"]
+                zones = ["production.local"]
+            }
+        }
+    """
+    # Deployment mode: local, remote, hybrid, or disabled
+    mode: "local" | "remote" | "hybrid" | "disabled" = "local"
+
+    # Local CoreDNS configuration
+    local?: LocalCoreDNS
+
+    # Remote CoreDNS configuration
+    remote?: RemoteCoreDNS
+
+    # Dynamic DNS update configuration
+    dynamic_updates: DynamicDNS = DynamicDNS {}
+
+    # Upstream DNS servers for forwarding
+    upstream: [str] = ["8.8.8.8", "1.1.1.1"]
+
+    # Global TTL for DNS records (seconds)
+    default_ttl: int = 300
+
+    # Enable DNS query logging
+    enable_logging: bool = True
+
+    # Enable metrics endpoint
+    enable_metrics: bool = True
+
+    # Metrics port
+    metrics_port: int = 9153
+
+    check:
+        len(upstream) > 0, "At least one upstream DNS server required"
+        default_ttl > 0 and default_ttl <= 86400, "TTL must be 1-86400 seconds"
+        metrics_port >= 1024 and metrics_port <= 65535, "Metrics port must be 1024-65535"
+        mode != "local" or local != Undefined, "Local config required when mode is 'local'"
+        mode != "remote" or remote != Undefined, "Remote config required when mode is 'remote'"
+        mode != "hybrid" or (local != Undefined and remote != Undefined), \
+            "Both local and remote config required when mode is 'hybrid'"
+
+schema LocalCoreDNS:
+    """
+    Local CoreDNS binary configuration
+
+    Manages CoreDNS running as a local binary or Docker container.
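+
+    Example (illustrative values; the zone name is an assumption):
+        LocalCoreDNS {
+            deployment_type = "binary"
+            port = 5353
+            auto_start = True
+            zones = ["provisioning.local"]
+        }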
+ """ + # Enable local CoreDNS + enabled: bool = True + + # Deployment type: binary or docker + deployment_type: "binary" | "docker" = "binary" + + # Path to CoreDNS binary + binary_path: str = "~/.provisioning/bin/coredns" + + # Path to Corefile + config_path: str = "~/.provisioning/coredns/Corefile" + + # Path to zone files directory + zones_path: str = "~/.provisioning/coredns/zones" + + # DNS listening port + port: int = 5353 + + # Auto-start CoreDNS on system startup + auto_start: bool = True + + # Auto-restart on failure + auto_restart: bool = True + + # Managed DNS zones + zones: [str] = ["provisioning.local", "workspace.local"] + + # PID file path (for binary mode) + pid_file?: str = "~/.provisioning/coredns/coredns.pid" + + # Log file path + log_file?: str = "~/.provisioning/coredns/coredns.log" + + # Docker configuration (for docker mode) + docker?: DockerCoreDNS + + check: + port >= 1024 and port <= 65535, "Port must be 1024-65535" + len(zones) > 0, "At least one zone required" + deployment_type != "docker" or docker != Undefined, \ + "Docker config required when deployment_type is 'docker'" + +schema DockerCoreDNS: + """ + Docker-based CoreDNS deployment configuration + """ + # Docker image + image: str = "coredns/coredns:1.11.1" + + # Container name + container_name: str = "provisioning-coredns" + + # Restart policy + restart_policy: "no" | "always" | "unless-stopped" | "on-failure" = "unless-stopped" + + # Network mode + network_mode: str = "bridge" + + # Publish DNS port + publish_port: bool = True + + # Volume mounts (host:container) + volumes: [str] = [] + + check: + len(image) > 0, "Docker image required" + len(container_name) > 0, "Container name required" + +schema RemoteCoreDNS: + """ + Remote CoreDNS service configuration + + Connect to external CoreDNS instances for DNS management. + """ + # Enable remote CoreDNS + enabled: bool = True + + # Remote CoreDNS API endpoints + endpoints: [str] + + # Managed zones on remote servers + zones: [str] + + # Authentication token file path + auth_token_path?: str + + # TLS verification + verify_tls: bool = True + + # Connection timeout (seconds) + timeout: int = 30 + + # Health check interval (seconds) + health_check_interval: int = 60 + + check: + len(endpoints) > 0, "At least one remote endpoint required" + len(zones) > 0, "At least one zone required" + timeout > 0 and timeout <= 300, "Timeout must be 1-300 seconds" + health_check_interval >= 10, "Health check interval must be >= 10 seconds" + +schema DynamicDNS: + """ + Dynamic DNS update configuration + + Enables automatic DNS updates when infrastructure changes. 
+ """ + # Enable dynamic DNS updates + enabled: bool = True + + # Orchestrator DNS API endpoint + api_endpoint: str = "http://localhost:8080/dns" + + # Automatically register servers on creation + auto_register_servers: bool = True + + # Automatically unregister servers on deletion + auto_unregister_servers: bool = True + + # Default TTL for dynamic records (seconds) + ttl: int = 300 + + # Update strategy: immediate, batched, or scheduled + update_strategy: "immediate" | "batched" | "scheduled" = "immediate" + + # Batch interval (seconds, for batched strategy) + batch_interval?: int = 60 + + # Retry configuration + retry_policy: RetryPolicy = RetryPolicy {} + + check: + ttl > 0 and ttl <= 86400, "TTL must be 1-86400 seconds" + update_strategy != "batched" or batch_interval != Undefined, \ + "Batch interval required for batched strategy" + batch_interval == Undefined or batch_interval >= 10, \ + "Batch interval must be >= 10 seconds" + +schema RetryPolicy: + """ + Retry policy for DNS update failures + """ + # Maximum retry attempts + max_attempts: int = 3 + + # Initial delay before first retry (seconds) + initial_delay: int = 5 + + # Backoff multiplier for subsequent retries + backoff_multiplier: float = 2.0 + + # Maximum delay between retries (seconds) + max_delay: int = 60 + + check: + max_attempts > 0 and max_attempts <= 10, "Max attempts must be 1-10" + initial_delay > 0, "Initial delay must be positive" + backoff_multiplier >= 1.0, "Backoff multiplier must be >= 1.0" + max_delay >= initial_delay, "Max delay must be >= initial delay" + +schema DNSZone: + """ + DNS zone configuration + + Defines a DNS zone with SOA, NS, and other records. + + Examples: + DNSZone { + name = "provisioning.local" + admin_email = "admin.provisioning.local" + nameservers = ["ns1.provisioning.local"] + records = [ + DNSRecord { + name = "server-01" + type = "A" + value = "10.0.1.10" + } + ] + } + """ + # Zone name (must be FQDN with trailing dot in zone file) + name: str + + # Zone file path + file_path?: str + + # SOA record configuration + soa: SOARecord = SOARecord {} + + # Nameserver hostnames + nameservers: [str] + + # Admin email (dots replaced with @ in zone file) + admin_email: str = $"admin.{name}" + + # DNS records + records: [DNSRecord] = [] + + # Default TTL for zone (seconds) + ttl: int = 3600 + + check: + len(name) > 0, "Zone name required" + regex.match(name, r"^[a-z0-9]([a-z0-9-\.]{0,253}[a-z0-9])?$"), \ + "Zone name must be valid domain name" + len(nameservers) > 0, "At least one nameserver required" + ttl > 0, "TTL must be positive" + +schema SOARecord: + """ + SOA (Start of Authority) record + """ + # Serial number (auto-incremented on updates) + serial: int = 1 + + # Refresh interval (seconds) + refresh: int = 3600 + + # Retry interval (seconds) + retry: int = 1800 + + # Expire time (seconds) + expire: int = 604800 + + # Minimum TTL (seconds) + minimum: int = 86400 + + check: + serial > 0, "Serial must be positive" + refresh > 0, "Refresh must be positive" + retry > 0, "Retry must be positive" + expire > refresh, "Expire must be > refresh" + minimum > 0, "Minimum must be positive" + +schema DNSRecord: + """ + DNS resource record + + Supports A, AAAA, CNAME, MX, TXT, NS, SRV, PTR records. 
+ + Examples: + # A record + DNSRecord { + name = "server-01" + type = "A" + value = "10.0.1.10" + } + + # CNAME record + DNSRecord { + name = "web" + type = "CNAME" + value = "server-01.provisioning.local" + } + + # MX record + DNSRecord { + name = "@" + type = "MX" + priority = 10 + value = "mail.provisioning.local" + } + """ + # Record name (hostname or @) + name: str + + # Record type + type: "A" | "AAAA" | "CNAME" | "MX" | "TXT" | "NS" | "SOA" | "SRV" | "PTR" + + # Record value (IP address, hostname, or text) + value: str + + # TTL in seconds (optional, uses zone default) + ttl?: int + + # Priority (for MX and SRV records) + priority?: int + + # Weight (for SRV records) + weight?: int + + # Port (for SRV records) + port?: int + + # Comment + comment?: str + + check: + len(name) > 0, "Record name required" + len(value) > 0, "Record value required" + + # A record validation + type != "A" or regex.match(value, \ + r"^((25[0-5]|2[0-4][0-9]|[0-1]?[0-9]?[0-9])\.){3}(25[0-5]|2[0-4][0-9]|[0-1]?[0-9]?[0-9])$"), \ + "A record value must be valid IPv4 address" + + # AAAA record validation + type != "AAAA" or regex.match(value, r"^([0-9a-fA-F]{0,4}:){7}[0-9a-fA-F]{0,4}$"), \ + "AAAA record value must be valid IPv6 address" + + # MX/SRV priority validation + type not in ["MX", "SRV"] or priority != Undefined, \ + "Priority required for MX and SRV records" + + # SRV weight and port validation + type != "SRV" or (weight != Undefined and port != Undefined), \ + "Weight and port required for SRV records" + + # TTL validation + ttl == Undefined or (ttl > 0 and ttl <= 86400), \ + "TTL must be 1-86400 seconds" + +schema CorefilePlugin: + """ + Corefile plugin configuration + + Defines a plugin block in Corefile. + """ + # Plugin name (file, forward, cache, etc.) + name: str + + # Plugin arguments + args: [str] = [] + + # Plugin options (key-value pairs) + options: {str: str} = {} + + check: + len(name) > 0, "Plugin name required" + +schema CorefileZoneBlock: + """ + Corefile zone block configuration + + Defines a zone block with plugins in Corefile. 
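+
+    Example (illustrative; the zone file path and upstream address are assumptions):
+        CorefileZoneBlock {
+            zone = "provisioning.local"
+            port = 5353
+            plugins = [
+                CorefilePlugin {name = "file", args = ["zones/provisioning.local.db"]}
+                CorefilePlugin {name = "forward", args = [".", "8.8.8.8"]}
+            ]
+        }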
+ """ + # Zone name (e.g., "provisioning.local:5353") + zone: str + + # Port number + port: int = 53 + + # Plugins in this zone + plugins: [CorefilePlugin] + + check: + len(zone) > 0, "Zone required" + port >= 1024 and port <= 65535, "Port must be 1024-65535" + len(plugins) > 0, "At least one plugin required" + +schema DNSQueryLog: + """ + DNS query logging configuration + """ + # Enable query logging + enabled: bool = True + + # Log file path + log_file: str = "~/.provisioning/coredns/queries.log" + + # Log format: text or json + log_format: "text" | "json" = "text" + + # Log level: debug, info, warn, error + log_level: "debug" | "info" | "warn" | "error" = "info" + + # Rotate log files + rotate_enabled: bool = True + + # Max log file size (MB) + max_size_mb: int = 100 + + # Max number of rotated files + max_backups: int = 5 + + check: + max_size_mb > 0 and max_size_mb <= 1024, "Max size must be 1-1024 MB" + max_backups >= 0 and max_backups <= 100, "Max backups must be 0-100" + +schema DNSHealthCheck: + """ + CoreDNS health check configuration + """ + # Enable health checks + enabled: bool = True + + # Health check endpoint + endpoint: str = "http://localhost:8080/health" + + # Health check interval (seconds) + interval: int = 30 + + # Timeout for health check (seconds) + timeout: int = 5 + + # Unhealthy threshold (consecutive failures) + unhealthy_threshold: int = 3 + + # Healthy threshold (consecutive successes) + healthy_threshold: int = 2 + + check: + interval > 0, "Interval must be positive" + timeout > 0 and timeout < interval, "Timeout must be < interval" + unhealthy_threshold > 0, "Unhealthy threshold must be positive" + healthy_threshold > 0, "Healthy threshold must be positive" diff --git a/defaults.k b/defaults.k new file mode 100644 index 0000000..3033dfe --- /dev/null +++ b/defaults.k @@ -0,0 +1,75 @@ +# Info: KCL core lib defaults schemas for provisioning (Provisioning) +# Author: JesusPerezLorenzo +# Release: 0.0.4 +# Date: 15-12-2023 +import regex +import .lib + +schema ServerDefaults: + """ + Server Defaults settings + """ + lock: bool = False + # To use private network, IPs will be set in servers items + priv_cidr_block?: str + time_zone: str = "UTC" + + #zone?: str + # Second to wait before check in for running state + running_wait: int = 10 + # Total seconds to wait for running state before timeout + running_timeout: int = 200 + + # Specific AMIs can be used with their ID + # If 'storage_os: find' storage_os_find will be used to find one in zone (region) + # expected something like: "name=debian-12 | arch=x86_64" or "name: debian-12 | arch: x86_64" will be parsed to find latest available + storage_os_find: str = "name: debian-12 | arch: x86_64" + + #storage_os?: str + #storage_os: ami-0eb11ab33f229b26c + # If not Storage size, Plan Storage size will be used + # storages is defined in Provider defaults + #storages?: [Storage] + # Add one or more SSH keys to the admin account. Accepted values are SSH public keys or filenames from + # where to read the keys. 
+    # ssh public key to be included in /root/.ssh/authorized_keys
+    ssh_key_path?: str
+    # Public certificate must be created or imported as a key_name
+    # use: providers/aws/bin/on-ssh.sh (add -h to get info)
+    ssh_key_name?: str
+    # Use it to rewrite or update ssh_key
+    # ssh_key_mode: rewrite
+    # AWS does not use a utility network; if no value is set, the utility IP will not be set
+    # Public network; if no value is set, the public IP will not be set
+    network_utility_ipv4: bool = True
+    network_utility_ipv6: bool = False
+    network_public_ipv4?: bool = True
+    network_public_ipv6?: bool = False
+    network_public_ip?: str
+    # TODO: settings for Elastic IPs or instances without a public IP
+    # To use a private network, a VPC + Subnet + NetworkInterface has to be created; IPs will be set in servers items
+    # In AWS this is only a name
+    network_private_name?: str
+    network_private_id?: str
+    primary_dns?: str
+    secondary_dns?: str
+    main_domain?: str
+    domains_search?: str
+    # Labels to describe the server in `key: value` format, multiple can be declared.
+    # Usage: env: dev
+    labels: str
+    # Main user (default Debian user is admin)
+    user: str
+    user_ssh_key_path?: str
+    user_home?: str = "/home/${user}"
+    user_ssh_port?: int = 22
+    # If not empty, server entries will be added to /etc/hosts and $HOME/.ssh/config
+    fix_local_hosts: bool = True
+    installer_user?: str = "${user}"
+    scale?: lib.ScaleResource
+
+    check:
+        user == Undefined or len(user) > 0, "Check user value"
+        #len(ssh_key_path) > 0, "Check ssh_key_path"
+        priv_cidr_block == Undefined or regex.match(priv_cidr_block, "^(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(?:\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}\/(?:3[0-2]|[0-2]?[0-9])$"), "'priv_cidr_block = ${priv_cidr_block}' check value definition"
+
diff --git a/dependencies.k b/dependencies.k
new file mode 100644
index 0000000..7562a4e
--- /dev/null
+++ b/dependencies.k
@@ -0,0 +1,281 @@
+"""
+KCL Dependency Management Schema for Provisioning System
+Provides type-safe dependency declarations with resource requirements and health checks
+"""
+
+schema ResourceRequirement:
+    """Resource requirements for taskserv installation and operation"""
+    # CPU requirement (K8s format)
+    cpu?: str = "100m"
+    # Memory requirement (K8s format)
+    memory?: str = "128Mi"
+    # Disk space requirement
+    disk?: str = "1Gi"
+    # Requires network connectivity
+    network?: bool = True
+    # Requires privileged access
+    privileged?: bool = False
+
+    check:
+        len(cpu) > 0, "CPU requirement cannot be empty"
+        len(memory) > 0, "Memory requirement cannot be empty"
+        len(disk) > 0, "Disk requirement cannot be empty"
+
+schema HealthCheck:
+    """Health check definition for taskserv validation"""
+    # Command to execute for health check
+    command: str
+    # Check interval in seconds
+    interval?: int = 30
+    # Command timeout in seconds
+    timeout?: int = 10
+    # Number of retry attempts
+    retries?: int = 3
+    # Consecutive successes needed
+    success_threshold?: int = 1
+    # Consecutive failures to mark unhealthy
+    failure_threshold?: int = 3
+
+    check:
+        len(command) > 0, "Health check command cannot be empty"
+        interval > 0, "Health check interval must be positive"
+        timeout > 0, "Health check timeout must be positive"
+        retries >= 0, "Health check retries cannot be negative"
+
+schema InstallationPhase:
+    """Installation phase definition for ordered deployment"""
+    # Phase name (e.g., "pre-install", "install", "post-install")
+    name: str
+    # Execution order within phase (lower first)
+    order: int
+    # Can run in
parallel with same order + parallel?: bool = False + # Phase is required for successful installation + required?: bool = True + + check: + len(name) > 0, "Installation phase name cannot be empty" + order >= 0, "Installation phase order cannot be negative" + name in ["pre-install", "install", "post-install", "validate", "cleanup"], "Phase name must be one of: pre-install, install, post-install, validate, cleanup" + +schema TaskservDependencies: + """Complete dependency configuration for a taskserv""" + # Taskserv name (must match directory) + name: str + + # Dependency relationships + # Required taskservs (must be installed first) + requires?: [str] + # Conflicting taskservs (cannot coexist) + conflicts?: [str] + # Optional taskservs (install if available) + optional?: [str] + # Services this taskserv provides + provides?: [str] + # Resource requirements + # Resource requirements for installation + resources: ResourceRequirement + + # Health and validation + # Health check definitions + health_checks?: [HealthCheck] + # Readiness check for installation completion + readiness_probe?: HealthCheck + # Installation control + # Installation phase definitions + phases?: [InstallationPhase] + # Installation timeout in seconds + timeout?: int = 600 + # Number of installation retry attempts + retry_count?: int = 3 + # Compatibility + # Supported operating systems + os_support?: [str] = ["linux"] + # Supported CPU architectures + arch_support?: [str] = ["amd64"] + # Compatible Kubernetes versions + k8s_versions?: [str] + + check: + len(name) > 0, "Taskserv name cannot be empty" + name == name.lower(), "Taskserv name must be lowercase" + timeout > 0, "Installation timeout must be positive" + retry_count >= 0, "Retry count cannot be negative" + len(os_support) > 0, "Must specify at least one supported OS" + len(arch_support) > 0, "Must specify at least one supported architecture" + +# Re-export for taskserv use +schema TaskservDependency(TaskservDependencies): + """Alias for TaskservDependencies - provides the same functionality""" + +# OCI Registry Integration Schemas + +schema OCISource: + """OCI registry configuration for extension distribution""" + # OCI registry endpoint (localhost:5000, harbor.company.com) + registry: str + # Namespace in registry (provisioning-extensions, provisioning-platform) + namespace: str + # Path to authentication token file + auth_token_path?: str + # Enable TLS for registry connection + tls_enabled: bool = False + # Skip TLS certificate verification (insecure, dev only) + insecure_skip_verify: bool = False + # OCI platform architecture + platform: str = "linux/amd64" + # Media type for KCL packages + media_type: str = "application/vnd.kcl.package.v1+tar" + + check: + len(registry) > 0, "OCI registry endpoint required" + len(namespace) > 0, "OCI namespace required" + not (insecure_skip_verify and tls_enabled), \ + "insecure_skip_verify should only be used without TLS" + platform in ["linux/amd64", "linux/arm64", "darwin/amd64", "darwin/arm64"], \ + "Platform must be one of: linux/amd64, linux/arm64, darwin/amd64, darwin/arm64" + +schema GiteaSource: + """Gitea repository configuration for extension distribution""" + # Gitea server URL + url: str + # Organization/namespace containing repositories + organization: str + # Path to authentication token file + auth_token_path?: str + # Use SSH instead of HTTPS + use_ssh: bool = False + # Branch to use for extensions + branch: str = "main" + + check: + len(url) > 0, "Gitea URL required" + len(organization) > 0, "Gitea 
organization required" + url.startswith("http://") or url.startswith("https://"), \ + "Gitea URL must start with http:// or https://" + +schema LocalSource: + """Local filesystem configuration for extension distribution""" + # Absolute path to extensions directory + path: str + # Watch for changes and auto-reload + watch: bool = False + + check: + len(path) > 0, "Local source path required" + path.startswith("/") or path.startswith("~"), \ + "Local source path must be absolute" + +schema HTTPSource: + """Generic HTTP/HTTPS configuration for extension distribution""" + # HTTP/HTTPS URL + url: str + # Authentication header (e.g., "Bearer token123") + auth_header?: str + # Use HTTP basic auth + basic_auth?: bool = False + # Username for basic auth + username?: str + # Password for basic auth + password?: str + + check: + len(url) > 0, "HTTP URL required" + url.startswith("http://") or url.startswith("https://"), \ + "URL must start with http:// or https://" + not basic_auth or (username and password), \ + "Basic auth requires username and password" + +schema ExtensionSource: + """Extension source configuration with multi-backend support""" + # Source type + type: "oci" | "gitea" | "local" | "http" + + # OCI registry source configuration + oci?: OCISource + # Gitea source configuration + gitea?: GiteaSource + # Local filesystem source configuration + local?: LocalSource + # HTTP source configuration + http?: HTTPSource + + check: + (type == "oci" and oci != None) or \ + (type == "gitea" and gitea != None) or \ + (type == "local" and local != None) or \ + (type == "http" and http != None), \ + "Source configuration must match selected type" + +schema ExtensionManifest: + """Extension package manifest for OCI distribution""" + # Extension name (must match directory name) + name: str + # Extension type + type: "provider" | "taskserv" | "cluster" + # Semantic version + version: str + # Extension description + description?: str + # Extension author/maintainer + author?: str + # License identifier (SPDX) + license?: str = "MIT" + # Extension homepage URL + homepage?: str + # Repository URL + repository?: str + # Extension dependencies + dependencies?: {str: str} + # Extension tags/keywords + tags?: [str] + # Supported platforms + platforms?: [str] = ["linux/amd64"] + # Minimum provisioning core version + min_provisioning_version?: str + + check: + len(name) > 0, "Extension name required" + name == name.lower(), "Extension name must be lowercase" + len(version) > 0, "Extension version required" + # Semantic version format (basic check) + version.count(".") >= 2, "Version must be semantic (x.y.z)" + +schema RepositoryConfig: + """Multi-repository configuration for dependency management""" + # Repository name + name: str + # Repository type + type: "core" | "extensions" | "platform" | "workspace" + # Source configuration + source: ExtensionSource + # Repository version/tag + version?: str + # Enable repository + enabled: bool = True + # Repository priority (higher = more priority) + priority: int = 100 + # Cache TTL in seconds + cache_ttl: int = 3600 + + check: + len(name) > 0, "Repository name required" + priority >= 0, "Repository priority cannot be negative" + cache_ttl > 0, "Cache TTL must be positive" + +schema DependencyResolution: + """Dependency resolution configuration""" + # Resolution strategy + strategy: "strict" | "latest" | "minimal" + # Allow pre-release versions + allow_prerelease: bool = False + # Enable version pinning + pin_versions: bool = True + # Maximum dependency depth + 
max_depth: int = 10 + # Conflict resolution strategy + conflict_strategy: "error" | "latest" | "highest_priority" + + check: + max_depth > 0 and max_depth <= 100, \ + "Max depth must be between 1 and 100" diff --git a/docs/BEST_PRACTICES.md b/docs/BEST_PRACTICES.md new file mode 100644 index 0000000..75c940a --- /dev/null +++ b/docs/BEST_PRACTICES.md @@ -0,0 +1,1200 @@ +# KCL Best Practices for Provisioning + +This document outlines best practices for using and developing with the provisioning KCL package, covering schema design, workflow patterns, and operational guidelines. + +## Table of Contents + +- [Schema Design](#schema-design) +- [Workflow Patterns](#workflow-patterns) +- [Error Handling](#error-handling) +- [Performance Optimization](#performance-optimization) +- [Security Considerations](#security-considerations) +- [Testing Strategies](#testing-strategies) +- [Maintenance Guidelines](#maintenance-guidelines) + +## Schema Design + +### 1. Clear Naming Conventions + +```kcl +# ✅ Good: Descriptive, consistent naming +schema ProductionWebServer: + """Web server optimized for production workloads""" + hostname: str # Clear, specific field names + fully_qualified_domain_name?: str + environment_classification: "dev" | "staging" | "prod" + cost_allocation_center: str + operational_team_owner: str + +# ✅ Good: Consistent prefixes for related schemas +schema K8sDeploymentSpec: + """Kubernetes deployment specification""" + replica_count: int + container_definitions: [K8sContainerSpec] + volume_mount_configs: [K8sVolumeMountSpec] + +schema K8sContainerSpec: + """Kubernetes container specification""" + image_reference: str + resource_requirements: K8sResourceRequirements + +# ❌ Avoid: Ambiguous or inconsistent naming +schema Server: # ❌ Too generic + name: str # ❌ Ambiguous - hostname? display name? + env: str # ❌ Unclear - environment? variables? + cfg: {str: str} # ❌ Cryptic abbreviations +``` + +### 2. Comprehensive Documentation + +```kcl +# ✅ Good: Detailed documentation with examples +schema ServerConfiguration: + """ + Production server configuration following company standards. + + This schema defines servers for multi-tier applications with + proper security, monitoring, and operational requirements. 
+
+    Example:
+        web_server: ServerConfiguration = ServerConfiguration {
+            hostname: "prod-web-01"
+            server_role: "frontend"
+            environment: "production"
+            cost_center: "engineering"
+        }
+    """
+
+    # Core identification (required)
+    hostname: str                    # DNS-compliant hostname (RFC 1123)
+    server_role: "frontend" | "backend" | "database" | "cache"
+
+    # Environment and operational metadata
+    environment: "development" | "staging" | "production"
+    cost_center: str                 # Billing allocation identifier
+    primary_contact_team: str        # Team responsible for maintenance
+
+    # Security and compliance
+    security_zone: "dmz" | "internal" | "restricted"
+    compliance_requirements: [str]   # e.g., ["pci", "sox", "hipaa"]
+
+    # Optional operational settings
+    backup_policy?: str              # Backup schedule identifier
+    monitoring_profile?: str         # Monitoring configuration profile
+
+    check:
+        # Hostname validation (DNS RFC 1123)
+        regex.match(hostname, "^[a-z0-9]([a-z0-9-]{0,61}[a-z0-9])?$"),
+            "Hostname must be DNS-compliant (RFC 1123): ${hostname}"
+
+        # Environment-specific validations (an implication: non-production always passes)
+        environment != "production" or len(primary_contact_team) > 0,
+            "Production servers must specify primary contact team"
+
+        # Security requirements
+        security_zone != "restricted" or "encryption" in compliance_requirements,
+            "Restricted zone servers must have encryption compliance"
+
+# ❌ Avoid: Minimal or missing documentation
+schema Srv:                          # ❌ No documentation
+    h: str                           # ❌ No field documentation
+    t: str                           # ❌ Cryptic field names
+```
+
+### 3. Hierarchical Schema Design
+
+```kcl
+# ✅ Good: Base schemas with specialized extensions
+schema BaseInfrastructureResource:
+    """Foundation schema for all infrastructure resources"""
+
+    # Universal metadata
+    resource_name: str
+    creation_timestamp?: str
+    last_modified_timestamp?: str
+    created_by_user?: str
+
+    # Organizational metadata
+    cost_center: str
+    project_identifier: str
+    environment: "dev" | "staging" | "prod"
+
+    # Operational metadata
+    tags: {str: str} = {}
+    monitoring_enabled: bool = True
+
+    check:
+        len(resource_name) > 0 and len(resource_name) <= 63,
+            "Resource name must be 1-63 characters"
+
+        regex.match(resource_name, "^[a-z0-9]([a-z0-9-]*[a-z0-9])?$"),
+            "Resource name must be DNS-label compatible"
+
+schema ComputeResource(BaseInfrastructureResource):
+    """Compute resources with CPU/memory specifications"""
+
+    # Hardware specifications
+    cpu_cores: int
+    memory_gigabytes: int
+    storage_gigabytes: int
+
+    # Performance characteristics
+    cpu_architecture: "x86_64" | "arm64"
+    performance_tier: "burstable" | "standard" | "high_performance"
+
+    check:
+        cpu_cores > 0 and cpu_cores <= 128,
+            "CPU cores must be between 1 and 128"
+
+        memory_gigabytes > 0 and memory_gigabytes <= 1024,
+            "Memory must be between 1GB and 1TB"
+
+schema ManagedDatabaseResource(BaseInfrastructureResource):
+    """Managed database service configuration"""
+
+    # Database specifications
+    database_engine: "postgresql" | "mysql" | "redis" | "mongodb"
+    engine_version: str
+    instance_class: str
+
+    # High availability and backup
+    multi_availability_zone: bool = False
+    backup_retention_days: int = 7
+    automated_backup_enabled: bool = True
+
+    # Security
+    encryption_at_rest: bool = True
+    encryption_in_transit: bool = True
+
+    check:
+        environment != "prod" or multi_availability_zone == True,
+            "Production databases must enable multi-AZ"
+
+        environment != "prod" or backup_retention_days >= 30,
+            "Production databases need minimum 30 days backup retention"
+```

### 4. 
Flexible Configuration Patterns
+
+```kcl
+# ✅ Good: Environment-aware defaults (KCL conditional expressions are Python-style)
+schema EnvironmentAdaptiveConfiguration:
+    """Configuration that adapts based on environment"""
+
+    environment: "dev" | "staging" | "prod"
+
+    # Computed defaults based on environment
+    default_timeout_seconds: int = (
+        300 if environment == "prod" else (
+            180 if environment == "staging" else 60
+        )
+    )
+
+    default_retry_attempts: int = (
+        5 if environment == "prod" else (
+            3 if environment == "staging" else 1
+        )
+    )
+
+    resource_allocation: ComputeResource = ComputeResource {
+        resource_name: "default-compute"
+        cost_center: "shared"
+        project_identifier: "infrastructure"
+        environment: environment
+
+        # Environment-specific resource sizing
+        cpu_cores: 4 if environment == "prod" else (2 if environment == "staging" else 1)
+        memory_gigabytes: 8 if environment == "prod" else (4 if environment == "staging" else 2)
+        storage_gigabytes: 100 if environment == "prod" else 50
+
+        cpu_architecture: "x86_64"
+        performance_tier: "high_performance" if environment == "prod" else "standard"
+    }
+
+    monitoring_configuration: MonitoringConfig = MonitoringConfig {
+        collection_interval_seconds: 15 if environment == "prod" else 60
+        retention_days: 90 if environment == "prod" else 30
+        alert_thresholds: "strict" if environment == "prod" else "relaxed"
+    }
+
+# ✅ Good: Composable configuration with mixins
+schema SecurityMixin:
+    """Security-related configuration that can be mixed into other schemas"""
+
+    encryption_enabled: bool = True
+    access_logging_enabled: bool = True
+    security_scan_enabled: bool = True
+
+    # Security-specific validations
+    check:
+        encryption_enabled == True,
+            "Encryption must be enabled for security compliance"
+
+schema ComplianceMixin:
+    """Compliance-related configuration"""
+
+    compliance_frameworks: [str] = []
+    audit_logging_enabled: bool = False
+    data_retention_policy?: str
+
+    check:
+        len(compliance_frameworks) == 0 or audit_logging_enabled == True,
+            "Compliance frameworks require audit logging"
+
+schema SecureComputeResource(ComputeResource, SecurityMixin, ComplianceMixin):
+    """Compute resource with security and compliance requirements"""
+
+    # Additional security requirements for compute
+    secure_boot_enabled: bool = True
+    encrypted_storage: bool = True
+
+    check:
+        # Inherit all parent validations, plus additional ones
+        "pci" not in compliance_frameworks or encrypted_storage == True,
+            "PCI compliance requires encrypted storage"
+```
+
+## Workflow Patterns
+
+### 1. Dependency Management
+
+```kcl
+# ✅ Good: Clear dependency patterns with proper error handling
+schema InfrastructureWorkflow(main.BatchWorkflow):
+    """Infrastructure deployment with proper dependency management"""
+
+    # Categorize operations for dependency analysis
+    foundation_operations: [str] = []    # Network, security groups, etc.
+ compute_operations: [str] = [] # Servers, instances + service_operations: [str] = [] # Applications, databases + validation_operations: [str] = [] # Testing, health checks + + check: + # Foundation must come first + all([ + len([dep for dep in op.dependencies or [] + if dep.target_operation_id in foundation_operations]) > 0 + for op in operations + if op.operation_id in compute_operations + ]) or len(compute_operations) == 0, + "Compute operations must depend on foundation operations" + + # Services depend on compute + all([ + len([dep for dep in op.dependencies or [] + if dep.target_operation_id in compute_operations]) > 0 + for op in operations + if op.operation_id in service_operations + ]) or len(service_operations) == 0, + "Service operations must depend on compute operations" + +# Example usage with proper dependency chains +production_deployment: InfrastructureWorkflow = InfrastructureWorkflow { + workflow_id: "prod-infra-2025-001" + name: "Production Infrastructure Deployment" + + foundation_operations: ["create_vpc", "setup_security_groups"] + compute_operations: ["create_web_servers", "create_db_servers"] + service_operations: ["install_applications", "configure_databases"] + validation_operations: ["run_health_checks", "validate_connectivity"] + + operations: [ + # Foundation layer + main.BatchOperation { + operation_id: "create_vpc" + name: "Create VPC and Networking" + operation_type: "custom" + action: "create" + parameters: {"cidr": "10.0.0.0/16"} + priority: 10 + timeout: 600 + }, + + # Compute layer (depends on foundation) + main.BatchOperation { + operation_id: "create_web_servers" + name: "Create Web Servers" + operation_type: "server" + action: "create" + parameters: {"count": "3", "type": "web"} + dependencies: [ + main.DependencyDef { + target_operation_id: "create_vpc" + dependency_type: "sequential" + timeout: 300 + fail_on_dependency_error: True + } + ] + priority: 8 + timeout: 900 + }, + + # Service layer (depends on compute) + main.BatchOperation { + operation_id: "install_applications" + name: "Install Web Applications" + operation_type: "taskserv" + action: "create" + parameters: {"apps": ["nginx", "prometheus"]} + dependencies: [ + main.DependencyDef { + target_operation_id: "create_web_servers" + dependency_type: "conditional" + conditions: ["servers_ready", "ssh_accessible"] + timeout: 600 + } + ] + priority: 6 + } + ] +} +``` + +### 2. Multi-Environment Workflows + +```kcl +# ✅ Good: Environment-specific workflow configurations +schema MultiEnvironmentWorkflow: + """Workflow that adapts to different environments""" + + base_workflow: main.BatchWorkflow + target_environment: "dev" | "staging" | "prod" + + # Environment-specific overrides + environment_config: EnvironmentConfig = EnvironmentConfig { + environment: target_environment + + # Adjust parallelism based on environment + max_parallel: target_environment == "prod" ? 3 : 5 + + # Adjust timeouts + operation_timeout_multiplier: target_environment == "prod" ? 1.5 : 1.0 + + # Monitoring intensity + monitoring_level: target_environment == "prod" ? 
"comprehensive" : "basic" + } + + # Generate final workflow with environment adaptations + final_workflow: main.BatchWorkflow = main.BatchWorkflow { + workflow_id: f"{base_workflow.workflow_id}-{target_environment}" + name: f"{base_workflow.name} ({target_environment})" + description: base_workflow.description + + operations: [ + main.BatchOperation { + operation_id: op.operation_id + name: op.name + operation_type: op.operation_type + provider: op.provider + action: op.action + parameters: op.parameters + dependencies: op.dependencies + + # Environment-adapted timeout + timeout: int(op.timeout * environment_config.operation_timeout_multiplier) + + # Environment-adapted priority + priority: op.priority + allow_parallel: op.allow_parallel + + # Environment-specific retry policy + retry_policy: main.RetryPolicy { + max_attempts: target_environment == "prod" ? 3 : 2 + initial_delay: target_environment == "prod" ? 30 : 10 + backoff_multiplier: 2 + } + } + for op in base_workflow.operations + ] + + max_parallel_operations: environment_config.max_parallel + global_timeout: base_workflow.global_timeout + fail_fast: target_environment == "prod" ? False : True + + # Environment-specific storage + storage: main.StorageConfig { + backend: target_environment == "prod" ? "surrealdb" : "filesystem" + base_path: f"./workflows/{target_environment}" + enable_persistence: target_environment != "dev" + retention_hours: target_environment == "prod" ? 2160 : 168 # 90 days vs 1 week + } + + # Environment-specific monitoring + monitoring: main.MonitoringConfig { + enabled: True + backend: "prometheus" + enable_tracing: target_environment == "prod" + enable_notifications: target_environment != "dev" + log_level: target_environment == "dev" ? "debug" : "info" + } + } + +# Usage for different environments +dev_deployment: MultiEnvironmentWorkflow = MultiEnvironmentWorkflow { + target_environment: "dev" + base_workflow: main.BatchWorkflow { + workflow_id: "webapp-deploy" + name: "Web Application Deployment" + operations: [ + # ... base operations + ] + } +} + +prod_deployment: MultiEnvironmentWorkflow = MultiEnvironmentWorkflow { + target_environment: "prod" + base_workflow: dev_deployment.base_workflow # Reuse same base workflow +} +``` + +### 3. Error Recovery Patterns + +```kcl +# ✅ Good: Comprehensive error recovery strategy +schema ResilientWorkflow(main.BatchWorkflow): + """Workflow with advanced error recovery capabilities""" + + # Error categorization + critical_operations: [str] = [] # Operations that cannot fail + optional_operations: [str] = [] # Operations that can be skipped + retry_operations: [str] = [] # Operations with custom retry logic + + # Recovery strategies + global_error_strategy: "fail_fast" | "continue_on_error" | "intelligent" = "intelligent" + + # Enhanced operations with error handling + enhanced_operations: [EnhancedBatchOperation] = [ + EnhancedBatchOperation { + base_operation: op + is_critical: op.operation_id in critical_operations + is_optional: op.operation_id in optional_operations + custom_retry: op.operation_id in retry_operations + + # Adaptive retry policy based on operation characteristics + adaptive_retry_policy: main.RetryPolicy { + max_attempts: ( + is_critical ? 5 : ( + is_optional ? 1 : 3 + ) + ) + initial_delay: is_critical ? 60 : 30 + max_delay: is_critical ? 900 : 300 + backoff_multiplier: 2 + retry_on_errors: [ + "timeout", + "connection_error", + "rate_limit" + ] + (is_critical ? 
[ + "resource_unavailable", + "quota_exceeded" + ] : []) + } + + # Adaptive rollback strategy + adaptive_rollback_strategy: main.RollbackStrategy { + enabled: True + strategy: is_critical ? "manual" : "immediate" + preserve_partial_state: is_critical + custom_rollback_operations: is_critical ? [ + "notify_engineering_team", + "create_incident_ticket", + "preserve_debug_info" + ] : [] + } + } + for op in operations + ] + +schema EnhancedBatchOperation: + """Batch operation with enhanced error handling""" + + base_operation: main.BatchOperation + is_critical: bool = False + is_optional: bool = False + custom_retry: bool = False + + adaptive_retry_policy: main.RetryPolicy + adaptive_rollback_strategy: main.RollbackStrategy + + # Circuit breaker pattern + failure_threshold: int = 3 + recovery_timeout_seconds: int = 300 + + check: + not (is_critical and is_optional), + "Operation cannot be both critical and optional" +``` + +## Error Handling + +### 1. Graceful Degradation + +```kcl +# ✅ Good: Graceful degradation for non-critical components +schema GracefulDegradationWorkflow(main.BatchWorkflow): + """Workflow that can degrade gracefully on partial failures""" + + # Categorize operations by importance + core_operations: [str] = [] # Must succeed + enhancement_operations: [str] = [] # Nice to have + monitoring_operations: [str] = [] # Can be skipped if needed + + # Minimum viable deployment definition + minimum_viable_operations: [str] = core_operations + + # Degradation strategy + degradation_policy: DegradationPolicy = DegradationPolicy { + allow_partial_deployment: True + minimum_success_percentage: 80.0 + + operation_priorities: { + # Core operations (must succeed) + op_id: 10 for op_id in core_operations + } | { + # Enhancement operations (should succeed) + op_id: 5 for op_id in enhancement_operations + } | { + # Monitoring operations (can fail) + op_id: 1 for op_id in monitoring_operations + } + } + + check: + # Ensure minimum viable deployment is achievable + len(minimum_viable_operations) > 0, + "Must specify at least one operation for minimum viable deployment" + + # Core operations should not depend on enhancement operations + all([ + all([ + dep.target_operation_id not in enhancement_operations + for dep in op.dependencies or [] + ]) + for op in operations + if op.operation_id in core_operations + ]), + "Core operations should not depend on enhancement operations" + +schema DegradationPolicy: + """Policy for graceful degradation""" + + allow_partial_deployment: bool = False + minimum_success_percentage: float = 100.0 + operation_priorities: {str: int} = {} + + # Fallback configurations + fallback_configurations: {str: str} = {} + emergency_contacts: [str] = [] + + check: + 0.0 <= minimum_success_percentage and minimum_success_percentage <= 100.0, + "Success percentage must be between 0 and 100" +``` + +### 2. 
Circuit Breaker Patterns
+
+```kcl
+# ✅ Good: Circuit breaker for external dependencies
+schema CircuitBreakerOperation(main.BatchOperation):
+    """Operation with circuit breaker pattern for external dependencies"""
+
+    # Circuit breaker configuration
+    circuit_breaker_enabled: bool = False
+    failure_threshold: int = 5
+    recovery_timeout_seconds: int = 300
+
+    # Health check configuration
+    health_check_endpoint?: str
+    health_check_interval_seconds: int = 30
+
+    # Fallback behavior
+    fallback_enabled: bool = False
+    fallback_operation?: main.BatchOperation
+
+    check:
+        # Guarded checks: they only apply when the circuit breaker is enabled
+        failure_threshold > 0 if circuit_breaker_enabled,
+        "Circuit breaker must have positive failure threshold"
+
+        recovery_timeout_seconds > 0 if circuit_breaker_enabled,
+        "Circuit breaker must have positive recovery timeout"
+
+        fallback_operation != Undefined if fallback_enabled,
+        "Fallback requires fallback operation definition"
+
+# Example: Database operation with circuit breaker
+database_operation_with_circuit_breaker: CircuitBreakerOperation = CircuitBreakerOperation {
+    # Base operation
+    operation_id: "setup_database"
+    name: "Setup Production Database"
+    operation_type: "server"
+    action: "create"
+    parameters: {"service": "postgresql", "version": "15"}
+    timeout: 1800
+
+    # Circuit breaker settings
+    circuit_breaker_enabled: True
+    failure_threshold: 3
+    recovery_timeout_seconds: 600
+
+    # Health monitoring
+    health_check_endpoint: "http://db-health.internal/health"
+    health_check_interval_seconds: 60
+
+    # Fallback to read replica
+    fallback_enabled: True
+    fallback_operation: main.BatchOperation {
+        operation_id: "setup_database_readonly"
+        name: "Setup Read-Only Database Fallback"
+        operation_type: "server"
+        action: "create"
+        parameters: {"service": "postgresql", "mode": "readonly"}
+        timeout: 900
+    }
+}
+```
+
+## Performance Optimization
+
+### 1. 
Parallel Execution Strategies + +```kcl +# ✅ Good: Intelligent parallelization +schema OptimizedParallelWorkflow(main.BatchWorkflow): + """Workflow optimized for parallel execution""" + + # Parallel execution groups + parallel_groups: [[str]] = [] # Groups of operations that can run in parallel + + # Resource-aware scheduling + resource_requirements: {str: ResourceRequirement} = {} + total_available_resources: ResourceCapacity = ResourceCapacity { + max_cpu_cores: 16 + max_memory_gb: 64 + max_network_bandwidth_mbps: 1000 + max_concurrent_operations: 10 + } + + # Computed optimal parallelism + optimal_parallel_limit: int = min([ + total_available_resources.max_concurrent_operations, + len(operations), + 8 # Reasonable default maximum + ]) + + # Generate workflow with optimized settings + optimized_workflow: main.BatchWorkflow = main.BatchWorkflow { + workflow_id: workflow_id + name: name + description: description + + operations: [ + OptimizedBatchOperation { + base_operation: op + resource_hint: resource_requirements[op.operation_id] or ResourceRequirement { + cpu_cores: 1 + memory_gb: 2 + estimated_duration_seconds: op.timeout / 2 + } + + # Enable parallelism for operations in parallel groups + computed_allow_parallel: any([ + op.operation_id in group and len(group) > 1 + for group in parallel_groups + ]) + } + for op in operations + ] + + max_parallel_operations: optimal_parallel_limit + global_timeout: global_timeout + fail_fast: fail_fast + + # Optimize storage for performance + storage: main.StorageConfig { + backend: "surrealdb" # Better for concurrent access + enable_compression: False # Trade space for speed + connection_config: { + "connection_pool_size": str(optimal_parallel_limit * 2) + "max_retries": "3" + "timeout": "30" + } + } + } + +schema OptimizedBatchOperation: + """Batch operation with performance optimizations""" + + base_operation: main.BatchOperation + resource_hint: ResourceRequirement + computed_allow_parallel: bool + + # Performance-optimized operation + optimized_operation: main.BatchOperation = main.BatchOperation { + operation_id: base_operation.operation_id + name: base_operation.name + operation_type: base_operation.operation_type + provider: base_operation.provider + action: base_operation.action + parameters: base_operation.parameters + dependencies: base_operation.dependencies + + # Optimized settings + timeout: max([base_operation.timeout, resource_hint.estimated_duration_seconds * 2]) + allow_parallel: computed_allow_parallel + priority: base_operation.priority + + # Performance-oriented retry policy + retry_policy: main.RetryPolicy { + max_attempts: 2 # Fewer retries for faster failure detection + initial_delay: 10 + max_delay: 60 + backoff_multiplier: 1.5 + retry_on_errors: ["timeout", "rate_limit"] # Only retry fast-failing errors + } + } + +schema ResourceRequirement: + """Resource requirements for performance planning""" + cpu_cores: int = 1 + memory_gb: int = 2 + estimated_duration_seconds: int = 300 + io_intensive: bool = False + network_intensive: bool = False + +schema ResourceCapacity: + """Available resource capacity""" + max_cpu_cores: int + max_memory_gb: int + max_network_bandwidth_mbps: int + max_concurrent_operations: int +``` + +### 2. 
Caching and Memoization
+
+```kcl
+# ✅ Good: Caching for expensive operations
+schema CachedOperation(main.BatchOperation):
+    """Operation with caching capabilities"""
+
+    # Caching configuration
+    cache_enabled: bool = False
+    cache_key_template: str = "${operation_id}-${provider}-${action}"
+    cache_ttl_seconds: int = 3600    # 1 hour default
+
+    # Cache invalidation rules
+    cache_invalidation_triggers: [str] = []
+    force_cache_refresh: bool = False
+
+    # Computed cache key
+    computed_cache_key: str = "${operation_id}-${provider}-${action}"
+
+    # Cache-aware timeout (shorter when a cache hit is expected)
+    cache_aware_timeout: int = timeout // 2 if cache_enabled else timeout
+
+    check:
+        cache_ttl_seconds > 0 if cache_enabled,
+        "Cache TTL must be positive when caching is enabled"
+
+# Example: Cached provider operations
+cached_server_creation: CachedOperation = CachedOperation {
+    # Base operation
+    operation_id: "create_standardized_servers"
+    name: "Create Standardized Web Servers"
+    operation_type: "server"
+    provider: "upcloud"
+    action: "create"
+    parameters: {
+        "plan": "2xCPU-4GB"
+        "zone": "fi-hel2"
+        "image": "ubuntu-22.04"
+    }
+    timeout: 900
+
+    # Caching settings
+    cache_enabled: True
+    cache_key_template: "server-${plan}-${zone}-${image}"
+    cache_ttl_seconds: 7200    # 2 hours
+
+    # Cache invalidation
+    cache_invalidation_triggers: ["image_updated", "plan_changed"]
+}
+```
+
+## Security Considerations
+
+### 1. Secure Configuration Management
+
+```kcl
+# ✅ Good: Secure configuration with proper secret handling
+schema SecureConfiguration:
+    """Security-first configuration management"""
+
+    # Secret management
+    secrets_provider: main.SecretProvider = main.SecretProvider {
+        provider: "sops"
+        sops_config: main.SopsConfig {
+            config_path: "./.sops.yaml"
+            age_key_file: "{{env.HOME}}/.config/sops/age/keys.txt"
+            use_age: True
+        }
+    }
+
+    # Security classifications
+    data_classification: "public" | "internal" | "confidential" | "restricted"
+    encryption_required: bool = data_classification != "public"
+    audit_logging_required: bool = data_classification in ["confidential", "restricted"]
+    audit_log_destinations: [str] = []    # Must be non-empty when audit logging is required
+
+    # Access control
+    allowed_environments: [str] = ["dev", "staging", "prod"]
+    environment_access_matrix: {str: [str]} = {
+        "dev": ["developers", "qa_team"]
+        "staging": ["developers", "qa_team", "release_team"]
+        "prod": ["release_team", "operations_team"]
+    }
+
+    # Network security
+    network_isolation_required: bool = data_classification in ["confidential", "restricted"]
+    vpc_isolation: bool = network_isolation_required
+    private_subnets_only: bool = data_classification == "restricted"
+
+    check:
+        encryption_required == True if data_classification == "restricted",
+        "Restricted data must be encrypted"
+
+        len(audit_log_destinations) > 0 if audit_logging_required,
+        "Audit logging destinations must be specified for sensitive data"
+
+# Example: Production security configuration
+production_security: SecureConfiguration = SecureConfiguration {
+    data_classification: "confidential"
+    # encryption_required automatically becomes True
+    # audit_logging_required automatically becomes True
+    # network_isolation_required automatically becomes True
+
+    allowed_environments: ["staging", "prod"]
+    environment_access_matrix: {
+        "staging": ["release_team", "security_team"]
+        "prod": ["operations_team", "security_team"]
+    }
+
+    audit_log_destinations: [
+        "siem://security.company.com",
+        "s3://audit-logs-prod/workflows"
+    ]
+}
+```
+
+### 2. 
Compliance and Auditing + +```kcl +# ✅ Good: Compliance-aware workflow design +schema ComplianceWorkflow(main.BatchWorkflow): + """Workflow with built-in compliance features""" + + # Compliance framework requirements + compliance_frameworks: [str] = [] + compliance_metadata: ComplianceMetadata = ComplianceMetadata { + frameworks: compliance_frameworks + audit_trail_required: "sox" in compliance_frameworks or "pci" in compliance_frameworks + data_residency_requirements: "gdpr" in compliance_frameworks ? ["eu"] : [] + retention_requirements: get_retention_requirements(compliance_frameworks) + } + + # Enhanced workflow with compliance features + compliant_workflow: main.BatchWorkflow = main.BatchWorkflow { + workflow_id: workflow_id + name: name + description: description + + operations: [ + ComplianceAwareBatchOperation { + base_operation: op + compliance_metadata: compliance_metadata + }.compliant_operation + for op in operations + ] + + # Compliance-aware storage + storage: main.StorageConfig { + backend: "surrealdb" + enable_persistence: True + retention_hours: compliance_metadata.retention_requirements.workflow_data_hours + enable_compression: False # For audit clarity + encryption: compliance_metadata.audit_trail_required ? main.SecretProvider { + provider: "sops" + sops_config: main.SopsConfig { + config_path: "./.sops.yaml" + age_key_file: "{{env.HOME}}/.config/sops/age/keys.txt" + use_age: True + } + } : Undefined + } + + # Compliance-aware monitoring + monitoring: main.MonitoringConfig { + enabled: True + backend: "prometheus" + enable_tracing: compliance_metadata.audit_trail_required + enable_notifications: True + log_level: "info" + collection_interval: compliance_metadata.audit_trail_required ? 15 : 30 + } + + # Audit trail in execution context + execution_context: execution_context | { + "compliance_frameworks": str(compliance_frameworks) + "audit_trail_enabled": str(compliance_metadata.audit_trail_required) + "data_classification": "confidential" + } + } + +schema ComplianceMetadata: + """Metadata for compliance requirements""" + frameworks: [str] + audit_trail_required: bool + data_residency_requirements: [str] + retention_requirements: RetentionRequirements + +schema RetentionRequirements: + """Data retention requirements based on compliance""" + workflow_data_hours: int = 8760 # 1 year default + audit_log_hours: int = 26280 # 3 years default + backup_retention_hours: int = 43800 # 5 years default + +schema ComplianceAwareBatchOperation: + """Batch operation with compliance awareness""" + base_operation: main.BatchOperation + compliance_metadata: ComplianceMetadata + + compliant_operation: main.BatchOperation = main.BatchOperation { + operation_id: base_operation.operation_id + name: base_operation.name + operation_type: base_operation.operation_type + provider: base_operation.provider + action: base_operation.action + parameters: base_operation.parameters | ( + compliance_metadata.audit_trail_required ? { + "audit_enabled": "true" + "compliance_mode": "strict" + } : {} + ) + dependencies: base_operation.dependencies + timeout: base_operation.timeout + allow_parallel: base_operation.allow_parallel + priority: base_operation.priority + + # Enhanced retry for compliance + retry_policy: main.RetryPolicy { + max_attempts: compliance_metadata.audit_trail_required ? 
5 : 3 + initial_delay: 30 + max_delay: 300 + backoff_multiplier: 2 + retry_on_errors: ["timeout", "connection_error", "rate_limit"] + } + + # Conservative rollback for compliance + rollback_strategy: main.RollbackStrategy { + enabled: True + strategy: "manual" # Manual approval for compliance + preserve_partial_state: True + rollback_timeout: 1800 + custom_rollback_operations: [ + "create_audit_entry", + "notify_compliance_team", + "preserve_evidence" + ] + } + } + +# Helper function for retention requirements +def get_retention_requirements(frameworks: [str]) -> RetentionRequirements: + """Get retention requirements based on compliance frameworks""" + if "sox" in frameworks: + return RetentionRequirements { + workflow_data_hours: 43800 # 5 years + audit_log_hours: 61320 # 7 years + backup_retention_hours: 87600 # 10 years + } + elif "pci" in frameworks: + return RetentionRequirements { + workflow_data_hours: 8760 # 1 year + audit_log_hours: 26280 # 3 years + backup_retention_hours: 43800 # 5 years + } + else: + return RetentionRequirements { + workflow_data_hours: 8760 # 1 year default + audit_log_hours: 26280 # 3 years default + backup_retention_hours: 43800 # 5 years default + } +``` + +## Testing Strategies + +### 1. Schema Testing + +```bash +#!/bin/bash +# Schema testing script + +# Test 1: Basic syntax validation +echo "Testing schema syntax..." +find . -name "*.k" -exec kcl fmt {} \; + +# Test 2: Schema compilation +echo "Testing schema compilation..." +for file in *.k; do + echo "Testing $file" + kcl run "$file" > /dev/null || echo "FAILED: $file" +done + +# Test 3: Constraint validation +echo "Testing constraints..." +kcl run test_constraints.k + +# Test 4: JSON serialization +echo "Testing JSON serialization..." +kcl run examples/simple_workflow.k --format json | jq '.' > /dev/null + +# Test 5: Cross-schema compatibility +echo "Testing cross-schema compatibility..." +kcl run integration_test.k +``` + +### 2. Validation Testing + +```kcl +# Test configuration for validation +test_validation_cases: { + # Valid cases + valid_server: main.Server = main.Server { + hostname: "test-01" + title: "Test Server" + labels: "env: test" + user: "test" + } + + # Edge cases + minimal_workflow: main.BatchWorkflow = main.BatchWorkflow { + workflow_id: "minimal" + name: "Minimal Test Workflow" + operations: [ + main.BatchOperation { + operation_id: "test_op" + name: "Test Operation" + operation_type: "custom" + action: "test" + parameters: {} + } + ] + } + + # Boundary testing + max_timeout_operation: main.BatchOperation = main.BatchOperation { + operation_id: "max_timeout" + name: "Maximum Timeout Test" + operation_type: "custom" + action: "test" + parameters: {} + timeout: 86400 # 24 hours - test upper boundary + } +} +``` + +## Maintenance Guidelines + +### 1. 
Schema Evolution + +```kcl +# ✅ Good: Backward-compatible schema evolution +schema ServerV2(main.Server): + """Enhanced server schema with backward compatibility""" + + # New optional fields (backward compatible) + performance_profile?: "standard" | "high_performance" | "burstable" + auto_scaling_enabled?: bool = False + + # Deprecated fields (marked but still supported) + deprecated_field?: str # TODO: Remove in v3.0 + + # Version metadata + schema_version: str = "2.0" + + check: + # Maintain existing validations + len(hostname) > 0, "Hostname required" + len(title) > 0, "Title required" + + # New validations for new fields + performance_profile != Undefined and auto_scaling_enabled == True and performance_profile != "burstable", + "Auto-scaling not compatible with burstable performance profile" + +# Migration helper +schema ServerMigration: + """Helper for migrating from ServerV1 to ServerV2""" + + v1_server: main.Server + + v2_server: ServerV2 = ServerV2 { + # Copy all existing fields + hostname: v1_server.hostname + title: v1_server.title + labels: v1_server.labels + user: v1_server.user + + # Set defaults for new fields + performance_profile: "standard" + auto_scaling_enabled: False + + # Copy optional fields if they exist + taskservs: v1_server.taskservs + cluster: v1_server.cluster + } +``` + +### 2. Documentation Updates + +```kcl +# ✅ Good: Self-documenting schemas with examples +schema DocumentedWorkflow(main.BatchWorkflow): + """ + Production workflow with comprehensive documentation + + This workflow follows company best practices for: + - Multi-environment deployment + - Error handling and recovery + - Security and compliance + - Performance optimization + + Example Usage: + prod_workflow: DocumentedWorkflow = DocumentedWorkflow { + environment: "production" + security_level: "high" + base_workflow: main.BatchWorkflow { + workflow_id: "webapp-deploy-001" + name: "Web Application Deployment" + operations: [...] + } + } + + See Also: + - examples/production_workflow.k + - docs/WORKFLOW_PATTERNS.md + - docs/SECURITY_GUIDELINES.md + """ + + # Required metadata for documentation + environment: "dev" | "staging" | "prod" + security_level: "low" | "medium" | "high" + base_workflow: main.BatchWorkflow + + # Auto-generated documentation fields + documentation_generated_at: str = "{{now.date}}" + schema_version: str = "1.0" + + check: + environment == "prod" and security_level == "high", + "Production workflows must use high security level" +``` + +This comprehensive best practices guide provides the foundation for creating maintainable, secure, and performant KCL configurations for the provisioning system. \ No newline at end of file diff --git a/docs/VALIDATION.md b/docs/VALIDATION.md new file mode 100644 index 0000000..fb3e8ed --- /dev/null +++ b/docs/VALIDATION.md @@ -0,0 +1,800 @@ +# Schema Validation and Best Practices + +This document provides comprehensive guidance on validating KCL schemas and following best practices for the provisioning package. 
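+
+As a quick sanity check before diving into the sections below, a minimal configuration should validate cleanly. The sketch below assumes a hypothetical `smoke.k` file that imports this package's `main` module; the field set mirrors the `main.Server` examples used later in this guide:
+
+```kcl
+import .main
+
+# Minimal server satisfying the required fields of main.Server;
+# `kcl run smoke.k` should exit without check failures if the package is wired correctly.
+smoke_server: main.Server = main.Server {
+    hostname: "web-01"
+    title: "Smoke Test Server"
+    labels: "env: dev"
+    user: "admin"
+}
+```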
+ +## Table of Contents + +- [Schema Validation](#schema-validation) +- [Built-in Constraints](#built-in-constraints) +- [Custom Validation](#custom-validation) +- [Best Practices](#best-practices) +- [Common Patterns](#common-patterns) +- [Troubleshooting](#troubleshooting) + +## Schema Validation + +### Basic Validation + +```bash +# Validate syntax and run schema checks +kcl run config.k + +# Format and validate all files +kcl fmt *.k + +# Validate with verbose output +kcl run config.k --debug + +# Validate against specific schema +kcl vet config.k --schema main.Server +``` + +### JSON Output Validation + +```bash +# Generate and validate JSON output +kcl run config.k --format json | jq '.' + +# Validate JSON schema structure +kcl run config.k --format json | jq '.workflow_id // error("Missing workflow_id")' + +# Pretty print for inspection +kcl run config.k --format json | jq '.operations[] | {operation_id, name, provider}' +``` + +### Validation in CI/CD + +```yaml +# GitHub Actions example +- name: Validate KCL Schemas + run: | + find . -name "*.k" -exec kcl fmt {} \; + find . -name "*.k" -exec kcl run {} \; + +# Check for schema changes +- name: Check Schema Compatibility + run: | + kcl run main.k --format json > current_schema.json + diff expected_schema.json current_schema.json +``` + +## Built-in Constraints + +### Server Schema Constraints + +```kcl +import .main + +# ✅ Valid server configuration +valid_server: main.Server = main.Server { + hostname: "web-01" # ✅ Non-empty string required + title: "Web Server" # ✅ Non-empty string required + labels: "env: prod" # ✅ Required field + user: "admin" # ✅ Required field + + # Optional but validated fields + user_ssh_port: 22 # ✅ Valid port number + running_timeout: 300 # ✅ Positive integer + time_zone: "UTC" # ✅ Valid timezone string +} + +# ❌ Invalid configurations that will fail validation +invalid_examples: { + # hostname: "" # ❌ Empty hostname not allowed + # title: "" # ❌ Empty title not allowed + # user_ssh_port: -1 # ❌ Negative port not allowed + # running_timeout: 0 # ❌ Zero timeout not allowed +} +``` + +### Workflow Schema Constraints + +```kcl +import .main + +# ✅ Valid workflow with proper constraints +valid_workflow: main.BatchWorkflow = main.BatchWorkflow { + workflow_id: "deploy_001" # ✅ Non-empty ID required + name: "Production Deployment" # ✅ Non-empty name required + operations: [ # ✅ At least one operation required + main.BatchOperation { + operation_id: "create_servers" # ✅ Unique operation ID + name: "Create Servers" + operation_type: "server" + action: "create" + parameters: {} + timeout: 600 # ✅ Positive timeout + priority: 5 # ✅ Valid priority + } + ] + max_parallel_operations: 3 # ✅ Non-negative number + global_timeout: 3600 # ✅ Positive global timeout +} + +# ❌ Constraint violations +constraint_violations: { + # workflow_id: "" # ❌ Empty workflow ID + # operations: [] # ❌ Empty operations list + # max_parallel_operations: -1 # ❌ Negative parallel limit + # global_timeout: 0 # ❌ Zero global timeout +} +``` + +### Kubernetes Schema Constraints + +```kcl +import .main + +# ✅ Valid Kubernetes deployment with constraints +valid_k8s: main.K8sDeploy = main.K8sDeploy { + name: "webapp" # ✅ Non-empty name + namespace: "production" # ✅ Valid namespace + + spec: main.K8sDeploySpec { + replicas: 3 # ✅ Positive replica count + containers: [ # ✅ At least one container required + main.K8sContainers { + name: "app" # ✅ Non-empty container name + image: "nginx:1.21" # ✅ Valid image reference + + resources_requests: 
main.K8sResources { + memory: "128Mi" # ✅ Valid K8s memory format + cpu: "100m" # ✅ Valid K8s CPU format + } + + resources_limits: main.K8sResources { + memory: "256Mi" # ✅ Limits >= requests (enforced) + cpu: "200m" + } + } + ] + } +} +``` + +### Dependency Schema Constraints + +```kcl +import .main + +# ✅ Valid dependency definitions +valid_dependencies: main.TaskservDependencies = main.TaskservDependencies { + name: "kubernetes" # ✅ Lowercase name required + + requires: ["containerd", "cni"] # ✅ Valid dependency list + conflicts: ["docker"] # ✅ Cannot coexist with docker + + resources: main.ResourceRequirement { + cpu: "100m" # ✅ Non-empty CPU requirement + memory: "128Mi" # ✅ Non-empty memory requirement + disk: "1Gi" # ✅ Non-empty disk requirement + } + + timeout: 600 # ✅ Positive timeout + retry_count: 3 # ✅ Non-negative retry count + + os_support: ["linux"] # ✅ At least one OS required + arch_support: ["amd64", "arm64"] # ✅ At least one arch required +} + +# ❌ Constraint violations +dependency_violations: { + # name: "Kubernetes" # ❌ Must be lowercase + # name: "" # ❌ Cannot be empty + # timeout: 0 # ❌ Must be positive + # retry_count: -1 # ❌ Cannot be negative + # os_support: [] # ❌ Must specify at least one OS +} +``` + +## Custom Validation + +### Adding Custom Constraints + +```kcl +import .main +import regex + +# Custom server schema with additional validation +schema CustomServer(main.Server): + """Custom server with additional business rules""" + + # Additional custom fields + environment: "dev" | "staging" | "prod" + cost_center: str + + check: + # Business rule: production servers must have specific naming + environment == "prod" and regex.match(hostname, "^prod-[a-z0-9-]+$"), + "Production servers must start with 'prod-'" + + # Business rule: staging servers have resource limits + environment == "staging" and len(taskservs or []) <= 3, + "Staging servers limited to 3 taskservs" + + # Business rule: cost center must be valid + cost_center in ["engineering", "operations", "security"], + "Invalid cost center: ${cost_center}" + +# Usage with validation +prod_server: CustomServer = CustomServer { + hostname: "prod-web-01" # ✅ Matches production naming + title: "Production Web Server" + labels: "env: prod" + user: "admin" + environment: "prod" # ✅ Valid environment + cost_center: "engineering" # ✅ Valid cost center +} +``` + +### Conditional Validation + +```kcl +import .main + +# Workflow with conditional validation based on environment +schema EnvironmentWorkflow(main.BatchWorkflow): + """Workflow with environment-specific validation""" + + environment: "dev" | "staging" | "prod" + + check: + # Production workflows must have monitoring + environment == "prod" and monitoring.enabled == True, + "Production workflows must enable monitoring" + + # Production workflows must have rollback enabled + environment == "prod" and default_rollback_strategy.enabled == True, + "Production workflows must enable rollback" + + # Development can have shorter timeouts + environment == "dev" and global_timeout <= 1800, # 30 minutes + "Development workflows should complete within 30 minutes" + + # Staging must have retry policies + environment == "staging" and default_retry_policy.max_attempts >= 2, + "Staging workflows must have retry policies" + +# Valid production workflow +prod_workflow: EnvironmentWorkflow = EnvironmentWorkflow { + workflow_id: "prod_deploy_001" + name: "Production Deployment" + environment: "prod" # ✅ Production environment + + operations: [ + main.BatchOperation { + 
operation_id: "deploy" + name: "Deploy Application" + operation_type: "server" + action: "create" + parameters: {} + } + ] + + # ✅ Required for production + monitoring: main.MonitoringConfig { + enabled: True + backend: "prometheus" + } + + # ✅ Required for production + default_rollback_strategy: main.RollbackStrategy { + enabled: True + strategy: "immediate" + } +} +``` + +### Cross-Field Validation + +```kcl +import .main + +# Validate relationships between fields +schema ValidatedBatchOperation(main.BatchOperation): + """Batch operation with cross-field validation""" + + check: + # Timeout should be reasonable for operation type + operation_type == "server" and timeout >= 300, + "Server operations need at least 5 minutes timeout" + + operation_type == "taskserv" and timeout >= 600, + "Taskserv operations need at least 10 minutes timeout" + + # High priority operations should have retry policies + priority >= 8 and retry_policy.max_attempts >= 2, + "High priority operations should have retry policies" + + # Parallel operations should have lower priority + allow_parallel == True and priority <= 7, + "Parallel operations should have lower priority for scheduling" + +# Validate workflow operation consistency +schema ConsistentWorkflow(main.BatchWorkflow): + """Workflow with consistent operation validation""" + + check: + # All operation IDs must be unique + len(operations) == len([op.operation_id for op in operations] | unique), + "All operation IDs must be unique" + + # Dependencies must reference existing operations + all([ + dep.target_operation_id in [op.operation_id for op in operations] + for op in operations + for dep in op.dependencies or [] + ]), + "All dependencies must reference existing operations" + + # No circular dependencies (simplified check) + len(operations) > 0, + "Workflow must have at least one operation" +``` + +## Best Practices + +### 1. Schema Design Principles + +```kcl +# ✅ Good: Descriptive field names and documentation +schema WellDocumentedServer: + """ + Server configuration for production workloads + Follows company security and operational standards + """ + + # Core identification + hostname: str # DNS-compliant hostname + fqdn?: str # Fully qualified domain name + + # Environment classification + environment: "dev" | "staging" | "prod" + classification: "public" | "internal" | "confidential" + + # Operational metadata + owner_team: str # Team responsible for maintenance + cost_center: str # Billing allocation + backup_required: bool # Whether automated backups are needed + + check: + len(hostname) > 0 and len(hostname) <= 63, "Hostname must be 1-63 characters" + len(owner_team) > 0, "Owner team must be specified" + len(cost_center) > 0, "Cost center must be specified" + +# ❌ Avoid: Unclear field names and missing validation +schema PoorlyDocumentedServer: + name: str # ❌ Ambiguous - hostname? title? display name? + env: str # ❌ No constraints - any string allowed + data: {str: str} # ❌ Unstructured data without validation +``` + +### 2. 
Validation Strategy + +```kcl +# ✅ Good: Layered validation with clear error messages +schema ProductionWorkflow(main.BatchWorkflow): + """Production workflow with comprehensive validation""" + + # Business metadata + change_request_id: str + approver: str + maintenance_window?: str + + check: + # Business process validation + regex.match(change_request_id, "^CHG-[0-9]{4}-[0-9]{3}$"), + "Change request ID must match format CHG-YYYY-NNN" + + # Operational validation + global_timeout <= 14400, # 4 hours max + "Production workflows must complete within 4 hours" + + # Safety validation + default_rollback_strategy.enabled == True, + "Production workflows must enable rollback" + + # Monitoring validation + monitoring.enabled == True and monitoring.enable_notifications == True, + "Production workflows must enable monitoring and notifications" + +# ✅ Good: Environment-specific defaults with validation +schema EnvironmentDefaults: + """Environment-specific default configurations""" + + environment: "dev" | "staging" | "prod" + + # Default timeouts by environment + default_timeout: int = environment == "prod" ? 1800 : (environment == "staging" ? 1200 : 600) + + # Default retry attempts by environment + default_retries: int = environment == "prod" ? 3 : (environment == "staging" ? 2 : 1) + + # Default monitoring settings + monitoring_enabled: bool = environment == "prod" ? True : False + + check: + default_timeout > 0, "Timeout must be positive" + default_retries >= 0, "Retries cannot be negative" +``` + +### 3. Schema Composition Patterns + +```kcl +# ✅ Good: Composable schema design +schema BaseResource: + """Common fields for all resources""" + name: str + tags: {str: str} = {} + created_at?: str + updated_at?: str + + check: + len(name) > 0, "Name cannot be empty" + regex.match(name, "^[a-z0-9-]+$"), "Name must be lowercase alphanumeric with hyphens" + +schema MonitoredResource(BaseResource): + """Resource with monitoring capabilities""" + monitoring_enabled: bool = True + alert_thresholds: {str: float} = {} + + check: + monitoring_enabled == True and len(alert_thresholds) > 0, + "Monitored resources must define alert thresholds" + +schema SecureResource(BaseResource): + """Resource with security requirements""" + encryption_enabled: bool = True + access_policy: str + compliance_tags: [str] = [] + + check: + encryption_enabled == True, "Security-sensitive resources must enable encryption" + len(access_policy) > 0, "Access policy must be defined" + "pci" in compliance_tags or "sox" in compliance_tags or "hipaa" in compliance_tags, + "Must specify compliance requirements" + +# Composed schema inheriting multiple patterns +schema ProductionDatabase(MonitoredResource, SecureResource): + """Production database with full operational requirements""" + backup_retention_days: int = 30 + high_availability: bool = True + + check: + backup_retention_days >= 7, "Production databases need minimum 7 days backup retention" + high_availability == True, "Production databases must be highly available" +``` + +### 4. Error Handling Patterns + +```kcl +# ✅ Good: Comprehensive error scenarios with specific handling +schema RobustBatchOperation(main.BatchOperation): + """Batch operation with robust error handling""" + + # Error classification + critical_operation: bool = False + max_failure_rate: float = 0.1 + + # Enhanced retry configuration + retry_policy: main.RetryPolicy = main.RetryPolicy { + max_attempts: critical_operation ? 5 : 3 + initial_delay: critical_operation ? 30 : 10 + max_delay: critical_operation ? 
600 : 300 + backoff_multiplier: 2 + retry_on_errors: [ + "connection_error", + "timeout", + "rate_limit", + "resource_unavailable" + ] + } + + # Enhanced rollback strategy + rollback_strategy: main.RollbackStrategy = main.RollbackStrategy { + enabled: True + strategy: critical_operation ? "manual" : "immediate" + preserve_partial_state: critical_operation + custom_rollback_operations: critical_operation ? [ + "create_incident_ticket", + "notify_on_call_engineer", + "preserve_logs" + ] : [] + } + + check: + 0 <= max_failure_rate and max_failure_rate <= 1, + "Failure rate must be between 0 and 1" + + critical_operation == True and timeout >= 1800, + "Critical operations need extended timeout" +``` + +## Common Patterns + +### 1. Multi-Environment Configuration + +```kcl +# Configuration that adapts to environment +schema EnvironmentAwareConfig: + environment: "dev" | "staging" | "prod" + + # Computed values based on environment + replica_count: int = ( + environment == "prod" ? 3 : ( + environment == "staging" ? 2 : 1) + ) + + resource_requests: main.K8sResources = main.K8sResources { + memory: environment == "prod" ? "512Mi" : "256Mi" + cpu: environment == "prod" ? "200m" : "100m" + } + + monitoring_enabled: bool = environment != "dev" + + backup_enabled: bool = environment == "prod" + +# Usage pattern +prod_config: EnvironmentAwareConfig = EnvironmentAwareConfig { + environment: "prod" + # replica_count automatically becomes 3 + # monitoring_enabled automatically becomes True + # backup_enabled automatically becomes True +} +``` + +### 2. Provider Abstraction + +```kcl +# Provider-agnostic resource definition +schema AbstractServer: + """Provider-agnostic server specification""" + + # Common specification + cpu_cores: int + memory_gb: int + storage_gb: int + network_performance: "low" | "moderate" | "high" + + # Provider-specific mapping + provider: "upcloud" | "aws" | "gcp" + + # Computed provider-specific values + instance_type: str = ( + provider == "upcloud" ? f"{cpu_cores}xCPU-{memory_gb}GB" : ( + provider == "aws" ? f"m5.{cpu_cores == 1 ? 'large' : 'xlarge'}" : ( + provider == "gcp" ? f"n2-standard-{cpu_cores}" : "unknown" + )) + ) + + storage_type: str = ( + provider == "upcloud" ? "MaxIOPS" : ( + provider == "aws" ? "gp3" : ( + provider == "gcp" ? "pd-ssd" : "standard" + )) + ) + +# Multi-provider workflow using abstraction +mixed_deployment: main.BatchWorkflow = main.BatchWorkflow { + workflow_id: "mixed_deploy_001" + name: "Multi-Provider Deployment" + + operations: [ + # UpCloud servers + main.BatchOperation { + operation_id: "upcloud_servers" + provider: "upcloud" + parameters: { + "instance_type": "2xCPU-4GB" # UpCloud format + "storage_type": "MaxIOPS" + } + }, + # AWS servers + main.BatchOperation { + operation_id: "aws_servers" + provider: "aws" + parameters: { + "instance_type": "m5.large" # AWS format + "storage_type": "gp3" + } + } + ] +} +``` + +### 3. 
Dependency Management + +```kcl +# Complex dependency patterns +schema DependencyAwareWorkflow(main.BatchWorkflow): + """Workflow with intelligent dependency management""" + + # Categorize operations by type + infrastructure_ops: [str] = [ + op.operation_id for op in operations + if op.operation_type == "server" + ] + + service_ops: [str] = [ + op.operation_id for op in operations + if op.operation_type == "taskserv" + ] + + validation_ops: [str] = [ + op.operation_id for op in operations + if op.operation_type == "custom" and "validate" in op.name.lower() + ] + + check: + # Infrastructure must come before services + all([ + len([dep for dep in op.dependencies or [] + if dep.target_operation_id in infrastructure_ops]) > 0 + for op in operations + if op.operation_id in service_ops + ]) or len(service_ops) == 0, + "Service operations must depend on infrastructure operations" + + # Validation must come last + all([ + len([dep for dep in op.dependencies or [] + if dep.target_operation_id in service_ops or dep.target_operation_id in infrastructure_ops]) > 0 + for op in operations + if op.operation_id in validation_ops + ]) or len(validation_ops) == 0, + "Validation operations must depend on other operations" +``` + +## Troubleshooting + +### Common Validation Errors + +#### 1. Missing Required Fields + +```bash +# Error: attribute 'labels' of Server is required +# ❌ Incomplete server definition +server: main.Server = main.Server { + hostname: "web-01" + title: "Web Server" + # Missing: labels, user +} + +# ✅ Complete server definition +server: main.Server = main.Server { + hostname: "web-01" + title: "Web Server" + labels: "env: prod" # ✅ Required field + user: "admin" # ✅ Required field +} +``` + +#### 2. Type Mismatches + +```bash +# Error: expect int, got str +# ❌ Wrong type +workflow: main.BatchWorkflow = main.BatchWorkflow { + max_parallel_operations: "3" # ❌ String instead of int +} + +# ✅ Correct type +workflow: main.BatchWorkflow = main.BatchWorkflow { + max_parallel_operations: 3 # ✅ Integer +} +``` + +#### 3. Constraint Violations + +```bash +# Error: Check failed: hostname cannot be empty +# ❌ Constraint violation +server: main.Server = main.Server { + hostname: "" # ❌ Empty string violates constraint + title: "Server" + labels: "env: prod" + user: "admin" +} + +# ✅ Valid constraint +server: main.Server = main.Server { + hostname: "web-01" # ✅ Non-empty string + title: "Server" + labels: "env: prod" + user: "admin" +} +``` + +### Debugging Techniques + +#### 1. Step-by-step Validation + +```bash +# Validate incrementally +kcl run basic_config.k # Start with minimal config +kcl run enhanced_config.k # Add features gradually +kcl run complete_config.k # Full configuration +``` + +#### 2. Schema Introspection + +```bash +# Check what fields are available +kcl run -c 'import .main; main.Server' --format json + +# Validate against specific schema +kcl vet config.k --schema main.Server + +# Debug with verbose output +kcl run config.k --debug --verbose +``` + +#### 3. Constraint Testing + +```kcl +# Test constraint behavior +test_constraints: { + # Test minimum values + min_timeout: main.BatchOperation { + operation_id: "test" + name: "Test" + operation_type: "server" + action: "create" + parameters: {} + timeout: 1 # Test minimum allowed + } + + # Test maximum values + max_parallel: main.BatchWorkflow { + workflow_id: "test" + name: "Test" + operations: [min_timeout] + max_parallel_operations: 100 # Test upper limits + } +} +``` + +### Performance Considerations + +#### 1. 
Schema Complexity + +```kcl +# ✅ Good: Simple, focused schemas +schema SimpleServer: + hostname: str + user: str + labels: str + + check: + len(hostname) > 0, "Hostname required" + +# ❌ Avoid: Overly complex schemas with many computed fields +schema OverlyComplexServer: + # ... many fields with complex interdependencies + # ... computationally expensive check conditions + # ... deep nested validations +``` + +#### 2. Validation Efficiency + +```kcl +# ✅ Good: Efficient validation +schema EfficientValidation: + name: str + tags: {str: str} + + check: + len(name) > 0, "Name required" # ✅ Simple check + len(tags) <= 10, "Maximum 10 tags allowed" # ✅ Simple count check + +# ❌ Avoid: Expensive validation +schema ExpensiveValidation: + items: [str] + + check: + # ❌ Expensive nested operations + all([regex.match(item, "^[a-z0-9-]+$") for item in items]), + "All items must match pattern" +``` + +This validation guide provides the foundation for creating robust, maintainable KCL schemas with proper error handling and validation strategies. \ No newline at end of file diff --git a/docs/provisioning.md b/docs/provisioning.md new file mode 100644 index 0000000..6757ece --- /dev/null +++ b/docs/provisioning.md @@ -0,0 +1,589 @@ +# provisioning + +## Index + +- [Cluster](#cluster) +- [ClusterDef](#clusterdef) +- [K8sAffinity](#k8saffinity) +- [K8sAffinityLabelSelector](#k8saffinitylabelselector) +- [K8sAffinityMatch](#k8saffinitymatch) +- [K8sAntyAffinityLabelSelector](#k8santyaffinitylabelselector) +- [K8sBackup](#k8sbackup) +- [K8sConfigMap](#k8sconfigmap) +- [K8sContainers](#k8scontainers) +- [K8sDefs](#k8sdefs) +- [K8sDeploy](#k8sdeploy) +- [K8sDeploySpec](#k8sdeployspec) +- [K8sKeyPath](#k8skeypath) +- [K8sKeyVal](#k8skeyval) +- [K8sPort](#k8sport) +- [K8sPrxyGatewayServer](#k8sprxygatewayserver) +- [K8sPrxyPort](#k8sprxyport) +- [K8sPrxyTLS](#k8sprxytls) +- [K8sPrxyVirtualService](#k8sprxyvirtualservice) +- [K8sPrxyVirtualServiceMatch](#k8sprxyvirtualservicematch) +- [K8sPrxyVirtualServiceMatchURL](#k8sprxyvirtualservicematchurl) +- [K8sPrxyVirtualServiceRoute](#k8sprxyvirtualserviceroute) +- [K8sResources](#k8sresources) +- [K8sSecret](#k8ssecret) +- [K8sService](#k8sservice) +- [K8sVolume](#k8svolume) +- [K8sVolumeClaim](#k8svolumeclaim) +- [K8sVolumeMount](#k8svolumemount) +- [RunSet](#runset) +- [ScaleData](#scaledata) +- [ScaleResource](#scaleresource) +- [Server](#server) +- [ServerDefaults](#serverdefaults) +- [Settings](#settings) +- [Storage](#storage) +- [StorageVol](#storagevol) +- [TaskServDef](#taskservdef) + +## Schemas + +### Cluster + +cluster settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**admin_host**|str||| +|**admin_port**|int||| +|**admin_user**|str||| +|**clusters_save_path**|str||| +|**def** `required`|"K8sDeploy" | ""||""| +|**local_def_path** `required`|str||"./clusters/${name}"| +|**name** `required`|str||| +|**not_use** `required`|bool||False| +|**profile**|str||| +|**scale**|[ScaleResource](#scaleresource)||| +|**ssh_key_path**|str||| +|**template**|"k8s-deploy" | ""||| +|**version** `required`|str||| +### ClusterDef + +ClusterDef settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**name** `required`|str||| +|**profile** `required`|str||"default"| +### K8sAffinity + +K8S Deployment Affinity settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | 
+|**affinity**|[K8sAffinityLabelSelector](#k8saffinitylabelselector)||| +|**antiAffinity**|[K8sAntyAffinityLabelSelector](#k8santyaffinitylabelselector)||| +### K8sAffinityLabelSelector + +K8S Deployment Affinity Label Selector settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**labelSelector** `required`|[[K8sAffinityMatch](#k8saffinitymatch)]||| +|**matchLabelKeys**|[str]||| +|**topologyKey**|str||| +|**typ** `required`|"requiredDuringSchedulingIgnoredDuringExecution" | "preferredDuringSchedulingIgnoredDuringExecution"||"requiredDuringSchedulingIgnoredDuringExecution"| +### K8sAffinityMatch + +K8S Deployment Affinity Match settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**key** `required`|str||| +|**operator** `required`|"In" | "NotIn" | "Exists" | "DoesNotExist"||| +|**values** `required`|[str]||| +### K8sAntyAffinityLabelSelector + +K8S Deployment AntyAffinity Label Selector settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**labelSelector** `required`|[[K8sAffinityMatch](#k8saffinitymatch)]||| +|**matchLabelKeys**|[str]||| +|**topologyKey**|str||| +|**typ** `required`|"requiredDuringSchedulingIgnoredDuringExecution" | "preferredDuringSchedulingIgnoredDuringExecution"||"requiredDuringSchedulingIgnoredDuringExecution"| +|**weight** `required`|int||100| +### K8sBackup + +K8S Backup settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**mount_path** `required`|str||| +|**name** `required`|str||| +|**typ** `required`|str||| +### K8sConfigMap + +K8S Volume ConfigMap settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**name** `required`|str||| +### K8sContainers + +K8S Container settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**cmd**|str||| +|**env**|[[K8sKeyVal](#k8skeyval)]||| +|**image** `required`|str||| +|**imagePull** `required`|"IfNotPresent" | "Always" | "Never"||"IfNotPresent"| +|**name** `required`|str||"main"| +|**ports**|[[K8sPort](#k8sport)]||| +|**resources_limits**|[K8sResources](#k8sresources)||| +|**resources_requests**|[K8sResources](#k8sresources)||| +|**volumeMounts**|[[K8sVolumeMount](#k8svolumemount)]||| +### K8sDefs + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**cluster_domain** `required`|str||| +|**domain** `required`|str||| +|**full_domain** `required`|str||| +|**name** `required`|str||| +|**ns** `required`|str||| +|**primary_dom** `required`|str||| +### K8sDeploy + +K8S Deployment settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**backups**|[[K8sBackup](#k8sbackup)]||| +|**bin_apply** `required`|bool||True| +|**create_ns** `required`|bool||False| +|**full_domain**|str||| +|**labels** `required`|[[K8sKeyVal](#k8skeyval)]||[K8sKeyVal {key: "${name}", value: "${name}"}]| +|**name** `required`|str||| +|**name_in_files** `required`|str||"${name}"| +|**namespace** `required`|str | "default"||| +|**prxy** `readOnly`|"istio"||"istio"| +|**prxyGatewayServers**|[[K8sPrxyGatewayServer](#k8sprxygatewayserver)]||| +|**prxyVirtualService**|[K8sPrxyVirtualService](#k8sprxyvirtualservice)||| +|**prxy_ns**|str||"istio-system"| +|**sel_labels** `required`|[[K8sKeyVal](#k8skeyval)]||labels| +|**service**|[K8sService](#k8sservice)||| +|**spec** 
`required`|[K8sDeploySpec](#k8sdeployspec)||| +|**tls_path**|str||"ssl"| +|**tpl_labels** `required`|[[K8sKeyVal](#k8skeyval)]||labels| +### K8sDeploySpec + +K8S Deployment Spec settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**affinity**|[K8sAffinity](#k8saffinity)||| +|**containers** `required`|[[K8sContainers](#k8scontainers)]||| +|**hostUsers**|bool||True| +|**imagePullSecret**|str||| +|**nodeName**|str||| +|**nodeSelector**|[[K8sKeyVal](#k8skeyval)]||| +|**replicas** `required`|int||1| +|**secrets**|[[K8sSecret](#k8ssecret)]||| +|**volumes**|[[K8sVolume](#k8svolume)]||| +### K8sKeyPath + +K8S key,path settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**key** `required`|str||| +|**path** `required`|str||| +### K8sKeyVal + +K8S label,selector,env settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**key** `required`|str||| +|**value** `required`|str||| +### K8sPort + +K8S Port settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**container**|int||| +|**name** `required`|str||| +|**nodePort**|int||| +|**target**|int||| +|**typ**|str||"TCP"| +### K8sPrxyGatewayServer + +K8S Deployment Proxy Gateway Server settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**hosts**|[str]||| +|**port** `required`|[K8sPrxyPort](#k8sprxyport)||| +|**tls**|[K8sPrxyTLS](#k8sprxytls)||| +### K8sPrxyPort + +K8S Proxy Port settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**name** `required`|str||| +|**number**|int||| +|**proto** `required`|"HTTP" | "HTTPS" | "TCP"||"HTTPS"| +### K8sPrxyTLS + +K8S Deployment Proxy TLS settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**credentialName**|str||| +|**httpsRedirect**|bool||False| +|**mode**|"SIMPLE" | "PASSTHROUGH" | "MULTI" | ""||"SIMPLE"| +### K8sPrxyVirtualService + +K8S Deployment Proxy Virtual Service settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**gateways** `required`|[str]||| +|**hosts** `required`|[str]||| +|**matches**|[[K8sPrxyVirtualServiceMatch](#k8sprxyvirtualservicematch)]||| +### K8sPrxyVirtualServiceMatch + +K8S Deployment Proxy Virtual Service Match settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**location**|[[K8sPrxyVirtualServiceMatchURL](#k8sprxyvirtualservicematchurl)]||| +|**route_destination**|[[K8sPrxyVirtualServiceRoute](#k8sprxyvirtualserviceroute)]||| +|**typ** `required`|"tcp" | "http" | "tls"||| +### K8sPrxyVirtualServiceMatchURL + +K8S Deployment Proxy Virtual Service Match URL settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**port**|int||| +|**sniHost**|[str]||| +### K8sPrxyVirtualServiceRoute + +K8S Deployment Proxy Virtual Service Route settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**host** `required`|str||| +|**port_number** `required`|int||| +### K8sResources + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**cpu** `required`|str||| +|**memory** `required`|str||| +### K8sSecret + +K8S Volume Secret settings + +#### Attributes + +| name | type | description | default value 
| +| --- | --- | --- | --- | +|**items** `required`|[[K8sKeyPath](#k8skeypath)]||| +|**name** `required`|str||| +### K8sService + +K8S Service settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**externaIPs**|[str]||| +|**externalName**|str||| +|**name** `required`|str||| +|**ports** `required`|[[K8sPort](#k8sport)]||| +|**proto** `required` `readOnly`|"TCP"||"TCP"| +|**selector**|[[K8sKeyVal](#k8skeyval)]||| +|**typ** `required`|"ClusterIP" | "NodePort" | "LoadBalancer" | "ExternalName" | "Headless" | "None"||"ClusterIP"| +### K8sVolume + +K8S Volume settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**configMap**|[K8sConfigMap](#k8sconfigmap)||| +|**items**|[[K8sKeyPath](#k8skeypath)]||| +|**name** `required`|str||| +|**persitentVolumeClaim**|[K8sVolumeClaim](#k8svolumeclaim)||| +|**secret**|[K8sSecret](#k8ssecret)||| +|**typ** `required`|"volumeClaim" | "configMap" | "secret"||"volumeClaim"| +### K8sVolumeClaim + +K8S VolumeClaim settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**abbrev_mode**|["RWO" | "ROX" | "RWX" | "RWOP"]||["RWO"]| +|**hostPath**|str||| +|**modes** `required`|["ReadWriteOnce" | "ReadOnlyMain" | "ReadWriteMany" | "ReadWriteOncePod"]||["ReadWriteOnce"]| +|**name** `required`|str||| +|**pvMode**|"unspecified" | "Filesystem" | "Block"||| +|**pvcMode**|"unspecified" | "Filesystem" | "Block"||| +|**reclaimPolicy**|"Recycle" | "Retain" | "Delete"||"Retain"| +|**storage**|str||| +|**storageClassName** `required`|"manual" | "nfs-client" | "rook-cephfs"||"manual"| +|**typ** `required`|"volumeClaim" | "configMap" | "secret" | ""||""| +### K8sVolumeMount + +K8S VolumeMounts settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**mountPath** `required`|str||| +|**name** `required`|str||| +|**readOnly** `required`|bool||False| +|**subPath**|str||| +### RunSet + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**inventory_file** `required`|str||"./inventory.yaml"| +|**output_format** `required`|"human" | "yaml" | "json"||"human"| +|**output_path** `required`|str||"tmp/NOW-deploy"| +|**use_time** `required`|bool||True| +|**wait** `required`|bool||True| +### ScaleData + +scale data + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**def** `required`|str||| +|**disabled** `required`|bool||False| +|**expire**|str||| +|**from**|str||| +|**mode** `required`|"auto" | "manual" | "ondemand"||"manual"| +|**to**|str||| +### ScaleResource + +scale server settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**default** `required`|[ScaleData](#scaledata)||| +|**down**|[ScaleData](#scaledata)||| +|**fallback**|[ScaleData](#scaledata)||| +|**max**|[ScaleData](#scaledata)||| +|**min**|[ScaleData](#scaledata)||| +|**path** `required`|str||"/etc/scale_provisioning"| +|**up**|[ScaleData](#scaledata)||| +### Server + +server settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**cluster**|[[ClusterDef](#clusterdef)]||| +|**delete_lock** `required`|bool||False| +|**domains_search**|str||| +|**extra_hostnames**|[str]||| +|**fix_local_hosts** `required`|bool||True| +|**hostname** `required`|str||| +|**installer_user**|str||"${user}"| +|**labels** `required`|str||| +|**lock** 
`required`|bool||False| +|**main_domain**|str||| +|**network_private_id**|str||| +|**network_private_name**|str||| +|**network_public_ip**|str||| +|**network_public_ipv4**|bool||True| +|**network_public_ipv6**|bool||False| +|**network_utility_ipv4** `required`|bool||True| +|**network_utility_ipv6** `required`|bool||False| +|**not_use** `required`|bool||False| +|**primary_dns**|str||| +|**priv_cidr_block**|str||| +|**running_timeout** `required`|int||200| +|**running_wait** `required`|int||10| +|**scale**|[ScaleResource](#scaleresource)||| +|**secondary_dns**|str||| +|**ssh_key_name**|str||| +|**ssh_key_path**|str||| +|**storage_os**|str||| +|**storage_os_find** `required`|str||"name: debian-12 \| arch: x86_64"| +|**taskservs**|[[TaskServDef](#taskservdef)]||| +|**time_zone** `required`|str||"UTC"| +|**title** `required`|str||| +|**user** `required`|str||| +|**user_home**|str||"/home/${user}"| +|**user_ssh_key_path**|str||| +|**user_ssh_port**|int||22| +|**zone**|str||| +### ServerDefaults + +Server Defaults settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**domains_search**|str||| +|**fix_local_hosts** `required`|bool||True| +|**installer_user**|str||"${user}"| +|**labels** `required`|str||| +|**lock** `required`|bool||False| +|**main_domain**|str||| +|**network_private_id**|str||| +|**network_private_name**|str||| +|**network_public_ip**|str||| +|**network_public_ipv4**|bool||True| +|**network_public_ipv6**|bool||False| +|**network_utility_ipv4** `required`|bool||True| +|**network_utility_ipv6** `required`|bool||False| +|**primary_dns**|str||| +|**priv_cidr_block**|str||| +|**running_timeout** `required`|int||200| +|**running_wait** `required`|int||10| +|**scale**|[ScaleResource](#scaleresource)||| +|**secondary_dns**|str||| +|**ssh_key_name**|str||| +|**ssh_key_path**|str||| +|**storage_os**|str||| +|**storage_os_find** `required`|str||"name: debian-12 \| arch: x86_64"| +|**time_zone** `required`|str||"UTC"| +|**user** `required`|str||| +|**user_home**|str||"/home/${user}"| +|**user_ssh_key_path**|str||| +|**user_ssh_port**|int||22| +|**zone**|str||| +### Settings + +Settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**cluster_admin_host** `required`|str||| +|**cluster_admin_port** `required`|int||22| +|**cluster_admin_user** `required`|str||"root"| +|**clusters_paths** `required`|[str]||["clusters"]| +|**clusters_save_path** `required`|str||"/${main_name}/clusters"| +|**created_clusters_dirpath** `required`|str||"./tmp/NOW_clusters"| +|**created_taskservs_dirpath** `required`|str||"./tmp/NOW_deployment"| +|**defaults_provs_dirpath** `required`|str||"./defs"| +|**defaults_provs_suffix** `required`|str||"_defaults.k"| +|**main_name** `required`|str||| +|**main_title** `required`|str||main_name| +|**prov_clusters_path** `required`|str||"./clusters"| +|**prov_data_dirpath** `required`|str||"./data"| +|**prov_data_suffix** `required`|str||"_settings.k"| +|**prov_local_bin_path** `required`|str||"./bin"| +|**prov_resources_path** `required`|str||"./resources"| +|**runset** `required`|[RunSet](#runset)||| +|**servers_paths** `required`|[str]||["servers"]| +|**servers_wait_started** `required`|int||27| +|**settings_path** `required`|str||"./settings.yaml"| +### Storage + +Storage settings + +#### Attributes + +| name | type | description | default value | +| --- | --- | --- | --- | +|**fstab** `required`|bool||True| +|**mount** `required`|bool||True| +|**mount_path**|str||| +|**name** 
`required`|str|||
+|**parts**|[[StorageVol](#storagevol)]||[]|
+|**size** `required`|int||0|
+|**total** `required`|int||size|
+|**type** `required`|"ext4" | "xfs" | "btrfs" | "raw" | "zfs"||"ext4"|
+### StorageVol
+
+StorageVol settings
+
+#### Attributes
+
+| name | type | description | default value |
+| --- | --- | --- | --- |
+|**fstab** `required`|bool||True|
+|**mount** `required`|bool||True|
+|**mount_path**|str|||
+|**name** `required`|str|||
+|**size** `required`|int||0|
+|**total** `required`|int||size|
+|**type** `required`|"ext4" | "xfs" | "btrfs" | "raw" | "zfs"||"ext4"|
+### TaskServDef
+
+TaskServDef settings
+
+#### Attributes
+
+| name | type | description | default value |
+| --- | --- | --- | --- |
+|**install_mode** `required`|"getfile" | "library" | "server" | "library-server" | "server-library"||"library"|
+|**name** `required`|str|||
+|**profile** `required`|str||"default"|
+
diff --git a/docs/why_main.md b/docs/why_main.md
new file mode 100644
index 0000000..05a8ace
--- /dev/null
+++ b/docs/why_main.md
+ ✅ main.k
+
+ Purpose:
+ - Entry Point: Main entry point for the provisioning KCL package
+ - Schema Exports: Exports all schemas from sub-modules for external consumption
+ - API Interface: Provides a clean, organized API for external projects
+
+ Key Features:
+ - Comprehensive Exports: All 70+ schemas properly exported with organized categories:
+ - Core configuration schemas (Settings, SecretProvider, etc.)
+ - Server and infrastructure schemas
+ - Workflow and batch operation schemas
+ - Kubernetes deployment schemas
+ - Version management schemas
+ - Package Metadata: Exports package information and constants
+ - Validated: Successfully tested with external imports and schema instantiation
+
+ Benefits vs the NO directory approach:
+ - Current Structure: Clean, organized exports with proper categorization
+ - NO Directory: Contains outdated files with broken imports and missing references
+ - Maintainability: Single source of truth for package exports
+ - Usage: External projects can simply import .main and access all schemas via main.SchemaName
+
+ Example Usage:
+ import provisioning.main
+
+ server: main.Server = main.Server {
+ hostname: "web-server"
+ title: "Web Server"
+ labels: "env: prod"
+ user: "admin"
+ }
+
+ workflow: main.BatchWorkflow = main.BatchWorkflow {
+ workflow_id: "deploy_001"
+ name: "Production Deployment"
+ operations: [...]
+ }
+
+ The main.k file is now the recommended entry point for the provisioning KCL package.
diff --git a/examples/README.md b/examples/README.md
new file mode 100644
index 0000000..987ad1b
--- /dev/null
+++ b/examples/README.md
+# KCL Package Examples
+
+This directory contains practical examples demonstrating how to use the provisioning KCL package schemas.
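+
+All of the examples follow the same basic pattern: import the package entry point, then instantiate the schemas you need. As a quick orientation, here is a minimal sketch (the hostname, title, and user values are placeholders):
+
+```kcl
+import ..main
+
+# Smallest valid server: only the required fields are set
+server: main.Server = main.Server {
+    hostname: "example-01"
+    title: "Example Server"
+    labels: "env: demo"
+    user: "admin"
+}
+```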
+ +## Example Files + +### 📄 `basic_server.k` +**Simple server configurations for different use cases** +- Web server with nginx and monitoring +- Database server with PostgreSQL +- Development server with Docker + +```bash +# Validate and run +kcl run basic_server.k + +# Export to JSON +kcl run basic_server.k --format json +``` + +### 📄 `simple_workflow.k` +**Basic workflow example with sequential operations** +- Database server creation +- Web server deployment +- Application configuration with dependencies + +Demonstrates: +- Sequential dependencies +- Retry policies +- Basic monitoring +- Filesystem storage + +### 📄 `kubernetes_deployment.k` +**Complete Kubernetes deployment examples** +- Web application with resources and affinity +- PostgreSQL with persistent storage +- Prometheus monitoring stack + +Features: +- Resource limits and requests +- Persistent volumes +- Services and networking +- ConfigMaps and secrets +- Anti-affinity rules + +### 📄 `mixed_provider_workflow.k` +**Advanced multi-cloud deployment** +- UpCloud compute infrastructure +- AWS managed services (RDS, ElastiCache) +- Kubernetes cluster setup +- Cross-cloud connectivity + +Advanced features: +- Multiple providers in single workflow +- Complex dependency chains +- Production-grade monitoring +- Encrypted state storage +- Comprehensive retry and rollback strategies + +## Running Examples + +### Basic Validation +```bash +# Check syntax and validate schemas +kcl fmt examples/*.k +kcl run examples/basic_server.k +``` + +### JSON Output for Integration +```bash +# Generate JSON for Nushell/Rust integration +kcl run examples/simple_workflow.k --format json > workflow.json + +# Use with Nushell +let workflow = (open workflow.json) +echo $"Workflow: ($workflow.name)" +echo $"Operations: ($workflow.operations | length)" +``` + +### Validation with Different Configurations +```bash +# Test with custom values +kcl run examples/basic_server.k -D hostname="my-server" -D user="ubuntu" + +# Override workflow settings +kcl run examples/simple_workflow.k -D max_parallel_operations=5 +``` + +## Integration Patterns + +### With Nushell Scripts +```nushell +# Load and submit workflow +def submit-workflow [file: string] { + let workflow = (kcl run $file --format json | from json) + $workflow | to json | http post http://localhost:8080/workflows/batch/submit +} + +# Monitor workflow progress +def monitor-workflow [workflow_id: string] { + while true { + let status = (http get $"http://localhost:8080/workflows/batch/($workflow_id)") + print $"Status: ($status.status)" + if $status.status in ["completed", "failed"] { break } + sleep 10sec + } +} +``` + +### With Rust Orchestrator +```rust +use serde_json; +use std::process::Command; + +// Generate workflow from KCL +let output = Command::new("kcl") + .args(&["run", "examples/simple_workflow.k", "--format", "json"]) + .output()?; + +// Parse and execute +let workflow: BatchWorkflow = serde_json::from_slice(&output.stdout)?; +let executor = BatchExecutor::new(workflow); +executor.execute().await?; +``` + +## Customization Examples + +### Server Configuration Variants +```kcl +import ..main + +# High-performance server +performance_server: main.Server = main.Server { + hostname: "perf-01" + title: "High Performance Server" + labels: "env: prod, tier: compute, performance: high" + user: "performance" + + # Override defaults for high-performance needs + running_timeout: 300 + + taskservs: [ + main.TaskServDef { + name: "tuned" + install_mode: "server" + profile: "performance" + } + ] +} +``` 
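+
+The same schema scales down just as easily. Here is a sketch of a low-cost staging variant for comparison (all values are illustrative):
+
+```kcl
+import ..main
+
+# Lightweight staging server: shorter timeout, easy to delete
+staging_server: main.Server = main.Server {
+    hostname: "stage-01"
+    title: "Staging Server"
+    labels: "env: staging, tier: compute"
+    user: "deploy"
+
+    # Staging boxes are disposable: fail fast and allow deletion
+    running_timeout: 120
+    delete_lock: False
+}
+```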
+ +### Workflow Customization +```kcl +import ..main + +# Custom retry policy for network operations +network_retry_policy: main.RetryPolicy = main.RetryPolicy { + max_attempts: 5 + initial_delay: 10 + max_delay: 120 + backoff_multiplier: 1.5 + retry_on_errors: ["connection_error", "dns_error", "timeout"] +} + +# Workflow with custom settings +custom_workflow: main.BatchWorkflow = main.BatchWorkflow { + workflow_id: "custom_001" + name: "Custom Network Deployment" + + # Use custom retry policy for all operations + default_retry_policy: network_retry_policy + + operations: [ + # ... your operations + ] +} +``` + +## Best Practices Demonstrated + +### 1. **Dependency Management** +- Use sequential dependencies for ordered operations +- Use conditional dependencies for health checks +- Set appropriate timeouts for each dependency + +### 2. **Resource Configuration** +- Always set resource limits for Kubernetes deployments +- Use appropriate server plans based on workload +- Configure persistent storage for stateful services + +### 3. **Monitoring & Observability** +- Enable monitoring for production workflows +- Configure appropriate log levels +- Set up notifications for critical operations + +### 4. **Error Handling** +- Configure retry policies based on operation type +- Use rollback strategies for critical deployments +- Set appropriate timeouts for different operations + +### 5. **Security** +- Use encrypted storage for sensitive workflows +- Configure proper network isolation +- Use secrets management for credentials + +## Troubleshooting + +### Common Issues + +**Schema Validation Errors** +```bash +# Check for typos in schema names +kcl run examples/basic_server.k --debug + +# Validate against specific schema +kcl vet examples/basic_server.k --schema main.Server +``` + +**Missing Required Fields** +```bash +# Error: attribute 'labels' of Server is required +# Solution: Always provide required fields +server: main.Server = main.Server { + hostname: "web-01" + title: "Web Server" + labels: "env: prod" # ✅ Required field + user: "admin" # ✅ Required field +} +``` + +**Import Errors** +```bash +# Use relative imports within the package +import ..main # ✅ Correct + +# Not absolute imports +import provisioning.main # ❌ May not work in examples +``` + +### Testing Examples + +```bash +# Run all examples to verify they work +for file in examples/*.k; do + echo "Testing $file" + kcl run "$file" > /dev/null && echo "✅ PASS" || echo "❌ FAIL" +done + +# Test JSON serialization +kcl run examples/simple_workflow.k --format json | jq '.workflow_id' +``` + +## Contributing Examples + +When adding new examples: + +1. **Follow naming convention**: `{purpose}_{type}.k` +2. **Include comprehensive comments** +3. **Demonstrate specific features** +4. **Test before committing** +5. 
**Update this README** + +Example template: +```kcl +# {Purpose} Example +# Demonstrates {specific features} + +import ..main + +# Clear, descriptive variable names +example_resource: main.ResourceType = main.ResourceType { + # Well-commented configuration + required_field: "value" + + # Explain non-obvious settings + optional_field: "explained_value" # Why this value +} +``` \ No newline at end of file diff --git a/examples/basic_server.k b/examples/basic_server.k new file mode 100644 index 0000000..c59f99f --- /dev/null +++ b/examples/basic_server.k @@ -0,0 +1,80 @@ +# Basic Server Configuration Example +# Shows how to define a simple server with common settings + +import ..main + +# Simple web server configuration +web_server: main.Server = main.Server { + hostname: "web-01" + title: "Production Web Server" + labels: "env: prod, tier: web, role: frontend" + user: "deploy" + + # Network configuration + network_public_ipv4: True + network_utility_ipv4: True + fix_local_hosts: True + + # SSH configuration + user_ssh_port: 22 + + # Basic taskservs to install + taskservs: [ + main.TaskServDef { + name: "nginx" + install_mode: "library" + profile: "production" + }, + main.TaskServDef { + name: "prometheus-node-exporter" + install_mode: "server" + profile: "monitoring" + } + ] +} + +# Database server with different configuration +db_server: main.Server = main.Server { + hostname: "db-01" + title: "PostgreSQL Database Server" + labels: "env: prod, tier: data, role: database" + user: "postgres" + + # Database servers typically don't need public IPs + network_public_ipv4: False + network_utility_ipv4: True + + # Install database-related taskservs + taskservs: [ + main.TaskServDef { + name: "postgresql" + install_mode: "server" + profile: "production" + }, + main.TaskServDef { + name: "postgres-exporter" + install_mode: "library" + profile: "monitoring" + } + ] +} + +# Development server with minimal configuration +dev_server: main.Server = main.Server { + hostname: "dev-01" + title: "Development Server" + labels: "env: dev, tier: development" + user: "developer" + + # Development servers can have relaxed settings + network_public_ipv4: True + delete_lock: False # Allow easy deletion for dev environments + + taskservs: [ + main.TaskServDef { + name: "docker" + install_mode: "library" + profile: "development" + } + ] +} \ No newline at end of file diff --git a/examples/kubernetes_deployment.k b/examples/kubernetes_deployment.k new file mode 100644 index 0000000..1673923 --- /dev/null +++ b/examples/kubernetes_deployment.k @@ -0,0 +1,325 @@ +# Kubernetes Deployment Example +# Shows complete K8s deployment with services, volumes, and monitoring + +import ..main + +# Web application deployment in Kubernetes +webapp_k8s: main.K8sDeploy = main.K8sDeploy { + name: "webapp" + namespace: "production" + create_ns: True + + # Deployment specification + spec: main.K8sDeploySpec { + replicas: 3 + containers: [ + main.K8sContainers { + name: "webapp" + image: "nginx:1.21-alpine" + + # Port configuration + ports: [ + main.K8sPort { + name: "http" + container: 80 + target: 8080 + } + ] + + # Resource requirements + resources_requests: main.K8sResources { + memory: "64Mi" + cpu: "50m" + } + resources_limits: main.K8sResources { + memory: "128Mi" + cpu: "100m" + } + + # Environment variables + env: [ + main.K8sKeyVal { + key: "NODE_ENV" + value: "production" + }, + main.K8sKeyVal { + key: "LOG_LEVEL" + value: "info" + } + ] + + # Mount configuration volume + volumeMounts: [ + main.K8sVolumeMount { + name: "config" 
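+                        # ConfigMap-backed volume (declared below under volumes) mounted read-only into nginx conf.d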
+ mountPath: "/etc/nginx/conf.d" + readOnly: True + } + ] + } + ] + + # Volume configuration + volumes: [ + main.K8sVolume { + name: "config" + typ: "configMap" + configMap: main.K8sConfigMap { + name: "webapp-config" + } + } + ] + + # Node selection for production workloads + nodeSelector: [ + main.K8sKeyVal { + key: "node-type" + value: "production" + } + ] + + # Anti-affinity to spread pods across nodes + affinity: main.K8sAffinity { + antiAffinity: main.K8sAntyAffinityLabelSelector { + typ: "preferredDuringSchedulingIgnoredDuringExecution" + weight: 100 + labelSelector: [ + main.K8sAffinityMatch { + key: "app" + operator: "In" + values: ["webapp"] + } + ] + topologyKey: "kubernetes.io/hostname" + } + } + } + + # Service configuration + service: main.K8sService { + name: "webapp-service" + typ: "ClusterIP" + ports: [ + main.K8sPort { + name: "http" + target: 80 + nodePort: 30080 + } + ] + selector: [ + main.K8sKeyVal { + key: "app" + value: "webapp" + } + ] + } + + # Labels for the deployment + labels: [ + main.K8sKeyVal { + key: "app" + value: "webapp" + }, + main.K8sKeyVal { + key: "version" + value: "v1.0.0" + }, + main.K8sKeyVal { + key: "environment" + value: "production" + } + ] +} + +# Database deployment with persistent storage +database_k8s: main.K8sDeploy = main.K8sDeploy { + name: "postgres" + namespace: "production" + + spec: main.K8sDeploySpec { + replicas: 1 # Database typically runs single instance + containers: [ + main.K8sContainers { + name: "postgres" + image: "postgres:15-alpine" + + ports: [ + main.K8sPort { + name: "postgres" + container: 5432 + target: 5432 + } + ] + + # Database needs more resources + resources_requests: main.K8sResources { + memory: "256Mi" + cpu: "100m" + } + resources_limits: main.K8sResources { + memory: "512Mi" + cpu: "500m" + } + + # Database environment + env: [ + main.K8sKeyVal { + key: "POSTGRES_DB" + value: "webapp" + }, + main.K8sKeyVal { + key: "POSTGRES_USER" + value: "webapp" + }, + main.K8sKeyVal { + key: "POSTGRES_PASSWORD" + value: "changeme" # Use secrets in production + } + ] + + # Persistent data volume + volumeMounts: [ + main.K8sVolumeMount { + name: "postgres-data" + mountPath: "/var/lib/postgresql/data" + readOnly: False + } + ] + } + ] + + # Persistent volume for database + volumes: [ + main.K8sVolume { + name: "postgres-data" + typ: "volumeClaim" + persitentVolumeClaim: main.K8sVolumeClaim { + name: "postgres-pvc" + storageClassName: "manual" + storage: "10Gi" + modes: ["ReadWriteOnce"] + reclaimPolicy: "Retain" + } + } + ] + } + + # Internal service for database + service: main.K8sService { + name: "postgres-service" + typ: "ClusterIP" + ports: [ + main.K8sPort { + name: "postgres" + target: 5432 + } + ] + selector: [ + main.K8sKeyVal { + key: "app" + value: "postgres" + } + ] + } + + labels: [ + main.K8sKeyVal { + key: "app" + value: "postgres" + }, + main.K8sKeyVal { + key: "component" + value: "database" + } + ] +} + +# Monitoring deployment using Prometheus +monitoring_k8s: main.K8sDeploy = main.K8sDeploy { + name: "prometheus" + namespace: "monitoring" + create_ns: True + + spec: main.K8sDeploySpec { + replicas: 1 + containers: [ + main.K8sContainers { + name: "prometheus" + image: "prom/prometheus:v2.40.0" + + ports: [ + main.K8sPort { + name: "web" + container: 9090 + target: 9090 + } + ] + + resources_requests: main.K8sResources { + memory: "512Mi" + cpu: "200m" + } + resources_limits: main.K8sResources { + memory: "1Gi" + cpu: "500m" + } + + volumeMounts: [ + main.K8sVolumeMount { + name: "prometheus-config" + 
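# Prometheus reads its configuration files from this ConfigMap mount +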
mountPath: "/etc/prometheus" + readOnly: True + }, + main.K8sVolumeMount { + name: "prometheus-data" + mountPath: "/prometheus" + readOnly: False + } + ] + } + ] + + volumes: [ + main.K8sVolume { + name: "prometheus-config" + typ: "configMap" + configMap: main.K8sConfigMap { + name: "prometheus-config" + } + }, + main.K8sVolume { + name: "prometheus-data" + typ: "volumeClaim" + persitentVolumeClaim: main.K8sVolumeClaim { + name: "prometheus-pvc" + storage: "20Gi" + storageClassName: "manual" + modes: ["ReadWriteOnce"] + } + } + ] + } + + service: main.K8sService { + name: "prometheus-service" + typ: "NodePort" + ports: [ + main.K8sPort { + name: "web" + target: 9090 + nodePort: 30090 + } + ] + } + + labels: [ + main.K8sKeyVal { + key: "app" + value: "prometheus" + }, + main.K8sKeyVal { + key: "component" + value: "monitoring" + } + ] +} \ No newline at end of file diff --git a/examples/mixed_provider_workflow.k b/examples/mixed_provider_workflow.k new file mode 100644 index 0000000..d0f7b8d --- /dev/null +++ b/examples/mixed_provider_workflow.k @@ -0,0 +1,452 @@ +# Mixed Provider Workflow Example +# Demonstrates deployment across multiple cloud providers with advanced features + +import ..main + +# Production deployment across UpCloud and AWS +production_deployment: main.BatchWorkflow = main.BatchWorkflow { + workflow_id: "prod_multicloud_001" + name: "Production Multi-Cloud Deployment" + description: "Deploy production infrastructure across UpCloud (compute) and AWS (managed services)" + + operations: [ + # Phase 1: Create UpCloud infrastructure + main.BatchOperation { + operation_id: "upcloud_network" + name: "Create UpCloud Network Infrastructure" + operation_type: "custom" + provider: "upcloud" + action: "create" + parameters: { + "resource_type": "network" + "vpc_cidr": "10.0.0.0/16" + "subnets": "public,private" + "zones": "fi-hel2,de-fra1" + } + priority: 10 + timeout: 300 + } + + main.BatchOperation { + operation_id: "upcloud_compute" + name: "Create UpCloud Compute Instances" + operation_type: "server" + provider: "upcloud" + action: "create" + parameters: { + "server_count": "6" + "plan": "2xCPU-4GB" + "zones": "fi-hel2,de-fra1" + "distribution": "even" # Spread across zones + "server_type": "k8s-worker" + } + dependencies: [ + main.DependencyDef { + target_operation_id: "upcloud_network" + dependency_type: "sequential" + timeout: 300 + } + ] + priority: 9 + timeout: 900 + allow_parallel: True + } + + # Phase 2: Create AWS managed services + main.BatchOperation { + operation_id: "aws_database" + name: "Create AWS RDS PostgreSQL" + operation_type: "server" + provider: "aws" + action: "create" + parameters: { + "service": "rds" + "engine": "postgresql" + "version": "15.4" + "instance_class": "db.t3.medium" + "allocated_storage": "100" + "multi_az": "true" + "region": "eu-west-1" + "vpc_security_groups": "prod-db-sg" + } + priority: 9 + timeout: 1800 # RDS can take time + allow_parallel: True + } + + main.BatchOperation { + operation_id: "aws_redis" + name: "Create AWS ElastiCache Redis" + operation_type: "server" + provider: "aws" + action: "create" + parameters: { + "service": "elasticache" + "engine": "redis" + "node_type": "cache.t3.micro" + "num_cache_nodes": "2" + "region": "eu-west-1" + "parameter_group": "default.redis7" + } + priority: 9 + timeout: 1200 + allow_parallel: True + } + + # Phase 3: Set up Kubernetes cluster on UpCloud + main.BatchOperation { + operation_id: "install_k8s_control" + name: "Install Kubernetes Control Plane" + operation_type: "taskserv" + provider: 
"upcloud" + action: "create" + parameters: { + "taskserv": "kubernetes" + "role": "control-plane" + "version": "v1.31.0" + "target_count": "3" + "ha_mode": "true" + "container_runtime": "containerd" + } + dependencies: [ + main.DependencyDef { + target_operation_id: "upcloud_compute" + dependency_type: "sequential" + timeout: 600 + } + ] + priority: 8 + timeout: 1800 + } + + main.BatchOperation { + operation_id: "install_k8s_workers" + name: "Install Kubernetes Worker Nodes" + operation_type: "taskserv" + provider: "upcloud" + action: "create" + parameters: { + "taskserv": "kubernetes" + "role": "worker" + "target_count": "3" + "container_runtime": "containerd" + "join_existing": "true" + } + dependencies: [ + main.DependencyDef { + target_operation_id: "install_k8s_control" + dependency_type: "sequential" + timeout: 300 + } + ] + priority: 7 + timeout: 1200 + } + + # Phase 4: Install cluster services + main.BatchOperation { + operation_id: "install_networking" + name: "Install Cluster Networking" + operation_type: "taskserv" + action: "create" + parameters: { + "taskserv": "cilium" + "version": "1.14.2" + "mode": "tunnel" + "enable_hubble": "true" + } + dependencies: [ + main.DependencyDef { + target_operation_id: "install_k8s_workers" + dependency_type: "sequential" + timeout: 300 + } + ] + priority: 6 + timeout: 600 + } + + main.BatchOperation { + operation_id: "install_monitoring" + name: "Install Monitoring Stack" + operation_type: "taskserv" + action: "create" + parameters: { + "taskserv": "prometheus-stack" + "grafana_enabled": "true" + "alertmanager_enabled": "true" + "node_exporter_enabled": "true" + } + dependencies: [ + main.DependencyDef { + target_operation_id: "install_networking" + dependency_type: "sequential" + timeout: 180 + } + ] + priority: 5 + timeout: 900 + } + + # Phase 5: Configure cross-cloud connectivity + main.BatchOperation { + operation_id: "configure_connectivity" + name: "Configure Cross-Cloud Connectivity" + operation_type: "custom" + action: "configure" + parameters: { + "connectivity_type": "vpn" + "upcloud_endpoint": "dynamic" + "aws_vpc_id": "auto-detect" + "encryption": "wireguard" + "routing": "bgp" + } + dependencies: [ + main.DependencyDef { + target_operation_id: "aws_database" + dependency_type: "sequential" + timeout: 60 + }, + main.DependencyDef { + target_operation_id: "install_monitoring" + dependency_type: "sequential" + timeout: 60 + } + ] + priority: 4 + timeout: 600 + } + + # Phase 6: Final validation + main.BatchOperation { + operation_id: "validate_deployment" + name: "Validate Complete Deployment" + operation_type: "custom" + action: "configure" + parameters: { + "validation_type": "end_to_end" + "test_database_connectivity": "true" + "test_redis_connectivity": "true" + "test_k8s_cluster": "true" + "test_monitoring": "true" + } + dependencies: [ + main.DependencyDef { + target_operation_id: "configure_connectivity" + dependency_type: "sequential" + timeout: 300 + } + ] + priority: 1 + timeout: 600 + } + ] + + # Advanced workflow configuration + max_parallel_operations: 4 + global_timeout: 7200 # 2 hours + fail_fast: False # Continue on non-critical failures + + # SurrealDB for persistent state + storage: main.StorageConfig { + backend: "surrealdb" + connection_config: { + "url": "ws://surrealdb.internal:8000" + "namespace": "production" + "database": "multicloud_workflows" + "user": "workflow_executor" + "auth_token": "{{env.SURREALDB_TOKEN}}" + } + enable_persistence: True + retention_hours: 2160 # 90 days for production + 
enable_compression: True + encryption: main.SecretProvider { + provider: "sops" + sops_config: main.SopsConfig { + config_path: "./.sops.yaml" + age_key_file: "{{env.HOME}}/.config/sops/age/keys.txt" + use_age: True + } + } + } + + # Comprehensive monitoring + monitoring: main.MonitoringConfig { + enabled: True + backend: "prometheus" + enable_tracing: True + enable_notifications: True + notification_channels: [ + "webhook:slack://prod-ops-alerts", + "webhook:pagerduty://high-priority", + "email:devops-team@company.com" + ] + log_level: "info" + collection_interval: 30 + } + + # Production-grade retry policy + default_retry_policy: main.RetryPolicy { + max_attempts: 3 + initial_delay: 60 + max_delay: 600 + backoff_multiplier: 2 + retry_on_errors: [ + "timeout", + "connection_error", + "rate_limit", + "resource_unavailable", + "quota_exceeded" + ] + retry_on_any_error: False + } + + # Conservative rollback strategy + default_rollback_strategy: main.RollbackStrategy { + enabled: True + strategy: "manual" # Manual approval for production rollbacks + preserve_partial_state: True + rollback_timeout: 1800 + custom_rollback_operations: [ + "backup_state", + "notify_team", + "create_incident" + ] + } + + # Execution context for tracking + execution_context: { + "environment": "production" + "deployment_type": "multi_cloud" + "cost_center": "infrastructure" + "owner": "platform-team" + "change_request": "CHG-2025-001" + "approval": "approved" + } + + # Hooks for integration + pre_workflow_hooks: [ + "validate_prerequisites", + "check_maintenance_windows", + "notify_deployment_start" + ] + + post_workflow_hooks: [ + "run_smoke_tests", + "update_monitoring_dashboards", + "notify_deployment_complete", + "update_documentation" + ] +} + +# Advanced batch executor configuration for this workflow +multicloud_executor: main.BatchExecutor = main.BatchExecutor { + executor_id: "multicloud_prod_executor" + name: "Multi-Cloud Production Executor" + description: "Production-ready executor for multi-cloud deployments" + + scheduler: main.BatchScheduler { + strategy: "resource_aware" + resource_limits: { + "max_cpu_cores": 32 + "max_memory_mb": 65536 + "max_network_bandwidth": 10000 + "max_concurrent_api_calls": 100 + } + scheduling_interval: 15 + enable_preemption: True + } + + # Multiple queues for different priorities + queues: [ + main.BatchQueue { + queue_id: "critical" + queue_type: "priority" + max_size: 50 + retention_period: 86400 + max_delivery_attempts: 5 + }, + main.BatchQueue { + queue_id: "standard" + queue_type: "standard" + max_size: 200 + retention_period: 604800 + dead_letter_queue: "failed_operations" + max_delivery_attempts: 3 + } + ] + + # Mixed provider configuration + provider_config: main.ProviderMixConfig { + primary_provider: "upcloud" + secondary_providers: ["aws"] + provider_selection: "cost_optimize" + cross_provider_networking: { + "vpn_enabled": "true" + "mesh_networking": "wireguard" + "encryption": "aes256" + } + provider_limits: { + "upcloud": { + "max_servers": 50 + "max_storage_gb": 10000 + "api_rate_limit": "100/min" + } + "aws": { + "max_instances": 20 + "max_ebs_gb": 5000 + "api_rate_limit": "1000/min" + } + } + } + + # Production health monitoring + health_check: main.BatchHealthCheck { + enabled: True + check_interval: 30 + check_timeout: 15 + failure_threshold: 2 + success_threshold: 3 + health_checks: [ + "http://localhost:8080/health", + "check_provider_apis", + "check_storage_backend", + "check_monitoring_systems" + ] + failure_actions: [ + "alert", + 
"graceful_degradation", + "escalate" + ] + } + + # Intelligent autoscaling + autoscaling: main.BatchAutoscaling { + enabled: True + min_parallel: 3 + max_parallel: 15 + scale_up_threshold: 0.75 + scale_down_threshold: 0.25 + cooldown_period: 300 + target_utilization: 0.60 + } + + # Comprehensive metrics + metrics: main.BatchMetrics { + detailed_metrics: True + retention_hours: 2160 # 90 days + aggregation_intervals: [60, 300, 1800, 3600, 86400] + enable_export: True + export_config: { + "prometheus_endpoint": "http://prometheus.monitoring:9090" + "grafana_dashboard": "multicloud_operations" + "datadog_api_key": "{{env.DATADOG_API_KEY}}" + } + custom_metrics: [ + "provider_api_latency", + "cross_cloud_bandwidth", + "cost_tracking", + "sla_compliance" + ] + } +} \ No newline at end of file diff --git a/examples/simple_workflow.k b/examples/simple_workflow.k new file mode 100644 index 0000000..05fb47e --- /dev/null +++ b/examples/simple_workflow.k @@ -0,0 +1,156 @@ +# Simple Workflow Example +# Demonstrates basic workflow creation with sequential operations + +import ..main + +# Simple web application deployment workflow +web_app_deployment: main.BatchWorkflow = main.BatchWorkflow { + workflow_id: "webapp_deploy_001" + name: "Web Application Deployment" + description: "Deploy a simple web application with database backend" + + operations: [ + # Step 1: Create database server + main.BatchOperation { + operation_id: "create_database" + name: "Create Database Server" + operation_type: "server" + provider: "upcloud" + action: "create" + parameters: { + "hostname": "webapp-db" + "plan": "1xCPU-2GB" + "zone": "fi-hel2" + "server_type": "database" + } + priority: 10 + timeout: 600 # 10 minutes + } + + # Step 2: Create web servers (can run in parallel) + main.BatchOperation { + operation_id: "create_web_servers" + name: "Create Web Servers" + operation_type: "server" + provider: "upcloud" + action: "create" + parameters: { + "server_count": "2" + "hostname_prefix": "webapp-web" + "plan": "1xCPU-1GB" + "zone": "fi-hel2" + "server_type": "web" + } + priority: 10 + timeout: 600 + allow_parallel: True + } + + # Step 3: Install database after database server is ready + main.BatchOperation { + operation_id: "install_database" + name: "Install PostgreSQL" + operation_type: "taskserv" + action: "create" + parameters: { + "taskserv": "postgresql" + "version": "15" + "target_servers": "webapp-db" + } + dependencies: [ + main.DependencyDef { + target_operation_id: "create_database" + dependency_type: "sequential" + timeout: 300 + } + ] + priority: 8 + timeout: 900 # 15 minutes for database installation + } + + # Step 4: Install web stack after web servers are ready + main.BatchOperation { + operation_id: "install_web_stack" + name: "Install Web Stack" + operation_type: "taskserv" + action: "create" + parameters: { + "taskserv": "nginx" + "target_servers": "webapp-web-*" + "config_template": "reverse_proxy" + } + dependencies: [ + main.DependencyDef { + target_operation_id: "create_web_servers" + dependency_type: "sequential" + timeout: 300 + } + ] + priority: 8 + timeout: 600 + } + + # Step 5: Configure application after all components are ready + main.BatchOperation { + operation_id: "configure_application" + name: "Configure Web Application" + operation_type: "custom" + action: "configure" + parameters: { + "config_type": "application" + "database_url": "postgres://webapp-db:5432/webapp" + "web_servers": "webapp-web-01,webapp-web-02" + } + dependencies: [ + main.DependencyDef { + target_operation_id: 
"install_database" + dependency_type: "sequential" + timeout: 60 + }, + main.DependencyDef { + target_operation_id: "install_web_stack" + dependency_type: "sequential" + timeout: 60 + } + ] + priority: 5 + timeout: 300 + } + ] + + # Workflow settings + max_parallel_operations: 3 + global_timeout: 3600 # 1 hour total + fail_fast: True # Stop on first failure + + # Simple filesystem storage for this example + storage: main.StorageConfig { + backend: "filesystem" + base_path: "./webapp_deployments" + enable_persistence: True + retention_hours: 168 # 1 week + } + + # Basic monitoring + monitoring: main.MonitoringConfig { + enabled: True + backend: "prometheus" + enable_tracing: False # Simplified for example + log_level: "info" + } + + # Conservative retry policy + default_retry_policy: main.RetryPolicy { + max_attempts: 2 + initial_delay: 30 + backoff_multiplier: 2 + retry_on_errors: ["timeout", "connection_error"] + } + + # Enable rollback for safety + default_rollback_strategy: main.RollbackStrategy { + enabled: True + strategy: "immediate" + preserve_partial_state: False + } +} \ No newline at end of file diff --git a/examples_batch.k b/examples_batch.k new file mode 100644 index 0000000..9b5c9c5 --- /dev/null +++ b/examples_batch.k @@ -0,0 +1,457 @@ +# Info: KCL batch workflow examples for provisioning (Provisioning) +# Author: JesusPerezLorenzo +# Release: 0.0.1 +# Date: 25-09-2025 +# Description: Usage examples for batch workflows and operations +import .workflows +import .batch +import .settings + +# Example 1: Mixed Provider Infrastructure Deployment +mixed_provider_workflow: workflows.BatchWorkflow = workflows.BatchWorkflow { + workflow_id: "mixed_infra_deploy_001" + name: "Mixed Provider Infrastructure Deployment" + description: "Deploy infrastructure across UpCloud and AWS with cross-provider networking" + operations: [ + workflows.BatchOperation { + operation_id: "create_upcloud_servers" + name: "Create UpCloud Web Servers" + operation_type: "server" + provider: "upcloud" + action: "create" + parameters: { + "server_count": "3" + "server_type": "web" + "zone": "fi-hel2" + "plan": "1xCPU-2GB" + } + allow_parallel: True + priority: 10 + } + workflows.BatchOperation { + operation_id: "create_aws_database" + name: "Create AWS RDS Database" + operation_type: "server" + provider: "aws" + action: "create" + parameters: { + "service": "rds" + "instance_class": "db.t3.micro" + "engine": "postgresql" + "region": "eu-west-1" + } + dependencies: [ + workflows.DependencyDef { + target_operation_id: "create_upcloud_servers" + dependency_type: "sequential" + timeout: 600 + } + ] + priority: 5 + } + workflows.BatchOperation { + operation_id: "install_kubernetes" + name: "Install Kubernetes on UpCloud servers" + operation_type: "taskserv" + provider: "upcloud" + action: "create" + parameters: { + "taskserv": "kubernetes" + "version": "v1.28.0" + "cluster_name": "prod-cluster" + } + dependencies: [ + workflows.DependencyDef { + target_operation_id: "create_upcloud_servers" + dependency_type: "sequential" + timeout: 1200 + } + ] + # 1 hour for K8s installation + timeout: 3600 + priority: 8 + } + workflows.BatchOperation { + operation_id: "setup_monitoring" + name: "Setup Prometheus monitoring" + operation_type: "taskserv" + action: "create" + parameters: { + "taskserv": "prometheus" + "namespace": "monitoring" + "retention": "30d" + } + dependencies: [ + workflows.DependencyDef { + target_operation_id: "install_kubernetes" + dependency_type: "sequential" + timeout: 600 + } + ] + priority: 3 + } + ] 
+ max_parallel_operations: 3 + fail_fast: False + storage: workflows.StorageConfig { + backend: "surrealdb" + connection_config: { + "url": "ws://localhost:8000" + "namespace": "provisioning" + "database": "batch_workflows" + } + enable_persistence: True + # 30 days + retention_hours: 720 + } + monitoring: workflows.MonitoringConfig { + enabled: True + backend: "prometheus" + enable_tracing: True + enable_notifications: True + notification_channels: ["webhook:slack://ops-channel"] + } + default_retry_policy: workflows.RetryPolicy { + max_attempts: 3 + initial_delay: 10 + backoff_multiplier: 2 + retry_on_errors: ["connection_error", "timeout", "rate_limit", "resource_unavailable"] + } + execution_context: { + "environment": "production" + "cost_center": "infrastructure" + "owner": "devops-team" + } +} + +# Example 2: Server Scaling Workflow with SurrealDB Backend +server_scaling_workflow: workflows.BatchWorkflow = workflows.BatchWorkflow { + workflow_id: "server_scaling_002" + name: "Auto-scaling Server Workflow" + description: "Scale servers based on load with automatic rollback on failure" + operations: [ + workflows.BatchOperation { + operation_id: "scale_web_servers" + name: "Scale web servers up" + operation_type: "server" + action: "scale" + parameters: { + "target_count": "6" + "current_count": "3" + "server_group": "web-tier" + } + retry_policy: workflows.RetryPolicy { + max_attempts: 2 + initial_delay: 30 + retry_on_errors: ["resource_limit", "quota_exceeded"] + } + rollback_strategy: workflows.RollbackStrategy { + enabled: True + strategy: "immediate" + custom_rollback_operations: ["scale_down_to_original"] + } + } + workflows.BatchOperation { + operation_id: "update_load_balancer" + name: "Update load balancer configuration" + operation_type: "custom" + action: "configure" + parameters: { + "service": "haproxy" + "config_template": "web_tier_6_servers" + } + dependencies: [ + workflows.DependencyDef { + target_operation_id: "scale_web_servers" + dependency_type: "conditional" + conditions: ["servers_ready", "health_check_passed"] + timeout: 300 + } + ] + } + ] + storage: workflows.StorageConfig { + backend: "surrealdb" + connection_config: { + "url": "ws://surrealdb.local:8000" + "namespace": "scaling" + "database": "operations" + } + } + fail_fast: True +} + +# Example 3: Maintenance Workflow with Filesystem Backend +maintenance_workflow: workflows.BatchWorkflow = workflows.BatchWorkflow { + workflow_id: "maintenance_003" + name: "System Maintenance Workflow" + description: "Perform scheduled maintenance across multiple providers" + operations: [ + workflows.BatchOperation { + operation_id: "backup_databases" + name: "Backup all databases" + operation_type: "custom" + action: "create" + parameters: { + "backup_type": "full" + "compression": "gzip" + "retention_days": "30" + } + # 2 hours + timeout: 7200 + } + workflows.BatchOperation { + operation_id: "update_taskservs" + name: "Update all taskservs to latest versions" + operation_type: "taskserv" + action: "update" + parameters: { + "update_strategy": "rolling" + "max_unavailable": "1" + } + dependencies: [ + workflows.DependencyDef { + target_operation_id: "backup_databases" + dependency_type: "sequential" + } + ] + # Sequential updates for safety + allow_parallel: False + } + workflows.BatchOperation { + operation_id: "verify_services" + name: "Verify all services are healthy" + operation_type: "custom" + action: "configure" + parameters: { + "verification_type": "health_check" + "timeout_per_service": "30" + } + 
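            # Verification is gated on the rolling taskserv updates finishing +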
dependencies: [ + workflows.DependencyDef { + target_operation_id: "update_taskservs" + dependency_type: "sequential" + } + ] + } + ] + storage: workflows.StorageConfig { + backend: "filesystem" + base_path: "./maintenance_workflows" + enable_persistence: True + enable_compression: True + } + pre_workflow_hooks: ["notify_maintenance_start", "set_maintenance_mode"] + post_workflow_hooks: ["unset_maintenance_mode", "notify_maintenance_complete"] +} + +# Example 4: Comprehensive Batch Executor Configuration +production_batch_executor: batch.BatchExecutor = batch.BatchExecutor { + executor_id: "prod_batch_executor" + name: "Production Batch Executor" + description: "Production-ready batch executor with full observability and mixed provider support" + scheduler: batch.BatchScheduler { + strategy: "resource_aware" + resource_limits: { + "max_cpu_cores": 16 + "max_memory_mb": 32768 + # 10Gbps + "max_network_bandwidth": 10000 + } + scheduling_interval: 5 + enable_preemption: True + } + queues: [ + batch.BatchQueue { + queue_id: "high_priority" + queue_type: "priority" + max_size: 100 + # 1 day + retention_period: 86400 + max_delivery_attempts: 5 + } + batch.BatchQueue { + queue_id: "standard" + queue_type: "standard" + max_size: 500 + dead_letter_queue: "failed_operations" + } + batch.BatchQueue { + queue_id: "failed_operations" + queue_type: "dead_letter" + # 7 days + retention_period: 604800 + } + ] + resource_constraints: [ + batch.ResourceConstraint { + resource_type: "cpu" + resource_name: "total_cpu_cores" + max_units: 16 + units_per_operation: 1 + hard_constraint: True + } + batch.ResourceConstraint { + resource_type: "memory" + resource_name: "total_memory_gb" + max_units: 32 + units_per_operation: 2 + hard_constraint: True + } + ] + provider_config: batch.ProviderMixConfig { + primary_provider: "upcloud" + secondary_providers: ["aws"] + provider_selection: "cost_optimize" + cross_provider_networking: { + "vpn_enabled": "True" + "mesh_networking": "wireguard" + } + provider_limits: { + "upcloud": {"max_servers": 20, "max_storage_gb": 1000} + "aws": {"max_instances": 10, "max_ebs_gb": 500} + } + } + health_check: batch.BatchHealthCheck { + enabled: True + check_interval: 30 + failure_threshold: 2 + health_checks: ["http://localhost:8080/health", "check_disk_space", "check_memory_usage"] + failure_actions: ["alert", "scale_down", "rollback"] + } + autoscaling: batch.BatchAutoscaling { + enabled: True + min_parallel: 2 + max_parallel: 12 + scale_up_threshold: 0.85 + scale_down_threshold: 0.15 + target_utilization: 0.65 + # 3 minutes + cooldown_period: 180 + } + metrics: batch.BatchMetrics { + detailed_metrics: True + # 30 days + retention_hours: 720 + # 1min, 5min, 30min, 1hour + aggregation_intervals: [60, 300, 1800, 3600] + enable_export: True + export_config: { + "prometheus_endpoint": "http://prometheus.local:9090" + "grafana_dashboard": "batch_operations_dashboard" + } + } + storage: workflows.StorageConfig { + backend: "surrealdb" + connection_config: { + "url": "ws://surrealdb.prod:8000" + "namespace": "production" + "database": "batch_workflows" + "user": "batch_executor" + "auth_token": "{{env.SURREALDB_TOKEN}}" + } + enable_persistence: True + # 90 days + retention_hours: 2160 + enable_compression: True + encryption: settings.SecretProvider { + provider: "sops" + sops_config: settings.SopsConfig { + config_path: "./.sops.yaml" + age_key_file: "{{env.HOME}}/.config/sops/age/keys.txt" + use_age: True + } + } + } + security_config: { + "tls_enabled": "True" + "auth_required": "True" + 
"rbac_enabled": "True" + "audit_level": "full" + } + webhook_endpoints: [ + "https://hooks.slack.com/services/ops-notifications" + "https://api.pagerduty.com/generic/incidents" + ] + performance_config: { + "io_threads": "8" + "worker_threads": "16" + "batch_size": "50" + "connection_pool_size": "20" + } +} + +# Example 5: Template for Common Infrastructure Deployment +infra_deployment_template: workflows.WorkflowTemplate = workflows.WorkflowTemplate { + template_id: "standard_infra_deployment" + name: "Standard Infrastructure Deployment Template" + description: "Template for deploying standard infrastructure with customizable parameters" + category: "infrastructure" + workflow_template: workflows.BatchWorkflow { + # Template parameter: {{template.workflow_id}} + workflow_id: "custom_deployment" + # Template parameter: {{template.workflow_name}} + name: "Custom Deployment" + operations: [ + workflows.BatchOperation { + operation_id: "create_servers" + # Template: Create {{template.server_count}} servers + name: "Create servers" + operation_type: "server" + # Template parameter: {{template.provider}} + provider: "upcloud" + action: "create" + parameters: { + # Template parameter: {{template.server_count}} + "count": "3" + # Template parameter: {{template.server_type}} + "type": "web" + # Template parameter: {{template.zone}} + "zone": "fi-hel2" + } + } + workflows.BatchOperation { + operation_id: "install_base_taskservs" + name: "Install base taskservs" + operation_type: "taskserv" + action: "create" + parameters: { + # Template parameter: {{template.base_taskservs}} + "taskservs": "kubernetes,prometheus,grafana" + } + dependencies: [ + workflows.DependencyDef { + target_operation_id: "create_servers" + dependency_type: "sequential" + } + ] + } + ] + storage: workflows.StorageConfig { + # Template parameter: {{template.storage_backend}} + backend: "filesystem" + # Template parameter: {{template.storage_path}} + base_path: "./deployments" + } + } + parameters: { + "workflow_id": "custom_deployment" + "workflow_name": "Custom Deployment" + "server_count": "3" + "server_type": "web" + "provider": "upcloud" + "zone": "fi-hel2" + "base_taskservs": "kubernetes,prometheus,grafana" + "storage_backend": "filesystem" + "storage_path": "./deployments" + } + required_parameters: [ + "workflow_id" + "server_count" + "provider" + ] + examples: [ + "Small deployment: server_count=2, server_type=micro" + "Production deployment: server_count=6, server_type=standard, provider=upcloud" + ] +} diff --git a/gitea.k b/gitea.k new file mode 100644 index 0000000..689d4b6 --- /dev/null +++ b/gitea.k @@ -0,0 +1,325 @@ +""" +Gitea Integration Configuration Schemas + +This module defines schemas for Gitea service configuration, including: +- Local and remote Gitea deployment options +- Repository management +- Workspace integration +- Extension publishing +- Locking mechanism + +Version: 1.0.0 +KCL Version: 0.11.3+ +""" + +schema GiteaConfig: + """ + Main Gitea service configuration + + Supports both local (self-hosted) and remote Gitea instances. + Local mode can deploy via Docker or binary. 
+ + Examples: + # Local Docker deployment + GiteaConfig { + mode = "local" + local = LocalGitea { + enabled = True + deployment = "docker" + port = 3000 + auto_start = True + } + } + + # Remote Gitea instance + GiteaConfig { + mode = "remote" + remote = RemoteGitea { + enabled = True + url = "https://gitea.example.com" + api_url = "https://gitea.example.com/api/v1" + } + } + """ + mode: "local" | "remote" + + local?: LocalGitea + remote?: RemoteGitea + + auth: GiteaAuth + + repositories: GiteaRepositories = GiteaRepositories {} + + workspace_features: WorkspaceFeatures = WorkspaceFeatures {} + + check: + mode == "local" and local != None or mode == "remote" and remote != None, \ + "Must configure local or remote based on mode" + + mode == "local" and local.enabled or mode == "remote" and remote.enabled, \ + "Selected Gitea mode must be enabled" + +schema LocalGitea: + """ + Local Gitea deployment configuration + + Supports Docker container or binary deployment. + """ + enabled: bool = False + deployment: "docker" | "binary" + port: int = 3000 + data_dir: str = "~/.provisioning/gitea" + auto_start: bool = False + + docker?: DockerGitea + binary?: BinaryGitea + + check: + enabled, "Local Gitea must be enabled if configured" + + port > 0 and port < 65536, \ + "Port must be between 1 and 65535" + + len(data_dir) > 0, "Data directory required" + + deployment == "docker" and docker != None or \ + deployment == "binary" and binary != None, \ + "Must configure docker or binary based on deployment type" + +schema DockerGitea: + """Docker-based Gitea deployment""" + image: str = "gitea/gitea:1.21" + container_name: str = "provisioning-gitea" + ssh_port: int = 222 + environment: {str: str} = { + "USER_UID" = "1000" + "USER_GID" = "1000" + "GITEA__database__DB_TYPE" = "sqlite3" + } + volumes: [str] = [ + "gitea-data:/data" + "/etc/timezone:/etc/timezone:ro" + "/etc/localtime:/etc/localtime:ro" + ] + restart_policy: str = "unless-stopped" + + check: + len(image) > 0, "Docker image required" + len(container_name) > 0, "Container name required" + ssh_port > 0 and ssh_port < 65536, "SSH port must be 1-65535" + +schema BinaryGitea: + """Binary-based Gitea deployment""" + binary_path: str + config_path: str + version: str = "1.21.0" + user: str = "git" + group: str = "git" + + check: + len(binary_path) > 0, "Binary path required" + len(config_path) > 0, "Config path required" + +schema RemoteGitea: + """ + Remote Gitea instance configuration + + Points to existing Gitea server. + """ + enabled: bool = False + url: str + api_url: str + + check: + enabled, "Remote Gitea must be enabled if configured" + + len(url) > 0 and url.startswith("http"), \ + "URL must start with http:// or https://" + + len(api_url) > 0 and api_url.startswith("http"), \ + "API URL must start with http:// or https://" + +schema GiteaAuth: + """ + Gitea authentication configuration + + Token-based authentication for API access. + Token should be stored in encrypted file (SOPS). + """ + token_path: str + username?: str + + check: + len(token_path) > 0, "Token path required" + +schema GiteaRepositories: + """ + Repository organization and naming configuration + + Defines organization structure and repository names. 
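    Example: with the defaults below, core code lives in the "provisioning" organization as "provisioning-core". +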
+ """ + organization: str = "provisioning" + core_repo: str = "provisioning-core" + extensions_repo: str = "provisioning-extensions" + platform_repo: str = "provisioning-platform" + workspaces_org: str = "workspaces" + + check: + len(organization) > 0, "Organization name required" + len(core_repo) > 0, "Core repo name required" + len(extensions_repo) > 0, "Extensions repo name required" + len(platform_repo) > 0, "Platform repo name required" + len(workspaces_org) > 0, "Workspaces org name required" + +schema WorkspaceFeatures: + """ + Workspace integration feature flags + + Controls which Gitea features are enabled for workspaces. + """ + git_integration: bool = True + locking_enabled: bool = True + webhooks_enabled: bool = False + auto_sync: bool = False + branch_protection: bool = False + + check: + git_integration or not locking_enabled, \ + "Locking requires git integration" + +schema GiteaRepository: + """ + Gitea repository metadata + + Used for creating and managing repositories. + """ + name: str + owner: str + description?: str + private: bool = False + auto_init: bool = True + default_branch: str = "main" + gitignore?: str + license?: str + readme?: str + + check: + len(name) > 0, "Repository name required" + len(owner) > 0, "Repository owner required" + +schema GiteaRelease: + """ + Gitea release configuration + + Used for publishing extensions and versioned artifacts. + """ + tag_name: str + release_name: str + body?: str + draft: bool = False + prerelease: bool = False + target_commitish: str = "main" + + check: + len(tag_name) > 0, "Tag name required" + len(release_name) > 0, "Release name required" + +schema GiteaIssue: + """ + Gitea issue configuration + + Used for workspace locking mechanism. + """ + title: str + body: str + labels: [str] = [] + assignee?: str + milestone?: int + + check: + len(title) > 0, "Issue title required" + +schema WorkspaceLock: + """ + Workspace lock metadata + + Stored as Gitea issue for distributed locking. + """ + workspace_name: str + lock_type: "read" | "write" | "deploy" + user: str + timestamp: str + operation?: str + expiry?: str + force_unlock: bool = False + + check: + len(workspace_name) > 0, "Workspace name required" + len(user) > 0, "User required" + len(timestamp) > 0, "Timestamp required" + +schema ExtensionPublishConfig: + """ + Extension publishing configuration + + Defines how extensions are packaged and published to Gitea. + """ + extension_path: str + version: str + release_notes?: str + include_patterns: [str] = ["*.nu", "*.k", "*.toml", "*.md"] + exclude_patterns: [str] = ["*.tmp", "*.log", ".git/*"] + compression: "tar.gz" | "zip" = "tar.gz" + + check: + len(extension_path) > 0, "Extension path required" + len(version) > 0, "Version required" + +schema GiteaWebhook: + """ + Gitea webhook configuration + + For future integration with automated workflows. 
+ """ + url: str + content_type: "json" | "form" = "json" + secret?: str + events: [str] = ["push", "pull_request", "release"] + active: bool = True + + check: + len(url) > 0 and url.startswith("http"), \ + "Webhook URL must start with http:// or https://" + +# Example configurations +_local_docker_gitea = GiteaConfig { + mode = "local" + local = LocalGitea { + enabled = True + deployment = "docker" + port = 3000 + data_dir = "~/.provisioning/gitea" + auto_start = True + docker = DockerGitea { + image = "gitea/gitea:1.21" + container_name = "provisioning-gitea" + } + } + auth = GiteaAuth { + token_path = "~/.provisioning/secrets/gitea-token.enc" + username = "provisioning" + } +} + +_remote_gitea = GiteaConfig { + mode = "remote" + remote = RemoteGitea { + enabled = True + url = "https://gitea.example.com" + api_url = "https://gitea.example.com/api/v1" + } + auth = GiteaAuth { + token_path = "~/.provisioning/secrets/gitea-token.enc" + username = "provisioning" + } +} diff --git a/k8s_deploy.k b/k8s_deploy.k new file mode 100644 index 0000000..d908a01 --- /dev/null +++ b/k8s_deploy.k @@ -0,0 +1,259 @@ +# Info: KCL core lib service schemas for provisioning (Provisioning) +# Author: JesusPerezLorenzo +# Release: 0.0.4 +# Date: 15-12-2023 +schema K8sPort: + """ + K8S Port settings + """ + name: str + typ?: str = "TCP" + container?: int + nodePort?: int + target?: int + +schema K8sKeyVal: + """ + K8S label,selector,env settings + """ + key: str + value: str + +schema K8sKeyPath: + """ + K8S key,path settings + """ + key: str + path: str + +schema K8sVolumeMount: + """ + K8S VolumeMounts settings + """ + name: str + readOnly: bool = False + mountPath: str + subPath?: str + +schema K8sVolumeClaim: + """ + K8S VolumeClaim settings + """ + name: str + storageClassName: "manual" | "nfs-client" | "rook-cephfs" = "manual" + modes: ["ReadWriteOnce" | "ReadOnlyMain" | "ReadWriteMany" | "ReadWriteOncePod"] = ["ReadWriteOnce"] + abbrev_mode?: ["RWO" | "ROX" | "RWX" | "RWOP"] = ["RWO"] + reclaimPolicy?: "Recycle" | "Retain" | "Delete" = "Retain" + storage?: str + typ: "volumeClaim" | "configMap" | "secret" | "" = "" + pvMode?: "unspecified" | "Filesystem" | "Block" + pvcMode?: "unspecified" | "Filesystem" | "Block" + hostPath?: str + +schema K8sConfigMap: + """ + K8S Volume ConfigMap settings + """ + name: str + +schema K8sSecret: + """ + K8S Volume Secret settings + """ + name: str + items: [K8sKeyPath] + +schema K8sVolume: + """ + K8S Volume settings + """ + name: str + typ: "volumeClaim" | "configMap" | "secret" = "volumeClaim" + persitentVolumeClaim?: K8sVolumeClaim + items?: [K8sKeyPath] + configMap?: K8sConfigMap + secret?: K8sSecret + +schema K8sService: + """ + K8S Service settings + """ + name: str + typ: "ClusterIP" | "NodePort" | "LoadBalancer" | "ExternalName" | "Headless" | "None" = "ClusterIP" + externalName?: str + proto: "TCP" = "TCP" + ports: [K8sPort] + selector?: [K8sKeyVal] + externaIPs?: [str] + +schema K8sContainers: + """ + K8S Container settings + """ + name: str = "main" + resources_requests?: K8sResources + resources_limits?: K8sResources + image: str + cmd?: str + imagePull: "IfNotPresent" | "Always" | "Never" = "IfNotPresent" + env?: [K8sKeyVal] + ports?: [K8sPort] + volumeMounts?: [K8sVolumeMount] + +schema K8sBackup: + """ + K8S Backup settings + """ + name: str + typ: str + mount_path: str + +schema K8sResources: + #"64Mi" + memory: str + #"250m" + cpu: str + +schema K8sDeploySpec: + """ + K8S Deployment Spec settings + """ + # K8s Deploy replica + replicas: int = 1 + 
+    hostUsers?: bool = True
+    # K8s Deploy containers
+    containers: [K8sContainers]
+    imagePullSecret?: str
+    nodeSelector?: [K8sKeyVal]
+    nodeName?: str
+    affinity?: K8sAffinity
+    # K8s Deploy Volumes
+    volumes?: [K8sVolume]
+    # K8s Secrets
+    secrets?: [K8sSecret]
+
+schema K8sAffinityMatch:
+    """
+    K8S Deployment Affinity Match settings
+    """
+    key: str
+    operator: "In" | "NotIn" | "Exists" | "DoesNotExist"
+    values: [str]
+
+schema K8sAffinityLabelSelector:
+    """
+    K8S Deployment Affinity Label Selector settings
+    """
+    typ: "requiredDuringSchedulingIgnoredDuringExecution" | "preferredDuringSchedulingIgnoredDuringExecution" = "requiredDuringSchedulingIgnoredDuringExecution"
+    labelSelector: [K8sAffinityMatch]
+    # example: topology.kubernetes.io/zone
+    topologyKey?: str
+    matchLabelKeys?: [str]
+
+schema K8sPrxyTLS:
+    """
+    K8S Deployment Proxy TLS settings
+    """
+    httpsRedirect?: bool = False
+    mode?: "SIMPLE" | "PASSTHROUGH" | "MULTI" | "" = "SIMPLE"
+    credentialName?: str
+
+schema K8sPrxyPort:
+    """
+    K8S Proxy Port settings
+    """
+    name: str
+    number?: int
+    proto: "HTTP" | "HTTPS" | "TCP" = "HTTPS"
+
+schema K8sPrxyGatewayServer:
+    """
+    K8S Deployment Proxy Gateway Server settings
+    """
+    port: K8sPrxyPort
+    tls?: K8sPrxyTLS
+    hosts?: [str]
+
+schema K8sPrxyVirtualServiceRoute:
+    """
+    K8S Deployment Proxy Virtual Service Route settings
+    """
+    port_number: int
+    host: str
+
+schema K8sPrxyVirtualServiceMatchURL:
+    """
+    K8S Deployment Proxy Virtual Service Match URL settings
+    """
+    port?: int
+    sniHost?: [str]
+
+schema K8sPrxyVirtualServiceMatch:
+    """
+    K8S Deployment Proxy Virtual Service Match settings
+    """
+    typ: "tcp" | "http" | "tls"
+    location?: [K8sPrxyVirtualServiceMatchURL]
+    route_destination?: [K8sPrxyVirtualServiceRoute]
+
+schema K8sPrxyVirtualService:
+    """
+    K8S Deployment Proxy Virtual Service settings
+    """
+    hosts: [str]
+    gateways: [str]
+    matches?: [K8sPrxyVirtualServiceMatch]
+
+schema K8sAntiAffinityLabelSelector(K8sAffinityLabelSelector):
+    """
+    K8S Deployment AntiAffinity Label Selector settings
+    """
+    weight: int = 100
+
+schema K8sAffinity:
+    """
+    K8S Deployment Affinity settings
+    """
+    affinity?: K8sAffinityLabelSelector
+    antiAffinity?: K8sAntiAffinityLabelSelector
+
+schema K8sDefs:
+    name: str
+    ns: str
+    domain: str
+    full_domain: str
+    primary_dom: str
+    cluster_domain: str
+
+schema K8sDeploy:
+    """
+    K8S Deployment settings
+    """
+    # K8s Deploy Name
+    name: str
+    # K8s Deploy name-in-filenames
+    name_in_files: str = "${name}"
+    # K8s NameSpace
+    namespace: str = "default"
+    # K8s Create NameSpace
+    create_ns: bool = False
+    full_domain?: str
+    # K8s Deploy labels
+    labels: [K8sKeyVal] = [K8sKeyVal {key: "${name}", value: "${name}"}]
+    sel_labels: [K8sKeyVal] = labels
+    tpl_labels: [K8sKeyVal] = labels
+    spec: K8sDeploySpec
+    # Cluster Ingress Proxy to use
+    prxy?: "istio"
+    prxy_ns?: str = "istio-system"
+    prxyGatewayServers?: [K8sPrxyGatewayServer]
+    prxyVirtualService?: K8sPrxyVirtualService
+    # TLS certs path for service
+    tls_path?: str = "ssl"
+    # Create bin/apply.sh
+    bin_apply: bool = True
+    # K8s Service
+    service?: K8sService
+    # Service Backup K8s JOB
+    backups?: [K8sBackup]
+
diff --git a/kcl.mod b/kcl.mod
new file mode 100644
index 0000000..03b4767
--- /dev/null
+++ b/kcl.mod
@@ -0,0 +1,5 @@
+[package]
+name = "provisioning"
+edition = "v0.11.3"
+version = "0.0.1"
+
diff --git a/kcl.mod.lock b/kcl.mod.lock
new file mode 100644
index 0000000..e69de29
diff --git a/lib.k b/lib.k
new file mode 100644
index 0000000..7c852c4
--- /dev/null
+++ b/lib.k
@@ -0,0 +1,70 @@
+# Info: KCL core lib schemas for provisioning (Provisioning)
+# Author: JesusPerezLorenzo
+# Release: 0.0.4
+# Date: 15-12-2023
+schema StorageVol:
+    """
+    StorageVol settings
+    """
+    name: str
+    size: int = 0
+    total: int = size
+    type: "ext4" | "xfs" | "btrfs" | "raw" | "zfs" = "ext4"
+    mount: bool = True
+    mount_path?: str
+    fstab: bool = True
+
+    check:
+        len(name) > 0, "Check name value"
+
+#mount == True and mount_path != Undefined
+schema Storage(StorageVol):
+    """
+    Storage settings
+    """
+    parts?: [StorageVol] = []
+
+    check:
+        len(name) > 0, "Check name value"
+        total >= sum([p.size for p in parts]), "🛑 Total of parts ${sum([p.size for p in parts])} exceeds total storage ${total}"
+
+schema TaskServDef:
+    """
+    TaskServDef settings
+    """
+    name: str
+    install_mode: "getfile" | "library" | "server" | "library-server" | "server-library" = "library"
+    profile: str = "default"
+    target_save_path: str = ""
+
+schema ClusterDef:
+    """
+    ClusterDef settings
+    """
+    name: str
+    profile: str = "default"
+    target_save_path: str = ""
+
+schema ScaleData:
+    """
+    scale data
+    """
+    def: str
+    disabled: bool = False
+    mode: "auto" | "manual" | "ondemand" = "manual"
+    expire?: str
+    from?: str
+    to?: str
+
+schema ScaleResource:
+    """
+    scale server settings
+    """
+    default: ScaleData
+    fallback?: ScaleData
+    up?: ScaleData
+    down?: ScaleData
+    min?: ScaleData
+    max?: ScaleData
+    path: str = "/etc/scale_provisioning"
+
diff --git a/main.k b/main.k
new file mode 100644
index 0000000..64705bc
--- /dev/null
+++ b/main.k
@@ -0,0 +1,56 @@
+# Main entry point for provisioning KCL module
+# This file imports all schemas to make them discoverable as package submodules
+# Author: JesusPerezLorenzo
+# Release: 0.1.0
+# Date: 29-09-2025
+
+# ============================================================================
+# IMPORTANT: KCL Import Pattern
+# ============================================================================
+# This module uses the DIRECT SUBMODULE IMPORTS pattern (no re-exports).
+#
+# WHY NO RE-EXPORTS?
+# Re-exports like "Settings = settings.Settings" create immutable variable
+# assignments in KCL, causing ImmutableError (E1001) when extensions try to
+# import them. KCL v0.11.3 doesn't support Python-style namespace re-exports.
+#
+# CORRECT USAGE IN EXTENSIONS:
+#   import provisioning.settings      # For Settings, SecretProvider, SopsConfig
+#   import provisioning.defaults      # For ServerDefaults schemas
+#   import provisioning.lib           # For Storage, TaskServDef, ClusterDef
+#   import provisioning.server        # For Server schema
+#   import provisioning.cluster       # For Cluster schema
+#   import provisioning.dependencies  # For TaskservDependencies, HealthCheck
+#   import provisioning.workflows     # For BatchWorkflow, BatchOperation
+#   import provisioning.batch         # For BatchScheduler, BatchExecutor
+#   import provisioning.version       # For Version, TaskservVersion
+#   import provisioning.k8s_deploy    # For K8s* schemas
+#   import provisioning.services      # For ServiceRegistry, ServiceDefinition
+#
+# EXAMPLE:
+#   import provisioning.lib as lib
+#   import provisioning.settings as settings
+#
+#   _storage = lib.Storage {
+#       name = "/dev/sda"
+#       size = 100
+#   }
+#
+# ANTI-PATTERN (DO NOT USE):
+#   Settings = settings.Settings  # ❌ Causes ImmutableError!
+#   Server = server.Server        # ❌ Causes ImmutableError!
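+#
+# ANOTHER EXAMPLE (a sketch with hypothetical values; K8sPort is defined in k8s_deploy.k):
+#   import provisioning.k8s_deploy as k8s
+#
+#   _http_port = k8s.K8sPort {
+#       name = "http"
+#       container = 8080
+#   }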
+#
+# ============================================================================

+# Import core module schemas to make them part of the provisioning package
+import .settings
+import .defaults
+import .lib
+import .server
+import .cluster
+import .dependencies
+import .workflows
+import .batch
+import .version
+import .k8s_deploy
+import .services
diff --git a/modes.k b/modes.k
new file mode 100644
index 0000000..6a8766a
--- /dev/null
+++ b/modes.k
@@ -0,0 +1,830 @@
+# Info: KCL execution mode schemas for provisioning
+# Author: Mode System Implementation
+# Release: 1.0.0
+# Date: 2025-10-06
+
+"""
+Execution mode schemas defining deployment patterns and service configurations
+
+Modes:
+    - solo: Single developer, local development
+    - multi-user: Team collaboration with shared services
+    - cicd: CI/CD pipeline execution
+    - enterprise: Production enterprise deployment
+"""
+
+import provisioning.settings as cfg
+import provisioning.oci_registry as oci
+
+schema ExecutionMode:
+    """
+    Base execution mode schema defining common configuration
+
+    All execution modes inherit from this base schema and must
+    specify service deployment strategy, authentication, and
+    workspace policies.
+    """
+
+    # Mode identifier
+    mode_name: "solo" | "multi-user" | "cicd" | "enterprise"
+
+    # Human-readable description
+    description: str
+
+    # Authentication strategy
+    authentication: AuthenticationStrategy
+
+    # Service deployment configurations
+    services: ServiceDeployments
+
+    # Extension source configuration
+    extensions: ExtensionConfig
+
+    # Workspace management policies
+    workspaces: WorkspacePolicy
+
+    # Security configuration
+    security: SecurityConfig
+
+    # Resource limits (optional, for multi-user/enterprise)
+    resource_limits?: ResourceLimits
+
+    check:
+        len(description) > 0, "Mode description required"
+
+schema AuthenticationStrategy:
+    """Authentication configuration for mode"""
+
+    # Authentication type
+    auth_type: "none" | "token" | "mtls" | "oauth" | "kms"
+
+    # Token configuration (for token auth)
+    token_config?: TokenConfig
+
+    # mTLS configuration (for mtls auth)
+    mtls_config?: MTLSConfig
+
+    # OAuth configuration (for oauth auth)
+    oauth_config?: OAuthConfig
+
+    # SSH key storage location
+    ssh_key_storage: "local" | "kms" | "vault" = "local"
+
+    check:
+        auth_type == "none" or (
+            (auth_type == "token" and token_config != Undefined) or
+            (auth_type == "mtls" and mtls_config != Undefined) or
+            (auth_type == "oauth" and oauth_config != Undefined) or
+            (auth_type == "kms")
+        ), "Auth config must match auth type"
+
+schema TokenConfig:
+    """Token-based authentication configuration"""
+
+    token_path: str
+    token_format: "jwt" | "opaque" = "jwt"
+    expiry_seconds: int = 86400  # 24 hours
+    refresh_enabled: bool = True
+
+    check:
+        len(token_path) > 0, "Token path required"
+        expiry_seconds > 0, "Expiry must be positive"
+
+schema MTLSConfig:
+    """Mutual TLS authentication configuration"""
+
+    client_cert_path: str
+    client_key_path: str
+    ca_cert_path: str
+    verify_server: bool = True
+
+    check:
+        len(client_cert_path) > 0, "Client cert path required"
+        len(client_key_path) > 0, "Client key path required"
+        len(ca_cert_path) > 0, "CA cert path required"
+
+schema OAuthConfig:
+    """OAuth 2.0 authentication configuration"""
+
+    provider_url: str
+    client_id: str
+    client_secret_path: str
+    scopes: [str] = ["read", "write"]
+    redirect_uri?: str
+
+    check:
+        len(provider_url) > 0, "Provider URL required"
+        len(client_id) > 0, "Client ID required"
+
+schema ServiceDeployments:
+
"""Service deployment configuration""" + + orchestrator: ServiceConfig + control_center?: ServiceConfig + coredns?: ServiceConfig + gitea?: ServiceConfig + oci_registry: oci.OCIRegistryConfig + + # Custom services + custom_services?: {str: ServiceConfig} + +schema ServiceConfig: + """Individual service configuration""" + + # Deployment location + deployment: "local" | "remote" | "k8s" | "disabled" + + # For local deployment + local_config?: LocalServiceConfig + + # For remote deployment + remote_config?: RemoteServiceConfig + + # For Kubernetes deployment + k8s_config?: K8sServiceConfig + + # Auto-start service + auto_start: bool = False + + # Health check configuration + health_check?: HealthCheck + + check: + deployment == "disabled" or ( + (deployment == "local" and local_config != Undefined) or + (deployment == "remote" and remote_config != Undefined) or + (deployment == "k8s" and k8s_config != Undefined) + ), "Service config must match deployment type" + +schema LocalServiceConfig: + """Local service deployment configuration""" + + binary_path?: str + config_path?: str + data_dir: str + port: int + bind_address: str = "127.0.0.1" + tls_enabled: bool = False + + check: + port > 0 and port < 65536, "Port must be 1-65535" + len(data_dir) > 0, "Data directory required" + +schema RemoteServiceConfig: + """Remote service configuration""" + + endpoint: str + port?: int + tls_enabled: bool = True + verify_ssl: bool = True + timeout: int = 30 + retries: int = 3 + + check: + len(endpoint) > 0, "Endpoint required" + timeout > 0, "Timeout must be positive" + +schema K8sServiceConfig: + """Kubernetes service deployment configuration""" + + namespace: str = "provisioning" + deployment_name: str + service_name: str + replicas: int = 1 + image: str + image_pull_policy: "Always" | "IfNotPresent" | "Never" = "IfNotPresent" + resources?: K8sResources + + check: + len(namespace) > 0, "Namespace required" + len(deployment_name) > 0, "Deployment name required" + replicas > 0, "Replicas must be positive" + +schema K8sResources: + """Kubernetes resource requirements""" + + cpu_request: str = "100m" + cpu_limit: str = "500m" + memory_request: str = "128Mi" + memory_limit: str = "512Mi" + +schema HealthCheck: + """Service health check configuration""" + + enabled: bool = True + endpoint: str = "/health" + interval: int = 10 # seconds + timeout: int = 5 + healthy_threshold: int = 2 + unhealthy_threshold: int = 3 + + check: + interval > 0, "Interval must be positive" + timeout > 0 and timeout < interval, "Timeout must be less than interval" + +schema ExtensionConfig: + """Extension source and distribution configuration""" + + # Extension source: local files, gitea, or OCI registry + source: "local" | "gitea" | "oci" | "mixed" + + # Local path for extensions (for local source) + local_path?: str + + # Gitea configuration (for gitea source) + gitea_config?: GiteaConfig + + # OCI registry configuration (for oci source) + oci_registry?: OCIExtensionConfig + + # Allow mixed sources + allow_mixed: bool = False + + check: + source == "local" and local_path != Undefined or + source == "gitea" and gitea_config != Undefined or + source == "oci" and oci_registry != Undefined or + source == "mixed", "Extension config must match source type" + +schema GiteaConfig: + """Gitea extension repository configuration""" + + url: str + organization: str = "provisioning" + username?: str + token_path?: str + verify_ssl: bool = True + + check: + len(url) > 0, "Gitea URL required" + +schema OCIExtensionConfig: + """OCI registry 
extension configuration""" + + enabled: bool = True + endpoint: str + namespace: str = "provisioning-extensions" + auth_token_path?: str + tls_enabled: bool = True + verify_ssl: bool = True + cache_dir: str = "~/.provisioning/oci-cache" + + check: + len(endpoint) > 0, "OCI endpoint required" + len(namespace) > 0, "OCI namespace required" + +schema WorkspacePolicy: + """Workspace management policies""" + + # Workspace locking + locking: "disabled" | "enabled" | "required" + + # Lock provider (if locking enabled) + lock_provider?: "gitea" | "etcd" | "redis" | "filesystem" + + # Git integration requirement + git_integration: "disabled" | "optional" | "required" + + # Workspace isolation + isolation: "none" | "user" | "strict" = "user" + + # Maximum concurrent workspaces per user + max_workspaces_per_user?: int + + check: + locking == "disabled" or lock_provider != Undefined, \ + "Lock provider required when locking enabled" + git_integration in ["disabled", "optional", "required"], \ + "Invalid git integration setting" + +schema SecurityConfig: + """Security policies for mode""" + + # Encryption requirements + encryption_at_rest: bool = False + encryption_in_transit: bool = False + + # Secret management + secret_provider: cfg.SecretProvider = cfg.SecretProvider {} + + # DNS modification policy + dns_modification: "none" | "coredns" | "system" = "none" + + # Audit logging + audit_logging: bool = False + audit_log_path?: str + + # Network policies + network_isolation: bool = False + + check: + not audit_logging or audit_log_path != Undefined, \ + "Audit log path required when audit logging enabled" + +schema ResourceLimits: + """Resource limits for multi-user/enterprise modes""" + + # Per-user limits + max_servers_per_user: int = 10 + max_cpu_cores_per_user: int = 32 + max_memory_gb_per_user: int = 128 + max_storage_gb_per_user: int = 500 + + # Global limits + max_total_servers?: int + max_total_cpu_cores?: int + max_total_memory_gb?: int + + check: + max_servers_per_user > 0, "Max servers must be positive" + max_cpu_cores_per_user > 0, "Max CPU must be positive" + max_memory_gb_per_user > 0, "Max memory must be positive" + +# ============================================================================ +# Concrete Mode Schemas +# ============================================================================ + +schema SoloMode(ExecutionMode): + """ + Solo mode: Single developer local development + + Characteristics: + - No authentication required + - Local service deployment + - Optional OCI registry for extension testing + - No workspace locking + - Minimal security constraints + + Example: + SoloMode { + mode_name = "solo" + description = "Local development environment" + } + """ + + mode_name: "solo" = "solo" + description: str = "Single developer local development mode" + + authentication: AuthenticationStrategy = AuthenticationStrategy { + auth_type = "none" + ssh_key_storage = "local" + } + + services: ServiceDeployments = ServiceDeployments { + orchestrator = ServiceConfig { + deployment = "local" + auto_start = True + local_config = LocalServiceConfig { + data_dir = "~/.provisioning/orchestrator" + port = 8080 + } + } + control_center = ServiceConfig { + deployment = "disabled" + } + coredns = ServiceConfig { + deployment = "disabled" + } + gitea = ServiceConfig { + deployment = "disabled" + } + oci_registry = oci.OCIRegistryConfig { + deployment = "local" + type = "zot" + endpoint = "localhost" + port = 5000 + tls_enabled = False + auth_required = False + local = oci.LocalOCIConfig { + 
data_dir = "~/.provisioning/oci-registry" + config_path = "~/.provisioning/oci-registry/config.json" + auto_start = False + } + namespaces = oci.OCINamespaces { + extensions = "dev-extensions" + kcl_packages = "dev-kcl" + platform_images = "dev-platform" + test_images = "dev-test" + } + } + } + + extensions: ExtensionConfig = ExtensionConfig { + source = "local" + local_path = "./provisioning/extensions" + allow_mixed = True + } + + workspaces: WorkspacePolicy = WorkspacePolicy { + locking = "disabled" + git_integration = "optional" + isolation = "none" + } + + security: SecurityConfig = SecurityConfig { + encryption_at_rest = False + encryption_in_transit = False + dns_modification = "none" + audit_logging = False + network_isolation = False + } + +schema MultiUserMode(ExecutionMode): + """ + Multi-user mode: Team collaboration with shared services + + Characteristics: + - Token-based authentication + - Remote shared services + - OCI registry for extension distribution + - Workspace locking enabled + - Git integration required + - User resource limits + + Example: + MultiUserMode { + mode_name = "multi-user" + description = "Team collaboration environment" + } + """ + + mode_name: "multi-user" = "multi-user" + description: str = "Team collaboration with shared services" + + authentication: AuthenticationStrategy = AuthenticationStrategy { + auth_type = "token" + token_config = TokenConfig { + token_path = "~/.provisioning/tokens/auth" + token_format = "jwt" + expiry_seconds = 86400 + refresh_enabled = True + } + ssh_key_storage = "local" + } + + services: ServiceDeployments = ServiceDeployments { + orchestrator = ServiceConfig { + deployment = "remote" + remote_config = RemoteServiceConfig { + endpoint = "orchestrator.company.local" + port = 8080 + tls_enabled = True + verify_ssl = True + timeout = 30 + retries = 3 + } + } + control_center = ServiceConfig { + deployment = "remote" + remote_config = RemoteServiceConfig { + endpoint = "control.company.local" + port = 8081 + tls_enabled = True + } + } + coredns = ServiceConfig { + deployment = "remote" + remote_config = RemoteServiceConfig { + endpoint = "dns.company.local" + port = 53 + tls_enabled = False + } + } + gitea = ServiceConfig { + deployment = "remote" + remote_config = RemoteServiceConfig { + endpoint = "git.company.local" + port = 443 + tls_enabled = True + } + } + oci_registry = oci.OCIRegistryConfig { + deployment = "remote" + type = "harbor" + endpoint = "harbor.company.local" + tls_enabled = True + auth_required = True + remote = oci.RemoteOCIConfig { + timeout = 30 + retries = 3 + verify_ssl = True + } + namespaces = oci.OCINamespaces { + extensions = "provisioning-extensions" + kcl_packages = "provisioning-kcl" + platform_images = "provisioning-platform" + test_images = "provisioning-test" + } + } + } + + extensions: ExtensionConfig = ExtensionConfig { + source = "oci" + oci_registry = OCIExtensionConfig { + enabled = True + endpoint = "harbor.company.local" + namespace = "provisioning-extensions" + auth_token_path = "~/.provisioning/tokens/oci" + tls_enabled = True + verify_ssl = True + cache_dir = "~/.provisioning/oci-cache" + } + } + + workspaces: WorkspacePolicy = WorkspacePolicy { + locking = "enabled" + lock_provider = "gitea" + git_integration = "required" + isolation = "user" + max_workspaces_per_user = 5 + } + + security: SecurityConfig = SecurityConfig { + encryption_at_rest = False + encryption_in_transit = True + dns_modification = "coredns" + audit_logging = True + audit_log_path = 
"/var/log/provisioning/audit.log" + network_isolation = False + } + + resource_limits: ResourceLimits = ResourceLimits { + max_servers_per_user = 10 + max_cpu_cores_per_user = 32 + max_memory_gb_per_user = 128 + max_storage_gb_per_user = 500 + max_total_servers = 100 + max_total_cpu_cores = 320 + max_total_memory_gb = 1024 + } + +schema CICDMode(ExecutionMode): + """ + CI/CD mode: Automated pipeline execution + + Characteristics: + - Token or mTLS authentication + - Remote service endpoints + - OCI registry for artifacts + - No workspace locking (stateless) + - Git integration required + - Ephemeral workspaces + + Example: + CICDMode { + mode_name = "cicd" + description = "CI/CD pipeline environment" + } + """ + + mode_name: "cicd" = "cicd" + description: str = "CI/CD pipeline automated execution" + + authentication: AuthenticationStrategy = AuthenticationStrategy { + auth_type = "token" + token_config = TokenConfig { + token_path = "/var/run/secrets/provisioning/token" + token_format = "jwt" + expiry_seconds = 3600 # 1 hour + refresh_enabled = False + } + ssh_key_storage = "kms" + } + + services: ServiceDeployments = ServiceDeployments { + orchestrator = ServiceConfig { + deployment = "remote" + remote_config = RemoteServiceConfig { + endpoint = "orchestrator.cicd.local" + port = 8080 + tls_enabled = True + verify_ssl = True + timeout = 60 + retries = 5 + } + } + control_center = ServiceConfig { + deployment = "disabled" + } + coredns = ServiceConfig { + deployment = "remote" + remote_config = RemoteServiceConfig { + endpoint = "dns.cicd.local" + port = 53 + } + } + gitea = ServiceConfig { + deployment = "remote" + remote_config = RemoteServiceConfig { + endpoint = "git.cicd.local" + port = 443 + tls_enabled = True + } + } + oci_registry = oci.OCIRegistryConfig { + deployment = "remote" + type = "harbor" + endpoint = "registry.cicd.local" + tls_enabled = True + auth_required = True + remote = oci.RemoteOCIConfig { + timeout = 60 + retries = 5 + verify_ssl = True + } + namespaces = oci.OCINamespaces { + extensions = "cicd-extensions" + kcl_packages = "cicd-kcl" + platform_images = "cicd-platform" + test_images = "cicd-test" + } + } + } + + extensions: ExtensionConfig = ExtensionConfig { + source = "oci" + oci_registry = OCIExtensionConfig { + enabled = True + endpoint = "registry.cicd.local" + namespace = "cicd-extensions" + auth_token_path = "/var/run/secrets/provisioning/oci-token" + tls_enabled = True + verify_ssl = True + cache_dir = "/tmp/provisioning-oci-cache" + } + } + + workspaces: WorkspacePolicy = WorkspacePolicy { + locking = "disabled" + git_integration = "required" + isolation = "strict" + max_workspaces_per_user = 1 + } + + security: SecurityConfig = SecurityConfig { + encryption_at_rest = True + encryption_in_transit = True + dns_modification = "coredns" + audit_logging = True + audit_log_path = "/var/log/provisioning/cicd-audit.log" + network_isolation = True + } + + resource_limits: ResourceLimits = ResourceLimits { + max_servers_per_user = 5 + max_cpu_cores_per_user = 16 + max_memory_gb_per_user = 64 + max_storage_gb_per_user = 200 + } + +schema EnterpriseMode(ExecutionMode): + """ + Enterprise mode: Production enterprise deployment + + Characteristics: + - mTLS or OAuth authentication + - Kubernetes-deployed services + - Enterprise OCI registry (Harbor HA) + - Workspace locking required + - Git integration required + - Full encryption and auditing + - Strict resource limits + + Example: + EnterpriseMode { + mode_name = "enterprise" + description = "Production enterprise 
environment" + } + """ + + mode_name: "enterprise" = "enterprise" + description: str = "Production enterprise deployment with full security" + + authentication: AuthenticationStrategy = AuthenticationStrategy { + auth_type = "mtls" + mtls_config = MTLSConfig { + client_cert_path = "/etc/provisioning/certs/client.crt" + client_key_path = "/etc/provisioning/certs/client.key" + ca_cert_path = "/etc/provisioning/certs/ca.crt" + verify_server = True + } + ssh_key_storage = "kms" + } + + services: ServiceDeployments = ServiceDeployments { + orchestrator = ServiceConfig { + deployment = "k8s" + k8s_config = K8sServiceConfig { + namespace = "provisioning-system" + deployment_name = "orchestrator" + service_name = "orchestrator-svc" + replicas = 3 + image = "harbor.enterprise.local/provisioning/orchestrator:latest" + resources = K8sResources { + cpu_request = "500m" + cpu_limit = "2000m" + memory_request = "1Gi" + memory_limit = "4Gi" + } + } + } + control_center = ServiceConfig { + deployment = "k8s" + k8s_config = K8sServiceConfig { + namespace = "provisioning-system" + deployment_name = "control-center" + service_name = "control-center-svc" + replicas = 2 + image = "harbor.enterprise.local/provisioning/control-center:latest" + } + } + coredns = ServiceConfig { + deployment = "k8s" + k8s_config = K8sServiceConfig { + namespace = "kube-system" + deployment_name = "coredns" + service_name = "kube-dns" + replicas = 2 + image = "registry.k8s.io/coredns/coredns:latest" + } + } + gitea = ServiceConfig { + deployment = "k8s" + k8s_config = K8sServiceConfig { + namespace = "provisioning-system" + deployment_name = "gitea" + service_name = "gitea-svc" + replicas = 2 + image = "gitea/gitea:latest" + } + } + oci_registry = oci.OCIRegistryConfig { + deployment = "remote" + type = "harbor" + endpoint = "harbor.enterprise.local" + tls_enabled = True + auth_required = True + remote = oci.RemoteOCIConfig { + timeout = 60 + retries = 5 + verify_ssl = True + } + namespaces = oci.OCINamespaces { + extensions = "prod-extensions" + kcl_packages = "prod-kcl" + platform_images = "prod-platform" + test_images = "test-images" + } + } + } + + extensions: ExtensionConfig = ExtensionConfig { + source = "oci" + oci_registry = OCIExtensionConfig { + enabled = True + endpoint = "harbor.enterprise.local" + namespace = "prod-extensions" + auth_token_path = "/etc/provisioning/tokens/oci" + tls_enabled = True + verify_ssl = True + cache_dir = "/var/cache/provisioning/oci" + } + } + + workspaces: WorkspacePolicy = WorkspacePolicy { + locking = "required" + lock_provider = "etcd" + git_integration = "required" + isolation = "strict" + max_workspaces_per_user = 3 + } + + security: SecurityConfig = SecurityConfig { + encryption_at_rest = True + encryption_in_transit = True + secret_provider = cfg.SecretProvider { + provider = "kms" + kms_config = cfg.KmsConfig { + server_url = "https://kms.enterprise.local" + auth_method = "certificate" + client_cert_path = "/etc/provisioning/certs/kms-client.crt" + client_key_path = "/etc/provisioning/certs/kms-client.key" + ca_cert_path = "/etc/provisioning/certs/kms-ca.crt" + verify_ssl = True + } + } + dns_modification = "system" + audit_logging = True + audit_log_path = "/var/log/provisioning/enterprise-audit.log" + network_isolation = True + } + + resource_limits: ResourceLimits = ResourceLimits { + max_servers_per_user = 20 + max_cpu_cores_per_user = 64 + max_memory_gb_per_user = 256 + max_storage_gb_per_user = 1000 + max_total_servers = 500 + max_total_cpu_cores = 2000 + max_total_memory_gb = 
8192 + } diff --git a/oci_registry.k b/oci_registry.k new file mode 100644 index 0000000..12515ca --- /dev/null +++ b/oci_registry.k @@ -0,0 +1,487 @@ +# Info: KCL OCI registry schemas for provisioning +# Author: Mode System Implementation +# Release: 1.0.0 +# Date: 2025-10-06 + +""" +OCI (Open Container Initiative) registry configuration schemas + +Supports multiple registry implementations: + - distribution: Docker Registry v2 (lightweight) + - zot: Cloud-native OCI registry + - harbor: Enterprise-grade registry with security scanning + - artifactory: JFrog Artifactory with OCI support + +Purpose: + - Extension distribution via OCI artifacts + - KCL package distribution + - Platform container images + - Test environment images +""" + +import regex + +schema OCIRegistryConfig: + """ + OCI registry configuration for artifact and image distribution + + Examples: + # Local development registry + OCIRegistryConfig { + deployment = "local" + type = "zot" + endpoint = "localhost" + port = 5000 + tls_enabled = False + local = LocalOCIConfig { + data_dir = "~/.provisioning/oci-registry" + config_path = "~/.provisioning/oci-registry/config.json" + } + } + + # Remote enterprise registry + OCIRegistryConfig { + deployment = "remote" + type = "harbor" + endpoint = "harbor.company.local" + tls_enabled = True + auth_required = True + remote = RemoteOCIConfig { + verify_ssl = True + } + } + """ + + # Deployment type + deployment: "local" | "remote" | "disabled" + + # Registry implementation type + type: "distribution" | "zot" | "harbor" | "artifactory" + + # Registry endpoint (hostname or IP) + endpoint: str + + # Registry port (optional, defaults by type) + port?: int = 5000 + + # TLS/SSL configuration + tls_enabled: bool = False + tls_cert_path?: str + tls_key_path?: str + ca_cert_path?: str + + # Authentication + auth_required: bool = False + username?: str + password_path?: str # Path to password file + auth_token_path?: str # Path to auth token + + # Local deployment configuration + local?: LocalOCIConfig + + # Remote connection configuration + remote?: RemoteOCIConfig + + # Artifact namespaces/repositories + namespaces: OCINamespaces + + # Registry-specific features + features?: OCIRegistryFeatures + + check: + len(endpoint) > 0, "OCI registry endpoint required" + port == Undefined or (port > 0 and port < 65536), \ + "Port must be 1-65535" + deployment == "disabled" or ( + (deployment == "local" and local != Undefined) or + (deployment == "remote" and remote != Undefined) + ), "Config must match deployment type" + not auth_required or ( + username != Undefined or auth_token_path != Undefined + ), "Authentication config required when auth enabled" + not tls_enabled or ( + tls_cert_path != Undefined and tls_key_path != Undefined + ) or deployment == "remote", "TLS cert/key required for local TLS" + +schema LocalOCIConfig: + """ + Local OCI registry deployment configuration + + Used for: + - Solo mode development + - Testing OCI artifact distribution + - Offline extension development + """ + + # Data storage directory + data_dir: str + + # Registry configuration file path + config_path: str + + # Auto-start registry on provisioning startup + auto_start: bool = False + + # Binary path (optional, uses PATH if not specified) + binary_path?: str + + # Log file path + log_file?: str = "${data_dir}/registry.log" + + # HTTP configuration + http_config?: LocalHTTPConfig + + # Storage configuration + storage_config?: LocalStorageConfig + + check: + len(data_dir) > 0, "Data directory required" + 
len(config_path) > 0, "Config path required" + regex.match(data_dir, r"^[~/]"), \ + "Data dir must be absolute or home-relative path" + +schema LocalHTTPConfig: + """HTTP configuration for local registry""" + + listen_address: str = "127.0.0.1" + listen_port: int = 5000 + read_timeout: int = 60 # seconds + write_timeout: int = 60 + idle_timeout: int = 120 + + check: + listen_port > 0 and listen_port < 65536, "Port must be 1-65535" + +schema LocalStorageConfig: + """Storage configuration for local registry""" + + # Storage backend + backend: "filesystem" | "s3" | "azure" = "filesystem" + + # Filesystem storage + rootdirectory?: str + + # Garbage collection + gc_enabled: bool = True + gc_interval: int = 3600 # seconds + + # Deduplication + dedupe_enabled: bool = True + +schema RemoteOCIConfig: + """ + Remote OCI registry connection configuration + + Used for: + - Multi-user shared registry + - CI/CD artifact registry + - Enterprise production registry + """ + + # Connection timeout (seconds) + timeout: int = 30 + + # Retry configuration + retries: int = 3 + retry_delay: int = 5 # seconds + retry_backoff: float = 2.0 # exponential backoff multiplier + + # SSL/TLS verification + verify_ssl: bool = True + + # Proxy configuration (optional) + http_proxy?: str + https_proxy?: str + no_proxy?: [str] + + # Rate limiting + rate_limit?: RateLimitConfig + + check: + timeout > 0, "Timeout must be positive" + retries >= 0, "Retries must be non-negative" + retry_backoff > 1.0, "Backoff multiplier must be > 1.0" + +schema RateLimitConfig: + """Rate limiting configuration for remote registry""" + + # Requests per second + requests_per_second: int = 10 + + # Burst size + burst: int = 20 + + # Per-operation limits (optional) + pull_limit?: int + push_limit?: int + + check: + requests_per_second > 0, "Rate limit must be positive" + burst > 0, "Burst size must be positive" + +schema OCINamespaces: + """ + OCI registry namespaces for different artifact types + + Namespaces organize artifacts by purpose and allow + different access control policies per namespace. + """ + + # Extension artifacts (providers, taskservs, clusters) + extensions: str = "provisioning-extensions" + + # KCL package artifacts + kcl_packages: str = "provisioning-kcl" + + # Platform service images (orchestrator, control-center) + platform_images: str = "provisioning-platform" + + # Test environment images + test_images: str = "provisioning-test" + + # Custom user-defined namespaces + custom?: {str: str} + + check: + len(extensions) > 0, "Extensions namespace required" + len(kcl_packages) > 0, "KCL packages namespace required" + len(platform_images) > 0, "Platform images namespace required" + len(test_images) > 0, "Test images namespace required" + # Validate namespace naming convention + regex.match(extensions, r"^[a-z0-9][a-z0-9-]*[a-z0-9]$"), \ + "Extensions namespace must be lowercase alphanumeric with hyphens" + regex.match(kcl_packages, r"^[a-z0-9][a-z0-9-]*[a-z0-9]$"), \ + "KCL packages namespace must be lowercase alphanumeric with hyphens" + +schema OCIRegistryFeatures: + """ + Registry-specific feature configuration + + Different registry implementations support different features. + This schema allows enabling/disabling features based on + registry capabilities. 
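+
+    Example (a minimal sketch for a Harbor-style registry; values are illustrative):
+        OCIRegistryFeatures {
+            vulnerability_scanning = True
+            scanner_type = "trivy"
+            gc_schedule = "0 3 * * *"
+        }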
+ """ + + # Vulnerability scanning (Harbor, Artifactory) + vulnerability_scanning: bool = False + scanner_type?: "trivy" | "clair" | "anchore" + + # Image signing/verification (Notary, Cosign) + image_signing: bool = False + signing_method?: "notary" | "cosign" + + # Replication (Harbor) + replication_enabled: bool = False + replication_targets?: [ReplicationTarget] + + # Quota management + quota_enabled: bool = False + quota_config?: QuotaConfig + + # Webhook notifications + webhook_enabled: bool = False + webhook_endpoints?: [str] + + # Garbage collection + gc_enabled: bool = True + gc_schedule?: str = "0 2 * * *" # Daily at 2 AM + + check: + not vulnerability_scanning or scanner_type != Undefined, \ + "Scanner type required when vulnerability scanning enabled" + not image_signing or signing_method != Undefined, \ + "Signing method required when image signing enabled" + +schema ReplicationTarget: + """Harbor replication target configuration""" + + name: str + type: "harbor" | "docker-hub" | "aws-ecr" | "azure-acr" | "google-gcr" + endpoint: str + credentials?: str # Path to credentials file + verify_ssl: bool = True + + check: + len(name) > 0, "Replication target name required" + len(endpoint) > 0, "Replication endpoint required" + +schema QuotaConfig: + """Registry quota configuration""" + + # Storage quota (GB) + storage_limit_gb: int = 100 + + # Artifact count limit + artifact_limit?: int = 10000 + + # Per-namespace quotas + namespace_quotas?: {str: NamespaceQuota} + + check: + storage_limit_gb > 0, "Storage limit must be positive" + +schema NamespaceQuota: + """Per-namespace quota configuration""" + + storage_limit_gb: int = 50 + artifact_limit: int = 1000 + + check: + storage_limit_gb > 0, "Namespace storage limit must be positive" + artifact_limit > 0, "Namespace artifact limit must be positive" + +# ============================================================================ +# Helper Schemas +# ============================================================================ + +schema OCIArtifactReference: + """ + OCI artifact reference for pulling/pushing artifacts + + Format: //:@ + """ + + # Registry endpoint + registry: str + + # Namespace/project + namespace: str + + # Repository name + repository: str + + # Tag (optional, defaults to "latest") + tag: str = "latest" + + # Digest (optional, for content-addressable pulls) + digest?: str + + # Computed full reference + full_reference: str = f"{registry}/{namespace}/{repository}:{tag}" + + check: + len(registry) > 0, "Registry required" + len(namespace) > 0, "Namespace required" + len(repository) > 0, "Repository required" + len(tag) > 0, "Tag required" + regex.match(tag, r"^[a-zA-Z0-9_][a-zA-Z0-9._-]{0,127}$"), \ + "Invalid tag format" + +schema OCIPullPolicy: + """ + OCI artifact pull policy configuration + + Defines caching and pull behavior for artifacts + """ + + # Pull policy + policy: "always" | "if-not-present" | "never" = "if-not-present" + + # Cache TTL (seconds) + cache_ttl: int = 3600 + + # Verify digest on cached artifacts + verify_cached: bool = True + + # Allow insecure registries (development only) + allow_insecure: bool = False + + check: + cache_ttl > 0, "Cache TTL must be positive" + policy in ["always", "if-not-present", "never"], \ + "Invalid pull policy" + +schema OCIPushPolicy: + """ + OCI artifact push policy configuration + + Defines pushing behavior and constraints + """ + + # Allow overwriting existing tags + allow_overwrite: bool = False + + # Require tag signing before push + require_signing: bool = False 
+
+    # Automatic tagging strategy
+    auto_tag: bool = True
+    tag_format?: str = "v{version}-{timestamp}"
+
+    # Compression
+    compression_enabled: bool = True
+    compression_level: int = 6  # 0-9
+
+    check:
+        compression_level >= 0 and compression_level <= 9, \
+            "Compression level must be 0-9"
+
+# ============================================================================
+# Registry-Specific Configuration
+# ============================================================================
+
+schema ZotRegistryConfig(OCIRegistryConfig):
+    """
+    Zot registry specific configuration
+
+    Zot is a lightweight, cloud-native OCI registry focused on
+    minimal resource usage and developer experience.
+    """
+
+    type: "zot" = "zot"
+
+    # Zot-specific features
+    zot_features?: ZotFeatures
+
+schema ZotFeatures:
+    """Zot-specific features"""
+
+    # Enable search API
+    search_enabled: bool = True
+
+    # Enable metrics endpoint
+    metrics_enabled: bool = True
+    metrics_port: int = 9090
+
+    # Enable sync (pull-through cache)
+    sync_enabled: bool = False
+    sync_registries?: [str]
+
+    # Enable scrub (background verification)
+    scrub_enabled: bool = True
+    scrub_interval: str = "24h"
+
+schema HarborRegistryConfig(OCIRegistryConfig):
+    """
+    Harbor registry specific configuration
+
+    Harbor is an enterprise-grade container registry with
+    security scanning, replication, and RBAC.
+    """
+
+    type: "harbor" = "harbor"
+
+    # Harbor-specific configuration
+    harbor_config?: HarborConfig
+
+schema HarborConfig:
+    """Harbor-specific configuration"""
+
+    # Harbor project (namespace)
+    project: str
+
+    # Project visibility
+    public_project: bool = False
+
+    # Content trust (Notary)
+    content_trust: bool = False
+
+    # Auto-scan on push
+    auto_scan: bool = True
+
+    # Prevent vulnerable images
+    prevent_vulnerable: bool = True
+    severity_threshold: "critical" | "high" | "medium" | "low" = "high"
+
+    check:
+        len(project) > 0, "Harbor project required"
diff --git a/server.k b/server.k
new file mode 100644
index 0000000..370b7e9
--- /dev/null
+++ b/server.k
@@ -0,0 +1,33 @@
+# Info: KCL core lib server schemas for provisioning (Provisioning)
+# Author: JesusPerezLorenzo
+# Release: 0.0.4
+# Date: 15-12-2023
+import regex
+import .defaults
+import .lib
+
+schema Server(defaults.ServerDefaults):
+    """
+    server settings
+    """
+    not_use: bool = False
+    # Hostname used as the resource reference; if it is changed later inside the server, the change will not be updated in the resource inventory
+    hostname: str
+    title: str
+    network_private_id?: str
+    # extra hostnames for server local resolution
+    extra_hostnames?: [str]
+    delete_lock: bool = False
+    taskservs?: [lib.TaskServDef]
+    cluster?: [lib.ClusterDef]
+
+    check:
+        len(hostname) > 0, "Check hostname value"
+        len(title) > 0, "Check title value"
+        priv_cidr_block == Undefined or regex.match(priv_cidr_block, "^(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(?:\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}\/(?:3[0-2]|[0-2]?[0-9])$"), "'priv_cidr_block = ${priv_cidr_block}' check value definition"
+
+#network_private_ip == Undefined or regex.match(network_private_ip,"^\$.*$") or regex.match(network_private_ip, "^((25[0-5]|2[0-4][0-9]|[0-1]?[0-9]?[0-9])\.){3}(25[0-5]|2[0-4][0-9]|[0-1]?[0-9]?[0-9])$"), "'network_private_ip = ${network_private_ip}' check value definition (use $value or xx.xx.xx.xx)"
+#liveness_ip == Undefined or regex.match(liveness_ip,"^\$.*$") or regex.match(liveness_ip, "^((25[0-5]|2[0-4][0-9]|[0-1]?[0-9]?[0-9])\.){3}(25[0-5]|2[0-4][0-9]|[0-1]?[0-9]?[0-9])$"), "'liveness_ip = ${liveness_ip}' check value definition (use $value or xx.xx.xx.xx)"
+# len(adm_user.password) > 0, "Check Admin User password 'adm_user.password'"
+# len(adm_user.email) > 0, "Check Admin User email 'adm_user.email'"
+# len(db.name) > 0, "Check DB name"
diff --git a/services.k b/services.k
new file mode 100644
index 0000000..a33736b
--- /dev/null
+++ b/services.k
@@ -0,0 +1,254 @@
+"""
+Service Registry Schema for Provisioning Platform
+
+Defines platform services (orchestrator, control-center, CoreDNS, Gitea, OCI registry, etc.)
+and their lifecycle management configuration.
+
+Version: 1.0.0
+"""
+
+schema ServiceRegistry:
+    """Platform service registry configuration"""
+
+    services: {str: ServiceDefinition}
+
+    check:
+        len(services) > 0, "At least one service must be defined"
+
+schema ServiceDefinition:
+    """Individual service definition"""
+
+    name: str
+    type: "platform" | "infrastructure" | "utility"
+    category: "orchestration" | "auth" | "dns" | "git" | "registry" | "api" | "ui" | "monitoring"
+    description?: str
+
+    # Service requirements
+    required_for: [str] = []    # Operations requiring this service
+    dependencies: [str] = []    # Other services this depends on
+    conflicts: [str] = []       # Services that conflict
+
+    # Deployment configuration
+    deployment: ServiceDeployment
+
+    # Health check
+    health_check: HealthCheck
+
+    # Startup configuration
+    startup: StartupConfig = StartupConfig {}
+
+    # Resource limits
+    resources?: ResourceLimits
+
+    check:
+        len(name) > 0, "Service name cannot be empty"
+        not (name in dependencies), "Service cannot depend on itself"
+        len([d for d in dependencies if d in conflicts]) == 0, \
+            "Service cannot both depend on and conflict with same service"
+
+schema ServiceDeployment:
+    """Service deployment configuration"""
+
+    mode: "binary" | "docker" | "docker-compose" | "kubernetes" | "remote"
+
+    binary?: BinaryDeployment
+    docker?: DockerDeployment
+    docker_compose?: DockerComposeDeployment
+    kubernetes?: KubernetesDeployment
+    remote?: RemoteDeployment
+
+    check:
+        (mode == "binary" and binary != Undefined) or \
+        (mode == "docker" and docker != Undefined) or \
+        (mode == "docker-compose" and docker_compose != Undefined) or \
+        (mode == "kubernetes" and kubernetes != Undefined) or \
+        (mode == "remote" and remote != Undefined), \
+            "Deployment configuration must match deployment mode"
+
+schema BinaryDeployment:
+    """Binary service deployment"""
+
+    binary_path: str
+    args: [str] = []
+    working_dir?: str
+    env: {str: str} = {}
+    user?: str
+    group?: str
+
+    check:
+        len(binary_path) > 0, "Binary path cannot be empty"
+
+schema DockerDeployment:
+    """Docker container deployment"""
+
+    image: str
+    container_name: str
+    ports: [str] = []
+    volumes: [str] = []
+    environment: {str: str} = {}
+    command?: [str]
+    networks: [str] = []
+    restart_policy: "no" | "always" | "on-failure" | "unless-stopped" = "unless-stopped"
+
+    check:
+        len(image) > 0, "Docker image cannot be empty"
+        len(container_name) > 0, "Container name cannot be empty"
+
+schema DockerComposeDeployment:
+    """Docker Compose deployment"""
+
+    compose_file: str
+    service_name: str
+    project_name?: str
+    env_file?: str
+
+    check:
+        len(compose_file) > 0, "Compose file path cannot be empty"
+        len(service_name) > 0, "Service name cannot be empty"
+
+schema KubernetesDeployment:
+    """Kubernetes deployment"""
+
+    namespace: str
+    deployment_name: str
+    kubeconfig?: str
+    manifests_path?: str
+    helm_chart?: HelmChart
+
+    check:
+        len(namespace) > 0, "Namespace cannot be empty"
+        len(deployment_name) > 0, "Deployment name cannot
be empty" + +schema HelmChart: + """Helm chart configuration""" + + chart: str + release_name: str + repo_url?: str + version?: str + values_file?: str + + check: + len(chart) > 0, "Chart name cannot be empty" + len(release_name) > 0, "Release name cannot be empty" + +schema RemoteDeployment: + """Remote service deployment""" + + endpoint: str + tls_enabled: bool = True + auth_token_path?: str + cert_path?: str + + check: + len(endpoint) > 0, "Endpoint cannot be empty" + +schema HealthCheck: + """Service health check configuration""" + + type: "http" | "tcp" | "command" | "file" | "none" + + http?: HttpHealthCheck + tcp?: TcpHealthCheck + command?: CommandHealthCheck + file?: FileHealthCheck + + interval: int = 10 + retries: int = 3 + timeout: int = 5 + + check: + (type == "http" and http != Undefined) or \ + (type == "tcp" and tcp != Undefined) or \ + (type == "command" and command != Undefined) or \ + (type == "file" and file != Undefined) or \ + (type == "none"), \ + "Health check configuration must match health check type" + interval > 0, "Interval must be positive" + retries > 0, "Retries must be positive" + timeout > 0, "Timeout must be positive" + +schema HttpHealthCheck: + """HTTP health check""" + + endpoint: str + expected_status: int = 200 + method: "GET" | "POST" | "HEAD" = "GET" + headers: {str: str} = {} + + check: + len(endpoint) > 0, "Endpoint cannot be empty" + expected_status >= 100 and expected_status < 600, \ + "HTTP status must be valid (100-599)" + +schema TcpHealthCheck: + """TCP health check""" + + host: str + port: int + + check: + len(host) > 0, "Host cannot be empty" + port > 0 and port <= 65535, "Port must be 1-65535" + +schema CommandHealthCheck: + """Command-based health check""" + + command: str + expected_exit_code: int = 0 + + check: + len(command) > 0, "Command cannot be empty" + +schema FileHealthCheck: + """File-based health check""" + + path: str + must_exist: bool = True + + check: + len(path) > 0, "Path cannot be empty" + +schema StartupConfig: + """Service startup configuration""" + + auto_start: bool = False + start_timeout: int = 60 + start_order: int = 100 + restart_on_failure: bool = True + max_restarts: int = 3 + + check: + start_timeout > 0, "Start timeout must be positive" + start_order > 0, "Start order must be positive" + max_restarts >= 0, "Max restarts must be non-negative" + +schema ResourceLimits: + """Resource limits for service""" + + cpu_limit?: str # e.g., "2", "500m" + memory_limit?: str # e.g., "1Gi", "512Mi" + disk_limit?: str # e.g., "10Gi" + +schema ServiceState: + """Service runtime state""" + + name: str + status: "running" | "stopped" | "failed" | "starting" | "stopping" | "unknown" + pid?: int + started_at?: str + uptime?: int + health_status: "healthy" | "unhealthy" | "unknown" = "unknown" + last_health_check?: str + restart_count: int = 0 + +schema ServiceOperation: + """Service operation request""" + + service_name: str + operation: "start" | "stop" | "restart" | "reload" | "health-check" + force: bool = False + timeout?: int + + check: + len(service_name) > 0, "Service name cannot be empty" diff --git a/settings.k b/settings.k new file mode 100644 index 0000000..e0587d3 --- /dev/null +++ b/settings.k @@ -0,0 +1,151 @@ +# Info: KCL core lib settings schemas for provisioning (Provisioning) +# Author: JesusPerezLorenzo +# Release: 0.0.4 +# Date: 15-12-2023 +schema SecretProvider: + """ + Secret provider configuration for SOPS or KMS + """ + # Secret provider type: sops or kms + provider: "sops" | "kms" = "sops" + # 
Configuration specific to SOPS
+    sops_config?: SopsConfig
+    # Configuration specific to KMS
+    kms_config?: KmsConfig
+
+schema SopsConfig:
+    """
+    SOPS configuration settings
+    """
+    # Path to SOPS configuration file
+    config_path?: str
+    # Path to Age key file for encryption
+    age_key_file?: str
+    # Age recipients for encryption
+    age_recipients?: str
+    # Use Age encryption (default) or other methods
+    use_age: bool = True
+
+schema KmsConfig:
+    """
+    KMS configuration settings for Cosmian KMS
+    """
+    # KMS server URL
+    server_url: str
+    # Authentication method: certificate, token, or basic
+    auth_method: "certificate" | "token" | "basic" = "certificate"
+    # Client certificate path (for certificate auth)
+    client_cert_path?: str
+    # Client private key path (for certificate auth)
+    client_key_path?: str
+    # CA certificate path for server verification
+    ca_cert_path?: str
+    # API token (for token auth)
+    api_token?: str
+    # Username (for basic auth)
+    username?: str
+    # Password (for basic auth)
+    password?: str
+    # Timeout for requests in seconds
+    timeout: int = 30
+    # Verify SSL certificates
+    verify_ssl: bool = True
+
+schema AIProvider:
+    """
+    AI provider configuration for natural language processing
+    """
+    # Enable AI capabilities
+    enabled: bool = False
+    # AI provider type: openai, claude, or generic
+    provider: "openai" | "claude" | "generic" = "openai"
+    # API endpoint URL (for generic provider or custom endpoints)
+    api_endpoint?: str
+    # API key for authentication
+    api_key?: str
+    # Model name to use
+    model?: str
+    # Maximum tokens for responses
+    max_tokens: int = 2048
+    # Temperature for response creativity (0.0-1.0)
+    temperature: float = 0.3
+    # Timeout for API requests in seconds
+    timeout: int = 30
+    # Enable AI for template generation
+    enable_template_ai: bool = True
+    # Enable AI for queries
+    enable_query_ai: bool = True
+    # Enable AI for webhooks/chat interfaces
+    enable_webhook_ai: bool = False
+
+schema RunSet:
+    # Wait until requested taskserv is completed: true or false
+    wait: bool = True
+
+    # Format for output: human (default) | yaml | json
+    # Server info can be requested via aws cli adding option: --out yaml | json | text | table
+    output_format: "human" | "yaml" | "json" = "human"
+    # Output path to copy results
+    output_path: str = "tmp/NOW-deploy"
+    # Inventory file
+    inventory_file: str = "./inventory.yaml"
+    # Prefix commands with 'time' to gather timing info
+    use_time: bool = True
+
+schema Settings:
+    """
+    Settings
+    """
+    # Main name for provisioning
+    main_name: str
+    main_title: str = main_name
+
+    # #provider: "local" | "upcloud" | "aws"
+    # # Settings from servers have priority over these defaults; if a value is not set in the server item, the default one is used instead
+    # #defaults_path: str = "../defaults.yaml"
+    # Settings data is auto-generated, checked, and auto-filled during taskserv operations
+    # Path for automatically generated settings for VPC, Subnets, SG, etc.
+    settings_path: str = "./settings.yaml"
+    # Directory path for providers default-settings
+    defaults_provs_dirpath: str = "./defs"
+    # Suffix for providers default-settings filenames with extension (example: aws_defaults.k)
+    defaults_provs_suffix: str = "_defaults.k"
+    # Provision data directory path to save provider-specific settings (uuids, vpc, etc)
+    prov_data_dirpath: str = "./data"
+    # Suffix for providers data-settings filenames with extension (example: aws_settings.k)
+    prov_data_suffix: str = "_settings.k"
+    # Directory path to collect created infos, taskservs
+    created_taskservs_dirpath: str = "./tmp/NOW_deployment"
+    # Directory path to collect resources for provisioning created infos, taskservs
+    prov_resources_path: str = "./resources"
+    # Directory path to collect created clusters
+    created_clusters_dirpath: str = "./tmp/NOW_clusters"
+    # Directory path to collect clusters for provisioning
+    prov_clusters_path: str = "./clusters"
+    # Directory path for local bin on provisioning
+    prov_local_bin_path: str = "./bin"
+    # Secret management configuration
+    secrets: SecretProvider = SecretProvider {}
+    # AI provider configuration
+    ai: AIProvider = AIProvider {}
+    runset: RunSet
+
+    # Default values can be overridden by cluster settings
+    # Cluster admin host to connect to via SSH
+    cluster_admin_host: str
+    # Cluster admin host port to connect via SSH
+    cluster_admin_port: int = 22
+    # Time to wait in seconds for servers to reach started state and SSH
+    servers_wait_started: int = 27
+    # Cluster admin user to connect via SSH
+    cluster_admin_user: str = "root"
+    # Clusters save path (or use main settings)
+    clusters_save_path: str = "/${main_name}/clusters"
+    # Servers path
+    servers_paths: [str] = ["servers"]
+    # Common cluster definitions, mainly Cluster ones
+    clusters_paths: [str] = ["clusters"]
+
+#clusters: [str] = [ "clusters" ]
+#check:
+#    len(provider) > 0, "Check provider value"
diff --git a/version.k b/version.k
new file mode 100644
index 0000000..1328c10
--- /dev/null
+++ b/version.k
@@ -0,0 +1,107 @@
+"""
+KCL Version Management Schema for Provisioning System
+Provides type-safe version definitions with GitHub release integration
+"""
+
+schema Version:
+    """Version information for a component with optional GitHub integration"""
+    # Version number (e.g., "1.31.0") or "latest"
+    current: str
+    # GitHub releases URL for automated checking
+    source?: str
+    # GitHub tags URL (alternative source)
+    tags?: str
+    # Official project website
+    site?: str
+    # Enable automatic latest version checking
+    check_latest?: bool = False
+    # Cache duration in seconds (24h default)
+    grace_period?: int = 86400
+
+    check:
+        len(current) > 0, "Version current field cannot be empty"
+        current == "latest" or current == "" or len(current.split(".")) >= 1, "Version must be semantic (x.y.z), 'latest', or empty"
+
+schema TaskservVersion:
+    """Complete taskserv version configuration with dependency tracking"""
+    # Taskserv name (must match directory)
+    name: str
+    # Primary version configuration
+    version: Version
+    # Other taskservs this component depends on
+    dependencies?: [str]
+    # Profile-specific version overrides
+    profiles?: {str:Version}
+
+    check:
+        len(name) > 0, "Taskserv name cannot be empty"
+        name == name.lower(), "Taskserv name must be lowercase"
+
+schema VersionCache:
+    """Cache structure for latest version lookups"""
+    # Resolved version string
+    version: str
+    # ISO timestamp of last fetch
+    fetched_at: str
+    # Source URL used for resolution
+    source: str
+    # Time-to-live in seconds
+schema TaskservVersion:
+    """Complete taskserv version configuration with dependency tracking"""
+    # Taskserv name (must match directory)
+    name: str
+    # Primary version configuration
+    version: Version
+    # Other taskservs this component depends on
+    dependencies?: [str]
+    # Profile-specific version overrides
+    profiles?: {str:Version}
+
+    check:
+        len(name) > 0, "Taskserv name cannot be empty"
+        name == name.lower(), "Taskserv name must be lowercase"
+
+schema VersionCache:
+    """Cache structure for latest version lookups"""
+    # Resolved version string
+    version: str
+    # ISO timestamp of last fetch
+    fetched_at: str
+    # Source URL used for resolution
+    source: str
+    # Time-to-live in seconds
+    ttl: int = 86400
+
+    check:
+        len(version) > 0, "Cached version cannot be empty"
+        len(source) > 0, "Cache source URL cannot be empty"
+
+# Package metadata for core provisioning KCL module
+schema PackageMetadata:
+    """Core package metadata for distribution"""
+    # Package name
+    name: str
+    # Package version
+    version: str
+    # API compatibility version
+    api_version: str
+    # Build timestamp
+    build_date: str
+    # Minimum KCL version required
+    kcl_min_version: str
+    # Maximum KCL version supported
+    kcl_max_version: str
+    # External dependencies
+    dependencies: {str:str}
+    # Feature flags
+    features: {str:bool}
+    # Available schema exports
+    schema_exports: [str]
+
+    check:
+        len(name) > 0, "Package name cannot be empty"
+        len(version) > 0, "Package version cannot be empty"
+
+# Default package metadata
+package_metadata: PackageMetadata = {
+    name = "provisioning"
+    version = "0.1.0"
+    api_version = "v1"
+    build_date = "2025-09-28"
+    kcl_min_version = "0.11.0"
+    kcl_max_version = "0.12.0"
+    dependencies = {}
+    features = {
+        server_management = True
+        cluster_orchestration = True
+        provider_abstraction = True
+        workflow_automation = True
+        batch_operations = True
+    }
+    schema_exports = [
+        "Settings"
+        "Server"
+        "Cluster"
+        "Provider"
+        "Workflow"
+        "BatchWorkflow"
+        "Version"
+        "PackageMetadata"
+    ]
+}
diff --git a/workflows.k b/workflows.k
new file mode 100644
index 0000000..a3e7ffc
--- /dev/null
+++ b/workflows.k
@@ -0,0 +1,287 @@
+# Info: KCL batch workflow schemas for provisioning (Provisioning)
+# Author: JesusPerezLorenzo
+# Release: 0.0.1
+# Date: 25-09-2025
+# Description: Core batch workflow schemas following PAP principles
+import .settings
+
+schema DependencyDef:
+    """
+    Dependency definition between batch operations
+    Supports both sequential and conditional dependencies
+    """
+    # Target operation ID that this dependency points to
+    target_operation_id: str
+    # Dependency type: 'sequential' waits for completion, 'conditional' waits for specific conditions
+    dependency_type: "sequential" | "conditional" = "sequential"
+    # For conditional dependencies, specify the required conditions
+    conditions?: [str] = []
+    # Timeout in seconds to wait for dependency resolution
+    timeout: int = 300
+    # Whether failure of the dependency should fail this operation
+    fail_on_dependency_error: bool = True
+
+    check:
+        len(target_operation_id) > 0, "Target operation ID cannot be empty"
+        timeout > 0, "Timeout must be positive"
+
+schema RetryPolicy:
+    """
+    Retry policy configuration for batch operations
+    Supports exponential backoff and custom retry conditions
+    """
+    # Maximum number of retry attempts (0 = no retries)
+    max_attempts: int = 3
+    # Initial delay between retries in seconds
+    initial_delay: int = 5
+    # Maximum delay between retries in seconds
+    max_delay: int = 300
+    # Backoff multiplier (1.0 = linear, >1.0 = exponential)
+    backoff_multiplier: float = 2.0
+    # Specific error codes/conditions that should trigger retries
+    retry_on_errors?: [str] = ["connection_error", "timeout", "rate_limit"]
+    # Whether to retry on any error (if retry_on_errors is empty)
+    retry_on_any_error: bool = False
+
+    check:
+        max_attempts >= 0, "Max attempts cannot be negative"
+        initial_delay > 0, "Initial delay must be positive"
+        max_delay >= initial_delay, "Max delay must be >= initial delay"
+        backoff_multiplier >= 1.0, "Backoff multiplier must be >= 1.0"
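+
+# Illustrative sketch (hypothetical instance, not part of the original module):
+# with these values the retry delay doubles on each attempt (2s, 4s, 8s, 16s)
+# and would be capped at max_delay = 60s if it grew further.
+example_retry_policy: RetryPolicy = RetryPolicy {
+    max_attempts = 5
+    initial_delay = 2
+    max_delay = 60
+    backoff_multiplier = 2.0
+}
+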
+schema RollbackStrategy:
+    """
+    Rollback strategy configuration for failed batch operations
+    Supports different rollback approaches based on operation type
+    """
+    # Whether rollback is enabled
+    enabled: bool = True
+    # Rollback strategy: 'none', 'immediate', 'batch_end', 'manual'
+    strategy: "none" | "immediate" | "batch_end" | "manual" = "immediate"
+    # Whether to preserve partial state for manual recovery
+    preserve_partial_state: bool = False
+    # Custom rollback commands/operations
+    custom_rollback_operations?: [str] = []
+    # Timeout in seconds for rollback operations
+    rollback_timeout: int = 600
+
+    check:
+        rollback_timeout > 0, "Rollback timeout must be positive"
+
+schema MonitoringConfig:
+    """
+    Monitoring and observability configuration for batch workflows
+    Integrates with various monitoring backends
+    """
+    # Whether monitoring is enabled
+    enabled: bool = True
+    # Monitoring backend: 'prometheus', 'grafana', 'datadog', 'custom'
+    backend: "prometheus" | "grafana" | "datadog" | "custom" = "prometheus"
+    # Metrics endpoint URL (for custom backends)
+    endpoint?: str
+    # Metric collection interval in seconds
+    collection_interval: int = 30
+    # Whether to enable detailed operation tracing
+    enable_tracing: bool = True
+    # Log level for batch operations
+    log_level: "debug" | "info" | "warn" | "error" = "info"
+    # Whether to send notifications on workflow completion/failure
+    enable_notifications: bool = False
+    # Notification channels (webhooks, slack, email, etc.)
+    notification_channels?: [str] = []
+
+    check:
+        collection_interval > 0, "Collection interval must be positive"
+
+schema StorageConfig:
+    """
+    Storage backend configuration for batch workflow state and results
+    Supports multiple storage backends including SurrealDB and filesystem
+    """
+    # Storage backend type
+    backend: "surrealdb" | "filesystem" | "redis" | "postgresql" = "filesystem"
+    # Connection configuration for database backends
+    connection_config?: {str:str} = {}
+    # Base path for filesystem backend
+    base_path: str = "./batch_workflows"
+    # Whether to enable state persistence
+    enable_persistence: bool = True
+
+    # State retention period in hours (0 = keep forever; default 168 = one week)
+    retention_hours: int = 168
+    # Whether to compress stored data
+    enable_compression: bool = False
+    # Encryption settings for stored data
+    encryption?: settings.SecretProvider
+
+    check:
+        len(base_path) > 0, "Base path cannot be empty"
+        retention_hours >= 0, "Retention hours cannot be negative"
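+
+# Illustrative sketch (hypothetical instance): persisting workflow state in
+# SurrealDB instead of the filesystem default. The connection_config keys are
+# assumptions for illustration, not a documented contract of the storage layer.
+example_storage: StorageConfig = StorageConfig {
+    backend = "surrealdb"
+    connection_config = {
+        url = "ws://localhost:8000"
+        namespace = "provisioning"
+        database = "workflows"
+    }
+    retention_hours = 720
+}
+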
+schema BatchOperation:
+    """
+    Individual operation definition within a batch workflow
+    Supports various operation types with provider-agnostic configuration
+    """
+    # Unique operation identifier within the workflow
+    operation_id: str
+    # Human-readable operation name/description
+    name: str
+    # Operation type: server, taskserv, cluster, custom
+    operation_type: "server" | "taskserv" | "cluster" | "custom" = "server"
+    # Target provider (upcloud, aws, mixed, etc.)
+    provider?: str
+    # Specific action: create, delete, update, scale, etc.
+    action: "create" | "delete" | "update" | "scale" | "configure" = "create"
+
+    # Operation-specific parameters (flexible configuration)
+    parameters: {str:str} = {}
+
+    # Dependencies on other operations
+    dependencies?: [DependencyDef] = []
+    # Retry policy for this operation
+    retry_policy: RetryPolicy = RetryPolicy {}
+    # Rollback strategy for this operation
+    rollback_strategy: RollbackStrategy = RollbackStrategy {}
+
+    # Operation execution settings
+    # Operation timeout in seconds (default 1800 = 30 minutes)
+    timeout: int = 1800
+    # Whether the operation can run in parallel with others
+    allow_parallel: bool = True
+    # Priority for operation scheduling (higher = earlier execution)
+    priority: int = 0
+
+    # Validation rules for operation parameters
+    validation_rules?: [str] = []
+    # Expected outcomes/conditions for success
+    success_conditions?: [str] = []
+
+    check:
+        len(operation_id) > 0, "Operation ID cannot be empty"
+        len(name) > 0, "Operation name cannot be empty"
+        timeout > 0, "Timeout must be positive"
+
+schema BatchWorkflow:
+    """
+    Main batch workflow definition supporting mixed provider operations
+    Follows PAP principles with configuration-driven architecture
+    """
+    # Unique workflow identifier
+    workflow_id: str
+    # Human-readable workflow name
+    name: str
+    # Workflow description
+    description?: str = ""
+    # Workflow metadata
+    version: str = "1.0.0"
+    created_at?: str
+    modified_at?: str
+    # List of operations in this workflow
+    operations: [BatchOperation]
+
+    # Global workflow settings
+    # Maximum parallel operations (0 = unlimited)
+    max_parallel_operations: int = 5
+
+    # Global timeout for the entire workflow in seconds (default 7200 = 2 hours)
+    global_timeout: int = 7200
+    # Whether to stop the workflow on first failure
+    fail_fast: bool = False
+
+    # Storage backend configuration
+    storage: StorageConfig = StorageConfig {}
+    # Monitoring configuration
+    monitoring: MonitoringConfig = MonitoringConfig {}
+
+    # Global retry policy (can be overridden per operation)
+    default_retry_policy: RetryPolicy = RetryPolicy {}
+    # Global rollback strategy
+    default_rollback_strategy: RollbackStrategy = RollbackStrategy {}
+
+    # Workflow execution context
+    execution_context: {str:str} = {}
+
+    # Pre and post workflow hooks
+    pre_workflow_hooks?: [str] = []
+    post_workflow_hooks?: [str] = []
+    # Notification settings
+    notifications?: MonitoringConfig
+
+    check:
+        len(workflow_id) > 0, "Workflow ID cannot be empty"
+        len(name) > 0, "Workflow name cannot be empty"
+        len(operations) > 0, "Workflow must contain at least one operation"
+        max_parallel_operations >= 0, "Max parallel operations cannot be negative"
+        global_timeout > 0, "Global timeout must be positive"
+        # Validate operation IDs are unique within the workflow
+        isunique([o.operation_id for o in operations]), "Operation IDs must be unique"
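+
+# Illustrative sketch (hypothetical IDs, provider, and names): a minimal
+# two-operation workflow in which taskserv installation waits for server
+# creation via a sequential dependency.
+example_workflow: BatchWorkflow = BatchWorkflow {
+    workflow_id = "wf-example-001"
+    name = "Create server, then install taskserv"
+    operations = [
+        BatchOperation {
+            operation_id = "create-server"
+            name = "Create web server"
+            operation_type = "server"
+            provider = "upcloud"
+            action = "create"
+        }
+        BatchOperation {
+            operation_id = "install-kubernetes"
+            name = "Install kubernetes taskserv"
+            operation_type = "taskserv"
+            action = "configure"
+            dependencies = [DependencyDef {target_operation_id = "create-server"}]
+            allow_parallel = False
+        }
+    ]
+}
+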
+schema WorkflowExecution:
+    """
+    Runtime execution state for batch workflows
+    Tracks progress, results, and state changes
+    """
+    # Reference to the workflow being executed
+    workflow_id: str
+    # Unique execution instance identifier
+    execution_id: str
+    # Current execution status
+    status: "pending" | "running" | "paused" | "completed" | "failed" | "cancelled" = "pending"
+
+    # Execution timing
+    started_at?: str
+    completed_at?: str
+    # Duration in seconds
+    duration?: int
+    # Operation execution states
+    operation_states: {str:{str:str}} = {}
+    # Execution results and outputs
+    results: {str:str} = {}
+    # Error information
+    errors: [str] = []
+
+    # Resource usage tracking
+    resource_usage?: {str:str} = {}
+    # Rollback history
+    rollback_history: [str] = []
+
+    check:
+        len(workflow_id) > 0, "Workflow ID cannot be empty"
+        len(execution_id) > 0, "Execution ID cannot be empty"
+
+schema WorkflowTemplate:
+    """
+    Reusable workflow templates for common batch operations
+    Supports parameterization and customization
+    """
+    # Template identifier
+    template_id: str
+    # Template name and description
+    name: str
+    description?: str = ""
+    # Template category
+    category: "infrastructure" | "deployment" | "maintenance" | "testing" | "custom" = "infrastructure"
+
+    # Base workflow definition
+    workflow_template: BatchWorkflow
+
+    # Template parameters that can be customized
+    parameters: {str:str} = {}
+    # Required parameters that must be provided
+    required_parameters: [str] = []
+
+    # Template versioning
+    version: str = "1.0.0"
+    # Compatibility information
+    min_provisioning_version?: str
+    # Usage examples and documentation
+    examples?: [str] = []
+    documentation_url?: str
+
+    check:
+        len(template_id) > 0, "Template ID cannot be empty"
+        len(name) > 0, "Template name cannot be empty"
+
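+# Illustrative sketch (hypothetical template): wrapping the example workflow
+# sketched above as a reusable template with a single required parameter.
+example_template: WorkflowTemplate = WorkflowTemplate {
+    template_id = "tpl-server-with-taskserv"
+    name = "Server plus taskserv"
+    category = "infrastructure"
+    workflow_template = example_workflow
+    parameters = {provider = "upcloud"}
+    required_parameters = ["provider"]
+}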