288 lines
9.3 KiB
Plaintext
288 lines
9.3 KiB
Plaintext
# Info: KCL batch operation utilities for provisioning (Provisioning)
|
|
# Author: JesusPerezLorenzo
|
|
# Release: 0.0.1
|
|
# Date: 25-09-2025
|
|
# Description: Batch operation utilities and helper schemas following PAP principles
|
|
import .workflows
|
|
|
|
schema BatchScheduler:
    """
    Scheduler configuration for batch operations.

    Supports various scheduling strategies and resource management.
    """
    # Scheduling strategy: 'fifo', 'priority', 'dependency_first', 'resource_aware'
    strategy: "fifo" | "priority" | "dependency_first" | "resource_aware" = "dependency_first"

    # Resource constraints for scheduling; a value of 0 means "no limit"
    resource_limits: {str:int} = {
        # 0 = no limit
        "max_cpu_cores": 0
        # 0 = no limit
        "max_memory_mb": 0
        # 0 = no limit
        "max_network_bandwidth": 0
    }

    # Scheduling interval in seconds
    scheduling_interval: int = 10

    # Whether to enable preemptive scheduling
    enable_preemption: bool = False

    check:
        scheduling_interval > 0, "Scheduling interval must be positive"
        # 0 is the documented "no limit" sentinel; negative limits are meaningless
        all k in resource_limits { resource_limits[k] >= 0 }, "Resource limit values cannot be negative"
|
|
|
|
schema BatchQueue:
    """
    Queue configuration for batch operations.

    Supports priority queues and resource-based queuing.
    """
    # Unique name identifying this queue
    queue_id: str

    # Queue semantics: 'standard', 'priority', 'delay', 'dead_letter'
    queue_type: "standard" | "priority" | "delay" | "dead_letter" = "standard"

    # Upper bound on queued messages; 0 means unbounded
    max_size: int = 0

    # How long messages are retained, in seconds (default: 7 days)
    retention_period: int = 604800

    # Optional target queue for messages that exhaust delivery attempts
    dead_letter_queue?: str

    # Delivery attempts allowed before routing to the dead letter queue
    max_delivery_attempts: int = 3

    check:
        len(queue_id) > 0, "Queue ID cannot be empty"
        max_size >= 0, "Max size cannot be negative"
        retention_period > 0, "Retention period must be positive"
        max_delivery_attempts > 0, "Max delivery attempts must be positive"
|
|
|
|
schema ResourceConstraint:
    """
    Resource constraint definition for batch operations.

    Ensures operations don't exceed available resources.
    """
    # Kind of resource being constrained
    resource_type: "cpu" | "memory" | "network" | "storage" | "custom"

    # Identifier for the constrained resource
    resource_name: str

    # Total units available for this resource
    max_units: int

    # Units currently consumed
    current_units: int = 0

    # Estimated units consumed by a single operation
    units_per_operation: int = 1

    # Hard constraints fail the operation; soft constraints only warn
    hard_constraint: bool = True

    check:
        len(resource_name) > 0, "Resource name cannot be empty"
        max_units > 0, "Max units must be positive"
        current_units >= 0, "Current units cannot be negative"
        units_per_operation > 0, "Units per operation must be positive"
        current_units <= max_units, "Current units cannot exceed max units"
|
|
|
|
schema BatchMetrics:
    """
    Metrics collection configuration for batch operations.

    Tracks performance, success rates, and resource utilization.
    """
    # Whether to collect detailed metrics
    detailed_metrics: bool = True

    # Metrics retention period in hours (default: 1 week)
    retention_hours: int = 168

    # Metrics aggregation intervals in seconds (default: 1min, 5min, 1hour)
    aggregation_intervals: [int] = [60, 300, 3600]

    # Custom metrics to collect
    custom_metrics?: [str] = []

    # Whether to export metrics to external systems
    enable_export: bool = False

    # Export configuration
    export_config?: {str:str} = {}

    check:
        retention_hours > 0, "Retention hours must be positive"
        len(aggregation_intervals) > 0, "Must have at least one aggregation interval"
        # A zero or negative aggregation window is meaningless
        all i in aggregation_intervals { i > 0 }, "Aggregation intervals must be positive"
|
|
|
|
schema ProviderMixConfig:
    """
    Configuration for mixed provider batch operations.

    Handles cross-provider dependencies and resource coordination.
    """
    # Primary provider for the batch workflow
    primary_provider: str = "upcloud"

    # Secondary providers available
    secondary_providers: [str] = []

    # Provider selection strategy for new resources
    provider_selection: "primary_first" | "load_balance" | "cost_optimize" | "latency_optimize" = "primary_first"

    # Cross-provider networking configuration
    cross_provider_networking?: {str:str} = {}

    # Shared storage configuration across providers
    shared_storage?: workflows.StorageConfig

    # Provider-specific resource limits
    provider_limits: {str:{str:int}} = {}

    check:
        len(primary_provider) > 0, "Primary provider cannot be empty"
        # Listing the primary again as a secondary would double-count it
        # in selection strategies such as 'load_balance'
        primary_provider not in secondary_providers, "Primary provider must not be listed in secondary providers"
|
|
|
|
schema BatchHealthCheck:
    """
    Health check configuration for batch operations.

    Monitors operation health and triggers recovery actions.
    """
    # Master switch for health checking
    enabled: bool = True

    # Seconds between consecutive health checks
    check_interval: int = 60

    # Seconds before an individual check is considered timed out
    check_timeout: int = 30

    # Consecutive failures required to mark an operation unhealthy
    failure_threshold: int = 3

    # Consecutive successes required to mark it healthy again
    success_threshold: int = 2

    # Endpoints or commands probed by each health check
    health_checks: [str] = []

    # Recovery actions executed when a health check fails
    failure_actions: [str] = ["retry", "rollback"]

    check:
        check_interval > 0, "Check interval must be positive"
        check_timeout > 0, "Check timeout must be positive"
        failure_threshold > 0, "Failure threshold must be positive"
        success_threshold > 0, "Success threshold must be positive"
|
|
|
|
schema BatchAutoscaling:
    """
    Autoscaling configuration for batch operations.

    Dynamically adjusts resources based on load and performance.
    """
    # Whether autoscaling is enabled
    enabled: bool = False

    # Minimum number of parallel operations
    min_parallel: int = 1

    # Maximum number of parallel operations
    max_parallel: int = 10

    # Scaling triggers based on CPU/resource utilization
    scale_up_threshold: float = 0.8
    scale_down_threshold: float = 0.2

    # Scaling cooldown period in seconds
    cooldown_period: int = 300

    # Number of operations added/removed per scaling action
    scale_step: int = 1

    # Target resource utilization
    target_utilization: float = 0.6

    check:
        min_parallel > 0, "Min parallel must be positive"
        max_parallel >= min_parallel, "Max parallel must be >= min parallel"
        scale_up_threshold > scale_down_threshold, "Scale up threshold must be > scale down threshold"
        # Thresholds are utilization fractions, so they must stay within [0, 1]
        scale_down_threshold >= 0, "Scale down threshold cannot be negative"
        scale_up_threshold <= 1, "Scale up threshold cannot exceed 1"
        0 < target_utilization and target_utilization < 1, "Target utilization must be between 0 and 1"
        cooldown_period > 0, "Cooldown period must be positive"
        # A zero step would make scaling a no-op; a negative step would invert it
        scale_step > 0, "Scale step must be positive"
|
|
|
|
schema BatchExecutor:
    """
    Batch executor configuration combining all batch operation aspects.

    Main configuration schema for batch workflow execution engine.
    """
    # Executor identifier
    executor_id: str

    # Executor name and description
    name: str
    description?: str = ""

    # Core scheduling configuration
    scheduler: BatchScheduler = BatchScheduler {}

    # Queue management
    queues: [BatchQueue] = [BatchQueue {queue_id: "default"}]

    # Resource constraints
    resource_constraints: [ResourceConstraint] = []

    # Mixed provider configuration
    provider_config: ProviderMixConfig = ProviderMixConfig {}

    # Health monitoring
    health_check: BatchHealthCheck = BatchHealthCheck {}

    # Autoscaling settings
    autoscaling: BatchAutoscaling = BatchAutoscaling {}

    # Metrics and monitoring
    metrics: BatchMetrics = BatchMetrics {}

    # Storage configuration for execution state
    storage: workflows.StorageConfig = workflows.StorageConfig {}

    # Security and access control
    security_config: {str:str} = {}

    # Audit logging configuration
    audit_logging: bool = True
    audit_log_path: str = "./logs/batch_audit.log"

    # Integration settings
    webhook_endpoints: [str] = []
    api_endpoints: [str] = []

    # Performance tuning
    performance_config: {str:str} = {
        "io_threads": "4"
        "worker_threads": "8"
        "batch_size": "100"
    }

    check:
        len(executor_id) > 0, "Executor ID cannot be empty"
        len(name) > 0, "Executor name cannot be empty"
        len(queues) > 0, "Must have at least one queue configured"
        # Duplicate queue IDs would make message routing ambiguous
        isunique([q.queue_id for q in queues]), "Queue IDs must be unique"
|
|
|
|
# Utility constants for batch operations.
# Canonical vocabulary of operation types accepted by the batch executor,
# grouped by the resource each one targets.
BatchOperationTypes: [str] = [
    # Server lifecycle operations
    "server_create"
    "server_delete"
    "server_scale"
    "server_update"
    # Task-service (taskserv) operations
    "taskserv_install"
    "taskserv_remove"
    "taskserv_update"
    "taskserv_configure"
    # Cluster lifecycle operations
    "cluster_create"
    "cluster_delete"
    "cluster_scale"
    "cluster_upgrade"
    # User-defined operations
    "custom_command"
    "custom_script"
    "custom_api_call"
]
|
|
|
|
# Provider identifiers recognized by batch operations.
BatchProviders: [str] = [
    "upcloud"
    "aws"
    "local"
    "mixed"
    "custom"
]
|
|
|
|
# Ready-to-use executor instance: dependency-first scheduling, UpCloud as the
# primary provider with AWS/local fallbacks, and autoscaling enabled.
DefaultBatchConfig: BatchExecutor = BatchExecutor {
    executor_id: "default_batch_executor"
    name: "Default Batch Executor"
    description: "Default configuration-driven batch executor for provisioning operations"
    # Dependency-aware scheduling bounded by fixed resource ceilings
    scheduler: BatchScheduler {
        strategy: "dependency_first"
        resource_limits: {
            "max_cpu_cores": 8
            "max_memory_mb": 16384
            "max_network_bandwidth": 1000
        }
    }
    # UpCloud first; AWS and local available as secondaries
    provider_config: ProviderMixConfig {
        primary_provider: "upcloud"
        secondary_providers: ["aws", "local"]
        provider_selection: "primary_first"
    }
    # Scale between 2 and 8 parallel operations, targeting 70% utilization
    autoscaling: BatchAutoscaling {
        enabled: True
        min_parallel: 2
        max_parallel: 8
        target_utilization: 0.7
    }
}
|