# VM Lifecycle and Persistence Schemas (Phase 2)
#
# Extends core VmConfig with persistence and lifecycle management.
# Follows KCL patterns: schema-first, explicit types, check blocks.

schema VmPersistence:
    """
    VM persistence configuration for permanent/temporary management.

    Controls VM behavior across reboots and cleanup policies.
    `created_at_unix` has no default and must always be supplied.

    Examples:
        # Permanent VM - persists across reboots
        VmPersistence {
            mode = "permanent"
            auto_start = True
            restart_policy = "always"
            created_at_unix = 1700000000
        }

        # Temporary VM - auto-cleanup after TTL
        VmPersistence {
            mode = "temporary"
            ttl_hours = 24
            auto_cleanup = True
            force_cleanup = False
            created_at_unix = 1700000000
        }
    """

    # Persistence mode (Pattern 8: Union types)
    mode: "permanent" | "temporary" = "permanent"

    # Permanent VM settings
    # Start on host boot (rejected by the check block when mode is "temporary")
    auto_start: bool = False
    restart_policy: "no" | "always" | "on-failure" = "always"
    # Max restart attempts
    max_retries: int = 5

    # Temporary VM settings
    # Time to live, in hours
    ttl_hours: int = 24
    # Auto-delete on TTL
    auto_cleanup: bool = True
    # Force cleanup without graceful shutdown (requires auto_cleanup)
    force_cleanup: bool = False
    # Seconds to wait before force kill
    cleanup_grace_period: int = 60

    # State tracking
    # Creation timestamp (Unix epoch)
    created_at_unix: int
    # Cleanup timestamp if scheduled
    scheduled_cleanup?: int
    # Last state change timestamp
    last_state_change?: int

    check:
        # TTL validation (applies in every mode, since ttl_hours always has a value)
        ttl_hours > 0 and ttl_hours <= 8760, "TTL must be 1-8760 hours (1 year max)"
        # Restart policy validation
        not (auto_start and mode == "temporary"), "Temporary VMs cannot have auto_start enabled"
        # Cleanup validation
        not (force_cleanup and not auto_cleanup), "force_cleanup requires auto_cleanup enabled"
        # Grace period validation
        cleanup_grace_period >= 0 and cleanup_grace_period <= 300, "Grace period must be 0-300 seconds"

schema VmLifecyclePolicy:
    """
    VM lifecycle policy defining behavior across system events.

    Controls how VMs behave on host reboot, shutdown, and resource contention.
    """

    # On host reboot behavior
    on_host_reboot: "start" | "keep-stopped" | "destroy" = "start"

    # On host shutdown behavior
    on_host_shutdown: "shutdown" | "save-state" | "destroy" = "shutdown"

    # On resource contention
    on_memory_pressure: "suspend" | "kill" | "none" = "none"
    on_disk_full: "suspend" | "kill" | "none" = "none"

    # Resource limits enforcement
    enforce_memory_limit: bool = True
    enforce_cpu_limit: bool = True
    # Risky if enabled
    enforce_disk_limit: bool = False

    check:
        # Rejects only the combination save-state on shutdown + destroy on reboot.
        on_host_shutdown != "save-state" or on_host_reboot != "destroy", "Cannot save-state on shutdown if VM is destroyed on reboot"

schema VmCleanupSchedule:
    """
    Cleanup scheduling information for temporary VMs.

    Tracks when VMs are scheduled for cleanup and status.
    """

    vm_name: str
    vm_id: str
    # Only the literal "temporary" is accepted here.
    mode: "temporary" = "temporary"
    # ISO 8601 timestamp
    created_at: str
    # ISO 8601 timestamp
    scheduled_cleanup_at: str
    ttl_hours: int

    # Cleanup status tracking
    cleanup_status: "pending" | "in-progress" | "completed" | "failed" = "pending"
    cleanup_attempts: int = 0
    # ISO 8601 timestamp
    last_cleanup_attempt?: str
    cleanup_error?: str

    check:
        len(vm_name) > 0, "VM name required"
        len(vm_id) > 0, "VM ID required"
        ttl_hours > 0, "TTL must be positive"
        cleanup_attempts >= 0, "Cleanup attempts cannot be negative"

schema VmRecoveryState:
    """
    VM state snapshot for recovery after host reboot.

    Captures VM state before shutdown for restoration.
    """

    vm_name: str
    vm_id: str
    state_before_shutdown: "running" | "stopped" | "paused"
    # ISO 8601
    creation_timestamp: str
    # ISO 8601
    last_checkpoint: str

    # Memory state (for save-state)
    # Path to memory dump file
    memory_snapshot?: str
    memory_size_mb?: int

    # Configuration snapshot
    # Full VmConfig at snapshot time (stored as JSON/dict)
    config_snapshot: {str: any}

    check:
        len(vm_name) > 0, "VM name required"
        # Same identity validation as VmCleanupSchedule: an empty ID is rejected.
        len(vm_id) > 0, "VM ID required"
        state_before_shutdown in ["running", "stopped", "paused"], "Invalid shutdown state"

schema VmAutoStartConfig:
    """
    Configuration for automatic VM startup on host boot.

    Manages order and dependencies for VM startup.
    """

    vm_name: str
    enabled: bool = True
    # Lower numbers start first
    start_order: int = 0
    # Delay before starting
    start_delay_seconds: int = 0
    # Wait for SSH before continuing
    wait_for_ssh: bool = True
    # Max wait time
    ssh_timeout_seconds: int = 300
    on_start_failure: "stop" | "retry" | "ignore" = "retry"
    max_start_retries: int = 3

    # Dependencies
    # Other VMs to start first
    depends_on: [str] = []

    check:
        len(vm_name) > 0, "VM name required"
        start_order >= 0, "Start order must be non-negative"
        start_delay_seconds >= 0, "Delay must be non-negative"
        ssh_timeout_seconds > 0, "SSH timeout must be positive"
        max_start_retries > 0, "Max retries must be positive"
        on_start_failure in ["stop", "retry", "ignore"], "Invalid start failure policy"
        # A VM that lists itself as a prerequisite could never be started.
        vm_name not in depends_on, "VM cannot depend on itself"

schema VmCleanupPolicy:
    """
    Global cleanup policy for all temporary VMs on system.

    Defines system-wide cleanup behavior and constraints.
    """

    # Cleanup scheduling
    cleanup_enabled: bool = True
    # How often to check for cleanup
    check_interval_minutes: int = 60
    # HH:MM format (the format is not enforced by the check block below)
    cleanup_window_start: str = "02:00"
    # HH:MM format (the format is not enforced by the check block below)
    cleanup_window_end: str = "06:00"
    cleanup_in_window_only: bool = True

    # Resource constraints
    # Max VMs cleaning up simultaneously
    max_concurrent_cleanups: int = 3
    # Max VMs to check per batch
    cleanup_batch_size: int = 10

    # Safety features
    # Require approval before cleanup
    require_confirmation: bool = False
    # Log cleanups without executing
    dry_run_mode: bool = False
    # Skip cleanup if system busy
    skip_on_low_resources: bool = True

    # Logging and monitoring
    log_cleanup_operations: bool = True
    alert_on_cleanup_failure: bool = True
    # Keep cleanup logs for N days
    retention_days: int = 7

    check:
        check_interval_minutes > 0 and check_interval_minutes <= 1440, "Check interval must be 1-1440 minutes (1 day max)"
        max_concurrent_cleanups > 0, "Must allow at least 1 concurrent cleanup"
        cleanup_batch_size > 0, "Batch size must be positive"
        retention_days >= 0, "Retention days cannot be negative"
schema VmStateSnapshot:
    """
    Point-in-time record of a single VM's state.

    Used for state persistence across operations and for recovery.
    Every attribute is required; none has a default value.
    """

    vm_name: str
    # ISO 8601 timestamp
    snapshot_time: str
    vm_state: "stopped" | "starting" | "running" | "stopping" | "paused" | "error"

    # Resource state at snapshot
    cpu_usage_percent: float
    memory_usage_mb: int
    disk_usage_gb: int

    # Network state
    ip_addresses: [str]
    mac_addresses: [str]

    # Performance metrics
    uptime_seconds: int
    restart_count: int

    check:
        # Identity: the name must be a non-empty string.
        0 < len(vm_name), "VM name required"
        # CPU usage is bounded on both sides; the remaining counters only have a floor of zero.
        0 <= cpu_usage_percent <= 100, "CPU usage must be 0-100%"
        0 <= memory_usage_mb, "Memory usage cannot be negative"
        0 <= disk_usage_gb, "Disk usage cannot be negative"
        0 <= uptime_seconds, "Uptime cannot be negative"
        0 <= restart_count, "Restart count cannot be negative"