249 lines
7.5 KiB
Plaintext
249 lines
7.5 KiB
Plaintext
# VM Lifecycle and Persistence Schemas (Phase 2)
#
# Extends the core VmConfig with persistence and lifecycle management.
# Follows KCL patterns: schema-first, explicit types, check blocks.
|
|
schema VmPersistence:
    """
    VM persistence configuration for permanent/temporary management.

    Controls VM behavior across reboots and cleanup policies.

    Attributes
    ----------
    mode : "permanent" | "temporary", default is "permanent"
        Persistence mode (Pattern 8: Union types).
    auto_start : bool, default is False
        Start on host boot. Must stay False for temporary VMs.
    restart_policy : "no" | "always" | "on-failure", default is "always"
        Restart policy for the VM.
    max_retries : int, default is 5
        Max restart attempts. Must be non-negative.
    ttl_hours : int, default is 24
        Time to live in hours. Must be within 1-8760 (1 year max).
    auto_cleanup : bool, default is True
        Auto-delete on TTL.
    force_cleanup : bool, default is False
        Force cleanup without graceful shutdown. Requires auto_cleanup.
    cleanup_grace_period : int, default is 60
        Seconds to wait before force kill. Must be within 0-300.
    created_at_unix : int, required
        Creation timestamp (Unix epoch). Must be non-negative.
    scheduled_cleanup : int, optional
        Cleanup timestamp if scheduled.
    last_state_change : int, optional
        Last state change timestamp.

    Examples
    --------
    # Permanent VM - persists across reboots
    VmPersistence {
        mode = "permanent"
        auto_start = True
        restart_policy = "always"
        created_at_unix = 1700000000
    }

    # Temporary VM - auto-cleanup after TTL
    VmPersistence {
        mode = "temporary"
        ttl_hours = 24
        auto_cleanup = True
        force_cleanup = False
        created_at_unix = 1700000000
    }
    """
    # Persistence mode (Pattern 8: Union types)
    mode: "permanent" | "temporary" = "permanent"

    # Permanent VM settings
    # Start on host boot
    auto_start: bool = False
    restart_policy: "no" | "always" | "on-failure" = "always"
    # Max restart attempts
    max_retries: int = 5

    # Temporary VM settings
    # Time to live
    ttl_hours: int = 24
    # Auto-delete on TTL
    auto_cleanup: bool = True
    # Force cleanup without graceful shutdown
    force_cleanup: bool = False
    # Seconds to wait before force kill
    cleanup_grace_period: int = 60

    # State tracking
    # Creation timestamp (Unix epoch)
    created_at_unix: int
    # Cleanup timestamp if scheduled
    scheduled_cleanup?: int
    # Last state change timestamp
    last_state_change?: int

    check:
        # TTL validation
        ttl_hours > 0 and ttl_hours <= 8760, "TTL must be 1-8760 hours (1 year max)"
        # Restart policy validation
        not (auto_start and mode == "temporary"), "Temporary VMs cannot have auto_start enabled"
        # Cleanup validation
        not (force_cleanup and not auto_cleanup), "force_cleanup requires auto_cleanup enabled"
        # Grace period validation
        cleanup_grace_period >= 0 and cleanup_grace_period <= 300, "Grace period must be 0-300 seconds"
        # Counter/timestamp validation: a negative retry budget or epoch is meaningless
        max_retries >= 0, "Max retries cannot be negative"
        created_at_unix >= 0, "Creation timestamp cannot be negative"
|
|
|
|
schema VmLifecyclePolicy:
    """
    Policy describing how a VM reacts to host-level events.

    Covers host reboot, host shutdown, and resource contention.

    Attributes
    ----------
    on_host_reboot : "start" | "keep-stopped" | "destroy", default is "start"
        Action taken when the host reboots.
    on_host_shutdown : "shutdown" | "save-state" | "destroy", default is "shutdown"
        Action taken when the host shuts down.
    on_memory_pressure : "suspend" | "kill" | "none", default is "none"
        Action taken under memory contention.
    on_disk_full : "suspend" | "kill" | "none", default is "none"
        Action taken when the disk is full.
    enforce_memory_limit : bool, default is True
        Whether the memory limit is enforced.
    enforce_cpu_limit : bool, default is True
        Whether the CPU limit is enforced.
    enforce_disk_limit : bool, default is False
        Whether the disk limit is enforced. Risky if enabled.
    """
    # Host reboot handling
    on_host_reboot: "start" | "keep-stopped" | "destroy" = "start"

    # Host shutdown handling
    on_host_shutdown: "shutdown" | "save-state" | "destroy" = "shutdown"

    # Resource contention handling
    on_memory_pressure: "suspend" | "kill" | "none" = "none"
    on_disk_full: "suspend" | "kill" | "none" = "none"

    # Which resource limits are enforced
    enforce_memory_limit: bool = True
    enforce_cpu_limit: bool = True
    # Risky if enabled
    enforce_disk_limit: bool = False

    check:
        # Saved state is useless when the VM is destroyed at the next boot.
        not (on_host_shutdown == "save-state" and on_host_reboot == "destroy"), "Cannot save-state on shutdown if VM is destroyed on reboot"
|
|
|
|
schema VmCleanupSchedule:
    """
    Cleanup scheduling information for temporary VMs.

    Tracks when VMs are scheduled for cleanup and status.

    Attributes
    ----------
    vm_name : str, required
        VM name. Must be non-empty.
    vm_id : str, required
        VM ID. Must be non-empty.
    mode : "temporary", default is "temporary"
        Persistence mode; only "temporary" is accepted here.
    created_at : str, required
        ISO 8601 timestamp.
    scheduled_cleanup_at : str, required
        ISO 8601 timestamp.
    ttl_hours : int, required
        Time to live in hours. Must be within 1-8760, matching the
        bound enforced by VmPersistence.
    cleanup_status : "pending" | "in-progress" | "completed" | "failed", default is "pending"
        Cleanup status tracking.
    cleanup_attempts : int, default is 0
        Number of cleanup attempts. Must be non-negative.
    last_cleanup_attempt : str, optional
        ISO 8601 timestamp.
    cleanup_error : str, optional
        Cleanup error text, if any.
    """
    vm_name: str
    vm_id: str
    mode: "temporary" = "temporary"
    # ISO 8601 timestamp
    created_at: str
    # ISO 8601 timestamp
    scheduled_cleanup_at: str
    ttl_hours: int

    # Cleanup status tracking
    cleanup_status: "pending" | "in-progress" | "completed" | "failed" = "pending"
    cleanup_attempts: int = 0
    # ISO 8601 timestamp
    last_cleanup_attempt?: str
    cleanup_error?: str

    check:
        len(vm_name) > 0, "VM name required"
        len(vm_id) > 0, "VM ID required"
        ttl_hours > 0, "TTL must be positive"
        # Same ceiling as VmPersistence.ttl_hours so a schedule cannot outlive any valid VM TTL.
        ttl_hours <= 8760, "TTL must be 1-8760 hours (1 year max)"
        cleanup_attempts >= 0, "Cleanup attempts cannot be negative"
|
|
|
|
schema VmRecoveryState:
    """
    VM state snapshot for recovery after host reboot.

    Captures VM state before shutdown for restoration.

    Attributes
    ----------
    vm_name : str, required
        VM name. Must be non-empty.
    vm_id : str, required
        VM ID. Must be non-empty.
    state_before_shutdown : "running" | "stopped" | "paused", required
        State the VM was in before shutdown.
    creation_timestamp : str, required
        ISO 8601.
    last_checkpoint : str, required
        ISO 8601.
    memory_snapshot : str, optional
        Path to memory dump file (for save-state).
    memory_size_mb : int, optional
        Memory size in MB. Must not be negative when set.
    config_snapshot : {str: any}, required
        Full VmConfig at snapshot time (stored as JSON/dict).
    """
    vm_name: str
    vm_id: str
    state_before_shutdown: "running" | "stopped" | "paused"
    # ISO 8601
    creation_timestamp: str
    # ISO 8601
    last_checkpoint: str

    # Memory state (for save-state)
    # Path to memory dump file
    memory_snapshot?: str
    memory_size_mb?: int
    # Configuration snapshot
    # Full VmConfig at snapshot time (stored as JSON/dict)
    config_snapshot: {str: any}

    check:
        len(vm_name) > 0, "VM name required"
        # An empty ID would make the recovery record impossible to match to a VM.
        len(vm_id) > 0, "VM ID required"
        state_before_shutdown in ["running", "stopped", "paused"], "Invalid shutdown state"
        # Guarded: only evaluated when the optional field holds a non-zero value.
        memory_size_mb >= 0 if memory_size_mb, "Memory size cannot be negative"
|
|
|
|
schema VmAutoStartConfig:
    """
    Configuration for automatic VM startup on host boot.

    Manages order and dependencies for VM startup.

    Attributes
    ----------
    vm_name : str, required
        VM name. Must be non-empty.
    enabled : bool, default is True
        Whether auto-start is enabled for this VM.
    start_order : int, default is 0
        Lower numbers start first. Must be non-negative.
    start_delay_seconds : int, default is 0
        Delay before starting. Must be non-negative.
    wait_for_ssh : bool, default is True
        Wait for SSH before continuing.
    ssh_timeout_seconds : int, default is 300
        Max wait time. Must be positive.
    on_start_failure : "stop" | "retry" | "ignore", default is "retry"
        Policy applied when the VM fails to start.
    max_start_retries : int, default is 3
        Maximum start retries. Must be positive.
    depends_on : [str], default is []
        Other VMs to start first. Entries must be non-empty and must not
        include this VM's own name.
    """
    vm_name: str
    enabled: bool = True
    # Lower numbers start first
    start_order: int = 0
    # Delay before starting
    start_delay_seconds: int = 0
    # Wait for SSH before continuing
    wait_for_ssh: bool = True
    # Max wait time
    ssh_timeout_seconds: int = 300
    on_start_failure: "stop" | "retry" | "ignore" = "retry"
    max_start_retries: int = 3

    # Dependencies
    # Other VMs to start first
    depends_on: [str] = []

    check:
        len(vm_name) > 0, "VM name required"
        start_order >= 0, "Start order must be non-negative"
        start_delay_seconds >= 0, "Delay must be non-negative"
        ssh_timeout_seconds > 0, "SSH timeout must be positive"
        max_start_retries > 0, "Max retries must be positive"
        on_start_failure in ["stop", "retry", "ignore"], "Invalid start failure policy"
        # A VM waiting on itself can never be started.
        vm_name not in depends_on, "VM cannot depend on itself"
        # Empty names cannot refer to any VM.
        all dep in depends_on {
            len(dep) > 0
        }, "Dependency names must be non-empty"
|
|
|
|
schema VmCleanupPolicy:
    """
    System-wide cleanup policy applied to every temporary VM.

    Sets the scheduling, concurrency, safety, and logging rules for cleanup.

    Attributes
    ----------
    cleanup_enabled : bool, default is True
        Whether cleanup is enabled.
    check_interval_minutes : int, default is 60
        How often to check for cleanup. Must be within 1-1440 (1 day max).
    cleanup_window_start : str, default is "02:00"
        Window start, HH:MM format.
    cleanup_window_end : str, default is "06:00"
        Window end, HH:MM format.
    cleanup_in_window_only : bool, default is True
        Whether cleanup is restricted to the window.
    max_concurrent_cleanups : int, default is 3
        Max VMs cleaning up simultaneously. Must be at least 1.
    cleanup_batch_size : int, default is 10
        Max VMs to check per batch. Must be positive.
    require_confirmation : bool, default is False
        Require approval before cleanup.
    dry_run_mode : bool, default is False
        Log cleanups without executing.
    skip_on_low_resources : bool, default is True
        Skip cleanup if system busy.
    log_cleanup_operations : bool, default is True
        Whether cleanup operations are logged.
    alert_on_cleanup_failure : bool, default is True
        Whether a cleanup failure raises an alert.
    retention_days : int, default is 7
        Keep cleanup logs for N days. Must be non-negative.
    """
    # --- Scheduling ---
    cleanup_enabled: bool = True
    # Polling interval between cleanup checks
    check_interval_minutes: int = 60
    # Window start, HH:MM format
    cleanup_window_start: str = "02:00"
    # Window end, HH:MM format
    cleanup_window_end: str = "06:00"
    cleanup_in_window_only: bool = True

    # --- Resource constraints ---
    # Upper bound on simultaneous cleanups
    max_concurrent_cleanups: int = 3
    # Upper bound on VMs examined per batch
    cleanup_batch_size: int = 10

    # --- Safety features ---
    # Approval needed before cleanup runs
    require_confirmation: bool = False
    # Only log what would be cleaned up
    dry_run_mode: bool = False
    # Hold off while the system is busy
    skip_on_low_resources: bool = True

    # --- Logging and monitoring ---
    log_cleanup_operations: bool = True
    alert_on_cleanup_failure: bool = True
    # Number of days cleanup logs are kept
    retention_days: int = 7

    check:
        1 <= check_interval_minutes and check_interval_minutes <= 1440, "Check interval must be 1-1440 minutes (1 day max)"
        max_concurrent_cleanups >= 1, "Must allow at least 1 concurrent cleanup"
        cleanup_batch_size >= 1, "Batch size must be positive"
        not (retention_days < 0), "Retention days cannot be negative"
|
|
|
|
schema VmStateSnapshot:
    """
    Point-in-time record of a VM's state, kept for persistence and recovery.

    Used for state persistence across operations.

    Attributes
    ----------
    vm_name : str, required
        VM name. Must be non-empty.
    snapshot_time : str, required
        ISO 8601 timestamp.
    vm_state : "stopped" | "starting" | "running" | "stopping" | "paused" | "error", required
        VM state at snapshot time.
    cpu_usage_percent : float, required
        CPU usage. Must be within 0-100.
    memory_usage_mb : int, required
        Memory usage in MB. Must be non-negative.
    disk_usage_gb : int, required
        Disk usage in GB. Must be non-negative.
    ip_addresses : [str], required
        IP addresses recorded in the snapshot.
    mac_addresses : [str], required
        MAC addresses recorded in the snapshot.
    uptime_seconds : int, required
        Uptime in seconds. Must be non-negative.
    restart_count : int, required
        Restart count. Must be non-negative.
    """
    vm_name: str
    # ISO 8601 timestamp
    snapshot_time: str
    vm_state: "stopped" | "starting" | "running" | "stopping" | "paused" | "error"

    # --- Resource state at snapshot ---
    cpu_usage_percent: float
    memory_usage_mb: int
    disk_usage_gb: int

    # --- Network state ---
    ip_addresses: [str]
    mac_addresses: [str]

    # --- Performance metrics ---
    uptime_seconds: int
    restart_count: int

    check:
        # Order is kept so the first reported violation is unchanged.
        len(vm_name) > 0, "VM name required"
        0 <= cpu_usage_percent and cpu_usage_percent <= 100, "CPU usage must be 0-100%"
        0 <= memory_usage_mb, "Memory usage cannot be negative"
        0 <= disk_usage_gb, "Disk usage cannot be negative"
        0 <= uptime_seconds, "Uptime cannot be negative"
        0 <= restart_count, "Restart count cannot be negative"
|
|
|