Vapora/provisioning/vapora-wrksp/kcl/cluster.k

"""
VAPORA Kubernetes Cluster Configuration
Defines K8s cluster, networking, storage, and service mesh
"""

import k.api.all as k

# ===== CLUSTER DEFINITION =====

# Top-level cluster definition: an instance of the `Cluster` schema from the
# imported `k` package. Groups networking, node pools, storage, monitoring,
# security and ingress settings. Re-exported by `output` as `cluster_info`
# (whole object) and `storage_config` (the `storage` section only).
cluster = k.Cluster {
    name = "vapora-cluster"
    version = "1.30"
    region = "us-east-1"
    cloud_provider = "aws"  # aws | gcp | azure | on-premise

    # Networking
    # The three CIDR ranges below do not overlap one another.
    # NOTE(review): `networkPolicy` here and `security.network_policies` further
    # down both look like network-policy toggles — confirm both are required
    # by the schema and are meant to stay in sync.
    network = {
        vpc_cidr = "10.0.0.0/16"
        service_cidr = "10.96.0.0/12"
        pod_cidr = "10.244.0.0/16"
        cni = "cilium"  # cilium | flannel | weave
        serviceMesh = "istio"
        networkPolicy = true
    }

    # Node configuration
    # NOTE(review): each pool is pinned to a single zone (masters in
    # us-east-1a, workers in us-east-1b) — confirm single-zone placement is
    # intended for the 3-node control plane.
    nodes = {
        master = {
            count = 3
            instance_type = "t3.large"  # 2 vCPU, 8Gi RAM
            zone = "us-east-1a"
            disk_size = 100
            disk_type = "gp3"
        }
        worker = {
            count = 5
            instance_type = "t3.xlarge"  # 4 vCPU, 16Gi RAM
            zone = "us-east-1b"
            disk_size = 200
            disk_type = "gp3"
            # All workers carry this NoSchedule taint; presumably workloads
            # need a matching `workload=vapora` toleration — verify.
            taints = [
                {"key": "workload", "value": "vapora", "effect": "NoSchedule"}
            ]
        }
    }

    # Storage
    # Pool names ("ssd", "hdd") and sizes match the `storage_class` and `size`
    # values used by `persistent_volumes` later in this file.
    storage = {
        provider = "rook-ceph"  # rook-ceph | ebs | local
        replication_factor = 3
        pools = [
            {
                name = "ssd"
                device_class = "ssd"
                size = "500Gi"
            },
            {
                name = "hdd"
                device_class = "hdd"
                size = "2Ti"
            }
        ]
    }

    # Monitoring stack
    monitoring = {
        prometheus = true
        grafana = true
        loki = true
        alert_manager = true
    }

    # Security
    # NOTE(review): PodSecurityPolicy was removed from Kubernetes in v1.25 and
    # `version` above is "1.30" — confirm what `pod_security_policy` maps to
    # in the schema (e.g. Pod Security Admission) before relying on it.
    security = {
        mTLS = true
        network_policies = true
        pod_security_policy = true
        rbac = true
        audit_logging = true
    }

    # Ingress
    # `domain` is the same host name listed in `istio.virtual_services` and
    # `istio.gateway.servers`; keep them aligned when changing it.
    ingress = {
        provider = "istio"  # istio | nginx | haproxy
        domain = "vapora.example.com"
        tls = true
        cert_provider = "letsencrypt"
    }
}

# ===== NAMESPACES =====

# Namespaces to create, one entry per namespace: its name plus the label map
# to attach. Exported unchanged as `output.namespaces`.
# NOTE(review): the `istio-injection` label sits on `istio-system` rather than
# on the application namespace `vapora-system` — confirm this is intended.
namespaces = [
    {name: "vapora-system", labels: {"app": "vapora"}},
    {name: "istio-system", labels: {"istio-injection": "enabled"}},
    {name: "monitoring", labels: {"monitoring": "true"}},
    {name: "rook-ceph", labels: {"storage": "ceph"}}
]

# ===== ISTIO SERVICE MESH =====

# Service-mesh settings: mesh version, traffic policy, authorization toggles,
# one virtual service for the frontend and the ingress gateway definition.
# Exported as `output.istio_config`.
istio = {
    enabled = true
    # NOTE(review): Istio 1.18 was released before Kubernetes 1.30
    # (`cluster.version`) — check the Istio supported-releases matrix and
    # confirm this pairing is supported.
    version = "1.18"

    # Traffic management
    # Key names presumably mirror Istio DestinationRule `trafficPolicy`
    # fields — verify against the consumer of this config.
    traffic_policy = {
        connection_pool = {
            http = {
                http1MaxPendingRequests = 100
                # NOTE(review): a limit of 2 requests per connection sharply
                # restricts connection reuse — confirm this is intentional.
                maxRequestsPerConnection = 2
                h2UpgradePolicy = "UPGRADE"
            }
            tcp = {
                maxConnections = 100
            }
        }
        outlier_detection = {
            consecutive5xxErrors = 5
            interval = "30s"
            baseEjectionTime = "30s"
        }
    }

    # Authorization policies
    # Boolean toggles; presumably a default-deny baseline with explicit allows
    # for Prometheus scraping and mTLS service-to-service traffic — verify.
    authz_policies = {
        deny_all = true
        allow_prometheus = true
        allow_inter_service_mtls = true
    }

    # Virtual Service for VAPORA frontend
    # Single route sending all traffic (weight 100) to `vapora-frontend`.
    virtual_services = [
        {
            name = "vapora-frontend"
            namespace = "vapora-system"
            hosts = ["vapora.example.com"]
            routes = [
                {
                    destination = "vapora-frontend"
                    weight = 100
                    timeout = "10s"
                    # 3 attempts at 2s each stay below the 10s route timeout.
                    retries = {
                        attempts = 3
                        perTryTimeout = "2s"
                    }
                }
            ]
        }
    ]

    # Gateway
    # Two servers for the same host: plain HTTP on port 80 carrying
    # `redirectPort = 443`, and HTTPS on port 443 using the `vapora-tls`
    # credential.
    gateway = {
        name = "vapora-gateway"
        selector = {"istio": "ingressgateway"}
        servers = [
            {
                port = {number = 80, name = "http", protocol = "HTTP"}
                hosts = ["vapora.example.com"]
                redirectPort = 443
            },
            {
                port = {number = 443, name = "https", protocol = "HTTPS"}
                hosts = ["vapora.example.com"]
                tls = {
                    mode = "SIMPLE"
                    credentialName = "vapora-tls"
                }
            }
        ]
    }
}

# ===== RESOURCE QUOTAS =====

# Per-namespace resource quotas. Each entry names the target namespace and a
# `hard` map of limits whose keys follow the flat resource names used by a
# Kubernetes ResourceQuota (`requests.cpu`, `limits.memory`, ...).
resource_quotas = [
    {
        namespace = "vapora-system"
        hard = {
            # Dotted resource names must be quoted string keys. Unquoted,
            # `requests.cpu = ...` is a KCL selector and expands to a nested
            # `requests: {cpu: ...}` entry instead of the flat key.
            "requests.cpu": "100"
            "requests.memory": "200Gi"
            "limits.cpu": "200"
            "limits.memory": "400Gi"
            # Object-count limits.
            "pods": "500"
            "services": "50"
            "configmaps": "100"
            "secrets": "100"
        }
    }
]

# ===== PERSISTENT VOLUMES =====

# Persistent volumes, one per storage pool: an SSD-backed data volume and an
# HDD-backed backup volume. Both are ReadWriteOnce and retained on release.
persistent_volumes = [
    {
        name: "vapora-data-ssd"
        storage_class: "ssd"
        size: "500Gi"
        access_mode: "ReadWriteOnce"
        reclaim_policy: "Retain"
    },
    {
        name: "vapora-backup-hdd"
        storage_class: "hdd"
        size: "2Ti"
        access_mode: "ReadWriteOnce"
        reclaim_policy: "Retain"
    }
]

# ===== OUTPUT =====

# Aggregated export: the full cluster object, the namespace list, the Istio
# settings, and the cluster's storage section under its own key.
output = {
    cluster_info: cluster
    namespaces: namespaces
    istio_config: istio
    storage_config: cluster.storage
}
feat: Phase 5.3 - Multi-Agent Learning Infrastructure Implement intelligent agent learning from Knowledge Graph execution history with per-task-type expertise tracking, recency bias, and learning curves. ## Phase 5.3 Implementation ### Learning Infrastructure (✅ Complete) - LearningProfileService with per-task-type expertise metrics - TaskTypeExpertise model tracking success_rate, confidence, learning curves - Recency bias weighting: recent 7 days weighted 3x higher (exponential decay) - Confidence scoring prevents overfitting: min(1.0, executions / 20) - Learning curves computed from daily execution windows ### Agent Scoring Service (✅ Complete) - Unified AgentScore combining SwarmCoordinator + learning profiles - Scoring formula: 0.3*base + 0.5*expertise + 0.2*confidence - Rank agents by combined score for intelligent assignment - Support for recency-biased scoring (recent_success_rate) - Methods: rank_agents, select_best, rank_agents_with_recency ### KG Integration (✅ Complete) - KGPersistence::get_executions_for_task_type() - query by agent + task type - KGPersistence::get_agent_executions() - all executions for agent - Coordinator::load_learning_profile_from_kg() - core KG→Learning integration - Coordinator::load_all_learning_profiles() - batch load for multiple agents - Convert PersistedExecution → ExecutionData for learning calculations ### Agent Assignment Integration (✅ Complete) - AgentCoordinator uses learning profiles for task assignment - extract_task_type() infers task type from title/description - assign_task() scores candidates using AgentScoringService - Fallback to load-based selection if no learning data available - Learning profiles stored in coordinator.learning_profiles RwLock ### Profile Adapter Enhancements (✅ Complete) - create_learning_profile() - initialize empty profiles - add_task_type_expertise() - set task-type expertise - update_profile_with_learning() - update swarm profiles from learning ## Files Modified ### 
vapora-knowledge-graph/src/persistence.rs (+30 lines) - get_executions_for_task_type(agent_id, task_type, limit) - get_agent_executions(agent_id, limit) ### vapora-agents/src/coordinator.rs (+100 lines) - load_learning_profile_from_kg() - core KG integration method - load_all_learning_profiles() - batch loading for agents - assign_task() already uses learning-based scoring via AgentScoringService ### Existing Complete Implementation - vapora-knowledge-graph/src/learning.rs - calculation functions - vapora-agents/src/learning_profile.rs - data structures and expertise - vapora-agents/src/scoring.rs - unified scoring service - vapora-agents/src/profile_adapter.rs - adapter methods ## Tests Passing - learning_profile: 7 tests ✅ - scoring: 5 tests ✅ - profile_adapter: 6 tests ✅ - coordinator: learning-specific tests ✅ ## Data Flow 1. Task arrives → AgentCoordinator::assign_task() 2. Extract task_type from description 3. Query KG for task-type executions (load_learning_profile_from_kg) 4. Calculate expertise with recency bias 5. Score candidates (SwarmCoordinator + learning) 6. Assign to top-scored agent 7. Execution result → KG → Update learning profiles ## Key Design Decisions ✅ Recency bias: 7-day half-life with 3x weight for recent performance ✅ Confidence scoring: min(1.0, total_executions / 20) prevents overfitting ✅ Hierarchical scoring: 30% base load, 50% expertise, 20% confidence ✅ KG query limit: 100 recent executions per task-type for performance ✅ Async loading: load_learning_profile_from_kg supports concurrent loads ## Next: Phase 5.4 - Cost Optimization Ready to implement budget enforcement and cost-aware provider selection. 2026-01-11 13:03:53 +00:00			`"""`
			`VAPORA Kubernetes Cluster Configuration`
			`Defines K8s cluster, networking, storage, and service mesh`
			`"""`

			`import k.api.all as k`

			`# ===== CLUSTER DEFINITION =====`

			`cluster = k.Cluster {`
			`name = "vapora-cluster"`
			`version = "1.30"`
			`region = "us-east-1"`
			`cloud_provider = "aws" # aws \| gcp \| azure \| on-premise`

			`# Networking`
			`network = {`
			`vpc_cidr = "10.0.0.0/16"`
			`service_cidr = "10.96.0.0/12"`
			`pod_cidr = "10.244.0.0/16"`
			`cni = "cilium" # cilium \| flannel \| weave`
			`serviceMesh = "istio"`
			`networkPolicy = true`
			`}`

			`# Node configuration`
			`nodes = {`
			`master = {`
			`count = 3`
			`instance_type = "t3.large" # 2 vCPU, 8Gi RAM`
			`zone = "us-east-1a"`
			`disk_size = 100`
			`disk_type = "gp3"`
			`}`
			`worker = {`
			`count = 5`
			`instance_type = "t3.xlarge" # 4 vCPU, 16Gi RAM`
			`zone = "us-east-1b"`
			`disk_size = 200`
			`disk_type = "gp3"`
			`taints = [`
			`{"key": "workload", "value": "vapora", "effect": "NoSchedule"}`
			`]`
			`}`
			`}`

			`# Storage`
			`storage = {`
			`provider = "rook-ceph" # rook-ceph \| ebs \| local`
			`replication_factor = 3`
			`pools = [`
			`{`
			`name = "ssd"`
			`device_class = "ssd"`
			`size = "500Gi"`
			`},`
			`{`
			`name = "hdd"`
			`device_class = "hdd"`
			`size = "2Ti"`
			`}`
			`]`
			`}`

			`# Monitoring stack`
			`monitoring = {`
			`prometheus = true`
			`grafana = true`
			`loki = true`
			`alert_manager = true`
			`}`

			`# Security`
			`security = {`
			`mTLS = true`
			`network_policies = true`
			`pod_security_policy = true`
			`rbac = true`
			`audit_logging = true`
			`}`

			`# Ingress`
			`ingress = {`
			`provider = "istio" # istio \| nginx \| haproxy`
			`domain = "vapora.example.com"`
			`tls = true`
			`cert_provider = "letsencrypt"`
			`}`
			`}`

			`# ===== NAMESPACES =====`

			`namespaces = [`
			`{`
			`name = "vapora-system"`
			`labels = {"app": "vapora"}`
			`},`
			`{`
			`name = "istio-system"`
			`labels = {"istio-injection": "enabled"}`
			`},`
			`{`
			`name = "monitoring"`
			`labels = {"monitoring": "true"}`
			`},`
			`{`
			`name = "rook-ceph"`
			`labels = {"storage": "ceph"}`
			`}`
			`]`

			`# ===== ISTIO SERVICE MESH =====`

			`istio = {`
			`enabled = true`
			`version = "1.18"`

			`# Traffic management`
			`traffic_policy = {`
			`connection_pool = {`
			`http = {`
			`http1MaxPendingRequests = 100`
			`maxRequestsPerConnection = 2`
			`h2UpgradePolicy = "UPGRADE"`
			`}`
			`tcp = {`
			`maxConnections = 100`
			`}`
			`}`
			`outlier_detection = {`
			`consecutive5xxErrors = 5`
			`interval = "30s"`
			`baseEjectionTime = "30s"`
			`}`
			`}`

			`# Authorization policies`
			`authz_policies = {`
			`deny_all = true`
			`allow_prometheus = true`
			`allow_inter_service_mtls = true`
			`}`

			`# Virtual Service for VAPORA frontend`
			`virtual_services = [`
			`{`
			`name = "vapora-frontend"`
			`namespace = "vapora-system"`
			`hosts = ["vapora.example.com"]`
			`routes = [`
			`{`
			`destination = "vapora-frontend"`
			`weight = 100`
			`timeout = "10s"`
			`retries = {`
			`attempts = 3`
			`perTryTimeout = "2s"`
			`}`
			`}`
			`]`
			`}`
			`]`

			`# Gateway`
			`gateway = {`
			`name = "vapora-gateway"`
			`selector = {"istio": "ingressgateway"}`
			`servers = [`
			`{`
			`port = {number = 80, name = "http", protocol = "HTTP"}`
			`hosts = ["vapora.example.com"]`
			`redirectPort = 443`
			`},`
			`{`
			`port = {number = 443, name = "https", protocol = "HTTPS"}`
			`hosts = ["vapora.example.com"]`
			`tls = {`
			`mode = "SIMPLE"`
			`credentialName = "vapora-tls"`
			`}`
			`}`
			`]`
			`}`
			`}`

			`# ===== RESOURCE QUOTAS =====`

			`resource_quotas = [`
			`{`
			`namespace = "vapora-system"`
			`hard = {`
			`requests.cpu = "100"`
			`requests.memory = "200Gi"`
			`limits.cpu = "200"`
			`limits.memory = "400Gi"`
			`pods = "500"`
			`services = "50"`
			`configmaps = "100"`
			`secrets = "100"`
			`}`
			`}`
			`]`

			`# ===== PERSISTENT VOLUMES =====`

			`persistent_volumes = [`
			`{`
			`name = "vapora-data-ssd"`
			`storage_class = "ssd"`
			`size = "500Gi"`
			`access_mode = "ReadWriteOnce"`
			`reclaim_policy = "Retain"`
			`},`
			`{`
			`name = "vapora-backup-hdd"`
			`storage_class = "hdd"`
			`size = "2Ti"`
			`access_mode = "ReadWriteOnce"`
			`reclaim_policy = "Retain"`
			`}`
			`]`

			`# ===== OUTPUT =====`

			`output = {`
			`cluster_info = cluster`
			`namespaces = namespaces`
			`istio_config = istio`
			`storage_config = cluster.storage`
			`}`