# Vapora/provisioning/vapora-wrksp/workflows/deploy-full-stack.yaml

apiVersion: provisioning.vapora.io/v1
kind: Workflow
metadata:
  name: deploy-full-stack
  description: >-
    Complete VAPORA deployment from scratch including cluster,
    databases, and services
spec:
  # Workflow-wide settings: schema version, target namespace, overall
  # timeout and retry policy.
  version: '0.2.0'
  namespace: vapora-system
  timeout: 3600s  # one hour in total
  retryPolicy:
    maxRetries: 3
    backoffFactor: 2

  # Conditions that must hold before the first phase starts.
  prerequisites:
    - kubeconfig_present
    - provisioning_cli_installed
    - sufficient_resources:
        cpu: '20'
        memory: '64Gi'
        disk: '500Gi'

  # Phases are executed sequentially, with gates between them.
  phases:

    # Phase 1: Infrastructure foundation.
    # Creates the cluster, then installs the CNI, the service mesh and the
    # storage layer, and finishes with a cluster health check.
    # Step dependencies reference the exact step names, matching the
    # convention used by every other phase in this workflow.
    - name: "Create K8s Cluster"
      description: "Deploy base Kubernetes cluster with networking"
      retryable: true
      steps:
        - name: "Apply KCL cluster schema"
          command: "provisioning cluster create --config kcl/cluster.k"
          timeout: 1200s
          # NOTE(review): "rollback_cluster" is not defined in this file;
          # confirm the engine provides a handler with this name.
          onError: "rollback_cluster"

        - name: "Install CNI (Cilium)"
          command: "provisioning addon install cilium --helm-values cilium-values.yaml"
          timeout: 300s
          retries: 3
          # The CNI can only be installed once the cluster exists.
          dependencies: ["Apply KCL cluster schema"]

        - name: "Install service mesh (Istio)"
          command: "provisioning addon install istio --config kcl/cluster.k"
          timeout: 600s
          dependencies: ["Install CNI (Cilium)"]

        - name: "Install storage (Rook Ceph)"
          command: "provisioning addon install rook-ceph --size 500Gi --replicas 3"
          timeout: 900s
          dependencies: ["Install CNI (Cilium)"]

        # Runs only after all three add-ons have been installed.
        - name: "Verify cluster health"
          command: "provisioning health-check --cluster"
          timeout: 300s
          dependencies:
            - "Install CNI (Cilium)"
            - "Install service mesh (Istio)"
            - "Install storage (Rook Ceph)"

    # Phase 2: namespace, service accounts and RBAC.
    # The three steps form a chain: namespace -> service accounts -> RBAC.
    - name: 'Setup Namespaces and Security'
      description: 'Create namespaces, service accounts, and RBAC policies'
      retryable: true
      steps:
        # Inline manifest fed to kubectl through a heredoc.
        - name: 'Create namespaces'
          command: |-
            kubectl apply -f - <<EOF
            apiVersion: v1
            kind: Namespace
            metadata:
              name: vapora-system
              labels:
                app: vapora
            EOF
          timeout: 60s
          dependencies:
            - 'Create K8s Cluster'

        # Two ServiceAccount manifests in a single kubectl invocation.
        - name: 'Create service accounts'
          command: |-
            kubectl apply -f - <<EOF
            apiVersion: v1
            kind: ServiceAccount
            metadata:
              name: vapora-backend
              namespace: vapora-system
            ---
            apiVersion: v1
            kind: ServiceAccount
            metadata:
              name: vapora-agents
              namespace: vapora-system
            EOF
          timeout: 60s
          dependencies:
            - 'Create namespaces'

        - name: 'Apply RBAC policies'
          command: 'provisioning rbac apply --config kcl/services.k --namespace vapora-system'
          timeout: 120s
          dependencies:
            - 'Create service accounts'

    # Phase 3: Deploy storage layer.
    # SurrealDB, Redis and NATS are deployed one after another, each gated by
    # its own health check; the database schema is initialised once SurrealDB
    # is up.
    - name: "Deploy Storage Infrastructure"
      description: "Deploy SurrealDB, Redis, and NATS"
      retryable: true
      parallel: false  # Must be sequential for data consistency
      steps:
        - name: "Deploy SurrealDB"
          command: "provisioning taskserv deploy --config taskservs/vapora-storage.toml --component surrealdb"
          timeout: 600s
          dependencies: ["Setup Namespaces and Security"]
          healthCheck:
            type: "http"
            endpoint: "http://surrealdb-0.surrealdb.vapora-system:8000/health"
            interval: 10s
            maxRetries: 30

        - name: "Deploy Redis"
          command: "provisioning taskserv deploy --config taskservs/vapora-storage.toml --component redis"
          timeout: 300s
          dependencies: ["Deploy SurrealDB"]
          healthCheck:
            type: "exec"
            command: "redis-cli ping"
            interval: 5s
            maxRetries: 20

        - name: "Deploy NATS JetStream"
          command: "provisioning taskserv deploy --config taskservs/vapora-storage.toml --component nats"
          timeout: 300s
          dependencies: ["Deploy Redis"]
          healthCheck:
            type: "http"
            # Namespace-qualified host, matching the other health endpoints
            # and the NATS address used by the agent communication check.
            # NOTE(review): the SurrealDB check uses the
            # <pod>.<service>.<namespace> form; confirm whether NATS needs
            # its service name here as well.
            endpoint: "http://nats-0.vapora-system:8222/varz"
            interval: 10s
            maxRetries: 20

        - name: "Initialize database schema"
          command: "provisioning db init --database surrealdb --schema-dir scripts/migrations"
          timeout: 180s
          dependencies: ["Deploy SurrealDB"]

    # Phase 4: backend tier (REST API, LLM router and MCP gateway).
    # Every step depends only on the storage phase, and each declares an HTTP
    # health check with an interval and a retry cap.
    - name: 'Deploy Backend Services'
      description: 'Deploy Axum backend, LLM router, and MCP gateway'
      retryable: true
      parallel: true  # the three deployments may run side by side
      steps:
        - name: 'Deploy REST API backend'
          command: 'provisioning taskserv deploy vapora-backend'
          timeout: 300s
          dependencies:
            - 'Deploy Storage Infrastructure'
          healthCheck:
            type: 'http'
            endpoint: 'http://vapora-backend.vapora-system:8080/api/v1/health'
            interval: 10s
            maxRetries: 20

        - name: 'Deploy Multi-IA LLM Router'
          command: 'provisioning taskserv deploy vapora-llm-router'
          timeout: 300s
          dependencies:
            - 'Deploy Storage Infrastructure'
          healthCheck:
            type: 'http'
            endpoint: 'http://vapora-llm-router.vapora-system:8899/health'
            interval: 10s
            maxRetries: 20

        - name: 'Deploy MCP Gateway'
          command: 'provisioning taskserv deploy vapora-mcp-gateway'
          timeout: 300s
          dependencies:
            - 'Deploy Storage Infrastructure'
          healthCheck:
            type: 'http'
            endpoint: 'http://vapora-mcp-gateway.vapora-system:8888/health'
            interval: 10s
            maxRetries: 20

    # Phase 5: Deploy agent runtime.
    # Deploys the agent pods and gates on a custom script that counts the
    # pods currently in the Running phase.
    - name: "Deploy Agent Runtime"
      description: "Deploy 12-agent orchestrator with initial replicas"
      retryable: true
      steps:
        - name: "Deploy agent runtime pods"
          command: "provisioning taskserv deploy vapora-agents --replicas 3"
          timeout: 600s
          dependencies: ["Deploy Backend Services"]
          healthCheck:
            type: "custom"
            # --no-headers keeps the column header row out of the line count;
            # without it a single Running pod is counted as 2 and passes.
            # NOTE(review): the check accepts 2 pods while the deploy command
            # requests 3 replicas; confirm this tolerance is intended.
            script: |
              AGENT_COUNT=$(kubectl get pods -n vapora-system -l app=vapora-agents --field-selector=status.phase=Running --no-headers | wc -l)
              if [ "$AGENT_COUNT" -ge 2 ]; then
                echo "OK: $AGENT_COUNT agents running"
                exit 0
              else
                echo "ERROR: Only $AGENT_COUNT agents running"
                exit 1
              fi
            interval: 30s
            maxRetries: 20

    # Phase 6: web frontend.
    # A single deployment step, started after the agent runtime phase and
    # checked over HTTP at the site root.
    - name: 'Deploy Frontend'
      description: 'Deploy Leptos CSR frontend'
      retryable: true
      steps:
        - name: 'Deploy frontend application'
          command: 'provisioning taskserv deploy vapora-frontend'
          timeout: 300s
          dependencies:
            - 'Deploy Agent Runtime'
          healthCheck:
            type: 'http'
            endpoint: 'http://vapora-frontend.vapora-system:3000/'
            interval: 10s
            maxRetries: 20

    # Phase 7: monitoring and observability add-ons.
    # Prometheus, Grafana and Loki are installed into the "monitoring"
    # namespace; each step depends only on the frontend phase.
    - name: 'Setup Monitoring Stack'
      description: 'Deploy Prometheus, Grafana, and Loki'
      retryable: true
      parallel: true
      steps:
        - name: 'Deploy Prometheus'
          command: 'provisioning addon install prometheus --namespace monitoring'
          timeout: 300s
          dependencies:
            - 'Deploy Frontend'

        - name: 'Deploy Grafana'
          command: 'provisioning addon install grafana --namespace monitoring'
          timeout: 300s
          dependencies:
            - 'Deploy Frontend'

        - name: 'Deploy Loki (log aggregation)'
          command: 'provisioning addon install loki --namespace monitoring'
          timeout: 300s
          dependencies:
            - 'Deploy Frontend'

    # Phase 8: ingress and routing.
    # Applies an Istio Gateway for vapora.example.com (HTTP on 80, HTTPS on
    # 443 using the vapora-tls credential), then the VirtualService routing.
    - name: 'Configure Ingress and Networking'
      description: 'Setup Istio gateway and ingress rules'
      retryable: true
      steps:
        # Inline Gateway manifest fed to kubectl through a heredoc.
        - name: 'Apply Istio gateway configuration'
          command: |-
            kubectl apply -f - <<EOF
            apiVersion: networking.istio.io/v1beta1
            kind: Gateway
            metadata:
              name: vapora-gateway
              namespace: vapora-system
            spec:
              selector:
                istio: ingressgateway
              servers:
              - port:
                  number: 80
                  name: http
                  protocol: HTTP
                hosts:
                - "vapora.example.com"
              - port:
                  number: 443
                  name: https
                  protocol: HTTPS
                tls:
                  mode: SIMPLE
                  credentialName: vapora-tls
                hosts:
                - "vapora.example.com"
            EOF
          timeout: 60s
          dependencies:
            - 'Deploy Frontend'

        - name: 'Apply VirtualService routing'
          command: 'provisioning istio apply --config kcl/cluster.k --namespace vapora-system'
          timeout: 120s
          dependencies:
            - 'Apply Istio gateway configuration'

    # Phase 9: Post-deployment verification.
    # Runs an aggregate service health check first; the smoke tests, database
    # connectivity test and agent communication test all depend on it.
    # The phase is not retryable.
    - name: "Verify Full Stack"
      description: "Run comprehensive health checks and smoke tests"
      retryable: false
      steps:
        # Fails the step when the aggregate health check reports a problem.
        - name: "Check all services are running"
          command: |
            if ! provisioning health-check --services all; then
              echo "ERROR: Some services are not healthy"
              exit 1
            fi
          timeout: 300s
          dependencies: ["Configure Ingress and Networking"]

        # `set -e` aborts on the first failing smoke test. Without it the
        # script's exit status is that of the last command only, so a failed
        # API test would be masked by a passing frontend test.
        - name: "Run smoke tests"
          command: |
            set -e
            provisioning test smoke --api http://vapora-backend.vapora-system:8080
            provisioning test smoke --frontend http://vapora-frontend.vapora-system:3000
          timeout: 180s
          dependencies: ["Check all services are running"]

        - name: "Verify database connectivity"
          command: "provisioning db test-connection --database surrealdb"
          timeout: 60s
          dependencies: ["Check all services are running"]

        - name: "Verify agent communication"
          command: "provisioning agents health-check --nats nats://nats-0.vapora-system:4222"
          timeout: 120s
          dependencies: ["Check all services are running"]

  # Named values published by the workflow.
  outputs:
    - name: frontend_url
      value: 'https://vapora.example.com'
    - name: grafana_url
      value: 'https://vapora.example.com/grafana'
    # NOTE(review): the Istio gateway applied by this workflow only lists
    # ports 80 and 443; confirm how port 9090 is exposed.
    - name: prometheus_url
      value: 'https://vapora.example.com:9090'
    # NOTE(review): this value is a command string, not a URL; presumably
    # the consumer runs it. Verify against the engine.
    - name: cluster_info
      value: 'kubectl cluster-info'

  # Rollback policy. The allowedSteps entries match phase names declared
  # under `phases`.
  rollback:
    onFailure: 'manual'  # one of: manual | automatic | rollback-to-previous
    allowedSteps:
      - 'Create K8s Cluster'
      - 'Deploy Storage Infrastructure'
      - 'Deploy Backend Services'
    strategy: 'cascade'  # one of: cascade | parallel

  # Notification entries per lifecycle event. Each entry is a single
  # "kind: value" string, quoted so YAML keeps it as one scalar.
  notifications:
    onStart:
      - 'email: devops@example.com'
      - 'slack: #deployment'
    onSuccess:
      - 'email: devops@example.com'
      - 'slack: #deployment'
      - 'action: update-dns'
    onFailure:
      - 'email: devops@example.com'
      - 'slack: #deployment'
      - 'severity: critical'

  # Post-deployment actions: a daily cluster backup and a one-off
  # deployment report with a timestamped file name.
  postDeployment:
    - name: 'Create backup'
      command: 'provisioning backup create --cluster vapora-cluster'
      schedule: 'daily'

    - name: 'Generate deployment report'
      command: 'provisioning report generate --format markdown --output deployment-report-$(date +%s).md'
      schedule: 'once'

---

# Rollback workflow (automatically created).
# Three phases: drain the agent queue, restore the latest backup, then
# re-run the cluster health check.
apiVersion: provisioning.vapora.io/v1
kind: Workflow
metadata:
  name: deploy-full-stack-rollback
  description: Rollback VAPORA deployment to previous stable state
spec:
  version: '0.2.0'
  phases:
    - name: 'Stop new operations'
      steps:
        - name: 'Drain agent queue'
          command: 'provisioning agents drain --wait-timeout 300s'

    - name: 'Restore from backup'
      steps:
        - name: 'List available backups'
          command: 'provisioning backup list'

        - name: 'Restore cluster state'
          command: 'provisioning backup restore --backup latest --confirm'

    - name: 'Verify rollback'
      steps:
        - name: 'Run health checks'
          command: 'provisioning health-check --cluster'
feat: Phase 5.3 - Multi-Agent Learning Infrastructure Implement intelligent agent learning from Knowledge Graph execution history with per-task-type expertise tracking, recency bias, and learning curves. ## Phase 5.3 Implementation ### Learning Infrastructure (✅ Complete) - LearningProfileService with per-task-type expertise metrics - TaskTypeExpertise model tracking success_rate, confidence, learning curves - Recency bias weighting: recent 7 days weighted 3x higher (exponential decay) - Confidence scoring prevents overfitting: min(1.0, executions / 20) - Learning curves computed from daily execution windows ### Agent Scoring Service (✅ Complete) - Unified AgentScore combining SwarmCoordinator + learning profiles - Scoring formula: 0.3*base + 0.5*expertise + 0.2*confidence - Rank agents by combined score for intelligent assignment - Support for recency-biased scoring (recent_success_rate) - Methods: rank_agents, select_best, rank_agents_with_recency ### KG Integration (✅ Complete) - KGPersistence::get_executions_for_task_type() - query by agent + task type - KGPersistence::get_agent_executions() - all executions for agent - Coordinator::load_learning_profile_from_kg() - core KG→Learning integration - Coordinator::load_all_learning_profiles() - batch load for multiple agents - Convert PersistedExecution → ExecutionData for learning calculations ### Agent Assignment Integration (✅ Complete) - AgentCoordinator uses learning profiles for task assignment - extract_task_type() infers task type from title/description - assign_task() scores candidates using AgentScoringService - Fallback to load-based selection if no learning data available - Learning profiles stored in coordinator.learning_profiles RwLock ### Profile Adapter Enhancements (✅ Complete) - create_learning_profile() - initialize empty profiles - add_task_type_expertise() - set task-type expertise - update_profile_with_learning() - update swarm profiles from learning ## Files Modified ### 
vapora-knowledge-graph/src/persistence.rs (+30 lines) - get_executions_for_task_type(agent_id, task_type, limit) - get_agent_executions(agent_id, limit) ### vapora-agents/src/coordinator.rs (+100 lines) - load_learning_profile_from_kg() - core KG integration method - load_all_learning_profiles() - batch loading for agents - assign_task() already uses learning-based scoring via AgentScoringService ### Existing Complete Implementation - vapora-knowledge-graph/src/learning.rs - calculation functions - vapora-agents/src/learning_profile.rs - data structures and expertise - vapora-agents/src/scoring.rs - unified scoring service - vapora-agents/src/profile_adapter.rs - adapter methods ## Tests Passing - learning_profile: 7 tests ✅ - scoring: 5 tests ✅ - profile_adapter: 6 tests ✅ - coordinator: learning-specific tests ✅ ## Data Flow 1. Task arrives → AgentCoordinator::assign_task() 2. Extract task_type from description 3. Query KG for task-type executions (load_learning_profile_from_kg) 4. Calculate expertise with recency bias 5. Score candidates (SwarmCoordinator + learning) 6. Assign to top-scored agent 7. Execution result → KG → Update learning profiles ## Key Design Decisions ✅ Recency bias: 7-day half-life with 3x weight for recent performance ✅ Confidence scoring: min(1.0, total_executions / 20) prevents overfitting ✅ Hierarchical scoring: 30% base load, 50% expertise, 20% confidence ✅ KG query limit: 100 recent executions per task-type for performance ✅ Async loading: load_learning_profile_from_kg supports concurrent loads ## Next: Phase 5.4 - Cost Optimization Ready to implement budget enforcement and cost-aware provider selection. 2026-01-11 13:03:53 +00:00			`apiVersion: provisioning.vapora.io/v1`
			`kind: Workflow`
			`metadata:`
			`name: deploy-full-stack`
			`description: Complete VAPORA deployment from scratch including cluster, databases, and services`
			`spec:`
			`# Workflow metadata`
			`version: "0.2.0"`
			`namespace: vapora-system`
			`timeout: 3600s # 1 hour max`
			`retryPolicy:`
			`maxRetries: 3`
			`backoffFactor: 2`

			`# Prerequisites`
			`prerequisites:`
			`- kubeconfig_present`
			`- provisioning_cli_installed`
			`- sufficient_resources:`
			`cpu: "20"`
			`memory: "64Gi"`
			`disk: "500Gi"`

			`# Workflow phases executed sequentially with gates`
			`phases:`

			`# Phase 1: Infrastructure foundation`
			`- name: "Create K8s Cluster"`
			`description: "Deploy base Kubernetes cluster with networking"`
			`retryable: true`
			`steps:`
			`- name: "Apply KCL cluster schema"`
			`command: "provisioning cluster create --config kcl/cluster.k"`
			`timeout: 1200s`
			`onError: "rollback_cluster"`

			`- name: "Install CNI (Cilium)"`
			`command: "provisioning addon install cilium --helm-values cilium-values.yaml"`
			`timeout: 300s`
			`retries: 3`

			`- name: "Install service mesh (Istio)"`
			`command: "provisioning addon install istio --config kcl/cluster.k"`
			`timeout: 600s`
			`dependencies: ["cilium"]`

			`- name: "Install storage (Rook Ceph)"`
			`command: "provisioning addon install rook-ceph --size 500Gi --replicas 3"`
			`timeout: 900s`
			`dependencies: ["cilium"]`

			`- name: "Verify cluster health"`
			`command: "provisioning health-check --cluster"`
			`timeout: 300s`
			`dependencies: ["cilium", "istio", "rook-ceph"]`

			`# Phase 2: Create namespaces and RBAC`
			`- name: "Setup Namespaces and Security"`
			`description: "Create namespaces, service accounts, and RBAC policies"`
			`retryable: true`
			`steps:`
			`- name: "Create namespaces"`
			`command: "kubectl apply -f - <<EOF\napiVersion: v1\nkind: Namespace\nmetadata:\n name: vapora-system\n labels:\n app: vapora\nEOF"`
			`timeout: 60s`
			`dependencies: ["Create K8s Cluster"]`

			`- name: "Create service accounts"`
			`command: "kubectl apply -f - <<EOF\napiVersion: v1\nkind: ServiceAccount\nmetadata:\n name: vapora-backend\n namespace: vapora-system\n---\napiVersion: v1\nkind: ServiceAccount\nmetadata:\n name: vapora-agents\n namespace: vapora-system\nEOF"`
			`timeout: 60s`
			`dependencies: ["Create namespaces"]`

			`- name: "Apply RBAC policies"`
			`command: "provisioning rbac apply --config kcl/services.k --namespace vapora-system"`
			`timeout: 120s`
			`dependencies: ["Create service accounts"]`

			`# Phase 3: Deploy storage layer`
			`- name: "Deploy Storage Infrastructure"`
			`description: "Deploy SurrealDB, Redis, and NATS"`
			`retryable: true`
			`parallel: false # Must be sequential for data consistency`
			`steps:`
			`- name: "Deploy SurrealDB"`
			`command: "provisioning taskserv deploy --config taskservs/vapora-storage.toml --component surrealdb"`
			`timeout: 600s`
			`dependencies: ["Setup Namespaces and Security"]`
			`healthCheck:`
			`type: "http"`
			`endpoint: "http://surrealdb-0.surrealdb.vapora-system:8000/health"`
			`interval: 10s`
			`maxRetries: 30`

			`- name: "Deploy Redis"`
			`command: "provisioning taskserv deploy --config taskservs/vapora-storage.toml --component redis"`
			`timeout: 300s`
			`dependencies: ["Deploy SurrealDB"]`
			`healthCheck:`
			`type: "exec"`
			`command: "redis-cli ping"`
			`interval: 5s`
			`maxRetries: 20`

			`- name: "Deploy NATS JetStream"`
			`command: "provisioning taskserv deploy --config taskservs/vapora-storage.toml --component nats"`
			`timeout: 300s`
			`dependencies: ["Deploy Redis"]`
			`healthCheck:`
			`type: "http"`
			`endpoint: "http://nats-0:8222/varz"`
			`interval: 10s`
			`maxRetries: 20`

			`- name: "Initialize database schema"`
			`command: "provisioning db init --database surrealdb --schema-dir scripts/migrations"`
			`timeout: 180s`
			`dependencies: ["Deploy SurrealDB"]`

			`# Phase 4: Deploy backend services`
			`- name: "Deploy Backend Services"`
			`description: "Deploy Axum backend, LLM router, and MCP gateway"`
			`retryable: true`
			`parallel: true # Can deploy in parallel`
			`steps:`
			`- name: "Deploy REST API backend"`
			`command: "provisioning taskserv deploy vapora-backend"`
			`timeout: 300s`
			`dependencies: ["Deploy Storage Infrastructure"]`
			`healthCheck:`
			`type: "http"`
			`endpoint: "http://vapora-backend.vapora-system:8080/api/v1/health"`
			`interval: 10s`
			`maxRetries: 20`

			`- name: "Deploy Multi-IA LLM Router"`
			`command: "provisioning taskserv deploy vapora-llm-router"`
			`timeout: 300s`
			`dependencies: ["Deploy Storage Infrastructure"]`
			`healthCheck:`
			`type: "http"`
			`endpoint: "http://vapora-llm-router.vapora-system:8899/health"`
			`interval: 10s`
			`maxRetries: 20`

			`- name: "Deploy MCP Gateway"`
			`command: "provisioning taskserv deploy vapora-mcp-gateway"`
			`timeout: 300s`
			`dependencies: ["Deploy Storage Infrastructure"]`
			`healthCheck:`
			`type: "http"`
			`endpoint: "http://vapora-mcp-gateway.vapora-system:8888/health"`
			`interval: 10s`
			`maxRetries: 20`

			`# Phase 5: Deploy agent runtime`
			`- name: "Deploy Agent Runtime"`
			`description: "Deploy 12-agent orchestrator with initial replicas"`
			`retryable: true`
			`steps:`
			`- name: "Deploy agent runtime pods"`
			`command: "provisioning taskserv deploy vapora-agents --replicas 3"`
			`timeout: 600s`
			`dependencies: ["Deploy Backend Services"]`
			`healthCheck:`
			`type: "custom"`
			`script: \|`
			`AGENT_COUNT=$(kubectl get pods -n vapora-system -l app=vapora-agents --field-selector=status.phase=Running \| wc -l)`
			`if [ "$AGENT_COUNT" -ge 2 ]; then`
			`echo "OK: $AGENT_COUNT agents running"`
			`exit 0`
			`else`
			`echo "ERROR: Only $AGENT_COUNT agents running"`
			`exit 1`
			`fi`
			`interval: 30s`
			`maxRetries: 20`

			`# Phase 6: Deploy frontend`
			`- name: "Deploy Frontend"`
			`description: "Deploy Leptos CSR frontend"`
			`retryable: true`
			`steps:`
			`- name: "Deploy frontend application"`
			`command: "provisioning taskserv deploy vapora-frontend"`
			`timeout: 300s`
			`dependencies: ["Deploy Agent Runtime"]`
			`healthCheck:`
			`type: "http"`
			`endpoint: "http://vapora-frontend.vapora-system:3000/"`
			`interval: 10s`
			`maxRetries: 20`

			`# Phase 7: Setup monitoring and observability`
			`- name: "Setup Monitoring Stack"`
			`description: "Deploy Prometheus, Grafana, and Loki"`
			`retryable: true`
			`parallel: true`
			`steps:`
			`- name: "Deploy Prometheus"`
			`command: "provisioning addon install prometheus --namespace monitoring"`
			`timeout: 300s`
			`dependencies: ["Deploy Frontend"]`

			`- name: "Deploy Grafana"`
			`command: "provisioning addon install grafana --namespace monitoring"`
			`timeout: 300s`
			`dependencies: ["Deploy Frontend"]`

			`- name: "Deploy Loki (log aggregation)"`
			`command: "provisioning addon install loki --namespace monitoring"`
			`timeout: 300s`
			`dependencies: ["Deploy Frontend"]`

			`# Phase 8: Configure networking and ingress`
			`- name: "Configure Ingress and Networking"`
			`description: "Setup Istio gateway and ingress rules"`
			`retryable: true`
			`steps:`
			`- name: "Apply Istio gateway configuration"`
			`command: "kubectl apply -f - <<EOF\napiVersion: networking.istio.io/v1beta1\nkind: Gateway\nmetadata:\n name: vapora-gateway\n namespace: vapora-system\nspec:\n selector:\n istio: ingressgateway\n servers:\n - port:\n number: 80\n name: http\n protocol: HTTP\n hosts:\n - \"vapora.example.com\"\n - port:\n number: 443\n name: https\n protocol: HTTPS\n tls:\n mode: SIMPLE\n credentialName: vapora-tls\n hosts:\n - \"vapora.example.com\"\nEOF"`
			`timeout: 60s`
			`dependencies: ["Deploy Frontend"]`

			`- name: "Apply VirtualService routing"`
			`command: "provisioning istio apply --config kcl/cluster.k --namespace vapora-system"`
			`timeout: 120s`
			`dependencies: ["Apply Istio gateway configuration"]`

			`# Phase 9: Post-deployment verification`
			`- name: "Verify Full Stack"`
			`description: "Run comprehensive health checks and smoke tests"`
			`retryable: false`
			`steps:`
			`- name: "Check all services are running"`
			`command: \|`
			`provisioning health-check --services all`
			`if [ $? -ne 0 ]; then`
			`echo "ERROR: Some services are not healthy"`
			`exit 1`
			`fi`
			`timeout: 300s`
			`dependencies: ["Configure Ingress and Networking"]`

			`- name: "Run smoke tests"`
			`command: \|`
			`provisioning test smoke --api http://vapora-backend.vapora-system:8080`
			`provisioning test smoke --frontend http://vapora-frontend.vapora-system:3000`
			`timeout: 180s`
			`dependencies: ["Check all services are running"]`

			`- name: "Verify database connectivity"`
			`command: "provisioning db test-connection --database surrealdb"`
			`timeout: 60s`
			`dependencies: ["Check all services are running"]`

			`- name: "Verify agent communication"`
			`command: "provisioning agents health-check --nats nats://nats-0.vapora-system:4222"`
			`timeout: 120s`
			`dependencies: ["Check all services are running"]`

			`# Output configuration`
			`outputs:`
			`- name: frontend_url`
			`value: "https://vapora.example.com"`
			`- name: grafana_url`
			`value: "https://vapora.example.com/grafana"`
			`- name: prometheus_url`
			`value: "https://vapora.example.com:9090"`
			`- name: cluster_info`
			`value: "kubectl cluster-info"`

			`# Rollback policies`
			`rollback:`
			`onFailure: "manual" # manual \| automatic \| rollback-to-previous`
			`allowedSteps: ["Create K8s Cluster", "Deploy Storage Infrastructure", "Deploy Backend Services"]`
			`strategy: "cascade" # cascade \| parallel`

			`# Notifications`
			`notifications:`
			`onStart:`
			`- "email: devops@example.com"`
			`- "slack: #deployment"`
			`onSuccess:`
			`- "email: devops@example.com"`
			`- "slack: #deployment"`
			`- "action: update-dns"`
			`onFailure:`
			`- "email: devops@example.com"`
			`- "slack: #deployment"`
			`- "severity: critical"`

			`# Post-deployment actions`
			`postDeployment:`
			`- name: "Create backup"`
			`command: "provisioning backup create --cluster vapora-cluster"`
			`schedule: "daily"`

			`- name: "Generate deployment report"`
			`command: "provisioning report generate --format markdown --output deployment-report-$(date +%s).md"`
			`schedule: "once"`

			`---`

			`# Rollback workflow (automatically created)`
			`apiVersion: provisioning.vapora.io/v1`
			`kind: Workflow`
			`metadata:`
			`name: deploy-full-stack-rollback`
			`description: Rollback VAPORA deployment to previous stable state`
			`spec:`
			`version: "0.2.0"`
			`phases:`
			`- name: "Stop new operations"`
			`steps:`
			`- name: "Drain agent queue"`
			`command: "provisioning agents drain --wait-timeout 300s"`

			`- name: "Restore from backup"`
			`steps:`
			`- name: "List available backups"`
			`command: "provisioning backup list"`

			`- name: "Restore cluster state"`
			`command: "provisioning backup restore --backup latest --confirm"`

			`- name: "Verify rollback"`
			`steps:`
			`- name: "Run health checks"`
			`command: "provisioning health-check --cluster"`