# VAPORA Woodpecker Pipeline - Health Check & Monitoring
# Continuous health monitoring for Docker and Kubernetes deployments
# Triggers on: cron schedule, manual promotion

# Pipeline triggers: scheduled (cron) runs and manual promotions.
trigger:
  event:
    - cron
    - promote
  cron:
    # Every 15 minutes - quick check
    - '*/15 * * * *'
    # Every 6 hours - comprehensive diagnostics
    - '0 */6 * * *'

# Shared values; the steps below reference them as ${NAME}.
variables:
  # Artifact directory (not referenced by any step in this file)
  ARTIFACTS_DIR: 'provisioning/artifacts'
  # Root directory under which every step writes its health-checks/ logs
  LOGS_DIR: 'provisioning/logs'
  # Kubernetes namespace queried by the kubectl-based steps
  VAPORA_NAMESPACE: 'vapora'

stages:
  # Stage: create the health-check log directory and print run metadata.
  setup:
    steps:
      - name: prepare
        image: alpine:latest
        commands:
          - mkdir -p ${LOGS_DIR}/health-checks
          # The echo lines live in a literal block scalar on purpose: as plain
          # YAML list items, `echo "Timestamp: ..."` and `echo "Event: ..."`
          # contain `: ` and would be parsed as one-key mappings, not strings.
          - |
            echo "🏥 VAPORA Health Check Pipeline"
            echo "Timestamp: $(date -u +'%Y-%m-%dT%H:%M:%SZ')"
            echo "Event: ${CI_PIPELINE_EVENT}"

  # Stage: install and smoke-test the CLI tooling (nu, jinja2-cli, kubectl).
  # NOTE(review): if every step runs in its own container, tools installed
  # here are not visible to later steps - confirm against the runner setup.
  install_dependencies:
    steps:
      - name: install_tools
        image: rust:latest
        commands:
          # python3-pip is installed explicitly; the rust image is not a Python
          # image, so pip cannot be assumed to be present.
          - apt-get update && apt-get install -y curl jq yq python3-pip
          - cargo install nu --locked
          # Debian marks its system Python as externally managed (PEP 668), so
          # a global pip install has to opt in explicitly.
          - pip3 install --break-system-packages jinja2-cli
          # -f makes curl fail on HTTP errors instead of saving an error page
          # as the kubectl binary.
          - curl -fLO "https://dl.k8s.io/release/$(curl -fsSL https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
          - chmod +x kubectl && mv kubectl /usr/local/bin/
          # Version checks double as proof that the installs succeeded.
          - nu --version
          - kubectl version --client
          # Docker is optional in this runner; do not fail the step without it.
          - docker --version || echo "Docker not available in this runner"

  # Stage: write the staging kubeconfig and verify the cluster is reachable.
  configure_kubernetes:
    depends_on: [install_dependencies]
    steps:
      - name: setup_kubeconfig_staging
        image: alpine:latest
        environment:
          # Base64-encoded kubeconfig for the staging cluster
          KUBE_CONFIG_STAGING: ${KUBE_CONFIG_STAGING}
        commands:
          # alpine:latest ships neither curl nor kubectl; fetch the current
          # stable kubectl client before `kubectl cluster-info` needs it.
          - apk add --no-cache curl
          - curl -fsSLo /usr/local/bin/kubectl "https://dl.k8s.io/release/$(curl -fsSL https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
          - chmod +x /usr/local/bin/kubectl
          - mkdir -p ~/.kube
          - echo "$KUBE_CONFIG_STAGING" | base64 -d > ~/.kube/config
          # The kubeconfig holds credentials; keep it readable by the owner only.
          - chmod 600 ~/.kube/config
          - kubectl cluster-info
          - echo "✓ Kubernetes staging configured"
        when:
          # Run only when Health_Target is "kubernetes" or empty.
          evaluate: 'return build.Health_Target == "kubernetes" || build.Health_Target == ""'

  # Stage: inspect the Docker daemon through the mounted docker.sock.
  health_check_docker:
    depends_on:
      - configure_kubernetes
    steps:
      # Records container and network listings in docker-containers.log.
      - name: check_docker_containers
        image: 'docker:latest'
        volumes:
          - '/var/run/docker.sock:/var/run/docker.sock'
        commands:
          - |
            echo "🐳 Docker Health Check"
            echo "---"
            out_dir=${LOGS_DIR}/health-checks
            mkdir -p $out_dir
            # Everything printed inside the group is shown and saved via tee.
            (
              echo "Timestamp: $(date -u +'%Y-%m-%dT%H:%M:%SZ')"
              echo ""
              echo "Container Status:"
              docker ps -a --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}"
              echo ""
              echo "Network Status:"
              docker network ls
            ) | tee $out_dir/docker-containers.log

      # Probes each service's HTTP endpoint and appends one healthy/unreachable
      # line per service to docker-endpoints.log. Never fails the step itself.
      - name: check_docker_endpoints
        image: docker:latest
        volumes:
          - /var/run/docker.sock:/var/run/docker.sock
        commands:
          - apk add --no-cache curl
          - |
            echo "🔍 Docker Endpoint Health Checks"
            mkdir -p "${LOGS_DIR}/health-checks"
            # Start from an empty log so results of earlier runs do not linger.
            > "${LOGS_DIR}/health-checks/docker-endpoints.log"

            # check_endpoint NAME URL
            #   Logs "✓ NAME healthy" when URL answers with a success status,
            #   otherwise "⚠️  NAME unreachable".
            check_endpoint() {
              # Quoted: names such as "LLM Router" contain a space.
              local name="$1"
              local url="$2"
              echo "Checking $name: $url" | tee -a "${LOGS_DIR}/health-checks/docker-endpoints.log"
              # --max-time bounds each probe so a hung service cannot stall
              # the whole scheduled run.
              if curl -sf --max-time 10 "$url" > /dev/null; then
                echo "✓ $name healthy" | tee -a "${LOGS_DIR}/health-checks/docker-endpoints.log"
              else
                echo "⚠️  $name unreachable" | tee -a "${LOGS_DIR}/health-checks/docker-endpoints.log"
              fi
            }

            # NOTE(review): localhost is resolved inside this step's container;
            # confirm the services are actually reachable at these addresses.
            check_endpoint "Backend" "http://localhost:8001/health"
            check_endpoint "Frontend" "http://localhost:3000"
            check_endpoint "Agents" "http://localhost:8002/health"
            check_endpoint "LLM Router" "http://localhost:8003/health"
            check_endpoint "SurrealDB" "http://localhost:8000/health"

      # Captures disk usage, per-container resource usage and the volume list
      # in docker-diagnostics.log. Only the docker CLI from the image is used,
      # so no extra packages are installed.
      - name: collect_docker_diagnostics
        image: docker:latest
        volumes:
          - /var/run/docker.sock:/var/run/docker.sock
        commands:
          - |
            echo "📊 Docker Diagnostics"
            mkdir -p "${LOGS_DIR}/health-checks"
            {
              echo "Docker System Info:"
              docker system df
              echo ""
              echo "Docker Resource Usage:"
              # --no-stream prints a single snapshot instead of a live view.
              docker stats --no-stream --all
              echo ""
              echo "Docker Volume Status:"
              docker volume ls
            } | tee "${LOGS_DIR}/health-checks/docker-diagnostics.log"

  # Stage: query the cluster with kubectl and store the output as log files.
  health_check_kubernetes:
    depends_on: [configure_kubernetes]
    steps:
      # Writes deployment and pod status to k8s-deployments.log.
      # NOTE(review): kubectl needs a kubeconfig inside this container -
      # confirm the one written by setup_kubeconfig_staging is visible here.
      - name: check_k8s_deployments
        image: alpine:latest
        commands:
          # alpine:latest has no kubectl; fetch the current stable client.
          - apk add --no-cache curl
          - curl -fsSLo /usr/local/bin/kubectl "https://dl.k8s.io/release/$(curl -fsSL https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
          - chmod +x /usr/local/bin/kubectl
          - |
            echo "☸️  Kubernetes Deployment Health Check"
            echo "---"
            mkdir -p ${LOGS_DIR}/health-checks
            {
              echo "Timestamp: $(date -u +'%Y-%m-%dT%H:%M:%SZ')"
              echo ""
              echo "Deployment Status:"
              kubectl get deployments -n ${VAPORA_NAMESPACE} -o wide
              echo ""
              echo "Pod Status:"
              kubectl get pods -n ${VAPORA_NAMESPACE} -o wide
              echo ""
              echo "Pod Details:"
              # One line per pod: name, phase, status of the Ready condition.
              kubectl get pods -n ${VAPORA_NAMESPACE} -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.status.phase}{"\t"}{.status.conditions[?(@.type=="Ready")].status}{"\n"}{end}'
            } | tee ${LOGS_DIR}/health-checks/k8s-deployments.log

      # Writes services, endpoints and the first 30 lines of the namespace's
      # ConfigMaps (as YAML) to k8s-services.log.
      # NOTE(review): kubectl needs a kubeconfig inside this container -
      # confirm the one written by setup_kubeconfig_staging is visible here.
      - name: check_k8s_services
        image: alpine:latest
        commands:
          # alpine:latest has no kubectl; fetch the current stable client.
          - apk add --no-cache curl
          - curl -fsSLo /usr/local/bin/kubectl "https://dl.k8s.io/release/$(curl -fsSL https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
          - chmod +x /usr/local/bin/kubectl
          - |
            echo "🔍 Kubernetes Service Health Check"
            mkdir -p ${LOGS_DIR}/health-checks
            {
              echo "Services:"
              kubectl get services -n ${VAPORA_NAMESPACE} -o wide
              echo ""
              echo "Endpoints:"
              kubectl get endpoints -n ${VAPORA_NAMESPACE}
              echo ""
              echo "ConfigMap:"
              # head keeps the log short; only the first 30 lines are kept.
              kubectl get configmap -n ${VAPORA_NAMESPACE} -o yaml | head -30
            } | tee ${LOGS_DIR}/health-checks/k8s-services.log

      # Saves the 50 most recent namespace events to k8s-events.log.
      # NOTE(review): kubectl needs a kubeconfig inside this container -
      # confirm the one written by setup_kubeconfig_staging is visible here.
      - name: check_k8s_events
        image: alpine:latest
        commands:
          # alpine:latest has no kubectl; fetch the current stable client.
          - apk add --no-cache curl
          - curl -fsSLo /usr/local/bin/kubectl "https://dl.k8s.io/release/$(curl -fsSL https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
          - chmod +x /usr/local/bin/kubectl
          - |
            echo "📋 Recent Kubernetes Events"
            mkdir -p ${LOGS_DIR}/health-checks
            # Sorted oldest-to-newest, so tail keeps the latest 50 lines.
            kubectl get events -n ${VAPORA_NAMESPACE} --sort-by='.lastTimestamp' | tail -50 | tee ${LOGS_DIR}/health-checks/k8s-events.log

      # Writes cluster info, node list and (when available) resource metrics
      # to k8s-diagnostics.log.
      # NOTE(review): kubectl needs a kubeconfig inside this container -
      # confirm the one written by setup_kubeconfig_staging is visible here.
      - name: collect_k8s_diagnostics
        image: alpine:latest
        commands:
          # alpine:latest has no kubectl; fetch the current stable client.
          - apk add --no-cache curl
          - curl -fsSLo /usr/local/bin/kubectl "https://dl.k8s.io/release/$(curl -fsSL https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
          - chmod +x /usr/local/bin/kubectl
          - |
            echo "📊 Kubernetes Diagnostics"
            mkdir -p ${LOGS_DIR}/health-checks
            {
              echo "Cluster Info:"
              kubectl cluster-info
              echo ""
              echo "Nodes:"
              kubectl get nodes -o wide
              echo ""
              echo "Resource Usage (if metrics available):"
              # `kubectl top` is best-effort: a failure is replaced by a note.
              kubectl top nodes 2>/dev/null || echo "Metrics server not available"
              echo ""
              echo "Pod Resource Usage:"
              kubectl top pods -n ${VAPORA_NAMESPACE} 2>/dev/null || echo "Pod metrics not available"
            } | tee ${LOGS_DIR}/health-checks/k8s-diagnostics.log

      # Stores the last 100 log lines of the backend, agents and llm-router
      # deployments under health-checks/pods/.
      # NOTE(review): kubectl needs a kubeconfig inside this container -
      # confirm the one written by setup_kubeconfig_staging is visible here.
      - name: collect_pod_logs
        image: alpine:latest
        commands:
          # alpine:latest has no kubectl; fetch the current stable client.
          - apk add --no-cache curl
          - curl -fsSLo /usr/local/bin/kubectl "https://dl.k8s.io/release/$(curl -fsSL https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
          - chmod +x /usr/local/bin/kubectl
          - |
            echo "📝 Collecting Pod Logs"
            mkdir -p ${LOGS_DIR}/health-checks/pods
            # stderr is redirected into the same file, so kubectl error text
            # ends up in the per-service log as well.
            kubectl logs -n ${VAPORA_NAMESPACE} deployment/vapora-backend --tail=100 > ${LOGS_DIR}/health-checks/pods/backend.log 2>&1
            kubectl logs -n ${VAPORA_NAMESPACE} deployment/vapora-agents --tail=100 > ${LOGS_DIR}/health-checks/pods/agents.log 2>&1
            kubectl logs -n ${VAPORA_NAMESPACE} deployment/vapora-llm-router --tail=100 > ${LOGS_DIR}/health-checks/pods/llm-router.log 2>&1
            ls -lah ${LOGS_DIR}/health-checks/pods/

  # Stage: summarise the collected logs once both check stages have finished.
  analyze_health:
    depends_on: [health_check_docker, health_check_kubernetes]
    steps:
      # Writes HEALTH_REPORT.md, a Markdown index of the log files, and prints it.
      - name: generate_health_report
        image: alpine:latest
        commands:
          - |
            mkdir -p ${LOGS_DIR}/health-checks
            # The heredoc delimiter is deliberately unquoted so that $(date ...)
            # and the ${...} references are expanded into the report. Markdown
            # backticks are escaped (\`) because an unquoted heredoc would
            # otherwise treat them as command substitution.
            cat > ${LOGS_DIR}/health-checks/HEALTH_REPORT.md << EOF
            # VAPORA Health Check Report

            **Report Time**: $(date -u +'%Y-%m-%dT%H:%M:%SZ')
            **Pipeline**: ${CI_BUILD_LINK}

            ## Summary

            Health check completed for VAPORA services

            ## Docker Status

            - Check logs: \`${LOGS_DIR}/health-checks/docker-containers.log\`
            - Endpoint checks: \`${LOGS_DIR}/health-checks/docker-endpoints.log\`
            - System diagnostics: \`${LOGS_DIR}/health-checks/docker-diagnostics.log\`

            ## Kubernetes Status

            - Deployment status: \`${LOGS_DIR}/health-checks/k8s-deployments.log\`
            - Service status: \`${LOGS_DIR}/health-checks/k8s-services.log\`
            - Recent events: \`${LOGS_DIR}/health-checks/k8s-events.log\`
            - System diagnostics: \`${LOGS_DIR}/health-checks/k8s-diagnostics.log\`
            - Pod logs: \`${LOGS_DIR}/health-checks/pods/\`

            ## Diagnostics

            Review the following for detailed information:

            1. **Docker Health**
               - Container status and uptime
               - Endpoint responsiveness (8001, 8002, 8003, 3000, 8000)
               - Resource allocation and usage

            2. **Kubernetes Health**
               - Deployment replica status
               - Pod readiness conditions
               - Service endpoint availability
               - Recent cluster events
               - Node resource availability

            ## Action Required

            If any services are down or unhealthy:
            1. Review pod logs in \`pods/\` directory
            2. Check recent events in \`k8s-events.log\`
            3. Investigate resource constraints
            4. Check configuration in ConfigMap
            5. Consider rollback if recent deployment

            ## Next Check

            Next automatic health check scheduled per cron configuration

            EOF
            cat ${LOGS_DIR}/health-checks/HEALTH_REPORT.md

      # Counts problem markers in the endpoint and deployment logs and prints a
      # summary. The step only reports; it does not fail the pipeline.
      - name: check_health_status
        image: alpine:latest
        commands:
          - |
            echo "📊 Health Check Summary"
            echo "---"

            # Count issues.
            # grep -c already prints "0" when nothing matches but exits 1, so
            # `$(grep -c ... || echo 0)` would capture "0" twice ("0\n0") and
            # break the integer test below. The fallback is therefore applied
            # to the assignment; it also covers a missing log file (exit 2).
            DOCKER_DOWN=$(grep -c "⚠️" "${LOGS_DIR}/health-checks/docker-endpoints.log" 2>/dev/null) || DOCKER_DOWN=0
            K8S_DOWN=$(grep -c "CrashLoopBackOff\|Error\|Failed" "${LOGS_DIR}/health-checks/k8s-deployments.log" 2>/dev/null) || K8S_DOWN=0

            echo "Docker issues: $DOCKER_DOWN"
            echo "Kubernetes issues: $K8S_DOWN"

            if [ "$DOCKER_DOWN" -gt 0 ] || [ "$K8S_DOWN" -gt 0 ]; then
              echo "⚠️  Issues detected - may require attention"
            else
              echo "✓ All checks passed"
            fi

  # Stage: list the collected reports and optionally notify Slack.
  publish:
    depends_on:
      - analyze_health
    steps:
      # Prints a listing and the total size of the health-check log directory.
      - name: publish_reports
        image: 'alpine:latest'
        commands:
          - 'echo "📦 Health check reports published"'
          - 'ls -lah ${LOGS_DIR}/health-checks/'
          - 'echo ""'
          - 'du -sh ${LOGS_DIR}/health-checks/'

      # Posts a completion message to Slack; does nothing when SLACK_WEBHOOK is
      # empty.
      - name: notify_slack_success
        image: alpine:latest
        environment:
          SLACK_WEBHOOK: ${SLACK_WEBHOOK}
        commands:
          - |
            if [ -n "$SLACK_WEBHOOK" ]; then
              apk add --no-cache curl jq
              # The payload is built with jq so the report path is expanded and
              # JSON-escaped; inside a single-quoted JSON literal the shell
              # would send the text "${LOGS_DIR}" verbatim.
              # Slack mrkdwn marks bold with single asterisks.
              payload=$(jq -n --arg report "${LOGS_DIR}/health-checks/HEALTH_REPORT.md" '{
                text: "✅ VAPORA Health Check Completed",
                blocks: [
                  {
                    type: "section",
                    text: {
                      type: "mrkdwn",
                      text: "✅ *VAPORA Health Check Completed*\n\n*Systems Monitored:*\n• Docker (containers, endpoints)\n• Kubernetes (deployments, pods, services)"
                    }
                  },
                  {
                    type: "context",
                    elements: [
                      {
                        type: "mrkdwn",
                        text: "*Report Location*: `\($report)`"
                      }
                    ]
                  }
                ]
              }')
              # The webhook URL is quoted so it always reaches curl as one
              # argument.
              curl -X POST "$SLACK_WEBHOOK" \
                -H 'Content-Type: application/json' \
                -d "$payload"
            fi