Vapora/docs/operations/deployment-runbook.html

<!DOCTYPE HTML>
<html lang="en" class="light sidebar-visible" dir="ltr">
    <head>
        <!-- Book generated using mdBook -->
        <meta charset="UTF-8">
        <title>Deployment Runbook - VAPORA Platform Documentation</title>


        <!-- Custom HTML head -->

        <meta name="description" content="Comprehensive documentation for VAPORA, an intelligent development orchestration platform built entirely in Rust.">
        <meta name="viewport" content="width=device-width, initial-scale=1">
        <meta name="theme-color" content="#ffffff">

        <link rel="icon" href="../favicon.svg">
        <link rel="shortcut icon" href="../favicon.png">
        <link rel="stylesheet" href="../css/variables.css">
        <link rel="stylesheet" href="../css/general.css">
        <link rel="stylesheet" href="../css/chrome.css">
        <link rel="stylesheet" href="../css/print.css" media="print">

        <!-- Fonts -->
        <link rel="stylesheet" href="../FontAwesome/css/font-awesome.css">
        <link rel="stylesheet" href="../fonts/fonts.css">

        <!-- Highlight.js Stylesheets -->
        <link rel="stylesheet" id="highlight-css" href="../highlight.css">
        <link rel="stylesheet" id="tomorrow-night-css" href="../tomorrow-night.css">
        <link rel="stylesheet" id="ayu-highlight-css" href="../ayu-highlight.css">

        <!-- Custom theme stylesheets -->


        <!-- Provide site root and default themes to javascript -->
        <script>
            // Relative path from this page to the book root (this page lives one directory below it).
            const path_to_root = "../";
            // Theme names used by the theme bootstrap script further down when no theme is stored:
            // the dark one if the browser reports prefers-color-scheme: dark, otherwise the light one.
            const default_light_theme = "light";
            const default_dark_theme = "dark";
        </script>
        <!-- Start loading toc.js asap -->
        <script src="../toc.js"></script>
    </head>
    <body>
    <div id="mdbook-help-container">
        <div id="mdbook-help-popup">
            <h2 class="mdbook-help-title">Keyboard shortcuts</h2>
            <div>
                <p>Press <kbd>←</kbd> or <kbd>→</kbd> to navigate between chapters</p>
                <p>Press <kbd>S</kbd> or <kbd>/</kbd> to search in the book</p>
                <p>Press <kbd>?</kbd> to show this help</p>
                <p>Press <kbd>Esc</kbd> to hide this help</p>
            </div>
        </div>
    </div>
    <div id="body-container">
        <!-- Work around some values being stored in localStorage wrapped in quotes -->
        <script>
            // Some values were stored wrapped in double quotes (e.g. '"light"').
            // Rewrite them in place without the quotes so later reads get the bare value.
            // Each key is checked on its own: a key that was never set (getItem returns
            // null) must not stop the other key from being cleaned up.
            try {
                for (const key of ['mdbook-theme', 'mdbook-sidebar']) {
                    const stored = localStorage.getItem(key);
                    if (stored !== null && stored.startsWith('"') && stored.endsWith('"')) {
                        localStorage.setItem(key, stored.slice(1, stored.length - 1));
                    }
                }
            } catch (e) {
                // Storage access failed; this cleanup is best-effort, so carry on.
            }
        </script>

        <!-- Set the theme before any content is loaded, prevents flash -->
        <script>
            // Pick the fallback theme from the OS colour-scheme preference.
            const default_theme = window.matchMedia("(prefers-color-scheme: dark)").matches
                ? default_dark_theme
                : default_light_theme;

            // A stored theme wins over the fallback; reading storage may throw, in which
            // case theme stays undefined and the fallback is used.
            let theme;
            try {
                theme = localStorage.getItem('mdbook-theme');
            } catch (e) {
                // ignored on purpose
            }
            if (theme == null) {
                theme = default_theme;
            }

            // Swap the static 'light' class on <html> for the resolved theme and mark JS as enabled.
            const html = document.documentElement;
            html.classList.remove('light');
            html.classList.add(theme);
            html.classList.add("js");
        </script>

        <input type="checkbox" id="sidebar-toggle-anchor" class="hidden">

        <!-- Hide / unhide sidebar before it is displayed -->
        <script>
            // Decide the initial sidebar state before it is painted.
            const sidebar_toggle = document.getElementById("sidebar-toggle-anchor");
            let sidebar = null;
            if (document.body.clientWidth < 1080) {
                // Narrow viewport: always start with the sidebar hidden.
                sidebar = 'hidden';
            } else {
                // Wide viewport: use the stored state, defaulting to visible.
                try {
                    sidebar = localStorage.getItem('mdbook-sidebar');
                } catch (e) {
                    // ignored on purpose; sidebar stays null
                }
                if (!sidebar) {
                    sidebar = 'visible';
                }
            }

            // Reflect the state on the hidden checkbox and on the <html> class list.
            sidebar_toggle.checked = (sidebar === 'visible');
            html.classList.remove('sidebar-visible');
            html.classList.add(`sidebar-${sidebar}`);
        </script>

        <nav id="sidebar" class="sidebar" aria-label="Table of contents">
            <!-- populated by js -->
            <mdbook-sidebar-scrollbox class="sidebar-scrollbox"></mdbook-sidebar-scrollbox>
            <noscript>
                <iframe class="sidebar-iframe-outer" src="../toc.html" title="Table of contents"></iframe>
            </noscript>
            <div id="sidebar-resize-handle" class="sidebar-resize-handle">
                <div class="sidebar-resize-indicator"></div>
            </div>
        </nav>

        <div id="page-wrapper" class="page-wrapper">

            <div class="page">
                <div id="menu-bar-hover-placeholder"></div>
                <div id="menu-bar" class="menu-bar sticky">
                    <div class="left-buttons">
                        <label id="sidebar-toggle" class="icon-button" for="sidebar-toggle-anchor" title="Toggle Table of Contents" aria-label="Toggle Table of Contents" aria-controls="sidebar">
                            <i class="fa fa-bars"></i>
                        </label>
                        <button id="theme-toggle" class="icon-button" type="button" title="Change theme" aria-label="Change theme" aria-haspopup="true" aria-expanded="false" aria-controls="theme-list">
                            <i class="fa fa-paint-brush"></i>
                        </button>
                        <ul id="theme-list" class="theme-popup" aria-label="Themes" role="menu">
                            <li role="none"><button role="menuitem" class="theme" id="default_theme">Auto</button></li>
                            <li role="none"><button role="menuitem" class="theme" id="light">Light</button></li>
                            <li role="none"><button role="menuitem" class="theme" id="rust">Rust</button></li>
                            <li role="none"><button role="menuitem" class="theme" id="coal">Coal</button></li>
                            <li role="none"><button role="menuitem" class="theme" id="navy">Navy</button></li>
                            <li role="none"><button role="menuitem" class="theme" id="ayu">Ayu</button></li>
                        </ul>
                        <button id="search-toggle" class="icon-button" type="button" title="Search (`/`)" aria-label="Toggle Searchbar" aria-expanded="false" aria-keyshortcuts="/ s" aria-controls="searchbar">
                            <i class="fa fa-search"></i>
                        </button>
                    </div>

                    <h1 class="menu-title">VAPORA Platform Documentation</h1>

                    <div class="right-buttons">
                        <a href="../print.html" title="Print this book" aria-label="Print this book">
                            <i id="print-button" class="fa fa-print"></i>
                        </a>
                        <a href="https://github.com/vapora-platform/vapora" title="Git repository" aria-label="Git repository">
                            <i id="git-repository-button" class="fa fa-github"></i>
                        </a>
                        <a href="https://github.com/vapora-platform/vapora/edit/main/docs/src/../operations/deployment-runbook.md" title="Suggest an edit" aria-label="Suggest an edit">
                            <i id="git-edit-button" class="fa fa-edit"></i>
                        </a>

                    </div>
                </div>

                <div id="search-wrapper" class="hidden">
                    <form id="searchbar-outer" class="searchbar-outer">
                        <input type="search" id="searchbar" name="searchbar" placeholder="Search this book ..." aria-controls="searchresults-outer" aria-describedby="searchresults-header">
                    </form>
                    <div id="searchresults-outer" class="searchresults-outer hidden">
                        <div id="searchresults-header" class="searchresults-header"></div>
                        <ul id="searchresults">
                        </ul>
                    </div>
                </div>

                <!-- Apply ARIA attributes after the sidebar and the sidebar toggle button are added to the DOM -->
                <script>
                    // Mirror the initial sidebar state into ARIA attributes and link focusability.
                    // The braces keep the helper constants out of the global scope.
                    {
                        const expanded = sidebar === 'visible';
                        const linkTabIndex = expanded ? 0 : -1;

                        document.getElementById('sidebar-toggle').setAttribute('aria-expanded', expanded);
                        document.getElementById('sidebar').setAttribute('aria-hidden', !expanded);

                        // Sidebar links are only tabbable while the sidebar is shown.
                        document.querySelectorAll('#sidebar a').forEach((link) => {
                            link.setAttribute('tabIndex', linkTabIndex);
                        });
                    }
                </script>

                <div id="content" class="content">
                    <main>
                        <h1 id="deployment-runbook"><a class="header" href="#deployment-runbook">Deployment Runbook</a></h1>
<p>Step-by-step procedures for deploying VAPORA to staging and production environments.</p>
<hr />
<h2 id="quick-start"><a class="header" href="#quick-start">Quick Start</a></h2>
<p>For experienced operators:</p>
<pre><code class="language-bash"># Validate in CI/CD
# Download artifacts
# Review dry-run
# Apply: kubectl apply -f configmap.yaml -f deployment.yaml
# Monitor: kubectl logs -f deployment/vapora-backend -n vapora
# Verify: curl http://localhost:8001/health
</code></pre>
<p>For complete steps, continue reading.</p>
<hr />
<h2 id="before-starting"><a class="header" href="#before-starting">Before Starting</a></h2>
<p>✅ <strong>Prerequisites Completed</strong>:</p>
<ul>
<li><input disabled="" type="checkbox"/>
Pre-deployment checklist completed</li>
<li><input disabled="" type="checkbox"/>
Artifacts generated and validated</li>
<li><input disabled="" type="checkbox"/>
Staging deployment verified</li>
<li><input disabled="" type="checkbox"/>
Team ready and monitoring</li>
<li><input disabled="" type="checkbox"/>
Maintenance window announced</li>
</ul>
<p>✅ <strong>Access Verified</strong>:</p>
<ul>
<li><input disabled="" type="checkbox"/>
kubectl configured for target cluster</li>
<li><input disabled="" type="checkbox"/>
Can list nodes: <code>kubectl get nodes</code></li>
<li><input disabled="" type="checkbox"/>
Can access namespace: <code>kubectl get namespace vapora</code></li>
</ul>
<p>❌ <strong>If any prerequisite missing</strong>: Go back to pre-deployment checklist</p>
<hr />
<h2 id="phase-1-pre-flight-5-minutes"><a class="header" href="#phase-1-pre-flight-5-minutes">Phase 1: Pre-Flight (5 minutes)</a></h2>
<h3 id="11-verify-current-state"><a class="header" href="#11-verify-current-state">1.1 Verify Current State</a></h3>
<pre><code class="language-bash"># Set context
export CLUSTER=production  # or staging
export NAMESPACE=vapora

# Verify cluster access
kubectl cluster-info
kubectl get nodes

# Output should show:
# NAME     STATUS   ROLES    AGE
# node-1   Ready    worker   30d
# node-2   Ready    worker   25d
</code></pre>
<p><strong>What to look for:</strong></p>
<ul>
<li>✓ All nodes in "Ready" state</li>
<li>✓ No "NotReady" or "Unknown" nodes</li>
<li>If issues: Don't proceed, investigate node health</li>
</ul>
<h3 id="12-check-current-deployments"><a class="header" href="#12-check-current-deployments">1.2 Check Current Deployments</a></h3>
<pre><code class="language-bash"># Get current deployment status
kubectl get deployments -n $NAMESPACE -o wide
kubectl get pods -n $NAMESPACE

# Output example:
# NAME                READY   UP-TO-DATE   AVAILABLE
# vapora-backend      3/3     3            3
# vapora-agents       2/2     2            2
# vapora-llm-router   2/2     2            2
</code></pre>
<p><strong>What to look for:</strong></p>
<ul>
<li>✓ All deployments showing correct replica count</li>
<li>✓ All pods in "Running" state</li>
<li>❌ If pods in "CrashLoopBackOff" or "Pending": Investigate before proceeding</li>
</ul>
<h3 id="13-record-current-versions"><a class="header" href="#13-record-current-versions">1.3 Record Current Versions</a></h3>
<pre><code class="language-bash"># Get current image versions (baseline for rollback)
kubectl get deployments -n $NAMESPACE -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.template.spec.containers[0].image}{"\n"}{end}'

# Expected output:
# vapora-backend      vapora/backend:v1.2.0
# vapora-agents       vapora/agents:v1.2.0
# vapora-llm-router   vapora/llm-router:v1.2.0
</code></pre>
<p><strong>Record these for rollback</strong>: Keep this output visible</p>
<h3 id="14-get-current-revision-numbers"><a class="header" href="#14-get-current-revision-numbers">1.4 Get Current Revision Numbers</a></h3>
<pre><code class="language-bash"># For each deployment, get rollout history
for deployment in vapora-backend vapora-agents vapora-llm-router; do
  echo "=== $deployment ==="
  kubectl rollout history deployment/$deployment -n $NAMESPACE | tail -5
done

# Output example:
# REVISION  CHANGE-CAUSE
# 42        Deployment rolled out
# 43        Deployment rolled out
# 44        (current)
</code></pre>
<p><strong>Record the highest revision number for each</strong> - this is your rollback reference</p>
<h3 id="15-check-cluster-resources"><a class="header" href="#15-check-cluster-resources">1.5 Check Cluster Resources</a></h3>
<pre><code class="language-bash"># Verify cluster has capacity for new deployment
kubectl top nodes
kubectl describe nodes | grep -A 5 "Allocated resources"

# Example - check memory/CPU availability
# Requested:     8200m (41%)
# Limits:        16400m (82%)
</code></pre>
<p><strong>What to look for:</strong></p>
<ul>
<li>✓ Less than 80% resource utilization</li>
<li>❌ If above 85%: Insufficient capacity, don't proceed</li>
</ul>
<hr />
<h2 id="phase-2-configuration-deployment-3-minutes"><a class="header" href="#phase-2-configuration-deployment-3-minutes">Phase 2: Configuration Deployment (3 minutes)</a></h2>
<h3 id="21-apply-configmap"><a class="header" href="#21-apply-configmap">2.1 Apply ConfigMap</a></h3>
<p>The ConfigMap contains all application configuration.</p>
<pre><code class="language-bash"># First: Dry-run to verify no syntax errors
kubectl apply -f configmap.yaml --dry-run=server -n $NAMESPACE

# Should output:
# configmap/vapora-config configured (server dry run)

# Check for any warnings or errors in output
# If errors, stop and fix the YAML before proceeding
</code></pre>
<p><strong>Troubleshooting</strong>:</p>
<ul>
<li>"error validating": YAML syntax error - fix and retry</li>
<li>"field is immutable": Can't change certain ConfigMap fields - delete and recreate</li>
<li>"resourceQuotaExceeded": Namespace quota exceeded - contact cluster admin</li>
</ul>
<h3 id="22-apply-configmap-for-real"><a class="header" href="#22-apply-configmap-for-real">2.2 Apply ConfigMap for Real</a></h3>
<pre><code class="language-bash"># Apply the actual ConfigMap
kubectl apply -f configmap.yaml -n $NAMESPACE

# Output:
# configmap/vapora-config configured

# Verify it was applied
kubectl get configmap -n $NAMESPACE vapora-config -o yaml | head -20

# Check for your new values in the output
</code></pre>
<p><strong>Verify ConfigMap is correct</strong>:</p>
<pre><code class="language-bash"># Extract specific values to verify
kubectl get configmap vapora-config -n $NAMESPACE -o jsonpath='{.data.vapora\.toml}' | grep "database_url" | head -1

# Should show the correct database URL
</code></pre>
<h3 id="23-annotate-configmap"><a class="header" href="#23-annotate-configmap">2.3 Annotate ConfigMap</a></h3>
<p>Record when this config was deployed for audit trail:</p>
<pre><code class="language-bash">kubectl annotate configmap vapora-config \
  -n $NAMESPACE \
  deployment.timestamp="$(date -u +'%Y-%m-%dT%H:%M:%SZ')" \
  deployment.commit="$(git rev-parse HEAD | cut -c1-8)" \
  deployment.branch="$(git rev-parse --abbrev-ref HEAD)" \
  --overwrite

# Verify annotation was added
kubectl get configmap vapora-config -n $NAMESPACE -o yaml | grep "deployment\."
</code></pre>
<hr />
<h2 id="phase-3-deployment-update-5-minutes"><a class="header" href="#phase-3-deployment-update-5-minutes">Phase 3: Deployment Update (5 minutes)</a></h2>
<h3 id="31-dry-run-deployment"><a class="header" href="#31-dry-run-deployment">3.1 Dry-Run Deployment</a></h3>
<p>Always dry-run first to catch issues:</p>
<pre><code class="language-bash"># Run deployment dry-run
kubectl apply -f deployment.yaml --dry-run=server -n $NAMESPACE

# Output should show what will be updated:
# deployment.apps/vapora-backend configured (server dry run)
# deployment.apps/vapora-agents configured (server dry run)
# deployment.apps/vapora-llm-router configured (server dry run)
</code></pre>
<p><strong>Check for warnings</strong>:</p>
<ul>
<li>"imagePullBackOff": Docker image doesn't exist</li>
<li>"insufficient quota": Resource limits exceeded</li>
<li>"nodeAffinity": Pod can't be placed on any node</li>
</ul>
<h3 id="32-apply-deployments"><a class="header" href="#32-apply-deployments">3.2 Apply Deployments</a></h3>
<pre><code class="language-bash"># Apply the actual deployments
kubectl apply -f deployment.yaml -n $NAMESPACE

# Output:
# deployment.apps/vapora-backend configured
# deployment.apps/vapora-agents configured
# deployment.apps/vapora-llm-router configured
</code></pre>
<p><strong>Verify deployments updated</strong>:</p>
<pre><code class="language-bash"># Check that new rollout was initiated
kubectl get deployments -n $NAMESPACE -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.status.observedGeneration}{"\n"}{end}'

# Compare with recorded versions - should be incremented
</code></pre>
<h3 id="33-monitor-rollout-progress"><a class="header" href="#33-monitor-rollout-progress">3.3 Monitor Rollout Progress</a></h3>
<p>Watch the deployment rollout status:</p>
<pre><code class="language-bash"># For each deployment, monitor the rollout
for deployment in vapora-backend vapora-agents vapora-llm-router; do
  echo "Waiting for $deployment..."
  kubectl rollout status deployment/$deployment \
    -n $NAMESPACE \
    --timeout=5m
  echo "$deployment ready"
done
</code></pre>
<p><strong>What to look for</strong> (per pod update):</p>
<pre><code>Waiting for rollout to finish: 2 of 3 updated replicas are available...
Waiting for rollout to finish: 2 of 3 updated replicas are available...
Waiting for rollout to finish: 3 of 3 updated replicas are available...
deployment "vapora-backend" successfully rolled out
</code></pre>
<p><strong>Expected time: 2-3 minutes per deployment</strong></p>
<h3 id="34-watch-pod-updates-in-separate-terminal"><a class="header" href="#34-watch-pod-updates-in-separate-terminal">3.4 Watch Pod Updates (in separate terminal)</a></h3>
<p>While rollout completes, monitor pods:</p>
<pre><code class="language-bash"># Watch pods being updated in real-time
kubectl get pods -n $NAMESPACE -w

# Output shows updates like:
# NAME                              READY   STATUS
# vapora-backend-abc123-def45       1/1     Running
# vapora-backend-xyz789-old-pod     1/1     Running  ← old pod still running
# vapora-backend-abc123-new-pod     0/1     Pending  ← new pod starting
# vapora-backend-abc123-new-pod     0/1     ContainerCreating
# vapora-backend-abc123-new-pod     1/1     Running  ← new pod ready
# vapora-backend-xyz789-old-pod     1/1     Terminating  ← old pod being removed
</code></pre>
<p><strong>What to look for:</strong></p>
<ul>
<li>✓ New pods starting (Pending → ContainerCreating → Running)</li>
<li>✓ Each new pod reaches Running state</li>
<li>✓ Old pods gradually terminating</li>
<li>❌ Pod stuck in "CrashLoopBackOff": Stop, check logs, might need rollback</li>
</ul>
<hr />
<h2 id="phase-4-verification-5-minutes"><a class="header" href="#phase-4-verification-5-minutes">Phase 4: Verification (5 minutes)</a></h2>
<h3 id="41-verify-all-pods-running"><a class="header" href="#41-verify-all-pods-running">4.1 Verify All Pods Running</a></h3>
<pre><code class="language-bash"># Check all pods are ready
kubectl get pods -n $NAMESPACE

# Expected output:
# NAME                              READY   STATUS
# vapora-backend-&lt;hash&gt;-1           1/1     Running
# vapora-backend-&lt;hash&gt;-2           1/1     Running
# vapora-backend-&lt;hash&gt;-3           1/1     Running
# vapora-agents-&lt;hash&gt;-1            1/1     Running
# vapora-agents-&lt;hash&gt;-2            1/1     Running
# vapora-llm-router-&lt;hash&gt;-1        1/1     Running
# vapora-llm-router-&lt;hash&gt;-2        1/1     Running
</code></pre>
<p><strong>Verification</strong>:</p>
<pre><code class="language-bash"># All pods should show READY=1/1
# All pods should show STATUS=Running
# No pods should be in Pending, CrashLoopBackOff, or Error state

# Quick check:
READY=$(kubectl get pods -n $NAMESPACE -o jsonpath='{range .items[*]}{.status.conditions[?(@.type=="Ready")].status}{"\n"}{end}' | grep -c "True")
TOTAL=$(kubectl get pods -n $NAMESPACE --no-headers | wc -l)

echo "Ready pods: $READY / $TOTAL"

# Should show: Ready pods: 7 / 7 (or your expected pod count)
</code></pre>
<h3 id="42-check-pod-logs-for-errors"><a class="header" href="#42-check-pod-logs-for-errors">4.2 Check Pod Logs for Errors</a></h3>
<pre><code class="language-bash"># Check logs from the last minute for errors
for pod in $(kubectl get pods -n $NAMESPACE -o name); do
  echo "=== $pod ==="
  kubectl logs $pod -n $NAMESPACE --since=1m 2&gt;&amp;1 | grep -i "error\|exception\|fatal" | head -3
done

# If errors found:
# 1. Note which pods have errors
# 2. Get full log: kubectl logs &lt;pod&gt; -n $NAMESPACE
# 3. Decide: can proceed or need to rollback
</code></pre>
<h3 id="43-verify-service-endpoints"><a class="header" href="#43-verify-service-endpoints">4.3 Verify Service Endpoints</a></h3>
<pre><code class="language-bash"># Check services are exposing pods correctly
kubectl get endpoints -n $NAMESPACE

# Expected output:
# NAME              ENDPOINTS
# vapora-backend    10.1.2.3:8001,10.1.2.4:8001,10.1.2.5:8001
# vapora-agents     10.1.2.6:8002,10.1.2.7:8002
# vapora-llm-router 10.1.2.8:8003,10.1.2.9:8003
</code></pre>
<p><strong>Verification</strong>:</p>
<ul>
<li>✓ Each service has multiple endpoints (not empty)</li>
<li>✓ Endpoints match running pods</li>
<li>❌ If empty endpoints: Service can't route traffic</li>
</ul>
<h3 id="44-health-check-endpoints"><a class="header" href="#44-health-check-endpoints">4.4 Health Check Endpoints</a></h3>
<pre><code class="language-bash"># Port-forward to access services locally
kubectl port-forward -n $NAMESPACE svc/vapora-backend 8001:8001 &amp;

# Wait a moment for port-forward to establish
sleep 2

# Check backend health
curl -v http://localhost:8001/health

# Expected response:
# HTTP/1.1 200 OK
# {...healthy response...}

# Check other endpoints
curl http://localhost:8001/api/projects -H "Authorization: Bearer test-token"
</code></pre>
<p><strong>Expected responses</strong>:</p>
<ul>
<li><code>/health</code>: 200 OK with health data</li>
<li><code>/api/projects</code>: 200 OK with projects list</li>
<li><code>/metrics</code>: 200 OK with Prometheus metrics</li>
</ul>
<p><strong>If connection refused</strong>:</p>
<pre><code class="language-bash"># Check if port-forward working
ps aux | grep "port-forward"

# Restart port-forward
pkill -f "port-forward.*svc/vapora-backend"
kubectl port-forward -n $NAMESPACE svc/vapora-backend 8001:8001 &amp;
</code></pre>
<h3 id="45-check-metrics"><a class="header" href="#45-check-metrics">4.5 Check Metrics</a></h3>
<pre><code class="language-bash"># Monitor resource usage of deployed pods
kubectl top pods -n $NAMESPACE

# Expected output:
# NAME                           CPU(cores)   MEMORY(Mi)
# vapora-backend-abc123          250m         512Mi
# vapora-backend-def456          280m         498Mi
# vapora-agents-ghi789           300m         256Mi
</code></pre>
<p><strong>Verification</strong>:</p>
<ul>
<li>✓ CPU usage within expected range (typically 100-500m per pod)</li>
<li>✓ Memory usage within expected range (typically 200-512Mi)</li>
<li>❌ If any pod at 100% CPU/Memory: Performance issue, monitor closely</li>
</ul>
<hr />
<h2 id="phase-5-validation-3-minutes"><a class="header" href="#phase-5-validation-3-minutes">Phase 5: Validation (3 minutes)</a></h2>
<h3 id="51-run-smoke-tests-if-available"><a class="header" href="#51-run-smoke-tests-if-available">5.1 Run Smoke Tests (if available)</a></h3>
<pre><code class="language-bash"># If your project has smoke tests:
kubectl exec -it deployment/vapora-backend -n $NAMESPACE -- \
  sh -c "curl -fsS http://localhost:8001/health &amp;&amp; echo 'Health check passed'"

# Or run from your local machine:
./scripts/smoke-tests.sh --endpoint http://localhost:8001
</code></pre>
<h3 id="52-check-for-errors-in-logs"><a class="header" href="#52-check-for-errors-in-logs">5.2 Check for Errors in Logs</a></h3>
<pre><code class="language-bash"># Count error lines per deployment since the rollout started (kubectl logs deployment/&lt;name&gt; reads one pod of the deployment)
for deployment in vapora-backend vapora-agents vapora-llm-router; do
  echo "=== Checking $deployment ==="
  kubectl logs deployment/$deployment -n $NAMESPACE --since=5m 2&gt;&amp;1 | \
    grep -i "error\|exception\|failed" | wc -l
done

# If any errors found:
# 1. Get detailed logs
# 2. Determine if critical or expected errors
# 3. Decide to proceed or rollback
</code></pre>
<h3 id="53-compare-against-baseline-metrics"><a class="header" href="#53-compare-against-baseline-metrics">5.3 Compare Against Baseline Metrics</a></h3>
<p>Compare current metrics with pre-deployment baseline:</p>
<pre><code class="language-bash"># Current metrics
echo "=== Current ==="
kubectl top nodes
kubectl top pods -n $NAMESPACE | head -5

# Compare with recorded baseline
# If similar: ✓ Good
# If significantly higher: ⚠️ Watch for issues
# If error rates high: ❌ Consider rollback
</code></pre>
<h3 id="54-check-for-recent-eventswarnings"><a class="header" href="#54-check-for-recent-eventswarnings">5.4 Check for Recent Events/Warnings</a></h3>
<pre><code class="language-bash"># Show the 20 most recent events in the namespace
kubectl get events -n $NAMESPACE --sort-by='.lastTimestamp' | tail -20

# Watch for:
# - Warning: FailedScheduling (pod won't fit)
# - Warning: ErrImagePull (image pull failed, e.g. image doesn't exist)
# - Warning: ImagePullBackOff (can't download image)
# - Error: ExceededQuota (resource limits)
</code></pre>
<hr />
<h2 id="phase-6-communication-1-minute"><a class="header" href="#phase-6-communication-1-minute">Phase 6: Communication (1 minute)</a></h2>
<h3 id="61-post-deployment-complete"><a class="header" href="#61-post-deployment-complete">6.1 Post Deployment Complete</a></h3>
<pre><code>Post message to #deployments:

🚀 DEPLOYMENT COMPLETE

Deployment: VAPORA Core Services
Mode: Enterprise
Duration: 8 minutes
Status: ✅ Successful

Deployed:
- vapora-backend (v1.2.1)
- vapora-agents (v1.2.1)
- vapora-llm-router (v1.2.1)

Verification:
✓ All pods running
✓ Health checks passing
✓ No error logs
✓ Metrics normal

Next steps:
- Monitor #alerts for any issues
- Check dashboards every 5 minutes for 30 min
- Review logs if any issues detected

Questions? @on-call-engineer
</code></pre>
<h3 id="62-update-status-page"><a class="header" href="#62-update-status-page">6.2 Update Status Page</a></h3>
<pre><code>If using public status page:

UPDATE: Maintenance Complete

VAPORA services have been successfully updated
and are now operating normally.

All systems monitoring nominal.
</code></pre>
<h3 id="63-notify-stakeholders"><a class="header" href="#63-notify-stakeholders">6.3 Notify Stakeholders</a></h3>
<ul>
<li><input disabled="" type="checkbox"/>
Send message to support team: "Deployment complete, all systems normal"</li>
<li><input disabled="" type="checkbox"/>
Post in #product: "Backend updated to v1.2.1, new features available"</li>
<li><input disabled="" type="checkbox"/>
Update ticket/issue with deployment completion time and status</li>
</ul>
<hr />
<h2 id="phase-7-post-deployment-monitoring-ongoing"><a class="header" href="#phase-7-post-deployment-monitoring-ongoing">Phase 7: Post-Deployment Monitoring (Ongoing)</a></h2>
<h3 id="71-first-5-minutes-watch-closely"><a class="header" href="#71-first-5-minutes-watch-closely">7.1 First 5 Minutes: Watch Closely</a></h3>
<pre><code class="language-bash"># Keep watching for any issues (run each command in its own terminal)
watch kubectl get pods -n $NAMESPACE
watch kubectl top pods -n $NAMESPACE
kubectl logs -f deployment/vapora-backend -n $NAMESPACE
</code></pre>
<p><strong>Watch for:</strong></p>
<ul>
<li>Pod restarts (RESTARTS counter increasing)</li>
<li>Increased error logs</li>
<li>Resource usage spikes</li>
<li>Service unreachability</li>
</ul>
<h3 id="72-first-30-minutes-monitor-dashboard"><a class="header" href="#72-first-30-minutes-monitor-dashboard">7.2 First 30 Minutes: Monitor Dashboard</a></h3>
<p>Keep dashboard visible showing:</p>
<ul>
<li>Pod health status</li>
<li>CPU/Memory usage per pod</li>
<li>Request latency (if available)</li>
<li>Error rate</li>
<li>Recent logs</li>
</ul>
<p><strong>Alert triggers for immediate action:</strong></p>
<ul>
<li>Any pod restarting repeatedly</li>
<li>Error rate above 5%</li>
<li>Latency above 2x normal</li>
<li>Pod stuck in Pending state</li>
</ul>
<h3 id="73-first-2-hours-regular-checks"><a class="header" href="#73-first-2-hours-regular-checks">7.3 First 2 Hours: Regular Checks</a></h3>
<pre><code class="language-bash"># Every 10 minutes:
# 1. kubectl get pods -n $NAMESPACE
# 2. kubectl top pods -n $NAMESPACE
# 3. Check error logs: grep -i error from recent logs
# 4. Check alerts dashboard
</code></pre>
<p><strong>If issues detected</strong>, proceed to Incident Response Runbook</p>
<h3 id="74-after-2-hours-normal-monitoring"><a class="header" href="#74-after-2-hours-normal-monitoring">7.4 After 2 Hours: Normal Monitoring</a></h3>
<p>Return to standard monitoring procedures. Deployment complete.</p>
<hr />
<h2 id="if-issues-detected-quick-rollback"><a class="header" href="#if-issues-detected-quick-rollback">If Issues Detected: Quick Rollback</a></h2>
<p>If problems occur at any point:</p>
<pre><code class="language-bash"># IMMEDIATE: Rollback (1 minute)
for deployment in vapora-backend vapora-agents vapora-llm-router; do
  kubectl rollout undo deployment/$deployment -n $NAMESPACE &amp;
done
wait

# Verify rollback completing for every deployment:
for deployment in vapora-backend vapora-agents vapora-llm-router; do
  kubectl rollout status deployment/$deployment -n $NAMESPACE --timeout=5m
done

# Confirm services recovering:
curl http://localhost:8001/health

# Post to #deployments:
# 🔙 ROLLBACK EXECUTED
# Issue detected, services rolled back to previous version
# All pods should be recovering now
</code></pre>
<p>See <a href="./rollback-runbook.html">Rollback Runbook</a> for detailed procedures.</p>
<hr />
<h2 id="common-issues--solutions"><a class="header" href="#common-issues--solutions">Common Issues &amp; Solutions</a></h2>
<h3 id="issue-pod-stuck-in-imagepullbackoff"><a class="header" href="#issue-pod-stuck-in-imagepullbackoff">Issue: Pod stuck in ImagePullBackOff</a></h3>
<p><strong>Cause</strong>: Docker image doesn't exist or can't be downloaded</p>
<p><strong>Solution</strong>:</p>
<pre><code class="language-bash"># Check pod events
kubectl describe pod &lt;pod-name&gt; -n $NAMESPACE

# Check image registry access
kubectl get secret -n $NAMESPACE

# Either:
# 1. Verify image name is correct in deployment.yaml
# 2. Push missing image to registry
# 3. Rollback deployment
</code></pre>
<h3 id="issue-pod-stuck-in-crashloopbackoff"><a class="header" href="#issue-pod-stuck-in-crashloopbackoff">Issue: Pod stuck in CrashLoopBackOff</a></h3>
<p><strong>Cause</strong>: Application crashing on startup</p>
<p><strong>Solution</strong>:</p>
<pre><code class="language-bash"># Get pod logs
kubectl logs &lt;pod-name&gt; -n $NAMESPACE --previous

# Fix typically requires config change:
# 1. Fix ConfigMap issue
# 2. Re-apply ConfigMap: kubectl apply -f configmap.yaml -n $NAMESPACE
# 3. Trigger pod restart: kubectl rollout restart deployment/&lt;name&gt; -n $NAMESPACE

# Or rollback if unclear
</code></pre>
<h3 id="issue-pod-in-pending-state"><a class="header" href="#issue-pod-in-pending-state">Issue: Pod in Pending state</a></h3>
<p><strong>Cause</strong>: Node doesn't have capacity or resources</p>
<p><strong>Solution</strong>:</p>
<pre><code class="language-bash"># Describe pod to see why
kubectl describe pod &lt;pod-name&gt; -n $NAMESPACE

# Look for "Insufficient cpu" or "Insufficient memory" in the pod events above, then check node usage
kubectl top nodes

# Then do one of the following:
#   1. Scale down other workloads
#   2. Increase node count
#   3. Reduce resource requirements in deployment.yaml and redeploy
</code></pre>
<h3 id="issue-service-endpoints-empty"><a class="header" href="#issue-service-endpoints-empty">Issue: Service endpoints empty</a></h3>
<p><strong>Cause</strong>: Pods not passing health checks</p>
<p><strong>Solution</strong>:</p>
<pre><code class="language-bash"># Check pod logs for errors
kubectl logs &lt;pod-name&gt; -n $NAMESPACE

# Check pod readiness probe failures
kubectl describe pod &lt;pod-name&gt; -n $NAMESPACE | grep -A 5 "Readiness"

# Fix configuration or rollback
</code></pre>
<hr />
<h2 id="completion-checklist"><a class="header" href="#completion-checklist">Completion Checklist</a></h2>
<ul>
<li><input disabled="" type="checkbox"/>
All pods running and ready</li>
<li><input disabled="" type="checkbox"/>
Health endpoints responding</li>
<li><input disabled="" type="checkbox"/>
No error logs</li>
<li><input disabled="" type="checkbox"/>
Metrics normal</li>
<li><input disabled="" type="checkbox"/>
Deployment communication posted</li>
<li><input disabled="" type="checkbox"/>
Status page updated</li>
<li><input disabled="" type="checkbox"/>
Stakeholders notified</li>
<li><input disabled="" type="checkbox"/>
Monitoring enabled for next 2 hours</li>
<li><input disabled="" type="checkbox"/>
Ticket/issue updated with completion details</li>
</ul>
<hr />
<h2 id="next-steps"><a class="header" href="#next-steps">Next Steps</a></h2>
<ul>
<li>Continue monitoring per <a href="./monitoring-runbook.html">Monitoring Runbook</a></li>
<li>If issues arise, follow <a href="./incident-response-runbook.html">Incident Response Runbook</a></li>
<li>Document lessons learned</li>
<li>Update runbooks if procedures need improvement</li>
</ul>

                    </main>

                    <nav class="nav-wrapper" aria-label="Page navigation">
                        <!-- Mobile navigation buttons -->
                            <a rel="prev" href="../operations/index.html" class="mobile-nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
                                <i class="fa fa-angle-left"></i>
                            </a>

                            <a rel="next prefetch" href="../operations/pre-deployment-checklist.html" class="mobile-nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
                                <i class="fa fa-angle-right"></i>
                            </a>

                        <div style="clear: both"></div>
                    </nav>
                </div>
            </div>

            <nav class="nav-wide-wrapper" aria-label="Page navigation">
                    <a rel="prev" href="../operations/index.html" class="nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
                        <i class="fa fa-angle-left"></i>
                    </a>

                    <a rel="next prefetch" href="../operations/pre-deployment-checklist.html" class="nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
                        <i class="fa fa-angle-right"></i>
                    </a>
            </nav>

        </div>


        <script>
            window.playground_copyable = true;
        </script>


        <script src="../elasticlunr.min.js"></script>
        <script src="../mark.min.js"></script>
        <script src="../searcher.js"></script>

        <script src="../clipboard.min.js"></script>
        <script src="../highlight.js"></script>
        <script src="../book.js"></script>

        <!-- Custom JS scripts -->


    </div>
    </body>
</html>