<!-- Vapora/docs/disaster-recovery/database-recovery-procedures.html (770 lines, 31 KiB, HTML) -->

<!DOCTYPE HTML>
<html lang="en" class="light sidebar-visible" dir="ltr">
<head>
<!-- Book generated using mdBook -->
<!-- Character encoding and the page-specific title. -->
<meta charset="UTF-8">
<title>Database Recovery Procedures - VAPORA Platform Documentation</title>
<!-- Custom HTML head -->
<meta name="description" content="Comprehensive documentation for VAPORA, an intelligent development orchestration platform built entirely in Rust.">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="theme-color" content="#ffffff">
<!-- Favicons; like every asset URL in this head they climb one directory ("../"). -->
<link rel="icon" href="../favicon.svg">
<link rel="shortcut icon" href="../favicon.png">
<!-- Base stylesheets; print.css is restricted to print media. -->
<link rel="stylesheet" href="../css/variables.css">
<link rel="stylesheet" href="../css/general.css">
<link rel="stylesheet" href="../css/chrome.css">
<link rel="stylesheet" href="../css/print.css" media="print">
<!-- Fonts -->
<link rel="stylesheet" href="../FontAwesome/css/font-awesome.css">
<link rel="stylesheet" href="../fonts/fonts.css">
<!-- Highlight.js Stylesheets -->
<!-- NOTE(review): three highlight themes are linked at once; presumably a later script enables only the one matching the active theme via these ids (confirm in book.js). -->
<link rel="stylesheet" id="highlight-css" href="../highlight.css">
<link rel="stylesheet" id="tomorrow-night-css" href="../tomorrow-night.css">
<link rel="stylesheet" id="ayu-highlight-css" href="../ayu-highlight.css">
<!-- Custom theme stylesheets -->
<!-- Provide site root and default themes to javascript -->
<script>
// Relative prefix from this page to the book root (the same "../" used by the asset links above).
const path_to_root = "../";
// Theme names for light and dark mode; the theme script in <body> selects one of them
// from the prefers-color-scheme media query when no theme is stored.
const default_light_theme = "light";
const default_dark_theme = "dark";
</script>
<!-- Start loading toc.js asap -->
<!-- NOTE(review): loaded synchronously; presumably it defines the mdbook-sidebar-scrollbox element used by the sidebar (confirm in toc.js). -->
<script src="../toc.js"></script>
</head>
<body>
<!-- Keyboard-shortcut help popup. The text below lists the shortcuts; per that text it is
     shown with "?" and hidden with "Esc" (the key handling itself lives in script, not here). -->
<div id="mdbook-help-container">
<div id="mdbook-help-popup">
<h2 class="mdbook-help-title">Keyboard shortcuts</h2>
<div>
<!-- The arrow glyphs were missing, leaving two empty <kbd> elements; Left/Right match the
     aria-keyshortcuts declared on the previous/next chapter links. -->
<p>Press <kbd>←</kbd> or <kbd>→</kbd> to navigate between chapters</p>
<p>Press <kbd>S</kbd> or <kbd>/</kbd> to search in the book</p>
<p>Press <kbd>?</kbd> to show this help</p>
<p>Press <kbd>Esc</kbd> to hide this help</p>
</div>
</div>
</div>
<div id="body-container">
<!-- Work around some values being stored in localStorage wrapped in quotes -->
<script>
// Strips one pair of surrounding double quotes from the persisted theme and sidebar
// settings. Each key is handled on its own and guarded against null, so a missing
// theme entry no longer aborts the sidebar fix-up (previously theme.startsWith threw
// on null and the shared try block skipped the sidebar check).
try {
    for (const key of ['mdbook-theme', 'mdbook-sidebar']) {
        const stored = localStorage.getItem(key);
        if (stored !== null && stored.startsWith('"') && stored.endsWith('"')) {
            localStorage.setItem(key, stored.slice(1, stored.length - 1));
        }
    }
} catch (e) {
    // Best effort only: storage access may be unavailable, in which case nothing is rewritten.
}
</script>
<!-- Set the theme before any content is loaded, prevents flash -->
<script>
// Fallback theme follows the OS colour-scheme preference.
const default_theme = window.matchMedia("(prefers-color-scheme: dark)").matches
    ? default_dark_theme
    : default_light_theme;
// A stored choice wins over the fallback; reading storage is best-effort.
let theme;
try {
    theme = localStorage.getItem('mdbook-theme');
} catch (e) { }
if (theme == null) {
    theme = default_theme;
}
// Replace the 'light' class baked into <html> with the chosen theme and mark that scripts run.
const html = document.documentElement;
html.classList.remove('light');
html.classList.add(theme, "js");
</script>
<input class="hidden" id="sidebar-toggle-anchor" type="checkbox">
<!-- Hide / unhide sidebar before it is displayed -->
<script>
// Viewports narrower than 1080px always start with the sidebar hidden; wider ones use
// the stored state and fall back to 'visible' when nothing usable is stored.
let sidebar = 'hidden';
const sidebar_toggle = document.getElementById("sidebar-toggle-anchor");
if (document.body.clientWidth >= 1080) {
    let saved = null;
    try {
        saved = localStorage.getItem('mdbook-sidebar');
    } catch (e) { }
    sidebar = saved || 'visible';
}
// Mirror the state on the checkbox and swap the class baked into <html>.
sidebar_toggle.checked = (sidebar === 'visible');
html.classList.remove('sidebar-visible');
html.classList.add(`sidebar-${sidebar}`);
</script>
<!-- Sidebar navigation. The scrollbox element is populated by script; without scripting
     the noscript iframe loads the static table of contents page instead. The iframe now
     carries a title so the frame has an accessible name for assistive technology. -->
<nav id="sidebar" class="sidebar" aria-label="Table of contents">
<!-- populated by js -->
<mdbook-sidebar-scrollbox class="sidebar-scrollbox"></mdbook-sidebar-scrollbox>
<noscript>
<iframe class="sidebar-iframe-outer" src="../toc.html" title="Table of contents"></iframe>
</noscript>
<div id="sidebar-resize-handle" class="sidebar-resize-handle">
<div class="sidebar-resize-indicator"></div>
</div>
</nav>
<div id="page-wrapper" class="page-wrapper">
<div class="page">
<div id="menu-bar-hover-placeholder"></div>
<!-- Top bar: sidebar, theme and search controls on the left, the book title in the
     middle, print / repository / edit links on the right. -->
<div class="menu-bar sticky" id="menu-bar">
  <div class="left-buttons">
    <label class="icon-button" id="sidebar-toggle" for="sidebar-toggle-anchor" title="Toggle Table of Contents" aria-label="Toggle Table of Contents" aria-controls="sidebar">
      <i class="fa fa-bars"></i>
    </label>
    <button class="icon-button" id="theme-toggle" type="button" title="Change theme" aria-label="Change theme" aria-haspopup="true" aria-expanded="false" aria-controls="theme-list">
      <i class="fa fa-paint-brush"></i>
    </button>
    <!-- Theme menu; each button id is the theme name. -->
    <ul class="theme-popup" id="theme-list" role="menu" aria-label="Themes">
      <li role="none"><button class="theme" id="default_theme" role="menuitem">Auto</button></li>
      <li role="none"><button class="theme" id="light" role="menuitem">Light</button></li>
      <li role="none"><button class="theme" id="rust" role="menuitem">Rust</button></li>
      <li role="none"><button class="theme" id="coal" role="menuitem">Coal</button></li>
      <li role="none"><button class="theme" id="navy" role="menuitem">Navy</button></li>
      <li role="none"><button class="theme" id="ayu" role="menuitem">Ayu</button></li>
    </ul>
    <button class="icon-button" id="search-toggle" type="button" title="Search (`/`)" aria-label="Toggle Searchbar" aria-expanded="false" aria-keyshortcuts="/ s" aria-controls="searchbar">
      <i class="fa fa-search"></i>
    </button>
  </div>
  <h1 class="menu-title">VAPORA Platform Documentation</h1>
  <div class="right-buttons">
    <a href="../print.html" title="Print this book" aria-label="Print this book">
      <i class="fa fa-print" id="print-button"></i>
    </a>
    <a href="https://github.com/vapora-platform/vapora" title="Git repository" aria-label="Git repository">
      <i class="fa fa-github" id="git-repository-button"></i>
    </a>
    <a href="https://github.com/vapora-platform/vapora/edit/main/docs/src/../disaster-recovery/database-recovery-procedures.md" title="Suggest an edit" aria-label="Suggest an edit">
      <i class="fa fa-edit" id="git-edit-button"></i>
    </a>
  </div>
</div>
<!-- Search UI, hidden by default: the query field plus the results container it controls. -->
<div class="hidden" id="search-wrapper">
  <form class="searchbar-outer" id="searchbar-outer">
    <input id="searchbar" name="searchbar" type="search" placeholder="Search this book ..." aria-controls="searchresults-outer" aria-describedby="searchresults-header">
  </form>
  <div class="searchresults-outer hidden" id="searchresults-outer">
    <div class="searchresults-header" id="searchresults-header"></div>
    <ul id="searchresults">
    </ul>
  </div>
</div>
<!-- Apply ARIA attributes after the sidebar and the sidebar toggle button are added to the DOM -->
<script>
{
    // Block scope keeps these helper bindings out of the global scope shared with other scripts.
    const sidebarShown = sidebar === 'visible';
    document.getElementById('sidebar-toggle').setAttribute('aria-expanded', sidebarShown);
    document.getElementById('sidebar').setAttribute('aria-hidden', !sidebarShown);
    // Sidebar links are tabbable only while the sidebar is shown.
    const linkTabIndex = sidebarShown ? 0 : -1;
    for (const link of Array.from(document.querySelectorAll('#sidebar a'))) {
        link.setAttribute('tabIndex', linkTabIndex);
    }
}
</script>
<div id="content" class="content">
<main>
<h1 id="database-recovery-procedures"><a class="header" href="#database-recovery-procedures">Database Recovery Procedures</a></h1>
<p>Detailed procedures for recovering SurrealDB in various failure scenarios.</p>
<hr />
<h2 id="quick-reference-recovery-methods"><a class="header" href="#quick-reference-recovery-methods">Quick Reference: Recovery Methods</a></h2>
<div class="table-wrapper"><table><thead><tr><th>Scenario</th><th>Method</th><th>Time</th><th>Data Loss</th></tr></thead><tbody>
<tr><td><strong>Pod restart</strong></td><td>Automatic pod recovery</td><td>2 min</td><td>0</td></tr>
<tr><td><strong>Pod crash</strong></td><td>Persistent volume intact</td><td>3 min</td><td>0</td></tr>
<tr><td><strong>Corrupted pod</strong></td><td>Restart from snapshot</td><td>5 min</td><td>0</td></tr>
<tr><td><strong>Corrupted database</strong></td><td>Restore from backup</td><td>15 min</td><td>0-60 min</td></tr>
<tr><td><strong>Complete loss</strong></td><td>Restore from backup</td><td>30 min</td><td>0-60 min</td></tr>
</tbody></table>
</div>
<hr />
<h2 id="surrealdb-architecture"><a class="header" href="#surrealdb-architecture">SurrealDB Architecture</a></h2>
<pre><code>VAPORA Database Layer
SurrealDB Pod (Kubernetes)
├── PersistentVolume: /var/lib/surrealdb/
├── Data file: data.db (RocksDB)
├── Index files: *.idx
└── WAL (write-ahead log): *.wal
Backed up to:
├── Hourly exports: S3 backups/database/
├── CloudSQL snapshots: AWS/GCP snapshots
└── Archive backups: Glacier (monthly)
</code></pre>
<hr />
<h2 id="scenario-1-pod-restart-most-common"><a class="header" href="#scenario-1-pod-restart-most-common">Scenario 1: Pod Restart (Most Common)</a></h2>
<p><strong>Cause</strong>: Node maintenance, resource limits, health check failure</p>
<p><strong>Duration</strong>: 2-3 minutes
<strong>Data Loss</strong>: None</p>
<h3 id="recovery-procedure"><a class="header" href="#recovery-procedure">Recovery Procedure</a></h3>
<pre><code class="language-bash"># Most of the time, just restart the pod
# 1. Delete the pod
kubectl delete pod -n vapora surrealdb-0
# 2. Pod automatically restarts (via StatefulSet)
kubectl get pods -n vapora -w
# 3. Verify it's Ready
kubectl get pod surrealdb-0 -n vapora
# Should show: 1/1 Running
# 4. Verify database is accessible
kubectl exec -n vapora surrealdb-0 -- \
surreal sql "SELECT 1"
# 5. Check data integrity
kubectl exec -n vapora surrealdb-0 -- \
surreal sql "SELECT COUNT(*) FROM projects"
# Should return non-zero count
</code></pre>
<hr />
<h2 id="scenario-2-pod-crashloop-container-issue"><a class="header" href="#scenario-2-pod-crashloop-container-issue">Scenario 2: Pod CrashLoop (Container Issue)</a></h2>
<p><strong>Cause</strong>: Application crash, memory issues, corrupt index</p>
<p><strong>Duration</strong>: 5-10 minutes
<strong>Data Loss</strong>: None (usually)</p>
<h3 id="recovery-procedure-1"><a class="header" href="#recovery-procedure-1">Recovery Procedure</a></h3>
<pre><code class="language-bash"># 1. Examine pod logs to identify issue
kubectl logs surrealdb-0 -n vapora --previous
# Look for: "panic", "fatal", "out of memory"
# 2. Increase resource limits if memory issue
kubectl patch statefulset surrealdb -n vapora --type='json' \
-p='[{"op": "replace", "path": "/spec/template/spec/containers/0/resources/limits/memory", "value":"2Gi"}]'
# 3. If corrupt index, rebuild
kubectl exec -n vapora surrealdb-0 -- \
surreal query "REBUILD INDEX"
# 4. If persistent issue, try volume snapshot
kubectl delete pod -n vapora surrealdb-0
# Use previous snapshot (if available)
# 5. Monitor restart
kubectl get pods -n vapora -w
</code></pre>
<hr />
<h2 id="scenario-3-corrupted-database-detected-via-queries"><a class="header" href="#scenario-3-corrupted-database-detected-via-queries">Scenario 3: Corrupted Database (Detected via Queries)</a></h2>
<p><strong>Cause</strong>: Unclean shutdown, disk issue, data corruption</p>
<p><strong>Duration</strong>: 15-30 minutes
<strong>Data Loss</strong>: Minimal (last hour of transactions)</p>
<h3 id="detection"><a class="header" href="#detection">Detection</a></h3>
<pre><code class="language-bash"># Symptoms to watch for
✗ Queries return error: "corrupted database"
✗ Disk check shows corruption
✗ Checksums fail
✗ Integrity check fails
# Verify corruption
kubectl exec -n vapora surrealdb-0 -- \
surreal query "INFO FOR DB"
# Look for any error messages
# Try repair
kubectl exec -n vapora surrealdb-0 -- \
surreal query "REBUILD INDEX"
</code></pre>
<h3 id="recovery-option-a---restart-and-repair-try-first"><a class="header" href="#recovery-option-a---restart-and-repair-try-first">Recovery: Option A - Restart and Repair (Try First)</a></h3>
<pre><code class="language-bash"># 1. Delete pod to force restart
kubectl delete pod -n vapora surrealdb-0
# 2. Watch restart
kubectl get pods -n vapora -w
# Should restart within 30 seconds
# 3. Verify database accessible
kubectl exec -n vapora surrealdb-0 -- \
surreal sql "SELECT COUNT(*) FROM projects"
# 4. If successful, done
# If still errors, proceed to Option B
</code></pre>
<h3 id="recovery-option-b---restore-from-recent-backup"><a class="header" href="#recovery-option-b---restore-from-recent-backup">Recovery: Option B - Restore from Recent Backup</a></h3>
<pre><code class="language-bash"># 1. Stop database pod
kubectl scale statefulset surrealdb --replicas=0 -n vapora
# 2. Download latest backup
aws s3 cp s3://vapora-backups/database/ ./ --recursive
# Get most recent .sql.gz file
# 3. Clear corrupted data
kubectl delete pvc -n vapora surrealdb-data-surrealdb-0
# 4. Recreate pod (will create new PVC)
kubectl scale statefulset surrealdb --replicas=1 -n vapora
# 5. Wait for pod to be ready
kubectl wait --for=condition=Ready pod/surrealdb-0 \
-n vapora --timeout=300s
# 6. Restore backup
# Extract and import
gunzip vapora-db-*.sql.gz
kubectl cp vapora-db-*.sql vapora/surrealdb-0:/tmp/
kubectl exec -n vapora surrealdb-0 -- \
surreal import \
--conn ws://localhost:8000 \
--user root \
--pass $DB_PASSWORD \
--input /tmp/vapora-db-*.sql
# 7. Verify restored data
kubectl exec -n vapora surrealdb-0 -- \
surreal sql "SELECT COUNT(*) FROM projects"
# Should match pre-corruption count
</code></pre>
<hr />
<h2 id="scenario-4-storage-failure-pvc-issue"><a class="header" href="#scenario-4-storage-failure-pvc-issue">Scenario 4: Storage Failure (PVC Issue)</a></h2>
<p><strong>Cause</strong>: Storage volume corruption, node storage failure</p>
<p><strong>Duration</strong>: 20-30 minutes
<strong>Data Loss</strong>: None with backup</p>
<h3 id="recovery-procedure-2"><a class="header" href="#recovery-procedure-2">Recovery Procedure</a></h3>
<pre><code class="language-bash"># 1. Detect storage issue
kubectl describe pvc -n vapora surrealdb-data-surrealdb-0
# Look for: "Pod pending", "volume binding failure"
# 2. Check if snapshot available (cloud)
aws ec2 describe-snapshots \
--filters "Name=tag:database,Values=vapora" \
--query 'sort_by(Snapshots, &amp;StartTime)[-10:].{SnapshotId:SnapshotId,StartTime:StartTime}'
# 3. Create new PVC from snapshot
kubectl apply -f - &lt;&lt; EOF
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: surrealdb-data-surrealdb-0-restore
namespace: vapora
spec:
accessModes:
- ReadWriteOnce
dataSource:
kind: VolumeSnapshot
apiGroup: snapshot.storage.k8s.io
name: surrealdb-snapshot-latest
resources:
requests:
storage: 100Gi
EOF
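# 4. Update StatefulSet to use new PVC
# NOTE(review): volumeClaimTemplates cannot be changed on an existing StatefulSet (the API
# server rejects the patch), and PVC names are derived as &lt;template-name&gt;-&lt;pod-name&gt;.
# Verify this step in staging first; the usual alternative is to scale to 0, delete the
# old PVC, and create the restored PVC under the original name surrealdb-data-surrealdb-0.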
kubectl patch statefulset surrealdb -n vapora --type='json' \
-p='[{"op": "replace", "path": "/spec/volumeClaimTemplates/0/metadata/name", "value":"surrealdb-data-surrealdb-0-restore"}]'
# 5. Delete old pod to force remount
kubectl delete pod -n vapora surrealdb-0
# 6. Verify new pod runs
kubectl get pods -n vapora -w
# 7. Test database
kubectl exec -n vapora surrealdb-0 -- \
surreal sql "SELECT COUNT(*) FROM projects"
</code></pre>
<hr />
<h2 id="scenario-5-complete-data-loss-restore-from-backup"><a class="header" href="#scenario-5-complete-data-loss-restore-from-backup">Scenario 5: Complete Data Loss (Restore from Backup)</a></h2>
<p><strong>Cause</strong>: User delete, accidental truncate, security incident</p>
<p><strong>Duration</strong>: 30-60 minutes
<strong>Data Loss</strong>: Up to 1 hour</p>
<h3 id="pre-recovery-checklist"><a class="header" href="#pre-recovery-checklist">Pre-Recovery Checklist</a></h3>
<pre><code>Before restoring, verify:
□ What data was lost? (specific tables or entire DB?)
□ When was it lost? (exact time if possible)
□ Is it just one table or entire database?
□ Do we have valid backups from before loss?
□ Has the backup been tested before?
</code></pre>
<h3 id="recovery-procedure-3"><a class="header" href="#recovery-procedure-3">Recovery Procedure</a></h3>
<pre><code class="language-bash"># 1. Stop the database
kubectl scale statefulset surrealdb --replicas=0 -n vapora
sleep 10
# 2. Identify backup to restore
# Look for backup from time BEFORE data loss
aws s3 ls s3://vapora-backups/database/ --recursive | sort
# Example: surrealdb-2026-01-12-230000.sql.gz
# (from 11 PM, before 12 AM loss)
# 3. Download backup
aws s3 cp s3://vapora-backups/database/surrealdb-2026-01-12-230000.sql.gz ./surrealdb-230000.sql.gz
gunzip surrealdb-230000.sql.gz
# 4. Verify backup integrity before restoring
# Extract first 100 lines to check format
head -100 surrealdb-230000.sql
# 5. Delete corrupted PVC
kubectl delete pvc -n vapora surrealdb-data-surrealdb-0
# 6. Restart database pod (will create new PVC)
kubectl scale statefulset surrealdb --replicas=1 -n vapora
# 7. Wait for pod to be ready and listening
kubectl wait --for=condition=Ready pod/surrealdb-0 \
-n vapora --timeout=300s
sleep 10
# 8. Copy backup to pod
kubectl cp surrealdb-230000.sql vapora/surrealdb-0:/tmp/
# 9. Restore backup
kubectl exec -n vapora surrealdb-0 -- \
surreal import \
--conn ws://localhost:8000 \
--user root \
--pass $DB_PASSWORD \
--input /tmp/surrealdb-230000.sql
# Expected output:
# Imported 1500+ records...
# This should take 5-15 minutes depending on backup size
# 10. Verify data restored
kubectl exec -n vapora surrealdb-0 -- \
surreal sql \
--conn ws://localhost:8000 \
--user root \
--pass $DB_PASSWORD \
"SELECT COUNT(*) as project_count FROM projects"
# Should match pre-loss count
</code></pre>
<h3 id="data-loss-assessment"><a class="header" href="#data-loss-assessment">Data Loss Assessment</a></h3>
<pre><code class="language-bash"># After restore, compare with lost version
# 1. Get current record count
RESTORED_COUNT=$(kubectl exec -n vapora surrealdb-0 -- \
surreal sql "SELECT COUNT(*) FROM projects")
# 2. Get pre-loss count (from logs or ticket)
PRE_LOSS_COUNT=1500
# 3. Calculate data loss
if [ "$RESTORED_COUNT" -lt "$PRE_LOSS_COUNT" ]; then
LOSS=$(( PRE_LOSS_COUNT - RESTORED_COUNT ))
echo "Data loss: $LOSS records"
echo "Data loss duration: ~1 hour"
echo "Restore successful but incomplete"
else
echo "Data loss: 0 records"
echo "Full recovery complete"
fi
</code></pre>
<hr />
<h2 id="scenario-6-backup-verification-failed"><a class="header" href="#scenario-6-backup-verification-failed">Scenario 6: Backup Verification Failed</a></h2>
<p><strong>Cause</strong>: Corrupt backup file, incompatible format</p>
<p><strong>Duration</strong>: 30-120 minutes (fallback to older backup)
<strong>Data Loss</strong>: 2+ hours possible</p>
<h3 id="recovery-procedure-4"><a class="header" href="#recovery-procedure-4">Recovery Procedure</a></h3>
<pre><code class="language-bash"># 1. Identify backup corruption
# During restore, if backup fails import:
kubectl exec -n vapora surrealdb-0 -- \
surreal import \
--conn ws://localhost:8000 \
--user root \
--pass $DB_PASSWORD \
--input /tmp/backup.sql
# Error: "invalid SQL format" or similar
# 2. Check backup file integrity
file vapora-db-backup.sql
# Should show: ASCII text
head -5 vapora-db-backup.sql
# Should show: SQL statements or surreal export format
# 3. If corrupt, try next-oldest backup
aws s3 ls s3://vapora-backups/database/ --recursive | sort | tail -5
# Get second-newest backup
# 4. Retry restore with older backup
aws s3 cp s3://vapora-backups/database/2026-01-12-210000/ ./ --recursive
gunzip backup.sql.gz
# 5. Repeat restore procedure with older backup
# (As in Scenario 5, steps 8-10)
</code></pre>
<hr />
<h2 id="scenario-7-database-size-growing-unexpectedly"><a class="header" href="#scenario-7-database-size-growing-unexpectedly">Scenario 7: Database Size Growing Unexpectedly</a></h2>
<p><strong>Cause</strong>: Accumulation of data, logs not rotated, storage leak</p>
<p><strong>Duration</strong>: Varies (prevention focus)
<strong>Data Loss</strong>: None</p>
<h3 id="detection-1"><a class="header" href="#detection-1">Detection</a></h3>
<pre><code class="language-bash"># Monitor database size
kubectl exec -n vapora surrealdb-0 -- du -sh /var/lib/surrealdb/
# Check disk usage trend
# (Should be ~1-2% growth per week)
# If sudden spike:
kubectl exec -n vapora surrealdb-0 -- \
find /var/lib/surrealdb/ -type f -exec ls -lh {} + | sort -k5 -h | tail -20
</code></pre>
<h3 id="cleanup-procedure"><a class="header" href="#cleanup-procedure">Cleanup Procedure</a></h3>
<pre><code class="language-bash"># 1. Identify large tables
kubectl exec -n vapora surrealdb-0 -- \
surreal sql "SELECT table, count(*) FROM meta::tb GROUP BY table ORDER BY count DESC"
# 2. If logs table too large
kubectl exec -n vapora surrealdb-0 -- \
surreal sql "DELETE FROM audit_logs WHERE created_at &lt; now() - 90d"
# 3. Rebuild indexes to reclaim space
kubectl exec -n vapora surrealdb-0 -- \
surreal query "REBUILD INDEX"
# 4. If still large, delete old records from other tables
kubectl exec -n vapora surrealdb-0 -- \
surreal sql "DELETE FROM tasks WHERE status = 'archived' AND updated_at &lt; now() - 1y"
# 5. Monitor size after cleanup
kubectl exec -n vapora surrealdb-0 -- du -sh /var/lib/surrealdb/
</code></pre>
<hr />
<h2 id="scenario-8-replication-lag-if-using-replicas"><a class="header" href="#scenario-8-replication-lag-if-using-replicas">Scenario 8: Replication Lag (If Using Replicas)</a></h2>
<p><strong>Cause</strong>: Replica behind primary, network latency</p>
<p><strong>Duration</strong>: Usually self-healing (seconds to minutes)
<strong>Data Loss</strong>: None</p>
<h3 id="detection-2"><a class="header" href="#detection-2">Detection</a></h3>
<pre><code class="language-bash"># Check replica lag
kubectl exec -n vapora surrealdb-replica -- \
surreal sql "SHOW REPLICATION STATUS"
# Look for: "Seconds_Behind_Master" &gt; 5 seconds
</code></pre>
<h3 id="recovery"><a class="header" href="#recovery">Recovery</a></h3>
<pre><code class="language-bash"># Usually self-healing, but if stuck:
# 1. Check network connectivity
kubectl exec -n vapora surrealdb-replica -- ping surrealdb-primary -c 5
# 2. Restart replica
kubectl delete pod -n vapora surrealdb-replica
# 3. Monitor replica catching up
kubectl logs -n vapora surrealdb-replica -f
# 4. Verify replica status
kubectl exec -n vapora surrealdb-replica -- \
surreal sql "SHOW REPLICATION STATUS"
</code></pre>
<hr />
<h2 id="database-health-checks"><a class="header" href="#database-health-checks">Database Health Checks</a></h2>
<h3 id="pre-recovery-verification"><a class="header" href="#pre-recovery-verification">Pre-Recovery Verification</a></h3>
<pre><code class="language-bash">def verify_database_health [] {
print "=== Database Health Check ==="
# 1. Connection test
let conn = (try (
exec "surreal sql --conn ws://localhost:8000 \"SELECT 1\""
) catch {error make {msg: "Cannot connect to database"}})
# 2. Data integrity test
let integrity = (exec "surreal sql \"REBUILD INDEX\"")
print "✓ Integrity check passed"
# 3. Performance test
let perf = (exec "surreal sql \"SELECT COUNT(*) FROM projects\"")
print "✓ Performance acceptable"
# 4. Replication lag (if applicable)
# let lag = (exec "surreal sql \"SHOW REPLICATION STATUS\"")
# print "✓ No replication lag"
print "✓ All health checks passed"
}
</code></pre>
<h3 id="post-recovery-verification"><a class="header" href="#post-recovery-verification">Post-Recovery Verification</a></h3>
<pre><code class="language-bash">def verify_recovery_success [] {
print "=== Post-Recovery Verification ==="
# 1. Database accessible
kubectl exec -n vapora surrealdb-0 -- \
surreal sql "SELECT 1"
print "✓ Database accessible"
# 2. All tables present
kubectl exec -n vapora surrealdb-0 -- \
surreal sql "SELECT table FROM meta::tb"
print "✓ All tables present"
# 3. Record counts reasonable
kubectl exec -n vapora surrealdb-0 -- \
surreal sql "SELECT table, count(*) FROM meta::tb"
print "✓ Record counts verified"
# 4. Application can connect
kubectl logs -n vapora deployment/vapora-backend --tail=5 | grep -i connected
print "✓ Application connected"
# 5. API operational
curl http://localhost:8001/api/projects
print "✓ API operational"
}
</code></pre>
<hr />
<h2 id="database-recovery-checklist"><a class="header" href="#database-recovery-checklist">Database Recovery Checklist</a></h2>
<h3 id="before-recovery"><a class="header" href="#before-recovery">Before Recovery</a></h3>
<pre><code>□ Documented failure symptoms
□ Determined root cause
□ Selected appropriate recovery method
□ Located backup to restore
□ Verified backup integrity
□ Notified relevant teams
□ Have runbook available
□ Test environment ready (for testing)
</code></pre>
<h3 id="during-recovery"><a class="header" href="#during-recovery">During Recovery</a></h3>
<pre><code>□ Followed procedure step-by-step
□ Monitored each step completion
□ Captured any error messages
□ Took notes of timings
□ Did NOT skip verification steps
□ Had backup plans ready
</code></pre>
<h3 id="after-recovery"><a class="header" href="#after-recovery">After Recovery</a></h3>
<pre><code>□ Verified database accessible
□ Verified data integrity
□ Verified application can connect
□ Checked API endpoints working
□ Monitored error rates
□ Waited for 30 min stability check
□ Documented recovery procedure
□ Identified improvements needed
□ Updated runbooks if needed
</code></pre>
<hr />
<h2 id="recovery-troubleshooting"><a class="header" href="#recovery-troubleshooting">Recovery Troubleshooting</a></h2>
<h3 id="issue-cannot-connect-to-database-after-restore"><a class="header" href="#issue-cannot-connect-to-database-after-restore">Issue: "Cannot connect to database after restore"</a></h3>
<p><strong>Cause</strong>: Database not fully recovered, network issue</p>
<p><strong>Solution</strong>:</p>
<pre><code class="language-bash"># 1. Wait longer (import can take 15+ minutes)
sleep 60 &amp;&amp; kubectl exec -n vapora surrealdb-0 -- surreal sql "SELECT 1"
# 2. Check pod logs
kubectl logs -n vapora surrealdb-0 | tail -50
# 3. Restart pod
kubectl delete pod -n vapora surrealdb-0
# 4. Check network connectivity
kubectl exec -n vapora surrealdb-0 -- ping -c 5 localhost
</code></pre>
<h3 id="issue-import-corrupted-data-error"><a class="header" href="#issue-import-corrupted-data-error">Issue: "Import corrupted data" error</a></h3>
<p><strong>Cause</strong>: Backup file corrupted or wrong format</p>
<p><strong>Solution</strong>:</p>
<pre><code class="language-bash"># 1. Try different backup
aws s3 ls s3://vapora-backups/database/ | sort | tail -5
# 2. Verify backup format
file vapora-db-backup.sql
# Should show: text
# 3. Manual inspection
head -20 vapora-db-backup.sql
# Should show SQL format
# 4. Try with older backup
</code></pre>
<h3 id="issue-database-running-but-data-seems-wrong"><a class="header" href="#issue-database-running-but-data-seems-wrong">Issue: "Database running but data seems wrong"</a></h3>
<p><strong>Cause</strong>: Restored wrong backup or partial restore</p>
<p><strong>Solution</strong>:</p>
<pre><code class="language-bash"># 1. Verify record counts
kubectl exec -n vapora surrealdb-0 -- \
surreal sql "SELECT table, count(*) FROM meta::tb"
# 2. Compare to pre-loss baseline
# (from documentation or logs)
# If counts don't match:
# - Used wrong backup
# - Restore incomplete
# - Try again with correct backup
</code></pre>
<hr />
<h2 id="database-recovery-reference"><a class="header" href="#database-recovery-reference">Database Recovery Reference</a></h2>
<p><strong>Recovery Procedure Flowchart</strong>:</p>
<pre><code>Database Issue Detected
Is it just a pod restart?
YES → kubectl delete pod -n vapora surrealdb-0
NO → Continue
Can queries connect and run?
YES → Continue with application recovery
NO → Continue
Is data corrupted (errors in queries)?
YES → Try REBUILD INDEX
NO → Continue
Still errors?
YES → Scale replicas=0, clear PVC, restore from backup
NO → Success, monitor for 30 min
</code></pre>
</main>
<!-- Previous/next chapter links (mobile variant). Both neighbouring chapters sit in the
     same disaster-recovery/ directory as this page, so they are linked by bare file name;
     the former "../../disaster-recovery/" prefix climbed above the book root ("../") and
     only resolved when the book was served from the web root. -->
<nav class="nav-wrapper" aria-label="Page navigation">
<!-- Mobile navigation buttons -->
<a rel="prev" href="backup-strategy.html" class="mobile-nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
<i class="fa fa-angle-left"></i>
</a>
<a rel="next prefetch" href="business-continuity-plan.html" class="mobile-nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
<i class="fa fa-angle-right"></i>
</a>
<div style="clear: both"></div>
</nav>
</div>
</div>
<!-- Previous/next chapter links (wide-screen variant). Both neighbouring chapters sit in
     the same disaster-recovery/ directory as this page, so they are linked by bare file
     name; the former "../../disaster-recovery/" prefix climbed above the book root ("../")
     and only resolved when the book was served from the web root. -->
<nav class="nav-wide-wrapper" aria-label="Page navigation">
<a rel="prev" href="backup-strategy.html" class="nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
<i class="fa fa-angle-left"></i>
</a>
<a rel="next prefetch" href="business-continuity-plan.html" class="nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
<i class="fa fa-angle-right"></i>
</a>
</nav>
</div>
<script>
// Sets a global flag on window before the scripts below load.
// NOTE(review): presumably consulted by book.js to enable copy buttons on code blocks (confirm).
window.playground_copyable = true;
</script>
<!-- NOTE(review): these load synchronously in document order; presumably searcher.js needs
     elasticlunr and mark, and book.js needs clipboard and highlight (confirm before
     reordering or adding defer/async). -->
<script src="../elasticlunr.min.js"></script>
<script src="../mark.min.js"></script>
<script src="../searcher.js"></script>
<script src="../clipboard.min.js"></script>
<script src="../highlight.js"></script>
<script src="../book.js"></script>
<!-- Custom JS scripts -->
</div>
</body>
</html>