882 lines
34 KiB
HTML
882 lines
34 KiB
HTML
<!DOCTYPE HTML>
|
|
<html lang="en" class="light sidebar-visible" dir="ltr">
|
|
<head>
|
|
<!-- Book generated using mdBook -->
|
|
<meta charset="UTF-8">
|
|
<title>Backup Strategy - VAPORA Platform Documentation</title>
|
|
|
|
|
|
<!-- Custom HTML head -->
|
|
|
|
<meta name="description" content="Comprehensive documentation for VAPORA, an intelligent development orchestration platform built entirely in Rust.">
|
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
|
<meta name="theme-color" content="#ffffff">
|
|
|
|
<link rel="icon" href="../favicon.svg">
|
|
<link rel="shortcut icon" href="../favicon.png">
|
|
<link rel="stylesheet" href="../css/variables.css">
|
|
<link rel="stylesheet" href="../css/general.css">
|
|
<link rel="stylesheet" href="../css/chrome.css">
|
|
<link rel="stylesheet" href="../css/print.css" media="print">
|
|
|
|
<!-- Fonts -->
|
|
<link rel="stylesheet" href="../FontAwesome/css/font-awesome.css">
|
|
<link rel="stylesheet" href="../fonts/fonts.css">
|
|
|
|
<!-- Highlight.js Stylesheets -->
|
|
<link rel="stylesheet" id="highlight-css" href="../highlight.css">
|
|
<link rel="stylesheet" id="tomorrow-night-css" href="../tomorrow-night.css">
|
|
<link rel="stylesheet" id="ayu-highlight-css" href="../ayu-highlight.css">
|
|
|
|
<!-- Custom theme stylesheets -->
|
|
|
|
|
|
<!-- Provide site root and default themes to javascript -->
|
|
<script>
|
|
const path_to_root = "../";
|
|
const default_light_theme = "light";
|
|
const default_dark_theme = "dark";
|
|
</script>
|
|
<!-- Start loading toc.js asap -->
|
|
<script src="../toc.js"></script>
|
|
</head>
|
|
<body>
|
|
<div id="mdbook-help-container">
|
|
<div id="mdbook-help-popup">
|
|
<h2 class="mdbook-help-title">Keyboard shortcuts</h2>
|
|
<div>
|
|
<p>Press <kbd>←</kbd> or <kbd>→</kbd> to navigate between chapters</p>
|
|
<p>Press <kbd>S</kbd> or <kbd>/</kbd> to search in the book</p>
|
|
<p>Press <kbd>?</kbd> to show this help</p>
|
|
<p>Press <kbd>Esc</kbd> to hide this help</p>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div id="body-container">
|
|
<!-- Work around some values being stored in localStorage wrapped in quotes -->
|
|
<script>
|
|
// Some values were stored in localStorage wrapped in double quotes.
// Strip one surrounding pair so later scripts can compare the bare value.
// Each key is handled independently: previously a missing 'mdbook-theme'
// entry (getItem returns null) threw inside the shared try block and the
// 'mdbook-sidebar' value was never repaired.
for (const key of ['mdbook-theme', 'mdbook-sidebar']) {
    try {
        const stored = localStorage.getItem(key);
        if (stored !== null && stored.startsWith('"') && stored.endsWith('"')) {
            localStorage.setItem(key, stored.slice(1, stored.length - 1));
        }
    } catch (e) {
        // Storage access may throw; as before, the failure is ignored and
        // the stored value is simply left untouched.
    }
}
|
|
</script>
|
|
|
|
<!-- Set the theme before any content is loaded, prevents flash -->
|
|
<script>
|
|
// Pick the fallback theme from the OS colour-scheme preference.
const default_theme = window.matchMedia("(prefers-color-scheme: dark)").matches
    ? default_dark_theme
    : default_light_theme;

// A stored preference wins; a missing or unreadable one falls back to the default.
let theme;
try {
    theme = localStorage.getItem('mdbook-theme');
} catch (e) { }
if (theme == null) {
    theme = default_theme;
}

// The <html> element ships with class "light": swap it for the resolved
// theme and flag that JavaScript is running.
const html = document.documentElement;
html.classList.remove('light');
html.classList.add(theme);
html.classList.add("js");
|
|
</script>
|
|
|
|
<input type="checkbox" id="sidebar-toggle-anchor" class="hidden">
|
|
|
|
<!-- Hide / unhide sidebar before it is displayed -->
|
|
<script>
|
|
const sidebar_toggle = document.getElementById("sidebar-toggle-anchor");

// Narrow viewports always start with the sidebar hidden; wide ones use the
// stored preference and default to visible when none can be read.
let sidebar = 'hidden';
if (document.body.clientWidth >= 1080) {
    let stored = null;
    try {
        stored = localStorage.getItem('mdbook-sidebar');
    } catch (e) { }
    sidebar = stored || 'visible';
}

// Mirror the decision onto the toggle checkbox and the <html> class list.
sidebar_toggle.checked = sidebar === 'visible';
html.classList.remove('sidebar-visible');
html.classList.add("sidebar-" + sidebar);
|
|
</script>
|
|
|
|
<nav id="sidebar" class="sidebar" aria-label="Table of contents">
|
|
<!-- populated by js -->
|
|
<mdbook-sidebar-scrollbox class="sidebar-scrollbox"></mdbook-sidebar-scrollbox>
|
|
<noscript>
|
|
<iframe class="sidebar-iframe-outer" src="../toc.html"></iframe>
|
|
</noscript>
|
|
<div id="sidebar-resize-handle" class="sidebar-resize-handle">
|
|
<div class="sidebar-resize-indicator"></div>
|
|
</div>
|
|
</nav>
|
|
|
|
<div id="page-wrapper" class="page-wrapper">
|
|
|
|
<div class="page">
|
|
<div id="menu-bar-hover-placeholder"></div>
|
|
<div id="menu-bar" class="menu-bar sticky">
|
|
<div class="left-buttons">
|
|
<label id="sidebar-toggle" class="icon-button" for="sidebar-toggle-anchor" title="Toggle Table of Contents" aria-label="Toggle Table of Contents" aria-controls="sidebar">
|
|
<i class="fa fa-bars"></i>
|
|
</label>
|
|
<button id="theme-toggle" class="icon-button" type="button" title="Change theme" aria-label="Change theme" aria-haspopup="true" aria-expanded="false" aria-controls="theme-list">
|
|
<i class="fa fa-paint-brush"></i>
|
|
</button>
|
|
<ul id="theme-list" class="theme-popup" aria-label="Themes" role="menu">
|
|
<li role="none"><button role="menuitem" class="theme" id="default_theme">Auto</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="light">Light</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="rust">Rust</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="coal">Coal</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="navy">Navy</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="ayu">Ayu</button></li>
|
|
</ul>
|
|
<button id="search-toggle" class="icon-button" type="button" title="Search (`/`)" aria-label="Toggle Searchbar" aria-expanded="false" aria-keyshortcuts="/ s" aria-controls="searchbar">
|
|
<i class="fa fa-search"></i>
|
|
</button>
|
|
</div>
|
|
|
|
<h1 class="menu-title">VAPORA Platform Documentation</h1>
|
|
|
|
<div class="right-buttons">
|
|
<a href="../print.html" title="Print this book" aria-label="Print this book">
|
|
<i id="print-button" class="fa fa-print"></i>
|
|
</a>
|
|
<a href="https://github.com/vapora-platform/vapora" title="Git repository" aria-label="Git repository">
|
|
<i id="git-repository-button" class="fa fa-github"></i>
|
|
</a>
|
|
<a href="https://github.com/vapora-platform/vapora/edit/main/docs/src/../disaster-recovery/backup-strategy.md" title="Suggest an edit" aria-label="Suggest an edit">
|
|
<i id="git-edit-button" class="fa fa-edit"></i>
|
|
</a>
|
|
|
|
</div>
|
|
</div>
|
|
|
|
<div id="search-wrapper" class="hidden">
|
|
<form id="searchbar-outer" class="searchbar-outer">
|
|
<input type="search" id="searchbar" name="searchbar" placeholder="Search this book ..." aria-controls="searchresults-outer" aria-describedby="searchresults-header">
|
|
</form>
|
|
<div id="searchresults-outer" class="searchresults-outer hidden">
|
|
<div id="searchresults-header" class="searchresults-header"></div>
|
|
<ul id="searchresults">
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
|
|
<!-- Apply ARIA attributes after the sidebar and the sidebar toggle button are added to the DOM -->
|
|
<script>
|
|
{
    // `sidebar` holds the state chosen by the earlier inline script.
    const shown = sidebar === 'visible';

    document.getElementById('sidebar-toggle').setAttribute('aria-expanded', shown);
    document.getElementById('sidebar').setAttribute('aria-hidden', !shown);

    // Links inside a sidebar that is not visible get tabIndex -1, otherwise 0.
    for (const link of document.querySelectorAll('#sidebar a')) {
        link.setAttribute('tabIndex', shown ? 0 : -1);
    }
}
|
|
</script>
|
|
|
|
<div id="content" class="content">
|
|
<main>
|
|
<h1 id="vapora-backup-strategy"><a class="header" href="#vapora-backup-strategy">VAPORA Backup Strategy</a></h1>
|
|
<p>Comprehensive backup and data protection strategy for VAPORA infrastructure.</p>
|
|
<hr />
|
|
<h2 id="overview"><a class="header" href="#overview">Overview</a></h2>
|
|
<p><strong>Purpose</strong>: Protect against data loss, corruption, and service interruptions</p>
|
|
<p><strong>Coverage</strong>:</p>
|
|
<ul>
|
|
<li>Database backups (SurrealDB)</li>
|
|
<li>Configuration backups (ConfigMaps, Secrets)</li>
|
|
<li>Application state</li>
|
|
<li>Infrastructure-as-Code</li>
|
|
<li>Container images</li>
|
|
</ul>
|
|
<p><strong>Success Metrics</strong>:</p>
|
|
<ul>
|
|
<li>RPO (Recovery Point Objective): 1 hour (lose at most 1 hour of data)</li>
|
|
<li>RTO (Recovery Time Objective): 4 hours (restore service within 4 hours)</li>
|
|
<li>Backup availability: 99.9% (backups always available when needed)</li>
|
|
<li>Backup validation: 100% (all backups tested monthly)</li>
|
|
</ul>
|
|
<hr />
|
|
<h2 id="backup-architecture"><a class="header" href="#backup-architecture">Backup Architecture</a></h2>
|
|
<h3 id="what-gets-backed-up"><a class="header" href="#what-gets-backed-up">What Gets Backed Up</a></h3>
|
|
<pre><code>VAPORA Backup Scope
|
|
|
|
Critical (Hourly for database, daily for the rest):
|
|
├── Database
|
|
│ ├── SurrealDB data
|
|
│ ├── User data
|
|
│ ├── Project/task data
|
|
│ └── Audit logs
|
|
├── Configuration
|
|
│ ├── ConfigMaps
|
|
│ ├── Secrets
|
|
│ └── Deployment manifests
|
|
└── Infrastructure Code
|
|
├── Provisioning/Nickel configs
|
|
├── Kubernetes manifests
|
|
└── Scripts
|
|
|
|
Important (Weekly):
|
|
├── Application logs
|
|
├── Metrics data
|
|
└── Documentation updates
|
|
|
|
Optional (As-needed):
|
|
├── Container images
|
|
├── Build artifacts
|
|
└── Development configurations
|
|
</code></pre>
|
|
<h3 id="backup-storage-strategy"><a class="header" href="#backup-storage-strategy">Backup Storage Strategy</a></h3>
|
|
<pre><code>PRIMARY BACKUP LOCATION
|
|
├── Storage: Cloud object storage (S3/GCS/Azure Blob)
|
|
├── Frequency: Hourly for database, daily for configs
|
|
├── Retention: 30 days rolling window
|
|
├── Encryption: AES-256 at rest
|
|
└── Redundancy: Geo-replicated to different region
|
|
|
|
SECONDARY BACKUP LOCATION (for critical data)
|
|
├── Storage: Different cloud provider or on-prem
|
|
├── Frequency: Daily
|
|
├── Retention: 90 days
|
|
├── Purpose: Protection against primary provider outage
|
|
└── Testing: Restore tested weekly
|
|
|
|
ARCHIVE LOCATION (compliance/long-term)
|
|
├── Storage: Cold storage (Glacier, Azure Archive)
|
|
├── Frequency: Monthly
|
|
├── Retention: 7 years (adjust per compliance needs)
|
|
├── Purpose: Compliance & legal holds
|
|
└── Accessibility: ~4 hours to retrieve
|
|
</code></pre>
|
|
<hr />
|
|
<h2 id="database-backup-procedures"><a class="header" href="#database-backup-procedures">Database Backup Procedures</a></h2>
|
|
<h3 id="surrealdb-backup"><a class="header" href="#surrealdb-backup">SurrealDB Backup</a></h3>
|
|
<p><strong>Backup Method</strong>: Full database dump via SurrealDB export</p>
|
|
<pre><code class="language-bash"># Export full database
|
|
# The dump is streamed to stdout ("-") and redirected on the machine running
# kubectl; a file path given to surreal would be created inside the pod.
# surreal export also needs the namespace and database to dump:
# DB_NAMESPACE / DB_NAME are placeholders for your deployment's values.
kubectl exec -n vapora surrealdb-pod -- \
  surreal export --conn ws://localhost:8000 \
  --user root \
  --pass "$DB_PASSWORD" \
  --ns "$DB_NAMESPACE" \
  --db "$DB_NAME" \
  - > "backup-$(date +%Y%m%d-%H%M%S).sql"
|
|
|
|
# Expected size: 100MB-1GB (depending on data)
|
|
# Expected time: 5-15 minutes
|
|
</code></pre>
|
|
<p><strong>Automated Backup Setup</strong></p>
|
|
<pre><code class="language-nushell"># Create backup script: provisioning/scripts/backup-database.nu
|
|
# Dump the database, compress the dump and upload it to S3.
#
# output_dir: local directory that receives the .sql.gz file.
# Reads DB_PASSWORD, DB_NAMESPACE and DB_NAME from the environment
# (DB_NAMESPACE / DB_NAME are placeholders for your deployment's values).
def backup_database [output_dir: string] {
    let now = (date now)
    let timestamp = ($now | format date "%Y%m%d-%H%M%S")
    let backup_file = $"($output_dir)/vapora-db-($timestamp).sql"
    # One S3 prefix per calendar day, e.g. database/2025-01-31/
    let s3_dest = $"s3://vapora-backups/database/($now | format date '%Y-%m-%d')/"

    print $"Starting database backup to ($backup_file)..."

    # Export database. The dump is streamed to stdout ("-") and saved locally:
    # a path handed to surreal would be created inside the pod, where the
    # gzip and aws steps below cannot see it.
    (kubectl exec -n vapora deployment/vapora-backend --
        surreal export
        --conn ws://localhost:8000
        --user root
        --pass $env.DB_PASSWORD
        --ns $env.DB_NAMESPACE
        --db $env.DB_NAME
        -) | save --raw --force $backup_file

    # Compress (gzip replaces the .sql file with .sql.gz)
    gzip $backup_file

    # Upload to S3
    aws s3 cp $"($backup_file).gz" $s3_dest --sse AES256

    print $"Backup complete: ($backup_file).gz"
}
|
|
</code></pre>
|
|
<p><strong>Backup Schedule</strong></p>
|
|
<pre><code class="language-yaml"># Kubernetes CronJob for hourly backups
|
|
apiVersion: batch/v1
kind: CronJob
metadata:
  name: database-backup
  namespace: vapora
spec:
  schedule: "0 * * * *"  # Every hour
  # Skip a run while the previous backup is still in progress.
  concurrencyPolicy: Forbid
  jobTemplate:
    spec:
      template:
        spec:
          containers:
          - name: backup
            image: vapora/backup-tools:latest
            # NOTE(review): the script documented above is backup-database.nu;
            # confirm this .sh entry point exists in the image.
            command:
            - /scripts/backup-database.sh
            env:
            - name: DB_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: db-credentials
                  key: password
            - name: AWS_ACCESS_KEY_ID
              valueFrom:
                secretKeyRef:
                  name: aws-credentials
                  key: access-key
            # The AWS CLI cannot sign requests with the access key ID alone.
            - name: AWS_SECRET_ACCESS_KEY
              valueFrom:
                secretKeyRef:
                  name: aws-credentials
                  key: secret-key  # NOTE(review): confirm the key name in the secret
          restartPolicy: OnFailure
|
|
</code></pre>
|
|
<h3 id="backup-retention-policy"><a class="header" href="#backup-retention-policy">Backup Retention Policy</a></h3>
|
|
<pre><code>Hourly backups (last 24 hours):
|
|
├── Keep: All hourly backups
|
|
├── Purpose: Granular recovery options
|
|
└── Storage: Standard (fast access)
|
|
|
|
Daily backups (last 30 days):
|
|
├── Keep: 1 per day at midnight UTC
|
|
├── Purpose: Daily recovery options
|
|
└── Storage: Standard (fast access)
|
|
|
|
Weekly backups (last 90 days):
|
|
├── Keep: 1 per Sunday at midnight UTC
|
|
├── Purpose: Medium-term recovery
|
|
└── Storage: Standard
|
|
|
|
Monthly backups (7 years):
|
|
├── Keep: 1 per month on 1st at midnight UTC
|
|
├── Purpose: Compliance & long-term recovery
|
|
└── Storage: Archive (cold storage)
|
|
</code></pre>
|
|
<h3 id="backup-verification"><a class="header" href="#backup-verification">Backup Verification</a></h3>
|
|
<pre><code class="language-nushell"># Daily backup verification
|
|
# Sanity-check a database dump: it must exist, be at least 1MB, and carry
# the expected marker in its first lines. Raises an error on the first
# failed check.
#
# backup_file: path to the dump.
# NOTE(review): the header check reads text, so pass the uncompressed .sql
# (gunzip a .sql.gz first). The "SURREALDB" marker is kept from the original
# check; confirm it against a real export.
def verify_backup [backup_file: string] {
    print $"Verifying backup: ($backup_file)"

    # 1. Check file integrity
    if not ($backup_file | path exists) {
        error make {msg: $"Backup file not found: ($backup_file)"}
    }

    # 2. Check file size (should be at least 1MB)
    # `ls` reports a filesize value, so compare against a filesize literal.
    let size = (ls $backup_file | get 0.size)
    if (1MB > $size) {
        error make {msg: $"Backup file too small: ($size | into int) bytes"}
    }

    # 3. Check file header (should contain SQL dump)
    # Split the raw text into lines before taking the first ten.
    let header = (open --raw $backup_file | lines | first 10 | str join "\n")
    if not ($header | str contains "SURREALDB") {
        error make {msg: "Invalid backup format"}
    }

    print "✓ Backup verified successfully"
}
|
|
|
|
# Monthly restore test
|
|
# Restore a dump into a throw-away SurrealDB pod and run a sample query.
#
# backup_file: path to the dump as seen from inside the test-db pod.
# Reads DB_PASSWORD, DB_NAMESPACE and DB_NAME from the environment
# (DB_NAMESPACE / DB_NAME are placeholders for your deployment's values).
# NOTE(review): this function does not copy the dump into the pod; confirm
# how $backup_file becomes available there before relying on step 2.
def test_restore [backup_file: string] {
    print $"Testing restore from: ($backup_file)"

    # 1. Create temporary test database
    # Root credentials are passed at start so the import below can log in.
    (kubectl run -n vapora test-db --image=surrealdb/surrealdb:latest
        -- start --user root --pass $env.DB_PASSWORD file://test-data)
    # kubectl run returns before the container is up; wait until it is Ready.
    kubectl wait -n vapora --for=condition=Ready pod/test-db --timeout=120s

    # 2. Restore backup to test database
    # $env.DB_PASSWORD replaces "$DB_PASSWORD", which Nushell treats as a
    # literal string. The dump file is a positional argument.
    (kubectl exec -n vapora test-db --
        surreal import --conn ws://localhost:8000
        --user root --pass $env.DB_PASSWORD
        --ns $env.DB_NAMESPACE --db $env.DB_NAME
        $backup_file)

    # 3. Verify data integrity
    # The query is sent on stdin; SurrealQL counts rows with count() ... GROUP ALL.
    ("SELECT count() FROM projects GROUP ALL;"
        | kubectl exec -i -n vapora test-db --
            surreal sql --conn ws://localhost:8000
            --user root --pass $env.DB_PASSWORD
            --ns $env.DB_NAMESPACE --db $env.DB_NAME)

    # 4. Compare record counts
    # Should match production database

    # 5. Cleanup test database
    kubectl delete pod -n vapora test-db

    print "✓ Restore test passed"
}
|
|
</code></pre>
|
|
<hr />
|
|
<h2 id="configuration-backup"><a class="header" href="#configuration-backup">Configuration Backup</a></h2>
|
|
<h3 id="configmap--secret-backups"><a class="header" href="#configmap--secret-backups">ConfigMap & Secret Backups</a></h3>
|
|
<pre><code class="language-bash"># Backup all ConfigMaps
|
|
kubectl get configmap -n vapora -o yaml > "configmaps-backup-$(date +%Y%m%d).yaml"

# Backup all Secrets (encrypted)
# The passphrase is read from BACKUP_ENCRYPTION_KEY so the command can run
# unattended instead of prompting, and -pbkdf2 derives the key with PBKDF2
# rather than openssl's legacy scheme.
kubectl get secret -n vapora -o yaml | \
  openssl enc -aes-256-cbc -salt -pbkdf2 -pass env:BACKUP_ENCRYPTION_KEY \
    -out "secrets-backup-$(date +%Y%m%d).yaml.enc"

# Upload to S3
aws s3 sync . "s3://vapora-backups/k8s-configs/$(date +%Y-%m-%d)/" \
  --exclude "*" --include "*.yaml" --include "*.yaml.enc" \
  --sse AES256
|
|
</code></pre>
|
|
<p><strong>Automated Nushell Script</strong></p>
|
|
<pre><code class="language-nushell"># Export the vapora namespace's ConfigMaps, Secrets, Deployments and Services
# into a dated directory, archive it and upload the archive to S3.
#
# output_dir: local directory that receives the dated folder and the archive.
# Reads the Secrets passphrase from BACKUP_ENCRYPTION_KEY.
def backup_k8s_configs [output_dir: string] {
    let timestamp = (date now | format date "%Y%m%d")
    let config_dir = $"($output_dir)/k8s-configs-($timestamp)"

    mkdir $config_dir

    # Backup ConfigMaps
    # Nushell writes command output to a file with `save`; a bare ">" is the
    # comparison operator.
    kubectl get configmap -n vapora -o yaml | save --force $"($config_dir)/configmaps.yaml"

    # Backup Secrets (encrypted)
    # -pass env:... keeps openssl from prompting; -pbkdf2 selects PBKDF2 key derivation.
    (kubectl get secret -n vapora -o yaml
        | openssl enc -aes-256-cbc -salt -pbkdf2 -pass env:BACKUP_ENCRYPTION_KEY
            -out $"($config_dir)/secrets.yaml.enc")

    # Backup Deployments
    kubectl get deployments -n vapora -o yaml | save --force $"($config_dir)/deployments.yaml"

    # Backup Services
    kubectl get services -n vapora -o yaml | save --force $"($config_dir)/services.yaml"

    # Backup all to archive
    tar -czf $"($config_dir).tar.gz" $config_dir

    # Upload
    aws s3 cp $"($config_dir).tar.gz" s3://vapora-backups/configs/ --sse AES256

    print "✓ K8s configs backed up"
}
|
|
</code></pre>
|
|
<hr />
|
|
<h2 id="infrastructure-as-code-backups"><a class="header" href="#infrastructure-as-code-backups">Infrastructure-as-Code Backups</a></h2>
|
|
<h3 id="git-repository-backups"><a class="header" href="#git-repository-backups">Git Repository Backups</a></h3>
|
|
<p><strong>Primary</strong>: GitHub (with backup organization)</p>
|
|
<pre><code class="language-bash"># Mirror repository to backup location
|
|
# Clone the mirror on the first run; on later runs refresh the existing one
# (git clone fails when vapora-mirror.git already exists).
if [ -d vapora-mirror.git ]; then
  git -C vapora-mirror.git remote update --prune
else
  git clone --mirror https://github.com/your-org/vapora.git \
    vapora-mirror.git
fi

# Push to backup location
git -C vapora-mirror.git push --mirror https://backup-git-server/vapora-mirror.git
|
|
</code></pre>
|
|
<p><strong>Backup Schedule</strong></p>
|
|
<pre><code class="language-yaml"># Daily mirror push
|
|
0 0 * * * /scripts/backup-git-repo.sh
|
|
</code></pre>
|
|
<h3 id="provisioning-code-backups"><a class="header" href="#provisioning-code-backups">Provisioning Code Backups</a></h3>
|
|
<pre><code class="language-nushell"># Backup Nickel configs & scripts
|
|
# Archive the provisioning schemas, scripts and templates and upload the
# archive to S3.
#
# output_dir: local directory that receives the .tar.gz archive.
def backup_provisioning_code [output_dir: string] {
    let timestamp = (date now | format date "%Y%m%d")
    let archive = $"($output_dir)/provisioning-($timestamp).tar.gz"

    # Create backup
    # Multi-line commands are wrapped in parentheses; Nushell has no
    # backslash line continuation.
    (tar -czf $archive
        provisioning/schemas
        provisioning/scripts
        provisioning/templates)

    # Upload
    aws s3 cp $archive s3://vapora-backups/provisioning/ --sse AES256
}
|
|
</code></pre>
|
|
<hr />
|
|
<h2 id="application-state-backups"><a class="header" href="#application-state-backups">Application State Backups</a></h2>
|
|
<h3 id="persistent-volume-backups"><a class="header" href="#persistent-volume-backups">Persistent Volume Backups</a></h3>
|
|
<p>If using persistent volumes for data:</p>
|
|
<pre><code class="language-nushell"># Backup PersistentVolumeClaims
|
|
# Snapshot the volume behind every PersistentVolumeClaim in a namespace.
#
# namespace: Kubernetes namespace whose PVCs are snapshotted.
# NOTE(review): assumes volumes are provisioned by the EBS CSI driver, so the
# PersistentVolume's spec.csi.volumeHandle is the EBS volume ID; confirm for
# your cluster.
def backup_pvcs [namespace: string] {
    let pvcs = (kubectl get pvc -n $namespace -o json | from json).items
    let today = (date now | format date "%Y-%m-%d")

    for pvc in $pvcs {
        let pvc_name = $pvc.metadata.name
        let volume_size = $pvc.spec.resources.requests.storage

        print $"Backing up PVC: ($pvc_name) \(($volume_size)\)"

        # create-snapshot expects an EBS volume ID, not the claim name:
        # resolve the bound PersistentVolume and read the ID from it.
        let pv_name = $pvc.spec.volumeName
        let volume_id = (kubectl get pv $pv_name -o json | from json).spec.csi.volumeHandle

        # Create snapshot (cloud-specific)
        (aws ec2 create-snapshot
            --volume-id $volume_id
            --description $"VAPORA backup ($today)")
    }
}
|
|
</code></pre>
|
|
<h3 id="application-logs"><a class="header" href="#application-logs">Application Logs</a></h3>
|
|
<pre><code class="language-nushell"># Export logs for archive
|
|
# Export the last 7 days of backend and agents logs, compress them and
# upload the compressed files to S3.
#
# output_dir: local directory that receives the log files.
def backup_application_logs [output_dir: string] {
    let timestamp = (date now | format date "%Y%m%d")

    # Export last 7 days of logs
    # `save` writes the command output to a file; a bare ">" is not a
    # redirection in Nushell.
    (kubectl logs deployment/vapora-backend -n vapora --since=168h
        | save --force $"($output_dir)/backend-logs-($timestamp).log")

    (kubectl logs deployment/vapora-agents -n vapora --since=168h
        | save --force $"($output_dir)/agents-logs-($timestamp).log")

    # Compress and upload
    # An interpolated string is not glob-expanded, so list the matching
    # files explicitly and gzip them one by one.
    glob $"($output_dir)/*.log" | each {|log_file| gzip $log_file }

    (aws s3 sync $output_dir s3://vapora-backups/logs/
        --exclude "*" --include "*.log.gz"
        --sse AES256)
}
|
|
</code></pre>
|
|
<hr />
|
|
<h2 id="container-image-backups"><a class="header" href="#container-image-backups">Container Image Backups</a></h2>
|
|
<h3 id="docker-image-registry"><a class="header" href="#docker-image-registry">Docker Image Registry</a></h3>
|
|
<pre><code class="language-bash"># Tag images for backup
|
|
# One dated tag shared by every image in this run.
BACKUP_TAG="backup-$(date +%Y%m%d)"

for image in backend agents llm-router; do
  # Tag under the backup registry's name: docker push can only push a
  # reference that exists locally, and the registry prefix is part of it.
  docker tag "vapora/${image}:latest" "backup-registry/vapora/${image}:${BACKUP_TAG}"

  # Push to backup registry
  docker push "backup-registry/vapora/${image}:${BACKUP_TAG}"
done
|
|
|
|
# Retention: Keep last 30 days of images
|
|
</code></pre>
|
|
<hr />
|
|
<h2 id="backup-monitoring"><a class="header" href="#backup-monitoring">Backup Monitoring</a></h2>
|
|
<h3 id="backup-health-checks"><a class="header" href="#backup-health-checks">Backup Health Checks</a></h3>
|
|
<pre><code class="language-nushell"># Daily backup status check
|
|
# Print a status report for the vapora-backups bucket: age of the newest
# database backup, presence of config backups, and total storage used.
def check_backup_status [] {
    print "=== Backup Status Report ==="

    # 1. Check latest database backup
    # `aws s3 ls` prints plain text, so the object list is requested from the
    # S3 API as JSON instead. An empty prefix yields no Contents key.
    let db_objects = (
        aws s3api list-objects-v2 --bucket vapora-backups --prefix database/ --output json
        | from json
        | default {}
        | get Contents?
        | default []
    )

    if ($db_objects | is-empty) {
        print "❌ No database backups found"
    } else {
        let latest = ($db_objects | get LastModified | into datetime | sort | last)
        let db_age = (date now) - $latest

        # Nushell duration literals use `hr` for hours.
        if ($db_age > 2hr) {
            print "⚠️ Database backup stale (> 2 hours old)"
        } else {
            print "✓ Database backup current"
        }
    }

    # 2. Check config backup
    # Count listing lines in Nushell so the result is a number, and use
    # `complete` so a failed or empty listing counts as zero.
    let config_count = (
        (aws s3 ls s3://vapora-backups/configs/ | complete).stdout | lines | length
    )
    if ($config_count > 0) {
        print "✓ Config backups present"
    } else {
        print "❌ No config backups found"
    }

    # 3. Check storage usage
    let storage_used = (
        aws s3 ls s3://vapora-backups/ --recursive --summarize | grep "Total Size" | str trim
    )
    print $"Storage used: ($storage_used)"

    # 4. Check backup encryption
    # NOTE(review): $objects is fetched but never inspected, and the object
    # listing may not include encryption details; confirm how this check
    # should be implemented.
    let objects = (aws s3api list-objects-v2 --bucket vapora-backups --query 'Contents[*]')
    # All should have ServerSideEncryption: AES256

    print "=== End Report ==="
}
|
|
</code></pre>
|
|
<h3 id="backup-alerts"><a class="header" href="#backup-alerts">Backup Alerts</a></h3>
|
|
<p>Configure alerts for:</p>
|
|
<pre><code class="language-yaml">Backup Failures:
|
|
- Threshold: Backup not completed in 2 hours
|
|
- Action: Alert operations team
|
|
- Severity: High
|
|
|
|
Backup Staleness:
|
|
- Threshold: Latest backup > 24 hours old
|
|
- Action: Alert operations team
|
|
- Severity: High
|
|
|
|
Storage Capacity:
|
|
- Threshold: Backup storage > 80% full
|
|
- Action: Alert & plan cleanup
|
|
- Severity: Medium
|
|
|
|
Restore Test Failures:
|
|
- Threshold: Monthly restore test fails
|
|
- Action: Alert & investigate
|
|
- Severity: Critical
|
|
</code></pre>
|
|
<hr />
|
|
<h2 id="backup-testing--validation"><a class="header" href="#backup-testing--validation">Backup Testing & Validation</a></h2>
|
|
<h3 id="monthly-restore-test"><a class="header" href="#monthly-restore-test">Monthly Restore Test</a></h3>
|
|
<p><strong>Schedule</strong>: First Sunday of each month at 02:00 UTC</p>
|
|
<pre><code class="language-nushell"># Download the database backups taken 7 days ago into ./test-backups/ as the
# input for the monthly restore test. The restore and verification steps are
# listed as comments and are not automated here.
def monthly_restore_test [] {
    print "Starting monthly restore test..."

    # 1. Select a recent backup (the one taken 7 days ago)
    let backup_date = ((date now) - 7day | format date "%Y-%m-%d")

    # 2. Download backup
    # The date is interpolated into the URL; a bare $backup_date inside the
    # path would be passed to aws literally.
    (aws s3 cp $"s3://vapora-backups/database/($backup_date)/"
        ./test-backups/
        --recursive)

    # 3. Restore to test environment
    # (See Database Recovery Procedures)

    # 4. Verify data integrity
    # - Count records match
    # - No data corruption
    # - All tables present

    # 5. Verify application works
    # - Can query database
    # - Can perform basic operations

    # 6. Document results
    # - Success/failure
    # - Any issues found
    # - Time taken

    print "✓ Restore test completed"
}
|
|
</code></pre>
|
|
<h3 id="backup-audit-report"><a class="header" href="#backup-audit-report">Backup Audit Report</a></h3>
|
|
<p><strong>Quarterly</strong>: Generate backup audit report</p>
|
|
<pre><code class="language-nushell"># Print the skeleton of the quarterly backup audit report.
# The figures in section 2 are fixed sample values and sections 3-6 are
# headings only; fill them in from real data when producing a report.
def quarterly_backup_audit [] {
    print "=== Quarterly Backup Audit Report ==="
    print $"Report Date: (date now | format date '%Y-%m-%d')"
    print ""

    print "1. Backup Coverage"
    # The database is backed up hourly, not daily.
    print " Database: Hourly ✓"
    print " Configs: Daily ✓"
    print " IaC: Daily ✓"
    print ""

    print "2. Restore Tests (Last Quarter)"
    print " Tests Performed: 3"
    print " Tests Passed: 3"
    print " Average Restore Time: 2.5 hours"
    print ""

    print "3. Storage Usage"
    # Calculate storage per category

    print "4. Backup Age Distribution"
    # Show age distribution of backups

    print "5. Incidents & Issues"
    # Any backup-related incidents

    print "6. Recommendations"
    # Any needed improvements
}
|
|
</code></pre>
|
|
<hr />
|
|
<h2 id="backup-security"><a class="header" href="#backup-security">Backup Security</a></h2>
|
|
<h3 id="encryption"><a class="header" href="#encryption">Encryption</a></h3>
|
|
<ul>
|
|
<li>✅ All backups encrypted at rest (AES-256)</li>
|
|
<li>✅ All backups encrypted in transit (HTTPS/TLS)</li>
|
|
<li>✅ Encryption keys managed by cloud provider or KMS</li>
|
|
<li>✅ Separate keys for database and config backups</li>
|
|
</ul>
|
|
<h3 id="access-control"><a class="header" href="#access-control">Access Control</a></h3>
|
|
<pre><code>Backup Access Policy:
|
|
|
|
Read Access:
|
|
- Operations team
|
|
- Disaster recovery team
|
|
- Compliance/audit team
|
|
|
|
Write Access:
|
|
- Automated backup system only
|
|
- Require 2FA for manual backups
|
|
|
|
Delete/Modify Access:
|
|
- Require 2 approvals
|
|
- Audit logging enabled
|
|
- 24-hour delay before deletion
|
|
</code></pre>
|
|
<h3 id="audit-logging"><a class="header" href="#audit-logging">Audit Logging</a></h3>
|
|
<pre><code class="language-bash"># All backup operations logged
|
|
- Backup creation: When, size, hash
|
|
- Backup retrieval: Who, when, what
|
|
- Restore operations: When, who, from where
|
|
- Backup deletion: When, who, reason
|
|
|
|
# Logs stored separately and immutable
|
|
# Example: CloudTrail, S3 access logs, custom logging
|
|
</code></pre>
|
|
<hr />
|
|
<h2 id="backup-disaster-scenarios"><a class="header" href="#backup-disaster-scenarios">Backup Disaster Scenarios</a></h2>
|
|
<h3 id="scenario-1-single-database-backup-fails"><a class="header" href="#scenario-1-single-database-backup-fails">Scenario 1: Single Database Backup Fails</a></h3>
|
|
<p><strong>Impact</strong>: 1-hour data loss risk</p>
|
|
<p><strong>Prevention</strong>:</p>
|
|
<ul>
|
|
<li>Backup redundancy (multiple copies)</li>
|
|
<li>Multiple backup methods</li>
|
|
<li>Backup validation after each backup</li>
|
|
</ul>
|
|
<p><strong>Recovery</strong>:</p>
|
|
<ul>
|
|
<li>Use previous hour's backup</li>
|
|
<li>Restore to test environment first</li>
|
|
<li>Validate data integrity</li>
|
|
<li>Restore to production if good</li>
|
|
</ul>
|
|
<h3 id="scenario-2-backup-storage-compromised"><a class="header" href="#scenario-2-backup-storage-compromised">Scenario 2: Backup Storage Compromised</a></h3>
|
|
<p><strong>Impact</strong>: Data loss + security breach</p>
|
|
<p><strong>Prevention</strong>:</p>
|
|
<ul>
|
|
<li>Encryption with separate keys</li>
|
|
<li>Geographic redundancy</li>
|
|
<li>Backup verification signing</li>
|
|
<li>Access control restrictions</li>
|
|
</ul>
|
|
<p><strong>Recovery</strong>:</p>
|
|
<ul>
|
|
<li>Activate secondary backup location</li>
|
|
<li>Restore from archive backups</li>
|
|
<li>Full security audit</li>
|
|
</ul>
|
|
<h3 id="scenario-3-ransomware-infection"><a class="header" href="#scenario-3-ransomware-infection">Scenario 3: Ransomware Infection</a></h3>
|
|
<p><strong>Impact</strong>: All recent backups encrypted</p>
|
|
<p><strong>Prevention</strong>:</p>
|
|
<ul>
|
|
<li>Immutable backups (WORM)</li>
|
|
<li>Air-gapped backups (offline)</li>
|
|
<li>Archive-only old backups</li>
|
|
<li>Regular backup verification</li>
|
|
</ul>
|
|
<p><strong>Recovery</strong>:</p>
|
|
<ul>
|
|
<li>Use air-gapped backup</li>
|
|
<li>Restore to clean environment</li>
|
|
<li>Full security remediation</li>
|
|
</ul>
|
|
<h3 id="scenario-4-accidental-data-deletion"><a class="header" href="#scenario-4-accidental-data-deletion">Scenario 4: Accidental Data Deletion</a></h3>
|
|
<p><strong>Impact</strong>: Data loss from point of deletion</p>
|
|
<p><strong>Prevention</strong>:</p>
|
|
<ul>
|
|
<li>Frequent backups (hourly)</li>
|
|
<li>Soft deletes in application</li>
|
|
<li>Audit logging</li>
|
|
</ul>
|
|
<p><strong>Recovery</strong>:</p>
|
|
<ul>
|
|
<li>Restore from backup before deletion time</li>
|
|
<li>Point-in-time recovery if available</li>
|
|
</ul>
|
|
<hr />
|
|
<h2 id="backup-checklists"><a class="header" href="#backup-checklists">Backup Checklists</a></h2>
|
|
<h3 id="daily"><a class="header" href="#daily">Daily</a></h3>
|
|
<ul>
|
|
<li><input disabled="" type="checkbox"/>
|
|
Database backup completed</li>
|
|
<li><input disabled="" type="checkbox"/>
|
|
Backup size normal (not 0 bytes)</li>
|
|
<li><input disabled="" type="checkbox"/>
|
|
No backup errors in logs</li>
|
|
<li><input disabled="" type="checkbox"/>
|
|
Upload to S3 succeeded</li>
|
|
<li><input disabled="" type="checkbox"/>
|
|
Previous backup still available</li>
|
|
</ul>
|
|
<h3 id="weekly"><a class="header" href="#weekly">Weekly</a></h3>
|
|
<ul>
|
|
<li><input disabled="" type="checkbox"/>
|
|
Database backup retention verified</li>
|
|
<li><input disabled="" type="checkbox"/>
|
|
Config backup completed</li>
|
|
<li><input disabled="" type="checkbox"/>
|
|
Infrastructure code backed up</li>
|
|
<li><input disabled="" type="checkbox"/>
|
|
Backup storage space adequate</li>
|
|
<li><input disabled="" type="checkbox"/>
|
|
Encryption keys accessible</li>
|
|
</ul>
|
|
<h3 id="monthly"><a class="header" href="#monthly">Monthly</a></h3>
|
|
<ul>
|
|
<li><input disabled="" type="checkbox"/>
|
|
Restore test scheduled</li>
|
|
<li><input disabled="" type="checkbox"/>
|
|
Backup audit report generated</li>
|
|
<li><input disabled="" type="checkbox"/>
|
|
Backup verification successful</li>
|
|
<li><input disabled="" type="checkbox"/>
|
|
Archive backups created</li>
|
|
<li><input disabled="" type="checkbox"/>
|
|
Old backups properly retained</li>
|
|
</ul>
|
|
<h3 id="quarterly"><a class="header" href="#quarterly">Quarterly</a></h3>
|
|
<ul>
|
|
<li><input disabled="" type="checkbox"/>
|
|
Full audit report completed</li>
|
|
<li><input disabled="" type="checkbox"/>
|
|
Backup strategy reviewed</li>
|
|
<li><input disabled="" type="checkbox"/>
|
|
Team trained on procedures</li>
|
|
<li><input disabled="" type="checkbox"/>
|
|
RTO/RPO targets met</li>
|
|
<li><input disabled="" type="checkbox"/>
|
|
Recommendations implemented</li>
|
|
</ul>
|
|
<hr />
|
|
<h2 id="summary"><a class="header" href="#summary">Summary</a></h2>
|
|
<p><strong>Backup Strategy at a Glance</strong>:</p>
|
|
<div class="table-wrapper"><table><thead><tr><th>Item</th><th>Frequency</th><th>Retention</th><th>Storage</th><th>Encryption</th></tr></thead><tbody>
|
|
<tr><td><strong>Database</strong></td><td>Hourly</td><td>30 days</td><td>S3</td><td>AES-256</td></tr>
|
|
<tr><td><strong>Config</strong></td><td>Daily</td><td>90 days</td><td>S3</td><td>AES-256</td></tr>
|
|
<tr><td><strong>IaC</strong></td><td>Daily</td><td>30 days</td><td>Git + S3</td><td>AES-256</td></tr>
|
|
<tr><td><strong>Images</strong></td><td>Daily</td><td>30 days</td><td>Registry</td><td>Built-in</td></tr>
|
|
<tr><td><strong>Archive</strong></td><td>Monthly</td><td>7 years</td><td>Glacier</td><td>AES-256</td></tr>
|
|
</tbody></table>
|
|
</div>
|
|
<p><strong>Key Metrics</strong>:</p>
|
|
<ul>
|
|
<li>RPO: 1 hour (lose at most 1 hour of data)</li>
|
|
<li>RTO: 4 hours (restore within 4 hours)</li>
|
|
<li>Availability: 99.9% (backups available when needed)</li>
|
|
<li>Validation: 100% (all backups tested monthly)</li>
|
|
</ul>
|
|
<p><strong>Success Criteria</strong>:</p>
|
|
<ul>
|
|
<li>✅ Daily backup completion</li>
|
|
<li>✅ Backup validation passes</li>
|
|
<li>✅ Monthly restore test successful</li>
|
|
<li>✅ No security incidents</li>
|
|
<li>✅ Compliance requirements met</li>
|
|
</ul>
|
|
|
|
</main>
|
|
|
|
<nav class="nav-wrapper" aria-label="Page navigation">
|
|
<!-- Mobile navigation buttons -->
|
|
<a rel="prev" href="disaster-recovery-runbook.html" class="mobile-nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
|
|
<i class="fa fa-angle-left"></i>
|
|
</a>
|
|
|
|
<a rel="next prefetch" href="database-recovery-procedures.html" class="mobile-nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
|
|
<i class="fa fa-angle-right"></i>
|
|
</a>
|
|
|
|
<div style="clear: both"></div>
|
|
</nav>
|
|
</div>
|
|
</div>
|
|
|
|
<nav class="nav-wide-wrapper" aria-label="Page navigation">
|
|
<a rel="prev" href="disaster-recovery-runbook.html" class="nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
|
|
<i class="fa fa-angle-left"></i>
|
|
</a>
|
|
|
|
<a rel="next prefetch" href="database-recovery-procedures.html" class="nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
|
|
<i class="fa fa-angle-right"></i>
|
|
</a>
|
|
</nav>
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
<script>
|
|
window.playground_copyable = true;
|
|
</script>
|
|
|
|
|
|
<script src="../elasticlunr.min.js"></script>
|
|
<script src="../mark.min.js"></script>
|
|
<script src="../searcher.js"></script>
|
|
|
|
<script src="../clipboard.min.js"></script>
|
|
<script src="../highlight.js"></script>
|
|
<script src="../book.js"></script>
|
|
|
|
<!-- Custom JS scripts -->
|
|
|
|
|
|
</div>
|
|
</body>
|
|
</html>
|