497 lines
12 KiB
Plaintext
497 lines
12 KiB
Plaintext
|
|
#!/usr/bin/env nu
|
||
|
|
|
||
|
|
# VAPORA Database Recovery Script
|
||
|
|
# Restore SurrealDB from backups (S3 or Restic)
|
||
|
|
# Follows NUSHELL_GUIDELINES.md strictly (0.109.0+)
|
||
|
|
|
||
|
|
# Return the current time as a compact, sortable stamp (YYYYmmdd-HHMMSS).
def get-timestamp []: nothing -> string {
    let now = (date now)
    $now | format date "%Y%m%d-%H%M%S"
}
|
||
|
|
|
||
|
|
# Download a backup object from S3 to a local file using `aws s3 cp`.
#
# Returns a record with the fields:
#   success - true when `aws` exited with code 0
#   file    - the local output path
#   size    - trimmed `ls -lh` output for the file ("unknown" if `ls` fails,
#             null when the download itself failed)
#   error   - trimmed stderr of `aws` on failure, otherwise null
def download-from-s3 [
    s3_location: string  # Source object URL passed to `aws s3 cp`
    output_file: string  # Local path the object is copied to
]: nothing -> record {
    # Nushell interpolates only parenthesised expressions inside $"...".
    print $"Downloading from S3 ($s3_location)..."

    let result = (do { ^aws s3 cp $s3_location $output_file } | complete)

    if ($result.exit_code != 0) {
        return {
            success: false
            file: $output_file
            size: null
            error: ($result.stderr | str trim)
        }
    }

    # The size is informational only, so a failing `ls` must not fail the download.
    let listing = (do { ^ls -lh $output_file } | complete)
    let size = if ($listing.exit_code == 0) {
        $listing.stdout | str trim
    } else {
        "unknown"
    }

    {
        success: true
        file: $output_file
        size: $size
        error: null
    }
}
|
||
|
|
|
||
|
|
# Decrypt an AES-256-CBC encrypted backup with `openssl enc -d`.
#
# Returns a record with the fields:
#   success        - true when `openssl` exited with code 0
#   decrypted_file - the requested output path
#   error          - trimmed stderr of `openssl` on failure, otherwise null
def decrypt-backup [
    encrypted_file: string  # Path of the encrypted input file
    key_file: string        # Path of the file holding the passphrase (used as `file:<path>`)
    output_file: string     # Path the decrypted data is written to
]: nothing -> record {
    print $"Decrypting backup ($encrypted_file)..."

    # Nushell has no backslash line continuation, so the command stays on one line.
    # The `file:` prefix has to be built with interpolation; a bare `file:$key_file`
    # would be passed to openssl literally.
    let result = (do {
        ^openssl enc -d -aes-256-cbc -in $encrypted_file -out $output_file -pass $"file:($key_file)"
    } | complete)

    if ($result.exit_code == 0) {
        {
            success: true
            decrypted_file: $output_file
            error: null
        }
    } else {
        {
            success: false
            decrypted_file: $output_file
            error: ($result.stderr | str trim)
        }
    }
}
|
||
|
|
|
||
|
|
# Decompress a gzip file in place with `gunzip --force`.
#
# Returns a record with the fields:
#   success           - true when `gunzip` exited with code 0
#   decompressed_file - the input path with its trailing ".gz" removed
#   error             - trimmed stderr of `gunzip` on failure, otherwise null
def decompress-backup [
    input_file: string  # Path of the .gz file to decompress
]: nothing -> record {
    print $"Decompressing ($input_file)..."

    # Strip only the trailing suffix; a plain substring replace would mangle
    # paths that contain ".gz" earlier (e.g. a directory named "x.gz").
    let decompressed = ($input_file | str replace --regex '\.gz$' '')
    let result = (do { ^gunzip --force $input_file } | complete)

    if ($result.exit_code == 0) {
        {
            success: true
            decompressed_file: $decompressed
            error: null
        }
    } else {
        {
            success: false
            decompressed_file: $decompressed
            error: ($result.stderr | str trim)
        }
    }
}
|
||
|
|
|
||
|
|
# Poll the database until it answers, or give up after `max_retries` attempts.
#
# Each attempt runs the `surreal` CLI against the given endpoint; a failed
# attempt is followed by a 2 second pause.
#
# Returns on success:
#   { success: true, ready_after_attempts: <failed attempts before success>, error: null }
# Returns on exhaustion (including max_retries <= 0):
#   { success: false, error: "Database not ready after maximum attempts" }
def check-database-ready [
    surreal_url: string   # Connection URL passed as --conn
    surreal_user: string  # User passed as --user
    surreal_pass: string  # Password passed as --pass
    max_retries: int      # Maximum number of probe attempts
]: nothing -> record {
    print $"Checking database readiness at ($surreal_url)..."

    # A plain loop replaces the former self-referencing closure: Nushell closures
    # cannot call themselves, and there is no `call` command.
    mut attempt = 0
    while ($attempt < $max_retries) {
        # NOTE(review): confirm `surreal list namespaces` is supported by the
        # installed CLI version; the invocation is kept as originally written.
        let probe = (do {
            ^surreal list namespaces --conn $surreal_url --user $surreal_user --pass $surreal_pass
        } | complete)

        if ($probe.exit_code == 0) {
            return {
                success: true
                ready_after_attempts: $attempt
                error: null
            }
        }

        print $"Attempt ($attempt + 1) failed, waiting..."
        sleep 2sec
        $attempt = $attempt + 1
    }

    {
        success: false
        error: "Database not ready after maximum attempts"
    }
}
|
||
|
|
|
||
|
|
# Import a SQL backup file into the database with `surreal import`.
#
# Returns a record with the fields:
#   success      - true when `surreal` exited with code 0
#   database_url - the connection URL that was used
#   timestamp    - completion stamp from get-timestamp (success only)
#   error        - trimmed stderr of `surreal` on failure, otherwise null
def import-to-temp-database [
    backup_file: string   # Path of the decompressed backup to import
    surreal_url: string   # Connection URL passed as --conn
    surreal_user: string  # User passed as --user
    surreal_pass: string  # Password passed as --pass
]: nothing -> record {
    print "Importing backup to temporary database..."

    # Kept on one line: Nushell has no backslash line continuation.
    # NOTE(review): confirm the installed `surreal import` accepts --input and
    # does not additionally require namespace/database flags.
    let result = (do {
        ^surreal import --conn $surreal_url --user $surreal_user --pass $surreal_pass --input $backup_file
    } | complete)

    if ($result.exit_code == 0) {
        {
            success: true
            database_url: $surreal_url
            timestamp: (get-timestamp)
            error: null
        }
    } else {
        {
            success: false
            database_url: $surreal_url
            error: ($result.stderr | str trim)
        }
    }
}
|
||
|
|
|
||
|
|
# Run a sanity query (`SELECT COUNT() FROM projects`) against the restored database.
#
# Returns a record with the fields:
#   success      - true when `surreal` exited with code 0
#   verification - trimmed stdout of the query on success, otherwise null
#   error        - trimmed stderr (or stdout when stderr is empty) on failure,
#                  otherwise null
def verify-database [
    surreal_url: string   # Connection URL passed as --conn
    surreal_user: string  # User passed as --user
    surreal_pass: string  # Password passed as --pass
]: nothing -> record {
    print "Verifying restored database..."

    # Invoke the CLI directly instead of building a `bash -c` string: every value
    # is passed as its own argument, so credentials containing spaces, quotes or
    # shell metacharacters can neither break the command nor inject into it.
    # NOTE(review): confirm the `surreal query` subcommand against the installed CLI.
    let result = (do {
        ^surreal query --conn $surreal_url --user $surreal_user --pass $surreal_pass "SELECT COUNT() FROM projects"
    } | complete)

    if ($result.exit_code == 0) {
        {
            success: true
            verification: ($result.stdout | str trim)
            error: null
        }
    } else {
        # Prefer stderr; fall back to stdout for tools that report errors there.
        let stderr_text = ($result.stderr | str trim)
        let message = if ($stderr_text | is-empty) {
            $result.stdout | str trim
        } else {
            $stderr_text
        }
        {
            success: false
            verification: null
            error: $message
        }
    }
}
|
||
|
|
|
||
|
|
# Scale a Kubernetes StatefulSet to zero replicas with `kubectl scale`.
#
# Returns a record with the fields:
#   success     - true when `kubectl` exited with code 0
#   statefulset - the StatefulSet name
#   action      - "scaled-down" (success only)
#   error       - trimmed stderr of `kubectl` on failure, otherwise null
def scale-statefulset-down [
    namespace: string         # Kubernetes namespace of the StatefulSet
    statefulset_name: string  # Name of the StatefulSet to scale
]: nothing -> record {
    print $"Scaling down StatefulSet ($statefulset_name)..."

    # Kept on one line: Nushell has no backslash line continuation.
    let result = (do {
        ^kubectl scale statefulset $statefulset_name --replicas 0 -n $namespace
    } | complete)

    if ($result.exit_code == 0) {
        {
            success: true
            statefulset: $statefulset_name
            action: "scaled-down"
            error: null
        }
    } else {
        {
            success: false
            statefulset: $statefulset_name
            error: ($result.stderr | str trim)
        }
    }
}
|
||
|
|
|
||
|
|
# Delete a PersistentVolumeClaim with `kubectl delete pvc`.
#
# Returns a record with the fields:
#   success - true when `kubectl` exited with code 0
#   pvc     - the PVC name
#   error   - trimmed stderr of `kubectl` on failure, otherwise null
def delete-pvc [
    namespace: string  # Kubernetes namespace of the PVC
    pvc_name: string   # Name of the PVC to delete
]: nothing -> record {
    print $"Deleting PVC ($pvc_name)..."

    let result = (do { ^kubectl delete pvc $pvc_name -n $namespace } | complete)

    if ($result.exit_code == 0) {
        {
            success: true
            pvc: $pvc_name
            error: null
        }
    } else {
        {
            success: false
            pvc: $pvc_name
            error: ($result.stderr | str trim)
        }
    }
}
|
||
|
|
|
||
|
|
# Scale a Kubernetes StatefulSet to the requested replica count with `kubectl scale`.
#
# Returns a record with the fields:
#   success     - true when `kubectl` exited with code 0
#   statefulset - the StatefulSet name
#   replicas    - the requested replica count (success only)
#   error       - trimmed stderr of `kubectl` on failure, otherwise null
def scale-statefulset-up [
    namespace: string         # Kubernetes namespace of the StatefulSet
    statefulset_name: string  # Name of the StatefulSet to scale
    replicas: int             # Target replica count
]: nothing -> record {
    print $"Scaling up StatefulSet ($statefulset_name) to ($replicas) replicas..."

    # Kept on one line: Nushell has no backslash line continuation.
    let result = (do {
        ^kubectl scale statefulset $statefulset_name --replicas $replicas -n $namespace
    } | complete)

    if ($result.exit_code == 0) {
        {
            success: true
            statefulset: $statefulset_name
            replicas: $replicas
            error: null
        }
    } else {
        {
            success: false
            statefulset: $statefulset_name
            error: ($result.stderr | str trim)
        }
    }
}
|
||
|
|
|
||
|
|
# Block until a pod reports the Ready condition, using `kubectl wait`.
#
# Returns a record with the fields:
#   success - true when `kubectl wait` exited with code 0
#   pod     - the pod name
#   error   - trimmed stderr of `kubectl` on failure, otherwise null
def wait-for-pod-ready [
    namespace: string  # Kubernetes namespace of the pod
    pod_name: string   # Name of the pod to wait for
    timeout_secs: int  # Maximum time to wait, in seconds
]: nothing -> record {
    # Literal parentheses must be escaped inside $"..." or they are evaluated.
    print $"Waiting for pod ($pod_name) to be ready \(timeout: ($timeout_secs)s\)..."

    # `pod/<name>` and `--timeout=<n>s` are built with Nushell interpolation;
    # bare `pod/$pod_name` or bash-style "${var}" would reach kubectl unexpanded.
    let result = (do {
        ^kubectl wait --for condition=Ready $"pod/($pod_name)" -n $namespace $"--timeout=($timeout_secs)s"
    } | complete)

    if ($result.exit_code == 0) {
        {
            success: true
            pod: $pod_name
            error: null
        }
    } else {
        {
            success: false
            pod: $pod_name
            error: ($result.stderr | str trim)
        }
    }
}
|
||
|
|
|
||
|
|
# Recursively remove a temporary working directory with `rm -rf`.
#
# Returns a record with the fields:
#   success - true when `rm` exited with code 0
#   removed - the path that was (or was to be) removed
#   error   - reason for failure (guard message or trimmed stderr), otherwise null
def cleanup-temp-files [
    work_dir: string  # Directory to delete
]: nothing -> record {
    print $"Cleaning up temporary files ($work_dir)..."

    # Safety guard: never hand an empty or root path to `rm -rf`.
    if (($work_dir | str trim) in ["", "/"]) {
        return {
            success: false
            removed: $work_dir
            error: "Refusing to remove an empty or root path"
        }
    }

    let result = (do { ^rm -rf $work_dir } | complete)

    if ($result.exit_code == 0) {
        {
            success: true
            removed: $work_dir
            error: null
        }
    } else {
        {
            success: false
            removed: $work_dir
            error: ($result.stderr | str trim)
        }
    }
}
|
||
|
|
|
||
|
|
# Main recovery function
#
# Restores the database from an encrypted, gzip-compressed backup in S3.
# Steps, in order: validate flags, create a timestamped work directory,
# download, decrypt, decompress, scale the StatefulSet to 0, delete the PVC,
# scale back to 1, wait for the pod and the database, import the backup,
# optionally verify, and remove the work directory.
#
# Fatal failures print an ERROR line, remove the work directory, and exit with
# status 1. Scale-down, PVC-deletion and verification failures only print a
# WARNING and the run continues.
def main [
    --s3-location: string = ""                    # Backup object URL (required), e.g. s3://bucket/path/backup.sql.gz.enc
    --encryption-key: string = ""                 # Path of the passphrase file used for decryption (required)
    --surreal-url: string = "ws://localhost:8000" # Database connection URL
    --surreal-user: string = "root"               # Database user
    --surreal-pass: string = ""                   # Database password (required)
    --namespace: string = "vapora"                # Kubernetes namespace
    --statefulset: string = "surrealdb"           # StatefulSet running the database
    --pvc: string = "surrealdb-data-surrealdb-0"  # PVC that is deleted and recreated
    --verify                                      # Run a verification query after the import
    --work-dir: string = "/tmp/vapora-recovery"   # Parent directory for temporary files
]: nothing -> nothing {
    print "=== VAPORA Database Recovery ==="
    print ""

    # Validate inputs
    if ($s3_location == "") {
        print "ERROR: --s3-location required (s3://bucket/path/backup.sql.gz.enc)"
        exit 1
    }

    if ($encryption_key == "") {
        print "ERROR: --encryption-key required"
        exit 1
    }

    if ($surreal_pass == "") {
        print "ERROR: --surreal-pass required"
        exit 1
    }

    # Create a per-run work directory so concurrent or repeated runs do not collide.
    let work_path = $"($work_dir)/(get-timestamp)"
    let create_result = (do { ^mkdir -p $work_path } | complete)

    if ($create_result.exit_code != 0) {
        print "ERROR: Failed to create work directory"
        exit 1
    }

    # Download from S3
    let encrypted_file = $"($work_path)/backup.sql.gz.enc"
    let download_result = (download-from-s3 $s3_location $encrypted_file)

    if (not $download_result.success) {
        print $"ERROR: S3 download failed: ($download_result.error)"
        cleanup-temp-files $work_path | ignore
        exit 1
    }

    print "✓ Backup downloaded from S3"

    # Decrypt
    let compressed_file = $"($work_path)/backup.sql.gz"
    let decrypt_result = (decrypt-backup $encrypted_file $encryption_key $compressed_file)

    if (not $decrypt_result.success) {
        print $"ERROR: Decryption failed: ($decrypt_result.error)"
        cleanup-temp-files $work_path | ignore
        exit 1
    }

    print "✓ Backup decrypted"

    # Decompress (gunzip replaces the .gz file with the plain SQL file)
    let decompress_result = (decompress-backup $compressed_file)

    if (not $decompress_result.success) {
        print $"ERROR: Decompression failed: ($decompress_result.error)"
        cleanup-temp-files $work_path | ignore
        exit 1
    }

    print "✓ Backup decompressed"

    # Scale down database (for PVC replacement); failure is reported but not fatal.
    let scale_down_result = (scale-statefulset-down $namespace $statefulset)
    if (not $scale_down_result.success) {
        print $"WARNING: Scale down failed: ($scale_down_result.error)"
    } else {
        print "✓ StatefulSet scaled down"
    }

    # Wait for pod termination
    print "Waiting for pod termination..."
    sleep 5sec

    # Delete PVC; failure is reported but not fatal.
    let delete_pvc_result = (delete-pvc $namespace $pvc)
    if (not $delete_pvc_result.success) {
        print $"WARNING: PVC deletion failed: ($delete_pvc_result.error)"
    } else {
        print "✓ PVC deleted"
    }

    # Scale up database (creates new PVC)
    let scale_up_result = (scale-statefulset-up $namespace $statefulset 1)
    if (not $scale_up_result.success) {
        print $"ERROR: Scale up failed: ($scale_up_result.error)"
        cleanup-temp-files $work_path | ignore
        exit 1
    }

    print "✓ StatefulSet scaled up"

    # Wait for pod ready (the first StatefulSet pod is named <statefulset>-0)
    let wait_result = (wait-for-pod-ready $namespace $"($statefulset)-0" 120)
    if (not $wait_result.success) {
        print $"ERROR: Pod failed to become ready: ($wait_result.error)"
        cleanup-temp-files $work_path | ignore
        exit 1
    }

    print "✓ Pod is ready"

    # Check database readiness
    let db_ready = (check-database-ready $surreal_url $surreal_user $surreal_pass 30)
    if (not $db_ready.success) {
        print $"ERROR: Database not ready: ($db_ready.error)"
        cleanup-temp-files $work_path | ignore
        exit 1
    }

    print "✓ Database is ready"

    # Import backup
    let import_result = (import-to-temp-database $decompress_result.decompressed_file $surreal_url $surreal_user $surreal_pass)

    if (not $import_result.success) {
        print $"ERROR: Database import failed: ($import_result.error)"
        cleanup-temp-files $work_path | ignore
        exit 1
    }

    print "✓ Backup imported"

    # Verify data (optional; a failed check is reported as a warning only)
    if $verify {
        let verify_result = (verify-database $surreal_url $surreal_user $surreal_pass)
        if (not $verify_result.success) {
            print $"WARNING: Verification failed: ($verify_result.error)"
        } else {
            print "✓ Database verified"
            print $verify_result.verification
        }
    }

    # Cleanup
    let cleanup_result = (cleanup-temp-files $work_path)
    if (not $cleanup_result.success) {
        print $"WARNING: Cleanup failed: ($cleanup_result.error)"
    }

    # Summary
    print ""
    print "=== Recovery Complete ==="
    print $"Database URL: ($surreal_url)"
    print $"Namespace: ($namespace)"
    print $"Timestamp: (get-timestamp)"
}
|