| 2 |
2 |
|
# Sync database backups to offsite host (astra) via Tailscale.
|
| 3 |
3 |
|
# Called by backup-db.sh after each successful backup.
|
| 4 |
4 |
|
#
|
| 5 |
|
- |
# Setup on astra:
|
|
5 |
+ |
# Usage: sync-backup-offsite.sh <db_name> <backup_file>
|
|
6 |
+ |
# db_name — database name (used for subdir and prune glob)
|
|
7 |
+ |
# backup_file — absolute path to the timestamped .sql.gz produced by this run
|
|
8 |
+ |
#
|
|
9 |
+ |
# On astra, backups land in /opt/backups/mnw/<db_name>/, with a per-DB
|
|
10 |
+ |
# latest.sql.gz hard link maintained for downstream pullers (sando).
|
|
11 |
+ |
#
|
|
12 |
+ |
# Setup on astra (one-time):
|
| 6 |
13 |
|
# mkdir -p /opt/backups/mnw
|
|
14 |
+ |
# chown max:max /opt/backups/mnw
|
| 7 |
15 |
|
#
|
| 8 |
|
- |
# Setup on Hetzner (as makenotwork user):
|
| 9 |
|
- |
# Ensure SSH key-based auth to astra is configured:
|
| 10 |
|
- |
# ssh-keygen -t ed25519 -f ~/.ssh/id_ed25519 -N ""
|
| 11 |
|
- |
# ssh-copy-id max@100.106.221.39
|
| 12 |
|
- |
# Test: ssh max@100.106.221.39 "echo ok"
|
|
16 |
+ |
# Setup on Hetzner: tailnet ACL grants tag:prod -> max@tag:testing SSH (no
|
|
17 |
+ |
# pubkey wrangling — Tailscale SSH bypasses authorized_keys via tailnet cert).
|
| 13 |
18 |
|
|
| 14 |
19 |
|
set -euo pipefail
|
| 15 |
20 |
|
|
|
21 |
+ |
DB_NAME="${1:?usage: sync-backup-offsite.sh <db_name> <backup_file>}"
|
|
22 |
+ |
BACKUP_FILE="${2:?usage: sync-backup-offsite.sh <db_name> <backup_file>}"
|
|
23 |
+ |
|
| 16 |
24 |
|
OFFSITE_HOST="100.106.221.39" # astra (Tailscale IP)
|
| 17 |
25 |
|
OFFSITE_USER="max"
|
| 18 |
|
- |
OFFSITE_DIR="/opt/backups/mnw"
|
| 19 |
|
- |
BACKUP_DIR="/opt/makenotwork/backups"
|
| 20 |
|
- |
DB_NAME="makenotwork"
|
|
26 |
+ |
OFFSITE_DIR="/opt/backups/mnw/${DB_NAME}"
|
| 21 |
27 |
|
OFFSITE_RETENTION_DAYS=30
|
| 22 |
28 |
|
WAM_URL="${WAM_URL:-http://127.0.0.1:7890}"
|
| 23 |
29 |
|
|
| 24 |
|
- |
# Open a WAM ticket for offsite backup failures.
|
| 25 |
30 |
|
wam_alert() {
|
| 26 |
31 |
|
local title="$1"
|
| 27 |
32 |
|
local body="${2:-}"
|
| 31 |
36 |
|
>/dev/null 2>&1 || true
|
| 32 |
37 |
|
}
|
| 33 |
38 |
|
|
| 34 |
|
- |
# Find the most recent backup
|
| 35 |
|
- |
LATEST=$(ls -t "${BACKUP_DIR}/${DB_NAME}"-*.sql.gz 2>/dev/null | head -1)
|
| 36 |
|
- |
if [ -z "$LATEST" ]; then
|
| 37 |
|
- |
echo "[$(date -Iseconds)] OFFSITE: No backups found to sync"
|
|
39 |
+ |
if [ ! -f "$BACKUP_FILE" ]; then
|
|
40 |
+ |
echo "[$(date -Iseconds)] OFFSITE(${DB_NAME}): backup file missing: $BACKUP_FILE"
|
| 38 |
41 |
|
exit 0
|
| 39 |
42 |
|
fi
|
| 40 |
43 |
|
|
| 41 |
|
- |
echo "[$(date -Iseconds)] OFFSITE: Syncing $(basename "$LATEST") to ${OFFSITE_HOST}:${OFFSITE_DIR}"
|
|
44 |
+ |
BASENAME=$(basename "$BACKUP_FILE")
|
|
45 |
+ |
SSH_OPTS="-o ConnectTimeout=10 -o StrictHostKeyChecking=accept-new"
|
| 42 |
46 |
|
|
| 43 |
|
- |
# Transfer with compression (already gzipped, so -z won't help much, but
|
| 44 |
|
- |
# rsync handles partial transfers and resume on failure)
|
| 45 |
|
- |
if rsync -e "ssh -o ConnectTimeout=10 -o StrictHostKeyChecking=accept-new" \
|
| 46 |
|
- |
--timeout=120 \
|
| 47 |
|
- |
"$LATEST" \
|
|
47 |
+ |
echo "[$(date -Iseconds)] OFFSITE(${DB_NAME}): Syncing ${BASENAME} to ${OFFSITE_HOST}:${OFFSITE_DIR}"
|
|
48 |
+ |
|
|
49 |
+ |
# Ensure the per-DB offsite dir exists.
|
|
50 |
+ |
# Best-effort: with `set -e` an ssh failure here would abort the script before the rsync failure branch (WAM alert + exit 0) can run; connectivity/auth problems are reported by the rsync step below.
ssh ${SSH_OPTS} "${OFFSITE_USER}@${OFFSITE_HOST}" "mkdir -p '${OFFSITE_DIR}'" || true
|
|
51 |
+ |
|
|
52 |
+ |
if rsync -e "ssh ${SSH_OPTS}" --timeout=120 \
|
|
53 |
+ |
"$BACKUP_FILE" \
|
| 48 |
54 |
|
"${OFFSITE_USER}@${OFFSITE_HOST}:${OFFSITE_DIR}/"; then
|
| 49 |
|
- |
echo "[$(date -Iseconds)] OFFSITE: Transfer complete"
|
|
55 |
+ |
echo "[$(date -Iseconds)] OFFSITE(${DB_NAME}): Transfer complete"
|
| 50 |
56 |
|
else
|
| 51 |
|
- |
echo "[$(date -Iseconds)] OFFSITE: Transfer FAILED (astra unreachable or SSH error)"
|
| 52 |
|
- |
wam_alert "Offsite backup sync failed" "rsync to ${OFFSITE_HOST}:${OFFSITE_DIR} failed for $(basename "$LATEST"). Check Tailscale connectivity and SSH auth."
|
|
57 |
+ |
echo "[$(date -Iseconds)] OFFSITE(${DB_NAME}): Transfer FAILED (astra unreachable or SSH error)"
|
|
58 |
+ |
wam_alert "Offsite backup sync failed (${DB_NAME})" "rsync to ${OFFSITE_HOST}:${OFFSITE_DIR} failed for ${BASENAME}. Check Tailscale connectivity and SSH auth."
|
| 53 |
59 |
|
exit 0
|
| 54 |
60 |
|
fi
|
| 55 |
61 |
|
|
| 56 |
|
- |
# Prune old offsite backups
|
| 57 |
|
- |
DELETED=$(ssh -o ConnectTimeout=10 "${OFFSITE_USER}@${OFFSITE_HOST}" \
|
|
62 |
+ |
# Refresh the per-DB latest.sql.gz hard link on astra (atomic temp-then-rename).
|
|
63 |
+ |
ssh ${SSH_OPTS} "${OFFSITE_USER}@${OFFSITE_HOST}" "
|
|
64 |
+ |
set -e
|
|
65 |
+ |
cd '${OFFSITE_DIR}'
|
|
66 |
+ |
ln -f '${BASENAME}' latest.sql.gz.new
|
|
67 |
+ |
mv -Tf latest.sql.gz.new latest.sql.gz
|
|
68 |
+ |
" || echo "[$(date -Iseconds)] OFFSITE(${DB_NAME}): WARNING — failed to refresh latest.sql.gz"
|
|
69 |
+ |
|
|
70 |
+ |
# Prune offsite backups older than retention.
|
|
71 |
+ |
DELETED=$(ssh ${SSH_OPTS} "${OFFSITE_USER}@${OFFSITE_HOST}" \
|
| 58 |
72 |
|
"find '${OFFSITE_DIR}' -name '${DB_NAME}-*.sql.gz' -mtime +${OFFSITE_RETENTION_DAYS} -delete -print 2>/dev/null | wc -l" \
|
| 59 |
73 |
|
2>/dev/null || echo "0")
|
| 60 |
74 |
|
if [ "$DELETED" -gt 0 ]; then
|
| 61 |
|
- |
echo "[$(date -Iseconds)] OFFSITE: Pruned ${DELETED} backup(s) older than ${OFFSITE_RETENTION_DAYS} days"
|
|
75 |
+ |
echo "[$(date -Iseconds)] OFFSITE(${DB_NAME}): Pruned ${DELETED} backup(s) older than ${OFFSITE_RETENTION_DAYS} days"
|
| 62 |
76 |
|
fi
|
| 63 |
77 |
|
|
| 64 |
|
- |
TOTAL=$(ssh -o ConnectTimeout=10 "${OFFSITE_USER}@${OFFSITE_HOST}" \
|
|
78 |
+ |
TOTAL=$(ssh ${SSH_OPTS} "${OFFSITE_USER}@${OFFSITE_HOST}" \
|
| 65 |
79 |
|
"ls '${OFFSITE_DIR}/${DB_NAME}'-*.sql.gz 2>/dev/null | wc -l" \
|
| 66 |
80 |
|
2>/dev/null || echo "?")
|
| 67 |
|
- |
echo "[$(date -Iseconds)] OFFSITE: Total backups on astra: ${TOTAL}"
|
|
81 |
+ |
echo "[$(date -Iseconds)] OFFSITE(${DB_NAME}): Total backups on astra: ${TOTAL}"
|