feat(backup): domedog-side pull script for Forgejo+Vault #106
2 changed files with 144 additions and 0 deletions
119
scripts/clawdie-backup-pull.sh
Executable file
119
scripts/clawdie-backup-pull.sh
Executable file
|
|
@ -0,0 +1,119 @@
|
|||
#!/usr/bin/env bash
|
||||
# clawdie-backup-pull — domedog-side PULL of the Vultr Forgejo + Vaultwarden backups.
|
||||
#
|
||||
# WHY PULL: the backup host initiates and holds the data. A compromised source
|
||||
# (the Vultr box) can never reach into or destroy this host's backup history.
|
||||
#
|
||||
# WHAT IT DOES (per the HOST-MATRIX §4 backup-independence constraint):
|
||||
# 1. rsync-pulls the *current* dumps from the Vultr box (read-only, over Tailscale/SSH)
|
||||
# 2. verifies integrity (forgejo dump zip + vault sqlite PRAGMA integrity_check)
|
||||
# 3. encrypts the snapshot at rest with `age` (the vault dump is secret material)
|
||||
# 4. keeps dated snapshots with retention (versioned, not a single overwritten mirror)
|
||||
# 5. reports status to the Colibri board (optional, opt-in)
|
||||
#
|
||||
# VULTR SIDE CONTRACT (its responsibility — produce consistent dumps, expose read-only):
|
||||
# In a remote dumps dir (e.g. /var/backups/clawdie/), a cron produces:
|
||||
# - forgejo-*.zip via `forgejo dump` (repos + DB + config + LFS)
|
||||
# - vault*/db.sqlite3 via `sqlite3 … ".backup"` (consistent online copy)
|
||||
# vault*/{attachments,sends,config.json} (the rest of the data dir)
|
||||
# Use the apps' native dump tools — a raw rsync of a live data dir risks an
|
||||
# inconsistent SQLite copy and can MISS an external Forgejo DB (= all issues/PRs).
|
||||
#
|
||||
# CONFIG — no secrets/hosts live in this file. Put them in:
|
||||
# ~/.config/clawdie-backup/backup.env (gitignored; see clawdie-backup.env.example)
|
||||
# BACKUP_SRC rsync src, e.g. user@vultr-tailnet-name:/var/backups/clawdie/
|
||||
# BACKUP_DEST local snapshot root (default: $HOME/clawdie-backups)
|
||||
# BACKUP_KEEP dated snapshots to retain (default: 14)
|
||||
# BACKUP_AGE_RECIPIENT age recipient pubkey string, OR a path to a recipients file
|
||||
# COLIBRI_SOCKET optional; if set with BACKUP_TASK_ID, report board status
|
||||
# BACKUP_TASK_ID optional Colibri task to transition done/failed
|
||||
|
||||
# Strict mode: stop on unhandled errors, on unset variables, and on a failure
# in any stage of a pipeline.
set -euo pipefail

# Config file location; CLAWDIE_BACKUP_CONF overrides the default path.
CONF="${CLAWDIE_BACKUP_CONF:-$HOME/.config/clawdie-backup/backup.env}"
if [ -f "$CONF" ]; then
  # shellcheck disable=SC1090
  . "$CONF"
fi

# Fill in defaults for anything the config/environment left unset or empty.
: "${BACKUP_DEST:=$HOME/clawdie-backups}"
: "${BACKUP_KEEP:=14}"

# UTC timestamp that names this run's snapshot directory and archive.
STAMP="$(date -u +%Y%m%dT%H%M%SZ)"
# Private scratch directory that receives the pulled dumps.
STAGE="$(mktemp -d "${TMPDIR:-/tmp}/clawdie-backup.XXXXXX")"
SNAP="$BACKUP_DEST/$STAMP"
|
||||
|
||||
# Write one diagnostic line to stderr: "<HH:MM:SSZ in UTC> clawdie-backup: <message>".
# Arguments: the message words (joined into a single string).
log() {
  local msg="$*"
  printf '%s clawdie-backup: %s\n' "$(date -u +%H:%M:%SZ)" "$msg" >&2
}
|
||||
# Remove the staging directory ($STAGE) and everything pulled into it.
cleanup() {
  rm -rf "$STAGE"
}
|
||||
trap cleanup EXIT
|
||||
|
||||
# Best-effort board status (non-fatal). Transitions a pre-created task if configured.
# Arguments: $1 - status to transition the task to (callers pass "done" / "failed")
# Globals:   COLIBRI_SOCKET, BACKUP_TASK_ID (read; both must be non-empty to opt in)
# Returns:   always 0 — a reporting problem must never fail the backup itself.
report() {
  local status="$1"
  local helper="$HOME/.colibri/colibri_cmd.py"
  local payload

  # Opt-in only: silently do nothing unless the socket and task id are
  # configured, the helper script exists, and python3 is available.
  [ -n "${COLIBRI_SOCKET:-}" ] && [ -n "${BACKUP_TASK_ID:-}" ] \
    && [ -f "$helper" ] && command -v python3 >/dev/null 2>&1 || return 0

  payload="{\"cmd\":\"transition-task\",\"task_id\":\"${BACKUP_TASK_ID}\",\"status\":\"${status}\"}"

  # COLIBRI_SOCKET normally comes from the sourced config, where it is a plain
  # (non-exported) shell variable, so hand it to the child explicitly.
  # NOTE(review): assumes the helper locates the socket via the COLIBRI_SOCKET
  # environment variable — confirm against colibri_cmd.py.
  COLIBRI_SOCKET="$COLIBRI_SOCKET" python3 "$helper" "$payload" \
    >/dev/null 2>&1 || true
}
|
||||
|
||||
# Abort the run: log the reason with a "FAILED: " prefix, report the
# "failed" status via report(), then exit with status 1.
# Arguments: the failure reason (all words are joined into the message).
fail() {
  log "FAILED: $*"
  report failed
  exit 1
}
|
||||
|
||||
# The rsync source is the one setting with no default; refuse to run without it.
if [ -z "${BACKUP_SRC:-}" ]; then
  fail "BACKUP_SRC not set (configure $CONF)"
fi

# Snapshot root is owner-only.
mkdir -p "$BACKUP_DEST"
chmod 700 "$BACKUP_DEST"

# Single-run lock so overlapping crons can't race.
# fd 9 stays open for the rest of the script; -n makes flock give up
# immediately instead of waiting when another run holds the lock.
exec 9>"$BACKUP_DEST/.lock"
if ! flock -n 9; then
  fail "another backup run is in progress"
fi
|
||||
|
||||
# 1. PULL (read-only — we only ever read from the remote; never --delete, never write back).
#    Pin the remote host key in ~/.ssh/known_hosts; accept-new is first-use TOFU only.
#    The \$ below is escaped, so the log line shows the variable name rather
#    than the configured source.
log "pulling current dumps from \$BACKUP_SRC (read-only)"
pull_cmd=(
  rsync -az --timeout=180
  -e "ssh -o BatchMode=yes -o StrictHostKeyChecking=accept-new"
  "$BACKUP_SRC" "$STAGE/"
)
if ! "${pull_cmd[@]}"; then
  fail "rsync pull failed"
fi
|
||||
|
||||
# 2. VERIFY integrity before we trust it as a backup.
#    Any failed check aborts the run through fail(), before a dated snapshot
#    is written.
shopt -s nullglob   # an unmatched forgejo-*.zip glob must yield an empty array

# Without unzip every archive would be misreported as "corrupt"; name the real problem.
command -v unzip >/dev/null 2>&1 \
  || fail "unzip not installed; cannot verify forgejo dumps"

fdumps=( "$STAGE"/forgejo-*.zip )
[ "${#fdumps[@]}" -ge 1 ] || fail "no forgejo-*.zip in pull"
for z in "${fdumps[@]}"; do
  [ -s "$z" ] || fail "empty forgejo dump: $z"
  unzip -tqq "$z" >/dev/null 2>&1 || fail "corrupt forgejo dump: $z"
done

mapfile -t vdbs < <(find "$STAGE" -type f -name db.sqlite3)
[ "${#vdbs[@]}" -ge 1 ] || fail "no vault db.sqlite3 in pull"

# The sqlite check stays optional (best-effort), but skipping it is now said
# out loud instead of being logged as a clean integrity pass.
have_sqlite=0
if command -v sqlite3 >/dev/null 2>&1; then
  have_sqlite=1
else
  log "WARNING: sqlite3 not installed — vault db integrity_check SKIPPED (non-empty check only)"
fi

for db in "${vdbs[@]}"; do
  [ -s "$db" ] || fail "empty vault db: $db"
  if [ "$have_sqlite" -eq 1 ]; then
    # integrity_check prints exactly "ok" for a healthy database.
    [ "$(sqlite3 "$db" 'PRAGMA integrity_check;' 2>/dev/null)" = "ok" ] \
      || fail "vault db integrity_check failed: $db"
  fi
done

if [ "$have_sqlite" -eq 1 ]; then
  log "integrity OK (${#fdumps[@]} forgejo dump(s), ${#vdbs[@]} vault db(s))"
else
  log "integrity PARTIAL (${#fdumps[@]} forgejo dump(s) verified, ${#vdbs[@]} vault db(s) NOT integrity-checked)"
fi
|
||||
|
||||
# 3. ENCRYPT AT REST — the vault dump is the fleet's secret store.
#    The whole staging dir is archived into one file inside the dated snapshot
#    dir: age-encrypted when a recipient is configured and `age` is installed,
#    otherwise a plain tar.gz with a loud warning.
mkdir -p "$SNAP"; chmod 700 "$SNAP"

# A half-written archive must not survive as a dated snapshot: it would count
# toward retention and could eventually push out a good one. Remove the
# snapshot dir, then fail the run the usual way (log + board report + exit 1).
abort_snapshot() {
  rm -rf -- "${SNAP:?}"
  fail "$*"
}

if [ -n "${BACKUP_AGE_RECIPIENT:-}" ] && command -v age >/dev/null 2>&1; then
  archive="$SNAP/clawdie-backup-$STAMP.tar.gz.age"
  # An existing file is treated as a recipients file (-R); anything else as a
  # recipient string (-r).
  if [ -f "$BACKUP_AGE_RECIPIENT" ]; then
    age_args=( -R "$BACKUP_AGE_RECIPIENT" )
  else
    age_args=( -r "$BACKUP_AGE_RECIPIENT" )
  fi
  # pipefail (set at the top) makes a tar failure fail the whole pipeline too.
  tar -C "$STAGE" -czf - . | age "${age_args[@]}" -o "$archive" \
    || abort_snapshot "age encryption failed"
  chmod 600 "$archive"
  log "encrypted snapshot -> $archive"
else
  archive="$SNAP/clawdie-backup-$STAMP.tar.gz"
  log "WARNING: BACKUP_AGE_RECIPIENT unset or 'age' missing — storing UNENCRYPTED."
  log "         The vault dump is secret material; configure age before relying on this."
  # Route a tar failure through the same failure path as the encrypted branch
  # so it is logged and reported instead of dying silently under set -e.
  tar -C "$STAGE" -czf "$archive" . || abort_snapshot "tar archive failed"
  chmod 600 "$archive"
fi
|
||||
|
||||
# 4. RETENTION — keep the newest BACKUP_KEEP dated snapshots (ISO names sort chronologically).
#
# prune_snapshots ROOT KEEP
#   Deletes the oldest snapshot directories under ROOT so that at most KEEP remain.
#   Only directories named like this script's own stamps (YYYYMMDDTHHMMSSZ) are
#   considered, so unrelated directories under ROOT are never touched.
#   KEEP must be a positive integer; anything else logs a warning and prunes
#   nothing (a value of 0 would otherwise delete the snapshot just created).
#   Returns 0 in all of these cases.
prune_snapshots() {
  local root="$1" keep="$2"
  local -a dated=()
  local dir excess

  [ -n "$root" ] || return 0

  if [[ ! "$keep" =~ ^[0-9]+$ ]] || (( 10#$keep < 1 )); then
    log "WARNING: BACKUP_KEEP='$keep' is not a positive integer — skipping retention (nothing pruned)"
    return 0
  fi
  keep=$(( 10#$keep ))   # force base 10 so values like "08" are not read as octal

  # Glob expansion is sorted, and fixed-width UTC stamps sort oldest-first.
  # The -d/-L tests also cover the case where the pattern matched nothing and
  # was left as a literal string.
  for dir in "$root"/[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]T[0-9][0-9][0-9][0-9][0-9][0-9]Z/; do
    if [ -d "$dir" ] && [ ! -L "${dir%/}" ]; then
      dated+=( "$dir" )
    fi
  done

  excess=$(( ${#dated[@]} - keep ))
  if (( excess > 0 )); then
    for dir in "${dated[@]:0:excess}"; do
      rm -rf -- "$dir"
      log "pruned old snapshot: $dir"
    done
  fi
}

prune_snapshots "${BACKUP_DEST:-}" "${BACKUP_KEEP:-}"
|
||||
|
||||
# Success path: announce where the snapshot landed, then report the "done"
# status via report().
log "backup complete: $SNAP (retaining $BACKUP_KEEP)"
report "done"
|
||||
25
scripts/clawdie-backup.env.example
Normal file
25
scripts/clawdie-backup.env.example
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
# clawdie-backup-pull config — copy to ~/.config/clawdie-backup/backup.env (chmod 600).
|
||||
# The copied backup.env is GITIGNORED on purpose: it holds the source host + the age recipient.
|
||||
# Keep real hosts/keys out of the repo.
|
||||
|
||||
# rsync source = the Vultr box's read-only dumps dir, reached over Tailscale.
|
||||
# Use a Tailscale name (not a public IP), and a read-only/forced-command SSH key.
|
||||
BACKUP_SRC="user@vultr-tailnet-name:/var/backups/clawdie/"
|
||||
|
||||
# Local snapshot root on domedog (plenty of room; ~1-2 GB per snapshot).
|
||||
BACKUP_DEST="$HOME/clawdie-backups"
|
||||
|
||||
# How many dated snapshots to keep (e.g. 14 daily).
|
||||
BACKUP_KEEP="14"
|
||||
|
||||
# age recipient for encryption-at-rest. Either a public key string:
|
||||
# BACKUP_AGE_RECIPIENT="age1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
|
||||
# or a path to a recipients file (one age pubkey per line):
|
||||
# BACKUP_AGE_RECIPIENT="$HOME/.config/clawdie-backup/recipients.txt"
|
||||
# The private key stays OFF this host (operator holds it) so a domedog compromise
|
||||
# can't decrypt the vault backup.
|
||||
BACKUP_AGE_RECIPIENT=""
|
||||
|
||||
# Optional: report backup health to the Colibri board by transitioning a task.
|
||||
# COLIBRI_SOCKET="$HOME/.colibri/colibri.sock"
|
||||
# BACKUP_TASK_ID="<uuid of a scheduled 'backup' task assigned to domedog>"
|
||||
Loading…
Add table
Reference in a new issue