#!/usr/bin/env bash
# clawdie-backup-pull — domedog-side PULL of the Vultr Forgejo + Vaultwarden backups.
#
# WHY PULL: the backup host initiates and holds the data. A compromised source
# (the Vultr box) can never reach into or destroy this host's backup history.
#
# WHAT IT DOES (per the HOST-MATRIX §4 backup-independence constraint):
#   1. rsync-pulls the *current* dumps from the Vultr box (read-only, over Tailscale/SSH)
#   2. verifies integrity (forgejo dump zip + vault sqlite PRAGMA integrity_check)
#   3. encrypts the snapshot at rest with `age` (the vault dump is secret material);
#      if no recipient is configured or `age` is missing, it warns and stores the
#      archive UNENCRYPTED
#   4. keeps dated snapshots with retention (versioned, not a single overwritten mirror)
#   5. reports status to the Colibri board (optional, opt-in)
#
# VULTR SIDE CONTRACT (its responsibility — produce consistent dumps, expose read-only):
#   In a remote dumps dir (e.g. /var/backups/clawdie/), a cron produces:
#     - forgejo-*.zip      via `forgejo dump`          (repos + DB + config + LFS)
#     - vault*/db.sqlite3  via `sqlite3 … ".backup"`   (consistent online copy)
#       vault*/{attachments,sends,config.json}         (the rest of the data dir)
#   Use the apps' native dump tools — a raw rsync of a live data dir risks an
#   inconsistent SQLite copy and can MISS an external Forgejo DB (= all issues/PRs).
#
# CONFIG — no secrets/hosts live in this file. Put them in:
#   ~/.config/clawdie-backup/backup.env   (gitignored; see clawdie-backup.env.example)
#   (set CLAWDIE_BACKUP_CONF to load a different env file)
#     BACKUP_SRC            rsync src, e.g. user@vultr-tailnet-name:/var/backups/clawdie/
#     BACKUP_DEST           local snapshot root (default: $HOME/clawdie-backups)
#     BACKUP_KEEP           dated snapshots to retain; positive integer (default: 14)
#     BACKUP_AGE_RECIPIENT  age recipient pubkey string, OR a path to a recipients file
#     COLIBRI_SOCKET        optional; if set with BACKUP_TASK_ID, report board status
#     BACKUP_TASK_ID        optional Colibri task to transition done/failed
set -euo pipefail

# Load the optional env file; a missing file is not an error (BACKUP_SRC is
# checked explicitly further down).
CONF="${CLAWDIE_BACKUP_CONF:-$HOME/.config/clawdie-backup/backup.env}"
if [ -f "$CONF" ]; then
  # shellcheck disable=SC1090
  . "$CONF"
fi

BACKUP_DEST="${BACKUP_DEST:-$HOME/clawdie-backups}"
BACKUP_KEEP="${BACKUP_KEEP:-14}"
STAMP="$(date -u +%Y%m%dT%H%M%SZ)"
# Private staging dir for the pulled (still unencrypted) dumps; removed on exit.
STAGE="$(mktemp -d "${TMPDIR:-/tmp}/clawdie-backup.XXXXXX")"
SNAP="$BACKUP_DEST/$STAMP"

# log MESSAGE... — timestamped (UTC) diagnostic line on stderr.
log() { printf '%s clawdie-backup: %s\n' "$(date -u +%H:%M:%SZ)" "$*" >&2; }

# cleanup — EXIT trap: drop the staging dir. `:?` refuses to run rm on an empty path.
cleanup() { rm -rf -- "${STAGE:?}"; }
trap cleanup EXIT

# report STATUS — best-effort board status (non-fatal). Transitions a pre-created
# task if COLIBRI_SOCKET and BACKUP_TASK_ID are set and the helper + python3 exist;
# otherwise it silently does nothing. Always returns 0.
# NOTE(review): COLIBRI_SOCKET is only tested here, not passed on; presumably the
# helper reads it from the environment, so it must be exported — confirm.
report() {
  local status="$1"
  local helper="$HOME/.colibri/colibri_cmd.py"
  [ -n "${COLIBRI_SOCKET:-}" ] || return 0
  [ -n "${BACKUP_TASK_ID:-}" ] || return 0
  [ -f "$helper" ] || return 0
  command -v python3 >/dev/null 2>&1 || return 0
  # Helper failures are deliberately swallowed: board reporting must never
  # change the outcome of the backup itself.
  python3 "$helper" \
    "{\"cmd\":\"transition-task\",\"task_id\":\"${BACKUP_TASK_ID}\",\"status\":\"${status}\"}" \
    >/dev/null 2>&1 || true
}

# fail MESSAGE... — log the failure, report `failed` to the board, exit 1.
fail() { log "FAILED: $*"; report failed; exit 1; }

[ -n "${BACKUP_SRC:-}" ] || fail "BACKUP_SRC not set (configure $CONF)"

# Validate retention up front: a non-numeric value would silently disable
# pruning, and 0 would prune the snapshot this run creates. Normalise to
# base 10 so a value like "014" is not read as octal in later arithmetic.
[[ "$BACKUP_KEEP" =~ ^[0-9]+$ ]] \
  || fail "BACKUP_KEEP must be a positive integer (got '$BACKUP_KEEP')"
BACKUP_KEEP=$(( 10#$BACKUP_KEEP ))
(( BACKUP_KEEP >= 1 )) || fail "BACKUP_KEEP must be at least 1 (got $BACKUP_KEEP)"

# Check required tools explicitly; otherwise a missing flock would be
# misreported below as "another backup run is in progress".
for tool in rsync flock; do
  command -v "$tool" >/dev/null 2>&1 || fail "required tool not found: $tool"
done

mkdir -p "$BACKUP_DEST"
chmod 700 "$BACKUP_DEST"

# Single-run lock so overlapping crons can't race. fd 9 stays open (and the
# lock held) until the script exits.
exec 9>"$BACKUP_DEST/.lock"
flock -n 9 || fail "another backup run is in progress"

# 1. PULL (read-only — we only ever read from the remote; never --delete, never write back).
#    Pin the remote host key in ~/.ssh/known_hosts; accept-new is first-use TOFU only.
# NOTE(review): the escaped \$ makes this log the literal text "$BACKUP_SRC"
# rather than the host — presumably to keep hostnames out of logs; confirm.
log "pulling current dumps from \$BACKUP_SRC (read-only)"
rsync -az --timeout=180 \
  -e "ssh -o BatchMode=yes -o StrictHostKeyChecking=accept-new" \
  "$BACKUP_SRC" "$STAGE/" || fail "rsync pull failed"

# 2. VERIFY integrity before we trust it as a backup.
# nullglob: an unmatched pattern expands to nothing, so the array-length
# checks below really mean "no such file was pulled".
shopt -s nullglob

# --- Forgejo dumps: at least one, each non-empty and a readable zip. ---
fdumps=( "$STAGE"/forgejo-*.zip )
[ "${#fdumps[@]}" -ge 1 ] || fail "no forgejo-*.zip in pull"
# Without this check a missing unzip would be misreported as a corrupt dump.
command -v unzip >/dev/null 2>&1 \
  || fail "unzip not found — cannot verify forgejo dump(s)"
for z in "${fdumps[@]}"; do
  [ -s "$z" ] || fail "empty forgejo dump: $z"
  unzip -tqq "$z" >/dev/null 2>&1 || fail "corrupt forgejo dump: $z"
done

# --- Vault DBs: at least one db.sqlite3 anywhere under the staging dir. ---
mapfile -t vdbs < <(find "$STAGE" -type f -name db.sqlite3)
[ "${#vdbs[@]}" -ge 1 ] || fail "no vault db.sqlite3 in pull"
# The integrity check is best-effort: without sqlite3 only the non-empty test
# runs. Say so, instead of letting the "integrity OK" line imply a full check.
have_sqlite3=0
if command -v sqlite3 >/dev/null 2>&1; then
  have_sqlite3=1
else
  log "WARNING: sqlite3 not found — vault db integrity_check SKIPPED (size check only)"
fi
for db in "${vdbs[@]}"; do
  [ -s "$db" ] || fail "empty vault db: $db"
  if [ "$have_sqlite3" -eq 1 ]; then
    # Any sqlite3 error yields empty output, which also fails the comparison.
    [ "$(sqlite3 "$db" 'PRAGMA integrity_check;' 2>/dev/null)" = "ok" ] \
      || fail "vault db integrity_check failed: $db"
  fi
done
log "integrity OK (${#fdumps[@]} forgejo dump(s), ${#vdbs[@]} vault db(s))"

# 3. ENCRYPT AT REST — the vault dump is the fleet's secret store.
mkdir -p "$SNAP"
chmod 700 "$SNAP"
archive="$SNAP/clawdie-backup-$STAMP.tar.gz"

# abort_snapshot MESSAGE... — remove the half-written snapshot dir, then fail.
# A partial snapshot left behind would count toward retention and could push
# out a good one on a later run.
abort_snapshot() { rm -rf -- "${SNAP:?}"; fail "$@"; }

if [ -n "${BACKUP_AGE_RECIPIENT:-}" ] && command -v age >/dev/null 2>&1; then
  # A value naming an existing file is a recipients file (-R); anything else
  # is passed as a literal recipient (-r).
  if [ -f "$BACKUP_AGE_RECIPIENT" ]; then
    age_args=( -R "$BACKUP_AGE_RECIPIENT" )
  else
    age_args=( -r "$BACKUP_AGE_RECIPIENT" )
  fi
  # With pipefail (set at the top of the script) a tar failure fails the
  # pipeline too, not just an age failure.
  tar -C "$STAGE" -czf - . | age "${age_args[@]}" -o "$archive.age" \
    || abort_snapshot "age encryption failed"
  chmod 600 "$archive.age"
  log "encrypted snapshot -> $archive.age"
else
  log "WARNING: BACKUP_AGE_RECIPIENT unset or 'age' missing — storing UNENCRYPTED."
  log " The vault dump is secret material; configure age before relying on this."
  # Explicit failure path so the board gets a `failed` status instead of a
  # silent set -e exit.
  tar -C "$STAGE" -czf "$archive" . || abort_snapshot "tar archive failed"
  chmod 600 "$archive"
fi

# 4. RETENTION — keep the newest BACKUP_KEEP dated snapshots (ISO names sort chronologically).
# Match only directories named like our UTC stamp (YYYYMMDDTHHMMSSZ), so any
# unrelated directory under BACKUP_DEST is never counted or pruned. Glob
# results come back sorted, which for these fixed-width names is oldest first.
snaps=( "$BACKUP_DEST"/[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]T[0-9][0-9][0-9][0-9][0-9][0-9]Z/ )
if [[ "$BACKUP_KEEP" =~ ^[0-9]+$ ]] && (( 10#$BACKUP_KEEP >= 1 )); then
  keep=$(( 10#$BACKUP_KEEP ))   # force base 10: "014" must mean 14, not octal 12
  if (( ${#snaps[@]} > keep )); then
    drop=$(( ${#snaps[@]} - keep ))
    for old in "${snaps[@]:0:drop}"; do
      rm -rf -- "$old"
      log "pruned old snapshot: $old"
    done
  fi
else
  # Never prune on a bad setting: 0 would delete the snapshot just written.
  log "WARNING: BACKUP_KEEP='$BACKUP_KEEP' is not a positive integer — skipping retention"
fi

log "backup complete: $SNAP (retaining $BACKUP_KEEP)"
report done