feat(backup): domedog-side pull script for Forgejo+Vault #106

Merged
clawdie merged 1 commit from feat/backup-pull-domedog into main 2026-06-20 14:28:08 +02:00
2 changed files with 144 additions and 0 deletions

119
scripts/clawdie-backup-pull.sh Executable file
View file

@ -0,0 +1,119 @@
#!/usr/bin/env bash
# clawdie-backup-pull — domedog-side PULL of the Vultr Forgejo + Vaultwarden backups.
#
# WHY PULL: the backup host initiates and holds the data. A compromised source
# (the Vultr box) can never reach into or destroy this host's backup history.
#
# WHAT IT DOES (per the HOST-MATRIX §4 backup-independence constraint):
# 1. rsync-pulls the *current* dumps from the Vultr box (read-only, over Tailscale/SSH)
# 2. verifies integrity (forgejo dump zip + vault sqlite PRAGMA integrity_check)
# 3. encrypts the snapshot at rest with `age` (the vault dump is secret material)
# 4. keeps dated snapshots with retention (versioned, not a single overwritten mirror)
# 5. reports status to the Colibri board (optional, opt-in)
#
# VULTR SIDE CONTRACT (its responsibility — produce consistent dumps, expose read-only):
# In a remote dumps dir (e.g. /var/backups/clawdie/), a cron produces:
# - forgejo-*.zip via `forgejo dump` (repos + DB + config + LFS)
# - vault*/db.sqlite3 via `sqlite3 … ".backup"` (consistent online copy)
# vault*/{attachments,sends,config.json} (the rest of the data dir)
# Use the apps' native dump tools — a raw rsync of a live data dir risks an
# inconsistent SQLite copy and can MISS an external Forgejo DB (= all issues/PRs).
#
# CONFIG — no secrets/hosts live in this file. Put them in:
# ~/.config/clawdie-backup/backup.env (gitignored; see clawdie-backup.env.example),
# or point CLAWDIE_BACKUP_CONF at a different env file. Variables read from it:
# BACKUP_SRC rsync src, e.g. user@vultr-tailnet-name:/var/backups/clawdie/
# BACKUP_DEST local snapshot root (default: $HOME/clawdie-backups)
# BACKUP_KEEP dated snapshots to retain (default: 14)
# BACKUP_AGE_RECIPIENT age recipient pubkey string, OR a path to a recipients file
# COLIBRI_SOCKET optional; if set with BACKUP_TASK_ID, report board status
# BACKUP_TASK_ID optional Colibri task to transition done/failed
# Strict mode: stop on command errors, unset variables, and failures inside pipelines.
set -euo pipefail

# Operator config is optional; CLAWDIE_BACKUP_CONF overrides the default location.
CONF="${CLAWDIE_BACKUP_CONF:-$HOME/.config/clawdie-backup/backup.env}"
if [ -f "$CONF" ]; then
  # shellcheck disable=SC1090
  . "$CONF"
fi

# Defaults for anything the config left unset or empty.
: "${BACKUP_DEST:=$HOME/clawdie-backups}"
: "${BACKUP_KEEP:=14}"

# Per-run names: UTC timestamp, a fresh staging dir, and the dated snapshot dir.
STAMP="$(date -u +%Y%m%dT%H%M%SZ)"
STAGE="$(mktemp -d "${TMPDIR:-/tmp}/clawdie-backup.XXXXXX")"
SNAP="$BACKUP_DEST/$STAMP"
# Write one timestamped (UTC, HH:MM:SSZ) diagnostic line to stderr.
# Arguments: all arguments are joined into the message text.
log() {
  local -r tag='clawdie-backup'
  printf '%s %s: %s\n' "$(date -u +%H:%M:%SZ)" "$tag" "$*" >&2
}
# Remove the staging directory; registered below so it runs on every exit path.
cleanup() {
  rm -rf "$STAGE"
}
trap 'cleanup' EXIT
# Best-effort board status (non-fatal). Transitions a pre-created task if configured.
# Arguments: $1 - status to transition the task to (the script passes "done" or "failed").
# Returns:   always 0. It is a silent no-op unless COLIBRI_SOCKET and BACKUP_TASK_ID
#            are both set, the helper script exists, and python3 is on PATH.
report() {
  local new_state="$1"
  local cmd_script="$HOME/.colibri/colibri_cmd.py"
  local payload

  # Guard clauses: any missing prerequisite means "reporting not configured".
  if [ -z "${COLIBRI_SOCKET:-}" ] || [ -z "${BACKUP_TASK_ID:-}" ]; then
    return 0
  fi
  if [ ! -f "$cmd_script" ]; then
    return 0
  fi
  if ! command -v python3 >/dev/null 2>&1; then
    return 0
  fi

  # The helper receives the command as a single JSON argument.
  printf -v payload '{"cmd":"transition-task","task_id":"%s","status":"%s"}' \
    "$BACKUP_TASK_ID" "$new_state"
  # Helper output and failures are deliberately ignored: board status must never
  # decide the outcome of the backup itself.
  python3 "$cmd_script" "$payload" >/dev/null 2>&1 || true
}
# Abort the run: log the reason, mark the board task failed (best-effort), exit 1.
# Arguments: all arguments form the failure message.
fail() {
  log "FAILED: $*"
  report failed
  exit 1
}
# A pull source is mandatory; everything else has a default.
if [ -z "${BACKUP_SRC:-}" ]; then
  fail "BACKUP_SRC not set (configure $CONF)"
fi

# Snapshot root is owner-only: it holds the (possibly unencrypted) vault dump.
mkdir -p "$BACKUP_DEST"
chmod 700 "$BACKUP_DEST"

# Single-run lock so overlapping crons can't race. fd 9 stays open for the rest of
# the script; -n makes a second run give up immediately instead of queueing.
exec 9>"$BACKUP_DEST/.lock"
if ! flock -n 9; then
  fail "another backup run is in progress"
fi
# 1. PULL (read-only — we only ever read from the remote; never --delete, never write back).
# Pin the remote host key in ~/.ssh/known_hosts; accept-new is first-use TOFU only.
# NOTE(review): the escaped \$ makes this log the literal text "$BACKUP_SRC" rather
# than the configured source — confirm whether hiding the host from logs is intended.
log "pulling current dumps from \$BACKUP_SRC (read-only)"

# BatchMode=yes: never prompt (cron has no TTY), so a missing key fails instead of hanging.
ssh_cmd="ssh -o BatchMode=yes -o StrictHostKeyChecking=accept-new"
if ! rsync -az --timeout=180 -e "$ssh_cmd" "$BACKUP_SRC" "$STAGE/"; then
  fail "rsync pull failed"
fi
# 2. VERIFY integrity before we trust it as a backup.
shopt -s nullglob   # an unmatched glob must give an empty array, not the literal pattern

# Forgejo: at least one non-empty dump whose zip archive tests clean.
fdumps=( "$STAGE"/forgejo-*.zip )
[ "${#fdumps[@]}" -ge 1 ] || fail "no forgejo-*.zip in pull"
# Without unzip every dump would be misreported as "corrupt"; name the real cause.
command -v unzip >/dev/null 2>&1 || fail "unzip not installed; cannot verify forgejo dumps"
for z in "${fdumps[@]}"; do
  [ -s "$z" ] || fail "empty forgejo dump: $z"
  unzip -tqq "$z" >/dev/null 2>&1 || fail "corrupt forgejo dump: $z"
done

# Vault: at least one non-empty db.sqlite3 anywhere in the pull.
mapfile -t vdbs < <(find "$STAGE" -type f -name db.sqlite3)
[ "${#vdbs[@]}" -ge 1 ] || fail "no vault db.sqlite3 in pull"

# The SQLite integrity check stays optional (no sqlite3 -> size check only), but the
# skip is now announced so the final log line never claims a check that did not run.
have_sqlite3=0
if command -v sqlite3 >/dev/null 2>&1; then
  have_sqlite3=1
else
  log "WARNING: sqlite3 not installed — vault db integrity_check SKIPPED (non-empty check only)."
fi
for db in "${vdbs[@]}"; do
  [ -s "$db" ] || fail "empty vault db: $db"
  if [ "$have_sqlite3" -eq 1 ]; then
    # integrity_check prints exactly "ok" for a sound database; anything else
    # (error rows, or empty output because sqlite3 could not open it) is a failure.
    [ "$(sqlite3 "$db" 'PRAGMA integrity_check;' 2>/dev/null)" = "ok" ] \
      || fail "vault db integrity_check failed: $db"
  fi
done

if [ "$have_sqlite3" -eq 1 ]; then
  log "integrity OK (${#fdumps[@]} forgejo dump(s), ${#vdbs[@]} vault db(s))"
else
  log "integrity PARTIAL (${#fdumps[@]} forgejo dump(s) verified, ${#vdbs[@]} vault db(s) NOT integrity-checked)"
fi
# 3. ENCRYPT AT REST — the vault dump is the fleet's secret store.
mkdir -p "$SNAP"; chmod 700 "$SNAP"

# Abort helper for this step: remove the half-written snapshot directory first, so a
# failed run never leaves a dated directory that looks like a good backup and counts
# toward retention, then go through fail() so the failure is logged and reported.
# Arguments: failure message, passed on to fail().
_discard_snapshot_and_fail() {
  # Cleanup is best-effort; the failure must still be reported if rm itself fails.
  rm -rf -- "${SNAP:?}" || true
  fail "$@"
}

if [ -n "${BACKUP_AGE_RECIPIENT:-}" ] && command -v age >/dev/null 2>&1; then
  # An existing file is treated as a recipients file (-R); anything else as a
  # literal recipient public key (-r).
  if [ -f "$BACKUP_AGE_RECIPIENT" ]; then
    age_args=( -R "$BACKUP_AGE_RECIPIENT" )
  else
    age_args=( -r "$BACKUP_AGE_RECIPIENT" )
  fi
  archive="$SNAP/clawdie-backup-$STAMP.tar.gz.age"
  # Stream tar straight into age so plaintext never lands in the snapshot dir;
  # pipefail makes a tar failure fail the pipeline too.
  tar -C "$STAGE" -czf - . | age "${age_args[@]}" -o "$archive" \
    || _discard_snapshot_and_fail "age encryption failed"
  chmod 600 "$archive"
  log "encrypted snapshot -> $archive"
else
  log "WARNING: BACKUP_AGE_RECIPIENT unset or 'age' missing — storing UNENCRYPTED."
  log "         The vault dump is secret material; configure age before relying on this."
  archive="$SNAP/clawdie-backup-$STAMP.tar.gz"
  # Previously a tar failure here exited via set -e with no FAILED log or board report.
  tar -C "$STAGE" -czf "$archive" . \
    || _discard_snapshot_and_fail "tar archive failed"
  chmod 600 "$archive"
fi
# 4. RETENTION — keep the newest BACKUP_KEEP dated snapshots (ISO names sort chronologically).
#######################################
# Delete the oldest dated snapshot directories under a root, keeping the newest N.
# Only directories named like this script's STAMP (YYYYMMDDThhmmssZ) are candidates,
# so unrelated directories under the root are never removed.
# Arguments: $1 - snapshot root directory
#            $2 - number of snapshots to keep (positive decimal integer)
# Outputs:   one log line per pruned snapshot, or a warning if $2 is invalid
# Returns:   0 always; an invalid keep count prunes nothing (a bad retention setting
#            must not delete backups or fail an otherwise good run)
#######################################
prune_snapshots() {
  local root="$1" keep_raw="$2"
  local keep drop candidate old
  local -a snaps=()

  [ -n "$root" ] || return 0

  case "$keep_raw" in
    '' | *[!0-9]*)
      log "WARNING: BACKUP_KEEP='$keep_raw' is not a positive integer — skipping retention."
      return 0
      ;;
  esac
  # Force base 10 so a value like "014" is not read as octal by the arithmetic below.
  keep=$(( 10#$keep_raw ))
  if [ "$keep" -lt 1 ]; then
    log "WARNING: BACKUP_KEEP='$keep_raw' is not a positive integer — skipping retention."
    return 0
  fi

  # Glob expansion is sorted, and fixed-width UTC stamps sort chronologically.
  # The -d test also drops the literal pattern when nothing matches and nullglob is off.
  for candidate in "$root"/[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]T[0-9][0-9][0-9][0-9][0-9][0-9]Z/; do
    if [ -d "$candidate" ]; then
      snaps+=( "$candidate" )
    fi
  done

  if [ "${#snaps[@]}" -gt "$keep" ]; then
    drop=$(( ${#snaps[@]} - keep ))
    for old in "${snaps[@]:0:drop}"; do
      rm -rf -- "$old"
      log "pruned old snapshot: $old"
    done
  fi
  return 0
}

prune_snapshots "$BACKUP_DEST" "$BACKUP_KEEP"
log "backup complete: $SNAP (retaining $BACKUP_KEEP)"
report done

View file

@ -0,0 +1,25 @@
# clawdie-backup-pull config template — copy to ~/.config/clawdie-backup/backup.env (chmod 600).
# The copied backup.env is GITIGNORED on purpose: it holds the source host + the age recipient.
# Keep real hosts/keys out of the repo.
# The pull script sources this file as shell, so "$HOME" below is expanded at run time.

# rsync source = the Vultr box's read-only dumps dir, reached over Tailscale.
# Use a Tailscale name (not a public IP), and a read-only/forced-command SSH key.
# Keep the trailing slash: rsync then copies the directory's *contents* into the
# staging dir, which is where the script looks for forgejo-*.zip.
BACKUP_SRC="user@vultr-tailnet-name:/var/backups/clawdie/"

# Local snapshot root on domedog (plenty of room; ~1-2 GB per snapshot).
# The script creates it if needed and restricts it to mode 700.
BACKUP_DEST="$HOME/clawdie-backups"

# How many dated snapshots to keep (e.g. 14 daily).
BACKUP_KEEP="14"

# age recipient for encryption-at-rest. Either a public key string:
# BACKUP_AGE_RECIPIENT="age1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
# or a path to a recipients file (one age pubkey per line):
# BACKUP_AGE_RECIPIENT="$HOME/.config/clawdie-backup/recipients.txt"
# The private key stays OFF this host (operator holds it) so a domedog compromise
# can't decrypt the vault backup.
# WARNING: if this is left empty (or `age` is not installed) the script logs a
# warning and stores the snapshot UNENCRYPTED.
BACKUP_AGE_RECIPIENT=""

# Optional: report backup health to the Colibri board by transitioning a task.
# Both variables must be set; the script also needs python3 and
# ~/.colibri/colibri_cmd.py, otherwise reporting is silently skipped.
# COLIBRI_SOCKET="$HOME/.colibri/colibri.sock"
# BACKUP_TASK_ID="<uuid of a scheduled 'backup' task assigned to domedog>"