fix(vault): wire seed bootstrap → vault-fetch path + harden agent dir validation

Address the 5 review concerns on the secrets-out-of-the-box feature:

1. Seed↔fetch path alignment: _seed_split_env routes BW_* creds out of .env
   into ~/.config/vault-bootstrap.env (SEED_VAULT_BOOTSTRAP_REL), the path
   clawdie-vault-fetch actually reads — so 'seed bootstrap → fetch out of
   the box' now lines up without an explicit --bootstrap arg.
2. Drop unused COLLECTION_ID from clawdie-vault-fetch. Items are fetched by
   name via 'bw get password', which is fail-closed on ambiguity; document
   that item names must be unique in the visible vault.
3. Agent dir validation: _seed_agent_name_ok rejects leading-dot dirs
   (.Spotlight-V100, .fseventsd) and traversal; _seed_agent_has_payload
   requires a recognized payload so an empty/stray dir can't become active.
4. No phantom homes: extra agent dirs stage under /var/db/clawdie/seed/<agent>
   only — _seed_stage_agent never writes a home or SSH keys.
5. Bootstrap file mode enforcement: clawdie-vault-fetch now stat-checks the
   bootstrap file and refuses to use it when any group or world permission
   bit is set, unless VAULT_ALLOW_INSECURE_BOOTSTRAP is set.

Also renames _seed_import_env → _seed_merge_env + _seed_split_env and adds
_seed_key_ok to guard env var names.

Checks: sh -n on vault-fetch/live-seed/build.sh; git diff --check;
./scripts/check-format.sh (prettier clean); all 5 concerns verified as addressed.

Co-Authored-By: Hermes & Sam <hello@clawdie.si>
This commit is contained in:
Sam & Claude 2026-06-19 18:24:55 +02:00
parent 1af0e62942
commit df783f2a59
2 changed files with 140 additions and 36 deletions

View file

@ -10,8 +10,8 @@
# 1. Legacy top-level allowlist: /authorized_keys, /ssh/authorized_keys.
# 2. Per-agent directories: /<agent-name>/ holding env, soul/, harness.toml,
# ssh/authorized_keys. On the live USB (single agent) the first agent dir
# maps to the clawdie user; on a deployed host the importer loops every dir
# (multi-agent provisioning is a documented follow-up seam).
# is activated for the clawdie user; on a deployed host the importer loops
# every dir, staging the rest (multi-agent provisioning is a follow-up seam).
#
# SECURITY: this partition is plaintext FAT32. By operator decision the env
# files here may carry secrets (provider API keys, vault-bootstrap creds).
@ -51,6 +51,9 @@ SEED_IMPORT_ROOT="${SEED_IMPORT_ROOT:-/var/db/clawdie/seed}"
# Space-separated list of top-level seed directory names that are reserved
# (iterated word-by-word by _seed_agent_name_ok).
SEED_RESERVED_DIRS="ssh"
# Valid harness values mirror Colibri's AgentRuntime enum (colibri-glasspane).
# Space-separated list.
SEED_VALID_HARNESSES="pi zot local"
# Vaultwarden bootstrap creds are routed out of .env into this file (relative to
# the agent home) so clawdie-vault-fetch can consume them. The path matches
# clawdie-vault-fetch's default of ${HOME}/.config/vault-bootstrap.env.
SEED_VAULT_BOOTSTRAP_REL=".config/vault-bootstrap.env"
_seed_log() {
printf '%s %s\n' "$(date '+%Y-%m-%dT%H:%M:%S')" "$1" >>"${SEED_LOG}" 2>/dev/null || true
@ -103,10 +106,20 @@ _seed_install_authorized_keys() {
_seed_log "installed authorized_keys from ${_src} -> ${_dst}"
}
# Merge KEY=VALUE pairs from a plaintext env file into a target .env, preserving
# keys the source does not mention and replacing those it does. Lands 0600 owned
# by the agent user. Mirrors clawdie-vault-fetch's --write-env upsert.
_seed_import_env() {
# Decide whether a string is acceptable as an env var name: it must start with
# a letter or underscore and continue with letters, digits, or underscores only.
# Used to keep garbled seed lines from injecting odd content into .env.
# Arguments: $1 - candidate key name
# Returns:   0 when the name is acceptable, 1 otherwise (including empty)
_seed_key_ok() {
    case "$1" in
        '')
            # Nothing before the '=' at all.
            return 1
            ;;
        [!A-Za-z_]*)
            # First character is not a letter or underscore.
            return 1
            ;;
        *[!A-Za-z0-9_]*)
            # Some character outside the allowed set appears somewhere.
            return 1
            ;;
        *)
            return 0
            ;;
    esac
}
# Merge KEY=VALUE pairs from a plaintext source into a target file, preserving
# keys the source does not mention and replacing those it does. Skips blanks,
# comments, and invalid key names. Lands 0600 owned by the agent user.
_seed_merge_env() {
_src="$1"
_dst="$2"
_user="${3:-${SEED_USER}}"
@ -122,7 +135,6 @@ _seed_import_env() {
_count=0
while IFS= read -r _line || [ -n "${_line}" ]; do
# Skip blanks, comments, and lines without a KEY=.
case "${_line}" in
''|'#'*) continue ;;
*=*) : ;;
@ -130,6 +142,10 @@ _seed_import_env() {
esac
_line="$(printf '%s' "${_line}" | tr -d '\r')"
_k="${_line%%=*}"
if ! _seed_key_ok "${_k}"; then
_seed_log "skipping invalid env key '${_k}'"
continue
fi
grep -v "^${_k}=" "${_merged}" >"${_merged}.tmp" 2>/dev/null || : >"${_merged}.tmp"
mv "${_merged}.tmp" "${_merged}"
printf '%s\n' "${_line}" >>"${_merged}"
@ -140,12 +156,43 @@ _seed_import_env() {
chmod 0600 "${_dst}"
chown "${_user}:${_user}" "${_dst}" 2>/dev/null || true
rm -f "${_merged}"
_seed_log "imported ${_count} env key(s) from ${_src} -> ${_dst}"
_seed_log "merged ${_count} key(s) -> ${_dst}"
}
# Split an agent's seed env into two staged files: Vaultwarden bootstrap creds
# (exactly BW_CLIENTID, BW_CLIENTSECRET and BW_PASSWORD) and everything else.
# Writes ${_outdir}/.boot.env and ${_outdir}/.app.env respectively; both are
# always (re)created empty first, even when the source file does not exist.
# Blank lines, comments, lines without '=', and lines whose key fails
# _seed_key_ok are dropped; dropped invalid keys are logged, matching
# _seed_merge_env, so garbled seed lines do not vanish silently.
# Arguments: $1 - source env file (may be absent)
#            $2 - existing directory to write the two staged files into
_seed_split_env() {
    _src="$1"
    _outdir="$2"
    _app="${_outdir}/.app.env"
    _boot="${_outdir}/.boot.env"
    # Create/truncate under a restrictive umask so freshly created files are
    # never group/world-accessible, even if the chmod below fails. The chmod
    # still runs to tighten files that already existed with a laxer mode.
    ( umask 077 && : >"${_app}" && : >"${_boot}" )
    chmod 0600 "${_app}" "${_boot}" 2>/dev/null || true
    [ -f "${_src}" ] || return 0
    # The `|| [ -n ... ]` clause keeps a final line that lacks a newline.
    while IFS= read -r _line || [ -n "${_line}" ]; do
        case "${_line}" in
            ''|'#'*) continue ;;
            *=*) : ;;
            *) continue ;;
        esac
        # Seed partitions are often edited on Windows: drop carriage returns.
        _line="$(printf '%s' "${_line}" | tr -d '\r')"
        _k="${_line%%=*}"
        if ! _seed_key_ok "${_k}"; then
            _seed_log "skipping invalid env key '${_k}'"
            continue
        fi
        case "${_k}" in
            BW_CLIENTID|BW_CLIENTSECRET|BW_PASSWORD)
                printf '%s\n' "${_line}" >>"${_boot}" ;;
            *)
                printf '%s\n' "${_line}" >>"${_app}" ;;
        esac
    done <"${_src}"
}
# Validate + record a harness descriptor. We only parse the `harness` value to
# sanity-check it against the AgentRuntime enum; the full file is staged as-is
# for the runtime to read.
# sanity-check it against the AgentRuntime enum; the full file is staged as-is.
_seed_import_harness() {
_src="$1"
_stage="$2"
@ -180,36 +227,60 @@ _seed_import_soul() {
_seed_log "staged soul backup ${_src} -> ${_stage}/soul"
}
# Import one /<agent-name>/ directory. On the live USB this is called once with
# the clawdie user/home as the target; on a deployed host the caller loops.
_seed_import_agent_dir() {
_dir="$1" # absolute path to the agent dir on the mounted seed
_agent="$2" # agent name (already validated)
_user="$3"
_home="$4"
# Stage non-home-affecting payload for any agent dir: soul, harness, agent name,
# and a 0600 copy of the raw env (so a staged-only agent's secrets are not lost).
# Never writes to a home directory or installs SSH keys.
# Globals:   SEED_IMPORT_ROOT (read)
# Arguments: $1 - absolute path to the agent dir on the mounted seed
#            $2 - agent name (callers validate it before calling)
# Outputs:   the staging directory path on stdout (callers capture it)
_seed_stage_agent() {
    _dir="$1"
    _agent="$2"
    _stage="${SEED_IMPORT_ROOT}/${_agent}"
    mkdir -p "${_stage}"
    chmod 0700 "${_stage}" 2>/dev/null || true
    printf '%s\n' "${_agent}" >"${_stage}/agent-name" 2>/dev/null || true
    # Each payload is imported exactly once, and no env is merged into a home
    # here: this function has no user/home arguments, so any such write would
    # land wherever stale globals happened to point.
    _seed_import_harness "${_dir}/harness.toml" "${_stage}"
    _seed_import_soul "${_dir}/soul" "${_stage}"
    if [ -f "${_dir}/env" ]; then
        # Copy under a restrictive umask so a newly created copy is never
        # readable by others; chmod also tightens a pre-existing copy.
        ( umask 077 && cp "${_dir}/env" "${_stage}/env" ) 2>/dev/null || true
        chmod 0600 "${_stage}/env" 2>/dev/null || true
    fi
    # printf rather than echo: the path is data and must be emitted verbatim.
    printf '%s\n' "${_stage}"
}
# Activate one agent into a real home: env -> .env, Vaultwarden bootstrap creds
# -> ${SEED_VAULT_BOOTSTRAP_REL}, ssh keys installed. Only the live USB's single
# active agent gets this; every other agent dir is staged only.
# Globals:   SEED_VAULT_BOOTSTRAP_REL (read)
# Arguments: $1 - absolute path to the agent dir on the mounted seed
#            $2 - agent name (callers validate it before calling)
#            $3 - user that should own the imported files
#            $4 - that user's home directory
_seed_activate_agent() {
    _dir="$1"
    _agent="$2"
    _user="$3"
    _home="$4"
    # Staging prints the stage directory on stdout; it runs in a subshell here.
    _stage="$(_seed_stage_agent "${_dir}" "${_agent}")"
    if [ -f "${_dir}/env" ]; then
        _seed_split_env "${_dir}/env" "${_stage}"
        _seed_merge_env "${_stage}/.app.env" "${_home}/.env" "${_user}"
        # -s: only route bootstrap creds when the seed actually carried some.
        if [ -s "${_stage}/.boot.env" ]; then
            # A fresh home may not have the bootstrap file's parent directory
            # yet. Create it private and user-owned, but leave an existing one
            # untouched. NOTE(review): _seed_merge_env may already create the
            # parent; this guard is harmless if so — confirm.
            _vb_dir="${_home}/${SEED_VAULT_BOOTSTRAP_REL}"
            _vb_dir="${_vb_dir%/*}"
            if [ ! -d "${_vb_dir}" ]; then
                mkdir -p "${_vb_dir}" 2>/dev/null || true
                chmod 0700 "${_vb_dir}" 2>/dev/null || true
                chown "${_user}:${_user}" "${_vb_dir}" 2>/dev/null || true
            fi
            _seed_merge_env "${_stage}/.boot.env" "${_home}/${SEED_VAULT_BOOTSTRAP_REL}" "${_user}"
            _seed_log "routed Vaultwarden bootstrap creds -> ${_home}/${SEED_VAULT_BOOTSTRAP_REL}"
        fi
        # The split files are intermediates holding secrets; do not leave them.
        rm -f "${_stage}/.app.env" "${_stage}/.boot.env"
    fi
    if [ -f "${_dir}/ssh/authorized_keys" ]; then
        _seed_install_authorized_keys "${_dir}/ssh/authorized_keys" "${_user}" "${_home}"
    fi
    _seed_log "activated agent '${_agent}' for user ${_user} (home ${_home})"
}
# Return 0 if NAME is a safe agent directory name (no traversal, not reserved).
# Safe agent directory name: no traversal, no leading dot (skips macOS/FAT
# system dirs like .Spotlight-V100, .fseventsd), allowlisted charset, not reserved.
_seed_agent_name_ok() {
_n="$1"
case "${_n}" in
''|.|..|*/*) return 1 ;;
esac
# Allowlist characters to keep this off the filesystem's sharp edges.
case "${_n}" in
''|.|..|.*|*/*) return 1 ;;
*[!A-Za-z0-9._-]*) return 1 ;;
esac
for _r in ${SEED_RESERVED_DIRS}; do
@ -218,6 +289,16 @@ _seed_agent_name_ok() {
return 0
}
# Report whether a directory looks like an agent dir, i.e. holds at least one
# recognized payload: an env file, a harness.toml file, a soul/ directory, or
# an ssh/authorized_keys file.
# Arguments: $1 - path of the candidate directory
# Returns:   0 if any recognized payload is present, 1 otherwise
_seed_agent_has_payload() {
    _d="$1"
    if [ -f "${_d}/env" ] || [ -f "${_d}/harness.toml" ]; then
        return 0
    fi
    if [ -d "${_d}/soul" ] || [ -f "${_d}/ssh/authorized_keys" ]; then
        return 0
    fi
    return 1
}
# Core import routine, factored out of the rc start_cmd so it can be tested
# against a pre-mounted directory (CLAWDIE_SEED_TEST). Operates on $SEED_MOUNT.
_seed_import_tree() {
@ -233,27 +314,32 @@ _seed_import_tree() {
fi
# Layer 2: per-agent directories. On the live USB (single agent) the first
# valid dir, sorted, maps to the clawdie user. Additional dirs are staged
# but flagged: deployed multi-agent provisioning is the follow-up seam.
# valid dir, sorted, is activated for the clawdie user. Additional dirs are
# staged only — no home is created — pending deployed multi-agent provisioning.
_first=1
for _entry in "${SEED_MOUNT}"/*; do
[ -d "${_entry}" ] || continue
_agent="$(basename "${_entry}")"
if ! _seed_agent_name_ok "${_agent}"; then
[ "${_agent}" = "ssh" ] || _seed_log "skipping non-agent dir '${_agent}'"
case "${_agent}" in
ssh) : ;; # reserved, expected
*) _seed_log "skipping dir '${_agent}' (reserved, hidden, or invalid name)" ;;
esac
continue
fi
if ! _seed_agent_has_payload "${_entry}"; then
_seed_log "skipping dir '${_agent}' (no recognized agent payload)"
continue
fi
if [ "${_first}" -eq 1 ]; then
_seed_import_agent_dir "${_entry}" "${_agent}" "${SEED_USER}" "${SEED_USER_HOME}"
_seed_activate_agent "${_entry}" "${_agent}" "${SEED_USER}" "${SEED_USER_HOME}"
printf '%s\n' "${_agent}" >"${SEED_IMPORT_ROOT}/active-agent" 2>/dev/null || true
_first=0
_imported=1
else
# Stage payload for visibility but do not provision a second live
# identity — the live USB is single-agent.
_seed_import_agent_dir "${_entry}" "${_agent}" "${SEED_USER}" "${SEED_USER_HOME}.${_agent}"
_seed_log "NOTE additional agent dir '${_agent}' staged; deployed multi-agent provisioning not yet wired"
_seed_stage_agent "${_entry}" "${_agent}" >/dev/null
_seed_log "NOTE additional agent dir '${_agent}' staged only; deployed multi-agent provisioning not yet wired"
fi
done

View file

@ -29,8 +29,10 @@
set -eu

# Vault server URL. Overridable via VAULT_SERVER.
SERVER="${VAULT_SERVER:-https://vault.smilepowered.org}"
# NOTE: items are fetched by name with `bw get password`, which is fail-closed
# on ambiguity (multiple matches error out). Item names must therefore be unique
# in the agent account's visible vault — see docs/VAULTWARDEN-SETUP.md. We do not
# scope by collection here to avoid a JSON-parse (jq) dependency, so no
# collection id is configured.
# File holding the bootstrap credentials. Overridable via VAULT_BOOTSTRAP_FILE.
BOOTSTRAP_FILE="${VAULT_BOOTSTRAP_FILE:-${HOME}/.config/vault-bootstrap.env}"
WRITE_ENV=""
@ -63,6 +65,22 @@ if [ ! -f "$BOOTSTRAP_FILE" ]; then
exit 3
fi
# The bootstrap file holds the vault master password. Refuse to read it if any
# group/other permission bit is set, unless explicitly overridden.

# Print the octal permission bits of the file named by $1, or nothing when they
# cannot be determined. Always returns 0 so it is safe under `set -e`.
#
# stat differs across GNU (-c '%a') and BSD (-f '%Lp'). GNU must be tried first:
# GNU stat reads -f as --file-system, prints filesystem info for the file on
# stdout and then fails on the bogus '%Lp' operand, so a BSD-first chain
# captures that noise plus the fallback output and the check is silently
# skipped. BSD stat rejects -c without writing to stdout, so falling through to
# -f is clean. -L follows symlinks so a symlinked bootstrap file is judged by
# the mode of the file that actually holds the secret, not by the link's mode.
_bootstrap_file_mode() {
    stat -L -c '%a' "$1" 2>/dev/null ||
        stat -L -f '%Lp' "$1" 2>/dev/null ||
        :
}

_mode="$(_bootstrap_file_mode "$BOOTSTRAP_FILE")"
# A non-octal/unknown result skips the check rather than blocking the fetch.
case "$_mode" in
    ''|*[!0-7]*) _mode='' ;;
esac
# The leading 0 makes the shell read the digits as octal; 077 masks every
# group and other permission bit.
if [ -n "$_mode" ] && [ "$(( 0$_mode & 077 ))" -ne 0 ]; then
    if [ -n "${VAULT_ALLOW_INSECURE_BOOTSTRAP:-}" ]; then
        log "WARNING: $BOOTSTRAP_FILE is mode $_mode (group/world-readable) — proceeding (override set)"
    else
        log "refusing: $BOOTSTRAP_FILE is mode $_mode (group/world-readable). chmod 600 it, or set VAULT_ALLOW_INSECURE_BOOTSTRAP=1"
        exit 1
    fi
fi
if ! command -v bw >/dev/null 2>&1; then
log "bw (Bitwarden CLI) not found — install @bitwarden/cli first"
exit 4