From df783f2a595d98f5266ac09b39ee0c52924d8c03 Mon Sep 17 00:00:00 2001 From: Sam & Claude Date: Fri, 19 Jun 2026 18:24:55 +0200 Subject: [PATCH] =?UTF-8?q?fix(vault):=20wire=20seed=20bootstrap=20?= =?UTF-8?q?=E2=86=92=20vault-fetch=20path=20+=20harden=20agent=20dir=20val?= =?UTF-8?q?idation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address the 5 review concerns on the secrets-out-of-the-box feature: 1. Seed↔fetch path alignment: _seed_split_env routes BW_* creds out of .env into ~/.config/vault-bootstrap.env (SEED_VAULT_BOOTSTRAP_REL), the path clawdie-vault-fetch actually reads — so 'seed bootstrap → fetch out of the box' now lines up without an explicit --bootstrap arg. 2. Drop unused COLLECTION_ID from clawdie-vault-fetch. Items are fetched by name via 'bw get password', which is fail-closed on ambiguity; document that item names must be unique in the visible vault. 3. Agent dir validation: _seed_agent_name_ok rejects leading-dot dirs (.Spotlight-V100, .fseventsd) and traversal; _seed_agent_has_payload requires a recognized payload so an empty/stray dir can't become active. 4. No phantom homes: extra agent dirs stage under /var/db/clawdie/seed/ only — _seed_stage_agent never writes a home or SSH keys. 5. Bootstrap file mode enforcement: clawdie-vault-fetch now stat-checks the bootstrap file and refuses group/world-readable unless VAULT_ALLOW_INSECURE_BOOTSTRAP is set. Also renames _seed_import_env → _seed_merge_env + _seed_split_env and adds _seed_key_ok to guard env var names. Checks: sh -n on vault-fetch/live-seed/build.sh; git diff --check; ./scripts/check-format.sh (prettier clean); 5 concerns verified present. 
Co-Authored-By: Hermes & Sam --- live/operator-session/clawdie-live-seed | 154 +++++++++++++++++----- live/operator-session/clawdie-vault-fetch | 22 +++- 2 files changed, 140 insertions(+), 36 deletions(-) diff --git a/live/operator-session/clawdie-live-seed b/live/operator-session/clawdie-live-seed index cfb2c1f..3f6de56 100644 --- a/live/operator-session/clawdie-live-seed +++ b/live/operator-session/clawdie-live-seed @@ -10,8 +10,8 @@ # 1. Legacy top-level allowlist: /authorized_keys, /ssh/authorized_keys. # 2. Per-agent directories: // holding env, soul/, harness.toml, # ssh/authorized_keys. On the live USB (single agent) the first agent dir -# maps to the clawdie user; on a deployed host the importer loops every dir -# (multi-agent provisioning is a documented follow-up seam). +# is activated for the clawdie user; on a deployed host the importer loops +# every dir, staging the rest (multi-agent provisioning is a follow-up seam). # # SECURITY: this partition is plaintext FAT32. By operator decision the env # files here may carry secrets (provider API keys, vault-bootstrap creds). @@ -51,6 +51,9 @@ SEED_IMPORT_ROOT="${SEED_IMPORT_ROOT:-/var/db/clawdie/seed}" SEED_RESERVED_DIRS="ssh" # Valid harness values mirror Colibri's AgentRuntime enum (colibri-glasspane). SEED_VALID_HARNESSES="pi zot local" +# Vaultwarden bootstrap creds are routed out of .env into this file (relative to +# the agent home) so clawdie-vault-fetch can consume them. +SEED_VAULT_BOOTSTRAP_REL=".config/vault-bootstrap.env" _seed_log() { printf '%s %s\n' "$(date '+%Y-%m-%dT%H:%M:%S')" "$1" >>"${SEED_LOG}" 2>/dev/null || true @@ -103,10 +106,20 @@ _seed_install_authorized_keys() { _seed_log "installed authorized_keys from ${_src} -> ${_dst}" } -# Merge KEY=VALUE pairs from a plaintext env file into a target .env, preserving -# keys the source does not mention and replacing those it does. Lands 0600 owned -# by the agent user. Mirrors clawdie-vault-fetch's --write-env upsert. 
-_seed_import_env() { +# Valid POSIX-ish shell env var name. Guards against garbled seed lines +# injecting odd content into .env. +_seed_key_ok() { + case "$1" in + ''|[!A-Za-z_]*) return 1 ;; + *[!A-Za-z0-9_]*) return 1 ;; + esac + return 0 +} + +# Merge KEY=VALUE pairs from a plaintext source into a target file, preserving +# keys the source does not mention and replacing those it does. Skips blanks, +# comments, and invalid key names. Lands 0600 owned by the agent user. +_seed_merge_env() { _src="$1" _dst="$2" _user="${3:-${SEED_USER}}" @@ -122,7 +135,6 @@ _seed_import_env() { _count=0 while IFS= read -r _line || [ -n "${_line}" ]; do - # Skip blanks, comments, and lines without a KEY=. case "${_line}" in ''|'#'*) continue ;; *=*) : ;; @@ -130,6 +142,10 @@ _seed_import_env() { esac _line="$(printf '%s' "${_line}" | tr -d '\r')" _k="${_line%%=*}" + if ! _seed_key_ok "${_k}"; then + _seed_log "skipping invalid env key '${_k}'" + continue + fi grep -v "^${_k}=" "${_merged}" >"${_merged}.tmp" 2>/dev/null || : >"${_merged}.tmp" mv "${_merged}.tmp" "${_merged}" printf '%s\n' "${_line}" >>"${_merged}" @@ -140,12 +156,43 @@ _seed_import_env() { chmod 0600 "${_dst}" chown "${_user}:${_user}" "${_dst}" 2>/dev/null || true rm -f "${_merged}" - _seed_log "imported ${_count} env key(s) from ${_src} -> ${_dst}" + _seed_log "merged ${_count} key(s) -> ${_dst}" +} + +# Split an agent's seed env into two staged files: bootstrap creds (BW_*) and +# everything else. Writes ${_outdir}/.app.env and ${_outdir}/.boot.env. 
+_seed_split_env() { + _src="$1" + _outdir="$2" + + _app="${_outdir}/.app.env" + _boot="${_outdir}/.boot.env" + : >"${_app}" + : >"${_boot}" + chmod 0600 "${_app}" "${_boot}" 2>/dev/null || true + + [ -f "${_src}" ] || return 0 + + while IFS= read -r _line || [ -n "${_line}" ]; do + case "${_line}" in + ''|'#'*) continue ;; + *=*) : ;; + *) continue ;; + esac + _line="$(printf '%s' "${_line}" | tr -d '\r')" + _k="${_line%%=*}" + _seed_key_ok "${_k}" || continue + case "${_k}" in + BW_CLIENTID|BW_CLIENTSECRET|BW_PASSWORD) + printf '%s\n' "${_line}" >>"${_boot}" ;; + *) + printf '%s\n' "${_line}" >>"${_app}" ;; + esac + done <"${_src}" } # Validate + record a harness descriptor. We only parse the `harness` value to -# sanity-check it against the AgentRuntime enum; the full file is staged as-is -# for the runtime to read. +# sanity-check it against the AgentRuntime enum; the full file is staged as-is. _seed_import_harness() { _src="$1" _stage="$2" @@ -180,36 +227,60 @@ _seed_import_soul() { _seed_log "staged soul backup ${_src} -> ${_stage}/soul" } -# Import one // directory. On the live USB this is called once with -# the clawdie user/home as the target; on a deployed host the caller loops. -_seed_import_agent_dir() { - _dir="$1" # absolute path to the agent dir on the mounted seed - _agent="$2" # agent name (already validated) - _user="$3" - _home="$4" +# Stage non-home-affecting payload for any agent dir: soul, harness, agent name, +# and a 0600 copy of the raw env (so a staged-only agent's secrets are not lost). +# Never writes to a home directory or installs SSH keys. 
+_seed_stage_agent() { + _dir="$1" + _agent="$2" _stage="${SEED_IMPORT_ROOT}/${_agent}" mkdir -p "${_stage}" + chmod 0700 "${_stage}" 2>/dev/null || true printf '%s\n' "${_agent}" >"${_stage}/agent-name" 2>/dev/null || true - _seed_import_env "${_dir}/env" "${_home}/.env" "${_user}" - _seed_import_harness "${_dir}/harness.toml" "${_stage}" - _seed_import_soul "${_dir}/soul" "${_stage}" + _seed_import_harness "${_dir}/harness.toml" "${_stage}" + _seed_import_soul "${_dir}/soul" "${_stage}" + if [ -f "${_dir}/env" ]; then + cp "${_dir}/env" "${_stage}/env" 2>/dev/null || true + chmod 0600 "${_stage}/env" 2>/dev/null || true + fi + echo "${_stage}" +} + +# Activate one agent into a real home: env -> .env, BW_* -> vault-bootstrap.env, +# ssh keys installed. Only the live USB's single active agent gets this. +_seed_activate_agent() { + _dir="$1" + _agent="$2" + _user="$3" + _home="$4" + + _stage="$(_seed_stage_agent "${_dir}" "${_agent}")" + + if [ -f "${_dir}/env" ]; then + _seed_split_env "${_dir}/env" "${_stage}" + _seed_merge_env "${_stage}/.app.env" "${_home}/.env" "${_user}" + if [ -s "${_stage}/.boot.env" ]; then + _seed_merge_env "${_stage}/.boot.env" "${_home}/${SEED_VAULT_BOOTSTRAP_REL}" "${_user}" + _seed_log "routed Vaultwarden bootstrap creds -> ${_home}/${SEED_VAULT_BOOTSTRAP_REL}" + fi + rm -f "${_stage}/.app.env" "${_stage}/.boot.env" + fi + if [ -f "${_dir}/ssh/authorized_keys" ]; then _seed_install_authorized_keys "${_dir}/ssh/authorized_keys" "${_user}" "${_home}" fi - _seed_log "imported agent dir '${_agent}' -> stage ${_stage}, user ${_user}" + _seed_log "activated agent '${_agent}' for user ${_user} (home ${_home})" } -# Return 0 if NAME is a safe agent directory name (no traversal, not reserved). +# Safe agent directory name: no traversal, no leading dot (skips macOS/FAT +# system dirs like .Spotlight-V100, .fseventsd), allowlisted charset, not reserved. 
_seed_agent_name_ok() { _n="$1" case "${_n}" in - ''|.|..|*/*) return 1 ;; - esac - # Allowlist characters to keep this off the filesystem's sharp edges. - case "${_n}" in + ''|.|..|.*|*/*) return 1 ;; *[!A-Za-z0-9._-]*) return 1 ;; esac for _r in ${SEED_RESERVED_DIRS}; do @@ -218,6 +289,16 @@ _seed_agent_name_ok() { return 0 } +# An agent dir must carry at least one recognized payload to count as an agent. +_seed_agent_has_payload() { + _d="$1" + [ -f "${_d}/env" ] && return 0 + [ -f "${_d}/harness.toml" ] && return 0 + [ -d "${_d}/soul" ] && return 0 + [ -f "${_d}/ssh/authorized_keys" ] && return 0 + return 1 +} + # Core import routine, factored out of the rc start_cmd so it can be tested # against a pre-mounted directory (CLAWDIE_SEED_TEST). Operates on $SEED_MOUNT. _seed_import_tree() { @@ -233,27 +314,32 @@ _seed_import_tree() { fi # Layer 2: per-agent directories. On the live USB (single agent) the first - # valid dir, sorted, maps to the clawdie user. Additional dirs are staged - # but flagged: deployed multi-agent provisioning is the follow-up seam. + # valid dir, sorted, is activated for the clawdie user. Additional dirs are + # staged only — no home is created — pending deployed multi-agent provisioning. _first=1 for _entry in "${SEED_MOUNT}"/*; do [ -d "${_entry}" ] || continue _agent="$(basename "${_entry}")" if ! _seed_agent_name_ok "${_agent}"; then - [ "${_agent}" = "ssh" ] || _seed_log "skipping non-agent dir '${_agent}'" + case "${_agent}" in + ssh) : ;; # reserved, expected + *) _seed_log "skipping dir '${_agent}' (reserved, hidden, or invalid name)" ;; + esac + continue + fi + if ! 
_seed_agent_has_payload "${_entry}"; then + _seed_log "skipping dir '${_agent}' (no recognized agent payload)" continue fi if [ "${_first}" -eq 1 ]; then - _seed_import_agent_dir "${_entry}" "${_agent}" "${SEED_USER}" "${SEED_USER_HOME}" + _seed_activate_agent "${_entry}" "${_agent}" "${SEED_USER}" "${SEED_USER_HOME}" printf '%s\n' "${_agent}" >"${SEED_IMPORT_ROOT}/active-agent" 2>/dev/null || true _first=0 _imported=1 else - # Stage payload for visibility but do not provision a second live - # identity — the live USB is single-agent. - _seed_import_agent_dir "${_entry}" "${_agent}" "${SEED_USER}" "${SEED_USER_HOME}.${_agent}" - _seed_log "NOTE additional agent dir '${_agent}' staged; deployed multi-agent provisioning not yet wired" + _seed_stage_agent "${_entry}" "${_agent}" >/dev/null + _seed_log "NOTE additional agent dir '${_agent}' staged only; deployed multi-agent provisioning not yet wired" fi done diff --git a/live/operator-session/clawdie-vault-fetch b/live/operator-session/clawdie-vault-fetch index b7d8a34..898a660 100644 --- a/live/operator-session/clawdie-vault-fetch +++ b/live/operator-session/clawdie-vault-fetch @@ -29,8 +29,10 @@ set -eu SERVER="${VAULT_SERVER:-https://vault.smilepowered.org}" -# agent-secrets collection in the Clawdie org. Overridable for other vaults. -COLLECTION_ID="${VAULT_COLLECTION_ID:-94ba61b8-633c-454e-b749-f115617eeac3}" +# NOTE: items are fetched by name with `bw get password`, which is fail-closed +# on ambiguity (multiple matches error out). Item names must therefore be unique +# in the agent account's visible vault — see docs/VAULTWARDEN-SETUP.md. We do not +# scope by collection here to avoid a JSON-parse (jq) dependency. BOOTSTRAP_FILE="${VAULT_BOOTSTRAP_FILE:-${HOME}/.config/vault-bootstrap.env}" WRITE_ENV="" @@ -63,6 +65,22 @@ if [ ! -f "$BOOTSTRAP_FILE" ]; then exit 3 fi +# The bootstrap file holds the vault master password. Refuse to read it if it is +# group/world-readable, unless explicitly overridden. 
stat differs across BSD +# (-f '%Lp') and GNU (-c '%a'); each is tried on its own since GNU `stat -f` prints filesystem info before failing; a non-octal/unknown result skips the check. +_mode="$(stat -f '%Lp' "$BOOTSTRAP_FILE" 2>/dev/null)" || _mode="$(stat -c '%a' "$BOOTSTRAP_FILE" 2>/dev/null)" || _mode='' +case "$_mode" in + ''|*[!0-7]*) _mode='' ;; +esac +if [ -n "$_mode" ] && [ "$(( 0$_mode & 077 ))" -ne 0 ]; then + if [ -n "${VAULT_ALLOW_INSECURE_BOOTSTRAP:-}" ]; then + log "WARNING: $BOOTSTRAP_FILE is mode $_mode (group/world-readable) — proceeding (override set)" + else + log "refusing: $BOOTSTRAP_FILE is mode $_mode (group/world-readable). chmod 600 it, or set VAULT_ALLOW_INSECURE_BOOTSTRAP=1" + exit 1 + fi +fi + if ! command -v bw >/dev/null 2>&1; then log "bw (Bitwarden CLI) not found — install @bitwarden/cli first" exit 4