test(seed): add Layer 0 importer + Layer 2 MCP-boundary regression tests

Layer 0 (seed-import-test.sh): runs the real clawdie-live-seed importer in a
sandbox (CLAWDIE_SEED_TEST=1, all paths overridden) and asserts the seed->runtime
propagation contract — env split, provider.env, dual-home ssh, soul staging, and
AGENTS.md -> $ZOT_HOME (the global slot the autospawned zot reads). Idempotent
re-import is checked. REQUIRE_AGENTS_MD=1 enforces the AGENTS.md install added on
this branch; it passes 23/23 here.

Layer 2 (mcp-boundary-test.sh): exercises the mother MCP-over-SSH boundary on
Linux — colibri-mcp-ssh forced-command allowlist (""/"tools" route, everything
else rejected) and the MCP tools/list handshake, including a real loopback sshd
with command=. Skips cleanly when colibri isn't a sibling checkout.

Verified end to end on Linux before merge: importer target path and the ZOT_HOME
pin in colibri (fix/zot-home) both resolve to /var/db/colibri/.local/state/zot.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Sam & Claude 2026-06-25 05:03:23 +02:00
parent 85ac1ceec5
commit 5df307aaf4
2 changed files with 303 additions and 0 deletions

127
tests/mcp-boundary-test.sh Executable file
View file

@@ -0,0 +1,127 @@
#!/bin/sh
# Layer 2 — mother MCP boundary test (runs on Linux; no osa, no PostgreSQL).
#
# Proves two things the mother relies on, without standing up a real mother:
# 2a) the colibri-mcp-ssh forced-command ALLOWLIST: "" and "tools" route to
# colibri-mcp; everything else is rejected (exit 1, JSON error). Tested
# both directly and through a real loopback sshd with command="..." forced.
# 2b) the MCP handshake: `colibri-mcp tools` and the stdio tools/list JSON-RPC
# return the Colibri tool catalog.
#
# The DB-backed node_register path needs real PostgreSQL and belongs on osa
# (domedog is Docker-free). This layer stops at the SSH + MCP plumbing.
#
# Requires a prebuilt colibri-mcp. Point at it with COLIBRI_MCP_BIN, else the
# colibri debug build (then the release build) is auto-detected.
set -u

# Locate this script, the forced-command wrapper and the colibri-mcp binary.
SCRIPT_DIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)
: "${WRAPPER:=${SCRIPT_DIR}/../../colibri/packaging/mother/colibri-mcp-ssh}"

# No explicit COLIBRI_MCP_BIN: take the first executable sibling build,
# debug before release.
if [ -z "${COLIBRI_MCP_BIN:-}" ]; then
  for candidate in \
    "${SCRIPT_DIR}/../../colibri/target/debug/colibri-mcp" \
    "${SCRIPT_DIR}/../../colibri/target/release/colibri-mcp"; do
    if [ -x "${candidate}" ]; then
      COLIBRI_MCP_BIN="${candidate}"
      break
    fi
  done
fi
COLIBRI_MCP_BIN="${COLIBRI_MCP_BIN:-}"

# colibri is a sibling repo; when it is not checked out and built, skip
# (exit 0) so a standalone clawdie-iso run is not a false failure.
# STRICT=1 turns the skip into exit 2.
colibri_ready=yes
[ -r "${WRAPPER}" ] || colibri_ready=no
[ -n "${COLIBRI_MCP_BIN}" ] || colibri_ready=no
[ -x "${COLIBRI_MCP_BIN}" ] || colibri_ready=no
if [ "${colibri_ready}" = no ]; then
  printf '%s\n' "SKIP: colibri not available (needs sibling colibri checkout + built colibri-mcp)."
  printf '%s\n' " set COLIBRI_MCP_BIN / WRAPPER, or STRICT=1 to make this a failure."
  if [ -n "${STRICT:-}" ]; then
    exit 2
  fi
  exit 0
fi

# Make the binary path absolute: it is substituted into the generated wrapper.
mcp_dir=$(CDPATH= cd -- "$(dirname -- "${COLIBRI_MCP_BIN}")" && pwd)
COLIBRI_MCP_BIN="${mcp_dir}/$(basename -- "${COLIBRI_MCP_BIN}")"

# Per-run sandbox for the wrapper copy, keys and sshd state.
WORK=$(mktemp -d "${TMPDIR:-/tmp}/mcp-boundary.XXXXXX") || exit 2
SSHD_PID=""
cleanup() { [ -n "${SSHD_PID}" ] && kill "${SSHD_PID}" 2>/dev/null; rm -rf "${WORK}"; }
trap cleanup EXIT INT TERM
# Test wrapper: identical allowlist logic, but the absolute /usr/local/bin path
# is redirected to the prebuilt binary so we can run unprivileged.
TWRAP="${WORK}/colibri-mcp-ssh"
# Backslash-escape the characters that are special in the sed replacement
# below ('&' = matched text, '#' = our delimiter, '\' = escape) so a binary
# path containing any of them is substituted literally.
mcp_bin_sed=$(printf '%s\n' "${COLIBRI_MCP_BIN}" | sed 's/[\\&#]/\\&/g')
sed "s#/usr/local/bin/colibri-mcp#${mcp_bin_sed}#g" "${WRAPPER}" >"${TWRAP}"
chmod +x "${TWRAP}"
# Result counters, printed in the final RESULT line.
PASS=0
FAIL=0
SKIP=0

# ok/bad/skip <msg>: bump the matching counter and print one result line.
ok() {
  PASS=$((PASS + 1))
  printf ' ok %s\n' "$1"
}
bad() {
  FAIL=$((FAIL + 1))
  printf ' FAIL %s\n' "$1"
}
skip() {
  SKIP=$((SKIP + 1))
  printf ' SKIP %s\n' "$1"
}

# check <status> <msg>: record a pass when <status> is 0, a failure otherwise.
# Uses if/else rather than `A && ok || bad`, which would also run bad (and
# count a failure) whenever ok itself returned non-zero, e.g. on a write error.
check() {
  if [ "$1" -eq 0 ]; then
    ok "$2"
  else
    bad "$2"
  fi
}
echo "== 2b: MCP handshake (direct) =="

# CLI form: the `tools` subcommand output must mention colibri_status.
handshake_rc=0
timeout 10 "${COLIBRI_MCP_BIN}" tools 2>/dev/null \
  | grep -q 'colibri_status' || handshake_rc=$?
check "${handshake_rc}" "colibri-mcp tools lists colibri_status"

# stdio form: send one tools/list JSON-RPC request on stdin; the reply must
# mention colibri_intake_task.
tools_list_request='{"jsonrpc":"2.0","id":1,"method":"tools/list"}'
handshake_rc=0
printf '%s\n' "${tools_list_request}" \
  | timeout 10 "${COLIBRI_MCP_BIN}" 2>/dev/null \
  | grep -q '"colibri_intake_task"' || handshake_rc=$?
check "${handshake_rc}" "stdio tools/list returns colibri_intake_task"
echo "== 2a: forced-command allowlist (direct wrapper) =="
# allowed: "tools" -> tool list
out=$(SSH_ORIGINAL_COMMAND="tools" timeout 10 sh "${TWRAP}" 2>/dev/null)
printf '%s\n' "${out}" | grep -q 'colibri_status'
check $? 'SSH_ORIGINAL_COMMAND="tools" -> tool list'
# allowed: "" -> stdio MCP (feed a request, expect a JSON-RPC result)
out=$(printf '%s\n' '{"jsonrpc":"2.0","id":7,"method":"tools/list"}' \
  | SSH_ORIGINAL_COMMAND="" timeout 10 sh "${TWRAP}" 2>/dev/null)
printf '%s\n' "${out}" | grep -q '"id":7'
check $? 'SSH_ORIGINAL_COMMAND="" -> stdio MCP responds'
# rejected: arbitrary command must NOT run; exit 1 + JSON error on stderr.
# Bounded like the allowed cases (timeout, stdin from /dev/null): if the
# allowlist ever regressed and started a shell or the stdio server, this must
# show up as a failed check rather than a test run that hangs on stdin.
for evil in 'rm -rf /' 'status' 'tools; rm -rf /' 'tools --help' '/bin/sh'; do
  # 2>&1 >/dev/null: capture only stderr, discard stdout
  err=$(SSH_ORIGINAL_COMMAND="${evil}" timeout 10 sh "${TWRAP}" 2>&1 >/dev/null </dev/null); rc=$?
  { [ "${rc}" -eq 1 ] && printf '%s\n' "${err}" | grep -q 'rejected'; }
  check $? "rejected: '${evil}' (exit 1 + json error)"
done
echo "== 2a: forced-command through a REAL loopback sshd =="
# Starts a throwaway sshd on 127.0.0.1 whose only authorized key is forced
# through the test wrapper, then checks one allowed and one rejected command.
# Skipped (not failed) when sshd/ssh-keygen are missing or sshd will not start.
if ! command -v sshd >/dev/null 2>&1 || ! command -v ssh-keygen >/dev/null 2>&1; then
  skip "sshd/ssh-keygen unavailable — direct wrapper test stands in"
else
  HK="${WORK}/hostkey"; CK="${WORK}/clientkey"; AK="${WORK}/authorized_keys"
  ssh-keygen -t ed25519 -N '' -f "${HK}" >/dev/null 2>&1
  ssh-keygen -t ed25519 -N '' -f "${CK}" >/dev/null 2>&1
  # force every connection through the wrapper, exactly like mother's authorized_keys
  printf 'command="%s",restrict %s\n' "${TWRAP}" "$(cat "${CK}.pub")" >"${AK}"
  chmod 600 "${AK}"
  # Pick a port in 60000-64999. RANDOM is a bash/ksh extension: under a plain
  # POSIX /bin/sh it is unset, which with `set -u` is an error (or at best a
  # constant 0), so fall back to the shell's PID as the varying seed.
  PORT=$(( (${RANDOM:-$$} % 5000) + 60000 ))
  SSHDBIN=$(command -v sshd)
  # Own config via -f so sshd ignores /etc/ssh + its (root-only) drop-ins.
  CFG="${WORK}/sshd_config"
  cat >"${CFG}" <<EOF
Port ${PORT}
ListenAddress 127.0.0.1
HostKey ${HK}
PidFile ${WORK}/sshd.pid
AuthorizedKeysFile ${AK}
StrictModes no
UsePAM no
PasswordAuthentication no
PubkeyAuthentication yes
KbdInteractiveAuthentication no
EOF
  "${SSHDBIN}" -D -f "${CFG}" >"${WORK}/sshd.log" 2>&1 &
  SSHD_PID=$!
  # give sshd a moment to bind (or to fail) before probing it
  sleep 1
  if ! kill -0 "${SSHD_PID}" 2>/dev/null; then
    skip "sshd failed to start (see log) — direct wrapper test stands in"
    sed 's/^/ sshd: /' "${WORK}/sshd.log" 2>/dev/null | head -3
  else
    # SSHOPTS is expanded unquoted on purpose so it splits into separate options.
    SSHOPTS="-i ${CK} -p ${PORT} -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR"
    # allowed via forced command
    # shellcheck disable=SC2086
    ssh ${SSHOPTS} localhost tools 2>/dev/null | grep -q 'colibri_status'
    check $? "ssh ... tools -> tool list (forced command)"
    # rejected via forced command
    # shellcheck disable=SC2086
    out=$(ssh ${SSHOPTS} localhost 'rm -rf /' 2>&1); rc=$?
    { [ "${rc}" -ne 0 ] && echo "${out}" | grep -q 'rejected'; }
    check $? "ssh ... 'rm -rf /' -> rejected (forced command)"
  fi
fi
# Blank separator, the tally, then exit 1 if any check failed.
printf '\n'
printf '%s\n' "RESULT: ${PASS} passed, ${FAIL} failed, ${SKIP} skipped"
if [ "${FAIL}" -eq 0 ]; then
  exit 0
fi
exit 1

176
tests/seed-import-test.sh Executable file
View file

@@ -0,0 +1,176 @@
#!/bin/sh
# Layer 0 — clawdie-live-seed importer regression test (runs on any POSIX host).
#
# Exercises the REAL importer (live/operator-session/clawdie-live-seed) against a
# synthetic CLAWDIESEED tree, with every path redirected to a temp sandbox via the
# SEED_* override vars and CLAWDIE_SEED_TEST=1 (which skips the rc.subr handoff).
# No FreeBSD, no mount, no root required — chowns fail silently by design.
#
# It encodes the seed->runtime propagation contract as assertions:
# - operator home: .env (app keys), vault-bootstrap.env (BW_*), ssh material
# - daemon home: outbound ssh material (mother-mcp key), NO authorized_keys
# - provider.env: app provider keys only (no BW_*)
# - staging: soul/ tree, harness.toml, agent-name, active-agent, raw env
# - idempotency: re-import does not duplicate keys / known_hosts
#
# PENDING group: AGENTS.md -> zot home. This is the change Hermes is pushing.
# It is xfail (informational) until the importer learns to install AGENTS.md;
# re-run with REQUIRE_AGENTS_MD=1 after the patch to enforce it as required.
#
# Usage: sh tests/seed-import-test.sh # current contract must pass
# REQUIRE_AGENTS_MD=1 sh tests/... # also enforce the AGENTS.md patch
set -u

# Resolve the importer relative to this script unless IMPORTER overrides it.
SCRIPT_DIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)
: "${IMPORTER:=${SCRIPT_DIR}/../live/operator-session/clawdie-live-seed}"

# An unreadable importer is a setup error (exit 2), not a test failure.
[ -r "${IMPORTER}" ] || {
  printf '%s\n' "FATAL: importer not found at ${IMPORTER}" >&2
  exit 2
}
# Per-run sandbox; every path the importer touches is redirected below it.
WORK=$(mktemp -d "${TMPDIR:-/tmp}/seed-import-test.XXXXXX") || exit 2
# Remove the sandbox on exit. INT/TERM must terminate the run: a handler that
# only deleted the sandbox would return and let the remaining assertions run
# against a missing tree. Exiting from the handler still fires the EXIT trap.
trap 'rm -rf "${WORK}"' EXIT
trap 'exit 130' INT
trap 'exit 143' TERM
# --- override every path the importer touches into the sandbox ---------------
# Operator and daemon both map to the invoking user; each gets its own home
# directory under the sandbox.
TEST_USER=$(id -un)

CLAWDIE_SEED_TEST=1
SEED_MOUNT="${WORK}/seed"
SEED_LOG="${WORK}/seed.log"
SEED_USER="${TEST_USER}"
SEED_USER_HOME="${WORK}/operator-home"
SEED_DAEMON_USER="${TEST_USER}"
SEED_DAEMON_HOME="${WORK}/daemon-home"
SEED_IMPORT_ROOT="${WORK}/import-root"
SEED_PROVIDER_ENV="${WORK}/provider.env"
export CLAWDIE_SEED_TEST SEED_MOUNT SEED_LOG SEED_USER SEED_USER_HOME \
  SEED_DAEMON_USER SEED_DAEMON_HOME SEED_IMPORT_ROOT SEED_PROVIDER_ENV

# Contract under test for the AGENTS.md patch: importer installs it to zot's
# global slot under the daemon home. Defined once here so the expected location
# tracks the final decision (ZOT_HOME pin) without edits scattered through the
# test.
ZOT_HOME_REL=".local/state/zot"
EXPECT_AGENTS_MD="${SEED_DAEMON_HOME}/${ZOT_HOME_REL}/AGENTS.md"

for sandbox_dir in "${SEED_USER_HOME}" "${SEED_DAEMON_HOME}" "${SEED_IMPORT_ROOT}"; do
  mkdir -p "${sandbox_dir}"
done
# --- build a synthetic single-agent seed (the live-USB case) ------------------
# Layout: one agent directory ("clawdie") under the seed mount, holding env,
# harness.toml, AGENTS.md, a soul/ tree and ssh/ material.
SEED_AGENT="${SEED_MOUNT}/clawdie"
fixture_ssh="${SEED_AGENT}/ssh"
fixture_soul="${SEED_AGENT}/soul"
mkdir -p "${fixture_ssh}" "${fixture_soul}/memories"

# env: app provider keys mixed with BW_* vault-bootstrap keys.
cat >"${SEED_AGENT}/env" <<'EOF'
# operator-provided secrets (plaintext FAT32 by design)
DEEPSEEK_API_KEY=sk-test-deepseek
DEEPSEEK_MODEL=deepseek-v4
BW_CLIENTID=user.test-client
BW_CLIENTSECRET=test-secret
BW_PASSWORD=test-master-pw
EOF

cat >"${SEED_AGENT}/harness.toml" <<'EOF'
harness = "zot"
EOF

# AGENTS.md carries a canary line the assertions grep for after import.
cat >"${SEED_AGENT}/AGENTS.md" <<'EOF'
# Operational rules (zot reads this as project context)
- mother is OSA, reachable via the mother-mcp key on this seed
- verbs: node_register, create-task, intake-task
- this node is a USB operator, capability: freebsd
- install Hermes from /home/clawdie/ai/hermes-bsd
CANARY_AGENTS_MARKER=zot-sees-this
EOF

# soul tree (dormant; must be STAGED, not activated, in 0.12)
cat >"${fixture_soul}/SOUL.md" <<'EOF'
# SOUL (staged for Hermes, dormant)
EOF
cat >"${fixture_soul}/memories/USER.md" <<'EOF'
# USER (staged)
EOF

# ssh material: inbound authorized_keys + outbound client (config, key, known_hosts)
printf '%s\n' "ssh-ed25519 AAAAINBOUNDtestkey operator@laptop" >"${fixture_ssh}/authorized_keys"
cat >"${fixture_ssh}/config" <<'EOF'
Host mother
HostName 100.72.229.63
User colibri
IdentityFile ~/.ssh/mother-mcp
EOF
printf '%s\n' "-----BEGIN OPENSSH PRIVATE KEY-----TEST-----END-----" >"${fixture_ssh}/mother-mcp"
printf '%s\n' "ssh-ed25519 AAAAMOTHERPUBkey colibri@mother" >"${fixture_ssh}/mother-mcp.pub"
printf '%s\n' "mother-host ssh-ed25519 AAAAMOTHERhostkey" >"${fixture_ssh}/known_hosts"
# --- assertion harness -------------------------------------------------------
# Tallies: passes, required failures, and pending (informational) failures.
PASS=0
REQ_FAIL=0
PEND_FAIL=0

# ok/fail/pend <msg>: bump the matching tally and print one result line.
ok() {
  PASS=$((PASS + 1))
  printf ' ok %s\n' "$1"
}
fail() {
  REQ_FAIL=$((REQ_FAIL + 1))
  printf ' FAIL %s\n' "$1"
}
pend() {
  PEND_FAIL=$((PEND_FAIL + 1))
  printf ' PEND %s\n' "$1"
}

# report <required|pending> <passed 0|1> <msg>
# A pass is always ok. A failed "pending" assertion is only informational
# unless REQUIRE_AGENTS_MD is set (non-empty); every other failure is required.
report() {
  if [ "$2" -eq 1 ]; then
    ok "$3"
    return
  fi
  if [ "$1" != pending ] || [ -n "${REQUIRE_AGENTS_MD:-}" ]; then
    fail "$3"
  else
    pend "$3"
  fi
}
# Assertion helpers. Each evaluates one condition and calls report exactly
# once, with passed=1 or passed=0. They use if/else rather than
# `cond && report … 1 … || report … 0 …`, which reports a second time (as a
# failure) whenever the first report call returns non-zero.

# exists <required|pending> <path> <msg>: <path> exists.
exists() {
  if [ -e "$2" ]; then
    report "$1" 1 "$3"
  else
    report "$1" 0 "$3"
  fi
}

# not_exists <required|pending> <path> <msg>: <path> does not exist.
not_exists() {
  if [ ! -e "$2" ]; then
    report "$1" 1 "$3"
  else
    report "$1" 0 "$3"
  fi
}

# contains <required|pending> <file> <pattern> <msg>: <file> is a regular file
# with at least one line matching the grep <pattern>.
contains() {
  if [ -f "$2" ] && grep -q -- "$3" "$2" 2>/dev/null; then
    report "$1" 1 "$4"
  else
    report "$1" 0 "$4"
  fi
}

# absent_in <required|pending> <file> <pattern> <msg>: <file> is missing, or
# has no line matching the grep <pattern>.
absent_in() {
  if [ ! -f "$2" ] || ! grep -q -- "$3" "$2" 2>/dev/null; then
    report "$1" 1 "$4"
  else
    report "$1" 0 "$4"
  fi
}
# count_is <required|pending> <file> <pattern> <expected> <msg>
# Passes when the number of lines in <file> matching the grep <pattern> equals
# <expected>; the observed count is appended to the message.
# `grep -c` prints 0 AND exits non-zero when nothing matches, so a
# `$(grep -c … || echo 0)` fallback would produce "0<newline>0". Default only
# an empty result (unreadable/missing file, where grep prints nothing) to 0.
count_is() {
  _c=$(grep -c -- "$3" "$2" 2>/dev/null)
  _c=${_c:-0}
  if [ "$_c" = "$4" ]; then
    report "$1" 1 "$5 (got $_c)"
  else
    report "$1" 0 "$5 (got $_c)"
  fi
}
# --- run the real importer ---------------------------------------------------
# The importer is sourced into this shell rather than executed, and
# _seed_import_tree is then called directly.
# NOTE(review): _seed_import_tree is presumably defined by the sourced
# importer; it is not defined anywhere in this test — confirm.
# shellcheck disable=SC1090
. "${IMPORTER}"
echo "== import pass 1 =="
_seed_import_tree
# Short aliases for the sandbox locations asserted on below.
OP_HOME="${SEED_USER_HOME}"
DM_HOME="${SEED_DAEMON_HOME}"
STAGE="${SEED_IMPORT_ROOT}/clawdie"
# Operator home: app keys in .env, BW_* only in vault-bootstrap.env, and the
# full ssh material (inbound authorized_keys + outbound client files).
echo "-- operator home --"
contains required "${OP_HOME}/.env" '^DEEPSEEK_API_KEY=sk-test-deepseek' "DEEPSEEK key in operator ~/.env"
absent_in required "${OP_HOME}/.env" '^BW_PASSWORD=' "BW_* NOT in operator ~/.env"
contains required "${OP_HOME}/.config/vault-bootstrap.env" '^BW_PASSWORD=test-master-pw' "BW_PASSWORD in vault-bootstrap.env"
exists required "${OP_HOME}/.ssh/authorized_keys" "operator authorized_keys installed"
exists required "${OP_HOME}/.ssh/config" "operator ssh config installed"
exists required "${OP_HOME}/.ssh/mother-mcp" "operator mother-mcp private key installed"
contains required "${OP_HOME}/.ssh/known_hosts" 'mother-host' "operator known_hosts has mother host key"
# Daemon home: outbound ssh material only; authorized_keys must be absent.
echo "-- daemon home (colibri spawns the outbound MCP SSH) --"
exists required "${DM_HOME}/.ssh/mother-mcp" "daemon mother-mcp private key installed"
exists required "${DM_HOME}/.ssh/config" "daemon ssh config installed"
not_exists required "${DM_HOME}/.ssh/authorized_keys" "daemon has NO authorized_keys (inbound is operator-only)"
# provider.env: provider key present, BW_PASSWORD absent.
echo "-- provider.env (daemon autospawn keys; root-owned) --"
contains required "${SEED_PROVIDER_ENV}" '^DEEPSEEK_API_KEY=sk-test-deepseek' "DEEPSEEK key in provider.env"
absent_in required "${SEED_PROVIDER_ENV}" '^BW_PASSWORD=' "BW_* NOT in provider.env"
# Staging area under the import root: soul tree, harness.toml, agent-name,
# raw env, plus the active-agent marker at the import root itself.
# Only existence is asserted for the staged env; its mode is not checked here.
echo "-- staging (dormant payload) --"
exists required "${STAGE}/soul/SOUL.md" "soul/ tree staged"
exists required "${STAGE}/soul/memories/USER.md" "soul memories staged"
contains required "${STAGE}/harness.toml" 'harness = "zot"' "harness.toml recorded"
contains required "${STAGE}/agent-name" '^clawdie$' "agent-name staged"
contains required "${SEED_IMPORT_ROOT}/active-agent" '^clawdie$' "active-agent recorded"
exists required "${STAGE}/env" "raw env staged (0600)"
# "pending" assertions: reported as PEND (informational) on failure unless
# REQUIRE_AGENTS_MD is set, in which case they fail like required ones.
echo "-- PENDING: AGENTS.md -> zot global slot (Hermes's importer patch) --"
exists pending "${EXPECT_AGENTS_MD}" "AGENTS.md installed to \$ZOT_HOME"
contains pending "${EXPECT_AGENTS_MD}" 'CANARY_AGENTS_MARKER=zot-sees-this' "AGENTS.md content intact at zot slot"
# --- idempotency: a second import must not duplicate -------------------------
# Run the importer again over the same sandbox, then require exactly one
# matching line for each key / host entry that pass 1 installed.
echo "== import pass 2 (idempotency) =="
_seed_import_tree
count_is required "${OP_HOME}/.env" '^DEEPSEEK_API_KEY=' 1 "exactly one DEEPSEEK key in ~/.env after re-import"
count_is required "${SEED_PROVIDER_ENV}" '^DEEPSEEK_API_KEY=' 1 "exactly one DEEPSEEK key in provider.env after re-import"
count_is required "${OP_HOME}/.ssh/known_hosts" 'mother-host' 1 "known_hosts not duplicated after re-import"
# --- summary -----------------------------------------------------------------
# Print the tallies and exit 1 if any required assertion failed.
echo
echo "RESULT: ${PASS} passed, ${REQ_FAIL} required-fail, ${PEND_FAIL} pending"
# With REQUIRE_AGENTS_MD set, report() counts failed pending assertions as
# required failures, so PEND_FAIL stays 0; the note therefore keys on the
# variable alone (gating it on PEND_FAIL > 0 made it unreachable).
if [ -n "${REQUIRE_AGENTS_MD:-}" ]; then
  echo " (REQUIRE_AGENTS_MD set: pending failures count as required)"
elif [ "${PEND_FAIL}" -gt 0 ]; then
  echo " (pending failures are informational; set REQUIRE_AGENTS_MD=1 to enforce them)"
fi
[ "${REQ_FAIL}" -eq 0 ] || exit 1
exit 0