test(seed): add Layer 0 importer + Layer 2 MCP-boundary regression tests
Layer 0 (seed-import-test.sh): runs the real clawdie-live-seed importer in a
sandbox (CLAWDIE_SEED_TEST=1, all paths overridden) and asserts the seed->runtime
propagation contract — env split, provider.env, dual-home ssh, soul staging, and
AGENTS.md -> $ZOT_HOME (the global slot the autospawned zot reads). Idempotent
re-import is checked. REQUIRE_AGENTS_MD=1 enforces the AGENTS.md install added on
this branch; it passes 23/23 here.
Layer 2 (mcp-boundary-test.sh): exercises the mother MCP-over-SSH boundary on
Linux — colibri-mcp-ssh forced-command allowlist (""/"tools" route, everything
else rejected) and the MCP tools/list handshake, including a real loopback sshd
with command=. Skips cleanly when colibri isn't a sibling checkout.
Verified end to end on Linux before merge: importer target path and the ZOT_HOME
pin in colibri (fix/zot-home) both resolve to /var/db/colibri/.local/state/zot.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
85ac1ceec5
commit
5df307aaf4
2 changed files with 303 additions and 0 deletions
127
tests/mcp-boundary-test.sh
Executable file
127
tests/mcp-boundary-test.sh
Executable file
|
|
@ -0,0 +1,127 @@
|
|||
#!/bin/sh
|
||||
# Layer 2 — mother MCP boundary test (runs on Linux; no osa, no PostgreSQL).
|
||||
#
|
||||
# Proves two things the mother relies on, without standing up a real mother:
|
||||
# 2a) the colibri-mcp-ssh forced-command ALLOWLIST: "" and "tools" route to
|
||||
# colibri-mcp; everything else is rejected (exit 1, JSON error). Tested
|
||||
# both directly and through a real loopback sshd with command="..." forced.
|
||||
# 2b) the MCP handshake: `colibri-mcp tools` and the stdio tools/list JSON-RPC
|
||||
# return the Colibri tool catalog.
|
||||
#
|
||||
# The DB-backed node_register path needs real PostgreSQL and belongs on osa
|
||||
# (domedog is Docker-free). This layer stops at the SSH + MCP plumbing.
|
||||
#
|
||||
# Requires a prebuilt colibri-mcp. Point at it with COLIBRI_MCP_BIN, else the
|
||||
# colibri debug build is auto-detected.
|
||||
set -u
|
||||
|
||||
SCRIPT_DIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)
|
||||
WRAPPER="${WRAPPER:-${SCRIPT_DIR}/../../colibri/packaging/mother/colibri-mcp-ssh}"
|
||||
# auto-detect the prebuilt binary
|
||||
if [ -z "${COLIBRI_MCP_BIN:-}" ]; then
|
||||
for c in "${SCRIPT_DIR}/../../colibri/target/debug/colibri-mcp" \
|
||||
"${SCRIPT_DIR}/../../colibri/target/release/colibri-mcp"; do
|
||||
[ -x "$c" ] && COLIBRI_MCP_BIN="$c" && break
|
||||
done
|
||||
fi
|
||||
COLIBRI_MCP_BIN="${COLIBRI_MCP_BIN:-}"
|
||||
|
||||
# colibri is a sibling repo; if it isn't checked out + built, skip cleanly so a
|
||||
# standalone clawdie-iso run is not a false failure. Force-fail with STRICT=1.
|
||||
if [ ! -r "${WRAPPER}" ] || [ -z "${COLIBRI_MCP_BIN}" ] || [ ! -x "${COLIBRI_MCP_BIN}" ]; then
|
||||
echo "SKIP: colibri not available (needs sibling colibri checkout + built colibri-mcp)."
|
||||
echo " set COLIBRI_MCP_BIN / WRAPPER, or STRICT=1 to make this a failure."
|
||||
[ -n "${STRICT:-}" ] && exit 2
|
||||
exit 0
|
||||
fi
|
||||
COLIBRI_MCP_BIN=$(CDPATH= cd -- "$(dirname -- "${COLIBRI_MCP_BIN}")" && pwd)/$(basename -- "${COLIBRI_MCP_BIN}")
|
||||
|
||||
WORK=$(mktemp -d "${TMPDIR:-/tmp}/mcp-boundary.XXXXXX") || exit 2
|
||||
SSHD_PID=""
|
||||
# Stop the loopback sshd (if one was started) and remove the sandbox directory.
# Globals: SSHD_PID (read; empty when no sshd is running), WORK (read).
# Returns: the exit status of the final rm.
cleanup() {
  if [ -n "${SSHD_PID}" ]; then
    kill "${SSHD_PID}" 2>/dev/null
    # Reap the child so it is really gone before its files are deleted.
    wait "${SSHD_PID}" 2>/dev/null
  fi
  # ${WORK:?} refuses to run rm -rf with an empty path.
  rm -rf -- "${WORK:?}"
}
# Clean up on every exit path. A trapped INT/TERM does not terminate the script
# by itself (execution would resume with the sandbox already deleted), so those
# handlers exit explicitly and leave the actual cleanup to the EXIT trap.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM
|
||||
|
||||
# Test wrapper: identical allowlist logic, but the absolute /usr/local/bin path
|
||||
# is redirected to the prebuilt binary so we can run unprivileged.
|
||||
TWRAP="${WORK}/colibri-mcp-ssh"
|
||||
sed "s#/usr/local/bin/colibri-mcp#${COLIBRI_MCP_BIN}#g" "${WRAPPER}" >"${TWRAP}"
|
||||
chmod +x "${TWRAP}"
|
||||
|
||||
PASS=0; FAIL=0; SKIP=0
|
||||
# Count one passing check and print its label.
# Globals: PASS (incremented). Arguments: $1 - check description.
ok() {
  PASS=$((PASS + 1))
  printf ' ok %s\n' "$1"
}
|
||||
# Count one failing check and print its label.
# Globals: FAIL (incremented). Arguments: $1 - check description.
bad() {
  FAIL=$((FAIL + 1))
  printf ' FAIL %s\n' "$1"
}
|
||||
# Count one skipped check and print the reason.
# Globals: SKIP (incremented). Arguments: $1 - reason for skipping.
skip() {
  SKIP=$((SKIP + 1))
  printf ' SKIP %s\n' "$1"
}
|
||||
# check <exit-status> <msg>
# Record <msg> as passed when <exit-status> is 0, as failed otherwise.
# Typically called as `check $? "..."` right after the command under test.
# An explicit if/else is used so that exactly one of ok/bad runs, even if ok
# itself returns non-zero (e.g. its printf fails).
check() {
  if [ "$1" -eq 0 ]; then
    ok "$2"
  else
    bad "$2"
  fi
}
|
||||
|
||||
echo "== 2b: MCP handshake (direct) =="
|
||||
timeout 10 "${COLIBRI_MCP_BIN}" tools 2>/dev/null | grep -q 'colibri_status'
|
||||
check $? "colibri-mcp tools lists colibri_status"
|
||||
printf '%s\n' '{"jsonrpc":"2.0","id":1,"method":"tools/list"}' \
|
||||
| timeout 10 "${COLIBRI_MCP_BIN}" 2>/dev/null | grep -q '"colibri_intake_task"'
|
||||
check $? "stdio tools/list returns colibri_intake_task"
|
||||
|
||||
echo "== 2a: forced-command allowlist (direct wrapper) =="
|
||||
# allowed: "tools" -> tool list
|
||||
out=$(SSH_ORIGINAL_COMMAND="tools" timeout 10 sh "${TWRAP}" 2>/dev/null)
|
||||
{ echo "${out}" | grep -q 'colibri_status'; } ; check $? 'SSH_ORIGINAL_COMMAND="tools" -> tool list'
|
||||
# allowed: "" -> stdio MCP (feed a request, expect a JSON-RPC result)
|
||||
out=$(printf '%s\n' '{"jsonrpc":"2.0","id":7,"method":"tools/list"}' \
|
||||
| SSH_ORIGINAL_COMMAND="" timeout 10 sh "${TWRAP}" 2>/dev/null)
|
||||
{ echo "${out}" | grep -q '"id":7'; } ; check $? 'SSH_ORIGINAL_COMMAND="" -> stdio MCP responds'
|
||||
# rejected: arbitrary command must NOT run; exit 1 + JSON error on stderr
|
||||
for evil in 'rm -rf /' 'status' 'tools; rm -rf /' 'tools --help' '/bin/sh'; do
|
||||
err=$(SSH_ORIGINAL_COMMAND="${evil}" sh "${TWRAP}" 2>&1 >/dev/null); rc=$?
|
||||
{ [ "${rc}" -eq 1 ] && echo "${err}" | grep -q 'rejected'; }
|
||||
check $? "rejected: '${evil}' (exit 1 + json error)"
|
||||
done
|
||||
|
||||
echo "== 2a: forced-command through a REAL loopback sshd =="
|
||||
if ! command -v sshd >/dev/null 2>&1 || ! command -v ssh-keygen >/dev/null 2>&1; then
|
||||
skip "sshd/ssh-keygen unavailable — direct wrapper test stands in"
|
||||
else
|
||||
HK="${WORK}/hostkey"; CK="${WORK}/clientkey"; AK="${WORK}/authorized_keys"
|
||||
ssh-keygen -t ed25519 -N '' -f "${HK}" >/dev/null 2>&1
|
||||
ssh-keygen -t ed25519 -N '' -f "${CK}" >/dev/null 2>&1
|
||||
# force every connection through the wrapper, exactly like mother's authorized_keys
|
||||
printf 'command="%s",restrict %s\n' "${TWRAP}" "$(cat "${CK}.pub")" >"${AK}"
|
||||
chmod 600 "${AK}"
|
||||
# Pick a high loopback port in 60000-64999. RANDOM is a bash/ksh extension and
# is not set by a strict POSIX /bin/sh, so fall back to the shell's PID there;
# the :- form also keeps the expansion safe under `set -u`.
PORT=$(( (${RANDOM:-$$} % 5000) + 60000 ))
|
||||
SSHDBIN=$(command -v sshd)
|
||||
# Own config via -f so sshd ignores /etc/ssh + its (root-only) drop-ins.
|
||||
CFG="${WORK}/sshd_config"
|
||||
cat >"${CFG}" <<EOF
|
||||
Port ${PORT}
|
||||
ListenAddress 127.0.0.1
|
||||
HostKey ${HK}
|
||||
PidFile ${WORK}/sshd.pid
|
||||
AuthorizedKeysFile ${AK}
|
||||
StrictModes no
|
||||
UsePAM no
|
||||
PasswordAuthentication no
|
||||
PubkeyAuthentication yes
|
||||
KbdInteractiveAuthentication no
|
||||
EOF
|
||||
"${SSHDBIN}" -D -f "${CFG}" >"${WORK}/sshd.log" 2>&1 &
|
||||
SSHD_PID=$!
|
||||
sleep 1
|
||||
if ! kill -0 "${SSHD_PID}" 2>/dev/null; then
|
||||
skip "sshd failed to start (see log) — direct wrapper test stands in"
|
||||
sed 's/^/ sshd: /' "${WORK}/sshd.log" 2>/dev/null | head -3
|
||||
else
|
||||
SSHOPTS="-i ${CK} -p ${PORT} -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR"
|
||||
# allowed via forced command
|
||||
# shellcheck disable=SC2086
|
||||
ssh ${SSHOPTS} localhost tools 2>/dev/null | grep -q 'colibri_status'
|
||||
check $? "ssh ... tools -> tool list (forced command)"
|
||||
# rejected via forced command
|
||||
# shellcheck disable=SC2086
|
||||
out=$(ssh ${SSHOPTS} localhost 'rm -rf /' 2>&1); rc=$?
|
||||
{ [ "${rc}" -ne 0 ] && echo "${out}" | grep -q 'rejected'; }
|
||||
check $? "ssh ... 'rm -rf /' -> rejected (forced command)"
|
||||
fi
|
||||
fi
|
||||
|
||||
echo
|
||||
echo "RESULT: ${PASS} passed, ${FAIL} failed, ${SKIP} skipped"
|
||||
[ "${FAIL}" -eq 0 ] || exit 1
|
||||
exit 0
|
||||
176
tests/seed-import-test.sh
Executable file
176
tests/seed-import-test.sh
Executable file
|
|
@ -0,0 +1,176 @@
|
|||
#!/bin/sh
|
||||
# Layer 0 — clawdie-live-seed importer regression test (runs on any POSIX host).
|
||||
#
|
||||
# Exercises the REAL importer (live/operator-session/clawdie-live-seed) against a
|
||||
# synthetic CLAWDIESEED tree, with every path redirected to a temp sandbox via the
|
||||
# SEED_* override vars and CLAWDIE_SEED_TEST=1 (which skips the rc.subr handoff).
|
||||
# No FreeBSD, no mount, no root required — chowns fail silently by design.
|
||||
#
|
||||
# It encodes the seed->runtime propagation contract as assertions:
|
||||
# - operator home: .env (app keys), vault-bootstrap.env (BW_*), ssh material
|
||||
# - daemon home: outbound ssh material (mother-mcp key), NO authorized_keys
|
||||
# - provider.env: app provider keys only (no BW_*)
|
||||
# - staging: soul/ tree, harness.toml, agent-name, active-agent, raw env
|
||||
# - idempotency: re-import does not duplicate keys / known_hosts
|
||||
#
|
||||
# PENDING group: AGENTS.md -> zot home. This is the change Hermes is pushing.
|
||||
# It is xfail (informational) until the importer learns to install AGENTS.md;
|
||||
# re-run with REQUIRE_AGENTS_MD=1 after the patch to enforce it as required.
|
||||
#
|
||||
# Usage: sh tests/seed-import-test.sh # current contract must pass
|
||||
# REQUIRE_AGENTS_MD=1 sh tests/... # also enforce the AGENTS.md patch
|
||||
set -u
|
||||
|
||||
SCRIPT_DIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)
|
||||
IMPORTER="${IMPORTER:-${SCRIPT_DIR}/../live/operator-session/clawdie-live-seed}"
|
||||
|
||||
if [ ! -r "${IMPORTER}" ]; then
|
||||
echo "FATAL: importer not found at ${IMPORTER}" >&2
|
||||
exit 2
|
||||
fi
|
||||
|
||||
WORK=$(mktemp -d "${TMPDIR:-/tmp}/seed-import-test.XXXXXX") || exit 2
|
||||
# Remove the sandbox on every exit path. A trapped INT/TERM does not end the
# script on its own (execution would resume with the sandbox already deleted),
# so those handlers exit explicitly and leave the removal to the EXIT trap.
# ${WORK:?} refuses to run rm -rf with an empty path.
trap 'rm -rf -- "${WORK:?}"' EXIT
trap 'exit 130' INT
trap 'exit 143' TERM
|
||||
|
||||
# --- override every path the importer touches into the sandbox ---------------
|
||||
TEST_USER=$(id -un)
|
||||
export CLAWDIE_SEED_TEST=1
|
||||
export SEED_MOUNT="${WORK}/seed"
|
||||
export SEED_LOG="${WORK}/seed.log"
|
||||
export SEED_USER="${TEST_USER}"
|
||||
export SEED_USER_HOME="${WORK}/operator-home"
|
||||
export SEED_DAEMON_USER="${TEST_USER}"
|
||||
export SEED_DAEMON_HOME="${WORK}/daemon-home"
|
||||
export SEED_IMPORT_ROOT="${WORK}/import-root"
|
||||
export SEED_PROVIDER_ENV="${WORK}/provider.env"
|
||||
# Contract under test for the AGENTS.md patch: importer installs it to zot's
|
||||
# global slot under the daemon home. Keep this in one place so it tracks the
|
||||
# final decision (ZOT_HOME pin) without edits scattered through the test.
|
||||
ZOT_HOME_REL=".local/state/zot"
|
||||
EXPECT_AGENTS_MD="${SEED_DAEMON_HOME}/${ZOT_HOME_REL}/AGENTS.md"
|
||||
|
||||
mkdir -p "${SEED_USER_HOME}" "${SEED_DAEMON_HOME}" "${SEED_IMPORT_ROOT}"
|
||||
|
||||
# --- build a synthetic single-agent seed (the live-USB case) ------------------
|
||||
SEED_AGENT="${SEED_MOUNT}/clawdie"
|
||||
mkdir -p "${SEED_AGENT}/ssh" "${SEED_AGENT}/soul/memories"
|
||||
|
||||
cat >"${SEED_AGENT}/env" <<'EOF'
|
||||
# operator-provided secrets (plaintext FAT32 by design)
|
||||
DEEPSEEK_API_KEY=sk-test-deepseek
|
||||
DEEPSEEK_MODEL=deepseek-v4
|
||||
BW_CLIENTID=user.test-client
|
||||
BW_CLIENTSECRET=test-secret
|
||||
BW_PASSWORD=test-master-pw
|
||||
EOF
|
||||
|
||||
cat >"${SEED_AGENT}/harness.toml" <<'EOF'
|
||||
harness = "zot"
|
||||
EOF
|
||||
|
||||
cat >"${SEED_AGENT}/AGENTS.md" <<'EOF'
|
||||
# Operational rules (zot reads this as project context)
|
||||
- mother is OSA, reachable via the mother-mcp key on this seed
|
||||
- verbs: node_register, create-task, intake-task
|
||||
- this node is a USB operator, capability: freebsd
|
||||
- install Hermes from /home/clawdie/ai/hermes-bsd
|
||||
CANARY_AGENTS_MARKER=zot-sees-this
|
||||
EOF
|
||||
|
||||
# soul tree (dormant; must be STAGED, not activated, in 0.12)
|
||||
cat >"${SEED_AGENT}/soul/SOUL.md" <<'EOF'
|
||||
# SOUL (staged for Hermes, dormant)
|
||||
EOF
|
||||
cat >"${SEED_AGENT}/soul/memories/USER.md" <<'EOF'
|
||||
# USER (staged)
|
||||
EOF
|
||||
|
||||
# ssh material: inbound authorized_keys + outbound client (config, key, known_hosts)
|
||||
echo "ssh-ed25519 AAAAINBOUNDtestkey operator@laptop" >"${SEED_AGENT}/ssh/authorized_keys"
|
||||
cat >"${SEED_AGENT}/ssh/config" <<'EOF'
|
||||
Host mother
|
||||
HostName 100.72.229.63
|
||||
User colibri
|
||||
IdentityFile ~/.ssh/mother-mcp
|
||||
EOF
|
||||
echo "-----BEGIN OPENSSH PRIVATE KEY-----TEST-----END-----" >"${SEED_AGENT}/ssh/mother-mcp"
|
||||
echo "ssh-ed25519 AAAAMOTHERPUBkey colibri@mother" >"${SEED_AGENT}/ssh/mother-mcp.pub"
|
||||
echo "mother-host ssh-ed25519 AAAAMOTHERhostkey" >"${SEED_AGENT}/ssh/known_hosts"
|
||||
|
||||
# --- assertion harness -------------------------------------------------------
|
||||
PASS=0
|
||||
REQ_FAIL=0
|
||||
PEND_FAIL=0
|
||||
|
||||
# Count one passing assertion and print its label.
# Globals: PASS (incremented). Arguments: $1 - assertion description.
ok() {
  PASS=$((PASS + 1))
  printf ' ok %s\n' "$1"
}
|
||||
# Count one failed required assertion and print its label.
# Globals: REQ_FAIL (incremented). Arguments: $1 - assertion description.
fail() {
  REQ_FAIL=$((REQ_FAIL + 1))
  printf ' FAIL %s\n' "$1"
}
|
||||
# Count one failed pending (informational) assertion and print its label.
# Globals: PEND_FAIL (incremented). Arguments: $1 - assertion description.
pend() {
  PEND_FAIL=$((PEND_FAIL + 1))
  printf ' PEND %s\n' "$1"
}
|
||||
|
||||
# fail() vs pend() chosen by tag arg ($1 = required|pending)
|
||||
# report <required|pending> <passed 0|1> <msg>
# Route one assertion outcome to the matching counter:
#   passed = 1                                    -> ok
#   not passed, tag "pending", REQUIRE_AGENTS_MD
#   unset or empty                                -> pend
#   anything else                                 -> fail
report() {
  if [ "$2" -eq 1 ]; then
    ok "$3"
    return
  fi
  if [ "$1" = pending ] && [ -z "${REQUIRE_AGENTS_MD:-}" ]; then
    pend "$3"
    return
  fi
  fail "$3"
}
|
||||
|
||||
# exists <required|pending> <path> <msg>
# Assert that <path> exists (any file type) and report the outcome under <msg>.
# Written as if/else so report is called exactly once, whatever it returns.
exists() {
  if [ -e "$2" ]; then
    report "$1" 1 "$3"
  else
    report "$1" 0 "$3"
  fi
}
|
||||
# not_exists <required|pending> <path> <msg>
# Assert that <path> does NOT exist and report the outcome under <msg>.
# Written as if/else so report is called exactly once, whatever it returns.
not_exists() {
  if [ ! -e "$2" ]; then
    report "$1" 1 "$3"
  else
    report "$1" 0 "$3"
  fi
}
|
||||
# contains <required|pending> <file> <pattern> <msg>
# Assert that <file> is a regular file with at least one line matching the
# grep basic-regex <pattern>; report the outcome under <msg>.
# Written as if/else so report is called exactly once, and `--` keeps a
# pattern that starts with '-' from being parsed as a grep option.
contains() {
  if [ -f "$2" ] && grep -q -- "$3" "$2" 2>/dev/null; then
    report "$1" 1 "$4"
  else
    report "$1" 0 "$4"
  fi
}
|
||||
# absent_in <required|pending> <file> <pattern> <msg>
# Assert that no line of <file> matches the grep basic-regex <pattern>. A path
# that is not a regular file counts as "absent". Reports the outcome under <msg>.
# Written as if/else so report is called exactly once, and `--` keeps a
# pattern that starts with '-' from being parsed as a grep option.
absent_in() {
  if [ ! -f "$2" ] || ! grep -q -- "$3" "$2" 2>/dev/null; then
    report "$1" 1 "$4"
  else
    report "$1" 0 "$4"
  fi
}
|
||||
# count_is <required|pending> <file> <pattern> <expected> <msg>
# Assert that exactly <expected> lines of <file> match the grep basic-regex
# <pattern>; the observed count is appended to <msg> as "(got N)".
#
# `grep -c` already prints "0" (and exits 1) when nothing matches, so the count
# is only defaulted to 0 when grep printed nothing at all (e.g. missing file).
# Appending a fallback on any non-zero exit would produce a two-line "0\n0".
count_is() {
  _c=$(grep -c -- "$3" "$2" 2>/dev/null)
  [ -n "${_c}" ] || _c=0
  if [ "${_c}" = "$4" ]; then
    report "$1" 1 "$5 (got ${_c})"
  else
    report "$1" 0 "$5 (got ${_c})"
  fi
}
|
||||
|
||||
# --- run the real importer ---------------------------------------------------
|
||||
# shellcheck disable=SC1090
|
||||
. "${IMPORTER}"
|
||||
echo "== import pass 1 =="
|
||||
_seed_import_tree
|
||||
|
||||
OP_HOME="${SEED_USER_HOME}"
|
||||
DM_HOME="${SEED_DAEMON_HOME}"
|
||||
STAGE="${SEED_IMPORT_ROOT}/clawdie"
|
||||
|
||||
echo "-- operator home --"
|
||||
contains required "${OP_HOME}/.env" '^DEEPSEEK_API_KEY=sk-test-deepseek' "DEEPSEEK key in operator ~/.env"
|
||||
absent_in required "${OP_HOME}/.env" '^BW_PASSWORD=' "BW_* NOT in operator ~/.env"
|
||||
contains required "${OP_HOME}/.config/vault-bootstrap.env" '^BW_PASSWORD=test-master-pw' "BW_PASSWORD in vault-bootstrap.env"
|
||||
exists required "${OP_HOME}/.ssh/authorized_keys" "operator authorized_keys installed"
|
||||
exists required "${OP_HOME}/.ssh/config" "operator ssh config installed"
|
||||
exists required "${OP_HOME}/.ssh/mother-mcp" "operator mother-mcp private key installed"
|
||||
contains required "${OP_HOME}/.ssh/known_hosts" 'mother-host' "operator known_hosts has mother host key"
|
||||
|
||||
echo "-- daemon home (colibri spawns the outbound MCP SSH) --"
|
||||
exists required "${DM_HOME}/.ssh/mother-mcp" "daemon mother-mcp private key installed"
|
||||
exists required "${DM_HOME}/.ssh/config" "daemon ssh config installed"
|
||||
not_exists required "${DM_HOME}/.ssh/authorized_keys" "daemon has NO authorized_keys (inbound is operator-only)"
|
||||
|
||||
echo "-- provider.env (daemon autospawn keys; root-owned) --"
|
||||
contains required "${SEED_PROVIDER_ENV}" '^DEEPSEEK_API_KEY=sk-test-deepseek' "DEEPSEEK key in provider.env"
|
||||
absent_in required "${SEED_PROVIDER_ENV}" '^BW_PASSWORD=' "BW_* NOT in provider.env"
|
||||
|
||||
echo "-- staging (dormant payload) --"
|
||||
exists required "${STAGE}/soul/SOUL.md" "soul/ tree staged"
|
||||
exists required "${STAGE}/soul/memories/USER.md" "soul memories staged"
|
||||
contains required "${STAGE}/harness.toml" 'harness = "zot"' "harness.toml recorded"
|
||||
contains required "${STAGE}/agent-name" '^clawdie$' "agent-name staged"
|
||||
contains required "${SEED_IMPORT_ROOT}/active-agent" '^clawdie$' "active-agent recorded"
|
||||
exists required "${STAGE}/env" "raw env staged (0600)"
|
||||
|
||||
echo "-- PENDING: AGENTS.md -> zot global slot (Hermes's importer patch) --"
|
||||
exists pending "${EXPECT_AGENTS_MD}" "AGENTS.md installed to \$ZOT_HOME"
|
||||
contains pending "${EXPECT_AGENTS_MD}" 'CANARY_AGENTS_MARKER=zot-sees-this' "AGENTS.md content intact at zot slot"
|
||||
|
||||
# --- idempotency: a second import must not duplicate -------------------------
|
||||
echo "== import pass 2 (idempotency) =="
|
||||
_seed_import_tree
|
||||
count_is required "${OP_HOME}/.env" '^DEEPSEEK_API_KEY=' 1 "exactly one DEEPSEEK key in ~/.env after re-import"
|
||||
count_is required "${SEED_PROVIDER_ENV}" '^DEEPSEEK_API_KEY=' 1 "exactly one DEEPSEEK key in provider.env after re-import"
|
||||
count_is required "${OP_HOME}/.ssh/known_hosts" 'mother-host' 1 "known_hosts not duplicated after re-import"
|
||||
|
||||
# --- summary -----------------------------------------------------------------
|
||||
echo
|
||||
echo "RESULT: ${PASS} passed, ${REQ_FAIL} required-fail, ${PEND_FAIL} pending"
|
||||
if [ -n "${REQUIRE_AGENTS_MD:-}" ] && [ "${PEND_FAIL}" -gt 0 ]; then
|
||||
echo " (REQUIRE_AGENTS_MD set: pending failures count as required)"
|
||||
fi
|
||||
[ "${REQ_FAIL}" -eq 0 ] || exit 1
|
||||
exit 0
|
||||
Loading…
Add table
Reference in a new issue