clawdie-iso/tests/mcp-boundary-test.sh

#!/bin/sh
# Layer 2 — mother MCP boundary test (runs on Linux; no osa, no PostgreSQL).
#
# Proves two things the mother relies on, without standing up a real mother:
#   2a) the colibri-mcp-ssh forced-command ALLOWLIST: "" and "tools" route to
#       colibri-mcp; everything else is rejected (exit 1, JSON error). Tested
#       both directly and through a real loopback sshd with command="..." forced.
#   2b) the MCP handshake: `colibri-mcp tools` and the stdio tools/list JSON-RPC
#       return the Colibri tool catalog.
#
# The DB-backed node_register path needs real PostgreSQL and belongs on osa
# (domedog is Docker-free). This layer stops at the SSH + MCP plumbing.
#
# Requires a prebuilt colibri-mcp. Point at it with COLIBRI_MCP_BIN, else the
# colibri debug build (or, failing that, the release build) is auto-detected.
set -u

# Directory holding this script; the sibling colibri checkout is addressed
# relative to it.
SCRIPT_DIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)

# Caller may override WRAPPER; otherwise use the wrapper shipped in colibri.
: "${WRAPPER:=${SCRIPT_DIR}/../../colibri/packaging/mother/colibri-mcp-ssh}"

# Without an explicit COLIBRI_MCP_BIN, take the first executable build found,
# trying the debug profile before the release profile.
if [ -z "${COLIBRI_MCP_BIN:-}" ]; then
    for profile in debug release; do
        candidate="${SCRIPT_DIR}/../../colibri/target/${profile}/colibri-mcp"
        if [ -x "${candidate}" ]; then
            COLIBRI_MCP_BIN="${candidate}"
            break
        fi
    done
fi
# Guarantee the variable is set (possibly empty) so `set -u` cannot trip below.
: "${COLIBRI_MCP_BIN:=}"

# colibri lives in a sibling repository. When the wrapper is unreadable or no
# executable colibri-mcp is known, report SKIP and exit 0 so a standalone
# clawdie-iso run does not fail; a non-empty STRICT turns that into exit 2.
if ! { [ -r "${WRAPPER}" ] && [ -n "${COLIBRI_MCP_BIN}" ] && [ -x "${COLIBRI_MCP_BIN}" ]; }; then
    echo "SKIP: colibri not available (needs sibling colibri checkout + built colibri-mcp)."
    echo "      set COLIBRI_MCP_BIN / WRAPPER, or STRICT=1 to make this a failure."
    if [ -n "${STRICT:-}" ]; then
        exit 2
    fi
    exit 0
fi

# Rewrite the binary path as <absolute directory>/<file name>.
bin_dir=$(CDPATH= cd -- "$(dirname -- "${COLIBRI_MCP_BIN}")" && pwd)
COLIBRI_MCP_BIN="${bin_dir}/$(basename -- "${COLIBRI_MCP_BIN}")"

# Private scratch directory: holds the patched wrapper plus the throwaway SSH
# keys, sshd config and sshd log created further down.
WORK=$(mktemp -d "${TMPDIR:-/tmp}/mcp-boundary.XXXXXX") || exit 2
SSHD_PID=""

# cleanup — stop the loopback sshd (only if one was started) and remove the
# scratch directory. Takes no arguments; reads the globals SSHD_PID and WORK.
cleanup() {
    if [ -n "${SSHD_PID}" ]; then
        kill "${SSHD_PID}" 2>/dev/null
    fi
    rm -rf -- "${WORK}"
}

# Cleanup is attached to EXIT only. A signal trap that merely ran cleanup would
# return control to the script, which would then keep testing against a deleted
# scratch dir. INT/TERM therefore exit with the conventional 128+signal status,
# and that exit fires the EXIT trap exactly once.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

# Test wrapper: identical allowlist logic, but the absolute /usr/local/bin path
# is redirected to the prebuilt binary so we can run unprivileged.
TWRAP="${WORK}/colibri-mcp-ssh"
# The binary path lands on the replacement side of s#...#...#, where backslash,
# '&' and the '#' delimiter are special. Escape them so a checkout path that
# contains any of them is substituted literally.
MCP_BIN_SED=$(printf '%s\n' "${COLIBRI_MCP_BIN}" | sed 's/[\\&#]/\\&/g')
# A failed substitution would leave an empty wrapper and a wall of misleading
# test failures; stop with the script's setup-error status instead.
sed "s#/usr/local/bin/colibri-mcp#${MCP_BIN_SED}#g" "${WRAPPER}" >"${TWRAP}" || {
    echo "ERROR: cannot generate test wrapper ${TWRAP} from ${WRAPPER}" >&2
    exit 2
}
chmod +x "${TWRAP}" || exit 2

# Running tally of outcomes, printed in the final RESULT line.
PASS=0; FAIL=0; SKIP=0

# ok/bad/skip <msg> — bump the matching counter and print one report line.
ok()   { PASS=$((PASS+1)); printf '  ok   %s\n' "$1"; }
bad()  { FAIL=$((FAIL+1)); printf '  FAIL %s\n' "$1"; }
skip() { SKIP=$((SKIP+1)); printf '  SKIP %s\n' "$1"; }

# check <status> <msg> — record a pass when <status> is 0, a failure otherwise.
# Written as if/else rather than `ok || bad`: with the short-circuit form a
# failing printf inside ok (for example a closed stdout) would also run bad and
# tally the same check twice.
check() {
    if [ "$1" -eq 0 ]; then
        ok "$2"
    else
        bad "$2"
    fi
}

echo "== 2b: MCP handshake (direct) =="

# `colibri-mcp tools` must print output that mentions colibri_status.
tools_rc=0
timeout 10 "${COLIBRI_MCP_BIN}" tools 2>/dev/null \
    | grep -q 'colibri_status' \
    || tools_rc=$?
check "${tools_rc}" "colibri-mcp tools lists colibri_status"

# A tools/list JSON-RPC request on stdin must produce output that mentions
# colibri_intake_task.
list_request='{"jsonrpc":"2.0","id":1,"method":"tools/list"}'
list_rc=0
printf '%s\n' "${list_request}" \
    | timeout 10 "${COLIBRI_MCP_BIN}" 2>/dev/null \
    | grep -q '"colibri_intake_task"' \
    || list_rc=$?
check "${list_rc}" "stdio tools/list returns colibri_intake_task"

echo "== 2a: forced-command allowlist (direct wrapper) =="
# allowed: "tools" -> tool list
out=$(SSH_ORIGINAL_COMMAND="tools" timeout 10 sh "${TWRAP}" 2>/dev/null)
printf '%s\n' "${out}" | grep -q 'colibri_status'
check $? 'SSH_ORIGINAL_COMMAND="tools" -> tool list'
# allowed: "" -> stdio MCP (feed a request, expect a JSON-RPC result)
out=$(printf '%s\n' '{"jsonrpc":"2.0","id":7,"method":"tools/list"}' \
        | SSH_ORIGINAL_COMMAND="" timeout 10 sh "${TWRAP}" 2>/dev/null)
printf '%s\n' "${out}" | grep -q '"id":7'
check $? 'SSH_ORIGINAL_COMMAND="" -> stdio MCP responds'
# rejected: arbitrary command must NOT run; exit 1 + JSON error on stderr.
# Each attempt gets the same 10 s timeout as the allowed cases and an empty
# stdin, so a regressed allowlist that starts something which waits for input
# fails the check (timeout exits 124, not 1) instead of hanging the run.
for evil in 'rm -rf /' 'status' 'tools; rm -rf /' 'tools --help' '/bin/sh'; do
    # 2>&1 before >/dev/null: capture stderr only, discard stdout.
    err=$(SSH_ORIGINAL_COMMAND="${evil}" timeout 10 sh "${TWRAP}" 2>&1 >/dev/null </dev/null); rc=$?
    { [ "${rc}" -eq 1 ] && printf '%s\n' "${err}" | grep -q 'rejected'; }
    check $? "rejected: '${evil}' (exit 1 + json error)"
done

echo "== 2a: forced-command through a REAL loopback sshd =="
# The ssh client is needed as well as sshd and ssh-keygen; when any of the
# three is missing this section is skipped rather than reported as failures.
if ! command -v sshd >/dev/null 2>&1 \
    || ! command -v ssh >/dev/null 2>&1 \
    || ! command -v ssh-keygen >/dev/null 2>&1; then
    skip "sshd/ssh/ssh-keygen unavailable — direct wrapper test stands in"
else
    # Throwaway host key, client key and authorized_keys, all inside WORK.
    HK="${WORK}/hostkey"; CK="${WORK}/clientkey"; AK="${WORK}/authorized_keys"
    ssh-keygen -t ed25519 -N '' -f "${HK}" >/dev/null 2>&1
    ssh-keygen -t ed25519 -N '' -f "${CK}" >/dev/null 2>&1
    # force every connection through the wrapper, exactly like mother's authorized_keys
    printf 'command="%s",restrict %s\n' "${TWRAP}" "$(cat "${CK}.pub")" >"${AK}"
    chmod 600 "${AK}"
    # Port in 60000-64999 derived from this shell's PID. $RANDOM is not used
    # because it is a bash/ksh extension that a POSIX /bin/sh need not provide,
    # and this script runs under #!/bin/sh with `set -u`.
    PORT=$(( ($$ % 5000) + 60000 ))
    SSHDBIN=$(command -v sshd)
    # Own config via -f so sshd ignores /etc/ssh + its (root-only) drop-ins.
    CFG="${WORK}/sshd_config"
    cat >"${CFG}" <<EOF
Port ${PORT}
ListenAddress 127.0.0.1
HostKey ${HK}
PidFile ${WORK}/sshd.pid
AuthorizedKeysFile ${AK}
StrictModes no
UsePAM no
PasswordAuthentication no
PubkeyAuthentication yes
KbdInteractiveAuthentication no
EOF
    # -D keeps sshd in the foreground of this background job, so $! is the
    # PID that cleanup later kills.
    "${SSHDBIN}" -D -f "${CFG}" >"${WORK}/sshd.log" 2>&1 &
    SSHD_PID=$!
    sleep 1
    if ! kill -0 "${SSHD_PID}" 2>/dev/null; then
        skip "sshd failed to start (see log) — direct wrapper test stands in"
        sed 's/^/      sshd: /' "${WORK}/sshd.log" 2>/dev/null | head -3
    else
        # Client options, chosen so the result depends only on this test setup:
        #   -F /dev/null        ignore the user's and system ssh_config
        #   IdentitiesOnly=yes  offer only the throwaway key, never agent keys
        #   BatchMode=yes       never stop to prompt
        #   ConnectTimeout=5    bound the TCP connect
        SSHOPTS="-F /dev/null -i ${CK} -p ${PORT} -o BatchMode=yes -o IdentitiesOnly=yes -o ConnectTimeout=5 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR"
        # allowed via forced command
        # `timeout 10` matches the direct tests; -n keeps ssh off our stdin.
        # shellcheck disable=SC2086
        timeout 10 ssh -n ${SSHOPTS} localhost tools 2>/dev/null | grep -q 'colibri_status'
        check $? "ssh ... tools -> tool list (forced command)"
        # rejected via forced command
        # shellcheck disable=SC2086
        out=$(timeout 10 ssh -n ${SSHOPTS} localhost 'rm -rf /' 2>&1); rc=$?
        { [ "${rc}" -ne 0 ] && printf '%s\n' "${out}" | grep -q 'rejected'; }
        check $? "ssh ... 'rm -rf /' -> rejected (forced command)"
    fi
fi

# Blank separator line, then the summary; exit 0 only when nothing failed.
printf '\n'
echo "RESULT: ${PASS} passed, ${FAIL} failed, ${SKIP} skipped"
if [ "${FAIL}" -eq 0 ]; then
    exit 0
fi
exit 1
test(seed): add Layer 0 importer + Layer 2 MCP-boundary regression tests Layer 0 (seed-import-test.sh): runs the real clawdie-live-seed importer in a sandbox (CLAWDIE_SEED_TEST=1, all paths overridden) and asserts the seed->runtime propagation contract — env split, provider.env, dual-home ssh, soul staging, and AGENTS.md -> $ZOT_HOME (the global slot the autospawned zot reads). Idempotent re-import is checked. REQUIRE_AGENTS_MD=1 enforces the AGENTS.md install added on this branch; it passes 23/23 here. Layer 2 (mcp-boundary-test.sh): exercises the mother MCP-over-SSH boundary on Linux — colibri-mcp-ssh forced-command allowlist (""/"tools" route, everything else rejected) and the MCP tools/list handshake, including a real loopback sshd with command=. Skips cleanly when colibri isn't a sibling checkout. Verified end to end on Linux before merge: importer target path and the ZOT_HOME pin in colibri (fix/zot-home) both resolve to /var/db/colibri/.local/state/zot. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> 2026-06-25 05:03:23 +02:00			`#!/bin/sh`
			`# Layer 2 — mother MCP boundary test (runs on Linux; no osa, no PostgreSQL).`
			`#`
			`# Proves two things the mother relies on, without standing up a real mother:`
			`# 2a) the colibri-mcp-ssh forced-command ALLOWLIST: "" and "tools" route to`
			`# colibri-mcp; everything else is rejected (exit 1, JSON error). Tested`
			`# both directly and through a real loopback sshd with command="..." forced.`
			# 2b) the MCP handshake: `colibri-mcp tools` and the stdio tools/list JSON-RPC
			`# return the Colibri tool catalog.`
			`#`
			`# The DB-backed node_register path needs real PostgreSQL and belongs on osa`
			`# (domedog is Docker-free). This layer stops at the SSH + MCP plumbing.`
			`#`
			`# Requires a prebuilt colibri-mcp. Point at it with COLIBRI_MCP_BIN, else the`
			`# colibri debug build is auto-detected.`
			`set -u`

			`SCRIPT_DIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)`
			`WRAPPER="${WRAPPER:-${SCRIPT_DIR}/../../colibri/packaging/mother/colibri-mcp-ssh}"`
			`# auto-detect the prebuilt binary`
			`if [ -z "${COLIBRI_MCP_BIN:-}" ]; then`
			`for c in "${SCRIPT_DIR}/../../colibri/target/debug/colibri-mcp" \`
			`"${SCRIPT_DIR}/../../colibri/target/release/colibri-mcp"; do`
			`[ -x "$c" ] && COLIBRI_MCP_BIN="$c" && break`
			`done`
			`fi`
			`COLIBRI_MCP_BIN="${COLIBRI_MCP_BIN:-}"`

			`# colibri is a sibling repo; if it isn't checked out + built, skip cleanly so a`
			`# standalone clawdie-iso run is not a false failure. Force-fail with STRICT=1.`
			`if [ ! -r "${WRAPPER}" ] \|\| [ -z "${COLIBRI_MCP_BIN}" ] \|\| [ ! -x "${COLIBRI_MCP_BIN}" ]; then`
			`echo "SKIP: colibri not available (needs sibling colibri checkout + built colibri-mcp)."`
			`echo " set COLIBRI_MCP_BIN / WRAPPER, or STRICT=1 to make this a failure."`
			`[ -n "${STRICT:-}" ] && exit 2`
			`exit 0`
			`fi`
			`COLIBRI_MCP_BIN=$(CDPATH= cd -- "$(dirname -- "${COLIBRI_MCP_BIN}")" && pwd)/$(basename -- "${COLIBRI_MCP_BIN}")`

			`WORK=$(mktemp -d "${TMPDIR:-/tmp}/mcp-boundary.XXXXXX") \|\| exit 2`
			`SSHD_PID=""`
			`cleanup() { [ -n "${SSHD_PID}" ] && kill "${SSHD_PID}" 2>/dev/null; rm -rf "${WORK}"; }`
			`trap cleanup EXIT INT TERM`

			`# Test wrapper: identical allowlist logic, but the absolute /usr/local/bin path`
			`# is redirected to the prebuilt binary so we can run unprivileged.`
			`TWRAP="${WORK}/colibri-mcp-ssh"`
			`sed "s#/usr/local/bin/colibri-mcp#${COLIBRI_MCP_BIN}#g" "${WRAPPER}" >"${TWRAP}"`
			`chmod +x "${TWRAP}"`

			`PASS=0; FAIL=0; SKIP=0`
			`ok() { PASS=$((PASS+1)); printf ' ok %s\n' "$1"; }`
			`bad() { FAIL=$((FAIL+1)); printf ' FAIL %s\n' "$1"; }`
			`skip() { SKIP=$((SKIP+1)); printf ' SKIP %s\n' "$1"; }`
			`check(){ [ "$1" -eq 0 ] && ok "$2" \|\| bad "$2"; } # check <0\|1-result> <msg>`

			`echo "== 2b: MCP handshake (direct) =="`
			`timeout 10 "${COLIBRI_MCP_BIN}" tools 2>/dev/null \| grep -q 'colibri_status'`
			`check $? "colibri-mcp tools lists colibri_status"`
			`printf '%s\n' '{"jsonrpc":"2.0","id":1,"method":"tools/list"}' \`
			`\| timeout 10 "${COLIBRI_MCP_BIN}" 2>/dev/null \| grep -q '"colibri_intake_task"'`
			`check $? "stdio tools/list returns colibri_intake_task"`

			`echo "== 2a: forced-command allowlist (direct wrapper) =="`
			`# allowed: "tools" -> tool list`
			`out=$(SSH_ORIGINAL_COMMAND="tools" timeout 10 sh "${TWRAP}" 2>/dev/null)`
			`{ echo "${out}" \| grep -q 'colibri_status'; } ; check $? 'SSH_ORIGINAL_COMMAND="tools" -> tool list'`
			`# allowed: "" -> stdio MCP (feed a request, expect a JSON-RPC result)`
			`out=$(printf '%s\n' '{"jsonrpc":"2.0","id":7,"method":"tools/list"}' \`
			`\| SSH_ORIGINAL_COMMAND="" timeout 10 sh "${TWRAP}" 2>/dev/null)`
			`{ echo "${out}" \| grep -q '"id":7'; } ; check $? 'SSH_ORIGINAL_COMMAND="" -> stdio MCP responds'`
			`# rejected: arbitrary command must NOT run; exit 1 + JSON error on stderr`
			`for evil in 'rm -rf /' 'status' 'tools; rm -rf /' 'tools --help' '/bin/sh'; do`
			`err=$(SSH_ORIGINAL_COMMAND="${evil}" sh "${TWRAP}" 2>&1 >/dev/null); rc=$?`
			`{ [ "${rc}" -eq 1 ] && echo "${err}" \| grep -q 'rejected'; }`
			`check $? "rejected: '${evil}' (exit 1 + json error)"`
			`done`

			`echo "== 2a: forced-command through a REAL loopback sshd =="`
			`if ! command -v sshd >/dev/null 2>&1 \|\| ! command -v ssh-keygen >/dev/null 2>&1; then`
			`skip "sshd/ssh-keygen unavailable — direct wrapper test stands in"`
			`else`
			`HK="${WORK}/hostkey"; CK="${WORK}/clientkey"; AK="${WORK}/authorized_keys"`
			`ssh-keygen -t ed25519 -N '' -f "${HK}" >/dev/null 2>&1`
			`ssh-keygen -t ed25519 -N '' -f "${CK}" >/dev/null 2>&1`
			`# force every connection through the wrapper, exactly like mother's authorized_keys`
			`printf 'command="%s",restrict %s\n' "${TWRAP}" "$(cat "${CK}.pub")" >"${AK}"`
			`chmod 600 "${AK}"`
			`PORT=$(( (RANDOM % 5000) + 60000 ))`
			`SSHDBIN=$(command -v sshd)`
			`# Own config via -f so sshd ignores /etc/ssh + its (root-only) drop-ins.`
			`CFG="${WORK}/sshd_config"`
			`cat >"${CFG}" <<EOF`
			`Port ${PORT}`
			`ListenAddress 127.0.0.1`
			`HostKey ${HK}`
			`PidFile ${WORK}/sshd.pid`
			`AuthorizedKeysFile ${AK}`
			`StrictModes no`
			`UsePAM no`
			`PasswordAuthentication no`
			`PubkeyAuthentication yes`
			`KbdInteractiveAuthentication no`
			`EOF`
			`"${SSHDBIN}" -D -f "${CFG}" >"${WORK}/sshd.log" 2>&1 &`
			`SSHD_PID=$!`
			`sleep 1`
			`if ! kill -0 "${SSHD_PID}" 2>/dev/null; then`
			`skip "sshd failed to start (see log) — direct wrapper test stands in"`
			`sed 's/^/ sshd: /' "${WORK}/sshd.log" 2>/dev/null \| head -3`
			`else`
			`SSHOPTS="-i ${CK} -p ${PORT} -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR"`
			`# allowed via forced command`
			`# shellcheck disable=SC2086`
			`ssh ${SSHOPTS} localhost tools 2>/dev/null \| grep -q 'colibri_status'`
			`check $? "ssh ... tools -> tool list (forced command)"`
			`# rejected via forced command`
			`# shellcheck disable=SC2086`
			`out=$(ssh ${SSHOPTS} localhost 'rm -rf /' 2>&1); rc=$?`
			`{ [ "${rc}" -ne 0 ] && echo "${out}" \| grep -q 'rejected'; }`
			`check $? "ssh ... 'rm -rf /' -> rejected (forced command)"`
			`fi`
			`fi`

			`echo`
			`echo "RESULT: ${PASS} passed, ${FAIL} failed, ${SKIP} skipped"`
			`[ "${FAIL}" -eq 0 ] \|\| exit 1`
			`exit 0`