clawdie-iso/scripts/run-bhyve-test.sh

114 lines
4.9 KiB
Bash
Raw Normal View History

#!/bin/sh
# Wrapper script to run bhyve test with full logging, error capture, and a
# QA gate that asserts the boot actually came up clean.
#
# bhyve-test.sh boots the image with com1,stdio, so the guest serial console is
# captured in the log below. The QA gate scans that console for required boot
# markers and forbidden failures (panics, restart loops, the colibri
# "permission denied" regression), and exits non-zero if a critical check fails
# — so this can be used as a build gate (see private appliance runbook §3).
#
# Console-scannable checks are automated. Deeper in-guest checks (socket mode,
# glasspane query, GPU branch) are printed as a checklist to run in the guest
# (10.99.0.2) — they live in log files, not the serial console.
# ── Log setup ───────────────────────────────────────────────────────────────
# All run artifacts are written under ./logs relative to the invoking directory.
LOG_DIR="${PWD}/logs"
mkdir -p "$LOG_DIR" || {
  echo "${0##*/}: cannot create log directory: $LOG_DIR" >&2
  exit 1
}

# Log filename uses sortable ISO 8601 basic (machine artifact name, per the
# AGENTS.md date rule's internal-storage exemption). User-facing timestamps
# below use European display format (DD.mmm.YYYY HH:MM:SS).
LOG_FILE="${LOG_DIR}/bhyve-test-$(date +%Y%m%d-%H%M%S).log"
echo "Test started: $(date '+%d.%b.%Y %H:%M:%S')" > "$LOG_FILE"
echo "Log: $LOG_FILE" >&2
echo "" >> "$LOG_FILE"

# ── Harness run ─────────────────────────────────────────────────────────────
# Record what is about to be executed before handing over to the harness.
{
  echo "=== BHYVE TEST START ==="
  echo "Command: $(pwd)/scripts/bhyve-test.sh $*"
  echo "Started: $(date '+%d.%b.%Y %H:%M:%S')"
  echo ""
} >> "$LOG_FILE"

# Run with tee to show output AND log it. Pass through any args (ISO path, --name).
#
# The status of a pipeline is the status of its LAST command (tee), and plain
# /bin/sh has no portable pipefail. Taking $? after the pipeline would therefore
# report tee's success even when the harness failed or timed out. Instead the
# first pipeline stage writes the harness status to a side file, which is read
# back once the pipeline has finished.
STATUS_FILE=$(mktemp "${TMPDIR:-/tmp}/bhyve-test-status.XXXXXX") || {
  echo "${0##*/}: cannot create temporary status file" >&2
  exit 1
}
{
  timeout 1800 ./scripts/bhyve-test.sh "$@" 2>&1
  echo "$?" > "$STATUS_FILE"
} | tee -a "$LOG_FILE"
EXIT_CODE=$(cat "$STATUS_FILE" 2>/dev/null)
rm -f -- "$STATUS_FILE"
# If the status was never recorded (or is garbage), assume failure, not success.
case "$EXIT_CODE" in
  '' | *[!0-9]*) EXIT_CODE=1 ;;
esac

{
  echo ""
  echo "=== BHYVE TEST END ==="
  echo "Ended: $(date '+%d.%b.%Y %H:%M:%S')"
  echo "Exit code: $EXIT_CODE"
  echo ""
} >> "$LOG_FILE"

# ── Error scan ──────────────────────────────────────────────────────────────
# Collect the matching lines BEFORE writing the section header: the header text
# itself ("ERROR SCAN", "Errors found:") matches the pattern and would otherwise
# always be reported as an error. Testing the captured text (rather than the
# pipeline status, which is tail's) is what makes the "No errors detected"
# fallback reachable.
ERROR_LINES=$(grep -iE -e "error|failed|panic|fatal" -- "$LOG_FILE" | tail -20)
{
  echo "=== ERROR SCAN ==="
  echo "Errors found:"
  if [ -n "$ERROR_LINES" ]; then
    printf '%s\n' "$ERROR_LINES"
  else
    echo "No errors detected"
  fi
  echo ""
} >> "$LOG_FILE"
# ── QA gate ────────────────────────────────────────────────────────────────
# CRITICAL checks gate the exit code. INFO checks are reported only (their
# markers may not always reach the serial console depending on rc verbosity —
# confirm/adjust patterns against a real boot before promoting them to CRITICAL).
# Gate state: set to 1 as soon as any FAIL-level check does not hold.
QA_FAIL=0

# qa_present DESC PATTERN LEVEL
#
# Assert that PATTERN (extended regex, matched case-insensitively) occurs at
# least once in $LOG_FILE.
#   DESC    - human-readable name of the check, echoed in the result line
#   PATTERN - ERE handed to grep
#   LEVEL   - label printed when the pattern is missing; the literal value
#             "FAIL" additionally sets QA_FAIL=1, any other label only reports
# Globals: reads LOG_FILE, may write QA_FAIL.
# Output:  one result line on stdout.
qa_present() {
  # -e keeps a pattern that starts with '-' from being parsed as a grep option;
  # `--` does the same for the file operand.
  if grep -qiE -e "$2" -- "$LOG_FILE"; then
    echo " PASS $1"
  else
    echo " ${3} $1 (missing /$2/)"
    if [ "$3" = "FAIL" ]; then
      QA_FAIL=1
    fi
  fi
}
# qa_absent DESC PATTERN MAXALLOWED LEVEL
#
# Assert that PATTERN (extended regex, matched case-insensitively) occurs on at
# most MAXALLOWED lines of $LOG_FILE.
#   DESC       - human-readable name of the check, echoed in the result line
#   PATTERN    - ERE handed to grep
#   MAXALLOWED - highest matching-line count that still passes
#   LEVEL      - label printed when the limit is exceeded; the literal value
#                "FAIL" additionally sets QA_FAIL=1, any other label only reports
# Globals: reads LOG_FILE, may write QA_FAIL; uses _n and _rc as scratch.
# Output:  one result line on stdout.
qa_absent() {
  # grep -c prints the count (0 on no match) but exits 1 when 0 — capture the
  # number directly; do NOT use `|| echo 0` (that doubles the output).
  # The pattern is passed with -e: a pattern such as '---<<BOOT>>---' would
  # otherwise be parsed as an (invalid) option, and with stderr discarded the
  # empty result would be read as "0 hits" and pass unconditionally.
  _n=$(grep -ciE -e "$2" -- "$LOG_FILE" 2>/dev/null)
  _rc=$?

  # grep exits 0 (matches) or 1 (no matches) on a successful scan; anything
  # higher means the scan itself failed, which must not count as a pass.
  if [ "$_rc" -gt 1 ]; then
    echo " ${4} $1 (log scan failed for /$2/)"
    if [ "$4" = "FAIL" ]; then
      QA_FAIL=1
    fi
    return 0
  fi

  _n=${_n:-0}
  if [ "$_n" -le "$3" ]; then
    echo " PASS $1 (${_n} hits, max $3)"
  else
    echo " ${4} $1 (${_n} hits, max $3)"
    if [ "$4" = "FAIL" ]; then
      QA_FAIL=1
    fi
  fi
}
# Run the QA checks and publish the report to both stdout and the log.
#
# The checks must run in the CURRENT shell: piping the group straight into tee
# would execute it in a subshell, and every QA_FAIL=1 set by a failed critical
# check would be discarded, so the gate could never fail. The report is
# therefore collected in a temporary file first and copied out afterwards. This
# also means the log is not being appended to while the checks are scanning it.
QA_REPORT=$(mktemp "${TMPDIR:-/tmp}/bhyve-qa-gate.XXXXXX") || {
  echo "QA GATE: cannot create temporary report file" >&2
  exit 1
}
{
  echo "=== QA GATE ==="
  echo "-- critical (gate the build) --"
  qa_absent "no kernel panic" 'panic:|Fatal trap|panic @' 0 FAIL
  qa_absent "no repeated reboot loop" '---<<BOOT>>---' 1 FAIL
  qa_present "boot reached login/getty" 'login:|FreeBSD/amd64 \(.*\) \(ttyu|sddm' FAIL
  qa_absent "no colibri permission-denied" 'permission denied|EACCES|Permission denied' 0 FAIL
  echo "-- informational (best-effort console markers) --"
  qa_present "colibri-daemon socket ready" 'colibri-daemon socket ready|Colibri control-plane socket listening' INFO
  qa_absent "no store-open panic" 'failed to open coordination store' 0 INFO
  qa_present "clawdie-live-gpu ran" 'selected live KMS|recommended branch|clawdie.live.gpu' INFO
  echo ""
  echo "-- deeper in-guest checks (run in the guest @ 10.99.0.2; not on console) --"
  # printf '%s\n' prints each argument verbatim; echo's handling of the
  # backslash in the sample command differs between sh implementations.
  printf '%s\n' \
    " service colibri_daemon status # one stable PID" \
    " stat -f '%Sp %Su:%Sg' /var/run/colibri/colibri.sock # expect srwxrwx--- colibri:colibri" \
    " printf '{\"cmd\":\"status\"}\\n' | nc -U /var/run/colibri/colibri.sock # no EACCES" \
    " cat /var/log/clawdie-live-gpu.log # detected id + branch matches build" \
    " kldstat | grep nvidia # NVIDIA builds only"
} > "$QA_REPORT"
tee -a "$LOG_FILE" < "$QA_REPORT"
rm -f -- "$QA_REPORT"

echo "" | tee -a "$LOG_FILE"
if [ "$EXIT_CODE" -ne 0 ]; then
  echo "QA GATE: boot harness exited non-zero (${EXIT_CODE})" | tee -a "$LOG_FILE"
fi
if [ "$QA_FAIL" -ne 0 ]; then
  echo "QA GATE: FAIL — critical assertion(s) failed" | tee -a "$LOG_FILE"
else
  echo "QA GATE: critical assertions PASS" | tee -a "$LOG_FILE"
fi
echo ""
echo "Log saved to: $LOG_FILE"
# Fail the gate if the harness failed OR any critical assertion failed.
[ "$EXIT_CODE" -eq 0 ] && [ "$QA_FAIL" -eq 0 ]