bhyve-qa-gate #37
1 changed files with 88 additions and 10 deletions
|
|
@ -1,35 +1,113 @@
|
|||
#!/bin/sh
|
||||
# Wrapper script to run bhyve test with full logging and error capture
|
||||
# Wrapper script to run bhyve test with full logging, error capture, and a
|
||||
# QA gate that asserts the boot actually came up clean.
|
||||
#
|
||||
# bhyve-test.sh boots the image with com1,stdio, so the guest serial console is
|
||||
# captured in the log below. The QA gate scans that console for required boot
|
||||
# markers and forbidden failures (panics, restart loops, the colibri
|
||||
# "permission denied" regression), and exits non-zero if a critical check fails
|
||||
# — so this can be used as a build gate (see private appliance runbook §3).
|
||||
#
|
||||
# Console-scannable checks are automated. Deeper in-guest checks (socket mode,
|
||||
# glasspane query, GPU branch) are printed as a checklist to run in the guest
|
||||
# (10.99.0.2) — they live in log files, not the serial console.
|
||||
|
||||
# Directory that collects one log file per run, created under the directory the
# wrapper is invoked from. Nothing below can work without it, so bail out early
# if it cannot be created.
LOG_DIR="${PWD}/logs"
if ! mkdir -p "$LOG_DIR"; then
  echo "cannot create log directory: $LOG_DIR" >&2
  exit 1
fi

# Log filename uses sortable ISO 8601 basic (machine artifact name, per the
# AGENTS.md date rule's internal-storage exemption). User-facing timestamps
# below use European display format (DD.mmm.YYYY HH:MM:SS).
LOG_FILE="${LOG_DIR}/bhyve-test-$(date +%Y%m%d-%H%M%S).log"

# First write truncates/creates the log; everything after appends.
echo "Test started: $(date '+%d.%b.%Y %H:%M:%S')" > "$LOG_FILE"
echo "Log: $LOG_FILE" >&2
echo "" >> "$LOG_FILE"

# Header for the harness run. The command line is recorded exactly once and
# names the script that is actually executed, including pass-through args.
echo "=== BHYVE TEST START ===" >> "$LOG_FILE"
echo "Command: $(pwd)/scripts/bhyve-test.sh $*" >> "$LOG_FILE"
echo "Started: $(date '+%d.%b.%Y %H:%M:%S')" >> "$LOG_FILE"
echo "" >> "$LOG_FILE"
|
||||
|
||||
# Run the harness once, mirroring its combined stdout/stderr to the terminal
# AND the log. Pass through any args (ISO path, --name).
#
# POSIX sh has no pipefail, so `$?` after `cmd | tee` is tee's status, not the
# harness's. The harness status is therefore written to a scratch file from
# inside the pipeline and read back once the pipeline has finished.
STATUS_FILE=$(mktemp "${TMPDIR:-/tmp}/bhyve-status.XXXXXX") || exit 1
{
  timeout 1800 ./scripts/bhyve-test.sh "$@" 2>&1
  echo "$?" > "$STATUS_FILE"
} | tee -a "$LOG_FILE"
EXIT_CODE=$(cat "$STATUS_FILE")
rm -f -- "$STATUS_FILE"
# An empty status means the pipeline stage died before recording one; treat
# that as a failure rather than as success.
EXIT_CODE=${EXIT_CODE:-1}
|
||||
|
||||
# Footer for the harness run: end marker, end time and the harness exit code.
echo "" >> "$LOG_FILE"
echo "=== BHYVE TEST END ===" >> "$LOG_FILE"
echo "Ended: $(date '+%d.%b.%Y %H:%M:%S')" >> "$LOG_FILE"
echo "Exit code: $EXIT_CODE" >> "$LOG_FILE"
echo "" >> "$LOG_FILE"

# Check for errors in log.
# The scan is done BEFORE the section header is written: the header lines
# contain the word "error" and would otherwise match themselves, and grep must
# not read a file that is being appended to in the same pipeline. The result is
# held in a variable so the "No errors detected" fallback depends on whether
# anything matched, not on tail's exit status (which is always 0).
ERROR_HITS=$(grep -iE -e "error|failed|panic|fatal" -- "$LOG_FILE" | tail -20)
{
  echo "=== ERROR SCAN ==="
  echo "Errors found:"
  if [ -n "$ERROR_HITS" ]; then
    printf '%s\n' "$ERROR_HITS"
  else
    echo "No errors detected"
  fi
  echo ""
} >> "$LOG_FILE"
|
||||
|
||||
# ── QA gate ────────────────────────────────────────────────────────────────
|
||||
# CRITICAL checks gate the exit code. INFO checks are reported only (their
|
||||
# markers may not always reach the serial console depending on rc verbosity —
|
||||
# confirm/adjust patterns against a real boot before promoting them to CRITICAL).
|
||||
QA_FAIL=0
|
||||
|
||||
# qa_present DESC PATTERN LEVEL
#
# Asserts that at least one line of $LOG_FILE matches PATTERN (extended regex,
# case-insensitive).
#   $1 DESC     human-readable name of the check, echoed in the report line
#   $2 PATTERN  ERE to search for
#   $3 LEVEL    label printed when the marker is missing; the value "FAIL"
#               additionally sets the global QA_FAIL=1
# Prints one report line to stdout. Always returns 0: the verdict is carried by
# the report line and QA_FAIL, not by the return status.
qa_present() {
  # -e keeps a pattern that starts with '-' from being parsed as an option;
  # -- does the same for the file name.
  if grep -qiE -e "$2" -- "$LOG_FILE"; then
    echo " PASS $1"
  else
    echo " ${3} $1 (missing /$2/)"
    if [ "$3" = "FAIL" ]; then
      QA_FAIL=1
    fi
  fi
  return 0
}
|
||||
|
||||
# qa_absent DESC PATTERN MAXALLOWED LEVEL
#
# Asserts that at most MAXALLOWED lines of $LOG_FILE match PATTERN (extended
# regex, case-insensitive).
#   $1 DESC        human-readable name of the check, echoed in the report line
#   $2 PATTERN     ERE to count
#   $3 MAXALLOWED  highest acceptable number of matching lines
#   $4 LEVEL       label printed when the limit is exceeded; the value "FAIL"
#                  additionally sets the global QA_FAIL=1
# Prints one report line to stdout. Always returns 0: the verdict is carried by
# the report line and QA_FAIL, not by the return status.
qa_absent() {
  # grep -c prints the count (0 on no match) but exits 1 when 0 — capture the
  # number directly; do NOT use `|| echo 0` (that doubles the output).
  # -e is required: a pattern such as '---<<BOOT>>---' starts with '-' and
  # would otherwise be rejected as an unknown option, leaving the count empty
  # and the check permanently green.
  _n=$(grep -ciE -e "$2" -- "$LOG_FILE" 2>/dev/null)
  _rc=$?
  if [ "$_rc" -gt 1 ]; then
    # Exit status >1 is a grep error (unreadable log, bad regex). A scan that
    # could not run must not be reported as "0 hits".
    echo " ${4} $1 (log scan failed)"
    if [ "$4" = "FAIL" ]; then
      QA_FAIL=1
    fi
    return 0
  fi
  _n=${_n:-0}
  if [ "$_n" -le "$3" ]; then
    echo " PASS $1 (${_n} hits, max $3)"
  else
    echo " ${4} $1 (${_n} hits, max $3)"
    if [ "$4" = "FAIL" ]; then
      QA_FAIL=1
    fi
  fi
  return 0
}
|
||||
|
||||
# Run every QA check and publish the report to both the terminal and the log.
#
# The checks must run in the CURRENT shell: piping the group straight into tee
# would put it in a subshell, and every QA_FAIL=1 set by a failing critical
# check would be discarded. The report is therefore written to a scratch file
# first (a redirected brace group does not fork) and copied out afterwards.
# This also keeps the checks from scanning their own report lines, because the
# log is not appended to until all checks have finished.
QA_REPORT=$(mktemp "${TMPDIR:-/tmp}/bhyve-qa.XXXXXX") || exit 1
{
  echo "=== QA GATE ==="
  echo "-- critical (gate the build) --"
  qa_absent "no kernel panic" 'panic:|Fatal trap|panic @' 0 FAIL
  qa_absent "no repeated reboot loop" '---<<BOOT>>---' 1 FAIL
  qa_present "boot reached login/getty" 'login:|FreeBSD/amd64 \(.*\) \(ttyu|sddm' FAIL
  qa_absent "no colibri permission-denied" 'permission denied|EACCES|Permission denied' 0 FAIL

  echo "-- informational (best-effort console markers) --"
  qa_present "colibri-daemon socket ready" 'colibri-daemon socket ready|Herdr socket API listening' INFO
  qa_absent "no store-open panic" 'failed to open coordination store' 0 INFO
  qa_present "clawdie-live-gpu ran" 'selected live KMS|recommended branch|clawdie.live.gpu' INFO

  # Manual checklist: these facts live in guest log files, not on the serial
  # console, so they are printed for the operator instead of being asserted.
  echo ""
  echo "-- deeper in-guest checks (run in the guest @ 10.99.0.2; not on console) --"
  echo " service colibri_daemon status # one stable PID"
  echo " stat -f '%Sp %Su:%Sg' /var/run/colibri/colibri.sock # expect srwxrwx--- colibri:colibri"
  echo " printf '{\"cmd\":\"status\"}\\n' | nc -U /var/run/colibri/colibri.sock # no EACCES"
  echo " cat /var/log/clawdie-live-gpu.log # detected id + branch matches build"
  echo " kldstat | grep nvidia # NVIDIA builds only"
} > "$QA_REPORT"
tee -a "$LOG_FILE" < "$QA_REPORT"
rm -f -- "$QA_REPORT"
|
||||
|
||||
# Final verdict: summarise the harness status and the critical QA assertions on
# the terminal and in the log, then exit with the gate result.
echo "" | tee -a "$LOG_FILE"
if [ "$EXIT_CODE" -ne 0 ]; then
  echo "QA GATE: boot harness exited non-zero (${EXIT_CODE})" | tee -a "$LOG_FILE"
fi
if [ "$QA_FAIL" -ne 0 ]; then
  echo "QA GATE: FAIL — critical assertion(s) failed" | tee -a "$LOG_FILE"
else
  echo "QA GATE: critical assertions PASS" | tee -a "$LOG_FILE"
fi

echo ""
echo "Log saved to: $LOG_FILE"

# Fail the gate if the harness failed OR any critical assertion failed.
# There is deliberately no unconditional `exit $EXIT_CODE` before this point:
# it would end the script before the QA result is consulted. A harness failure
# still propagates its own status (e.g. 124 from timeout); a QA-only failure
# exits 1.
if [ "$EXIT_CODE" -ne 0 ]; then
  exit "$EXIT_CODE"
fi
if [ "$QA_FAIL" -ne 0 ]; then
  exit 1
fi
exit 0
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue