From 79c34c29eaeb5f4becd803a120c66c98412abc3f Mon Sep 17 00:00:00 2001 From: Sam & Claude Date: Mon, 15 Jun 2026 17:15:27 +0200 Subject: [PATCH] feat(iso): colibri-live-rebuild helper + image-size guard; bhyve on-demand (Sam & Claude) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The two agreed extras, plus the bhyve half of the ceremony cut. - colibri-live-rebuild: new operator helper staged at /usr/local/bin (only with the Colibri service) that runs the whole live rebuild runbook in one command — clone/update a build checkout, optional --ref, build release binaries, stop the daemon, install binaries + rc.d, clear stale socket/pid, restart, validate `colibri status`, and record /var/db/colibri/live-rebuild.json. Clones from the on-image git-backed source first, so a first rebuild works offline. - Image-size headroom guard: after staging, report df/du on the mounted live filesystem and abort before shipping if free space is under IMAGE_MIN_FREE_MB (default 1024) — catches a 32GB-stick overflow at build time. - TESTING.md: bhyve reclassified as an on-demand lane, not a per-change gate (cheap gates = sh -n + staging smoke + cargo test). - LIVE-COLIBRI-REBUILD.md / CHANGELOG updated. Stacked on PR #58 (release-gate branch). Checks: sh -n build.sh + helper; prettier clean; size-guard arithmetic verified. Co-Authored-By: Claude Opus 4.8 --- CHANGELOG.md | 2 + TESTING.md | 14 +-- build.sh | 23 +++++ docs/LIVE-COLIBRI-REBUILD.md | 28 +++--- live/operator-session/colibri-live-rebuild | 107 +++++++++++++++++++++ 5 files changed, 155 insertions(+), 19 deletions(-) create mode 100755 live/operator-session/colibri-live-rebuild diff --git a/CHANGELOG.md b/CHANGELOG.md index 8a85f912..2fd61065 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,8 @@ image is meant to work out of the box. Work continues from here toward `1.0.0`. 
- `build-manifest.json` now records `colibri_commit`/`colibri_modified` — the image stages adjacent colibri binaries, so the commit that produced them is captured for reproducibility. ### Added +- `colibri-live-rebuild` helper staged at `/usr/local/bin` (with the Colibri service): one command to rebuild + redeploy Colibri from source on a booted USB (clone/build/stop/install/restart/validate/record). See `docs/LIVE-COLIBRI-REBUILD.md`. +- Image-size headroom guard: the build reports `df`/`du` on the mounted live filesystem and **fails before shipping** if free space is under a floor (`IMAGE_MIN_FREE_MB`, default 1024) — catches a 32 GB-stick overflow at build time, not on the stick. - Live rebuild lane now covers the **whole agent stack**: `go` added to the live-operator package list and the `zot` source seeded at `/home/clawdie/ai/zot`, so a booted USB can rebuild zot (Go) as well as Colibri (Rust). See `docs/LIVE-COLIBRI-REBUILD.md`. - Aider+Pi harness venv provisioning — `firstboot/shell-deploy.sh` now creates `/opt/clawdie/venv/aider` with pinned deps: `aider-chat==0.86.2`, `litellm==1.81.10`, `tree_sitter==0.20.4` - Aider availability check in post-install verification (logs `aider` version at end of deployment) diff --git a/TESTING.md b/TESTING.md index d0530d0b..7b3848c2 100644 --- a/TESTING.md +++ b/TESTING.md @@ -331,14 +331,16 @@ sudo mdconfig -d -u md0 --- -## Level 2: bhyve Verification Gate +## Level 2: bhyve Verification (on-demand) -Use bhyve as the standard pre-hardware gate when the ML350p lane is available. -This is no longer just an opportunistic startup check; it is the default -middle-stage verification between static artifact checks and physical hardware. +bhyve is an **on-demand** verification lane, not a per-change gate — it's slow and +needs the ML350p, so run it when you want boot/runtime confidence before +committing to physical hardware, not on every iteration. 
The cheap gates that run +on every change are the static checks (`sh -n`, the staging smoke) and `cargo test`. -Treat bhyve as authoritative for boot/runtime plumbing and still treat real -hardware as the final authority for GPU, panel, input, Wi-Fi, and audio polish. +When you do run it, treat bhyve as authoritative for boot/runtime plumbing and +still treat real hardware as the final authority for GPU, panel, input, Wi-Fi, +and audio polish. ### ML350p resource plan diff --git a/build.sh b/build.sh index 2ed51d2a..584be5e2 100755 --- a/build.sh +++ b/build.sh @@ -908,6 +908,11 @@ install_colibri_service() { exit 1 fi + # Operator helper: one-command live rebuild/redeploy of Colibri from source + # (automates docs/LIVE-COLIBRI-REBUILD.md). Only staged with the service. + install -m 0755 "${LIVE_SESSION_DIR}/colibri-live-rebuild" \ + "${MOUNT_POINT}/usr/local/bin/colibri-live-rebuild" + # Seed the skills catalog with operator-useful entries. The daemon # creates the SQLite DB on first start; pre-populate it so skills # are available immediately without a first-boot script. @@ -2374,6 +2379,24 @@ write_build_manifest "${USB_SHARE}/build-manifest.json" echo " Payload injected." +# Image-size headroom guard. The live filesystem is fixed at IMAGE_SIZE; rust + go +# + offline repo staging + the package cache can fill it. Report usage and fail +# *before* shipping an image that boots with no room to breathe, rather than +# discovering it on the stick. Override the floor with IMAGE_MIN_FREE_MB. 
+echo "==> Image space report (mounted live filesystem):"
+df -h "${MOUNT_POINT}" || true  # informational only; a failure here never aborts the build
+echo " Largest staged trees:"
+du -sh "${MOUNT_POINT}/usr/local" "${MOUNT_POINT}/home" "${MOUNT_POINT}/var" 2>/dev/null || true
+_img_free_kb=$(df -kP "${MOUNT_POINT}" | awk 'NR==2 {kb = $4} END {print kb + 0}')  # -P: one row per fs; 0 if unreadable
+_img_min_free_mb="${IMAGE_MIN_FREE_MB:-1024}"  # floor in MB; defaults to 1024
+if [ "${_img_free_kb}" -lt $((_img_min_free_mb * 1024)) ]; then  # compare in kB; an unreadable (0) reading fails closed
+  echo "ERROR: only $((_img_free_kb / 1024)) MB free on the live filesystem (< ${_img_min_free_mb} MB floor)."
+  echo " Raise IMAGE_SIZE in build.cfg (e.g. for a larger USB) or trim staged content,"
+  echo " or lower the floor with IMAGE_MIN_FREE_MB if you know it fits."
+  exit 1
+fi
+echo " Free space OK ($((_img_free_kb / 1024)) MB, floor ${_img_min_free_mb} MB)."
+
 # Unmount and detach
 umount "$MOUNT_POINT"
 if [ -f "${CACHE_DIR}/.md_device" ]; then
diff --git a/docs/LIVE-COLIBRI-REBUILD.md b/docs/LIVE-COLIBRI-REBUILD.md
index 0cf335dc..046a0ccc 100644
--- a/docs/LIVE-COLIBRI-REBUILD.md
+++ b/docs/LIVE-COLIBRI-REBUILD.md
@@ -233,19 +233,21 @@ Expected:
 - `service colibri_daemon start` returns to the shell.
 - `colibri status` can connect to `/var/run/colibri/colibri.sock`.
 
-## Future ISO improvement
+## One-command helper
 
-A future helper script, tentatively `/usr/local/bin/colibri-live-rebuild`, should
-automate this runbook:
+The whole Colibri runbook above is wrapped in `colibri-live-rebuild`, staged at
+`/usr/local/bin/colibri-live-rebuild` whenever the image carries the Colibri
+service. It clones/updates a build checkout, optionally checks out a ref, builds
+the release binaries, stops the daemon, installs binaries + rc.d, clears stale
+socket/pid files, restarts, validates `colibri status`, and writes
+`/var/db/colibri/live-rebuild.json`.
 
-1. clone/update `/home/clawdie/ai/colibri-build`
-2. checkout requested branch or commit
-3. run the release build
-4. stop `colibri_daemon`
-5. install binaries and rc.d script
-6. 
clean stale socket/pid files -7. restart and validate `colibri status` -8. write `/var/db/colibri/live-rebuild.json` and `.sha256` +```sh +colibri-live-rebuild # rebuild from the build checkout's HEAD +colibri-live-rebuild --ref main # check out a branch/tag/commit first +colibri-live-rebuild --build-dir DIR # use an alternate build checkout +``` -This keeps the live USB usable as a self-hosted Colibri validation target while -preserving ISO rebuilds for release artifacts. +The manual steps above remain the reference for partial repairs (e.g. the zot +rebuild or the rc.d-only fix). This keeps the live USB usable as a self-hosted +Colibri validation target while preserving ISO rebuilds for release artifacts. diff --git a/live/operator-session/colibri-live-rebuild b/live/operator-session/colibri-live-rebuild new file mode 100755 index 00000000..84bc9699 --- /dev/null +++ b/live/operator-session/colibri-live-rebuild @@ -0,0 +1,107 @@ +#!/bin/sh +# colibri-live-rebuild — rebuild and redeploy Colibri on a running live USB. +# +# Automates docs/LIVE-COLIBRI-REBUILD.md so a booted operator USB can rebuild the +# control plane from source in one command — field repair/validation without a +# full ISO rebuild. FreeBSD live image only; privileged steps go through `mdo` +# (the live image has no sudo). It does NOT replace the release ISO process. 
+#
+# Usage:
+#   colibri-live-rebuild                  # rebuild from the build checkout's current HEAD
+#   colibri-live-rebuild --ref main       # check out a branch/tag/commit first
+#   colibri-live-rebuild --build-dir DIR  # use an alternate build checkout
+set -eu  # stop on the first failing command or unset variable
+
+REF=""  # set by --ref; empty means build whatever the checkout currently has
+BUILD_DIR="/home/clawdie/ai/colibri-build"  # working checkout; override with --build-dir
+SEED_SRC="/home/clawdie/ai/colibri" # git-backed source staged on the image
+REMOTE="https://code.smilepowered.org/clawdie/colibri.git"  # origin for fetches; clone fallback
+BINS="colibri-daemon colibri colibri-test-agent colibri-mcp colibri-tui"  # checked after the build, then installed
+
+usage() { echo "usage: colibri-live-rebuild [--ref <ref>] [--build-dir <dir>]"; exit "${1:-0}"; }  # $1 = exit status
+while [ $# -gt 0 ]; do
+  case "$1" in
+    --ref) REF="${2:?--ref needs a value}"; shift 2 ;;  # :? aborts when the value is missing
+    --build-dir) BUILD_DIR="${2:?--build-dir needs a value}"; shift 2 ;;
+    -h|--help) usage 0 ;;
+    *) echo "unknown argument: $1" >&2; usage 1 ;;
+  esac
+done
+
+command -v cargo >/dev/null 2>&1 || { echo "ERROR: cargo not found — pkg install rust" >&2; exit 1; }
+command -v git >/dev/null 2>&1 || { echo "ERROR: git not found — pkg install git" >&2; exit 1; }
+command -v mdo >/dev/null 2>&1 || { echo "ERROR: mdo not found — this targets the FreeBSD live image" >&2; exit 1; }
+
+# 1. Obtain/refresh a working build checkout (kept separate from the shipped
+#    read-only source at SEED_SRC). Clone from the on-image source when present
+#    so a first rebuild works offline; otherwise from Forgejo.
+if [ ! -d "${BUILD_DIR}/.git" ]; then
+  if [ -d "${SEED_SRC}/.git" ]; then
+    echo "==> cloning build checkout from staged source ${SEED_SRC}"
+    git clone "file://${SEED_SRC}" "${BUILD_DIR}"
+    git -C "${BUILD_DIR}" remote set-url origin "${REMOTE}" || true  # repoint origin from the seed to REMOTE
+  else
+    echo "==> cloning build checkout from ${REMOTE}"
+    git clone "${REMOTE}" "${BUILD_DIR}"
+  fi
+fi
+git -C "${BUILD_DIR}" fetch --prune origin 2>/dev/null || echo "WARN: fetch failed (offline?) 
— using current checkout"
+if [ -n "${REF}" ]; then  # only switch refs when --ref was given
+  echo "==> checking out ${REF}"
+  git -C "${BUILD_DIR}" checkout "${REF}"
+  git -C "${BUILD_DIR}" pull --ff-only 2>/dev/null || true  # best effort: a failed pull is ignored
+fi
+
+# 2. Build the release binaries (runs in a subshell so the caller's cwd is unchanged).
+echo "==> building colibri release binaries (this takes a few minutes)"
+( cd "${BUILD_DIR}" && cargo build --release \
+    -p colibri-daemon -p colibri-client -p colibri-mcp -p colibri-glasspane-tui )
+
+REL="${BUILD_DIR}/target/release"
+for b in ${BINS}; do  # BINS is intentionally unquoted: split into one name per word
+  [ -x "${REL}/${b}" ] || { echo "ERROR: expected build output missing: ${REL}/${b}" >&2; exit 1; }
+done
+
+# 3. Stop the service and clear stale runtime files (every step is tolerant of "already stopped").
+echo "==> stopping colibri_daemon"
+mdo -u root service colibri_daemon stop 2>/dev/null || true
+mdo -u root pkill -f colibri-daemon 2>/dev/null || true  # also signals any process whose command line matches
+mdo -u root rm -f /var/run/colibri/colibri.sock \
+    /var/run/colibri/colibri-daemon.pid \
+    /var/run/colibri/colibri-daemon-supervisor.pid
+
+# 4. Install binaries + rc.d script (as root, via mdo), then hand the runtime dirs to colibri.
+echo "==> installing binaries + rc.d script"
+for b in ${BINS}; do
+  mdo -u root install -m 0555 "${REL}/${b}" "/usr/local/bin/${b}"
+done
+mdo -u root install -m 0555 "${BUILD_DIR}/packaging/freebsd/colibri_daemon.in" \
+    /usr/local/etc/rc.d/colibri_daemon
+mdo -u root chown -R colibri:colibri /var/run/colibri /var/db/colibri /var/log/colibri 2>/dev/null || true
+
+# 5. Restart and validate: the rebuild only counts if `colibri status` succeeds.
+echo "==> starting colibri_daemon"
+mdo -u root service colibri_daemon start
+sleep 2  # brief pause before probing the freshly started daemon
+mdo -u root service colibri_daemon status || true  # shown for the operator; not a gate
+colibri status || { echo "ERROR: 'colibri status' could not reach the socket after restart" >&2; exit 1; }
+
+# 6. Record what was installed. 
+commit=$(git -C "${BUILD_DIR}" rev-parse HEAD 2>/dev/null || echo unknown)
+branch=$(git -C "${BUILD_DIR}" symbolic-ref --short -q HEAD 2>/dev/null || echo detached)
+now=$(date -u +%Y-%m-%dT%H:%M:%SZ)
+# Stage the record in a temp file (removed on exit), then install it as root.
+# NOTE(review): the JSON field names below are reconstructed — confirm them.
+_rec=$(mktemp)
+trap 'rm -f "${_rec}"' EXIT
+cat > "${_rec}" <<EOF
+{
+  "commit": "${commit}",
+  "branch": "${branch}",
+  "rebuilt_at": "${now}"
+}
+EOF
+mdo -u root install -m 0644 "${_rec}" /var/db/colibri/live-rebuild.json
+
+echo "==> done: colibri ${commit} (${branch}) is live on this USB"
+echo " record: /var/db/colibri/live-rebuild.json"
-- 
2.45.3