clawdie-ai/docs/internal/scripts/setup-llamacpp-jail.sh
Clawdie AI cc37d2c8cf Public/internal docs restructure; CMS/verify cleanup (Sam & Codex)
---

Build: pass | Tests: pass - 603 passed (44 files)

---
Build: pass | Tests: pass — Tests  603 passed (603)
2026-04-05 06:29:19 +00:00

138 lines
5.3 KiB
Bash
Executable file

#!/bin/sh
# setup-llamacpp-jail.sh — create the llama-cpp inference jail
#
# Usage: sudo sh docs/internal/scripts/setup-llamacpp-jail.sh
#
# What it does:
# 1. Create a thin bastille jail at WARDEN_LLAMA_CPP_IP (.5)
# 2. Install llama-cpp package
# 3. Mount /var/db/llm-models (read-only) — models live on host
# 4. Create llama_chat rc.d service (not shipped with pkg — second instance)
# 5. Configure rc.conf for both embed (:8080) and chat (:8081) servers
set -e
# Resolve the repository root from this script's own location so the script
# behaves the same regardless of the caller's working directory.
# The script lives in docs/internal/scripts/, which is three levels below the
# repository root. Climbing only two levels ends up in docs/, where env_get
# finds no .env and silently falls back to every default.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"
# env_get reads ".env" relative to the current directory, so run from the root.
cd "${PROJECT_ROOT}"
# env_get KEY DEFAULT
# Print the value assigned to KEY in ./.env, or DEFAULT when the file is
# missing, the key is absent, or the value is empty.
# Only the first "KEY=" line is used. Everything after the first "=" is the
# value, and at most one quote character (' or ") is removed from each end.
# No trailing newline is printed.
env_get() {
  local name="$1" fallback="$2"
  local entry
  # "|| :" keeps a no-match / missing-file result from tripping `set -e`.
  entry=$(grep -m1 "^${name}=" .env 2>/dev/null || :)
  entry=${entry#*=}        # drop "KEY="
  entry=${entry#[\'\"]}    # drop one leading quote, if any
  entry=${entry%[\'\"]}    # drop one trailing quote, if any
  if [ -n "${entry}" ]; then
    printf '%s' "${entry}"
  else
    printf '%s' "${fallback}"
  fi
}
# Fixed identifiers and paths for the jail.
JAIL="llamacpp"
MODELS_DIR="/var/db/llm-models"
JAIL_ROOT="/usr/local/bastille/jails/${JAIL}/root"

# Network settings, each overridable from .env.
# AGENT_SUBNET_BASE wins over JAIL_SUBNET_BASE, which wins over 10.0.1.
# The jail address defaults to host .5 on that subnet.
SUBNET_BASE=$(
  env_get AGENT_SUBNET_BASE "$(env_get JAIL_SUBNET_BASE 10.0.1)"
)
LLAMA_IP=$(env_get WARDEN_LLAMA_CPP_IP "${SUBNET_BASE}.5")

# Model file names (relative to MODELS_DIR), overridable from .env.
EMBED_MODEL=$(env_get EMBED_MODEL_FILE "bge-m3-Q4_K_M.gguf")
CHAT_MODEL=$(env_get CHAT_MODEL_FILE "microsoft_Phi-4-mini-instruct-Q4_K_M.gguf")

# Release string for `bastille create`: the first two dash-separated fields of
# the userland version (for example, X.Y-RELEASE-pN becomes X.Y-RELEASE).
FREEBSD_REL=$(freebsd-version -u | cut -d- -f1,2)

# Summary banner, followed by one blank line.
printf '%s\n' \
  "==> llamacpp jail setup (shared service)" \
  " Jail: ${JAIL} @ ${LLAMA_IP}" \
  " Embed: ${MODELS_DIR}/${EMBED_MODEL} (:8080)" \
  " Chat: ${MODELS_DIR}/${CHAT_MODEL} (:8081)" \
  ""
# ── 1. Create jail ────────────────────────────────────────────────────────────
# The jail is created only when its name does not appear as a whole word in
# the `bastille list` output. Errors from the listing itself are discarded.
if ! bastille list 2>/dev/null | grep -qw "${JAIL}"; then
  printf '%s\n' "==> Creating jail ${JAIL}"
  bastille create "${JAIL}" "${FREEBSD_REL}" "${LLAMA_IP}"
else
  printf '%s\n' "==> Jail ${JAIL} already exists — skipping creation"
fi
# ── 2. Install packages ───────────────────────────────────────────────────────
# llama-cpp provides the inference server. curl is installed alongside it.
printf '%s\n' "==> Installing llama-cpp"
bastille pkg "${JAIL}" install -y \
  llama-cpp \
  curl
# ── 3. Mount models (read-only) ───────────────────────────────────────────────
# Models are large files stored on the host. Mount read-only into jail.
# The host path /var/db/llm-models must exist and contain the model files.
MOUNT_TARGET="${JAIL_ROOT}${MODELS_DIR}"
FSTAB="/usr/local/bastille/jails/${JAIL}/fstab"

# Check the host directory before touching fstab. Otherwise a failed run would
# leave a nullfs entry for a non-existent source in the jail's fstab.
if [ ! -d "${MODELS_DIR}" ]; then
  echo "ERROR: host models directory ${MODELS_DIR} does not exist" >&2
  exit 1
fi

# Create the mount point unconditionally: an fstab entry may already exist
# from an earlier run while the directory inside the jail root is missing.
mkdir -p "${MOUNT_TARGET}"

# Persist the mount in the jail's fstab (once).
if ! grep -q "^${MODELS_DIR} " "${FSTAB}" 2>/dev/null; then
  echo "==> Mounting ${MODELS_DIR} (ro) into jail"
  echo "${MODELS_DIR} ${MOUNT_TARGET} nullfs ro 0 0" >> "${FSTAB}"
fi

# Mount now unless this exact mount point is already active. The match is a
# fixed string anchored on " on <target> " so that a mount of a deeper path,
# or regex metacharacters in the path, cannot produce a false positive.
if ! mount | grep -qF -- " on ${MOUNT_TARGET} "; then
  mount -t nullfs -o ro "${MODELS_DIR}" "${MOUNT_TARGET}"
fi
# ── 4. Create llama_chat rc.d service ────────────────────────────────────────
# The llama-cpp package ships only one rc.d script (llama-server).
# The chat model on :8081 needs a second service (llama_chat).
# The generated script is a copy with s/llama_server/llama_chat/g and its own
# log/pid paths.
RC_CHAT="${JAIL_ROOT}/usr/local/etc/rc.d/llama_chat"

# write_llama_chat_rc PATH
# Write the llama_chat rc.d script to PATH. The heredoc delimiter is quoted,
# so the script text is emitted verbatim with no expansion at generation time.
write_llama_chat_rc() {
  cat > "$1" << 'EOF'
#!/bin/sh
# PROVIDE: llama_chat
# REQUIRE: LOGIN
# KEYWORD: shutdown
. /etc/rc.subr
name="llama_chat"
rcvar=llama_chat_enable
load_rc_config $name
: ${llama_chat_enable:="NO"}
: ${llama_chat_user:="nobody"}
: ${llama_chat_model:=""}
: ${llama_chat_args:=""}
: ${llama_chat_log:="/var/log/llama-chat.log"}
: ${llama_chat_pidfile:="/var/run/${name}.pid"}
run_command="/usr/local/bin/llama-server"
procname="${run_command}"
pidfile=${llama_chat_pidfile}
command=/usr/sbin/daemon
command_args="-f -t ${name} -p ${pidfile} -o ${llama_chat_log} ${run_command} -m ${llama_chat_model} ${llama_chat_args}"
start_precmd="llama_chat_precmd"
llama_chat_chdir=/tmp
llama_chat_precmd()
{
if [ -z "${llama_chat_model}" ]; then echo "llama_chat_model required"; exit 1; fi
if [ ! -f "${llama_chat_model}" ]; then echo "llama_chat_model not found: ${llama_chat_model}"; exit 1; fi
touch ${llama_chat_log} ${pidfile}
chown ${llama_chat_user} ${llama_chat_log} ${pidfile}
chmod 640 ${llama_chat_log}
}
run_rc_command "$1"
EOF
}

# An existing script is left untouched, so re-running never overwrites it.
if ! [ -f "${RC_CHAT}" ]; then
  echo "==> Creating llama_chat rc.d service"
  write_llama_chat_rc "${RC_CHAT}"
  chmod +x "${RC_CHAT}"
fi
# ── 5. Configure rc.conf ──────────────────────────────────────────────────────
# jail_rc VAR=VALUE
# Set a single rc.conf variable inside the jail through `bastille sysrc`.
jail_rc() {
  bastille sysrc "${JAIL}" "$1"
}

printf '%s\n' "==> Configuring llama_server (:8080 embed) and llama_chat (:8081 chat)"

# Embedding instance: the rc.d service shipped with the package, on port 8080.
jail_rc "llama_server_enable=YES"
jail_rc "llama_server_model=${MODELS_DIR}/${EMBED_MODEL}"
jail_rc "llama_server_args=--host 0.0.0.0 --port 8080 --embedding --pooling mean --ctx-size 8192"

# Chat instance: the llama_chat service, on port 8081.
jail_rc "llama_chat_enable=YES"
jail_rc "llama_chat_model=${MODELS_DIR}/${CHAT_MODEL}"
jail_rc "llama_chat_args=--host 0.0.0.0 --port 8081 --ctx-size 8192 --n-predict 2048"

# Closing hints: how to start both services and probe their health endpoints.
printf '%s\n' \
  "" \
  "==> Done." \
  " Start: bastille cmd ${JAIL} service llama-server start" \
  " bastille cmd ${JAIL} service llama_chat start" \
  " Verify: curl -sf http://${LLAMA_IP}:8080/health && curl -sf http://${LLAMA_IP}:8081/health"