--- Build: pass | Tests: pass - 603 passed (44 files) --- Build: pass | Tests: pass — Tests 603 passed (603)
138 lines
5.3 KiB
Bash
Executable file
138 lines
5.3 KiB
Bash
Executable file
#!/bin/sh
|
|
# setup-llamacpp-jail.sh — create the llama-cpp inference jail
|
|
#
|
|
# Usage: sudo sh docs/internal/scripts/setup-llamacpp-jail.sh
|
|
#
|
|
# What it does:
|
|
# 1. Create a thin bastille jail at WARDEN_LLAMA_CPP_IP (.5)
|
|
# 2. Install the llama-cpp and curl packages
|
|
# 3. Mount /var/db/llm-models (read-only) — models live on host
|
|
# 4. Create llama_chat rc.d service (not shipped with pkg — second instance)
|
|
# 5. Configure rc.conf for both embed (:8080) and chat (:8081) servers
|
|
|
|
# Abort on the first command that exits non-zero.
set -e

# Absolute directory containing this script, independent of the caller's cwd.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

# env_get reads ./.env relative to the current directory, so we must cd to
# the directory that holds it. Historically that was two levels above the
# script; the documented location (docs/internal/scripts/) is three levels
# below the repository root, so step up once more when .env is only found
# there. If neither location has a .env the original two-level behaviour is
# kept and env_get falls back to its defaults.
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
if [ ! -f "${PROJECT_ROOT}/.env" ] && [ -f "${PROJECT_ROOT}/../.env" ]; then
  PROJECT_ROOT="$(cd "${PROJECT_ROOT}/.." && pwd)"
fi
cd "${PROJECT_ROOT}"
|
|
|
|
# env_get KEY DEFAULT
#
# Print (without a trailing newline) the value of the first "KEY=..." line in
# ./.env, or DEFAULT when the file is unreadable, the key is absent, or the
# value is empty.
#
# The value is everything after the first "=", so values may themselves
# contain "=". Trailing whitespace -- including the carriage return left by
# CRLF line endings -- is removed first, then one leading and one trailing
# quote character (' or ") is stripped. Stripping the whitespace first matters:
# otherwise a trailing CR hides the closing quote from the unquoting step and
# ends up inside the returned value.
env_get() {
  local key="$1" default="$2"
  local val
  # grep errors (missing .env) are deliberately silenced: "not configured"
  # simply means "use the default".
  val=$(grep -m1 "^${key}=" .env 2>/dev/null \
    | cut -d= -f2- \
    | sed "s/[[:space:]]*\$//;s/^['\"]//;s/['\"]\$//")
  printf '%s' "${val:-$default}"
}
|
|
|
|
# ── Settings ──────────────────────────────────────────────────────────────────
# Values come from .env via env_get; the second argument is the fallback.

# Subnet prefix: AGENT_SUBNET_BASE takes precedence, then JAIL_SUBNET_BASE,
# then the built-in 10.0.1.
legacy_subnet_base=$(env_get JAIL_SUBNET_BASE 10.0.1)
SUBNET_BASE=$(env_get AGENT_SUBNET_BASE "${legacy_subnet_base}")

# Jail address: explicit WARDEN_LLAMA_CPP_IP, otherwise host .5 of the subnet.
LLAMA_IP=$(env_get WARDEN_LLAMA_CPP_IP "${SUBNET_BASE}.5")

# Model file names, looked up under MODELS_DIR.
EMBED_MODEL=$(env_get EMBED_MODEL_FILE "bge-m3-Q4_K_M.gguf")
CHAT_MODEL=$(env_get CHAT_MODEL_FILE "microsoft_Phi-4-mini-instruct-Q4_K_M.gguf")

# Fixed names and paths (not configurable through .env).
JAIL="llamacpp"
JAIL_ROOT="/usr/local/bastille/jails/${JAIL}/root"
MODELS_DIR="/var/db/llm-models"

# Release passed to `bastille create`: the first two dash-separated fields of
# the userland version string reported by freebsd-version.
FREEBSD_REL=$(freebsd-version -u | cut -d- -f1,2)
|
|
|
|
# Summarise what is about to be set up: one line per argument, followed by a
# blank separator line.
printf '%s\n' \
  "==> llamacpp jail setup (shared service)" \
  " Jail: ${JAIL} @ ${LLAMA_IP}" \
  " Embed: ${MODELS_DIR}/${EMBED_MODEL} (:8080)" \
  " Chat: ${MODELS_DIR}/${CHAT_MODEL} (:8081)" \
  ""
|
|
|
|
# ── 1. Create jail ────────────────────────────────────────────────────────────

# Decide whether the jail exists by looking for its directory under the
# Bastille jails tree -- the same hard-coded prefix this script uses for the
# jail root and fstab. Grepping `bastille list` for the name as a word is
# unreliable: any row that merely contains the word (e.g. the path of a jail
# called "llamacpp-old") would match and creation would be skipped.
if [ -d "/usr/local/bastille/jails/${JAIL}" ]; then
  echo "==> Jail ${JAIL} already exists — skipping creation"
else
  echo "==> Creating jail ${JAIL}"
  bastille create "${JAIL}" "${FREEBSD_REL}" "${LLAMA_IP}"
fi
|
|
|
|
# ── 2. Install packages ───────────────────────────────────────────────────────

# Installs both llama-cpp and curl inside the jail; -y answers pkg's
# confirmation prompt so the script can run unattended.
printf '%s\n' "==> Installing llama-cpp"
bastille pkg "${JAIL}" install -y llama-cpp curl
|
|
|
|
# ── 3. Mount models (read-only) ───────────────────────────────────────────────

# Models are large files stored on the host and exposed to the jail through a
# read-only nullfs mount at the same path inside the jail root.
MOUNT_TARGET="${JAIL_ROOT}${MODELS_DIR}"
FSTAB="/usr/local/bastille/jails/${JAIL}/fstab"

# Validate the host directory BEFORE touching the jail's fstab. Otherwise a
# missing directory makes the mount below fail only after the fstab line has
# already been appended, leaving an entry that points at nothing.
if [ ! -d "${MODELS_DIR}" ]; then
  echo "ERROR: host model directory ${MODELS_DIR} does not exist" >&2
  exit 1
fi

# Persist the mount in the jail's fstab (once).
if ! grep -q "^${MODELS_DIR} " "${FSTAB}" 2>/dev/null; then
  echo "==> Mounting ${MODELS_DIR} (ro) into jail"
  mkdir -p "${MOUNT_TARGET}"
  echo "${MODELS_DIR} ${MOUNT_TARGET} nullfs ro 0 0" >> "${FSTAB}"
fi

# Mount it now unless it is already mounted. mount(8) prints
# "<source> on <target> (<options>)"; match the target as a fixed string
# bounded by " on " and " (" so that a longer path sharing this prefix, or
# regex metacharacters in the path, cannot produce a false positive.
if ! mount | grep -qF " on ${MOUNT_TARGET} ("; then
  # The fstab entry may pre-date the mount point (e.g. jail root recreated).
  mkdir -p "${MOUNT_TARGET}"
  mount -t nullfs -o ro "${MODELS_DIR}" "${MOUNT_TARGET}"
fi
|
|
|
|
# ── 4. Create llama_chat rc.d service ────────────────────────────────────────

# The llama-cpp package ships only one rc.d script (llama-server).
# A second service (llama_chat) is needed for the chat model on :8081.
# This is a copy with s/llama_server/llama_chat/g and separate log/pid paths.
RC_CHAT="${JAIL_ROOT}/usr/local/etc/rc.d/llama_chat"

# Guard on "executable", not merely "exists": if an earlier run died between
# creating the file and the chmod below (set -e aborts on a failed write), the
# leftover file is non-executable and possibly truncated. Testing -x makes the
# next run regenerate it instead of treating it as finished.
if [ ! -x "${RC_CHAT}" ]; then
  echo "==> Creating llama_chat rc.d service"
  # Quoted delimiter: the script text is written literally, nothing below is
  # expanded by this shell.
  cat > "${RC_CHAT}" << 'EOF'
#!/bin/sh
# PROVIDE: llama_chat
# REQUIRE: LOGIN
# KEYWORD: shutdown

. /etc/rc.subr

name="llama_chat"
rcvar=llama_chat_enable
load_rc_config $name

: ${llama_chat_enable:="NO"}
: ${llama_chat_user:="nobody"}
: ${llama_chat_model:=""}
: ${llama_chat_args:=""}
: ${llama_chat_log:="/var/log/llama-chat.log"}
: ${llama_chat_pidfile:="/var/run/${name}.pid"}

run_command="/usr/local/bin/llama-server"
procname="${run_command}"
pidfile=${llama_chat_pidfile}
command=/usr/sbin/daemon
command_args="-f -t ${name} -p ${pidfile} -o ${llama_chat_log} ${run_command} -m ${llama_chat_model} ${llama_chat_args}"
start_precmd="llama_chat_precmd"
llama_chat_chdir=/tmp

llama_chat_precmd()
{
  if [ -z "${llama_chat_model}" ]; then echo "llama_chat_model required"; exit 1; fi
  if [ ! -f "${llama_chat_model}" ]; then echo "llama_chat_model not found: ${llama_chat_model}"; exit 1; fi
  touch ${llama_chat_log} ${pidfile}
  chown ${llama_chat_user} ${llama_chat_log} ${pidfile}
  chmod 640 ${llama_chat_log}
}

run_rc_command "$1"
EOF
  chmod +x "${RC_CHAT}"
fi
|
|
|
|
# ── 5. Configure rc.conf ──────────────────────────────────────────────────────

echo "==> Configuring llama_server (:8080 embed) and llama_chat (:8081 chat)"

# Apply each rc.conf assignment inside the jail, one `bastille sysrc` call per
# setting and in this order: embedding server first, then chat server.
for rc_setting in \
  "llama_server_enable=YES" \
  "llama_server_model=${MODELS_DIR}/${EMBED_MODEL}" \
  "llama_server_args=--host 0.0.0.0 --port 8080 --embedding --pooling mean --ctx-size 8192" \
  "llama_chat_enable=YES" \
  "llama_chat_model=${MODELS_DIR}/${CHAT_MODEL}" \
  "llama_chat_args=--host 0.0.0.0 --port 8081 --ctx-size 8192 --n-predict 2048"
do
  bastille sysrc "${JAIL}" "${rc_setting}"
done
|
|
|
|
# Closing hints: how to start both services and probe their health endpoints.
# Unquoted delimiter so ${JAIL} and ${LLAMA_IP} are expanded; the first line of
# the document is intentionally blank.
cat << EOF

==> Done.
 Start: bastille cmd ${JAIL} service llama-server start
 bastille cmd ${JAIL} service llama_chat start
 Verify: curl -sf http://${LLAMA_IP}:8080/health && curl -sf http://${LLAMA_IP}:8081/health
EOF
|