/**
 * setup/llama-cpp.ts — Provision the llama-cpp jail for local inference.
 *
 * Creates the jail if missing, installs llama-cpp, and prepares
 * directories for models. Does not start llama-server by default.
 */
import { execSync, spawnSync } from 'child_process';

import {
  FEATURE_LLAMA_CPP,
  LLAMA_CPP_INTERNAL_DOMAIN,
  LOCAL_LLM_PROVIDER,
  LLAMA_CPP_JAIL_IP,
  RUNTIME_ID,
  SUBNET_BASE,
} from '../src/config.js';
import { logger } from '../src/logger.js';
import { loadPackageList, mountPkgCacheInJail } from './packages.js';
import { getPlatform } from './platform.js';
import { emitStatus } from './status.js';
import { maybeEnableTailscaleInJail } from './tailscale.js';
import {
  bastille,
  jailExists,
  detectFreeBSDRelease,
} from './bastille-helpers.js';

const LOG = 'logs/setup.log';
const MODELS_DIR = '/var/db/llm-models';

/** Result shape of a `bastille(...)` invocation, derived from the helper itself. */
type BastilleResult = ReturnType<typeof bastille>;

/**
 * Log a warning when a best-effort bastille step did not succeed.
 *
 * Used for steps whose failure should not abort provisioning but must not
 * pass unnoticed either.
 */
function warnIfFailed(
  result: BastilleResult,
  jailName: string,
  message: string,
): void {
  if (!result.ok) {
    logger.warn({ jailName, output: result.output }, message);
  }
}

/**
 * Provision the llama-cpp jail.
 *
 * Steps, in order:
 *  1. Skip (status `skipped`) when FEATURE_LLAMA_CPP is off and the local LLM
 *     provider is not `llama_cpp`.
 *  2. Exit the process with code 1 (status `failed`) on non-FreeBSD platforms.
 *  3. Create the jail via bastille if it does not exist yet, set its hostname
 *     and restart it.
 *  4. Mount the package cache, install the packages listed in
 *     `llama-cpp-jail.txt`, optionally enable Tailscale, and create the
 *     models directory inside the jail.
 *
 * Environment overrides read here: `LLAMA_CPP_JAIL_NAME`, `WARDEN_GATEWAY`,
 * `WARDEN_BRIDGE`.
 *
 * @param _args Command-line arguments; currently unused.
 * @returns Resolves once provisioning finished or was skipped.
 * @throws Re-throws any error raised during provisioning (for example a failed
 *   `bastille create`) after emitting a `failed` status.
 */
export async function run(_args: string[]): Promise<void> {
  const explicitJailName = (process.env.LLAMA_CPP_JAIL_NAME || '').trim();

  if (!FEATURE_LLAMA_CPP && LOCAL_LLM_PROVIDER !== 'llama_cpp') {
    emitStatus('SETUP_LLAMA_CPP', {
      STATUS: 'skipped',
      REASON: 'feature_disabled',
      LOG,
    });
    logger.info(
      'llama-cpp skipped — FEATURE_LLAMA_CPP disabled and provider not llama_cpp',
    );
    return;
  }

  if (getPlatform() !== 'freebsd') {
    emitStatus('SETUP_LLAMA_CPP', {
      STATUS: 'failed',
      ERROR: 'unsupported_platform',
      LOG,
    });
    process.exit(1);
  }

  const hostname = LLAMA_CPP_INTERNAL_DOMAIN;
  // Adapter handed to the Tailscale helper, which takes its arguments as an array.
  const runBastille = (args: string[]) => bastille(...args);

  try {
    // An empty or whitespace-only override falls back to the runtime-derived name.
    const jailName = explicitJailName || `${RUNTIME_ID}-llamacpp`;

    if (jailExists(jailName)) {
      logger.info(
        { jailName },
        'llama-cpp jail already exists, skipping creation',
      );
    } else {
      const release = detectFreeBSDRelease();
      const gateway = process.env.WARDEN_GATEWAY || `${SUBNET_BASE}.1`;
      const bridge = process.env.WARDEN_BRIDGE || 'warden0';

      logger.info(
        { jailName, ip: LLAMA_CPP_JAIL_IP, release },
        'Creating llama-cpp jail',
      );

      const create = bastille(
        'create',
        // thin jail (no -T): keep model state in mounted storage, not a copied base.
        '-B',
        '-g',
        gateway,
        jailName,
        release,
        `${LLAMA_CPP_JAIL_IP}/24`,
        bridge,
      );
      if (!create.ok) {
        throw new Error(`bastille create failed: ${create.output}`);
      }

      // Post-create tweaks stay best-effort, but failures are now surfaced.
      warnIfFailed(
        bastille('config', jailName, 'set', 'host.hostname', hostname),
        jailName,
        'llama-cpp jail hostname configuration failed',
      );
      warnIfFailed(
        bastille('restart', jailName),
        jailName,
        'llama-cpp jail restart failed',
      );
    }

    mountPkgCacheInJail(jailName);

    const packages = loadPackageList('llama-cpp-jail.txt');
    const pkg = bastille('pkg', jailName, 'install', '-y', ...packages);
    if (!pkg.ok) {
      logger.warn(
        { output: pkg.output },
        'llama-cpp package install had warnings',
      );
    }

    maybeEnableTailscaleInJail(runBastille, jailName, jailName);

    // Create the models directory inside the jail (mode 755).
    warnIfFailed(
      bastille('cmd', jailName, 'install', '-d', '-m', '755', MODELS_DIR),
      jailName,
      'llama-cpp models directory creation failed',
    );

    emitStatus('SETUP_LLAMA_CPP', {
      STATUS: 'success',
      JAIL_NAME: jailName,
      JAIL_IP: LLAMA_CPP_JAIL_IP,
      MODELS_DIR,
      NOTE: 'models_required',
      LOG,
    });
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    emitStatus('SETUP_LLAMA_CPP', {
      STATUS: 'failed',
      ERROR: message,
      LOG,
    });
    throw error;
  }
}