clawdie-ai/setup/agent-jails.ts

411 lines
12 KiB
TypeScript
Raw Normal View History

/**
* setup/agent-jails.ts — Provision per-specialist agent jails.
*
* Creates db-worker, git-worker, ctrl-worker jails from infra/jails.yaml.
* Each gets pi + aider installed, scoped secrets, and nullfs mounts for
* its domain. PF rules restrict cross-jail network access.
*
* Run: just setup-agent-jails (or just install -- --from agent-jails)
*/
refactor(identity): remove PLATFORM_ID/SERVICE_NAME/RUNTIME_USER env vars Step 5 of system-namespace cutover: complete the env-var removal that step 4 set up. All consumers now import SERVICE_NAME from src/platform-identity.ts directly; the deprecated PLATFORM_* re-exports in src/config.ts are gone. src/config.ts: - PLATFORM_ID, PLATFORM_SERVICE_NAME, PLATFORM_RUNTIME_USER exports removed. - PLATFORM_RUNTIME_HOME stays (derived from SERVICE_NAME, used by ~10 consumers for path construction). - Env-var allowlist drops PLATFORM_ID / PLATFORM_SERVICE_NAME / PLATFORM_RUNTIME_USER / PLATFORM_RUNTIME_HOME entries. - CONTROLPLANE_AIDER_TMUX_SESSION uses SERVICE_NAME directly. setup/onboarding.ts: - writeIdentity() simplified to write only ASSISTANT_NAME (display). PLATFORM_ID / PLATFORM_SERVICE_NAME / PLATFORM_RUNTIME_USER are no longer written to .env. Fresh installs have no PLATFORM_* keys. - Status emission switched from PLATFORM_ID to SERVICE_NAME. setup/env-audit.ts: - Audit lists SERVICE_NAME instead of PLATFORM_ID; the env-file PLATFORM_ID read is gone. 24 source files (src/*.ts, setup/*.ts, scripts/dashboard.ts): - Bare PLATFORM_ID / PLATFORM_SERVICE_NAME / PLATFORM_RUNTIME_USER references replaced with SERVICE_NAME. - Imports rewired: SERVICE_NAME comes from ../{src/}platform-identity.js, not from config.js. - Imports deduped where the sed sweep produced collisions. Shell scripts (scripts/bhyve-evidence.sh, glass.sh, inspect-system.sh): - Hardcoded SERVICE_NAME='clawdie' and SERVICE_USER='clawdie'. No more grep-the-.env fallbacks; the constants are the source. Tests (middle path): - Mechanical fixes (import path, renamed assertion text): src/hostd/privileged-commands.test.ts, src/startup-report.test.ts, setup/env-audit.test.ts, setup/install-mode.test.ts. 
- Skipped with `// system-namespace:` markers (pinned removed env-driven override behavior; Codex rewrites once the bootstrap- config service-user override path lands): setup/verify.test.ts > 'uses the platform service name for PID candidates' setup/service.test.ts > 'resolves a platform runtime separately from the tenant' Test files still containing PLATFORM_* strings in vi.mock contents, ENV_KEYS arrays, or comments are left untouched — they are test artifacts that don't affect runtime; mock contents resolve to 'clawdie' which still equals SERVICE_NAME. tsc clean. 2095 tests pass, 4 skipped, 0 fail. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> --- Build: pass | Tests: pass — Tests 2095 passed | 4 skipped (2099)
2026-05-02 14:49:19 +02:00
import { SERVICE_NAME } from '../src/platform-identity.js';
import fs from 'fs';
import path from 'path';
import {
AGENT_INTERNAL_DOMAIN,
SUBNET_BASE,
DB_HOST,
PROJECT_ROOT,
TENANT_ID,
} from '../src/config.js';
import { readControlplaneAgentApiKey } from '../src/controlplane-agent-keys.js';
import { logger } from '../src/logger.js';
import { readEnvFile } from '../src/env.js';
import { loadPackageList, mountPkgCacheInJail } from './packages.js';
import { commandExists, getPlatform, isRoot } from './platform.js';
import { emitStatus } from './status.js';
import {
bastille,
ensureJailExecDir,
jailExists,
detectFreeBSDRelease,
jailRoot,
resolveJailName,
} from './bastille-helpers.js';
import {
loadJailRegistry,
resolveJailIp,
getAgentJailDef,
parseMountSpec,
} from '../src/jail-schema.js';
/** Log path reported in the LOG field of every SETUP_AGENT_JAILS status record. */
const LOG = 'logs/setup.log';

/**
 * Names of the LLM provider API keys that `run` requests from the env file
 * (together with the domain-specific keys) before building per-jail env files.
 */
const SHARED_LLM_KEYS: string[] = [
  'ANTHROPIC_API_KEY',
  'OPENAI_API_KEY',
  'OPENROUTER_API_KEY',
  'ZAI_API_KEY',
  'GEMINI_API_KEY',
];
/**
 * Specialist name -> env var names that belong to that specialist's domain.
 *
 * `run` reads every listed name from the env file, and `buildJailEnv` exports
 * only the names listed for the given specialist into its `.env.agent`.
 */
const DOMAIN_KEY_ENV_MAP: Record<string, string[]> = {
  // Database connection settings and per-database credentials.
  'db-admin': [
    'DB_HOST',
    'DB_PORT',
    'SKILLS_DB_URL',
    'MEMORY_DB_URL',
    'OPS_DB_URL',
    'SKILLS_DB_USER',
    'SKILLS_DB_PASSWORD',
    'MEMORY_DB_USER',
    'MEMORY_DB_PASSWORD',
    'OPS_DB_USER',
    'OPS_DB_PASSWORD',
  ],
  // Git remotes, mirror list, SSH key location, forge token and repo dir.
  'git-admin': [
    'GIT_LOCAL_URL',
    'REMOTE_GIT_URL',
    'GIT_MIRROR_URLS',
    'GIT_SSH_KEY_PATH',
    'FORGEJO_TOKEN',
    'AGENT_REPO_DIR',
  ],
  // `run` fills this value from readControlplaneAgentApiKey('coordinator').
  'coordinator': ['CONTROLPLANE_API_KEY'],
};
/**
 * Specialist name -> packages installed in that specialist's jail in addition
 * to the shared list loaded from `agent-worker-jail.txt`.
 */
const EXTRA_PACKAGES: Record<string, string[]> = {
  'db-admin': ['postgresql18-client'],
  'git-admin': [],
  'coordinator': [],
};
/**
 * Make sure the `pi` command is available inside the given jail.
 *
 * Probes with `command -v pi` through a login shell; when the probe fails,
 * installs `@earendil-works/pi-coding-agent` globally with npm.
 *
 * @param jailName - Bastille jail to inspect and, if needed, install into.
 * @throws Error when the npm install command reports failure.
 */
function ensurePiInstalled(jailName: string): void {
  const probe = bastille('cmd', jailName, '/bin/sh', '-lc', 'command -v pi');
  if (!probe.ok) {
    logger.info({ jailName }, 'Installing pi inside agent jail');
    const npmResult = bastille(
      'cmd',
      jailName,
      'npm',
      'install',
      '-g',
      '@earendil-works/pi-coding-agent',
    );
    if (npmResult.ok) return;
    throw new Error(`pi install failed in ${jailName}: ${npmResult.output}`);
  }
}
/**
 * Ordered lookup table for LLM provider keys: `envVar` is the name looked up in
 * the loaded env, `writeAs` is the name exported into the jail env file.
 * Pi reads keys by their provider-specific name (ZAI_API_KEY,
 * ANTHROPIC_API_KEY, etc.), so every key is written under its own name.
 * `buildJailEnv` walks the entries in this order and stops at the first one
 * that has a value.
 */
const LLM_KEY_ENTRIES: Array<{ envVar: string; writeAs: string }> = [
  'ZAI_API_KEY',
  'ANTHROPIC_API_KEY',
  'OPENAI_API_KEY',
  'OPENROUTER_API_KEY',
  'GEMINI_API_KEY',
].map((keyName) => ({ envVar: keyName, writeAs: keyName }));
/**
 * Quote a value as a POSIX shell single-quoted literal.
 *
 * Inside single quotes the shell performs no expansion at all, so `$`,
 * backticks, backslashes and double quotes survive unchanged. An embedded
 * single quote is emitted as `'\''` (close quote, escaped quote, reopen).
 */
function shellSingleQuote(value: string): string {
  return `'${value.replace(/'/g, `'\\''`)}'`;
}

/**
 * Build the contents of a specialist's scoped env file as shell `export` lines.
 *
 * The output contains at most one LLM provider key (the first entry of
 * `LLM_KEY_ENTRIES` that has a non-empty value) followed by the non-empty
 * domain keys listed for `specialist` in `DOMAIN_KEY_ENV_MAP`.
 *
 * Values are single-quoted because the file is sourced by a shell. The
 * previous double-quoted (`JSON.stringify`) form let the shell expand `$VAR`
 * and run backtick substitutions found inside secrets, corrupting them.
 *
 * @param specialist - Specialist name used to select the domain key list;
 *   unknown names yield no domain keys.
 * @param allEnv - Loaded env values keyed by variable name.
 * @returns Newline-joined `export NAME='value'` lines with a trailing newline
 *   (a lone newline when nothing is set).
 */
function buildJailEnv(
  specialist: string,
  allEnv: Record<string, string | undefined>,
): string {
  const lines: string[] = [];

  // Write the first available LLM key with its correct env var name.
  // Previously this always wrote ANTHROPIC_API_KEY even for ZAI keys,
  // causing pi to not find the key (provider mismatch).
  for (const entry of LLM_KEY_ENTRIES) {
    const val = allEnv[entry.envVar];
    if (val) {
      lines.push(`export ${entry.writeAs}=${shellSingleQuote(val)}`);
      break;
    }
  }

  // Domain-specific keys (DB creds, git tokens, etc.)
  const domainKeys = DOMAIN_KEY_ENV_MAP[specialist] ?? [];
  for (const key of domainKeys) {
    const val = allEnv[key];
    if (val) lines.push(`export ${key}=${shellSingleQuote(val)}`);
  }

  return lines.join('\n') + '\n';
}
/**
 * Emit a failed SETUP_AGENT_JAILS status record, then abort by throwing.
 *
 * @param errorCode - Value reported in the status record's ERROR field.
 * @param message - Message of the thrown Error.
 * @throws Error always.
 */
function failAgentJailSetup(errorCode: string, message: string): never {
  emitStatus('SETUP_AGENT_JAILS', {
    STATUS: 'failed',
    ERROR: errorCode,
    LOG,
  });
  throw new Error(message);
}

/**
 * Find the fstab entry whose mount-point field (second column) is exactly
 * `mountTarget`.
 *
 * A substring match is not sufficient: `/root/work` would also match an entry
 * for `/root/workspace`, and commented-out lines would match too.
 *
 * @returns Index into `lines`, or -1 when no entry with at least six fields
 *   has that mount point.
 */
function findFstabEntryIndex(lines: string[], mountTarget: string): number {
  return lines.findIndex((line) => {
    const fields = line.trim().split(/\s+/u);
    return fields.length >= 6 && fields[1] === mountTarget;
  });
}

/**
 * Ensure one nullfs mount from the host into an agent jail.
 *
 * - Host path unresolved or missing: warn and skip.
 * - fstab already holds the exact desired entry: nothing to do.
 * - fstab holds a different entry for the same mount point: rewrite that line,
 *   unmount (best effort), then mount again.
 * - Otherwise: mount via bastille.
 *
 * Failures are logged as warnings; this function does not throw on a failed
 * mount command.
 */
function ensureNullfsMount(
  jailName: string,
  specialist: string,
  mount: { hostToken: string; jailPath: string; mode: string },
  env: Record<string, string | undefined>,
): void {
  const hostDir = resolveMountHostPath(mount.hostToken, env);
  if (!hostDir || !fs.existsSync(hostDir)) {
    logger.warn(
      { specialist, mountHost: mount.hostToken, resolvedPath: hostDir },
      `Skipping mount — host path not found`,
    );
    return;
  }

  const fstabPath = `/usr/local/bastille/jails/${jailName}/fstab`;
  const jailMountTarget = `/usr/local/bastille/jails/${jailName}/root${mount.jailPath}`;
  const desiredLine = `${hostDir} ${jailMountTarget} nullfs ${mount.mode} 0 0`;

  let replacedStaleEntry = false;
  if (fs.existsSync(fstabPath)) {
    const lines = fs.readFileSync(fstabPath, 'utf-8').split('\n');
    const idx = findFstabEntryIndex(lines, jailMountTarget);
    if (idx !== -1) {
      if (lines[idx].trim() === desiredLine) {
        logger.debug(
          { specialist, mountPoint: mount.jailPath },
          'Mount already in fstab',
        );
        return;
      }
      lines[idx] = desiredLine;
      fs.writeFileSync(fstabPath, lines.join('\n'));
      try {
        bastille('umount', jailName, mount.jailPath);
      } catch {
        // ignore — may not be mounted yet
      }
      replacedStaleEntry = true;
    }
  }

  const mounted = bastille(
    'mount',
    jailName,
    hostDir,
    mount.jailPath,
    'nullfs',
    mount.mode,
  );
  if (!mounted.ok) {
    // Checked on the remount path as well; it used to report success blindly.
    logger.warn(
      {
        specialist,
        hostDir,
        jailPoint: mount.jailPath,
        output: mounted.output,
      },
      'Mount command failed in agent jail',
    );
    return;
  }
  logger.info(
    { specialist, hostDir, jailPoint: mount.jailPath, mode: mount.mode },
    replacedStaleEntry
      ? 'Updated existing mount in agent jail'
      : 'Mounted directory into agent jail',
  );
}

/**
 * Provision the agent jails for the `db-admin`, `git-admin` and `coordinator`
 * specialists.
 *
 * For each specialist that has a definition in the jail registry this:
 * creates the jail if it does not exist, mounts the package cache, installs
 * the shared and specialist-specific packages, ensures `pi` is installed,
 * prepares the jail-exec directory, switches root's shell to bash, writes the
 * scoped `.env.agent` file, and applies the registry's nullfs mounts.
 * A success status record listing the jails is emitted at the end.
 *
 * @param args - CLI arguments; only `--destroy` / `--recreate` are inspected,
 *   and only to log a message when the jail does not exist.
 * @throws Error when not on FreeBSD, not running as root, `bastille` is
 *   missing, the coordinator API key is missing, `bastille create` fails, or
 *   the `pi` install fails. The three precondition failures also emit a
 *   failed status record first. (The platform check previously called
 *   `process.exit(1)`; it now throws like the other preconditions.)
 */
export async function run(args: string[]): Promise<void> {
  if (getPlatform() !== 'freebsd') {
    failAgentJailSetup('unsupported_platform', 'unsupported_platform');
  }
  if (!isRoot()) {
    failAgentJailSetup('requires_root', 'setup_agent_jails_requires_root');
  }
  if (!commandExists('bastille')) {
    failAgentJailSetup('missing_bastille', 'missing_bastille');
  }

  const registry = loadJailRegistry();
  // `||` on purpose: an empty override falls back to the default.
  const gateway = process.env.WARDEN_GATEWAY || `${SUBNET_BASE}.1`;
  const bridge = process.env.WARDEN_BRIDGE || registry.bridge;
  const release = detectFreeBSDRelease();
  const internalDomain = AGENT_INTERNAL_DOMAIN;

  const allEnv = readEnvFile([
    ...SHARED_LLM_KEYS,
    ...Object.values(DOMAIN_KEY_ENV_MAP).flat(),
  ]);
  const coordinatorApiKey = readControlplaneAgentApiKey('coordinator');
  if (!coordinatorApiKey) {
    throw new Error(
      'Missing coordinator CONTROLPLANE_API_KEY. Run controlplane setup first.',
    );
  }
  allEnv.CONTROLPLANE_API_KEY = coordinatorApiKey;

  const specialists = ['db-admin', 'git-admin', 'coordinator'];
  const results: Record<
    string,
    { jailName: string; ip: string; created: boolean }
  > = {};

  for (const specialist of specialists) {
    const entry = getAgentJailDef(registry, specialist);
    if (!entry) {
      logger.warn({ specialist }, 'No agent jail definition found in registry');
      continue;
    }
    const { role, def } = entry;
    const jailName = resolveJailName({ role });
    const ip = resolveJailIp(registry, role);
    const hostname = `${role}.${internalDomain}`;
    const exists = jailExists(jailName);

    if (!exists) {
      // NOTE(review): this message says creation is skipped, but the jail is
      // created right below regardless of the flags. Confirm the intended
      // behavior for --destroy before changing either the text or the flow.
      if (args.includes('--destroy') || args.includes('--recreate')) {
        logger.info(
          { jailName, specialist },
          'Skipping creation (--destroy/--recreate passed but jail does not exist)',
        );
      }
      logger.info(
        { jailName, ip, release, specialist },
        `Creating ${role} jail`,
      );
      const create = bastille(
        'create',
        ...(def.thick ? ['-T'] : []),
        ...(def.vnet ? ['-B'] : []),
        '-g',
        gateway,
        jailName,
        release,
        `${ip}/24`,
        bridge,
      );
      if (!create.ok) {
        throw new Error(`bastille create failed for ${role}: ${create.output}`);
      }
      const setHostname = bastille(
        'config',
        jailName,
        'set',
        'host.hostname',
        hostname,
      );
      if (!setHostname.ok) {
        logger.warn(
          { output: setHostname.output, jailName, hostname },
          `Setting hostname failed for ${role}`,
        );
      }
      const restart = bastille('restart', jailName);
      if (!restart.ok) {
        logger.warn(
          { output: restart.output, jailName },
          `Restart failed for ${role}`,
        );
      }
    } else {
      logger.info(
        { jailName, specialist },
        `${role} jail already exists, skipping creation`,
      );
    }

    mountPkgCacheInJail(jailName);
    const pkgs = loadPackageList('agent-worker-jail.txt');
    const extra = EXTRA_PACKAGES[specialist] ?? [];
    const allPkgs = [...pkgs, ...extra];
    if (allPkgs.length > 0) {
      const pkg = bastille('pkg', jailName, 'install', '-y', ...allPkgs);
      if (!pkg.ok) {
        logger.warn(
          { output: pkg.output, specialist },
          `${role} jail package install had warnings`,
        );
      }
    }

    ensurePiInstalled(jailName);

    // Ensure jail-exec staging dir is writable by the agent user
    ensureJailExecDir(jailName, SERVICE_NAME);

    const chsh = bastille(
      'cmd',
      jailName,
      'chsh',
      '-s',
      '/usr/local/bin/bash',
      'root',
    );
    if (!chsh.ok) {
      logger.warn({ output: chsh.output }, `chsh failed in ${role}`);
    }

    const envContent = buildJailEnv(specialist, allEnv);
    // Write to jail fs root (/.env.agent inside the jail) — jail-exec-runner
    // sources this path with `set -a && . /.env.agent`.
    // NOTE(review): the file holds secrets and is created with the default
    // mode; confirm which user must read it before tightening permissions.
    const jailEnvPath = path.join(jailRoot(jailName), '.env.agent');
    fs.mkdirSync(path.dirname(jailEnvPath), { recursive: true });
    fs.writeFileSync(jailEnvPath, envContent);
    logger.info(
      { jailName, specialist, envPath: jailEnvPath },
      'Wrote scoped .env.agent',
    );

    for (const mountSpec of entry.agentJail.mounts ?? []) {
      ensureNullfsMount(
        jailName,
        specialist,
        parseMountSpec(mountSpec),
        allEnv,
      );
    }

    results[specialist] = { jailName, ip, created: !exists };
  }

  emitStatus('SETUP_AGENT_JAILS', {
    STATUS: 'success',
    JAILS: JSON.stringify(results),
    LOG,
  });
}
/**
 * Translate a mount host token into a host filesystem path.
 *
 * Well-known tokens map to fixed locations under `PROJECT_ROOT`. `REPO_DIR`
 * prefers `AGENT_REPO_DIR`, then `GIT_STORAGE_ROOT`, then the project root.
 * Any other token is treated as an env var name and is accepted only when its
 * value names an existing path.
 *
 * @param token - Host token taken from a parsed mount spec.
 * @param env - Loaded env values keyed by variable name.
 * @returns The resolved host path, or `null` when an unknown token has no
 *   value or its value does not exist on disk. Paths for the well-known
 *   tokens are returned without an existence check.
 */
function resolveMountHostPath(
  token: string,
  env: Record<string, string | undefined>,
): string | null {
  switch (token) {
    case 'MIGRATIONS_DIR':
      return path.join(PROJECT_ROOT, 'docs', 'internal', 'sql');
    case 'SKILLS_DIR':
      return path.join(PROJECT_ROOT, '.agent', 'skills');
    case 'IDENTITIES_DIR':
      return path.join(PROJECT_ROOT, '.agent', 'identities');
    case 'PI_EXTENSIONS_DIR':
      return path.join(PROJECT_ROOT, '.pi', 'extensions');
    case 'REPO_DIR':
      return (
        env['AGENT_REPO_DIR'] ||
        env['GIT_STORAGE_ROOT'] ||
        path.join(PROJECT_ROOT)
      );
    default: {
      const candidate = env[token];
      return candidate && fs.existsSync(candidate) ? candidate : null;
    }
  }
}
// Script entry point: run with the CLI arguments; on rejection print a
// one-line message to stderr and exit with status 1.
run(process.argv.slice(2)).catch((reason: unknown) => {
  const detail = reason instanceof Error ? reason.message : String(reason);
  process.stderr.write(`agent-jails: ${detail}\n`);
  process.exit(1);
});