Merge pull request 'fix(spawner): stage jailed env payloads (Sam & Codex)' (#49) from fix/jail-staged-env-main into main
Some checks are pending
CI / rust (push) Waiting to run
CI / markdown (push) Waiting to run

Reviewed-on: #49
This commit is contained in:
clawdie 2026-06-13 23:32:50 +02:00
commit 70b6ddf846
2 changed files with 315 additions and 50 deletions

View file

@ -13,6 +13,8 @@
//! Pi-JSONL agents and for future local-only tools.
use std::collections::HashMap;
use std::os::unix::fs::PermissionsExt;
use std::path::{Path, PathBuf};
use std::process::Stdio;
use std::sync::Arc;
use std::time::Duration;
@ -116,6 +118,7 @@ impl PrivMode {
/// Default path to the setuid jail-spawn helper used in `PrivMode::Helper`.
pub const DEFAULT_JAIL_HELPER: &str = "/usr/local/libexec/colibri-jail-spawn";
const STAGED_JAIL_RUN_DIR: &str = "/var/run/colibri-stage";
/// Optional FreeBSD jail confinement for a spawned agent.
///
@ -135,6 +138,12 @@ pub struct JailConfig {
/// Root path for an ephemeral jail (`jail -c path=…`). Used when `name` is None.
#[serde(default)]
pub path: Option<String>,
/// Host-visible root path for a named jail. Required when staged payload
/// delivery is needed (env/system prompt/session context launcher files).
///
/// If unset, `path` is also accepted as the staging root fallback.
#[serde(default)]
pub root_path: Option<String>,
/// IPv4 spec: `inherit` (default) → `ip4=inherit`; any other value →
/// `ip4.addr=<value>`.
#[serde(default)]
@ -168,6 +177,146 @@ fn priv_wrap(
}
}
fn jail_stage_root(jail: &JailConfig) -> Option<&str> {
jail.root_path.as_deref().or(jail.path.as_deref())
}
fn shell_single_quote(value: &str) -> String {
format!("'{}'", value.replace('\'', "'\"'\"'"))
}
fn valid_env_key(key: &str) -> bool {
let mut chars = key.chars();
match chars.next() {
Some(c) if c == '_' || c.is_ascii_alphabetic() => {}
_ => return false,
}
chars.all(|c| c == '_' || c.is_ascii_alphanumeric())
}
fn build_env_script(env: &HashMap<String, String>) -> std::io::Result<String> {
let mut entries: Vec<_> = env.iter().collect();
entries.sort_by(|a, b| a.0.cmp(b.0));
let mut script = String::new();
for (key, value) in entries {
if !valid_env_key(key) {
return Err(std::io::Error::other(format!(
"invalid env key for staged jail payload: {key}"
)));
}
script.push_str("export ");
script.push_str(key);
script.push('=');
script.push_str(&shell_single_quote(value));
script.push('\n');
}
Ok(script)
}
fn build_jail_launcher() -> &'static str {
r#"#!/bin/sh
set -eu
ENV_FILE="$1"
shift
WORK_DIR="$1"
shift
if [ "$ENV_FILE" != "-" ] && [ -f "$ENV_FILE" ]; then
. "$ENV_FILE"
fi
if [ "$WORK_DIR" != "-" ]; then
cd "$WORK_DIR"
fi
exec "$@"
"#
}
#[derive(Debug)]
pub struct PreparedSpawnCommand {
pub program: String,
pub argv: Vec<String>,
pub env: HashMap<String, String>,
pub cleanup_dir: Option<PathBuf>,
}
async fn remove_staged_dir(path: &Path) {
let _ = tokio::fs::remove_dir_all(path).await;
}
pub async fn prepare_spawn_command(
binary: &str,
args: &[String],
env: HashMap<String, String>,
working_dir: Option<&str>,
jail: Option<&JailConfig>,
priv_mode: &PrivMode,
helper: &str,
stage_id: &str,
) -> std::io::Result<PreparedSpawnCommand> {
let Some(jail) = jail else {
return Ok(PreparedSpawnCommand {
program: binary.to_string(),
argv: args.to_vec(),
env,
cleanup_dir: None,
});
};
let needs_staging = !env.is_empty() || working_dir.is_some();
if !needs_staging {
let (program, argv) = jail_wrap(binary, args, Some(jail), priv_mode, helper);
return Ok(PreparedSpawnCommand {
program,
argv,
env,
cleanup_dir: None,
});
}
let stage_root = jail_stage_root(jail).ok_or_else(|| {
std::io::Error::other(
"jailed spawn with staged payload requires JailConfig.root_path (or path for ephemeral jails)",
)
})?;
let jail_stage_dir = format!("{STAGED_JAIL_RUN_DIR}/{stage_id}");
let host_stage_dir = Path::new(stage_root).join(jail_stage_dir.trim_start_matches('/'));
tokio::fs::create_dir_all(&host_stage_dir).await?;
let launcher_host = host_stage_dir.join("launch.sh");
tokio::fs::write(&launcher_host, build_jail_launcher()).await?;
std::fs::set_permissions(&launcher_host, std::fs::Permissions::from_mode(0o700))?;
let env_rel = if env.is_empty() {
"-".to_string()
} else {
let env_host = host_stage_dir.join("env.sh");
tokio::fs::write(&env_host, build_env_script(&env)?).await?;
std::fs::set_permissions(&env_host, std::fs::Permissions::from_mode(0o600))?;
format!("{jail_stage_dir}/env.sh")
};
let mut staged_args = vec![
format!("{jail_stage_dir}/launch.sh"),
env_rel,
working_dir.unwrap_or("-").to_string(),
binary.to_string(),
];
staged_args.extend(args.iter().cloned());
let (program, argv) = jail_wrap("/bin/sh", &staged_args, Some(jail), priv_mode, helper);
Ok(PreparedSpawnCommand {
program,
argv,
env: HashMap::new(),
cleanup_dir: Some(host_stage_dir),
})
}
/// Wrap an agent command for optional jail confinement plus privilege
/// escalation, returning the `(program, argv)` to hand to `Command::new`.
///
@ -325,14 +474,16 @@ pub struct AgentHandle {
stdout: Mutex<Option<ChildStdout>>,
/// Creation timestamp.
pub created_at: String,
/// Optional staged jail payload directory to remove when the child exits.
staging_dir: Option<PathBuf>,
}
impl AgentHandle {
pub fn new(config: AgentSpawnConfig) -> Self {
Self::new_with_id(Uuid::new_v4().to_string(), config)
Self::new_with_id(Uuid::new_v4().to_string(), config, None)
}
pub fn new_with_id(id: String, config: AgentSpawnConfig) -> Self {
pub fn new_with_id(id: String, config: AgentSpawnConfig, staging_dir: Option<PathBuf>) -> Self {
Self {
id,
config,
@ -340,6 +491,7 @@ impl AgentHandle {
child: Mutex::new(None),
stdout: Mutex::new(None),
created_at: chrono::Utc::now().to_rfc3339(),
staging_dir,
}
}
@ -363,6 +515,9 @@ impl AgentHandle {
}
*child = None;
*self.status.write().await = AgentStatus::Stopped;
if let Some(path) = self.staging_dir.as_deref() {
remove_staged_dir(path).await;
}
Ok(())
}
@ -374,12 +529,18 @@ impl AgentHandle {
Ok(Some(status)) => {
*child_guard = None;
*self.status.write().await = AgentStatus::Stopped;
if let Some(path) = self.staging_dir.as_deref() {
remove_staged_dir(path).await;
}
Some(status)
}
Ok(None) => None,
Err(_) => {
*child_guard = None;
*self.status.write().await = AgentStatus::Failed("poll error".to_string());
if let Some(path) = self.staging_dir.as_deref() {
remove_staged_dir(path).await;
}
None
}
}
@ -466,8 +627,6 @@ impl Spawner {
continue;
}
let handle = AgentHandle::new_with_id(agent_id.clone(), agent_config.clone());
// Build environment for this provider
let mut env_map = agent_config.env.clone();
if let Some(api_key) = provider.api_key(&self.config) {
@ -497,29 +656,40 @@ impl Spawner {
// Spawn with retry/backoff
let spawn_result = {
// Apply optional jail confinement + privilege escalation. With
// no jail this is the bare (binary, args); stdio is unaffected.
let (exe, argv) = jail_wrap(
// Prepare the command once per spawn attempt. For jailed runs,
// large/custom payload is staged into a launcher+env file
// instead of relying on inherited env across jexec/mdo.
let prepared = prepare_spawn_command(
&agent_config.binary,
&agent_config.args,
env_map.clone(),
agent_config.working_dir.as_deref(),
agent_config.jail.as_ref(),
&priv_mode,
&jail_helper,
&agent_id,
)
.await
.map_err(SpawnerError::Io)?;
let handle = AgentHandle::new_with_id(
agent_id.clone(),
agent_config.clone(),
prepared.cleanup_dir.clone(),
);
let env = env_map.clone();
let working_dir = agent_config.working_dir.clone();
let _startup_timeout = agent_config.startup_timeout_secs;
let op = || async {
let mut cmd = Command::new(&exe);
cmd.args(&argv)
.envs(&env)
let mut cmd = Command::new(&prepared.program);
cmd.args(&prepared.argv)
.envs(&prepared.env)
.stdin(Stdio::null())
.stdout(Stdio::piped())
.stderr(Stdio::piped());
if let Some(ref dir) = working_dir {
cmd.current_dir(dir);
if agent_config.jail.is_none() {
if let Some(ref dir) = agent_config.working_dir {
cmd.current_dir(dir);
}
}
let mut child = cmd.spawn().map_err(SpawnerError::Io)?;
@ -538,6 +708,9 @@ impl Spawner {
} else {
String::new()
};
if let Some(path) = prepared.cleanup_dir.as_deref() {
remove_staged_dir(path).await;
}
Err(SpawnerError::Io(std::io::Error::other(format!(
"process exited immediately with status {status}: {stderr}"
))))
@ -555,11 +728,13 @@ impl Spawner {
.with_max_delay(Duration::from_secs(10))
.with_max_times(agent_config.max_retries as usize);
op.retry(backoff).await
op.retry(backoff)
.await
.map(|(child, stdout)| (handle, child, stdout))
};
match spawn_result {
Ok((child, stdout)) => {
Ok((handle, child, stdout)) => {
*handle.child.lock().await = Some(child);
*handle.stdout.lock().await = stdout;
*handle.status.write().await = AgentStatus::Running;
@ -630,6 +805,7 @@ mod jail_tests {
fn named_jail_via_mdo() {
let j = JailConfig {
name: Some("pi0".into()),
root_path: Some("/usr/local/bastille/jails/pi0/root".into()),
..Default::default()
};
let (exe, a) = jail_wrap(
@ -647,6 +823,7 @@ mod jail_tests {
fn named_jail_with_user_no_priv() {
let j = JailConfig {
name: Some("pi0".into()),
root_path: Some("/usr/local/bastille/jails/pi0/root".into()),
user: Some("clawdie".into()),
..Default::default()
};
@ -666,6 +843,7 @@ mod jail_tests {
let j = JailConfig {
name: Some("pi0".into()),
path: Some("/var/jails/pi".into()),
root_path: Some("/usr/local/bastille/jails/pi0/root".into()),
..Default::default()
};
let (exe, a) = jail_wrap("pi", &[], Some(&j), &PrivMode::None, DEFAULT_JAIL_HELPER);
@ -721,4 +899,69 @@ mod jail_tests {
])
);
}
#[tokio::test]
async fn staged_named_jail_writes_launcher_and_env() {
let root = std::env::temp_dir().join(format!("colibri-jail-stage-{}", Uuid::new_v4()));
let j = JailConfig {
name: Some("pi0".into()),
root_path: Some(root.to_string_lossy().to_string()),
..Default::default()
};
let mut env = HashMap::new();
env.insert("COLIBRI_SYSTEM_PROMPT".into(), "Sam's prompt".into());
let prepared = prepare_spawn_command(
"pi",
&argv(&["--mode", "json"]),
env,
Some("/work"),
Some(&j),
&PrivMode::None,
DEFAULT_JAIL_HELPER,
"agent-test",
)
.await
.unwrap();
assert_eq!(prepared.program, "jexec");
assert_eq!(prepared.argv[0], "pi0");
assert_eq!(prepared.argv[1], "/bin/sh");
assert_eq!(
prepared.argv[2],
"/var/run/colibri-stage/agent-test/launch.sh"
);
assert!(prepared.env.is_empty());
let launcher = root.join("var/run/colibri-stage/agent-test/launch.sh");
let env_file = root.join("var/run/colibri-stage/agent-test/env.sh");
assert!(launcher.exists());
assert!(env_file.exists());
let env_raw = std::fs::read_to_string(env_file).unwrap();
assert!(env_raw.contains("COLIBRI_SYSTEM_PROMPT"));
assert!(env_raw.contains("Sam'\"'\"'s prompt"));
remove_staged_dir(prepared.cleanup_dir.as_deref().unwrap()).await;
}
#[tokio::test]
async fn staged_jail_requires_root_path() {
let mut env = HashMap::new();
env.insert("COLIBRI_AGENT_ID".into(), "a1".into());
let err = prepare_spawn_command(
"pi",
&[],
env,
None,
Some(&JailConfig {
name: Some("pi0".into()),
..Default::default()
}),
&PrivMode::None,
DEFAULT_JAIL_HELPER,
"missing-root",
)
.await
.unwrap_err();
assert!(err.to_string().contains("requires JailConfig.root_path"));
}
}

View file

@ -3,7 +3,12 @@
//! This is intentionally thin: stdio transport only, one process per request,
//! no long-lived registry, and no production-grade policy engine yet.
use std::{collections::BTreeMap, path::Path, process::Stdio, time::Duration};
use std::{
collections::{BTreeMap, HashMap},
path::{Path, PathBuf},
process::Stdio,
time::{Duration, SystemTime, UNIX_EPOCH},
};
use serde::{Deserialize, Serialize};
use serde_json::{json, Value};
@ -13,7 +18,9 @@ use tokio::{
time::timeout,
};
use colibri_daemon::spawner::{jail_wrap, JailConfig, PrivMode, DEFAULT_JAIL_HELPER};
#[cfg(test)]
use colibri_daemon::spawner::jail_wrap;
use colibri_daemon::spawner::{prepare_spawn_command, JailConfig, PrivMode, DEFAULT_JAIL_HELPER};
use crate::protocol::McpError;
@ -44,24 +51,6 @@ pub struct ExternalMcpServer {
pub jail: Option<JailConfig>,
}
impl ExternalMcpServer {
/// Resolve the `(program, argv)` to exec, applying optional jail
/// confinement + privilege escalation via the shared spawner policy.
/// With `jail == None` this is just `(command, args)`.
fn resolved_command(&self) -> (String, Vec<String>) {
let priv_mode = PrivMode::from_env();
let helper = std::env::var("COLIBRI_JAIL_HELPER")
.unwrap_or_else(|_| DEFAULT_JAIL_HELPER.to_string());
jail_wrap(
&self.command,
&self.args,
self.jail.as_ref(),
&priv_mode,
&helper,
)
}
}
impl ExternalMcpRegistry {
pub async fn load(path: &Path) -> Result<Self, McpError> {
let raw = tokio::fs::read_to_string(path).await.map_err(|e| {
@ -143,22 +132,51 @@ struct ExternalMcpSession {
stdin: tokio::process::ChildStdin,
stdout: BufReader<tokio::process::ChildStdout>,
next_id: i64,
cleanup_dir: Option<PathBuf>,
}
impl ExternalMcpSession {
async fn start(server: &ExternalMcpServer) -> Result<Self, McpError> {
// Apply optional jail confinement. With no jail this is (command, args);
// jexec/jail/mdo all inherit stdio, so the piped stdin/stdout below still
// reach the actual MCP server inside the jail.
let (program, argv) = server.resolved_command();
let mut cmd = Command::new(&program);
cmd.args(&argv)
.envs(&server.env)
let env: HashMap<String, String> = server
.env
.iter()
.map(|(k, v)| (k.clone(), v.clone()))
.collect();
let stage_id = format!(
"mcp-{}",
SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_default()
.as_nanos()
);
let prepared = prepare_spawn_command(
&server.command,
&server.args,
env,
None,
server.jail.as_ref(),
&PrivMode::from_env(),
&std::env::var("COLIBRI_JAIL_HELPER")
.unwrap_or_else(|_| DEFAULT_JAIL_HELPER.to_string()),
&stage_id,
)
.await
.map_err(|e| McpError::internal(format!("failed to prepare jailed MCP spawn: {e}")))?;
let mut cmd = Command::new(&prepared.program);
cmd.args(&prepared.argv)
.envs(&prepared.env)
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.stderr(Stdio::null());
let mut child = cmd.spawn().map_err(|e| {
if let Some(path) = prepared.cleanup_dir.as_deref() {
let path = path.to_path_buf();
tokio::spawn(async move {
let _ = tokio::fs::remove_dir_all(path).await;
});
}
McpError::internal(format!(
"failed to spawn external MCP server {}: {e}",
server.command
@ -176,6 +194,7 @@ impl ExternalMcpSession {
stdin,
stdout: BufReader::new(stdout),
next_id: 1,
cleanup_dir: prepared.cleanup_dir,
})
}
@ -264,6 +283,9 @@ impl ExternalMcpSession {
async fn shutdown(&mut self) {
let _ = self.child.kill().await;
let _ = self.child.wait().await;
if let Some(path) = self.cleanup_dir.as_deref() {
let _ = tokio::fs::remove_dir_all(path).await;
}
}
}
@ -294,13 +316,13 @@ mod tests {
#[test]
fn no_jail_resolves_to_bare_command() {
// jail == None ⇒ unchanged regardless of COLIBRI_JAIL_PRIV_MODE.
let s = ExternalMcpServer {
command: "mcp-fs".into(),
args: vec!["--root".into(), "/srv".into()],
env: BTreeMap::new(),
jail: None,
};
let (program, argv) = s.resolved_command();
let (program, argv) = jail_wrap(
"mcp-fs",
&["--root".to_string(), "/srv".to_string()],
None,
&PrivMode::Mdo,
DEFAULT_JAIL_HELPER,
);
assert_eq!(program, "mcp-fs");
assert_eq!(argv, vec!["--root".to_string(), "/srv".to_string()]);
}