feat: rework multi-agent plan + complete CLI surface (19/19 commands) #138

Merged
clawdie merged 3 commits from feat/multi-agent-plan-rework-cli-surface into main 2026-06-21 18:47:04 +02:00
4 changed files with 343 additions and 69 deletions

View file

@ -34,6 +34,17 @@ enum Command {
ListTasks { ListTasks {
status: Option<String>, status: Option<String>,
}, },
ClaimTask {
task_id: String,
agent_id: String,
},
TransitionTask {
task_id: String,
status: String,
},
SetCostMode {
mode: String,
},
CreateTask { CreateTask {
title: String, title: String,
description: Option<String>, description: Option<String>,
@ -77,6 +88,9 @@ fn usage() -> &'static str {
colibri [--socket PATH] get-session SESSION_ID colibri [--socket PATH] get-session SESSION_ID
colibri [--socket PATH] compact-session SESSION_ID colibri [--socket PATH] compact-session SESSION_ID
colibri [--socket PATH] list-tasks [--status STATUS] colibri [--socket PATH] list-tasks [--status STATUS]
colibri [--socket PATH] claim-task --task-id UUID --agent-id UUID
colibri [--socket PATH] transition-task --task-id UUID --status STATUS
colibri [--socket PATH] set-cost-mode MODE
colibri [--socket PATH] create-task --title TEXT [--description TEXT] colibri [--socket PATH] create-task --title TEXT [--description TEXT]
colibri [--socket PATH] intake-task --title TEXT [--description TEXT] [--capability CAP]... colibri [--socket PATH] intake-task --title TEXT [--description TEXT] [--capability CAP]...
colibri [--socket PATH] list-skills colibri [--socket PATH] list-skills
@ -93,6 +107,9 @@ Examples:
colibri create-task --title "verify OSA check" --description "manual follow-up" colibri create-task --title "verify OSA check" --description "manual follow-up"
colibri intake-task --title "triage watchdog" --capability freebsd colibri intake-task --title "triage watchdog" --capability freebsd
colibri list-tasks --status queued colibri list-tasks --status queued
colibri claim-task --task-id 550e8400-e29b-41d4-a716-446655440000 --agent-id 660e8400-e29b-41d4-a716-446655440000
colibri transition-task --task-id 550e8400-e29b-41d4-a716-446655440000 --status done
colibri set-cost-mode smart
colibri register-skill freebsd-check --description "Live USB startup check" --category freebsd colibri register-skill freebsd-check --description "Live USB startup check" --category freebsd
colibri list-skills colibri list-skills
colibri register-agent NAME [--capability CAP]... [--capabilities CSV] colibri register-agent NAME [--capability CAP]... [--capabilities CSV]
@ -183,6 +200,17 @@ where
session_id: args[1].clone(), session_id: args[1].clone(),
}), }),
"list-tasks" => parse_list_tasks_options(&args[1..]), "list-tasks" => parse_list_tasks_options(&args[1..]),
"claim-task" => parse_claim_task_options(&args[1..]),
"transition-task" => parse_transition_task_options(&args[1..]),
"set-cost-mode" => {
if args.len() != 2 {
Err("set-cost-mode requires MODE\n\n".to_string() + usage())
} else {
Ok(Command::SetCostMode {
mode: args[1].clone(),
})
}
}
"create-task" => { "create-task" => {
let (title, description) = parse_task_text_options("create-task", &args[1..])?; let (title, description) = parse_task_text_options("create-task", &args[1..])?;
Ok(Command::CreateTask { title, description }) Ok(Command::CreateTask { title, description })
@ -275,6 +303,71 @@ fn parse_list_tasks_options(args: &[String]) -> Result<Command, String> {
Ok(Command::ListTasks { status }) Ok(Command::ListTasks { status })
} }
/// Parses the arguments that follow the `claim-task` subcommand.
///
/// Accepts `--task-id VALUE` and `--agent-id VALUE` in any order. If a flag is given more than
/// once the last value is kept. The values are stored as-is; no UUID validation happens here.
///
/// # Errors
///
/// Returns a message with the usage text appended when a flag is missing its value, when an
/// unrecognized argument is encountered, or when either required flag was never supplied
/// (`--task-id` is checked before `--agent-id`).
fn parse_claim_task_options(args: &[String]) -> Result<Command, String> {
    let mut task_id: Option<String> = None;
    let mut agent_id: Option<String> = None;

    // Walk the arguments pairwise: each flag consumes the argument right after it.
    let mut rest = args.iter();
    while let Some(flag) = rest.next() {
        match flag.as_str() {
            "--task-id" => match rest.next() {
                Some(value) => task_id = Some(value.clone()),
                None => return Err(["--task-id requires UUID\n\n", usage()].concat()),
            },
            "--agent-id" => match rest.next() {
                Some(value) => agent_id = Some(value.clone()),
                None => return Err(["--agent-id requires UUID\n\n", usage()].concat()),
            },
            other => return Err(format!("unknown claim-task option: {other}\n\n{}", usage())),
        }
    }

    let Some(task_id) = task_id else {
        return Err(format!("claim-task requires --task-id UUID\n\n{}", usage()));
    };
    let Some(agent_id) = agent_id else {
        return Err(format!("claim-task requires --agent-id UUID\n\n{}", usage()));
    };
    Ok(Command::ClaimTask { task_id, agent_id })
}
/// Parses the arguments that follow the `transition-task` subcommand.
///
/// Accepts `--task-id VALUE` and `--status VALUE` in any order. If a flag is given more than
/// once the last value is kept. The status string is passed through without validation.
///
/// # Errors
///
/// Returns a message with the usage text appended when a flag is missing its value, when an
/// unrecognized argument is encountered, or when either required flag was never supplied
/// (`--task-id` is checked before `--status`).
fn parse_transition_task_options(args: &[String]) -> Result<Command, String> {
    let mut task_id: Option<String> = None;
    let mut status: Option<String> = None;

    // Walk the arguments pairwise: each flag consumes the argument right after it.
    let mut rest = args.iter();
    while let Some(flag) = rest.next() {
        match flag.as_str() {
            "--task-id" => match rest.next() {
                Some(value) => task_id = Some(value.clone()),
                None => return Err(["--task-id requires UUID\n\n", usage()].concat()),
            },
            "--status" => match rest.next() {
                Some(value) => status = Some(value.clone()),
                None => return Err(["--status requires STATUS\n\n", usage()].concat()),
            },
            other => {
                return Err(format!(
                    "unknown transition-task option: {other}\n\n{}",
                    usage()
                ));
            }
        }
    }

    let Some(task_id) = task_id else {
        return Err(format!("transition-task requires --task-id UUID\n\n{}", usage()));
    };
    let Some(status) = status else {
        return Err(format!("transition-task requires --status STATUS\n\n{}", usage()));
    };
    Ok(Command::TransitionTask { task_id, status })
}
fn parse_task_text_options( fn parse_task_text_options(
command: &str, command: &str,
args: &[String], args: &[String],
@ -517,6 +610,13 @@ async fn run(options: Options) -> Result<(), ClientError> {
print_json(&client.compact_session(session_id).await?) print_json(&client.compact_session(session_id).await?)
} }
Command::ListTasks { status } => print_json(&client.list_tasks(status).await?), Command::ListTasks { status } => print_json(&client.list_tasks(status).await?),
Command::ClaimTask { task_id, agent_id } => {
print_json(&client.claim_task(task_id, agent_id).await?)
}
Command::TransitionTask { task_id, status } => {
print_json(&client.transition_task(task_id, status).await?)
}
Command::SetCostMode { mode } => print_json(&client.set_cost_mode(mode).await?),
Command::CreateTask { title, description } => { Command::CreateTask { title, description } => {
print_json(&client.create_task(title, description).await?) print_json(&client.create_task(title, description).await?)
} }
@ -671,6 +771,65 @@ mod tests {
); );
} }
/// `claim-task` with both flags yields `Command::ClaimTask` on the default socket path.
#[test]
fn parses_claim_task() {
    let actual = parsed(&["claim-task", "--task-id", "task-1", "--agent-id", "agent-1"]);
    let expected = Options {
        socket_path: default_socket_path(),
        command: Command::ClaimTask {
            task_id: String::from("task-1"),
            agent_id: String::from("agent-1"),
        },
    };
    assert_eq!(actual, expected);
}
/// `claim-task` without `--agent-id` is rejected with a message naming the missing flag.
#[test]
fn rejects_claim_task_missing_flags() {
    let outcome = parse_args(["claim-task", "--task-id", "only"]);
    let message = outcome.unwrap_err();
    assert!(message.contains("claim-task requires --agent-id"));
}
/// `transition-task` with both flags yields `Command::TransitionTask` on the default socket path.
#[test]
fn parses_transition_task() {
    let actual = parsed(&["transition-task", "--task-id", "task-1", "--status", "done"]);
    let expected = Options {
        socket_path: default_socket_path(),
        command: Command::TransitionTask {
            task_id: String::from("task-1"),
            status: String::from("done"),
        },
    };
    assert_eq!(actual, expected);
}
/// `transition-task` without `--status` is rejected with a message naming the missing flag.
#[test]
fn rejects_transition_task_missing_flags() {
    let outcome = parse_args(["transition-task", "--task-id", "only"]);
    let message = outcome.unwrap_err();
    assert!(message.contains("transition-task requires --status"));
}
/// `set-cost-mode MODE` yields `Command::SetCostMode` carrying the positional mode string.
#[test]
fn parses_set_cost_mode() {
    let actual = parsed(&["set-cost-mode", "smart"]);
    let expected = Options {
        socket_path: default_socket_path(),
        command: Command::SetCostMode {
            mode: String::from("smart"),
        },
    };
    assert_eq!(actual, expected);
}
/// `set-cost-mode` with no positional argument is rejected with a message asking for MODE.
#[test]
fn rejects_set_cost_mode_without_arg() {
    let outcome = parse_args(["set-cost-mode"]);
    let message = outcome.unwrap_err();
    assert!(message.contains("set-cost-mode requires MODE"));
}
#[test] #[test]
fn rejects_create_task_without_title() { fn rejects_create_task_without_title() {
let err = parse_args(["create-task", "--description", "missing title"]).unwrap_err(); let err = parse_args(["create-task", "--description", "missing title"]).unwrap_err();

View file

@ -196,6 +196,38 @@ impl DaemonClient {
.await .await
} }
/// Builds a `ColibriCommand::ClaimTask` from the given ids and forwards it through
/// `request`, returning whatever that call yields.
pub async fn claim_task(
    &self,
    task_id: impl Into<String>,
    agent_id: impl Into<String>,
) -> Result<serde_json::Value, ClientError> {
    let command = ColibriCommand::ClaimTask {
        task_id: task_id.into(),
        agent_id: agent_id.into(),
    };
    self.request(&command).await
}
/// Builds a `ColibriCommand::TransitionTask` from the task id and target status and forwards
/// it through `request`, returning whatever that call yields.
pub async fn transition_task(
    &self,
    task_id: impl Into<String>,
    status: impl Into<String>,
) -> Result<serde_json::Value, ClientError> {
    let command = ColibriCommand::TransitionTask {
        task_id: task_id.into(),
        status: status.into(),
    };
    self.request(&command).await
}
/// Builds a `ColibriCommand::SetCostMode` carrying `mode` and forwards it through `request`,
/// returning whatever that call yields.
pub async fn set_cost_mode(
    &self,
    mode: impl Into<String>,
) -> Result<serde_json::Value, ClientError> {
    let command = ColibriCommand::SetCostMode { mode: mode.into() };
    self.request(&command).await
}
pub async fn list_skills(&self) -> Result<serde_json::Value, ClientError> { pub async fn list_skills(&self) -> Result<serde_json::Value, ClientError> {
self.request(&ColibriCommand::ListSkills).await self.request(&ColibriCommand::ListSkills).await
} }

View file

@ -529,6 +529,76 @@ mod tests {
assert_eq!(pick_agent(&[], &agents).unwrap().name, "generalist"); assert_eq!(pick_agent(&[], &agents).unwrap().name, "generalist");
} }
/// Two idle agents with identical capabilities score the same; the one later in the slice
/// must be the one returned.
#[test]
fn test_pick_agent_tie_breaking() {
    let required = vec!["rust".to_string()];

    let earlier = colibri_store::Agent {
        id: "a1".into(),
        name: "first-rust".into(),
        capabilities: serde_json::json!(["rust"]),
        status: "idle".into(),
        created_at: "2026-01-01T00:00:00Z".into(),
    };
    let later = colibri_store::Agent {
        id: "a2".into(),
        name: "second-rust".into(),
        capabilities: serde_json::json!(["rust"]),
        status: "idle".into(),
        created_at: "2026-01-02T00:00:00Z".into(),
    };
    let agents = vec![earlier, later];

    let winner = pick_agent(&required, &agents).unwrap();
    assert_eq!(
        winner.name, "second-rust",
        "tie-break must favor later-in-slice (deterministic)"
    );
}
/// With two required capabilities, an agent covering both must be preferred over an agent
/// covering only one of them.
#[test]
fn test_pick_agent_multiple_required_capabilities() {
    let required = vec!["rust".to_string(), "freebsd".to_string()];

    let partial = colibri_store::Agent {
        id: "a1".into(),
        name: "rust-only".into(),
        capabilities: serde_json::json!(["rust"]),
        status: "idle".into(),
        created_at: "2026-01-01T00:00:00Z".into(),
    };
    let complete = colibri_store::Agent {
        id: "a2".into(),
        name: "rust-and-freebsd".into(),
        capabilities: serde_json::json!(["rust", "freebsd"]),
        status: "idle".into(),
        created_at: "2026-01-01T00:00:00Z".into(),
    };
    let agents = vec![partial, complete];

    let winner = pick_agent(&required, &agents).unwrap();
    assert_eq!(
        winner.name, "rust-and-freebsd",
        "agent with both required caps beats agent with one"
    );
}
/// A lone agent whose status is `"active"` must still be selected for a matching capability.
#[test]
fn test_pick_agent_active_status_eligible() {
    let required = vec!["rust".to_string()];
    let only_agent = colibri_store::Agent {
        id: "a1".into(),
        name: "active-bot".into(),
        capabilities: serde_json::json!(["rust"]),
        status: "active".into(),
        created_at: "2026-01-01T00:00:00Z".into(),
    };
    let agents = vec![only_agent];

    // Fail with the eligibility message if nothing was picked, then check who was picked.
    let Some(winner) = pick_agent(&required, &agents) else {
        panic!("status 'active' must be eligible same as 'idle'");
    };
    assert_eq!(winner.name, "active-bot");
}
#[tokio::test] #[tokio::test]
async fn test_scheduler_tick_drains_intake_without_deadlock() { async fn test_scheduler_tick_drains_intake_without_deadlock() {
let config = test_config(); let config = test_config();

View file

@ -1,24 +1,25 @@
# Multi-Agent Multi-Host — Gap Analysis & Implementation Plan # Multi-Agent Multi-Host — Gap Analysis & Implementation Plan
**Created:** 2026-06-19 (Sam & Hermes) **Created:** 2026-06-19 (Sam & Hermes)
**Status:** Phase 1-2 ready for implementation **Updated:** 2026-06-21 (Sam & Claude) — reflects 0.11.0 release and narrowed gaps
**Status:** Phase 2a complete; Phase 1 + Phase 2b ready for implementation
## Context ## Context
The 0.10.0 milestone (ISO build, rc.d lifecycle, SIGTERM/socket fixes, release Colibri 0.11.0 is released (MIT license, 230 tests, FreeBSD port + CI running).
gate) is staged for the FreeBSD build host. The next milestone is proving the The tenant/vault provision chain has landed (`register-tenant` → jail spawn →
multi-agent, multi-host coordination model: multiple agents on different hosts `provision_tenant_env()` → `colibri-vault::provision`). The next milestone is
reading from the same Colibri task board, each picking up work by capability, proving the multi-agent, multi-host coordination model: multiple agents on
and reporting results back. different hosts reading from the same Colibri task board, each picking up work
by capability, and reporting results back.
PR #83 landed the first cross-host plumbing — a socat TCP bridge, Python polling PR #83 landed the first cross-host plumbing — a socat TCP bridge, Python polling
scripts, and a Hermes cronjob configuration. But the gap analysis below shows scripts, and a Hermes cronjob configuration. The gap analysis below defines what
that **the multi-host plane is packaged and documented but almost entirely remains to close the multi-host testing gap.
untested**. This document defines what needs to happen to close that gap.
--- ---
## Current architecture (as of PR #83) ## Current architecture (as of 0.11.0)
The multi-host stack lives **outside the Rust daemon**: The multi-host stack lives **outside the Rust daemon**:
@ -34,17 +35,17 @@ The multi-host stack lives **outside the Rust daemon**:
- **Transport:** `tokio::net::UnixListener` only — zero TCP in Rust. The socat - **Transport:** `tokio::net::UnixListener` only — zero TCP in Rust. The socat
bridge is a shell-level relay. bridge is a shell-level relay.
- **Agent model:** `register-agent` stores name + capabilities + status - **Agent model:** `register-agent` stores name + capabilities + status
(`active`/`idle`/`offline`). No `host` field, no `last_seen`, no heartbeat, (`active`/`idle`/`offline`). Awaiting `host` field, `last_seen`, heartbeat,
no lease/TTL. and lease/TTL (Phase 3).
- **Task assignment:** `pick_agent()` matches by capability score (partial - **Task assignment:** `pick_agent()` matches by capability score (partial
match counts, highest score wins, tie → later-in-slice). `claim_task()` is a match counts, highest score wins, tie → later-in-slice). `claim_task()` is a
blind UPDATE with no concurrency guard. blind UPDATE; await a concurrency guard (Gap 4).
- **Polling:** `colibri_poll.py` queries `list-tasks status=started` filtered - **Polling:** `colibri_poll.py` queries `list-tasks status=started` filtered
by `agent_id`. `colibri_task_done.py` calls `transition-task`. by `agent_id`. `colibri_task_done.py` calls `transition-task`.
- **Spawning:** `poll_tasks()` in daemon.rs spawns agents for `Claimed` tasks, - **Spawning:** `poll_tasks()` in daemon.rs spawns agents for `Claimed` tasks,
skipping those with an existing session (idempotency guard). skipping those with an existing session (idempotency guard).
### Socket command inventory (17 commands, all Unix-socket) ### Socket command inventory (19 commands, all Unix-socket)
| Category | Commands | | Category | Commands |
| -------------- | --------------------------------------------------------------------------- | | -------------- | --------------------------------------------------------------------------- |
@ -53,14 +54,14 @@ The multi-host stack lives **outside the Rust daemon**:
| Agent process | `spawn-agent`, `kill-agent` | | Agent process | `spawn-agent`, `kill-agent` |
| Board | `list-tasks`, `create-task`, `transition-task`, `claim-task`, `intake-task` | | Board | `list-tasks`, `create-task`, `transition-task`, `claim-task`, `intake-task` |
| Agent registry | `register-agent`, `list-agents` | | Agent registry | `register-agent`, `list-agents` |
| Tenant | `register-tenant`, `list-tenants` |
| Skills | `list-skills`, `register-skill` | | Skills | `list-skills`, `register-skill` |
### CLI surface (10 of 17 commands exposed) ### CLI surface (16 of 19 commands exposed)
Missing from CLI: `claim-task`, `transition-task`, `register-agent`, Awaiting CLI exposure: `claim-task`, `transition-task`, `set-cost-mode`
`list-agents`, `set-cost-mode`, `register-skill` (register-skill IS in CLI; (Phase 2b). Remote agents currently use raw Python socket calls for these
the others are socket-only). Remote agents currently must use raw Python three commands.
socket calls.
--- ---
@ -73,21 +74,38 @@ socket calls.
- SIGTERM cleanup + stale socket safety - SIGTERM cleanup + stale socket safety
- Session isolation with 2 agents (bypasses task board) - Session isolation with 2 agents (bypasses task board)
- Cost mode derivation in background rotation - Cost mode derivation in background rotation
- `pick_agent` unit tests: best match, offline exclusion, no-match, empty-required - `pick_agent` unit tests: best match (2 agents), offline exclusion, no-match,
- Scheduler tick drains intake queue empty-required, partial scoring, none scoring
- Scheduler tick drains intake queue without deadlock
- `poll_tasks` spawns agent for a claimed task - `poll_tasks` spawns agent for a claimed task
- Double-spawn session isolation
- Tenant register + list over socket
### What is NOT tested ### Test targets (awaiting coverage)
| # | Gap | Severity | Linux-doable? | | # | Gap | Severity | Linux-doable? |
| --- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | -------------------------------- | | --- | ------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | -------------------------------- |
| 1 | **Multi-agent task-board contention** — `pick_agent` only tested with 0-1 agents; no capability-based multi-agent assignment test; no same-agent-multiple-tasks test | High | Yes | | 1 | **Multi-agent task-board contention** — `pick_agent` tie-breaking, multi-required-capability, and active-status eligibility await dedicated tests | High | Yes |
| 2 | **CLI surface gaps** — `claim-task`, `transition-task`, `register-agent`, `list-agents` have no CLI; remote agents forced to use raw Python | Medium | Yes | | 2 | **CLI surface gaps** — `claim-task`, `transition-task`, `set-cost-mode` await CLI exposure (Phase 2b) | Medium | Yes |
| 3 | **Agent presence model** — missing `host`, `last_seen`, and heartbeat/lease columns; add these schema fields to detect stale remote agents | High | Yes (schema change) | | 3 | **Agent presence model** — await `host`, `last_seen`, and heartbeat/lease columns to detect stale remote agents (Phase 3) | High | Yes (schema change) |
| 4 | **Remote-safe task claim** — `claim_task` is a blind UPDATE, no concurrency safety, no lease/TTL | Medium | Yes | | 4 | **Remote-safe task claim** — `claim_task` is a blind UPDATE; await a concurrency guard or lease/TTL | Medium | Yes |
| 5 | **Python polling scripts** — `colibri_poll.py` and `colibri_task_done.py` have zero test coverage | Medium | Yes | | 5 | **Python polling scripts** — `colibri_poll.py` and `colibri_task_done.py` have zero test coverage | Medium | Yes |
| 6 | **TCP bridge round-trip** — socat bridge untested end-to-end | Medium | Partial (needs socat or FreeBSD) | | 6 | **TCP bridge round-trip** — socat bridge untested end-to-end | Medium | Partial (needs socat or FreeBSD) |
| 7 | **Cross-host coordination** — no test simulates a remote agent claiming/transitioning a task over the bridge | High | FreeBSD only | | 7 | **Cross-host coordination** — await a test simulating a remote agent claiming/transitioning a task over the bridge | High | FreeBSD only |
### Closed gaps (since the original 2026-06-19 analysis)
- **CLI: register-agent + list-agents** — merged (Phase 2a, PR #107)
- **CLI: register-tenant + list-tenants + register-skill** — merged
- **pick_agent scoring** — partial-match and no-match scoring tests added
- **Tenant/vault provision chain** — register-tenant, jail spawn flags,
`provision_tenant_env()`, `colibri-vault::provision` all landed
- **Issue #88** (CollectionNotFound) — daemon passes `tenant_id` (collection
name) to `vault::provision`
- **Issue #91** (tenant provision target verification) — `trim_trailing_slash`
string-equality check
- **Issue #92** (vault provision canonicalization) — canonicalize +
allowed-root containment (PR #119)
--- ---
@ -98,14 +116,14 @@ socket calls.
#### 1a. Pure `pick_agent` unit tests — extend `scheduler.rs` test module #### 1a. Pure `pick_agent` unit tests — extend `scheduler.rs` test module
Existing tests cover: best match (2 agents, different caps), offline exclusion, Existing tests cover: best match (2 agents, different caps), offline exclusion,
no-match, empty-required. Add: no-match, empty-required, partial scoring, none scoring, tick-drains-intake.
Add:
| Test | What it proves | | Test | What it proves |
| -------------------------------------------------- | ---------------------------------------------------------------------------------------- | | ------------------------------------------------ | --------------------------------------------------------------------------------- |
| `test_pick_agent_partial_match_wins_over_no_match` | Agent with `["rust","freebsd"]` beats agent with `["python"]` for required `["freebsd"]` | | `test_pick_agent_tie_breaking` | Two agents with same score — verify deterministic tie-break (later-in-slice wins) |
| `test_pick_agent_tie_breaking` | Two agents with same score — verify deterministic tie-break (later name wins) | | `test_pick_agent_multiple_required_capabilities` | Required `["rust","freebsd"]` — agent with both beats agent with one |
| `test_pick_agent_multiple_required_capabilities` | Required `["rust","freebsd"]` — agent with both beats agent with one | | `test_pick_agent_active_status_eligible` | `status: "active"` is treated same as `"idle"` (both eligible) |
| `test_pick_agent_active_status_eligible` | `status: "active"` is treated same as `"idle"` (both eligible) |
#### 1b. Multi-agent board integration test — new file `crates/colibri-daemon/tests/multi_agent_board.rs` #### 1b. Multi-agent board integration test — new file `crates/colibri-daemon/tests/multi_agent_board.rs`
@ -145,36 +163,31 @@ Documents the current contention behavior (no guard against same agent getting
multiple tasks) and proves session isolation when one agent handles multiple multiple tasks) and proves session isolation when one agent handles multiple
tasks. tasks.
### Phase 2: Merge `feat/cli-register-agent` + add claim/transition CLI ### Phase 2: CLI surface completion
#### 2a. Merge `feat/cli-register-agent` (existing branch, 64 lines, client-only) #### 2a. Merge `feat/cli-register-agent` — COMPLETE
The branch is clean and ready: `register-agent` and `list-agents` are in the CLI (merged via PR #107).
- `Command::RegisterAgent { name, capabilities }` + `Command::ListAgents` #### 2b. Add `claim-task`, `transition-task`, and `set-cost-mode` to CLI
- `parse_capabilities()` helper (reuses `--capability`/`--capabilities` pattern)
- `DaemonClient::register_agent()` + `DaemonClient::list_agents()`
- Usage text
Enables: `colibri register-agent osa-agent --capability freebsd` and The three commands `colibri_task_done.py` currently reaches via raw socket.
`colibri list-agents`. Adding them to the CLI means remote agents can work entirely through the
`colibri` binary:
#### 2b. Add `claim-task` and `transition-task` to CLI
The two commands `colibri_task_done.py` currently does via raw socket. Adding
them to the CLI means remote agents can work entirely through the `colibri`
binary:
``` ```
colibri claim-task --task-id <UUID> --agent-id <UUID> colibri claim-task --task-id <UUID> --agent-id <UUID>
colibri transition-task --task-id <UUID> --status done|failed colibri transition-task --task-id <UUID> --status done|failed
colibri set-cost-mode MODE
``` ```
Implementation: Implementation:
- Add `Command::ClaimTask { task_id, agent_id }` and - Add `Command::ClaimTask { task_id, agent_id }`,
`Command::TransitionTask { task_id, status }` variants `Command::TransitionTask { task_id, status }`, and
- Add `DaemonClient::claim_task()` and `DaemonClient::transition_task()` `Command::SetCostMode { mode }` variants
- Add `DaemonClient::claim_task()`, `DaemonClient::transition_task()`, and
`DaemonClient::set_cost_mode()` methods
- Add CLI parsing (follow existing `--flag value` pattern) - Add CLI parsing (follow existing `--flag value` pattern)
#### 2c. Add CLI unit tests for new commands #### 2c. Add CLI unit tests for new commands
@ -197,7 +210,7 @@ simulating what `colibri_poll.py` does. Register two agents, create tasks with
different capabilities, verify each agent sees only its tasks via the poll different capabilities, verify each agent sees only its tasks via the poll
path, transition tasks to done. path, transition tasks to done.
**Deferred** — depends on Phase 2 CLI additions (so the test can use CLI **Deferred** — depends on Phase 2b CLI additions (so the test can use CLI
commands instead of raw socket replication of the Python scripts). commands instead of raw socket replication of the Python scripts).
### Phase 5: Bridge validation (FreeBSD-only) ### Phase 5: Bridge validation (FreeBSD-only)
@ -211,17 +224,17 @@ mesh.**
## Summary ## Summary
| Phase | What | Files | Linux? | Status | | Phase | What | Files | Linux? | Status |
| ----- | ---------------------------------------- | ------------------------------------ | ------ | ------------------------ | | ----- | ---------------------------------------------------------- | ------------------------------------ | ------ | ------------------------- |
| 1a | `pick_agent` unit tests | `scheduler.rs` tests | Yes | Ready | | 1a | `pick_agent` unit tests (3 remaining) | `scheduler.rs` tests | Yes | Ready |
| 1b | Multi-agent board integration test | `tests/multi_agent_board.rs` (new) | Yes | Ready | | 1b | Multi-agent board integration test | `tests/multi_agent_board.rs` (new) | Yes | Ready |
| 1c | Same-capability multi-task test | Same file | Yes | Ready | | 1c | Same-capability multi-task test | Same file | Yes | Ready |
| 2a | Merge `feat/cli-register-agent` | `colibri.rs` + `lib.rs` | Yes | Branch exists | | 2a | Merge `feat/cli-register-agent` | `colibri.rs` + `lib.rs` | Yes | **Complete** |
| 2b | Add `claim-task` + `transition-task` CLI | `colibri.rs` + `lib.rs` | Yes | Ready | | 2b | Add `claim-task` + `transition-task` + `set-cost-mode` CLI | `colibri.rs` + `lib.rs` | Yes | Ready |
| 2c | CLI parse tests | `colibri.rs` tests | Yes | Ready | | 2c | CLI parse tests | `colibri.rs` tests | Yes | Ready |
| 3 | Agent presence schema | `schema.rs` + `lib.rs` + `socket.rs` | Yes | Deferred | | 3 | Agent presence schema | `schema.rs` + `lib.rs` + `socket.rs` | Yes | Deferred |
| 4 | Polling workflow test | `tests/` | Yes | Deferred (needs Phase 2) | | 4 | Polling workflow test | `tests/` | Yes | Deferred (needs Phase 2b) |
| 5 | TCP bridge validation | FreeBSD host | No | FreeBSD lane | | 5 | TCP bridge validation | FreeBSD host | No | FreeBSD lane |
**Immediate scope:** Phases 1-2. All testable on Linux with `cargo test` + **Immediate scope:** Phases 1 + 2b. All testable on Linux with `cargo test` +
`cargo clippy` gate. No FreeBSD dependency for implementation. `cargo clippy` gate. No FreeBSD dependency for implementation.