2026-06-21 18:47:04 +02:00
4 changed files with 343 additions and 69 deletions
--- a/crates/colibri-client/src/bin/colibri.rs
+++ b/crates/colibri-client/src/bin/colibri.rs
@ -34,6 +34,17 @@ enum Command {
    ListTasks {
        status: Option<String>,
    },
+    ClaimTask {
+        task_id: String,
+        agent_id: String,
+    },
+    TransitionTask {
+        task_id: String,
+        status: String,
+    },
+    SetCostMode {
+        mode: String,
+    },
    CreateTask {
        title: String,
        description: Option<String>,
@ -77,6 +88,9 @@ fn usage() -> &'static str {
  colibri [--socket PATH] get-session SESSION_ID
  colibri [--socket PATH] compact-session SESSION_ID
  colibri [--socket PATH] list-tasks [--status STATUS]
+  colibri [--socket PATH] claim-task --task-id UUID --agent-id UUID
+  colibri [--socket PATH] transition-task --task-id UUID --status STATUS
+  colibri [--socket PATH] set-cost-mode MODE
  colibri [--socket PATH] create-task --title TEXT [--description TEXT]
  colibri [--socket PATH] intake-task --title TEXT [--description TEXT] [--capability CAP]...
  colibri [--socket PATH] list-skills
@ -93,6 +107,9 @@ Examples:
  colibri create-task --title "verify OSA check" --description "manual follow-up"
  colibri intake-task --title "triage watchdog" --capability freebsd
  colibri list-tasks --status queued
+  colibri claim-task --task-id 550e8400-e29b-41d4-a716-446655440000 --agent-id 660e8400-e29b-41d4-a716-446655440000
+  colibri transition-task --task-id 550e8400-e29b-41d4-a716-446655440000 --status done
+  colibri set-cost-mode smart
  colibri register-skill freebsd-check --description "Live USB startup check" --category freebsd
  colibri list-skills
  colibri register-agent NAME [--capability CAP]... [--capabilities CSV]
@ -183,6 +200,17 @@ where
            session_id: args[1].clone(),
        }),
        "list-tasks" => parse_list_tasks_options(&args[1..]),
+        "claim-task" => parse_claim_task_options(&args[1..]),
+        "transition-task" => parse_transition_task_options(&args[1..]),
+        "set-cost-mode" => {
+            if args.len() != 2 {
+                Err("set-cost-mode requires MODE\n\n".to_string() + usage())
+            } else {
+                Ok(Command::SetCostMode {
+                    mode: args[1].clone(),
+                })
+            }
+        }
        "create-task" => {
            let (title, description) = parse_task_text_options("create-task", &args[1..])?;
            Ok(Command::CreateTask { title, description })
@ -275,6 +303,71 @@ fn parse_list_tasks_options(args: &[String]) -> Result<Command, String> {
    Ok(Command::ListTasks { status })
 }

+/// Parses the arguments that follow `claim-task` into [`Command::ClaimTask`].
+///
+/// Accepts `--task-id VALUE` and `--agent-id VALUE` in any order. When a flag
+/// is repeated, the last value wins. Values are stored verbatim; no UUID
+/// validation happens here.
+///
+/// # Errors
+///
+/// Returns a message followed by the usage text when a flag has no value,
+/// when an unrecognized option is seen, or when a required flag is absent.
+fn parse_claim_task_options(args: &[String]) -> Result<Command, String> {
+    let mut task = None;
+    let mut agent = None;
+    let mut remaining = args.iter();
+    while let Some(flag) = remaining.next() {
+        // Pick the destination for this flag plus the complaint to raise if
+        // its value is missing; every known flag consumes one more argument.
+        let (slot, missing_value) = match flag.as_str() {
+            "--task-id" => (&mut task, "--task-id requires UUID\n\n"),
+            "--agent-id" => (&mut agent, "--agent-id requires UUID\n\n"),
+            other => return Err(format!("unknown claim-task option: {other}\n\n{}", usage())),
+        };
+        match remaining.next() {
+            Some(value) => *slot = Some(value.clone()),
+            None => return Err(format!("{missing_value}{}", usage())),
+        }
+    }
+    let Some(task_id) = task else {
+        return Err(format!("claim-task requires --task-id UUID\n\n{}", usage()));
+    };
+    let Some(agent_id) = agent else {
+        return Err(format!("claim-task requires --agent-id UUID\n\n{}", usage()));
+    };
+    Ok(Command::ClaimTask { task_id, agent_id })
+}
+
+/// Parses the arguments that follow `transition-task` into
+/// [`Command::TransitionTask`].
+///
+/// Accepts `--task-id VALUE` and `--status VALUE` in any order. When a flag is
+/// repeated, the last value wins. The status string is stored verbatim and is
+/// not checked against any list of statuses here.
+///
+/// # Errors
+///
+/// Returns a message followed by the usage text when a flag has no value,
+/// when an unrecognized option is seen, or when a required flag is absent.
+fn parse_transition_task_options(args: &[String]) -> Result<Command, String> {
+    let mut task = None;
+    let mut new_status = None;
+    let mut remaining = args.iter();
+    while let Some(flag) = remaining.next() {
+        // Pick the destination for this flag plus the complaint to raise if
+        // its value is missing; every known flag consumes one more argument.
+        let (slot, missing_value) = match flag.as_str() {
+            "--task-id" => (&mut task, "--task-id requires UUID\n\n"),
+            "--status" => (&mut new_status, "--status requires STATUS\n\n"),
+            other => {
+                let message = format!("unknown transition-task option: {other}\n\n{}", usage());
+                return Err(message);
+            }
+        };
+        match remaining.next() {
+            Some(value) => *slot = Some(value.clone()),
+            None => return Err(format!("{missing_value}{}", usage())),
+        }
+    }
+    let Some(task_id) = task else {
+        return Err(format!("transition-task requires --task-id UUID\n\n{}", usage()));
+    };
+    let Some(status) = new_status else {
+        return Err(format!("transition-task requires --status STATUS\n\n{}", usage()));
+    };
+    Ok(Command::TransitionTask { task_id, status })
+}
+
 fn parse_task_text_options(
    command: &str,
    args: &[String],
@ -517,6 +610,13 @@ async fn run(options: Options) -> Result<(), ClientError> {
            print_json(&client.compact_session(session_id).await?)
        }
        Command::ListTasks { status } => print_json(&client.list_tasks(status).await?),
+        Command::ClaimTask { task_id, agent_id } => {
+            print_json(&client.claim_task(task_id, agent_id).await?)
+        }
+        Command::TransitionTask { task_id, status } => {
+            print_json(&client.transition_task(task_id, status).await?)
+        }
+        Command::SetCostMode { mode } => print_json(&client.set_cost_mode(mode).await?),
        Command::CreateTask { title, description } => {
            print_json(&client.create_task(title, description).await?)
        }
@ -671,6 +771,65 @@ mod tests {
        );
    }

+    /// `claim-task` with both flags parses to `Command::ClaimTask` on the
+    /// default socket path.
+    #[test]
+    fn parses_claim_task() {
+        let actual = parsed(&["claim-task", "--task-id", "task-1", "--agent-id", "agent-1"]);
+        let expected = Options {
+            socket_path: default_socket_path(),
+            command: Command::ClaimTask {
+                task_id: String::from("task-1"),
+                agent_id: String::from("agent-1"),
+            },
+        };
+        assert_eq!(actual, expected);
+    }
+
+    /// Leaving out `--agent-id` is reported as a missing required flag.
+    #[test]
+    fn rejects_claim_task_missing_flags() {
+        let outcome = parse_args(["claim-task", "--task-id", "only"]);
+        let message = outcome.unwrap_err();
+        assert!(message.contains("claim-task requires --agent-id"));
+    }
+
+    /// `transition-task` with both flags parses to `Command::TransitionTask`
+    /// on the default socket path.
+    #[test]
+    fn parses_transition_task() {
+        let actual = parsed(&["transition-task", "--task-id", "task-1", "--status", "done"]);
+        let expected = Options {
+            socket_path: default_socket_path(),
+            command: Command::TransitionTask {
+                task_id: String::from("task-1"),
+                status: String::from("done"),
+            },
+        };
+        assert_eq!(actual, expected);
+    }
+
+    /// Leaving out `--status` is reported as a missing required flag.
+    #[test]
+    fn rejects_transition_task_missing_flags() {
+        let outcome = parse_args(["transition-task", "--task-id", "only"]);
+        let message = outcome.unwrap_err();
+        assert!(message.contains("transition-task requires --status"));
+    }
+
+    /// `set-cost-mode MODE` parses to `Command::SetCostMode` carrying the
+    /// positional argument unchanged.
+    #[test]
+    fn parses_set_cost_mode() {
+        let actual = parsed(&["set-cost-mode", "smart"]);
+        let expected = Options {
+            socket_path: default_socket_path(),
+            command: Command::SetCostMode {
+                mode: String::from("smart"),
+            },
+        };
+        assert_eq!(actual, expected);
+    }
+
+    /// `set-cost-mode` with no positional MODE is rejected with a clear message.
+    #[test]
+    fn rejects_set_cost_mode_without_arg() {
+        let outcome = parse_args(["set-cost-mode"]);
+        let message = outcome.unwrap_err();
+        assert!(message.contains("set-cost-mode requires MODE"));
+    }
+
    #[test]
    fn rejects_create_task_without_title() {
        let err = parse_args(["create-task", "--description", "missing title"]).unwrap_err();
--- a/crates/colibri-client/src/lib.rs
+++ b/crates/colibri-client/src/lib.rs
@ -196,6 +196,38 @@ impl DaemonClient {
        .await
    }

+    /// Sends a `ClaimTask` command carrying `task_id` and `agent_id`.
+    ///
+    /// Both arguments are converted to `String` and forwarded as given; the
+    /// result of the underlying `request` call is returned unchanged.
+    ///
+    /// # Errors
+    ///
+    /// Returns whatever [`ClientError`] the underlying `request` call yields.
+    pub async fn claim_task(
+        &self,
+        task_id: impl Into<String>,
+        agent_id: impl Into<String>,
+    ) -> Result<serde_json::Value, ClientError> {
+        let task_id = task_id.into();
+        let agent_id = agent_id.into();
+        let command = ColibriCommand::ClaimTask { task_id, agent_id };
+        self.request(&command).await
+    }
+
+    /// Sends a `TransitionTask` command carrying `task_id` and `status`.
+    ///
+    /// Both arguments are converted to `String` and forwarded as given; the
+    /// result of the underlying `request` call is returned unchanged.
+    ///
+    /// # Errors
+    ///
+    /// Returns whatever [`ClientError`] the underlying `request` call yields.
+    pub async fn transition_task(
+        &self,
+        task_id: impl Into<String>,
+        status: impl Into<String>,
+    ) -> Result<serde_json::Value, ClientError> {
+        let task_id = task_id.into();
+        let status = status.into();
+        let command = ColibriCommand::TransitionTask { task_id, status };
+        self.request(&command).await
+    }
+
+    /// Sends a `SetCostMode` command carrying `mode`.
+    ///
+    /// `mode` is converted to a `String` and forwarded without validation
+    /// here; the result of the underlying `request` call is returned unchanged.
+    ///
+    /// # Errors
+    ///
+    /// Returns whatever [`ClientError`] the underlying `request` call yields.
+    pub async fn set_cost_mode(
+        &self,
+        mode: impl Into<String>,
+    ) -> Result<serde_json::Value, ClientError> {
+        let command = ColibriCommand::SetCostMode { mode: mode.into() };
+        self.request(&command).await
+    }
+
    pub async fn list_skills(&self) -> Result<serde_json::Value, ClientError> {
        self.request(&ColibriCommand::ListSkills).await
    }
--- a/crates/colibri-daemon/src/scheduler.rs
+++ b/crates/colibri-daemon/src/scheduler.rs
@ -529,6 +529,76 @@ mod tests {
        assert_eq!(pick_agent(&[], &agents).unwrap().name, "generalist");
    }

+    /// Two idle agents with identical capabilities: the one later in the
+    /// slice is expected to be picked.
+    #[test]
+    fn test_pick_agent_tie_breaking() {
+        let first = colibri_store::Agent {
+            id: "a1".into(),
+            name: "first-rust".into(),
+            capabilities: serde_json::json!(["rust"]),
+            status: "idle".into(),
+            created_at: "2026-01-01T00:00:00Z".into(),
+        };
+        let second = colibri_store::Agent {
+            id: "a2".into(),
+            name: "second-rust".into(),
+            capabilities: serde_json::json!(["rust"]),
+            status: "idle".into(),
+            created_at: "2026-01-02T00:00:00Z".into(),
+        };
+        let agents = vec![first, second];
+        let required = vec![String::from("rust")];
+
+        let picked = pick_agent(&required, &agents).unwrap();
+        assert_eq!(
+            picked.name, "second-rust",
+            "tie-break must favor later-in-slice (deterministic)"
+        );
+    }
+
+    /// With two required capabilities, the agent holding both is expected to
+    /// be picked over the agent holding only one.
+    #[test]
+    fn test_pick_agent_multiple_required_capabilities() {
+        let rust_only = colibri_store::Agent {
+            id: "a1".into(),
+            name: "rust-only".into(),
+            capabilities: serde_json::json!(["rust"]),
+            status: "idle".into(),
+            created_at: "2026-01-01T00:00:00Z".into(),
+        };
+        let rust_and_freebsd = colibri_store::Agent {
+            id: "a2".into(),
+            name: "rust-and-freebsd".into(),
+            capabilities: serde_json::json!(["rust", "freebsd"]),
+            status: "idle".into(),
+            created_at: "2026-01-01T00:00:00Z".into(),
+        };
+        let agents = vec![rust_only, rust_and_freebsd];
+        let required = vec![String::from("rust"), String::from("freebsd")];
+
+        let picked = pick_agent(&required, &agents).unwrap();
+        assert_eq!(
+            picked.name, "rust-and-freebsd",
+            "agent with both required caps beats agent with one"
+        );
+    }
+
+    /// A lone agent whose status is `"active"` must still be selectable.
+    #[test]
+    fn test_pick_agent_active_status_eligible() {
+        let active_bot = colibri_store::Agent {
+            id: "a1".into(),
+            name: "active-bot".into(),
+            capabilities: serde_json::json!(["rust"]),
+            status: "active".into(),
+            created_at: "2026-01-01T00:00:00Z".into(),
+        };
+        let agents = vec![active_bot];
+        let required = vec![String::from("rust")];
+
+        let picked = pick_agent(&required, &agents);
+        assert!(
+            picked.is_some(),
+            "status 'active' must be eligible same as 'idle'"
+        );
+        let chosen = picked.unwrap();
+        assert_eq!(chosen.name, "active-bot");
+    }
+
    #[tokio::test]
    async fn test_scheduler_tick_drains_intake_without_deadlock() {
        let config = test_config();
--- a/docs/MULTI-AGENT-HOST-PLAN.md
+++ b/docs/MULTI-AGENT-HOST-PLAN.md
@ -1,24 +1,25 @@
 # Multi-Agent Multi-Host — Gap Analysis & Implementation Plan

 **Created:** 2026-06-19 (Sam & Hermes)
-**Status:** Phase 1-2 ready for implementation
+**Updated:** 2026-06-21 (Sam & Claude) — reflects 0.11.0 release and narrowed gaps
+**Status:** Phase 2a complete; Phase 1 + Phase 2b ready for implementation

 ## Context

-The 0.10.0 milestone (ISO build, rc.d lifecycle, SIGTERM/socket fixes, release
-gate) is staged for the FreeBSD build host. The next milestone is proving the
-multi-agent, multi-host coordination model: multiple agents on different hosts
-reading from the same Colibri task board, each picking up work by capability,
-and reporting results back.
+Colibri 0.11.0 is released (MIT license, 230 tests, FreeBSD port + CI running).
+The tenant/vault provision chain has landed (`register-tenant` → jail spawn →
+`provision_tenant_env()` → `colibri-vault::provision`). The next milestone is
+proving the multi-agent, multi-host coordination model: multiple agents on
+different hosts reading from the same Colibri task board, each picking up work
+by capability, and reporting results back.

 PR #83 landed the first cross-host plumbing — a socat TCP bridge, Python polling
-scripts, and a Hermes cronjob configuration. But the gap analysis below shows
-that **the multi-host plane is packaged and documented but almost entirely
-untested**. This document defines what needs to happen to close that gap.
+scripts, and a Hermes cronjob configuration. The gap analysis below defines what
+remains to close the multi-host testing gap.

 ---

-## Current architecture (as of PR #83)
+## Current architecture (as of 0.11.0)

 The multi-host stack lives **outside the Rust daemon**:

@ -34,17 +35,17 @@ The multi-host stack lives **outside the Rust daemon**:
 - **Transport:** `tokio::net::UnixListener` only — zero TCP in Rust. The socat
  bridge is a shell-level relay.
 - **Agent model:** `register-agent` stores name + capabilities + status
-  (`active`/`idle`/`offline`). No `host` field, no `last_seen`, no heartbeat,
-  no lease/TTL.
+  (`active`/`idle`/`offline`). It has no `host` field, `last_seen`, heartbeat,
+  or lease/TTL yet (planned for Phase 3).
 - **Task assignment:** `pick_agent()` matches by capability score (partial
  match counts, highest score wins, tie → later-in-slice). `claim_task()` is a
-  blind UPDATE with no concurrency guard.
+  blind UPDATE with no concurrency guard yet (Gap 4).
 - **Polling:** `colibri_poll.py` queries `list-tasks status=started` filtered
  by `agent_id`. `colibri_task_done.py` calls `transition-task`.
 - **Spawning:** `poll_tasks()` in daemon.rs spawns agents for `Claimed` tasks,
  skipping those with an existing session (idempotency guard).

-### Socket command inventory (17 commands, all Unix-socket)
+### Socket command inventory (19 commands, all Unix-socket)

 | Category       | Commands                                                                    |
 | -------------- | --------------------------------------------------------------------------- |
@ -53,14 +54,14 @@ The multi-host stack lives **outside the Rust daemon**:
 | Agent process  | `spawn-agent`, `kill-agent`                                                 |
 | Board          | `list-tasks`, `create-task`, `transition-task`, `claim-task`, `intake-task` |
 | Agent registry | `register-agent`, `list-agents`                                             |
+| Tenant         | `register-tenant`, `list-tenants`                                           |
 | Skills         | `list-skills`, `register-skill`                                             |

-### CLI surface (10 of 17 commands exposed)
+### CLI surface (16 of 19 commands exposed)

-Missing from CLI: `claim-task`, `transition-task`, `register-agent`,
-`list-agents`, `set-cost-mode`, `register-skill` (register-skill IS in CLI;
-the others are socket-only). Remote agents currently must use raw Python
-socket calls.
+Awaiting CLI exposure: `claim-task`, `transition-task`, `set-cost-mode`
+(Phase 2b). Remote agents currently use raw Python socket calls for these
+three commands.

 ---

@ -73,21 +74,38 @@ socket calls.
 - SIGTERM cleanup + stale socket safety
 - Session isolation with 2 agents (bypasses task board)
 - Cost mode derivation in background rotation
- `pick_agent` unit tests: best match, offline exclusion, no-match, empty-required
- Scheduler tick drains intake queue
+- `pick_agent` unit tests: best match (2 agents), offline exclusion, no-match,
+  empty-required, partial scoring, none scoring
+- Scheduler tick drains intake queue without deadlock
 - `poll_tasks` spawns agent for a claimed task
+- Double-spawn session isolation
+- Tenant register + list over socket

-### What is NOT tested
+### Test targets (awaiting coverage)

-| #   | Gap                                                                                                                                                                  | Severity | Linux-doable?                    |
-| --- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | -------------------------------- |
-| 1   | **Multi-agent task-board contention** — `pick_agent` only tested with 0-1 agents; no capability-based multi-agent assignment test; no same-agent-multiple-tasks test | High     | Yes                              |
-| 2   | **CLI surface gaps** — `claim-task`, `transition-task`, `register-agent`, `list-agents` have no CLI; remote agents forced to use raw Python                          | Medium   | Yes                              |
-| 3   | **Agent presence model** — missing `host`, `last_seen`, and heartbeat/lease columns; add these schema fields to detect stale remote agents                           | High     | Yes (schema change)              |
-| 4   | **Remote-safe task claim** — `claim_task` is a blind UPDATE, no concurrency safety, no lease/TTL                                                                     | Medium   | Yes                              |
-| 5   | **Python polling scripts** — `colibri_poll.py` and `colibri_task_done.py` have zero test coverage                                                                    | Medium   | Yes                              |
-| 6   | **TCP bridge round-trip** — socat bridge untested end-to-end                                                                                                         | Medium   | Partial (needs socat or FreeBSD) |
-| 7   | **Cross-host coordination** — no test simulates a remote agent claiming/transitioning a task over the bridge                                                         | High     | FreeBSD only                     |
+| #   | Gap                                                                                                                                               | Severity | Linux-doable?                    |
+| --- | ------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | -------------------------------- |
+| 1   | **Multi-agent task-board contention** — `pick_agent` tie-breaking, multi-required-capability, and active-status eligibility await dedicated tests | High     | Yes                              |
+| 2   | **CLI surface gaps** — `claim-task`, `transition-task`, `set-cost-mode` await CLI exposure (Phase 2b)                                             | Medium   | Yes                              |
+| 3   | **Agent presence model** — await `host`, `last_seen`, and heartbeat/lease columns to detect stale remote agents (Phase 3)                         | High     | Yes (schema change)              |
+| 4   | **Remote-safe task claim** — `claim_task` is a blind UPDATE; await a concurrency guard or lease/TTL                                               | Medium   | Yes                              |
+| 5   | **Python polling scripts** — `colibri_poll.py` and `colibri_task_done.py` have zero test coverage                                                 | Medium   | Yes                              |
+| 6   | **TCP bridge round-trip** — socat bridge untested end-to-end                                                                                      | Medium   | Partial (needs socat or FreeBSD) |
+| 7   | **Cross-host coordination** — await a test simulating a remote agent claiming/transitioning a task over the bridge                                | High     | FreeBSD only                     |
+
+### Closed gaps (since the original 2026-06-19 analysis)
+
+- **CLI: register-agent + list-agents** — merged (Phase 2a, PR #107)
+- **CLI: register-tenant + list-tenants + register-skill** — merged
+- **pick_agent scoring** — partial-match and no-match scoring tests added
+- **Tenant/vault provision chain** — register-tenant, jail spawn flags,
+  `provision_tenant_env()`, `colibri-vault::provision` all landed
+- **Issue #88** (CollectionNotFound) — daemon passes `tenant_id` (collection
+  name) to `vault::provision`
+- **Issue #91** (tenant provision target verification) — `trim_trailing_slash`
+  string-equality check
+- **Issue #92** (vault provision canonicalization) — canonicalize +
+  allowed-root containment (PR #119)

 ---

@ -98,14 +116,14 @@ socket calls.
 #### 1a. Pure `pick_agent` unit tests — extend `scheduler.rs` test module

 Existing tests cover: best match (2 agents, different caps), offline exclusion,
-no-match, empty-required. Add:
+no-match, empty-required, partial scoring, none scoring, tick-drains-intake.
+Add:

-| Test                                               | What it proves                                                                           |
-| -------------------------------------------------- | ---------------------------------------------------------------------------------------- |
-| `test_pick_agent_partial_match_wins_over_no_match` | Agent with `["rust","freebsd"]` beats agent with `["python"]` for required `["freebsd"]` |
-| `test_pick_agent_tie_breaking`                     | Two agents with same score — verify deterministic tie-break (later name wins)            |
-| `test_pick_agent_multiple_required_capabilities`   | Required `["rust","freebsd"]` — agent with both beats agent with one                     |
-| `test_pick_agent_active_status_eligible`           | `status: "active"` is treated same as `"idle"` (both eligible)                           |
+| Test                                             | What it proves                                                                    |
+| ------------------------------------------------ | --------------------------------------------------------------------------------- |
+| `test_pick_agent_tie_breaking`                   | Two agents with same score — verify deterministic tie-break (later-in-slice wins) |
+| `test_pick_agent_multiple_required_capabilities` | Required `["rust","freebsd"]` — agent with both beats agent with one              |
+| `test_pick_agent_active_status_eligible`         | `status: "active"` is treated same as `"idle"` (both eligible)                    |

 #### 1b. Multi-agent board integration test — new file `crates/colibri-daemon/tests/multi_agent_board.rs`

@ -145,36 +163,31 @@ Documents the current contention behavior (no guard against same agent getting
 multiple tasks) and proves session isolation when one agent handles multiple
 tasks.

-### Phase 2: Merge `feat/cli-register-agent` + add claim/transition CLI
+### Phase 2: CLI surface completion

-#### 2a. Merge `feat/cli-register-agent` (existing branch, 64 lines, client-only)
+#### 2a. Merge `feat/cli-register-agent` — COMPLETE

-The branch is clean and ready:
+`register-agent` and `list-agents` are in the CLI (merged via PR #107).

- `Command::RegisterAgent { name, capabilities }` + `Command::ListAgents`
- `parse_capabilities()` helper (reuses `--capability`/`--capabilities` pattern)
- `DaemonClient::register_agent()` + `DaemonClient::list_agents()`
- Usage text
+#### 2b. Add `claim-task`, `transition-task`, and `set-cost-mode` to CLI

-Enables: `colibri register-agent osa-agent --capability freebsd` and
-`colibri list-agents`.
-
-#### 2b. Add `claim-task` and `transition-task` to CLI
-
-The two commands `colibri_task_done.py` currently does via raw socket. Adding
-them to the CLI means remote agents can work entirely through the `colibri`
-binary:
+These are the three commands that `colibri_task_done.py` currently reaches via
+raw socket. Adding them to the CLI means remote agents can work entirely
+through the `colibri` binary:

 ```
 colibri claim-task --task-id <UUID> --agent-id <UUID>
 colibri transition-task --task-id <UUID> --status done|failed
+colibri set-cost-mode MODE
 ```

 Implementation:

- Add `Command::ClaimTask { task_id, agent_id }` and
-  `Command::TransitionTask { task_id, status }` variants
- Add `DaemonClient::claim_task()` and `DaemonClient::transition_task()`
+- Add `Command::ClaimTask { task_id, agent_id }`,
+  `Command::TransitionTask { task_id, status }`, and
+  `Command::SetCostMode { mode }` variants
+- Add `DaemonClient::claim_task()`, `DaemonClient::transition_task()`, and
+  `DaemonClient::set_cost_mode()` methods
 - Add CLI parsing (follow existing `--flag value` pattern)

 #### 2c. Add CLI unit tests for new commands
@ -197,7 +210,7 @@ simulating what `colibri_poll.py` does. Register two agents, create tasks with
 different capabilities, verify each agent sees only its tasks via the poll
 path, transition tasks to done.

-**Deferred** — depends on Phase 2 CLI additions (so the test can use CLI
+**Deferred** — depends on Phase 2b CLI additions (so the test can use CLI
 commands instead of raw socket replication of the Python scripts).

 ### Phase 5: Bridge validation (FreeBSD-only)
@ -211,17 +224,17 @@ mesh.**

 ## Summary

-| Phase | What                                     | Files                                | Linux? | Status                   |
-| ----- | ---------------------------------------- | ------------------------------------ | ------ | ------------------------ |
-| 1a    | `pick_agent` unit tests                  | `scheduler.rs` tests                 | Yes    | Ready                    |
-| 1b    | Multi-agent board integration test       | `tests/multi_agent_board.rs` (new)   | Yes    | Ready                    |
-| 1c    | Same-capability multi-task test          | Same file                            | Yes    | Ready                    |
-| 2a    | Merge `feat/cli-register-agent`          | `colibri.rs` + `lib.rs`              | Yes    | Branch exists            |
-| 2b    | Add `claim-task` + `transition-task` CLI | `colibri.rs` + `lib.rs`              | Yes    | Ready                    |
-| 2c    | CLI parse tests                          | `colibri.rs` tests                   | Yes    | Ready                    |
-| 3     | Agent presence schema                    | `schema.rs` + `lib.rs` + `socket.rs` | Yes    | Deferred                 |
-| 4     | Polling workflow test                    | `tests/`                             | Yes    | Deferred (needs Phase 2) |
-| 5     | TCP bridge validation                    | FreeBSD host                         | No     | FreeBSD lane             |
+| Phase | What                                                       | Files                                | Linux? | Status                    |
+| ----- | ---------------------------------------------------------- | ------------------------------------ | ------ | ------------------------- |
+| 1a    | `pick_agent` unit tests (3 remaining)                      | `scheduler.rs` tests                 | Yes    | Ready                     |
+| 1b    | Multi-agent board integration test                         | `tests/multi_agent_board.rs` (new)   | Yes    | Ready                     |
+| 1c    | Same-capability multi-task test                            | Same file                            | Yes    | Ready                     |
+| 2a    | Merge `feat/cli-register-agent`                            | `colibri.rs` + `lib.rs`              | Yes    | **Complete**              |
+| 2b    | Add `claim-task` + `transition-task` + `set-cost-mode` CLI | `colibri.rs` + `lib.rs`              | Yes    | Ready                     |
+| 2c    | CLI parse tests                                            | `colibri.rs` tests                   | Yes    | Ready                     |
+| 3     | Agent presence schema                                      | `schema.rs` + `lib.rs` + `socket.rs` | Yes    | Deferred                  |
+| 4     | Polling workflow test                                      | `tests/`                             | Yes    | Deferred (needs Phase 2b) |
+| 5     | TCP bridge validation                                      | FreeBSD host                         | No     | FreeBSD lane              |

-**Immediate scope:** Phases 1-2. All testable on Linux with `cargo test` +
+**Immediate scope:** Phases 1 + 2b. All testable on Linux with `cargo test` +
 `cargo clippy` gate. No FreeBSD dependency for implementation.