diff --git a/crates/colibri-client/src/bin/colibri.rs b/crates/colibri-client/src/bin/colibri.rs index 237d647..2936999 100644 --- a/crates/colibri-client/src/bin/colibri.rs +++ b/crates/colibri-client/src/bin/colibri.rs @@ -34,6 +34,17 @@ enum Command { ListTasks { status: Option, }, + ClaimTask { + task_id: String, + agent_id: String, + }, + TransitionTask { + task_id: String, + status: String, + }, + SetCostMode { + mode: String, + }, CreateTask { title: String, description: Option, @@ -77,6 +88,9 @@ fn usage() -> &'static str { colibri [--socket PATH] get-session SESSION_ID colibri [--socket PATH] compact-session SESSION_ID colibri [--socket PATH] list-tasks [--status STATUS] + colibri [--socket PATH] claim-task --task-id UUID --agent-id UUID + colibri [--socket PATH] transition-task --task-id UUID --status STATUS + colibri [--socket PATH] set-cost-mode MODE colibri [--socket PATH] create-task --title TEXT [--description TEXT] colibri [--socket PATH] intake-task --title TEXT [--description TEXT] [--capability CAP]... colibri [--socket PATH] list-skills @@ -93,6 +107,9 @@ Examples: colibri create-task --title "verify OSA check" --description "manual follow-up" colibri intake-task --title "triage watchdog" --capability freebsd colibri list-tasks --status queued + colibri claim-task --task-id 550e8400-e29b-41d4-a716-446655440000 --agent-id 660e8400-e29b-41d4-a716-446655440000 + colibri transition-task --task-id 550e8400-e29b-41d4-a716-446655440000 --status done + colibri set-cost-mode smart colibri register-skill freebsd-check --description "Live USB startup check" --category freebsd colibri list-skills colibri register-agent NAME [--capability CAP]... [--capabilities CSV] @@ -183,6 +200,17 @@ where session_id: args[1].clone(), }), "list-tasks" => parse_list_tasks_options(&args[1..]), + "claim-task" => parse_claim_task_options(&args[1..]), + "transition-task" => parse_transition_task_options(&args[1..]), + "set-cost-mode" => { + if args.len() != 2 { + Err("set-cost-mode requires MODE\n\n".to_string() + usage()) + } else { + Ok(Command::SetCostMode { + mode: args[1].clone(), + }) + } + } "create-task" => { let (title, description) = parse_task_text_options("create-task", &args[1..])?; Ok(Command::CreateTask { title, description }) @@ -275,6 +303,71 @@ fn parse_list_tasks_options(args: &[String]) -> Result { Ok(Command::ListTasks { status }) } +fn parse_claim_task_options(args: &[String]) -> Result { + let mut task_id = None; + let mut agent_id = None; + let mut i = 0; + while i < args.len() { + match args[i].as_str() { + "--task-id" => { + let Some(value) = args.get(i + 1) else { + return Err("--task-id requires UUID\n\n".to_string() + usage()); + }; + task_id = Some(value.clone()); + i += 2; + } + "--agent-id" => { + let Some(value) = args.get(i + 1) else { + return Err("--agent-id requires UUID\n\n".to_string() + usage()); + }; + agent_id = Some(value.clone()); + i += 2; + } + other => return Err(format!("unknown claim-task option: {other}\n\n{}", usage())), + } + } + let task_id = + task_id.ok_or_else(|| format!("claim-task requires --task-id UUID\n\n{}", usage()))?; + let agent_id = + agent_id.ok_or_else(|| format!("claim-task requires --agent-id UUID\n\n{}", usage()))?; + Ok(Command::ClaimTask { task_id, agent_id }) +} + +fn parse_transition_task_options(args: &[String]) -> Result { + let mut task_id = None; + let mut status = None; + let mut i = 0; + while i < args.len() { + match args[i].as_str() { + "--task-id" => { + let Some(value) = args.get(i + 1) else { + return Err("--task-id requires UUID\n\n".to_string() + usage()); + }; + task_id = Some(value.clone()); + i += 2; + } + "--status" => { + let Some(value) = args.get(i + 1) else { + return Err("--status requires STATUS\n\n".to_string() + usage()); + }; + status = Some(value.clone()); + i += 2; + } + other => { + return Err(format!( + "unknown transition-task option: {other}\n\n{}", + usage() + )) + } + } + } + let task_id = + task_id.ok_or_else(|| format!("transition-task requires --task-id UUID\n\n{}", usage()))?; + let status = + status.ok_or_else(|| format!("transition-task requires --status STATUS\n\n{}", usage()))?; + Ok(Command::TransitionTask { task_id, status }) +} + fn parse_task_text_options( command: &str, args: &[String], @@ -517,6 +610,13 @@ async fn run(options: Options) -> Result<(), ClientError> { print_json(&client.compact_session(session_id).await?) } Command::ListTasks { status } => print_json(&client.list_tasks(status).await?), + Command::ClaimTask { task_id, agent_id } => { + print_json(&client.claim_task(task_id, agent_id).await?) + } + Command::TransitionTask { task_id, status } => { + print_json(&client.transition_task(task_id, status).await?) + } + Command::SetCostMode { mode } => print_json(&client.set_cost_mode(mode).await?), Command::CreateTask { title, description } => { print_json(&client.create_task(title, description).await?) } @@ -671,6 +771,65 @@ mod tests { ); } + #[test] + fn parses_claim_task() { + assert_eq!( + parsed(&["claim-task", "--task-id", "task-1", "--agent-id", "agent-1",]), + Options { + socket_path: default_socket_path(), + command: Command::ClaimTask { + task_id: "task-1".to_string(), + agent_id: "agent-1".to_string(), + }, + } + ); + } + + #[test] + fn rejects_claim_task_missing_flags() { + let err = parse_args(["claim-task", "--task-id", "only"]).unwrap_err(); + assert!(err.contains("claim-task requires --agent-id")); + } + + #[test] + fn parses_transition_task() { + assert_eq!( + parsed(&["transition-task", "--task-id", "task-1", "--status", "done",]), + Options { + socket_path: default_socket_path(), + command: Command::TransitionTask { + task_id: "task-1".to_string(), + status: "done".to_string(), + }, + } + ); + } + + #[test] + fn rejects_transition_task_missing_flags() { + let err = parse_args(["transition-task", "--task-id", "only"]).unwrap_err(); + assert!(err.contains("transition-task requires --status")); + } + + #[test] + fn parses_set_cost_mode() { + assert_eq!( + parsed(&["set-cost-mode", "smart"]), + Options { + socket_path: default_socket_path(), + command: Command::SetCostMode { + mode: "smart".to_string(), + }, + } + ); + } + + #[test] + fn rejects_set_cost_mode_without_arg() { + let err = parse_args(["set-cost-mode"]).unwrap_err(); + assert!(err.contains("set-cost-mode requires MODE")); + } + #[test] fn rejects_create_task_without_title() { let err = parse_args(["create-task", "--description", "missing title"]).unwrap_err(); diff --git a/crates/colibri-client/src/lib.rs b/crates/colibri-client/src/lib.rs index a1ec575..4dfb537 100644 --- a/crates/colibri-client/src/lib.rs +++ b/crates/colibri-client/src/lib.rs @@ -196,6 +196,38 @@ impl DaemonClient { .await } + pub async fn claim_task( + &self, + task_id: impl Into, + agent_id: impl Into, + ) -> Result { + self.request(&ColibriCommand::ClaimTask { + task_id: task_id.into(), + agent_id: agent_id.into(), + }) + .await + } + + pub async fn transition_task( + &self, + task_id: impl Into, + status: impl Into, + ) -> Result { + self.request(&ColibriCommand::TransitionTask { + task_id: task_id.into(), + status: status.into(), + }) + .await + } + + pub async fn set_cost_mode( + &self, + mode: impl Into, + ) -> Result { + self.request(&ColibriCommand::SetCostMode { mode: mode.into() }) + .await + } + pub async fn list_skills(&self) -> Result { self.request(&ColibriCommand::ListSkills).await } diff --git a/crates/colibri-daemon/src/scheduler.rs b/crates/colibri-daemon/src/scheduler.rs index 7be0277..f98cbd8 100644 --- a/crates/colibri-daemon/src/scheduler.rs +++ b/crates/colibri-daemon/src/scheduler.rs @@ -529,6 +529,76 @@ mod tests { assert_eq!(pick_agent(&[], &agents).unwrap().name, "generalist"); } + #[test] + fn test_pick_agent_tie_breaking() { + let required = vec!["rust".to_string()]; + let agents = vec![ + colibri_store::Agent { + id: "a1".into(), + name: "first-rust".into(), + capabilities: serde_json::json!(["rust"]), + status: "idle".into(), + created_at: "2026-01-01T00:00:00Z".into(), + }, + colibri_store::Agent { + id: "a2".into(), + name: "second-rust".into(), + capabilities: serde_json::json!(["rust"]), + status: "idle".into(), + created_at: "2026-01-02T00:00:00Z".into(), + }, + ]; + let picked = pick_agent(&required, &agents).unwrap(); + assert_eq!( + picked.name, "second-rust", + "tie-break must favor later-in-slice (deterministic)" + ); + } + + #[test] + fn test_pick_agent_multiple_required_capabilities() { + let required = vec!["rust".to_string(), "freebsd".to_string()]; + let agents = vec![ + colibri_store::Agent { + id: "a1".into(), + name: "rust-only".into(), + capabilities: serde_json::json!(["rust"]), + status: "idle".into(), + created_at: "2026-01-01T00:00:00Z".into(), + }, + colibri_store::Agent { + id: "a2".into(), + name: "rust-and-freebsd".into(), + capabilities: serde_json::json!(["rust", "freebsd"]), + status: "idle".into(), + created_at: "2026-01-01T00:00:00Z".into(), + }, + ]; + let picked = pick_agent(&required, &agents).unwrap(); + assert_eq!( + picked.name, "rust-and-freebsd", + "agent with both required caps beats agent with one" + ); + } + + #[test] + fn test_pick_agent_active_status_eligible() { + let required = vec!["rust".to_string()]; + let agents = vec![colibri_store::Agent { + id: "a1".into(), + name: "active-bot".into(), + capabilities: serde_json::json!(["rust"]), + status: "active".into(), + created_at: "2026-01-01T00:00:00Z".into(), + }]; + let picked = pick_agent(&required, &agents); + assert!( + picked.is_some(), + "status 'active' must be eligible same as 'idle'" + ); + assert_eq!(picked.unwrap().name, "active-bot"); + } + #[tokio::test] async fn test_scheduler_tick_drains_intake_without_deadlock() { let config = test_config(); diff --git a/docs/MULTI-AGENT-HOST-PLAN.md b/docs/MULTI-AGENT-HOST-PLAN.md index 47e5504..78f574c 100644 --- a/docs/MULTI-AGENT-HOST-PLAN.md +++ b/docs/MULTI-AGENT-HOST-PLAN.md @@ -1,24 +1,25 @@ # Multi-Agent Multi-Host — Gap Analysis & Implementation Plan **Created:** 2026-06-19 (Sam & Hermes) -**Status:** Phase 1-2 ready for implementation +**Updated:** 2026-06-21 (Sam & Claude) — reflects 0.11.0 release and narrowed gaps +**Status:** Phase 2a complete; Phase 1 + Phase 2b ready for implementation ## Context -The 0.10.0 milestone (ISO build, rc.d lifecycle, SIGTERM/socket fixes, release -gate) is staged for the FreeBSD build host. The next milestone is proving the -multi-agent, multi-host coordination model: multiple agents on different hosts -reading from the same Colibri task board, each picking up work by capability, -and reporting results back. +Colibri 0.11.0 is released (MIT license, 230 tests, FreeBSD port + CI running). +The tenant/vault provision chain has landed (`register-tenant` → jail spawn → +`provision_tenant_env()` → `colibri-vault::provision`). The next milestone is +proving the multi-agent, multi-host coordination model: multiple agents on +different hosts reading from the same Colibri task board, each picking up work +by capability, and reporting results back. PR #83 landed the first cross-host plumbing — a socat TCP bridge, Python polling -scripts, and a Hermes cronjob configuration. But the gap analysis below shows -that **the multi-host plane is packaged and documented but almost entirely -untested**. This document defines what needs to happen to close that gap. +scripts, and a Hermes cronjob configuration. The gap analysis below defines what +remains to close the multi-host testing gap. --- -## Current architecture (as of PR #83) +## Current architecture (as of 0.11.0) The multi-host stack lives **outside the Rust daemon**: @@ -34,17 +35,17 @@ The multi-host stack lives **outside the Rust daemon**: - **Transport:** `tokio::net::UnixListener` only — zero TCP in Rust. The socat bridge is a shell-level relay. - **Agent model:** `register-agent` stores name + capabilities + status - (`active`/`idle`/`offline`). No `host` field, no `last_seen`, no heartbeat, - no lease/TTL. + (`active`/`idle`/`offline`). Awaiting `host` field, `last_seen`, heartbeat, + and lease/TTL (Phase 3). - **Task assignment:** `pick_agent()` matches by capability score (partial match counts, highest score wins, tie → later-in-slice). `claim_task()` is a - blind UPDATE with no concurrency guard. + blind UPDATE; await a concurrency guard (Gap 4). - **Polling:** `colibri_poll.py` queries `list-tasks status=started` filtered by `agent_id`. `colibri_task_done.py` calls `transition-task`. - **Spawning:** `poll_tasks()` in daemon.rs spawns agents for `Claimed` tasks, skipping those with an existing session (idempotency guard). -### Socket command inventory (17 commands, all Unix-socket) +### Socket command inventory (19 commands, all Unix-socket) | Category | Commands | | -------------- | --------------------------------------------------------------------------- | @@ -53,14 +54,14 @@ The multi-host stack lives **outside the Rust daemon**: | Agent process | `spawn-agent`, `kill-agent` | | Board | `list-tasks`, `create-task`, `transition-task`, `claim-task`, `intake-task` | | Agent registry | `register-agent`, `list-agents` | +| Tenant | `register-tenant`, `list-tenants` | | Skills | `list-skills`, `register-skill` | -### CLI surface (10 of 17 commands exposed) +### CLI surface (16 of 19 commands exposed) -Missing from CLI: `claim-task`, `transition-task`, `register-agent`, -`list-agents`, `set-cost-mode`, `register-skill` (register-skill IS in CLI; -the others are socket-only). Remote agents currently must use raw Python -socket calls. +Awaiting CLI exposure: `claim-task`, `transition-task`, `set-cost-mode` +(Phase 2b). Remote agents currently use raw Python socket calls for these +three commands. --- @@ -73,21 +74,38 @@ socket calls. - SIGTERM cleanup + stale socket safety - Session isolation with 2 agents (bypasses task board) - Cost mode derivation in background rotation -- `pick_agent` unit tests: best match, offline exclusion, no-match, empty-required -- Scheduler tick drains intake queue +- `pick_agent` unit tests: best match (2 agents), offline exclusion, no-match, + empty-required, partial scoring, none scoring +- Scheduler tick drains intake queue without deadlock - `poll_tasks` spawns agent for a claimed task +- Double-spawn session isolation +- Tenant register + list over socket -### What is NOT tested +### Test targets (awaiting coverage) -| # | Gap | Severity | Linux-doable? | -| --- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | -------------------------------- | -| 1 | **Multi-agent task-board contention** — `pick_agent` only tested with 0-1 agents; no capability-based multi-agent assignment test; no same-agent-multiple-tasks test | High | Yes | -| 2 | **CLI surface gaps** — `claim-task`, `transition-task`, `register-agent`, `list-agents` have no CLI; remote agents forced to use raw Python | Medium | Yes | -| 3 | **Agent presence model** — missing `host`, `last_seen`, and heartbeat/lease columns; add these schema fields to detect stale remote agents | High | Yes (schema change) | -| 4 | **Remote-safe task claim** — `claim_task` is a blind UPDATE, no concurrency safety, no lease/TTL | Medium | Yes | -| 5 | **Python polling scripts** — `colibri_poll.py` and `colibri_task_done.py` have zero test coverage | Medium | Yes | -| 6 | **TCP bridge round-trip** — socat bridge untested end-to-end | Medium | Partial (needs socat or FreeBSD) | -| 7 | **Cross-host coordination** — no test simulates a remote agent claiming/transitioning a task over the bridge | High | FreeBSD only | +| # | Gap | Severity | Linux-doable? | +| --- | ------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | -------------------------------- | +| 1 | **Multi-agent task-board contention** — `pick_agent` tie-breaking, multi-required-capability, and active-status eligibility await dedicated tests | High | Yes | +| 2 | **CLI surface gaps** — `claim-task`, `transition-task`, `set-cost-mode` await CLI exposure (Phase 2b) | Medium | Yes | +| 3 | **Agent presence model** — await `host`, `last_seen`, and heartbeat/lease columns to detect stale remote agents (Phase 3) | High | Yes (schema change) | +| 4 | **Remote-safe task claim** — `claim_task` is a blind UPDATE; await a concurrency guard or lease/TTL | Medium | Yes | +| 5 | **Python polling scripts** — `colibri_poll.py` and `colibri_task_done.py` have zero test coverage | Medium | Yes | +| 6 | **TCP bridge round-trip** — socat bridge untested end-to-end | Medium | Partial (needs socat or FreeBSD) | +| 7 | **Cross-host coordination** — await a test simulating a remote agent claiming/transitioning a task over the bridge | High | FreeBSD only | + +### Closed gaps (since the original 2026-06-19 analysis) + +- **CLI: register-agent + list-agents** — merged (Phase 2a, PR #107) +- **CLI: register-tenant + list-tenants + register-skill** — merged +- **pick_agent scoring** — partial-match and no-match scoring tests added +- **Tenant/vault provision chain** — register-tenant, jail spawn flags, + `provision_tenant_env()`, `colibri-vault::provision` all landed +- **Issue #88** (CollectionNotFound) — daemon passes `tenant_id` (collection + name) to `vault::provision` +- **Issue #91** (tenant provision target verification) — `trim_trailing_slash` + string-equality check +- **Issue #92** (vault provision canonicalization) — canonicalize + + allowed-root containment (PR #119) --- @@ -98,14 +116,14 @@ socket calls. #### 1a. Pure `pick_agent` unit tests — extend `scheduler.rs` test module Existing tests cover: best match (2 agents, different caps), offline exclusion, -no-match, empty-required. Add: +no-match, empty-required, partial scoring, none scoring, tick-drains-intake. +Add: -| Test | What it proves | -| -------------------------------------------------- | ---------------------------------------------------------------------------------------- | -| `test_pick_agent_partial_match_wins_over_no_match` | Agent with `["rust","freebsd"]` beats agent with `["python"]` for required `["freebsd"]` | -| `test_pick_agent_tie_breaking` | Two agents with same score — verify deterministic tie-break (later name wins) | -| `test_pick_agent_multiple_required_capabilities` | Required `["rust","freebsd"]` — agent with both beats agent with one | -| `test_pick_agent_active_status_eligible` | `status: "active"` is treated same as `"idle"` (both eligible) | +| Test | What it proves | +| ------------------------------------------------ | --------------------------------------------------------------------------------- | +| `test_pick_agent_tie_breaking` | Two agents with same score — verify deterministic tie-break (later-in-slice wins) | +| `test_pick_agent_multiple_required_capabilities` | Required `["rust","freebsd"]` — agent with both beats agent with one | +| `test_pick_agent_active_status_eligible` | `status: "active"` is treated same as `"idle"` (both eligible) | #### 1b. Multi-agent board integration test — new file `crates/colibri-daemon/tests/multi_agent_board.rs` @@ -145,36 +163,31 @@ Documents the current contention behavior (no guard against same agent getting multiple tasks) and proves session isolation when one agent handles multiple tasks. -### Phase 2: Merge `feat/cli-register-agent` + add claim/transition CLI +### Phase 2: CLI surface completion -#### 2a. Merge `feat/cli-register-agent` (existing branch, 64 lines, client-only) +#### 2a. Merge `feat/cli-register-agent` — COMPLETE -The branch is clean and ready: +`register-agent` and `list-agents` are in the CLI (merged via PR #107). -- `Command::RegisterAgent { name, capabilities }` + `Command::ListAgents` -- `parse_capabilities()` helper (reuses `--capability`/`--capabilities` pattern) -- `DaemonClient::register_agent()` + `DaemonClient::list_agents()` -- Usage text +#### 2b. Add `claim-task`, `transition-task`, and `set-cost-mode` to CLI -Enables: `colibri register-agent osa-agent --capability freebsd` and -`colibri list-agents`. - -#### 2b. Add `claim-task` and `transition-task` to CLI - -The two commands `colibri_task_done.py` currently does via raw socket. Adding -them to the CLI means remote agents can work entirely through the `colibri` -binary: +The three commands `colibri_task_done.py` currently reaches via raw socket. +Adding them to the CLI means remote agents can work entirely through the +`colibri` binary: ``` colibri claim-task --task-id --agent-id colibri transition-task --task-id --status done|failed +colibri set-cost-mode MODE ``` Implementation: -- Add `Command::ClaimTask { task_id, agent_id }` and - `Command::TransitionTask { task_id, status }` variants -- Add `DaemonClient::claim_task()` and `DaemonClient::transition_task()` +- Add `Command::ClaimTask { task_id, agent_id }`, + `Command::TransitionTask { task_id, status }`, and + `Command::SetCostMode { mode }` variants +- Add `DaemonClient::claim_task()`, `DaemonClient::transition_task()`, and + `DaemonClient::set_cost_mode()` methods - Add CLI parsing (follow existing `--flag value` pattern) #### 2c. Add CLI unit tests for new commands @@ -197,7 +210,7 @@ simulating what `colibri_poll.py` does. Register two agents, create tasks with different capabilities, verify each agent sees only its tasks via the poll path, transition tasks to done. -**Deferred** — depends on Phase 2 CLI additions (so the test can use CLI +**Deferred** — depends on Phase 2b CLI additions (so the test can use CLI commands instead of raw socket replication of the Python scripts). ### Phase 5: Bridge validation (FreeBSD-only) @@ -211,17 +224,17 @@ mesh.** ## Summary -| Phase | What | Files | Linux? | Status | -| ----- | ---------------------------------------- | ------------------------------------ | ------ | ------------------------ | -| 1a | `pick_agent` unit tests | `scheduler.rs` tests | Yes | Ready | -| 1b | Multi-agent board integration test | `tests/multi_agent_board.rs` (new) | Yes | Ready | -| 1c | Same-capability multi-task test | Same file | Yes | Ready | -| 2a | Merge `feat/cli-register-agent` | `colibri.rs` + `lib.rs` | Yes | Branch exists | -| 2b | Add `claim-task` + `transition-task` CLI | `colibri.rs` + `lib.rs` | Yes | Ready | -| 2c | CLI parse tests | `colibri.rs` tests | Yes | Ready | -| 3 | Agent presence schema | `schema.rs` + `lib.rs` + `socket.rs` | Yes | Deferred | -| 4 | Polling workflow test | `tests/` | Yes | Deferred (needs Phase 2) | -| 5 | TCP bridge validation | FreeBSD host | No | FreeBSD lane | +| Phase | What | Files | Linux? | Status | +| ----- | ---------------------------------------------------------- | ------------------------------------ | ------ | ------------------------- | +| 1a | `pick_agent` unit tests (3 remaining) | `scheduler.rs` tests | Yes | Ready | +| 1b | Multi-agent board integration test | `tests/multi_agent_board.rs` (new) | Yes | Ready | +| 1c | Same-capability multi-task test | Same file | Yes | Ready | +| 2a | Merge `feat/cli-register-agent` | `colibri.rs` + `lib.rs` | Yes | **Complete** | +| 2b | Add `claim-task` + `transition-task` + `set-cost-mode` CLI | `colibri.rs` + `lib.rs` | Yes | Ready | +| 2c | CLI parse tests | `colibri.rs` tests | Yes | Ready | +| 3 | Agent presence schema | `schema.rs` + `lib.rs` + `socket.rs` | Yes | Deferred | +| 4 | Polling workflow test | `tests/` | Yes | Deferred (needs Phase 2b) | +| 5 | TCP bridge validation | FreeBSD host | No | FreeBSD lane | -**Immediate scope:** Phases 1-2. All testable on Linux with `cargo test` + +**Immediate scope:** Phases 1 + 2b. All testable on Linux with `cargo test` + `cargo clippy` gate. No FreeBSD dependency for implementation.