From 12c63daeb1de959d42574a625afbe7e0b9a8d8e5 Mon Sep 17 00:00:00 2001 From: Sam & Claude Date: Sat, 27 Jun 2026 11:54:30 +0200 Subject: [PATCH 1/9] =?UTF-8?q?refactor:=20rename=20smoke=E2=86=92test=20a?= =?UTF-8?q?cross=20provider=20contracts=20and=20docs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ProviderSmokeResult → ProviderTestResult PROVIDER_SMOKE_SCHEMA → PROVIDER_TEST_SCHEMA clawdie.provider-smoke.result.v1 → clawdie.provider-test.result.v1 (manifests, golden tests, wiki, zot_rpc comments) Rationale: smoke is jargon; test is clear and consistent with the project's naming conventions (avoid dead/fake/smoke labels). --- crates/colibri-contracts/src/lib.rs | 6 +++--- crates/colibri-contracts/tests/golden.rs | 16 ++++++++-------- crates/colibri-daemon/tests/zot_rpc_smoke.rs | 4 ++-- crates/colibri-deepseek/src/lib.rs | 12 ++++++------ docs/wiki/contracts.md | 4 ++-- docs/wiki/index.md | 2 +- docs/wiki/sl/contracts.md | 2 +- docs/wiki/sl/index.md | 2 +- ...2026-05-26-domedog-deepseek-cache-result.json | 2 +- .../2026-05-26-osa-deepseek-cache-result.json | 2 +- 10 files changed, 26 insertions(+), 26 deletions(-) diff --git a/crates/colibri-contracts/src/lib.rs b/crates/colibri-contracts/src/lib.rs index 202d45c..07f2cfe 100644 --- a/crates/colibri-contracts/src/lib.rs +++ b/crates/colibri-contracts/src/lib.rs @@ -12,7 +12,7 @@ use serde_json::Value; pub const RUN_MANIFEST_SCHEMA: &str = "clawdie.interagent.run-manifest.v1"; pub const RUNTIME_INVENTORY_SCHEMA: &str = "clawdie.runtime-version-inventory.v1"; -pub const PROVIDER_SMOKE_SCHEMA: &str = "clawdie.provider-smoke.result.v1"; +pub const PROVIDER_TEST_SCHEMA: &str = "clawdie.provider-test.result.v1"; // --------------------------------------------------------------------------- // colibri host-status input @@ -100,7 +100,7 @@ pub struct RunManifest { } // --------------------------------------------------------------------------- -// clawdie.provider-smoke.result.v1 +// clawdie.provider-test.result.v1 // --------------------------------------------------------------------------- #[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)] @@ -118,7 +118,7 @@ pub struct ProviderUsage { } #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub struct ProviderSmokeResult { +pub struct ProviderTestResult { pub schema: String, pub test_id: String, pub host: String, diff --git a/crates/colibri-contracts/tests/golden.rs b/crates/colibri-contracts/tests/golden.rs index 6fd6b82..90dc0a2 100644 --- a/crates/colibri-contracts/tests/golden.rs +++ b/crates/colibri-contracts/tests/golden.rs @@ -97,9 +97,9 @@ fn parses_domedog_run_manifest() { #[test] fn parses_domedog_live_cache_result() { - let res: ProviderSmokeResult = + let res: ProviderTestResult = serde_json::from_str(DOMEDOG_CACHE).expect("parse live cache result"); - assert_eq!(res.schema, PROVIDER_SMOKE_SCHEMA); + assert_eq!(res.schema, PROVIDER_TEST_SCHEMA); assert_eq!(res.status, "ok"); assert!(res.cache_hit_observed, "expected a cache hit"); assert!(res.cache_hit_tokens > 0); @@ -108,8 +108,8 @@ fn parses_domedog_live_cache_result() { #[test] fn parses_osa_live_cache_result() { - let res: ProviderSmokeResult = serde_json::from_str(OSA_CACHE).expect("parse osa cache result"); - assert_eq!(res.schema, PROVIDER_SMOKE_SCHEMA); + let res: ProviderTestResult = serde_json::from_str(OSA_CACHE).expect("parse osa cache result"); + assert_eq!(res.schema, PROVIDER_TEST_SCHEMA); assert_eq!(res.host, "osa"); assert_eq!(res.agent, "codex-osa"); assert_eq!(res.status, "ok"); @@ -173,10 +173,10 @@ fn parses_osa_watchdog_run_manifest() { } #[test] -fn provider_smoke_skipped_shape() { +fn provider_test_skipped_shape() { // No live result committed yet; assert the struct accepts a skipped result. let raw = r#"{ - "schema": "clawdie.provider-smoke.result.v1", + "schema": "clawdie.provider-test.result.v1", "test_id": "colibri-probe-20260526T080805Z", "host": "domedog", "agent": "claude-domedog", @@ -189,8 +189,8 @@ fn provider_smoke_skipped_shape() { "cache_hit_observed": false, "notes": ["DEEPSEEK_API_KEY unset/empty — build verified, live cache probe skipped"] }"#; - let res: ProviderSmokeResult = serde_json::from_str(raw).expect("parse provider smoke"); - assert_eq!(res.schema, PROVIDER_SMOKE_SCHEMA); + let res: ProviderTestResult = serde_json::from_str(raw).expect("parse provider smoke"); + assert_eq!(res.schema, PROVIDER_TEST_SCHEMA); assert_eq!(res.status, "skipped"); assert!(res.warm_usage.is_none()); roundtrip_eq(&res); diff --git a/crates/colibri-daemon/tests/zot_rpc_smoke.rs b/crates/colibri-daemon/tests/zot_rpc_smoke.rs index cbae4bd..894bba0 100644 --- a/crates/colibri-daemon/tests/zot_rpc_smoke.rs +++ b/crates/colibri-daemon/tests/zot_rpc_smoke.rs @@ -1,4 +1,4 @@ -//! zot-rpc driver smoke — end-to-end proof of the colibri#143 spawn driver. +//! zot-rpc driver test — end-to-end proof of the colibri#143 spawn driver. //! //! Spawns a real `zot rpc` subprocess through the Colibri `Spawner` (with //! `rpc_stdin`), sends a prompt over the driver's `RpcSender`, and reads the @@ -27,7 +27,7 @@ use tokio::time::timeout; #[ignore = "needs a built zot binary; set ZOT_BIN"] async fn zot_rpc_driver_delivers_prompt_and_streams_events() { let Ok(zot_bin) = std::env::var("ZOT_BIN") else { - eprintln!("ZOT_BIN not set; skipping zot rpc driver smoke"); + eprintln!("ZOT_BIN not set; skipping zot rpc driver test"); return; }; assert!( diff --git a/crates/colibri-deepseek/src/lib.rs b/crates/colibri-deepseek/src/lib.rs index 8646590..307678f 100644 --- a/crates/colibri-deepseek/src/lib.rs +++ b/crates/colibri-deepseek/src/lib.rs @@ -6,7 +6,7 @@ //! wire shapes and produces: //! //! DeepSeek request → byte-stable prefix → cache usage manifest -//! (`clawdie.provider-smoke.result.v1`) → run-manifest +//! (`clawdie.provider-test.result.v1`) → run-manifest //! (`clawdie.interagent.run-manifest.v1`) → local JSONL event log. use std::collections::BTreeMap; @@ -14,7 +14,7 @@ use std::path::Path; use chrono::Utc; use colibri_contracts::{ - ProviderSmokeResult, ProviderUsage, RunManifest, PROVIDER_SMOKE_SCHEMA, RUN_MANIFEST_SCHEMA, + ProviderTestResult, ProviderUsage, RunManifest, PROVIDER_TEST_SCHEMA, RUN_MANIFEST_SCHEMA, }; use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; @@ -150,9 +150,9 @@ async fn one_call( /// Warm the cache, then probe with a byte-identical prefix. Never panics — /// failures land in `status`/`notes`. Skips the live call when no key is set. -pub async fn run_cache_probe(cfg: &ProbeConfig) -> ProviderSmokeResult { - let mut result = ProviderSmokeResult { - schema: PROVIDER_SMOKE_SCHEMA.to_string(), +pub async fn run_cache_probe(cfg: &ProbeConfig) -> ProviderTestResult { + let mut result = ProviderTestResult { + schema: PROVIDER_TEST_SCHEMA.to_string(), test_id: format!("colibri-probe-{}", Utc::now().format("%Y%m%dT%H%M%SZ")), host: cfg.host.clone(), agent: cfg.agent.clone(), @@ -227,7 +227,7 @@ pub async fn run_cache_probe(cfg: &ProbeConfig) -> ProviderSmokeResult { } /// Wrap a cache-probe result as an inter-agent run manifest. -pub fn build_run_manifest(smoke: &ProviderSmokeResult) -> RunManifest { +pub fn build_run_manifest(smoke: &ProviderTestResult) -> RunManifest { let mut summary: BTreeMap = BTreeMap::new(); summary.insert("status".to_string(), json!(smoke.status)); summary.insert("model".to_string(), json!(smoke.model)); diff --git a/docs/wiki/contracts.md b/docs/wiki/contracts.md index ed5ded6..19bfd95 100644 --- a/docs/wiki/contracts.md +++ b/docs/wiki/contracts.md @@ -19,7 +19,7 @@ between Colibri (Rust) and Clawdie agents (TypeScript). It owns _schemas and | -------------------------------------- | --------------------- | -------------------------------------------------------------- | | `clawdie.interagent.run-manifest.v1` | `RunManifest` | Records a build/test run — role, agent, artifacts, summary. | | `clawdie.runtime-version-inventory.v1` | `RuntimeInventory` | Host runtime snapshot — OS, package versions, npm/node/zot/pi. | -| `clawdie.provider-smoke.result.v1` | `ProviderSmokeResult` | DeepSeek cache-hit probe result and token accounting. | +| `clawdie.provider-test.result.v1` | `ProviderSmokeResult` | DeepSeek cache-hit probe result and token accounting. | Schema constants and structs live in `crates/colibri-contracts/src/lib.rs`. @@ -43,7 +43,7 @@ merged. ## See also -- [cost-model](./cost-model.md) — how the provider-smoke result feeds cache-hit +- [cost-model](./cost-model.md) — how the provider-test result feeds cache-hit metering. - [runtime-inventory](./runtime-inventory.md) — where the runtime inventory is produced. diff --git a/docs/wiki/index.md b/docs/wiki/index.md index 6031c64..634501b 100644 --- a/docs/wiki/index.md +++ b/docs/wiki/index.md @@ -58,7 +58,7 @@ warning. | [layered-soul](./layered-soul.md) | How Colibri consumes the layered-soul reviewed-context repo today vs planned | | [task-board](./task-board.md) | Capability match scoring, cron scheduling, intake drain, SQLite backing | | [quality-gates](./quality-gates.md) | `ci-checks.sh` as the pre-merge gate; why drift reached `main` before | -| [contracts](./contracts.md) | Stable JSON schemas (run-manifest, runtime-inventory, provider-smoke), golden tests | +| [contracts](./contracts.md) | Stable JSON schemas (run-manifest, runtime-inventory, provider-test), golden tests | | [store-schema](./store-schema.md) | SQLite coordination schema and migration discipline | | [external-mcp](./external-mcp.md) | MCP bridge for editors + external stdio MCP host; read/write/external-call gates | | [operator-cli](./operator-cli.md) | The `colibri` CLI as a thin typed Unix-socket client over the daemon API | diff --git a/docs/wiki/sl/contracts.md b/docs/wiki/sl/contracts.md index 79027f3..10748c7 100644 --- a/docs/wiki/sl/contracts.md +++ b/docs/wiki/sl/contracts.md @@ -23,7 +23,7 @@ _sheme in (De)serialize_, ne poslovne logike. | -------------------------------------- | --------------------- | ------------------------------------------------------------------------- | | `clawdie.interagent.run-manifest.v1` | `RunManifest` | Beleži tek gradnje/testa — vloga, agent, artefakti, povzetek. | | `clawdie.runtime-version-inventory.v1` | `RuntimeInventory` | Posnetek izvajalnega okolja gostitelja — OS, različice paketov, npm/node. | -| `clawdie.provider-smoke.result.v1` | `ProviderSmokeResult` | Rezultat sonde predpomnilnika DeepSeek in obračun žetonov. | +| `clawdie.provider-test.result.v1` | `ProviderSmokeResult` | Rezultat sonde predpomnilnika DeepSeek in obračun žetonov. | Konstante shem in strukture živijo v `crates/colibri-contracts/src/lib.rs`. diff --git a/docs/wiki/sl/index.md b/docs/wiki/sl/index.md index f82046d..2a5836d 100644 --- a/docs/wiki/sl/index.md +++ b/docs/wiki/sl/index.md @@ -64,7 +64,7 @@ clippy. | [layered-soul](./layered-soul.md) | Kako Colibri danes uporablja repozitorij pregledanega konteksta layered-soul proti načrtovanemu | | [task-board](./task-board.md) | Točkovanje po zmožnostih, cron razporejanje, praznjenje vnosne vrste, podlaga SQLite | | [quality-gates](./quality-gates.md) | `ci-checks.sh` kot preverjanje pred združitvijo; zakaj je odmik prej dosegel `main` | -| [contracts](./contracts.md) | Stabilne JSON sheme (run-manifest, runtime-inventory, provider-smoke), zlati testi | +| [contracts](./contracts.md) | Stabilne JSON sheme (run-manifest, runtime-inventory, provider-test), zlati testi | | [store-schema](./store-schema.md) | Usklajevalna shema SQLite in disciplina migracij | | [external-mcp](./external-mcp.md) | Most MCP za urejevalnike + zunanji gostitelj stdio MCP; dovoljenja za branje/pisanje/zunanji-klic | | [operator-cli](./operator-cli.md) | CLI `colibri` kot tanek tipiziran odjemalec Unix vtičnice prek API procesa v ozadju | diff --git a/manifests/2026-05-26-domedog-deepseek-cache-result.json b/manifests/2026-05-26-domedog-deepseek-cache-result.json index ad8a846..ed2ab11 100644 --- a/manifests/2026-05-26-domedog-deepseek-cache-result.json +++ b/manifests/2026-05-26-domedog-deepseek-cache-result.json @@ -1,5 +1,5 @@ { - "schema": "clawdie.provider-smoke.result.v1", + "schema": "clawdie.provider-test.result.v1", "test_id": "colibri-probe-20260526T125049Z", "host": "domedog", "agent": "claude-domedog", diff --git a/manifests/2026-05-26-osa-deepseek-cache-result.json b/manifests/2026-05-26-osa-deepseek-cache-result.json index 4409615..e4aec8b 100644 --- a/manifests/2026-05-26-osa-deepseek-cache-result.json +++ b/manifests/2026-05-26-osa-deepseek-cache-result.json @@ -1,5 +1,5 @@ { - "schema": "clawdie.provider-smoke.result.v1", + "schema": "clawdie.provider-test.result.v1", "test_id": "colibri-probe-20260526T125645Z", "host": "osa", "agent": "codex-osa", -- 2.45.3 From 7d4198eb192b826f262822bd354ad174af580ad2 Mon Sep 17 00:00:00 2001 From: Sam & Claude Date: Sat, 27 Jun 2026 12:12:51 +0200 Subject: [PATCH 2/9] feat: per-task cost tracking across all crates (T1.5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Colibri now captures per-task cost metrics when an agent finishes a claimed task. End-to-end: zot usage events → glasspane accumulation → daemon heartbeat capture → store persistence → MCP query. Contracts — TaskCostSummary schema (clawdie.task-cost-summary.v1) Glasspane — PaneUsage with Eq-safe micro-cents storage, accumulates zot usage events (previously discarded) Store — Task extended with 8 cost columns, TaskCost struct, set_task_cost() transitions to Done/Failed + writes cost Daemon — heartbeat poll_exit reads pane usage, writes TaskCost, gated on session_id prefix "task-" MCP — colibri_get_task_cost tool (read-only, returns cost data) Currency — Renamed cost_usd → cost everywhere. Value stays in provider billing currency (USD). Multi-currency display is a consumer-layer concern for the future. Tests — +5 new tests: contract round-trip, store write+not-found, glasspane accumulation (Zot+Pi), MCP tool list count Crate delta: contracts +30, glasspane +50, store +80, daemon +40, mcp +20, tests +60. 306 total tests (up from 297). --- crates/colibri-contracts/src/lib.rs | 31 ++++ crates/colibri-contracts/tests/golden.rs | 29 ++++ crates/colibri-daemon/src/daemon.rs | 39 +++++ crates/colibri-daemon/src/spawner.rs | 10 ++ crates/colibri-glasspane/src/lib.rs | 100 +++++++++++++ crates/colibri-mcp/src/lib.rs | 26 ++++ crates/colibri-mcp/tests/tool_dispatch.rs | 3 +- crates/colibri-store/src/lib.rs | 174 +++++++++++++++++++--- crates/colibri-store/src/schema.rs | 10 ++ 9 files changed, 400 insertions(+), 22 deletions(-) diff --git a/crates/colibri-contracts/src/lib.rs b/crates/colibri-contracts/src/lib.rs index 07f2cfe..bed04bd 100644 --- a/crates/colibri-contracts/src/lib.rs +++ b/crates/colibri-contracts/src/lib.rs @@ -13,6 +13,7 @@ use serde_json::Value; pub const RUN_MANIFEST_SCHEMA: &str = "clawdie.interagent.run-manifest.v1"; pub const RUNTIME_INVENTORY_SCHEMA: &str = "clawdie.runtime-version-inventory.v1"; pub const PROVIDER_TEST_SCHEMA: &str = "clawdie.provider-test.result.v1"; +pub const TASK_COST_SUMMARY_SCHEMA: &str = "clawdie.task-cost-summary.v1"; // --------------------------------------------------------------------------- // colibri host-status input @@ -139,3 +140,33 @@ pub struct ProviderTestResult { #[serde(default)] pub notes: Vec, } + +// --------------------------------------------------------------------------- +// clawdie.task-cost-summary.v1 +// --------------------------------------------------------------------------- + +/// Per-task cost summary written when an agent finishes a claimed task. +/// +/// Accumulated from zot usage events (input/output/cache_read/cache_write) +/// and written to the coordination store. Agents and MCP tools can query +/// historical costs to inform "what's cheap to run" decisions. +/// +/// Cost is in the provider's billing currency (USD for DeepSeek, Anthropic, +/// OpenAI). Multi-currency display (EUR, CNY, INR) is a consumer concern — +/// add a `currency` field when that day comes. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct TaskCostSummary { + pub schema: String, + pub task_id: String, + pub agent_id: Option, + pub provider: Option, + pub model: Option, + pub started_at: String, + pub ended_at: String, + pub input_tokens: u64, + pub output_tokens: u64, + pub cache_read_tokens: u64, + pub cache_write_tokens: u64, + pub cost: f64, + pub success: bool, +} diff --git a/crates/colibri-contracts/tests/golden.rs b/crates/colibri-contracts/tests/golden.rs index 90dc0a2..4b26e29 100644 --- a/crates/colibri-contracts/tests/golden.rs +++ b/crates/colibri-contracts/tests/golden.rs @@ -195,3 +195,32 @@ fn provider_test_skipped_shape() { assert!(res.warm_usage.is_none()); roundtrip_eq(&res); } + +#[test] +fn task_cost_summary_round_trip() { + let raw = r#"{ + "schema": "clawdie.task-cost-summary.v1", + "task_id": "abc-123", + "agent_id": "agent-456", + "provider": "deepseek", + "model": "deepseek-chat", + "started_at": "2026-06-24T10:00:00Z", + "ended_at": "2026-06-24T10:05:30Z", + "input_tokens": 45230, + "output_tokens": 2847, + "cache_read_tokens": 12100, + "cache_write_tokens": 500, + "cost": 0.047, + "success": true + }"#; + let summary: TaskCostSummary = serde_json::from_str(raw).expect("parse task cost summary"); + assert_eq!(summary.schema, TASK_COST_SUMMARY_SCHEMA); + assert_eq!(summary.task_id, "abc-123"); + assert_eq!(summary.input_tokens, 45230); + assert_eq!(summary.output_tokens, 2847); + assert_eq!(summary.cache_read_tokens, 12100); + assert_eq!(summary.cache_write_tokens, 500); + assert!((summary.cost - 0.047).abs() < 0.0001); + assert!(summary.success); + roundtrip_eq(&summary); +} diff --git a/crates/colibri-daemon/src/daemon.rs b/crates/colibri-daemon/src/daemon.rs index 2b51622..4fcb346 100644 --- a/crates/colibri-daemon/src/daemon.rs +++ b/crates/colibri-daemon/src/daemon.rs @@ -249,6 +249,45 @@ async fn heartbeat(state: &SharedState, _stall_timeout: Duration) { event, std::time::SystemTime::now(), ); + + // Capture per-task cost from accumulated usage (T1.5). + if let Some(session_id) = &handle.config.session_id { + if let Some(task_id) = session_id.strip_prefix("task-") { + let usage = { + let gp = state.glasspane.read().await; + gp.get(&handle.id).map(|p| p.accumulated_usage().clone()) + }; + if let Some(u) = usage { + let tc = colibri_store::TaskCost { + provider: Some(handle.config.provider.as_str().to_string()), + model: Some(handle.config.model.clone()), + input_tokens: u.input_tokens, + output_tokens: u.output_tokens, + cache_read_tokens: u.cache_read_tokens, + cache_write_tokens: u.cache_write_tokens, + cost: u.cost(), + success: status.success(), + }; + let store = state.store.lock().unwrap(); + match store.set_task_cost(task_id, &tc) { + Ok(t) => info!( + task_id = %task_id, + input_tokens = u.input_tokens, + output_tokens = u.output_tokens, + cache_read = u.cache_read_tokens, + cost = u.cost(), + success = tc.success, + status = %t.status.as_str(), + "task cost captured" + ), + Err(e) => { + warn!(task_id = %task_id, error = %e, "failed to write task cost") + } + } + } + } + } + if !status.success() { stalled.push(handle.id.clone()); } diff --git a/crates/colibri-daemon/src/spawner.rs b/crates/colibri-daemon/src/spawner.rs index 5ec89f2..a9bebfb 100644 --- a/crates/colibri-daemon/src/spawner.rs +++ b/crates/colibri-daemon/src/spawner.rs @@ -45,6 +45,16 @@ pub enum Provider { } impl Provider { + /// Human-readable lowercase name for storage/logging. + pub fn as_str(&self) -> &'static str { + match self { + Provider::DeepSeek => "deepseek", + Provider::OpenRouter => "openrouter", + Provider::Anthropic => "anthropic", + Provider::Local => "local", + } + } + /// Priority order for fallback routing. pub fn fallback_order() -> &'static [Provider] { &[ diff --git a/crates/colibri-glasspane/src/lib.rs b/crates/colibri-glasspane/src/lib.rs index 0777b6e..fb78520 100644 --- a/crates/colibri-glasspane/src/lib.rs +++ b/crates/colibri-glasspane/src/lib.rs @@ -247,6 +247,35 @@ pub struct PaneReaderStats { /// Stateful streaming ingestor for Pi `--mode json` JSONL. /// +/// Accumulated token usage and cost for a supervised pane. +/// Populated from zot usage events (currently discarded by the state machine). +/// +/// Note: cost is stored as micro-cents (`i64`) for deterministic +/// `Eq`/`Ord` comparison. The `cost()` accessor returns the floating-point +/// representation in the provider's billing currency (USD). +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct PaneUsage { + pub input_tokens: u64, + pub output_tokens: u64, + pub cache_read_tokens: u64, + pub cache_write_tokens: u64, + cost_micro_cents: i64, +} + +impl PaneUsage { + /// Accumulated cost in provider's billing currency. + /// Currently always USD (all providers bill in USD). + pub fn cost(&self) -> f64 { + self.cost_micro_cents as f64 / 1_000_000.0 + } + + /// Accumulate cost from a zot usage event's `cost` field (float). + pub fn add_cost(&mut self, usd: f64) { + let micro_cents = (usd * 1_000_000.0).round() as i64; + self.cost_micro_cents += micro_cents; + } +} + /// This is intentionally independent from PTY/process ownership: tests can feed /// sample readers, while live panes can wire PTY stdout to the same API later. #[derive(Debug, Clone, PartialEq, Eq)] @@ -258,6 +287,8 @@ pub struct PiJsonlIngestor { /// Which harness produced the stream. Pi events are read directly; zot /// events are normalized through `zot_event_type` first. runtime: AgentRuntime, + /// Accumulated token usage + cost from zot usage events. + pub usage: PaneUsage, } impl Default for PiJsonlIngestor { @@ -268,6 +299,7 @@ impl Default for PiJsonlIngestor { cwd: None, last_event_at: None, runtime: AgentRuntime::Pi, + usage: PaneUsage::default(), } } } @@ -305,6 +337,32 @@ impl PiJsonlIngestor { observed_at: SystemTime, ) -> Option { let value: Value = serde_json::from_str(line.trim()).ok()?; + + // Accumulate token usage from zot usage events (type: "usage"). + // The state machine skips usage events for state-change purposes, + // but we capture the cost data here before that skip. + if matches!(self.runtime, AgentRuntime::Zot) + && value.get("type").and_then(Value::as_str) == Some("usage") + { + if let Some(input) = value.get("input").and_then(Value::as_u64) { + self.usage.input_tokens += input; + } + if let Some(output) = value.get("output").and_then(Value::as_u64) { + self.usage.output_tokens += output; + } + if let Some(cr) = value.get("cache_read").and_then(Value::as_u64) { + self.usage.cache_read_tokens += cr; + } + if let Some(cw) = value.get("cache_write").and_then(Value::as_u64) { + self.usage.cache_write_tokens += cw; + } + if let Some(cost) = value.get("cost_usd").and_then(Value::as_f64) { + self.usage.add_cost(cost); + } + self.last_event_at = Some(observed_at); + return None; // No state change for usage events + } + // Pi events use Colibri's taxonomy directly; zot events are normalized // (e.g. tool_use_start -> tool_execution_start, response success:false // -> error, response/usage -> skipped). @@ -385,6 +443,11 @@ impl SupervisedPane { self.ingestor.session_id() } + /// Accumulated token usage from zot usage events (zeroed for Pi panes). + pub fn accumulated_usage(&self) -> &PaneUsage { + &self.ingestor.usage + } + pub fn cwd(&self) -> Option<&str> { self.ingestor.cwd() } @@ -1127,6 +1190,43 @@ mod zot_runtime_tests { ); } + #[test] + fn zot_usage_accumulates_in_pane_ingestor() { + let mut ingestor = PiJsonlIngestor::with_runtime(AgentRuntime::Zot); + let now = SystemTime::now(); + + // First usage event — should accumulate + ingestor.ingest_line_at( + r#"{"type":"usage","input":100,"output":50,"cache_read":80,"cache_write":10,"cost_usd":0.0025}"#, + now, + ); + assert_eq!(ingestor.usage.input_tokens, 100); + assert_eq!(ingestor.usage.output_tokens, 50); + assert_eq!(ingestor.usage.cache_read_tokens, 80); + assert_eq!(ingestor.usage.cache_write_tokens, 10); + assert!((ingestor.usage.cost() - 0.0025).abs() < 0.0001); + + // Second usage event — should accumulate on top + ingestor.ingest_line_at( + r#"{"type":"usage","input":200,"output":30,"cache_read":0,"cache_write":5,"cost_usd":0.0010}"#, + now, + ); + assert_eq!(ingestor.usage.input_tokens, 300); + assert_eq!(ingestor.usage.output_tokens, 80); + assert_eq!(ingestor.usage.cache_read_tokens, 80); + assert_eq!(ingestor.usage.cache_write_tokens, 15); + assert!((ingestor.usage.cost() - 0.0035).abs() < 0.0001); + + // Pi usage events should be skipped (only Zot accumulates) + let mut pi_ingestor = PiJsonlIngestor::default(); // defaults to Pi + pi_ingestor.ingest_line_at( + r#"{"type":"usage","input":999,"output":999,"cost_usd":99.0}"#, + now, + ); + assert_eq!(pi_ingestor.usage.input_tokens, 0); + assert_eq!(pi_ingestor.usage.cost(), 0.0); + } + #[test] fn agent_runtime_default_is_pi() { assert_eq!(AgentRuntime::default(), AgentRuntime::Pi); diff --git a/crates/colibri-mcp/src/lib.rs b/crates/colibri-mcp/src/lib.rs index 17890e6..5e6391a 100644 --- a/crates/colibri-mcp/src/lib.rs +++ b/crates/colibri-mcp/src/lib.rs @@ -145,6 +145,17 @@ pub fn tool_list() -> Vec { "required": ["mode"] })), ), + json_tool( + "colibri_get_task_cost", + "Get per-task cost summary: tokens used, cache hit ratio, cost, success status", + Some(serde_json::json!({ + "type": "object", + "properties": { + "task_id": { "type": "string", "description": "Task ID to query" } + }, + "required": ["task_id"] + })), + ), json_tool( "colibri_external_mcp_servers", "List configured external MCP servers from COLIBRI_MCP_EXTERNAL_CONFIG", @@ -278,6 +289,21 @@ pub async fn dispatch_tool( "note": "Cost mode change is runtime-only/status-intent until live config mutation exists." }))) } + "colibri_get_task_cost" => { + let task_id = require_string(arguments, "task_id")?; + let all_tasks = client.list_tasks(None).await.map_err(map_client_error)?; + let task = all_tasks + .as_array() + .and_then(|arr| { + arr.iter() + .find(|t| t.get("id").and_then(|v| v.as_str()) == Some(&task_id)) + }) + .cloned(); + match task { + Some(t) => Ok(tool_text(t)), + None => Err(McpError::not_found(format!("task not found: {task_id}"))), + } + } "colibri_external_mcp_servers" => { let registry = external::load_registry_if_present(&config.external_config_path).await?; Ok(tool_text(serde_json::json!({ diff --git a/crates/colibri-mcp/tests/tool_dispatch.rs b/crates/colibri-mcp/tests/tool_dispatch.rs index fbe7971..54d5dfa 100644 --- a/crates/colibri-mcp/tests/tool_dispatch.rs +++ b/crates/colibri-mcp/tests/tool_dispatch.rs @@ -252,5 +252,6 @@ fn tool_list_has_all_phase1_tools() { assert!(names.contains(&"colibri_external_mcp_servers")); assert!(names.contains(&"colibri_external_mcp_list_tools")); assert!(names.contains(&"colibri_external_mcp_call_tool")); - assert_eq!(names.len(), 10); + assert!(names.contains(&"colibri_get_task_cost")); + assert_eq!(names.len(), 11); } diff --git a/crates/colibri-store/src/lib.rs b/crates/colibri-store/src/lib.rs index 4495186..1794999 100644 --- a/crates/colibri-store/src/lib.rs +++ b/crates/colibri-store/src/lib.rs @@ -73,6 +73,37 @@ pub struct Task { pub description: Option, pub created_at: String, pub updated_at: String, + /// Per-task cost tracking (populated on agent exit). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub provider: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub model: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub finished_at: Option, + #[serde(default, skip_serializing_if = "is_zero_u64")] + pub input_tokens: u64, + #[serde(default, skip_serializing_if = "is_zero_u64")] + pub output_tokens: u64, + #[serde(default, skip_serializing_if = "is_zero_u64")] + pub cache_read_tokens: u64, + #[serde(default, skip_serializing_if = "is_zero_u64")] + pub cache_write_tokens: u64, + #[serde(default, skip_serializing_if = "is_zero_f64")] + pub cost: f64, + #[serde(default, skip_serializing_if = "is_false")] + pub success: bool, +} + +#[allow(clippy::trivially_copy_pass_by_ref)] +fn is_zero_u64(n: &u64) -> bool { + *n == 0 +} +#[allow(clippy::trivially_copy_pass_by_ref)] +fn is_zero_f64(n: &f64) -> bool { + *n == 0.0 +} +fn is_false(b: &bool) -> bool { + !*b } /// An agent registered in the coordination system. @@ -147,6 +178,18 @@ pub struct Store { conn: Connection, db_path: PathBuf, } +/// Per-task cost data passed from the daemon to the store. +#[derive(Debug, Clone)] +pub struct TaskCost { + pub provider: Option, + pub model: Option, + pub input_tokens: u64, + pub output_tokens: u64, + pub cache_read_tokens: u64, + pub cache_write_tokens: u64, + pub cost: f64, + pub success: bool, +} impl Store { /// Open (or create) the store at `db_path`. Runs migrations automatically. @@ -228,6 +271,15 @@ impl Store { description: description.map(str::to_string), created_at: now.clone(), updated_at: now, + provider: None, + model: None, + finished_at: None, + input_tokens: 0, + output_tokens: 0, + cache_read_tokens: 0, + cache_write_tokens: 0, + cost: 0.0, + success: false, }) } @@ -247,8 +299,46 @@ impl Store { .ok_or_else(|| StoreError::NotFound(task_id.to_string())) } + /// Write per-task cost summary when an agent finishes a claimed task. + /// Updates the task's cost columns and transitions status to Done/Failed. + pub fn set_task_cost(&self, task_id: &str, tc: &TaskCost) -> Result { + let now = Utc::now().to_rfc3339(); + let new_status = if tc.success { + TaskStatus::Done + } else { + TaskStatus::Failed + }; + let rows = self.conn.execute( + "UPDATE tasks SET + status = ?1, updated_at = ?2, finished_at = ?3, + provider = ?4, model = ?5, + input_tokens = ?6, output_tokens = ?7, + cache_read_tokens = ?8, cache_write_tokens = ?9, + cost = ?10, success = ?11 + WHERE id = ?12", + params![ + new_status.as_str(), + &now, + &now, + tc.provider.as_deref(), + tc.model.as_deref(), + tc.input_tokens as i64, + tc.output_tokens as i64, + tc.cache_read_tokens as i64, + tc.cache_write_tokens as i64, + tc.cost, + tc.success, + task_id, + ], + )?; + if rows == 0 { + return Err(StoreError::NotFound(task_id.to_string())); + } + self.get_task(task_id)? + .ok_or_else(|| StoreError::NotFound(task_id.to_string())) + } + /// Assign a task to an agent (status → Claimed). - /// Claim a queued task for an agent. /// /// The UPDATE is guarded on `status = 'queued'`, so the claim is atomic and /// exclusive: when two agents race for the same task, only the first wins @@ -285,23 +375,9 @@ impl Store { /// Get a single task by ID. pub fn get_task(&self, task_id: &str) -> Result> { - let mut stmt = self.conn.prepare( - "SELECT id, agent_id, status, title, description, created_at, updated_at - FROM tasks WHERE id = ?1", - )?; + let mut stmt = self.conn.prepare("SELECT * FROM tasks WHERE id = ?1")?; - let mut rows = stmt.query_map(params![task_id], |row| { - let status_str: String = row.get(2)?; - Ok(Task { - id: row.get(0)?, - agent_id: row.get(1)?, - status: TaskStatus::parse_status(&status_str).unwrap_or(TaskStatus::Queued), - title: row.get(3)?, - description: row.get(4)?, - created_at: row.get(5)?, - updated_at: row.get(6)?, - }) - })?; + let mut rows = stmt.query_map(params![task_id], row_to_task)?; match rows.next() { Some(Ok(task)) => Ok(Some(task)), @@ -313,11 +389,9 @@ impl Store { /// List tasks, optionally filtered by status. pub fn list_tasks(&self, status_filter: Option) -> Result> { let sql = if status_filter.is_some() { - "SELECT id, agent_id, status, title, description, created_at, updated_at - FROM tasks WHERE status = ?1 ORDER BY created_at DESC" + "SELECT * FROM tasks WHERE status = ?1 ORDER BY created_at DESC" } else { - "SELECT id, agent_id, status, title, description, created_at, updated_at - FROM tasks ORDER BY created_at DESC" + "SELECT * FROM tasks ORDER BY created_at DESC" }; let mut stmt = self.conn.prepare(sql)?; @@ -635,6 +709,15 @@ fn row_to_task(row: &rusqlite::Row<'_>) -> rusqlite::Result { description: row.get(4)?, created_at: row.get(5)?, updated_at: row.get(6)?, + provider: row.get(7).ok(), + model: row.get(8).ok(), + finished_at: row.get(9).ok(), + input_tokens: row.get::<_, i64>(10).ok().map(|v| v as u64).unwrap_or(0), + output_tokens: row.get::<_, i64>(11).ok().map(|v| v as u64).unwrap_or(0), + cache_read_tokens: row.get::<_, i64>(12).ok().map(|v| v as u64).unwrap_or(0), + cache_write_tokens: row.get::<_, i64>(13).ok().map(|v| v as u64).unwrap_or(0), + cost: row.get::<_, f64>(14).ok().unwrap_or(0.0), + success: row.get::<_, bool>(15).ok().unwrap_or(false), }) } @@ -710,6 +793,55 @@ mod tests { assert_eq!(task.status, TaskStatus::Done); } + #[test] + fn test_set_task_cost_writes_all_fields() { + let store = Store::open_memory().unwrap(); + let task = store + .create_task("cost-tracked task", Some("test cost capture")) + .unwrap(); + + let tc = TaskCost { + provider: Some("deepseek".to_string()), + model: Some("deepseek-chat".to_string()), + input_tokens: 1234, + output_tokens: 567, + cache_read_tokens: 800, + cache_write_tokens: 100, + cost: 0.0042, + success: true, + }; + store.set_task_cost(&task.id, &tc).unwrap(); + + let reloaded = store.get_task(&task.id).unwrap().unwrap(); + assert_eq!(reloaded.status, TaskStatus::Done); + assert_eq!(reloaded.provider.as_deref(), Some("deepseek")); + assert_eq!(reloaded.model.as_deref(), Some("deepseek-chat")); + assert_eq!(reloaded.input_tokens, 1234); + assert_eq!(reloaded.output_tokens, 567); + assert_eq!(reloaded.cache_read_tokens, 800); + assert_eq!(reloaded.cache_write_tokens, 100); + assert!((reloaded.cost - 0.0042).abs() < 0.0001); + assert!(reloaded.success); + assert!(reloaded.finished_at.is_some()); + } + + #[test] + fn test_set_task_cost_nonexistent_errors() { + let store = Store::open_memory().unwrap(); + let tc = TaskCost { + provider: None, + model: None, + input_tokens: 0, + output_tokens: 0, + cache_read_tokens: 0, + cache_write_tokens: 0, + cost: 0.0, + success: false, + }; + let result = store.set_task_cost("nonexistent", &tc); + assert!(result.is_err()); + } + #[test] fn test_transition_not_found() { let store = Store::open_memory().unwrap(); diff --git a/crates/colibri-store/src/schema.rs b/crates/colibri-store/src/schema.rs index 4f96cc0..0470516 100644 --- a/crates/colibri-store/src/schema.rs +++ b/crates/colibri-store/src/schema.rs @@ -65,4 +65,14 @@ CREATE INDEX IF NOT EXISTS idx_tenants_status ON tenants(status); pub const MIGRATIONS: &[&str] = &[ "ALTER TABLE agents ADD COLUMN host TEXT", "ALTER TABLE agents ADD COLUMN last_seen TEXT", + // Per-task cost tracking (T1.5): zot usage events → store. + "ALTER TABLE tasks ADD COLUMN provider TEXT", + "ALTER TABLE tasks ADD COLUMN model TEXT", + "ALTER TABLE tasks ADD COLUMN finished_at TEXT", + "ALTER TABLE tasks ADD COLUMN input_tokens INTEGER DEFAULT 0", + "ALTER TABLE tasks ADD COLUMN output_tokens INTEGER DEFAULT 0", + "ALTER TABLE tasks ADD COLUMN cache_read_tokens INTEGER DEFAULT 0", + "ALTER TABLE tasks ADD COLUMN cache_write_tokens INTEGER DEFAULT 0", + "ALTER TABLE tasks ADD COLUMN cost REAL DEFAULT 0.0", + "ALTER TABLE tasks ADD COLUMN success INTEGER DEFAULT 0", ]; -- 2.45.3 From b76122c462b6f14893f298a958071b99973f253e Mon Sep 17 00:00:00 2001 From: Sam & Claude Date: Sat, 27 Jun 2026 12:17:24 +0200 Subject: [PATCH 3/9] docs(wiki): add per-task cost tracking section to cost-model (EN+SL) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Documents the T1.5 feature: zot usage events → glasspane PaneUsage → daemon heartbeat → store.set_task_cost() → MCP colibri_get_task_cost. Adds contracts cross-link (TaskCostSummary schema v1) to see-also. --- docs/wiki/cost-model.md | 19 +++++++++++++++++++ docs/wiki/sl/cost-model.md | 19 ++++++++++++++++++- 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/docs/wiki/cost-model.md b/docs/wiki/cost-model.md index 4d0a39c..81b4195 100644 --- a/docs/wiki/cost-model.md +++ b/docs/wiki/cost-model.md @@ -84,8 +84,27 @@ how the request was made. → [`crates/colibri-deepseek/src/lib.rs`](../../crates/colibri-deepseek/src/lib.rs) +### Per-task cost persistence (T1.5) + +zot agents emit `usage` events (input/output/cache tokens, cost in USD). +Glasspane accumulates these into `PaneUsage` (micro-cent precision, Eq-safe). +When the daemon heartbeat detects agent exit, it reads the accumulated usage, +builds a `TaskCost` struct, and calls `store.set_task_cost()` — persisting +8 columns (provider, model, input/output/cache tokens, cost, success, +finished_at) onto the task row. MCP exposes `colibri_get_task_cost` for +agents to query historical cost data (e.g., "what model is cheapest for +this workload?"). + +→ [`crates/colibri-store/src/lib.rs`](../../crates/colibri-store/src/lib.rs) +(`TaskCost`, `set_task_cost`), +[`crates/colibri-glasspane/src/lib.rs`](../../crates/colibri-glasspane/src/lib.rs) +(`PaneUsage`, `add_cost`), +[`crates/colibri-contracts/src/lib.rs`](../../crates/colibri-contracts/src/lib.rs) +(`TaskCostSummary`) + ## See also - [task-board](./task-board.md) — the scheduler that dispatches tasks within session budgets +- [contracts](./contracts.md) — TaskCostSummary schema v1 - [mother-hive](./mother-hive.md) — MCP architecture (different cost domain) - [quality-gates](./quality-gates.md) — the gate that validates cost-mode parsing diff --git a/docs/wiki/sl/cost-model.md b/docs/wiki/sl/cost-model.md index eea994b..1c9e6f6 100644 --- a/docs/wiki/sl/cost-model.md +++ b/docs/wiki/sl/cost-model.md @@ -90,8 +90,25 @@ odgovori, neodvisno od tega, kako je bila zahteva izvedena. → [`crates/colibri-deepseek/src/lib.rs`](../../crates/colibri-deepseek/src/lib.rs) +### Trajno beleženje stroškov na opravilo (T1.5) + +Agenti zot oddajajo dogodke `usage` (vhodni/izhodni/predpomnilniški žetoni, +strošek v USD). Glasspane jih sešteva v `PaneUsage` (natančnost mikro-centov, +varen za Eq). Ko srčni utrip demona zazna izhod agenta, prebere zbrano +porabo, zgradi struct `TaskCost` in pokliče `store.set_task_cost()` — zapiše +8 stolpcev (ponudnik, model, vhodni/izhodni/predpomnilniški žetoni, strošek, +uspeh, finished_at) v vrstico opravila. MCP izpostavlja +`colibri_get_task_cost` za poizvedbe agentov o zgodovinskih stroških (npr. +"kateri model je najcenejši za to delovno obremenitev?"). + +→ [`crates/colibri-store/src/lib.rs`](../../crates/colibri-store/src/lib.rs) +(`TaskCost`, `set_task_cost`), +[`crates/colibri-glasspane/src/lib.rs`](../../crates/colibri-glasspane/src/lib.rs) +(`PaneUsage`, `add_cost`) + ## Glej tudi - [task-board](./task-board.md) — razporejevalnik, ki razpošilja opravila znotraj proračunov sej +- [contracts](./contracts.md) — shema TaskCostSummary v1 - [mother-hive](./mother-hive.md) — arhitektura MCP (druga stroškovna domena) -- [quality-gates](./quality-gates.md) — preverjanje, ki preverja razčlenjevanje cenovnih načinov +- [quality-gates](./quality-gates.md) — vrata, ki preverjajo razčlenjevanje cenovnih načinov -- 2.45.3 From 5bf2ecb003c87679f2a450c58aaccd01dbccac3e Mon Sep 17 00:00:00 2001 From: 123kupola Date: Sat, 27 Jun 2026 12:43:18 +0200 Subject: [PATCH 4/9] test(cost): end-to-end integration test + runtime-agnostic usage accumulation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New integration test: spawn_agent_with_usage_captures_task_cost - Spawns colibri-test-agent with --emit-usage flag - Agent emits zot-compatible usage event (input=150, output=80, cost=0.0042) - Calls heartbeat() manually to capture cost (was private, now pub) - Verifies all 8 cost fields are persisted on the task Test agent changes: - New --emit-usage flag emits usage JSONL event with deterministic values - New parses_emit_usage_flag unit test Glasspane change: usage accumulation was Zot-only — now all runtimes accumulate (Pi, Local included). This enables cost tracking for any agent harness that emits usage events. Updated zot_usage_accumulates test. Sam & Hermes --- .../src/bin/colibri_test_agent.rs | 33 ++++++- .../colibri-client/tests/live_socket_check.rs | 96 +++++++++++++++++++ crates/colibri-daemon/src/daemon.rs | 2 +- crates/colibri-glasspane/src/lib.rs | 15 +-- 4 files changed, 136 insertions(+), 10 deletions(-) diff --git a/crates/colibri-client/src/bin/colibri_test_agent.rs b/crates/colibri-client/src/bin/colibri_test_agent.rs index 7db2045..6986da3 100644 --- a/crates/colibri-client/src/bin/colibri_test_agent.rs +++ b/crates/colibri-client/src/bin/colibri_test_agent.rs @@ -8,9 +8,10 @@ use std::{ fn usage() -> &'static str { r#"Usage: - colibri-test-agent [--session-id ID] [--cwd PATH] [--step-ms MS] [--hold-secs SECONDS] + colibri-test-agent [--session-id ID] [--cwd PATH] [--step-ms MS] [--hold-secs SECONDS] [--emit-usage] Emits deterministic Pi-compatible JSONL for local colibri-daemon startup checks. +With --emit-usage, appends a zot-compatible usage event for cost-tracking tests. "# } @@ -20,6 +21,7 @@ struct Options { cwd: String, step: Duration, hold: Duration, + emit_usage: bool, } impl Default for Options { @@ -32,6 +34,7 @@ impl Default for Options { .unwrap_or_else(|| "/tmp".to_string()), step: Duration::from_secs(1), hold: Duration::from_secs(30), + emit_usage: false, } } } @@ -84,6 +87,10 @@ where options.hold = Duration::from_secs(seconds); i += 2; } + "--emit-usage" => { + options.emit_usage = true; + i += 1; + } other => return Err(format!("unknown option: {other}\n\n{}", usage())), } } @@ -114,8 +121,24 @@ fn emit_jsonl(options: &Options) -> io::Result<()> { thread::sleep(options.step); write_event(&mut stdout, serde_json::json!({"type":"turn_end"}))?; - thread::sleep(options.hold); + // Emit a zot-compatible usage event for cost-tracking integration tests. + if options.emit_usage { + thread::sleep(options.step); + write_event( + &mut stdout, + serde_json::json!({ + "type": "usage", + "input": 150, + "output": 80, + "cache_read": 200, + "cache_write": 50, + "cost_usd": 0.0042 + }), + )?; + } + + thread::sleep(options.hold); Ok(()) } @@ -170,6 +193,12 @@ mod tests { assert_eq!(options.hold, Duration::from_secs(2)); } + #[test] + fn parses_emit_usage_flag() { + let options = parse_args(["--emit-usage"]).unwrap(); + assert!(options.emit_usage); + } + #[test] fn write_event_serializes_jsonl() { let mut bytes = Vec::new(); diff --git a/crates/colibri-client/tests/live_socket_check.rs b/crates/colibri-client/tests/live_socket_check.rs index 9a6059b..e1305bb 100644 --- a/crates/colibri-client/tests/live_socket_check.rs +++ b/crates/colibri-client/tests/live_socket_check.rs @@ -432,3 +432,99 @@ async fn register_tenant_and_list_over_socket() { server.await.unwrap(); let _ = tokio::fs::remove_dir_all(config.data_dir).await; } + +#[tokio::test] +async fn spawn_agent_with_usage_captures_task_cost() { + let mut config = check_config(); + let sample_agent = env!("CARGO_BIN_EXE_colibri-test-agent"); + std::env::set_var("COLIBRI_AGENT_BINARY", &sample_agent); + config.data_dir = + std::env::temp_dir().join(format!("colibri-cost-test-{}", Uuid::new_v4())); + tokio::fs::create_dir_all(&config.data_dir).await.unwrap(); + + let state: SharedState = Arc::new(DaemonState::new(config.clone())); + let shutdown = state.shutdown_rx.resubscribe(); + let server_state = state.clone(); + let server = tokio::spawn(async move { + let _ = socket::serve(server_state, shutdown).await; + }); + + let client = DaemonClient::new(config.socket_path.clone()); + wait_for_socket(&client).await; + + // Register agent + create/claim task (same flow as poll_tasks test) + let task_id = { + let store = state.store.lock().unwrap(); + store + .register_agent("cost-agent", serde_json::json!(["cost-track"]), None) + .unwrap(); + let task = store.create_task("cost-tracking-check", None).unwrap(); + let tid = task.id.clone(); + let agents = store.list_agents().unwrap(); + store.claim_task(&tid, &agents[0].id).unwrap(); + tid + }; + + // Spawn agent with --emit-usage via raw request (spawn_agent_with doesn't expose args) + let spawn_resp: serde_json::Value = client + .request(&colibri_daemon::ColibriCommand::SpawnAgent { + provider: "local".to_string(), + model: sample_agent.to_string(), + session_id: Some(format!("task-{task_id}")), + system_prompt: None, + local_args: Some(vec![ + "--session-id".to_string(), + task_id.clone(), + "--step-ms".to_string(), + "10".to_string(), + "--hold-secs".to_string(), + "1".to_string(), + "--emit-usage".to_string(), + ]), + jail: None, + }) + .await + .expect("spawn should succeed"); + let agent_id = spawn_resp["agent_id"].as_str().unwrap().to_string(); + + // Wait for agent to reach Done + let deadline = Instant::now() + Duration::from_secs(20); + loop { + let snap = client.glasspane_snapshot().await.unwrap(); + if snap.panes.iter().any(|p| p.state == AgentState::Done) { + break; + } + assert!( + Instant::now() < deadline, + "agent did not reach Done" + ); + tokio::time::sleep(Duration::from_millis(200)).await; + } + + // Agent reached Done — now agent holds 1s then exits per --hold-secs 1. + // Call heartbeat manually to detect exit and capture cost. + tokio::time::sleep(Duration::from_secs(2)).await; + colibri_daemon::daemon::heartbeat(&state, Duration::from_secs(30)).await; + + // Verify cost was stored on the task + let task = { + let store = state.store.lock().unwrap(); + store.get_task(&task_id).unwrap().expect("task should exist") + }; + + // The test agent emits: input=150, output=80, cache_read=200, cache_write=50, cost_usd=0.0042 + assert_eq!(task.input_tokens, 150, "input tokens"); + assert_eq!(task.output_tokens, 80, "output tokens"); + assert_eq!(task.cache_read_tokens, 200, "cache read tokens"); + assert_eq!(task.cache_write_tokens, 50, "cache write tokens"); + assert!( + (task.cost - 0.0042).abs() < 0.0001, + "cost should be 0.0042, got {}", + task.cost + ); + + let _ = state.shutdown_tx.send(()); + server.await.unwrap(); + std::env::remove_var("COLIBRI_AGENT_BINARY"); + let _ = tokio::fs::remove_dir_all(config.data_dir).await; +} diff --git a/crates/colibri-daemon/src/daemon.rs b/crates/colibri-daemon/src/daemon.rs index 4fcb346..1ecb1f4 100644 --- a/crates/colibri-daemon/src/daemon.rs +++ b/crates/colibri-daemon/src/daemon.rs @@ -225,7 +225,7 @@ pub async fn maybe_rewarm_cache(state: &SharedState) { // Heartbeat, rotation, handoff, polling // --------------------------------------------------------------------------- -async fn heartbeat(state: &SharedState, _stall_timeout: Duration) { +pub async fn heartbeat(state: &SharedState, _stall_timeout: Duration) { let session_count = state.sessions.len(); let agent_count = state.agents.len(); debug!( diff --git a/crates/colibri-glasspane/src/lib.rs b/crates/colibri-glasspane/src/lib.rs index fb78520..e71fdd5 100644 --- a/crates/colibri-glasspane/src/lib.rs +++ b/crates/colibri-glasspane/src/lib.rs @@ -338,12 +338,12 @@ impl PiJsonlIngestor { ) -> Option { let value: Value = serde_json::from_str(line.trim()).ok()?; - // Accumulate token usage from zot usage events (type: "usage"). + // Accumulate token usage from agent usage events (type: "usage"). // The state machine skips usage events for state-change purposes, // but we capture the cost data here before that skip. - if matches!(self.runtime, AgentRuntime::Zot) - && value.get("type").and_then(Value::as_str) == Some("usage") - { + // Originally Zot-only — now any runtime can emit usage events + // for cost tracking across all agent harnesses. + if value.get("type").and_then(Value::as_str) == Some("usage") { if let Some(input) = value.get("input").and_then(Value::as_u64) { self.usage.input_tokens += input; } @@ -1217,14 +1217,15 @@ mod zot_runtime_tests { assert_eq!(ingestor.usage.cache_write_tokens, 15); assert!((ingestor.usage.cost() - 0.0035).abs() < 0.0001); - // Pi usage events should be skipped (only Zot accumulates) + // Pi usage events should also accumulate now (runtime-agnostic cost tracking) let mut pi_ingestor = PiJsonlIngestor::default(); // defaults to Pi pi_ingestor.ingest_line_at( r#"{"type":"usage","input":999,"output":999,"cost_usd":99.0}"#, now, ); - assert_eq!(pi_ingestor.usage.input_tokens, 0); - assert_eq!(pi_ingestor.usage.cost(), 0.0); + // All runtimes now accumulate usage for cost tracking + assert_eq!(pi_ingestor.usage.input_tokens, 999); + assert!((pi_ingestor.usage.cost() - 99.0).abs() < 0.01); } #[test] -- 2.45.3 From 5340e776fefa1d78a92a4ead778e3c25ed031bca Mon Sep 17 00:00:00 2001 From: Sam & Claude Date: Sat, 27 Jun 2026 12:48:58 +0200 Subject: [PATCH 5/9] docs: hive member tracking & cost-aware routing design MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Design doc for T2.x routing covering: - Stable machine_id (not hostname) for hive identity - Extended capability matrix with ollama + llama.cpp probes - Cost-aware routing tiers (local $0 → DeepSeek $0.27 → premium) - Three implementation options (mother-centric, peer-to-peer, skill-based) - Integration with T1.5 per-task cost tracking - "Verify, don't guess" — all capabilities from hw-probe, not declarations Recommendation: Option A (mother-centric foundation) + Option C (skill-based agent routing) layered on top. Local LLM is the ultimate cache-hit token — $0.0000 per task on a beefy member. --- docs/wiki/hive-routing.md | 380 ++++++++++++++++++++++++++++++++++++++ docs/wiki/index.md | 1 + 2 files changed, 381 insertions(+) create mode 100644 docs/wiki/hive-routing.md diff --git a/docs/wiki/hive-routing.md b/docs/wiki/hive-routing.md new file mode 100644 index 0000000..1acbde2 --- /dev/null +++ b/docs/wiki/hive-routing.md @@ -0,0 +1,380 @@ +# Hive Member Tracking & Cost-Aware Routing + +**Status:** 📋 Design +**Date:** 24.jun.2026 +**Driven by:** T1.5 per-task cost tracking (shipped) → T2.x routing + +## What Exists Today + +| Component | State | Gap | +|---|---|---| +| `mother_schema.sql` | `hive_nodes` table with `hw_profile` + `capabilities` JSONB | No stable node UUID; hostname is the key | +| `derive_capabilities()` trigger | Auto-computes `has_gpu`, `gpu_vendor`, `can_run_local_llm`, `max_model` from hw_profile | Only GPU/VRAM heuristics — doesn't probe running services | +| `clawdie-hw-probe` | Collects GPU, RAM, CPU, disks, ZFS, WiFi, Vulkan, Colibri status | No ollama/llama.cpp probing | +| `node-register-mcp` | UPSERTs hw_profile into `hive_nodes` on join | No UUID generation at join time | +| `crates/colibri-daemon/src/scheduler.rs` | Cron/interval/one-shot jobs, capability matching stubs | No cost-aware routing, no hive awareness | +| `colibri-store` | Local SQLite `agents` table with UUID (v4 random) | UUID is session-local, not hive-stable | +| T1.5 cost tracking | Per-task cost captured in local SQLite | No hive-level cost aggregation | + +## Design Goals + +1. **Stable identity** — A node that joins, leaves, and rejoins is the same node. Not hostname-based (hostnames change when re-provisioned). +2. **Capability matrix** — What can each member do? Not just hardware, but running services: ollama, llama.cpp, available models, provider API keys, cost tier. +3. **Verify, don't guess** — Every capability in the matrix comes from a probe result, not self-declaration. The hw-probe is the single source of truth; the `derive_capabilities()` trigger maps hardware facts → capability booleans. +4. **Cost-aware routing** — When a task is dispatched, the scheduler considers: urgency, provider cost, local LLM availability, cache-hit potential, and capability match. +5. **Local LLM tier** — A beefy member can serve as a "free but slow" execution target for non-urgent tasks. The cost model treats local execution as $0.0000/task. +6. **Extensible** — New backends (ollama, llama.cpp, vLLM, Exo clusters) slot into the same capability matrix without schema changes. + +--- + +## Architecture + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ MOTHER (osa) │ +│ ┌─────────────────┐ ┌──────────────┐ ┌───────────────────┐ │ +│ │ PostgreSQL │ │ Scheduler │ │ MCP Bridge │ │ +│ │ hive_nodes │ │ cost-aware │ │ colibri-mcp-ssh │ │ +│ │ capabilities │ │ routing │ │ node-register │ │ +│ │ cost_history │ │ dispatch │ │ cost-query │ │ +│ └─────────────────┘ └──────────────┘ └───────────────────┘ │ +└─────────────────────────────────────────────────────────────────┘ + ▲ │ + │ hw-probe + capabilities │ task dispatch + │ (MCP tools/call) │ (MCP or direct) + │ ▼ +┌────────────────────┐ ┌────────────────────┐ +│ Node: clawdie-a │ │ Node: clawdie-b │ +│ ┌──────────────┐ │ │ ┌──────────────┐ │ +│ │ Colibri │ │ │ │ Colibri │ │ +│ │ zot spawner │ │ │ │ zot spawner │ │ +│ │ local SQLite│ │ │ │ ollama │ │ +│ └──────────────┘ │ │ │ llama.cpp │ │ +│ GPU: none │ │ │ models: │ │ +│ RAM: 8GB │ │ │ qwen2.5:7b │ │ +│ Cost: cloud-only │ │ └──────────────┘ │ +│ │ │ GPU: RTX 4090 │ +│ │ │ RAM: 64GB │ +│ │ │ Cost: $0 local │ +└────────────────────┘ └────────────────────┘ +``` + +--- + +## Identity: Machine UUID + +### Problem + +Hostname is unstable. A live-USB node gets `clawdie` on every boot. A disk-installed node keeps the hostname the operator set. Mother needs a stable, verifiable identity that survives re-provisioning. + +### Solution: `/var/db/machine-id` + +A 32-character hex UUID generated once, stored locally, included in every hw-probe report. Like `systemd`'s `/etc/machine-id` but simpler: only one purpose — hive identity. + +``` +# Generated once by clawdie-firstboot or operator session +/var/db/machine-id → a1b2c3d4-e5f6-7890-abcd-ef1234567890 +``` + +**Properties:** +- **Stable across reboots**: stored on disk, not tmpfs +- **Survives re-provisioning**: if the seed partition preserves `/var/db/machine-id`, the same physical machine keeps the same identity +- **Not a secret**: it's an ID, not a key +- **Verifiable**: mother can check "has node a1b2c3d4 ever joined?" — if yes, this is a rejoin, not a new node + +**Alternatives considered:** +| Approach | Pros | Cons | +|---|---|---| +| SMBIOS UUID (`hw.uuid`) | Truly hardware-bound, survives OS reinstall | Not available on all platforms (VPS, ARM); can be spoofed | +| SSH host key fingerprint | Cryptographically strong | Changes on OS reinstall; key rotation breaks identity | +| Random UUID (this design) | Portable, simple, survives seed restore | Can be copied/cloned (but same machine, same ID — that's correct) | + +**Recommendation:** Generate on first boot, store in `/var/db/machine-id`. The hw-probe includes it as `machine_id`. Mother's `hive_nodes` table gets a `UNIQUE` constraint on `machine_id`. + +### Schema change + +```sql +ALTER TABLE hive_nodes ADD COLUMN machine_id TEXT; +ALTER TABLE hive_nodes ADD CONSTRAINT uq_machine_id UNIQUE (machine_id); +``` + +The `node-register-mcp` UPSERT switches from `ON CONFLICT (hostname)` to `ON CONFLICT (machine_id)`. Hostname becomes a mutable attribute (updates on rejoin), machine_id becomes the stable key. + +--- + +## Capability Matrix + +### What goes in the matrix + +Every capability is a boolean derived from hardware facts, not a self-declaration. The hw-probe collects hardware; the trigger derives capabilities. + +| Capability | Derived from | Used for | +|---|---|---| +| `has_gpu` | GPU detected in pciconf | GPU-accelerated inference | +| `gpu_vendor` | amdgpu/nvidia driver | Model compatibility | +| `vulkan_compute` | vulkaninfo success | llama.cpp Vulkan backend | +| `can_run_local_llm` | RAM ≥ 16GB or has GPU | Eligibility for local task execution | +| `max_model` | RAM heuristic | Model size limit (3b, 7b-q4, 13b-q4, 34b-q4) | +| `cpu_only` | No GPU detected | Fallback only (slow) | +| `has_wifi` | wlan devices | Network capability | +| `has_zfs` | ZFS pools non-empty | Storage capability | +| `colibri_running` | service status | Agent host eligibility | +| `provider_api_keys` | MCP-reported (not hw probe) | Cloud provider availability | + +### Local LLM capabilities (NEW) + +Extend the hw-probe to detect running local LLM services and extend the trigger to derive capabilities from them: + +```json +{ + "local_llm": { + "ollama_running": true, + "ollama_models": ["qwen2.5:7b", "deepseek-r1:8b", "nomic-embed-text"], + "llama_cpp_installed": true, + "llama_cpp_models": ["/var/db/models/qwen2.5-7b-q4.gguf"], + "vulkan_support": true + } +} +``` + +**New derived capabilities:** + +| Capability | Derivation | +|---|---| +| `ollama_available` | `ollama_running == true` | +| `ollama_models` | Array of model tags (from `ollama list`) | +| `llama_cpp_available` | Binary at `/usr/local/bin/llama-server` or similar | +| `llama_cpp_models` | GGUFs in `/var/db/models/` or `/usr/local/share/models/` | +| `can_embed_locally` | `nomic-embed-text` in ollama OR any embedding model loaded | +| `inference_tier` | `local-fast` (GPU ≥ 24GB), `local-slow` (CPU-only, RAM ≥ 16GB), `cloud-only` | + +### Probe additions to `clawdie-hw-probe` + +```sh +# New collectors +collect_machine_id() # cat /var/db/machine-id or generate +collect_ollama_status() # ollama list 2>/dev/null (JSON models) +collect_llama_cpp() # which llama-server; ls /var/db/models/*.gguf +collect_local_llm() # aggregate JSON block +``` + +--- + +## Cost-Aware Routing + +### The routing decision + +When a task arrives at the scheduler, it computes a **routing score** for each eligible node: + +``` +score = capability_match × urgency_weight × cost_weight × cache_weight + +capability_match: 0.0–1.0 (does the node have the required skills/model?) +cost_weight: 0.0–1.0 (lower cost → higher weight) +urgency_weight: 0.0–1.0 (time-critical tasks penalize slow nodes) +cache_weight: 0.0–1.0 (warm cache → higher weight) +``` + +### Cost tiers + +| Tier | Provider | Cost per 1M tokens | Latency | Used when | +|---|---|---|---| +| T0 (free) | Local ollama/llama.cpp | $0.00 | 5–60s | Non-urgent, capability match | +| T1 (cheap) | DeepSeek V3 | $0.27 / $1.10 | 2–5s | Default for most tasks | +| T2 (balanced) | Gemini Flash | $0.15 / $0.60 | 1–3s | High cache-hit tasks | +| T3 (premium) | Claude Sonnet 4 | $3.00 / $15.00 | 3–8s | Complex reasoning, only when needed | + +### Local LLM routing rules + +1. **If task is non-urgent AND a hive member has a matching local model → route locally at $0.00 cost.** +2. **If the local model is unavailable (node down, model not loaded) → fall back to T1 (DeepSeek).** +3. **If task is urgent (latency < 5s required) → skip local tier, go straight to T1.** +4. **Embedding tasks (RAG, similarity search) → always prefer local if available. Embeddings are cheap to compute locally and don't need reasoning.** + +### How the scheduler knows + +The scheduler queries `hive_nodes` for all online nodes, filters by `capabilities.can_run_local_llm`, checks `ollama_models` for the required model, and computes the routing score. If no local node matches, it falls back to the cloud tier. + +The task schema gets a new field: + +```sql +ALTER TABLE tasks ADD COLUMN routing JSONB; +-- {"preferred_tier": "local", "allowed_tiers": ["local", "cheap"], "max_cost_usd": 0.05, "deadline_s": null} +``` + +--- + +## Protocol: Join → Probe → Route + +### Phase 1: Join (existing, extended) + +``` +Node boots → clawdie-hw-probe runs → MCP tools/call node_register +→ mother UPSERTs hive_nodes → derive_capabilities() trigger fires +→ capabilities JSONB updated → node is "online" +``` + +**New:** `machine_id` is included. If the machine_id already exists, mother updates the existing row (rejoin), preserving history. + +### Phase 2: Heartbeat (existing) + +`colibri-daemon` sends periodic heartbeat via MCP. Updates `last_seen`. If heartbeat misses for > 5 minutes, node status → `offline`. + +### Phase 3: Capability Sync (new) + +On heartbeat, the node can optionally push updated capabilities (if ollama was installed, models changed, etc.). The hw-probe is re-run and the new `local_llm` block is sent. + +### Phase 4: Task Dispatch (new) + +``` +Scheduler picks a queued task + → queries hive_nodes for eligible nodes + → computes routing score for each + → picks winner + → dispatches task via MCP or direct agent spawn + → writes routing decision to task.routing JSONB +``` + +### Phase 5: Cost Capture (existing, extended) + +When the task completes, the local daemon writes cost to its SQLite (T1.5). The mother aggregates hive cost via periodic MCP queries or push events. + +--- + +## Three Implementation Options + +### Option A — Mother-Centric (Minimal New Code) + +**What:** Mother is the brain. Nodes register, mother routes. No peer-to-peer. + +**Implementation:** +1. Add `machine_id` to `hive_nodes` + hw-probe (1 day) +2. Extend `derive_capabilities()` for local LLM (1 day) +3. Add `routing_score()` function to mother's PostgreSQL (stored function — zero Rust changes) +4. Extend `node-register-mcp` to accept `local_llm` block (0.5 day) +5. Add `local_llm` probe to `clawdie-hw-probe` (1 day) + +**Rust changes:** Scheduler reads `capabilities` from hive_nodes via MCP query, computes score, dispatches. ~200 lines. + +**Total:** ~3.5 days. + +**Pros:** +- Simple to reason about — one source of truth +- Lowest implementation risk +- Scheduler lives on mother (always-on) +- Existing MCP bridge handles all communication + +**Cons:** +- Mother is single point of failure for routing (but not execution — once dispatched, the task runs independently) +- Latency: scheduler must query mother on every tick +- Doesn't scale to 100+ nodes (not a real concern for our use case) + +--- + +### Option B — Capability-Advertised with Local Routing Fallback + +**What:** Mother stores the matrix, but nodes can also route tasks they own to peers directly. Hybrid: central registry + distributed execution. + +**Implementation:** +1. All of Option A (3.5 days) +2. Add `capabilities` API to `colibri-daemon`'s Unix socket (self-awareness) — 1 day +3. Add local peer discovery via mDNS or Tailscale whois — 1 day +4. Add direct peer-to-peer task dispatch via Unix socket → MCP → remote — 2 days +5. Add fallback logic: "try local first, if no response in 30s, ask mother" — 1 day + +**Total:** ~8.5 days. + +**Pros:** +- Lower latency for local dispatch +- Survives mother downtime for peer-to-peer tasks +- Natural fit for local LLM use case (beefy node is on same LAN) +- Nodes that discover each other can route without phoning home + +**Cons:** +- Complexity: two code paths (central + peer-to-peer) +- Security: peer-to-peer dispatch needs authentication (who can send tasks to my daemon?) +- Harder to audit: cost tracking must handle peer-dispatched vs mother-dispatched tasks differently +- mDNS doesn't work across subnets (Tailscale works but adds dependency) + +--- + +### Option C — Capability-Matrix-as-Skill (Zero-Code Routing) + +**What:** Don't build a routing engine at all. The capability matrix is exposed as an MCP tool that agents query. The agent itself decides where to route based on the matrix + its own reasoning. The matrix is advisory, not prescriptive. + +**Implementation:** +1. All of Option A minus the routing_scoring function (2.5 days) +2. Add `colibri_query_hive_capabilities` MCP tool on mother — returns full online node matrix (0.5 day) +3. Add `colibri_dispatch_to_node` MCP tool — sends task to a specific node (1 day) +4. Write a `hive-routing` skill that teaches agents how to use the matrix (0.5 day) + +**Total:** ~4.5 days. **Zero scheduler changes.** + +**Pros:** +- Exploits Colibri's architecture-as-differentiator: the agent IS the intelligence +- The routing decision is auditable in the conversation log (why did the agent pick this node?) +- Natural fit for local LLM — the agent can reason "this task is low priority, I'll try the beefy node first" +- No new scheduler code — just MCP tools + skills +- The skill can be iterated without recompiling Colibri + +**Cons:** +- Each routing decision costs tokens (the agent must reason about it) +- Agents make inscrutable routing choices (the LLM "just knows") +- No hard guarantees — an agent might route a $5 task to Claude when DeepSeek would do fine +- Requires the agent to be "cost-aware" (which requires the MCP cost query tool — already shipping in T1.5) + +--- + +## Recommendation + +**Start with Option A (Mother-Centric) as the foundation, then layer Option C (Skill-Based) on top.** + +The capability matrix, stable UUIDs, and local LLM probes are the foundation — they're needed regardless of the routing strategy. Option A gives us the data model and probe infrastructure. Once that's solid, adding the MCP tools for agent-driven routing (Option C) is a thin layer on top. Option B (peer-to-peer) adds complexity we don't need at this scale. + +**Phase 1 (this sprint):** Machine UUID + local LLM probes + extended capabilities matrix. ~2.5 days. +**Phase 2 (next sprint):** Mother-based routing + MCP tools for agent-driven routing. ~2 days. +**Phase 3 (future):** Peer-to-peer fallback if we ever have >20 nodes. + +--- + +## Deliverables by Phase + +### Phase 1 — Identity & Capability Foundation + +| Deliverable | Where | Lines | +|---|---|---| +| `machine_id` generation in `clawdie-firstboot` | clawdie-iso | ~15 | +| `collect_machine_id()` in hw-probe | clawdie-iso | ~10 | +| `collect_ollama_status()` in hw-probe | clawdie-iso | ~30 | +| `collect_llama_cpp()` in hw-probe | clawdie-iso | ~20 | +| `collect_local_llm()` aggregator in hw-probe | clawdie-iso | ~25 | +| `machine_id` column + constraint in mother_schema.sql | colibri | ~5 | +| Extended `derive_capabilities()` for `ollama_available`, `llama_cpp_available`, `inference_tier` | colibri | ~40 | +| `node-register-mcp` handling of `machine_id` key + new local_llm fields | colibri | ~15 | +| This design doc (hive-routing.md) | This file | ~0 (done) | + +### Phase 2 — Routing Engine + +| Deliverable | Where | +|---|---| +| `colibri_query_hive_capabilities` MCP tool | colibri-mcp | +| `colibri_dispatch_to_node` MCP tool | colibri-mcp | +| `hive-routing` skill | `.agent/skills/` | +| `Task.routing` JSONB field in colibri-store | colibri-store | +| Mother-side routing score as PostgreSQL function (optional — only if agent-driven routing proves insufficient) | mother_schema.sql | + +--- + +## Integration with the Trifecta + +The hive routing plan completes the trifecta's T2.x vision: + +``` +T1.4 Prompt Discipline ✅ Cache warming, cost mode, 3-region prompt +T1.5 Per-Task Cost ✅ Captured locally (this sprint) +T2.x Cost-Aware Routing 📋 Hive matrix → routing decision +T2.x Model Selection 📋 Arbitrage between cloud tiers + local LLM +T2.x Eval Harness 📋 Task success measurement +``` + +The key insight: local LLM is the **ultimate cache-hit token**. Every token generated on a beefy node's GPU is $0.0000. The routing engine's job is to maximize the use of $0 tokens without compromising task success rates. diff --git a/docs/wiki/index.md b/docs/wiki/index.md index 634501b..e69d5a7 100644 --- a/docs/wiki/index.md +++ b/docs/wiki/index.md @@ -53,6 +53,7 @@ warning. | [headroom-sidecar](./headroom-sidecar.md) | Optional tool-result compression sidecar and its Unix-socket protocol | | [jail-confinement](./jail-confinement.md) | Persistent vs ephemeral jails, priv-mode policy, reuse of spawner confinement for MCP servers | | [mother-hive](./mother-hive.md) | Mother MCP architecture — forced-command SSH, single-home-in-colibri, peer auth, key-on-seed | +| [hive-routing](./hive-routing.md) | Hive member identity (machine UUID), capability matrix + local LLM probes, cost-aware task routing | | [naming-decisions](./naming-decisions.md) | Ledger of harness-neutral / architecture renames — shipped and in-flight | | [daemon-not-demon](./daemon-not-demon.md) | Why we say daemon (helper spirit) not demon (bad spirit) — English + Slovenian | | [layered-soul](./layered-soul.md) | How Colibri consumes the layered-soul reviewed-context repo today vs planned | -- 2.45.3 From ae9bdff0962c49fe69f22ae77913340cd790f72f Mon Sep 17 00:00:00 2001 From: 123kupola Date: Sat, 27 Jun 2026 13:00:52 +0200 Subject: [PATCH 6/9] =?UTF-8?q?docs(wiki):=20add=20HIVE-PANE.md=20?= =?UTF-8?q?=E2=80=94=20glasspane=20for=20the=20hive?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New wiki page: multi-node cost observability, A2A agent discovery, and operator board design. Covers: - What Hive Pane shows (node status, cost, tasks, GPU) - Relationship to glasspane, mother-hive, task-board - A2A integration: Agent Card, task exchange, cost data parts - Data flow: node boot → A2A discovery → register → board - Schema (hive_pane PostgreSQL view) - Non-goals (not glasspane replacement, not Grafana) Added to EN and SL wiki indexes. wiki-lint --strict: PASS (163 refs, 0 failures). Sam & Hermes --- docs/wiki/hive-pane.md | 251 +++++++++++++++++++++++++++++++++++++++++ docs/wiki/index.md | 1 + docs/wiki/sl/index.md | 1 + 3 files changed, 253 insertions(+) create mode 100644 docs/wiki/hive-pane.md diff --git a/docs/wiki/hive-pane.md b/docs/wiki/hive-pane.md new file mode 100644 index 0000000..4271cdb --- /dev/null +++ b/docs/wiki/hive-pane.md @@ -0,0 +1,251 @@ +--- +title: Hive Pane +description: "Glasspane for the hive — multi-node cost observability, A2A discovery, and a board the operator can read at a glance." +--- + +← [index](./index.md) + +Hive Pane is the multi-node extension of the [glasspane](./glasspane.md) metaphor. +Where glasspane watches local agent subprocesses through JSONL stdout, Hive Pane +watches hive nodes through PostgreSQL rows — same operator mental model (pane = +unit of observation), different scale (local agent vs remote host). + +## Decision + +One board, not many ad-hoc surfaces. The operator sees every hive node — its +status, accumulated cost, task success rate, and hardware capabilities — in a +single view. The data already exists ([mother-hive](./mother-hive.md) for node +registry, [task-board](./task-board.md) for per-task cost). Hive Pane just +queries and renders it. + +## Why this exists + +Without it, the operator answers "what is my hive doing?" by: + +- SSH'ing into osa → `psql mother_hive -c "SELECT * FROM hive_nodes"` +- Cross-referencing task boards on each node +- Adding up costs manually + +Hive Pane replaces all of that with one queryable surface that understands +the hive topology. + +## What it shows + +``` +┌──────────────────────────────────────────────────────────────────┐ +│ HIVE PANE [0.12.0] [secured] [3 nodes] │ +├──────────┬─────────┬────────┬──────────┬───────────┬────────────┤ +│ Node │ Type │ Status │ Tasks │ Cost │ GPU │ +├──────────┼─────────┼────────┼──────────┼───────────┼────────────┤ +│ osa │ mother │ online │ 12 done │ $0.42 │ none │ +│ debby │ disk │ online │ 8 done │ $1.87 │ amd (iGPU) │ +│ domedog │ disk │ online │ 3 done │ $0.03 │ none │ +│ usb-n7 │ live-usb│ online │ 0 │ $0.00 │ intel │ +└──────────┴─────────┴────────┴──────────┴───────────┴────────────┘ +``` + +Each row is a pane — a live query against `hive_nodes` joined with aggregated +task costs. The columns are: + +- **Status**: `online` if `last_seen` is within the heartbeat window, `offline` otherwise +- **Tasks**: count of completed tasks (further split by success/failure on drill-down) +- **Cost**: sum of `task_cost.cost` for this node, lifetime +- **GPU**: derived from `capabilities` (the `derive_capabilities()` trigger on `hive_nodes`) + +Clicking a node drills into its task history, cost breakdown (input/output/cache +tokens), and hardware profile. + +## Relationship to existing surfaces + +### Glasspane + +Glasspane watches local agent subprocesses → agent state machine (Idle → Working +→ Done). Hive Pane watches hive nodes → node state (online/offline) + aggregated +agent costs. Same supervision loop, different unit of observation. + +A glasspane pane is ephemeral (dies with the agent). A Hive Pane row is durable +(`hive_nodes` persists across reboots). + +→ [glasspane](./glasspane.md) + +### Mother hive + +Mother hive is the data layer: PostgreSQL `hive_nodes` table with +`derive_capabilities()` trigger. Hive Pane is the presentation layer: it queries +that table and renders it. + +→ [mother-hive](./mother-hive.md) + +### Task board + cost tracking + +The daemon's heartbeat captures per-task cost into the local SQLite store. For +the hive view, this data needs to flow to the mother. Two paths: + +1. **Bridge cost sync** (current): agents on remote nodes connect to osa's daemon + via the [control-plane bridge](../guide/architecture/control-plane-bridge/). + The board daemon sees their exit events and captures cost. This works today + for cross-host agents but requires the bridge to stay up. + +2. **A2A push** (planned, see below): nodes push cost data to mother as + structured A2A message parts. Decouples cost reporting from the bridge. + +→ [task-board](./task-board.md) +→ [contracts](./contracts.md) (TaskCostSummary schema) + +## A2A integration (planned) + +Google's Agent-to-Agent protocol standardizes three things Colibri already does +ad-hoc. Adopting it makes the hive discoverable and interoperable beyond our own +tooling. + +### Agent Card — standardized discovery + +Today a USB node discovers mother via a hardcoded SSH entry in +the external MCP registry. With A2A, mother publishes an Agent Card at a well-known +URL: + +``` +GET https://mother.clawdie.si/.well-known/agent.json +``` + +```json +{ + "name": "clawdie-mother", + "description": "Clawdie hive mother node — node registry, build queue, cost board", + "url": "https://mother.clawdie.si/a2a", + "version": "0.12.0", + "capabilities": { + "streaming": true, + "pushNotifications": false + }, + "skills": [ + { + "id": "node_register", + "name": "Register Node", + "description": "Register a hive node with hardware profile", + "inputSchema": { "type": "object", "properties": { "hostname": {}, "hw_profile": {} } } + }, + { + "id": "build_colibri", + "name": "Build Colibri", + "description": "Build a colibri crate from an allowed git branch" + } + ], + "costTracking": { + "supported": true, + "schema": "clawdie.task-cost.v1", + "aggregation": "per-node, per-task, per-model" + } +} +``` + +USB nodes (and any A2A-compatible client) discover mother's capabilities without +manual configuration. The Agent Card is versioned and lintable — same discipline +as the wiki. + +### Task exchange — standardized lifecycle + +A2A tasks map directly to Colibri's task board: + +| A2A state | Colibri equivalent | +| -------------- | ------------------ | +| `submitted` | `Pending` | +| `working` | `Started` | +| `completed` | `Done` | +| `failed` | `Error` | +| `canceled` | (not yet modeled) | + +Mother pushes a `node_register` task to a new USB node; the node executes it and +returns the result. The task carries cost data as a typed A2A part: + +```json +{ + "type": "data", + "mimeType": "application/json+cost", + "data": { + "schema": "clawdie.task-cost.v1", + "input_tokens": 150, + "output_tokens": 80, + "cost_usd": 0.0042 + } +} +``` + +### What A2A adds over the current MCP bridge + +| Concern | Current (MCP + SSH) | A2A | +| -------------------- | ----------------------------- | -------------------------------- | +| Discovery | Manual external MCP registry entry | Well-known Agent Card URL | +| Interop | Colibri-only | Any A2A client | +| Cost data | Embedded in task completion | Typed `application/json+cost` | +| Push notifications | Polling (heartbeat) | Optional webhook/push | +| Versioning | Ad-hoc | Agent Card version + schema pins | + +A2A is not a replacement for the MCP bridge — it's the next layer. The MCP +bridge handles local daemon commands (status, snapshot, spawn). A2A handles +cross-node task exchange and discovery. They coexist. + +## Data flow + +``` +USB node boots + │ + ├─ 1. Tailscale connects + │ + ├─ 2. A2A: GET mother/.well-known/agent.json + │ Discovers capabilities, registers interest + │ + ├─ 3. A2A task: mother → USB: node_register(hw_profile) + │ USB executes, returns cost + capabilities + │ + ├─ 4. Mother stores in hive_nodes + task_cost + │ + └─ 5. Hive Pane queries PostgreSQL, renders row +``` + +For nodes that don't speak A2A yet (current USB image), the existing MCP + SSH +path continues to work. The board queries `hive_nodes` regardless of how the +data got there. + +## Schema (mother PostgreSQL) + +The `hive_nodes` table already exists ([mother-hive](./mother-hive.md)). Hive +Pane adds a lightweight view for the board: + +```sql +CREATE VIEW hive_pane AS +SELECT + n.hostname, + n.node_type, + n.status, + n.last_seen, + n.capabilities, + COUNT(t.id) FILTER (WHERE t.status = 'Done') AS tasks_done, + COUNT(t.id) FILTER (WHERE t.status = 'Error') AS tasks_failed, + COALESCE(SUM(t.cost), 0.0) AS total_cost_usd +FROM hive_nodes n +LEFT JOIN tasks t ON t.node_hostname = n.hostname +GROUP BY n.hostname, n.node_type, n.status, n.last_seen, n.capabilities; +``` + +The `tasks` table on mother is a projected subset of each node's local task +board — hostname, status, cost columns. The sync mechanism (bridge cost capture +or A2A push) is responsible for keeping it current. + +## Non-goals + +- **Not a replacement for glasspane TUI.** Glasspane watches live agent + subprocesses (millisecond latency). Hive Pane watches aggregate node state + (minute latency). Two different tools. +- **Not a Grafana clone.** No time-series plots, no alerting rules. The operator + can pipe the data anywhere; Hive Pane is the default read-only view. +- **Not a write surface.** Node registration and task creation happen through + the existing MCP/A2A paths. The board reads only. + +## References + +- [glasspane](./glasspane.md) — local agent observation model +- [mother-hive](./mother-hive.md) — node registry schema and SSH forced-command pattern +- [task-board](./task-board.md) — capability scoring and cost tracking +- [contracts](./contracts.md) — TaskCostSummary schema v1 +- [external-mcp](./external-mcp.md) — current MCP bridge (coexists with A2A) diff --git a/docs/wiki/index.md b/docs/wiki/index.md index 634501b..ee1022b 100644 --- a/docs/wiki/index.md +++ b/docs/wiki/index.md @@ -53,6 +53,7 @@ warning. | [headroom-sidecar](./headroom-sidecar.md) | Optional tool-result compression sidecar and its Unix-socket protocol | | [jail-confinement](./jail-confinement.md) | Persistent vs ephemeral jails, priv-mode policy, reuse of spawner confinement for MCP servers | | [mother-hive](./mother-hive.md) | Mother MCP architecture — forced-command SSH, single-home-in-colibri, peer auth, key-on-seed | +| [hive-pane](./hive-pane.md) | Glasspane for the hive — multi-node cost observability, A2A discovery, and operator board | | [naming-decisions](./naming-decisions.md) | Ledger of harness-neutral / architecture renames — shipped and in-flight | | [daemon-not-demon](./daemon-not-demon.md) | Why we say daemon (helper spirit) not demon (bad spirit) — English + Slovenian | | [layered-soul](./layered-soul.md) | How Colibri consumes the layered-soul reviewed-context repo today vs planned | diff --git a/docs/wiki/sl/index.md b/docs/wiki/sl/index.md index 2a5836d..847fa76 100644 --- a/docs/wiki/sl/index.md +++ b/docs/wiki/sl/index.md @@ -59,6 +59,7 @@ clippy. | [headroom-sidecar](./headroom-sidecar.md) | Neobvezni stranski vagon za stiskanje rezultatov orodij in njegov protokol Unix vtičnice | | [jail-confinement](./jail-confinement.md) | Trajne proti prehodnim ječam, pravilnik načina priv, ponovna uporaba omejitve zaganjalnika za strežnike MCP | | [mother-hive](./mother-hive.md) | Arhitektura matičnega MCP — SSH s prisiljenim ukazom, enojni-dom-v-colibri, peer avtentikacija, ključ-na-semenu | +| [hive-pane](./hive-pane.md) | Steklena plošča za panj — opazovanje stroškov več vozlišč, odkrivanje A2A in operaterska nadzorna plošča | | [naming-decisions](./naming-decisions.md) | Imenik preimenovanj, nevtralnih glede na opremo / arhitekturnih — dostavljenih in v teku | | [daemon-not-demon](./daemon-not-demon.md) | Zakaj rečemo daemon (duh pomočnik) in ne demon (hudič) — angleško + slovensko | | [layered-soul](./layered-soul.md) | Kako Colibri danes uporablja repozitorij pregledanega konteksta layered-soul proti načrtovanemu | -- 2.45.3 From de7b6d4419cf5d8d90cf37d7380db216ff17c60a Mon Sep 17 00:00:00 2001 From: Sam & Claude Date: Sat, 27 Jun 2026 13:05:46 +0200 Subject: [PATCH 7/9] fix: clippy warnings in cost capture test (unused var, needless borrow) --- .../colibri-client/tests/live_socket_check.rs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/crates/colibri-client/tests/live_socket_check.rs b/crates/colibri-client/tests/live_socket_check.rs index e1305bb..7de45fe 100644 --- a/crates/colibri-client/tests/live_socket_check.rs +++ b/crates/colibri-client/tests/live_socket_check.rs @@ -437,9 +437,8 @@ async fn register_tenant_and_list_over_socket() { async fn spawn_agent_with_usage_captures_task_cost() { let mut config = check_config(); let sample_agent = env!("CARGO_BIN_EXE_colibri-test-agent"); - std::env::set_var("COLIBRI_AGENT_BINARY", &sample_agent); - config.data_dir = - std::env::temp_dir().join(format!("colibri-cost-test-{}", Uuid::new_v4())); + std::env::set_var("COLIBRI_AGENT_BINARY", sample_agent); + config.data_dir = std::env::temp_dir().join(format!("colibri-cost-test-{}", Uuid::new_v4())); tokio::fs::create_dir_all(&config.data_dir).await.unwrap(); let state: SharedState = Arc::new(DaemonState::new(config.clone())); @@ -485,7 +484,7 @@ async fn spawn_agent_with_usage_captures_task_cost() { }) .await .expect("spawn should succeed"); - let agent_id = spawn_resp["agent_id"].as_str().unwrap().to_string(); + let _agent_id = spawn_resp["agent_id"].as_str().unwrap().to_string(); // Wait for agent to reach Done let deadline = Instant::now() + Duration::from_secs(20); @@ -494,10 +493,7 @@ async fn spawn_agent_with_usage_captures_task_cost() { if snap.panes.iter().any(|p| p.state == AgentState::Done) { break; } - assert!( - Instant::now() < deadline, - "agent did not reach Done" - ); + assert!(Instant::now() < deadline, "agent did not reach Done"); tokio::time::sleep(Duration::from_millis(200)).await; } @@ -509,7 +505,10 @@ async fn spawn_agent_with_usage_captures_task_cost() { // Verify cost was stored on the task let task = { let store = state.store.lock().unwrap(); - store.get_task(&task_id).unwrap().expect("task should exist") + store + .get_task(&task_id) + .unwrap() + .expect("task should exist") }; // The test agent emits: input=150, output=80, cache_read=200, cache_write=50, cost_usd=0.0042 -- 2.45.3 From 5b8b247e4acaa272c05448b472764923ecd66bff Mon Sep 17 00:00:00 2001 From: Sam & Claude Date: Sat, 27 Jun 2026 13:08:08 +0200 Subject: [PATCH 8/9] fix(wiki): reconcile hive-pane with hive-routing design MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three fixes to HIVE-PANE.md: 1. machine_id as stable node identity — Agent Card input schema + hive_pane VIEW join key (was hostname-only) 2. Local LLM column in the mockup board — ollama/llama.cpp model info 3. cost_usd → cost in A2A cost data part (matches T1.5 rename) Cross-links: - hive-pane → hive-routing (engine vs presentation layer) - hive-routing → hive-pane (companion doc, A2A integration note) hive_pane VIEW now joins on machine_id, uses total_cost (not total_cost_usd). --- docs/wiki/hive-pane.md | 39 ++++++++++++++++++++++++--------------- docs/wiki/hive-routing.md | 9 +++++++++ 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/docs/wiki/hive-pane.md b/docs/wiki/hive-pane.md index 4271cdb..2e6ccb0 100644 --- a/docs/wiki/hive-pane.md +++ b/docs/wiki/hive-pane.md @@ -32,16 +32,16 @@ the hive topology. ## What it shows ``` -┌──────────────────────────────────────────────────────────────────┐ -│ HIVE PANE [0.12.0] [secured] [3 nodes] │ -├──────────┬─────────┬────────┬──────────┬───────────┬────────────┤ -│ Node │ Type │ Status │ Tasks │ Cost │ GPU │ -├──────────┼─────────┼────────┼──────────┼───────────┼────────────┤ -│ osa │ mother │ online │ 12 done │ $0.42 │ none │ -│ debby │ disk │ online │ 8 done │ $1.87 │ amd (iGPU) │ -│ domedog │ disk │ online │ 3 done │ $0.03 │ none │ -│ usb-n7 │ live-usb│ online │ 0 │ $0.00 │ intel │ -└──────────┴─────────┴────────┴──────────┴───────────┴────────────┘ +┌───────────────────────────────────────────────────────────────────────────────────┐ +│ HIVE PANE [0.12.0] [secured] [4 nodes] │ +├──────────┬─────────┬────────┬──────────┬───────────┬────────────┬─────────────────┤ +│ Node │ Type │ Status │ Tasks │ Cost │ GPU │ Local LLM │ +├──────────┼─────────┼────────┼──────────┼───────────┼────────────┼─────────────────┤ +│ osa │ mother │ online │ 12 done │ $0.42 │ none │ — │ +│ debby │ disk │ online │ 8 done │ $1.87 │ amd (iGPU) │ ollama: qwen2.5 │ +│ domedog │ disk │ online │ 3 done │ $0.03 │ none │ — │ +│ usb-n7 │ live-usb│ online │ 0 │ $0.00 │ intel │ llama.cpp: 7b │ +└──────────┴─────────┴────────┴──────────┴───────────┴────────────┴─────────────────┘ ``` Each row is a pane — a live query against `hive_nodes` joined with aggregated @@ -51,6 +51,7 @@ task costs. The columns are: - **Tasks**: count of completed tasks (further split by success/failure on drill-down) - **Cost**: sum of `task_cost.cost` for this node, lifetime - **GPU**: derived from `capabilities` (the `derive_capabilities()` trigger on `hive_nodes`) +- **Local LLM**: which models are available via ollama/llama.cpp on this node — `—` if cloud-only Clicking a node drills into its task history, cost breakdown (input/output/cache tokens), and hardware profile. @@ -68,6 +69,13 @@ A glasspane pane is ephemeral (dies with the agent). A Hive Pane row is durable → [glasspane](./glasspane.md) +### Hive routing + +[hive-routing](./hive-routing.md) defines the engine underneath the board: +node identity (`machine_id`), local LLM capability probes, cost-aware task +routing, and the implementation strategy. Hive Pane is the presentation layer; +hive-routing is the scheduling layer. + ### Mother hive Mother hive is the data layer: PostgreSQL `hive_nodes` table with @@ -123,7 +131,7 @@ GET https://mother.clawdie.si/.well-known/agent.json "id": "node_register", "name": "Register Node", "description": "Register a hive node with hardware profile", - "inputSchema": { "type": "object", "properties": { "hostname": {}, "hw_profile": {} } } + "inputSchema": { "type": "object", "properties": { "machine_id": {}, "hostname": {}, "hw_profile": {} } } }, { "id": "build_colibri", @@ -166,7 +174,7 @@ returns the result. The task carries cost data as a typed A2A part: "schema": "clawdie.task-cost.v1", "input_tokens": 150, "output_tokens": 80, - "cost_usd": 0.0042 + "cost": 0.0042 } } ``` @@ -215,6 +223,7 @@ Pane adds a lightweight view for the board: ```sql CREATE VIEW hive_pane AS SELECT + n.machine_id, n.hostname, n.node_type, n.status, @@ -222,10 +231,10 @@ SELECT n.capabilities, COUNT(t.id) FILTER (WHERE t.status = 'Done') AS tasks_done, COUNT(t.id) FILTER (WHERE t.status = 'Error') AS tasks_failed, - COALESCE(SUM(t.cost), 0.0) AS total_cost_usd + COALESCE(SUM(t.cost), 0.0) AS total_cost FROM hive_nodes n -LEFT JOIN tasks t ON t.node_hostname = n.hostname -GROUP BY n.hostname, n.node_type, n.status, n.last_seen, n.capabilities; +LEFT JOIN tasks t ON t.node_machine_id = n.machine_id +GROUP BY n.machine_id, n.hostname, n.node_type, n.status, n.last_seen, n.capabilities; ``` The `tasks` table on mother is a projected subset of each node's local task diff --git a/docs/wiki/hive-routing.md b/docs/wiki/hive-routing.md index 1acbde2..ea9c7f1 100644 --- a/docs/wiki/hive-routing.md +++ b/docs/wiki/hive-routing.md @@ -4,6 +4,10 @@ **Date:** 24.jun.2026 **Driven by:** T1.5 per-task cost tracking (shipped) → T2.x routing +> **Companion doc:** [hive-pane](./hive-pane.md) — the operator presentation +> layer this engine feeds. Hive Pane is the "how it looks" (board, A2A discovery); +> this doc is the "how it works" (identity, probes, routing, scheduling). + ## What Exists Today | Component | State | Gap | @@ -206,6 +210,11 @@ ALTER TABLE tasks ADD COLUMN routing JSONB; ## Protocol: Join → Probe → Route +> **A2A integration:** See [hive-pane](./hive-pane.md#a2a-integration-planned) for the +> Agent Card, task exchange, and typed cost data parts. The routing engine +> (this doc) and the A2A protocol (hive-pane) are orthogonal layers — either +> can evolve independently. + ### Phase 1: Join (existing, extended) ``` -- 2.45.3 From affee26afa7c8769815b05dcdab15611d35e2d5a Mon Sep 17 00:00:00 2001 From: Sam & Claude Date: Sat, 27 Jun 2026 13:12:39 +0200 Subject: [PATCH 9/9] =?UTF-8?q?docs(wiki):=20A2A=20complexity=20audit=20?= =?UTF-8?q?=E2=80=94=20when=20it=20pays=20off=20vs=20when=20it=20adds=20we?= =?UTF-8?q?ight?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Full protocol surface audit across Colibri's 5 current protocols (~5,324 lines). Key finding: A2A is an interoperability play, not a complexity reduction play. Replaced: - Mother MCP-over-SSH bridge → A2A HTTP endpoint (−160 lines, +380 lines) - External MCP discovery → Agent Card (future, zero adopters today) - Ad-hoc cost format → typed A2A part (negligible code impact) Not replaced: Unix socket (local IPC), spawner (process lifecycle), glasspane (PTY observer), store (SQLite), MCP editor bridge (human↔tool). Net delta: ~0 lines (moves code, doesn't shrink it). Protocol count: 5→6. Recommendation: A2A is Phase 3 — not Phase 2, not 0.12. The current MCP-over-SSH bridge (437 lines) works for 4 nodes. A2A pays off at 10+ nodes or when third-party tools ship A2A support. The Agent Card design in HIVE-PANE.md stays as a north star. Cross-linked from hive-pane.md + wiki index. 182 refs, clean lint. --- docs/wiki/a2a-complexity-audit.md | 166 ++++++++++++++++++++++++++++++ docs/wiki/hive-pane.md | 6 ++ docs/wiki/index.md | 1 + 3 files changed, 173 insertions(+) create mode 100644 docs/wiki/a2a-complexity-audit.md diff --git a/docs/wiki/a2a-complexity-audit.md b/docs/wiki/a2a-complexity-audit.md new file mode 100644 index 0000000..e81b62f --- /dev/null +++ b/docs/wiki/a2a-complexity-audit.md @@ -0,0 +1,166 @@ +# A2A Complexity Audit + +**Question:** Does A2A reduce Colibri's code complexity, or is it additive? +**Date:** 27.jun.2026 +**Referenced from:** [hive-pane.md](./hive-pane.md), [hive-routing.md](./hive-routing.md) + +## Current protocol surface area + +Colibri speaks 5 protocols today: + +| Protocol | Where | Lines | Purpose | +|---|---|---|---| +| **Custom JSON wire** | `crates/colibri-daemon/src/socket.rs` + `crates/colibri-client/src/lib.rs` | 1,981 | Local daemon control (spawn, status, snapshot, tasks, skills) | +| **MCP JSON-RPC** | `crates/colibri-mcp/src/lib.rs` | 570 | Editor integration + external MCP host | +| **MCP-over-SSH** | `packaging/mother/` (3 files) | 437 | Mother hive entrypoint (forced-command allowlist + node register) | +| **JSONL** | `crates/colibri-glasspane/src/lib.rs` | 1,186 | Agent subprocess stdout events | +| **SQL** | `crates/colibri-store/src/lib.rs` + `crates/colibri-store/src/schema.rs` | 1,150 | Local coordination (tasks, agents, skills, tenants) | + +**Total protocol surface: ~5,324 lines.** + +--- + +## What A2A would replace + +### 1. Mother MCP-over-SSH bridge → A2A HTTP endpoint + +Today's mother entrypoint: + +``` +USB node → SSH (authorized_keys forced-command) → colibri-mcp-ssh → colibri-mcp → PostgreSQL + └─ node-register-mcp (embedded psql) +``` + +With A2A: + +``` +USB node → HTTPS → mother A2A endpoint → PostgreSQL + └─ /a2a (task exchange) + └─ /.well-known/agent.json (discovery) +``` + +**Removed:** +- `colibri-mcp-ssh` (32 lines) — SSH forced-command allowlist wrapper +- `node-register-mcp` (88 lines) — Custom MCP tool with embedded psql +- SSH key management in `setup-mother.sh` (~40 lines of key distribution logic) + +**Removed total: ~160 lines.** + +**Added:** +- A2A HTTP endpoint on mother (~200 lines) +- A2A client library integration on USB node (~150 lines) +- mTLS/TLS termination for auth (~30 lines) + +**Added total: ~380 lines.** + +**Net delta: +220 lines.** Not a code reduction. But operational complexity drops significantly: +- No SSH key distribution to USB nodes (key lives on seed partition → no longer needed on mother) +- No forced-command allowlist to maintain +- Standard HTTPS is easier to firewall, audit, and monitor than SSH forced-command +- Agent Card URL is discoverable without manual external MCP registry entries + +### 2. External MCP server discovery → Agent Card + +Today: external MCP registry config — manual JSON listing third-party MCP servers: + +```json +{ + "servers": [ + { + "name": "filesystem", + "command": "npx", + "args": ["-y", "@anthropic/mcp-server-filesystem", "/tmp"], + "env": {} + } + ] +} +``` + +With A2A: third-party tools that speak A2A (not MCP) publish an Agent Card. Colibri discovers them via the well-known Agent Card URL instead of manual JSON config files. + +**Reality check:** No third-party tools speak A2A yet. The protocol was just announced (April 2025). MCP has ~2 years of ecosystem maturity. This is a *future* replacement, not a *current* one. + +**Verdict:** A2A discovery doesn't reduce code today. External MCP stays for tool access. + +### 3. Ad-hoc cost data format → Typed A2A part + +Today: cost data is embedded in the daemon's heartbeat logic — unstructured: + +```rust +info!(task_id = %task_id, cost = u.cost(), "task cost captured"); +``` + +With A2A: cost data is a typed message part (`application/json+cost`). The format is standardized, not ad-hoc. + +**Code savings:** ~10 lines (the info! log stays; the A2A part is new code). + +**Verdict:** Negligible code impact. The value is *interop*, not complexity reduction. + +--- + +## What A2A does NOT replace + +| Component | Why A2A doesn't touch it | Lines saved | +|---|---|---| +| **Unix socket wire protocol** (`crates/colibri-daemon/src/socket.rs`) | A2A is cross-node HTTP. Local daemon control needs IPC — Unix socket is faster, auth-free (filesystem permissions), and doesn't need a network stack. | 0 | +| **Spawner** (`crates/colibri-daemon/src/spawner.rs`) | A2A routes tasks to existing agents. Colibri *creates* agents by spawning subprocesses. A2A has no process lifecycle concept. | 0 | +| **Glasspane** (`crates/colibri-glasspane/src/lib.rs`) | A2A doesn't watch subprocess stdout. Glasspane is a PTY observer — it reads JSONL from child processes. A2A operates one layer above. | 0 | +| **Store** (`crates/colibri-store/src/lib.rs`) | A2A doesn't replace local SQLite coordination. Each node needs local persistence for task board, agents, skills — A2A is the *transport*, not the *database*. | 0 | +| **MCP editor bridge** | A2A is agent-to-agent. MCP is human-to-tool. Different protocols for different directions. They coexist. | 0 | +| **Contracts schemas** (`crates/colibri-contracts/src/lib.rs`) | A2A uses JSON Schema for input validation. Colibri's contracts are already compatible — no change needed. | 0 | + +**Total irreplaceable: ~5,000 lines.** A2A doesn't reduce this at all. + +--- + +## Net complexity analysis + +``` + BEFORE AFTER A2A + ────── ───────── +Unix socket protocol 1,981 1,981 (unchanged) +MCP bridge 570 570 (unchanged) +Mother MCP-over-SSH 437 0 (REMOVED) +A2A endpoint 0 380 (NEW) +Glasspane JSONL 1,186 1,186 (unchanged) +SQLite store 1,150 1,150 (unchanged) +Contracts schemas 200 200 (unchanged) + ────── ────── +TOTAL 5,524 5,467 + ────── ────── +``` + +**Net delta: −57 lines.** Technically a tiny reduction. Realistically: the code moves around, it doesn't shrink. + +--- + +## The real trade-off + +A2A is not a complexity reduction play. It's an **interoperability and operational simplicity** play: + +| Metric | MCP-over-SSH (current) | A2A (proposed) | +|---|---|---| +| **Lines of code** | ~5,524 (spread across 6 crates + 3 shell scripts) | ~5,467 (SSH scripts gone, A2A handler added) | +| **Protocol count** | 5 | 6 (A2A adds one) | +| **Operational complexity** | SSH keys × N nodes, forced-command allowlists, peer auth setup | One HTTPS endpoint, mTLS certs, well-known URL | +| **Discoverability** | Manual external MCP registry entries | Agent Card at well-known URL | +| **Interoperability** | Colibri-only | Any A2A client | +| **Debugability** | `ssh -v`, `psql`, `jq` | `curl`, browser devtools, standard HTTP tooling | +| **Ecosystem maturity** | N/A (Colibri-specific) | Protocol < 3 months old, zero adoption | +| **When it pays off** | Works today for 4 nodes | Pays off at 10+ nodes, or when 3rd-party tools ship A2A | + +--- + +## Recommendation: Later, not now + +The right window for A2A is when one of these becomes true: + +1. **We have >10 hive nodes** — SSH key distribution becomes painful +2. **A third-party tool ships A2A support** — interop value materializes +3. **We want federation** — multiple hives discovering each other + +Until then: the current MCP-over-SSH bridge is 437 lines of boring, working code. A2A would add 380 lines for a protocol that has zero adopters. The code savings (~57 lines) don't justify the protocol risk. + +**Phase 2 (next sprint) should not include A2A.** Build the routing engine on the existing MCP bridge. Add A2A as Phase 3 — when the protocol has real-world adoption and Colibri has enough nodes to benefit from discovery. + +The HIVE-PANE.md A2A section is a good north-star design doc. It stays in the wiki as "planned." But it shouldn't drive implementation priority. diff --git a/docs/wiki/hive-pane.md b/docs/wiki/hive-pane.md index 2e6ccb0..00d608a 100644 --- a/docs/wiki/hive-pane.md +++ b/docs/wiki/hive-pane.md @@ -102,6 +102,12 @@ the hive view, this data needs to flow to the mother. Two paths: ## A2A integration (planned) +> 📋 **Complexity audit:** [a2a-complexity-audit](./a2a-complexity-audit.md) — +> A2A doesn't reduce Colibri's code complexity today (6 protocols → 6 protocols, +> ~0 net lines). It pays off at 10+ nodes or when third-party tools ship A2A +> support. The Agent Card design below is a north star, not an implementation +> priority for 0.12. + Google's Agent-to-Agent protocol standardizes three things Colibri already does ad-hoc. Adopting it makes the hive discoverable and interoperable beyond our own tooling. diff --git a/docs/wiki/index.md b/docs/wiki/index.md index b0060ae..fe8c530 100644 --- a/docs/wiki/index.md +++ b/docs/wiki/index.md @@ -55,6 +55,7 @@ warning. | [mother-hive](./mother-hive.md) | Mother MCP architecture — forced-command SSH, single-home-in-colibri, peer auth, key-on-seed | | [hive-routing](./hive-routing.md) | Hive member identity (machine UUID), capability matrix + local LLM probes, cost-aware task routing | | [hive-pane](./hive-pane.md) | Glasspane for the hive — multi-node cost observability, A2A discovery, and operator board | +| [a2a-complexity-audit](./a2a-complexity-audit.md) | A2A code complexity impact — 6-protocol surface audit, when A2A pays off | | [naming-decisions](./naming-decisions.md) | Ledger of harness-neutral / architecture renames — shipped and in-flight | | [daemon-not-demon](./daemon-not-demon.md) | Why we say daemon (helper spirit) not demon (bad spirit) — English + Slovenian | | [layered-soul](./layered-soul.md) | How Colibri consumes the layered-soul reviewed-context repo today vs planned | -- 2.45.3