2026-06-27 13:23:48 +02:00
24 changed files with 1416 additions and 52 deletions
--- a/crates/colibri-client/src/bin/colibri_test_agent.rs
+++ b/crates/colibri-client/src/bin/colibri_test_agent.rs
@ -8,9 +8,10 @@ use std::{

 fn usage() -> &'static str {
    r#"Usage:
-  colibri-test-agent [--session-id ID] [--cwd PATH] [--step-ms MS] [--hold-secs SECONDS]
+  colibri-test-agent [--session-id ID] [--cwd PATH] [--step-ms MS] [--hold-secs SECONDS] [--emit-usage]

 Emits deterministic Pi-compatible JSONL for local colibri-daemon startup checks.
+With --emit-usage, appends a zot-compatible usage event for cost-tracking tests.
 "#
 }

@ -20,6 +21,7 @@ struct Options {
    cwd: String,
    step: Duration,
    hold: Duration,
+    emit_usage: bool,
 }

 impl Default for Options {
@ -32,6 +34,7 @@ impl Default for Options {
                .unwrap_or_else(|| "/tmp".to_string()),
            step: Duration::from_secs(1),
            hold: Duration::from_secs(30),
+            emit_usage: false,
        }
    }
 }
@ -84,6 +87,10 @@ where
                options.hold = Duration::from_secs(seconds);
                i += 2;
            }
+            "--emit-usage" => {
+                options.emit_usage = true;
+                i += 1;
+            }
            other => return Err(format!("unknown option: {other}\n\n{}", usage())),
        }
    }
@ -114,8 +121,24 @@ fn emit_jsonl(options: &Options) -> io::Result<()> {
    thread::sleep(options.step);

    write_event(&mut stdout, serde_json::json!({"type":"turn_end"}))?;
-    thread::sleep(options.hold);

+    // Emit a zot-compatible usage event for cost-tracking integration tests.
+    if options.emit_usage {
+        thread::sleep(options.step);
+        write_event(
+            &mut stdout,
+            serde_json::json!({
+                "type": "usage",
+                "input": 150,
+                "output": 80,
+                "cache_read": 200,
+                "cache_write": 50,
+                "cost_usd": 0.0042
+            }),
+        )?;
+    }
+
+    thread::sleep(options.hold);
    Ok(())
 }

@ -170,6 +193,12 @@ mod tests {
        assert_eq!(options.hold, Duration::from_secs(2));
    }

+    #[test]
+    fn parses_emit_usage_flag() {
+        let options = parse_args(["--emit-usage"]).unwrap();
+        assert!(options.emit_usage);
+    }
+
    #[test]
    fn write_event_serializes_jsonl() {
        let mut bytes = Vec::new();
--- a/crates/colibri-client/tests/live_socket_check.rs
+++ b/crates/colibri-client/tests/live_socket_check.rs
@ -432,3 +432,98 @@ async fn register_tenant_and_list_over_socket() {
    server.await.unwrap();
    let _ = tokio::fs::remove_dir_all(config.data_dir).await;
 }
+
+#[tokio::test]
+async fn spawn_agent_with_usage_captures_task_cost() {
+    let mut config = check_config();
+    let sample_agent = env!("CARGO_BIN_EXE_colibri-test-agent");
+    std::env::set_var("COLIBRI_AGENT_BINARY", sample_agent);
+    config.data_dir = std::env::temp_dir().join(format!("colibri-cost-test-{}", Uuid::new_v4()));
+    tokio::fs::create_dir_all(&config.data_dir).await.unwrap();
+
+    let state: SharedState = Arc::new(DaemonState::new(config.clone()));
+    let shutdown = state.shutdown_rx.resubscribe();
+    let server_state = state.clone();
+    let server = tokio::spawn(async move {
+        let _ = socket::serve(server_state, shutdown).await;
+    });
+
+    let client = DaemonClient::new(config.socket_path.clone());
+    wait_for_socket(&client).await;
+
+    // Register agent + create/claim task (same flow as poll_tasks test)
+    let task_id = {
+        let store = state.store.lock().unwrap();
+        store
+            .register_agent("cost-agent", serde_json::json!(["cost-track"]), None)
+            .unwrap();
+        let task = store.create_task("cost-tracking-check", None).unwrap();
+        let tid = task.id.clone();
+        let agents = store.list_agents().unwrap();
+        store.claim_task(&tid, &agents[0].id).unwrap();
+        tid
+    };
+
+    // Spawn agent with --emit-usage via raw request (spawn_agent_with doesn't expose args)
+    let spawn_resp: serde_json::Value = client
+        .request(&colibri_daemon::ColibriCommand::SpawnAgent {
+            provider: "local".to_string(),
+            model: sample_agent.to_string(),
+            session_id: Some(format!("task-{task_id}")),
+            system_prompt: None,
+            local_args: Some(vec![
+                "--session-id".to_string(),
+                task_id.clone(),
+                "--step-ms".to_string(),
+                "10".to_string(),
+                "--hold-secs".to_string(),
+                "1".to_string(),
+                "--emit-usage".to_string(),
+            ]),
+            jail: None,
+        })
+        .await
+        .expect("spawn should succeed");
+    let _agent_id = spawn_resp["agent_id"].as_str().unwrap().to_string();
+
+    // Wait for agent to reach Done
+    let deadline = Instant::now() + Duration::from_secs(20);
+    loop {
+        let snap = client.glasspane_snapshot().await.unwrap();
+        if snap.panes.iter().any(|p| p.state == AgentState::Done) {
+            break;
+        }
+        assert!(Instant::now() < deadline, "agent did not reach Done");
+        tokio::time::sleep(Duration::from_millis(200)).await;
+    }
+
+    // Agent reached Done — now agent holds 1s then exits per --hold-secs 1.
+    // Call heartbeat manually to detect exit and capture cost.
+    tokio::time::sleep(Duration::from_secs(2)).await;
+    colibri_daemon::daemon::heartbeat(&state, Duration::from_secs(30)).await;
+
+    // Verify cost was stored on the task
+    let task = {
+        let store = state.store.lock().unwrap();
+        store
+            .get_task(&task_id)
+            .unwrap()
+            .expect("task should exist")
+    };
+
+    // The test agent emits: input=150, output=80, cache_read=200, cache_write=50, cost_usd=0.0042
+    assert_eq!(task.input_tokens, 150, "input tokens");
+    assert_eq!(task.output_tokens, 80, "output tokens");
+    assert_eq!(task.cache_read_tokens, 200, "cache read tokens");
+    assert_eq!(task.cache_write_tokens, 50, "cache write tokens");
+    assert!(
+        (task.cost - 0.0042).abs() < 0.0001,
+        "cost should be 0.0042, got {}",
+        task.cost
+    );
+
+    let _ = state.shutdown_tx.send(());
+    server.await.unwrap();
+    std::env::remove_var("COLIBRI_AGENT_BINARY");
+    let _ = tokio::fs::remove_dir_all(config.data_dir).await;
+}
--- a/crates/colibri-contracts/src/lib.rs
+++ b/crates/colibri-contracts/src/lib.rs
@ -12,7 +12,8 @@ use serde_json::Value;

 pub const RUN_MANIFEST_SCHEMA: &str = "clawdie.interagent.run-manifest.v1";
 pub const RUNTIME_INVENTORY_SCHEMA: &str = "clawdie.runtime-version-inventory.v1";
-pub const PROVIDER_SMOKE_SCHEMA: &str = "clawdie.provider-smoke.result.v1";
+pub const PROVIDER_TEST_SCHEMA: &str = "clawdie.provider-test.result.v1";
+pub const TASK_COST_SUMMARY_SCHEMA: &str = "clawdie.task-cost-summary.v1";

 // ---------------------------------------------------------------------------
 // colibri host-status input
@ -100,7 +101,7 @@ pub struct RunManifest {
 }

 // ---------------------------------------------------------------------------
-// clawdie.provider-smoke.result.v1
+// clawdie.provider-test.result.v1
 // ---------------------------------------------------------------------------

 #[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
@ -118,7 +119,7 @@ pub struct ProviderUsage {
 }

 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
-pub struct ProviderSmokeResult {
+pub struct ProviderTestResult {
    pub schema: String,
    pub test_id: String,
    pub host: String,
@ -139,3 +140,33 @@ pub struct ProviderSmokeResult {
    #[serde(default)]
    pub notes: Vec<String>,
 }
+
+// ---------------------------------------------------------------------------
+// clawdie.task-cost-summary.v1
+// ---------------------------------------------------------------------------
+
+/// Per-task cost summary written when an agent finishes a claimed task.
+///
+/// Accumulated from zot usage events (input/output/cache_read/cache_write)
+/// and written to the coordination store. Agents and MCP tools can query
+/// historical costs to inform "what's cheap to run" decisions.
+///
+/// Cost is in the provider's billing currency (USD for DeepSeek, Anthropic,
+/// OpenAI). Multi-currency display (EUR, CNY, INR) is a consumer concern —
+/// add a `currency` field when that day comes.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct TaskCostSummary {
+    pub schema: String,
+    pub task_id: String,
+    pub agent_id: Option<String>,
+    pub provider: Option<String>,
+    pub model: Option<String>,
+    pub started_at: String,
+    pub ended_at: String,
+    pub input_tokens: u64,
+    pub output_tokens: u64,
+    pub cache_read_tokens: u64,
+    pub cache_write_tokens: u64,
+    pub cost: f64,
+    pub success: bool,
+}
--- a/crates/colibri-contracts/tests/golden.rs
+++ b/crates/colibri-contracts/tests/golden.rs
@ -97,9 +97,9 @@ fn parses_domedog_run_manifest() {

 #[test]
 fn parses_domedog_live_cache_result() {
-    let res: ProviderSmokeResult =
+    let res: ProviderTestResult =
        serde_json::from_str(DOMEDOG_CACHE).expect("parse live cache result");
-    assert_eq!(res.schema, PROVIDER_SMOKE_SCHEMA);
+    assert_eq!(res.schema, PROVIDER_TEST_SCHEMA);
    assert_eq!(res.status, "ok");
    assert!(res.cache_hit_observed, "expected a cache hit");
    assert!(res.cache_hit_tokens > 0);
@ -108,8 +108,8 @@ fn parses_domedog_live_cache_result() {

 #[test]
 fn parses_osa_live_cache_result() {
-    let res: ProviderSmokeResult = serde_json::from_str(OSA_CACHE).expect("parse osa cache result");
-    assert_eq!(res.schema, PROVIDER_SMOKE_SCHEMA);
+    let res: ProviderTestResult = serde_json::from_str(OSA_CACHE).expect("parse osa cache result");
+    assert_eq!(res.schema, PROVIDER_TEST_SCHEMA);
    assert_eq!(res.host, "osa");
    assert_eq!(res.agent, "codex-osa");
    assert_eq!(res.status, "ok");
@ -173,10 +173,10 @@ fn parses_osa_watchdog_run_manifest() {
 }

 #[test]
-fn provider_smoke_skipped_shape() {
+fn provider_test_skipped_shape() {
    // No live result committed yet; assert the struct accepts a skipped result.
    let raw = r#"{
-        "schema": "clawdie.provider-smoke.result.v1",
+        "schema": "clawdie.provider-test.result.v1",
        "test_id": "colibri-probe-20260526T080805Z",
        "host": "domedog",
        "agent": "claude-domedog",
@ -189,9 +189,38 @@ fn provider_smoke_skipped_shape() {
        "cache_hit_observed": false,
        "notes": ["DEEPSEEK_API_KEY unset/empty — build verified, live cache probe skipped"]
    }"#;
-    let res: ProviderSmokeResult = serde_json::from_str(raw).expect("parse provider smoke");
-    assert_eq!(res.schema, PROVIDER_SMOKE_SCHEMA);
+    let res: ProviderTestResult = serde_json::from_str(raw).expect("parse provider smoke");
+    assert_eq!(res.schema, PROVIDER_TEST_SCHEMA);
    assert_eq!(res.status, "skipped");
    assert!(res.warm_usage.is_none());
    roundtrip_eq(&res);
 }
+
+#[test]
+fn task_cost_summary_round_trip() {
+    let raw = r#"{
+        "schema": "clawdie.task-cost-summary.v1",
+        "task_id": "abc-123",
+        "agent_id": "agent-456",
+        "provider": "deepseek",
+        "model": "deepseek-chat",
+        "started_at": "2026-06-24T10:00:00Z",
+        "ended_at": "2026-06-24T10:05:30Z",
+        "input_tokens": 45230,
+        "output_tokens": 2847,
+        "cache_read_tokens": 12100,
+        "cache_write_tokens": 500,
+        "cost": 0.047,
+        "success": true
+    }"#;
+    let summary: TaskCostSummary = serde_json::from_str(raw).expect("parse task cost summary");
+    assert_eq!(summary.schema, TASK_COST_SUMMARY_SCHEMA);
+    assert_eq!(summary.task_id, "abc-123");
+    assert_eq!(summary.input_tokens, 45230);
+    assert_eq!(summary.output_tokens, 2847);
+    assert_eq!(summary.cache_read_tokens, 12100);
+    assert_eq!(summary.cache_write_tokens, 500);
+    assert!((summary.cost - 0.047).abs() < 0.0001);
+    assert!(summary.success);
+    roundtrip_eq(&summary);
+}
--- a/crates/colibri-daemon/src/daemon.rs
+++ b/crates/colibri-daemon/src/daemon.rs
@ -225,7 +225,7 @@ pub async fn maybe_rewarm_cache(state: &SharedState) {
 // Heartbeat, rotation, handoff, polling
 // ---------------------------------------------------------------------------

-async fn heartbeat(state: &SharedState, _stall_timeout: Duration) {
+pub async fn heartbeat(state: &SharedState, _stall_timeout: Duration) {
    let session_count = state.sessions.len();
    let agent_count = state.agents.len();
    debug!(
@ -249,6 +249,45 @@ async fn heartbeat(state: &SharedState, _stall_timeout: Duration) {
                event,
                std::time::SystemTime::now(),
            );
+
+            // Capture per-task cost from accumulated usage (T1.5).
+            if let Some(session_id) = &handle.config.session_id {
+                if let Some(task_id) = session_id.strip_prefix("task-") {
+                    let usage = {
+                        let gp = state.glasspane.read().await;
+                        gp.get(&handle.id).map(|p| p.accumulated_usage().clone())
+                    };
+                    if let Some(u) = usage {
+                        let tc = colibri_store::TaskCost {
+                            provider: Some(handle.config.provider.as_str().to_string()),
+                            model: Some(handle.config.model.clone()),
+                            input_tokens: u.input_tokens,
+                            output_tokens: u.output_tokens,
+                            cache_read_tokens: u.cache_read_tokens,
+                            cache_write_tokens: u.cache_write_tokens,
+                            cost: u.cost(),
+                            success: status.success(),
+                        };
+                        let store = state.store.lock().unwrap();
+                        match store.set_task_cost(task_id, &tc) {
+                            Ok(t) => info!(
+                                task_id = %task_id,
+                                input_tokens = u.input_tokens,
+                                output_tokens = u.output_tokens,
+                                cache_read = u.cache_read_tokens,
+                                cost = u.cost(),
+                                success = tc.success,
+                                status = %t.status.as_str(),
+                                "task cost captured"
+                            ),
+                            Err(e) => {
+                                warn!(task_id = %task_id, error = %e, "failed to write task cost")
+                            }
+                        }
+                    }
+                }
+            }
+
            if !status.success() {
                stalled.push(handle.id.clone());
            }
--- a/crates/colibri-daemon/src/spawner.rs
+++ b/crates/colibri-daemon/src/spawner.rs
@ -45,6 +45,16 @@ pub enum Provider {
 }

 impl Provider {
+    /// Human-readable lowercase name for storage/logging.
+    pub fn as_str(&self) -> &'static str {
+        match self {
+            Provider::DeepSeek => "deepseek",
+            Provider::OpenRouter => "openrouter",
+            Provider::Anthropic => "anthropic",
+            Provider::Local => "local",
+        }
+    }
+
    /// Priority order for fallback routing.
    pub fn fallback_order() -> &'static [Provider] {
        &[
--- a/crates/colibri-daemon/tests/zot_rpc_smoke.rs
+++ b/crates/colibri-daemon/tests/zot_rpc_smoke.rs
@ -1,4 +1,4 @@
-//! zot-rpc driver smoke — end-to-end proof of the colibri#143 spawn driver.
+//! zot-rpc driver test — end-to-end proof of the colibri#143 spawn driver.
 //!
 //! Spawns a real `zot rpc` subprocess through the Colibri `Spawner` (with
 //! `rpc_stdin`), sends a prompt over the driver's `RpcSender`, and reads the
@ -27,7 +27,7 @@ use tokio::time::timeout;
 #[ignore = "needs a built zot binary; set ZOT_BIN"]
 async fn zot_rpc_driver_delivers_prompt_and_streams_events() {
    let Ok(zot_bin) = std::env::var("ZOT_BIN") else {
-        eprintln!("ZOT_BIN not set; skipping zot rpc driver smoke");
+        eprintln!("ZOT_BIN not set; skipping zot rpc driver test");
        return;
    };
    assert!(
--- a/crates/colibri-deepseek/src/lib.rs
+++ b/crates/colibri-deepseek/src/lib.rs
@ -6,7 +6,7 @@
 //! wire shapes and produces:
 //!
 //!   DeepSeek request → byte-stable prefix → cache usage manifest
-//!     (`clawdie.provider-smoke.result.v1`) → run-manifest
+//!     (`clawdie.provider-test.result.v1`) → run-manifest
 //!     (`clawdie.interagent.run-manifest.v1`) → local JSONL event log.

 use std::collections::BTreeMap;
@ -14,7 +14,7 @@ use std::path::Path;

 use chrono::Utc;
 use colibri_contracts::{
-    ProviderSmokeResult, ProviderUsage, RunManifest, PROVIDER_SMOKE_SCHEMA, RUN_MANIFEST_SCHEMA,
+    ProviderTestResult, ProviderUsage, RunManifest, PROVIDER_TEST_SCHEMA, RUN_MANIFEST_SCHEMA,
 };
 use serde::{Deserialize, Serialize};
 use serde_json::{json, Value};
@ -150,9 +150,9 @@ async fn one_call(

 /// Warm the cache, then probe with a byte-identical prefix. Never panics —
 /// failures land in `status`/`notes`. Skips the live call when no key is set.
-pub async fn run_cache_probe(cfg: &ProbeConfig) -> ProviderSmokeResult {
-    let mut result = ProviderSmokeResult {
-        schema: PROVIDER_SMOKE_SCHEMA.to_string(),
+pub async fn run_cache_probe(cfg: &ProbeConfig) -> ProviderTestResult {
+    let mut result = ProviderTestResult {
+        schema: PROVIDER_TEST_SCHEMA.to_string(),
        test_id: format!("colibri-probe-{}", Utc::now().format("%Y%m%dT%H%M%SZ")),
        host: cfg.host.clone(),
        agent: cfg.agent.clone(),
@ -227,7 +227,7 @@ pub async fn run_cache_probe(cfg: &ProbeConfig) -> ProviderSmokeResult {
 }

 /// Wrap a cache-probe result as an inter-agent run manifest.
-pub fn build_run_manifest(smoke: &ProviderSmokeResult) -> RunManifest {
+pub fn build_run_manifest(smoke: &ProviderTestResult) -> RunManifest {
    let mut summary: BTreeMap<String, Value> = BTreeMap::new();
    summary.insert("status".to_string(), json!(smoke.status));
    summary.insert("model".to_string(), json!(smoke.model));
--- a/crates/colibri-glasspane/src/lib.rs
+++ b/crates/colibri-glasspane/src/lib.rs
@ -247,6 +247,35 @@ pub struct PaneReaderStats {

 /// Stateful streaming ingestor for Pi `--mode json` JSONL.
 ///
+/// Accumulated token usage and cost for a supervised pane.
+/// Populated from zot usage events (currently discarded by the state machine).
+///
+/// Note: cost is stored as micro-cents (`i64`) for deterministic
+/// `Eq`/`Ord` comparison. The `cost()` accessor returns the floating-point
+/// representation in the provider's billing currency (USD).
+#[derive(Debug, Clone, Default, PartialEq, Eq)]
+pub struct PaneUsage {
+    pub input_tokens: u64,
+    pub output_tokens: u64,
+    pub cache_read_tokens: u64,
+    pub cache_write_tokens: u64,
+    cost_micro_cents: i64,
+}
+
+impl PaneUsage {
+    /// Accumulated cost in provider's billing currency.
+    /// Currently always USD (all providers bill in USD).
+    pub fn cost(&self) -> f64 {
+        self.cost_micro_cents as f64 / 1_000_000.0
+    }
+
+    /// Accumulate cost from a zot usage event's `cost` field (float).
+    pub fn add_cost(&mut self, usd: f64) {
+        let micro_cents = (usd * 1_000_000.0).round() as i64;
+        self.cost_micro_cents += micro_cents;
+    }
+}
+
 /// This is intentionally independent from PTY/process ownership: tests can feed
 /// sample readers, while live panes can wire PTY stdout to the same API later.
 #[derive(Debug, Clone, PartialEq, Eq)]
@ -258,6 +287,8 @@ pub struct PiJsonlIngestor {
    /// Which harness produced the stream. Pi events are read directly; zot
    /// events are normalized through `zot_event_type` first.
    runtime: AgentRuntime,
+    /// Accumulated token usage + cost from zot usage events.
+    pub usage: PaneUsage,
 }

 impl Default for PiJsonlIngestor {
@ -268,6 +299,7 @@ impl Default for PiJsonlIngestor {
            cwd: None,
            last_event_at: None,
            runtime: AgentRuntime::Pi,
+            usage: PaneUsage::default(),
        }
    }
 }
@ -305,6 +337,32 @@ impl PiJsonlIngestor {
        observed_at: SystemTime,
    ) -> Option<PiStreamUpdate> {
        let value: Value = serde_json::from_str(line.trim()).ok()?;
+
+        // Accumulate token usage from agent usage events (type: "usage").
+        // The state machine skips usage events for state-change purposes,
+        // but we capture the cost data here before that skip.
+        // Originally Zot-only — now any runtime can emit usage events
+        // for cost tracking across all agent harnesses.
+        if value.get("type").and_then(Value::as_str) == Some("usage") {
+            if let Some(input) = value.get("input").and_then(Value::as_u64) {
+                self.usage.input_tokens += input;
+            }
+            if let Some(output) = value.get("output").and_then(Value::as_u64) {
+                self.usage.output_tokens += output;
+            }
+            if let Some(cr) = value.get("cache_read").and_then(Value::as_u64) {
+                self.usage.cache_read_tokens += cr;
+            }
+            if let Some(cw) = value.get("cache_write").and_then(Value::as_u64) {
+                self.usage.cache_write_tokens += cw;
+            }
+            if let Some(cost) = value.get("cost_usd").and_then(Value::as_f64) {
+                self.usage.add_cost(cost);
+            }
+            self.last_event_at = Some(observed_at);
+            return None; // No state change for usage events
+        }
+
        // Pi events use Colibri's taxonomy directly; zot events are normalized
        // (e.g. tool_use_start -> tool_execution_start, response success:false
        // -> error, response/usage -> skipped).
@ -385,6 +443,11 @@ impl SupervisedPane {
        self.ingestor.session_id()
    }

+    /// Accumulated token usage from zot usage events (zeroed for Pi panes).
+    pub fn accumulated_usage(&self) -> &PaneUsage {
+        &self.ingestor.usage
+    }
+
    pub fn cwd(&self) -> Option<&str> {
        self.ingestor.cwd()
    }
@ -1127,6 +1190,44 @@ mod zot_runtime_tests {
        );
    }

+    #[test]
+    fn zot_usage_accumulates_in_pane_ingestor() {
+        let mut ingestor = PiJsonlIngestor::with_runtime(AgentRuntime::Zot);
+        let now = SystemTime::now();
+
+        // First usage event — should accumulate
+        ingestor.ingest_line_at(
+            r#"{"type":"usage","input":100,"output":50,"cache_read":80,"cache_write":10,"cost_usd":0.0025}"#,
+            now,
+        );
+        assert_eq!(ingestor.usage.input_tokens, 100);
+        assert_eq!(ingestor.usage.output_tokens, 50);
+        assert_eq!(ingestor.usage.cache_read_tokens, 80);
+        assert_eq!(ingestor.usage.cache_write_tokens, 10);
+        assert!((ingestor.usage.cost() - 0.0025).abs() < 0.0001);
+
+        // Second usage event — should accumulate on top
+        ingestor.ingest_line_at(
+            r#"{"type":"usage","input":200,"output":30,"cache_read":0,"cache_write":5,"cost_usd":0.0010}"#,
+            now,
+        );
+        assert_eq!(ingestor.usage.input_tokens, 300);
+        assert_eq!(ingestor.usage.output_tokens, 80);
+        assert_eq!(ingestor.usage.cache_read_tokens, 80);
+        assert_eq!(ingestor.usage.cache_write_tokens, 15);
+        assert!((ingestor.usage.cost() - 0.0035).abs() < 0.0001);
+
+        // Pi usage events should also accumulate now (runtime-agnostic cost tracking)
+        let mut pi_ingestor = PiJsonlIngestor::default(); // defaults to Pi
+        pi_ingestor.ingest_line_at(
+            r#"{"type":"usage","input":999,"output":999,"cost_usd":99.0}"#,
+            now,
+        );
+        // All runtimes now accumulate usage for cost tracking
+        assert_eq!(pi_ingestor.usage.input_tokens, 999);
+        assert!((pi_ingestor.usage.cost() - 99.0).abs() < 0.01);
+    }
+
    #[test]
    fn agent_runtime_default_is_pi() {
        assert_eq!(AgentRuntime::default(), AgentRuntime::Pi);
--- a/crates/colibri-mcp/src/lib.rs
+++ b/crates/colibri-mcp/src/lib.rs
@ -145,6 +145,17 @@ pub fn tool_list() -> Vec<Value> {
                "required": ["mode"]
            })),
        ),
+        json_tool(
+            "colibri_get_task_cost",
+            "Get per-task cost summary: tokens used, cache hit ratio, cost, success status",
+            Some(serde_json::json!({
+                "type": "object",
+                "properties": {
+                    "task_id": { "type": "string", "description": "Task ID to query" }
+                },
+                "required": ["task_id"]
+            })),
+        ),
        json_tool(
            "colibri_external_mcp_servers",
            "List configured external MCP servers from COLIBRI_MCP_EXTERNAL_CONFIG",
@ -278,6 +289,21 @@ pub async fn dispatch_tool(
                "note": "Cost mode change is runtime-only/status-intent until live config mutation exists."
            })))
        }
+        "colibri_get_task_cost" => {
+            let task_id = require_string(arguments, "task_id")?;
+            let all_tasks = client.list_tasks(None).await.map_err(map_client_error)?;
+            let task = all_tasks
+                .as_array()
+                .and_then(|arr| {
+                    arr.iter()
+                        .find(|t| t.get("id").and_then(|v| v.as_str()) == Some(&task_id))
+                })
+                .cloned();
+            match task {
+                Some(t) => Ok(tool_text(t)),
+                None => Err(McpError::not_found(format!("task not found: {task_id}"))),
+            }
+        }
        "colibri_external_mcp_servers" => {
            let registry = external::load_registry_if_present(&config.external_config_path).await?;
            Ok(tool_text(serde_json::json!({
--- a/crates/colibri-mcp/tests/tool_dispatch.rs
+++ b/crates/colibri-mcp/tests/tool_dispatch.rs
@ -252,5 +252,6 @@ fn tool_list_has_all_phase1_tools() {
    assert!(names.contains(&"colibri_external_mcp_servers"));
    assert!(names.contains(&"colibri_external_mcp_list_tools"));
    assert!(names.contains(&"colibri_external_mcp_call_tool"));
-    assert_eq!(names.len(), 10);
+    assert!(names.contains(&"colibri_get_task_cost"));
+    assert_eq!(names.len(), 11);
 }
--- a/crates/colibri-store/src/lib.rs
+++ b/crates/colibri-store/src/lib.rs
@ -73,6 +73,37 @@ pub struct Task {
    pub description: Option<String>,
    pub created_at: String,
    pub updated_at: String,
+    /// Per-task cost tracking (populated on agent exit).
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub provider: Option<String>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub model: Option<String>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub finished_at: Option<String>,
+    #[serde(default, skip_serializing_if = "is_zero_u64")]
+    pub input_tokens: u64,
+    #[serde(default, skip_serializing_if = "is_zero_u64")]
+    pub output_tokens: u64,
+    #[serde(default, skip_serializing_if = "is_zero_u64")]
+    pub cache_read_tokens: u64,
+    #[serde(default, skip_serializing_if = "is_zero_u64")]
+    pub cache_write_tokens: u64,
+    #[serde(default, skip_serializing_if = "is_zero_f64")]
+    pub cost: f64,
+    #[serde(default, skip_serializing_if = "is_false")]
+    pub success: bool,
+}
+
+#[allow(clippy::trivially_copy_pass_by_ref)]
+fn is_zero_u64(n: &u64) -> bool {
+    *n == 0
+}
+#[allow(clippy::trivially_copy_pass_by_ref)]
+fn is_zero_f64(n: &f64) -> bool {
+    *n == 0.0
+}
+fn is_false(b: &bool) -> bool {
+    !*b
 }

 /// An agent registered in the coordination system.
@ -147,6 +178,18 @@ pub struct Store {
    conn: Connection,
    db_path: PathBuf,
 }
+/// Per-task cost data passed from the daemon to the store.
+#[derive(Debug, Clone)]
+pub struct TaskCost {
+    pub provider: Option<String>,
+    pub model: Option<String>,
+    pub input_tokens: u64,
+    pub output_tokens: u64,
+    pub cache_read_tokens: u64,
+    pub cache_write_tokens: u64,
+    pub cost: f64,
+    pub success: bool,
+}

 impl Store {
    /// Open (or create) the store at `db_path`. Runs migrations automatically.
@ -228,6 +271,15 @@ impl Store {
            description: description.map(str::to_string),
            created_at: now.clone(),
            updated_at: now,
+            provider: None,
+            model: None,
+            finished_at: None,
+            input_tokens: 0,
+            output_tokens: 0,
+            cache_read_tokens: 0,
+            cache_write_tokens: 0,
+            cost: 0.0,
+            success: false,
        })
    }

@ -247,8 +299,46 @@ impl Store {
            .ok_or_else(|| StoreError::NotFound(task_id.to_string()))
    }

+    /// Write per-task cost summary when an agent finishes a claimed task.
+    /// Updates the task's cost columns and transitions status to Done/Failed.
+    pub fn set_task_cost(&self, task_id: &str, tc: &TaskCost) -> Result<Task> {
+        let now = Utc::now().to_rfc3339();
+        let new_status = if tc.success {
+            TaskStatus::Done
+        } else {
+            TaskStatus::Failed
+        };
+        let rows = self.conn.execute(
+            "UPDATE tasks SET
+                status = ?1, updated_at = ?2, finished_at = ?3,
+                provider = ?4, model = ?5,
+                input_tokens = ?6, output_tokens = ?7,
+                cache_read_tokens = ?8, cache_write_tokens = ?9,
+                cost = ?10, success = ?11
+             WHERE id = ?12",
+            params![
+                new_status.as_str(),
+                &now,
+                &now,
+                tc.provider.as_deref(),
+                tc.model.as_deref(),
+                tc.input_tokens as i64,
+                tc.output_tokens as i64,
+                tc.cache_read_tokens as i64,
+                tc.cache_write_tokens as i64,
+                tc.cost,
+                tc.success,
+                task_id,
+            ],
+        )?;
+        if rows == 0 {
+            return Err(StoreError::NotFound(task_id.to_string()));
+        }
+        self.get_task(task_id)?
+            .ok_or_else(|| StoreError::NotFound(task_id.to_string()))
+    }
+
    /// Assign a task to an agent (status → Claimed).
-    /// Claim a queued task for an agent.
    ///
    /// The UPDATE is guarded on `status = 'queued'`, so the claim is atomic and
    /// exclusive: when two agents race for the same task, only the first wins
@ -285,23 +375,9 @@ impl Store {

    /// Get a single task by ID.
    pub fn get_task(&self, task_id: &str) -> Result<Option<Task>> {
-        let mut stmt = self.conn.prepare(
-            "SELECT id, agent_id, status, title, description, created_at, updated_at
-             FROM tasks WHERE id = ?1",
-        )?;
+        let mut stmt = self.conn.prepare("SELECT * FROM tasks WHERE id = ?1")?;

-        let mut rows = stmt.query_map(params![task_id], |row| {
-            let status_str: String = row.get(2)?;
-            Ok(Task {
-                id: row.get(0)?,
-                agent_id: row.get(1)?,
-                status: TaskStatus::parse_status(&status_str).unwrap_or(TaskStatus::Queued),
-                title: row.get(3)?,
-                description: row.get(4)?,
-                created_at: row.get(5)?,
-                updated_at: row.get(6)?,
-            })
-        })?;
+        let mut rows = stmt.query_map(params![task_id], row_to_task)?;

        match rows.next() {
            Some(Ok(task)) => Ok(Some(task)),
@ -313,11 +389,9 @@ impl Store {
    /// List tasks, optionally filtered by status.
    pub fn list_tasks(&self, status_filter: Option<TaskStatus>) -> Result<Vec<Task>> {
        let sql = if status_filter.is_some() {
-            "SELECT id, agent_id, status, title, description, created_at, updated_at
-             FROM tasks WHERE status = ?1 ORDER BY created_at DESC"
+            "SELECT * FROM tasks WHERE status = ?1 ORDER BY created_at DESC"
        } else {
-            "SELECT id, agent_id, status, title, description, created_at, updated_at
-             FROM tasks ORDER BY created_at DESC"
+            "SELECT * FROM tasks ORDER BY created_at DESC"
        };

        let mut stmt = self.conn.prepare(sql)?;
@ -635,6 +709,15 @@ fn row_to_task(row: &rusqlite::Row<'_>) -> rusqlite::Result<Task> {
        description: row.get(4)?,
        created_at: row.get(5)?,
        updated_at: row.get(6)?,
+        provider: row.get(7).ok(),
+        model: row.get(8).ok(),
+        finished_at: row.get(9).ok(),
+        input_tokens: row.get::<_, i64>(10).ok().map(|v| v as u64).unwrap_or(0),
+        output_tokens: row.get::<_, i64>(11).ok().map(|v| v as u64).unwrap_or(0),
+        cache_read_tokens: row.get::<_, i64>(12).ok().map(|v| v as u64).unwrap_or(0),
+        cache_write_tokens: row.get::<_, i64>(13).ok().map(|v| v as u64).unwrap_or(0),
+        cost: row.get::<_, f64>(14).ok().unwrap_or(0.0),
+        success: row.get::<_, bool>(15).ok().unwrap_or(false),
    })
 }

@ -710,6 +793,55 @@ mod tests {
        assert_eq!(task.status, TaskStatus::Done);
    }

+    #[test]
+    fn test_set_task_cost_writes_all_fields() {
+        let store = Store::open_memory().unwrap();
+        let task = store
+            .create_task("cost-tracked task", Some("test cost capture"))
+            .unwrap();
+
+        let tc = TaskCost {
+            provider: Some("deepseek".to_string()),
+            model: Some("deepseek-chat".to_string()),
+            input_tokens: 1234,
+            output_tokens: 567,
+            cache_read_tokens: 800,
+            cache_write_tokens: 100,
+            cost: 0.0042,
+            success: true,
+        };
+        store.set_task_cost(&task.id, &tc).unwrap();
+
+        let reloaded = store.get_task(&task.id).unwrap().unwrap();
+        assert_eq!(reloaded.status, TaskStatus::Done);
+        assert_eq!(reloaded.provider.as_deref(), Some("deepseek"));
+        assert_eq!(reloaded.model.as_deref(), Some("deepseek-chat"));
+        assert_eq!(reloaded.input_tokens, 1234);
+        assert_eq!(reloaded.output_tokens, 567);
+        assert_eq!(reloaded.cache_read_tokens, 800);
+        assert_eq!(reloaded.cache_write_tokens, 100);
+        assert!((reloaded.cost - 0.0042).abs() < 0.0001);
+        assert!(reloaded.success);
+        assert!(reloaded.finished_at.is_some());
+    }
+
+    #[test]
+    fn test_set_task_cost_nonexistent_errors() {
+        let store = Store::open_memory().unwrap();
+        let tc = TaskCost {
+            provider: None,
+            model: None,
+            input_tokens: 0,
+            output_tokens: 0,
+            cache_read_tokens: 0,
+            cache_write_tokens: 0,
+            cost: 0.0,
+            success: false,
+        };
+        let result = store.set_task_cost("nonexistent", &tc);
+        assert!(result.is_err());
+    }
+
    #[test]
    fn test_transition_not_found() {
        let store = Store::open_memory().unwrap();
--- a/crates/colibri-store/src/schema.rs
+++ b/crates/colibri-store/src/schema.rs
@ -65,4 +65,14 @@ CREATE INDEX IF NOT EXISTS idx_tenants_status ON tenants(status);
 pub const MIGRATIONS: &[&str] = &[
    "ALTER TABLE agents ADD COLUMN host TEXT",
    "ALTER TABLE agents ADD COLUMN last_seen TEXT",
+    // Per-task cost tracking (T1.5): zot usage events → store.
+    "ALTER TABLE tasks ADD COLUMN provider TEXT",
+    "ALTER TABLE tasks ADD COLUMN model TEXT",
+    "ALTER TABLE tasks ADD COLUMN finished_at TEXT",
+    "ALTER TABLE tasks ADD COLUMN input_tokens INTEGER DEFAULT 0",
+    "ALTER TABLE tasks ADD COLUMN output_tokens INTEGER DEFAULT 0",
+    "ALTER TABLE tasks ADD COLUMN cache_read_tokens INTEGER DEFAULT 0",
+    "ALTER TABLE tasks ADD COLUMN cache_write_tokens INTEGER DEFAULT 0",
+    "ALTER TABLE tasks ADD COLUMN cost REAL DEFAULT 0.0",
+    "ALTER TABLE tasks ADD COLUMN success INTEGER DEFAULT 0",
 ];
--- a/docs/wiki/a2a-complexity-audit.md
+++ b/docs/wiki/a2a-complexity-audit.md
@ -0,0 +1,166 @@
+# A2A Complexity Audit
+
+**Question:** Does A2A reduce Colibri's code complexity, or is it additive?
+**Date:** 27.jun.2026
+**Referenced from:** [hive-pane.md](./hive-pane.md), [hive-routing.md](./hive-routing.md)
+
+## Current protocol surface area
+
+Colibri speaks 5 protocols today:
+
+| Protocol | Where | Lines | Purpose |
+|---|---|---|---|
+| **Custom JSON wire** | `crates/colibri-daemon/src/socket.rs` + `crates/colibri-client/src/lib.rs` | 1,981 | Local daemon control (spawn, status, snapshot, tasks, skills) |
+| **MCP JSON-RPC** | `crates/colibri-mcp/src/lib.rs` | 570 | Editor integration + external MCP host |
+| **MCP-over-SSH** | `packaging/mother/` (3 files) | 437 | Mother hive entrypoint (forced-command allowlist + node register) |
+| **JSONL** | `crates/colibri-glasspane/src/lib.rs` | 1,186 | Agent subprocess stdout events |
+| **SQL** | `crates/colibri-store/src/lib.rs` + `crates/colibri-store/src/schema.rs` | 1,150 | Local coordination (tasks, agents, skills, tenants) |
+
+**Total protocol surface: ~5,324 lines.**
+
+---
+
+## What A2A would replace
+
+### 1. Mother MCP-over-SSH bridge → A2A HTTP endpoint
+
+Today's mother entrypoint:
+
+```
+USB node → SSH (authorized_keys forced-command) → colibri-mcp-ssh → colibri-mcp → PostgreSQL
+                                                                       └─ node-register-mcp (embedded psql)
+```
+
+With A2A:
+
+```
+USB node → HTTPS → mother A2A endpoint → PostgreSQL
+                    └─ /a2a (task exchange)
+                    └─ /.well-known/agent.json (discovery)
+```
+
+**Removed:**
+- `colibri-mcp-ssh` (32 lines) — SSH forced-command allowlist wrapper
+- `node-register-mcp` (88 lines) — Custom MCP tool with embedded psql
+- SSH key management in `setup-mother.sh` (~40 lines of key distribution logic)
+
+**Removed total: ~160 lines.**
+
+**Added:**
+- A2A HTTP endpoint on mother (~200 lines)
+- A2A client library integration on USB node (~150 lines)
+- mTLS/TLS termination for auth (~30 lines)
+
+**Added total: ~380 lines.**
+
+**Net delta: +220 lines.** Not a code reduction. But operational complexity drops significantly:
+- No SSH key distribution to USB nodes (key lives on seed partition → no longer needed on mother)
+- No forced-command allowlist to maintain
+- Standard HTTPS is easier to firewall, audit, and monitor than SSH forced-command
+- Agent Card URL is discoverable without manual external MCP registry entries
+
+### 2. External MCP server discovery → Agent Card
+
+Today: external MCP registry config — manual JSON listing third-party MCP servers:
+
+```json
+{
+  "servers": [
+    {
+      "name": "filesystem",
+      "command": "npx",
+      "args": ["-y", "@anthropic/mcp-server-filesystem", "/tmp"],
+      "env": {}
+    }
+  ]
+}
+```
+
+With A2A: third-party tools that speak A2A (not MCP) publish an Agent Card. Colibri discovers them via the well-known Agent Card URL instead of manual JSON config files.
+
+**Reality check:** No third-party tools speak A2A yet. The protocol was just announced (April 2025). MCP has ~2 years of ecosystem maturity. This is a *future* replacement, not a *current* one.
+
+**Verdict:** A2A discovery doesn't reduce code today. External MCP stays for tool access.
+
+### 3. Ad-hoc cost data format → Typed A2A part
+
+Today: cost data is embedded in the daemon's heartbeat logic — unstructured:
+
+```rust
+info!(task_id = %task_id, cost = u.cost(), "task cost captured");
+```
+
+With A2A: cost data is a typed message part (`application/json+cost`). The format is standardized, not ad-hoc.
+
+**Code savings:** ~10 lines (the info! log stays; the A2A part is new code).
+
+**Verdict:** Negligible code impact. The value is *interop*, not complexity reduction.
+
+---
+
+## What A2A does NOT replace
+
+| Component | Why A2A doesn't touch it | Lines saved |
+|---|---|---|
+| **Unix socket wire protocol** (`crates/colibri-daemon/src/socket.rs`) | A2A is cross-node HTTP. Local daemon control needs IPC — Unix socket is faster, auth-free (filesystem permissions), and doesn't need a network stack. | 0 |
+| **Spawner** (`crates/colibri-daemon/src/spawner.rs`) | A2A routes tasks to existing agents. Colibri *creates* agents by spawning subprocesses. A2A has no process lifecycle concept. | 0 |
+| **Glasspane** (`crates/colibri-glasspane/src/lib.rs`) | A2A doesn't watch subprocess stdout. Glasspane is a PTY observer — it reads JSONL from child processes. A2A operates one layer above. | 0 |
+| **Store** (`crates/colibri-store/src/lib.rs`) | A2A doesn't replace local SQLite coordination. Each node needs local persistence for task board, agents, skills — A2A is the *transport*, not the *database*. | 0 |
+| **MCP editor bridge** | A2A is agent-to-agent. MCP is human-to-tool. Different protocols for different directions. They coexist. | 0 |
+| **Contracts schemas** (`crates/colibri-contracts/src/lib.rs`) | A2A uses JSON Schema for input validation. Colibri's contracts are already compatible — no change needed. | 0 |
+
+**Total irreplaceable: ~5,000 lines.** A2A doesn't reduce this at all.
+
+---
+
+## Net complexity analysis
+
+```
+                         BEFORE      AFTER A2A
+                         ──────      ─────────
+Unix socket protocol      1,981       1,981        (unchanged)
+MCP bridge                  570         570        (unchanged)
+Mother MCP-over-SSH         437           0        (REMOVED)
+A2A endpoint                  0         380        (NEW)
+Glasspane JSONL           1,186       1,186        (unchanged)
+SQLite store              1,150       1,150        (unchanged)
+Contracts schemas           200         200        (unchanged)
+                         ──────      ──────
+TOTAL                     5,524       5,467
+                         ──────      ──────
+```
+
+**Net delta: −57 lines.** Technically a tiny reduction. Realistically: the code moves around, it doesn't shrink.
+
+---
+
+## The real trade-off
+
+A2A is not a complexity reduction play. It's an **interoperability and operational simplicity** play:
+
+| Metric | MCP-over-SSH (current) | A2A (proposed) |
+|---|---|---|
+| **Lines of code** | ~5,524 (spread across 6 crates + 3 shell scripts) | ~5,467 (SSH scripts gone, A2A handler added) |
+| **Protocol count** | 5 | 6 (A2A adds one) |
+| **Operational complexity** | SSH keys × N nodes, forced-command allowlists, peer auth setup | One HTTPS endpoint, mTLS certs, well-known URL |
+| **Discoverability** | Manual external MCP registry entries | Agent Card at well-known URL |
+| **Interoperability** | Colibri-only | Any A2A client |
+| **Debugability** | `ssh -v`, `psql`, `jq` | `curl`, browser devtools, standard HTTP tooling |
+| **Ecosystem maturity** | N/A (Colibri-specific) | Protocol < 3 months old, zero adoption |
+| **When it pays off** | Works today for 4 nodes | Pays off at 10+ nodes, or when 3rd-party tools ship A2A |
+
+---
+
+## Recommendation: Later, not now
+
+The right window for A2A is when one of these becomes true:
+
+1. **We have >10 hive nodes** — SSH key distribution becomes painful
+2. **A third-party tool ships A2A support** — interop value materializes
+3. **We want federation** — multiple hives discovering each other
+
+Until then: the current MCP-over-SSH bridge is 437 lines of boring, working code. A2A would add 380 lines for a protocol that has zero adopters. The code savings (~57 lines) don't justify the protocol risk.
+
+**Phase 2 (next sprint) should not include A2A.** Build the routing engine on the existing MCP bridge. Add A2A as Phase 3 — when the protocol has real-world adoption and Colibri has enough nodes to benefit from discovery.
+
+The HIVE-PANE.md A2A section is a good north-star design doc. It stays in the wiki as "planned." But it shouldn't drive implementation priority.
--- a/docs/wiki/contracts.md
+++ b/docs/wiki/contracts.md
@ -19,7 +19,7 @@ between Colibri (Rust) and Clawdie agents (TypeScript). It owns _schemas and
 | -------------------------------------- | --------------------- | -------------------------------------------------------------- |
 | `clawdie.interagent.run-manifest.v1`   | `RunManifest`         | Records a build/test run — role, agent, artifacts, summary.    |
 | `clawdie.runtime-version-inventory.v1` | `RuntimeInventory`    | Host runtime snapshot — OS, package versions, npm/node/zot/pi. |
-| `clawdie.provider-smoke.result.v1`     | `ProviderSmokeResult` | DeepSeek cache-hit probe result and token accounting.          |
+| `clawdie.provider-test.result.v1`     | `ProviderSmokeResult` | DeepSeek cache-hit probe result and token accounting.          |

 Schema constants and structs live in `crates/colibri-contracts/src/lib.rs`.

@ -43,7 +43,7 @@ merged.

 ## See also

- [cost-model](./cost-model.md) — how the provider-smoke result feeds cache-hit
+- [cost-model](./cost-model.md) — how the provider-test result feeds cache-hit
  metering.
 - [runtime-inventory](./runtime-inventory.md) — where the runtime inventory is
  produced.
--- a/docs/wiki/cost-model.md
+++ b/docs/wiki/cost-model.md
@ -84,8 +84,27 @@ how the request was made.

 → [`crates/colibri-deepseek/src/lib.rs`](../../crates/colibri-deepseek/src/lib.rs)

+### Per-task cost persistence (T1.5)
+
+zot agents emit `usage` events (input/output/cache tokens, cost in USD).
+Glasspane accumulates these into `PaneUsage` (micro-cent precision, Eq-safe).
+When the daemon heartbeat detects agent exit, it reads the accumulated usage,
+builds a `TaskCost` struct, and calls `store.set_task_cost()` — persisting
+8 columns (provider, model, input/output/cache tokens, cost, success,
+finished_at) onto the task row. MCP exposes `colibri_get_task_cost` for
+agents to query historical cost data (e.g., "what model is cheapest for
+this workload?").
+
+→ [`crates/colibri-store/src/lib.rs`](../../crates/colibri-store/src/lib.rs)
+(`TaskCost`, `set_task_cost`),
+[`crates/colibri-glasspane/src/lib.rs`](../../crates/colibri-glasspane/src/lib.rs)
+(`PaneUsage`, `add_cost`),
+[`crates/colibri-contracts/src/lib.rs`](../../crates/colibri-contracts/src/lib.rs)
+(`TaskCostSummary`)
+
 ## See also

 - [task-board](./task-board.md) — the scheduler that dispatches tasks within session budgets
+- [contracts](./contracts.md) — TaskCostSummary schema v1
 - [mother-hive](./mother-hive.md) — MCP architecture (different cost domain)
 - [quality-gates](./quality-gates.md) — the gate that validates cost-mode parsing
--- a/docs/wiki/hive-pane.md
+++ b/docs/wiki/hive-pane.md
@ -0,0 +1,266 @@
+---
+title: Hive Pane
+description: "Glasspane for the hive — multi-node cost observability, A2A discovery, and a board the operator can read at a glance."
+---
+
+← [index](./index.md)
+
+Hive Pane is the multi-node extension of the [glasspane](./glasspane.md) metaphor.
+Where glasspane watches local agent subprocesses through JSONL stdout, Hive Pane
+watches hive nodes through PostgreSQL rows — same operator mental model (pane =
+unit of observation), different scale (local agent vs remote host).
+
+## Decision
+
+One board, not many ad-hoc surfaces. The operator sees every hive node — its
+status, accumulated cost, task success rate, and hardware capabilities — in a
+single view. The data already exists ([mother-hive](./mother-hive.md) for node
+registry, [task-board](./task-board.md) for per-task cost). Hive Pane just
+queries and renders it.
+
+## Why this exists
+
+Without it, the operator answers "what is my hive doing?" by:
+
+- SSH'ing into osa → `psql mother_hive -c "SELECT * FROM hive_nodes"`
+- Cross-referencing task boards on each node
+- Adding up costs manually
+
+Hive Pane replaces all of that with one queryable surface that understands
+the hive topology.
+
+## What it shows
+
+```
+┌───────────────────────────────────────────────────────────────────────────────────┐
+│ HIVE PANE                                       [0.12.0] [secured] [4 nodes]      │
+├──────────┬─────────┬────────┬──────────┬───────────┬────────────┬─────────────────┤
+│ Node     │ Type    │ Status │ Tasks    │ Cost      │ GPU        │ Local LLM       │
+├──────────┼─────────┼────────┼──────────┼───────────┼────────────┼─────────────────┤
+│ osa      │ mother  │ online │ 12 done  │ $0.42     │ none       │ —               │
+│ debby    │ disk    │ online │  8 done  │ $1.87     │ amd (iGPU) │ ollama: qwen2.5 │
+│ domedog  │ disk    │ online │  3 done  │ $0.03     │ none       │ —               │
+│ usb-n7   │ live-usb│ online │  0       │ $0.00     │ intel      │ llama.cpp: 7b   │
+└──────────┴─────────┴────────┴──────────┴───────────┴────────────┴─────────────────┘
+```
+
+Each row is a pane — a live query against `hive_nodes` joined with aggregated
+task costs. The columns are:
+
+- **Status**: `online` if `last_seen` is within the heartbeat window, `offline` otherwise
+- **Tasks**: count of completed tasks (further split by success/failure on drill-down)
+- **Cost**: sum of `task_cost.cost` for this node, lifetime
+- **GPU**: derived from `capabilities` (the `derive_capabilities()` trigger on `hive_nodes`)
+- **Local LLM**: which models are available via ollama/llama.cpp on this node — `—` if cloud-only
+
+Clicking a node drills into its task history, cost breakdown (input/output/cache
+tokens), and hardware profile.
+
+## Relationship to existing surfaces
+
+### Glasspane
+
+Glasspane watches local agent subprocesses → agent state machine (Idle → Working
+→ Done). Hive Pane watches hive nodes → node state (online/offline) + aggregated
+agent costs. Same supervision loop, different unit of observation.
+
+A glasspane pane is ephemeral (dies with the agent). A Hive Pane row is durable
+(`hive_nodes` persists across reboots).
+
+→ [glasspane](./glasspane.md)
+
+### Hive routing
+
+[hive-routing](./hive-routing.md) defines the engine underneath the board:
+node identity (`machine_id`), local LLM capability probes, cost-aware task
+routing, and the implementation strategy. Hive Pane is the presentation layer;
+hive-routing is the scheduling layer.
+
+### Mother hive
+
+Mother hive is the data layer: PostgreSQL `hive_nodes` table with
+`derive_capabilities()` trigger. Hive Pane is the presentation layer: it queries
+that table and renders it.
+
+→ [mother-hive](./mother-hive.md)
+
+### Task board + cost tracking
+
+The daemon's heartbeat captures per-task cost into the local SQLite store. For
+the hive view, this data needs to flow to the mother. Two paths:
+
+1. **Bridge cost sync** (current): agents on remote nodes connect to osa's daemon
+   via the [control-plane bridge](../guide/architecture/control-plane-bridge/).
+   The board daemon sees their exit events and captures cost. This works today
+   for cross-host agents but requires the bridge to stay up.
+
+2. **A2A push** (planned, see below): nodes push cost data to mother as
+   structured A2A message parts. Decouples cost reporting from the bridge.
+
+→ [task-board](./task-board.md)
+→ [contracts](./contracts.md) (TaskCostSummary schema)
+
+## A2A integration (planned)
+
+> 📋 **Complexity audit:** [a2a-complexity-audit](./a2a-complexity-audit.md) —
+> A2A doesn't reduce Colibri's code complexity today (6 protocols → 6 protocols,
+> ~0 net lines). It pays off at 10+ nodes or when third-party tools ship A2A
+> support. The Agent Card design below is a north star, not an implementation
+> priority for 0.12.
+
+Google's Agent-to-Agent protocol standardizes three things Colibri already does
+ad-hoc. Adopting it makes the hive discoverable and interoperable beyond our own
+tooling.
+
+### Agent Card — standardized discovery
+
+Today a USB node discovers mother via a hardcoded SSH entry in
+the external MCP registry. With A2A, mother publishes an Agent Card at a well-known
+URL:
+
+```
+GET https://mother.clawdie.si/.well-known/agent.json
+```
+
+```json
+{
+  "name": "clawdie-mother",
+  "description": "Clawdie hive mother node — node registry, build queue, cost board",
+  "url": "https://mother.clawdie.si/a2a",
+  "version": "0.12.0",
+  "capabilities": {
+    "streaming": true,
+    "pushNotifications": false
+  },
+  "skills": [
+    {
+      "id": "node_register",
+      "name": "Register Node",
+      "description": "Register a hive node with hardware profile",
+      "inputSchema": { "type": "object", "properties": { "machine_id": {}, "hostname": {}, "hw_profile": {} } }
+    },
+    {
+      "id": "build_colibri",
+      "name": "Build Colibri",
+      "description": "Build a colibri crate from an allowed git branch"
+    }
+  ],
+  "costTracking": {
+    "supported": true,
+    "schema": "clawdie.task-cost.v1",
+    "aggregation": "per-node, per-task, per-model"
+  }
+}
+```
+
+USB nodes (and any A2A-compatible client) discover mother's capabilities without
+manual configuration. The Agent Card is versioned and lintable — same discipline
+as the wiki.
+
+### Task exchange — standardized lifecycle
+
+A2A tasks map directly to Colibri's task board:
+
+| A2A state      | Colibri equivalent |
+| -------------- | ------------------ |
+| `submitted`    | `Pending`          |
+| `working`      | `Started`          |
+| `completed`    | `Done`             |
+| `failed`       | `Error`            |
+| `canceled`     | (not yet modeled)  |
+
+Mother pushes a `node_register` task to a new USB node; the node executes it and
+returns the result. The task carries cost data as a typed A2A part:
+
+```json
+{
+  "type": "data",
+  "mimeType": "application/json+cost",
+  "data": {
+    "schema": "clawdie.task-cost.v1",
+    "input_tokens": 150,
+    "output_tokens": 80,
+    "cost": 0.0042
+  }
+}
+```
+
+### What A2A adds over the current MCP bridge
+
+| Concern              | Current (MCP + SSH)           | A2A                              |
+| -------------------- | ----------------------------- | -------------------------------- |
+| Discovery            | Manual external MCP registry entry    | Well-known Agent Card URL        |
+| Interop              | Colibri-only                  | Any A2A client                   |
+| Cost data            | Embedded in task completion   | Typed `application/json+cost`    |
+| Push notifications   | Polling (heartbeat)           | Optional webhook/push            |
+| Versioning           | Ad-hoc                        | Agent Card version + schema pins |
+
+A2A is not a replacement for the MCP bridge — it's the next layer. The MCP
+bridge handles local daemon commands (status, snapshot, spawn). A2A handles
+cross-node task exchange and discovery. They coexist.
+
+## Data flow
+
+```
+USB node boots
+  │
+  ├─ 1. Tailscale connects
+  │
+  ├─ 2. A2A: GET mother/.well-known/agent.json
+  │      Discovers capabilities, registers interest
+  │
+  ├─ 3. A2A task: mother → USB: node_register(hw_profile)
+  │      USB executes, returns cost + capabilities
+  │
+  ├─ 4. Mother stores in hive_nodes + task_cost
+  │
+  └─ 5. Hive Pane queries PostgreSQL, renders row
+```
+
+For nodes that don't speak A2A yet (current USB image), the existing MCP + SSH
+path continues to work. The board queries `hive_nodes` regardless of how the
+data got there.
+
+## Schema (mother PostgreSQL)
+
+The `hive_nodes` table already exists ([mother-hive](./mother-hive.md)). Hive
+Pane adds a lightweight view for the board:
+
+```sql
+CREATE VIEW hive_pane AS
+SELECT
+    n.machine_id,
+    n.hostname,
+    n.node_type,
+    n.status,
+    n.last_seen,
+    n.capabilities,
+    COUNT(t.id) FILTER (WHERE t.status = 'Done')   AS tasks_done,
+    COUNT(t.id) FILTER (WHERE t.status = 'Error')  AS tasks_failed,
+    COALESCE(SUM(t.cost), 0.0)                     AS total_cost
+FROM hive_nodes n
+LEFT JOIN tasks t ON t.node_machine_id = n.machine_id
+GROUP BY n.machine_id, n.hostname, n.node_type, n.status, n.last_seen, n.capabilities;
+```
+
+The `tasks` table on mother is a projected subset of each node's local task
+board — hostname, status, cost columns. The sync mechanism (bridge cost capture
+or A2A push) is responsible for keeping it current.
+
+## Non-goals
+
+- **Not a replacement for glasspane TUI.** Glasspane watches live agent
+  subprocesses (millisecond latency). Hive Pane watches aggregate node state
+  (minute latency). Two different tools.
+- **Not a Grafana clone.** No time-series plots, no alerting rules. The operator
+  can pipe the data anywhere; Hive Pane is the default read-only view.
+- **Not a write surface.** Node registration and task creation happen through
+  the existing MCP/A2A paths. The board reads only.
+
+## References
+
+- [glasspane](./glasspane.md) — local agent observation model
+- [mother-hive](./mother-hive.md) — node registry schema and SSH forced-command pattern
+- [task-board](./task-board.md) — capability scoring and cost tracking
+- [contracts](./contracts.md) — TaskCostSummary schema v1
+- [external-mcp](./external-mcp.md) — current MCP bridge (coexists with A2A)
--- a/docs/wiki/hive-routing.md
+++ b/docs/wiki/hive-routing.md
@ -0,0 +1,389 @@
+# Hive Member Tracking & Cost-Aware Routing
+
+**Status:** 📋 Design
+**Date:** 24.jun.2026
+**Driven by:** T1.5 per-task cost tracking (shipped) → T2.x routing
+
+> **Companion doc:** [hive-pane](./hive-pane.md) — the operator presentation
+> layer this engine feeds. Hive Pane is the "how it looks" (board, A2A discovery);
+> this doc is the "how it works" (identity, probes, routing, scheduling).
+
+## What Exists Today
+
+| Component | State | Gap |
+|---|---|---|
+| `mother_schema.sql` | `hive_nodes` table with `hw_profile` + `capabilities` JSONB | No stable node UUID; hostname is the key |
+| `derive_capabilities()` trigger | Auto-computes `has_gpu`, `gpu_vendor`, `can_run_local_llm`, `max_model` from hw_profile | Only GPU/VRAM heuristics — doesn't probe running services |
+| `clawdie-hw-probe` | Collects GPU, RAM, CPU, disks, ZFS, WiFi, Vulkan, Colibri status | No ollama/llama.cpp probing |
+| `node-register-mcp` | UPSERTs hw_profile into `hive_nodes` on join | No UUID generation at join time |
+| `crates/colibri-daemon/src/scheduler.rs` | Cron/interval/one-shot jobs, capability matching stubs | No cost-aware routing, no hive awareness |
+| `colibri-store` | Local SQLite `agents` table with UUID (v4 random) | UUID is session-local, not hive-stable |
+| T1.5 cost tracking | Per-task cost captured in local SQLite | No hive-level cost aggregation |
+
+## Design Goals
+
+1. **Stable identity** — A node that joins, leaves, and rejoins is the same node. Not hostname-based (hostnames change when re-provisioned).
+2. **Capability matrix** — What can each member do? Not just hardware, but running services: ollama, llama.cpp, available models, provider API keys, cost tier.
+3. **Verify, don't guess** — Every capability in the matrix comes from a probe result, not self-declaration. The hw-probe is the single source of truth; the `derive_capabilities()` trigger maps hardware facts → capability booleans.
+4. **Cost-aware routing** — When a task is dispatched, the scheduler considers: urgency, provider cost, local LLM availability, cache-hit potential, and capability match.
+5. **Local LLM tier** — A beefy member can serve as a "free but slow" execution target for non-urgent tasks. The cost model treats local execution as $0.0000/task.
+6. **Extensible** — New backends (ollama, llama.cpp, vLLM, Exo clusters) slot into the same capability matrix without schema changes.
+
+---
+
+## Architecture
+
+```
+┌─────────────────────────────────────────────────────────────────┐
+│                        MOTHER (osa)                             │
+│  ┌─────────────────┐  ┌──────────────┐  ┌───────────────────┐  │
+│  │ PostgreSQL       │  │ Scheduler    │  │ MCP Bridge        │  │
+│  │  hive_nodes      │  │  cost-aware  │  │  colibri-mcp-ssh  │  │
+│  │  capabilities    │  │  routing     │  │  node-register    │  │
+│  │  cost_history    │  │  dispatch    │  │  cost-query       │  │
+│  └─────────────────┘  └──────────────┘  └───────────────────┘  │
+└─────────────────────────────────────────────────────────────────┘
+         ▲                              │
+         │ hw-probe + capabilities      │ task dispatch
+         │ (MCP tools/call)             │ (MCP or direct)
+         │                              ▼
+┌────────────────────┐    ┌────────────────────┐
+│  Node: clawdie-a   │    │  Node: clawdie-b   │
+│  ┌──────────────┐  │    │  ┌──────────────┐  │
+│  │ Colibri      │  │    │  │ Colibri      │  │
+│  │  zot spawner │  │    │  │  zot spawner │  │
+│  │  local SQLite│  │    │  │  ollama      │  │
+│  └──────────────┘  │    │  │  llama.cpp   │  │
+│  GPU: none         │    │  │  models:     │  │
+│  RAM: 8GB          │    │  │   qwen2.5:7b │  │
+│  Cost: cloud-only  │    │  └──────────────┘  │
+│                    │    │  GPU: RTX 4090     │
+│                    │    │  RAM: 64GB         │
+│                    │    │  Cost: $0 local    │
+└────────────────────┘    └────────────────────┘
+```
+
+---
+
+## Identity: Machine UUID
+
+### Problem
+
+Hostname is unstable. A live-USB node gets `clawdie` on every boot. A disk-installed node keeps the hostname the operator set. Mother needs a stable, verifiable identity that survives re-provisioning.
+
+### Solution: `/var/db/machine-id`
+
+A 32-character hex UUID generated once, stored locally, included in every hw-probe report. Like `systemd`'s `/etc/machine-id` but simpler: only one purpose — hive identity.
+
+```
+# Generated once by clawdie-firstboot or operator session
+/var/db/machine-id → a1b2c3d4-e5f6-7890-abcd-ef1234567890
+```
+
+**Properties:**
+- **Stable across reboots**: stored on disk, not tmpfs
+- **Survives re-provisioning**: if the seed partition preserves `/var/db/machine-id`, the same physical machine keeps the same identity
+- **Not a secret**: it's an ID, not a key
+- **Verifiable**: mother can check "has node a1b2c3d4 ever joined?" — if yes, this is a rejoin, not a new node
+
+**Alternatives considered:**
+| Approach | Pros | Cons |
+|---|---|---|
+| SMBIOS UUID (`hw.uuid`) | Truly hardware-bound, survives OS reinstall | Not available on all platforms (VPS, ARM); can be spoofed |
+| SSH host key fingerprint | Cryptographically strong | Changes on OS reinstall; key rotation breaks identity |
+| Random UUID (this design) | Portable, simple, survives seed restore | Can be copied/cloned (but same machine, same ID — that's correct) |
+
+**Recommendation:** Generate on first boot, store in `/var/db/machine-id`. The hw-probe includes it as `machine_id`. Mother's `hive_nodes` table gets a `UNIQUE` constraint on `machine_id`.
+
+### Schema change
+
+```sql
+ALTER TABLE hive_nodes ADD COLUMN machine_id TEXT;
+ALTER TABLE hive_nodes ADD CONSTRAINT uq_machine_id UNIQUE (machine_id);
+```
+
+The `node-register-mcp` UPSERT switches from `ON CONFLICT (hostname)` to `ON CONFLICT (machine_id)`. Hostname becomes a mutable attribute (updates on rejoin), machine_id becomes the stable key.
+
+---
+
+## Capability Matrix
+
+### What goes in the matrix
+
+Every capability is a boolean derived from hardware facts, not a self-declaration. The hw-probe collects hardware; the trigger derives capabilities.
+
+| Capability | Derived from | Used for |
+|---|---|---|
+| `has_gpu` | GPU detected in pciconf | GPU-accelerated inference |
+| `gpu_vendor` | amdgpu/nvidia driver | Model compatibility |
+| `vulkan_compute` | vulkaninfo success | llama.cpp Vulkan backend |
+| `can_run_local_llm` | RAM ≥ 16GB or has GPU | Eligibility for local task execution |
+| `max_model` | RAM heuristic | Model size limit (3b, 7b-q4, 13b-q4, 34b-q4) |
+| `cpu_only` | No GPU detected | Fallback only (slow) |
+| `has_wifi` | wlan devices | Network capability |
+| `has_zfs` | ZFS pools non-empty | Storage capability |
+| `colibri_running` | service status | Agent host eligibility |
+| `provider_api_keys` | MCP-reported (not hw probe) | Cloud provider availability |
+
+### Local LLM capabilities (NEW)
+
+Extend the hw-probe to detect running local LLM services and extend the trigger to derive capabilities from them:
+
+```json
+{
+  "local_llm": {
+    "ollama_running": true,
+    "ollama_models": ["qwen2.5:7b", "deepseek-r1:8b", "nomic-embed-text"],
+    "llama_cpp_installed": true,
+    "llama_cpp_models": ["/var/db/models/qwen2.5-7b-q4.gguf"],
+    "vulkan_support": true
+  }
+}
+```
+
+**New derived capabilities:**
+
+| Capability | Derivation |
+|---|---|
+| `ollama_available` | `ollama_running == true` |
+| `ollama_models` | Array of model tags (from `ollama list`) |
+| `llama_cpp_available` | Binary at `/usr/local/bin/llama-server` or similar |
+| `llama_cpp_models` | GGUFs in `/var/db/models/` or `/usr/local/share/models/` |
+| `can_embed_locally` | `nomic-embed-text` in ollama OR any embedding model loaded |
+| `inference_tier` | `local-fast` (GPU ≥ 24GB), `local-slow` (CPU-only, RAM ≥ 16GB), `cloud-only` |
+
+### Probe additions to `clawdie-hw-probe`
+
+```sh
+# New collectors
+collect_machine_id()   # cat /var/db/machine-id or generate
+collect_ollama_status() # ollama list 2>/dev/null (JSON models)
+collect_llama_cpp()    # which llama-server; ls /var/db/models/*.gguf
+collect_local_llm()    # aggregate JSON block
+```
+
+---
+
+## Cost-Aware Routing
+
+### The routing decision
+
+When a task arrives at the scheduler, it computes a **routing score** for each eligible node:
+
+```
+score = capability_match × urgency_weight × cost_weight × cache_weight
+
+capability_match: 0.0–1.0 (does the node have the required skills/model?)
+cost_weight:       0.0–1.0 (lower cost → higher weight)
+urgency_weight:    0.0–1.0 (time-critical tasks penalize slow nodes)
+cache_weight:      0.0–1.0 (warm cache → higher weight)
+```
+
+### Cost tiers
+
+| Tier | Provider | Cost per 1M tokens | Latency | Used when |
+|---|---|---|---|
+| T0 (free) | Local ollama/llama.cpp | $0.00 | 5–60s | Non-urgent, capability match |
+| T1 (cheap) | DeepSeek V3 | $0.27 / $1.10 | 2–5s | Default for most tasks |
+| T2 (balanced) | Gemini Flash | $0.15 / $0.60 | 1–3s | High cache-hit tasks |
+| T3 (premium) | Claude Sonnet 4 | $3.00 / $15.00 | 3–8s | Complex reasoning, only when needed |
+
+### Local LLM routing rules
+
+1. **If task is non-urgent AND a hive member has a matching local model → route locally at $0.00 cost.**
+2. **If the local model is unavailable (node down, model not loaded) → fall back to T1 (DeepSeek).**
+3. **If task is urgent (latency < 5s required) → skip local tier, go straight to T1.**
+4. **Embedding tasks (RAG, similarity search) → always prefer local if available. Embeddings are cheap to compute locally and don't need reasoning.**
+
+### How the scheduler knows
+
+The scheduler queries `hive_nodes` for all online nodes, filters by `capabilities.can_run_local_llm`, checks `ollama_models` for the required model, and computes the routing score. If no local node matches, it falls back to the cloud tier.
+
+The task schema gets a new field:
+
+```sql
+ALTER TABLE tasks ADD COLUMN routing JSONB;
+-- {"preferred_tier": "local", "allowed_tiers": ["local", "cheap"], "max_cost_usd": 0.05, "deadline_s": null}
+```
+
+---
+
+## Protocol: Join → Probe → Route
+
+> **A2A integration:** See [hive-pane](./hive-pane.md#a2a-integration-planned) for the
+> Agent Card, task exchange, and typed cost data parts. The routing engine
+> (this doc) and the A2A protocol (hive-pane) are orthogonal layers — either
+> can evolve independently.
+
+### Phase 1: Join (existing, extended)
+
+```
+Node boots → clawdie-hw-probe runs → MCP tools/call node_register
+→ mother UPSERTs hive_nodes → derive_capabilities() trigger fires
+→ capabilities JSONB updated → node is "online"
+```
+
+**New:** `machine_id` is included. If the machine_id already exists, mother updates the existing row (rejoin), preserving history.
+
+### Phase 2: Heartbeat (existing)
+
+`colibri-daemon` sends periodic heartbeat via MCP. Updates `last_seen`. If heartbeat misses for > 5 minutes, node status → `offline`.
+
+### Phase 3: Capability Sync (new)
+
+On heartbeat, the node can optionally push updated capabilities (if ollama was installed, models changed, etc.). The hw-probe is re-run and the new `local_llm` block is sent.
+
+### Phase 4: Task Dispatch (new)
+
+```
+Scheduler picks a queued task
+  → queries hive_nodes for eligible nodes
+  → computes routing score for each
+  → picks winner
+  → dispatches task via MCP or direct agent spawn
+  → writes routing decision to task.routing JSONB
+```
+
+### Phase 5: Cost Capture (existing, extended)
+
+When the task completes, the local daemon writes cost to its SQLite (T1.5). The mother aggregates hive cost via periodic MCP queries or push events.
+
+---
+
+## Three Implementation Options
+
+### Option A — Mother-Centric (Minimal New Code)
+
+**What:** Mother is the brain. Nodes register, mother routes. No peer-to-peer.
+
+**Implementation:**
+1. Add `machine_id` to `hive_nodes` + hw-probe (1 day)
+2. Extend `derive_capabilities()` for local LLM (1 day)
+3. Add `routing_score()` function to mother's PostgreSQL (stored function — zero Rust changes)
+4. Extend `node-register-mcp` to accept `local_llm` block (0.5 day)
+5. Add `local_llm` probe to `clawdie-hw-probe` (1 day)
+
+**Rust changes:** Scheduler reads `capabilities` from hive_nodes via MCP query, computes score, dispatches. ~200 lines.
+
+**Total:** ~3.5 days.
+
+**Pros:**
+- Simple to reason about — one source of truth
+- Lowest implementation risk
+- Scheduler lives on mother (always-on)
+- Existing MCP bridge handles all communication
+
+**Cons:**
+- Mother is single point of failure for routing (but not execution — once dispatched, the task runs independently)
+- Latency: scheduler must query mother on every tick
+- Doesn't scale to 100+ nodes (not a real concern for our use case)
+
+---
+
+### Option B — Capability-Advertised with Local Routing Fallback
+
+**What:** Mother stores the matrix, but nodes can also route tasks they own to peers directly. Hybrid: central registry + distributed execution.
+
+**Implementation:**
+1. All of Option A (3.5 days)
+2. Add `capabilities` API to `colibri-daemon`'s Unix socket (self-awareness) — 1 day
+3. Add local peer discovery via mDNS or Tailscale whois — 1 day
+4. Add direct peer-to-peer task dispatch via Unix socket → MCP → remote — 2 days
+5. Add fallback logic: "try local first, if no response in 30s, ask mother" — 1 day
+
+**Total:** ~8.5 days.
+
+**Pros:**
+- Lower latency for local dispatch
+- Survives mother downtime for peer-to-peer tasks
+- Natural fit for local LLM use case (beefy node is on same LAN)
+- Nodes that discover each other can route without phoning home
+
+**Cons:**
+- Complexity: two code paths (central + peer-to-peer)
+- Security: peer-to-peer dispatch needs authentication (who can send tasks to my daemon?)
+- Harder to audit: cost tracking must handle peer-dispatched vs mother-dispatched tasks differently
+- mDNS doesn't work across subnets (Tailscale works but adds dependency)
+
+---
+
+### Option C — Capability-Matrix-as-Skill (Zero-Code Routing)
+
+**What:** Don't build a routing engine at all. The capability matrix is exposed as an MCP tool that agents query. The agent itself decides where to route based on the matrix + its own reasoning. The matrix is advisory, not prescriptive.
+
+**Implementation:**
+1. All of Option A minus the routing_scoring function (2.5 days)
+2. Add `colibri_query_hive_capabilities` MCP tool on mother — returns full online node matrix (0.5 day)
+3. Add `colibri_dispatch_to_node` MCP tool — sends task to a specific node (1 day)
+4. Write a `hive-routing` skill that teaches agents how to use the matrix (0.5 day)
+
+**Total:** ~4.5 days. **Zero scheduler changes.**
+
+**Pros:**
+- Exploits Colibri's architecture-as-differentiator: the agent IS the intelligence
+- The routing decision is auditable in the conversation log (why did the agent pick this node?)
+- Natural fit for local LLM — the agent can reason "this task is low priority, I'll try the beefy node first"
+- No new scheduler code — just MCP tools + skills
+- The skill can be iterated without recompiling Colibri
+
+**Cons:**
+- Each routing decision costs tokens (the agent must reason about it)
+- Agents make inscrutable routing choices (the LLM "just knows")
+- No hard guarantees — an agent might route a $5 task to Claude when DeepSeek would do fine
+- Requires the agent to be "cost-aware" (which requires the MCP cost query tool — already shipping in T1.5)
+
+---
+
+## Recommendation
+
+**Start with Option A (Mother-Centric) as the foundation, then layer Option C (Skill-Based) on top.**
+
+The capability matrix, stable UUIDs, and local LLM probes are the foundation — they're needed regardless of the routing strategy. Option A gives us the data model and probe infrastructure. Once that's solid, adding the MCP tools for agent-driven routing (Option C) is a thin layer on top. Option B (peer-to-peer) adds complexity we don't need at this scale.
+
+**Phase 1 (this sprint):** Machine UUID + local LLM probes + extended capabilities matrix. ~2.5 days.
+**Phase 2 (next sprint):** Mother-based routing + MCP tools for agent-driven routing. ~2 days.
+**Phase 3 (future):** Peer-to-peer fallback if we ever have >20 nodes.
+
+---
+
+## Deliverables by Phase
+
+### Phase 1 — Identity & Capability Foundation
+
+| Deliverable | Where | Lines |
+|---|---|---|
+| `machine_id` generation in `clawdie-firstboot` | clawdie-iso | ~15 |
+| `collect_machine_id()` in hw-probe | clawdie-iso | ~10 |
+| `collect_ollama_status()` in hw-probe | clawdie-iso | ~30 |
+| `collect_llama_cpp()` in hw-probe | clawdie-iso | ~20 |
+| `collect_local_llm()` aggregator in hw-probe | clawdie-iso | ~25 |
+| `machine_id` column + constraint in mother_schema.sql | colibri | ~5 |
+| Extended `derive_capabilities()` for `ollama_available`, `llama_cpp_available`, `inference_tier` | colibri | ~40 |
+| `node-register-mcp` handling of `machine_id` key + new local_llm fields | colibri | ~15 |
+| This design doc (hive-routing.md) | This file | ~0 (done) |
+
+### Phase 2 — Routing Engine
+
+| Deliverable | Where |
+|---|---|
+| `colibri_query_hive_capabilities` MCP tool | colibri-mcp |
+| `colibri_dispatch_to_node` MCP tool | colibri-mcp |
+| `hive-routing` skill | `.agent/skills/` |
+| `Task.routing` JSONB field in colibri-store | colibri-store |
+| Mother-side routing score as PostgreSQL function (optional — only if agent-driven routing proves insufficient) | mother_schema.sql |
+
+---
+
+## Integration with the Trifecta
+
+The hive routing plan completes the trifecta's T2.x vision:
+
+```
+T1.4 Prompt Discipline    ✅ Cache warming, cost mode, 3-region prompt
+T1.5 Per-Task Cost        ✅ Captured locally (this sprint)
+T2.x Cost-Aware Routing   📋 Hive matrix → routing decision
+T2.x Model Selection      📋 Arbitrage between cloud tiers + local LLM
+T2.x Eval Harness         📋 Task success measurement
+```
+
+The key insight: local LLM is the **ultimate cache-hit token**. Every token generated on a beefy node's GPU is $0.0000. The routing engine's job is to maximize the use of $0 tokens without compromising task success rates.
--- a/docs/wiki/index.md
+++ b/docs/wiki/index.md
@ -53,12 +53,15 @@ warning.
 | [headroom-sidecar](./headroom-sidecar.md)             | Optional tool-result compression sidecar and its Unix-socket protocol                                           |
 | [jail-confinement](./jail-confinement.md)             | Persistent vs ephemeral jails, priv-mode policy, reuse of spawner confinement for MCP servers                   |
 | [mother-hive](./mother-hive.md)                       | Mother MCP architecture — forced-command SSH, single-home-in-colibri, peer auth, key-on-seed                    |
+| [hive-routing](./hive-routing.md)                     | Hive member identity (machine UUID), capability matrix + local LLM probes, cost-aware task routing            |
+| [hive-pane](./hive-pane.md)                           | Glasspane for the hive — multi-node cost observability, A2A discovery, and operator board                       |
+| [a2a-complexity-audit](./a2a-complexity-audit.md)     | A2A code complexity impact — 6-protocol surface audit, when A2A pays off                                     |
 | [naming-decisions](./naming-decisions.md)             | Ledger of harness-neutral / architecture renames — shipped and in-flight                                        |
 | [daemon-not-demon](./daemon-not-demon.md)             | Why we say daemon (helper spirit) not demon (bad spirit) — English + Slovenian                                  |
 | [layered-soul](./layered-soul.md)                     | How Colibri consumes the layered-soul reviewed-context repo today vs planned                                    |
 | [task-board](./task-board.md)                         | Capability match scoring, cron scheduling, intake drain, SQLite backing                                         |
 | [quality-gates](./quality-gates.md)                   | `ci-checks.sh` as the pre-merge gate; why drift reached `main` before                                           |
-| [contracts](./contracts.md)                           | Stable JSON schemas (run-manifest, runtime-inventory, provider-smoke), golden tests                             |
+| [contracts](./contracts.md)                           | Stable JSON schemas (run-manifest, runtime-inventory, provider-test), golden tests                             |
 | [store-schema](./store-schema.md)                     | SQLite coordination schema and migration discipline                                                             |
 | [external-mcp](./external-mcp.md)                     | MCP bridge for editors + external stdio MCP host; read/write/external-call gates                                |
 | [operator-cli](./operator-cli.md)                     | The `colibri` CLI as a thin typed Unix-socket client over the daemon API                                        |
--- a/docs/wiki/sl/contracts.md
+++ b/docs/wiki/sl/contracts.md
@ -23,7 +23,7 @@ _sheme in (De)serialize_, ne poslovne logike.
 | -------------------------------------- | --------------------- | ------------------------------------------------------------------------- |
 | `clawdie.interagent.run-manifest.v1`   | `RunManifest`         | Beleži tek gradnje/testa — vloga, agent, artefakti, povzetek.             |
 | `clawdie.runtime-version-inventory.v1` | `RuntimeInventory`    | Posnetek izvajalnega okolja gostitelja — OS, različice paketov, npm/node. |
-| `clawdie.provider-smoke.result.v1`     | `ProviderSmokeResult` | Rezultat sonde predpomnilnika DeepSeek in obračun žetonov.                |
+| `clawdie.provider-test.result.v1`     | `ProviderSmokeResult` | Rezultat sonde predpomnilnika DeepSeek in obračun žetonov.                |

 Konstante shem in strukture živijo v `crates/colibri-contracts/src/lib.rs`.

--- a/docs/wiki/sl/cost-model.md
+++ b/docs/wiki/sl/cost-model.md
@ -90,8 +90,25 @@ odgovori, neodvisno od tega, kako je bila zahteva izvedena.

 → [`crates/colibri-deepseek/src/lib.rs`](../../crates/colibri-deepseek/src/lib.rs)

+### Trajno beleženje stroškov na opravilo (T1.5)
+
+Agenti zot oddajajo dogodke `usage` (vhodni/izhodni/predpomnilniški žetoni,
+strošek v USD). Glasspane jih sešteva v `PaneUsage` (natančnost mikro-centov,
+varen za Eq). Ko srčni utrip demona zazna izhod agenta, prebere zbrano
+porabo, zgradi struct `TaskCost` in pokliče `store.set_task_cost()` — zapiše
+8 stolpcev (ponudnik, model, vhodni/izhodni/predpomnilniški žetoni, strošek,
+uspeh, finished_at) v vrstico opravila. MCP izpostavlja
+`colibri_get_task_cost` za poizvedbe agentov o zgodovinskih stroških (npr.
+"kateri model je najcenejši za to delovno obremenitev?").
+
+→ [`crates/colibri-store/src/lib.rs`](../../crates/colibri-store/src/lib.rs)
+(`TaskCost`, `set_task_cost`),
+[`crates/colibri-glasspane/src/lib.rs`](../../crates/colibri-glasspane/src/lib.rs)
+(`PaneUsage`, `add_cost`)
+
 ## Glej tudi

 - [task-board](./task-board.md) — razporejevalnik, ki razpošilja opravila znotraj proračunov sej
+- [contracts](./contracts.md) — shema TaskCostSummary v1
 - [mother-hive](./mother-hive.md) — arhitektura MCP (druga stroškovna domena)
- [quality-gates](./quality-gates.md) — preverjanje, ki preverja razčlenjevanje cenovnih načinov
+- [quality-gates](./quality-gates.md) — vrata, ki preverjajo razčlenjevanje cenovnih načinov
--- a/docs/wiki/sl/index.md
+++ b/docs/wiki/sl/index.md
@ -59,12 +59,13 @@ clippy.
 | [headroom-sidecar](./headroom-sidecar.md)             | Neobvezni stranski vagon za stiskanje rezultatov orodij in njegov protokol Unix vtičnice                                   |
 | [jail-confinement](./jail-confinement.md)             | Trajne proti prehodnim ječam, pravilnik načina priv, ponovna uporaba omejitve zaganjalnika za strežnike MCP                |
 | [mother-hive](./mother-hive.md)                       | Arhitektura matičnega MCP — SSH s prisiljenim ukazom, enojni-dom-v-colibri, peer avtentikacija, ključ-na-semenu            |
+| [hive-pane](./hive-pane.md)                           | Steklena plošča za panj — opazovanje stroškov več vozlišč, odkrivanje A2A in operaterska nadzorna plošča              |
 | [naming-decisions](./naming-decisions.md)             | Imenik preimenovanj, nevtralnih glede na opremo / arhitekturnih — dostavljenih in v teku                                   |
 | [daemon-not-demon](./daemon-not-demon.md)             | Zakaj rečemo daemon (duh pomočnik) in ne demon (hudič) — angleško + slovensko                                              |
 | [layered-soul](./layered-soul.md)                     | Kako Colibri danes uporablja repozitorij pregledanega konteksta layered-soul proti načrtovanemu                            |
 | [task-board](./task-board.md)                         | Točkovanje po zmožnostih, cron razporejanje, praznjenje vnosne vrste, podlaga SQLite                                       |
 | [quality-gates](./quality-gates.md)                   | `ci-checks.sh` kot preverjanje pred združitvijo; zakaj je odmik prej dosegel `main`                                        |
-| [contracts](./contracts.md)                           | Stabilne JSON sheme (run-manifest, runtime-inventory, provider-smoke), zlati testi                                         |
+| [contracts](./contracts.md)                           | Stabilne JSON sheme (run-manifest, runtime-inventory, provider-test), zlati testi                                         |
 | [store-schema](./store-schema.md)                     | Usklajevalna shema SQLite in disciplina migracij                                                                           |
 | [external-mcp](./external-mcp.md)                     | Most MCP za urejevalnike + zunanji gostitelj stdio MCP; dovoljenja za branje/pisanje/zunanji-klic                          |
 | [operator-cli](./operator-cli.md)                     | CLI `colibri` kot tanek tipiziran odjemalec Unix vtičnice prek API procesa v ozadju                                        |
--- a/manifests/2026-05-26-domedog-deepseek-cache-result.json
+++ b/manifests/2026-05-26-domedog-deepseek-cache-result.json
@ -1,5 +1,5 @@
 {
-  "schema": "clawdie.provider-smoke.result.v1",
+  "schema": "clawdie.provider-test.result.v1",
  "test_id": "colibri-probe-20260526T125049Z",
  "host": "domedog",
  "agent": "claude-domedog",
--- a/manifests/2026-05-26-osa-deepseek-cache-result.json
+++ b/manifests/2026-05-26-osa-deepseek-cache-result.json
@ -1,5 +1,5 @@
 {
-  "schema": "clawdie.provider-smoke.result.v1",
+  "schema": "clawdie.provider-test.result.v1",
  "test_id": "colibri-probe-20260526T125645Z",
  "host": "osa",
  "agent": "codex-osa",