review/per-task-cost #230

Merged
clawdie merged 10 commits from review/per-task-cost into main 2026-06-27 13:23:48 +02:00
24 changed files with 1416 additions and 52 deletions

View file

@ -8,9 +8,10 @@ use std::{
fn usage() -> &'static str {
r#"Usage:
colibri-test-agent [--session-id ID] [--cwd PATH] [--step-ms MS] [--hold-secs SECONDS]
colibri-test-agent [--session-id ID] [--cwd PATH] [--step-ms MS] [--hold-secs SECONDS] [--emit-usage]
Emits deterministic Pi-compatible JSONL for local colibri-daemon startup checks.
With --emit-usage, appends a zot-compatible usage event for cost-tracking tests.
"#
}
@ -20,6 +21,7 @@ struct Options {
cwd: String,
step: Duration,
hold: Duration,
emit_usage: bool,
}
impl Default for Options {
@ -32,6 +34,7 @@ impl Default for Options {
.unwrap_or_else(|| "/tmp".to_string()),
step: Duration::from_secs(1),
hold: Duration::from_secs(30),
emit_usage: false,
}
}
}
@ -84,6 +87,10 @@ where
options.hold = Duration::from_secs(seconds);
i += 2;
}
"--emit-usage" => {
options.emit_usage = true;
i += 1;
}
other => return Err(format!("unknown option: {other}\n\n{}", usage())),
}
}
@ -114,8 +121,24 @@ fn emit_jsonl(options: &Options) -> io::Result<()> {
thread::sleep(options.step);
write_event(&mut stdout, serde_json::json!({"type":"turn_end"}))?;
thread::sleep(options.hold);
// Emit a zot-compatible usage event for cost-tracking integration tests.
if options.emit_usage {
thread::sleep(options.step);
write_event(
&mut stdout,
serde_json::json!({
"type": "usage",
"input": 150,
"output": 80,
"cache_read": 200,
"cache_write": 50,
"cost_usd": 0.0042
}),
)?;
}
thread::sleep(options.hold);
Ok(())
}
@ -170,6 +193,12 @@ mod tests {
assert_eq!(options.hold, Duration::from_secs(2));
}
// A bare `--emit-usage` argument (it takes no value) must switch
// `Options::emit_usage` on.
#[test]
fn parses_emit_usage_flag() {
let options = parse_args(["--emit-usage"]).unwrap();
assert!(options.emit_usage);
}
#[test]
fn write_event_serializes_jsonl() {
let mut bytes = Vec::new();

View file

@ -432,3 +432,98 @@ async fn register_tenant_and_list_over_socket() {
server.await.unwrap();
let _ = tokio::fs::remove_dir_all(config.data_dir).await;
}
// End-to-end check of per-task cost capture: spawn the deterministic test
// agent with `--emit-usage` under a `task-<id>` session, wait for it to finish,
// run one heartbeat, and assert that the usage totals landed on the task row.
#[tokio::test]
async fn spawn_agent_with_usage_captures_task_cost() {
let mut config = check_config();
let sample_agent = env!("CARGO_BIN_EXE_colibri-test-agent");
// NOTE(review): this mutates process-wide environment. Tests in the same
// binary run on parallel threads by default, so another test that reads or
// writes COLIBRI_AGENT_BINARY could observe this value — confirm that is safe.
std::env::set_var("COLIBRI_AGENT_BINARY", sample_agent);
config.data_dir = std::env::temp_dir().join(format!("colibri-cost-test-{}", Uuid::new_v4()));
tokio::fs::create_dir_all(&config.data_dir).await.unwrap();
let state: SharedState = Arc::new(DaemonState::new(config.clone()));
let shutdown = state.shutdown_rx.resubscribe();
let server_state = state.clone();
let server = tokio::spawn(async move {
let _ = socket::serve(server_state, shutdown).await;
});
let client = DaemonClient::new(config.socket_path.clone());
wait_for_socket(&client).await;
// Register agent + create/claim task (same flow as poll_tasks test)
// The store guard lives only inside this block, so it is released before
// the next `.await`.
let task_id = {
let store = state.store.lock().unwrap();
store
.register_agent("cost-agent", serde_json::json!(["cost-track"]), None)
.unwrap();
let task = store.create_task("cost-tracking-check", None).unwrap();
let tid = task.id.clone();
let agents = store.list_agents().unwrap();
store.claim_task(&tid, &agents[0].id).unwrap();
tid
};
// Spawn agent with --emit-usage via raw request (spawn_agent_with doesn't expose args)
// The `task-` session-id prefix is what the heartbeat strips to find the
// task whose cost columns it should update.
let spawn_resp: serde_json::Value = client
.request(&colibri_daemon::ColibriCommand::SpawnAgent {
provider: "local".to_string(),
model: sample_agent.to_string(),
session_id: Some(format!("task-{task_id}")),
system_prompt: None,
local_args: Some(vec![
"--session-id".to_string(),
task_id.clone(),
"--step-ms".to_string(),
"10".to_string(),
"--hold-secs".to_string(),
"1".to_string(),
"--emit-usage".to_string(),
]),
jail: None,
})
.await
.expect("spawn should succeed");
// Only checks that the response carries an `agent_id`; the value is unused.
let _agent_id = spawn_resp["agent_id"].as_str().unwrap().to_string();
// Wait for agent to reach Done
// Any pane in `Done` satisfies the loop; this daemon instance only spawns
// the one agent above.
let deadline = Instant::now() + Duration::from_secs(20);
loop {
let snap = client.glasspane_snapshot().await.unwrap();
if snap.panes.iter().any(|p| p.state == AgentState::Done) {
break;
}
assert!(Instant::now() < deadline, "agent did not reach Done");
tokio::time::sleep(Duration::from_millis(200)).await;
}
// Agent reached Done — now agent holds 1s then exits per --hold-secs 1.
// Call heartbeat manually to detect exit and capture cost.
// NOTE(review): the fixed 2s wait assumes the agent has exited by then. With
// --emit-usage the test agent appears to sleep `hold` both before and after
// the usage event — confirm the margin, or poll until the task is finished.
tokio::time::sleep(Duration::from_secs(2)).await;
colibri_daemon::daemon::heartbeat(&state, Duration::from_secs(30)).await;
// Verify cost was stored on the task
let task = {
let store = state.store.lock().unwrap();
store
.get_task(&task_id)
.unwrap()
.expect("task should exist")
};
// The test agent emits: input=150, output=80, cache_read=200, cache_write=50, cost_usd=0.0042
assert_eq!(task.input_tokens, 150, "input tokens");
assert_eq!(task.output_tokens, 80, "output tokens");
assert_eq!(task.cache_read_tokens, 200, "cache read tokens");
assert_eq!(task.cache_write_tokens, 50, "cache write tokens");
assert!(
(task.cost - 0.0042).abs() < 0.0001,
"cost should be 0.0042, got {}",
task.cost
);
// Teardown. A failed assertion above panics before reaching this point, so
// the env var and the temp data dir are left behind in that case.
let _ = state.shutdown_tx.send(());
server.await.unwrap();
std::env::remove_var("COLIBRI_AGENT_BINARY");
let _ = tokio::fs::remove_dir_all(config.data_dir).await;
}

View file

@ -12,7 +12,8 @@ use serde_json::Value;
pub const RUN_MANIFEST_SCHEMA: &str = "clawdie.interagent.run-manifest.v1";
pub const RUNTIME_INVENTORY_SCHEMA: &str = "clawdie.runtime-version-inventory.v1";
pub const PROVIDER_SMOKE_SCHEMA: &str = "clawdie.provider-smoke.result.v1";
pub const PROVIDER_TEST_SCHEMA: &str = "clawdie.provider-test.result.v1";
pub const TASK_COST_SUMMARY_SCHEMA: &str = "clawdie.task-cost-summary.v1";
// ---------------------------------------------------------------------------
// colibri host-status input
@ -100,7 +101,7 @@ pub struct RunManifest {
}
// ---------------------------------------------------------------------------
// clawdie.provider-smoke.result.v1
// clawdie.provider-test.result.v1
// ---------------------------------------------------------------------------
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
@ -118,7 +119,7 @@ pub struct ProviderUsage {
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ProviderSmokeResult {
pub struct ProviderTestResult {
pub schema: String,
pub test_id: String,
pub host: String,
@ -139,3 +140,33 @@ pub struct ProviderSmokeResult {
#[serde(default)]
pub notes: Vec<String>,
}
// ---------------------------------------------------------------------------
// clawdie.task-cost-summary.v1
// ---------------------------------------------------------------------------
/// Per-task cost summary written when an agent finishes a claimed task.
///
/// Accumulated from zot usage events (input/output/cache_read/cache_write)
/// and written to the coordination store. Agents and MCP tools can query
/// historical costs to inform "what's cheap to run" decisions.
///
/// Cost is in the provider's billing currency (USD for DeepSeek, Anthropic,
/// OpenAI). Multi-currency display (EUR, CNY, INR) is a consumer concern —
/// add a `currency` field when that day comes.
///
/// `Eq` is not derived because `cost` is an `f64`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct TaskCostSummary {
/// Schema identifier; `TASK_COST_SUMMARY_SCHEMA` for this version.
pub schema: String,
/// ID of the task the summary describes.
pub task_id: String,
/// Agent that worked the task, when known.
pub agent_id: Option<String>,
/// Provider name, when known.
pub provider: Option<String>,
/// Model name, when known.
pub model: Option<String>,
/// Start timestamp as a string (presumably RFC 3339 — TODO confirm with producers).
pub started_at: String,
/// End timestamp as a string, same format as `started_at`.
pub ended_at: String,
/// Total input tokens.
pub input_tokens: u64,
/// Total output tokens.
pub output_tokens: u64,
/// Total tokens read from the provider cache.
pub cache_read_tokens: u64,
/// Total tokens written to the provider cache.
pub cache_write_tokens: u64,
/// Total cost in the provider's billing currency.
pub cost: f64,
/// Whether the task finished successfully.
pub success: bool,
}

View file

@ -97,9 +97,9 @@ fn parses_domedog_run_manifest() {
#[test]
fn parses_domedog_live_cache_result() {
let res: ProviderSmokeResult =
let res: ProviderTestResult =
serde_json::from_str(DOMEDOG_CACHE).expect("parse live cache result");
assert_eq!(res.schema, PROVIDER_SMOKE_SCHEMA);
assert_eq!(res.schema, PROVIDER_TEST_SCHEMA);
assert_eq!(res.status, "ok");
assert!(res.cache_hit_observed, "expected a cache hit");
assert!(res.cache_hit_tokens > 0);
@ -108,8 +108,8 @@ fn parses_domedog_live_cache_result() {
#[test]
fn parses_osa_live_cache_result() {
let res: ProviderSmokeResult = serde_json::from_str(OSA_CACHE).expect("parse osa cache result");
assert_eq!(res.schema, PROVIDER_SMOKE_SCHEMA);
let res: ProviderTestResult = serde_json::from_str(OSA_CACHE).expect("parse osa cache result");
assert_eq!(res.schema, PROVIDER_TEST_SCHEMA);
assert_eq!(res.host, "osa");
assert_eq!(res.agent, "codex-osa");
assert_eq!(res.status, "ok");
@ -173,10 +173,10 @@ fn parses_osa_watchdog_run_manifest() {
}
#[test]
fn provider_smoke_skipped_shape() {
fn provider_test_skipped_shape() {
// No live result committed yet; assert the struct accepts a skipped result.
let raw = r#"{
"schema": "clawdie.provider-smoke.result.v1",
"schema": "clawdie.provider-test.result.v1",
"test_id": "colibri-probe-20260526T080805Z",
"host": "domedog",
"agent": "claude-domedog",
@ -189,9 +189,38 @@ fn provider_smoke_skipped_shape() {
"cache_hit_observed": false,
"notes": ["DEEPSEEK_API_KEY unset/empty — build verified, live cache probe skipped"]
}"#;
let res: ProviderSmokeResult = serde_json::from_str(raw).expect("parse provider smoke");
assert_eq!(res.schema, PROVIDER_SMOKE_SCHEMA);
let res: ProviderTestResult = serde_json::from_str(raw).expect("parse provider smoke");
assert_eq!(res.schema, PROVIDER_TEST_SCHEMA);
assert_eq!(res.status, "skipped");
assert!(res.warm_usage.is_none());
roundtrip_eq(&res);
}
#[test]
fn task_cost_summary_round_trip() {
let raw = r#"{
"schema": "clawdie.task-cost-summary.v1",
"task_id": "abc-123",
"agent_id": "agent-456",
"provider": "deepseek",
"model": "deepseek-chat",
"started_at": "2026-06-24T10:00:00Z",
"ended_at": "2026-06-24T10:05:30Z",
"input_tokens": 45230,
"output_tokens": 2847,
"cache_read_tokens": 12100,
"cache_write_tokens": 500,
"cost": 0.047,
"success": true
}"#;
let summary: TaskCostSummary = serde_json::from_str(raw).expect("parse task cost summary");
assert_eq!(summary.schema, TASK_COST_SUMMARY_SCHEMA);
assert_eq!(summary.task_id, "abc-123");
assert_eq!(summary.input_tokens, 45230);
assert_eq!(summary.output_tokens, 2847);
assert_eq!(summary.cache_read_tokens, 12100);
assert_eq!(summary.cache_write_tokens, 500);
assert!((summary.cost - 0.047).abs() < 0.0001);
assert!(summary.success);
roundtrip_eq(&summary);
}

View file

@ -225,7 +225,7 @@ pub async fn maybe_rewarm_cache(state: &SharedState) {
// Heartbeat, rotation, handoff, polling
// ---------------------------------------------------------------------------
async fn heartbeat(state: &SharedState, _stall_timeout: Duration) {
pub async fn heartbeat(state: &SharedState, _stall_timeout: Duration) {
let session_count = state.sessions.len();
let agent_count = state.agents.len();
debug!(
@ -249,6 +249,45 @@ async fn heartbeat(state: &SharedState, _stall_timeout: Duration) {
event,
std::time::SystemTime::now(),
);
// Capture per-task cost from accumulated usage (T1.5).
if let Some(session_id) = &handle.config.session_id {
if let Some(task_id) = session_id.strip_prefix("task-") {
let usage = {
let gp = state.glasspane.read().await;
gp.get(&handle.id).map(|p| p.accumulated_usage().clone())
};
if let Some(u) = usage {
let tc = colibri_store::TaskCost {
provider: Some(handle.config.provider.as_str().to_string()),
model: Some(handle.config.model.clone()),
input_tokens: u.input_tokens,
output_tokens: u.output_tokens,
cache_read_tokens: u.cache_read_tokens,
cache_write_tokens: u.cache_write_tokens,
cost: u.cost(),
success: status.success(),
};
let store = state.store.lock().unwrap();
match store.set_task_cost(task_id, &tc) {
Ok(t) => info!(
task_id = %task_id,
input_tokens = u.input_tokens,
output_tokens = u.output_tokens,
cache_read = u.cache_read_tokens,
cost = u.cost(),
success = tc.success,
status = %t.status.as_str(),
"task cost captured"
),
Err(e) => {
warn!(task_id = %task_id, error = %e, "failed to write task cost")
}
}
}
}
}
if !status.success() {
stalled.push(handle.id.clone());
}

View file

@ -45,6 +45,16 @@ pub enum Provider {
}
impl Provider {
/// Human-readable lowercase name for storage/logging.
///
/// Returns a fixed `'static` string per variant, so callers can log or store
/// it without allocating.
pub fn as_str(&self) -> &'static str {
match self {
Provider::DeepSeek => "deepseek",
Provider::OpenRouter => "openrouter",
Provider::Anthropic => "anthropic",
Provider::Local => "local",
}
}
/// Priority order for fallback routing.
pub fn fallback_order() -> &'static [Provider] {
&[

View file

@ -1,4 +1,4 @@
//! zot-rpc driver smoke — end-to-end proof of the colibri#143 spawn driver.
//! zot-rpc driver test — end-to-end proof of the colibri#143 spawn driver.
//!
//! Spawns a real `zot rpc` subprocess through the Colibri `Spawner` (with
//! `rpc_stdin`), sends a prompt over the driver's `RpcSender`, and reads the
@ -27,7 +27,7 @@ use tokio::time::timeout;
#[ignore = "needs a built zot binary; set ZOT_BIN"]
async fn zot_rpc_driver_delivers_prompt_and_streams_events() {
let Ok(zot_bin) = std::env::var("ZOT_BIN") else {
eprintln!("ZOT_BIN not set; skipping zot rpc driver smoke");
eprintln!("ZOT_BIN not set; skipping zot rpc driver test");
return;
};
assert!(

View file

@ -6,7 +6,7 @@
//! wire shapes and produces:
//!
//! DeepSeek request → byte-stable prefix → cache usage manifest
//! (`clawdie.provider-smoke.result.v1`) → run-manifest
//! (`clawdie.provider-test.result.v1`) → run-manifest
//! (`clawdie.interagent.run-manifest.v1`) → local JSONL event log.
use std::collections::BTreeMap;
@ -14,7 +14,7 @@ use std::path::Path;
use chrono::Utc;
use colibri_contracts::{
ProviderSmokeResult, ProviderUsage, RunManifest, PROVIDER_SMOKE_SCHEMA, RUN_MANIFEST_SCHEMA,
ProviderTestResult, ProviderUsage, RunManifest, PROVIDER_TEST_SCHEMA, RUN_MANIFEST_SCHEMA,
};
use serde::{Deserialize, Serialize};
use serde_json::{json, Value};
@ -150,9 +150,9 @@ async fn one_call(
/// Warm the cache, then probe with a byte-identical prefix. Never panics —
/// failures land in `status`/`notes`. Skips the live call when no key is set.
pub async fn run_cache_probe(cfg: &ProbeConfig) -> ProviderSmokeResult {
let mut result = ProviderSmokeResult {
schema: PROVIDER_SMOKE_SCHEMA.to_string(),
pub async fn run_cache_probe(cfg: &ProbeConfig) -> ProviderTestResult {
let mut result = ProviderTestResult {
schema: PROVIDER_TEST_SCHEMA.to_string(),
test_id: format!("colibri-probe-{}", Utc::now().format("%Y%m%dT%H%M%SZ")),
host: cfg.host.clone(),
agent: cfg.agent.clone(),
@ -227,7 +227,7 @@ pub async fn run_cache_probe(cfg: &ProbeConfig) -> ProviderSmokeResult {
}
/// Wrap a cache-probe result as an inter-agent run manifest.
pub fn build_run_manifest(smoke: &ProviderSmokeResult) -> RunManifest {
pub fn build_run_manifest(smoke: &ProviderTestResult) -> RunManifest {
let mut summary: BTreeMap<String, Value> = BTreeMap::new();
summary.insert("status".to_string(), json!(smoke.status));
summary.insert("model".to_string(), json!(smoke.model));

View file

@ -247,6 +247,35 @@ pub struct PaneReaderStats {
/// Accumulated token usage and cost for a supervised pane.
///
/// Populated from `{"type":"usage"}` events. Those events never cause a pane
/// state change, so this struct is the only place their data is kept.
///
/// Cost is stored as an integer number of millionths of a USD (`i64`) so the
/// struct can derive `Eq`; `cost()` converts back to a floating-point USD
/// amount.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct PaneUsage {
    /// Sum of the `input` token counts seen so far.
    pub input_tokens: u64,
    /// Sum of the `output` token counts seen so far.
    pub output_tokens: u64,
    /// Sum of the `cache_read` token counts seen so far.
    pub cache_read_tokens: u64,
    /// Sum of the `cache_write` token counts seen so far.
    pub cache_write_tokens: u64,
    /// Accumulated cost in millionths of a USD. The field name says
    /// "micro-cents", but the scale is 1_000_000 units per dollar (i.e.
    /// micro-dollars); the name is kept so existing in-module users compile.
    cost_micro_cents: i64,
}

impl PaneUsage {
    /// Number of stored integer units per 1 USD.
    const UNITS_PER_USD: f64 = 1_000_000.0;

    /// Accumulated cost in provider's billing currency.
    /// Currently always USD (all providers bill in USD).
    pub fn cost(&self) -> f64 {
        self.cost_micro_cents as f64 / Self::UNITS_PER_USD
    }

    /// Accumulate cost from a usage event's `cost_usd` field (float).
    ///
    /// The amount is rounded to the nearest millionth of a USD. Non-finite
    /// inputs (NaN, ±infinity) are ignored, and the running total saturates at
    /// the `i64` limits instead of overflowing, so a malformed event from an
    /// agent cannot panic the ingestor.
    pub fn add_cost(&mut self, usd: f64) {
        if !usd.is_finite() {
            return;
        }
        // Float-to-int `as` casts saturate, so an absurdly large finite value
        // becomes `i64::MAX`/`i64::MIN` here rather than wrapping.
        let units = (usd * Self::UNITS_PER_USD).round() as i64;
        self.cost_micro_cents = self.cost_micro_cents.saturating_add(units);
    }
}

/// Stateful streaming ingestor for Pi `--mode json` JSONL.
///
/// This is intentionally independent from PTY/process ownership: tests can feed
/// sample readers, while live panes can wire PTY stdout to the same API later.
#[derive(Debug, Clone, PartialEq, Eq)]
@ -258,6 +287,8 @@ pub struct PiJsonlIngestor {
/// Which harness produced the stream. Pi events are read directly; zot
/// events are normalized through `zot_event_type` first.
runtime: AgentRuntime,
/// Accumulated token usage + cost from zot usage events.
pub usage: PaneUsage,
}
impl Default for PiJsonlIngestor {
@ -268,6 +299,7 @@ impl Default for PiJsonlIngestor {
cwd: None,
last_event_at: None,
runtime: AgentRuntime::Pi,
usage: PaneUsage::default(),
}
}
}
@ -305,6 +337,32 @@ impl PiJsonlIngestor {
observed_at: SystemTime,
) -> Option<PiStreamUpdate> {
let value: Value = serde_json::from_str(line.trim()).ok()?;
// Accumulate token usage from agent usage events (type: "usage").
// The state machine skips usage events for state-change purposes,
// but we capture the cost data here before that skip.
// Originally Zot-only — now any runtime can emit usage events
// for cost tracking across all agent harnesses.
if value.get("type").and_then(Value::as_str) == Some("usage") {
if let Some(input) = value.get("input").and_then(Value::as_u64) {
self.usage.input_tokens += input;
}
if let Some(output) = value.get("output").and_then(Value::as_u64) {
self.usage.output_tokens += output;
}
if let Some(cr) = value.get("cache_read").and_then(Value::as_u64) {
self.usage.cache_read_tokens += cr;
}
if let Some(cw) = value.get("cache_write").and_then(Value::as_u64) {
self.usage.cache_write_tokens += cw;
}
if let Some(cost) = value.get("cost_usd").and_then(Value::as_f64) {
self.usage.add_cost(cost);
}
self.last_event_at = Some(observed_at);
return None; // No state change for usage events
}
// Pi events use Colibri's taxonomy directly; zot events are normalized
// (e.g. tool_use_start -> tool_execution_start, response success:false
// -> error, response/usage -> skipped).
@ -385,6 +443,11 @@ impl SupervisedPane {
self.ingestor.session_id()
}
/// Token usage and cost accumulated from the `usage` events this pane's
/// ingestor has seen. Usage events are accumulated for every runtime, not
/// only zot, so the totals stay at zero only while no such event has arrived.
pub fn accumulated_usage(&self) -> &PaneUsage {
&self.ingestor.usage
}
pub fn cwd(&self) -> Option<&str> {
self.ingestor.cwd()
}
@ -1127,6 +1190,44 @@ mod zot_runtime_tests {
);
}
#[test]
fn zot_usage_accumulates_in_pane_ingestor() {
let mut ingestor = PiJsonlIngestor::with_runtime(AgentRuntime::Zot);
let now = SystemTime::now();
// First usage event — should accumulate
ingestor.ingest_line_at(
r#"{"type":"usage","input":100,"output":50,"cache_read":80,"cache_write":10,"cost_usd":0.0025}"#,
now,
);
assert_eq!(ingestor.usage.input_tokens, 100);
assert_eq!(ingestor.usage.output_tokens, 50);
assert_eq!(ingestor.usage.cache_read_tokens, 80);
assert_eq!(ingestor.usage.cache_write_tokens, 10);
assert!((ingestor.usage.cost() - 0.0025).abs() < 0.0001);
// Second usage event — should accumulate on top
ingestor.ingest_line_at(
r#"{"type":"usage","input":200,"output":30,"cache_read":0,"cache_write":5,"cost_usd":0.0010}"#,
now,
);
assert_eq!(ingestor.usage.input_tokens, 300);
assert_eq!(ingestor.usage.output_tokens, 80);
assert_eq!(ingestor.usage.cache_read_tokens, 80);
assert_eq!(ingestor.usage.cache_write_tokens, 15);
assert!((ingestor.usage.cost() - 0.0035).abs() < 0.0001);
// Pi usage events should also accumulate now (runtime-agnostic cost tracking)
let mut pi_ingestor = PiJsonlIngestor::default(); // defaults to Pi
pi_ingestor.ingest_line_at(
r#"{"type":"usage","input":999,"output":999,"cost_usd":99.0}"#,
now,
);
// All runtimes now accumulate usage for cost tracking
assert_eq!(pi_ingestor.usage.input_tokens, 999);
assert!((pi_ingestor.usage.cost() - 99.0).abs() < 0.01);
}
#[test]
fn agent_runtime_default_is_pi() {
assert_eq!(AgentRuntime::default(), AgentRuntime::Pi);

View file

@ -145,6 +145,17 @@ pub fn tool_list() -> Vec<Value> {
"required": ["mode"]
})),
),
json_tool(
"colibri_get_task_cost",
"Get per-task cost summary: tokens used, cache hit ratio, cost, success status",
Some(serde_json::json!({
"type": "object",
"properties": {
"task_id": { "type": "string", "description": "Task ID to query" }
},
"required": ["task_id"]
})),
),
json_tool(
"colibri_external_mcp_servers",
"List configured external MCP servers from COLIBRI_MCP_EXTERNAL_CONFIG",
@ -278,6 +289,21 @@ pub async fn dispatch_tool(
"note": "Cost mode change is runtime-only/status-intent until live config mutation exists."
})))
}
"colibri_get_task_cost" => {
let task_id = require_string(arguments, "task_id")?;
let all_tasks = client.list_tasks(None).await.map_err(map_client_error)?;
let task = all_tasks
.as_array()
.and_then(|arr| {
arr.iter()
.find(|t| t.get("id").and_then(|v| v.as_str()) == Some(&task_id))
})
.cloned();
match task {
Some(t) => Ok(tool_text(t)),
None => Err(McpError::not_found(format!("task not found: {task_id}"))),
}
}
"colibri_external_mcp_servers" => {
let registry = external::load_registry_if_present(&config.external_config_path).await?;
Ok(tool_text(serde_json::json!({

View file

@ -252,5 +252,6 @@ fn tool_list_has_all_phase1_tools() {
assert!(names.contains(&"colibri_external_mcp_servers"));
assert!(names.contains(&"colibri_external_mcp_list_tools"));
assert!(names.contains(&"colibri_external_mcp_call_tool"));
assert_eq!(names.len(), 10);
assert!(names.contains(&"colibri_get_task_cost"));
assert_eq!(names.len(), 11);
}

View file

@ -73,6 +73,37 @@ pub struct Task {
pub description: Option<String>,
pub created_at: String,
pub updated_at: String,
/// Per-task cost tracking (populated on agent exit).
#[serde(default, skip_serializing_if = "Option::is_none")]
pub provider: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub model: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub finished_at: Option<String>,
#[serde(default, skip_serializing_if = "is_zero_u64")]
pub input_tokens: u64,
#[serde(default, skip_serializing_if = "is_zero_u64")]
pub output_tokens: u64,
#[serde(default, skip_serializing_if = "is_zero_u64")]
pub cache_read_tokens: u64,
#[serde(default, skip_serializing_if = "is_zero_u64")]
pub cache_write_tokens: u64,
#[serde(default, skip_serializing_if = "is_zero_f64")]
pub cost: f64,
#[serde(default, skip_serializing_if = "is_false")]
pub success: bool,
}
/// `skip_serializing_if` predicate: true when a token counter is zero, so
/// tasks without cost data serialize without the field.
// serde calls `skip_serializing_if` functions with `&T`, so the reference is required.
#[allow(clippy::trivially_copy_pass_by_ref)]
fn is_zero_u64(n: &u64) -> bool {
    *n == 0
}

/// `skip_serializing_if` predicate: true when a cost is exactly zero
/// (`0.0` or `-0.0`). NaN is not zero, so it is still serialized.
// serde calls `skip_serializing_if` functions with `&T`, so the reference is required.
#[allow(clippy::trivially_copy_pass_by_ref)]
fn is_zero_f64(n: &f64) -> bool {
    *n == 0.0
}

/// `skip_serializing_if` predicate: true when the flag is `false`.
// serde calls `skip_serializing_if` functions with `&T`, so the reference is required.
#[allow(clippy::trivially_copy_pass_by_ref)]
fn is_false(b: &bool) -> bool {
    !*b
}
/// An agent registered in the coordination system.
@ -147,6 +178,18 @@ pub struct Store {
conn: Connection,
db_path: PathBuf,
}
/// Per-task cost data passed from the daemon to the store.
///
/// Consumed by `Store::set_task_cost`, which writes every field onto the task
/// row and derives the task's final status from `success`.
#[derive(Debug, Clone)]
pub struct TaskCost {
/// Provider name to record on the task, if known.
pub provider: Option<String>,
/// Model name to record on the task, if known.
pub model: Option<String>,
/// Total input tokens.
pub input_tokens: u64,
/// Total output tokens.
pub output_tokens: u64,
/// Total tokens read from cache.
pub cache_read_tokens: u64,
/// Total tokens written to cache.
pub cache_write_tokens: u64,
/// Total cost as a floating-point amount (presumably USD — confirm with the producer).
pub cost: f64,
/// Whether the agent finished successfully; selects `Done` vs `Failed`.
pub success: bool,
}
impl Store {
/// Open (or create) the store at `db_path`. Runs migrations automatically.
@ -228,6 +271,15 @@ impl Store {
description: description.map(str::to_string),
created_at: now.clone(),
updated_at: now,
provider: None,
model: None,
finished_at: None,
input_tokens: 0,
output_tokens: 0,
cache_read_tokens: 0,
cache_write_tokens: 0,
cost: 0.0,
success: false,
})
}
@ -247,8 +299,46 @@ impl Store {
.ok_or_else(|| StoreError::NotFound(task_id.to_string()))
}
/// Write per-task cost summary when an agent finishes a claimed task.
/// Updates the task's cost columns and transitions status to Done/Failed.
///
/// Returns the task as re-read from the database after the update.
///
/// # Errors
///
/// Returns `StoreError::NotFound` when no row has `task_id`, and propagates
/// any error from the underlying UPDATE or the follow-up read.
///
/// NOTE(review): the UPDATE is not guarded on the task's current status, so a
/// second call overwrites the earlier cost and status — confirm callers invoke
/// this at most once per task.
pub fn set_task_cost(&self, task_id: &str, tc: &TaskCost) -> Result<Task> {
// One timestamp is used for both `updated_at` and `finished_at`.
let now = Utc::now().to_rfc3339();
let new_status = if tc.success {
TaskStatus::Done
} else {
TaskStatus::Failed
};
// Token counters are bound as `i64` via `as` casts; a value above `i64::MAX`
// would wrap negative (presumably unreachable for real token counts).
let rows = self.conn.execute(
"UPDATE tasks SET
status = ?1, updated_at = ?2, finished_at = ?3,
provider = ?4, model = ?5,
input_tokens = ?6, output_tokens = ?7,
cache_read_tokens = ?8, cache_write_tokens = ?9,
cost = ?10, success = ?11
WHERE id = ?12",
params![
new_status.as_str(),
&now,
&now,
tc.provider.as_deref(),
tc.model.as_deref(),
tc.input_tokens as i64,
tc.output_tokens as i64,
tc.cache_read_tokens as i64,
tc.cache_write_tokens as i64,
tc.cost,
tc.success,
task_id,
],
)?;
// Zero affected rows means the id matched nothing.
if rows == 0 {
return Err(StoreError::NotFound(task_id.to_string()));
}
self.get_task(task_id)?
.ok_or_else(|| StoreError::NotFound(task_id.to_string()))
}
/// Assign a task to an agent (status → Claimed).
/// Claim a queued task for an agent.
///
/// The UPDATE is guarded on `status = 'queued'`, so the claim is atomic and
/// exclusive: when two agents race for the same task, only the first wins
@ -285,23 +375,9 @@ impl Store {
/// Get a single task by ID.
pub fn get_task(&self, task_id: &str) -> Result<Option<Task>> {
let mut stmt = self.conn.prepare(
"SELECT id, agent_id, status, title, description, created_at, updated_at
FROM tasks WHERE id = ?1",
)?;
let mut stmt = self.conn.prepare("SELECT * FROM tasks WHERE id = ?1")?;
let mut rows = stmt.query_map(params![task_id], |row| {
let status_str: String = row.get(2)?;
Ok(Task {
id: row.get(0)?,
agent_id: row.get(1)?,
status: TaskStatus::parse_status(&status_str).unwrap_or(TaskStatus::Queued),
title: row.get(3)?,
description: row.get(4)?,
created_at: row.get(5)?,
updated_at: row.get(6)?,
})
})?;
let mut rows = stmt.query_map(params![task_id], row_to_task)?;
match rows.next() {
Some(Ok(task)) => Ok(Some(task)),
@ -313,11 +389,9 @@ impl Store {
/// List tasks, optionally filtered by status.
pub fn list_tasks(&self, status_filter: Option<TaskStatus>) -> Result<Vec<Task>> {
let sql = if status_filter.is_some() {
"SELECT id, agent_id, status, title, description, created_at, updated_at
FROM tasks WHERE status = ?1 ORDER BY created_at DESC"
"SELECT * FROM tasks WHERE status = ?1 ORDER BY created_at DESC"
} else {
"SELECT id, agent_id, status, title, description, created_at, updated_at
FROM tasks ORDER BY created_at DESC"
"SELECT * FROM tasks ORDER BY created_at DESC"
};
let mut stmt = self.conn.prepare(sql)?;
@ -635,6 +709,15 @@ fn row_to_task(row: &rusqlite::Row<'_>) -> rusqlite::Result<Task> {
description: row.get(4)?,
created_at: row.get(5)?,
updated_at: row.get(6)?,
provider: row.get(7).ok(),
model: row.get(8).ok(),
finished_at: row.get(9).ok(),
input_tokens: row.get::<_, i64>(10).ok().map(|v| v as u64).unwrap_or(0),
output_tokens: row.get::<_, i64>(11).ok().map(|v| v as u64).unwrap_or(0),
cache_read_tokens: row.get::<_, i64>(12).ok().map(|v| v as u64).unwrap_or(0),
cache_write_tokens: row.get::<_, i64>(13).ok().map(|v| v as u64).unwrap_or(0),
cost: row.get::<_, f64>(14).ok().unwrap_or(0.0),
success: row.get::<_, bool>(15).ok().unwrap_or(false),
})
}
@ -710,6 +793,55 @@ mod tests {
assert_eq!(task.status, TaskStatus::Done);
}
#[test]
fn test_set_task_cost_writes_all_fields() {
let store = Store::open_memory().unwrap();
let task = store
.create_task("cost-tracked task", Some("test cost capture"))
.unwrap();
let tc = TaskCost {
provider: Some("deepseek".to_string()),
model: Some("deepseek-chat".to_string()),
input_tokens: 1234,
output_tokens: 567,
cache_read_tokens: 800,
cache_write_tokens: 100,
cost: 0.0042,
success: true,
};
store.set_task_cost(&task.id, &tc).unwrap();
let reloaded = store.get_task(&task.id).unwrap().unwrap();
assert_eq!(reloaded.status, TaskStatus::Done);
assert_eq!(reloaded.provider.as_deref(), Some("deepseek"));
assert_eq!(reloaded.model.as_deref(), Some("deepseek-chat"));
assert_eq!(reloaded.input_tokens, 1234);
assert_eq!(reloaded.output_tokens, 567);
assert_eq!(reloaded.cache_read_tokens, 800);
assert_eq!(reloaded.cache_write_tokens, 100);
assert!((reloaded.cost - 0.0042).abs() < 0.0001);
assert!(reloaded.success);
assert!(reloaded.finished_at.is_some());
}
#[test]
fn test_set_task_cost_nonexistent_errors() {
let store = Store::open_memory().unwrap();
let tc = TaskCost {
provider: None,
model: None,
input_tokens: 0,
output_tokens: 0,
cache_read_tokens: 0,
cache_write_tokens: 0,
cost: 0.0,
success: false,
};
let result = store.set_task_cost("nonexistent", &tc);
assert!(result.is_err());
}
#[test]
fn test_transition_not_found() {
let store = Store::open_memory().unwrap();

View file

@ -65,4 +65,14 @@ CREATE INDEX IF NOT EXISTS idx_tenants_status ON tenants(status);
pub const MIGRATIONS: &[&str] = &[
"ALTER TABLE agents ADD COLUMN host TEXT",
"ALTER TABLE agents ADD COLUMN last_seen TEXT",
// Per-task cost tracking (T1.5): zot usage events → store.
"ALTER TABLE tasks ADD COLUMN provider TEXT",
"ALTER TABLE tasks ADD COLUMN model TEXT",
"ALTER TABLE tasks ADD COLUMN finished_at TEXT",
"ALTER TABLE tasks ADD COLUMN input_tokens INTEGER DEFAULT 0",
"ALTER TABLE tasks ADD COLUMN output_tokens INTEGER DEFAULT 0",
"ALTER TABLE tasks ADD COLUMN cache_read_tokens INTEGER DEFAULT 0",
"ALTER TABLE tasks ADD COLUMN cache_write_tokens INTEGER DEFAULT 0",
"ALTER TABLE tasks ADD COLUMN cost REAL DEFAULT 0.0",
"ALTER TABLE tasks ADD COLUMN success INTEGER DEFAULT 0",
];

View file

@ -0,0 +1,166 @@
# A2A Complexity Audit
**Question:** Does A2A reduce Colibri's code complexity, or is it additive?
**Date:** 27.jun.2026
**Referenced from:** [hive-pane.md](./hive-pane.md), [hive-routing.md](./hive-routing.md)
## Current protocol surface area
Colibri speaks 5 protocols today:
| Protocol | Where | Lines | Purpose |
|---|---|---|---|
| **Custom JSON wire** | `crates/colibri-daemon/src/socket.rs` + `crates/colibri-client/src/lib.rs` | 1,981 | Local daemon control (spawn, status, snapshot, tasks, skills) |
| **MCP JSON-RPC** | `crates/colibri-mcp/src/lib.rs` | 570 | Editor integration + external MCP host |
| **MCP-over-SSH** | `packaging/mother/` (3 files) | 437 | Mother hive entrypoint (forced-command allowlist + node register) |
| **JSONL** | `crates/colibri-glasspane/src/lib.rs` | 1,186 | Agent subprocess stdout events |
| **SQL** | `crates/colibri-store/src/lib.rs` + `crates/colibri-store/src/schema.rs` | 1,150 | Local coordination (tasks, agents, skills, tenants) |
**Total protocol surface: ~5,324 lines.**
---
## What A2A would replace
### 1. Mother MCP-over-SSH bridge → A2A HTTP endpoint
Today's mother entrypoint:
```
USB node → SSH (authorized_keys forced-command) → colibri-mcp-ssh → colibri-mcp → PostgreSQL
└─ node-register-mcp (embedded psql)
```
With A2A:
```
USB node → HTTPS → mother A2A endpoint → PostgreSQL
└─ /a2a (task exchange)
└─ /.well-known/agent.json (discovery)
```
**Removed:**
- `colibri-mcp-ssh` (32 lines) — SSH forced-command allowlist wrapper
- `node-register-mcp` (88 lines) — Custom MCP tool with embedded psql
- SSH key management in `setup-mother.sh` (~40 lines of key distribution logic)
**Removed total: ~160 lines.**
**Added:**
- A2A HTTP endpoint on mother (~200 lines)
- A2A client library integration on USB node (~150 lines)
- mTLS/TLS termination for auth (~30 lines)
**Added total: ~380 lines.**
**Net delta: +220 lines.** Not a code reduction. But operational complexity drops significantly:
- No SSH key distribution to USB nodes (key lives on seed partition → no longer needed on mother)
- No forced-command allowlist to maintain
- Standard HTTPS is easier to firewall, audit, and monitor than SSH forced-command
- Agent Card URL is discoverable without manual external MCP registry entries
### 2. External MCP server discovery → Agent Card
Today: external MCP registry config — manual JSON listing third-party MCP servers:
```json
{
"servers": [
{
"name": "filesystem",
"command": "npx",
"args": ["-y", "@anthropic/mcp-server-filesystem", "/tmp"],
"env": {}
}
]
}
```
With A2A: third-party tools that speak A2A (not MCP) publish an Agent Card. Colibri discovers them via the well-known Agent Card URL instead of manual JSON config files.
**Reality check:** No third-party tools speak A2A yet. The protocol was only announced in April 2025, whereas MCP has ~2 years of ecosystem maturity. This is a *future* replacement, not a *current* one.
**Verdict:** A2A discovery doesn't reduce code today. External MCP stays for tool access.
### 3. Ad-hoc cost data format → Typed A2A part
Today: cost data is embedded in the daemon's heartbeat logic — unstructured:
```rust
info!(task_id = %task_id, cost = u.cost(), "task cost captured");
```
With A2A: cost data is a typed message part (`application/json+cost`). The format is standardized, not ad-hoc.
**Code savings:** ~10 lines (the info! log stays; the A2A part is new code).
**Verdict:** Negligible code impact. The value is *interop*, not complexity reduction.
---
## What A2A does NOT replace
| Component | Why A2A doesn't touch it | Lines saved |
|---|---|---|
| **Unix socket wire protocol** (`crates/colibri-daemon/src/socket.rs`) | A2A is cross-node HTTP. Local daemon control needs IPC — Unix socket is faster, auth-free (filesystem permissions), and doesn't need a network stack. | 0 |
| **Spawner** (`crates/colibri-daemon/src/spawner.rs`) | A2A routes tasks to existing agents. Colibri *creates* agents by spawning subprocesses. A2A has no process lifecycle concept. | 0 |
| **Glasspane** (`crates/colibri-glasspane/src/lib.rs`) | A2A doesn't watch subprocess stdout. Glasspane is a PTY observer — it reads JSONL from child processes. A2A operates one layer above. | 0 |
| **Store** (`crates/colibri-store/src/lib.rs`) | A2A doesn't replace local SQLite coordination. Each node needs local persistence for task board, agents, skills — A2A is the *transport*, not the *database*. | 0 |
| **MCP editor bridge** | A2A is agent-to-agent. MCP is human-to-tool. Different protocols for different directions. They coexist. | 0 |
| **Contracts schemas** (`crates/colibri-contracts/src/lib.rs`) | A2A uses JSON Schema for input validation. Colibri's contracts are already compatible — no change needed. | 0 |
**Total irreplaceable: ~5,000 lines.** A2A doesn't reduce this at all.
---
## Net complexity analysis
```
BEFORE AFTER A2A
────── ─────────
Unix socket protocol 1,981 1,981 (unchanged)
MCP bridge 570 570 (unchanged)
Mother MCP-over-SSH 437 0 (REMOVED)
A2A endpoint 0 380 (NEW)
Glasspane JSONL 1,186 1,186 (unchanged)
SQLite store 1,150 1,150 (unchanged)
Contracts schemas 200 200 (unchanged)
────── ──────
TOTAL 5,524 5,467
────── ──────
```
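**Net delta: −57 lines.** Technically a tiny reduction — but only because this table retires the entire 437-line MCP-over-SSH bridge, whereas section 1 identifies just ~160 of those lines as removable (which gives the +220 figure there). Realistically: the code moves around, it doesn't shrink.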
---
## The real trade-off
A2A is not a complexity reduction play. It's an **interoperability and operational simplicity** play:
| Metric | MCP-over-SSH (current) | A2A (proposed) |
|---|---|---|
| **Lines of code** | ~5,524 (spread across 6 crates + 3 shell scripts) | ~5,467 (SSH scripts gone, A2A handler added) |
| **Protocol count** | 5 | 6 (A2A adds one) |
| **Operational complexity** | SSH keys × N nodes, forced-command allowlists, peer auth setup | One HTTPS endpoint, mTLS certs, well-known URL |
| **Discoverability** | Manual external MCP registry entries | Agent Card at well-known URL |
| **Interoperability** | Colibri-only | Any A2A client |
| **Debuggability** | `ssh -v`, `psql`, `jq` | `curl`, browser devtools, standard HTTP tooling |
| **Ecosystem maturity** | N/A (Colibri-specific) | Protocol still young (announced April 2025), zero adoption |
| **When it pays off** | Works today for 4 nodes | Pays off at 10+ nodes, or when 3rd-party tools ship A2A |
---
## Recommendation: Later, not now
The right window for A2A is when one of these becomes true:
1. **We have >10 hive nodes** — SSH key distribution becomes painful
2. **A third-party tool ships A2A support** — interop value materializes
3. **We want federation** — multiple hives discovering each other
Until then: the current MCP-over-SSH bridge is 437 lines of boring, working code. A2A would add 380 lines for a protocol that has zero adopters. The code savings (~57 lines) don't justify the protocol risk.
**Phase 2 (next sprint) should not include A2A.** Build the routing engine on the existing MCP bridge. Add A2A as Phase 3 — when the protocol has real-world adoption and Colibri has enough nodes to benefit from discovery.
The HIVE-PANE.md A2A section is a good north-star design doc. It stays in the wiki as "planned." But it shouldn't drive implementation priority.

View file

@ -19,7 +19,7 @@ between Colibri (Rust) and Clawdie agents (TypeScript). It owns _schemas and
| -------------------------------------- | --------------------- | -------------------------------------------------------------- |
| `clawdie.interagent.run-manifest.v1` | `RunManifest` | Records a build/test run — role, agent, artifacts, summary. |
| `clawdie.runtime-version-inventory.v1` | `RuntimeInventory` | Host runtime snapshot — OS, package versions, npm/node/zot/pi. |
| `clawdie.provider-smoke.result.v1` | `ProviderSmokeResult` | DeepSeek cache-hit probe result and token accounting. |
| `clawdie.provider-test.result.v1` | `ProviderSmokeResult` | DeepSeek cache-hit probe result and token accounting. |
Schema constants and structs live in `crates/colibri-contracts/src/lib.rs`.
@ -43,7 +43,7 @@ merged.
## See also
- [cost-model](./cost-model.md) — how the provider-smoke result feeds cache-hit
- [cost-model](./cost-model.md) — how the provider-test result feeds cache-hit
metering.
- [runtime-inventory](./runtime-inventory.md) — where the runtime inventory is
produced.

View file

@ -84,8 +84,27 @@ how the request was made.
→ [`crates/colibri-deepseek/src/lib.rs`](../../crates/colibri-deepseek/src/lib.rs)
### Per-task cost persistence (T1.5)
zot agents emit `usage` events (input/output/cache tokens, cost in USD).
Glasspane accumulates these into `PaneUsage` (micro-cent precision, Eq-safe).
When the daemon heartbeat detects agent exit, it reads the accumulated usage,
builds a `TaskCost` struct, and calls `store.set_task_cost()` — persisting
8 columns (provider, model, input/output/cache tokens, cost, success,
finished_at) onto the task row. MCP exposes `colibri_get_task_cost` for
agents to query historical cost data (e.g., "what model is cheapest for
this workload?").
→ [`crates/colibri-store/src/lib.rs`](../../crates/colibri-store/src/lib.rs)
(`TaskCost`, `set_task_cost`),
[`crates/colibri-glasspane/src/lib.rs`](../../crates/colibri-glasspane/src/lib.rs)
(`PaneUsage`, `add_cost`),
[`crates/colibri-contracts/src/lib.rs`](../../crates/colibri-contracts/src/lib.rs)
(`TaskCostSummary`)
## See also
- [task-board](./task-board.md) — the scheduler that dispatches tasks within session budgets
- [contracts](./contracts.md) — TaskCostSummary schema v1
- [mother-hive](./mother-hive.md) — MCP architecture (different cost domain)
- [quality-gates](./quality-gates.md) — the gate that validates cost-mode parsing

266
docs/wiki/hive-pane.md Normal file
View file

@ -0,0 +1,266 @@
---
title: Hive Pane
description: "Glasspane for the hive — multi-node cost observability, A2A discovery, and a board the operator can read at a glance."
---
← [index](./index.md)
Hive Pane is the multi-node extension of the [glasspane](./glasspane.md) metaphor.
Where glasspane watches local agent subprocesses through JSONL stdout, Hive Pane
watches hive nodes through PostgreSQL rows — same operator mental model (pane =
unit of observation), different scale (local agent vs remote host).
## Decision
One board, not many ad-hoc surfaces. The operator sees every hive node — its
status, accumulated cost, task success rate, and hardware capabilities — in a
single view. The data already exists ([mother-hive](./mother-hive.md) for node
registry, [task-board](./task-board.md) for per-task cost). Hive Pane just
queries and renders it.
## Why this exists
Without it, the operator answers "what is my hive doing?" by:
- SSH'ing into osa → `psql mother_hive -c "SELECT * FROM hive_nodes"`
- Cross-referencing task boards on each node
- Adding up costs manually
Hive Pane replaces all of that with one queryable surface that understands
the hive topology.
## What it shows
```
┌───────────────────────────────────────────────────────────────────────────────────┐
│ HIVE PANE [0.12.0] [secured] [4 nodes] │
├──────────┬─────────┬────────┬──────────┬───────────┬────────────┬─────────────────┤
│ Node │ Type │ Status │ Tasks │ Cost │ GPU │ Local LLM │
├──────────┼─────────┼────────┼──────────┼───────────┼────────────┼─────────────────┤
│ osa │ mother │ online │ 12 done │ $0.42 │ none │ — │
│ debby │ disk │ online │ 8 done │ $1.87 │ amd (iGPU) │ ollama: qwen2.5 │
│ domedog │ disk │ online │ 3 done │ $0.03 │ none │ — │
│ usb-n7 │ live-usb│ online │ 0 │ $0.00 │ intel │ llama.cpp: 7b │
└──────────┴─────────┴────────┴──────────┴───────────┴────────────┴─────────────────┘
```
Each row is a pane — a live query against `hive_nodes` joined with aggregated
task costs. The columns are:
- **Status**: `online` if `last_seen` is within the heartbeat window, `offline` otherwise
- **Tasks**: count of completed tasks (further split by success/failure on drill-down)
- **Cost**: sum of `task_cost.cost` for this node, lifetime
- **GPU**: derived from `capabilities` (the `derive_capabilities()` trigger on `hive_nodes`)
- **Local LLM**: which models are available via ollama/llama.cpp on this node — `—` if cloud-only
Clicking a node drills into its task history, cost breakdown (input/output/cache
tokens), and hardware profile.
## Relationship to existing surfaces
### Glasspane
Glasspane watches local agent subprocesses → agent state machine (Idle → Working
→ Done). Hive Pane watches hive nodes → node state (online/offline) + aggregated
agent costs. Same supervision loop, different unit of observation.
A glasspane pane is ephemeral (dies with the agent). A Hive Pane row is durable
(`hive_nodes` persists across reboots).
→ [glasspane](./glasspane.md)
### Hive routing
[hive-routing](./hive-routing.md) defines the engine underneath the board:
node identity (`machine_id`), local LLM capability probes, cost-aware task
routing, and the implementation strategy. Hive Pane is the presentation layer;
hive-routing is the scheduling layer.
### Mother hive
Mother hive is the data layer: PostgreSQL `hive_nodes` table with
`derive_capabilities()` trigger. Hive Pane is the presentation layer: it queries
that table and renders it.
→ [mother-hive](./mother-hive.md)
### Task board + cost tracking
The daemon's heartbeat captures per-task cost into the local SQLite store. For
the hive view, this data needs to flow to the mother. Two paths:
1. **Bridge cost sync** (current): agents on remote nodes connect to osa's daemon
via the [control-plane bridge](../guide/architecture/control-plane-bridge/).
The board daemon sees their exit events and captures cost. This works today
for cross-host agents but requires the bridge to stay up.
2. **A2A push** (planned, see below): nodes push cost data to mother as
structured A2A message parts. Decouples cost reporting from the bridge.
→ [task-board](./task-board.md)
→ [contracts](./contracts.md) (TaskCostSummary schema)
## A2A integration (planned)
> 📋 **Complexity audit:** [a2a-complexity-audit](./a2a-complexity-audit.md) —
> A2A doesn't reduce Colibri's code complexity today (5 protocols → 6 protocols,
> ~57 fewer lines — effectively zero). It pays off at 10+ nodes or when third-party tools ship A2A
> support. The Agent Card design below is a north star, not an implementation
> priority for 0.12.
Google's Agent-to-Agent protocol standardizes three things Colibri already does
ad-hoc. Adopting it makes the hive discoverable and interoperable beyond our own
tooling.
### Agent Card — standardized discovery
Today a USB node discovers mother via a hardcoded SSH entry in
the external MCP registry. With A2A, mother publishes an Agent Card at a well-known
URL:
```
GET https://mother.clawdie.si/.well-known/agent.json
```
```json
{
"name": "clawdie-mother",
"description": "Clawdie hive mother node — node registry, build queue, cost board",
"url": "https://mother.clawdie.si/a2a",
"version": "0.12.0",
"capabilities": {
"streaming": true,
"pushNotifications": false
},
"skills": [
{
"id": "node_register",
"name": "Register Node",
"description": "Register a hive node with hardware profile",
"inputSchema": { "type": "object", "properties": { "machine_id": {}, "hostname": {}, "hw_profile": {} } }
},
{
"id": "build_colibri",
"name": "Build Colibri",
"description": "Build a colibri crate from an allowed git branch"
}
],
"costTracking": {
"supported": true,
"schema": "clawdie.task-cost.v1",
"aggregation": "per-node, per-task, per-model"
}
}
```
USB nodes (and any A2A-compatible client) discover mother's capabilities without
manual configuration. The Agent Card is versioned and lintable — same discipline
as the wiki.
### Task exchange — standardized lifecycle
A2A tasks map directly to Colibri's task board:
| A2A state | Colibri equivalent |
| -------------- | ------------------ |
| `submitted` | `Pending` |
| `working` | `Started` |
| `completed` | `Done` |
| `failed` | `Error` |
| `canceled` | (not yet modeled) |
Mother pushes a `node_register` task to a new USB node; the node executes it and
returns the result. The task carries cost data as a typed A2A part:
```json
{
"type": "data",
"mimeType": "application/json+cost",
"data": {
"schema": "clawdie.task-cost.v1",
"input_tokens": 150,
"output_tokens": 80,
"cost": 0.0042
}
}
```
### What A2A adds over the current MCP bridge
| Concern | Current (MCP + SSH) | A2A |
| -------------------- | ----------------------------- | -------------------------------- |
| Discovery | Manual external MCP registry entry | Well-known Agent Card URL |
| Interop | Colibri-only | Any A2A client |
| Cost data | Embedded in task completion | Typed `application/json+cost` |
| Push notifications | Polling (heartbeat) | Optional webhook/push |
| Versioning | Ad-hoc | Agent Card version + schema pins |
A2A is not a replacement for the MCP bridge — it's the next layer. The MCP
bridge handles local daemon commands (status, snapshot, spawn). A2A handles
cross-node task exchange and discovery. They coexist.
## Data flow
```
USB node boots
├─ 1. Tailscale connects
├─ 2. A2A: GET mother/.well-known/agent.json
│ Discovers capabilities, registers interest
├─ 3. A2A task: mother → USB: node_register(hw_profile)
│ USB executes, returns cost + capabilities
├─ 4. Mother stores in hive_nodes + task_cost
└─ 5. Hive Pane queries PostgreSQL, renders row
```
For nodes that don't speak A2A yet (current USB image), the existing MCP + SSH
path continues to work. The board queries `hive_nodes` regardless of how the
data got there.
## Schema (mother PostgreSQL)
The `hive_nodes` table already exists ([mother-hive](./mother-hive.md)). Hive
Pane adds a lightweight view for the board:
```sql
CREATE VIEW hive_pane AS
SELECT
n.machine_id,
n.hostname,
n.node_type,
n.status,
n.last_seen,
n.capabilities,
COUNT(t.id) FILTER (WHERE t.status = 'Done') AS tasks_done,
COUNT(t.id) FILTER (WHERE t.status = 'Error') AS tasks_failed,
COALESCE(SUM(t.cost), 0.0) AS total_cost
FROM hive_nodes n
LEFT JOIN tasks t ON t.node_machine_id = n.machine_id
GROUP BY n.machine_id, n.hostname, n.node_type, n.status, n.last_seen, n.capabilities;
```
The `tasks` table on mother is a projected subset of each node's local task
board — node `machine_id` (`node_machine_id`), status, cost columns. The sync mechanism (bridge cost capture
or A2A push) is responsible for keeping it current.
## Non-goals
- **Not a replacement for glasspane TUI.** Glasspane watches live agent
subprocesses (millisecond latency). Hive Pane watches aggregate node state
(minute latency). Two different tools.
- **Not a Grafana clone.** No time-series plots, no alerting rules. The operator
can pipe the data anywhere; Hive Pane is the default read-only view.
- **Not a write surface.** Node registration and task creation happen through
the existing MCP/A2A paths. The board reads only.
## References
- [glasspane](./glasspane.md) — local agent observation model
- [mother-hive](./mother-hive.md) — node registry schema and SSH forced-command pattern
- [task-board](./task-board.md) — capability scoring and cost tracking
- [contracts](./contracts.md) — TaskCostSummary schema v1
- [external-mcp](./external-mcp.md) — current MCP bridge (coexists with A2A)

389
docs/wiki/hive-routing.md Normal file
View file

@ -0,0 +1,389 @@
# Hive Member Tracking & Cost-Aware Routing
**Status:** 📋 Design
**Date:** 24.jun.2026
**Driven by:** T1.5 per-task cost tracking (shipped) → T2.x routing
> **Companion doc:** [hive-pane](./hive-pane.md) — the operator presentation
> layer this engine feeds. Hive Pane is the "how it looks" (board, A2A discovery);
> this doc is the "how it works" (identity, probes, routing, scheduling).
## What Exists Today
| Component | State | Gap |
|---|---|---|
| `mother_schema.sql` | `hive_nodes` table with `hw_profile` + `capabilities` JSONB | No stable node UUID; hostname is the key |
| `derive_capabilities()` trigger | Auto-computes `has_gpu`, `gpu_vendor`, `can_run_local_llm`, `max_model` from hw_profile | Only GPU/VRAM heuristics — doesn't probe running services |
| `clawdie-hw-probe` | Collects GPU, RAM, CPU, disks, ZFS, WiFi, Vulkan, Colibri status | No ollama/llama.cpp probing |
| `node-register-mcp` | UPSERTs hw_profile into `hive_nodes` on join | No UUID generation at join time |
| `crates/colibri-daemon/src/scheduler.rs` | Cron/interval/one-shot jobs, capability matching stubs | No cost-aware routing, no hive awareness |
| `colibri-store` | Local SQLite `agents` table with UUID (v4 random) | UUID is session-local, not hive-stable |
| T1.5 cost tracking | Per-task cost captured in local SQLite | No hive-level cost aggregation |
## Design Goals
1. **Stable identity** — A node that joins, leaves, and rejoins is the same node. Not hostname-based (hostnames change when re-provisioned).
2. **Capability matrix** — What can each member do? Not just hardware, but running services: ollama, llama.cpp, available models, provider API keys, cost tier.
3. **Verify, don't guess** — Every capability in the matrix comes from a probe result, not self-declaration. The hw-probe is the single source of truth; the `derive_capabilities()` trigger maps hardware facts → capability booleans.
4. **Cost-aware routing** — When a task is dispatched, the scheduler considers: urgency, provider cost, local LLM availability, cache-hit potential, and capability match.
5. **Local LLM tier** — A beefy member can serve as a "free but slow" execution target for non-urgent tasks. The cost model treats local execution as $0.0000/task.
6. **Extensible** — New backends (ollama, llama.cpp, vLLM, Exo clusters) slot into the same capability matrix without schema changes.
---
## Architecture
```
┌─────────────────────────────────────────────────────────────────┐
│ MOTHER (osa) │
│ ┌─────────────────┐ ┌──────────────┐ ┌───────────────────┐ │
│ │ PostgreSQL │ │ Scheduler │ │ MCP Bridge │ │
│ │ hive_nodes │ │ cost-aware │ │ colibri-mcp-ssh │ │
│ │ capabilities │ │ routing │ │ node-register │ │
│ │ cost_history │ │ dispatch │ │ cost-query │ │
│ └─────────────────┘ └──────────────┘ └───────────────────┘ │
└─────────────────────────────────────────────────────────────────┘
▲ │
│ hw-probe + capabilities │ task dispatch
│ (MCP tools/call) │ (MCP or direct)
│ ▼
┌────────────────────┐ ┌────────────────────┐
│ Node: clawdie-a │ │ Node: clawdie-b │
│ ┌──────────────┐ │ │ ┌──────────────┐ │
│ │ Colibri │ │ │ │ Colibri │ │
│ │ zot spawner │ │ │ │ zot spawner │ │
│ │ local SQLite│ │ │ │ ollama │ │
│ └──────────────┘ │ │ │ llama.cpp │ │
│ GPU: none │ │ │ models: │ │
│ RAM: 8GB │ │ │ qwen2.5:7b │ │
│ Cost: cloud-only │ │ └──────────────┘ │
│ │ │ GPU: RTX 4090 │
│ │ │ RAM: 64GB │
│ │ │ Cost: $0 local │
└────────────────────┘ └────────────────────┘
```
---
## Identity: Machine UUID
### Problem
Hostname is unstable. A live-USB node gets `clawdie` on every boot. A disk-installed node keeps the hostname the operator set. Mother needs a stable, verifiable identity that survives re-provisioning.
### Solution: `/var/db/machine-id`
A UUID (32 hex digits, written in the canonical hyphenated 36-character form) generated once, stored locally, and included in every hw-probe report. Like `systemd`'s `/etc/machine-id` but simpler: it serves only one purpose — hive identity.
```
# Generated once by clawdie-firstboot or operator session
/var/db/machine-id → a1b2c3d4-e5f6-7890-abcd-ef1234567890
```
**Properties:**
- **Stable across reboots**: stored on disk, not tmpfs
- **Survives re-provisioning**: if the seed partition preserves `/var/db/machine-id`, the same physical machine keeps the same identity
- **Not a secret**: it's an ID, not a key
- **Verifiable**: mother can check "has node a1b2c3d4 ever joined?" — if yes, this is a rejoin, not a new node
**Alternatives considered:**
| Approach | Pros | Cons |
|---|---|---|
| SMBIOS UUID (`hw.uuid`) | Truly hardware-bound, survives OS reinstall | Not available on all platforms (VPS, ARM); can be spoofed |
| SSH host key fingerprint | Cryptographically strong | Changes on OS reinstall; key rotation breaks identity |
| Random UUID (this design) | Portable, simple, survives seed restore | Can be copied/cloned (but same machine, same ID — that's correct) |
**Recommendation:** Generate on first boot, store in `/var/db/machine-id`. The hw-probe includes it as `machine_id`. Mother's `hive_nodes` table gets a `UNIQUE` constraint on `machine_id`.
### Schema change
```sql
ALTER TABLE hive_nodes ADD COLUMN machine_id TEXT;
ALTER TABLE hive_nodes ADD CONSTRAINT uq_machine_id UNIQUE (machine_id);
```
The `node-register-mcp` UPSERT switches from `ON CONFLICT (hostname)` to `ON CONFLICT (machine_id)`. Hostname becomes a mutable attribute (updates on rejoin), machine_id becomes the stable key.
---
## Capability Matrix
### What goes in the matrix
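Every capability is a value — usually a boolean, occasionally a string or list such as `gpu_vendor` or `max_model` — derived from probed facts, not a self-declaration. The hw-probe collects hardware; the trigger derives capabilities. The one exception is `provider_api_keys`, which is reported over MCP rather than by the hw-probe.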
| Capability | Derived from | Used for |
|---|---|---|
| `has_gpu` | GPU detected in pciconf | GPU-accelerated inference |
| `gpu_vendor` | amdgpu/nvidia driver | Model compatibility |
| `vulkan_compute` | vulkaninfo success | llama.cpp Vulkan backend |
| `can_run_local_llm` | RAM ≥ 16GB or has GPU | Eligibility for local task execution |
| `max_model` | RAM heuristic | Model size limit (3b, 7b-q4, 13b-q4, 34b-q4) |
| `cpu_only` | No GPU detected | Fallback only (slow) |
| `has_wifi` | wlan devices | Network capability |
| `has_zfs` | ZFS pools non-empty | Storage capability |
| `colibri_running` | service status | Agent host eligibility |
| `provider_api_keys` | MCP-reported (not hw probe) | Cloud provider availability |
### Local LLM capabilities (NEW)
Extend the hw-probe to detect running local LLM services and extend the trigger to derive capabilities from them:
```json
{
"local_llm": {
"ollama_running": true,
"ollama_models": ["qwen2.5:7b", "deepseek-r1:8b", "nomic-embed-text"],
"llama_cpp_installed": true,
"llama_cpp_models": ["/var/db/models/qwen2.5-7b-q4.gguf"],
"vulkan_support": true
}
}
```
**New derived capabilities:**
| Capability | Derivation |
|---|---|
| `ollama_available` | `ollama_running == true` |
| `ollama_models` | Array of model tags (from `ollama list`) |
| `llama_cpp_available` | Binary at `/usr/local/bin/llama-server` or similar |
| `llama_cpp_models` | GGUFs in `/var/db/models/` or `/usr/local/share/models/` |
| `can_embed_locally` | `nomic-embed-text` in ollama OR any embedding model loaded |
| `inference_tier` | `local-fast` (GPU ≥ 24GB), `local-slow` (CPU-only, RAM ≥ 16GB), `cloud-only` |
### Probe additions to `clawdie-hw-probe`
```sh
# New collectors
collect_machine_id() # cat /var/db/machine-id or generate
collect_ollama_status() # ollama list 2>/dev/null (JSON models)
collect_llama_cpp() # which llama-server; ls /var/db/models/*.gguf
collect_local_llm() # aggregate JSON block
```
---
## Cost-Aware Routing
### The routing decision
When a task arrives at the scheduler, it computes a **routing score** for each eligible node:
```
score = capability_match × urgency_weight × cost_weight × cache_weight
capability_match: 0.0–1.0 (does the node have the required skills/model?)
cost_weight: 0.0–1.0 (lower cost → higher weight)
urgency_weight: 0.0–1.0 (time-critical tasks penalize slow nodes)
cache_weight: 0.0–1.0 (warm cache → higher weight)
```
### Cost tiers
| Tier | Provider | Cost per 1M tokens | Latency | Used when |
|---|---|---|---|---|
| T0 (free) | Local ollama/llama.cpp | $0.00 | 5–60s | Non-urgent, capability match |
| T1 (cheap) | DeepSeek V3 | $0.27 / $1.10 | 2–5s | Default for most tasks |
| T2 (balanced) | Gemini Flash | $0.15 / $0.60 | 1–3s | High cache-hit tasks |
| T3 (premium) | Claude Sonnet 4 | $3.00 / $15.00 | 3–8s | Complex reasoning, only when needed |
### Local LLM routing rules
1. **If task is non-urgent AND a hive member has a matching local model → route locally at $0.00 cost.**
2. **If the local model is unavailable (node down, model not loaded) → fall back to T1 (DeepSeek).**
3. **If task is urgent (latency < 5s required) → skip local tier, go straight to T1.**
4. **Embedding tasks (RAG, similarity search) → always prefer local if available. Embeddings are cheap to compute locally and don't need reasoning.**
### How the scheduler knows
The scheduler queries `hive_nodes` for all online nodes, filters by `capabilities.can_run_local_llm`, checks `ollama_models` for the required model, and computes the routing score. If no local node matches, it falls back to the cloud tier.
The task schema gets a new field:
```sql
ALTER TABLE tasks ADD COLUMN routing JSONB;
-- {"preferred_tier": "local", "allowed_tiers": ["local", "cheap"], "max_cost_usd": 0.05, "deadline_s": null}
```
---
## Protocol: Join → Probe → Route
> **A2A integration:** See [hive-pane](./hive-pane.md#a2a-integration-planned) for the
> Agent Card, task exchange, and typed cost data parts. The routing engine
> (this doc) and the A2A protocol (hive-pane) are orthogonal layers — either
> can evolve independently.
### Phase 1: Join (existing, extended)
```
Node boots → clawdie-hw-probe runs → MCP tools/call node_register
→ mother UPSERTs hive_nodes → derive_capabilities() trigger fires
→ capabilities JSONB updated → node is "online"
```
**New:** `machine_id` is included. If the machine_id already exists, mother updates the existing row (rejoin), preserving history.
### Phase 2: Heartbeat (existing)
`colibri-daemon` sends a periodic heartbeat via MCP, which updates `last_seen`. If no heartbeat arrives for more than 5 minutes, the node's status becomes `offline`.
### Phase 3: Capability Sync (new)
On heartbeat, the node can optionally push updated capabilities (if ollama was installed, models changed, etc.). The hw-probe is re-run and the new `local_llm` block is sent.
### Phase 4: Task Dispatch (new)
```
Scheduler picks a queued task
→ queries hive_nodes for eligible nodes
→ computes routing score for each
→ picks winner
→ dispatches task via MCP or direct agent spawn
→ writes routing decision to task.routing JSONB
```
### Phase 5: Cost Capture (existing, extended)
When the task completes, the local daemon writes cost to its SQLite (T1.5). The mother aggregates hive cost via periodic MCP queries or push events.
---
## Three Implementation Options
### Option A — Mother-Centric (Minimal New Code)
**What:** Mother is the brain. Nodes register, mother routes. No peer-to-peer.
**Implementation:**
1. Add `machine_id` to `hive_nodes` + hw-probe (1 day)
2. Extend `derive_capabilities()` for local LLM (1 day)
3. Add `routing_score()` function to mother's PostgreSQL (stored function — zero Rust changes)
4. Extend `node-register-mcp` to accept `local_llm` block (0.5 day)
5. Add `local_llm` probe to `clawdie-hw-probe` (1 day)
**Rust changes:** Scheduler reads `capabilities` from hive_nodes via MCP query, computes score, dispatches. ~200 lines.
**Total:** ~3.5 days.
**Pros:**
- Simple to reason about — one source of truth
- Lowest implementation risk
- Scheduler lives on mother (always-on)
- Existing MCP bridge handles all communication
**Cons:**
- Mother is single point of failure for routing (but not execution — once dispatched, the task runs independently)
- Latency: scheduler must query mother on every tick
- Doesn't scale to 100+ nodes (not a real concern for our use case)
---
### Option B — Capability-Advertised with Local Routing Fallback
**What:** Mother stores the matrix, but nodes can also route tasks they own to peers directly. Hybrid: central registry + distributed execution.
**Implementation:**
1. All of Option A (3.5 days)
2. Add `capabilities` API to `colibri-daemon`'s Unix socket (self-awareness) — 1 day
3. Add local peer discovery via mDNS or Tailscale whois — 1 day
4. Add direct peer-to-peer task dispatch via Unix socket → MCP → remote — 2 days
5. Add fallback logic: "try local first, if no response in 30s, ask mother" — 1 day
**Total:** ~8.5 days.
**Pros:**
- Lower latency for local dispatch
- Survives mother downtime for peer-to-peer tasks
- Natural fit for local LLM use case (beefy node is on same LAN)
- Nodes that discover each other can route without phoning home
**Cons:**
- Complexity: two code paths (central + peer-to-peer)
- Security: peer-to-peer dispatch needs authentication (who can send tasks to my daemon?)
- Harder to audit: cost tracking must handle peer-dispatched vs mother-dispatched tasks differently
- mDNS doesn't work across subnets (Tailscale works but adds dependency)
---
### Option C — Capability-Matrix-as-Skill (Zero-Code Routing)
**What:** Don't build a routing engine at all. The capability matrix is exposed as an MCP tool that agents query. The agent itself decides where to route based on the matrix + its own reasoning. The matrix is advisory, not prescriptive.
**Implementation:**
1. All of Option A minus the `routing_score()` function (2.5 days)
2. Add `colibri_query_hive_capabilities` MCP tool on mother — returns full online node matrix (0.5 day)
3. Add `colibri_dispatch_to_node` MCP tool — sends task to a specific node (1 day)
4. Write a `hive-routing` skill that teaches agents how to use the matrix (0.5 day)
**Total:** ~4.5 days. **Zero scheduler changes.**
**Pros:**
- Exploits Colibri's architecture-as-differentiator: the agent IS the intelligence
- The routing decision is auditable in the conversation log (why did the agent pick this node?)
- Natural fit for local LLM — the agent can reason "this task is low priority, I'll try the beefy node first"
- No new scheduler code — just MCP tools + skills
- The skill can be iterated without recompiling Colibri
**Cons:**
- Each routing decision costs tokens (the agent must reason about it)
- Agents make inscrutable routing choices (the LLM "just knows")
- No hard guarantees — an agent might route a $5 task to Claude when DeepSeek would do fine
- Requires the agent to be "cost-aware" (which requires the MCP cost query tool — already shipping in T1.5)
---
## Recommendation
**Start with Option A (Mother-Centric) as the foundation, then layer Option C (Skill-Based) on top.**
The capability matrix, stable UUIDs, and local LLM probes are the foundation — they're needed regardless of the routing strategy. Option A gives us the data model and probe infrastructure. Once that's solid, adding the MCP tools for agent-driven routing (Option C) is a thin layer on top. Option B (peer-to-peer) adds complexity we don't need at this scale.
**Phase 1 (this sprint):** Machine UUID + local LLM probes + extended capabilities matrix. ~2.5 days.
**Phase 2 (next sprint):** Mother-based routing + MCP tools for agent-driven routing. ~2 days.
**Phase 3 (future):** Peer-to-peer fallback if we ever have >20 nodes.
---
## Deliverables by Phase
### Phase 1 — Identity & Capability Foundation
| Deliverable | Where | Lines |
|---|---|---|
| `machine_id` generation in `clawdie-firstboot` | clawdie-iso | ~15 |
| `collect_machine_id()` in hw-probe | clawdie-iso | ~10 |
| `collect_ollama_status()` in hw-probe | clawdie-iso | ~30 |
| `collect_llama_cpp()` in hw-probe | clawdie-iso | ~20 |
| `collect_local_llm()` aggregator in hw-probe | clawdie-iso | ~25 |
| `machine_id` column + constraint in mother_schema.sql | colibri | ~5 |
| Extended `derive_capabilities()` for `ollama_available`, `llama_cpp_available`, `inference_tier` | colibri | ~40 |
| `node-register-mcp` handling of `machine_id` key + new local_llm fields | colibri | ~15 |
| This design doc (hive-routing.md) | This file | ~0 (done) |
### Phase 2 — Routing Engine
| Deliverable | Where |
|---|---|
| `colibri_query_hive_capabilities` MCP tool | colibri-mcp |
| `colibri_dispatch_to_node` MCP tool | colibri-mcp |
| `hive-routing` skill | `.agent/skills/` |
| `Task.routing` JSONB field in colibri-store | colibri-store |
| Mother-side routing score as PostgreSQL function (optional — only if agent-driven routing proves insufficient) | mother_schema.sql |
---
## Integration with the Trifecta
The hive routing plan completes the trifecta's T2.x vision:
```
T1.4 Prompt Discipline ✅ Cache warming, cost mode, 3-region prompt
T1.5 Per-Task Cost ✅ Captured locally (this sprint)
T2.x Cost-Aware Routing 📋 Hive matrix → routing decision
T2.x Model Selection 📋 Arbitrage between cloud tiers + local LLM
T2.x Eval Harness 📋 Task success measurement
```
The key insight: local LLM is the **ultimate cache-hit token**. Every token generated on a beefy node's GPU is $0.0000. The routing engine's job is to maximize the use of $0 tokens without compromising task success rates.

View file

@ -53,12 +53,15 @@ warning.
| [headroom-sidecar](./headroom-sidecar.md) | Optional tool-result compression sidecar and its Unix-socket protocol |
| [jail-confinement](./jail-confinement.md) | Persistent vs ephemeral jails, priv-mode policy, reuse of spawner confinement for MCP servers |
| [mother-hive](./mother-hive.md) | Mother MCP architecture — forced-command SSH, single-home-in-colibri, peer auth, key-on-seed |
| [hive-routing](./hive-routing.md) | Hive member identity (machine UUID), capability matrix + local LLM probes, cost-aware task routing |
| [hive-pane](./hive-pane.md) | Glasspane for the hive — multi-node cost observability, A2A discovery, and operator board |
| [a2a-complexity-audit](./a2a-complexity-audit.md) | A2A code complexity impact — 6-protocol surface audit, when A2A pays off |
| [naming-decisions](./naming-decisions.md) | Ledger of harness-neutral / architecture renames — shipped and in-flight |
| [daemon-not-demon](./daemon-not-demon.md) | Why we say daemon (helper spirit) not demon (bad spirit) — English + Slovenian |
| [layered-soul](./layered-soul.md) | How Colibri consumes the layered-soul reviewed-context repo today vs planned |
| [task-board](./task-board.md) | Capability match scoring, cron scheduling, intake drain, SQLite backing |
| [quality-gates](./quality-gates.md) | `ci-checks.sh` as the pre-merge gate; why drift reached `main` before |
| [contracts](./contracts.md) | Stable JSON schemas (run-manifest, runtime-inventory, provider-smoke), golden tests |
| [contracts](./contracts.md) | Stable JSON schemas (run-manifest, runtime-inventory, provider-test), golden tests |
| [store-schema](./store-schema.md) | SQLite coordination schema and migration discipline |
| [external-mcp](./external-mcp.md) | MCP bridge for editors + external stdio MCP host; read/write/external-call gates |
| [operator-cli](./operator-cli.md) | The `colibri` CLI as a thin typed Unix-socket client over the daemon API |

View file

@ -23,7 +23,7 @@ _sheme in (De)serialize_, ne poslovne logike.
| -------------------------------------- | --------------------- | ------------------------------------------------------------------------- |
| `clawdie.interagent.run-manifest.v1` | `RunManifest` | Beleži tek gradnje/testa — vloga, agent, artefakti, povzetek. |
| `clawdie.runtime-version-inventory.v1` | `RuntimeInventory` | Posnetek izvajalnega okolja gostitelja — OS, različice paketov, npm/node. |
| `clawdie.provider-smoke.result.v1` | `ProviderSmokeResult` | Rezultat sonde predpomnilnika DeepSeek in obračun žetonov. |
| `clawdie.provider-test.result.v1` | `ProviderSmokeResult` | Rezultat sonde predpomnilnika DeepSeek in obračun žetonov. |
Konstante shem in strukture živijo v `crates/colibri-contracts/src/lib.rs`.

View file

@ -90,8 +90,25 @@ odgovori, neodvisno od tega, kako je bila zahteva izvedena.
→ [`crates/colibri-deepseek/src/lib.rs`](../../crates/colibri-deepseek/src/lib.rs)
### Trajno beleženje stroškov na opravilo (T1.5)
Agenti zot oddajajo dogodke `usage` (vhodni/izhodni/predpomnilniški žetoni,
strošek v USD). Glasspane jih sešteva v `PaneUsage` (natančnost mikro-centov,
varen za Eq). Ko srčni utrip daemona zazna izhod agenta, prebere zbrano
porabo, zgradi struct `TaskCost` in pokliče `store.set_task_cost()` — zapiše
8 stolpcev (ponudnik, model, vhodni/izhodni/predpomnilniški žetoni, strošek,
uspeh, finished_at) v vrstico opravila. MCP izpostavlja
`colibri_get_task_cost` za poizvedbe agentov o zgodovinskih stroških (npr.
"kateri model je najcenejši za to delovno obremenitev?").
→ [`crates/colibri-store/src/lib.rs`](../../crates/colibri-store/src/lib.rs)
(`TaskCost`, `set_task_cost`),
[`crates/colibri-glasspane/src/lib.rs`](../../crates/colibri-glasspane/src/lib.rs)
(`PaneUsage`, `add_cost`)
## Glej tudi
- [task-board](./task-board.md) — razporejevalnik, ki razpošilja opravila znotraj proračunov sej
- [contracts](./contracts.md) — shema TaskCostSummary v1
- [mother-hive](./mother-hive.md) — arhitektura MCP (druga stroškovna domena)
- [quality-gates](./quality-gates.md) — preverjanje, ki preverja razčlenjevanje cenovnih načinov
- [quality-gates](./quality-gates.md) — vrata, ki preverjajo razčlenjevanje cenovnih načinov

View file

@ -59,12 +59,13 @@ clippy.
| [headroom-sidecar](./headroom-sidecar.md) | Neobvezni stranski vagon za stiskanje rezultatov orodij in njegov protokol Unix vtičnice |
| [jail-confinement](./jail-confinement.md) | Trajne proti prehodnim ječam, pravilnik načina priv, ponovna uporaba omejitve zaganjalnika za strežnike MCP |
| [mother-hive](./mother-hive.md) | Arhitektura matičnega MCP — SSH s prisiljenim ukazom, enojni-dom-v-colibri, peer avtentikacija, ključ-na-semenu |
| [hive-pane](./hive-pane.md) | Steklena plošča za panj — opazovanje stroškov več vozlišč, odkrivanje A2A in operaterska nadzorna plošča |
| [naming-decisions](./naming-decisions.md) | Imenik preimenovanj, nevtralnih glede na opremo / arhitekturnih — dostavljenih in v teku |
| [daemon-not-demon](./daemon-not-demon.md) | Zakaj rečemo daemon (duh pomočnik) in ne demon (hudič) — angleško + slovensko |
| [layered-soul](./layered-soul.md) | Kako Colibri danes uporablja repozitorij pregledanega konteksta layered-soul proti načrtovanemu |
| [task-board](./task-board.md) | Točkovanje po zmožnostih, cron razporejanje, praznjenje vnosne vrste, podlaga SQLite |
| [quality-gates](./quality-gates.md) | `ci-checks.sh` kot preverjanje pred združitvijo; zakaj je odmik prej dosegel `main` |
| [contracts](./contracts.md) | Stabilne JSON sheme (run-manifest, runtime-inventory, provider-smoke), zlati testi |
| [contracts](./contracts.md) | Stabilne JSON sheme (run-manifest, runtime-inventory, provider-test), zlati testi |
| [store-schema](./store-schema.md) | Usklajevalna shema SQLite in disciplina migracij |
| [external-mcp](./external-mcp.md) | Most MCP za urejevalnike + zunanji gostitelj stdio MCP; dovoljenja za branje/pisanje/zunanji-klic |
| [operator-cli](./operator-cli.md) | CLI `colibri` kot tanek tipiziran odjemalec Unix vtičnice prek API procesa v ozadju |

View file

@ -1,5 +1,5 @@
{
"schema": "clawdie.provider-smoke.result.v1",
"schema": "clawdie.provider-test.result.v1",
"test_id": "colibri-probe-20260526T125049Z",
"host": "domedog",
"agent": "claude-domedog",

View file

@ -1,5 +1,5 @@
{
"schema": "clawdie.provider-smoke.result.v1",
"schema": "clawdie.provider-test.result.v1",
"test_id": "colibri-probe-20260526T125645Z",
"host": "osa",
"agent": "codex-osa",