feat/proof-text-inline #237

Merged
clawdie merged 2 commits from feat/proof-text-inline into main 2026-06-27 19:22:13 +02:00
12 changed files with 191 additions and 28 deletions

8
Cargo.lock generated
View file

@ -355,6 +355,13 @@ dependencies = [
"tokio",
]
[[package]]
name = "colibri-deploy"
version = "0.12.0"
dependencies = [
"thiserror 2.0.18",
]
[[package]]
name = "colibri-glasspane"
version = "0.12.0"
@ -386,6 +393,7 @@ dependencies = [
"clap",
"colibri-client",
"colibri-daemon",
"colibri-deploy",
"colibri-pf",
"colibri-zfs",
"serde",

View file

@ -1,5 +1,5 @@
[workspace]
members = ["crates/colibri-contracts", "crates/colibri-deepseek", "crates/colibri-runtime", "crates/colibri-glasspane", "crates/colibri-daemon", "crates/colibri-client", "crates/colibri-glasspane-tui", "crates/colibri-store", "crates/colibri-skills", "crates/colibri-mcp", "crates/colibri-vault", "crates/colibri-zfs", "crates/colibri-pf", "crates/clawdie"]
members = ["crates/colibri-contracts", "crates/colibri-deepseek", "crates/colibri-runtime", "crates/colibri-glasspane", "crates/colibri-daemon", "crates/colibri-client", "crates/colibri-glasspane-tui", "crates/colibri-store", "crates/colibri-skills", "crates/colibri-mcp", "crates/colibri-vault", "crates/colibri-zfs", "crates/colibri-pf", "crates/colibri-deploy", "crates/clawdie"]
[workspace.package]
version = "0.12.0"

View file

@ -253,9 +253,22 @@ pub async fn heartbeat(state: &SharedState, _stall_timeout: Duration) {
// Capture per-task cost from accumulated usage (T1.5).
if let Some(session_id) = &handle.config.session_id {
if let Some(task_id) = session_id.strip_prefix("task-") {
let usage = {
let (usage, proof_text) = {
let gp = state.glasspane.read().await;
gp.get(&handle.id).map(|p| p.accumulated_usage().clone())
let pane = gp.get(&handle.id);
let u = pane.map(|p| p.accumulated_usage().clone());
let proof = pane.map(|p| {
serde_json::json!({
"agent": p.agent,
"state": format!("{:?}", p.state()),
"session": p.session_id(),
"tokens_in": p.accumulated_usage().input_tokens,
"tokens_out": p.accumulated_usage().output_tokens,
"cache_read": p.accumulated_usage().cache_read_tokens,
"cost_usd": p.accumulated_usage().cost(),
}).to_string()
});
(u, proof)
};
if let Some(u) = usage {
let tc = colibri_store::TaskCost {
@ -292,7 +305,7 @@ pub async fn heartbeat(state: &SharedState, _stall_timeout: Duration) {
"task cost captured"
);
// Best-effort push to mother for dashboard aggregation.
push_cost_to_mother(task_id, &tc);
push_cost_to_mother(task_id, &tc, proof_text.as_deref());
}
Err(e) => {
warn!(task_id = %task_id, error = %e, "failed to write task cost")
@ -317,7 +330,7 @@ pub async fn heartbeat(state: &SharedState, _stall_timeout: Duration) {
/// SSH's to mother with `report-task-cost` forced command, pipes TaskCost JSON
/// to stdin. Failures are logged as warnings — the local SQLite store remains
/// authoritative.
fn push_cost_to_mother(task_id: &str, tc: &colibri_store::TaskCost) {
fn push_cost_to_mother(task_id: &str, tc: &colibri_store::TaskCost, proof_text: Option<&str>) {
let mother_host = match std::env::var("COLIBRI_MOTHER_HOST").ok() {
Some(h) => h,
None => return,
@ -334,9 +347,9 @@ fn push_cost_to_mother(task_id: &str, tc: &colibri_store::TaskCost) {
let cache_write_tokens = tc.cache_write_tokens;
let cost = tc.cost;
let success = tc.success;
// Optional: tmux-screenshot UUID set by the agent harness on completion.
// The daemon passes it through to mother; mother JOINs with screenshot storage.
let screenshot_uuid = std::env::var("COLIBRI_TASK_SCREENSHOT_UUID").ok();
// Text proof from glasspane at task exit — agent, state, tokens, cost.
// Replaces the old COLIBRI_TASK_SCREENSHOT_UUID env-var approach.
let proof = proof_text.map(|s| s.to_string());
// Run SSH in a blocking thread — heartbeat is async, SSH is fast (<1s).
std::thread::spawn(move || {
@ -354,8 +367,8 @@ fn push_cost_to_mother(task_id: &str, tc: &colibri_store::TaskCost) {
"success": success,
"finished_at": chrono::Utc::now().to_rfc3339(),
});
if let Some(ref uuid) = screenshot_uuid {
payload["screenshot_uuid"] = serde_json::Value::String(uuid.clone());
if let Some(ref p) = proof {
payload["proof_text"] = serde_json::Value::String(p.clone());
}
let payload_line = serde_json::to_string(&payload).unwrap_or_default();
let mut child = match std::process::Command::new("ssh")

View file

@ -0,0 +1,8 @@
[package]
name = "colibri-deploy"
version.workspace = true
edition = "2021"
license = "MIT"
[dependencies]
thiserror = "2"

View file

@ -0,0 +1,92 @@
use std::process::Command;
/// Execute `command` on the given deployment `target`.
///
/// The literal target `"host"` runs the command through the local shell;
/// every other value is treated as the name of a Bastille jail.
///
/// # Errors
///
/// Propagates the [`Error`] produced by the host or jail runner.
pub fn run(target: &str, command: &str) -> Result<String, Error> {
    if target == "host" {
        run_host(command)
    } else {
        run_jail(target, command)
    }
}
/// List available deployment targets.
///
/// `"host"` is always the first entry. Names parsed from the output of
/// `sudo -n bastille list` follow when that command runs successfully; if it
/// cannot be started or exits unsuccessfully, only `"host"` is returned.
///
/// `sudo` is invoked with `-n` (non-interactive) so that a machine without
/// passwordless sudo fails fast instead of stopping at a password prompt.
pub fn list_targets() -> Vec<String> {
    let mut targets = vec!["host".to_string()];

    let listing = match Command::new("sudo")
        .args(["-n", "bastille", "list"])
        .output()
    {
        Ok(output) if output.status.success() => output,
        // Best effort: a missing or failing Bastille leaves just the host.
        _ => return targets,
    };

    let stdout = String::from_utf8_lossy(&listing.stdout);
    targets.extend(
        stdout
            .lines()
            .map(str::trim)
            // Skip blank lines, the `JID ...` header row and separator rules.
            .filter(|line| {
                !line.is_empty() && !line.starts_with("JID") && !line.starts_with('-')
            })
            // Rows with fewer than three columns are not treated as entries.
            .filter(|line| line.split_whitespace().count() >= 3)
            // The first column is used as the target name.
            .filter_map(|line| line.split_whitespace().next())
            .map(str::to_string),
    );
    targets
}
fn run_host(command: &str) -> Result<String, Error> {
let output = Command::new("sh")
.args(["-c", command])
.output()
.map_err(|e| Error::Command(format!("host command: {e}")))?;
let stdout = String::from_utf8_lossy(&output.stdout).to_string();
let stderr = String::from_utf8_lossy(&output.stderr).to_string();
if !output.status.success() {
return Err(Error::Command(format!(
"exit {}: {}",
output.status.code().unwrap_or(-1),
if stderr.is_empty() { stdout } else { stderr }
)));
}
Ok(if stdout.is_empty() { stderr } else { stdout })
}
fn run_jail(jail: &str, command: &str) -> Result<String, Error> {
let output = Command::new("sudo")
.args(["bastille", "cmd", jail, command])
.output()
.map_err(|e| Error::Command(format!("bastille cmd {jail}: {e}")))?;
let stdout = String::from_utf8_lossy(&output.stdout).to_string();
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr).to_string();
let msg = if stderr.is_empty() { stdout } else { stderr };
return Err(Error::Command(format!("jail {jail}: {msg}")));
}
Ok(stdout)
}
/// Error type returned by the deploy helpers in this module.
#[derive(Debug, thiserror::Error)]
pub enum Error {
/// A command could not be started or exited unsuccessfully; the string
/// holds the formatted failure detail.
#[error("deploy error: {0}")]
Command(String),
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A successful host command yields its standard output.
    #[test]
    fn run_host_echo() {
        let output = run("host", "echo hello").unwrap();
        assert_eq!(output.trim(), "hello");
    }

    /// An unsuccessful exit status on the host is reported as an error.
    #[test]
    fn run_host_fail() {
        let outcome = run("host", "exit 1");
        assert!(outcome.is_err());
    }

    /// The local host is listed regardless of what Bastille reports.
    #[test]
    fn list_targets_includes_host() {
        let found = list_targets().iter().any(|name| name == "host");
        assert!(found);
    }
}

View file

@ -14,6 +14,7 @@ colibri-client = { path = "../colibri-client" }
colibri-daemon = { path = "../colibri-daemon" }
colibri-pf = { path = "../colibri-pf" }
colibri-zfs = { path = "../colibri-zfs" }
colibri-deploy = { path = "../colibri-deploy" }
clap = { version = "4", features = ["derive"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"

View file

@ -207,6 +207,25 @@ pub fn tool_list() -> Vec<Value> {
"List active PF state table entries: protocol, source, destination, state.",
None,
),
// ── Deploy tools ──
// Tool names must be spelled exactly as the `dispatch_tool` match arms
// ("colibri_deploy_run" / "colibri_deploy_targets"); the dispatcher matches
// on these literal strings.
json_tool(
    "colibri_deploy_run",
    "Run a shell command on the host or in a Bastille jail. Use colibri_deploy_targets to list available targets.",
    Some(serde_json::json!({
        "type": "object",
        "properties": {
            "target": { "type": "string", "description": "Target: host or jail name from colibri_deploy_targets" },
            "command": { "type": "string", "description": "Shell command to run" }
        },
        "required": ["target", "command"]
    })),
),
json_tool(
    "colibri_deploy_targets",
    "List deploy targets: host and available Bastille jails.",
    None,
),
// ── Wiki tools ──
json_tool(
"colibri_wiki_search",
@ -433,6 +452,23 @@ pub async fn dispatch_tool(
serde_json::json!({"page": page, "content": content}),
))
}
// ── Deploy dispatch ──
"colibri_deploy_run" => {
let target = require_string(arguments, "target")?;
let command = require_string(arguments, "command")?;
let output = colibri_deploy::run(&target, &command)
.map_err(|e| McpError::internal(format!("deploy: {e}")))?;
Ok(tool_text(serde_json::json!({
"target": target,
"output": output,
})))
}
"colibri_deploy_targets" => {
let targets = colibri_deploy::list_targets();
Ok(tool_text(
serde_json::to_value(&targets).unwrap_or_default(),
))
}
"colibri_external_mcp_servers" => {
let registry = external::load_registry_if_present(&config.external_config_path).await?;
Ok(tool_text(serde_json::json!({

View file

@ -255,5 +255,5 @@ fn tool_list_has_all_phase1_tools() {
assert!(names.contains(&"colibri_list_task_costs"));
assert!(names.contains(&"colibri_get_task"));
assert_eq!(names.len(), 18);
assert_eq!(names.len(), 20);
}

View file

@ -30,12 +30,12 @@ case "${SSH_ORIGINAL_COMMAND:-}" in
# Input: {"node_hostname":"debby","task_id":"abc","provider":"deepseek",
# "model":"deepseek-chat","input_tokens":150,"output_tokens":80,
# "cache_read_tokens":200,"cache_write_tokens":50,
# "cost_usd":0.0042,"success":true,"screenshot_uuid":"a1b2c3d4e5f6",
# "cost_usd":0.0042,"success":true,"proof_text":"{\"agent\":\"...\",\"state\":\"...\",\"cost_usd\":0.0042}",
# "finished_at":"2026-06-27T12:00:00Z"}
psql -d mother_hive -tA -v ON_ERROR_STOP=1 <<'PSQL'
INSERT INTO task_costs (node_id, task_id, provider, model,
input_tokens, output_tokens, cache_read_tokens, cache_write_tokens,
cost_usd, success, screenshot_uuid, finished_at)
cost_usd, success, proof_text, finished_at)
SELECT
(SELECT id FROM hive_nodes WHERE hostname = j->>'node_hostname'),
j->>'task_id',
@ -47,7 +47,7 @@ SELECT
COALESCE((j->>'cache_write_tokens')::BIGINT, 0),
COALESCE((j->>'cost_usd')::DOUBLE PRECISION, 0.0),
COALESCE((j->>'success')::BOOLEAN, false),
NULLIF(j->>'screenshot_uuid', ''),
NULLIF(j->>'proof_text', ''),
COALESCE((j->>'finished_at')::TIMESTAMPTZ, now())
FROM (SELECT (pg_read_file('/dev/stdin')::JSONB) AS j) AS _;
PSQL

View file

@ -63,7 +63,7 @@ SELECT json_build_object(
tc.cost_usd,
tc.success,
tc.finished_at,
tc.screenshot_uuid
tc.proof_text
FROM task_costs tc
LEFT JOIN hive_nodes hn ON hn.id = tc.node_id
ORDER BY tc.finished_at DESC

View file

@ -114,7 +114,7 @@ h1 .dot{display:inline-block; width:8px; height:8px; border-radius:50%; margin-r
.lightbox{display:none; position:fixed; inset:0; background:rgba(0,0,0,.94); z-index:1000;
align-items:center; justify-content:center; padding:1.5rem}
.lightbox.open{display:flex}
.lightbox img{max-width:94vw; max-height:88vh; border-radius:4px; box-shadow:0 0 40px rgba(0,180,216,.15)}
.lightbox pre{max-width:94vw; max-height:88vh; border-radius:4px; box-shadow:0 0 40px rgba(0,180,216,.15)}
.lightbox-close{position:fixed; top:1rem; right:1.5rem; background:var(--surface); color:var(--fg2);
border:1px solid var(--border); border-radius:4px; padding:.4rem 1rem; font-family:inherit;
font-size:.78rem; cursor:pointer; z-index:1001}
@ -189,7 +189,7 @@ h1 .dot{display:inline-block; width:8px; height:8px; border-radius:50%; margin-r
<div class="lightbox" id="lightbox">
<button class="lightbox-close">Esc to close</button>
<div class="lightbox-meta" id="lightbox-meta"></div>
<img id="lb-img" src="" alt="screenshot proof">
<pre id="lb-text"></pre>
</div>
<script>
@ -304,14 +304,14 @@ function renderCard(t) {
const total = (t.input_tokens||0) + (t.cache_read_tokens||0);
const cachePct = total > 0 ? Math.round((t.cache_read_tokens||0) / total * 100) : 0;
const freshPct = 100 - cachePct;
const hasProof = !!t.screenshot_uuid;
const hasProof = !!t.proof_text;
const cls = hasProof ? 'card has-proof' : 'card';
const onClick = hasProof
? `onclick="openProof('${esc(t.screenshot_uuid)}','${esc(t.task_id||'')}','${esc(t.provider||'')}','${(t.cost_usd||0).toFixed(4)}')"`
? `onclick="openProof(this,'${esc(t.task_id||'')}')"`
: '';
return `
<div class="${cls}" ${onClick} title="${hasProof?'Click for terminal proof':''}">
<div class="${cls}" ${onClick} title="${hasProof?'Click for proof':''}">
<div class="card-success ${t.success?'ok':'fail'}">${t.success?'✓':'✗'}</div>
<div class="card-provider">${esc(t.provider||'unknown')}</div>
<div class="card-cost">$${(t.cost_usd||0).toFixed(4)}</div>
@ -328,11 +328,16 @@ function renderCard(t) {
}
// ══ lightbox ════════════════════════════════════════════════════════════
function openProof(uuid, taskId, provider, cost) {
function openProof(_el, taskId) {
const t = (DATA.tasks || []).find(t => t.task_id === taskId);
const raw = t ? (t.proof_text || '{}') : '{}';
let display;
try { display = JSON.stringify(JSON.parse(raw), null, 2); }
catch(_) { display = raw; }
const lb = document.getElementById('lightbox');
document.getElementById('lb-img').src = `../screenshots/${uuid}.png`;
document.getElementById('lb-text').textContent = display;
document.getElementById('lightbox-meta').innerHTML =
`<strong>${esc(taskId)}</strong> ${esc(provider)} · $${cost}`;
`<strong>${esc(taskId)}</strong> ${esc(t?.provider||'')} · $${(t?.cost_usd||0).toFixed(4)}`;
lb.classList.add('open');
}
function closeLb() { document.getElementById('lightbox').classList.remove('open'); }
@ -352,7 +357,7 @@ function showJSON() {
byNode[n].push({task_id:t.task_id,provider:t.provider,model:t.model,
cost:t.cost_usd,success:t.success,finished_at:t.finished_at,
tokens:{in:t.input_tokens,out:t.output_tokens,cache_read:t.cache_read_tokens},
screenshot_uuid:t.screenshot_uuid||null});
proof_text:t.proof_text||null});
}
const output = {updated_at:DATA.updated_at, summary:DATA.summary, nodes:byNode};
document.getElementById('json-output').textContent = JSON.stringify(output, null, 2);
@ -379,7 +384,7 @@ document.getElementById('toggle-json').addEventListener('click', () => {
if (panel.classList.contains('open')) showJSON();
});
document.getElementById('lightbox').addEventListener('click', e => {
if (e.target.classList.contains('lightbox') || e.target.id === 'lb-img') closeLb();
if (e.target.classList.contains('lightbox') ) closeLb();
});
document.querySelector('.lightbox-close').addEventListener('click', closeLb);
document.addEventListener('keydown', e => {

View file

@ -65,15 +65,15 @@ CREATE TABLE IF NOT EXISTS task_costs (
cache_write_tokens BIGINT NOT NULL DEFAULT 0,
cost_usd DOUBLE PRECISION NOT NULL DEFAULT 0.0,
success BOOLEAN NOT NULL DEFAULT false,
screenshot_uuid TEXT, -- tmux-screenshot content hash (12-char UUID)
proof_text TEXT, -- JSON text proof from glasspane at task exit (agent, state, tokens, cost)
finished_at TIMESTAMPTZ NOT NULL DEFAULT now(),
reported_at TIMESTAMPTZ NOT NULL DEFAULT now(),
screenshot_uuid TEXT -- optional; links to tmux-screenshot capture at task completion
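proof_text TEXT -- optional; JSON text proof captured at task exit. NOTE(review): proof_text is also declared earlier in this CREATE TABLE; PostgreSQL rejects a column specified more than once, so keep only one declaration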
);
CREATE INDEX IF NOT EXISTS idx_task_costs_node ON task_costs (node_id);
CREATE INDEX IF NOT EXISTS idx_task_costs_finished ON task_costs (finished_at DESC);
CREATE INDEX IF NOT EXISTS idx_task_costs_provider ON task_costs (provider, model);
ALTER TABLE task_costs ADD COLUMN IF NOT EXISTS screenshot_uuid TEXT;
ALTER TABLE task_costs ADD COLUMN IF NOT EXISTS proof_text TEXT;
CREATE TABLE IF NOT EXISTS build_queue (
id SERIAL PRIMARY KEY,