From 1fd4ad680b71395ab5c95e590c5d01a4c8237517 Mon Sep 17 00:00:00 2001 From: Sam & Claude Date: Tue, 26 May 2026 13:23:45 +0200 Subject: [PATCH] Add migration inventory + colibri-contracts crate (Sam & Claude) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit docs/MIGRATION-INVENTORY.md: TS surfaces to keep as contracts, files to retire later (gated), schemas to port, crate build order, and proof gates before replacing the TS service. Extract contracts, not logic. crates/colibri-contracts: serde structs for clawdie.interagent.run-manifest.v1, clawdie.runtime-version-inventory.v1, clawdie.provider-smoke.result.v1. Root becomes a workspace; existing binaries unchanged. Golden tests round-trip the real committed osa+domedog manifests (5 passed) — a stable shared base before deeper refactor. Co-Authored-By: Claude Opus 4.7 --- Cargo.lock | 8 ++ Cargo.toml | 3 + crates/colibri-contracts/Cargo.toml | 10 ++ crates/colibri-contracts/src/lib.rs | 120 +++++++++++++++++++++++ crates/colibri-contracts/tests/golden.rs | 83 ++++++++++++++++ docs/MIGRATION-INVENTORY.md | 102 +++++++++++++++++++ 6 files changed, 326 insertions(+) create mode 100644 crates/colibri-contracts/Cargo.toml create mode 100644 crates/colibri-contracts/src/lib.rs create mode 100644 crates/colibri-contracts/tests/golden.rs create mode 100644 docs/MIGRATION-INVENTORY.md diff --git a/Cargo.lock b/Cargo.lock index 8d92d76..c13e939 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -91,6 +91,14 @@ dependencies = [ "tokio", ] +[[package]] +name = "colibri-contracts" +version = "0.0.1" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "core-foundation-sys" version = "0.8.7" diff --git a/Cargo.toml b/Cargo.toml index 983dea5..5743987 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,3 +1,6 @@ +[workspace] +members = ["crates/colibri-contracts"] + [package] name = "colibri" version = "0.0.1" diff --git a/crates/colibri-contracts/Cargo.toml 
b/crates/colibri-contracts/Cargo.toml
new file mode 100644
index 0000000..acf961f
--- /dev/null
+++ b/crates/colibri-contracts/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+name = "colibri-contracts"
+version = "0.0.1"
+edition = "2021"
+license = "AGPL-3.0-only"
+description = "Serde structs for the stable Clawdie/Colibri JSON contracts"
+
+[dependencies]
+serde = { version = "1", features = ["derive"] }
+serde_json = "1"
diff --git a/crates/colibri-contracts/src/lib.rs b/crates/colibri-contracts/src/lib.rs
new file mode 100644
index 0000000..6ef50c5
--- /dev/null
+++ b/crates/colibri-contracts/src/lib.rs
@@ -0,0 +1,134 @@
+//! colibri-contracts — serde structs for the stable Clawdie/Colibri JSON
+//! contracts shared across agents and hosts.
+//!
+//! These mirror the TypeScript definitions in `clawdie-ai/src/colibri-*.ts`
+//! and the committed `manifests/`. They carry *no logic* — just the wire
+//! shapes — so every lane (Rust + TS) shares one stable base.
+
+use std::collections::BTreeMap;
+
+use serde::{Deserialize, Serialize};
+use serde_json::Value;
+
+/// `schema` value that identifies a run-manifest document.
+pub const RUN_MANIFEST_SCHEMA: &str = "clawdie.interagent.run-manifest.v1";
+/// `schema` value that identifies a runtime version inventory document.
+pub const RUNTIME_INVENTORY_SCHEMA: &str = "clawdie.runtime-version-inventory.v1";
+/// `schema` value that identifies a provider smoke-test result document.
+pub const PROVIDER_SMOKE_SCHEMA: &str = "clawdie.provider-smoke.result.v1";
+
+// ---------------------------------------------------------------------------
+// clawdie.runtime-version-inventory.v1
+// ---------------------------------------------------------------------------
+
+/// Wire shape of a `clawdie.runtime-version-inventory.v1` document.
+///
+/// `Option` fields may be missing on input and are omitted from the output
+/// when `None`; the map and list fields fall back to empty when missing.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct RuntimeInventory {
+    pub schema: String,
+    pub host: String,
+    pub os: String,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub node: Option<String>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub npm: Option<String>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub pi: Option<String>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub npm_prefix: Option<String>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub package_manager: Option<String>,
+    // NOTE(review): value type assumed to be `String` — confirm against the TS source.
+    #[serde(default)]
+    pub iso_npm_globals_pin: BTreeMap<String, String>,
+    #[serde(default)]
+    pub notes: Vec<String>,
+}
+
+// ---------------------------------------------------------------------------
+// clawdie.interagent.run-manifest.v1
+// ---------------------------------------------------------------------------
+
+/// Optional protocol entries nested under `protocols` in a run manifest.
+#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
+pub struct RunManifestProtocols {
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub download: Option<String>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub capture: Option<String>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub artifact_transfer: Option<String>,
+}
+
+/// Wire shape of a `clawdie.interagent.run-manifest.v1` document.
+///
+/// NOTE(review): `network` and `summary` are assumed to carry free-form JSON
+/// values (`serde_json::Value`) — confirm against the TS source.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct RunManifest {
+    pub schema: String,
+    pub test_id: String,
+    pub role: String,
+    pub host: String,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub agent: Option<String>,
+    pub started_at: String,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub ended_at: Option<String>,
+    #[serde(default)]
+    pub protocols: RunManifestProtocols,
+    #[serde(default)]
+    pub network: BTreeMap<String, Value>,
+    #[serde(default)]
+    pub artifacts: BTreeMap<String, String>,
+    #[serde(default)]
+    pub summary: BTreeMap<String, Value>,
+    #[serde(default)]
+    pub raw_transfer_required: bool,
+    #[serde(default)]
+    pub notes: Vec<String>,
+}
+
+// ---------------------------------------------------------------------------
+// clawdie.provider-smoke.result.v1
+// ---------------------------------------------------------------------------
+
+/// Token usage counters; each one defaults to 0 when missing from the input.
+#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
+pub struct ProviderUsage {
+    #[serde(default)]
+    pub prompt_tokens: u64,
+    #[serde(default)]
+    pub completion_tokens: u64,
+    #[serde(default)]
+    pub total_tokens: u64,
+    #[serde(default)]
+    pub prompt_cache_hit_tokens: u64,
+    #[serde(default)]
+    pub prompt_cache_miss_tokens: u64,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct ProviderSmokeResult {
+    pub schema: String,
+    pub test_id: String,
+    pub host: String,
+    pub agent: String,
+    pub provider: String,
+    pub model: String,
+    pub started_at: String,
+    pub ended_at: String,
+    pub status: String,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub warm_usage: Option<ProviderUsage>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub probe_usage: Option<ProviderUsage>,
+    #[serde(default)]
+    pub cache_hit_tokens: u64,
+    #[serde(default)]
+    pub cache_hit_observed: bool,
+    #[serde(default)]
+    pub notes: Vec<String>,
+}
diff --git a/crates/colibri-contracts/tests/golden.rs b/crates/colibri-contracts/tests/golden.rs
new file mode 100644
index 0000000..8edd186
--- /dev/null
+++ b/crates/colibri-contracts/tests/golden.rs
@@ -0,0 +1,90 @@
+//! Golden tests: the contract structs must accept the real committed manifests
+//! (produced by different agents/hosts) and round-trip without data loss.
+
+use colibri_contracts::*;
+
+const OSA_INV: &str = include_str!("../../../manifests/2026-05-26-osa-runtime-inventory.json");
+const DOMEDOG_INV: &str =
+    include_str!("../../../manifests/2026-05-26-domedog-runtime-inventory.json");
+const OSA_RUN: &str = include_str!("../../../manifests/2026-05-26-osa-pi-bump-run-manifest.json");
+const DOMEDOG_RUN: &str =
+    include_str!("../../../manifests/2026-05-26-domedog-linux-lane-run-manifest.json");
+
+/// Serializes `value` to JSON, parses it back, and asserts the result equals
+/// the input. Panics if either conversion fails or the values differ.
+fn roundtrip_eq<T>(value: &T)
+where
+    T: serde::Serialize + serde::de::DeserializeOwned + PartialEq + std::fmt::Debug,
+{
+    let json = serde_json::to_string(value).expect("serialize");
+    let again: T = serde_json::from_str(&json).expect("re-deserialize");
+    assert_eq!(value, &again, "round-trip mismatch");
+}
+
+/// The committed osa inventory parses with the expected schema, host, pi and package manager.
+#[test]
+fn parses_osa_inventory() {
+    let inv: RuntimeInventory = serde_json::from_str(OSA_INV).expect("parse osa inventory");
+    assert_eq!(inv.schema, RUNTIME_INVENTORY_SCHEMA);
+    assert_eq!(inv.host, "osa.smilepowered.org");
+    assert_eq!(inv.pi.as_deref(), Some("0.75.5"));
+    assert_eq!(inv.package_manager.as_deref(), Some("pkg"));
+    roundtrip_eq(&inv);
+}
+
+/// The committed domedog inventory parses with the expected schema, host, pi and node.
+#[test]
+fn parses_domedog_inventory() {
+    let inv: RuntimeInventory = serde_json::from_str(DOMEDOG_INV).expect("parse domedog inventory");
+    assert_eq!(inv.schema, RUNTIME_INVENTORY_SCHEMA);
+    assert_eq!(inv.host, "domedog");
+    assert_eq!(inv.pi.as_deref(), Some("0.75.5"));
+    assert_eq!(inv.node.as_deref(), Some("v24.16.0"));
+    roundtrip_eq(&inv);
+}
+
+/// The committed osa run manifest parses with the expected schema, role, agent and summary key.
+#[test]
+fn parses_osa_run_manifest() {
+    let run: RunManifest = serde_json::from_str(OSA_RUN).expect("parse osa run manifest");
+    assert_eq!(run.schema, RUN_MANIFEST_SCHEMA);
+    assert_eq!(run.role, "freebsd-build");
+    assert_eq!(run.agent.as_deref(), Some("codex-osa"));
+    assert!(run.summary.contains_key("pi_after"));
+    roundtrip_eq(&run);
+}
+
+/// The committed domedog run manifest parses and points at its runtime inventory artifact.
+#[test]
+fn parses_domedog_run_manifest() {
+    let run: RunManifest = serde_json::from_str(DOMEDOG_RUN).expect("parse domedog run manifest");
+    assert_eq!(run.schema, RUN_MANIFEST_SCHEMA);
+    assert_eq!(run.role, "linux-build-smoke");
+    assert_eq!(run.artifacts.get("runtime_inventory").map(String::as_str), Some("manifests/2026-05-26-domedog-runtime-inventory.json"));
+    roundtrip_eq(&run);
+}
+
+/// A hand-written `skipped` result parses with no usage data and round-trips.
+#[test]
+fn provider_smoke_skipped_shape() {
+    // No live result committed yet; assert the struct accepts a skipped result.
+    let raw = r#"{
+        "schema": "clawdie.provider-smoke.result.v1",
+        "test_id": "colibri-probe-20260526T080805Z",
+        "host": "domedog",
+        "agent": "claude-domedog",
+        "provider": "deepseek",
+        "model": "deepseek-chat",
+        "started_at": "2026-05-26T08:08:05Z",
+        "ended_at": "2026-05-26T08:08:05Z",
+        "status": "skipped",
+        "cache_hit_tokens": 0,
+        "cache_hit_observed": false,
+        "notes": ["DEEPSEEK_API_KEY unset/empty — build verified, live cache probe skipped"]
+    }"#;
+    let res: ProviderSmokeResult = serde_json::from_str(raw).expect("parse provider smoke");
+    assert_eq!(res.schema, PROVIDER_SMOKE_SCHEMA);
+    assert_eq!(res.status, "skipped");
+    assert!(res.warm_usage.is_none());
+    roundtrip_eq(&res);
+}
diff --git a/docs/MIGRATION-INVENTORY.md b/docs/MIGRATION-INVENTORY.md
new file mode 100644
index 0000000..8026bfa
--- /dev/null
+++ b/docs/MIGRATION-INVENTORY.md
@@ -0,0 +1,102 @@
+# Colibri Migration Inventory
+
+**Status:** narrow extraction + proof slice — NOT a replacement of the Clawdie
+TS service. Source plan: `clawdie-ai/doc/COLIBRI-CONTROLPLANE-PLAN.md`.
+
+Principle: **extract contracts, not logic.** Port the stable wire surfaces from
+`clawdie-ai` into Rust, use the TS definitions and committed `manifests/` as
+golden fixtures, and do not migrate orchestration until the proof gates pass.
+
+## 0. Evidence freeze
+
+`manifests/` is the source of truth. Keep these as the comparison record:
+
+- osa FreeBSD inventory — `manifests/2026-05-26-osa-runtime-inventory.json`
+- domedog Linux inventory — `manifests/2026-05-26-domedog-runtime-inventory.json`
+- debby inventory — once Rust exists there
+- DeepSeek live cache result — once the key lands (gates the first real slice)
+
+Do not migrate major logic until the **DeepSeek live cache manifest exists**.
+
+## 1. TS files to keep as contracts
+
+Port the *shape*, not the implementation. 
Use each as a golden fixture/test:
+
+| TS source (`clawdie-ai`) | Contract extracted |
+| --------------------------------- | -------------------------------------------------------------- |
+| `src/colibri-run-manifest.ts` | `clawdie.interagent.run-manifest.v1` |
+| `src/colibri-runtime-inventory.ts`| `clawdie.runtime-version-inventory.v1` + drift-report logic |
+| `src/colibri-pi-events.ts` | Pi `--mode json` event taxonomy (session/turn/tool/message…) |
+| `src/provider-capabilities.ts` | model capability table (stability, heartbeatSafe) |
+| watchdog status socket | `{"cmd":"status"}` → `{ok,data:{mode,throttled,freeMemoryMB,activeJails,queuedGroups,controlplaneStatus}}` |
+| DeepSeek/OpenRouter result shape | `usage` incl. `prompt_cache_hit_tokens` / `prompt_cache_miss_tokens` |
+| task/run lifecycle states | `queued → claimed → started → done \| failed` |
+
+## 2. TS files to retire later (gated)
+
+Only after the proof gates — and after a per-file caller inventory. Snapshot:
+`clawdie-ai` `archive/multitenant-claude-pre-divergence`.
+
+- `src/controlplane-aider-runner.ts` — non-Pi backend
+- `src/controlplane-heartbeat-codex.ts` — per-backend heartbeat (the `*-pi` one stays)
+- `src/agent-runner.ts` — generic multi-backend runner
+- `src/tmux-screenshot-command.ts` — terminal-scrape glue
+- `src/jail-exec-runner.ts` — **verify**: may be load-bearing FreeBSD exec
+- **Not** a Colibri concern: `src/browser-backend/*`, `src/browser-orchestrator.ts`
+
+## 3. JSON schemas to port
+
+Into `crates/colibri-contracts` as serde structs (this step):
+
+- `clawdie.interagent.run-manifest.v1`
+- `clawdie.runtime-version-inventory.v1`
+- `clawdie.provider-smoke.result.v1`
+
+Later: watchdog host-status shape, Pi `--mode json` events.
+
+## 4. First Rust modules to build
+
+Target workspace layout (`crates/`). Build order:
+
+| Crate | Role | When |
+| --------------------- | ----------------------------------------------- | ----------- |
+| `colibri-contracts` | JSON schemas + serde structs + golden tests | **now** |
+| `colibri-deepseek` | DeepSeek client + cache accounting (probe home) | first slice |
+| `colibri-runtime` | inventory + host/runtime detection | move existing |
+| `colibri-cli` | operator commands / probes | move existing binaries |
+| `colibri-daemon` | future always-on service (source of truth) | later |
+
+The current root-package binaries (`colibri-probe`, `colibri-runtime-inventory`)
+move behind these crates later; they keep working in the meantime.
+
+### First real replacement slice
+
+```text
+DeepSeek request
+  → byte-stable prompt prefix
+  → cache usage manifest (clawdie.provider-smoke.result.v1)
+  → run-manifest (clawdie.interagent.run-manifest.v1)
+  → local JSONL event log
+```
+
+Replaces a meaningful chunk of TS provider/smoke complexity **without touching
+the production service**.
+
+## 5. Proof gates before replacing the Clawdie TS service
+
+1. `colibri-contracts` round-trips TS-produced fixtures (golden tests pass).
+2. DeepSeek **live cache** manifest exists (`prompt_cache_hit_tokens > 0`).
+3. Runtime inventory parity across osa / domedog / debby.
+4. The first slice runs end-to-end on Linux **and** FreeBSD.
+5. Watchdog status read without breaking `doctor` / `pi-profile`.
+6. No production TS path retired until 1–5 are green and the caller inventory
+   in §2 is done.
+
+## Architecture boundary (GUI / Zed / Herdr)
+
+```text
+Rust daemon = source of truth (scheduling, task ownership, provider logic)
+GUI / Zed / Herdr-compatible client = display + controls over HTTP/SSE/WebSocket
+```
+
+No scheduling, task ownership, or provider logic in the GUI layer.