From 8a80ff8da5ee9db7277e33cd5276f468d1324db9 Mon Sep 17 00:00:00 2001 From: Sam & Claude Date: Sat, 27 Jun 2026 17:19:57 +0200 Subject: [PATCH] =?UTF-8?q?style:=20restore=20main=20green=20=E2=80=94=20f?= =?UTF-8?q?mt=20+=20prettier=20drift=20(Sam=20&=20Claude)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/colibri-daemon/src/daemon.rs | 6 +- docs/wiki/a2a-complexity-audit.md | 57 +++++++------ docs/wiki/hive-pane.md | 28 +++---- docs/wiki/hive-routing.md | 125 +++++++++++++++------------- docs/wiki/index.md | 8 +- docs/wiki/sl/contracts.md | 2 +- docs/wiki/sl/index.md | 6 +- docs/wiki/tui.md | 10 +-- scripts/check-format.sh | 2 +- 9 files changed, 130 insertions(+), 114 deletions(-) diff --git a/crates/colibri-daemon/src/daemon.rs b/crates/colibri-daemon/src/daemon.rs index f87118e..4dd0961 100644 --- a/crates/colibri-daemon/src/daemon.rs +++ b/crates/colibri-daemon/src/daemon.rs @@ -354,8 +354,10 @@ fn push_cost_to_mother(task_id: &str, tc: &colibri_store::TaskCost) { let payload_line = serde_json::to_string(&payload).unwrap_or_default(); let mut child = match std::process::Command::new("ssh") .args([ - "-o", "BatchMode=yes", - "-o", "ConnectTimeout=5", + "-o", + "BatchMode=yes", + "-o", + "ConnectTimeout=5", &mother_host, "report-task-cost", ]) diff --git a/docs/wiki/a2a-complexity-audit.md b/docs/wiki/a2a-complexity-audit.md index e81b62f..baa091e 100644 --- a/docs/wiki/a2a-complexity-audit.md +++ b/docs/wiki/a2a-complexity-audit.md @@ -8,13 +8,13 @@ Colibri speaks 5 protocols today: -| Protocol | Where | Lines | Purpose | -|---|---|---|---| -| **Custom JSON wire** | `crates/colibri-daemon/src/socket.rs` + `crates/colibri-client/src/lib.rs` | 1,981 | Local daemon control (spawn, status, snapshot, tasks, skills) | -| **MCP JSON-RPC** | `crates/colibri-mcp/src/lib.rs` | 570 | Editor integration + external MCP host | -| **MCP-over-SSH** | `packaging/mother/` (3 files) | 437 | Mother hive entrypoint (forced-command allowlist + node register) | -| **JSONL** | `crates/colibri-glasspane/src/lib.rs` | 1,186 | Agent subprocess stdout events | -| **SQL** | `crates/colibri-store/src/lib.rs` + `crates/colibri-store/src/schema.rs` | 1,150 | Local coordination (tasks, agents, skills, tenants) | +| Protocol | Where | Lines | Purpose | +| -------------------- | -------------------------------------------------------------------------- | ----- | ----------------------------------------------------------------- | +| **Custom JSON wire** | `crates/colibri-daemon/src/socket.rs` + `crates/colibri-client/src/lib.rs` | 1,981 | Local daemon control (spawn, status, snapshot, tasks, skills) | +| **MCP JSON-RPC** | `crates/colibri-mcp/src/lib.rs` | 570 | Editor integration + external MCP host | +| **MCP-over-SSH** | `packaging/mother/` (3 files) | 437 | Mother hive entrypoint (forced-command allowlist + node register) | +| **JSONL** | `crates/colibri-glasspane/src/lib.rs` | 1,186 | Agent subprocess stdout events | +| **SQL** | `crates/colibri-store/src/lib.rs` + `crates/colibri-store/src/schema.rs` | 1,150 | Local coordination (tasks, agents, skills, tenants) | **Total protocol surface: ~5,324 lines.** @@ -40,6 +40,7 @@ USB node → HTTPS → mother A2A endpoint → PostgreSQL ``` **Removed:** + - `colibri-mcp-ssh` (32 lines) — SSH forced-command allowlist wrapper - `node-register-mcp` (88 lines) — Custom MCP tool with embedded psql - SSH key management in `setup-mother.sh` (~40 lines of key distribution logic) @@ -47,6 +48,7 @@ USB node → HTTPS → mother A2A endpoint → PostgreSQL **Removed total: ~160 lines.** **Added:** + - A2A HTTP endpoint on mother (~200 lines) - A2A client library integration on USB node (~150 lines) - mTLS/TLS termination for auth (~30 lines) @@ -54,6 +56,7 @@ USB node → HTTPS → mother A2A endpoint → PostgreSQL **Added total: ~380 lines.** **Net delta: +220 lines.** Not a code reduction. But operational complexity drops significantly: + - No SSH key distribution to USB nodes (key lives on seed partition → no longer needed on mother) - No forced-command allowlist to maintain - Standard HTTPS is easier to firewall, audit, and monitor than SSH forced-command @@ -78,7 +81,7 @@ Today: external MCP registry config — manual JSON listing third-party MCP serv With A2A: third-party tools that speak A2A (not MCP) publish an Agent Card. Colibri discovers them via the well-known Agent Card URL instead of manual JSON config files. -**Reality check:** No third-party tools speak A2A yet. The protocol was just announced (April 2025). MCP has ~2 years of ecosystem maturity. This is a *future* replacement, not a *current* one. +**Reality check:** No third-party tools speak A2A yet. The protocol was just announced (April 2025). MCP has ~2 years of ecosystem maturity. This is a _future_ replacement, not a _current_ one. **Verdict:** A2A discovery doesn't reduce code today. External MCP stays for tool access. @@ -94,20 +97,20 @@ With A2A: cost data is a typed message part (`application/json+cost`). The forma **Code savings:** ~10 lines (the info! log stays; the A2A part is new code). -**Verdict:** Negligible code impact. The value is *interop*, not complexity reduction. +**Verdict:** Negligible code impact. The value is _interop_, not complexity reduction. --- ## What A2A does NOT replace -| Component | Why A2A doesn't touch it | Lines saved | -|---|---|---| -| **Unix socket wire protocol** (`crates/colibri-daemon/src/socket.rs`) | A2A is cross-node HTTP. Local daemon control needs IPC — Unix socket is faster, auth-free (filesystem permissions), and doesn't need a network stack. | 0 | -| **Spawner** (`crates/colibri-daemon/src/spawner.rs`) | A2A routes tasks to existing agents. Colibri *creates* agents by spawning subprocesses. A2A has no process lifecycle concept. | 0 | -| **Glasspane** (`crates/colibri-glasspane/src/lib.rs`) | A2A doesn't watch subprocess stdout. Glasspane is a PTY observer — it reads JSONL from child processes. A2A operates one layer above. | 0 | -| **Store** (`crates/colibri-store/src/lib.rs`) | A2A doesn't replace local SQLite coordination. Each node needs local persistence for task board, agents, skills — A2A is the *transport*, not the *database*. | 0 | -| **MCP editor bridge** | A2A is agent-to-agent. MCP is human-to-tool. Different protocols for different directions. They coexist. | 0 | -| **Contracts schemas** (`crates/colibri-contracts/src/lib.rs`) | A2A uses JSON Schema for input validation. Colibri's contracts are already compatible — no change needed. | 0 | +| Component | Why A2A doesn't touch it | Lines saved | +| --------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------- | +| **Unix socket wire protocol** (`crates/colibri-daemon/src/socket.rs`) | A2A is cross-node HTTP. Local daemon control needs IPC — Unix socket is faster, auth-free (filesystem permissions), and doesn't need a network stack. | 0 | +| **Spawner** (`crates/colibri-daemon/src/spawner.rs`) | A2A routes tasks to existing agents. Colibri _creates_ agents by spawning subprocesses. A2A has no process lifecycle concept. | 0 | +| **Glasspane** (`crates/colibri-glasspane/src/lib.rs`) | A2A doesn't watch subprocess stdout. Glasspane is a PTY observer — it reads JSONL from child processes. A2A operates one layer above. | 0 | +| **Store** (`crates/colibri-store/src/lib.rs`) | A2A doesn't replace local SQLite coordination. Each node needs local persistence for task board, agents, skills — A2A is the _transport_, not the _database_. | 0 | +| **MCP editor bridge** | A2A is agent-to-agent. MCP is human-to-tool. Different protocols for different directions. They coexist. | 0 | +| **Contracts schemas** (`crates/colibri-contracts/src/lib.rs`) | A2A uses JSON Schema for input validation. Colibri's contracts are already compatible — no change needed. | 0 | **Total irreplaceable: ~5,000 lines.** A2A doesn't reduce this at all. @@ -138,16 +141,16 @@ TOTAL 5,524 5,467 A2A is not a complexity reduction play. It's an **interoperability and operational simplicity** play: -| Metric | MCP-over-SSH (current) | A2A (proposed) | -|---|---|---| -| **Lines of code** | ~5,524 (spread across 6 crates + 3 shell scripts) | ~5,467 (SSH scripts gone, A2A handler added) | -| **Protocol count** | 5 | 6 (A2A adds one) | -| **Operational complexity** | SSH keys × N nodes, forced-command allowlists, peer auth setup | One HTTPS endpoint, mTLS certs, well-known URL | -| **Discoverability** | Manual external MCP registry entries | Agent Card at well-known URL | -| **Interoperability** | Colibri-only | Any A2A client | -| **Debugability** | `ssh -v`, `psql`, `jq` | `curl`, browser devtools, standard HTTP tooling | -| **Ecosystem maturity** | N/A (Colibri-specific) | Protocol < 3 months old, zero adoption | -| **When it pays off** | Works today for 4 nodes | Pays off at 10+ nodes, or when 3rd-party tools ship A2A | +| Metric | MCP-over-SSH (current) | A2A (proposed) | +| -------------------------- | -------------------------------------------------------------- | ------------------------------------------------------- | +| **Lines of code** | ~5,524 (spread across 6 crates + 3 shell scripts) | ~5,467 (SSH scripts gone, A2A handler added) | +| **Protocol count** | 5 | 6 (A2A adds one) | +| **Operational complexity** | SSH keys × N nodes, forced-command allowlists, peer auth setup | One HTTPS endpoint, mTLS certs, well-known URL | +| **Discoverability** | Manual external MCP registry entries | Agent Card at well-known URL | +| **Interoperability** | Colibri-only | Any A2A client | +| **Debugability** | `ssh -v`, `psql`, `jq` | `curl`, browser devtools, standard HTTP tooling | +| **Ecosystem maturity** | N/A (Colibri-specific) | Protocol < 3 months old, zero adoption | +| **When it pays off** | Works today for 4 nodes | Pays off at 10+ nodes, or when 3rd-party tools ship A2A | --- diff --git a/docs/wiki/hive-pane.md b/docs/wiki/hive-pane.md index 00d608a..1a2c34e 100644 --- a/docs/wiki/hive-pane.md +++ b/docs/wiki/hive-pane.md @@ -161,13 +161,13 @@ as the wiki. A2A tasks map directly to Colibri's task board: -| A2A state | Colibri equivalent | -| -------------- | ------------------ | -| `submitted` | `Pending` | -| `working` | `Started` | -| `completed` | `Done` | -| `failed` | `Error` | -| `canceled` | (not yet modeled) | +| A2A state | Colibri equivalent | +| ----------- | ------------------ | +| `submitted` | `Pending` | +| `working` | `Started` | +| `completed` | `Done` | +| `failed` | `Error` | +| `canceled` | (not yet modeled) | Mother pushes a `node_register` task to a new USB node; the node executes it and returns the result. The task carries cost data as a typed A2A part: @@ -187,13 +187,13 @@ returns the result. The task carries cost data as a typed A2A part: ### What A2A adds over the current MCP bridge -| Concern | Current (MCP + SSH) | A2A | -| -------------------- | ----------------------------- | -------------------------------- | -| Discovery | Manual external MCP registry entry | Well-known Agent Card URL | -| Interop | Colibri-only | Any A2A client | -| Cost data | Embedded in task completion | Typed `application/json+cost` | -| Push notifications | Polling (heartbeat) | Optional webhook/push | -| Versioning | Ad-hoc | Agent Card version + schema pins | +| Concern | Current (MCP + SSH) | A2A | +| ------------------ | ---------------------------------- | -------------------------------- | +| Discovery | Manual external MCP registry entry | Well-known Agent Card URL | +| Interop | Colibri-only | Any A2A client | +| Cost data | Embedded in task completion | Typed `application/json+cost` | +| Push notifications | Polling (heartbeat) | Optional webhook/push | +| Versioning | Ad-hoc | Agent Card version + schema pins | A2A is not a replacement for the MCP bridge — it's the next layer. The MCP bridge handles local daemon commands (status, snapshot, spawn). A2A handles diff --git a/docs/wiki/hive-routing.md b/docs/wiki/hive-routing.md index ea9c7f1..b9eee71 100644 --- a/docs/wiki/hive-routing.md +++ b/docs/wiki/hive-routing.md @@ -10,15 +10,15 @@ ## What Exists Today -| Component | State | Gap | -|---|---|---| -| `mother_schema.sql` | `hive_nodes` table with `hw_profile` + `capabilities` JSONB | No stable node UUID; hostname is the key | -| `derive_capabilities()` trigger | Auto-computes `has_gpu`, `gpu_vendor`, `can_run_local_llm`, `max_model` from hw_profile | Only GPU/VRAM heuristics — doesn't probe running services | -| `clawdie-hw-probe` | Collects GPU, RAM, CPU, disks, ZFS, WiFi, Vulkan, Colibri status | No ollama/llama.cpp probing | -| `node-register-mcp` | UPSERTs hw_profile into `hive_nodes` on join | No UUID generation at join time | -| `crates/colibri-daemon/src/scheduler.rs` | Cron/interval/one-shot jobs, capability matching stubs | No cost-aware routing, no hive awareness | -| `colibri-store` | Local SQLite `agents` table with UUID (v4 random) | UUID is session-local, not hive-stable | -| T1.5 cost tracking | Per-task cost captured in local SQLite | No hive-level cost aggregation | +| Component | State | Gap | +| ---------------------------------------- | --------------------------------------------------------------------------------------- | --------------------------------------------------------- | +| `mother_schema.sql` | `hive_nodes` table with `hw_profile` + `capabilities` JSONB | No stable node UUID; hostname is the key | +| `derive_capabilities()` trigger | Auto-computes `has_gpu`, `gpu_vendor`, `can_run_local_llm`, `max_model` from hw_profile | Only GPU/VRAM heuristics — doesn't probe running services | +| `clawdie-hw-probe` | Collects GPU, RAM, CPU, disks, ZFS, WiFi, Vulkan, Colibri status | No ollama/llama.cpp probing | +| `node-register-mcp` | UPSERTs hw_profile into `hive_nodes` on join | No UUID generation at join time | +| `crates/colibri-daemon/src/scheduler.rs` | Cron/interval/one-shot jobs, capability matching stubs | No cost-aware routing, no hive awareness | +| `colibri-store` | Local SQLite `agents` table with UUID (v4 random) | UUID is session-local, not hive-stable | +| T1.5 cost tracking | Per-task cost captured in local SQLite | No hive-level cost aggregation | ## Design Goals @@ -81,17 +81,19 @@ A 32-character hex UUID generated once, stored locally, included in every hw-pro ``` **Properties:** + - **Stable across reboots**: stored on disk, not tmpfs - **Survives re-provisioning**: if the seed partition preserves `/var/db/machine-id`, the same physical machine keeps the same identity - **Not a secret**: it's an ID, not a key - **Verifiable**: mother can check "has node a1b2c3d4 ever joined?" — if yes, this is a rejoin, not a new node **Alternatives considered:** -| Approach | Pros | Cons | -|---|---|---| -| SMBIOS UUID (`hw.uuid`) | Truly hardware-bound, survives OS reinstall | Not available on all platforms (VPS, ARM); can be spoofed | -| SSH host key fingerprint | Cryptographically strong | Changes on OS reinstall; key rotation breaks identity | -| Random UUID (this design) | Portable, simple, survives seed restore | Can be copied/cloned (but same machine, same ID — that's correct) | + +| Approach | Pros | Cons | +| ------------------------- | ------------------------------------------- | ----------------------------------------------------------------- | +| SMBIOS UUID (`hw.uuid`) | Truly hardware-bound, survives OS reinstall | Not available on all platforms (VPS, ARM); can be spoofed | +| SSH host key fingerprint | Cryptographically strong | Changes on OS reinstall; key rotation breaks identity | +| Random UUID (this design) | Portable, simple, survives seed restore | Can be copied/cloned (but same machine, same ID — that's correct) | **Recommendation:** Generate on first boot, store in `/var/db/machine-id`. The hw-probe includes it as `machine_id`. Mother's `hive_nodes` table gets a `UNIQUE` constraint on `machine_id`. @@ -112,18 +114,18 @@ The `node-register-mcp` UPSERT switches from `ON CONFLICT (hostname)` to `ON CON Every capability is a boolean derived from hardware facts, not a self-declaration. The hw-probe collects hardware; the trigger derives capabilities. -| Capability | Derived from | Used for | -|---|---|---| -| `has_gpu` | GPU detected in pciconf | GPU-accelerated inference | -| `gpu_vendor` | amdgpu/nvidia driver | Model compatibility | -| `vulkan_compute` | vulkaninfo success | llama.cpp Vulkan backend | -| `can_run_local_llm` | RAM ≥ 16GB or has GPU | Eligibility for local task execution | -| `max_model` | RAM heuristic | Model size limit (3b, 7b-q4, 13b-q4, 34b-q4) | -| `cpu_only` | No GPU detected | Fallback only (slow) | -| `has_wifi` | wlan devices | Network capability | -| `has_zfs` | ZFS pools non-empty | Storage capability | -| `colibri_running` | service status | Agent host eligibility | -| `provider_api_keys` | MCP-reported (not hw probe) | Cloud provider availability | +| Capability | Derived from | Used for | +| ------------------- | --------------------------- | -------------------------------------------- | +| `has_gpu` | GPU detected in pciconf | GPU-accelerated inference | +| `gpu_vendor` | amdgpu/nvidia driver | Model compatibility | +| `vulkan_compute` | vulkaninfo success | llama.cpp Vulkan backend | +| `can_run_local_llm` | RAM ≥ 16GB or has GPU | Eligibility for local task execution | +| `max_model` | RAM heuristic | Model size limit (3b, 7b-q4, 13b-q4, 34b-q4) | +| `cpu_only` | No GPU detected | Fallback only (slow) | +| `has_wifi` | wlan devices | Network capability | +| `has_zfs` | ZFS pools non-empty | Storage capability | +| `colibri_running` | service status | Agent host eligibility | +| `provider_api_keys` | MCP-reported (not hw probe) | Cloud provider availability | ### Local LLM capabilities (NEW) @@ -143,14 +145,14 @@ Extend the hw-probe to detect running local LLM services and extend the trigger **New derived capabilities:** -| Capability | Derivation | -|---|---| -| `ollama_available` | `ollama_running == true` | -| `ollama_models` | Array of model tags (from `ollama list`) | -| `llama_cpp_available` | Binary at `/usr/local/bin/llama-server` or similar | -| `llama_cpp_models` | GGUFs in `/var/db/models/` or `/usr/local/share/models/` | -| `can_embed_locally` | `nomic-embed-text` in ollama OR any embedding model loaded | -| `inference_tier` | `local-fast` (GPU ≥ 24GB), `local-slow` (CPU-only, RAM ≥ 16GB), `cloud-only` | +| Capability | Derivation | +| --------------------- | ---------------------------------------------------------------------------- | +| `ollama_available` | `ollama_running == true` | +| `ollama_models` | Array of model tags (from `ollama list`) | +| `llama_cpp_available` | Binary at `/usr/local/bin/llama-server` or similar | +| `llama_cpp_models` | GGUFs in `/var/db/models/` or `/usr/local/share/models/` | +| `can_embed_locally` | `nomic-embed-text` in ollama OR any embedding model loaded | +| `inference_tier` | `local-fast` (GPU ≥ 24GB), `local-slow` (CPU-only, RAM ≥ 16GB), `cloud-only` | ### Probe additions to `clawdie-hw-probe` @@ -181,12 +183,12 @@ cache_weight: 0.0–1.0 (warm cache → higher weight) ### Cost tiers -| Tier | Provider | Cost per 1M tokens | Latency | Used when | -|---|---|---|---| -| T0 (free) | Local ollama/llama.cpp | $0.00 | 5–60s | Non-urgent, capability match | -| T1 (cheap) | DeepSeek V3 | $0.27 / $1.10 | 2–5s | Default for most tasks | -| T2 (balanced) | Gemini Flash | $0.15 / $0.60 | 1–3s | High cache-hit tasks | -| T3 (premium) | Claude Sonnet 4 | $3.00 / $15.00 | 3–8s | Complex reasoning, only when needed | +| Tier | Provider | Cost per 1M tokens | Latency | Used when | +| ------------- | ---------------------- | ------------------ | ------- | ----------------------------------- | +| T0 (free) | Local ollama/llama.cpp | $0.00 | 5–60s | Non-urgent, capability match | +| T1 (cheap) | DeepSeek V3 | $0.27 / $1.10 | 2–5s | Default for most tasks | +| T2 (balanced) | Gemini Flash | $0.15 / $0.60 | 1–3s | High cache-hit tasks | +| T3 (premium) | Claude Sonnet 4 | $3.00 / $15.00 | 3–8s | Complex reasoning, only when needed | ### Local LLM routing rules @@ -257,6 +259,7 @@ When the task completes, the local daemon writes cost to its SQLite (T1.5). The **What:** Mother is the brain. Nodes register, mother routes. No peer-to-peer. **Implementation:** + 1. Add `machine_id` to `hive_nodes` + hw-probe (1 day) 2. Extend `derive_capabilities()` for local LLM (1 day) 3. Add `routing_score()` function to mother's PostgreSQL (stored function — zero Rust changes) @@ -268,12 +271,14 @@ When the task completes, the local daemon writes cost to its SQLite (T1.5). The **Total:** ~3.5 days. **Pros:** + - Simple to reason about — one source of truth - Lowest implementation risk - Scheduler lives on mother (always-on) - Existing MCP bridge handles all communication **Cons:** + - Mother is single point of failure for routing (but not execution — once dispatched, the task runs independently) - Latency: scheduler must query mother on every tick - Doesn't scale to 100+ nodes (not a real concern for our use case) @@ -285,6 +290,7 @@ When the task completes, the local daemon writes cost to its SQLite (T1.5). The **What:** Mother stores the matrix, but nodes can also route tasks they own to peers directly. Hybrid: central registry + distributed execution. **Implementation:** + 1. All of Option A (3.5 days) 2. Add `capabilities` API to `colibri-daemon`'s Unix socket (self-awareness) — 1 day 3. Add local peer discovery via mDNS or Tailscale whois — 1 day @@ -294,12 +300,14 @@ When the task completes, the local daemon writes cost to its SQLite (T1.5). The **Total:** ~8.5 days. **Pros:** + - Lower latency for local dispatch - Survives mother downtime for peer-to-peer tasks - Natural fit for local LLM use case (beefy node is on same LAN) - Nodes that discover each other can route without phoning home **Cons:** + - Complexity: two code paths (central + peer-to-peer) - Security: peer-to-peer dispatch needs authentication (who can send tasks to my daemon?) - Harder to audit: cost tracking must handle peer-dispatched vs mother-dispatched tasks differently @@ -312,6 +320,7 @@ When the task completes, the local daemon writes cost to its SQLite (T1.5). The **What:** Don't build a routing engine at all. The capability matrix is exposed as an MCP tool that agents query. The agent itself decides where to route based on the matrix + its own reasoning. The matrix is advisory, not prescriptive. **Implementation:** + 1. All of Option A minus the routing_scoring function (2.5 days) 2. Add `colibri_query_hive_capabilities` MCP tool on mother — returns full online node matrix (0.5 day) 3. Add `colibri_dispatch_to_node` MCP tool — sends task to a specific node (1 day) @@ -320,6 +329,7 @@ When the task completes, the local daemon writes cost to its SQLite (T1.5). The **Total:** ~4.5 days. **Zero scheduler changes.** **Pros:** + - Exploits Colibri's architecture-as-differentiator: the agent IS the intelligence - The routing decision is auditable in the conversation log (why did the agent pick this node?) - Natural fit for local LLM — the agent can reason "this task is low priority, I'll try the beefy node first" @@ -327,6 +337,7 @@ When the task completes, the local daemon writes cost to its SQLite (T1.5). The - The skill can be iterated without recompiling Colibri **Cons:** + - Each routing decision costs tokens (the agent must reason about it) - Agents make inscrutable routing choices (the LLM "just knows") - No hard guarantees — an agent might route a $5 task to Claude when DeepSeek would do fine @@ -350,26 +361,26 @@ The capability matrix, stable UUIDs, and local LLM probes are the foundation — ### Phase 1 — Identity & Capability Foundation -| Deliverable | Where | Lines | -|---|---|---| -| `machine_id` generation in `clawdie-firstboot` | clawdie-iso | ~15 | -| `collect_machine_id()` in hw-probe | clawdie-iso | ~10 | -| `collect_ollama_status()` in hw-probe | clawdie-iso | ~30 | -| `collect_llama_cpp()` in hw-probe | clawdie-iso | ~20 | -| `collect_local_llm()` aggregator in hw-probe | clawdie-iso | ~25 | -| `machine_id` column + constraint in mother_schema.sql | colibri | ~5 | -| Extended `derive_capabilities()` for `ollama_available`, `llama_cpp_available`, `inference_tier` | colibri | ~40 | -| `node-register-mcp` handling of `machine_id` key + new local_llm fields | colibri | ~15 | -| This design doc (hive-routing.md) | This file | ~0 (done) | +| Deliverable | Where | Lines | +| ------------------------------------------------------------------------------------------------ | ----------- | --------- | +| `machine_id` generation in `clawdie-firstboot` | clawdie-iso | ~15 | +| `collect_machine_id()` in hw-probe | clawdie-iso | ~10 | +| `collect_ollama_status()` in hw-probe | clawdie-iso | ~30 | +| `collect_llama_cpp()` in hw-probe | clawdie-iso | ~20 | +| `collect_local_llm()` aggregator in hw-probe | clawdie-iso | ~25 | +| `machine_id` column + constraint in mother_schema.sql | colibri | ~5 | +| Extended `derive_capabilities()` for `ollama_available`, `llama_cpp_available`, `inference_tier` | colibri | ~40 | +| `node-register-mcp` handling of `machine_id` key + new local_llm fields | colibri | ~15 | +| This design doc (hive-routing.md) | This file | ~0 (done) | ### Phase 2 — Routing Engine -| Deliverable | Where | -|---|---| -| `colibri_query_hive_capabilities` MCP tool | colibri-mcp | -| `colibri_dispatch_to_node` MCP tool | colibri-mcp | -| `hive-routing` skill | `.agent/skills/` | -| `Task.routing` JSONB field in colibri-store | colibri-store | +| Deliverable | Where | +| -------------------------------------------------------------------------------------------------------------- | ----------------- | +| `colibri_query_hive_capabilities` MCP tool | colibri-mcp | +| `colibri_dispatch_to_node` MCP tool | colibri-mcp | +| `hive-routing` skill | `.agent/skills/` | +| `Task.routing` JSONB field in colibri-store | colibri-store | | Mother-side routing score as PostgreSQL function (optional — only if agent-driven routing proves insufficient) | mother_schema.sql | --- diff --git a/docs/wiki/index.md b/docs/wiki/index.md index fe8c530..3b3830d 100644 --- a/docs/wiki/index.md +++ b/docs/wiki/index.md @@ -53,21 +53,21 @@ warning. | [headroom-sidecar](./headroom-sidecar.md) | Optional tool-result compression sidecar and its Unix-socket protocol | | [jail-confinement](./jail-confinement.md) | Persistent vs ephemeral jails, priv-mode policy, reuse of spawner confinement for MCP servers | | [mother-hive](./mother-hive.md) | Mother MCP architecture — forced-command SSH, single-home-in-colibri, peer auth, key-on-seed | -| [hive-routing](./hive-routing.md) | Hive member identity (machine UUID), capability matrix + local LLM probes, cost-aware task routing | +| [hive-routing](./hive-routing.md) | Hive member identity (machine UUID), capability matrix + local LLM probes, cost-aware task routing | | [hive-pane](./hive-pane.md) | Glasspane for the hive — multi-node cost observability, A2A discovery, and operator board | -| [a2a-complexity-audit](./a2a-complexity-audit.md) | A2A code complexity impact — 6-protocol surface audit, when A2A pays off | +| [a2a-complexity-audit](./a2a-complexity-audit.md) | A2A code complexity impact — 6-protocol surface audit, when A2A pays off | | [naming-decisions](./naming-decisions.md) | Ledger of harness-neutral / architecture renames — shipped and in-flight | | [daemon-not-demon](./daemon-not-demon.md) | Why we say daemon (helper spirit) not demon (bad spirit) — English + Slovenian | | [layered-soul](./layered-soul.md) | How Colibri consumes the layered-soul reviewed-context repo today vs planned | | [task-board](./task-board.md) | Capability match scoring, cron scheduling, intake drain, SQLite backing | | [quality-gates](./quality-gates.md) | `ci-checks.sh` as the pre-merge gate; why drift reached `main` before | -| [contracts](./contracts.md) | Stable JSON schemas (run-manifest, runtime-inventory, provider-test), golden tests | +| [contracts](./contracts.md) | Stable JSON schemas (run-manifest, runtime-inventory, provider-test), golden tests | | [store-schema](./store-schema.md) | SQLite coordination schema and migration discipline | | [external-mcp](./external-mcp.md) | MCP bridge for editors + external stdio MCP host; read/write/external-call gates | | [operator-cli](./operator-cli.md) | The `colibri` CLI as a thin typed Unix-socket client over the daemon API | | [tui](./tui.md) | Terminal dashboard client (colibri-tui) vs the colibri-glasspane state machine | | [terminal](./terminal.md) | Terminal capability decision (Kitty, extended-key reporting, tmux passthrough, SSH terminfo) | | [runtime-inventory](./runtime-inventory.md) | Host runtime inventory + watchdog status reader; additive, read-only integrations | -| [skills-catalog](./skills-catalog.md) | Read-only runtime consumer for reviewed skill artifacts | +| [skills-catalog](./skills-catalog.md) | Read-only runtime consumer for reviewed skill artifacts | | [vault-provision](./vault-provision.md) | Vaultwarden-driven env-file provisioning into jails after agent spawn | | [deployment](./deployment.md) | Host installer (clawdie): ZFS layout, rc.d/systemd service, dry-run safety | diff --git a/docs/wiki/sl/contracts.md b/docs/wiki/sl/contracts.md index 10748c7..0ccf2b7 100644 --- a/docs/wiki/sl/contracts.md +++ b/docs/wiki/sl/contracts.md @@ -23,7 +23,7 @@ _sheme in (De)serialize_, ne poslovne logike. | -------------------------------------- | --------------------- | ------------------------------------------------------------------------- | | `clawdie.interagent.run-manifest.v1` | `RunManifest` | Beleži tek gradnje/testa — vloga, agent, artefakti, povzetek. | | `clawdie.runtime-version-inventory.v1` | `RuntimeInventory` | Posnetek izvajalnega okolja gostitelja — OS, različice paketov, npm/node. | -| `clawdie.provider-test.result.v1` | `ProviderSmokeResult` | Rezultat sonde predpomnilnika DeepSeek in obračun žetonov. | +| `clawdie.provider-test.result.v1` | `ProviderSmokeResult` | Rezultat sonde predpomnilnika DeepSeek in obračun žetonov. | Konstante shem in strukture živijo v `crates/colibri-contracts/src/lib.rs`. diff --git a/docs/wiki/sl/index.md b/docs/wiki/sl/index.md index 847fa76..2426a1b 100644 --- a/docs/wiki/sl/index.md +++ b/docs/wiki/sl/index.md @@ -59,19 +59,19 @@ clippy. | [headroom-sidecar](./headroom-sidecar.md) | Neobvezni stranski vagon za stiskanje rezultatov orodij in njegov protokol Unix vtičnice | | [jail-confinement](./jail-confinement.md) | Trajne proti prehodnim ječam, pravilnik načina priv, ponovna uporaba omejitve zaganjalnika za strežnike MCP | | [mother-hive](./mother-hive.md) | Arhitektura matičnega MCP — SSH s prisiljenim ukazom, enojni-dom-v-colibri, peer avtentikacija, ključ-na-semenu | -| [hive-pane](./hive-pane.md) | Steklena plošča za panj — opazovanje stroškov več vozlišč, odkrivanje A2A in operaterska nadzorna plošča | +| [hive-pane](./hive-pane.md) | Steklena plošča za panj — opazovanje stroškov več vozlišč, odkrivanje A2A in operaterska nadzorna plošča | | [naming-decisions](./naming-decisions.md) | Imenik preimenovanj, nevtralnih glede na opremo / arhitekturnih — dostavljenih in v teku | | [daemon-not-demon](./daemon-not-demon.md) | Zakaj rečemo daemon (duh pomočnik) in ne demon (hudič) — angleško + slovensko | | [layered-soul](./layered-soul.md) | Kako Colibri danes uporablja repozitorij pregledanega konteksta layered-soul proti načrtovanemu | | [task-board](./task-board.md) | Točkovanje po zmožnostih, cron razporejanje, praznjenje vnosne vrste, podlaga SQLite | | [quality-gates](./quality-gates.md) | `ci-checks.sh` kot preverjanje pred združitvijo; zakaj je odmik prej dosegel `main` | -| [contracts](./contracts.md) | Stabilne JSON sheme (run-manifest, runtime-inventory, provider-test), zlati testi | +| [contracts](./contracts.md) | Stabilne JSON sheme (run-manifest, runtime-inventory, provider-test), zlati testi | | [store-schema](./store-schema.md) | Usklajevalna shema SQLite in disciplina migracij | | [external-mcp](./external-mcp.md) | Most MCP za urejevalnike + zunanji gostitelj stdio MCP; dovoljenja za branje/pisanje/zunanji-klic | | [operator-cli](./operator-cli.md) | CLI `colibri` kot tanek tipiziran odjemalec Unix vtičnice prek API procesa v ozadju | | [tui](./tui.md) | Odjemalec terminalske nadzorne plošče (colibri-tui) proti avtomatu stanj colibri-glasspane | | [terminal](./terminal.md) | Odločitev o terminalski zmožnosti (Kitty, razširjeno poročanje tipk, prehod tmux, SSH terminfo) | | [runtime-inventory](./runtime-inventory.md) | Popis izvajalnega okolja gostitelja + bralnik statusa čuvaja; aditivne, bralne integracije | -| [skills-catalog](./skills-catalog.md) | Bralni izvajalni porabnik za pregledane artefakte veščin | +| [skills-catalog](./skills-catalog.md) | Bralni izvajalni porabnik za pregledane artefakte veščin | | [vault-provision](./vault-provision.md) | Oskrba datotek env, gnana z Vaultwarden, v ječe po zagonu agenta | | [deployment](./deployment.md) | Nameščevalnik gostitelja (clawdie): postavitev ZFS, storitev rc.d/systemd, varnost suhega teka | diff --git a/docs/wiki/tui.md b/docs/wiki/tui.md index 6073b4d..e3c9b96 100644 --- a/docs/wiki/tui.md +++ b/docs/wiki/tui.md @@ -81,7 +81,7 @@ should be revisited. | `s` | Spawn a local `colibri-test-agent` | | `x` | Stop the selected pane | | `Enter` | Open/close the detail pane for the selected row | -| `Tab` / `Shift+Tab` | Cycle through distinct sessions (incl. "All") | +| `Tab` / `Shift+Tab` | Cycle through distinct sessions (incl. "All") | | `j` / `k` or `↓` / `↑` | Navigate the pane table | | `n` / `N` | Jump to next / previous **attention** pane | | `a` | Toggle the attention filter (only attention) | @@ -137,10 +137,10 @@ renders. This makes attention impossible to miss without consuming extra space. ### Row highlight inverts on selection -| Row state | Normal | Selected | -| --------- | --------------------------- | ----------------------------------------- | -| Attention | `bg(DarkRed)` + `fg(White)` | `bg(DarkGray)` + `fg(LightRed)` + bold | -| Normal | (plain) | `bg(DarkGray)` | +| Row state | Normal | Selected | +| --------- | --------------------------- | -------------------------------------- | +| Attention | `bg(DarkRed)` + `fg(White)` | `bg(DarkGray)` + `fg(LightRed)` + bold | +| Normal | (plain) | `bg(DarkGray)` | Attention rows are impossible to miss; the inversion on selection confirms which one the cursor is on without losing the attention signal. diff --git a/scripts/check-format.sh b/scripts/check-format.sh index 9e22314..fff24df 100755 --- a/scripts/check-format.sh +++ b/scripts/check-format.sh @@ -8,4 +8,4 @@ ROOT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)" cd "$ROOT_DIR" -exec npx --yes prettier@3 --check '**/*.md' +exec npx --yes prettier@3.8.4 --check '**/*.md'