feat: scaffold colibri-skills crate — split-brain read consumer #2
5 changed files with 528 additions and 168 deletions
12
Cargo.lock
generated
12
Cargo.lock
generated
|
|
@ -180,7 +180,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0"
|
||||
dependencies = [
|
||||
"iana-time-zone",
|
||||
"js-sys",
|
||||
"num-traits",
|
||||
"serde",
|
||||
"wasm-bindgen",
|
||||
"windows-link",
|
||||
]
|
||||
|
||||
|
|
@ -284,6 +287,15 @@ dependencies = [
|
|||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "colibri-skills"
|
||||
version = "0.0.1"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"serde",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "colibri-store"
|
||||
version = "0.0.1"
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
[workspace]
|
||||
members = ["crates/colibri-contracts", "crates/colibri-deepseek", "crates/colibri-runtime", "crates/colibri-glasspane", "crates/colibri-daemon", "crates/colibri-client", "crates/colibri-glasspane-tui", "crates/colibri-store"]
|
||||
members = ["crates/colibri-contracts", "crates/colibri-deepseek", "crates/colibri-runtime", "crates/colibri-glasspane", "crates/colibri-daemon", "crates/colibri-client", "crates/colibri-glasspane-tui", "crates/colibri-store", "crates/colibri-skills"]
|
||||
|
||||
[package]
|
||||
name = "colibri"
|
||||
|
|
|
|||
11
crates/colibri-skills/Cargo.toml
Normal file
11
crates/colibri-skills/Cargo.toml
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
[package]
|
||||
name = "colibri-skills"
|
||||
version = "0.0.1"
|
||||
edition = "2021"
|
||||
description = "Colibri skills read consumer — indexes Clawdie-AI skill artifacts into SQLite"
|
||||
license = "AGPL-3.0-only"
|
||||
|
||||
[dependencies]
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
chrono = { version = "0.4", features = ["serde"] }
|
||||
317
crates/colibri-skills/src/lib.rs
Normal file
317
crates/colibri-skills/src/lib.rs
Normal file
|
|
@ -0,0 +1,317 @@
|
|||
//! Colibri Skills — read-only consumer for Clawdie-AI skill artifacts.
|
||||
//!
|
||||
//! This crate indexes committed, reviewed skill artifacts from the Clawdie-AI
|
||||
//! repository into SQLite. It does NOT author, edit, or store skill content —
|
||||
//! that responsibility lives in Clawdie-AI.
|
||||
//!
|
||||
//! ```text
|
||||
//! Clawdie-AI repo (source of truth)
|
||||
//! docs/astro-howto/
|
||||
//! docs/forgejo-admin/
|
||||
//! ...
|
||||
//!
|
||||
//! colibri-skills (read-only consumer)
|
||||
//! reads run_manifest.json
|
||||
//! validates checksums
|
||||
//! indexes markdown/transcript chunks
|
||||
//! serves CLI/TUI search
|
||||
//! ```
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
// ── Core types ────────────────────────────────────────────────────────────
|
||||
|
||||
/// A read-only skill artifact indexed from Clawdie-AI.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Skill {
|
||||
pub skill_id: String,
|
||||
pub display_name: String,
|
||||
/// Relative path within the Clawdie-AI repo (e.g. "docs/astro-howto").
|
||||
pub source_path: String,
|
||||
pub manifest: SkillManifest,
|
||||
pub artifacts: Vec<SkillArtifact>,
|
||||
pub status: SkillStatus,
|
||||
/// Natural-language verification test.
|
||||
pub verification: Option<String>,
|
||||
}
|
||||
|
||||
/// Parsed from `run_manifest.json` in a skill directory.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SkillManifest {
|
||||
pub run_id: String,
|
||||
pub created: String,
|
||||
pub source: Option<ManifestSource>,
|
||||
pub pipeline_stages: Vec<PipelineStage>,
|
||||
pub models_used: Vec<ModelUsage>,
|
||||
pub notes: Option<String>,
|
||||
}
|
||||
|
||||
/// Source media metadata from the manifest.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ManifestSource {
|
||||
pub path: Option<String>,
|
||||
pub size_human: Option<String>,
|
||||
pub codec: Option<String>,
|
||||
pub resolution: Option<String>,
|
||||
pub duration_human: Option<String>,
|
||||
pub original_untouched: Option<bool>,
|
||||
}
|
||||
|
||||
/// A single stage in the skill generation pipeline.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct PipelineStage {
|
||||
pub stage: String,
|
||||
pub tool: String,
|
||||
pub model: Option<String>,
|
||||
pub output: Option<String>,
|
||||
pub duration_s: Option<f64>,
|
||||
pub lines: Option<u64>,
|
||||
pub size_human: Option<String>,
|
||||
}
|
||||
|
||||
/// A model used during skill generation.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ModelUsage {
|
||||
pub model: String,
|
||||
pub purpose: String,
|
||||
pub api_key_used: bool,
|
||||
}
|
||||
|
||||
/// An individual file within a skill directory.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SkillArtifact {
|
||||
pub artifact_type: ArtifactType,
|
||||
/// Path relative to the skill directory.
|
||||
pub relative_path: String,
|
||||
pub file_name: String,
|
||||
pub mime_type: Option<String>,
|
||||
pub size_bytes: u64,
|
||||
pub sha256_hash: String,
|
||||
}
|
||||
|
||||
/// Classification of a skill artifact file.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ArtifactType {
|
||||
Document,
|
||||
Image,
|
||||
Script,
|
||||
Transcript,
|
||||
Manifest,
|
||||
Checksum,
|
||||
Report,
|
||||
ContactSheet,
|
||||
Other(String),
|
||||
}
|
||||
|
||||
impl ArtifactType {
|
||||
pub fn from_path(path: &str) -> Self {
|
||||
let lower = path.to_lowercase();
|
||||
// Check specific patterns before generic extensions.
|
||||
if lower.ends_with(".py") || lower.ends_with(".sh") || lower.ends_with(".bash") {
|
||||
return ArtifactType::Script;
|
||||
}
|
||||
if lower.contains("contact_sheet") {
|
||||
return ArtifactType::ContactSheet;
|
||||
}
|
||||
if lower.contains("run_manifest") && lower.ends_with(".json") {
|
||||
return ArtifactType::Manifest;
|
||||
}
|
||||
if lower.contains("sha256") || lower.contains("checksum") {
|
||||
return ArtifactType::Checksum;
|
||||
}
|
||||
if lower.contains("report") && lower.ends_with(".json") {
|
||||
return ArtifactType::Report;
|
||||
}
|
||||
if lower.ends_with(".md") {
|
||||
ArtifactType::Document
|
||||
} else if lower.ends_with(".jpg") || lower.ends_with(".png") || lower.ends_with(".webp") {
|
||||
ArtifactType::Image
|
||||
} else if lower.ends_with(".txt") && lower.contains("transcript") {
|
||||
ArtifactType::Transcript
|
||||
} else {
|
||||
ArtifactType::Other(path.to_string())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A chunk of searchable content extracted from a skill artifact.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SkillChunk {
|
||||
pub chunk_id: Option<i64>,
|
||||
pub skill_id: String,
|
||||
pub artifact_relative_path: String,
|
||||
pub chunk_type: ChunkType,
|
||||
/// Nearest markdown heading, if applicable.
|
||||
pub heading: Option<String>,
|
||||
pub content: String,
|
||||
pub line_start: u64,
|
||||
pub line_end: u64,
|
||||
pub tokens_estimate: Option<u64>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ChunkType {
|
||||
MarkdownSection,
|
||||
TranscriptSegment,
|
||||
CommandBlock,
|
||||
CodeBlock,
|
||||
Table,
|
||||
}
|
||||
|
||||
// ── Status ────────────────────────────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum SkillStatus {
|
||||
#[default]
|
||||
Active,
|
||||
Archived,
|
||||
Superseded,
|
||||
}
|
||||
|
||||
// ── Import summary ────────────────────────────────────────────────────────
|
||||
|
||||
/// Returned after indexing a Clawdie-AI checkout.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ImportSummary {
|
||||
pub skills_found: usize,
|
||||
pub skills_indexed: usize,
|
||||
pub skills_skipped: usize,
|
||||
pub artifacts_total: usize,
|
||||
pub chunks_total: usize,
|
||||
pub checksum_failures: usize,
|
||||
pub errors: Vec<String>,
|
||||
}
|
||||
|
||||
impl ImportSummary {
|
||||
pub fn success(&self) -> bool {
|
||||
self.errors.is_empty() && self.checksum_failures == 0
|
||||
}
|
||||
}
|
||||
|
||||
// ── Search result ─────────────────────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SearchResult {
|
||||
pub skill_id: String,
|
||||
pub display_name: String,
|
||||
pub chunk_type: ChunkType,
|
||||
pub heading: Option<String>,
|
||||
pub snippet: String,
|
||||
pub artifact_path: String,
|
||||
pub line_start: u64,
|
||||
}
|
||||
|
||||
// ── Tests ─────────────────────────────────────────────────────────────────
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Summary with every counter at zero and no errors.
    fn clean_summary() -> ImportSummary {
        ImportSummary {
            skills_found: 0,
            skills_indexed: 0,
            skills_skipped: 0,
            artifacts_total: 0,
            chunks_total: 0,
            checksum_failures: 0,
            errors: vec![],
        }
    }

    #[test]
    fn artifact_type_from_path_document() {
        assert_eq!(
            ArtifactType::from_path("docs/HOWTO.md"),
            ArtifactType::Document
        );
    }

    #[test]
    fn artifact_type_from_path_image() {
        assert_eq!(
            ArtifactType::from_path("screenshots/001_00-01-05_intro.jpg"),
            ArtifactType::Image
        );
    }

    #[test]
    fn artifact_type_from_path_script_python() {
        // The script extension wins over the `contact_sheet` substring.
        assert_eq!(
            ArtifactType::from_path("scripts/generate_contact_sheet.py"),
            ArtifactType::Script
        );
    }

    #[test]
    fn artifact_type_from_path_script_shell() {
        assert_eq!(
            ArtifactType::from_path("scripts/extract_screenshots.sh"),
            ArtifactType::Script
        );
    }

    #[test]
    fn artifact_type_from_path_transcript() {
        assert_eq!(
            ArtifactType::from_path("transcript_local.txt"),
            ArtifactType::Transcript
        );
    }

    #[test]
    fn artifact_type_from_path_manifest() {
        assert_eq!(
            ArtifactType::from_path("run_manifest.json"),
            ArtifactType::Manifest
        );
    }

    #[test]
    fn artifact_type_from_path_checksum() {
        assert_eq!(
            ArtifactType::from_path("artifacts.sha256"),
            ArtifactType::Checksum
        );
    }

    #[test]
    fn artifact_type_from_path_report() {
        assert_eq!(
            ArtifactType::from_path("contact-sheet/report.json"),
            ArtifactType::Report
        );
    }

    #[test]
    fn artifact_type_from_path_contact_sheet() {
        assert_eq!(
            ArtifactType::from_path("contact-sheet/contact_sheet.jpg"),
            ArtifactType::ContactSheet
        );
    }

    #[test]
    fn artifact_type_from_path_is_case_insensitive() {
        assert_eq!(
            ArtifactType::from_path("docs/README.MD"),
            ArtifactType::Document
        );
    }

    #[test]
    fn artifact_type_from_path_other_keeps_original_path() {
        // A `.txt` without "transcript" matches nothing, and the fallback
        // must carry the path exactly as given (not lowercased).
        assert_eq!(
            ArtifactType::from_path("Notes/Data.TXT"),
            ArtifactType::Other("Notes/Data.TXT".to_string())
        );
    }

    #[test]
    fn skill_status_default_is_active() {
        assert_eq!(SkillStatus::default(), SkillStatus::Active);
    }

    #[test]
    fn import_summary_success_empty() {
        let s = clean_summary();
        assert!(s.success());
    }

    #[test]
    fn import_summary_failure_on_error() {
        let s = ImportSummary {
            skills_found: 1,
            skills_indexed: 0,
            skills_skipped: 1,
            artifacts_total: 0,
            chunks_total: 0,
            checksum_failures: 0,
            errors: vec!["manifest parse error".into()],
        };
        assert!(!s.success());
    }

    #[test]
    fn import_summary_failure_on_checksum_mismatch() {
        // A checksum failure alone, with no error messages, is still a failure.
        let s = ImportSummary {
            checksum_failures: 2,
            ..clean_summary()
        };
        assert!(!s.success());
    }
}
|
||||
|
|
@ -1,211 +1,231 @@
|
|||
# Colibri Skills Plan
|
||||
|
||||
**Date:** 27.maj.2026
|
||||
**Status:** PLANNED — docs first, no crate scaffold yet
|
||||
**Status:** Phase 1 scaffolded — read-only split-brain consumer
|
||||
|
||||
**Crate:** `crates/colibri-skills`
|
||||
|
||||
## Purpose
|
||||
|
||||
Define the next Colibri slice for Clawdie's split-brain model:
|
||||
`colibri-skills` is Colibri's read-only runtime consumer for reviewed skill
|
||||
artifacts authored in the Clawdie-AI repo. It does not author, edit, or store
|
||||
canonical skills. Clawdie-AI remains the source of truth; Colibri indexes and
|
||||
serves typed/runtime views.
|
||||
|
||||
- **built-in knowledge** stays separate from
|
||||
- **user/agent memory** and
|
||||
- **operational state**
|
||||
```text
|
||||
Clawdie-AI repo (source of truth)
|
||||
docs/astro-howto/
|
||||
docs/forgejo-admin/
|
||||
docs/vaultwarden-onboarding/
|
||||
...
|
||||
|
||||
The product explanation is simple:
|
||||
Colibri colibri-skills crate (read-only consumer)
|
||||
reads committed skill artifacts
|
||||
validates checksums
|
||||
indexes Markdown/transcript chunks
|
||||
exposes Skill, SkillArtifact, SkillChunk structs
|
||||
serves CLI/TUI/search later
|
||||
```
|
||||
|
||||
> Think of it like the machine already ships with its handbook inside.
|
||||
This keeps the split-brain model explicit:
|
||||
|
||||
In Clawdie terms, that handbook is the **committed built-in knowledge
|
||||
artifact** imported into `system_skills`.
|
||||
- `system_skills`: committed built-in knowledge / manuals / reviewed skillpacks
|
||||
- `system_brain`: user and agent memory
|
||||
- `system_ops`: live runtime, task, service, and daemon state
|
||||
|
||||
This plan names the Rust-side consumer for that lane:
|
||||
## Seed artifact: Astro how-to
|
||||
|
||||
- **crate name:** `colibri-skills`
|
||||
The first concrete skillpack is `docs/astro-howto/` in Clawdie-AI. It is useful
|
||||
because it is not just prose; it includes transcript, generated how-to docs,
|
||||
commands, screenshots, contact sheet, manifest, checksums, and scripts.
|
||||
|
||||
It is a **read path first**, not a new generator, not a new database, and not a
|
||||
memory replacement.
|
||||
```json
|
||||
{
|
||||
"skill_id": "astro-howto",
|
||||
"source": "local video-derived training artifact",
|
||||
"inputs": [
|
||||
"transcript_local.txt",
|
||||
"screenshots/",
|
||||
"contact-sheet/contact_sheet.jpg"
|
||||
],
|
||||
"outputs": [
|
||||
"docs/HOWTO.md",
|
||||
"docs/COMMANDS.md",
|
||||
"docs/SCREENSHOTS.md",
|
||||
"docs/SUMMARY.md"
|
||||
],
|
||||
"verification": "can user create and run an Astro project?",
|
||||
"media": "screenshots/*.jpg (paths + hashes, not blobs)",
|
||||
"manifest": "run_manifest.json",
|
||||
"checksums": "artifacts.sha256"
|
||||
}
|
||||
```
|
||||
|
||||
## Background
|
||||
Pipeline shape:
|
||||
|
||||
The current split-brain design already exists in `clawdie-ai`:
|
||||
```text
|
||||
video → local transcript → topic extraction → how-to/runbook
|
||||
→ screenshots/contact sheet → commands → verification test
|
||||
→ manifest + checksums → reviewed skill artifact → Colibri read-only index
|
||||
```
|
||||
|
||||
- `system_skills`
|
||||
- built-in knowledge / manuals already included
|
||||
- sourced from `bootstrap/skills-memory/artifact.sql`
|
||||
- `system_brain`
|
||||
- dynamic user/agent memory
|
||||
- `system_ops`
|
||||
- runtime and operational state
|
||||
## Ownership
|
||||
|
||||
Colibri should reflect that boundary explicitly instead of flattening all
|
||||
knowledge into one vague "brain."
|
||||
| Layer | Role | Writes | Reads |
|
||||
|-------|------|--------|-------|
|
||||
| Clawdie-AI | Source of truth | Skill artifacts via PR | N/A |
|
||||
| `colibri-skills` | Runtime consumer | Never to source repo | Indexed skill structs from committed artifacts |
|
||||
| Agents | Authors/reviewers | Candidate skill artifact PRs | Skill content for task routing |
|
||||
| `system_brain` | Agent/user memory | Personal/user/agent context | Not canonical skill docs |
|
||||
| `system_ops` | Runtime state | Live task/service state | Not skills |
|
||||
|
||||
## What `colibri-skills` should own
|
||||
## What `colibri-skills` does
|
||||
|
||||
`colibri-skills` should own the Rust-side access layer for **built-in
|
||||
knowledge only**.
|
||||
- Read skill manifests from a configured Clawdie-AI checkout path
|
||||
- Parse `run_manifest.json`
|
||||
- Validate checksums against `artifacts.sha256`
|
||||
- Classify artifacts as document, image, script, transcript, manifest, checksum,
|
||||
report, contact sheet, or other
|
||||
- Index Markdown/transcript chunks for search
|
||||
- Expose stable typed structs for daemon/client/TUI callers
|
||||
- Persist runtime index metadata in SQLite
|
||||
|
||||
Phase-1 responsibilities:
|
||||
## What `colibri-skills` does not do
|
||||
|
||||
- connect to the `system_skills` database
|
||||
- report artifact/import status
|
||||
- expose stable typed results for built-in knowledge lookup
|
||||
- support text search over committed handbook content
|
||||
- return structured summaries that other Colibri crates can consume
|
||||
- Author, edit, or create skills
|
||||
- Store image blobs in SQLite; store paths and hashes only
|
||||
- Replace `system_brain`
|
||||
- Replace `system_ops`
|
||||
- Own provider/API budget logic
|
||||
- Require nonportable local source media paths at runtime
|
||||
|
||||
Good callers later:
|
||||
## Phase 1 delivered
|
||||
|
||||
- `colibri-daemon`
|
||||
- `colibri-client`
|
||||
- `colibri-glasspane-tui`
|
||||
The scaffold crate now provides:
|
||||
|
||||
## What `colibri-skills` must not own
|
||||
- `Skill`
|
||||
- `SkillManifest`
|
||||
- `SkillArtifact`
|
||||
- `SkillChunk`
|
||||
- `ArtifactType`
|
||||
- `SkillStatus`
|
||||
- `ImportSummary`
|
||||
- `SearchResult`
|
||||
- unit tests for artifact classification and status/summary behavior
|
||||
|
||||
It should **not** own:
|
||||
Phase 1 is intentionally scaffold-only: compile and type proof, no runtime
|
||||
import behavior yet.
|
||||
|
||||
- user memory retrieval from `system_brain`
|
||||
- task/session/runtime state from `system_ops`
|
||||
- artifact generation or embedding refresh
|
||||
- OpenRouter or embedding-provider budget logic
|
||||
- ISO build-time content packing
|
||||
## SQLite schema target
|
||||
|
||||
Those already belong elsewhere.
|
||||
```sql
|
||||
CREATE TABLE system_skills (
|
||||
skill_id TEXT PRIMARY KEY,
|
||||
display_name TEXT NOT NULL,
|
||||
source_path TEXT NOT NULL, -- relative within Clawdie-AI repo
|
||||
manifest_hash TEXT, -- sha256 of run_manifest.json
|
||||
created_at TEXT NOT NULL, -- ISO 8601
|
||||
updated_at TEXT NOT NULL,
|
||||
verification TEXT, -- natural-language verification test
|
||||
status TEXT NOT NULL DEFAULT 'active' -- active, archived, superseded
|
||||
);
|
||||
|
||||
## Why a separate crate
|
||||
CREATE TABLE system_skill_artifacts (
|
||||
artifact_id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
skill_id TEXT NOT NULL REFERENCES system_skills(skill_id),
|
||||
artifact_type TEXT NOT NULL,
|
||||
relative_path TEXT NOT NULL, -- within the skill directory
|
||||
file_name TEXT NOT NULL,
|
||||
mime_type TEXT,
|
||||
size_bytes INTEGER,
|
||||
sha256_hash TEXT NOT NULL,
|
||||
UNIQUE(skill_id, relative_path)
|
||||
);
|
||||
|
||||
This wants its own crate because it is a separate architectural lane:
|
||||
CREATE TABLE system_skill_chunks (
|
||||
chunk_id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
skill_id TEXT NOT NULL REFERENCES system_skills(skill_id),
|
||||
artifact_id INTEGER NOT NULL REFERENCES system_skill_artifacts(artifact_id),
|
||||
chunk_type TEXT NOT NULL,
|
||||
heading TEXT,
|
||||
content TEXT NOT NULL,
|
||||
line_start INTEGER,
|
||||
line_end INTEGER,
|
||||
tokens_estimate INTEGER
|
||||
);
|
||||
|
||||
- distinct data source
|
||||
- distinct lifecycle
|
||||
- distinct operator story
|
||||
- distinct proof gates
|
||||
CREATE INDEX idx_skills_status ON system_skills(status);
|
||||
CREATE INDEX idx_artifacts_skill ON system_skill_artifacts(skill_id);
|
||||
CREATE INDEX idx_artifacts_type ON system_skill_artifacts(artifact_type);
|
||||
CREATE INDEX idx_chunks_skill ON system_skill_chunks(skill_id);
|
||||
CREATE INDEX idx_chunks_type ON system_skill_chunks(chunk_type);
|
||||
|
||||
It is not just a helper module inside `colibri-daemon`.
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS skill_fts USING fts5(
|
||||
content,
|
||||
heading,
|
||||
skill_id,
|
||||
chunk_type,
|
||||
content=system_skill_chunks,
|
||||
content_rowid=chunk_id
|
||||
);
|
||||
```
|
||||
|
||||
That keeps the split-brain boundary visible in code:
|
||||
## Import flow target
|
||||
|
||||
- `colibri-runtime` = host/runtime facts
|
||||
- `colibri-skills` = built-in handbook facts
|
||||
- future memory lane = user/agent memory, if needed later
|
||||
1. Read Clawdie-AI checkout path from config/env.
|
||||
2. Scan for directories containing `run_manifest.json`.
|
||||
3. Parse manifest and derive skill metadata.
|
||||
4. Read artifacts, compute SHA-256, and verify `artifacts.sha256` when present.
|
||||
5. Chunk Markdown by heading and transcripts by timestamp/segment.
|
||||
6. Upsert SQLite rows idempotently.
|
||||
7. Return `ImportSummary` with skills found/indexed/skipped, artifacts, chunks,
|
||||
checksum failures, and errors.
|
||||
|
||||
## Phase 0 — source map before scaffolding
|
||||
## CLI surface target
|
||||
|
||||
Before adding the crate, copy the TypeScript contract into this document or a
|
||||
small fixture so the Rust implementation does not guess at table names or env
|
||||
wiring.
|
||||
```sh
|
||||
colibri list-skills
|
||||
colibri show-skill <id>
|
||||
colibri search-skills <query>
|
||||
colibri index-skills
|
||||
colibri verify-skill <id>
|
||||
```
|
||||
|
||||
Known current tables from `clawdie-ai/src/split-brain-status.ts` and
|
||||
`bootstrap/skills-memory/artifact.sql`:
|
||||
## Portability rules
|
||||
|
||||
- `builtin_knowledge_artifacts`
|
||||
- `builtin_knowledge_documents`
|
||||
- `builtin_knowledge_chunks`
|
||||
- `builtin_knowledge_embeddings`
|
||||
- Store image paths and hashes, not blobs.
|
||||
- Treat local provenance paths like `/home/samob/Videos/...` as metadata only.
|
||||
- Verify checksums against committed artifacts, not local source paths.
|
||||
- Store paths relative to the Clawdie-AI repo.
|
||||
- Normal tests must not require PostgreSQL, remote Forgejo, or local source
|
||||
videos/audio.
|
||||
|
||||
Phase-0 checks:
|
||||
## Future skillpacks
|
||||
|
||||
1. Identify the exact read-only connection source/env var used for the skills
|
||||
DB in production.
|
||||
2. Record the minimum queries needed for status:
|
||||
- artifact row count
|
||||
- document row count
|
||||
- chunk row count
|
||||
- latest `artifact_version`
|
||||
3. Choose an opt-in live-test env var so normal workspace tests do **not**
|
||||
require PostgreSQL.
|
||||
4. Add offline fixture rows/golden JSON before any live DB test.
|
||||
```text
|
||||
astro-howto
|
||||
forgejo-admin
|
||||
vaultwarden-onboarding
|
||||
freebsd-update-reboot
|
||||
colibri-iso-build
|
||||
zed-on-freebsd
|
||||
pi-headless-login
|
||||
```
|
||||
|
||||
## Phase 1 — read-only consumer
|
||||
## Implementation phases
|
||||
|
||||
The first implementation should stay intentionally small.
|
||||
|
||||
Deliverables:
|
||||
|
||||
1. New crate scaffold:
|
||||
- `crates/colibri-skills`
|
||||
2. DB-facing types for:
|
||||
- artifact metadata
|
||||
- documents
|
||||
- chunks
|
||||
- text search results
|
||||
3. Read-only API for:
|
||||
- artifact/import status
|
||||
- list current artifact version
|
||||
- search built-in knowledge by text
|
||||
4. Unit tests with fixture rows or golden snapshots
|
||||
5. Optional ignored/opt-in integration test for a real skills DB
|
||||
|
||||
Phase 1 goal:
|
||||
|
||||
> prove Colibri can read the "manuals already included" lane cleanly, without
|
||||
> touching memory or generator code.
|
||||
|
||||
## Phase 2 — daemon integration
|
||||
|
||||
After Phase 1 is proven, wire it into Colibri surfaces:
|
||||
|
||||
- `colibri-daemon`
|
||||
- expose a query path for built-in handbook lookups
|
||||
- `colibri-client`
|
||||
- typed client method
|
||||
- `colibri-glasspane-tui`
|
||||
- optional "manuals included" / artifact-status pane
|
||||
|
||||
This should still remain read-only.
|
||||
|
||||
## Phase 3 — optional deeper work
|
||||
|
||||
Only after the read path is stable should we consider:
|
||||
|
||||
- vector similarity for built-in knowledge
|
||||
- richer ranking/grounding
|
||||
- parity checks against the TypeScript retrieval layer
|
||||
- eventual Rust ownership of artifact generation
|
||||
|
||||
That is explicitly **not** phase 1.
|
||||
|
||||
## Contracts and proof
|
||||
|
||||
The proof for `colibri-skills` should be simple:
|
||||
|
||||
1. Offline tests can parse fixture rows and return typed status/search results.
|
||||
2. With an explicit live DB env var, Linux and FreeBSD can connect read-only to
|
||||
the skills DB / `system_skills` lane.
|
||||
3. Colibri can report whether the committed artifact is imported.
|
||||
4. Colibri can query known built-in handbook content by text.
|
||||
5. Results are typed and stable enough for daemon/client use.
|
||||
|
||||
This is narrower than the daemon proof gates and should stay that way.
|
||||
|
||||
## Naming
|
||||
|
||||
Why `colibri-skills`:
|
||||
|
||||
- it matches the existing `system_skills` boundary
|
||||
- it stays concrete
|
||||
- it maps to the "manuals already included" story without pretending it owns
|
||||
all knowledge
|
||||
|
||||
Rejected names for now:
|
||||
|
||||
- `colibri-brain`
|
||||
- too vague; collides with user memory
|
||||
- `colibri-knowledge`
|
||||
- broader than the actual lane
|
||||
- `colibri-manuals`
|
||||
- good product language, but weaker alignment with existing technical names
|
||||
|
||||
## Non-goals
|
||||
|
||||
- no new repo
|
||||
- no crate inside `clawdie-iso`
|
||||
- no merge of `system_skills` with `system_brain`
|
||||
- no rewrite of `bootstrap/skills-memory/artifact.sql` generation yet
|
||||
- no embedding refresh port to Rust yet
|
||||
| Phase | What | Depends on |
|
||||
|-------|------|------------|
|
||||
| 1 | Scaffold crate + structs + schema plan | Nothing |
|
||||
| 2 | Manifest parser (`run_manifest.json` → `SkillManifest`) | Phase 1 |
|
||||
| 3 | Checksum validator (`artifacts.sha256` → verify) | Phase 2 |
|
||||
| 4 | Markdown/transcript chunker | Phase 1 |
|
||||
| 5 | SQLite storage + FTS5 search | Phases 3, 4 |
|
||||
| 6 | CLI commands (`list`, `show`, `search`, `index`, `verify`) | Phase 5 |
|
||||
| 7 | Daemon/client/TUI integration | Phase 6 |
|
||||
|
||||
## Related sources
|
||||
|
||||
- `clawdie-ai/docs/internal/SKILLS-ARTIFACT-V1-PLAN.md`
|
||||
- `clawdie-ai/html/docs-clawdie-si/docs/split-brain.html`
|
||||
- `clawdie-ai/setup/skills-memory.ts`
|
||||
- `clawdie-ai/docs/astro-howto/`
|
||||
- `clawdie-ai/docs/VAULTWARDEN-SETUP.md`
|
||||
- `clawdie-ai/bootstrap/skills-memory/artifact.sql`
|
||||
- `clawdie-ai/src/split-brain-status.ts`
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue