clawdie-ai/docs/internal/sql/builtin-knowledge-base.sql
Clawdie AI cc37d2c8cf Public/internal docs restructure; CMS/verify cleanup (Sam & Codex)
---

Build: pass | Tests: pass - 603 passed (44 files)

---
Build: pass | Tests: pass — Tests  603 passed (603)
2026-04-05 06:29:19 +00:00

157 lines
5.3 KiB
PL/PgSQL

-- Built-in knowledge base schema
--
-- Apply this to the agent-system skills database.
--
-- Prerequisites:
-- CREATE EXTENSION IF NOT EXISTS pgcrypto;
-- CREATE EXTENSION IF NOT EXISTS vector;
--------------------------------------------------------------------------------
-- ARTIFACT METADATA
--------------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS builtin_knowledge_artifacts (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
artifact_version TEXT NOT NULL UNIQUE,
schema_version TEXT NOT NULL,
source_snapshot TEXT,
chunker_name TEXT NOT NULL,
chunker_version TEXT NOT NULL,
chunk_size INTEGER NOT NULL,
chunk_overlap INTEGER NOT NULL DEFAULT 0,
embedding_provider TEXT,
embedding_model TEXT,
embedding_dimensions INTEGER,
generated_at TIMESTAMPTZ NOT NULL,
document_count INTEGER,
chunk_count INTEGER,
embedding_count INTEGER,
git_commit TEXT,
metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
--------------------------------------------------------------------------------
-- DOCUMENTS
--------------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS builtin_knowledge_documents (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
artifact_id UUID NOT NULL REFERENCES builtin_knowledge_artifacts(id) ON DELETE CASCADE,
source_path TEXT NOT NULL,
source_type TEXT NOT NULL DEFAULT 'doc',
title TEXT NOT NULL,
locale TEXT,
checksum TEXT NOT NULL,
metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE (artifact_id, source_path)
);
CREATE INDEX IF NOT EXISTS idx_builtin_knowledge_documents_artifact_id
ON builtin_knowledge_documents(artifact_id);
CREATE INDEX IF NOT EXISTS idx_builtin_knowledge_documents_locale
ON builtin_knowledge_documents(locale);
--------------------------------------------------------------------------------
-- CHUNKS
--------------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS builtin_knowledge_chunks (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
document_id UUID NOT NULL REFERENCES builtin_knowledge_documents(id) ON DELETE CASCADE,
chunk_order INTEGER NOT NULL,
chunk_text TEXT NOT NULL,
content_hash TEXT NOT NULL,
metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
search_vector tsvector,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE (document_id, chunk_order)
);
CREATE INDEX IF NOT EXISTS idx_builtin_knowledge_chunks_document_id
ON builtin_knowledge_chunks(document_id);
CREATE INDEX IF NOT EXISTS idx_builtin_knowledge_chunks_search_vector
ON builtin_knowledge_chunks
USING GIN(search_vector);
CREATE OR REPLACE FUNCTION builtin_knowledge_chunks_search_vector_update()
RETURNS trigger
AS $$
BEGIN
NEW.search_vector := to_tsvector('simple', COALESCE(NEW.chunk_text, ''));
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
DROP TRIGGER IF EXISTS trg_builtin_knowledge_chunks_search_vector_update
ON builtin_knowledge_chunks;
CREATE TRIGGER trg_builtin_knowledge_chunks_search_vector_update
BEFORE INSERT OR UPDATE OF chunk_text
ON builtin_knowledge_chunks
FOR EACH ROW
EXECUTE FUNCTION builtin_knowledge_chunks_search_vector_update();
UPDATE builtin_knowledge_chunks
SET search_vector = to_tsvector('simple', COALESCE(chunk_text, ''))
WHERE search_vector IS NULL;
--------------------------------------------------------------------------------
-- OPTIONAL EMBEDDINGS
--------------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS builtin_knowledge_embeddings (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
chunk_id UUID NOT NULL UNIQUE REFERENCES builtin_knowledge_chunks(id) ON DELETE CASCADE,
embedding vector(1024) NOT NULL,
embedding_provider TEXT NOT NULL,
embedding_model TEXT NOT NULL,
chunking_version TEXT DEFAULT 'v1',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_builtin_knowledge_embeddings_hnsw
ON builtin_knowledge_embeddings
USING hnsw (embedding vector_cosine_ops);
CREATE INDEX IF NOT EXISTS idx_builtin_knowledge_embeddings_chunk_id
ON builtin_knowledge_embeddings(chunk_id);
--------------------------------------------------------------------------------
-- BASE SEARCH FUNCTION
--------------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION search_builtin_knowledge(
query_text TEXT,
max_results INTEGER DEFAULT 10
)
RETURNS TABLE (
artifact_version TEXT,
source_path TEXT,
source_type TEXT,
title TEXT,
locale TEXT,
chunk_id UUID,
chunk_text TEXT,
text_rank REAL
)
AS $$
SELECT
a.artifact_version,
d.source_path,
d.source_type,
d.title,
d.locale,
c.id AS chunk_id,
c.chunk_text,
ts_rank(c.search_vector, plainto_tsquery('simple', query_text)) AS text_rank
FROM builtin_knowledge_chunks c
JOIN builtin_knowledge_documents d ON d.id = c.document_id
JOIN builtin_knowledge_artifacts a ON a.id = d.artifact_id
WHERE c.search_vector @@ plainto_tsquery('simple', query_text)
ORDER BY text_rank DESC, c.created_at DESC
LIMIT GREATEST(max_results, 1);
$$ LANGUAGE SQL STABLE;