--- Build: pass | Tests: pass - 603 passed (44 files) --- Build: pass | Tests: pass — Tests 603 passed (603)
157 lines
5.3 KiB
PL/PgSQL
157 lines
5.3 KiB
PL/PgSQL
-- Built-in knowledge base schema
|
|
--
|
|
-- Apply this to the agent-system skills database.
|
|
--
|
|
-- Prerequisites:
|
|
-- CREATE EXTENSION IF NOT EXISTS pgcrypto;
|
|
-- CREATE EXTENSION IF NOT EXISTS vector;
|
|
|
|
--------------------------------------------------------------------------------
|
|
-- ARTIFACT METADATA
|
|
--------------------------------------------------------------------------------
|
|
|
|
CREATE TABLE IF NOT EXISTS builtin_knowledge_artifacts (
|
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
artifact_version TEXT NOT NULL UNIQUE,
|
|
schema_version TEXT NOT NULL,
|
|
source_snapshot TEXT,
|
|
chunker_name TEXT NOT NULL,
|
|
chunker_version TEXT NOT NULL,
|
|
chunk_size INTEGER NOT NULL,
|
|
chunk_overlap INTEGER NOT NULL DEFAULT 0,
|
|
embedding_provider TEXT,
|
|
embedding_model TEXT,
|
|
embedding_dimensions INTEGER,
|
|
generated_at TIMESTAMPTZ NOT NULL,
|
|
document_count INTEGER,
|
|
chunk_count INTEGER,
|
|
embedding_count INTEGER,
|
|
git_commit TEXT,
|
|
metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
|
|
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
|
);
|
|
|
|
--------------------------------------------------------------------------------
|
|
-- DOCUMENTS
|
|
--------------------------------------------------------------------------------
|
|
|
|
CREATE TABLE IF NOT EXISTS builtin_knowledge_documents (
|
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
artifact_id UUID NOT NULL REFERENCES builtin_knowledge_artifacts(id) ON DELETE CASCADE,
|
|
source_path TEXT NOT NULL,
|
|
source_type TEXT NOT NULL DEFAULT 'doc',
|
|
title TEXT NOT NULL,
|
|
locale TEXT,
|
|
checksum TEXT NOT NULL,
|
|
metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
|
|
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
UNIQUE (artifact_id, source_path)
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_builtin_knowledge_documents_artifact_id
|
|
ON builtin_knowledge_documents(artifact_id);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_builtin_knowledge_documents_locale
|
|
ON builtin_knowledge_documents(locale);
|
|
|
|
--------------------------------------------------------------------------------
|
|
-- CHUNKS
|
|
--------------------------------------------------------------------------------
|
|
|
|
CREATE TABLE IF NOT EXISTS builtin_knowledge_chunks (
|
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
document_id UUID NOT NULL REFERENCES builtin_knowledge_documents(id) ON DELETE CASCADE,
|
|
chunk_order INTEGER NOT NULL,
|
|
chunk_text TEXT NOT NULL,
|
|
content_hash TEXT NOT NULL,
|
|
metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
|
|
search_vector tsvector,
|
|
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
UNIQUE (document_id, chunk_order)
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_builtin_knowledge_chunks_document_id
|
|
ON builtin_knowledge_chunks(document_id);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_builtin_knowledge_chunks_search_vector
|
|
ON builtin_knowledge_chunks
|
|
USING GIN(search_vector);
|
|
|
|
CREATE OR REPLACE FUNCTION builtin_knowledge_chunks_search_vector_update()
|
|
RETURNS trigger
|
|
AS $$
|
|
BEGIN
|
|
NEW.search_vector := to_tsvector('simple', COALESCE(NEW.chunk_text, ''));
|
|
RETURN NEW;
|
|
END;
|
|
$$ LANGUAGE plpgsql;
|
|
|
|
DROP TRIGGER IF EXISTS trg_builtin_knowledge_chunks_search_vector_update
|
|
ON builtin_knowledge_chunks;
|
|
|
|
CREATE TRIGGER trg_builtin_knowledge_chunks_search_vector_update
|
|
BEFORE INSERT OR UPDATE OF chunk_text
|
|
ON builtin_knowledge_chunks
|
|
FOR EACH ROW
|
|
EXECUTE FUNCTION builtin_knowledge_chunks_search_vector_update();
|
|
|
|
UPDATE builtin_knowledge_chunks
|
|
SET search_vector = to_tsvector('simple', COALESCE(chunk_text, ''))
|
|
WHERE search_vector IS NULL;
|
|
|
|
--------------------------------------------------------------------------------
|
|
-- OPTIONAL EMBEDDINGS
|
|
--------------------------------------------------------------------------------
|
|
|
|
CREATE TABLE IF NOT EXISTS builtin_knowledge_embeddings (
|
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
chunk_id UUID NOT NULL UNIQUE REFERENCES builtin_knowledge_chunks(id) ON DELETE CASCADE,
|
|
embedding vector(1024) NOT NULL,
|
|
embedding_provider TEXT NOT NULL,
|
|
embedding_model TEXT NOT NULL,
|
|
chunking_version TEXT DEFAULT 'v1',
|
|
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_builtin_knowledge_embeddings_hnsw
|
|
ON builtin_knowledge_embeddings
|
|
USING hnsw (embedding vector_cosine_ops);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_builtin_knowledge_embeddings_chunk_id
|
|
ON builtin_knowledge_embeddings(chunk_id);
|
|
|
|
--------------------------------------------------------------------------------
|
|
-- BASE SEARCH FUNCTION
|
|
--------------------------------------------------------------------------------
|
|
|
|
CREATE OR REPLACE FUNCTION search_builtin_knowledge(
|
|
query_text TEXT,
|
|
max_results INTEGER DEFAULT 10
|
|
)
|
|
RETURNS TABLE (
|
|
artifact_version TEXT,
|
|
source_path TEXT,
|
|
source_type TEXT,
|
|
title TEXT,
|
|
locale TEXT,
|
|
chunk_id UUID,
|
|
chunk_text TEXT,
|
|
text_rank REAL
|
|
)
|
|
AS $$
|
|
SELECT
|
|
a.artifact_version,
|
|
d.source_path,
|
|
d.source_type,
|
|
d.title,
|
|
d.locale,
|
|
c.id AS chunk_id,
|
|
c.chunk_text,
|
|
ts_rank(c.search_vector, plainto_tsquery('simple', query_text)) AS text_rank
|
|
FROM builtin_knowledge_chunks c
|
|
JOIN builtin_knowledge_documents d ON d.id = c.document_id
|
|
JOIN builtin_knowledge_artifacts a ON a.id = d.artifact_id
|
|
WHERE c.search_vector @@ plainto_tsquery('simple', query_text)
|
|
ORDER BY text_rank DESC, c.created_at DESC
|
|
LIMIT GREATEST(max_results, 1);
|
|
$$ LANGUAGE SQL STABLE;
|