All hardcoded 'clawdie' references in production code now derive from AGENT_NAME (default: 'clawdie'). This makes the mevy canary strategy reliable — changing AGENT_NAME is all that's needed. Changes: - Hardcoded paths: CMS_WEBROOT, ASTRO_SITE_PATH, verify checks, controlplane dashboard dir, sessions dir, output dir, chown user - Prometheus metrics: prefixed with AGENT_NAME for multi-install dashboards - hostd log strings: use AGENT_NAME instead of 'clawdie-hostd' - MCP server name: derived from AGENT_NAME - Skill modify patches: container image and mount allowlist use AGENT_NAME - SQL migration file renamed: clawdie-brain-hybrid-upgrade → brain-hybrid-upgrade - Temp dir prefixes: all use AGENT_NAME Kept as-is (correct pattern): - 'clawdie' as default fallback when AGENT_NAME is unset - .pi/extensions/clawdie-harness/ directory (pi package identity) - html/docs-clawdie-si/ (public docs site URL) --- Build: pass | Tests: pass — 1527 passed, 3 failed (2 files, pre-existing)
131 lines
4.1 KiB
PL/PgSQL
131 lines
4.1 KiB
PL/PgSQL
-- Clawdie hybrid memory upgrade
|
|
-- User-facing reference date: 09.mar.2026 16:30:00
|
|
--
|
|
-- Assumptions:
|
|
-- CREATE EXTENSION IF NOT EXISTS pgcrypto;
|
|
-- CREATE EXTENSION IF NOT EXISTS vector;
|
|
-- Existing base table:
|
|
-- memories
|
|
--
|
|
-- This migration adds:
|
|
-- 1. chunk-level full-text search
|
|
-- 2. chunk-level vector embeddings
|
|
-- 3. a hybrid retrieval baseline
|
|
|
|
--------------------------------------------------------------------------------
|
|
-- CHUNKS
|
|
--------------------------------------------------------------------------------
|
|
|
|
CREATE TABLE IF NOT EXISTS memory_chunks (
|
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
memory_id UUID NOT NULL REFERENCES memories(id) ON DELETE CASCADE,
|
|
chunk_order INTEGER NOT NULL,
|
|
chunk_text TEXT NOT NULL,
|
|
content_hash TEXT NOT NULL,
|
|
created_at TIMESTAMPTZ DEFAULT NOW(),
|
|
UNIQUE (memory_id, chunk_order)
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_memory_chunks_memory_id
|
|
ON memory_chunks(memory_id);
|
|
|
|
--------------------------------------------------------------------------------
|
|
-- FULL-TEXT SEARCH
|
|
--------------------------------------------------------------------------------
|
|
|
|
ALTER TABLE memory_chunks
|
|
ADD COLUMN IF NOT EXISTS search_vector tsvector;
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_memory_chunks_search_vector
|
|
ON memory_chunks
|
|
USING GIN(search_vector);
|
|
|
|
CREATE OR REPLACE FUNCTION memory_chunks_search_vector_update()
|
|
RETURNS trigger
|
|
AS $$
|
|
BEGIN
|
|
NEW.search_vector := to_tsvector('english', COALESCE(NEW.chunk_text, ''));
|
|
RETURN NEW;
|
|
END;
|
|
$$ LANGUAGE plpgsql;
|
|
|
|
DROP TRIGGER IF EXISTS trg_memory_chunks_search_vector_update ON memory_chunks;
|
|
|
|
CREATE TRIGGER trg_memory_chunks_search_vector_update
|
|
BEFORE INSERT OR UPDATE OF chunk_text
|
|
ON memory_chunks
|
|
FOR EACH ROW
|
|
EXECUTE FUNCTION memory_chunks_search_vector_update();
|
|
|
|
UPDATE memory_chunks
|
|
SET search_vector = to_tsvector('english', COALESCE(chunk_text, ''))
|
|
WHERE search_vector IS NULL;
|
|
|
|
--------------------------------------------------------------------------------
|
|
-- EMBEDDINGS
|
|
--------------------------------------------------------------------------------
|
|
|
|
-- Using 1024 dimensions: bge-m3 via local llama-server.
|
|
-- Provider configured via EMBED_BASE_URL/EMBED_MODEL in .env.
|
|
-- FTS-only fallback if EMBED_BASE_URL is unset.
|
|
CREATE TABLE IF NOT EXISTS memory_embeddings (
|
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
chunk_id UUID NOT NULL UNIQUE REFERENCES memory_chunks(id) ON DELETE CASCADE,
|
|
embedding vector(1024) NOT NULL,
|
|
embedding_provider TEXT NOT NULL,
|
|
embedding_model TEXT NOT NULL,
|
|
chunking_version TEXT DEFAULT 'v1',
|
|
created_at TIMESTAMPTZ DEFAULT NOW()
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_memory_embeddings_hnsw
|
|
ON memory_embeddings
|
|
USING hnsw (embedding vector_cosine_ops);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_memory_embeddings_chunk_id
|
|
ON memory_embeddings(chunk_id);
|
|
|
|
--------------------------------------------------------------------------------
|
|
-- HYBRID SEARCH EXAMPLE
|
|
--------------------------------------------------------------------------------
|
|
|
|
-- Replace:
|
|
-- :query_text
|
|
-- :query_embedding
|
|
--
|
|
-- Example retrieval pattern:
|
|
--
|
|
-- WITH
|
|
-- text_results AS (
|
|
-- SELECT
|
|
-- mc.id AS chunk_id,
|
|
-- mc.memory_id,
|
|
-- row_number() OVER (
|
|
-- ORDER BY ts_rank(mc.search_vector, plainto_tsquery('english', :query_text)) DESC
|
|
-- ) AS text_rank
|
|
-- FROM memory_chunks mc
|
|
-- WHERE mc.search_vector @@ plainto_tsquery('english', :query_text)
|
|
-- LIMIT 20
|
|
-- ),
|
|
-- vector_results AS (
|
|
-- SELECT
|
|
-- mc.id AS chunk_id,
|
|
-- mc.memory_id,
|
|
-- row_number() OVER (
|
|
-- ORDER BY me.embedding <=> :query_embedding
|
|
-- ) AS vector_rank
|
|
-- FROM memory_embeddings me
|
|
-- JOIN memory_chunks mc ON mc.id = me.chunk_id
|
|
-- ORDER BY me.embedding <=> :query_embedding
|
|
-- LIMIT 20
|
|
-- )
|
|
-- SELECT
|
|
-- COALESCE(t.chunk_id, v.chunk_id) AS chunk_id,
|
|
-- COALESCE(t.memory_id, v.memory_id) AS memory_id,
|
|
-- COALESCE(1.0 / (60 + t.text_rank), 0) +
|
|
-- COALESCE(1.0 / (60 + v.vector_rank), 0) AS rrf_score
|
|
-- FROM text_results t
|
|
-- FULL OUTER JOIN vector_results v
|
|
-- ON t.chunk_id = v.chunk_id
|
|
-- ORDER BY rrf_score DESC
|
|
-- LIMIT 10;
|