clawdie-ai/docs/internal/sql/brain-hybrid-upgrade.sql
Charlie Root 9498ad28bd fix: replace hardcoded 'clawdie' with AGENT_NAME across 22 files
All hardcoded 'clawdie' references in production code now derive from
AGENT_NAME (default: 'clawdie'). This makes the mevy canary strategy
reliable — changing AGENT_NAME is all that's needed.

Changes:
- Hardcoded paths: CMS_WEBROOT, ASTRO_SITE_PATH, verify checks,
  controlplane dashboard dir, sessions dir, output dir, chown user
- Prometheus metrics: prefixed with AGENT_NAME for multi-install dashboards
- hostd log strings: use AGENT_NAME instead of 'clawdie-hostd'
- MCP server name: derived from AGENT_NAME
- Skill modify patches: container image and mount allowlist use AGENT_NAME
- SQL migration file renamed: clawdie-brain-hybrid-upgrade → brain-hybrid-upgrade
- Temp dir prefixes: all use AGENT_NAME

Kept as-is (correct pattern):
- 'clawdie' as default fallback when AGENT_NAME is unset
- .pi/extensions/clawdie-harness/ directory (pi package identity)
- html/docs-clawdie-si/ (public docs site URL)

---
Build: pass | Tests: pass — 1527 passed, 3 failed (2 files, pre-existing)
2026-04-15 21:41:41 +00:00

131 lines
4.1 KiB
PL/PgSQL

-- Clawdie hybrid memory upgrade
-- User-facing reference date: 09.mar.2026 16:30:00
--
-- Assumptions:
-- CREATE EXTENSION IF NOT EXISTS pgcrypto;
-- CREATE EXTENSION IF NOT EXISTS vector;
-- Existing base table:
-- memories
--
-- This migration adds:
-- 1. chunk-level full-text search
-- 2. chunk-level vector embeddings
-- 3. a hybrid retrieval baseline
--------------------------------------------------------------------------------
-- CHUNKS
--------------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS memory_chunks (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
memory_id UUID NOT NULL REFERENCES memories(id) ON DELETE CASCADE,
chunk_order INTEGER NOT NULL,
chunk_text TEXT NOT NULL,
content_hash TEXT NOT NULL,
created_at TIMESTAMPTZ DEFAULT NOW(),
UNIQUE (memory_id, chunk_order)
);
CREATE INDEX IF NOT EXISTS idx_memory_chunks_memory_id
ON memory_chunks(memory_id);
--------------------------------------------------------------------------------
-- FULL-TEXT SEARCH
--------------------------------------------------------------------------------
ALTER TABLE memory_chunks
ADD COLUMN IF NOT EXISTS search_vector tsvector;
CREATE INDEX IF NOT EXISTS idx_memory_chunks_search_vector
ON memory_chunks
USING GIN(search_vector);
CREATE OR REPLACE FUNCTION memory_chunks_search_vector_update()
RETURNS trigger
AS $$
BEGIN
NEW.search_vector := to_tsvector('english', COALESCE(NEW.chunk_text, ''));
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
DROP TRIGGER IF EXISTS trg_memory_chunks_search_vector_update ON memory_chunks;
CREATE TRIGGER trg_memory_chunks_search_vector_update
BEFORE INSERT OR UPDATE OF chunk_text
ON memory_chunks
FOR EACH ROW
EXECUTE FUNCTION memory_chunks_search_vector_update();
UPDATE memory_chunks
SET search_vector = to_tsvector('english', COALESCE(chunk_text, ''))
WHERE search_vector IS NULL;
--------------------------------------------------------------------------------
-- EMBEDDINGS
--------------------------------------------------------------------------------
-- Using 1024 dimensions: bge-m3 via local llama-server.
-- Provider configured via EMBED_BASE_URL/EMBED_MODEL in .env.
-- FTS-only fallback if EMBED_BASE_URL is unset.
CREATE TABLE IF NOT EXISTS memory_embeddings (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
chunk_id UUID NOT NULL UNIQUE REFERENCES memory_chunks(id) ON DELETE CASCADE,
embedding vector(1024) NOT NULL,
embedding_provider TEXT NOT NULL,
embedding_model TEXT NOT NULL,
chunking_version TEXT DEFAULT 'v1',
created_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_memory_embeddings_hnsw
ON memory_embeddings
USING hnsw (embedding vector_cosine_ops);
CREATE INDEX IF NOT EXISTS idx_memory_embeddings_chunk_id
ON memory_embeddings(chunk_id);
--------------------------------------------------------------------------------
-- HYBRID SEARCH EXAMPLE
--------------------------------------------------------------------------------
-- Replace:
-- :query_text
-- :query_embedding
--
-- Example retrieval pattern:
--
-- WITH
-- text_results AS (
-- SELECT
-- mc.id AS chunk_id,
-- mc.memory_id,
-- row_number() OVER (
-- ORDER BY ts_rank(mc.search_vector, plainto_tsquery('english', :query_text)) DESC
-- ) AS text_rank
-- FROM memory_chunks mc
-- WHERE mc.search_vector @@ plainto_tsquery('english', :query_text)
-- LIMIT 20
-- ),
-- vector_results AS (
-- SELECT
-- mc.id AS chunk_id,
-- mc.memory_id,
-- row_number() OVER (
-- ORDER BY me.embedding <=> :query_embedding
-- ) AS vector_rank
-- FROM memory_embeddings me
-- JOIN memory_chunks mc ON mc.id = me.chunk_id
-- ORDER BY me.embedding <=> :query_embedding
-- LIMIT 20
-- )
-- SELECT
-- COALESCE(t.chunk_id, v.chunk_id) AS chunk_id,
-- COALESCE(t.memory_id, v.memory_id) AS memory_id,
-- COALESCE(1.0 / (60 + t.text_rank), 0) +
-- COALESCE(1.0 / (60 + v.vector_rank), 0) AS rrf_score
-- FROM text_results t
-- FULL OUTER JOIN vector_results v
-- ON t.chunk_id = v.chunk_id
-- ORDER BY rrf_score DESC
-- LIMIT 10;