clawdie-ai/docs/internal/sql/search-memories.sql
Clawdie AI cc37d2c8cf Public/internal docs restructure; CMS/verify cleanup (Sam & Codex)
---

Build: pass | Tests: pass - 603 passed (44 files)

---
Build: pass | Tests: pass — Tests  603 passed (603)
2026-04-05 06:29:19 +00:00

68 lines
2.1 KiB
PL/PgSQL

-- Hybrid search function for ai_brain
-- Date: 10.mar.2026 (updated: 27.mar.2026)
--
-- Uses Reciprocal Rank Fusion (RRF) to merge full-text and vector results.
-- Embedding model: bge-m3 at 1024 dims via local llama-server.
-- Degrades to FTS-only if no embeddings are stored.
--
-- Usage:
-- SELECT * FROM search_memories('query text', '<embedding>'::vector, 10);
CREATE OR REPLACE FUNCTION search_memories(
query_text TEXT,
query_embedding vector(1024),
match_limit INTEGER DEFAULT 10
)
RETURNS TABLE (
memory_id UUID,
chunk_text TEXT,
summary TEXT,
importance INTEGER,
topics TEXT[],
created_at TIMESTAMPTZ,
combined_score FLOAT
) AS $$
BEGIN
RETURN QUERY
WITH text_results AS (
SELECT
mc.id AS chunk_id,
mc.memory_id,
mc.chunk_text,
row_number() OVER (
ORDER BY ts_rank(mc.search_vector,
plainto_tsquery('english', query_text)) DESC
) AS text_rank
FROM memory_chunks mc
WHERE mc.search_vector @@ plainto_tsquery('english', query_text)
LIMIT 20
),
vector_results AS (
SELECT
mc.id AS chunk_id,
mc.memory_id,
mc.chunk_text,
row_number() OVER (
ORDER BY me.embedding <=> query_embedding
) AS vector_rank
FROM memory_embeddings me
JOIN memory_chunks mc ON mc.id = me.chunk_id
ORDER BY me.embedding <=> query_embedding
LIMIT 20
)
SELECT
COALESCE(t.memory_id, v.memory_id) AS memory_id,
COALESCE(t.chunk_text, v.chunk_text) AS chunk_text,
m.summary,
m.importance,
m.topics,
m.created_at,
(COALESCE(1.0 / (60 + t.text_rank), 0) +
COALESCE(1.0 / (60 + v.vector_rank), 0))::FLOAT AS combined_score
FROM text_results t
FULL OUTER JOIN vector_results v ON t.chunk_id = v.chunk_id
JOIN memories m ON m.id = COALESCE(t.memory_id, v.memory_id)
ORDER BY combined_score DESC
LIMIT match_limit;
END;
$$ LANGUAGE plpgsql;