diff --git a/gateway/run.py b/gateway/run.py
index 984ce1f0a..54de48e66 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -11538,7 +11538,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
             # when we successfully transcribed the audio — it's redundant.
             _placeholder = "(The user sent a message with no text content)"
             if user_text and user_text.strip() == _placeholder:
-                return prefix
+                return prefix, successful_transcripts
             if user_text:
                 return f"{prefix}\n\n{user_text}", successful_transcripts
             return prefix, successful_transcripts
diff --git a/tests/gateway/test_stt_config.py b/tests/gateway/test_stt_config.py
index 004dd907e..6f98a058d 100644
--- a/tests/gateway/test_stt_config.py
+++ b/tests/gateway/test_stt_config.py
@@ -102,6 +102,45 @@ async def test_enrich_message_with_transcription_avoids_bogus_no_provider_messag
     assert transcripts == []
 
 
+@pytest.mark.asyncio
+async def test_enrich_message_with_transcription_returns_tuple_for_empty_content_placeholder():
+    """A successful transcription whose caption is the empty-content placeholder
+    must still return the ``(text, transcripts)`` tuple.
+
+    The Discord adapter delivers a captionless voice note as the literal
+    ``"(The user sent a message with no text content)"`` placeholder. When STT
+    succeeds we strip that redundant placeholder and return just the transcript
+    prefix — but the method's contract (and every caller, which unpacks the
+    result as ``text, transcripts = ...``) requires a 2-tuple. Returning a bare
+    string here raised ``ValueError: too many values to unpack`` and dropped the
+    whole voice message on the floor.
+    """
+    from gateway.run import GatewayRunner
+
+    runner = GatewayRunner.__new__(GatewayRunner)  # skip __init__; only the attributes set below exist
+    runner.config = GatewayConfig(stt_enabled=True)
+    runner._has_setup_skill = lambda: False  # NOTE(review): presumably consulted by the method under test — confirm
+
+    with patch(  # swap the transcription call for a canned successful result
+        "tools.transcription_tools.transcribe_audio",
+        return_value={
+            "success": True,
+            "transcript": "hello from a captionless voice note",
+            "provider": "local_command",
+        },
+    ):
+        result, transcripts = await runner._enrich_message_with_transcription(  # unpacking is itself the regression check
+            "(The user sent a message with no text content)",
+            ["/tmp/voice.ogg"],  # presumably only handed to the patched transcriber, never opened — confirm
+        )
+
+    # The redundant placeholder is stripped, leaving only the transcript prefix.
+    assert "hello from a captionless voice note" in result
+    assert "(The user sent a message with no text content)" not in result
+    # Crucially, the transcripts are still surfaced so callers can echo them.
+    assert transcripts == ["hello from a captionless voice note"]
+
+
 @pytest.mark.asyncio
 async def test_prepare_inbound_message_text_transcribes_queued_voice_event():
     from gateway.run import GatewayRunner