fix(memory,skills): repair write-approval inline prompt, gateway staging, and gateway /skills review (#43452)
Follow-ups to #38199/#43354 found in post-merge review: - Inline CLI memory approval never worked: the per-thread approval callback was not passed to prompt_dangerous_approval, so the prompt_toolkit fail-closed guard (#15216) denied every gated foreground write without showing a prompt. Now invokes the registered callback directly; a crashed prompt falls back to staging instead of a silent deny. - Gateway sessions claimed inline support but prompt_dangerous_approval has no gateway round-trip (that lives in the pending-approval queue), so gated gateway memory writes hit the input() fallback and denied. Gateway contexts now stage for /memory pending review. - /skills pending|approve|reject|diff|approval now works on the gateway (gateway_config_gate on skills.write_approval), so skills staged from a messaging session can be reviewed there. Diff output truncated for chat. - memory_tool validates required params before the gate so invalid writes are rejected immediately instead of staged and failing at approve time. - Stale tri-state write_mode docstrings updated to the boolean gate; docs table corrected (inline prompt is interactive-CLI-only). - 6 new tests covering the interactive approve/deny/error paths, gateway staging, skills never-prompt invariant, and pre-gate validation.
This commit is contained in:
parent
a5c32cdf30
commit
70d5d7e39b
7 changed files with 247 additions and 57 deletions
|
|
@ -7065,6 +7065,9 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
|
|||
if canonical == "memory":
|
||||
return await self._handle_memory_command(event)
|
||||
|
||||
if canonical == "skills":
|
||||
return await self._handle_skills_command(event)
|
||||
|
||||
if canonical == "fast":
|
||||
return await self._handle_fast_command(event)
|
||||
|
||||
|
|
|
|||
|
|
@ -2058,6 +2058,64 @@ class GatewaySlashCommandsMixin:
|
|||
"reject <id>, approval <on|off>.")
|
||||
return out
|
||||
|
||||
async def _handle_skills_command(self, event: MessageEvent) -> str:
|
||||
"""Handle /skills on the gateway — pending skill-write review only.
|
||||
|
||||
The full skills hub (search/browse/install) stays CLI-only; this
|
||||
handler covers the write-approval review surface (pending / approve /
|
||||
reject / diff / approval) so a skill staged from a gateway session can
|
||||
be reviewed from that same session. Gated by ``skills.write_approval``
|
||||
via the CommandDef's ``gateway_config_gate``; also answers when staged
|
||||
writes still exist after the gate was turned off (so they are never
|
||||
stranded).
|
||||
|
||||
``diff`` output is truncated for chat bubbles — the full diff lives in
|
||||
the CLI (``/skills diff <id>``) and the pending JSON file.
|
||||
"""
|
||||
from gateway.run import _hermes_home
|
||||
from hermes_cli.write_approval_commands import handle_pending_subcommand
|
||||
from tools import write_approval as wa
|
||||
|
||||
raw_args = event.get_command_args().strip()
|
||||
args = raw_args.split() if raw_args else []
|
||||
session_key = self._session_key_for_source(event.source)
|
||||
config_path = _hermes_home / "config.yaml"
|
||||
|
||||
gate_on = wa.write_approval_enabled(wa.SKILLS)
|
||||
wants_toggle = bool(args) and args[0].lower() in {"approval", "mode"}
|
||||
if not gate_on and not wants_toggle and wa.pending_count(wa.SKILLS) == 0:
|
||||
return ("Skill write approval is off (skills.write_approval). "
|
||||
"Enable it with /skills approval on, then review staged "
|
||||
"writes here with /skills pending.")
|
||||
|
||||
def _set_approval(enabled: bool):
|
||||
import yaml
|
||||
user_config = {}
|
||||
if config_path.exists():
|
||||
with open(config_path, encoding="utf-8") as f:
|
||||
user_config = yaml.safe_load(f) or {}
|
||||
user_config.setdefault("skills", {})["write_approval"] = bool(enabled)
|
||||
atomic_yaml_write(config_path, user_config)
|
||||
# New setting must take effect next message → drop cached agent.
|
||||
self._evict_cached_agent(session_key)
|
||||
|
||||
out = handle_pending_subcommand(
|
||||
wa.SKILLS, args, set_mode_fn=_set_approval,
|
||||
)
|
||||
if out is None:
|
||||
return ("Unknown /skills subcommand on this platform. Use: pending, "
|
||||
"approve <id>, reject <id>, diff <id>, approval <on|off>. "
|
||||
"(Search/install are CLI-only.)")
|
||||
|
||||
# Chat bubbles can't hold a full skill diff — truncate and point at
|
||||
# the real review surfaces.
|
||||
if args and args[0].lower() == "diff" and len(out) > 3000:
|
||||
pending_id = args[1] if len(args) > 1 else "<id>"
|
||||
out = (out[:3000]
|
||||
+ f"\n… (truncated — full diff: `/skills diff {pending_id}` "
|
||||
f"on the CLI, or ~/.hermes/pending/skills/{pending_id}.json)")
|
||||
return out
|
||||
|
||||
async def _handle_fast_command(self, event: MessageEvent) -> str:
|
||||
"""Handle /fast — mirror the CLI Priority Processing toggle in gateway chats."""
|
||||
from gateway.run import _hermes_home, _load_gateway_config, _resolve_gateway_model
|
||||
|
|
|
|||
|
|
@ -167,6 +167,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
|||
cli_only=True),
|
||||
CommandDef("skills", "Search, install, inspect, or manage skills",
|
||||
"Tools & Skills", cli_only=True,
|
||||
gateway_config_gate="skills.write_approval",
|
||||
subcommands=("search", "browse", "inspect", "install", "audit",
|
||||
"pending", "approve", "reject", "diff", "approval")),
|
||||
CommandDef("memory", "Review pending memory writes / toggle the approval gate",
|
||||
|
|
|
|||
|
|
@ -268,3 +268,117 @@ def test_handle_unknown_subcommand_returns_none(hermes_home):
|
|||
# the CLI falls through to the skills hub.
|
||||
out = handle_pending_subcommand(wa.SKILLS, ["search", "foo"])
|
||||
assert out is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Inline (interactive CLI) approval path — regression for the bug where the
|
||||
# per-thread approval callback was never passed to prompt_dangerous_approval,
|
||||
# so every gated foreground memory write was silently denied.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@pytest.fixture
|
||||
def approval_callback_cleanup():
|
||||
yield
|
||||
from tools.terminal_tool import set_approval_callback
|
||||
set_approval_callback(None)
|
||||
|
||||
|
||||
def test_memory_inline_approve_writes(hermes_home, approval_callback_cleanup):
|
||||
from tools.memory_tool import memory_tool, MemoryStore
|
||||
from tools.terminal_tool import set_approval_callback
|
||||
from tools import write_approval as wa
|
||||
_set_approval("memory", True)
|
||||
|
||||
calls = []
|
||||
def approve_cb(command, description, **kw):
|
||||
calls.append((command, description))
|
||||
return "once"
|
||||
set_approval_callback(approve_cb)
|
||||
|
||||
store = MemoryStore(); store.load_from_disk()
|
||||
r = json.loads(memory_tool("add", "memory", "approved fact", store=store))
|
||||
assert r["success"] is True
|
||||
assert r.get("staged") is None # real write, not staged
|
||||
assert store.memory_entries == ["approved fact"]
|
||||
assert wa.pending_count("memory") == 0
|
||||
# The registered callback must actually be invoked (not the input() path).
|
||||
assert len(calls) == 1
|
||||
assert "approved fact" in calls[0][0]
|
||||
|
||||
|
||||
def test_memory_inline_deny_blocks(hermes_home, approval_callback_cleanup):
|
||||
from tools.memory_tool import memory_tool, MemoryStore
|
||||
from tools.terminal_tool import set_approval_callback
|
||||
from tools import write_approval as wa
|
||||
_set_approval("memory", True)
|
||||
set_approval_callback(lambda command, description, **kw: "deny")
|
||||
|
||||
store = MemoryStore(); store.load_from_disk()
|
||||
r = json.loads(memory_tool("add", "memory", "denied fact", store=store))
|
||||
assert r["success"] is False
|
||||
assert "denied" in r["error"].lower()
|
||||
assert store.memory_entries == []
|
||||
assert wa.pending_count("memory") == 0 # denied, not staged
|
||||
|
||||
|
||||
def test_memory_inline_callback_error_stages(hermes_home, approval_callback_cleanup):
|
||||
# If the prompt machinery fails, fall back to staging — never drop silently.
|
||||
from tools.memory_tool import memory_tool, MemoryStore
|
||||
from tools.terminal_tool import set_approval_callback
|
||||
from tools import write_approval as wa
|
||||
_set_approval("memory", True)
|
||||
def broken_cb(command, description, **kw):
|
||||
raise RuntimeError("boom")
|
||||
set_approval_callback(broken_cb)
|
||||
|
||||
store = MemoryStore(); store.load_from_disk()
|
||||
r = json.loads(memory_tool("add", "memory", "fallback fact", store=store))
|
||||
assert r.get("staged") is True
|
||||
assert wa.pending_count("memory") == 1
|
||||
|
||||
|
||||
def test_gateway_context_stages_not_prompts(hermes_home, monkeypatch):
|
||||
# A gateway session has no per-thread CLI callback; the dangerous-command
|
||||
# /approve round-trip lives in the pending-queue machinery which the gate
|
||||
# does not use. The gate must stage, never attempt an inline prompt
|
||||
# (which would hit the input() fallback and silently deny).
|
||||
from tools.memory_tool import memory_tool, MemoryStore
|
||||
from tools import write_approval as wa
|
||||
_set_approval("memory", True)
|
||||
monkeypatch.setenv("HERMES_GATEWAY_SESSION", "1")
|
||||
|
||||
store = MemoryStore(); store.load_from_disk()
|
||||
r = json.loads(memory_tool("add", "memory", "gateway fact", store=store))
|
||||
assert r.get("staged") is True
|
||||
assert store.memory_entries == []
|
||||
assert wa.pending_count("memory") == 1
|
||||
|
||||
|
||||
def test_skills_never_prompt_inline_even_with_callback(hermes_home, approval_callback_cleanup):
|
||||
# Skills always stage — even when an interactive callback is registered.
|
||||
from tools.skill_manager_tool import skill_manage
|
||||
from tools.terminal_tool import set_approval_callback
|
||||
from tools import write_approval as wa
|
||||
_set_approval("skills", True)
|
||||
|
||||
calls = []
|
||||
set_approval_callback(lambda c, d, **kw: calls.append(1) or "once")
|
||||
|
||||
r = json.loads(skill_manage(
|
||||
action="create", name="test-inline-skill",
|
||||
content="---\nname: test-inline-skill\ndescription: x\n---\nbody\n"))
|
||||
assert r.get("staged") is True
|
||||
assert calls == [] # never prompted
|
||||
assert wa.pending_count("skills") == 1
|
||||
|
||||
|
||||
def test_memory_invalid_params_rejected_before_staging(hermes_home):
|
||||
# Param validation must run BEFORE the gate so a broken write is rejected
|
||||
# immediately instead of staged and failing at approve time.
|
||||
from tools.memory_tool import memory_tool, MemoryStore
|
||||
from tools import write_approval as wa
|
||||
_set_approval("memory", True)
|
||||
store = MemoryStore(); store.load_from_disk()
|
||||
r = json.loads(memory_tool("add", "memory", None, store=store))
|
||||
assert r["success"] is False
|
||||
assert wa.pending_count("memory") == 0
|
||||
|
|
|
|||
|
|
@ -681,27 +681,29 @@ def memory_tool(
|
|||
if target not in {"memory", "user"}:
|
||||
return tool_error(f"Invalid target '{target}'. Use 'memory' or 'user'.", success=False)
|
||||
|
||||
# Approval gate: when on, stages the write (background) or prompts inline
|
||||
# (foreground); when off (default) passes straight through.
|
||||
# Validate required params BEFORE the gate so an invalid write is rejected
|
||||
# immediately instead of being staged and only failing at approve time.
|
||||
if action == "add" and not content:
|
||||
return tool_error("Content is required for 'add' action.", success=False)
|
||||
if action == "replace" and (not old_text or not content):
|
||||
missing = "old_text" if not old_text else "content"
|
||||
return tool_error(f"{missing} is required for 'replace' action.", success=False)
|
||||
if action == "remove" and not old_text:
|
||||
return tool_error("old_text is required for 'remove' action.", success=False)
|
||||
|
||||
# Approval gate: when on, stages the write (background/gateway) or prompts
|
||||
# inline (interactive CLI); when off (default) passes straight through.
|
||||
gate_result = _apply_write_gate(action, target, content, old_text)
|
||||
if gate_result is not None:
|
||||
return gate_result
|
||||
|
||||
if action == "add":
|
||||
if not content:
|
||||
return tool_error("Content is required for 'add' action.", success=False)
|
||||
result = store.add(target, content)
|
||||
|
||||
elif action == "replace":
|
||||
if not old_text:
|
||||
return tool_error("old_text is required for 'replace' action.", success=False)
|
||||
if not content:
|
||||
return tool_error("content is required for 'replace' action.", success=False)
|
||||
result = store.replace(target, old_text, content)
|
||||
|
||||
elif action == "remove":
|
||||
if not old_text:
|
||||
return tool_error("old_text is required for 'remove' action.", success=False)
|
||||
result = store.remove(target, old_text)
|
||||
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -15,24 +15,25 @@ Both stores are written from two origins:
|
|||
turn and autonomously decides what to save (the source of the
|
||||
"wrong assumptions" users complained about)
|
||||
|
||||
This module lets the user gate those writes per-subsystem with a tri-state
|
||||
``write_mode``:
|
||||
This module lets the user gate those writes per-subsystem with a boolean
|
||||
``write_approval``:
|
||||
|
||||
* ``on`` — write freely (current behaviour, default)
|
||||
* ``off`` — never write; the tool returns a clean "disabled" result
|
||||
* ``approve`` — do not commit the write; **stage** it to a pending store and
|
||||
surface it for the user to approve or reject out-of-band
|
||||
* ``false`` (default) — write freely (the pre-gate behaviour)
|
||||
* ``true`` — require approval: do not commit the write; either
|
||||
prompt inline (memory, interactive CLI only) or **stage** it to a pending
|
||||
store and surface it for the user to approve or reject out-of-band
|
||||
|
||||
The size asymmetry between memory and skills is real and unavoidable: a memory
|
||||
entry can be reviewed inline in a chat bubble; a 100 KB SKILL.md cannot. So
|
||||
``approve`` mode stages BOTH to disk, but review affordances differ by subsystem
|
||||
the gate stages BOTH to disk, but review affordances differ by subsystem
|
||||
(see ``hermes_cli`` slash handlers): memory shows full content, skills show
|
||||
metadata + a one-line gist + a ``diff`` escape hatch (CLI/dashboard/file).
|
||||
|
||||
Staging is mandatory for background-origin writes under ``approve`` (a daemon
|
||||
thread cannot block on an interactive prompt). Foreground memory writes may
|
||||
additionally block inline via the dangerous-command approval gate; foreground
|
||||
skill writes always stage (too big to eyeball mid-loop).
|
||||
Staging is mandatory for background-origin writes (a daemon thread cannot
|
||||
block on an interactive prompt) and for gateway sessions (no inline prompt
|
||||
channel — review happens via ``/memory pending``). Foreground CLI memory
|
||||
writes prompt inline via the dangerous-command approval callback; skill
|
||||
writes always stage (too big to eyeball mid-loop).
|
||||
|
||||
Pending records live under ``<HERMES_HOME>/pending/{memory,skills}/<id>.json``
|
||||
so they survive process restarts and can be reviewed from CLI, gateway, or the
|
||||
|
|
@ -230,14 +231,14 @@ class GateDecision:
|
|||
"""Result of evaluating the write gate for a single write attempt.
|
||||
|
||||
Exactly one of the boolean flags is True:
|
||||
* ``allow`` — proceed with the real write (mode ``on``, or an inline
|
||||
* ``allow`` — proceed with the real write (gate off, or an inline
|
||||
approval was granted).
|
||||
* ``blocked`` — refuse the write (mode ``off``, or an inline approval was
|
||||
denied). ``message`` explains why; surface it to the agent.
|
||||
* ``blocked`` — refuse the write (the user denied an inline approval
|
||||
prompt). ``message`` explains why; surface it to the agent.
|
||||
* ``stage`` — do not write; the caller should stage the payload via
|
||||
``stage_write`` (mode ``approve`` for a background write, or a
|
||||
foreground write with no interactive prompt available). ``message`` is
|
||||
the user-facing "staged for approval" note.
|
||||
``stage_write`` (gate on, and no inline prompt is available — gateway,
|
||||
background review, script, or any skill write). ``message`` is the
|
||||
user-facing "staged for approval" note.
|
||||
"""
|
||||
|
||||
__slots__ = ("allow", "blocked", "stage", "message")
|
||||
|
|
@ -261,10 +262,10 @@ def evaluate_gate(subsystem: str, *, inline_summary: str = "",
|
|||
are small; skills never take the inline path).
|
||||
|
||||
Decision matrix:
|
||||
gate off (default) → allow (writes flow freely)
|
||||
gate on, memory + foreground → inline approve/deny prompt
|
||||
gate on, memory + background → stage
|
||||
gate on, skills (any origin) → stage (too big to review inline)
|
||||
gate off (default) → allow (writes flow freely)
|
||||
gate on, memory + interactive CLI → inline approve/deny prompt
|
||||
gate on, memory + gateway/script/bg → stage
|
||||
gate on, skills (any origin) → stage (too big to review inline)
|
||||
|
||||
Note: there is no config-driven "blocked" outcome — the gate only ever
|
||||
delays a write for approval, never silently refuses it. ``blocked`` is
|
||||
|
|
@ -287,10 +288,10 @@ def evaluate_gate(subsystem: str, *, inline_summary: str = "",
|
|||
),
|
||||
)
|
||||
|
||||
# Memory + foreground: if an interactive approval channel exists (CLI
|
||||
# prompt_toolkit callback, or a gateway approve/deny round-trip), prompt
|
||||
# inline — entries are small enough to show in full. Otherwise (script,
|
||||
# batch, no listener) stage instead of forcing a blind deny.
|
||||
# Memory + foreground: if an interactive approval channel exists (a CLI
|
||||
# approval callback registered on this thread), prompt inline — entries
|
||||
# are small enough to show in full. Otherwise (gateway, script, batch,
|
||||
# no listener) stage instead of forcing a blind deny.
|
||||
if _interactive_approval_available():
|
||||
granted = _prompt_inline_memory_approval(inline_summary, inline_detail)
|
||||
if granted is True:
|
||||
|
|
@ -314,19 +315,21 @@ def evaluate_gate(subsystem: str, *, inline_summary: str = "",
|
|||
def _interactive_approval_available() -> bool:
|
||||
"""True when a foreground memory write can be approved inline.
|
||||
|
||||
Either a per-thread approval callback is registered (interactive CLI), or
|
||||
the call is inside a gateway/API session that supports the /approve //deny
|
||||
round-trip. Scripts, cron, and background threads have neither → stage.
|
||||
Inline prompting requires a per-thread approval callback registered by the
|
||||
interactive CLI (``tools.terminal_tool.set_approval_callback``). Every
|
||||
other surface stages instead:
|
||||
|
||||
* **Gateway/API sessions** — the dangerous-command ``/approve`` round-trip
|
||||
lives in the pending-approval queue (``submit_pending`` +
|
||||
``_await_gateway_decision``), which ``prompt_dangerous_approval`` never
|
||||
reaches; trying to prompt from a gateway session would hit the
|
||||
``input()`` fallback and silently deny. Staging gives the user a real
|
||||
review affordance (``/memory pending``) instead.
|
||||
* Scripts, cron, and background threads — no user present.
|
||||
"""
|
||||
try:
|
||||
from tools.terminal_tool import _get_approval_callback
|
||||
if _get_approval_callback() is not None:
|
||||
return True
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
from tools.approval import _is_gateway_approval_context
|
||||
return bool(_is_gateway_approval_context())
|
||||
return _get_approval_callback() is not None
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
|
@ -335,28 +338,37 @@ def _prompt_inline_memory_approval(summary: str, detail: str) -> Optional[bool]:
|
|||
"""Prompt the user inline to approve a memory write.
|
||||
|
||||
Returns True (approved), False (denied), or None (no interactive prompt
|
||||
available on this thread → caller should stage instead).
|
||||
available / prompt failed → caller should stage instead).
|
||||
|
||||
Reuses the dangerous-command approval machinery so the CLI prompt_toolkit
|
||||
callback and the gateway ``/approve`` ``/deny`` round-trip both work without
|
||||
duplicating that plumbing.
|
||||
Reuses the per-thread CLI approval callback registered for dangerous
|
||||
commands (``tools.terminal_tool.set_approval_callback``). The callback is
|
||||
invoked directly — NOT via ``prompt_dangerous_approval`` — because that
|
||||
wrapper falls back to ``input()`` (deadlock-prone under prompt_toolkit,
|
||||
see #15216) and converts callback errors into a silent deny; here a
|
||||
failed prompt must stage the write instead.
|
||||
"""
|
||||
try:
|
||||
from tools.approval import prompt_dangerous_approval
|
||||
from tools.terminal_tool import _get_approval_callback
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
callback = _get_approval_callback()
|
||||
if callback is None:
|
||||
# No interactive channel on this thread — stage rather than risk the
|
||||
# input() fallback (deadlock under prompt_toolkit, EOF-deny in tests).
|
||||
return None
|
||||
|
||||
header = summary.strip() or "Save to memory?"
|
||||
body = detail.strip()
|
||||
description = f"Save to memory: {header}"
|
||||
command = body if body else header
|
||||
# Invoke the callback directly instead of via prompt_dangerous_approval:
|
||||
# that wrapper swallows callback exceptions into "deny", which would
|
||||
# silently refuse the write. Direct invocation lets a crashed prompt fall
|
||||
# back to staging (the gate only ever delays a write, never drops it).
|
||||
try:
|
||||
choice = prompt_dangerous_approval(
|
||||
command,
|
||||
description,
|
||||
allow_permanent=False,
|
||||
)
|
||||
except Exception as e: # pragma: no cover
|
||||
choice = callback(command, description, allow_permanent=False)
|
||||
except Exception as e:
|
||||
logger.error("Inline memory approval prompt failed: %s", e)
|
||||
return None
|
||||
|
||||
|
|
|
|||
|
|
@ -222,7 +222,7 @@ first, set `memory.write_approval: true`. It's a simple on/off gate applied to
|
|||
| `write_approval` | Behaviour |
|
||||
|------------------|-----------|
|
||||
| `false` (default) | Write freely — the gate is off (the pre-gate behaviour). |
|
||||
| `true` | Require approval before anything is saved. Foreground writes prompt you inline (entries are small enough to read in a chat bubble). Background-review writes are **staged** instead of committed (a background thread can't block on a prompt). |
|
||||
| `true` | Require approval before anything is saved. In the interactive CLI, foreground writes prompt you inline (entries are small enough to read in full). Everywhere else — messaging platforms, scripts, and the background self-improvement review — writes are **staged** for review with `/memory pending`. |
|
||||
|
||||
> To turn memory off entirely (not just gate it), set `memory_enabled: false`.
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue