From 70d5d7e39b566090944688623f5bbea89e21b967 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 10 Jun 2026 02:57:15 -0700 Subject: [PATCH] fix(memory,skills): repair write-approval inline prompt, gateway staging, and gateway /skills review (#43452) Follow-ups to #38199/#43354 found in post-merge review: - Inline CLI memory approval never worked: the per-thread approval callback was not passed to prompt_dangerous_approval, so the prompt_toolkit fail-closed guard (#15216) denied every gated foreground write without showing a prompt. Now invokes the registered callback directly; a crashed prompt falls back to staging instead of a silent deny. - Gateway sessions claimed inline support but prompt_dangerous_approval has no gateway round-trip (that lives in the pending-approval queue), so gated gateway memory writes hit the input() fallback and denied. Gateway contexts now stage for /memory pending review. - /skills pending|approve|reject|diff|approval now works on the gateway (gateway_config_gate on skills.write_approval), so skills staged from a messaging session can be reviewed there. Diff output truncated for chat. - memory_tool validates required params before the gate so invalid writes are rejected immediately instead of staged and failing at approve time. - Stale tri-state write_mode docstrings updated to the boolean gate; docs table corrected (inline prompt is interactive-CLI-only). - 6 new tests covering the interactive approve/deny/error paths, gateway staging, skills never-prompt invariant, and pre-gate validation. --- gateway/run.py | 3 + gateway/slash_commands.py | 58 +++++++++++ hermes_cli/commands.py | 1 + tests/tools/test_write_approval.py | 114 +++++++++++++++++++++ tools/memory_tool.py | 22 ++-- tools/write_approval.py | 104 ++++++++++--------- website/docs/user-guide/features/memory.md | 2 +- 7 files changed, 247 insertions(+), 57 deletions(-) diff --git a/gateway/run.py b/gateway/run.py index 9e30c923c..58c68a4b9 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -7065,6 +7065,9 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew if canonical == "memory": return await self._handle_memory_command(event) + if canonical == "skills": + return await self._handle_skills_command(event) + if canonical == "fast": return await self._handle_fast_command(event) diff --git a/gateway/slash_commands.py b/gateway/slash_commands.py index 25a3a868a..107b5645e 100644 --- a/gateway/slash_commands.py +++ b/gateway/slash_commands.py @@ -2058,6 +2058,64 @@ class GatewaySlashCommandsMixin: "reject , approval .") return out + async def _handle_skills_command(self, event: MessageEvent) -> str: + """Handle /skills on the gateway — pending skill-write review only. + + The full skills hub (search/browse/install) stays CLI-only; this + handler covers the write-approval review surface (pending / approve / + reject / diff / approval) so a skill staged from a gateway session can + be reviewed from that same session. Gated by ``skills.write_approval`` + via the CommandDef's ``gateway_config_gate``; also answers when staged + writes still exist after the gate was turned off (so they are never + stranded). + + ``diff`` output is truncated for chat bubbles — the full diff lives in + the CLI (``/skills diff ``) and the pending JSON file. + """ + from gateway.run import _hermes_home + from hermes_cli.write_approval_commands import handle_pending_subcommand + from tools import write_approval as wa + + raw_args = event.get_command_args().strip() + args = raw_args.split() if raw_args else [] + session_key = self._session_key_for_source(event.source) + config_path = _hermes_home / "config.yaml" + + gate_on = wa.write_approval_enabled(wa.SKILLS) + wants_toggle = bool(args) and args[0].lower() in {"approval", "mode"} + if not gate_on and not wants_toggle and wa.pending_count(wa.SKILLS) == 0: + return ("Skill write approval is off (skills.write_approval). " + "Enable it with /skills approval on, then review staged " + "writes here with /skills pending.") + + def _set_approval(enabled: bool): + import yaml + user_config = {} + if config_path.exists(): + with open(config_path, encoding="utf-8") as f: + user_config = yaml.safe_load(f) or {} + user_config.setdefault("skills", {})["write_approval"] = bool(enabled) + atomic_yaml_write(config_path, user_config) + # New setting must take effect next message → drop cached agent. + self._evict_cached_agent(session_key) + + out = handle_pending_subcommand( + wa.SKILLS, args, set_mode_fn=_set_approval, + ) + if out is None: + return ("Unknown /skills subcommand on this platform. Use: pending, " + "approve , reject , diff , approval . " + "(Search/install are CLI-only.)") + + # Chat bubbles can't hold a full skill diff — truncate and point at + # the real review surfaces. + if args and args[0].lower() == "diff" and len(out) > 3000: + pending_id = args[1] if len(args) > 1 else "" + out = (out[:3000] + + f"\n… (truncated — full diff: `/skills diff {pending_id}` " + f"on the CLI, or ~/.hermes/pending/skills/{pending_id}.json)") + return out + async def _handle_fast_command(self, event: MessageEvent) -> str: """Handle /fast — mirror the CLI Priority Processing toggle in gateway chats.""" from gateway.run import _hermes_home, _load_gateway_config, _resolve_gateway_model diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index 520f4fac0..f23d1960d 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -167,6 +167,7 @@ COMMAND_REGISTRY: list[CommandDef] = [ cli_only=True), CommandDef("skills", "Search, install, inspect, or manage skills", "Tools & Skills", cli_only=True, + gateway_config_gate="skills.write_approval", subcommands=("search", "browse", "inspect", "install", "audit", "pending", "approve", "reject", "diff", "approval")), CommandDef("memory", "Review pending memory writes / toggle the approval gate", diff --git a/tests/tools/test_write_approval.py b/tests/tools/test_write_approval.py index d00a95e71..fbfa804fb 100644 --- a/tests/tools/test_write_approval.py +++ b/tests/tools/test_write_approval.py @@ -268,3 +268,117 @@ def test_handle_unknown_subcommand_returns_none(hermes_home): # the CLI falls through to the skills hub. out = handle_pending_subcommand(wa.SKILLS, ["search", "foo"]) assert out is None + + +# --------------------------------------------------------------------------- +# Inline (interactive CLI) approval path — regression for the bug where the +# per-thread approval callback was never passed to prompt_dangerous_approval, +# so every gated foreground memory write was silently denied. +# --------------------------------------------------------------------------- + +@pytest.fixture +def approval_callback_cleanup(): + yield + from tools.terminal_tool import set_approval_callback + set_approval_callback(None) + + +def test_memory_inline_approve_writes(hermes_home, approval_callback_cleanup): + from tools.memory_tool import memory_tool, MemoryStore + from tools.terminal_tool import set_approval_callback + from tools import write_approval as wa + _set_approval("memory", True) + + calls = [] + def approve_cb(command, description, **kw): + calls.append((command, description)) + return "once" + set_approval_callback(approve_cb) + + store = MemoryStore(); store.load_from_disk() + r = json.loads(memory_tool("add", "memory", "approved fact", store=store)) + assert r["success"] is True + assert r.get("staged") is None # real write, not staged + assert store.memory_entries == ["approved fact"] + assert wa.pending_count("memory") == 0 + # The registered callback must actually be invoked (not the input() path). + assert len(calls) == 1 + assert "approved fact" in calls[0][0] + + +def test_memory_inline_deny_blocks(hermes_home, approval_callback_cleanup): + from tools.memory_tool import memory_tool, MemoryStore + from tools.terminal_tool import set_approval_callback + from tools import write_approval as wa + _set_approval("memory", True) + set_approval_callback(lambda command, description, **kw: "deny") + + store = MemoryStore(); store.load_from_disk() + r = json.loads(memory_tool("add", "memory", "denied fact", store=store)) + assert r["success"] is False + assert "denied" in r["error"].lower() + assert store.memory_entries == [] + assert wa.pending_count("memory") == 0 # denied, not staged + + +def test_memory_inline_callback_error_stages(hermes_home, approval_callback_cleanup): + # If the prompt machinery fails, fall back to staging — never drop silently. + from tools.memory_tool import memory_tool, MemoryStore + from tools.terminal_tool import set_approval_callback + from tools import write_approval as wa + _set_approval("memory", True) + def broken_cb(command, description, **kw): + raise RuntimeError("boom") + set_approval_callback(broken_cb) + + store = MemoryStore(); store.load_from_disk() + r = json.loads(memory_tool("add", "memory", "fallback fact", store=store)) + assert r.get("staged") is True + assert wa.pending_count("memory") == 1 + + +def test_gateway_context_stages_not_prompts(hermes_home, monkeypatch): + # A gateway session has no per-thread CLI callback; the dangerous-command + # /approve round-trip lives in the pending-queue machinery which the gate + # does not use. The gate must stage, never attempt an inline prompt + # (which would hit the input() fallback and silently deny). + from tools.memory_tool import memory_tool, MemoryStore + from tools import write_approval as wa + _set_approval("memory", True) + monkeypatch.setenv("HERMES_GATEWAY_SESSION", "1") + + store = MemoryStore(); store.load_from_disk() + r = json.loads(memory_tool("add", "memory", "gateway fact", store=store)) + assert r.get("staged") is True + assert store.memory_entries == [] + assert wa.pending_count("memory") == 1 + + +def test_skills_never_prompt_inline_even_with_callback(hermes_home, approval_callback_cleanup): + # Skills always stage — even when an interactive callback is registered. + from tools.skill_manager_tool import skill_manage + from tools.terminal_tool import set_approval_callback + from tools import write_approval as wa + _set_approval("skills", True) + + calls = [] + set_approval_callback(lambda c, d, **kw: calls.append(1) or "once") + + r = json.loads(skill_manage( + action="create", name="test-inline-skill", + content="---\nname: test-inline-skill\ndescription: x\n---\nbody\n")) + assert r.get("staged") is True + assert calls == [] # never prompted + assert wa.pending_count("skills") == 1 + + +def test_memory_invalid_params_rejected_before_staging(hermes_home): + # Param validation must run BEFORE the gate so a broken write is rejected + # immediately instead of staged and failing at approve time. + from tools.memory_tool import memory_tool, MemoryStore + from tools import write_approval as wa + _set_approval("memory", True) + store = MemoryStore(); store.load_from_disk() + r = json.loads(memory_tool("add", "memory", None, store=store)) + assert r["success"] is False + assert wa.pending_count("memory") == 0 diff --git a/tools/memory_tool.py b/tools/memory_tool.py index 78ce592ce..6cedb405f 100644 --- a/tools/memory_tool.py +++ b/tools/memory_tool.py @@ -681,27 +681,29 @@ def memory_tool( if target not in {"memory", "user"}: return tool_error(f"Invalid target '{target}'. Use 'memory' or 'user'.", success=False) - # Approval gate: when on, stages the write (background) or prompts inline - # (foreground); when off (default) passes straight through. + # Validate required params BEFORE the gate so an invalid write is rejected + # immediately instead of being staged and only failing at approve time. + if action == "add" and not content: + return tool_error("Content is required for 'add' action.", success=False) + if action == "replace" and (not old_text or not content): + missing = "old_text" if not old_text else "content" + return tool_error(f"{missing} is required for 'replace' action.", success=False) + if action == "remove" and not old_text: + return tool_error("old_text is required for 'remove' action.", success=False) + + # Approval gate: when on, stages the write (background/gateway) or prompts + # inline (interactive CLI); when off (default) passes straight through. gate_result = _apply_write_gate(action, target, content, old_text) if gate_result is not None: return gate_result if action == "add": - if not content: - return tool_error("Content is required for 'add' action.", success=False) result = store.add(target, content) elif action == "replace": - if not old_text: - return tool_error("old_text is required for 'replace' action.", success=False) - if not content: - return tool_error("content is required for 'replace' action.", success=False) result = store.replace(target, old_text, content) elif action == "remove": - if not old_text: - return tool_error("old_text is required for 'remove' action.", success=False) result = store.remove(target, old_text) else: diff --git a/tools/write_approval.py b/tools/write_approval.py index d5b3913d7..b017299d8 100644 --- a/tools/write_approval.py +++ b/tools/write_approval.py @@ -15,24 +15,25 @@ Both stores are written from two origins: turn and autonomously decides what to save (the source of the "wrong assumptions" users complained about) -This module lets the user gate those writes per-subsystem with a tri-state -``write_mode``: +This module lets the user gate those writes per-subsystem with a boolean +``write_approval``: - * ``on`` — write freely (current behaviour, default) - * ``off`` — never write; the tool returns a clean "disabled" result - * ``approve`` — do not commit the write; **stage** it to a pending store and - surface it for the user to approve or reject out-of-band + * ``false`` (default) — write freely (the pre-gate behaviour) + * ``true`` — require approval: do not commit the write; either + prompt inline (memory, interactive CLI only) or **stage** it to a pending + store and surface it for the user to approve or reject out-of-band The size asymmetry between memory and skills is real and unavoidable: a memory entry can be reviewed inline in a chat bubble; a 100 KB SKILL.md cannot. So -``approve`` mode stages BOTH to disk, but review affordances differ by subsystem +the gate stages BOTH to disk, but review affordances differ by subsystem (see ``hermes_cli`` slash handlers): memory shows full content, skills show metadata + a one-line gist + a ``diff`` escape hatch (CLI/dashboard/file). -Staging is mandatory for background-origin writes under ``approve`` (a daemon -thread cannot block on an interactive prompt). Foreground memory writes may -additionally block inline via the dangerous-command approval gate; foreground -skill writes always stage (too big to eyeball mid-loop). +Staging is mandatory for background-origin writes (a daemon thread cannot +block on an interactive prompt) and for gateway sessions (no inline prompt +channel — review happens via ``/memory pending``). Foreground CLI memory +writes prompt inline via the dangerous-command approval callback; skill +writes always stage (too big to eyeball mid-loop). Pending records live under ``/pending/{memory,skills}/.json`` so they survive process restarts and can be reviewed from CLI, gateway, or the @@ -230,14 +231,14 @@ class GateDecision: """Result of evaluating the write gate for a single write attempt. Exactly one of the boolean flags is True: - * ``allow`` — proceed with the real write (mode ``on``, or an inline + * ``allow`` — proceed with the real write (gate off, or an inline approval was granted). - * ``blocked`` — refuse the write (mode ``off``, or an inline approval was - denied). ``message`` explains why; surface it to the agent. + * ``blocked`` — refuse the write (the user denied an inline approval + prompt). ``message`` explains why; surface it to the agent. * ``stage`` — do not write; the caller should stage the payload via - ``stage_write`` (mode ``approve`` for a background write, or a - foreground write with no interactive prompt available). ``message`` is - the user-facing "staged for approval" note. + ``stage_write`` (gate on, and no inline prompt is available — gateway, + background review, script, or any skill write). ``message`` is the + user-facing "staged for approval" note. """ __slots__ = ("allow", "blocked", "stage", "message") @@ -261,10 +262,10 @@ def evaluate_gate(subsystem: str, *, inline_summary: str = "", are small; skills never take the inline path). Decision matrix: - gate off (default) → allow (writes flow freely) - gate on, memory + foreground → inline approve/deny prompt - gate on, memory + background → stage - gate on, skills (any origin) → stage (too big to review inline) + gate off (default) → allow (writes flow freely) + gate on, memory + interactive CLI → inline approve/deny prompt + gate on, memory + gateway/script/bg → stage + gate on, skills (any origin) → stage (too big to review inline) Note: there is no config-driven "blocked" outcome — the gate only ever delays a write for approval, never silently refuses it. ``blocked`` is @@ -287,10 +288,10 @@ def evaluate_gate(subsystem: str, *, inline_summary: str = "", ), ) - # Memory + foreground: if an interactive approval channel exists (CLI - # prompt_toolkit callback, or a gateway approve/deny round-trip), prompt - # inline — entries are small enough to show in full. Otherwise (script, - # batch, no listener) stage instead of forcing a blind deny. + # Memory + foreground: if an interactive approval channel exists (a CLI + # approval callback registered on this thread), prompt inline — entries + # are small enough to show in full. Otherwise (gateway, script, batch, + # no listener) stage instead of forcing a blind deny. if _interactive_approval_available(): granted = _prompt_inline_memory_approval(inline_summary, inline_detail) if granted is True: @@ -314,19 +315,21 @@ def evaluate_gate(subsystem: str, *, inline_summary: str = "", def _interactive_approval_available() -> bool: """True when a foreground memory write can be approved inline. - Either a per-thread approval callback is registered (interactive CLI), or - the call is inside a gateway/API session that supports the /approve //deny - round-trip. Scripts, cron, and background threads have neither → stage. + Inline prompting requires a per-thread approval callback registered by the + interactive CLI (``tools.terminal_tool.set_approval_callback``). Every + other surface stages instead: + + * **Gateway/API sessions** — the dangerous-command ``/approve`` round-trip + lives in the pending-approval queue (``submit_pending`` + + ``_await_gateway_decision``), which ``prompt_dangerous_approval`` never + reaches; trying to prompt from a gateway session would hit the + ``input()`` fallback and silently deny. Staging gives the user a real + review affordance (``/memory pending``) instead. + * Scripts, cron, and background threads — no user present. """ try: from tools.terminal_tool import _get_approval_callback - if _get_approval_callback() is not None: - return True - except Exception: - pass - try: - from tools.approval import _is_gateway_approval_context - return bool(_is_gateway_approval_context()) + return _get_approval_callback() is not None except Exception: return False @@ -335,28 +338,37 @@ def _prompt_inline_memory_approval(summary: str, detail: str) -> Optional[bool]: """Prompt the user inline to approve a memory write. Returns True (approved), False (denied), or None (no interactive prompt - available on this thread → caller should stage instead). + available / prompt failed → caller should stage instead). - Reuses the dangerous-command approval machinery so the CLI prompt_toolkit - callback and the gateway ``/approve`` ``/deny`` round-trip both work without - duplicating that plumbing. + Reuses the per-thread CLI approval callback registered for dangerous + commands (``tools.terminal_tool.set_approval_callback``). The callback is + invoked directly — NOT via ``prompt_dangerous_approval`` — because that + wrapper falls back to ``input()`` (deadlock-prone under prompt_toolkit, + see #15216) and converts callback errors into a silent deny; here a + failed prompt must stage the write instead. """ try: - from tools.approval import prompt_dangerous_approval + from tools.terminal_tool import _get_approval_callback except Exception: return None + callback = _get_approval_callback() + if callback is None: + # No interactive channel on this thread — stage rather than risk the + # input() fallback (deadlock under prompt_toolkit, EOF-deny in tests). + return None + header = summary.strip() or "Save to memory?" body = detail.strip() description = f"Save to memory: {header}" command = body if body else header + # Invoke the callback directly instead of via prompt_dangerous_approval: + # that wrapper swallows callback exceptions into "deny", which would + # silently refuse the write. Direct invocation lets a crashed prompt fall + # back to staging (the gate only ever delays a write, never drops it). try: - choice = prompt_dangerous_approval( - command, - description, - allow_permanent=False, - ) - except Exception as e: # pragma: no cover + choice = callback(command, description, allow_permanent=False) + except Exception as e: logger.error("Inline memory approval prompt failed: %s", e) return None diff --git a/website/docs/user-guide/features/memory.md b/website/docs/user-guide/features/memory.md index ef68bf6fd..d0873d6b9 100644 --- a/website/docs/user-guide/features/memory.md +++ b/website/docs/user-guide/features/memory.md @@ -222,7 +222,7 @@ first, set `memory.write_approval: true`. It's a simple on/off gate applied to | `write_approval` | Behaviour | |------------------|-----------| | `false` (default) | Write freely — the gate is off (the pre-gate behaviour). | -| `true` | Require approval before anything is saved. Foreground writes prompt you inline (entries are small enough to read in a chat bubble). Background-review writes are **staged** instead of committed (a background thread can't block on a prompt). | +| `true` | Require approval before anything is saved. In the interactive CLI, foreground writes prompt you inline (entries are small enough to read in full). Everywhere else — messaging platforms, scripts, and the background self-improvement review — writes are **staged** for review with `/memory pending`. | > To turn memory off entirely (not just gate it), set `memory_enabled: false`.