2026-05-16 18:24:05 -07:00
|
|
|
|
"""Tool-call execution — sequential and concurrent dispatch.

Both AIAgent methods (``_execute_tool_calls_sequential`` and
``_execute_tool_calls_concurrent``) live here as module-level
functions that take the parent ``AIAgent`` as their first argument.

``run_agent`` keeps thin wrappers so existing call sites work; tests
that patch ``run_agent._set_interrupt`` are honored because the
extracted functions reach back through the ``run_agent`` module via
``_ra()`` for that symbol.
"""
|
|
|
|
|
|
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
|
|
import concurrent.futures
|
|
|
|
|
|
import json
|
|
|
|
|
|
import logging
|
|
|
|
|
|
import os
|
|
|
|
|
|
import random
|
|
|
|
|
|
import threading
|
|
|
|
|
|
import time
|
feat(observability): observer-grade telemetry hooks + NeMo-Relay plugin
Adds backend-neutral observer hooks for plugins: session, turn, API
request, tool, approval, and subagent lifecycle events with stable
correlation IDs (session_id, task_id, turn_id, api_request_id,
tool_call_id, parent/child subagent ids). Extends VALID_HOOKS with
api_request_error and subagent_start.
Hot path is zero-cost when no plugin subscribes: has_hook()/presence
checks gate all payload construction, request payloads are returned
by reference when no middleware rewrites, and the sanitized response
payload no longer embeds raw response objects.
Bundles the optional NeMo-Relay observability plugin
(plugins/observability/nemo_relay) as an in-repo consumer of the new
hooks, peer to the existing langfuse plugin. Fails open when the
optional nemo-relay package is not installed.
Authored-by: Bryan Bednarski <bbednarski@nvidia.com>
Salvaged from #29722 onto current main.
2026-06-03 17:44:13 +05:30
|
|
|
|
from typing import Any, Optional
|
2026-05-16 18:24:05 -07:00
|
|
|
|
|
|
|
|
|
|
from agent.display import (
|
|
|
|
|
|
KawaiiSpinner,
|
|
|
|
|
|
build_tool_preview as _build_tool_preview,
|
|
|
|
|
|
get_cute_tool_message as _get_cute_tool_message_impl,
|
|
|
|
|
|
get_tool_emoji as _get_tool_emoji,
|
|
|
|
|
|
_detect_tool_failure,
|
|
|
|
|
|
)
|
|
|
|
|
|
from agent.tool_guardrails import ToolGuardrailDecision
|
|
|
|
|
|
from agent.tool_dispatch_helpers import (
|
|
|
|
|
|
_is_destructive_command,
|
|
|
|
|
|
_is_multimodal_tool_result,
|
|
|
|
|
|
_multimodal_text_summary,
|
|
|
|
|
|
_append_subdir_hint_to_multimodal,
|
2026-05-19 20:24:30 +01:00
|
|
|
|
make_tool_result_message,
|
2026-05-16 18:24:05 -07:00
|
|
|
|
)
|
|
|
|
|
|
from tools.terminal_tool import (
|
|
|
|
|
|
get_active_env,
|
|
|
|
|
|
)
|
2026-05-28 17:47:09 -04:00
|
|
|
|
from tools.thread_context import propagate_context_to_thread
|
2026-05-16 18:24:05 -07:00
|
|
|
|
from tools.tool_result_storage import (
|
|
|
|
|
|
maybe_persist_tool_result,
|
|
|
|
|
|
enforce_turn_budget,
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)

# Maximum number of concurrent worker threads for parallel tool execution.
# Mirrors the constant in ``run_agent`` for tests/imports that look here.
_MAX_TOOL_WORKERS = 8
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _ra():
    """Return the ``run_agent`` module, resolved at call time.

    The import happens on every call instead of once at module load, so
    attribute lookups made through the returned module (for example
    ``_ra()._set_interrupt``) see whatever is currently bound on
    ``run_agent`` — including test patches such as
    ``run_agent._set_interrupt``.
    """
    import run_agent as _run_agent_module

    return _run_agent_module
|
|
|
|
|
|
|
|
|
|
|
|
|
feat(observability): observer-grade telemetry hooks + NeMo-Relay plugin
Adds backend-neutral observer hooks for plugins: session, turn, API
request, tool, approval, and subagent lifecycle events with stable
correlation IDs (session_id, task_id, turn_id, api_request_id,
tool_call_id, parent/child subagent ids). Extends VALID_HOOKS with
api_request_error and subagent_start.
Hot path is zero-cost when no plugin subscribes: has_hook()/presence
checks gate all payload construction, request payloads are returned
by reference when no middleware rewrites, and the sanitized response
payload no longer embeds raw response objects.
Bundles the optional NeMo-Relay observability plugin
(plugins/observability/nemo_relay) as an in-repo consumer of the new
hooks, peer to the existing langfuse plugin. Fails open when the
optional nemo-relay package is not installed.
Authored-by: Bryan Bednarski <bbednarski@nvidia.com>
Salvaged from #29722 onto current main.
2026-06-03 17:44:13 +05:30
|
|
|
|
def _emit_terminal_post_tool_call(
    agent,
    *,
    function_name: str,
    function_args: dict,
    result: Any,
    effective_task_id: str,
    tool_call_id: str,
    duration_ms: int = 0,
    status: str | None = None,
    error_type: str | None = None,
    error_message: str | None = None,
) -> None:
    """Forward a tool-call outcome to ``model_tools._emit_post_tool_call_hook``.

    Emission is best-effort: any failure (the hook helper cannot be imported,
    or the hook itself raises) is logged at debug level and never propagated
    to the caller.

    Args:
        agent: Object the correlation IDs are read from. ``session_id``,
            ``_current_turn_id`` and ``_current_api_request_id`` are looked
            up with ``getattr`` and default to ``""`` when missing or falsy.
        function_name: Name of the tool reported to the hook.
        function_args: Arguments of the tool call reported to the hook.
        result: Tool result reported to the hook.
        effective_task_id: Task ID; a falsy value is reported as ``""``.
        tool_call_id: Tool-call ID; a falsy value is reported as ``""``.
        duration_ms: Duration passed through to the hook unchanged.
        status: Optional status label passed through unchanged.
        error_type: Optional error category passed through unchanged.
        error_message: Optional error text passed through unchanged.
    """
    try:
        # Imported lazily inside the guard so an unavailable helper is
        # treated like any other emission failure.
        from model_tools import _emit_post_tool_call_hook

        _emit_post_tool_call_hook(
            function_name=function_name,
            function_args=function_args,
            result=result,
            task_id=effective_task_id or "",
            session_id=getattr(agent, "session_id", "") or "",
            tool_call_id=tool_call_id or "",
            turn_id=getattr(agent, "_current_turn_id", "") or "",
            api_request_id=getattr(agent, "_current_api_request_id", "") or "",
            duration_ms=duration_ms,
            status=status,
            error_type=error_type,
            error_message=error_message,
        )
    except Exception:
        # Deliberately fail open, but leave a trace instead of silently
        # discarding the error so broken hooks can be diagnosed.
        logger.debug(
            "post_tool_call hook emission failed for tool %r",
            function_name,
            exc_info=True,
        )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _cancelled_tool_result(reason: str = "user interrupt") -> str:
    """Build the JSON result string reported for a cancelled tool call.

    Args:
        reason: What cancelled the call; interpolated into the ``error`` text.

    Returns:
        A JSON object string carrying an ``error`` message and a ``status``
        of ``"cancelled"``. Non-ASCII characters are written unescaped.
    """
    payload = {
        "error": f"Tool execution cancelled by {reason}",
        "status": "cancelled",
    }
    return json.dumps(payload, ensure_ascii=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _emit_cancelled_terminal_post_tool_call(
    agent,
    *,
    function_name: str,
    function_args: dict,
    effective_task_id: str,
    tool_call_id: str,
    start_time: float,
    reason: str = "user interrupt",
    error_type: str = "keyboard_interrupt",
) -> str:
    """Report a cancelled tool call to the post-tool-call hook.

    Builds the cancelled-result JSON via ``_cancelled_tool_result``, emits it
    through ``_emit_terminal_post_tool_call`` with ``status="cancelled"``, and
    hands the same JSON string back to the caller.

    Args:
        agent: Passed through to ``_emit_terminal_post_tool_call``.
        function_name: Name of the cancelled tool.
        function_args: Arguments the tool was called with.
        effective_task_id: Task ID forwarded to the hook.
        tool_call_id: Tool-call ID forwarded to the hook.
        start_time: ``time.time()``-style timestamp of when the call began;
            the reported duration is measured from it to now.
        reason: What cancelled the call; used in the result and error text.
        error_type: Error category forwarded to the hook.

    Returns:
        The cancelled-result JSON string.
    """
    cancelled_payload = _cancelled_tool_result(reason)
    elapsed_ms = int((time.time() - start_time) * 1000)
    cancel_text = f"Tool execution cancelled by {reason}"

    _emit_terminal_post_tool_call(
        agent,
        function_name=function_name,
        function_args=function_args,
        result=cancelled_payload,
        effective_task_id=effective_task_id,
        tool_call_id=tool_call_id,
        duration_ms=elapsed_ms,
        status="cancelled",
        error_type=error_type,
        error_message=cancel_text,
    )
    return cancelled_payload
|
|
|
|
|
|
|
|
|
|
|
|
|
fix(tool-search): scope bridge catalog + dispatch to the session's toolsets
Tool Search read its catalog from the global registry (get_tool_definitions
with no toolset scope = 'start with everything'), so a restricted-toolset
session — subagent, kanban worker, curated gateway session — could:
1. tool_search the entire process registry, not just its granted tools, and
2. tool_call any registered plugin/MCP tool it was never given, because
registry.dispatch() has no enabled_tools gate for non-execute_code tools.
A scoped session (enabled_toolsets=['mcp-github']) reported total_available=26
and successfully invoked an out-of-scope plugin tool via tool_call.
Fix:
- handle_function_call gains enabled_toolsets/disabled_toolsets; the bridge
dispatch scopes get_tool_definitions to them (also stops polluting the
process-global _last_resolved_tool_names with out-of-scope tools, which
leaked into execute_code's sandbox-tool fallback).
- A defense-in-depth gate rejects any tool_call'd name not in the scoped
deferrable catalog.
- tool_executor's unwrap (both concurrent + sequential paths) enforces the
same scope before dispatch, since it unwraps tool_call -> underlying name
and bypasses the bridge branch. New _tool_search_scoped_names() helper,
cached per-agent on registry generation + toolset scope.
- New scoped_deferrable_names() helper in tool_search.py shared by both sites.
Tests: 4 new regression tests in TestRegression_ToolsetScoping (scoped
catalog, out-of-scope tool_call rejection, no global pollution, helper).
2026-05-29 01:21:41 -07:00
|
|
|
|
def _tool_search_scoped_names(agent) -> frozenset:
    """Return the deferrable tool names the session may invoke via tool_call.

    The Tool Search unwrap dispatches the underlying tool directly, bypassing
    the bridge branch (and its scope check) in
    ``model_tools.handle_function_call``. To keep a restricted-toolset session
    (subagent, kanban worker, curated gateway session) from reaching tools it
    was never granted, the unwrap validates the underlying name against this
    set: the deferrable subset of the session's own enabled/disabled toolset
    scope.

    A successful result is cached on the agent as ``_tool_search_scope_cache``
    and reused while the registry generation and the agent's
    enabled/disabled toolsets are unchanged, so the common case is a cheap
    lookup rather than a full tool-defs rebuild on every tool call.

    The function fails closed: if the supporting modules cannot be imported
    or the scoped catalog cannot be computed, an empty set is returned. A
    failed computation is *not* cached, so a transient error does not lock
    the session out of its deferred tools until the registry generation
    changes; the next call simply retries.

    Args:
        agent: Object providing optional ``enabled_toolsets`` and
            ``disabled_toolsets`` attributes and receiving the cache.

    Returns:
        The names produced by ``tool_search.scoped_deferrable_names`` for the
        session's scoped tool definitions, or an empty ``frozenset`` on
        failure.
    """
    try:
        import model_tools
        from tools import tool_search as _ts
        from tools.registry import registry as _registry
    except Exception:
        # Without these modules no scope can be established: allow nothing.
        logger.debug("Tool Search scope modules unavailable", exc_info=True)
        return frozenset()

    enabled = getattr(agent, "enabled_toolsets", None)
    disabled = getattr(agent, "disabled_toolsets", None)
    cache_key = (
        getattr(_registry, "_generation", 0),
        frozenset(enabled) if enabled is not None else None,
        frozenset(disabled) if disabled is not None else None,
    )
    cached = getattr(agent, "_tool_search_scope_cache", None)
    if cached is not None and cached[0] == cache_key:
        return cached[1]

    try:
        scoped_defs = model_tools.get_tool_definitions(
            enabled_toolsets=enabled,
            disabled_toolsets=disabled,
            quiet_mode=True,
            skip_tool_search_assembly=True,
        ) or []
        names = _ts.scoped_deferrable_names(scoped_defs)
    except Exception:
        # Fail closed for this call, but skip the cache write below so the
        # empty fallback is not pinned under the current cache key.
        logger.warning(
            "Failed to compute Tool Search scope; rejecting tool_call unwraps",
            exc_info=True,
        )
        return frozenset()

    try:
        agent._tool_search_scope_cache = (cache_key, names)
    except Exception:
        # Caching is an optimisation only; an agent that rejects attribute
        # assignment still gets a correct answer.
        pass
    return names
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-05-16 18:24:05 -07:00
|
|
|
|
def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
|
|
|
|
|
|
"""Execute multiple tool calls concurrently using a thread pool.
|
|
|
|
|
|
|
|
|
|
|
|
Results are collected in the original tool-call order and appended to
|
|
|
|
|
|
messages so the API sees them in the expected sequence.
|
|
|
|
|
|
"""
|
|
|
|
|
|
tool_calls = assistant_message.tool_calls
|
|
|
|
|
|
num_tools = len(tool_calls)
|
|
|
|
|
|
|
|
|
|
|
|
# ── Pre-flight: interrupt check ──────────────────────────────────
|
|
|
|
|
|
if agent._interrupt_requested:
|
|
|
|
|
|
print(f"{agent.log_prefix}⚡ Interrupt: skipping {num_tools} tool call(s)")
|
|
|
|
|
|
for tc in tool_calls:
|
2026-05-19 20:24:30 +01:00
|
|
|
|
messages.append(make_tool_result_message(
|
|
|
|
|
|
tc.function.name,
|
|
|
|
|
|
f"[Tool execution cancelled — {tc.function.name} was skipped due to user interrupt]",
|
|
|
|
|
|
tc.id,
|
|
|
|
|
|
))
|
2026-05-16 18:24:05 -07:00
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
# ── Parse args + pre-execution bookkeeping ───────────────────────
|
|
|
|
|
|
parsed_calls = [] # list of (tool_call, function_name, function_args)
|
|
|
|
|
|
for tool_call in tool_calls:
|
|
|
|
|
|
function_name = tool_call.function.name
|
|
|
|
|
|
|
|
|
|
|
|
# Reset nudge counters
|
|
|
|
|
|
if function_name == "memory":
|
|
|
|
|
|
agent._turns_since_memory = 0
|
|
|
|
|
|
elif function_name == "skill_manage":
|
|
|
|
|
|
agent._iters_since_skill = 0
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
function_args = json.loads(tool_call.function.arguments)
|
|
|
|
|
|
except json.JSONDecodeError:
|
|
|
|
|
|
function_args = {}
|
|
|
|
|
|
if not isinstance(function_args, dict):
|
|
|
|
|
|
function_args = {}
|
|
|
|
|
|
|
feat(tools): progressive tool disclosure for MCP and plugin tools
Adds Tool Search, a structured-tools progressive-disclosure layer that
replaces MCP and non-core plugin tools in the model-visible tools array
with three bridge tools (tool_search / tool_describe / tool_call) when
the deferrable surface would consume more than a configurable percentage
of the active model's context window. Core Hermes tools are never deferred.
Default mode is 'auto' with a 10% context threshold, so small toolsets
pay no overhead. Set tools.tool_search.enabled to 'on' to force or 'off'
to disable.
Design carefully reflects the OpenClaw production failure modes
documented in the openclaw-tool-search-report:
- Core tools never defer (toolsets._HERMES_CORE_TOOLS). Addresses the
'tools silently missing from isolated cron turns' regression class
(openclaw#84141) by construction: there is no code path that can
drop a core tool.
- Catalog is stateless across turns — rebuilt from the live tool-defs
list on every assembly. No session-keyed Map that can drift out of
sync with the registry.
- tool_call unwraps the bridge call before any hook fires, so plugin
pre/post hooks, guardrails, approval flows, and the activity feed
all see the underlying tool name, not the bridge (addresses
openclaw#85588 and the verbose-mode complaint on openclaw#79823).
- The unwrap happens in both the parallel and sequential paths of
agent/tool_executor.py and also in handle_function_call, so direct
callers (sandboxed code, eval harnesses) are covered too.
- Bridge tools cannot invoke each other (recursion guard) and cannot
invoke core tools (those must be called directly).
- Tools mode only — no JS-sandbox code-mode. Keeps the surface small.
- Token estimation via cheap char/4 heuristic; precision isn't needed
for the threshold decision.
Files:
- tools/tool_search.py — new module (BM25 retrieval, classification,
threshold gate, bridge dispatch, unwrap helper).
- tests/tools/test_tool_search.py — 35 tests including the OpenClaw
#84141 regression guard.
- model_tools.py — wires assembly into _compute_tool_definitions as the
final step, adds skip_tool_search_assembly kwarg so the bridge can
see the real catalog, dispatches the three bridge tools.
- agent/tool_executor.py — unwraps tool_call in both parallel and
sequential parsing loops so checkpointing, guardrails, plugin hooks,
and tool-progress callbacks all observe the underlying tool name.
- hermes_cli/config.py — DEFAULT_CONFIG['tools']['tool_search'] block.
- website/docs/user-guide/features/tool-search.md — user docs.
Validation:
- 35/35 new tests pass.
- Existing tool/registry/model_tools/config/coercion/executor tests
(82 + 74 + small adjacents) green.
- Live E2E: 20 fake MCP tools registered, get_tool_definitions returns
3 bridges, tool_search returns top 3 hits, tool_describe returns
full schema, tool_call dispatches to the real underlying handler
and the underlying result is what the model sees.
- Reserved-name recursion guard verified live.
- Core-tool refusal via tool_call verified live.
2026-05-23 15:22:01 -07:00
|
|
|
|
# ── Tool Search unwrap ────────────────────────────────────────
|
|
|
|
|
|
# When the model invokes the tool_call bridge, peel it open so
|
|
|
|
|
|
# every downstream check (checkpointing, guardrails, plugin
|
|
|
|
|
|
# pre-tool-call hooks, the display/activity feed, the post-call
|
|
|
|
|
|
# callback) sees the underlying tool — not the bridge. This is
|
|
|
|
|
|
# the OpenClaw lesson: hooks must observe the real tool name.
|
|
|
|
|
|
#
|
|
|
|
|
|
# The original tool_call entry on ``tool_call.function`` is left
|
|
|
|
|
|
# untouched so the conversation transcript and the matching
|
|
|
|
|
|
# tool_call_id are preserved exactly as the model emitted them.
|
fix(tool-search): scope bridge catalog + dispatch to the session's toolsets
Tool Search read its catalog from the global registry (get_tool_definitions
with no toolset scope = 'start with everything'), so a restricted-toolset
session — subagent, kanban worker, curated gateway session — could:
1. tool_search the entire process registry, not just its granted tools, and
2. tool_call any registered plugin/MCP tool it was never given, because
registry.dispatch() has no enabled_tools gate for non-execute_code tools.
A scoped session (enabled_toolsets=['mcp-github']) reported total_available=26
and successfully invoked an out-of-scope plugin tool via tool_call.
Fix:
- handle_function_call gains enabled_toolsets/disabled_toolsets; the bridge
dispatch scopes get_tool_definitions to them (also stops polluting the
process-global _last_resolved_tool_names with out-of-scope tools, which
leaked into execute_code's sandbox-tool fallback).
- A defense-in-depth gate rejects any tool_call'd name not in the scoped
deferrable catalog.
- tool_executor's unwrap (both concurrent + sequential paths) enforces the
same scope before dispatch, since it unwraps tool_call -> underlying name
and bypasses the bridge branch. New _tool_search_scoped_names() helper,
cached per-agent on registry generation + toolset scope.
- New scoped_deferrable_names() helper in tool_search.py shared by both sites.
Tests: 4 new regression tests in TestRegression_ToolsetScoping (scoped
catalog, out-of-scope tool_call rejection, no global pollution, helper).
2026-05-29 01:21:41 -07:00
|
|
|
|
#
|
|
|
|
|
|
# Scope gate: the unwrap dispatches the underlying tool directly
|
|
|
|
|
|
# (bypassing the bridge branch in handle_function_call and its
|
|
|
|
|
|
# scope check), so we enforce session toolset scope HERE. A tool
|
|
|
|
|
|
# the session was not granted is rejected before any checkpoint,
|
|
|
|
|
|
# hook, or dispatch fires.
|
|
|
|
|
|
_ts_scope_block = None
|
feat(tools): progressive tool disclosure for MCP and plugin tools
Adds Tool Search, a structured-tools progressive-disclosure layer that
replaces MCP and non-core plugin tools in the model-visible tools array
with three bridge tools (tool_search / tool_describe / tool_call) when
the deferrable surface would consume more than a configurable percentage
of the active model's context window. Core Hermes tools are never deferred.
Default mode is 'auto' with a 10% context threshold, so small toolsets
pay no overhead. Set tools.tool_search.enabled to 'on' to force or 'off'
to disable.
Design carefully reflects the OpenClaw production failure modes
documented in the openclaw-tool-search-report:
- Core tools never defer (toolsets._HERMES_CORE_TOOLS). Addresses the
'tools silently missing from isolated cron turns' regression class
(openclaw#84141) by construction: there is no code path that can
drop a core tool.
- Catalog is stateless across turns — rebuilt from the live tool-defs
list on every assembly. No session-keyed Map that can drift out of
sync with the registry.
- tool_call unwraps the bridge call before any hook fires, so plugin
pre/post hooks, guardrails, approval flows, and the activity feed
all see the underlying tool name, not the bridge (addresses
openclaw#85588 and the verbose-mode complaint on openclaw#79823).
- The unwrap happens in both the parallel and sequential paths of
agent/tool_executor.py and also in handle_function_call, so direct
callers (sandboxed code, eval harnesses) are covered too.
- Bridge tools cannot invoke each other (recursion guard) and cannot
invoke core tools (those must be called directly).
- Tools mode only — no JS-sandbox code-mode. Keeps the surface small.
- Token estimation via cheap char/4 heuristic; precision isn't needed
for the threshold decision.
Files:
- tools/tool_search.py — new module (BM25 retrieval, classification,
threshold gate, bridge dispatch, unwrap helper).
- tests/tools/test_tool_search.py — 35 tests including the OpenClaw
#84141 regression guard.
- model_tools.py — wires assembly into _compute_tool_definitions as the
final step, adds skip_tool_search_assembly kwarg so the bridge can
see the real catalog, dispatches the three bridge tools.
- agent/tool_executor.py — unwraps tool_call in both parallel and
sequential parsing loops so checkpointing, guardrails, plugin hooks,
and tool-progress callbacks all observe the underlying tool name.
- hermes_cli/config.py — DEFAULT_CONFIG['tools']['tool_search'] block.
- website/docs/user-guide/features/tool-search.md — user docs.
Validation:
- 35/35 new tests pass.
- Existing tool/registry/model_tools/config/coercion/executor tests
(82 + 74 + small adjacents) green.
- Live E2E: 20 fake MCP tools registered, get_tool_definitions returns
3 bridges, tool_search returns top 3 hits, tool_describe returns
full schema, tool_call dispatches to the real underlying handler
and the underlying result is what the model sees.
- Reserved-name recursion guard verified live.
- Core-tool refusal via tool_call verified live.
2026-05-23 15:22:01 -07:00
|
|
|
|
try:
|
|
|
|
|
|
from tools import tool_search as _ts
|
|
|
|
|
|
if function_name == _ts.TOOL_CALL_NAME:
|
|
|
|
|
|
_underlying, _underlying_args, _err = _ts.resolve_underlying_call(function_args)
|
|
|
|
|
|
if not _err and _underlying:
|
fix(tool-search): scope bridge catalog + dispatch to the session's toolsets
Tool Search read its catalog from the global registry (get_tool_definitions
with no toolset scope = 'start with everything'), so a restricted-toolset
session — subagent, kanban worker, curated gateway session — could:
1. tool_search the entire process registry, not just its granted tools, and
2. tool_call any registered plugin/MCP tool it was never given, because
registry.dispatch() has no enabled_tools gate for non-execute_code tools.
A scoped session (enabled_toolsets=['mcp-github']) reported total_available=26
and successfully invoked an out-of-scope plugin tool via tool_call.
Fix:
- handle_function_call gains enabled_toolsets/disabled_toolsets; the bridge
dispatch scopes get_tool_definitions to them (also stops polluting the
process-global _last_resolved_tool_names with out-of-scope tools, which
leaked into execute_code's sandbox-tool fallback).
- A defense-in-depth gate rejects any tool_call'd name not in the scoped
deferrable catalog.
- tool_executor's unwrap (both concurrent + sequential paths) enforces the
same scope before dispatch, since it unwraps tool_call -> underlying name
and bypasses the bridge branch. New _tool_search_scoped_names() helper,
cached per-agent on registry generation + toolset scope.
- New scoped_deferrable_names() helper in tool_search.py shared by both sites.
Tests: 4 new regression tests in TestRegression_ToolsetScoping (scoped
catalog, out-of-scope tool_call rejection, no global pollution, helper).
2026-05-29 01:21:41 -07:00
|
|
|
|
if _underlying in _tool_search_scoped_names(agent):
|
|
|
|
|
|
function_name = _underlying
|
|
|
|
|
|
function_args = _underlying_args
|
|
|
|
|
|
else:
|
|
|
|
|
|
_ts_scope_block = json.dumps({
|
|
|
|
|
|
"error": (
|
|
|
|
|
|
f"'{_underlying}' is not available in this session. "
|
|
|
|
|
|
"Use tool_search to find tools you can call."
|
|
|
|
|
|
),
|
|
|
|
|
|
}, ensure_ascii=False)
|
feat(tools): progressive tool disclosure for MCP and plugin tools
Adds Tool Search, a structured-tools progressive-disclosure layer that
replaces MCP and non-core plugin tools in the model-visible tools array
with three bridge tools (tool_search / tool_describe / tool_call) when
the deferrable surface would consume more than a configurable percentage
of the active model's context window. Core Hermes tools are never deferred.
Default mode is 'auto' with a 10% context threshold, so small toolsets
pay no overhead. Set tools.tool_search.enabled to 'on' to force or 'off'
to disable.
Design carefully reflects the OpenClaw production failure modes
documented in the openclaw-tool-search-report:
- Core tools never defer (toolsets._HERMES_CORE_TOOLS). Addresses the
'tools silently missing from isolated cron turns' regression class
(openclaw#84141) by construction: there is no code path that can
drop a core tool.
- Catalog is stateless across turns — rebuilt from the live tool-defs
list on every assembly. No session-keyed Map that can drift out of
sync with the registry.
- tool_call unwraps the bridge call before any hook fires, so plugin
pre/post hooks, guardrails, approval flows, and the activity feed
all see the underlying tool name, not the bridge (addresses
openclaw#85588 and the verbose-mode complaint on openclaw#79823).
- The unwrap happens in both the parallel and sequential paths of
agent/tool_executor.py and also in handle_function_call, so direct
callers (sandboxed code, eval harnesses) are covered too.
- Bridge tools cannot invoke each other (recursion guard) and cannot
invoke core tools (those must be called directly).
- Tools mode only — no JS-sandbox code-mode. Keeps the surface small.
- Token estimation via cheap char/4 heuristic; precision isn't needed
for the threshold decision.
Files:
- tools/tool_search.py — new module (BM25 retrieval, classification,
threshold gate, bridge dispatch, unwrap helper).
- tests/tools/test_tool_search.py — 35 tests including the OpenClaw
#84141 regression guard.
- model_tools.py — wires assembly into _compute_tool_definitions as the
final step, adds skip_tool_search_assembly kwarg so the bridge can
see the real catalog, dispatches the three bridge tools.
- agent/tool_executor.py — unwraps tool_call in both parallel and
sequential parsing loops so checkpointing, guardrails, plugin hooks,
and tool-progress callbacks all observe the underlying tool name.
- hermes_cli/config.py — DEFAULT_CONFIG['tools']['tool_search'] block.
- website/docs/user-guide/features/tool-search.md — user docs.
Validation:
- 35/35 new tests pass.
- Existing tool/registry/model_tools/config/coercion/executor tests
(82 + 74 + small adjacents) green.
- Live E2E: 20 fake MCP tools registered, get_tool_definitions returns
3 bridges, tool_search returns top 3 hits, tool_describe returns
full schema, tool_call dispatches to the real underlying handler
and the underlying result is what the model sees.
- Reserved-name recursion guard verified live.
- Core-tool refusal via tool_call verified live.
2026-05-23 15:22:01 -07:00
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
2026-05-29 15:40:01 -05:00
|
|
|
|
# ── Block evaluation (BEFORE checkpoint preflight) ───────────
|
|
|
|
|
|
# We must know whether the tool will execute before touching
|
|
|
|
|
|
# checkpoint state (dedup slot, real snapshots).
|
2026-05-16 18:24:05 -07:00
|
|
|
|
block_result = None
|
|
|
|
|
|
blocked_by_guardrail = False
|
fix(tool-search): scope bridge catalog + dispatch to the session's toolsets
Tool Search read its catalog from the global registry (get_tool_definitions
with no toolset scope = 'start with everything'), so a restricted-toolset
session — subagent, kanban worker, curated gateway session — could:
1. tool_search the entire process registry, not just its granted tools, and
2. tool_call any registered plugin/MCP tool it was never given, because
registry.dispatch() has no enabled_tools gate for non-execute_code tools.
A scoped session (enabled_toolsets=['mcp-github']) reported total_available=26
and successfully invoked an out-of-scope plugin tool via tool_call.
Fix:
- handle_function_call gains enabled_toolsets/disabled_toolsets; the bridge
dispatch scopes get_tool_definitions to them (also stops polluting the
process-global _last_resolved_tool_names with out-of-scope tools, which
leaked into execute_code's sandbox-tool fallback).
- A defense-in-depth gate rejects any tool_call'd name not in the scoped
deferrable catalog.
- tool_executor's unwrap (both concurrent + sequential paths) enforces the
same scope before dispatch, since it unwraps tool_call -> underlying name
and bypasses the bridge branch. New _tool_search_scoped_names() helper,
cached per-agent on registry generation + toolset scope.
- New scoped_deferrable_names() helper in tool_search.py shared by both sites.
Tests: 4 new regression tests in TestRegression_ToolsetScoping (scoped
catalog, out-of-scope tool_call rejection, no global pollution, helper).
2026-05-29 01:21:41 -07:00
|
|
|
|
if _ts_scope_block is not None:
|
|
|
|
|
|
# Out-of-scope tool_call: reject before hooks/guardrails/dispatch.
|
|
|
|
|
|
block_result = _ts_scope_block
|
feat(observability): observer-grade telemetry hooks + NeMo-Relay plugin
Adds backend-neutral observer hooks for plugins: session, turn, API
request, tool, approval, and subagent lifecycle events with stable
correlation IDs (session_id, task_id, turn_id, api_request_id,
tool_call_id, parent/child subagent ids). Extends VALID_HOOKS with
api_request_error and subagent_start.
Hot path is zero-cost when no plugin subscribes: has_hook()/presence
checks gate all payload construction, request payloads are returned
by reference when no middleware rewrites, and the sanitized response
payload no longer embeds raw response objects.
Bundles the optional NeMo-Relay observability plugin
(plugins/observability/nemo_relay) as an in-repo consumer of the new
hooks, peer to the existing langfuse plugin. Fails open when the
optional nemo-relay package is not installed.
Authored-by: Bryan Bednarski <bbednarski@nvidia.com>
Salvaged from #29722 onto current main.
2026-06-03 17:44:13 +05:30
|
|
|
|
_emit_terminal_post_tool_call(
|
|
|
|
|
|
agent,
|
|
|
|
|
|
function_name=function_name,
|
|
|
|
|
|
function_args=function_args,
|
|
|
|
|
|
result=block_result,
|
|
|
|
|
|
effective_task_id=effective_task_id,
|
|
|
|
|
|
tool_call_id=getattr(tool_call, "id", "") or "",
|
|
|
|
|
|
status="blocked",
|
|
|
|
|
|
error_type="tool_scope_block",
|
|
|
|
|
|
error_message=_ts_scope_block,
|
|
|
|
|
|
)
|
2026-05-16 18:24:05 -07:00
|
|
|
|
else:
|
fix(tool-search): scope bridge catalog + dispatch to the session's toolsets
Tool Search read its catalog from the global registry (get_tool_definitions
with no toolset scope = 'start with everything'), so a restricted-toolset
session — subagent, kanban worker, curated gateway session — could:
1. tool_search the entire process registry, not just its granted tools, and
2. tool_call any registered plugin/MCP tool it was never given, because
registry.dispatch() has no enabled_tools gate for non-execute_code tools.
A scoped session (enabled_toolsets=['mcp-github']) reported total_available=26
and successfully invoked an out-of-scope plugin tool via tool_call.
Fix:
- handle_function_call gains enabled_toolsets/disabled_toolsets; the bridge
dispatch scopes get_tool_definitions to them (also stops polluting the
process-global _last_resolved_tool_names with out-of-scope tools, which
leaked into execute_code's sandbox-tool fallback).
- A defense-in-depth gate rejects any tool_call'd name not in the scoped
deferrable catalog.
- tool_executor's unwrap (both concurrent + sequential paths) enforces the
same scope before dispatch, since it unwraps tool_call -> underlying name
and bypasses the bridge branch. New _tool_search_scoped_names() helper,
cached per-agent on registry generation + toolset scope.
- New scoped_deferrable_names() helper in tool_search.py shared by both sites.
Tests: 4 new regression tests in TestRegression_ToolsetScoping (scoped
catalog, out-of-scope tool_call rejection, no global pollution, helper).
2026-05-29 01:21:41 -07:00
|
|
|
|
try:
|
|
|
|
|
|
from hermes_cli.plugins import get_pre_tool_call_block_message
|
|
|
|
|
|
block_message = get_pre_tool_call_block_message(
|
feat(observability): observer-grade telemetry hooks + NeMo-Relay plugin
Adds backend-neutral observer hooks for plugins: session, turn, API
request, tool, approval, and subagent lifecycle events with stable
correlation IDs (session_id, task_id, turn_id, api_request_id,
tool_call_id, parent/child subagent ids). Extends VALID_HOOKS with
api_request_error and subagent_start.
Hot path is zero-cost when no plugin subscribes: has_hook()/presence
checks gate all payload construction, request payloads are returned
by reference when no middleware rewrites, and the sanitized response
payload no longer embeds raw response objects.
Bundles the optional NeMo-Relay observability plugin
(plugins/observability/nemo_relay) as an in-repo consumer of the new
hooks, peer to the existing langfuse plugin. Fails open when the
optional nemo-relay package is not installed.
Authored-by: Bryan Bednarski <bbednarski@nvidia.com>
Salvaged from #29722 onto current main.
2026-06-03 17:44:13 +05:30
|
|
|
|
function_name,
|
|
|
|
|
|
function_args,
|
|
|
|
|
|
task_id=effective_task_id or "",
|
|
|
|
|
|
session_id=getattr(agent, "session_id", "") or "",
|
|
|
|
|
|
tool_call_id=getattr(tool_call, "id", "") or "",
|
|
|
|
|
|
turn_id=getattr(agent, "_current_turn_id", "") or "",
|
|
|
|
|
|
api_request_id=getattr(agent, "_current_api_request_id", "") or "",
|
fix(tool-search): scope bridge catalog + dispatch to the session's toolsets
Tool Search read its catalog from the global registry (get_tool_definitions
with no toolset scope = 'start with everything'), so a restricted-toolset
session — subagent, kanban worker, curated gateway session — could:
1. tool_search the entire process registry, not just its granted tools, and
2. tool_call any registered plugin/MCP tool it was never given, because
registry.dispatch() has no enabled_tools gate for non-execute_code tools.
A scoped session (enabled_toolsets=['mcp-github']) reported total_available=26
and successfully invoked an out-of-scope plugin tool via tool_call.
Fix:
- handle_function_call gains enabled_toolsets/disabled_toolsets; the bridge
dispatch scopes get_tool_definitions to them (also stops polluting the
process-global _last_resolved_tool_names with out-of-scope tools, which
leaked into execute_code's sandbox-tool fallback).
- A defense-in-depth gate rejects any tool_call'd name not in the scoped
deferrable catalog.
- tool_executor's unwrap (both concurrent + sequential paths) enforces the
same scope before dispatch, since it unwraps tool_call -> underlying name
and bypasses the bridge branch. New _tool_search_scoped_names() helper,
cached per-agent on registry generation + toolset scope.
- New scoped_deferrable_names() helper in tool_search.py shared by both sites.
Tests: 4 new regression tests in TestRegression_ToolsetScoping (scoped
catalog, out-of-scope tool_call rejection, no global pollution, helper).
2026-05-29 01:21:41 -07:00
|
|
|
|
)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
block_message = None
|
|
|
|
|
|
|
|
|
|
|
|
if block_message is not None:
|
|
|
|
|
|
block_result = json.dumps({"error": block_message}, ensure_ascii=False)
|
feat(observability): observer-grade telemetry hooks + NeMo-Relay plugin
Adds backend-neutral observer hooks for plugins: session, turn, API
request, tool, approval, and subagent lifecycle events with stable
correlation IDs (session_id, task_id, turn_id, api_request_id,
tool_call_id, parent/child subagent ids). Extends VALID_HOOKS with
api_request_error and subagent_start.
Hot path is zero-cost when no plugin subscribes: has_hook()/presence
checks gate all payload construction, request payloads are returned
by reference when no middleware rewrites, and the sanitized response
payload no longer embeds raw response objects.
Bundles the optional NeMo-Relay observability plugin
(plugins/observability/nemo_relay) as an in-repo consumer of the new
hooks, peer to the existing langfuse plugin. Fails open when the
optional nemo-relay package is not installed.
Authored-by: Bryan Bednarski <bbednarski@nvidia.com>
Salvaged from #29722 onto current main.
2026-06-03 17:44:13 +05:30
|
|
|
|
_emit_terminal_post_tool_call(
|
|
|
|
|
|
agent,
|
|
|
|
|
|
function_name=function_name,
|
|
|
|
|
|
function_args=function_args,
|
|
|
|
|
|
result=block_result,
|
|
|
|
|
|
effective_task_id=effective_task_id,
|
|
|
|
|
|
tool_call_id=getattr(tool_call, "id", "") or "",
|
|
|
|
|
|
status="blocked",
|
|
|
|
|
|
error_type="plugin_block",
|
|
|
|
|
|
error_message=block_message,
|
|
|
|
|
|
)
|
fix(tool-search): scope bridge catalog + dispatch to the session's toolsets
Tool Search read its catalog from the global registry (get_tool_definitions
with no toolset scope = 'start with everything'), so a restricted-toolset
session — subagent, kanban worker, curated gateway session — could:
1. tool_search the entire process registry, not just its granted tools, and
2. tool_call any registered plugin/MCP tool it was never given, because
registry.dispatch() has no enabled_tools gate for non-execute_code tools.
A scoped session (enabled_toolsets=['mcp-github']) reported total_available=26
and successfully invoked an out-of-scope plugin tool via tool_call.
Fix:
- handle_function_call gains enabled_toolsets/disabled_toolsets; the bridge
dispatch scopes get_tool_definitions to them (also stops polluting the
process-global _last_resolved_tool_names with out-of-scope tools, which
leaked into execute_code's sandbox-tool fallback).
- A defense-in-depth gate rejects any tool_call'd name not in the scoped
deferrable catalog.
- tool_executor's unwrap (both concurrent + sequential paths) enforces the
same scope before dispatch, since it unwraps tool_call -> underlying name
and bypasses the bridge branch. New _tool_search_scoped_names() helper,
cached per-agent on registry generation + toolset scope.
- New scoped_deferrable_names() helper in tool_search.py shared by both sites.
Tests: 4 new regression tests in TestRegression_ToolsetScoping (scoped
catalog, out-of-scope tool_call rejection, no global pollution, helper).
2026-05-29 01:21:41 -07:00
|
|
|
|
else:
|
|
|
|
|
|
guardrail_decision = agent._tool_guardrails.before_call(function_name, function_args)
|
|
|
|
|
|
if not guardrail_decision.allows_execution:
|
|
|
|
|
|
block_result = agent._guardrail_block_result(guardrail_decision)
|
|
|
|
|
|
blocked_by_guardrail = True
|
feat(observability): observer-grade telemetry hooks + NeMo-Relay plugin
Adds backend-neutral observer hooks for plugins: session, turn, API
request, tool, approval, and subagent lifecycle events with stable
correlation IDs (session_id, task_id, turn_id, api_request_id,
tool_call_id, parent/child subagent ids). Extends VALID_HOOKS with
api_request_error and subagent_start.
Hot path is zero-cost when no plugin subscribes: has_hook()/presence
checks gate all payload construction, request payloads are returned
by reference when no middleware rewrites, and the sanitized response
payload no longer embeds raw response objects.
Bundles the optional NeMo-Relay observability plugin
(plugins/observability/nemo_relay) as an in-repo consumer of the new
hooks, peer to the existing langfuse plugin. Fails open when the
optional nemo-relay package is not installed.
Authored-by: Bryan Bednarski <bbednarski@nvidia.com>
Salvaged from #29722 onto current main.
2026-06-03 17:44:13 +05:30
|
|
|
|
_emit_terminal_post_tool_call(
|
|
|
|
|
|
agent,
|
|
|
|
|
|
function_name=function_name,
|
|
|
|
|
|
function_args=function_args,
|
|
|
|
|
|
result=block_result,
|
|
|
|
|
|
effective_task_id=effective_task_id,
|
|
|
|
|
|
tool_call_id=getattr(tool_call, "id", "") or "",
|
|
|
|
|
|
status="blocked",
|
|
|
|
|
|
error_type="guardrail_block",
|
|
|
|
|
|
error_message=getattr(guardrail_decision, "message", None) or "Tool blocked by guardrail policy",
|
|
|
|
|
|
)
|
2026-05-16 18:24:05 -07:00
|
|
|
|
|
2026-05-29 15:40:01 -05:00
|
|
|
|
# ── Checkpoint preflight (only for tools that will execute) ──
|
|
|
|
|
|
if block_result is None:
|
|
|
|
|
|
# Checkpoint for file-mutating tools
|
|
|
|
|
|
if function_name in {"write_file", "patch"} and agent._checkpoint_mgr.enabled:
|
|
|
|
|
|
try:
|
|
|
|
|
|
file_path = function_args.get("path", "")
|
|
|
|
|
|
if file_path:
|
|
|
|
|
|
work_dir = agent._checkpoint_mgr.get_working_dir_for_path(file_path)
|
|
|
|
|
|
agent._checkpoint_mgr.ensure_checkpoint(work_dir, f"before {function_name}")
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
# Checkpoint before destructive terminal commands
|
|
|
|
|
|
if function_name == "terminal" and agent._checkpoint_mgr.enabled:
|
|
|
|
|
|
try:
|
|
|
|
|
|
cmd = function_args.get("command", "")
|
|
|
|
|
|
if _is_destructive_command(cmd):
|
|
|
|
|
|
cwd = function_args.get("workdir") or os.getenv("TERMINAL_CWD", os.getcwd())
|
|
|
|
|
|
agent._checkpoint_mgr.ensure_checkpoint(
|
|
|
|
|
|
cwd, f"before terminal: {cmd[:60]}"
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
2026-05-16 18:24:05 -07:00
|
|
|
|
parsed_calls.append((tool_call, function_name, function_args, block_result, blocked_by_guardrail))
|
|
|
|
|
|
|
|
|
|
|
|
# ── Logging / callbacks ──────────────────────────────────────────
|
|
|
|
|
|
tool_names_str = ", ".join(name for _, name, _, _, _ in parsed_calls)
|
|
|
|
|
|
if not agent.quiet_mode:
|
|
|
|
|
|
print(f" ⚡ Concurrent: {num_tools} tool calls — {tool_names_str}")
|
|
|
|
|
|
for i, (tc, name, args, block_result, blocked_by_guardrail) in enumerate(parsed_calls, 1):
|
|
|
|
|
|
args_str = json.dumps(args, ensure_ascii=False)
|
|
|
|
|
|
if agent.verbose_logging:
|
|
|
|
|
|
print(f" 📞 Tool {i}: {name}({list(args.keys())})")
|
|
|
|
|
|
print(agent._wrap_verbose("Args: ", json.dumps(args, indent=2, ensure_ascii=False)))
|
|
|
|
|
|
else:
|
|
|
|
|
|
args_preview = args_str[:agent.log_prefix_chars] + "..." if len(args_str) > agent.log_prefix_chars else args_str
|
|
|
|
|
|
print(f" 📞 Tool {i}: {name}({list(args.keys())}) - {args_preview}")
|
|
|
|
|
|
|
|
|
|
|
|
for tc, name, args, block_result, blocked_by_guardrail in parsed_calls:
|
|
|
|
|
|
if block_result is not None:
|
|
|
|
|
|
continue
|
|
|
|
|
|
if agent.tool_progress_callback:
|
|
|
|
|
|
try:
|
|
|
|
|
|
preview = _build_tool_preview(name, args)
|
|
|
|
|
|
agent.tool_progress_callback("tool.started", name, preview, args)
|
|
|
|
|
|
except Exception as cb_err:
|
|
|
|
|
|
logging.debug(f"Tool progress callback error: {cb_err}")
|
|
|
|
|
|
|
|
|
|
|
|
for tc, name, args, block_result, blocked_by_guardrail in parsed_calls:
|
|
|
|
|
|
if block_result is not None:
|
|
|
|
|
|
continue
|
|
|
|
|
|
if agent.tool_start_callback:
|
|
|
|
|
|
try:
|
|
|
|
|
|
agent.tool_start_callback(tc.id, name, args)
|
|
|
|
|
|
except Exception as cb_err:
|
|
|
|
|
|
logging.debug(f"Tool start callback error: {cb_err}")
|
|
|
|
|
|
|
|
|
|
|
|
# ── Concurrent execution ─────────────────────────────────────────
|
|
|
|
|
|
# Each slot holds (function_name, function_args, function_result, duration, error_flag, blocked_flag)
|
|
|
|
|
|
results = [None] * num_tools
|
|
|
|
|
|
for i, (tc, name, args, block_result, blocked_by_guardrail) in enumerate(parsed_calls):
|
|
|
|
|
|
if block_result is not None:
|
|
|
|
|
|
results[i] = (name, args, block_result, 0.0, True, True)
|
|
|
|
|
|
|
|
|
|
|
|
# Touch activity before launching workers so the gateway knows
|
|
|
|
|
|
# we're executing tools (not stuck).
|
|
|
|
|
|
agent._current_tool = tool_names_str
|
|
|
|
|
|
agent._touch_activity(f"executing {num_tools} tools concurrently: {tool_names_str}")
|
|
|
|
|
|
|
|
|
|
|
|
def _run_tool(index, tool_call, function_name, function_args):
    """Run one tool call on a worker thread and record its outcome.

    The outcome is stored in ``results[index]`` as a 6-tuple
    ``(function_name, function_args, result, duration, is_error, blocked)``.
    ``blocked`` is always ``False`` here: this worker only stores results
    for calls it actually attempted to run.

    Args:
        index: Slot in ``results`` that this call writes to.
        tool_call: The tool-call object; its ``id`` is forwarded to
            ``agent._invoke_tool`` and to the cancellation hook.
        function_name: Name of the tool to invoke.
        function_args: Parsed argument dict for the tool.

    Returns:
        None. The result is communicated through ``results[index]``.
    """
    # Register this worker tid so the agent can fan out an interrupt
    # to it — see AIAgent.interrupt(). Must happen first thing, and
    # must be paired with discard + clear in the finally block.
    _worker_tid = threading.current_thread().ident
    with agent._tool_worker_threads_lock:
        agent._tool_worker_threads.add(_worker_tid)
    # Race: if the agent was interrupted between fan-out (which
    # snapshotted an empty/earlier set) and our registration, apply
    # the interrupt to our own tid now so is_interrupted() inside
    # the tool returns True on the next poll.
    if agent._interrupt_requested:
        try:
            _ra()._set_interrupt(True, _worker_tid)
        except Exception:
            # Best-effort: never let interrupt bookkeeping abort the tool.
            logger.debug("failed to apply pending interrupt to worker %s", _worker_tid, exc_info=True)
    # Set the activity callback on THIS worker thread so
    # _wait_for_process (terminal commands) can fire heartbeats.
    # The callback is thread-local; the main thread's callback
    # is invisible to worker threads.
    try:
        from tools.environments.base import set_activity_callback
        set_activity_callback(agent._touch_activity)
    except Exception:
        # Best-effort: heartbeats are optional, the tool still runs.
        logger.debug("failed to install activity callback on worker %s", _worker_tid, exc_info=True)
    # Approval/sudo callbacks (thread-local) and the agent turn's
    # ContextVars are propagated by propagate_context_to_thread() at the
    # submit site below (GHSA-qg5c-hvr5-hjgr, #13617).
    start = time.time()
    try:
        try:
            result = agent._invoke_tool(
                function_name,
                function_args,
                effective_task_id,
                tool_call.id,
                messages=messages,
                pre_tool_block_checked=True,
            )
        except KeyboardInterrupt:
            # KeyboardInterrupt is a BaseException, so it needs its own
            # handler: flag the agent as interrupted and record the call
            # as cancelled instead of letting the worker die silently.
            try:
                agent.interrupt("keyboard interrupt")
            except Exception:
                logger.debug("agent.interrupt() failed during tool cancellation", exc_info=True)
            result = _emit_cancelled_terminal_post_tool_call(
                agent,
                function_name=function_name,
                function_args=function_args,
                effective_task_id=effective_task_id,
                tool_call_id=getattr(tool_call, "id", "") or "",
                start_time=start,
            )
            duration = time.time() - start
            logger.info("tool %s cancelled (%.2fs)", function_name, duration)
            results[index] = (function_name, function_args, result, duration, True, False)
            return
        except Exception as tool_error:
            result = f"Error executing tool '{function_name}': {tool_error}"
            logger.error("_invoke_tool raised for %s: %s", function_name, tool_error, exc_info=True)
        duration = time.time() - start
        is_error, _ = _detect_tool_failure(function_name, result)
        if is_error:
            # Tool results may be multimodal (non-string) payloads. Slicing
            # one directly would raise here and leave results[index] unset,
            # so summarise non-strings to text first.
            preview = result if isinstance(result, str) else _multimodal_text_summary(result)
            logger.info("tool %s failed (%.2fs): %s", function_name, duration, preview[:200])
        else:
            logger.info("tool %s completed (%.2fs, %d chars)", function_name, duration, len(result))
        results[index] = (function_name, function_args, result, duration, is_error, False)
    finally:
        # Tear down worker-tid tracking. Clear any interrupt bit we may
        # have set so the next task scheduled onto this recycled tid
        # starts with a clean slate. This MUST be in a finally block
        # because BaseException subclasses (CancelledError, KeyboardInterrupt)
        # bypass ``except Exception`` and would otherwise leak the tid
        # into _interrupted_threads, poisoning the recycled thread.
        with agent._tool_worker_threads_lock:
            agent._tool_worker_threads.discard(_worker_tid)
        try:
            _ra()._set_interrupt(False, _worker_tid)
        except Exception:
            logger.debug("failed to clear interrupt bit for worker %s", _worker_tid, exc_info=True)
|
2026-05-16 18:24:05 -07:00
|
|
|
|
|
|
|
|
|
|
# Start spinner for CLI mode (skip when TUI handles tool progress)
|
|
|
|
|
|
spinner = None
|
|
|
|
|
|
if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner():
|
|
|
|
|
|
face = random.choice(KawaiiSpinner.get_waiting_faces())
|
|
|
|
|
|
spinner = KawaiiSpinner(f"{face} ⚡ running {num_tools} tools concurrently", spinner_type='dots', print_fn=agent._print_fn)
|
|
|
|
|
|
spinner.start()
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
runnable_calls = [
|
|
|
|
|
|
(i, tc, name, args)
|
|
|
|
|
|
for i, (tc, name, args, block_result, blocked_by_guardrail) in enumerate(parsed_calls)
|
|
|
|
|
|
if block_result is None
|
|
|
|
|
|
]
|
|
|
|
|
|
futures = []
|
|
|
|
|
|
if runnable_calls:
|
|
|
|
|
|
max_workers = min(len(runnable_calls), _MAX_TOOL_WORKERS)
|
|
|
|
|
|
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
|
|
|
|
for i, tc, name, args in runnable_calls:
|
2026-05-28 17:47:09 -04:00
|
|
|
|
# Propagate the agent turn's ContextVars (e.g.
|
|
|
|
|
|
# _approval_session_key) AND thread-local approval/sudo
|
|
|
|
|
|
# callbacks into the worker thread; clears callbacks on exit.
|
|
|
|
|
|
f = executor.submit(
|
|
|
|
|
|
propagate_context_to_thread(_run_tool), i, tc, name, args
|
|
|
|
|
|
)
|
2026-05-16 18:24:05 -07:00
|
|
|
|
futures.append(f)
|
|
|
|
|
|
|
|
|
|
|
|
# Wait for all to complete with periodic heartbeats so the
|
|
|
|
|
|
# gateway's inactivity monitor doesn't kill us during long
|
|
|
|
|
|
# concurrent tool batches. Also check for user interrupts
|
|
|
|
|
|
# so we don't block indefinitely when the user sends /stop
|
|
|
|
|
|
# or a new message during concurrent tool execution.
|
|
|
|
|
|
_conc_start = time.time()
|
|
|
|
|
|
_interrupt_logged = False
|
|
|
|
|
|
while True:
|
|
|
|
|
|
done, not_done = concurrent.futures.wait(
|
|
|
|
|
|
futures, timeout=5.0,
|
|
|
|
|
|
)
|
|
|
|
|
|
if not not_done:
|
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
|
|
# Check for interrupt — the per-thread interrupt signal
|
|
|
|
|
|
# already causes individual tools (terminal, execute_code)
|
|
|
|
|
|
# to abort, but tools without interrupt checks (web_search,
|
|
|
|
|
|
# read_file) will run to completion. Cancel any futures
|
|
|
|
|
|
# that haven't started yet so we don't block on them.
|
|
|
|
|
|
if agent._interrupt_requested:
|
|
|
|
|
|
if not _interrupt_logged:
|
|
|
|
|
|
_interrupt_logged = True
|
|
|
|
|
|
agent._vprint(
|
|
|
|
|
|
f"{agent.log_prefix}⚡ Interrupt: cancelling "
|
|
|
|
|
|
f"{len(not_done)} pending concurrent tool(s)",
|
|
|
|
|
|
force=True,
|
|
|
|
|
|
)
|
|
|
|
|
|
for f in not_done:
|
|
|
|
|
|
f.cancel()
|
|
|
|
|
|
# Give already-running tools a moment to notice the
|
|
|
|
|
|
# per-thread interrupt signal and exit gracefully.
|
|
|
|
|
|
concurrent.futures.wait(not_done, timeout=3.0)
|
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
|
|
_conc_elapsed = int(time.time() - _conc_start)
|
|
|
|
|
|
# Heartbeat every ~30s (6 × 5s poll intervals)
|
|
|
|
|
|
if _conc_elapsed > 0 and _conc_elapsed % 30 < 6:
|
|
|
|
|
|
_still_running = [
|
|
|
|
|
|
parsed_calls[futures.index(f)][1]
|
|
|
|
|
|
for f in not_done
|
|
|
|
|
|
if f in futures
|
|
|
|
|
|
]
|
|
|
|
|
|
agent._touch_activity(
|
|
|
|
|
|
f"concurrent tools running ({_conc_elapsed}s, "
|
|
|
|
|
|
f"{len(not_done)} remaining: {', '.join(_still_running[:3])})"
|
|
|
|
|
|
)
|
|
|
|
|
|
finally:
|
|
|
|
|
|
if spinner:
|
|
|
|
|
|
# Build a summary message for the spinner stop
|
|
|
|
|
|
completed = sum(1 for r in results if r is not None)
|
|
|
|
|
|
total_dur = sum(r[3] for r in results if r is not None)
|
|
|
|
|
|
spinner.stop(f"⚡ {completed}/{num_tools} tools completed in {total_dur:.1f}s total")
|
|
|
|
|
|
|
|
|
|
|
|
# ── Post-execution: display per-tool results ─────────────────────
|
|
|
|
|
|
for i, (tc, name, args, block_result, blocked_by_guardrail) in enumerate(parsed_calls):
|
|
|
|
|
|
r = results[i]
|
|
|
|
|
|
blocked = False
|
|
|
|
|
|
if r is None:
|
|
|
|
|
|
# Tool was cancelled (interrupt) or thread didn't return
|
|
|
|
|
|
if agent._interrupt_requested:
|
|
|
|
|
|
function_result = f"[Tool execution cancelled — {name} was skipped due to user interrupt]"
|
feat(observability): observer-grade telemetry hooks + NeMo-Relay plugin
Adds backend-neutral observer hooks for plugins: session, turn, API
request, tool, approval, and subagent lifecycle events with stable
correlation IDs (session_id, task_id, turn_id, api_request_id,
tool_call_id, parent/child subagent ids). Extends VALID_HOOKS with
api_request_error and subagent_start.
Hot path is zero-cost when no plugin subscribes: has_hook()/presence
checks gate all payload construction, request payloads are returned
by reference when no middleware rewrites, and the sanitized response
payload no longer embeds raw response objects.
Bundles the optional NeMo-Relay observability plugin
(plugins/observability/nemo_relay) as an in-repo consumer of the new
hooks, peer to the existing langfuse plugin. Fails open when the
optional nemo-relay package is not installed.
Authored-by: Bryan Bednarski <bbednarski@nvidia.com>
Salvaged from #29722 onto current main.
2026-06-03 17:44:13 +05:30
|
|
|
|
_emit_terminal_post_tool_call(
|
|
|
|
|
|
agent,
|
|
|
|
|
|
function_name=name,
|
|
|
|
|
|
function_args=args,
|
|
|
|
|
|
result=function_result,
|
|
|
|
|
|
effective_task_id=effective_task_id,
|
|
|
|
|
|
tool_call_id=getattr(tc, "id", "") or "",
|
|
|
|
|
|
status="cancelled",
|
|
|
|
|
|
error_type="keyboard_interrupt",
|
|
|
|
|
|
error_message="Tool execution cancelled by user interrupt",
|
|
|
|
|
|
)
|
2026-05-16 18:24:05 -07:00
|
|
|
|
else:
|
|
|
|
|
|
function_result = f"Error executing tool '{name}': thread did not return a result"
|
feat(observability): observer-grade telemetry hooks + NeMo-Relay plugin
Adds backend-neutral observer hooks for plugins: session, turn, API
request, tool, approval, and subagent lifecycle events with stable
correlation IDs (session_id, task_id, turn_id, api_request_id,
tool_call_id, parent/child subagent ids). Extends VALID_HOOKS with
api_request_error and subagent_start.
Hot path is zero-cost when no plugin subscribes: has_hook()/presence
checks gate all payload construction, request payloads are returned
by reference when no middleware rewrites, and the sanitized response
payload no longer embeds raw response objects.
Bundles the optional NeMo-Relay observability plugin
(plugins/observability/nemo_relay) as an in-repo consumer of the new
hooks, peer to the existing langfuse plugin. Fails open when the
optional nemo-relay package is not installed.
Authored-by: Bryan Bednarski <bbednarski@nvidia.com>
Salvaged from #29722 onto current main.
2026-06-03 17:44:13 +05:30
|
|
|
|
_emit_terminal_post_tool_call(
|
|
|
|
|
|
agent,
|
|
|
|
|
|
function_name=name,
|
|
|
|
|
|
function_args=args,
|
|
|
|
|
|
result=function_result,
|
|
|
|
|
|
effective_task_id=effective_task_id,
|
|
|
|
|
|
tool_call_id=getattr(tc, "id", "") or "",
|
|
|
|
|
|
status="error",
|
|
|
|
|
|
error_type="thread_missing_result",
|
|
|
|
|
|
error_message=function_result,
|
|
|
|
|
|
)
|
2026-05-16 18:24:05 -07:00
|
|
|
|
tool_duration = 0.0
|
|
|
|
|
|
else:
|
|
|
|
|
|
function_name, function_args, function_result, tool_duration, is_error, blocked = r
|
|
|
|
|
|
|
|
|
|
|
|
if not blocked:
|
|
|
|
|
|
function_result = agent._append_guardrail_observation(
|
|
|
|
|
|
function_name,
|
|
|
|
|
|
function_args,
|
|
|
|
|
|
function_result,
|
|
|
|
|
|
failed=is_error,
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
if is_error:
|
|
|
|
|
|
_err_text = _multimodal_text_summary(function_result)
|
|
|
|
|
|
result_preview = _err_text[:200] if len(_err_text) > 200 else _err_text
|
|
|
|
|
|
logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview)
|
|
|
|
|
|
|
|
|
|
|
|
# Track file-mutation outcome for the turn-end verifier.
|
|
|
|
|
|
# `blocked` calls never actually ran — don't let a guardrail
|
|
|
|
|
|
# block count as either a failure or a success.
|
|
|
|
|
|
if not blocked:
|
|
|
|
|
|
try:
|
|
|
|
|
|
agent._record_file_mutation_result(
|
|
|
|
|
|
function_name, function_args, function_result, is_error,
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception as _ver_err:
|
|
|
|
|
|
logging.debug("file-mutation verifier record failed: %s", _ver_err)
|
|
|
|
|
|
|
|
|
|
|
|
if not blocked and agent.tool_progress_callback:
|
|
|
|
|
|
try:
|
|
|
|
|
|
agent.tool_progress_callback(
|
|
|
|
|
|
"tool.completed", function_name, None, None,
|
|
|
|
|
|
duration=tool_duration, is_error=is_error,
|
2026-05-23 20:54:17 -07:00
|
|
|
|
result=function_result,
|
2026-05-16 18:24:05 -07:00
|
|
|
|
)
|
|
|
|
|
|
except Exception as cb_err:
|
|
|
|
|
|
logging.debug(f"Tool progress callback error: {cb_err}")
|
|
|
|
|
|
|
|
|
|
|
|
if agent.verbose_logging:
|
|
|
|
|
|
logging.debug(f"Tool {function_name} completed in {tool_duration:.2f}s")
|
|
|
|
|
|
logging.debug(f"Tool result ({len(function_result)} chars): {function_result}")
|
|
|
|
|
|
|
|
|
|
|
|
# Print cute message per tool
|
|
|
|
|
|
if agent._should_emit_quiet_tool_messages():
|
|
|
|
|
|
cute_msg = _get_cute_tool_message_impl(name, args, tool_duration, result=function_result)
|
|
|
|
|
|
agent._safe_print(f" {cute_msg}")
|
|
|
|
|
|
elif not agent.quiet_mode:
|
|
|
|
|
|
_preview_str = _multimodal_text_summary(function_result)
|
|
|
|
|
|
if agent.verbose_logging:
|
|
|
|
|
|
print(f" ✅ Tool {i+1} completed in {tool_duration:.2f}s")
|
|
|
|
|
|
print(agent._wrap_verbose("Result: ", _preview_str))
|
|
|
|
|
|
else:
|
|
|
|
|
|
response_preview = _preview_str[:agent.log_prefix_chars] + "..." if len(_preview_str) > agent.log_prefix_chars else _preview_str
|
|
|
|
|
|
print(f" ✅ Tool {i+1} completed in {tool_duration:.2f}s - {response_preview}")
|
|
|
|
|
|
|
|
|
|
|
|
agent._current_tool = None
|
|
|
|
|
|
agent._touch_activity(f"tool completed: {name} ({tool_duration:.1f}s)")
|
|
|
|
|
|
|
|
|
|
|
|
if not blocked and agent.tool_complete_callback:
|
|
|
|
|
|
try:
|
|
|
|
|
|
agent.tool_complete_callback(tc.id, name, args, function_result)
|
|
|
|
|
|
except Exception as cb_err:
|
|
|
|
|
|
logging.debug(f"Tool complete callback error: {cb_err}")
|
|
|
|
|
|
|
|
|
|
|
|
function_result = maybe_persist_tool_result(
|
|
|
|
|
|
content=function_result,
|
|
|
|
|
|
tool_name=name,
|
|
|
|
|
|
tool_use_id=tc.id,
|
|
|
|
|
|
env=get_active_env(effective_task_id),
|
|
|
|
|
|
) if not _is_multimodal_tool_result(function_result) else function_result
|
|
|
|
|
|
|
|
|
|
|
|
subdir_hints = agent._subdirectory_hints.check_tool_call(name, args)
|
|
|
|
|
|
if subdir_hints:
|
|
|
|
|
|
if _is_multimodal_tool_result(function_result):
|
|
|
|
|
|
# Append the hint to the text summary part so the model
|
|
|
|
|
|
# still sees it; don't touch the image blocks.
|
|
|
|
|
|
_append_subdir_hint_to_multimodal(function_result, subdir_hints)
|
|
|
|
|
|
else:
|
|
|
|
|
|
function_result += subdir_hints
|
|
|
|
|
|
|
|
|
|
|
|
# Unwrap _multimodal dicts to an OpenAI-style content list so any
|
|
|
|
|
|
# vision-capable provider receives [{type:text},{type:image_url}]
|
|
|
|
|
|
# rather than a raw Python dict. The Anthropic adapter already
|
|
|
|
|
|
# accepts content lists; vision-capable OpenAI-compatible servers
|
|
|
|
|
|
# (mlx-vlm, GPT-4o, …) accept image_url in tool messages natively.
|
|
|
|
|
|
# Text-only servers get a string-safe fallback here so a rejected
|
|
|
|
|
|
# image tool result never poisons canonical session history.
|
|
|
|
|
|
# String results pass through unchanged.
|
|
|
|
|
|
_tool_content = agent._tool_result_content_for_active_model(name, function_result)
|
2026-05-19 20:24:30 +01:00
|
|
|
|
messages.append(make_tool_result_message(name, _tool_content, tc.id))
|
2026-05-16 18:24:05 -07:00
|
|
|
|
|
|
|
|
|
|
# ── Per-tool /steer drain ───────────────────────────────────
|
|
|
|
|
|
# Same as the sequential path: drain between each collected
|
|
|
|
|
|
# result so the steer lands as early as possible.
|
|
|
|
|
|
agent._apply_pending_steer_to_tool_results(messages, 1)
|
|
|
|
|
|
|
|
|
|
|
|
# ── Per-turn aggregate budget enforcement ─────────────────────────
|
|
|
|
|
|
num_tools = len(parsed_calls)
|
|
|
|
|
|
if num_tools > 0:
|
|
|
|
|
|
turn_tool_msgs = messages[-num_tools:]
|
|
|
|
|
|
enforce_turn_budget(turn_tool_msgs, env=get_active_env(effective_task_id))
|
|
|
|
|
|
|
|
|
|
|
|
# ── /steer injection ──────────────────────────────────────────────
|
|
|
|
|
|
# Append any pending user steer text to the last tool result so the
|
|
|
|
|
|
# agent sees it on its next iteration. Runs AFTER budget enforcement
|
|
|
|
|
|
# so the steer marker is never truncated. See steer() for details.
|
|
|
|
|
|
if num_tools > 0:
|
|
|
|
|
|
agent._apply_pending_steer_to_tool_results(messages, num_tools)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def execute_tool_calls_sequential(agent, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
|
|
|
|
|
|
"""Execute tool calls sequentially (original behavior). Used for single calls or interactive tools."""
|
|
|
|
|
|
for i, tool_call in enumerate(assistant_message.tool_calls, 1):
|
|
|
|
|
|
# SAFETY: check interrupt BEFORE starting each tool.
|
|
|
|
|
|
# If the user sent "stop" during a previous tool's execution,
|
|
|
|
|
|
# do NOT start any more tools -- skip them all immediately.
|
|
|
|
|
|
if agent._interrupt_requested:
|
|
|
|
|
|
remaining_calls = assistant_message.tool_calls[i-1:]
|
|
|
|
|
|
if remaining_calls:
|
|
|
|
|
|
agent._vprint(f"{agent.log_prefix}⚡ Interrupt: skipping {len(remaining_calls)} tool call(s)", force=True)
|
|
|
|
|
|
for skipped_tc in remaining_calls:
|
|
|
|
|
|
skipped_name = skipped_tc.function.name
|
|
|
|
|
|
skip_msg = {
|
|
|
|
|
|
"role": "tool",
|
|
|
|
|
|
"name": skipped_name,
|
|
|
|
|
|
"content": f"[Tool execution cancelled — {skipped_name} was skipped due to user interrupt]",
|
|
|
|
|
|
"tool_call_id": skipped_tc.id,
|
|
|
|
|
|
}
|
|
|
|
|
|
messages.append(skip_msg)
|
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
|
|
function_name = tool_call.function.name
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
function_args = json.loads(tool_call.function.arguments)
|
|
|
|
|
|
except json.JSONDecodeError as e:
|
2026-05-21 14:09:30 +03:00
|
|
|
|
logger.warning(f"Unexpected JSON error after validation: {e}")
|
2026-05-16 18:24:05 -07:00
|
|
|
|
function_args = {}
|
|
|
|
|
|
if not isinstance(function_args, dict):
|
|
|
|
|
|
function_args = {}
|
|
|
|
|
|
|
fix(tool-search): scope bridge catalog + dispatch to the session's toolsets
Tool Search read its catalog from the global registry (get_tool_definitions
with no toolset scope = 'start with everything'), so a restricted-toolset
session — subagent, kanban worker, curated gateway session — could:
1. tool_search the entire process registry, not just its granted tools, and
2. tool_call any registered plugin/MCP tool it was never given, because
registry.dispatch() has no enabled_tools gate for non-execute_code tools.
A scoped session (enabled_toolsets=['mcp-github']) reported total_available=26
and successfully invoked an out-of-scope plugin tool via tool_call.
Fix:
- handle_function_call gains enabled_toolsets/disabled_toolsets; the bridge
dispatch scopes get_tool_definitions to them (also stops polluting the
process-global _last_resolved_tool_names with out-of-scope tools, which
leaked into execute_code's sandbox-tool fallback).
- A defense-in-depth gate rejects any tool_call'd name not in the scoped
deferrable catalog.
- tool_executor's unwrap (both concurrent + sequential paths) enforces the
same scope before dispatch, since it unwraps tool_call -> underlying name
and bypasses the bridge branch. New _tool_search_scoped_names() helper,
cached per-agent on registry generation + toolset scope.
- New scoped_deferrable_names() helper in tool_search.py shared by both sites.
Tests: 4 new regression tests in TestRegression_ToolsetScoping (scoped
catalog, out-of-scope tool_call rejection, no global pollution, helper).
2026-05-29 01:21:41 -07:00
|
|
|
|
# Tool Search unwrap — see execute_tool_calls_concurrent for full
|
|
|
|
|
|
# rationale, including the scope gate (the unwrap dispatches the
|
|
|
|
|
|
# underlying tool directly, so session toolset scope is enforced here).
|
|
|
|
|
|
_ts_scope_block: Optional[str] = None
|
feat(tools): progressive tool disclosure for MCP and plugin tools
Adds Tool Search, a structured-tools progressive-disclosure layer that
replaces MCP and non-core plugin tools in the model-visible tools array
with three bridge tools (tool_search / tool_describe / tool_call) when
the deferrable surface would consume more than a configurable percentage
of the active model's context window. Core Hermes tools are never deferred.
Default mode is 'auto' with a 10% context threshold, so small toolsets
pay no overhead. Set tools.tool_search.enabled to 'on' to force or 'off'
to disable.
Design carefully reflects the OpenClaw production failure modes
documented in the openclaw-tool-search-report:
- Core tools never defer (toolsets._HERMES_CORE_TOOLS). Addresses the
'tools silently missing from isolated cron turns' regression class
(openclaw#84141) by construction: there is no code path that can
drop a core tool.
- Catalog is stateless across turns — rebuilt from the live tool-defs
list on every assembly. No session-keyed Map that can drift out of
sync with the registry.
- tool_call unwraps the bridge call before any hook fires, so plugin
pre/post hooks, guardrails, approval flows, and the activity feed
all see the underlying tool name, not the bridge (addresses
openclaw#85588 and the verbose-mode complaint on openclaw#79823).
- The unwrap happens in both the parallel and sequential paths of
agent/tool_executor.py and also in handle_function_call, so direct
callers (sandboxed code, eval harnesses) are covered too.
- Bridge tools cannot invoke each other (recursion guard) and cannot
invoke core tools (those must be called directly).
- Tools mode only — no JS-sandbox code-mode. Keeps the surface small.
- Token estimation via cheap char/4 heuristic; precision isn't needed
for the threshold decision.
Files:
- tools/tool_search.py — new module (BM25 retrieval, classification,
threshold gate, bridge dispatch, unwrap helper).
- tests/tools/test_tool_search.py — 35 tests including the OpenClaw
#84141 regression guard.
- model_tools.py — wires assembly into _compute_tool_definitions as the
final step, adds skip_tool_search_assembly kwarg so the bridge can
see the real catalog, dispatches the three bridge tools.
- agent/tool_executor.py — unwraps tool_call in both parallel and
sequential parsing loops so checkpointing, guardrails, plugin hooks,
and tool-progress callbacks all observe the underlying tool name.
- hermes_cli/config.py — DEFAULT_CONFIG['tools']['tool_search'] block.
- website/docs/user-guide/features/tool-search.md — user docs.
Validation:
- 35/35 new tests pass.
- Existing tool/registry/model_tools/config/coercion/executor tests
(82 + 74 + small adjacents) green.
- Live E2E: 20 fake MCP tools registered, get_tool_definitions returns
3 bridges, tool_search returns top 3 hits, tool_describe returns
full schema, tool_call dispatches to the real underlying handler
and the underlying result is what the model sees.
- Reserved-name recursion guard verified live.
- Core-tool refusal via tool_call verified live.
2026-05-23 15:22:01 -07:00
|
|
|
|
try:
|
|
|
|
|
|
from tools import tool_search as _ts
|
|
|
|
|
|
if function_name == _ts.TOOL_CALL_NAME:
|
|
|
|
|
|
_underlying, _underlying_args, _err = _ts.resolve_underlying_call(function_args)
|
|
|
|
|
|
if not _err and _underlying:
|
fix(tool-search): scope bridge catalog + dispatch to the session's toolsets
Tool Search read its catalog from the global registry (get_tool_definitions
with no toolset scope = 'start with everything'), so a restricted-toolset
session — subagent, kanban worker, curated gateway session — could:
1. tool_search the entire process registry, not just its granted tools, and
2. tool_call any registered plugin/MCP tool it was never given, because
registry.dispatch() has no enabled_tools gate for non-execute_code tools.
A scoped session (enabled_toolsets=['mcp-github']) reported total_available=26
and successfully invoked an out-of-scope plugin tool via tool_call.
Fix:
- handle_function_call gains enabled_toolsets/disabled_toolsets; the bridge
dispatch scopes get_tool_definitions to them (also stops polluting the
process-global _last_resolved_tool_names with out-of-scope tools, which
leaked into execute_code's sandbox-tool fallback).
- A defense-in-depth gate rejects any tool_call'd name not in the scoped
deferrable catalog.
- tool_executor's unwrap (both concurrent + sequential paths) enforces the
same scope before dispatch, since it unwraps tool_call -> underlying name
and bypasses the bridge branch. New _tool_search_scoped_names() helper,
cached per-agent on registry generation + toolset scope.
- New scoped_deferrable_names() helper in tool_search.py shared by both sites.
Tests: 4 new regression tests in TestRegression_ToolsetScoping (scoped
catalog, out-of-scope tool_call rejection, no global pollution, helper).
2026-05-29 01:21:41 -07:00
|
|
|
|
if _underlying in _tool_search_scoped_names(agent):
|
|
|
|
|
|
function_name = _underlying
|
|
|
|
|
|
function_args = _underlying_args
|
|
|
|
|
|
else:
|
|
|
|
|
|
_ts_scope_block = (
|
|
|
|
|
|
f"'{_underlying}' is not available in this session. "
|
|
|
|
|
|
"Use tool_search to find tools you can call."
|
|
|
|
|
|
)
|
feat(tools): progressive tool disclosure for MCP and plugin tools
Adds Tool Search, a structured-tools progressive-disclosure layer that
replaces MCP and non-core plugin tools in the model-visible tools array
with three bridge tools (tool_search / tool_describe / tool_call) when
the deferrable surface would consume more than a configurable percentage
of the active model's context window. Core Hermes tools are never deferred.
Default mode is 'auto' with a 10% context threshold, so small toolsets
pay no overhead. Set tools.tool_search.enabled to 'on' to force or 'off'
to disable.
Design carefully reflects the OpenClaw production failure modes
documented in the openclaw-tool-search-report:
- Core tools never defer (toolsets._HERMES_CORE_TOOLS). Addresses the
'tools silently missing from isolated cron turns' regression class
(openclaw#84141) by construction: there is no code path that can
drop a core tool.
- Catalog is stateless across turns — rebuilt from the live tool-defs
list on every assembly. No session-keyed Map that can drift out of
sync with the registry.
- tool_call unwraps the bridge call before any hook fires, so plugin
pre/post hooks, guardrails, approval flows, and the activity feed
all see the underlying tool name, not the bridge (addresses
openclaw#85588 and the verbose-mode complaint on openclaw#79823).
- The unwrap happens in both the parallel and sequential paths of
agent/tool_executor.py and also in handle_function_call, so direct
callers (sandboxed code, eval harnesses) are covered too.
- Bridge tools cannot invoke each other (recursion guard) and cannot
invoke core tools (those must be called directly).
- Tools mode only — no JS-sandbox code-mode. Keeps the surface small.
- Token estimation via cheap char/4 heuristic; precision isn't needed
for the threshold decision.
Files:
- tools/tool_search.py — new module (BM25 retrieval, classification,
threshold gate, bridge dispatch, unwrap helper).
- tests/tools/test_tool_search.py — 35 tests including the OpenClaw
#84141 regression guard.
- model_tools.py — wires assembly into _compute_tool_definitions as the
final step, adds skip_tool_search_assembly kwarg so the bridge can
see the real catalog, dispatches the three bridge tools.
- agent/tool_executor.py — unwraps tool_call in both parallel and
sequential parsing loops so checkpointing, guardrails, plugin hooks,
and tool-progress callbacks all observe the underlying tool name.
- hermes_cli/config.py — DEFAULT_CONFIG['tools']['tool_search'] block.
- website/docs/user-guide/features/tool-search.md — user docs.
Validation:
- 35/35 new tests pass.
- Existing tool/registry/model_tools/config/coercion/executor tests
(82 + 74 + small adjacents) green.
- Live E2E: 20 fake MCP tools registered, get_tool_definitions returns
3 bridges, tool_search returns top 3 hits, tool_describe returns
full schema, tool_call dispatches to the real underlying handler
and the underlying result is what the model sees.
- Reserved-name recursion guard verified live.
- Core-tool refusal via tool_call verified live.
2026-05-23 15:22:01 -07:00
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
2026-05-16 18:24:05 -07:00
|
|
|
|
# Check plugin hooks for a block directive before executing.
|
|
|
|
|
|
_block_msg: Optional[str] = None
|
feat(observability): observer-grade telemetry hooks + NeMo-Relay plugin
Adds backend-neutral observer hooks for plugins: session, turn, API
request, tool, approval, and subagent lifecycle events with stable
correlation IDs (session_id, task_id, turn_id, api_request_id,
tool_call_id, parent/child subagent ids). Extends VALID_HOOKS with
api_request_error and subagent_start.
Hot path is zero-cost when no plugin subscribes: has_hook()/presence
checks gate all payload construction, request payloads are returned
by reference when no middleware rewrites, and the sanitized response
payload no longer embeds raw response objects.
Bundles the optional NeMo-Relay observability plugin
(plugins/observability/nemo_relay) as an in-repo consumer of the new
hooks, peer to the existing langfuse plugin. Fails open when the
optional nemo-relay package is not installed.
Authored-by: Bryan Bednarski <bbednarski@nvidia.com>
Salvaged from #29722 onto current main.
2026-06-03 17:44:13 +05:30
|
|
|
|
_block_error_type = "plugin_block"
|
fix(tool-search): scope bridge catalog + dispatch to the session's toolsets
Tool Search read its catalog from the global registry (get_tool_definitions
with no toolset scope = 'start with everything'), so a restricted-toolset
session — subagent, kanban worker, curated gateway session — could:
1. tool_search the entire process registry, not just its granted tools, and
2. tool_call any registered plugin/MCP tool it was never given, because
registry.dispatch() has no enabled_tools gate for non-execute_code tools.
A scoped session (enabled_toolsets=['mcp-github']) reported total_available=26
and successfully invoked an out-of-scope plugin tool via tool_call.
Fix:
- handle_function_call gains enabled_toolsets/disabled_toolsets; the bridge
dispatch scopes get_tool_definitions to them (also stops polluting the
process-global _last_resolved_tool_names with out-of-scope tools, which
leaked into execute_code's sandbox-tool fallback).
- A defense-in-depth gate rejects any tool_call'd name not in the scoped
deferrable catalog.
- tool_executor's unwrap (both concurrent + sequential paths) enforces the
same scope before dispatch, since it unwraps tool_call -> underlying name
and bypasses the bridge branch. New _tool_search_scoped_names() helper,
cached per-agent on registry generation + toolset scope.
- New scoped_deferrable_names() helper in tool_search.py shared by both sites.
Tests: 4 new regression tests in TestRegression_ToolsetScoping (scoped
catalog, out-of-scope tool_call rejection, no global pollution, helper).
2026-05-29 01:21:41 -07:00
|
|
|
|
if _ts_scope_block is not None:
|
|
|
|
|
|
_block_msg = _ts_scope_block
|
feat(observability): observer-grade telemetry hooks + NeMo-Relay plugin
Adds backend-neutral observer hooks for plugins: session, turn, API
request, tool, approval, and subagent lifecycle events with stable
correlation IDs (session_id, task_id, turn_id, api_request_id,
tool_call_id, parent/child subagent ids). Extends VALID_HOOKS with
api_request_error and subagent_start.
Hot path is zero-cost when no plugin subscribes: has_hook()/presence
checks gate all payload construction, request payloads are returned
by reference when no middleware rewrites, and the sanitized response
payload no longer embeds raw response objects.
Bundles the optional NeMo-Relay observability plugin
(plugins/observability/nemo_relay) as an in-repo consumer of the new
hooks, peer to the existing langfuse plugin. Fails open when the
optional nemo-relay package is not installed.
Authored-by: Bryan Bednarski <bbednarski@nvidia.com>
Salvaged from #29722 onto current main.
2026-06-03 17:44:13 +05:30
|
|
|
|
_block_error_type = "tool_scope_block"
|
fix(tool-search): scope bridge catalog + dispatch to the session's toolsets
Tool Search read its catalog from the global registry (get_tool_definitions
with no toolset scope = 'start with everything'), so a restricted-toolset
session — subagent, kanban worker, curated gateway session — could:
1. tool_search the entire process registry, not just its granted tools, and
2. tool_call any registered plugin/MCP tool it was never given, because
registry.dispatch() has no enabled_tools gate for non-execute_code tools.
A scoped session (enabled_toolsets=['mcp-github']) reported total_available=26
and successfully invoked an out-of-scope plugin tool via tool_call.
Fix:
- handle_function_call gains enabled_toolsets/disabled_toolsets; the bridge
dispatch scopes get_tool_definitions to them (also stops polluting the
process-global _last_resolved_tool_names with out-of-scope tools, which
leaked into execute_code's sandbox-tool fallback).
- A defense-in-depth gate rejects any tool_call'd name not in the scoped
deferrable catalog.
- tool_executor's unwrap (both concurrent + sequential paths) enforces the
same scope before dispatch, since it unwraps tool_call -> underlying name
and bypasses the bridge branch. New _tool_search_scoped_names() helper,
cached per-agent on registry generation + toolset scope.
- New scoped_deferrable_names() helper in tool_search.py shared by both sites.
Tests: 4 new regression tests in TestRegression_ToolsetScoping (scoped
catalog, out-of-scope tool_call rejection, no global pollution, helper).
2026-05-29 01:21:41 -07:00
|
|
|
|
else:
|
|
|
|
|
|
try:
|
|
|
|
|
|
from hermes_cli.plugins import get_pre_tool_call_block_message
|
|
|
|
|
|
_block_msg = get_pre_tool_call_block_message(
|
feat(observability): observer-grade telemetry hooks + NeMo-Relay plugin
Adds backend-neutral observer hooks for plugins: session, turn, API
request, tool, approval, and subagent lifecycle events with stable
correlation IDs (session_id, task_id, turn_id, api_request_id,
tool_call_id, parent/child subagent ids). Extends VALID_HOOKS with
api_request_error and subagent_start.
Hot path is zero-cost when no plugin subscribes: has_hook()/presence
checks gate all payload construction, request payloads are returned
by reference when no middleware rewrites, and the sanitized response
payload no longer embeds raw response objects.
Bundles the optional NeMo-Relay observability plugin
(plugins/observability/nemo_relay) as an in-repo consumer of the new
hooks, peer to the existing langfuse plugin. Fails open when the
optional nemo-relay package is not installed.
Authored-by: Bryan Bednarski <bbednarski@nvidia.com>
Salvaged from #29722 onto current main.
2026-06-03 17:44:13 +05:30
|
|
|
|
function_name,
|
|
|
|
|
|
function_args,
|
|
|
|
|
|
task_id=effective_task_id or "",
|
|
|
|
|
|
session_id=getattr(agent, "session_id", "") or "",
|
|
|
|
|
|
tool_call_id=getattr(tool_call, "id", "") or "",
|
|
|
|
|
|
turn_id=getattr(agent, "_current_turn_id", "") or "",
|
|
|
|
|
|
api_request_id=getattr(agent, "_current_api_request_id", "") or "",
|
fix(tool-search): scope bridge catalog + dispatch to the session's toolsets
Tool Search read its catalog from the global registry (get_tool_definitions
with no toolset scope = 'start with everything'), so a restricted-toolset
session — subagent, kanban worker, curated gateway session — could:
1. tool_search the entire process registry, not just its granted tools, and
2. tool_call any registered plugin/MCP tool it was never given, because
registry.dispatch() has no enabled_tools gate for non-execute_code tools.
A scoped session (enabled_toolsets=['mcp-github']) reported total_available=26
and successfully invoked an out-of-scope plugin tool via tool_call.
Fix:
- handle_function_call gains enabled_toolsets/disabled_toolsets; the bridge
dispatch scopes get_tool_definitions to them (also stops polluting the
process-global _last_resolved_tool_names with out-of-scope tools, which
leaked into execute_code's sandbox-tool fallback).
- A defense-in-depth gate rejects any tool_call'd name not in the scoped
deferrable catalog.
- tool_executor's unwrap (both concurrent + sequential paths) enforces the
same scope before dispatch, since it unwraps tool_call -> underlying name
and bypasses the bridge branch. New _tool_search_scoped_names() helper,
cached per-agent on registry generation + toolset scope.
- New scoped_deferrable_names() helper in tool_search.py shared by both sites.
Tests: 4 new regression tests in TestRegression_ToolsetScoping (scoped
catalog, out-of-scope tool_call rejection, no global pollution, helper).
2026-05-29 01:21:41 -07:00
|
|
|
|
)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
2026-05-16 18:24:05 -07:00
|
|
|
|
|
|
|
|
|
|
_guardrail_block_decision: ToolGuardrailDecision | None = None
|
|
|
|
|
|
if _block_msg is None:
|
|
|
|
|
|
guardrail_decision = agent._tool_guardrails.before_call(function_name, function_args)
|
|
|
|
|
|
if not guardrail_decision.allows_execution:
|
|
|
|
|
|
_guardrail_block_decision = guardrail_decision
|
|
|
|
|
|
|
|
|
|
|
|
_execution_blocked = _block_msg is not None or _guardrail_block_decision is not None
|
|
|
|
|
|
|
|
|
|
|
|
if _execution_blocked:
|
|
|
|
|
|
# Tool blocked by plugin, Tool Search scope, or guardrail policy — skip counters,
|
|
|
|
|
|
# callbacks, checkpointing, activity mutation, and real execution.
|
|
|
|
|
|
pass
|
|
|
|
|
|
# Reset nudge counters when the relevant tool is actually used
|
|
|
|
|
|
elif function_name == "memory":
|
|
|
|
|
|
agent._turns_since_memory = 0
|
|
|
|
|
|
elif function_name == "skill_manage":
|
|
|
|
|
|
agent._iters_since_skill = 0
|
|
|
|
|
|
|
|
|
|
|
|
if not agent.quiet_mode:
|
|
|
|
|
|
args_str = json.dumps(function_args, ensure_ascii=False)
|
|
|
|
|
|
if agent.verbose_logging:
|
|
|
|
|
|
print(f" 📞 Tool {i}: {function_name}({list(function_args.keys())})")
|
|
|
|
|
|
print(agent._wrap_verbose("Args: ", json.dumps(function_args, indent=2, ensure_ascii=False)))
|
|
|
|
|
|
else:
|
|
|
|
|
|
args_preview = args_str[:agent.log_prefix_chars] + "..." if len(args_str) > agent.log_prefix_chars else args_str
|
|
|
|
|
|
print(f" 📞 Tool {i}: {function_name}({list(function_args.keys())}) - {args_preview}")
|
|
|
|
|
|
|
|
|
|
|
|
if not _execution_blocked:
|
|
|
|
|
|
agent._current_tool = function_name
|
|
|
|
|
|
agent._touch_activity(f"executing tool: {function_name}")
|
|
|
|
|
|
|
|
|
|
|
|
# Set activity callback for long-running tool execution (terminal
|
|
|
|
|
|
# commands, etc.) so the gateway's inactivity monitor doesn't kill
|
|
|
|
|
|
# the agent while a command is running.
|
|
|
|
|
|
if not _execution_blocked:
|
|
|
|
|
|
try:
|
|
|
|
|
|
from tools.environments.base import set_activity_callback
|
|
|
|
|
|
set_activity_callback(agent._touch_activity)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
if not _execution_blocked and agent.tool_progress_callback:
|
|
|
|
|
|
try:
|
|
|
|
|
|
preview = _build_tool_preview(function_name, function_args)
|
|
|
|
|
|
agent.tool_progress_callback("tool.started", function_name, preview, function_args)
|
|
|
|
|
|
except Exception as cb_err:
|
|
|
|
|
|
logging.debug(f"Tool progress callback error: {cb_err}")
|
|
|
|
|
|
|
|
|
|
|
|
if not _execution_blocked and agent.tool_start_callback:
|
|
|
|
|
|
try:
|
|
|
|
|
|
agent.tool_start_callback(tool_call.id, function_name, function_args)
|
|
|
|
|
|
except Exception as cb_err:
|
|
|
|
|
|
logging.debug(f"Tool start callback error: {cb_err}")
|
|
|
|
|
|
|
|
|
|
|
|
# Checkpoint: snapshot working dir before file-mutating tools
|
|
|
|
|
|
if not _execution_blocked and function_name in {"write_file", "patch"} and agent._checkpoint_mgr.enabled:
|
|
|
|
|
|
try:
|
|
|
|
|
|
file_path = function_args.get("path", "")
|
|
|
|
|
|
if file_path:
|
|
|
|
|
|
work_dir = agent._checkpoint_mgr.get_working_dir_for_path(file_path)
|
|
|
|
|
|
agent._checkpoint_mgr.ensure_checkpoint(
|
|
|
|
|
|
work_dir, f"before {function_name}"
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass # never block tool execution
|
|
|
|
|
|
|
|
|
|
|
|
# Checkpoint before destructive terminal commands
|
|
|
|
|
|
if not _execution_blocked and function_name == "terminal" and agent._checkpoint_mgr.enabled:
|
|
|
|
|
|
try:
|
|
|
|
|
|
cmd = function_args.get("command", "")
|
|
|
|
|
|
if _is_destructive_command(cmd):
|
|
|
|
|
|
cwd = function_args.get("workdir") or os.getenv("TERMINAL_CWD", os.getcwd())
|
|
|
|
|
|
agent._checkpoint_mgr.ensure_checkpoint(
|
|
|
|
|
|
cwd, f"before terminal: {cmd[:60]}"
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass # never block tool execution
|
|
|
|
|
|
|
|
|
|
|
|
tool_start_time = time.time()
|
|
|
|
|
|
|
|
|
|
|
|
if _block_msg is not None:
|
|
|
|
|
|
# Tool blocked by plugin policy or Tool Search scope check — return error without executing.
|
|
|
|
|
|
function_result = json.dumps({"error": _block_msg}, ensure_ascii=False)
|
|
|
|
|
|
tool_duration = 0.0
|
feat(observability): observer-grade telemetry hooks + NeMo-Relay plugin
Adds backend-neutral observer hooks for plugins: session, turn, API
request, tool, approval, and subagent lifecycle events with stable
correlation IDs (session_id, task_id, turn_id, api_request_id,
tool_call_id, parent/child subagent ids). Extends VALID_HOOKS with
api_request_error and subagent_start.
Hot path is zero-cost when no plugin subscribes: has_hook()/presence
checks gate all payload construction, request payloads are returned
by reference when no middleware rewrites, and the sanitized response
payload no longer embeds raw response objects.
Bundles the optional NeMo-Relay observability plugin
(plugins/observability/nemo_relay) as an in-repo consumer of the new
hooks, peer to the existing langfuse plugin. Fails open when the
optional nemo-relay package is not installed.
Authored-by: Bryan Bednarski <bbednarski@nvidia.com>
Salvaged from #29722 onto current main.
2026-06-03 17:44:13 +05:30
|
|
|
|
_emit_terminal_post_tool_call(
|
|
|
|
|
|
agent,
|
|
|
|
|
|
function_name=function_name,
|
|
|
|
|
|
function_args=function_args,
|
|
|
|
|
|
result=function_result,
|
|
|
|
|
|
effective_task_id=effective_task_id,
|
|
|
|
|
|
tool_call_id=getattr(tool_call, "id", "") or "",
|
|
|
|
|
|
status="blocked",
|
|
|
|
|
|
error_type=_block_error_type,
|
|
|
|
|
|
error_message=_block_msg,
|
|
|
|
|
|
)
|
2026-05-16 18:24:05 -07:00
|
|
|
|
elif _guardrail_block_decision is not None:
|
|
|
|
|
|
# Tool blocked by tool-loop guardrail — synthesize exactly one
|
|
|
|
|
|
# tool result for the original tool_call_id without executing.
|
|
|
|
|
|
function_result = agent._guardrail_block_result(_guardrail_block_decision)
|
|
|
|
|
|
tool_duration = 0.0
|
feat(observability): observer-grade telemetry hooks + NeMo-Relay plugin
Adds backend-neutral observer hooks for plugins: session, turn, API
request, tool, approval, and subagent lifecycle events with stable
correlation IDs (session_id, task_id, turn_id, api_request_id,
tool_call_id, parent/child subagent ids). Extends VALID_HOOKS with
api_request_error and subagent_start.
Hot path is zero-cost when no plugin subscribes: has_hook()/presence
checks gate all payload construction, request payloads are returned
by reference when no middleware rewrites, and the sanitized response
payload no longer embeds raw response objects.
Bundles the optional NeMo-Relay observability plugin
(plugins/observability/nemo_relay) as an in-repo consumer of the new
hooks, peer to the existing langfuse plugin. Fails open when the
optional nemo-relay package is not installed.
Authored-by: Bryan Bednarski <bbednarski@nvidia.com>
Salvaged from #29722 onto current main.
2026-06-03 17:44:13 +05:30
|
|
|
|
_emit_terminal_post_tool_call(
|
|
|
|
|
|
agent,
|
|
|
|
|
|
function_name=function_name,
|
|
|
|
|
|
function_args=function_args,
|
|
|
|
|
|
result=function_result,
|
|
|
|
|
|
effective_task_id=effective_task_id,
|
|
|
|
|
|
tool_call_id=getattr(tool_call, "id", "") or "",
|
|
|
|
|
|
status="blocked",
|
|
|
|
|
|
error_type="guardrail_block",
|
|
|
|
|
|
error_message=getattr(_guardrail_block_decision, "message", None) or "Tool blocked by guardrail policy",
|
|
|
|
|
|
)
|
2026-05-16 18:24:05 -07:00
|
|
|
|
elif function_name == "todo":
|
|
|
|
|
|
from tools.todo_tool import todo_tool as _todo_tool
|
|
|
|
|
|
function_result = _todo_tool(
|
|
|
|
|
|
todos=function_args.get("todos"),
|
|
|
|
|
|
merge=function_args.get("merge", False),
|
|
|
|
|
|
store=agent._todo_store,
|
|
|
|
|
|
)
|
|
|
|
|
|
tool_duration = time.time() - tool_start_time
|
|
|
|
|
|
if agent._should_emit_quiet_tool_messages():
|
|
|
|
|
|
agent._vprint(f" {_get_cute_tool_message_impl('todo', function_args, tool_duration, result=function_result)}")
|
|
|
|
|
|
elif function_name == "session_search":
|
|
|
|
|
|
session_db = agent._get_session_db_for_recall()
|
|
|
|
|
|
if not session_db:
|
|
|
|
|
|
from hermes_state import format_session_db_unavailable
|
|
|
|
|
|
function_result = json.dumps({"success": False, "error": format_session_db_unavailable()})
|
|
|
|
|
|
else:
|
|
|
|
|
|
from tools.session_search_tool import session_search as _session_search
|
|
|
|
|
|
function_result = _session_search(
|
|
|
|
|
|
query=function_args.get("query", ""),
|
|
|
|
|
|
role_filter=function_args.get("role_filter"),
|
|
|
|
|
|
limit=function_args.get("limit", 3),
|
feat(session_search): single-shape tool with discovery, scroll, browse — no LLM (#27590)
* feat(session_search): single-shape tool with discovery, scroll, browse — no LLM
Replaces the LLM-summarized session_search with a single-shape tool that
returns actual messages from the DB. Three calling shapes inferred from
args (no mode parameter):
1. Discovery — pass query. FTS5 + anchored ±5 window + bookends per hit,
all in one call. ~20ms on a real DB instead of ~90s for the previous
three aux-LLM calls.
2. Scroll — pass session_id + around_message_id. Returns a window
centered on the anchor. To paginate, re-anchor on the first/last id
of the returned window. Boundary message appears in both windows
as the orientation marker. ~1ms per scroll call.
3. Browse — no args. Recent sessions chronologically.
Bookend_start (first 3 user+assistant msgs) and bookend_end (last 3) give
the agent goal + resolution on every discovery hit, so a single tool call
reconstructs a long session's arc without loading the whole transcript.
The aux-LLM summary path is gone: it cost ~$0.30/call, took ~30s, and
laundered FTS5 hits through a model that could confabulate when the right
session wasn't in the hit list. The merged shape returns byte-for-byte
content from SQLite.
History:
- PR #20238 (JabberELF) seeded the fast/summary dual-mode split.
- PR #26419 (yoniebans) expanded to fast/guided/summary with bookends,
multi-anchor drill-down, default-mode config, and a teaching skill.
This PR collapses that toolkit into one shape with explicit scroll
support, drops the summary path, drops the mode parameter, drops the
config knob, drops the skill. JabberELF's seed work is acknowledged via
the AUTHOR_MAP entry.
Validation:
- 38/38 tool tests pass (tests/tools/test_session_search.py)
- 12/12 get_messages_around tests pass (tests/hermes_state/)
- 11/11 get_anchored_view tests pass (tests/hermes_state/)
- Full tests/tools/ run: 5168 passing, 2 failures pre-exist on main
(test ordering in test_delegate.py, unrelated)
- E2E against live state DB: discovery 20ms, scroll 1ms, browse 280ms;
pagination forward+backward works with boundary-message orientation;
error paths return clean tool_error responses
Co-authored-by: JabberELF <abcdjmm970703@gmail.com>
Co-authored-by: yoniebans <jonny@nousresearch.com>
* chore(session_search): prune dead LLM-summary config and docs
Companion to the single-shape rewrite. The auxiliary.session_search config
block, max_concurrency / extra_body tunables, and matching docs sections
all referenced the removed LLM summarization path. Removing them so users
don't try to tune knobs that nothing reads.
- hermes_cli/config.py: drop dead auxiliary.session_search block from
DEFAULT_CONFIG. Leftover keys in user config.yaml are harmless and
ignored.
- hermes_cli/tips.py: drop two tips referencing the removed
max_concurrency / extra_body knobs.
- website/docs/user-guide/configuration.md: drop 'Session Search Tuning'
section and the auxiliary.session_search block from the example.
- website/docs/user-guide/features/fallback-providers.md: drop session_search
rows from the auxiliary-tasks tables and the dedicated tuning subsection.
- website/docs/reference/tools-reference.md: rewrite the session_search
entry to describe the new three-shape behaviour.
- CONTRIBUTING.md: update the file-tree description.
- tests/tools/test_llm_content_none_guard.py: remove TestSessionSearchContentNone
class and test_session_search_tool_guarded — both guard against an
unguarded .content.strip() call site in _summarize_session() that no
longer exists.
Validation: 97/97 targeted tests still pass (hermes_state + session_search +
llm_content_none_guard). Config tests 55/55.
---------
Co-authored-by: JabberELF <abcdjmm970703@gmail.com>
Co-authored-by: yoniebans <jonny@nousresearch.com>
2026-05-17 23:28:45 -07:00
|
|
|
|
session_id=function_args.get("session_id"),
|
|
|
|
|
|
around_message_id=function_args.get("around_message_id"),
|
|
|
|
|
|
window=function_args.get("window", 5),
|
|
|
|
|
|
sort=function_args.get("sort"),
|
2026-05-16 18:24:05 -07:00
|
|
|
|
db=session_db,
|
|
|
|
|
|
current_session_id=agent.session_id,
|
|
|
|
|
|
)
|
|
|
|
|
|
tool_duration = time.time() - tool_start_time
|
|
|
|
|
|
if agent._should_emit_quiet_tool_messages():
|
|
|
|
|
|
agent._vprint(f" {_get_cute_tool_message_impl('session_search', function_args, tool_duration, result=function_result)}")
|
|
|
|
|
|
elif function_name == "memory":
|
|
|
|
|
|
target = function_args.get("target", "memory")
|
|
|
|
|
|
from tools.memory_tool import memory_tool as _memory_tool
|
|
|
|
|
|
function_result = _memory_tool(
|
|
|
|
|
|
action=function_args.get("action"),
|
|
|
|
|
|
target=target,
|
|
|
|
|
|
content=function_args.get("content"),
|
|
|
|
|
|
old_text=function_args.get("old_text"),
|
|
|
|
|
|
store=agent._memory_store,
|
|
|
|
|
|
)
|
|
|
|
|
|
# Bridge: notify external memory provider of built-in memory writes
|
|
|
|
|
|
if agent._memory_manager and function_args.get("action") in {"add", "replace"}:
|
|
|
|
|
|
try:
|
|
|
|
|
|
agent._memory_manager.on_memory_write(
|
|
|
|
|
|
function_args.get("action", ""),
|
|
|
|
|
|
target,
|
|
|
|
|
|
function_args.get("content", ""),
|
|
|
|
|
|
metadata=agent._build_memory_write_metadata(
|
|
|
|
|
|
task_id=effective_task_id,
|
|
|
|
|
|
tool_call_id=getattr(tool_call, "id", None),
|
|
|
|
|
|
),
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
tool_duration = time.time() - tool_start_time
|
|
|
|
|
|
if agent._should_emit_quiet_tool_messages():
|
|
|
|
|
|
agent._vprint(f" {_get_cute_tool_message_impl('memory', function_args, tool_duration, result=function_result)}")
|
|
|
|
|
|
elif function_name == "clarify":
|
|
|
|
|
|
from tools.clarify_tool import clarify_tool as _clarify_tool
|
|
|
|
|
|
function_result = _clarify_tool(
|
|
|
|
|
|
question=function_args.get("question", ""),
|
|
|
|
|
|
choices=function_args.get("choices"),
|
|
|
|
|
|
callback=agent.clarify_callback,
|
|
|
|
|
|
)
|
|
|
|
|
|
tool_duration = time.time() - tool_start_time
|
|
|
|
|
|
if agent._should_emit_quiet_tool_messages():
|
|
|
|
|
|
agent._vprint(f" {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}")
|
|
|
|
|
|
elif function_name == "delegate_task":
|
|
|
|
|
|
tasks_arg = function_args.get("tasks")
|
|
|
|
|
|
if tasks_arg and isinstance(tasks_arg, list):
|
2026-05-30 12:54:41 +05:30
|
|
|
|
spinner_label = f"🔀 delegating {len(tasks_arg)} tasks · (/agents to monitor)"
|
2026-05-16 18:24:05 -07:00
|
|
|
|
else:
|
|
|
|
|
|
goal_preview = (function_args.get("goal") or "")[:30]
|
2026-05-30 12:54:41 +05:30
|
|
|
|
spinner_label = (
|
|
|
|
|
|
f"🔀 {goal_preview} · (/agents to monitor)"
|
|
|
|
|
|
if goal_preview
|
|
|
|
|
|
else "🔀 delegating · (/agents to monitor)"
|
|
|
|
|
|
)
|
2026-05-16 18:24:05 -07:00
|
|
|
|
spinner = None
|
|
|
|
|
|
if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner():
|
|
|
|
|
|
face = random.choice(KawaiiSpinner.get_waiting_faces())
|
|
|
|
|
|
spinner = KawaiiSpinner(f"{face} {spinner_label}", spinner_type='dots', print_fn=agent._print_fn)
|
|
|
|
|
|
spinner.start()
|
|
|
|
|
|
agent._delegate_spinner = spinner
|
|
|
|
|
|
_delegate_result = None
|
|
|
|
|
|
try:
|
|
|
|
|
|
function_result = agent._dispatch_delegate_task(function_args)
|
|
|
|
|
|
_delegate_result = function_result
|
|
|
|
|
|
finally:
|
|
|
|
|
|
agent._delegate_spinner = None
|
|
|
|
|
|
tool_duration = time.time() - tool_start_time
|
|
|
|
|
|
cute_msg = _get_cute_tool_message_impl('delegate_task', function_args, tool_duration, result=_delegate_result)
|
|
|
|
|
|
if spinner:
|
|
|
|
|
|
spinner.stop(cute_msg)
|
|
|
|
|
|
elif agent._should_emit_quiet_tool_messages():
|
|
|
|
|
|
agent._vprint(f" {cute_msg}")
|
|
|
|
|
|
elif agent._context_engine_tool_names and function_name in agent._context_engine_tool_names:
|
|
|
|
|
|
# Context engine tools (lcm_grep, lcm_describe, lcm_expand, etc.)
|
|
|
|
|
|
spinner = None
|
|
|
|
|
|
if agent._should_emit_quiet_tool_messages():
|
|
|
|
|
|
face = random.choice(KawaiiSpinner.get_waiting_faces())
|
|
|
|
|
|
emoji = _get_tool_emoji(function_name)
|
|
|
|
|
|
preview = _build_tool_preview(function_name, function_args) or function_name
|
|
|
|
|
|
spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
|
|
|
|
|
|
spinner.start()
|
|
|
|
|
|
_ce_result = None
|
|
|
|
|
|
try:
|
|
|
|
|
|
function_result = agent.context_compressor.handle_tool_call(function_name, function_args, messages=messages)
|
|
|
|
|
|
_ce_result = function_result
|
|
|
|
|
|
except Exception as tool_error:
|
|
|
|
|
|
function_result = json.dumps({"error": f"Context engine tool '{function_name}' failed: {tool_error}"})
|
|
|
|
|
|
logger.error("context_engine.handle_tool_call raised for %s: %s", function_name, tool_error, exc_info=True)
|
|
|
|
|
|
finally:
|
|
|
|
|
|
tool_duration = time.time() - tool_start_time
|
|
|
|
|
|
cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_ce_result)
|
|
|
|
|
|
if spinner:
|
|
|
|
|
|
spinner.stop(cute_msg)
|
|
|
|
|
|
elif agent._should_emit_quiet_tool_messages():
|
|
|
|
|
|
agent._vprint(f" {cute_msg}")
|
|
|
|
|
|
elif agent._memory_manager and agent._memory_manager.has_tool(function_name):
|
|
|
|
|
|
# Memory provider tools (hindsight_retain, honcho_search, etc.)
|
|
|
|
|
|
# These are not in the tool registry — route through MemoryManager.
|
|
|
|
|
|
spinner = None
|
|
|
|
|
|
if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner():
|
|
|
|
|
|
face = random.choice(KawaiiSpinner.get_waiting_faces())
|
|
|
|
|
|
emoji = _get_tool_emoji(function_name)
|
|
|
|
|
|
preview = _build_tool_preview(function_name, function_args) or function_name
|
|
|
|
|
|
spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
|
|
|
|
|
|
spinner.start()
|
|
|
|
|
|
_mem_result = None
|
|
|
|
|
|
try:
|
|
|
|
|
|
function_result = agent._memory_manager.handle_tool_call(function_name, function_args)
|
|
|
|
|
|
_mem_result = function_result
|
|
|
|
|
|
except Exception as tool_error:
|
|
|
|
|
|
function_result = json.dumps({"error": f"Memory tool '{function_name}' failed: {tool_error}"})
|
|
|
|
|
|
logger.error("memory_manager.handle_tool_call raised for %s: %s", function_name, tool_error, exc_info=True)
|
|
|
|
|
|
finally:
|
|
|
|
|
|
tool_duration = time.time() - tool_start_time
|
|
|
|
|
|
cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_mem_result)
|
|
|
|
|
|
if spinner:
|
|
|
|
|
|
spinner.stop(cute_msg)
|
|
|
|
|
|
elif agent._should_emit_quiet_tool_messages():
|
|
|
|
|
|
agent._vprint(f" {cute_msg}")
|
|
|
|
|
|
elif agent.quiet_mode:
|
|
|
|
|
|
spinner = None
|
|
|
|
|
|
if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner():
|
|
|
|
|
|
face = random.choice(KawaiiSpinner.get_waiting_faces())
|
|
|
|
|
|
emoji = _get_tool_emoji(function_name)
|
|
|
|
|
|
preview = _build_tool_preview(function_name, function_args) or function_name
|
|
|
|
|
|
spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
|
|
|
|
|
|
spinner.start()
|
|
|
|
|
|
_spinner_result = None
|
|
|
|
|
|
try:
|
|
|
|
|
|
function_result = _ra().handle_function_call(
|
|
|
|
|
|
function_name, function_args, effective_task_id,
|
|
|
|
|
|
tool_call_id=tool_call.id,
|
|
|
|
|
|
session_id=agent.session_id or "",
|
feat(observability): observer-grade telemetry hooks + NeMo-Relay plugin
Adds backend-neutral observer hooks for plugins: session, turn, API
request, tool, approval, and subagent lifecycle events with stable
correlation IDs (session_id, task_id, turn_id, api_request_id,
tool_call_id, parent/child subagent ids). Extends VALID_HOOKS with
api_request_error and subagent_start.
Hot path is zero-cost when no plugin subscribes: has_hook()/presence
checks gate all payload construction, request payloads are returned
by reference when no middleware rewrites, and the sanitized response
payload no longer embeds raw response objects.
Bundles the optional NeMo-Relay observability plugin
(plugins/observability/nemo_relay) as an in-repo consumer of the new
hooks, peer to the existing langfuse plugin. Fails open when the
optional nemo-relay package is not installed.
Authored-by: Bryan Bednarski <bbednarski@nvidia.com>
Salvaged from #29722 onto current main.
2026-06-03 17:44:13 +05:30
|
|
|
|
turn_id=getattr(agent, "_current_turn_id", "") or "",
|
|
|
|
|
|
api_request_id=getattr(agent, "_current_api_request_id", "") or "",
|
2026-05-16 18:24:05 -07:00
|
|
|
|
enabled_tools=list(agent.valid_tool_names) if agent.valid_tool_names else None,
|
|
|
|
|
|
skip_pre_tool_call_hook=True,
|
fix(tool-search): scope bridge catalog + dispatch to the session's toolsets
Tool Search read its catalog from the global registry (get_tool_definitions
with no toolset scope = 'start with everything'), so a restricted-toolset
session — subagent, kanban worker, curated gateway session — could:
1. tool_search the entire process registry, not just its granted tools, and
2. tool_call any registered plugin/MCP tool it was never given, because
registry.dispatch() has no enabled_tools gate for non-execute_code tools.
A scoped session (enabled_toolsets=['mcp-github']) reported total_available=26
and successfully invoked an out-of-scope plugin tool via tool_call.
Fix:
- handle_function_call gains enabled_toolsets/disabled_toolsets; the bridge
dispatch scopes get_tool_definitions to them (also stops polluting the
process-global _last_resolved_tool_names with out-of-scope tools, which
leaked into execute_code's sandbox-tool fallback).
- A defense-in-depth gate rejects any tool_call'd name not in the scoped
deferrable catalog.
- tool_executor's unwrap (both concurrent + sequential paths) enforces the
same scope before dispatch, since it unwraps tool_call -> underlying name
and bypasses the bridge branch. New _tool_search_scoped_names() helper,
cached per-agent on registry generation + toolset scope.
- New scoped_deferrable_names() helper in tool_search.py shared by both sites.
Tests: 4 new regression tests in TestRegression_ToolsetScoping (scoped
catalog, out-of-scope tool_call rejection, no global pollution, helper).
2026-05-29 01:21:41 -07:00
|
|
|
|
enabled_toolsets=getattr(agent, "enabled_toolsets", None),
|
|
|
|
|
|
disabled_toolsets=getattr(agent, "disabled_toolsets", None),
|
2026-05-16 18:24:05 -07:00
|
|
|
|
)
|
|
|
|
|
|
_spinner_result = function_result
|
feat(observability): observer-grade telemetry hooks + NeMo-Relay plugin
Adds backend-neutral observer hooks for plugins: session, turn, API
request, tool, approval, and subagent lifecycle events with stable
correlation IDs (session_id, task_id, turn_id, api_request_id,
tool_call_id, parent/child subagent ids). Extends VALID_HOOKS with
api_request_error and subagent_start.
Hot path is zero-cost when no plugin subscribes: has_hook()/presence
checks gate all payload construction, request payloads are returned
by reference when no middleware rewrites, and the sanitized response
payload no longer embeds raw response objects.
Bundles the optional NeMo-Relay observability plugin
(plugins/observability/nemo_relay) as an in-repo consumer of the new
hooks, peer to the existing langfuse plugin. Fails open when the
optional nemo-relay package is not installed.
Authored-by: Bryan Bednarski <bbednarski@nvidia.com>
Salvaged from #29722 onto current main.
2026-06-03 17:44:13 +05:30
|
|
|
|
except KeyboardInterrupt:
|
|
|
|
|
|
function_result = _emit_cancelled_terminal_post_tool_call(
|
|
|
|
|
|
agent,
|
|
|
|
|
|
function_name=function_name,
|
|
|
|
|
|
function_args=function_args,
|
|
|
|
|
|
effective_task_id=effective_task_id,
|
|
|
|
|
|
tool_call_id=getattr(tool_call, "id", "") or "",
|
|
|
|
|
|
start_time=tool_start_time,
|
|
|
|
|
|
)
|
|
|
|
|
|
_spinner_result = function_result
|
|
|
|
|
|
try:
|
|
|
|
|
|
agent.interrupt("keyboard interrupt")
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
raise
|
2026-05-16 18:24:05 -07:00
|
|
|
|
except Exception as tool_error:
|
|
|
|
|
|
function_result = f"Error executing tool '{function_name}': {tool_error}"
|
|
|
|
|
|
logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True)
|
|
|
|
|
|
finally:
|
|
|
|
|
|
tool_duration = time.time() - tool_start_time
|
|
|
|
|
|
cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_spinner_result)
|
|
|
|
|
|
if spinner:
|
|
|
|
|
|
spinner.stop(cute_msg)
|
|
|
|
|
|
elif agent._should_emit_quiet_tool_messages():
|
|
|
|
|
|
agent._vprint(f" {cute_msg}")
|
|
|
|
|
|
else:
|
|
|
|
|
|
try:
|
|
|
|
|
|
function_result = _ra().handle_function_call(
|
|
|
|
|
|
function_name, function_args, effective_task_id,
|
|
|
|
|
|
tool_call_id=tool_call.id,
|
|
|
|
|
|
session_id=agent.session_id or "",
|
feat(observability): observer-grade telemetry hooks + NeMo-Relay plugin
Adds backend-neutral observer hooks for plugins: session, turn, API
request, tool, approval, and subagent lifecycle events with stable
correlation IDs (session_id, task_id, turn_id, api_request_id,
tool_call_id, parent/child subagent ids). Extends VALID_HOOKS with
api_request_error and subagent_start.
Hot path is zero-cost when no plugin subscribes: has_hook()/presence
checks gate all payload construction, request payloads are returned
by reference when no middleware rewrites, and the sanitized response
payload no longer embeds raw response objects.
Bundles the optional NeMo-Relay observability plugin
(plugins/observability/nemo_relay) as an in-repo consumer of the new
hooks, peer to the existing langfuse plugin. Fails open when the
optional nemo-relay package is not installed.
Authored-by: Bryan Bednarski <bbednarski@nvidia.com>
Salvaged from #29722 onto current main.
2026-06-03 17:44:13 +05:30
|
|
|
|
turn_id=getattr(agent, "_current_turn_id", "") or "",
|
|
|
|
|
|
api_request_id=getattr(agent, "_current_api_request_id", "") or "",
|
2026-05-16 18:24:05 -07:00
|
|
|
|
enabled_tools=list(agent.valid_tool_names) if agent.valid_tool_names else None,
|
|
|
|
|
|
skip_pre_tool_call_hook=True,
|
fix(tool-search): scope bridge catalog + dispatch to the session's toolsets
Tool Search read its catalog from the global registry (get_tool_definitions
with no toolset scope = 'start with everything'), so a restricted-toolset
session — subagent, kanban worker, curated gateway session — could:
1. tool_search the entire process registry, not just its granted tools, and
2. tool_call any registered plugin/MCP tool it was never given, because
registry.dispatch() has no enabled_tools gate for non-execute_code tools.
A scoped session (enabled_toolsets=['mcp-github']) reported total_available=26
and successfully invoked an out-of-scope plugin tool via tool_call.
Fix:
- handle_function_call gains enabled_toolsets/disabled_toolsets; the bridge
dispatch scopes get_tool_definitions to them (also stops polluting the
process-global _last_resolved_tool_names with out-of-scope tools, which
leaked into execute_code's sandbox-tool fallback).
- A defense-in-depth gate rejects any tool_call'd name not in the scoped
deferrable catalog.
- tool_executor's unwrap (both concurrent + sequential paths) enforces the
same scope before dispatch, since it unwraps tool_call -> underlying name
and bypasses the bridge branch. New _tool_search_scoped_names() helper,
cached per-agent on registry generation + toolset scope.
- New scoped_deferrable_names() helper in tool_search.py shared by both sites.
Tests: 4 new regression tests in TestRegression_ToolsetScoping (scoped
catalog, out-of-scope tool_call rejection, no global pollution, helper).
2026-05-29 01:21:41 -07:00
|
|
|
|
enabled_toolsets=getattr(agent, "enabled_toolsets", None),
|
|
|
|
|
|
disabled_toolsets=getattr(agent, "disabled_toolsets", None),
|
2026-05-16 18:24:05 -07:00
|
|
|
|
)
|
feat(observability): observer-grade telemetry hooks + NeMo-Relay plugin
Adds backend-neutral observer hooks for plugins: session, turn, API
request, tool, approval, and subagent lifecycle events with stable
correlation IDs (session_id, task_id, turn_id, api_request_id,
tool_call_id, parent/child subagent ids). Extends VALID_HOOKS with
api_request_error and subagent_start.
Hot path is zero-cost when no plugin subscribes: has_hook()/presence
checks gate all payload construction, request payloads are returned
by reference when no middleware rewrites, and the sanitized response
payload no longer embeds raw response objects.
Bundles the optional NeMo-Relay observability plugin
(plugins/observability/nemo_relay) as an in-repo consumer of the new
hooks, peer to the existing langfuse plugin. Fails open when the
optional nemo-relay package is not installed.
Authored-by: Bryan Bednarski <bbednarski@nvidia.com>
Salvaged from #29722 onto current main.
2026-06-03 17:44:13 +05:30
|
|
|
|
except KeyboardInterrupt:
|
|
|
|
|
|
_emit_cancelled_terminal_post_tool_call(
|
|
|
|
|
|
agent,
|
|
|
|
|
|
function_name=function_name,
|
|
|
|
|
|
function_args=function_args,
|
|
|
|
|
|
effective_task_id=effective_task_id,
|
|
|
|
|
|
tool_call_id=getattr(tool_call, "id", "") or "",
|
|
|
|
|
|
start_time=tool_start_time,
|
|
|
|
|
|
)
|
|
|
|
|
|
try:
|
|
|
|
|
|
agent.interrupt("keyboard interrupt")
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
raise
|
2026-05-16 18:24:05 -07:00
|
|
|
|
except Exception as tool_error:
|
|
|
|
|
|
function_result = f"Error executing tool '{function_name}': {tool_error}"
|
|
|
|
|
|
logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True)
|
|
|
|
|
|
tool_duration = time.time() - tool_start_time
|
|
|
|
|
|
|
|
|
|
|
|
if isinstance(function_result, str):
|
|
|
|
|
|
result_preview = function_result if agent.verbose_logging else (
|
|
|
|
|
|
function_result[:200] if len(function_result) > 200 else function_result
|
|
|
|
|
|
)
|
|
|
|
|
|
_result_len = len(function_result)
|
|
|
|
|
|
else:
|
|
|
|
|
|
# Multimodal dict result (_multimodal=True) — not sliceable as string
|
|
|
|
|
|
result_preview = function_result
|
|
|
|
|
|
_result_len = len(str(function_result))
|
|
|
|
|
|
|
|
|
|
|
|
# Log tool errors to the persistent error log so [error] tags
|
|
|
|
|
|
# in the UI always have a corresponding detailed entry on disk.
|
|
|
|
|
|
_is_error_result, _ = _detect_tool_failure(function_name, function_result)
|
feat(observability): observer-grade telemetry hooks + NeMo-Relay plugin
Adds backend-neutral observer hooks for plugins: session, turn, API
request, tool, approval, and subagent lifecycle events with stable
correlation IDs (session_id, task_id, turn_id, api_request_id,
tool_call_id, parent/child subagent ids). Extends VALID_HOOKS with
api_request_error and subagent_start.
Hot path is zero-cost when no plugin subscribes: has_hook()/presence
checks gate all payload construction, request payloads are returned
by reference when no middleware rewrites, and the sanitized response
payload no longer embeds raw response objects.
Bundles the optional NeMo-Relay observability plugin
(plugins/observability/nemo_relay) as an in-repo consumer of the new
hooks, peer to the existing langfuse plugin. Fails open when the
optional nemo-relay package is not installed.
Authored-by: Bryan Bednarski <bbednarski@nvidia.com>
Salvaged from #29722 onto current main.
2026-06-03 17:44:13 +05:30
|
|
|
|
# The agent-runtime tools above (todo, session_search, memory,
|
|
|
|
|
|
# context-engine, memory-manager, clarify, delegate_task) are
|
|
|
|
|
|
# dispatched inline — they never reach handle_function_call, so the
|
|
|
|
|
|
# executor is the one that has to fire post_tool_call. For
|
|
|
|
|
|
# registry-dispatched tools the else-branch above invoked
|
|
|
|
|
|
# handle_function_call, which already fires the hook.
|
|
|
|
|
|
from agent.agent_runtime_helpers import agent_runtime_owns_post_tool_hook
|
|
|
|
|
|
_executor_must_emit_post_hook = (
|
|
|
|
|
|
not _execution_blocked
|
|
|
|
|
|
and agent_runtime_owns_post_tool_hook(agent, function_name)
|
|
|
|
|
|
)
|
|
|
|
|
|
if _executor_must_emit_post_hook:
|
|
|
|
|
|
_emit_terminal_post_tool_call(
|
|
|
|
|
|
agent,
|
|
|
|
|
|
function_name=function_name,
|
|
|
|
|
|
function_args=function_args,
|
|
|
|
|
|
result=function_result,
|
|
|
|
|
|
effective_task_id=effective_task_id,
|
|
|
|
|
|
tool_call_id=getattr(tool_call, "id", "") or "",
|
|
|
|
|
|
duration_ms=int(tool_duration * 1000),
|
|
|
|
|
|
)
|
2026-05-16 18:24:05 -07:00
|
|
|
|
if not _execution_blocked:
|
|
|
|
|
|
function_result = agent._append_guardrail_observation(
|
|
|
|
|
|
function_name,
|
|
|
|
|
|
function_args,
|
|
|
|
|
|
function_result,
|
|
|
|
|
|
failed=_is_error_result,
|
|
|
|
|
|
)
|
|
|
|
|
|
result_preview = function_result if agent.verbose_logging else (
|
|
|
|
|
|
function_result[:200] if len(function_result) > 200 else function_result
|
|
|
|
|
|
)
|
|
|
|
|
|
if _is_error_result:
|
|
|
|
|
|
logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview)
|
|
|
|
|
|
else:
|
|
|
|
|
|
logger.info("tool %s completed (%.2fs, %d chars)", function_name, tool_duration, _result_len)
|
|
|
|
|
|
|
|
|
|
|
|
# Track file-mutation outcome for the turn-end verifier. See
|
|
|
|
|
|
# the concurrent path for the rationale; both paths must feed
|
|
|
|
|
|
# the same state so the footer reflects every tool call in the
|
|
|
|
|
|
# turn, not just the parallel ones.
|
|
|
|
|
|
if not _execution_blocked:
|
|
|
|
|
|
try:
|
|
|
|
|
|
agent._record_file_mutation_result(
|
|
|
|
|
|
function_name, function_args, function_result, _is_error_result,
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception as _ver_err:
|
|
|
|
|
|
logging.debug("file-mutation verifier record failed: %s", _ver_err)
|
|
|
|
|
|
|
|
|
|
|
|
if not _execution_blocked and agent.tool_progress_callback:
|
|
|
|
|
|
try:
|
|
|
|
|
|
agent.tool_progress_callback(
|
|
|
|
|
|
"tool.completed", function_name, None, None,
|
|
|
|
|
|
duration=tool_duration, is_error=_is_error_result,
|
2026-05-23 20:54:17 -07:00
|
|
|
|
result=function_result,
|
2026-05-16 18:24:05 -07:00
|
|
|
|
)
|
|
|
|
|
|
except Exception as cb_err:
|
|
|
|
|
|
logging.debug(f"Tool progress callback error: {cb_err}")
|
|
|
|
|
|
|
|
|
|
|
|
agent._current_tool = None
|
|
|
|
|
|
agent._touch_activity(f"tool completed: {function_name} ({tool_duration:.1f}s)")
|
|
|
|
|
|
|
|
|
|
|
|
if agent.verbose_logging:
|
|
|
|
|
|
logging.debug(f"Tool {function_name} completed in {tool_duration:.2f}s")
|
|
|
|
|
|
_log_result = _multimodal_text_summary(function_result)
|
|
|
|
|
|
logging.debug(f"Tool result ({len(_log_result)} chars): {_log_result}")
|
|
|
|
|
|
|
|
|
|
|
|
if not _execution_blocked and agent.tool_complete_callback:
|
|
|
|
|
|
try:
|
|
|
|
|
|
agent.tool_complete_callback(tool_call.id, function_name, function_args, function_result)
|
|
|
|
|
|
except Exception as cb_err:
|
|
|
|
|
|
logging.debug(f"Tool complete callback error: {cb_err}")
|
|
|
|
|
|
|
|
|
|
|
|
function_result = maybe_persist_tool_result(
|
|
|
|
|
|
content=function_result,
|
|
|
|
|
|
tool_name=function_name,
|
|
|
|
|
|
tool_use_id=tool_call.id,
|
|
|
|
|
|
env=get_active_env(effective_task_id),
|
|
|
|
|
|
) if not _is_multimodal_tool_result(function_result) else function_result
|
|
|
|
|
|
|
|
|
|
|
|
# Discover subdirectory context files from tool arguments
|
|
|
|
|
|
subdir_hints = agent._subdirectory_hints.check_tool_call(function_name, function_args)
|
|
|
|
|
|
if subdir_hints:
|
|
|
|
|
|
if _is_multimodal_tool_result(function_result):
|
|
|
|
|
|
_append_subdir_hint_to_multimodal(function_result, subdir_hints)
|
|
|
|
|
|
else:
|
|
|
|
|
|
function_result += subdir_hints
|
|
|
|
|
|
|
|
|
|
|
|
# Unwrap _multimodal dicts to an OpenAI-style content list
|
|
|
|
|
|
# (see parallel path for rationale). String results pass through.
|
|
|
|
|
|
_tool_content = agent._tool_result_content_for_active_model(function_name, function_result)
|
2026-05-19 20:24:30 +01:00
|
|
|
|
messages.append(make_tool_result_message(function_name, _tool_content, tool_call.id))
|
2026-05-16 18:24:05 -07:00
|
|
|
|
|
|
|
|
|
|
# ── Per-tool /steer drain ───────────────────────────────────
|
|
|
|
|
|
# Drain pending steer BETWEEN individual tool calls so the
|
|
|
|
|
|
# injection lands as soon as a tool finishes — not after the
|
|
|
|
|
|
# entire batch. The model sees it on the next API iteration.
|
|
|
|
|
|
agent._apply_pending_steer_to_tool_results(messages, 1)
|
|
|
|
|
|
|
|
|
|
|
|
if not agent.quiet_mode:
|
|
|
|
|
|
if agent.verbose_logging:
|
|
|
|
|
|
print(f" ✅ Tool {i} completed in {tool_duration:.2f}s")
|
|
|
|
|
|
print(agent._wrap_verbose("Result: ", function_result))
|
|
|
|
|
|
else:
|
|
|
|
|
|
_fr_str = function_result if isinstance(function_result, str) else str(function_result)
|
|
|
|
|
|
response_preview = _fr_str[:agent.log_prefix_chars] + "..." if len(_fr_str) > agent.log_prefix_chars else _fr_str
|
|
|
|
|
|
print(f" ✅ Tool {i} completed in {tool_duration:.2f}s - {response_preview}")
|
|
|
|
|
|
|
|
|
|
|
|
if agent._interrupt_requested and i < len(assistant_message.tool_calls):
|
|
|
|
|
|
remaining = len(assistant_message.tool_calls) - i
|
|
|
|
|
|
agent._vprint(f"{agent.log_prefix}⚡ Interrupt: skipping {remaining} remaining tool call(s)", force=True)
|
|
|
|
|
|
for skipped_tc in assistant_message.tool_calls[i:]:
|
|
|
|
|
|
skipped_name = skipped_tc.function.name
|
2026-05-19 20:24:30 +01:00
|
|
|
|
messages.append(make_tool_result_message(
|
|
|
|
|
|
skipped_name,
|
|
|
|
|
|
f"[Tool execution skipped — {skipped_name} was not started. User sent a new message]",
|
|
|
|
|
|
skipped_tc.id,
|
|
|
|
|
|
))
|
2026-05-16 18:24:05 -07:00
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
|
|
if agent.tool_delay > 0 and i < len(assistant_message.tool_calls):
|
|
|
|
|
|
time.sleep(agent.tool_delay)
|
|
|
|
|
|
|
|
|
|
|
|
# ── Per-turn aggregate budget enforcement ─────────────────────────
|
|
|
|
|
|
num_tools_seq = len(assistant_message.tool_calls)
|
|
|
|
|
|
if num_tools_seq > 0:
|
|
|
|
|
|
enforce_turn_budget(messages[-num_tools_seq:], env=get_active_env(effective_task_id))
|
|
|
|
|
|
|
|
|
|
|
|
# ── /steer injection ──────────────────────────────────────────────
|
|
|
|
|
|
# See _execute_tool_calls_parallel for the rationale. Same hook,
|
|
|
|
|
|
# applied to sequential execution as well.
|
|
|
|
|
|
if num_tools_seq > 0:
|
|
|
|
|
|
agent._apply_pending_steer_to_tool_results(messages, num_tools_seq)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Names exported by a star-import of this module: the two tool-call dispatchers.
__all__ = ["execute_tool_calls_concurrent", "execute_tool_calls_sequential"]
|