hermes-bsd/tests/tools/test_patch_failure_tracking.py

"""Tests for per-file consecutive patch-failure tracking.

When the agent repeatedly fails to patch the same file with similar but
non-matching old_strings, it's usually stuck in a loop with a stale view
of the file.  After 3 consecutive failures on the same path, the patch
tool injects an escalating ``_hint`` that tells the model to break out
of the loop (re-read, use longer context, or fall back to write_file).

See issue #507 (Roo Code deep-dive, item 2f).
"""

import json

import pytest


@pytest.fixture
def hermes_home(monkeypatch, tmp_path):
    """Isolate HERMES_HOME and clear module-level caches afterward so the
    real shell-out side effects from _handle_patch don't leak into
    subsequent tests (see test_line_ending_preservation.py for details)."""
    home = tmp_path / "hermes"
    home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(home))
    yield home
    try:
        from tools.file_tools import clear_file_ops_cache, _read_tracker_lock, _read_tracker
        clear_file_ops_cache()
        with _read_tracker_lock:
            _read_tracker.clear()
    except Exception:
        pass
    try:
        from tools.terminal_tool import _active_environments, _env_lock
        with _env_lock:
            _active_environments.clear()
    except Exception:
        pass


@pytest.fixture
def fresh_tracker():
    """Reset the module-level tracker before each test so the count starts
    at zero regardless of prior test order."""
    from tools.file_tools import _patch_failure_tracker, _patch_failure_lock

    with _patch_failure_lock:
        _patch_failure_tracker.clear()
    yield
    with _patch_failure_lock:
        _patch_failure_tracker.clear()


class TestPatchFailureEscalation:
    def test_first_two_failures_use_normal_hint(self, hermes_home, tmp_path, fresh_tracker):
        from tools.file_tools import _handle_patch

        target = tmp_path / "f.py"
        target.write_text("def foo():\n    return 1\n")

        for _i in range(2):
            result = _handle_patch(
                {
                    "mode": "replace",
                    "path": str(target),
                    "old_string": f"NONEXISTENT_{_i}_XYZQQQ",
                    "new_string": "x",
                },
                task_id="esc_t1",
            )
            d = json.loads(result)
            hint = d.get("_hint", "") or ""
            assert "failure #" not in hint, (
                f"Escalating hint fired too early on attempt {_i + 1}: {hint!r}"
            )

    def test_third_consecutive_failure_escalates(self, hermes_home, tmp_path, fresh_tracker):
        from tools.file_tools import _handle_patch

        target = tmp_path / "f.py"
        target.write_text("def foo():\n    return 1\n")

        last_hint = ""
        for _i in range(3):
            result = _handle_patch(
                {
                    "mode": "replace",
                    "path": str(target),
                    "old_string": f"DOES_NOT_EXIST_{_i}_FOOFOOFOO",
                    "new_string": "x",
                },
                task_id="esc_t2",
            )
            d = json.loads(result)
            last_hint = d.get("_hint", "") or ""

        assert "failure #3" in last_hint, repr(last_hint)
        assert "Stop retrying" in last_hint
        assert "write_file" in last_hint, (
            "Escalating hint should mention write_file fallback"
        )

    def test_success_clears_failure_counter(self, hermes_home, tmp_path, fresh_tracker):
        from tools.file_tools import _handle_patch

        target = tmp_path / "f.py"
        target.write_text("def foo():\n    return 1\n")

        # Three failures: counter at 3.
        for _i in range(3):
            _handle_patch(
                {
                    "mode": "replace",
                    "path": str(target),
                    "old_string": f"GHOST_{_i}_ABCABC",
                    "new_string": "x",
                },
                task_id="esc_t3",
            )

        # Successful patch: clears the counter.
        result = _handle_patch(
            {
                "mode": "replace",
                "path": str(target),
                "old_string": "return 1",
                "new_string": "return 99",
            },
            task_id="esc_t3",
        )
        d = json.loads(result)
        assert not d.get("error"), d

        # Next failure should be back to "attempt 1" — generic hint only.
        result = _handle_patch(
            {
                "mode": "replace",
                "path": str(target),
                "old_string": "STILL_GHOST_XYZ",
                "new_string": "x",
            },
            task_id="esc_t3",
        )
        d = json.loads(result)
        hint = d.get("_hint", "") or ""
        assert "failure #" not in hint, (
            f"Counter should have been reset after success: {hint!r}"
        )

    def test_different_paths_have_independent_counters(
        self, hermes_home, tmp_path, fresh_tracker
    ):
        from tools.file_tools import _handle_patch

        a = tmp_path / "a.py"
        a.write_text("x = 1\n")
        b = tmp_path / "b.py"
        b.write_text("y = 2\n")

        # Three failures on a.py.
        for _i in range(3):
            _handle_patch(
                {
                    "mode": "replace",
                    "path": str(a),
                    "old_string": f"NONE_A_{_i}_ZZZ",
                    "new_string": "x",
                },
                task_id="esc_t4",
            )

        # One failure on b.py — should NOT inherit a.py's count.
        result = _handle_patch(
            {
                "mode": "replace",
                "path": str(b),
                "old_string": "NONE_B_ZZZ",
                "new_string": "x",
            },
            task_id="esc_t4",
        )
        d = json.loads(result)
        hint = d.get("_hint", "") or ""
        assert "failure #" not in hint, (
            f"b.py's hint inherited a.py's count: {hint!r}"
        )

    def test_different_tasks_have_independent_counters(
        self, hermes_home, tmp_path, fresh_tracker
    ):
        from tools.file_tools import _handle_patch

        target = tmp_path / "shared.py"
        target.write_text("z = 0\n")

        # Three failures under task A.
        for _i in range(3):
            _handle_patch(
                {
                    "mode": "replace",
                    "path": str(target),
                    "old_string": f"GHOST_A_{_i}_QWE",
                    "new_string": "x",
                },
                task_id="task_A",
            )

        # First failure under task B — should NOT see escalation.
        result = _handle_patch(
            {
                "mode": "replace",
                "path": str(target),
                "old_string": "GHOST_B_QWE",
                "new_string": "x",
            },
            task_id="task_B",
        )
        d = json.loads(result)
        hint = d.get("_hint", "") or ""
        assert "failure #" not in hint, (
            f"task_B's hint cross-contaminated from task_A: {hint!r}"
        )
feat(patch): indentation preservation, CRLF preservation, per-file failure escalation (#507) (#32273) Three granular patch-tool refinements from the Roo Code deep-dive (#507). ## Indentation preservation (fuzzy_match.py) When fuzzy_find_and_replace matches via a non-exact strategy, the file's indentation may differ from what the LLM sent in old_string/new_string (common case: model sends zero-indent old/new for a method body that lives inside an 8-space-indented class). Before this commit the replacement was spliced in verbatim, producing a file with a broken indent level that may still parse but is logically wrong. The fix computes the indent delta between old_string's first meaningful line and the matched region's first meaningful line, then re-indents every line of new_string by that delta. Exact-strategy matches are untouched (passthrough). Same approach as Roo Code's multi-search-replace.ts:466-500. ## CRLF preservation (file_operations.py) Models nearly always send tool args with bare LF endings (JSON-encoded), but the file on disk may have CRLF (Windows-line-ending configs, .bat, .cmd, .ini files). Before this commit: - write_file silently normalized CRLF to LF on every overwrite - patch produced mixed-ending files: the substituted region had LF, the surrounding context kept CRLF The fix detects the file's existing line endings (via pre_content if already read for lint/LSP, otherwise a tiny head -c 4096 probe), and normalizes the entire write to that ending. New files are written verbatim (no detection possible). ## Per-file failure escalation (file_tools.py) When the agent fails to patch the same file 3+ times in a row, the existing 'old_string not found' hint isn't strong enough — the model keeps retrying with variations against a stale view of the file. The fix tracks consecutive failures per (task_id, resolved_path) and injects an escalating hint after 3 failures: 'This is failure #N patching X. Stop retrying. Either re-read fresh, use longer context, or fall back to write_file.' Counter resets on a successful patch to the same path. ## Validation - 22 new tests across tests/tools/test_fuzzy_match.py (5), test_line_ending_preservation.py (12), test_patch_failure_tracking.py (5) - All existing tests pass (165/165 in the touched files) - E2E verified with real _handle_patch / _handle_write_file calls against real CRLF files and real failure loops Closes part of #507. The remaining open items in #507 (2b start_line hint, behavioral rules) were declined after audit: - 2b adds schema bloat for a problem the existing 'multiple matches' contract already handles - Behavioral rules conflict with the personality system Items 1, 2d, 2e, 3, 4 of #507 were already landed in earlier work. 2026-05-25 15:18:45 -07:00			`"""Tests for per-file consecutive patch-failure tracking.`

			`When the agent repeatedly fails to patch the same file with similar but`
			`non-matching old_strings, it's usually stuck in a loop with a stale view`
			`of the file. After 3 consecutive failures on the same path, the patch`
			tool injects an escalating ``_hint`` that tells the model to break out
			`of the loop (re-read, use longer context, or fall back to write_file).`

			`See issue #507 (Roo Code deep-dive, item 2f).`
			`"""`

			`import json`

			`import pytest`


			`@pytest.fixture`
			`def hermes_home(monkeypatch, tmp_path):`
			`"""Isolate HERMES_HOME and clear module-level caches afterward so the`
			`real shell-out side effects from _handle_patch don't leak into`
			`subsequent tests (see test_line_ending_preservation.py for details)."""`
			`home = tmp_path / "hermes"`
			`home.mkdir()`
			`monkeypatch.setenv("HERMES_HOME", str(home))`
			`yield home`
			`try:`
			`from tools.file_tools import clear_file_ops_cache, _read_tracker_lock, _read_tracker`
			`clear_file_ops_cache()`
			`with _read_tracker_lock:`
			`_read_tracker.clear()`
			`except Exception:`
			`pass`
			`try:`
			`from tools.terminal_tool import _active_environments, _env_lock`
			`with _env_lock:`
			`_active_environments.clear()`
			`except Exception:`
			`pass`


			`@pytest.fixture`
			`def fresh_tracker():`
			`"""Reset the module-level tracker before each test so the count starts`
			`at zero regardless of prior test order."""`
			`from tools.file_tools import _patch_failure_tracker, _patch_failure_lock`

			`with _patch_failure_lock:`
			`_patch_failure_tracker.clear()`
			`yield`
			`with _patch_failure_lock:`
			`_patch_failure_tracker.clear()`


			`class TestPatchFailureEscalation:`
			`def test_first_two_failures_use_normal_hint(self, hermes_home, tmp_path, fresh_tracker):`
			`from tools.file_tools import _handle_patch`

			`target = tmp_path / "f.py"`
			`target.write_text("def foo():\n return 1\n")`

			`for _i in range(2):`
			`result = _handle_patch(`
			`{`
			`"mode": "replace",`
			`"path": str(target),`
			`"old_string": f"NONEXISTENT_{_i}_XYZQQQ",`
			`"new_string": "x",`
			`},`
			`task_id="esc_t1",`
			`)`
			`d = json.loads(result)`
			`hint = d.get("_hint", "") or ""`
			`assert "failure #" not in hint, (`
			`f"Escalating hint fired too early on attempt {_i + 1}: {hint!r}"`
			`)`

			`def test_third_consecutive_failure_escalates(self, hermes_home, tmp_path, fresh_tracker):`
			`from tools.file_tools import _handle_patch`

			`target = tmp_path / "f.py"`
			`target.write_text("def foo():\n return 1\n")`

			`last_hint = ""`
			`for _i in range(3):`
			`result = _handle_patch(`
			`{`
			`"mode": "replace",`
			`"path": str(target),`
			`"old_string": f"DOES_NOT_EXIST_{_i}_FOOFOOFOO",`
			`"new_string": "x",`
			`},`
			`task_id="esc_t2",`
			`)`
			`d = json.loads(result)`
			`last_hint = d.get("_hint", "") or ""`

			`assert "failure #3" in last_hint, repr(last_hint)`
			`assert "Stop retrying" in last_hint`
			`assert "write_file" in last_hint, (`
			`"Escalating hint should mention write_file fallback"`
			`)`

			`def test_success_clears_failure_counter(self, hermes_home, tmp_path, fresh_tracker):`
			`from tools.file_tools import _handle_patch`

			`target = tmp_path / "f.py"`
			`target.write_text("def foo():\n return 1\n")`

			`# Three failures: counter at 3.`
			`for _i in range(3):`
			`_handle_patch(`
			`{`
			`"mode": "replace",`
			`"path": str(target),`
			`"old_string": f"GHOST_{_i}_ABCABC",`
			`"new_string": "x",`
			`},`
			`task_id="esc_t3",`
			`)`

			`# Successful patch: clears the counter.`
			`result = _handle_patch(`
			`{`
			`"mode": "replace",`
			`"path": str(target),`
			`"old_string": "return 1",`
			`"new_string": "return 99",`
			`},`
			`task_id="esc_t3",`
			`)`
			`d = json.loads(result)`
			`assert not d.get("error"), d`

			`# Next failure should be back to "attempt 1" — generic hint only.`
			`result = _handle_patch(`
			`{`
			`"mode": "replace",`
			`"path": str(target),`
			`"old_string": "STILL_GHOST_XYZ",`
			`"new_string": "x",`
			`},`
			`task_id="esc_t3",`
			`)`
			`d = json.loads(result)`
			`hint = d.get("_hint", "") or ""`
			`assert "failure #" not in hint, (`
			`f"Counter should have been reset after success: {hint!r}"`
			`)`

			`def test_different_paths_have_independent_counters(`
			`self, hermes_home, tmp_path, fresh_tracker`
			`):`
			`from tools.file_tools import _handle_patch`

			`a = tmp_path / "a.py"`
			`a.write_text("x = 1\n")`
			`b = tmp_path / "b.py"`
			`b.write_text("y = 2\n")`

			`# Three failures on a.py.`
			`for _i in range(3):`
			`_handle_patch(`
			`{`
			`"mode": "replace",`
			`"path": str(a),`
			`"old_string": f"NONE_A_{_i}_ZZZ",`
			`"new_string": "x",`
			`},`
			`task_id="esc_t4",`
			`)`

			`# One failure on b.py — should NOT inherit a.py's count.`
			`result = _handle_patch(`
			`{`
			`"mode": "replace",`
			`"path": str(b),`
			`"old_string": "NONE_B_ZZZ",`
			`"new_string": "x",`
			`},`
			`task_id="esc_t4",`
			`)`
			`d = json.loads(result)`
			`hint = d.get("_hint", "") or ""`
			`assert "failure #" not in hint, (`
			`f"b.py's hint inherited a.py's count: {hint!r}"`
			`)`

			`def test_different_tasks_have_independent_counters(`
			`self, hermes_home, tmp_path, fresh_tracker`
			`):`
			`from tools.file_tools import _handle_patch`

			`target = tmp_path / "shared.py"`
			`target.write_text("z = 0\n")`

			`# Three failures under task A.`
			`for _i in range(3):`
			`_handle_patch(`
			`{`
			`"mode": "replace",`
			`"path": str(target),`
			`"old_string": f"GHOST_A_{_i}_QWE",`
			`"new_string": "x",`
			`},`
			`task_id="task_A",`
			`)`

			`# First failure under task B — should NOT see escalation.`
			`result = _handle_patch(`
			`{`
			`"mode": "replace",`
			`"path": str(target),`
			`"old_string": "GHOST_B_QWE",`
			`"new_string": "x",`
			`},`
			`task_id="task_B",`
			`)`
			`d = json.loads(result)`
			`hint = d.get("_hint", "") or ""`
			`assert "failure #" not in hint, (`
			`f"task_B's hint cross-contaminated from task_A: {hint!r}"`
			`)`