fix(agent): make a binary @file: reference actionable instead of a dead end
A binary @file: ref (PDF, docx, spreadsheet, …) expanded to a bare "binary files are not supported" warning with no content. The model saw a failure and gave up — e.g. a dropped PDF came back as a text note claiming the type was unsupported, even though the file was staged on disk right next to it. Inject an actionable content block instead: the path, mime type, size, and a nudge to use its tools to read/convert/view the file (and explicitly not to tell the user the type is unsupported). General across every binary type — not PDF-specific. The file already resolves where the agent's tools run (local cwd or the staged copy in a remote session workspace), so it can act on it directly.
This commit is contained in:
parent
29147afd63
commit
7ffc216bc0
2 changed files with 55 additions and 5 deletions
|
|
@ -246,7 +246,14 @@ def _expand_file_reference(
|
||||||
if not path.is_file():
|
if not path.is_file():
|
||||||
return f"{ref.raw}: path is not a file", None
|
return f"{ref.raw}: path is not a file", None
|
||||||
if _is_binary_file(path):
|
if _is_binary_file(path):
|
||||||
return f"{ref.raw}: binary files are not supported", None
|
# A binary file can't be inlined as text, but it IS on disk (the agent's
|
||||||
|
# tools run where this resolves — the local cwd, or the staged copy in a
|
||||||
|
# remote session workspace). Returning a bare "not supported" warning
|
||||||
|
# with no content was a dead end: the model saw a failure and gave up
|
||||||
|
# (told the user the file type wasn't supported). Instead, hand it an
|
||||||
|
# actionable block — the path, type, size, and a nudge to use its tools —
|
||||||
|
# so it can read/convert/view the file itself.
|
||||||
|
return None, _binary_reference_block(ref, path)
|
||||||
|
|
||||||
text = path.read_text(encoding="utf-8")
|
text = path.read_text(encoding="utf-8")
|
||||||
if ref.line_start is not None:
|
if ref.line_start is not None:
|
||||||
|
|
@ -493,6 +500,30 @@ def _rg_files(path: Path, cwd: Path, limit: int) -> list[Path] | None:
|
||||||
return files[:limit]
|
return files[:limit]
|
||||||
|
|
||||||
|
|
||||||
|
def _human_bytes(n: int) -> str:
|
||||||
|
size = float(n)
|
||||||
|
for unit in ("B", "KB", "MB", "GB"):
|
||||||
|
if size < 1024 or unit == "GB":
|
||||||
|
return f"{int(size)} {unit}" if unit == "B" else f"{size:.1f} {unit}"
|
||||||
|
size /= 1024
|
||||||
|
return f"{size:.1f} GB"
|
||||||
|
|
||||||
|
|
||||||
|
def _binary_reference_block(ref: ContextReference, path: Path) -> str:
|
||||||
|
mime, _ = mimetypes.guess_type(path.name)
|
||||||
|
mime = mime or "application/octet-stream"
|
||||||
|
try:
|
||||||
|
size = _human_bytes(path.stat().st_size)
|
||||||
|
except OSError:
|
||||||
|
size = "unknown size"
|
||||||
|
return (
|
||||||
|
f"📎 {ref.raw} ({mime}, {size}) — binary file, not inlined as text. "
|
||||||
|
f"It is available on disk at `{path}`. Use your tools to work with it "
|
||||||
|
f"(read or convert it, extract its text, or view/render it as needed); "
|
||||||
|
f"do not tell the user the file type is unsupported."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _file_metadata(path: Path) -> str:
|
def _file_metadata(path: Path) -> str:
|
||||||
if _is_binary_file(path):
|
if _is_binary_file(path):
|
||||||
return f"{path.stat().st_size} bytes"
|
return f"{path.stat().st_size} bytes"
|
||||||
|
|
|
||||||
|
|
@ -192,21 +192,40 @@ def test_expand_git_diff_staged_and_log(sample_repo: Path):
|
||||||
assert "VALUE = 2" in result.message
|
assert "VALUE = 2" in result.message
|
||||||
|
|
||||||
|
|
||||||
def test_binary_and_missing_files_become_warnings(sample_repo: Path):
|
def test_missing_file_becomes_warning(sample_repo: Path):
|
||||||
from agent.context_references import preprocess_context_references
|
from agent.context_references import preprocess_context_references
|
||||||
|
|
||||||
result = preprocess_context_references(
|
result = preprocess_context_references(
|
||||||
"Check @file:blob.bin and @file:nope.txt",
|
"Check @file:nope.txt",
|
||||||
cwd=sample_repo,
|
cwd=sample_repo,
|
||||||
context_length=100_000,
|
context_length=100_000,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert result.expanded
|
assert result.expanded
|
||||||
assert len(result.warnings) == 2
|
assert len(result.warnings) == 1
|
||||||
assert "binary" in result.message.lower()
|
|
||||||
assert "not found" in result.message.lower()
|
assert "not found" in result.message.lower()
|
||||||
|
|
||||||
|
|
||||||
|
def test_binary_file_yields_actionable_block_not_a_dead_warning(sample_repo: Path):
|
||||||
|
from agent.context_references import preprocess_context_references
|
||||||
|
|
||||||
|
result = preprocess_context_references(
|
||||||
|
"Check @file:blob.bin",
|
||||||
|
cwd=sample_repo,
|
||||||
|
context_length=100_000,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result.expanded
|
||||||
|
# The whole point: a binary attachment must NOT degrade into a discouraging
|
||||||
|
# warning that makes the model give up — it gets an actionable content block.
|
||||||
|
assert not result.warnings
|
||||||
|
assert "blob.bin" in result.message
|
||||||
|
assert "binary" in result.message.lower()
|
||||||
|
assert "not supported" not in result.message.lower()
|
||||||
|
# And it must point the agent at the file so it can act on it with tools.
|
||||||
|
assert str(sample_repo / "blob.bin") in result.message
|
||||||
|
|
||||||
|
|
||||||
def test_soft_budget_warns_and_hard_budget_refuses(sample_repo: Path):
|
def test_soft_budget_warns_and_hard_budget_refuses(sample_repo: Path):
|
||||||
from agent.context_references import preprocess_context_references
|
from agent.context_references import preprocess_context_references
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue