fix: UTF-8-safe truncation in compact_tool_result

cost.rs:124 sliced the input at a raw byte offset, which panics whenever
that offset falls inside a multibyte UTF-8 character (ä, ö, ü, CJK,
etc.). FreeBSD tool output and agent logs regularly contain non-ASCII
text.

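Fix: use str::floor_char_boundary() to round down to the nearest valid
char boundary before slicing. This never panics and produces valid
UTF-8 output at or below the requested byte limit. The byte counts in
the truncation header and the "more bytes omitted" footer now report
the actual (rounded-down) length instead of the requested limit.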

Added test: multibyte truncation with CJK + umlaut input.
This commit is contained in:
Sam & Claude 2026-05-27 23:09:34 +02:00
parent f3a221330b
commit 0dd7cf70af

View file

@ -121,12 +121,13 @@ pub fn compact_tool_result(raw: &str, max_bytes: u64, tool_name: &str) -> Option
return None;
}
let truncated = &raw[..max_bytes as usize];
let floor = raw.floor_char_boundary(max_bytes as usize);
let truncated = &raw[..floor];
Some(format!(
"[{tool_name} output truncated: {} bytes → {} bytes]\n{truncated}\n[... {} more bytes omitted]",
raw.len(),
max_bytes,
raw.len() - max_bytes as usize,
floor,
raw.len() - floor,
))
}
@ -195,4 +196,14 @@ mod tests {
let small = "ok";
assert!(compact_tool_result(small, 4_000, "test_tool").is_none());
}
#[test]
fn test_tool_result_compaction_multibyte_no_panic() {
    // One repetition is 15 bytes (three 2-byte umlauts + three 3-byte CJK chars), so byte
    // offset 50 lands inside a 'ü' (bytes 49..51) — exactly the case that used to panic.
    let big = "äöü日本語".repeat(2_000);
    assert!(!big.is_char_boundary(50));

    let compacted = compact_tool_result(&big, 50, "unicode_tool")
        .expect("input above the byte limit should be compacted");
    assert!(compacted.contains("truncated"));

    // The output is "<header>\n<payload>\n<footer>"; the payload must be a prefix of the
    // input, cut at the nearest char boundary at or below the 50-byte limit (49 here).
    let payload = compacted
        .lines()
        .nth(1)
        .expect("compacted output should contain a payload line");
    assert!(big.starts_with(payload));
    assert_eq!(payload.len(), 49);
}
}