fix: UTF-8-safe truncation in compact_tool_result
cost.rs:124 sliced at a raw byte offset, which panics whenever that offset falls inside a multibyte UTF-8 character (ä, ö, ü, CJK, etc.). FreeBSD tool output and agent logs regularly contain non-ASCII text. Fix: use str::floor_char_boundary() to round down to the nearest valid char boundary before slicing. This never panics and produces valid UTF-8 output at or below the requested byte limit. The truncation header and the omitted-bytes footer now report the actual (rounded-down) byte count instead of max_bytes. Added test: multibyte truncation with CJK + umlaut input.
This commit is contained in:
parent
f3a221330b
commit
0dd7cf70af
1 changed file with 14 additions and 3 deletions
|
|
@ -121,12 +121,13 @@ pub fn compact_tool_result(raw: &str, max_bytes: u64, tool_name: &str) -> Option
|
|||
return None;
|
||||
}
|
||||
|
||||
let truncated = &raw[..max_bytes as usize];
|
||||
let floor = raw.floor_char_boundary(max_bytes as usize);
|
||||
let truncated = &raw[..floor];
|
||||
Some(format!(
|
||||
"[{tool_name} output truncated: {} bytes → {} bytes]\n{truncated}\n[... {} more bytes omitted]",
|
||||
raw.len(),
|
||||
max_bytes,
|
||||
raw.len() - max_bytes as usize,
|
||||
floor,
|
||||
raw.len() - floor,
|
||||
))
|
||||
}
|
||||
|
||||
|
|
@ -195,4 +196,14 @@ mod tests {
|
|||
let small = "ok";
|
||||
assert!(compact_tool_result(small, 4_000, "test_tool").is_none());
|
||||
}
|
||||
|
||||
/// Truncating multibyte input must not panic and must cut on a char boundary.
///
/// Each repetition of the pattern is 15 bytes (`ä`, `ö`, `ü` are 2 bytes each;
/// `日`, `本`, `語` are 3 bytes each). Byte offset 50 lands in the middle of the
/// fourth `ü` (bytes 49..51), so the cut must be rounded down to 49 bytes:
/// three full repetitions followed by `äö`.
#[test]
fn test_tool_result_compaction_multibyte_no_panic() {
    let unit = "äöü日本語";
    let big = unit.repeat(2_000);

    let compacted = compact_tool_result(&big, 50, "unicode_tool")
        .expect("input larger than max_bytes must be compacted");

    // The output is: header line, truncated payload, footer line.
    // The payload contains no newlines, so `lines()` splits it cleanly.
    let mut parts = compacted.lines();
    let header = parts.next().expect("missing header line");
    let body = parts.next().expect("missing truncated payload");
    let footer = parts.next().expect("missing footer line");

    // The payload is a valid prefix of the input, within the byte limit,
    // and rounded down to the nearest char boundary.
    let expected_body = format!("{}äö", unit.repeat(3));
    assert!(body.len() <= 50);
    assert!(big.starts_with(body));
    assert_eq!(body, expected_body);

    // The header and footer report the actual byte counts, not `max_bytes`.
    assert!(header.contains("truncated"));
    assert!(header.contains(&format!("{} bytes → {} bytes", big.len(), body.len())));
    assert!(footer.contains(&format!("{} more bytes omitted", big.len() - body.len())));
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue