From 0dd7cf70af780379bc24ecf32b23af3fe29eff7d Mon Sep 17 00:00:00 2001 From: Sam & Claude Date: Wed, 27 May 2026 23:09:34 +0200 Subject: [PATCH] fix: UTF-8-safe truncation in compact_tool_result MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cost.rs:124 sliced at a raw byte offset, which panics when that offset falls inside a multibyte UTF-8 character (ä, ö, ü, CJK, etc.). FreeBSD tool output and agent logs regularly contain non-ASCII. Fix: use str::floor_char_boundary() to round down to the nearest valid char boundary before slicing. This never panics and produces valid UTF-8 output at or below the requested byte limit. Added test: multibyte truncation with CJK + umlaut input. --- crates/colibri-daemon/src/cost.rs | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/crates/colibri-daemon/src/cost.rs b/crates/colibri-daemon/src/cost.rs index 39b5eb3..6407c64 100644 --- a/crates/colibri-daemon/src/cost.rs +++ b/crates/colibri-daemon/src/cost.rs @@ -121,12 +121,13 @@ pub fn compact_tool_result(raw: &str, max_bytes: u64, tool_name: &str) -> Option return None; } - let truncated = &raw[..max_bytes as usize]; + let floor = raw.floor_char_boundary(max_bytes as usize); + let truncated = &raw[..floor]; Some(format!( "[{tool_name} output truncated: {} bytes → {} bytes]\n{truncated}\n[... {} more bytes omitted]", raw.len(), - max_bytes, - raw.len() - max_bytes as usize, + floor, + raw.len() - floor, )) } @@ -195,4 +196,14 @@ mod tests { let small = "ok"; assert!(compact_tool_result(small, 4_000, "test_tool").is_none()); } + + #[test] + fn test_tool_result_compaction_multibyte_no_panic() { + let big = "äöü日本語".repeat(2_000); + let result = compact_tool_result(&big, 50, "unicode_tool"); + assert!(result.is_some()); + let compacted = result.unwrap(); + assert!(compacted.contains("truncated")); + assert!(compacted.is_char_boundary(compacted.len())); + } }