fix: UTF-8-safe truncation in compact_tool_result

cost.rs:124 sliced the input at a raw byte offset, which panics when that offset falls inside a multibyte UTF-8 character (ä, ö, ü, CJK, etc.). FreeBSD tool output and agent logs regularly contain non-ASCII text. Fix: use str::floor_char_boundary() to round down to the nearest valid char boundary before slicing. This never panics and produces valid UTF-8 output at or below the requested byte limit; the byte counts reported in the truncation message now reflect the actual cut point rather than the requested limit. Added test: multibyte truncation with CJK + umlaut input.
2026-05-27 23:09:34 +02:00 · 2026-05-27 23:09:34 +02:00 · 0dd7cf70af
commit 0dd7cf70af
parent f3a221330b
1 changed files with 14 additions and 3 deletions
--- a/crates/colibri-daemon/src/cost.rs
+++ b/crates/colibri-daemon/src/cost.rs
@ -121,12 +121,13 @@ pub fn compact_tool_result(raw: &str, max_bytes: u64, tool_name: &str) -> Option
        return None;
    }

-    let truncated = &raw[..max_bytes as usize];
+    let floor = raw.floor_char_boundary(max_bytes as usize);
+    let truncated = &raw[..floor];
    Some(format!(
        "[{tool_name} output truncated: {} bytes → {} bytes]\n{truncated}\n[... {} more bytes omitted]",
        raw.len(),
-        max_bytes,
-        raw.len() - max_bytes as usize,
+        floor,
+        raw.len() - floor,
    ))
 }

@ -195,4 +196,14 @@ mod tests {
        let small = "ok";
        assert!(compact_tool_result(small, 4_000, "test_tool").is_none());
    }
+
+    #[test]
+    fn test_tool_result_compaction_multibyte_no_panic() {
+        let big = "äöü日本語".repeat(2_000);
+        let result = compact_tool_result(&big, 50, "unicode_tool");
+        assert!(result.is_some());
+        let compacted = result.unwrap();
+        assert!(compacted.contains("truncated"));
+        assert!(compacted.is_char_boundary(compacted.len()));
+    }
 }