From c610a3a645912a04fc3a94237e345e92dd71bf27 Mon Sep 17 00:00:00 2001 From: patriceckhart Date: Mon, 20 Apr 2026 08:37:14 +0200 Subject: [PATCH] feat(tui): live-stream file body during write/edit tool calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit You see the file being composed in real time now. While the model is typing the tool_use JSON, the TUI renders a rules-wrapped syntax-highlighted preview that grows as deltas arrive. When the tool actually runs, the preview transitions to the final result without flicker. Before: the tool header appeared post-response, then "wrote N bytes" for write / "applied 1 edit" for edit. No live feedback. Now: as soon as the `path` field parses out of the partial JSON, the header shows `▸ write /Users/pat/Desktop/demo.ts`. As the `content` / `newText` string streams in, each delta extends the highlighted preview body immediately. Collapsed at the usual preview height with the standard `ctrl+o to expand` footer. Implementation: - internal/core/events.go: three new AgentEvent types, EvToolUseStart / EvToolUseArgs / EvToolUseEnd. They carry the tool id, name, and raw JSON deltas from the provider stream. - internal/core/agent.go: forwards the equivalent provider events instead of dropping them. EvToolCall (with fully-parsed args) still fires at EventDone as before, so existing consumers don't need to change. - internal/tui/partialjson.go: small escape-aware extractor that pulls one string field's value out of a partial JSON buffer as it grows. Handles \\ \" \n \t \r \b \f \/ and \uXXXX escapes; tolerates trailing incomplete escapes (returns the complete prefix and waits for more bytes). Second helper, ExtractLastNewText, walks to the most recent "newText":"..." inside an edits array so edit's streaming preview shows the edit currently being composed (not an earlier one that's already finished). - internal/tui/view.go: ToolCallView gains Streaming, RawJSONBuf, LivePath fields. 
renderToolCall dispatches to renderLiveToolBody while Streaming=true and Result=="". For `write` it shows the partial `content`; for `edit` it shows ` edit N (streaming)` plus the partial `newText`. Shared wrapLiveBody keeps the rule + collapse boilerplate in one place. - internal/agent/modes/interactive.go: handles the three new events. EvToolUseStart pre-creates the ToolCallView so the header appears instantly; EvToolUseArgs appends the delta and refreshes LivePath; EvToolUseEnd flips Streaming off. The pre-existing EvToolCall branch now updates the already-created view rather than replacing it. - internal/agent/modes/json.go: emits tool_use_start / tool_use_args / tool_use_end events so `zot --json` consumers can build their own live previews. - internal/agent/tools/write.go: tool result is now the written file body (same shape as read's result) with total_lines + start_line details. Keeps the visual transition from streaming preview to final result seamless, and gives the model the file contents in its own tool_result for follow-up turns. Tests: - internal/tui/partialjson_test.go: 9 cases on ExtractPartialStringField (complete, partial mid-word, escape variants, unfinished escapes) and 4 on ExtractLastNewText (no newText, partial, complete, multi-edit). Verified end-to-end via `zot --json "write ..."` and `zot --json "edit ..."` against the real API: 246 tool_use_args delta events on a 30-line write, preview fields extracted live, final file written correctly. 
--- internal/agent/modes/interactive.go | 50 ++++++-- internal/agent/modes/json.go | 8 ++ internal/agent/tools/write.go | 22 +++- internal/core/agent.go | 10 +- internal/core/events.go | 34 ++++++ internal/tui/partialjson.go | 177 ++++++++++++++++++++++++++++ internal/tui/partialjson_test.go | 155 ++++++++++++++++++++++++ internal/tui/view.go | 87 +++++++++++++- 8 files changed, 528 insertions(+), 15 deletions(-) create mode 100644 internal/tui/partialjson.go create mode 100644 internal/tui/partialjson_test.go diff --git a/internal/agent/modes/interactive.go b/internal/agent/modes/interactive.go index ad85a98..a004c5d 100644 --- a/internal/agent/modes/interactive.go +++ b/internal/agent/modes/interactive.go @@ -1979,14 +1979,50 @@ func (i *Interactive) handleEvent(ev core.AgentEvent) { if i.cfg.OnAssistant != nil { i.cfg.OnAssistant(e.Message) } - case core.EvToolCall: - tcv := &tui.ToolCallView{ - ID: e.ID, - Name: e.Name, - Args: shortArgs(e.Args), + case core.EvToolUseStart: + // Live streaming: pre-create the view so the user sees the + // tool call being composed in real time. Any subsequent + // EvToolCall for the same ID updates the same struct (the + // final parsed args + name are already known here). + if _, exists := i.toolCalls[e.ID]; !exists { + i.toolCalls[e.ID] = &tui.ToolCallView{ + ID: e.ID, + Name: e.Name, + Streaming: true, + } + i.toolOrder = append(i.toolOrder, e.ID) + } + case core.EvToolUseArgs: + if tc, ok := i.toolCalls[e.ID]; ok { + tc.RawJSONBuf += e.Delta + // Refresh the live path as soon as it parses; used in + // the header (▸ write /Users/pat/Desktop/demo.ts) + // while the content is still streaming. 
+ if p, pok, _ := tui.ExtractPartialStringField(tc.RawJSONBuf, "path"); pok { + tc.LivePath = p + } else if p, pok, _ := tui.ExtractPartialStringField(tc.RawJSONBuf, "file_path"); pok { + tc.LivePath = p + } + } + case core.EvToolUseEnd: + if tc, ok := i.toolCalls[e.ID]; ok { + tc.Streaming = false + } + case core.EvToolCall: + // If we already pre-created the view during streaming, just + // refresh the final Args summary. Otherwise create a new one + // (non-streaming providers or legacy paths). + if tc, ok := i.toolCalls[e.ID]; ok { + tc.Args = shortArgs(e.Args) + tc.Streaming = false + } else { + i.toolCalls[e.ID] = &tui.ToolCallView{ + ID: e.ID, + Name: e.Name, + Args: shortArgs(e.Args), + } + i.toolOrder = append(i.toolOrder, e.ID) } - i.toolCalls[e.ID] = tcv - i.toolOrder = append(i.toolOrder, e.ID) case core.EvToolResult: if tc, ok := i.toolCalls[e.ID]; ok { tc.Done = true diff --git a/internal/agent/modes/json.go b/internal/agent/modes/json.go index 53ffaa8..6597a5a 100644 --- a/internal/agent/modes/json.go +++ b/internal/agent/modes/json.go @@ -49,6 +49,14 @@ func EventToJSON(ev core.AgentEvent) map[string]any { m["time"] = e.Message.Time case core.EvTextDelta: m["delta"] = e.Delta + case core.EvToolUseStart: + m["id"] = e.ID + m["name"] = e.Name + case core.EvToolUseArgs: + m["id"] = e.ID + m["delta"] = e.Delta + case core.EvToolUseEnd: + m["id"] = e.ID case core.EvToolCall: m["id"] = e.ID m["name"] = e.Name diff --git a/internal/agent/tools/write.go b/internal/agent/tools/write.go index 938e19f..112c82b 100644 --- a/internal/agent/tools/write.go +++ b/internal/agent/tools/write.go @@ -6,6 +6,7 @@ import ( "fmt" "os" "path/filepath" + "strings" "github.com/patriceckhart/zot/internal/core" "github.com/patriceckhart/zot/internal/provider" @@ -50,9 +51,24 @@ func (t *WriteTool) Execute(ctx context.Context, raw json.RawMessage, progress f return core.ToolResult{}, err } - msg := fmt.Sprintf("wrote %d bytes to %s", len(a.Content), a.Path) + // Return the file 
content as the result body, just like `read` + // does. The TUI renders it with a syntax-highlighted gutter so + // the on-screen view after a `write` matches the pre-write + // streaming preview seamlessly. The model also sees the written + // content in its tool_result, which is useful on follow-up turns + // where it wants to reference what it just wrote without a + // second `read` call. + totalLines := strings.Count(a.Content, "\n") + if len(a.Content) > 0 && !strings.HasSuffix(a.Content, "\n") { + totalLines++ // count the last unterminated line + } return core.ToolResult{ - Content: []provider.Content{provider.TextBlock{Text: msg}}, - Details: map[string]any{"path": path, "bytes": len(a.Content)}, + Content: []provider.Content{provider.TextBlock{Text: a.Content}}, + Details: map[string]any{ + "path": path, + "bytes": len(a.Content), + "total_lines": totalLines, + "start_line": 1, + }, }, nil } diff --git a/internal/core/agent.go b/internal/core/agent.go index 4581b5f..7c32c48 100644 --- a/internal/core/agent.go +++ b/internal/core/agent.go @@ -228,10 +228,12 @@ func (a *Agent) oneTurn(ctx context.Context, sink func(AgentEvent)) (provider.St // nothing case provider.EventTextDelta: sink(EvTextDelta{Delta: e.Delta}) - case provider.EventToolStart, provider.EventToolArgs, provider.EventToolEnd: - // The provider emits these to support partial-JSON UIs. - // We surface the complete tool call once per block at EvToolCall - // below (after EventDone), so for now we drop them. 
+ case provider.EventToolStart: + sink(EvToolUseStart{ID: e.ID, Name: e.Name}) + case provider.EventToolArgs: + sink(EvToolUseArgs{ID: e.ID, Delta: e.Delta}) + case provider.EventToolEnd: + sink(EvToolUseEnd{ID: e.ID}) case provider.EventUsage: cum := a.cost.Add(e.Usage) sink(EvUsage{Usage: e.Usage, Cumulative: cum}) diff --git a/internal/core/events.go b/internal/core/events.go index 40bc151..18feff2 100644 --- a/internal/core/events.go +++ b/internal/core/events.go @@ -43,6 +43,40 @@ type EvToolCall struct { func (EvToolCall) Type() string { return "tool_call" } +// EvToolUseStart fires the moment the provider announces a new +// tool_use block during streaming, before any arg JSON has +// arrived. Gives UIs a hook to pre-render a live "tool is being +// composed" panel so the user sees the model typing the call in +// real time. Name is already final at this point; Args is empty. +type EvToolUseStart struct { + ID string + Name string +} + +func (EvToolUseStart) Type() string { return "tool_use_start" } + +// EvToolUseArgs fires for each delta fragment of the tool_use +// block's argument JSON. Concatenating every delta for a given +// ID produces the full JSON string; during streaming it's likely +// truncated mid-value. UIs can extract partial string fields +// (e.g. the `content` arg of `write`) with an escape-aware scan. +type EvToolUseArgs struct { + ID string + Delta string +} + +func (EvToolUseArgs) Type() string { return "tool_use_args" } + +// EvToolUseEnd fires when the provider marks the tool_use block +// complete. At this point the full args JSON is known; a separate +// EvToolCall follows once the assistant message is assembled, +// carrying the parsed block that actually runs. 
+type EvToolUseEnd struct { + ID string +} + +func (EvToolUseEnd) Type() string { return "tool_use_end" } + type EvToolProgress struct { ID string Text string diff --git a/internal/tui/partialjson.go b/internal/tui/partialjson.go new file mode 100644 index 0000000..c913822 --- /dev/null +++ b/internal/tui/partialjson.go @@ -0,0 +1,177 @@ +package tui + +import "strings" + +// ExtractPartialStringField scans raw (a partial JSON object's bytes) +// for the given top-level string field and returns the unescaped +// value seen so far. If the value is still being written, it returns +// what's available with ok=true but done=false. If the closing +// unescaped quote has been reached, done=true. +// +// This is deliberately small and best-effort: zot uses it to show +// the live body of a `write` tool call while the model is still +// typing it, before the full JSON object has been received. It +// assumes the field is a top-level key (no nested lookup), matches +// the first occurrence, and tolerates unfinished `\uXXXX` escapes +// by dropping a trailing incomplete escape sequence. +// +// A production-grade JSON parser would be overkill for this use +// case; we only care about extracting one field incrementally. +func ExtractPartialStringField(raw, field string) (value string, ok, done bool) { + needle := "\"" + field + "\":" + idx := strings.Index(raw, needle) + if idx < 0 { + return "", false, false + } + // Skip over the key and any whitespace up to the opening quote. + rest := raw[idx+len(needle):] + j := 0 + for j < len(rest) && (rest[j] == ' ' || rest[j] == '\t' || rest[j] == '\n' || rest[j] == '\r') { + j++ + } + if j >= len(rest) || rest[j] != '"' { + // Field wasn't a string, or the opening quote hasn't arrived. + return "", false, false + } + j++ // past opening quote + + var sb strings.Builder + sb.Grow(len(rest) - j) + for j < len(rest) { + c := rest[j] + if c == '\\' { + // Escape sequence. 
Need at least one more byte; if not + // present yet, stop emitting here and wait for more. + if j+1 >= len(rest) { + return sb.String(), true, false + } + esc := rest[j+1] + switch esc { + case '"': + sb.WriteByte('"') + j += 2 + case '\\': + sb.WriteByte('\\') + j += 2 + case '/': + sb.WriteByte('/') + j += 2 + case 'n': + sb.WriteByte('\n') + j += 2 + case 't': + sb.WriteByte('\t') + j += 2 + case 'r': + sb.WriteByte('\r') + j += 2 + case 'b': + sb.WriteByte('\b') + j += 2 + case 'f': + sb.WriteByte('\f') + j += 2 + case 'u': + // \uXXXX — needs 4 more hex digits; if they have not arrived yet, stop and wait. + // NOTE: each escape is decoded on its own; UTF-16 surrogate halves are not combined into one rune. + if j+6 > len(rest) { + return sb.String(), true, false + } + r := parseHex4(rest[j+2 : j+6]) + if r < 0 { + // Malformed; stop, return what we have. + return sb.String(), true, false + } + sb.WriteRune(rune(r)) + j += 6 + default: + // Unknown escape; keep the backslash and the next + // byte as literals so the render shows something. + sb.WriteByte(c) + sb.WriteByte(esc) + j += 2 + } + continue + } + if c == '"' { + // End of string. + return sb.String(), true, true + } + sb.WriteByte(c) + j++ + } + // Ran out of input before finding the closing quote. + return sb.String(), true, false +} + +// ExtractLastNewText finds the most recent `"newText"` key in +// raw by taking the last occurrence of `"newText":` in the +// buffer, so we get the one currently being streamed rather +// than an earlier completed edit. Returns the partial string +// value the same way ExtractPartialStringField does, plus the +// 1-indexed edit number, counted as the number of `"newText":` +// occurrences up to and including the one returned. +// +// This is aimed at the `edit` tool's streaming shape: +// +// {"path":"...","edits":[{"oldText":"x","newText":"y"}, +// {"oldText":"a","newText":"b +// +// We want to show `b` while it grows. 
+func ExtractLastNewText(raw string) (value string, ok, done bool, editIdx int) { + // Find every occurrence of `"newText":` and return a partial + // extraction starting at the last one. Earlier occurrences + // have already finished streaming. + needle := "\"newText\":" + last := -1 + for i := 0; i+len(needle) <= len(raw); { + idx := strings.Index(raw[i:], needle) + if idx < 0 { + break + } + last = i + idx + i = last + len(needle) + } + if last < 0 { + return "", false, false, 0 + } + // Count how many `"newText":` occurrences preceded this one; +1 + // gives us the 1-indexed edit number. + editIdx = strings.Count(raw[:last], needle) + 1 + suffix := raw[last+len(needle):] + j := 0 + for j < len(suffix) && (suffix[j] == ' ' || suffix[j] == '\t' || suffix[j] == '\n' || suffix[j] == '\r') { + j++ + } + if j >= len(suffix) || suffix[j] != '"' { + return "", false, false, editIdx + } + // Reuse the single-field extractor by feeding it a synthetic + // {"newText":...} wrapper so all its escape handling stays in + // one place. 
+ value, ok, done = ExtractPartialStringField("{\"newText\":"+suffix[j:], "newText") + return value, ok, done, editIdx +} + +func parseHex4(s string) int { + if len(s) != 4 { + return -1 + } + n := 0 + for i := 0; i < 4; i++ { + var d int + c := s[i] + switch { + case c >= '0' && c <= '9': + d = int(c - '0') + case c >= 'a' && c <= 'f': + d = int(c-'a') + 10 + case c >= 'A' && c <= 'F': + d = int(c-'A') + 10 + default: + return -1 + } + n = n<<4 | d + } + return n +} diff --git a/internal/tui/partialjson_test.go b/internal/tui/partialjson_test.go new file mode 100644 index 0000000..e7f4a87 --- /dev/null +++ b/internal/tui/partialjson_test.go @@ -0,0 +1,155 @@ +package tui + +import "testing" + +func TestExtractLastNewText(t *testing.T) { + cases := []struct { + name string + raw string + wantValue string + wantOK bool + wantDone bool + wantIdx int + }{ + { + name: "no newText yet", + raw: `{"path":"/x","edits":[{"oldText":"a"}`, + wantValue: "", + wantOK: false, + wantDone: false, + wantIdx: 0, + }, + { + name: "single newText partial", + raw: `{"edits":[{"oldText":"a","newText":"b`, + wantValue: "b", + wantOK: true, + wantDone: false, + wantIdx: 1, + }, + { + name: "single newText complete", + raw: `{"edits":[{"oldText":"a","newText":"b"}]`, + wantValue: "b", + wantOK: true, + wantDone: true, + wantIdx: 1, + }, + { + name: "two edits, second still streaming", + raw: `{"edits":[{"oldText":"x","newText":"y"},{"oldText":"a","newText":"hello wor`, + wantValue: "hello wor", + wantOK: true, + wantDone: false, + wantIdx: 2, + }, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + v, ok, done, idx := ExtractLastNewText(c.raw) + if v != c.wantValue || ok != c.wantOK || done != c.wantDone || idx != c.wantIdx { + t.Errorf("want (v=%q ok=%v done=%v idx=%d), got (v=%q ok=%v done=%v idx=%d)", + c.wantValue, c.wantOK, c.wantDone, c.wantIdx, v, ok, done, idx) + } + }) + } +} + +func TestExtractPartialStringField(t *testing.T) { + cases := []struct { + name 
string + raw string + field string + wantValue string + wantOK bool + wantDone bool + }{ + { + name: "empty buffer", + raw: "", + field: "content", + wantValue: "", + wantOK: false, + wantDone: false, + }, + { + name: "no such field", + raw: `{"path":"/x","foo":"bar"}`, + field: "content", + wantValue: "", + wantOK: false, + wantDone: false, + }, + { + name: "complete", + raw: `{"path":"/x","content":"hello"}`, + field: "content", + wantValue: "hello", + wantOK: true, + wantDone: true, + }, + { + name: "partial mid-word", + raw: `{"path":"/x","content":"hel`, + field: "content", + wantValue: "hel", + wantOK: true, + wantDone: false, + }, + { + name: "escaped quote", + raw: `{"content":"say \"hi\""}`, + field: "content", + wantValue: `say "hi"`, + wantOK: true, + wantDone: true, + }, + { + name: "escaped newline inside string", + raw: `{"content":"line1\nline2"}`, + field: "content", + wantValue: "line1\nline2", + wantOK: true, + wantDone: true, + }, + { + name: "trailing backslash (unfinished escape)", + raw: `{"content":"line1\`, + field: "content", + wantValue: "line1", + wantOK: true, + wantDone: false, + }, + { + name: "incomplete unicode escape", + raw: `{"content":"before\u00`, + field: "content", + wantValue: "before", + wantOK: true, + wantDone: false, + }, + { + name: "key before value", + raw: `{"content":`, + field: "content", + wantValue: "", + wantOK: false, + wantDone: false, + }, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + v, ok, done := ExtractPartialStringField(c.raw, c.field) + if v != c.wantValue { + t.Errorf("value: want %q, got %q", c.wantValue, v) + } + if ok != c.wantOK { + t.Errorf("ok: want %v, got %v", c.wantOK, ok) + } + if done != c.wantDone { + t.Errorf("done: want %v, got %v", c.wantDone, done) + } + }) + } +} diff --git a/internal/tui/view.go b/internal/tui/view.go index 5b744ca..5287eff 100644 --- a/internal/tui/view.go +++ b/internal/tui/view.go @@ -130,6 +130,24 @@ type ToolCallView struct { Result 
string // rendered result preview (truncated) Error bool Done bool + + // Streaming is true while the model is still typing the tool + // call's JSON arguments. The TUI renders a live preview of any + // interesting string fields (for `write`, the `content`; for + // `edit`, the latest `newText`) so the user can watch the file being + // composed. Set to false as soon as EvToolUseEnd arrives. + Streaming bool + + // RawJSONBuf is the accumulator of every EvToolUseArgs delta + // the stream has delivered for this tool call. Used by the + // partial-JSON extractor to peel off the live string value + // of one named field on each render. + RawJSONBuf string + + // LivePath is the `path` (or `file_path`) arg extracted as soon as it parses + // out of RawJSONBuf. Shown next to the tool name in the header + // so the user can see which file is being written to. + LivePath string } // MessageAnchor records where a rendered message starts in the chat @@ -408,8 +426,29 @@ func (v *View) renderMessage(m provider.Message, width int) []string { func (v *View) renderToolCall(tc ToolCallView, width int) []string { var lines []string - head := v.Theme.FG256(v.Theme.Tool, "▸ "+tc.Name+" "+tc.Args) + + // Header. Until the final Args summary is set, fall back to the live path + // extracted from the partial args so the user sees the target + // file as soon as it's known, even before the full JSON has arrived. + arg := tc.Args + if arg == "" && tc.LivePath != "" { + arg = tc.LivePath + } + head := v.Theme.FG256(v.Theme.Tool, "▸ "+tc.Name+" "+arg) lines = append(lines, head) + + // Live streaming body: pulled out of the partial JSON buffer for + // tools whose interesting content is a string field (currently + // write's `content` and edit's `newText` chunks). Rendered with + // the same rules + highlighter the final result would use, so the + // transition from streaming to result is visually seamless. 
+ if tc.Streaming && tc.Result == "" { + if body := v.renderLiveToolBody(tc, width); len(body) > 0 { + lines = append(lines, body...) + } + return lines + } + if tc.Result != "" { color := v.Theme.ToolOut if tc.Error { @@ -428,6 +467,52 @@ func (v *View) renderToolCall(tc ToolCallView, width int) []string { return lines } +// renderLiveToolBody renders the in-flight preview of a streaming +// tool call. Supported tools: +// +// - write: shows the partial `content` field, rendered through +// renderRawFile with the live path. +// - edit: shows the partial `newText` of the edit currently being +// streamed, prefixed with an " edit N (streaming)" hint line so +// the user can see which of a multi-edit batch is in progress. +// +// Anything else returns nil and only the tool-call header shows. +func (v *View) renderLiveToolBody(tc ToolCallView, width int) []string { + switch tc.Name { + case "write", "Write": + partial, ok, _ := ExtractPartialStringField(tc.RawJSONBuf, "content") + if !ok || partial == "" { + return nil + } + return v.wrapLiveBody(v.renderRawFile(partial, tc.LivePath, 1), width) + case "edit", "Edit": + partial, ok, _, idx := ExtractLastNewText(tc.RawJSONBuf) + if !ok || partial == "" { + return nil + } + // Hint line shows the 1-indexed number of the edit whose + // newText is currently streaming, followed by a blank line. + hint := fmt.Sprintf(" edit %d (streaming)", idx) + body := []string{v.Theme.FG256(v.Theme.Muted, hint), ""} + body = append(body, v.renderRawFile(partial, tc.LivePath, 1)...) + return v.wrapLiveBody(body, width) + } + return nil +} + +// wrapLiveBody wraps a list of content lines with the standard +// tool-result rules (top + bottom), collapsing to the preview height +// if the body is tall. Shared between write and edit streaming. 
+func (v *View) wrapLiveBody(body []string, width int) []string { + body = v.collapseToolBody(body, false) + rule := v.Theme.FG256(v.Theme.Muted, strings.Repeat("─", width)) + out := make([]string, 0, len(body)+2) + out = append(out, rule) + out = append(out, body...) + out = append(out, rule) + return out +} + // toolResultBlock wraps text in thin horizontal rules (top + bottom), // indenting the body with four spaces. The rules span the content column. // renderToolResultContent renders the body of a tool result block.