feat(tui): live-stream file body during write/edit tool calls

You see the file being composed in real time now. While the model
is typing the tool_use JSON, the TUI renders a rules-wrapped
syntax-highlighted preview that grows as deltas arrive. When the
tool actually runs, the preview transitions to the final result
without flicker.

Before: the tool header appeared post-response, then "wrote N bytes"
for write / "applied 1 edit" for edit. No live feedback.

Now: as soon as the `path` field parses out of the partial JSON,
the header shows `▸ write /Users/pat/Desktop/demo.ts`. As the
`content` / `newText` string streams in, each delta extends the
highlighted preview body immediately. Collapsed at the usual
preview height with the standard `ctrl+o to expand` footer.

Implementation:

- internal/core/events.go: three new AgentEvent types,
  EvToolUseStart / EvToolUseArgs / EvToolUseEnd. They carry the
  tool id, name, and raw JSON deltas from the provider stream.
- internal/core/agent.go: forwards the equivalent provider events
  instead of dropping them. EvToolCall (with fully-parsed args)
  still fires at EventDone as before, so existing consumers
  don't need to change.
- internal/tui/partialjson.go: small escape-aware extractor that
  pulls one string field's value out of a partial JSON buffer as
  it grows. Handles \\ \" \n \t \r \b \f \/ and \uXXXX escapes;
  tolerates trailing incomplete escapes (returns the complete
  prefix and waits for more bytes). Second helper,
  ExtractLastNewText, walks to the most recent "newText":"..."
  inside an edits array so edit's streaming preview shows the
  edit currently being composed (not an earlier one that's
  already finished).
- internal/tui/view.go: ToolCallView gains Streaming, RawJSONBuf,
  LivePath fields. renderToolCall dispatches to renderLiveToolBody
  while Streaming=true and Result=="". For `write` it shows the
  partial `content`; for `edit` it shows `  edit N (streaming)`
  plus the partial `newText`. Shared wrapLiveBody keeps the rule
  + collapse boilerplate in one place.
- internal/agent/modes/interactive.go: handles the three new
  events. EvToolUseStart pre-creates the ToolCallView so the
  header appears instantly; EvToolUseArgs appends the delta and
  refreshes LivePath; EvToolUseEnd flips Streaming off. The
  pre-existing EvToolCall branch now updates the already-created
  view rather than replacing it.
- internal/agent/modes/json.go: emits tool_use_start /
  tool_use_args / tool_use_end events so `zot --json` consumers
  can build their own live previews.
- internal/agent/tools/write.go: tool result is now the written
  file body (same shape as read's result) with total_lines +
  start_line details. Keeps the visual transition from streaming
  preview to final result seamless, and gives the model the file
  contents in its own tool_result for follow-up turns.

Tests:

- internal/tui/partialjson_test.go: 9 cases on
  ExtractPartialStringField (complete, partial mid-word, escape
  variants, unfinished escapes) and 4 on ExtractLastNewText
  (no newText, partial, complete, multi-edit).

Verified end-to-end via `zot --json "write ..."` and
`zot --json "edit ..."` against the real API: 246 tool_use_args
delta events on a 30-line write, preview fields extracted live,
final file written correctly.
This commit is contained in:
patriceckhart 2026-04-20 08:37:14 +02:00
parent 3e59a3cfd3
commit c610a3a645
8 changed files with 528 additions and 15 deletions

View file

@ -1979,14 +1979,50 @@ func (i *Interactive) handleEvent(ev core.AgentEvent) {
if i.cfg.OnAssistant != nil {
i.cfg.OnAssistant(e.Message)
}
case core.EvToolCall:
tcv := &tui.ToolCallView{
ID: e.ID,
Name: e.Name,
Args: shortArgs(e.Args),
case core.EvToolUseStart:
// Live streaming: pre-create the view so the user sees the
// tool call being composed in real time. Any subsequent
// EvToolCall for the same ID updates the same struct (the
// final parsed args + name are already known here).
if _, exists := i.toolCalls[e.ID]; !exists {
i.toolCalls[e.ID] = &tui.ToolCallView{
ID: e.ID,
Name: e.Name,
Streaming: true,
}
i.toolOrder = append(i.toolOrder, e.ID)
}
case core.EvToolUseArgs:
if tc, ok := i.toolCalls[e.ID]; ok {
tc.RawJSONBuf += e.Delta
// Refresh the live path as soon as it parses; used in
// the header (▸ write /Users/pat/Desktop/demo.ts)
// while the content is still streaming.
if p, pok, _ := tui.ExtractPartialStringField(tc.RawJSONBuf, "path"); pok {
tc.LivePath = p
} else if p, pok, _ := tui.ExtractPartialStringField(tc.RawJSONBuf, "file_path"); pok {
tc.LivePath = p
}
}
case core.EvToolUseEnd:
if tc, ok := i.toolCalls[e.ID]; ok {
tc.Streaming = false
}
case core.EvToolCall:
// If we already pre-created the view during streaming, just
// refresh the final Args summary. Otherwise create a new one
// (non-streaming providers or legacy paths).
if tc, ok := i.toolCalls[e.ID]; ok {
tc.Args = shortArgs(e.Args)
tc.Streaming = false
} else {
i.toolCalls[e.ID] = &tui.ToolCallView{
ID: e.ID,
Name: e.Name,
Args: shortArgs(e.Args),
}
i.toolOrder = append(i.toolOrder, e.ID)
}
i.toolCalls[e.ID] = tcv
i.toolOrder = append(i.toolOrder, e.ID)
case core.EvToolResult:
if tc, ok := i.toolCalls[e.ID]; ok {
tc.Done = true

View file

@ -49,6 +49,14 @@ func EventToJSON(ev core.AgentEvent) map[string]any {
m["time"] = e.Message.Time
case core.EvTextDelta:
m["delta"] = e.Delta
case core.EvToolUseStart:
m["id"] = e.ID
m["name"] = e.Name
case core.EvToolUseArgs:
m["id"] = e.ID
m["delta"] = e.Delta
case core.EvToolUseEnd:
m["id"] = e.ID
case core.EvToolCall:
m["id"] = e.ID
m["name"] = e.Name

View file

@ -6,6 +6,7 @@ import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/patriceckhart/zot/internal/core"
"github.com/patriceckhart/zot/internal/provider"
@ -50,9 +51,24 @@ func (t *WriteTool) Execute(ctx context.Context, raw json.RawMessage, progress f
return core.ToolResult{}, err
}
msg := fmt.Sprintf("wrote %d bytes to %s", len(a.Content), a.Path)
// Return the file content as the result body, just like `read`
// does. The TUI renders it with a syntax-highlighted gutter so
// the on-screen view after a `write` matches the pre-write
// streaming preview seamlessly. The model also sees the written
// content in its tool_result, which is useful on follow-up turns
// where it wants to reference what it just wrote without a
// second `read` call.
totalLines := strings.Count(a.Content, "\n")
if len(a.Content) > 0 && !strings.HasSuffix(a.Content, "\n") {
totalLines++ // count the last unterminated line
}
return core.ToolResult{
Content: []provider.Content{provider.TextBlock{Text: msg}},
Details: map[string]any{"path": path, "bytes": len(a.Content)},
Content: []provider.Content{provider.TextBlock{Text: a.Content}},
Details: map[string]any{
"path": path,
"bytes": len(a.Content),
"total_lines": totalLines,
"start_line": 1,
},
}, nil
}

View file

@ -228,10 +228,12 @@ func (a *Agent) oneTurn(ctx context.Context, sink func(AgentEvent)) (provider.St
// nothing
case provider.EventTextDelta:
sink(EvTextDelta{Delta: e.Delta})
case provider.EventToolStart, provider.EventToolArgs, provider.EventToolEnd:
// The provider emits these to support partial-JSON UIs.
// We surface the complete tool call once per block at EvToolCall
// below (after EventDone), so for now we drop them.
case provider.EventToolStart:
sink(EvToolUseStart{ID: e.ID, Name: e.Name})
case provider.EventToolArgs:
sink(EvToolUseArgs{ID: e.ID, Delta: e.Delta})
case provider.EventToolEnd:
sink(EvToolUseEnd{ID: e.ID})
case provider.EventUsage:
cum := a.cost.Add(e.Usage)
sink(EvUsage{Usage: e.Usage, Cumulative: cum})

View file

@ -43,6 +43,40 @@ type EvToolCall struct {
func (EvToolCall) Type() string { return "tool_call" }
// EvToolUseStart fires the moment the provider announces a new
// tool_use block during streaming, before any argument JSON has
// arrived. Gives UIs a hook to pre-render a live "tool is being
// composed" panel so the user sees the model typing the call in
// real time. The event carries only the call's ID and tool Name;
// the arguments follow separately as EvToolUseArgs deltas.
type EvToolUseStart struct {
// ID identifies the tool call; later events for the same call
// reuse it.
ID string
// Name is the tool being invoked.
Name string
}
// Type returns the event's wire name, "tool_use_start".
func (EvToolUseStart) Type() string { return "tool_use_start" }
// EvToolUseArgs fires for each delta fragment of the tool_use
// block's argument JSON. Concatenating every delta for a given
// ID produces the full JSON string; while streaming, the
// concatenation so far is usually cut off mid-value and is not
// valid JSON on its own. UIs can extract partial string fields
// (e.g. the `content` arg of `write`) with an escape-aware scan.
type EvToolUseArgs struct {
// ID identifies the tool call this fragment belongs to.
ID string
// Delta is the next raw fragment of the argument JSON.
Delta string
}
// Type returns the event's wire name, "tool_use_args".
func (EvToolUseArgs) Type() string { return "tool_use_args" }
// EvToolUseEnd fires when the provider marks the tool_use block
// complete. At this point the full args JSON is known; a separate
// EvToolCall follows once the assistant message is assembled,
// carrying the parsed block that actually runs.
type EvToolUseEnd struct {
// ID identifies the tool call whose argument stream has ended.
ID string
}
// Type returns the event's wire name, "tool_use_end".
func (EvToolUseEnd) Type() string { return "tool_use_end" }
type EvToolProgress struct {
ID string
Text string

177
internal/tui/partialjson.go Normal file
View file

@ -0,0 +1,177 @@
package tui
import "strings"
// ExtractPartialStringField scans raw (the bytes of a partial JSON
// object) for the first occurrence of `"field":` and returns the
// unescaped string value seen so far.
//
// Results:
//   - ok=false: the key is absent, its opening quote has not arrived
//     yet, or the value is not a string. value is "".
//   - ok=true, done=false: the value is still being written; value
//     holds the complete prefix decoded so far.
//   - ok=true, done=true: the closing unescaped quote was reached.
//
// This is deliberately small and best-effort: zot uses it to show
// the live body of a `write` tool call while the model is still
// typing it, before the full JSON object has been received. It does
// no nested lookup and matches the first textual occurrence of the
// key. A trailing incomplete escape (a lone backslash, a short
// `\uXXXX`, or a high surrogate whose partner has not arrived) is
// held back until more bytes show up, so the preview never flashes
// a character that later changes. A malformed `\u` escape stops the
// scan at that point with done=false.
func ExtractPartialStringField(raw, field string) (value string, ok, done bool) {
	needle := `"` + field + `":`
	idx := strings.Index(raw, needle)
	if idx < 0 {
		return "", false, false
	}
	return scanPartialString(raw[idx+len(needle):])
}

// ExtractLastNewText finds the most recent `"newText"` field in raw
// so the caller gets the edit currently being streamed rather than
// an earlier, already completed one. value, ok and done have the
// same meaning as for ExtractPartialStringField. editIdx is the
// 1-indexed position of that `"newText"` occurrence (0 when none
// has been seen), so the UI can show "edit 2" or similar.
//
// This is aimed at the `edit` tool's streaming shape:
//
//	{"path":"...","edits":[{"oldText":"x","newText":"y"},
//	                       {"oldText":"a","newText":"b<streaming>
//
// We want to show `b<streaming>` while it grows.
func ExtractLastNewText(raw string) (value string, ok, done bool, editIdx int) {
	const needle = `"newText":`
	last := strings.LastIndex(raw, needle)
	if last < 0 {
		return "", false, false, 0
	}
	// Every earlier occurrence belongs to an edit that has already
	// finished streaming; counting them gives the 1-indexed number.
	editIdx = strings.Count(raw[:last], needle) + 1
	// Scan the tail in place instead of building a synthetic
	// wrapper object: this runs on every render and the buffer can
	// be large.
	value, ok, done = scanPartialString(raw[last+len(needle):])
	return value, ok, done, editIdx
}

// scanPartialString decodes the JSON string that starts in rest,
// where rest is the text immediately following a key's colon.
// Leading JSON whitespace is skipped. It returns ok=false when the
// next significant byte is missing or is not an opening quote.
func scanPartialString(rest string) (value string, ok, done bool) {
	j := 0
	for j < len(rest) && (rest[j] == ' ' || rest[j] == '\t' || rest[j] == '\n' || rest[j] == '\r') {
		j++
	}
	if j >= len(rest) || rest[j] != '"' {
		// Field wasn't a string, or the opening quote hasn't arrived.
		return "", false, false
	}
	j++ // past opening quote

	var sb strings.Builder
	sb.Grow(len(rest) - j)
	for j < len(rest) {
		c := rest[j]
		if c == '"' {
			// Unescaped quote: end of string.
			return sb.String(), true, true
		}
		if c != '\\' {
			sb.WriteByte(c)
			j++
			continue
		}
		// Escape sequence. Need at least one more byte; if it is
		// not present yet, stop here and wait for more input.
		if j+1 >= len(rest) {
			return sb.String(), true, false
		}
		esc := rest[j+1]
		if esc == 'u' {
			r, n, ready := decodeUnicodeEscape(rest[j:])
			if !ready {
				// Incomplete or malformed; return what we have.
				return sb.String(), true, false
			}
			sb.WriteRune(r)
			j += n
			continue
		}
		if b, known := simpleEscape(esc); known {
			sb.WriteByte(b)
		} else {
			// Unknown escape; keep the backslash and the next byte
			// as literals so the render shows something.
			sb.WriteByte('\\')
			sb.WriteByte(esc)
		}
		j += 2
	}
	// Ran out of input before finding the closing quote.
	return sb.String(), true, false
}

// simpleEscape maps the character after a backslash to the byte it
// stands for, for the single-character JSON escapes. ok is false
// for anything else (including 'u', which is handled separately).
func simpleEscape(esc byte) (b byte, ok bool) {
	switch esc {
	case '"', '\\', '/':
		return esc, true
	case 'n':
		return '\n', true
	case 't':
		return '\t', true
	case 'r':
		return '\r', true
	case 'b':
		return '\b', true
	case 'f':
		return '\f', true
	}
	return 0, false
}

// decodeUnicodeEscape decodes the `\uXXXX` escape at the start of s
// (s must begin with `\u`). It returns the decoded rune and the
// number of input bytes consumed: 6 for a single escape, 12 when a
// high surrogate is followed by its low-surrogate partner and the
// pair is combined into one code point. A surrogate that has no
// valid partner decodes to U+FFFD.
//
// ready is false when the scan has to stop: the hex digits are
// missing or malformed, or a high surrogate sits at the end of the
// input and its partner may still arrive.
func decodeUnicodeEscape(s string) (r rune, n int, ready bool) {
	const (
		replacement = '\uFFFD'
		highStart   = 0xD800
		lowStart    = 0xDC00
		lowEnd      = 0xDFFF
	)
	if len(s) < 6 {
		return 0, 0, false
	}
	hi := parseHex4(s[2:6])
	if hi < 0 {
		return 0, 0, false
	}
	if hi < highStart || hi > lowEnd {
		return rune(hi), 6, true
	}
	if hi >= lowStart {
		// Low surrogate with no preceding high surrogate.
		return replacement, 6, true
	}

	// High surrogate: look for the `\uXXXX` low surrogate after it.
	tail := s[6:]
	switch {
	case len(tail) == 0:
		return 0, 0, false // partner may still be on its way
	case tail[0] != '\\':
		return replacement, 6, true
	case len(tail) == 1:
		return 0, 0, false
	case tail[1] != 'u':
		return replacement, 6, true
	case len(tail) < 6:
		return 0, 0, false
	}
	lo := parseHex4(tail[2:6])
	if lo < lowStart || lo > lowEnd {
		// Not a low surrogate (or malformed hex): emit the
		// replacement for the lone high surrogate and let the
		// caller process the following escape on its own.
		return replacement, 6, true
	}
	return rune(0x10000 + ((hi - highStart) << 10) + (lo - lowStart)), 12, true
}

// parseHex4 parses exactly four hexadecimal digits (either case)
// and returns their value, or -1 if s is not four hex digits.
func parseHex4(s string) int {
	if len(s) != 4 {
		return -1
	}
	n := 0
	for i := 0; i < 4; i++ {
		var d int
		c := s[i]
		switch {
		case c >= '0' && c <= '9':
			d = int(c - '0')
		case c >= 'a' && c <= 'f':
			d = int(c-'a') + 10
		case c >= 'A' && c <= 'F':
			d = int(c-'A') + 10
		default:
			return -1
		}
		n = n<<4 | d
	}
	return n
}

View file

@ -0,0 +1,155 @@
package tui
import "testing"
// TestExtractLastNewText checks that ExtractLastNewText reports the
// value, ok/done flags and 1-indexed edit number of the most recent
// "newText" field across empty, partial, complete and multi-edit
// buffers.
func TestExtractLastNewText(t *testing.T) {
	type scenario struct {
		name      string
		raw       string
		wantValue string
		wantOK    bool
		wantDone  bool
		wantIdx   int
	}
	table := []scenario{
		{
			name:      "no newText yet",
			raw:       `{"path":"/x","edits":[{"oldText":"a"}`,
			wantValue: "",
			wantOK:    false,
			wantDone:  false,
			wantIdx:   0,
		},
		{
			name:      "single newText partial",
			raw:       `{"edits":[{"oldText":"a","newText":"b`,
			wantValue: "b",
			wantOK:    true,
			wantDone:  false,
			wantIdx:   1,
		},
		{
			name:      "single newText complete",
			raw:       `{"edits":[{"oldText":"a","newText":"b"}]`,
			wantValue: "b",
			wantOK:    true,
			wantDone:  true,
			wantIdx:   1,
		},
		{
			name:      "two edits, second still streaming",
			raw:       `{"edits":[{"oldText":"x","newText":"y"},{"oldText":"a","newText":"hello wor`,
			wantValue: "hello wor",
			wantOK:    true,
			wantDone:  false,
			wantIdx:   2,
		},
	}
	for _, sc := range table {
		t.Run(sc.name, func(t *testing.T) {
			gotValue, gotOK, gotDone, gotIdx := ExtractLastNewText(sc.raw)
			matches := gotValue == sc.wantValue &&
				gotOK == sc.wantOK &&
				gotDone == sc.wantDone &&
				gotIdx == sc.wantIdx
			if matches {
				return
			}
			t.Errorf("want (v=%q ok=%v done=%v idx=%d), got (v=%q ok=%v done=%v idx=%d)",
				sc.wantValue, sc.wantOK, sc.wantDone, sc.wantIdx, gotValue, gotOK, gotDone, gotIdx)
		})
	}
}
// TestExtractPartialStringField exercises ExtractPartialStringField
// on missing, complete and truncated buffers, including escaped
// characters and escapes that are cut off mid-sequence.
func TestExtractPartialStringField(t *testing.T) {
	type scenario struct {
		name      string
		raw       string
		field     string
		wantValue string
		wantOK    bool
		wantDone  bool
	}
	table := []scenario{
		{
			name:      "empty buffer",
			raw:       "",
			field:     "content",
			wantValue: "",
			wantOK:    false,
			wantDone:  false,
		},
		{
			name:      "no such field",
			raw:       `{"path":"/x","foo":"bar"}`,
			field:     "content",
			wantValue: "",
			wantOK:    false,
			wantDone:  false,
		},
		{
			name:      "complete",
			raw:       `{"path":"/x","content":"hello"}`,
			field:     "content",
			wantValue: "hello",
			wantOK:    true,
			wantDone:  true,
		},
		{
			name:      "partial mid-word",
			raw:       `{"path":"/x","content":"hel`,
			field:     "content",
			wantValue: "hel",
			wantOK:    true,
			wantDone:  false,
		},
		{
			name:      "escaped quote",
			raw:       `{"content":"say \"hi\""}`,
			field:     "content",
			wantValue: `say "hi"`,
			wantOK:    true,
			wantDone:  true,
		},
		{
			name:      "escaped newline inside string",
			raw:       `{"content":"line1\nline2"}`,
			field:     "content",
			wantValue: "line1\nline2",
			wantOK:    true,
			wantDone:  true,
		},
		{
			name:      "trailing backslash (unfinished escape)",
			raw:       `{"content":"line1\`,
			field:     "content",
			wantValue: "line1",
			wantOK:    true,
			wantDone:  false,
		},
		{
			name:      "incomplete unicode escape",
			raw:       `{"content":"before\u00`,
			field:     "content",
			wantValue: "before",
			wantOK:    true,
			wantDone:  false,
		},
		{
			name:      "key before value",
			raw:       `{"content":`,
			field:     "content",
			wantValue: "",
			wantOK:    false,
			wantDone:  false,
		},
	}
	for _, sc := range table {
		t.Run(sc.name, func(t *testing.T) {
			gotValue, gotOK, gotDone := ExtractPartialStringField(sc.raw, sc.field)
			// Each component is reported separately so a failure
			// pinpoints which part of the result diverged.
			if gotValue != sc.wantValue {
				t.Errorf("value: want %q, got %q", sc.wantValue, gotValue)
			}
			if gotOK != sc.wantOK {
				t.Errorf("ok: want %v, got %v", sc.wantOK, gotOK)
			}
			if gotDone != sc.wantDone {
				t.Errorf("done: want %v, got %v", sc.wantDone, gotDone)
			}
		})
	}
}

View file

@ -130,6 +130,24 @@ type ToolCallView struct {
Result string // rendered result preview (truncated)
Error bool
Done bool
// Streaming is true while the model is still typing the tool
// call's JSON arguments. The TUI renders a live preview of the
// interesting string field (for `write`, the `content`; for
// `edit`, the `newText` currently being composed) so the user
// can watch the file being written. Set to false as soon as
// EvToolUseEnd arrives.
Streaming bool
// RawJSONBuf is the accumulator of every EvToolUseArgs delta
// the stream has delivered for this tool call. Used by the
// partial-JSON extractor to peel off the live string value
// of one named field on each render.
RawJSONBuf string
// LivePath is the `path` arg extracted as soon as it parses
// out of RawJSONBuf. Shown next to the tool name in the header
// so the user can see which file is being written to.
LivePath string
}
// MessageAnchor records where a rendered message starts in the chat
@ -408,8 +426,29 @@ func (v *View) renderMessage(m provider.Message, width int) []string {
func (v *View) renderToolCall(tc ToolCallView, width int) []string {
var lines []string
head := v.Theme.FG256(v.Theme.Tool, "▸ "+tc.Name+" "+tc.Args)
// Header. While the call is still streaming, prefer the live path
// extracted from the partial args so the user sees the target
// file as soon as it's known, even before the full JSON arrived.
arg := tc.Args
if arg == "" && tc.LivePath != "" {
arg = tc.LivePath
}
head := v.Theme.FG256(v.Theme.Tool, "▸ "+tc.Name+" "+arg)
lines = append(lines, head)
// Live streaming body: pulled out of the partial JSON buffer for
// tools whose interesting content is a string field (currently
// write's `content` and edit's `newText` chunks). Rendered with
// the same rules + highlighter the final result would use, so the
// transition from streaming to result is visually seamless.
if tc.Streaming && tc.Result == "" {
if body := v.renderLiveToolBody(tc, width); len(body) > 0 {
lines = append(lines, body...)
}
return lines
}
if tc.Result != "" {
color := v.Theme.ToolOut
if tc.Error {
@ -428,6 +467,52 @@ func (v *View) renderToolCall(tc ToolCallView, width int) []string {
return lines
}
// renderLiveToolBody builds the in-flight preview for a tool call
// whose arguments are still streaming. Supported tools:
//
//   - write: the partial `content` field, rendered through
//     renderRawFile with the live path.
//   - edit: the partial `newText` of the most recent edit in the
//     buffer, preceded by a muted "edit N (streaming)" hint line
//     and a blank line so the user can tell which edit of a batch
//     is in progress.
//
// It returns nil for any other tool, and also when the relevant
// field has not started streaming or is still empty, in which case
// only the tool-call header is shown.
func (v *View) renderLiveToolBody(tc ToolCallView, width int) []string {
	if tc.Name == "write" || tc.Name == "Write" {
		text, found, _ := ExtractPartialStringField(tc.RawJSONBuf, "content")
		if found && text != "" {
			return v.wrapLiveBody(v.renderRawFile(text, tc.LivePath, 1), width)
		}
		return nil
	}

	if tc.Name == "edit" || tc.Name == "Edit" {
		text, found, _, editNo := ExtractLastNewText(tc.RawJSONBuf)
		if !found || text == "" {
			return nil
		}
		// Hint line naming the edit that is currently streaming,
		// followed by a spacer before the preview itself.
		label := v.Theme.FG256(v.Theme.Muted, fmt.Sprintf(" edit %d (streaming)", editNo))
		preview := append([]string{label, ""}, v.renderRawFile(text, tc.LivePath, 1)...)
		return v.wrapLiveBody(preview, width)
	}

	return nil
}
// wrapLiveBody frames body between a top and a bottom horizontal
// rule of the given width, after passing it through
// collapseToolBody so tall bodies are cut down to the preview
// height. Shared between the write and edit streaming previews.
//
// A negative width is treated as 0: strings.Repeat panics on a
// negative count, and a very narrow terminal must not be able to
// crash the renderer.
func (v *View) wrapLiveBody(body []string, width int) []string {
	if width < 0 {
		width = 0
	}
	body = v.collapseToolBody(body, false)
	rule := v.Theme.FG256(v.Theme.Muted, strings.Repeat("─", width))
	out := make([]string, 0, len(body)+2)
	out = append(out, rule)
	out = append(out, body...)
	out = append(out, rule)
	return out
}
// toolResultBlock wraps text in thin horizontal rules (top + bottom),
// indenting the body with four spaces. The rules span the content column.
// renderToolResultContent renders the body of a tool result block.