diff --git a/README.md b/README.md index ed7485d..67f02e1 100644 --- a/README.md +++ b/README.md @@ -210,6 +210,10 @@ Type `/` in the TUI to open the autocomplete popup. Available commands: Extension-registered commands appear under a divider at the bottom of the popup, sorted by name. +### Shell escape (`!command`) + +Type `!` followed by a command to run it directly without going through the model. Everything after the `!` is passed to the same shell the `bash` tool uses (`/bin/sh -c` on Unix, `cmd /C` on Windows), runs in the session working directory, and honors the `/jail` sandbox. The output is appended below the transcript as a terminal-log block (command echo, output, exit code), styled by success or failure. It stays on screen until you send your next prompt, run another `!command`, press `esc` while nothing is running, or run `/clear`; it is never added to the model conversation. A running `!command` shares the busy state with the agent: `esc` cancels it, and you cannot start one while a turn (or another shell escape) is in flight. + ### `/sessions` Shows previous sessions for the current working directory, newest first, with timestamp, model, message count, cost, and the first user prompt. Pick one with `up`/`down`, `enter` to resume, `esc` to cancel. zot swaps the current session file for the selected one and replays the full transcript (including tool calls) into the agent. Sessions remember the model they ended on, so resuming picks up on that exact model even if your global default changed.
diff --git a/packages/agent/modes/interactive.go b/packages/agent/modes/interactive.go index 776d851..77e521a 100644 --- a/packages/agent/modes/interactive.go +++ b/packages/agent/modes/interactive.go @@ -2,6 +2,7 @@ package modes import ( "context" + "encoding/json" "fmt" "os" "path/filepath" @@ -213,6 +214,7 @@ type chatCacheKey struct { statusErr string help string extNotes string + shellBlock string updateAvailable bool updateCurrent string updateLatest string @@ -378,6 +380,15 @@ type Interactive struct { // transcript) until cleared by /clear or another reset. extNotes []string + // shellBlock holds the rendered terminal-log lines of the most + // recent !command shell escape. It lives below the transcript + // (under extNotes) until the user sends their next prompt or runs + // /clear. shellRunning is true while a !command is executing; it + // shares i.busy/i.cancelTurn so esc cancels it and no turn or + // other shell escape can start while one is in flight. + shellBlock []string + shellRunning bool + // sessionLoading is true while a /sessions selection is being read // on a background goroutine. Keeping this off the input goroutine // lets ctrl+c/exit remain responsive for very large JSONL sessions. @@ -754,6 +765,7 @@ func (i *Interactive) chatCacheKeyLocked(cols int) (chatCacheKey, bool) { statusErr: i.statusErr, help: strings.Join(i.helpBlock, "\n"), extNotes: strings.Join(i.extNotes, "\n"), + shellBlock: strings.Join(i.shellBlock, "\n"), updateAvailable: i.updateInfo.Available, updateCurrent: i.updateInfo.Current, updateLatest: i.updateInfo.Latest, @@ -872,6 +884,14 @@ func (i *Interactive) buildChatLocked(cols int) []string { chat = append(chat, "") } + // Shell-escape terminal-log block (!command). Rendered below the + // transcript and extension notes; cleared when the next prompt is + // sent or on /clear so it never leaks into the model conversation. + if len(i.shellBlock) > 0 { + chat = append(chat, i.shellBlock...) 
+ chat = append(chat, "") + } + // Strip trailing blank rows so the chat content sits flush // against the new "blank above status bar" row added by the // bottom-region assembly. Build() ends every message with a @@ -1041,8 +1061,19 @@ func (i *Interactive) redraw() { // from the chat below, instead of the diff path leaving stale // dialog content behind. Equivalent to the user pressing ctrl+l. overlayOpen := len(dialog) > 0 || len(suggest) > 0 - if i.prevOverlayOpen && !overlayOpen && i.rend != nil { - i.rend.Clear() + if i.rend != nil && i.prevOverlayOpen != overlayOpen { + // VS Code's terminal keeps scrollback (no \x1b[3J), so the + // diff path leaves stale rows behind when the slash/file popup + // opens AND when it closes. Force a full repaint on both + // transitions there, using Invalidate so we don't trigger VS + // Code's viewport-snap the way Clear would. Other terminals + // only need the close-transition Clear (the open paint is + // clean because scrollback was wiped). + if i.rend.KeepsScrollback() { + i.rend.Invalidate() + } else if i.prevOverlayOpen && !overlayOpen { + i.rend.Clear() + } } i.prevOverlayOpen = overlayOpen if len(suggest) > 0 { @@ -1845,14 +1876,21 @@ func (i *Interactive) handleKey(ctx context.Context, k tui.Key) (done bool) { i.mu.Lock() hadHelp := len(i.helpBlock) > 0 hadNotes := len(i.extNotes) > 0 + // Only dismiss a parked shell-escape log on esc when nothing is + // running; if a !command is in flight, esc must fall through to + // the cancel path below instead of just hiding the (empty) block. 
+ hadShell := len(i.shellBlock) > 0 && !i.shellRunning if hadHelp { i.helpBlock = nil } if hadNotes { i.extNotes = nil } + if hadShell { + i.shellBlock = nil + } i.mu.Unlock() - if hadHelp || hadNotes { + if hadHelp || hadNotes || hadShell { i.invalidate() return false } @@ -2072,6 +2110,11 @@ func (i *Interactive) handleKey(ctx context.Context, k tui.Key) (done bool) { i.suggest.Reset() i.fileSuggest.Reset() + if cmd, ok := shellEscapeCommand(text); ok { + i.startShellEscape(ctx, cmd) + return false + } + if looksLikeSlashCommand(text) { head := text rest := "" @@ -2308,6 +2351,10 @@ func (i *Interactive) Notify(extName, level, message string) { // Submit feeds text through the agent loop as if the user had typed it. func (i *Interactive) Submit(text string) { + if cmd, ok := shellEscapeCommand(text); ok { + i.startShellEscape(i.runCtx, cmd) + return + } i.startTurn(i.runCtx, text) } @@ -2368,6 +2415,10 @@ func (i *Interactive) SubmitSlash(text string) { // forwarded — because the queued-prompt path is text-only; a // follow-up can expand the queue entry to carry images. func (i *Interactive) SubmitOrQueue(text string, images []provider.ImageBlock) { + if cmd, ok := shellEscapeCommand(text); ok { + i.startShellEscape(i.runCtx, cmd) + return + } i.mu.Lock() if i.agent == nil { i.statusErr = "not logged in. type /login first." @@ -3000,6 +3051,7 @@ func (i *Interactive) runSlash(ctx context.Context, cmd string) (done bool) { i.parkedTotal = 0 i.scrollOffset = 0 i.extNotes = nil + i.shellBlock = nil i.view.InvalidateRenderCache() i.mu.Unlock() case "/help": @@ -3937,6 +3989,134 @@ func (i *Interactive) runCompact(parent context.Context, auto bool) { }() } +// shellEscapeCommand reports whether text is a "!command" shell +// escape and, if so, returns the command with the leading '!' (and +// surrounding whitespace) stripped. A bare "!" with no command is +// treated as not an escape so it falls through to the normal prompt +// path rather than running an empty shell. 
+func shellEscapeCommand(text string) (string, bool) {
+	trimmed := strings.TrimLeft(text, " \t")
+	if !strings.HasPrefix(trimmed, "!") {
+		return "", false
+	}
+	cmd := strings.TrimSpace(strings.TrimPrefix(trimmed, "!"))
+	if cmd == "" {
+		return "", false
+	}
+	return cmd, true
+}
+
+// startShellEscape runs a "!command" through tools.BashTool in the
+// session working directory with the configured sandbox. It shares the
+// busy/cancel state with the agent: esc cancels it, and it refuses to
+// start while a turn or another shell escape is in flight. A nil parent
+// falls back to i.runCtx, then context.Background(). The styled output
+// is parked in i.shellBlock until the next prompt or /clear, so it
+// never enters the model conversation.
+func (i *Interactive) startShellEscape(parent context.Context, cmd string) {
+	i.mu.Lock()
+	if i.busy || i.shellRunning {
+		i.statusErr = "busy — wait for the current turn or shell command to finish before running a shell command"
+		i.statusOK = ""
+		i.mu.Unlock()
+		i.invalidate()
+		return
+	}
+	if parent == nil {
+		parent = i.runCtx
+	}
+	if parent == nil {
+		parent = context.Background()
+	}
+	ctx, cancel := context.WithCancel(parent)
+	i.busy = true
+	i.shellRunning = true
+	i.cancelTurn = cancel
+	i.statusErr = ""
+	i.statusOK = ""
+	i.spin.StartFixed("running shell command")
+	// A new shell escape replaces the previous block and resets
+	// transient view state (help block, scroll offset, parked
+	// counters); extension notes are left in place.
+	i.shellBlock = nil
+	i.scrollOffset = 0
+	i.parkedTurn = 0
+	i.parkedTotal = 0
+	i.helpBlock = nil
+	sandbox := i.cfg.Sandbox
+	cwd := i.cfg.CWD
+	i.mu.Unlock()
+	i.invalidate()
+
+	go func() {
+		defer cancel()
+		// Marshal cannot fail for a map holding a single string value.
+		raw, _ := json.Marshal(map[string]any{"command": cmd})
+		bash := &tools.BashTool{CWD: cwd, Sandbox: sandbox}
+		res, err := bash.Execute(ctx, raw, nil)
+
+		var buf strings.Builder
+		if err != nil {
+			buf.WriteString("$ " + cmd + "\n\n" + err.Error() + "\n\n[error]")
+		} else {
+			for _, c := range res.Content {
+				if tb, ok := c.(provider.TextBlock); ok {
+					buf.WriteString(tb.Text)
+				}
+			}
+		}
+		cancelled := ctx.Err() != nil
+		failed := err != nil || res.IsError || cancelled
+		if cancelled {
+			buf.WriteString("\n\n[cancelled]")
+		}
+		block := i.renderShellBlock(buf.String(), failed)
+
+		i.mu.Lock()
+		i.shellRunning = false
+		i.busy = false
+		i.cancelTurn = nil
+		i.shellBlock = block
+		if failed {
+			if cancelled {
+				i.statusErr = "shell command cancelled"
+			} else {
+				i.statusErr = "shell command failed"
+			}
+			i.statusOK = ""
+		} else {
+			i.statusOK = "shell command finished"
+			i.statusErr = ""
+		}
+		i.mu.Unlock()
+		i.invalidate()
+	}()
+}
+
+// renderShellBlock turns merged bash output into a styled terminal-log
+// block: each line colored by overall success (th.Tool) or failure
+// (th.Error), with [exit ...], [error], and [cancelled] marker lines
+// dimmed via th.Muted so they read as metadata.
+func (i *Interactive) renderShellBlock(out string, failed bool) []string {
+	th := i.cfg.Theme
+	base := th.Tool
+	if failed {
+		base = th.Error
+	}
+	out = strings.TrimRight(out, "\n")
+	lines := strings.Split(out, "\n")
+	styled := make([]string, 0, len(lines))
+	for _, line := range lines {
+		color := base
+		trimmed := strings.TrimSpace(line)
+		if strings.HasPrefix(trimmed, "[exit ") || strings.HasPrefix(trimmed, "[error]") || strings.HasPrefix(trimmed, "[cancelled]") {
+			color = th.Muted
+		}
+		styled = append(styled, th.FG256(color, line))
+	}
+	return styled
+}
+
 func (i *Interactive) startTurn(parent context.Context, prompt string) {
 	i.startTurnWithImages(parent, prompt, nil)
 }
@@ -3945,13 +4125,6 @@ func (i *Interactive) startTurnWithImages(parent context.Context, prompt string,
 	if i.agent == nil {
 		return
 	}
-	// Force a full repaint when a new turn begins so any stray dialog,
-	// popup, or stale tool-progress rows don't leak into the visible
-	// chat area before the assistant starts streaming. Equivalent to
-	// the user pressing ctrl+l right before submit.
-	if i.rend != nil {
-		i.rend.Clear()
-	}
 	// Pre-turn safety: if the most recent context measurement is
 	// already past the auto-compact threshold, condense before
 	// sending so the next outbound request stays under the limit.
@@ -3984,6 +4157,7 @@ func (i *Interactive) startTurnWithImages(parent context.Context, prompt string, i.streamOn = true i.toolCalls = map[string]*tui.ToolCallView{} i.toolOrder = nil + i.shellBlock = nil // sending a prompt clears any parked shell-escape log i.scrollOffset = 0 // jump back to the bottom on new turn // Reset the auto-follow baseline so the very next render at // interactive.go:1053 doesn't see a synthetic shrink between
diff --git a/packages/agent/modes/shell_escape_test.go b/packages/agent/modes/shell_escape_test.go
new file mode 100644
index 0000000..30da547
--- /dev/null
+++ b/packages/agent/modes/shell_escape_test.go
@@ -0,0 +1,66 @@
+package modes
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/patriceckhart/zot/packages/tui"
+)
+
+func TestShellEscapeCommand(t *testing.T) {
+	cases := []struct {
+		in      string
+		wantCmd string
+		wantOK  bool
+	}{
+		{"!ls -la", "ls -la", true},
+		{" !pwd", "pwd", true},
+		{"! go test ./... ", "go test ./...", true},
+		{"!", "", false},
+		{"! ", "", false},
+		{"ls -la", "", false},
+		{"/help", "", false},
+		{"hello !world", "", false},
+	}
+	for _, c := range cases {
+		cmd, ok := shellEscapeCommand(c.in)
+		if ok != c.wantOK || cmd != c.wantCmd {
+			t.Errorf("shellEscapeCommand(%q) = (%q,%v); want (%q,%v)",
+				c.in, cmd, ok, c.wantCmd, c.wantOK)
+		}
+	}
+}
+
+func TestRenderShellBlockStylesFooterDimmed(t *testing.T) {
+	i := &Interactive{}
+	i.cfg.Theme = tui.Theme{Tool: 2, Error: 1, Muted: 8}
+	th := i.cfg.Theme
+
+	ok := i.renderShellBlock("$ echo hi\n\nhi\n\n[exit 0] Took 0.1s", false)
+	if len(ok) != 5 {
+		t.Fatalf("success block has %d lines; want 5: %q", len(ok), ok)
+	}
+	body := strings.Join(ok, "\n")
+	if !strings.Contains(body, "echo hi") || !strings.Contains(body, "[exit 0]") {
+		t.Fatalf("block missing expected content: %q", body)
+	}
+	// The success body uses the Tool color; the footer uses Muted.
+	if want := th.FG256(th.Tool, "$ echo hi"); ok[0] != want {
+		t.Errorf("success body line = %q; want %q", ok[0], want)
+	}
+	if want := th.FG256(th.Muted, "[exit 0] Took 0.1s"); ok[4] != want {
+		t.Errorf("success footer = %q; want %q", ok[4], want)
+	}
+
+	// The failure body uses the Error color; the footer stays Muted.
+	fail := i.renderShellBlock("$ false\n\n[exit 1] Took 0.0s", true)
+	if len(fail) != 3 {
+		t.Fatalf("failure block has %d lines; want 3: %q", len(fail), fail)
+	}
+	if want := th.FG256(th.Error, "$ false"); fail[0] != want {
+		t.Errorf("failure body line = %q; want %q", fail[0], want)
+	}
+	if want := th.FG256(th.Muted, "[exit 1] Took 0.0s"); fail[2] != want {
+		t.Errorf("failure footer = %q; want %q", fail[2], want)
+	}
+}
diff --git a/packages/tui/render.go b/packages/tui/render.go index f955da7..59d2dda 100644 --- a/packages/tui/render.go +++ b/packages/tui/render.go @@ -122,14 +122,16 @@ func (r *Renderer) Resize(cols, rows int) { r.logViewportTop = 0 r.logHardwareRow = 0 r.logInit = false - if r.out != nil { + if r.out != nil && !r.keepScrollback { // Clear both screen and (where safe) scrollback so stale // content from the old width doesn't bleed through. Move // to (1,1) so the next DrawLog/writeFull starts from a // clean slate. Use the no-home variant: the explicit // MoveTo below sets the cursor without triggering VS // Code's viewport-snap. See Renderer.keepScrollback for - // why we skip \x1b[3J on VS Code's terminal. + // why we skip \x1b[3J on VS Code's terminal. On VS Code we + // skip the eager wipe entirely and let the next DrawLog + // repaint in place (see Clear). _, _ = io.WriteString(r.out, SeqDeleteKittyImages+SeqClearScreenNoHome+r.clearScrollbackSeq()+MoveTo(1, 1)) } } @@ -148,6 +150,15 @@ func (r *Renderer) Clear() { r.logViewportTop = 0 r.logHardwareRow = 0 r.logInit = false + if r.keepScrollback { + // Don't eagerly emit \x1b[2J here: on VS Code's xterm.js that + // scrolls the current frame up into scrollback, leaving a + // duplicate above the repaint. Just drop the diff state; the + // next DrawLog's writeFull does an in-place viewport clear + // (home + erase-to-end) that overwrites the old frame without + // duplicating it.
+ return + } _, _ = io.WriteString(r.out, SeqDeleteKittyImages+SeqClearScreenNoHome+r.clearScrollbackSeq()+MoveTo(1, 1)) } @@ -163,6 +174,12 @@ func (r *Renderer) clearScrollbackSeq() string { return SeqClearScrollback } +// KeepsScrollback reports whether this renderer suppresses the +// scrollback-clear escape (true under VS Code's terminal). Callers +// use it to pick a viewport-safe full repaint (Invalidate) over a +// scrollback-clearing one (Clear) when redrawing overlays. +func (r *Renderer) KeepsScrollback() bool { return r.keepScrollback } + // Invalidate forces a full repaint on the next Draw without clearing the // whole terminal first. Useful when the cached diff is unreliable but a // visible full-screen flash would be too distracting. @@ -452,9 +469,23 @@ func (r *Renderer) DrawLog(chat, bottom []string, cursorBottomRow, cursorCol int writeFull := func(clear bool) { if clear { w.WriteString(SeqDeleteKittyImages) - w.WriteString(SeqClearScreenNoHome) - w.WriteString(r.clearScrollbackSeq()) - w.WriteString(MoveTo(1, 1)) + if r.keepScrollback { + // VS Code's xterm.js scrolls the visible content up into + // scrollback on \x1b[2J, which duplicates the frame (the + // old paint stays above the new one). Home to the + // viewport top and erase-to-end (\x1b[0J) instead: that + // clears the visible screen in place without pushing the + // previous frame into scrollback. We still cannot drop + // existing scrollback (\x1b[3J snaps the viewport there), + // but a full repaint no longer stacks a fresh copy below + // the old one. 
+ w.WriteString(SeqCursorHome) + w.WriteString(SeqClearToEnd) + } else { + w.WriteString(SeqClearScreenNoHome) + w.WriteString(r.clearScrollbackSeq()) + w.WriteString(MoveTo(1, 1)) + } } for idx, line := range lines { if idx > 0 { diff --git a/packages/tui/terminal.go b/packages/tui/terminal.go index 91568c1..1e49014 100644 --- a/packages/tui/terminal.go +++ b/packages/tui/terminal.go @@ -95,6 +95,13 @@ const ( // cursor identically without triggering that snap. SeqClearScreenNoHome = "\x1b[2J" SeqClearScrollback = "\x1b[3J" + // SeqCursorHome moves to the top-left of the visible viewport. + // SeqClearToEnd erases from the cursor to the end of the screen + // without scrolling content into scrollback (unlike \x1b[2J on + // xterm.js). Together they clear the visible frame in place, which + // the VS Code terminal needs for duplicate-free full repaints. + SeqCursorHome = "\x1b[H" + SeqClearToEnd = "\x1b[0J" SeqClearLine = "\x1b[2K" SeqResetScrollRegion = "\x1b[r" SeqDeleteKittyImages = "\x1b_Ga=d\x1b\\" diff --git a/packages/tui/view.go b/packages/tui/view.go index 967f9ae..d9ef79d 100644 --- a/packages/tui/view.go +++ b/packages/tui/view.go @@ -245,10 +245,7 @@ func (v *View) BuildLive(width int) []string { finalised := map[string]bool{} for _, m := range v.Messages { for _, c := range m.Content { - switch b := c.(type) { - case provider.ToolCallBlock: - finalised[b.ID] = true - case provider.ToolResultBlock: + if b, ok := c.(provider.ToolResultBlock); ok { finalised[b.CallID] = true } } @@ -370,21 +367,15 @@ func (v *View) BuildWithAnchors(width int) ([]string, []MessageAnchor) { } out = append(out, "") } - // Live tool-call overlay: skip any entry whose assistant - // tool_use block OR tool_result has already made it into the - // transcript. The EvAssistantMessage for a tool-use turn - // lands BEFORE executeTools runs, so between that moment and - // the tool-result being appended the overlay and the - // finalised transcript both render the same call. 
Checking - // for either side of the pair suppresses the duplicate in - // both windows. + // Live tool-call overlay: keep the in-flight box visible after + // the assistant tool_use is appended, then hide it only when the + // matching tool_result reaches the transcript. Assistant tool_use + // blocks render no rows of their own, so suppressing the overlay + // at that point makes the box disappear while the tool is running. finalised := map[string]bool{} for _, m := range v.Messages { for _, c := range m.Content { - switch b := c.(type) { - case provider.ToolCallBlock: - finalised[b.ID] = true - case provider.ToolResultBlock: + if b, ok := c.(provider.ToolResultBlock); ok { finalised[b.CallID] = true } } @@ -712,20 +703,20 @@ func (v *View) renderToolCall(tc ToolCallView, width int) []string { // here would double the gap during streaming and visibly tighten // when the overlay disappears. - // Streaming body (write/edit): top edge with the label, body - // rows wrapped with vertical edges, bottom edge to close the - // box. When the call finalises, the live overlay disappears - // the same frame the transcript renders the closed box, so - // there's no visible hop. - if tc.Streaming && tc.Result == "" { - lines = append(lines, toolBoxTop(v.Theme, label, width)) - lines = append(lines, toolBoxSide(v.Theme, "", width)) + // Live body (write/edit): keep the streamed preview visible until + // a real tool result arrives. The provider can finish the tool_use + // JSON before zot has executed the tool, so keying this on + // tc.Streaming makes write/edit boxes collapse for a moment between + // EvToolUseEnd and EvToolResult. + if tc.Result == "" { if body := v.renderLiveToolBody(tc, width); len(body) > 0 { + lines = append(lines, toolBoxTop(v.Theme, label, width)) + lines = append(lines, toolBoxSide(v.Theme, "", width)) lines = append(lines, body...) 
+ lines = append(lines, toolBoxSide(v.Theme, "", width)) + lines = append(lines, toolBoxBottom(v.Theme, width)) + return lines } - lines = append(lines, toolBoxSide(v.Theme, "", width)) - lines = append(lines, toolBoxBottom(v.Theme, width)) - return lines } // Finished tool call with no body: just the labelled top edge diff --git a/packages/tui/view_tool_overlay_test.go b/packages/tui/view_tool_overlay_test.go new file mode 100644 index 0000000..47c3942 --- /dev/null +++ b/packages/tui/view_tool_overlay_test.go @@ -0,0 +1,94 @@ +package tui + +import ( + "encoding/json" + "strings" + "testing" + + "github.com/patriceckhart/zot/packages/provider" +) + +func TestLiveToolOverlayRemainsAfterAssistantToolUse(t *testing.T) { + args := json.RawMessage(`{"command":"sleep 1"}`) + v := View{ + Theme: Dark, + Messages: []provider.Message{ + { + Role: provider.RoleAssistant, + Content: []provider.Content{ + provider.ToolCallBlock{ID: "toolu_1", Name: "bash", Arguments: args}, + }, + }, + }, + ToolCalls: []ToolCallView{ + {ID: "toolu_1", Name: "bash", Args: ShortArgs("bash", args), Done: false}, + }, + } + + plain := stripANSI(strings.Join(v.Build(80), "\n")) + if !strings.Contains(plain, "bash sleep 1") { + t.Fatalf("live tool overlay disappeared after assistant tool_use was appended:\n%s", plain) + } +} + +func TestLiveToolOverlayKeepsWritePreviewAfterArgsEnd(t *testing.T) { + args := json.RawMessage(`{"path":"/tmp/sample.ts","content":"export const n = 1\n"}`) + v := View{ + Theme: Dark, + Messages: []provider.Message{ + { + Role: provider.RoleAssistant, + Content: []provider.Content{ + provider.ToolCallBlock{ID: "toolu_1", Name: "write", Arguments: args}, + }, + }, + }, + ToolCalls: []ToolCallView{ + { + ID: "toolu_1", + Name: "write", + Args: ShortArgs("write", args), + Streaming: false, + RawJSONBuf: string(args), + LivePath: "/tmp/sample.ts", + }, + }, + } + + plain := stripANSI(strings.Join(v.Build(80), "\n")) + if !strings.Contains(plain, "export const n = 1") { + 
t.Fatalf("write preview collapsed after tool args ended but before tool_result arrived:\n%s", plain) + } +} + +func TestLiveToolOverlayHidesAfterToolResult(t *testing.T) { + args := json.RawMessage(`{"command":"sleep 1"}`) + v := View{ + Theme: Dark, + Messages: []provider.Message{ + { + Role: provider.RoleAssistant, + Content: []provider.Content{ + provider.ToolCallBlock{ID: "toolu_1", Name: "bash", Arguments: args}, + }, + }, + { + Role: provider.RoleTool, + Content: []provider.Content{ + provider.ToolResultBlock{ + CallID: "toolu_1", + Content: []provider.Content{provider.TextBlock{Text: "done"}}, + }, + }, + }, + }, + ToolCalls: []ToolCallView{ + {ID: "toolu_1", Name: "bash", Args: ShortArgs("bash", args), Result: "done", Done: true}, + }, + } + + plain := stripANSI(strings.Join(v.BuildLive(80), "\n")) + if strings.Contains(plain, "bash sleep 1") { + t.Fatalf("live tool overlay still rendered after tool_result was appended:\n%s", plain) + } +}