Compare commits

...

12 commits

Author SHA1 Message Date
patriceckhart
b325477870 Update zot welcome tagline
Some checks failed
ci / test (windows-latest) (push) Has been cancelled
ci / test (macos-latest) (push) Has been cancelled
ci / test (ubuntu-latest) (push) Has been cancelled
2026-06-24 07:33:11 +02:00
mi-skam
198b8cd284 Add opencode-go/qwen3.7-plus to built-in catalog
Some checks are pending
ci / test (macos-latest) (push) Waiting to run
ci / test (ubuntu-latest) (push) Waiting to run
ci / test (windows-latest) (push) Waiting to run
Adds Qwen3.7 Plus for both opencode-go and opencode providers,
following the same pattern as qwen3.5-plus and qwen3.6-plus.
Context window is 1M tokens per the OpenRouter model card.
2026-06-23 12:47:08 +02:00
patriceckhart
cf7ddf5322 Add quick model switch shortcuts (Ctrl+1..9) with /settings model shortcuts sub-view
Some checks are pending
ci / test (macos-latest) (push) Waiting to run
ci / test (ubuntu-latest) (push) Waiting to run
ci / test (windows-latest) (push) Waiting to run
2026-06-23 06:56:24 +02:00
patriceckhart
4bec50ae9c fix(swarm): inherit host model provider for auto-spawn
Some checks are pending
ci / test (macos-latest) (push) Waiting to run
ci / test (ubuntu-latest) (push) Waiting to run
ci / test (windows-latest) (push) Waiting to run
2026-06-22 17:39:21 +02:00
patriceckhart
6899695320 Merge pull request #43 from mi-skam/clipboard-paste-support
Some checks are pending
ci / test (macos-latest) (push) Waiting to run
ci / test (ubuntu-latest) (push) Waiting to run
ci / test (windows-latest) (push) Waiting to run
2026-06-22 07:54:35 +02:00
patriceckhart
c56e04afd5 fix(tui): correct btw cursor offset
Only count the dialog frame padding row when padDialogFrame actually inserts it, so the btw editor cursor stays on the input row after a turn.

Co-authored-by: mi-skam <40042054+mi-skam@users.noreply.github.com>
2026-06-22 07:47:19 +02:00
patriceckhart
ab3d4c5ced fix: harden clipboard image paste
Preserve prompt whitespace when stripping clipboard image markers, accept osascript warning noise, support clipboard image file paths, and keep the existing user bubble rendering.

Co-authored-by: mi-skam <40042054+mi-skam@users.noreply.github.com>
2026-06-22 07:47:19 +02:00
mi-skam
1f663cb867
fix: use osascript for macos clipboard image paste 2026-06-21 22:31:34 +02:00
patriceckhart
821f51a0c3 Merge pull request #44 from patriceckhart/fix-swarm-tool-loop-turn-end
Some checks are pending
ci / test (macos-latest) (push) Waiting to run
ci / test (ubuntu-latest) (push) Waiting to run
ci / test (windows-latest) (push) Waiting to run
2026-06-21 17:15:42 +02:00
mi-skam
2a4b07839d
fix(swarm): ignore tool-loop turn_end for completion 2026-06-21 15:25:47 +02:00
mi-skam
4a6d6915ca
Add clipboard paste support 2026-06-21 14:47:06 +02:00
patriceckhart
1a24a204b4 fix: correct image MIME by content and re-encode for kitty
Some checks are pending
ci / test (macos-latest) (push) Waiting to run
ci / test (ubuntu-latest) (push) Waiting to run
ci / test (windows-latest) (push) Waiting to run
read tool and Anthropic builder derived an image's media type from its
file extension, so a .png file containing JPEG bytes was declared
image/png. Anthropic sniffs the real bytes and 400s the whole request
on a mismatch, breaking the session and making it impossible to resume.

- read tool now sniffs the real format from magic bytes (sniffImageMIME)
- Anthropic request builder reconciles declared MIME against the actual
  bytes on every outbound image, repairing already-persisted sessions on
  continue/resume
- kitty renderer re-encodes non-PNG images to PNG (f=100 is PNG-only),
  fixing empty image boxes for JPEG/GIF screenshots
2026-06-20 18:18:52 +02:00
27 changed files with 1343 additions and 103 deletions

View file

@ -297,12 +297,13 @@ Background subagents that run alongside your main session. Each one is a separat
### `/settings`
Opens a dialog with every persistent setting. `up`/`down` to navigate, `enter` or `space` to change the selected row, `esc` to close. Changes are written to `$ZOT_HOME/config.json` and take effect on the next turn (no restart needed). Current settings:
Opens a dialog with every persistent setting. `up`/`down` to navigate, `enter` or `space` to change the selected row, `esc` to close (rows that open a sub-view, like model shortcuts, use `esc` to go back one level first). Changes are written to `$ZOT_HOME/config.json` and take effect on the next turn (no restart needed). Current settings:
- **render images when supported** — draw screenshots / `read`-returned images inline using the terminal's image protocol, or fall back to a text placeholder. Auto-detected from `TERM_PROGRAM`; the toggle overrides the detection. The row is greyed out and forced off on terminals that don't speak any image protocol.
- **auto-swarm** — let the main agent spawn background sub-agents in parallel via a built-in `swarm_spawn` tool. Off by default. When on, the tool is registered with the running agent, the system prompt gains a short addendum telling the model to delegate independent sub-tasks proactively, and zot watches every sub-agent the main agent spawns. As soon as the last sub-agent in a batch finishes its initial task, an `[auto-swarm update]` message is injected back into the chat with each agent's status / task / transcript tail, so the main agent can summarise the collective outcome. Flipping off mid-session removes the tool from the live agent and strips the addendum on the next turn — the model stops trying to delegate. See `/swarm` for the dashboard that lets you monitor, message, kill, or remove the spawned agents.
- **thinking level** — choose reasoning for supported models: off (default; no reasoning), minimum (~1k tokens), low (~2k), medium (~8k), high (~16k), maximum (~32k). The change is persisted to `config.json` and applied to the running agent's next model call.
- **color theme** — choose the built-in auto/dark/light theme or any JSON theme discovered under `$ZOT_HOME/themes` or a loaded extension. Theme files can override any subset of UI colors, syntax colors, and spinner frames/messages. Changes apply immediately; if a selected theme file is deleted, zot resets to auto. See [docs/themes.md](docs/themes.md).
- **model shortcuts** — opens a sub-view with nine slots (`model 1` ... `model 9`). `enter` on a slot opens the same `/model` selector and binds the chosen provider/model to that slot; `backspace` clears a slot. Once assigned, press `Ctrl+1` ... `Ctrl+9` from the editor to switch the active model instantly (the same cross-provider swap `/model` performs, transcript and cost carried over). Assigning a shortcut does not change the current model. Shortcuts are skipped while a turn is running.
### `/skills`
@ -576,7 +577,9 @@ Slash commands also work while the agent is busy. Read-only ones (`/help`, `/jum
| `ctrl+c` | Clear the input and queue (while idle) or arm the exit hint (while busy). Press again within 2s to exit. Use `esc` to cancel a running turn. |
| `ctrl+d` | Exit on empty input. |
| `ctrl+l` | Redraw the screen. |
| `ctrl+v` | Paste an image from the system clipboard through zot on macOS. Images are saved as temporary PNGs and attached to the next prompt. Other platforms currently use a no-op stub. Use your terminal/OS paste shortcut for text. |
| `ctrl+o` | Expand or collapse long tool results (read, write, edit, bash outputs over ~12 lines). |
| `ctrl+1` ... `ctrl+9` | Switch to the model bound to that quick-model slot (configured in `/settings` -> model shortcuts). Shows an error instead of switching while a turn is running or when the slot is unassigned. |
| `@` | Open the file picker. Browse files and directories in the working directory. |
### File picker (`@`)

View file

@ -340,7 +340,7 @@ func PrintHelp(version string) {
if useColor {
headline = th.AccentBar(th.Assistant) + assistant(tui.Bold("i'm zot. yet another coding agent harness."))
} else {
headline = "i'm zot. yet another coding agent harness."
headline = "zot. yet another coding agent harness."
}
fmt.Fprintln(os.Stderr, headline)
fmt.Fprintln(os.Stderr, muted("ask anything, or type /help inside the tui to see commands."))

View file

@ -484,9 +484,11 @@ func runInteractive(ctx context.Context, args Args, version string) error {
return reg
}
reg["swarm_spawn"] = &tools.SwarmSpawnTool{
Swarm: swarmMgr,
Enabled: AutoSwarmEnabled,
OnSpawned: onSpawnedSwarm,
Swarm: swarmMgr,
Enabled: AutoSwarmEnabled,
DefaultModel: func() string { return r.Model },
DefaultProvider: func() string { return r.Provider },
OnSpawned: onSpawnedSwarm,
}
return reg
}
@ -929,6 +931,10 @@ func runInteractive(ctx context.Context, args Args, version string) error {
}()
initialCfg, _ := LoadConfig()
quickModelShortcuts := make([]modes.QuickModelShortcut, len(initialCfg.QuickModelShortcuts))
for idx, s := range initialCfg.QuickModelShortcuts {
quickModelShortcuts[idx] = modes.QuickModelShortcut{Provider: s.Provider, Model: s.Model}
}
theme, _, themeErr := tui.DetectThemeWithCustom(ZotHome(), initialCfg.Theme, 80*time.Millisecond)
if themeErr != nil {
fmt.Fprintln(os.Stderr, "theme load:", themeErr)
@ -956,6 +962,7 @@ func runInteractive(ctx context.Context, args Args, version string) error {
Theme: theme,
InlineImagesEnabled: initialCfg.InlineImagesEnabled,
AutoSwarmEnabled: initialCfg.AutoSwarmEnabled,
QuickModelShortcuts: quickModelShortcuts,
RecursiveFileSuggest: initialCfg.RecursiveFileSuggest,
RespectGitignore: initialCfg.RespectGitignore,
ThemeName: initialCfg.Theme,

View file

@ -15,6 +15,12 @@ import (
"github.com/patriceckhart/zot/packages/provider/auth"
)
// QuickModelShortcut is one configured keyboard shortcut slot: the
// provider/model pair persisted for that slot. It serializes under the
// JSON keys "provider" and "model".
type QuickModelShortcut struct {
Provider string `json:"provider"`
Model string `json:"model"`
}
// Config is the persisted user configuration.
type Config struct {
Provider string `json:"provider"`
@ -23,6 +29,10 @@ type Config struct {
Temperature *float32 `json:"temperature,omitempty"`
Theme string `json:"theme"`
// QuickModelShortcuts maps slots 1-9 to provider/model pairs used by
// Ctrl+1..9. Cmd+1..9 may also work on terminals that forward Super.
QuickModelShortcuts []QuickModelShortcut `json:"quick_model_shortcuts,omitempty"`
// InlineImagesEnabled controls whether zot draws screenshots inline
// when the terminal supports an image protocol. nil/missing means
// auto (enabled when supported); false disables; true forces the

View file

@ -378,12 +378,14 @@ func (d *btwDialog) CursorPos(width int) (row, col int) {
}
// Reproduce render's structure to find where the editor sits.
// Note: the parent (interactive.go) wraps every dialog with
// padDialogFrame, which injects a blank row right after the
// frame header. We have to count that injected row here too;
// otherwise the reported cursor lands one row above the editor.
// padDialogFrame. It only injects a blank row after the frame
// header when Render did not already put one there. With existing
// turns or a loading spinner, Render's first body row is already
// blank, so counting an extra pad row would place the cursor one
// row too low.
editorOffset := 1 // header
editorOffset++ // padDialogFrame's post-header blank
if len(d.turns) == 0 && !d.loading {
editorOffset++ // padDialogFrame's post-header blank
editorOffset++ // muted "ask anything..." line
}
for _, t := range d.turns {

View file

@ -0,0 +1,61 @@
package modes
import (
"strings"
"github.com/patriceckhart/zot/packages/provider"
)
// clipboardImageAttachment pairs a pasted image with the textual marker
// that stands in for it inside the prompt text.
type clipboardImageAttachment struct {
// Marker is the placeholder text searched for (and stripped from) the prompt.
Marker string
// Image is the block attached when Marker is still present in the prompt.
Image provider.ImageBlock
}
// preparePromptWithClipboardImages resolves pending clipboard attachments
// against the prompt text.
//
// With no pending attachments the text is returned untouched together with
// a nil image slice. Otherwise every attachment whose non-empty marker still
// occurs in the text has all of its marker occurrences stripped and its
// image appended once, in pending order. Attachments whose marker is no
// longer in the text are dropped. The resulting text is whitespace-trimmed
// at both ends.
func preparePromptWithClipboardImages(text string, pending []clipboardImageAttachment) (string, []provider.ImageBlock) {
	if len(pending) == 0 {
		return text, nil
	}

	attached := make([]provider.ImageBlock, 0, len(pending))
	cleaned := text
	for idx := range pending {
		att := &pending[idx]
		if att.Marker != "" && strings.Contains(cleaned, att.Marker) {
			cleaned = removeClipboardMarker(cleaned, att.Marker)
			attached = append(attached, att.Image)
		}
	}
	return strings.TrimSpace(cleaned), attached
}
// removeClipboardMarker deletes every occurrence of marker from text and
// tidies the whitespace left behind so that removing a marker does not
// leave a doubled or dangling space:
//
//   - marker followed by a space/tab, and preceded by a space/tab or sitting
//     at the start of the text/line: the following whitespace byte is removed
//     along with the marker;
//   - marker preceded by a space/tab and sitting at the end of the text/line:
//     the preceding whitespace byte is removed along with the marker;
//   - anything else: only the marker itself is removed.
//
// An empty marker is a no-op; without that guard strings.Index would keep
// matching at offset 0 and the loop would never terminate.
func removeClipboardMarker(text, marker string) string {
	if marker == "" {
		return text
	}
	for {
		idx := strings.Index(text, marker)
		if idx < 0 {
			return text
		}
		end := idx + len(marker)

		prevInline := idx > 0 && isInlineWhitespace(text[idx-1])
		nextInline := end < len(text) && isInlineWhitespace(text[end])
		// Start/end of the text count as line boundaries.
		prevLineBreak := idx == 0 || isLineBreak(text[idx-1])
		nextLineBreak := end == len(text) || isLineBreak(text[end])

		switch {
		case nextInline && (prevInline || prevLineBreak):
			// Swallow the single whitespace byte after the marker.
			text = text[:idx] + text[end+1:]
		case prevInline && nextLineBreak:
			// Swallow the single whitespace byte before the marker.
			text = text[:idx-1] + text[end:]
		default:
			text = text[:idx] + text[end:]
		}
	}
}

// isInlineWhitespace reports whether b is a space or a horizontal tab.
func isInlineWhitespace(b byte) bool {
	return b == ' ' || b == '\t'
}

// isLineBreak reports whether b is a line feed or a carriage return.
func isLineBreak(b byte) bool {
	return b == '\n' || b == '\r'
}

View file

@ -0,0 +1,120 @@
package modes
import (
"strings"
"testing"
"github.com/patriceckhart/zot/packages/provider"
)
// testClipboardImage builds a clipboard attachment fixture: an image/png
// block whose payload is the bytes of data, keyed by marker.
func testClipboardImage(marker string, data string) clipboardImageAttachment {
	img := provider.ImageBlock{
		MimeType: "image/png",
		Data:     []byte(data),
	}
	return clipboardImageAttachment{Marker: marker, Image: img}
}
// TestPreparePromptWithClipboardImagesStripsPresentMarker checks that a
// marker embedded mid-sentence is removed without leaving a doubled space
// and that the matching image is attached.
func TestPreparePromptWithClipboardImagesStripsPresentMarker(t *testing.T) {
	const want = "describe this please"
	attachments := []clipboardImageAttachment{testClipboardImage("[clipboard image #1]", "png-1")}

	got, imgs := preparePromptWithClipboardImages("describe this [clipboard image #1] please", attachments)

	if got != want {
		t.Fatalf("text = %q, want %q", got, want)
	}
	if n := len(imgs); n != 1 {
		t.Fatalf("len(images) = %d, want 1", n)
	}
	if payload := string(imgs[0].Data); payload != "png-1" {
		t.Fatalf("image data = %q, want png-1", payload)
	}
}
// TestPreparePromptWithClipboardImagesAllowsImageOnlyPrompt checks that a
// prompt consisting solely of a marker yields empty text plus the image.
func TestPreparePromptWithClipboardImagesAllowsImageOnlyPrompt(t *testing.T) {
	const marker = "[clipboard image #1]"
	attachments := []clipboardImageAttachment{testClipboardImage(marker, "png-1")}

	got, imgs := preparePromptWithClipboardImages(marker, attachments)

	if got != "" {
		t.Fatalf("text = %q, want empty", got)
	}
	if n := len(imgs); n != 1 {
		t.Fatalf("len(images) = %d, want 1", n)
	}
}
// TestPreparePromptWithClipboardImagesIgnoresDeletedMarker checks that a
// pending image whose marker is no longer in the prompt is not attached and
// the text is left as written.
func TestPreparePromptWithClipboardImagesIgnoresDeletedMarker(t *testing.T) {
	const prompt = "describe this from memory"
	attachments := []clipboardImageAttachment{testClipboardImage("[clipboard image #1]", "png-1")}

	got, imgs := preparePromptWithClipboardImages(prompt, attachments)

	if got != prompt {
		t.Fatalf("text = %q, want unchanged", got)
	}
	if n := len(imgs); n != 0 {
		t.Fatalf("len(images) = %d, want 0", n)
	}
}
// TestPreparePromptWithClipboardImagesHandlesMultipleImagesInPasteOrder
// checks that images are attached in the order they were pasted (pending
// order), even when their markers appear in a different order in the text.
func TestPreparePromptWithClipboardImagesHandlesMultipleImagesInPasteOrder(t *testing.T) {
	const want = "compare with"
	attachments := []clipboardImageAttachment{
		testClipboardImage("[clipboard image #1]", "png-1"),
		testClipboardImage("[clipboard image #2]", "png-2"),
	}

	got, imgs := preparePromptWithClipboardImages("compare [clipboard image #2] with [clipboard image #1]", attachments)

	if got != want {
		t.Fatalf("text = %q, want %q", got, want)
	}
	if n := len(imgs); n != 2 {
		t.Fatalf("len(images) = %d, want 2", n)
	}
	first, second := string(imgs[0].Data), string(imgs[1].Data)
	if first != "png-1" || second != "png-2" {
		t.Fatalf("images not attached in paste order: %q, %q", first, second)
	}
}
// TestPreparePromptWithClipboardImagesDuplicateMarkerAttachesOnce checks
// that a marker repeated in the prompt is stripped everywhere while its
// image is attached only once.
func TestPreparePromptWithClipboardImagesDuplicateMarkerAttachesOnce(t *testing.T) {
	const want = "and again"
	attachments := []clipboardImageAttachment{testClipboardImage("[clipboard image #1]", "png-1")}

	got, imgs := preparePromptWithClipboardImages("[clipboard image #1] and again [clipboard image #1]", attachments)

	if strings.Contains(got, "[clipboard image #1]") {
		t.Fatalf("text still contains marker: %q", got)
	}
	if got != want {
		t.Fatalf("text = %q, want %q", got, want)
	}
	if n := len(imgs); n != 1 {
		t.Fatalf("len(images) = %d, want 1", n)
	}
}
// TestPreparePromptWithClipboardImagesNoPendingPreservesWhitespace checks
// that, with no pending attachments, the prompt is returned byte-for-byte
// (including indentation and the trailing newline) and no images come back.
func TestPreparePromptWithClipboardImagesNoPendingPreservesWhitespace(t *testing.T) {
	prompt := "please review:\n\nfunc main() {\n\tfmt.Println(\"hi\")\n}\n"

	got, imgs := preparePromptWithClipboardImages(prompt, nil)

	if got != prompt {
		t.Fatalf("text changed without pending images:\n got %q\nwant %q", got, prompt)
	}
	if imgs != nil {
		t.Fatalf("images = %#v, want nil", imgs)
	}
}
// TestPreparePromptWithClipboardImagesPreservesMultilinePrompt checks that
// stripping a marker at the end of a line removes only the marker and the
// space before it, leaving the blank line and tab indentation intact.
func TestPreparePromptWithClipboardImagesPreservesMultilinePrompt(t *testing.T) {
	attachments := []clipboardImageAttachment{testClipboardImage("[clipboard image #1]", "png-1")}
	prompt := "compare this image [clipboard image #1]\n\nwith this code:\n\treturn 1"
	expected := "compare this image\n\nwith this code:\n\treturn 1"

	got, imgs := preparePromptWithClipboardImages(prompt, attachments)

	if got != expected {
		t.Fatalf("text = %q, want %q", got, expected)
	}
	if n := len(imgs); n != 1 {
		t.Fatalf("len(images) = %d, want 1", n)
	}
}

View file

@ -6,6 +6,8 @@ import (
"fmt"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"sync"
"time"
@ -58,6 +60,12 @@ type InteractiveConfig struct {
// ThemeName mirrors the persisted config theme value. Empty means auto.
ThemeName string
// QuickModelShortcuts maps slots 1-9 to provider/model pairs. The
// shortcuts are Ctrl+1..9. Cmd+1..9 may also work when the terminal
// forwards Command/Super keypresses, but Ctrl is the displayed chord.
QuickModelShortcuts []QuickModelShortcut
// ExtensionThemes returns themes bundled with loaded extensions.
ExtensionThemes func() []tui.ThemeOption
@ -234,8 +242,15 @@ type chatCacheKey struct {
tailLimit int
}
// QuickModelShortcut is one configured quick model switch slot: the
// provider/model pair a shortcut switches to. An entry with an empty
// Provider or Model is treated as unassigned by the shortcut handlers.
type QuickModelShortcut struct {
Provider string
Model string
}
// SettingsStore persists user-toggleable settings surfaced by /settings.
type SettingsStore interface {
SetQuickModelShortcut(slot int, providerName, model string) error
SetInlineImages(enabled bool) error
SetAutoSwarm(enabled bool) error
SetRecursiveFileSuggest(enabled bool) error
@ -353,6 +368,7 @@ type Interactive struct {
logoutDialog *logoutDialog
telegramDialog *telegramDialog
settingsDialog *settingsDialog
quickModelAssign int
telegramBridge *telegram.Bridge
sessionOpsDialog *sessionOpsDialog
sessionTreeDialog *sessionTreeDialog
@ -424,6 +440,8 @@ type Interactive struct {
// them (or when the dialog is dismissed via esc).
pendingRescuePrompt string
pendingRescueImages []provider.ImageBlock
clipboardImages []clipboardImageAttachment
}
// welcomeVersionDuration is how long the welcome banner shows the
@ -506,6 +524,9 @@ func NewInteractive(cfg InteractiveConfig) *Interactive {
i.view.TailLimit = initialResumeTailLimit
}
}
if cfg.AutoSwarmEnabled != nil && *cfg.AutoSwarmEnabled {
i.applyAutoSwarmTool(true)
}
return i
}
@ -1726,11 +1747,20 @@ func (i *Interactive) handleKey(ctx context.Context, k tui.Key) (done bool) {
if i.modelDialog.Active() {
if k.Kind == tui.KeyCtrlC {
i.modelDialog.Close()
i.quickModelAssign = 0
return false
}
act := i.modelDialog.HandleKey(k)
if act.Close {
i.quickModelAssign = 0
}
if act.Select {
i.applyModelSelection(act.Provider, act.Model)
if i.quickModelAssign > 0 {
i.applyQuickModelSelection(i.quickModelAssign, act.Provider, act.Model)
i.quickModelAssign = 0
} else {
i.applyModelSelection(act.Provider, act.Model)
}
}
return false
}
@ -1813,6 +1843,9 @@ func (i *Interactive) handleKey(ctx context.Context, k tui.Key) (done bool) {
return false
}
act := i.settingsDialog.HandleKey(k)
if act.ModelShortcutSlot > 0 {
i.openQuickModelPicker(act.ModelShortcutSlot)
}
if act.Toggle {
i.applySettingChange(act)
}
@ -1912,6 +1945,11 @@ func (i *Interactive) handleKey(ctx context.Context, k tui.Key) (done bool) {
return false
}
if slot := quickModelShortcutSlot(k); slot > 0 {
i.applyQuickModelShortcut(slot)
return false
}
// Global keys.
switch k.Kind {
case tui.KeyCtrlC:
@ -1952,6 +1990,7 @@ func (i *Interactive) handleKey(ctx context.Context, k tui.Key) (done bool) {
hadInput := !i.ed.IsEmpty() || len(i.queued) > 0 || pending > 0
if hadInput {
i.ed.Clear()
i.clipboardImages = nil
i.suggest.Reset()
if ag != nil {
ag.DrainQueuedMessages()
@ -2021,6 +2060,9 @@ func (i *Interactive) handleKey(ctx context.Context, k tui.Key) (done bool) {
i.rend.Clear()
i.invalidate()
return false
case tui.KeyPasteClipboard:
i.pasteClipboard()
return false
case tui.KeyCtrlO:
// Toggle expansion of collapsed tool results. Affects every tool
// call in the transcript — press again to re-collapse.
@ -2215,6 +2257,9 @@ func (i *Interactive) handleKey(ctx context.Context, k tui.Key) (done bool) {
i.inputHistoryIndex = -1
}
if k.Kind == tui.KeyEsc {
i.clipboardImages = nil
}
if submit := i.ed.HandleKey(k); submit {
// SubmitValue() expands any [pasted text #N +L lines]
// placeholders back into their bodies; the raw Value()
@ -2222,9 +2267,11 @@ func (i *Interactive) handleKey(ctx context.Context, k tui.Key) (done bool) {
text := strings.TrimRight(i.ed.SubmitValue(), "\n")
// Expand [file:name] and [dir:name/] chips to full paths.
text = expandFileChips(text, i.cfg.CWD)
if text == "" {
text, images := preparePromptWithClipboardImages(text, i.clipboardImages)
if text == "" && len(images) == 0 {
return false
}
i.clipboardImages = nil
i.ed.Clear()
i.inputHistoryIndex = -1
i.suggest.Reset()
@ -2291,6 +2338,13 @@ func (i *Interactive) handleKey(ctx context.Context, k tui.Key) (done bool) {
ag := i.agent
i.mu.Unlock()
if busy {
if len(images) > 0 {
i.mu.Lock()
i.statusErr = "can't queue clipboard images while a turn is running; wait for the current turn to finish"
i.mu.Unlock()
i.invalidate()
return false
}
if ag != nil {
ag.QueueMessage(text)
} else {
@ -2301,11 +2355,39 @@ func (i *Interactive) handleKey(ctx context.Context, k tui.Key) (done bool) {
i.invalidate()
return false
}
i.startTurn(ctx, text)
i.startTurnWithImages(ctx, text, images)
}
return false
}
// pasteClipboard tries to read an image from the system clipboard.
//
// On success the image is queued as a pending attachment and a numbered
// "[clipboard image #N]" marker (followed by a space) is inserted into the
// editor. N is one more than the number of images already pending. A read
// error or a clipboard without an image is reported via statusErr. statusOK
// is cleared in every case.
func (i *Interactive) pasteClipboard() {
	// The clipboard read happens before taking the lock.
	_, png, found, err := tui.ReadClipboardImagePNG()

	i.mu.Lock()
	defer i.mu.Unlock()

	i.statusOK = ""
	switch {
	case err != nil:
		i.statusErr = "clipboard paste failed: " + err.Error()
	case !found:
		i.statusErr = "clipboard does not contain an image"
	default:
		marker := fmt.Sprintf("[clipboard image #%d]", len(i.clipboardImages)+1)
		i.clipboardImages = append(i.clipboardImages, clipboardImageAttachment{
			Marker: marker,
			Image:  provider.ImageBlock{MimeType: "image/png", Data: png},
		})
		i.ed.Insert(marker + " ")
		i.statusErr = ""
	}
}
func (i *Interactive) handleInputHistoryKey(k tui.Key) bool {
if k.Kind != tui.KeyLeft && k.Kind != tui.KeyRight {
return false
@ -2724,6 +2806,7 @@ func (i *Interactive) openSettingsDialog() {
recursiveFiles := i.cfg.RecursiveFileSuggest != nil && *i.cfg.RecursiveFileSuggest
respectGitignore := i.cfg.RespectGitignore == nil || *i.cfg.RespectGitignore
quickItems := i.quickModelSettingItems()
reasoningOptions := []settingsOption{
{value: "", label: "off", desc: "no reasoning"},
@ -2771,7 +2854,7 @@ func (i *Interactive) openSettingsDialog() {
}
}
i.settingsDialog.Open([]settingsItem{
items := []settingsItem{
{
key: "inline_images_enabled",
label: "render images when supported",
@ -2815,20 +2898,193 @@ func (i *Interactive) openSettingsDialog() {
options: themeOptions,
choice: themeChoice,
},
})
}
if len(quickItems) > 0 {
items = append(items, settingsItem{
key: "quick_models",
label: "model shortcuts",
desc: "configure " + quickModelShortcutPrefix() + "+1 through " + quickModelShortcutPrefix() + "+9 quick model switches",
children: quickItems,
})
}
i.settingsDialog.Open(items)
}
func (i *Interactive) applySettingChange(act settingsAction) {
switch act.Key {
case "reasoning":
switch {
case strings.HasPrefix(act.Key, "quick_model_"):
i.applyQuickModelSetting(act.Key, act.StringValue)
case act.Key == "reasoning":
i.applyReasoningSetting(act.StringValue)
case "theme":
case act.Key == "theme":
i.applyThemeSetting(act.StringValue)
default:
i.applySettingToggle(act.Key, act.Value)
}
}
// quickModelSettingItems returns the nine settings rows for the quick-model
// slots, in slot order. As a side effect the in-memory shortcut list is
// padded with empty entries up to nine slots when it is shorter.
func (i *Interactive) quickModelSettingItems() []settingsItem {
	const slots = 9

	if len(i.cfg.QuickModelShortcuts) < slots {
		padded := make([]QuickModelShortcut, slots)
		copy(padded, i.cfg.QuickModelShortcuts)
		i.cfg.QuickModelShortcuts = padded
	}

	rows := make([]settingsItem, slots)
	for idx := range rows {
		rows[idx] = i.quickModelSettingItem(idx + 1)
	}
	return rows
}
// quickModelSettingItem builds the settings row for one quick-model slot
// (1-based). The hint shows "provider / model" when both are set for the
// slot and "not assigned" otherwise, including when slot is out of range.
func (i *Interactive) quickModelSettingItem(slot int) settingsItem {
	var bound QuickModelShortcut
	if shortcuts := i.cfg.QuickModelShortcuts; slot >= 1 && slot <= len(shortcuts) {
		bound = shortcuts[slot-1]
	}

	status := "not assigned"
	if bound.Provider != "" && bound.Model != "" {
		status = bound.Provider + " / " + bound.Model
	}

	n := strconv.Itoa(slot)
	return settingsItem{
		key:    "quick_model_" + n,
		label:  "model " + n,
		desc:   quickModelShortcutLabel(slot) + " switches to this model. Enter opens the /model selector, Backspace clears.",
		picker: true,
		hint:   status,
	}
}
// quickModelShortcutSlot maps a key press to a quick-model slot number.
// It returns 1..9 for a rune key '1'..'9' pressed with Ctrl (on darwin,
// Ctrl or Super), and 0 for any other key.
func quickModelShortcutSlot(k tui.Key) int {
	isDigit := k.Kind == tui.KeyRune && k.Rune >= '1' && k.Rune <= '9'
	if !isDigit {
		return 0
	}

	modified := k.Ctrl
	if runtime.GOOS == "darwin" {
		// On darwin, Super is accepted in addition to Ctrl.
		modified = k.Ctrl || k.Super
	}
	if !modified {
		return 0
	}
	return int(k.Rune - '0')
}
// quickModelShortcutPrefix returns the modifier name shown to the user for
// quick-model shortcuts. It is always "Ctrl", regardless of platform.
func quickModelShortcutPrefix() string {
return "Ctrl"
}
// quickModelShortcutLabel returns the display chord for a slot: the
// shortcut prefix, a "+", and the slot number in decimal.
func quickModelShortcutLabel(slot int) string {
return quickModelShortcutPrefix() + "+" + strconv.Itoa(slot)
}
// openQuickModelPicker opens the model dialog in "assign to slot" mode.
// Slots outside 1..9 are ignored. The slot is remembered in
// quickModelAssign. The dialog is opened with the slot's currently bound
// model, or the active model when the slot has none.
func (i *Interactive) openQuickModelPicker(slot int) {
	if slot < 1 || slot > 9 {
		return
	}
	i.quickModelAssign = slot

	preselect := i.cfg.Model
	if slot <= len(i.cfg.QuickModelShortcuts) {
		if bound := i.cfg.QuickModelShortcuts[slot-1].Model; bound != "" {
			preselect = bound
		}
	}

	var providers []string
	if list := i.cfg.LoggedInProviders; list != nil {
		providers = list()
	}
	i.modelDialog.Open(preselect, providers)
}
// applyQuickModelSelection binds the provider/model chosen in the model
// dialog to the given slot by delegating to setQuickModelShortcut.
func (i *Interactive) applyQuickModelSelection(slot int, providerName, model string) {
i.setQuickModelShortcut(slot, providerName, model)
}
// applyQuickModelShortcut switches the active model to the one bound to
// slot (1..9). Out-of-range slots are ignored. While a turn is running, or
// when the slot has no provider/model bound, nothing is switched and an
// error is put in statusErr instead.
func (i *Interactive) applyQuickModelShortcut(slot int) {
	if slot < 1 || slot > 9 {
		return
	}

	// fail records msg as the status error and requests a repaint.
	fail := func(msg string) {
		i.mu.Lock()
		i.statusErr = msg
		i.statusOK = ""
		i.mu.Unlock()
		i.invalidate()
	}

	// Snapshot busy under the mutex, like the submit path does, rather
	// than reading the field unsynchronised.
	i.mu.Lock()
	busy := i.busy
	i.mu.Unlock()
	if busy {
		fail("cannot switch model while a turn is running")
		return
	}

	// A slot beyond the configured list is the same as an empty slot.
	var shortcut QuickModelShortcut
	if slot <= len(i.cfg.QuickModelShortcuts) {
		shortcut = i.cfg.QuickModelShortcuts[slot-1]
	}
	if shortcut.Provider == "" || shortcut.Model == "" {
		fail(quickModelShortcutLabel(slot) + " is not assigned")
		return
	}

	i.swapModel(shortcut.Provider, shortcut.Model, i.cfg.BuildAgentFor, false)
	i.invalidate()
}
// applyQuickModelSetting applies a settings-dialog change for a
// "quick_model_<slot>" key. value is expected as "provider<TAB>model".
// An empty value, or one without a tab, clears the slot. Keys whose suffix
// is not an integer in 1..9 are ignored.
func (i *Interactive) applyQuickModelSetting(key, value string) {
	slot, err := strconv.Atoi(strings.TrimPrefix(key, "quick_model_"))
	if err != nil || slot < 1 || slot > 9 {
		return
	}

	providerName, model, found := strings.Cut(value, "\t")
	if !found {
		// No separator: treat as "clear the slot".
		providerName, model = "", ""
	}
	i.setQuickModelShortcut(slot, providerName, model)
}
// setQuickModelShortcut stores providerName/model in the given 1-based slot
// of the in-memory config (growing the list if needed) and, when a
// SettingsStore is configured, persists it. A persistence error is reported
// via statusErr and the function returns early. On success statusOK is set
// to a "cleared" or "set to" message, the matching settings row is
// refreshed, and a repaint is requested.
//
// Callers are expected to pass slot >= 1; the slot is not range-checked here.
func (i *Interactive) setQuickModelShortcut(slot int, providerName, model string) {
	if len(i.cfg.QuickModelShortcuts) < slot {
		grown := make([]QuickModelShortcut, slot)
		copy(grown, i.cfg.QuickModelShortcuts)
		i.cfg.QuickModelShortcuts = grown
	}
	i.cfg.QuickModelShortcuts[slot-1] = QuickModelShortcut{Provider: providerName, Model: model}

	if store := i.cfg.SettingsStore; store != nil {
		if err := store.SetQuickModelShortcut(slot, providerName, model); err != nil {
			i.mu.Lock()
			i.statusErr = "settings: " + err.Error()
			i.mu.Unlock()
			return
		}
	}

	label := quickModelShortcutLabel(slot)
	msg := label + " set to " + providerName + " / " + model
	if model == "" {
		msg = label + " cleared"
	}

	i.mu.Lock()
	i.statusOK = msg
	i.statusErr = ""
	i.mu.Unlock()

	i.refreshQuickModelSettingsItem(slot)
	i.invalidate()
}
// refreshQuickModelSettingsItem rebuilds the row for slot in the settings
// dialog's currently displayed item list so its hint shows the new binding.
// It does nothing when the dialog is missing, inactive, empty, or not
// showing a row for that slot.
func (i *Interactive) refreshQuickModelSettingsItem(slot int) {
	dlg := i.settingsDialog
	if dlg == nil || !dlg.Active() || len(dlg.items) == 0 {
		return
	}

	want := "quick_model_" + strconv.Itoa(slot)
	for idx := range dlg.items {
		if dlg.items[idx].key != want {
			continue
		}
		dlg.items[idx] = i.quickModelSettingItem(slot)
		return
	}
}
func (i *Interactive) applySettingToggle(key string, value bool) {
// Every setting toggle forces a full repaint at the end — same
// effect as the user pressing Ctrl+L — so any per-setting visual
@ -4129,9 +4385,10 @@ func (i *Interactive) swapModel(prov, model string, builder func(string, string)
// to invalidate.
i.mu.Unlock()
// The new agent was built off the base tool registry, so any
// dynamically-registered tools (telegram_send_*) need to be
// reattached. applyTelegramTools is a no-op when the bridge is
// idle so the cross-provider path still works on a vanilla setup.
// dynamically-registered tools need to be reattached. The apply
// helpers are no-ops when their feature is inactive, so the
// cross-provider path still works on a vanilla setup.
i.applyAutoSwarmTool(i.autoSwarmEnabled())
i.applyTelegramTools(i.telegramBridge != nil && i.telegramBridge.Active())
if i.cfg.PersistModel != nil {
i.cfg.PersistModel(p, md)
@ -4160,6 +4417,7 @@ func (i *Interactive) handleAuthEvent(ev auth.Event) {
i.statusErr = ""
i.statusOK = "logged in to " + ev.Provider + " via " + ev.Method
i.mu.Unlock()
i.applyAutoSwarmTool(i.autoSwarmEnabled())
i.applyTelegramTools(i.telegramBridge != nil && i.telegramBridge.Active())
i.dialog.ShowResult(true, "")
}
@ -5103,12 +5361,13 @@ func (i *Interactive) TrackSwarmAgent(a *swarm.Agent, task string) {
}
// trackSwarmAgent records a freshly-spawned auto-swarm agent and
// subscribes to its turn_end events. Sub-agents are long-lived
// daemons that keep running on the inbox after the initial task,
// so we can't wait on agent.Wait() — it never returns until the
// whole daemon dies. Instead we mark each entry done on its first
// turn_end (the initial task finishing), and when every tracked
// entry has reported in, flush a single summary into the main chat.
// subscribes to its prompt-level task completion events. Sub-agents
// are long-lived daemons that keep running on the inbox after the
// initial task, so we can't wait on agent.Wait() — it never returns
// until the whole daemon dies. Instead we mark each entry done when
// the swarm daemon reports the initial prompt has finished, and when
// every tracked entry has reported in, flush a single summary into
// the main chat.
//
// Wired in from cli.go via SwarmSpawnTool.OnSpawned only when auto-
// swarm is enabled, so this is a no-op when the feature is off.
@ -5222,6 +5481,10 @@ func (i *Interactive) applyAutoSwarmSystemPrompt(active bool) {
// when /settings -> auto-swarm is enabled. Mirrors applyTelegramTools'
// snapshot+mutate pattern so extension tools and /reload-ext additions
// survive a toggle.
func (i *Interactive) autoSwarmEnabled() bool {
// Reports whether the auto-swarm setting is explicitly switched on;
// a nil (unset) pointer counts as disabled.
return i.cfg.AutoSwarmEnabled != nil && *i.cfg.AutoSwarmEnabled
}
func (i *Interactive) applyAutoSwarmTool(active bool) {
if i.agent == nil {
return
@ -5236,9 +5499,11 @@ func (i *Interactive) applyAutoSwarmTool(active bool) {
}
if active && i.cfg.Swarm != nil {
next["swarm_spawn"] = &tools.SwarmSpawnTool{
Swarm: i.cfg.Swarm,
Enabled: func() bool { return true },
OnSpawned: i.trackSwarmAgent,
Swarm: i.cfg.Swarm,
Enabled: func() bool { return true },
DefaultModel: func() string { return i.cfg.Model },
DefaultProvider: func() string { return i.cfg.Provider },
OnSpawned: i.trackSwarmAgent,
}
}
i.agent.SetTools(next)

View file

@ -10,10 +10,13 @@ import (
type settingsDialog struct {
active bool
title string
items []settingsItem
cursor int
selecting bool
optionCursor int
parentItems []settingsItem
parentCursor int
}
type settingsItem struct {
@ -22,6 +25,8 @@ type settingsItem struct {
desc string
value bool
options []settingsOption
children []settingsItem
picker bool
choice int
disabled bool
hint string
@ -34,11 +39,12 @@ type settingsOption struct {
}
type settingsAction struct {
Toggle bool
Key string
Value bool
StringValue string
Close bool
Toggle bool
Key string
Value bool
StringValue string
ModelShortcutSlot int
Close bool
}
func newSettingsDialog() *settingsDialog { return &settingsDialog{} }
@ -47,10 +53,13 @@ func (d *settingsDialog) Open(items []settingsItem) bool {
if len(items) == 0 {
return false
}
d.title = "settings"
d.items = items
d.cursor = 0
d.selecting = false
d.optionCursor = 0
d.parentItems = nil
d.parentCursor = 0
d.active = true
return true
}
@ -58,6 +67,7 @@ func (d *settingsDialog) Open(items []settingsItem) bool {
// Close hides the dialog, abandons any option selection in progress
// and forgets the saved parent list so the next Open starts clean.
func (d *settingsDialog) Close() {
	d.parentItems = nil
	d.selecting = false
	d.active = false
}
// Active reports whether the dialog is currently open. It is safe to
// call on a nil receiver, which is reported as inactive.
func (d *settingsDialog) Active() bool {
	if d == nil {
		return false
	}
	return d.active
}
@ -74,7 +84,22 @@ func (d *settingsDialog) HandleKey(k tui.Key) settingsAction {
if d.cursor < len(d.items)-1 {
d.cursor++
}
case tui.KeyBackspace:
if len(d.items) > 0 {
it := d.items[d.cursor]
if strings.HasPrefix(it.key, "quick_model_") {
return settingsAction{Toggle: true, Key: it.key, StringValue: ""}
}
}
case tui.KeyEsc:
if len(d.parentItems) > 0 {
d.items = d.parentItems
d.cursor = d.parentCursor
d.parentItems = nil
d.parentCursor = 0
d.title = "settings"
return settingsAction{}
}
d.Close()
return settingsAction{Close: true}
case tui.KeyEnter:
@ -119,6 +144,27 @@ func (d *settingsDialog) toggleCurrent() settingsAction {
if it.disabled {
return settingsAction{}
}
if it.picker {
slotText := strings.TrimPrefix(it.key, "quick_model_")
slot := 0
for _, r := range slotText {
if r < '0' || r > '9' {
slot = 0
break
}
slot = slot*10 + int(r-'0')
}
return settingsAction{ModelShortcutSlot: slot}
}
if len(it.children) > 0 {
d.parentItems = d.items
d.parentCursor = d.cursor
d.items = it.children
d.cursor = 0
d.optionCursor = 0
d.title = "settings: " + it.label
return settingsAction{}
}
if len(it.options) > 0 {
d.optionCursor = it.choice
if d.optionCursor < 0 || d.optionCursor >= len(it.options) {
@ -159,15 +205,22 @@ func (d *settingsDialog) Render(th tui.Theme, width int) []string {
return d.renderOptions(th, width)
}
var lines []string
lines = append(lines, frameHeader(th, "settings", width))
lines = append(lines, th.FG256(th.Muted, "change with enter/space, esc to close:"))
lines = append(lines, frameHeader(th, d.title, width))
if len(d.parentItems) > 0 {
lines = append(lines, th.FG256(th.Muted, "change with enter/space, esc to go back:"))
} else {
lines = append(lines, th.FG256(th.Muted, "change with enter/space, esc to close:"))
}
for i, it := range d.items {
box := "[ ]"
if it.value {
box = "[✓]"
}
plain := " " + box + " " + it.label
if len(it.options) > 0 {
if it.picker || len(it.children) > 0 {
box = "[→]"
plain = " " + box + " " + it.label
} else if len(it.options) > 0 {
box = "[→]"
if it.choice < 0 || it.choice >= len(it.options) {
it.choice = 0

View file

@ -10,9 +10,9 @@ import "github.com/patriceckhart/zot/packages/tui"
// the moment zot starts. After welcomeVersionDuration the caller
// flips showVersion off and the headline reverts to plain text.
func welcomeBanner(th tui.Theme, version string, showVersion bool) []string {
text := "i'm zot. yet another coding agent harness."
text := "zot. yet another coding agent harness."
if showVersion && version != "" {
text = "i'm zot (" + version + "). yet another coding agent harness."
text = "zot (" + version + "). yet another coding agent harness."
}
headline := th.AccentBar(th.Assistant) + th.FG256(th.Assistant, tui.Bold(text))
return []string{

View file

@ -4,6 +4,31 @@ import "github.com/patriceckhart/zot/packages/provider"
type configSettingsStore struct{}
// SetQuickModelShortcut stores providerName/model in the 1-based
// shortcut slot and saves the config. Slots outside 1..9 are ignored
// and reported as success. Passing empty provider and model clears the
// slot; trailing empty slots are dropped before saving.
func (configSettingsStore) SetQuickModelShortcut(slot int, providerName, model string) error {
	if slot < 1 || slot > 9 {
		return nil
	}
	cfg, err := LoadConfig()
	if err != nil {
		return err
	}

	// Grow the list so that index slot-1 exists.
	shortcuts := cfg.QuickModelShortcuts
	if len(shortcuts) < slot {
		grown := make([]QuickModelShortcut, slot)
		copy(grown, shortcuts)
		shortcuts = grown
	}
	shortcuts[slot-1] = QuickModelShortcut{Provider: providerName, Model: model}

	// Trim trailing empty slots so config.json stays compact.
	end := len(shortcuts)
	for end > 0 && shortcuts[end-1].Provider == "" && shortcuts[end-1].Model == "" {
		end--
	}
	cfg.QuickModelShortcuts = shortcuts[:end]
	return SaveConfig(cfg)
}
func (configSettingsStore) SetInlineImages(enabled bool) error {
cfg, err := LoadConfig()
if err != nil {

View file

@ -71,12 +71,15 @@ type Agent struct {
finished time.Time
lastErr error
// OnTurnEnd, if set, fires once per turn_end event the runner
// observes from the child daemon. Used by auto-swarm watchers
// to detect that a sub-agent's first (or n-th) task has
// finished without waiting for the long-lived daemon itself to
// exit — sub-agents keep running on the inbox even after the
// initial task completes, so Wait() never unblocks for them.
// OnTurnEnd, if set, fires once per prompt-level turn_end event
// emitted by the swarm daemon wrapper. Provider/tool-loop
// turn_end events (for example stop=tool_use) are ignored by the
// runner because they do not mean the sub-agent task finished.
// Used by auto-swarm watchers to detect that a sub-agent's first
// (or n-th) task has finished without waiting for the long-lived
// daemon itself to exit — sub-agents keep running on the inbox
// even after the initial task completes, so Wait() never unblocks
// for them.
OnTurnEnd func(step int, errMsg string)
ctx context.Context
@ -128,10 +131,10 @@ func (a *Agent) Err() error {
// and by /swarm wait <id>.
func (a *Agent) Wait() { <-a.done }
// SetOnTurnEnd installs (or clears, with nil) the per-turn callback
// fired from the runner when the child daemon emits a turn_end
// event. Safe to call from any goroutine: the runner reads the
// callback under the same mutex.
// SetOnTurnEnd installs (or clears, with nil) the callback fired
// from the runner when the child daemon emits a prompt-level
// turn_end event with a step field. Safe to call from any goroutine:
// the runner reads the callback under the same mutex.
func (a *Agent) SetOnTurnEnd(fn func(step int, errMsg string)) {
a.mu.Lock()
a.OnTurnEnd = fn

View file

@ -209,21 +209,13 @@ func (r *execRunner) Run(ctx context.Context, sink Sink) error {
if ev, ok := parseEventLine(trimmed); ok {
_ = log.Append(ev)
applyEventToSink(ev, sink)
// Fan turn_end up to any subscriber on the
// supervised Agent. Daemons stay alive across
// many turns, so Wait()-style hooks would
// never fire; per-turn callbacks let auto-
// swarm summarise as each task completes.
if ev.Type == "turn_end" && r.agent != nil {
r.agent.mu.Lock()
fn := r.agent.OnTurnEnd
r.agent.mu.Unlock()
if fn != nil {
step, _ := ev.Data["step"].(float64)
errMsg, _ := ev.Data["error"].(string)
go fn(int(step), errMsg)
}
}
// Fan prompt-level task completions up to any
// subscriber on the supervised Agent. The child
// also forwards provider/tool-loop turn_end
// events (for example stop=tool_use); those do
// not contain step and must not be treated as
// swarm task completion.
notifyPromptTurnEnd(r.agent, ev)
} else {
// Non-JSON output. Keep it as transcript so an
// accidental fmt.Println in the child still
@ -301,6 +293,28 @@ func parseEventLine(line string) (Event, bool) {
return ev, true
}
// notifyPromptTurnEnd forwards a prompt-level turn_end event to the
// agent's OnTurnEnd callback. Only turn_end events whose data carries
// a numeric "step" qualify; provider/tool-loop turn_end events (such
// as stop=tool_use) have no step and are dropped because they do not
// mean the delegated task finished. The callback is read under the
// agent's mutex and invoked on its own goroutine.
func notifyPromptTurnEnd(a *Agent, ev Event) {
	if a == nil {
		return
	}
	if ev.Type != "turn_end" {
		return
	}
	rawStep, hasStep := ev.Data["step"].(float64)
	if !hasStep {
		return
	}

	a.mu.Lock()
	callback := a.OnTurnEnd
	a.mu.Unlock()
	if callback == nil {
		return
	}

	// A missing or non-string "error" is passed on as "".
	errMsg, _ := ev.Data["error"].(string)
	go callback(int(rawStep), errMsg)
}
// applyEventToSink translates an Event into Sink updates. Only a
// few event types are interpreted; the rest still land in the
// durable log via the caller.

View file

@ -3,6 +3,7 @@ package swarm
import (
"strings"
"testing"
"time"
)
// TestSwarmAgentArgs locks in the exact flag set the subprocess
@ -123,6 +124,45 @@ func TestDefaultChildArgsResumeOmitsTask(t *testing.T) {
}
}
// TestNotifyPromptTurnEndIgnoresProviderToolLoopTurnEnd checks that a
// turn_end event without a step field never reaches OnTurnEnd.
func TestNotifyPromptTurnEndIgnoresProviderToolLoopTurnEnd(t *testing.T) {
	fired := make(chan struct{}, 1)
	agent := &Agent{}
	agent.SetOnTurnEnd(func(int, string) {
		fired <- struct{}{}
	})

	ev := NewEvent("turn_end", map[string]any{"stop": "tool_use"})
	notifyPromptTurnEnd(agent, ev)

	// The callback would run on its own goroutine, so give it a short
	// grace period before concluding it was not invoked.
	select {
	case <-time.After(50 * time.Millisecond):
	case <-fired:
		t.Fatal("OnTurnEnd fired for provider/tool-loop turn_end without step")
	}
}
// TestNotifyPromptTurnEndFiresForDaemonPromptCompletion checks that a
// turn_end event carrying a step is delivered to OnTurnEnd with its
// step and error text intact.
func TestNotifyPromptTurnEndFiresForDaemonPromptCompletion(t *testing.T) {
	type result struct {
		step int
		err  string
	}
	results := make(chan result, 1)
	agent := &Agent{}
	agent.SetOnTurnEnd(func(step int, errMsg string) {
		results <- result{step: step, err: errMsg}
	})

	payload := map[string]any{"step": float64(2), "error": "boom"}
	notifyPromptTurnEnd(agent, NewEvent("turn_end", payload))

	select {
	case <-time.After(time.Second):
		t.Fatal("OnTurnEnd did not fire for prompt-level turn_end with step")
	case r := <-results:
		if r.step != 2 || r.err != "boom" {
			t.Fatalf("callback = (%d, %q); want (2, boom)", r.step, r.err)
		}
	}
}
func indexOf(xs []string, want string) int {
for i, x := range xs {
if x == want {

View file

@ -2,6 +2,7 @@
package tools
import (
"bytes"
"context"
"encoding/json"
"fmt"
@ -69,6 +70,16 @@ func (t *ReadTool) Execute(ctx context.Context, raw json.RawMessage, progress fu
if err != nil {
return core.ToolResult{}, err
}
// The extension is only a hint. Files are routinely mislabeled
// (a .png that is actually JPEG bytes, a renamed download, an
// editor that re-encoded on save). Anthropic and other providers
// sniff the real bytes and reject the whole request when the
// declared media type disagrees, which would break the session.
// Always derive the MIME from the actual content; fall back to
// the extension-based guess only when the bytes are unrecognized.
if sniffed := sniffImageMIME(data); sniffed != "" {
mime = sniffed
}
return core.ToolResult{
Content: []provider.Content{provider.ImageBlock{MimeType: mime, Data: data}},
}, nil
@ -180,6 +191,26 @@ func imageMIME(path string) string {
return ""
}
// sniffImageMIME identifies an image by its magic bytes and returns
// the matching media type, ignoring whatever the file extension says.
// It recognizes PNG, JPEG, GIF (87a and 89a) and WebP. Any other
// content yields "", which tells the caller to keep its
// extension-based guess.
func sniffImageMIME(data []byte) string {
	if bytes.HasPrefix(data, []byte{0x89, 'P', 'N', 'G', 0x0D, 0x0A, 0x1A, 0x0A}) {
		return "image/png"
	}
	if bytes.HasPrefix(data, []byte{0xFF, 0xD8, 0xFF}) {
		return "image/jpeg"
	}
	if bytes.HasPrefix(data, []byte("GIF87a")) || bytes.HasPrefix(data, []byte("GIF89a")) {
		return "image/gif"
	}
	// WebP is a RIFF container: "RIFF", a 4-byte size, then "WEBP".
	if len(data) >= 12 && bytes.HasPrefix(data, []byte("RIFF")) && bytes.Equal(data[8:12], []byte("WEBP")) {
		return "image/webp"
	}
	return ""
}
// looksBinary returns true if the buffer contains a NUL byte in its first 8 KiB.
func looksBinary(b []byte) bool {
n := len(b)

View file

@ -31,6 +31,13 @@ type SwarmSpawnTool struct {
// is treated as disabled.
Enabled func() bool
// DefaultModel and DefaultProvider return the host agent's resolved
// model and provider. They are used when the tool call omits both
// fields, so auto-swarm follows the same auth route as the user sees
// in the parent session.
DefaultModel func() string
DefaultProvider func() string
// OnSpawned, if set, is called after every successful spawn with
// the new agent + the task it was started with. Used by the
// interactive host to track agents and surface a summary back
@ -53,11 +60,11 @@ const swarmSpawnSchema = `{
},
"model": {
"type": "string",
"description": "Optional model id to pin the sub-agent to (e.g. \"claude-sonnet-4-5\", \"gpt-5\"). Defaults to the host's current model."
"description": "Optional model id to pin the sub-agent to. Normally omit both model and provider so the sub-agent inherits the host session's resolved provider/model/auth route. Do not infer provider from model name. If you override this, also provide provider."
},
"provider": {
"type": "string",
"description": "Optional provider id (e.g. \"anthropic\", \"openai\"). Usually paired with model."
"description": "Optional provider id. Normally omit both model and provider so the sub-agent inherits the host session. If you override this, also provide model. Note: openai means public OpenAI API-key auth; openai-codex means ChatGPT/Codex subscription auth."
}
},
"required": ["task"]
@ -85,10 +92,24 @@ func (t *SwarmSpawnTool) Execute(ctx context.Context, raw json.RawMessage, progr
return toolErr("swarm_spawn: task is required"), nil
}
model := strings.TrimSpace(a.Model)
providerID := strings.TrimSpace(a.Provider)
if (model == "") != (providerID == "") {
return toolErr("swarm_spawn: omit both model/provider to inherit the host, or provide both explicitly"), nil
}
if model == "" && providerID == "" {
if t.DefaultModel != nil {
model = strings.TrimSpace(t.DefaultModel())
}
if t.DefaultProvider != nil {
providerID = strings.TrimSpace(t.DefaultProvider())
}
}
agent, err := t.Swarm.SpawnReq(ctx, swarm.SpawnRequest{
Task: task,
Model: strings.TrimSpace(a.Model),
Provider: strings.TrimSpace(a.Provider),
Model: model,
Provider: providerID,
})
if err != nil {
return core.ToolResult{}, fmt.Errorf("swarm_spawn: %w", err)
@ -100,11 +121,11 @@ func (t *SwarmSpawnTool) Execute(ctx context.Context, raw json.RawMessage, progr
var sb strings.Builder
fmt.Fprintf(&sb, "spawned sub-agent %s\n", agent.ID)
fmt.Fprintf(&sb, "task: %s\n", truncateTask(task, 200))
if a.Model != "" {
fmt.Fprintf(&sb, "model: %s\n", a.Model)
if model != "" {
fmt.Fprintf(&sb, "model: %s\n", model)
}
if a.Provider != "" {
fmt.Fprintf(&sb, "provider: %s\n", a.Provider)
if providerID != "" {
fmt.Fprintf(&sb, "provider: %s\n", providerID)
}
sb.WriteString("\nThe sub-agent is running in the background. Use /swarm in the TUI to monitor it. ")
sb.WriteString("This conversation continues immediately; do not wait for the sub-agent to finish before working on the next thing.")
@ -113,8 +134,8 @@ func (t *SwarmSpawnTool) Execute(ctx context.Context, raw json.RawMessage, progr
Details: map[string]any{
"agent_id": agent.ID,
"task": task,
"model": a.Model,
"provider": a.Provider,
"model": model,
"provider": providerID,
},
}, nil
}

View file

@ -0,0 +1,100 @@
package tools
import (
"context"
"encoding/json"
"path/filepath"
"strings"
"testing"
"github.com/patriceckhart/zot/packages/agent/swarm"
"github.com/patriceckhart/zot/packages/provider"
)
// noopSwarmRunner is a test runner whose Run does nothing and
// reports success, so spawning an agent in tests starts no work.
type noopSwarmRunner struct{}
// Run ignores its arguments and returns nil immediately.
func (noopSwarmRunner) Run(context.Context, swarm.Sink) error { return nil }
// newTestSwarm builds a Swarm rooted in a per-test temp directory
// whose agents all use noopSwarmRunner.
func newTestSwarm(t *testing.T) *swarm.Swarm {
	t.Helper()
	repoRoot := t.TempDir()
	cfg := swarm.Config{
		Root:     filepath.Join(repoRoot, "swarm"),
		RepoRoot: repoRoot,
		NewRunner: func(*swarm.Agent) swarm.Runner {
			return noopSwarmRunner{}
		},
	}
	return swarm.New(cfg)
}
// TestSwarmSpawnInheritsHostModelAndProviderWhenOmitted checks that a
// spawn call naming neither model nor provider picks up the tool's
// DefaultModel/DefaultProvider in the result details, the result text
// and the spawned agent itself.
func TestSwarmSpawnInheritsHostModelAndProviderWhenOmitted(t *testing.T) {
	tool := &SwarmSpawnTool{
		Swarm:           newTestSwarm(t),
		Enabled:         func() bool { return true },
		DefaultModel:    func() string { return "gpt-5" },
		DefaultProvider: func() string { return "openai-codex" },
	}

	args := json.RawMessage(`{"task":"research docs"}`)
	res, err := tool.Execute(context.Background(), args, nil)
	if err != nil {
		t.Fatal(err)
	}
	if res.IsError {
		t.Fatalf("unexpected tool error: %s", textResult(res.Content))
	}

	details, isMap := res.Details.(map[string]any)
	if !isMap {
		t.Fatalf("details type = %T, want map[string]any", res.Details)
	}
	if got := details["model"]; got != "gpt-5" {
		t.Fatalf("model detail = %v, want gpt-5", got)
	}
	if got := details["provider"]; got != "openai-codex" {
		t.Fatalf("provider detail = %v, want openai-codex", got)
	}

	text := textResult(res.Content)
	for _, want := range []string{"model: gpt-5", "provider: openai-codex"} {
		if !strings.Contains(text, want) {
			t.Fatalf("result text missing inherited model/provider:\n%s", text)
		}
	}

	agents := tool.Swarm.List()
	if len(agents) != 1 {
		t.Fatalf("spawned agents = %d, want 1", len(agents))
	}
	if agents[0].Model != "gpt-5" || agents[0].Provider != "openai-codex" {
		t.Fatalf("agent model/provider = %q/%q, want gpt-5/openai-codex", agents[0].Model, agents[0].Provider)
	}
}
// TestSwarmSpawnRejectsPartialModelProviderOverride checks that
// supplying a provider without a model produces a tool error and
// spawns nothing.
func TestSwarmSpawnRejectsPartialModelProviderOverride(t *testing.T) {
	tool := &SwarmSpawnTool{
		Swarm:           newTestSwarm(t),
		Enabled:         func() bool { return true },
		DefaultModel:    func() string { return "gpt-5" },
		DefaultProvider: func() string { return "openai-codex" },
	}

	args := json.RawMessage(`{"task":"research docs","provider":"openai"}`)
	res, err := tool.Execute(context.Background(), args, nil)
	if err != nil {
		t.Fatal(err)
	}
	if !res.IsError {
		t.Fatalf("expected partial override to fail")
	}

	msg := textResult(res.Content)
	if !strings.Contains(msg, "omit both model/provider") {
		t.Fatalf("error text = %q", msg)
	}
	if n := len(tool.Swarm.List()); n != 0 {
		t.Fatalf("spawned agents = %d, want 0", n)
	}
}
// textResult returns the text of the first content block when that
// block is a provider.TextBlock, and "" for empty content or any
// other leading block type.
func textResult(content []provider.Content) string {
	if len(content) > 0 {
		if first, isText := content[0].(provider.TextBlock); isText {
			return first.Text
		}
	}
	return ""
}

View file

@ -38,6 +38,30 @@ func TestReadText(t *testing.T) {
}
}
// TestReadImageMimeFromContentNotExtension reads a file that carries a
// .png name but JPEG bytes and expects the returned image block to be
// labeled image/jpeg, i.e. the MIME follows the content and not the
// extension.
func TestReadImageMimeFromContentNotExtension(t *testing.T) {
	dir := t.TempDir()
	mislabeled := filepath.Join(dir, "shot.png")
	// JPEG SOI marker followed by the start of a JFIF APP0 segment.
	content := []byte{0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10, 'J', 'F', 'I', 'F'}
	if err := os.WriteFile(mislabeled, content, 0o644); err != nil {
		t.Fatal(err)
	}

	reader := &ReadTool{CWD: dir}
	args := mustJSON(t, map[string]any{"path": "shot.png"})
	res, err := reader.Execute(context.Background(), args, nil)
	if err != nil {
		t.Fatal(err)
	}

	block, isImage := res.Content[0].(provider.ImageBlock)
	if !isImage {
		t.Fatalf("expected ImageBlock, got %T", res.Content[0])
	}
	if block.MimeType != "image/jpeg" {
		t.Fatalf("mime from extension not corrected: got %s want image/jpeg", block.MimeType)
	}
}
func TestReadOffsetLimit(t *testing.T) {
dir := t.TempDir()
p := filepath.Join(dir, "a.txt")

View file

@ -24,6 +24,24 @@ import (
// kinds of screenshots / charts the model usually consumes.
const anthMaxImageSide = 2000
// anthSniffImageMIME maps an image's leading magic bytes to a media
// type without consulting the declared type or the stdlib decoder
// registry. It knows PNG, JPEG, GIF (87a/89a) and WebP and returns ""
// for anything else so the caller keeps its declared MIME.
func anthSniffImageMIME(data []byte) string {
	prefixes := []struct {
		magic []byte
		mime  string
	}{
		{[]byte{0x89, 'P', 'N', 'G', 0x0D, 0x0A, 0x1A, 0x0A}, "image/png"},
		{[]byte{0xFF, 0xD8, 0xFF}, "image/jpeg"},
		{[]byte("GIF87a"), "image/gif"},
		{[]byte("GIF89a"), "image/gif"},
	}
	for _, p := range prefixes {
		if bytes.HasPrefix(data, p.magic) {
			return p.mime
		}
	}
	// WebP: "RIFF" + 4-byte length + "WEBP", so the tag sits at 8..12.
	if len(data) >= 12 && bytes.HasPrefix(data, []byte("RIFF")) && bytes.Equal(data[8:12], []byte("WEBP")) {
		return "image/webp"
	}
	return ""
}
// anthShrinkImageBytesIfTooBig returns data unchanged when the image
// already fits within Anthropic's per-image dimension cap. When it
// doesn't, the image is decoded, resampled with Catmull-Rom (a good
@ -43,6 +61,19 @@ func anthShrinkImageBytesIfTooBig(data []byte, mime string) ([]byte, string) {
if len(data) == 0 {
return data, mime
}
// Reconcile the declared media type with the actual bytes before
// anything else. Callers can mislabel images (a .png that is really
// JPEG, an extension that hardcodes a type), and so can already-
// persisted session transcripts created before this fix existed.
// Anthropic rejects the whole request when the declared type
// disagrees with the bytes it sniffs, which would make such a
// session impossible to continue. Sniffing the magic bytes here
// (independent of stdlib decoder registration, and resilient to a
// DecodeConfig failure below) makes the mismatch impossible to ship
// and lets a previously-broken session resume cleanly.
if real := anthSniffImageMIME(data); real != "" {
mime = real
}
cfg, format, err := image.DecodeConfig(bytes.NewReader(data))
if err != nil {
return data, mime

View file

@ -60,6 +60,42 @@ func TestAnthShrinkImage_PassesThroughWhenSmall(t *testing.T) {
}
}
// TestAnthShrinkImage_CorrectsMislabeledMimeWhenSmall feeds a JPEG
// that fits within the size cap but is declared as PNG. The bytes must
// pass through unchanged while the returned media type is corrected to
// image/jpeg.
func TestAnthShrinkImage_CorrectsMislabeledMimeWhenSmall(t *testing.T) {
	jpegData := encodeJPEG(t, makeRect(800, 600))

	gotData, gotMIME := anthShrinkImageBytesIfTooBig(jpegData, "image/png")

	if !bytes.Equal(gotData, jpegData) {
		t.Errorf("small image bytes were rewritten; expected pass-through")
	}
	if gotMIME != "image/jpeg" {
		t.Errorf("mislabeled mime not corrected: got %s want image/jpeg", gotMIME)
	}
}
func TestAnthBuildToolResultContent_RepairsMislabeledImageOnResume(t *testing.T) {
// Simulates continuing a session whose transcript already carries a
// tool_result image with the wrong declared media type (.png name,
// JPEG bytes). The outbound request builder must rewrite the media
// type to match the bytes so Anthropic accepts the resumed request.
jpegBytes := encodeJPEG(t, makeRect(64, 64))
blocks := []Content{
TextBlock{Text: "screenshot"},
ImageBlock{MimeType: "image/png", Data: jpegBytes},
}
raw, err := anthBuildToolResultContent(blocks)
if err != nil {
t.Fatalf("build tool result: %v", err)
}
if !bytes.Contains(raw, []byte(`"media_type":"image/jpeg"`)) {
t.Fatalf("media type not repaired in outbound request: %s", raw)
}
if bytes.Contains(raw, []byte(`"media_type":"image/png"`)) {
t.Fatalf("stale image/png media type still present: %s", raw)
}
}
func TestAnthShrinkImage_DownscalesWhenTooWide(t *testing.T) {
src := encodePNG(t, makeRect(4000, 1000))
out, mime := anthShrinkImageBytesIfTooBig(src, "image/png")

View file

@ -310,6 +310,7 @@ var builtinCatalog = []Model{
{Provider: "opencode-go", ID: "minimax-m2.7", DisplayName: "MiniMax M2.7", ContextWindow: 204800, MaxOutput: 131072, Reasoning: true, PriceInput: 0.3, PriceOutput: 1.2, PriceCacheRead: 0.06, BaseURL: "https://opencode.ai/zen/go/v1"},
{Provider: "opencode-go", ID: "qwen3.5-plus", DisplayName: "Qwen3.5 Plus", ContextWindow: 262144, MaxOutput: 65536, Reasoning: true, PriceInput: 0.2, PriceOutput: 1.2, PriceCacheRead: 0.02, PriceCacheWrite: 0.25, BaseURL: "https://opencode.ai/zen/go/v1"},
{Provider: "opencode-go", ID: "qwen3.6-plus", DisplayName: "Qwen3.6 Plus", ContextWindow: 262144, MaxOutput: 65536, Reasoning: true, PriceInput: 0.5, PriceOutput: 3, PriceCacheRead: 0.05, PriceCacheWrite: 0.625, BaseURL: "https://opencode.ai/zen/go/v1"},
{Provider: "opencode-go", ID: "qwen3.7-plus", DisplayName: "Qwen3.7 Plus", ContextWindow: 1000000, MaxOutput: 65536, Reasoning: true, PriceInput: 0.32, PriceOutput: 1.28, PriceCacheRead: 0.064, PriceCacheWrite: 0.4, BaseURL: "https://opencode.ai/zen/go/v1"},
// openrouter: discovered live via DiscoverOpenRouter, none baked in.
// ----- together -----
{Provider: "together", ID: "MiniMaxAI/MiniMax-M2.5", DisplayName: "MiniMax-M2.5", ContextWindow: 204800, MaxOutput: 131072, Reasoning: true, PriceInput: 0.3, PriceOutput: 1.2, PriceCacheRead: 0.06, BaseURL: "https://api.together.ai/v1"},
@ -677,4 +678,5 @@ var builtinCatalog = []Model{
{Provider: "opencode", ID: "nemotron-3-super-free", DisplayName: "Nemotron 3 Super Free", ContextWindow: 204800, MaxOutput: 128000, Reasoning: true, PriceInput: 0, PriceOutput: 0, PriceCacheRead: 0, BaseURL: "https://opencode.ai/zen/v1"},
{Provider: "opencode", ID: "qwen3.5-plus", DisplayName: "Qwen3.5 Plus", ContextWindow: 262144, MaxOutput: 65536, Reasoning: true, PriceInput: 0.2, PriceOutput: 1.2, PriceCacheRead: 0.02, PriceCacheWrite: 0.25, BaseURL: "https://opencode.ai/zen"},
{Provider: "opencode", ID: "qwen3.6-plus", DisplayName: "Qwen3.6 Plus", ContextWindow: 262144, MaxOutput: 65536, Reasoning: true, PriceInput: 0.5, PriceOutput: 3, PriceCacheRead: 0.05, PriceCacheWrite: 0.625, BaseURL: "https://opencode.ai/zen"},
{Provider: "opencode", ID: "qwen3.7-plus", DisplayName: "Qwen3.7 Plus", ContextWindow: 1000000, MaxOutput: 65536, Reasoning: true, PriceInput: 0.32, PriceOutput: 1.28, PriceCacheRead: 0.064, PriceCacheWrite: 0.4, BaseURL: "https://opencode.ai/zen"},
}

View file

@ -0,0 +1,229 @@
//go:build darwin
package tui
import (
"crypto/rand"
"encoding/hex"
"fmt"
"image/png"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"golang.org/x/image/tiff"
)
// readClipboardImageScript is the AppleScript passed to osascript to
// dump the clipboard image to disk. Its single argument is the output
// file path. It asks for the clipboard as PNG data first and as TIFF
// data second; when neither coercion works it returns "NO_IMAGE".
// Otherwise it truncates the output file, writes the image data to it
// and returns the kind that was written ("png" or "tiff"). A failure
// while writing closes the file and re-raises the original error.
const readClipboardImageScript = `
on run argv
set outPath to item 1 of argv
try
set imgData to the clipboard as «class PNGf»
set imgKind to "png"
on error
try
set imgData to the clipboard as «class TIFF»
set imgKind to "tiff"
on error
return "NO_IMAGE"
end try
end try
set outFile to POSIX file outPath
set fileRef to open for access outFile with write permission
try
set eof of fileRef to 0
write imgData to fileRef
close access fileRef
on error errMsg number errNum
try
close access fileRef
end try
error errMsg number errNum
end try
return imgKind
end run
`
// ReadClipboardImagePNG saves the current clipboard image as a PNG
// file in the clipboard image directory and returns the file path, the
// PNG file's bytes, and true. When the clipboard holds no image it
// returns ("", nil, false, nil). TIFF clipboard data is converted to
// PNG, and when osascript reports something other than a known kind
// the output is searched for a path to an existing PNG/TIFF file,
// which is then copied or converted.
func ReadClipboardImagePNG() (string, []byte, bool, error) {
	fail := func(err error) (string, []byte, bool, error) {
		return "", nil, false, err
	}

	outDir := clipboardImageDir()
	if err := os.MkdirAll(outDir, 0o700); err != nil {
		return fail(err)
	}
	pngPath := filepath.Join(outDir, "clipboard-"+time.Now().Format("20060102-150405")+"-"+randomHex(4)+".png")
	// osascript writes here first; the data may still be TIFF.
	tmpPath := pngPath + ".raw"
	defer os.Remove(tmpPath)

	kind, err := writeClipboardImageData(tmpPath)
	if err != nil {
		return fail(err)
	}

	switch kind {
	case "":
		// Nothing image-like on the clipboard.
		return fail(nil)
	case "png":
		err = os.Rename(tmpPath, pngPath)
	case "tiff":
		err = convertTIFFFileToPNG(tmpPath, pngPath)
	default:
		src, found := findClipboardImagePath(kind)
		if !found {
			return fail(fmt.Errorf("unexpected clipboard image kind %q", kind))
		}
		err = copyClipboardImageFileToPNG(src, pngPath)
	}
	if err != nil {
		return fail(err)
	}

	pngData, err := os.ReadFile(pngPath)
	if err != nil {
		return fail(err)
	}
	return pngPath, pngData, true, nil
}
// writeClipboardImageData runs readClipboardImageScript through
// osascript, asking it to write the clipboard image to path.
//
// The returned string is one of:
//   - "" when the clipboard holds no image,
//   - "png" or "tiff" naming the format written to path,
//   - a path to an existing image file found in osascript's error
//     output, or
//   - on success with unrecognized output, that trimmed output
//     verbatim, which the caller inspects for a file path.
//
// An error is returned only when osascript fails and its output
// contains none of the above.
func writeClipboardImageData(path string) (string, error) {
	cmd := exec.Command("/usr/bin/osascript", "-e", readClipboardImageScript, path)
	out, err := cmd.CombinedOutput()
	trimmed := strings.TrimSpace(string(out))
	if err != nil {
		// A coercion failure means "no image". AppleScript spells the
		// message with a typographic apostrophe ("Can’t make"); the
		// ASCII and apostrophe-less spellings are kept as well.
		for _, marker := range []string{"NO_IMAGE", "Cant make", "Can't make", "Can’t make"} {
			if strings.Contains(trimmed, marker) {
				return "", nil
			}
		}
		// osascript can exit non-zero on warning noise even though the
		// script still reported a kind or a usable file path.
		if kind, ok := clipboardImageKind(trimmed); ok {
			return kind, nil
		}
		if clipPath, ok := findClipboardImagePath(trimmed); ok {
			return clipPath, nil
		}
		if trimmed == "" {
			return "", fmt.Errorf("osascript failed: %w", err)
		}
		return "", fmt.Errorf("osascript failed: %s", trimmed)
	}
	if trimmed == "NO_IMAGE" {
		return "", nil
	}
	if kind, ok := clipboardImageKind(trimmed); ok {
		return kind, nil
	}
	return trimmed, nil
}
// clipboardImageKind scans s line by line and returns the first line
// that, once trimmed of surrounding whitespace, is exactly "png" or
// "tiff". The match is case-sensitive. It returns ("", false) when no
// line qualifies.
func clipboardImageKind(s string) (string, bool) {
	for _, raw := range strings.Split(s, "\n") {
		if kind := strings.TrimSpace(raw); kind == "png" || kind == "tiff" {
			return kind, true
		}
	}
	return "", false
}
// copyClipboardImageFileToPNG produces a PNG at dstPath from the
// image file at srcPath, choosing the action by srcPath's extension
// (case-insensitive): .png files are copied byte for byte with mode
// 0o600, .tif/.tiff files are converted, and anything else is an
// error.
func copyClipboardImageFileToPNG(srcPath, dstPath string) error {
	ext := strings.ToLower(filepath.Ext(srcPath))
	if ext == ".tif" || ext == ".tiff" {
		return convertTIFFFileToPNG(srcPath, dstPath)
	}
	if ext != ".png" {
		return fmt.Errorf("clipboard file is not a supported image type: %s", srcPath)
	}
	raw, err := os.ReadFile(srcPath)
	if err != nil {
		return err
	}
	return os.WriteFile(dstPath, raw, 0o600)
}
// findClipboardImagePath extracts a path to an existing PNG or TIFF
// file from s. It first tries s as a whole. Failing that, for each
// supported extension it takes the first case-insensitive occurrence,
// finds the last "/" before it, and widens the candidate leftwards
// until it reaches a quote, newline, carriage return, tab or the
// start of s; spaces are kept so paths containing them still work.
// The candidate is accepted only if clipboardImagePath confirms it.
//
// The extension search works on s's own bytes. Searching a lowercased
// copy is unsafe because case mapping can change a string's byte
// length, which would misalign or overrun the indices used to slice s.
func findClipboardImagePath(s string) (string, bool) {
	if p, ok := clipboardImagePath(s); ok {
		return p, true
	}
	const delimiters = "'\"\n\r\t"
	for _, ext := range []string{".png", ".tiff", ".tif"} {
		end := indexASCIIFold(s, ext)
		if end < 0 {
			continue
		}
		end += len(ext)
		start := strings.LastIndex(s[:end], "/")
		if start < 0 {
			continue
		}
		for start > 0 && strings.IndexByte(delimiters, s[start-1]) < 0 {
			start--
		}
		if p, ok := clipboardImagePath(s[start:end]); ok {
			return p, true
		}
	}
	return "", false
}

// indexASCIIFold returns the byte index of the first occurrence of
// lowerSub in s, treating ASCII letters in s case-insensitively, or
// -1 if there is none. lowerSub must be lowercase ASCII.
func indexASCIIFold(s, lowerSub string) int {
	for i := 0; i+len(lowerSub) <= len(s); i++ {
		matched := true
		for j := 0; j < len(lowerSub); j++ {
			c := s[i+j]
			if 'A' <= c && c <= 'Z' {
				c += 'a' - 'A'
			}
			if c != lowerSub[j] {
				matched = false
				break
			}
		}
		if matched {
			return i
		}
	}
	return -1
}

// clipboardImagePath reports whether s, after trimming surrounding
// whitespace and quote characters, names an existing non-directory
// file with a .png, .tif or .tiff extension (case-insensitive), and
// returns the cleaned path if so.
func clipboardImagePath(s string) (string, bool) {
	p := strings.TrimSpace(s)
	p = strings.Trim(p, "'\"")
	if p == "" {
		return "", false
	}
	info, err := os.Stat(p)
	if err != nil || info.IsDir() {
		return "", false
	}
	switch strings.ToLower(filepath.Ext(p)) {
	case ".png", ".tif", ".tiff":
		return p, true
	default:
		return "", false
	}
}
// convertTIFFFileToPNG decodes the TIFF image at srcPath and writes it
// to dstPath as PNG. The destination is created with mode 0o600,
// matching the plain-copy path for clipboard images. If encoding or
// closing the destination fails, the partial file is removed and the
// error returned, so a truncated PNG is never left behind.
func convertTIFFFileToPNG(srcPath, dstPath string) error {
	in, err := os.Open(srcPath)
	if err != nil {
		return err
	}
	defer in.Close()

	img, err := tiff.Decode(in)
	if err != nil {
		return err
	}

	out, err := os.OpenFile(dstPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o600)
	if err != nil {
		return err
	}
	if err := png.Encode(out, img); err != nil {
		// Best-effort cleanup; the encode error is the one to report.
		_ = out.Close()
		_ = os.Remove(dstPath)
		return err
	}
	// Close can surface a deferred write error, so it must be checked
	// rather than deferred and ignored.
	if err := out.Close(); err != nil {
		_ = os.Remove(dstPath)
		return err
	}
	return nil
}
// clipboardImageDir returns the directory used for saved clipboard
// images: "zot-clipboard-images" under /tmp when /tmp exists and is a
// directory, otherwise under os.TempDir().
func clipboardImageDir() string {
	base := os.TempDir()
	if st, err := os.Stat("/tmp"); err == nil && st.IsDir() {
		base = "/tmp"
	}
	return filepath.Join(base, "zot-clipboard-images")
}
// randomHex returns n random bytes from crypto/rand encoded as 2*n
// lowercase hex characters. If the random source fails it falls back
// to the current Unix time in nanoseconds as a decimal string.
func randomHex(n int) string {
	raw := make([]byte, n)
	_, err := rand.Read(raw)
	if err == nil {
		return hex.EncodeToString(raw)
	}
	return fmt.Sprintf("%d", time.Now().UnixNano())
}

View file

@ -0,0 +1,7 @@
//go:build !darwin
package tui
// ReadClipboardImagePNG is the stub used on non-darwin builds. It
// always reports that no clipboard image is available: empty path,
// nil data, false, and a nil error.
func ReadClipboardImagePNG() (string, []byte, bool, error) {
return "", nil, false, nil
}

View file

@ -7,7 +7,7 @@ import (
"image"
_ "image/gif"
_ "image/jpeg"
_ "image/png"
"image/png"
"math"
"os"
"strconv"
@ -134,6 +134,27 @@ func renderITerm2(data []byte, maxCellsWide, maxCellsHigh int) string {
return sb.String()
}
// kittyEnsurePNG makes sure the payload handed to the Kitty graphics
// protocol (f=100, PNG-only) is PNG. Data that already starts with the
// PNG signature is returned as is. Anything else is decoded with the
// registered image decoders and re-encoded as PNG. If decoding or
// encoding fails the original bytes are returned untouched, so the
// worst case is the old empty-box rendering rather than a panic or a
// broken escape sequence.
func kittyEnsurePNG(data []byte) []byte {
	pngSignature := []byte{0x89, 'P', 'N', 'G', 0x0D, 0x0A, 0x1A, 0x0A}
	if bytes.HasPrefix(data, pngSignature) {
		return data
	}

	decoded, _, decodeErr := image.Decode(bytes.NewReader(data))
	if decodeErr != nil {
		return data
	}

	out := new(bytes.Buffer)
	if encodeErr := png.Encode(out, decoded); encodeErr != nil {
		return data
	}
	return out.Bytes()
}
// renderKitty builds a Kitty graphics protocol sequence. Supports chunked
// data via the "m" continuation flag; chunk size is 4096 to stay under
// terminal escape-buffer limits.
@ -145,6 +166,12 @@ func renderITerm2(data []byte, maxCellsWide, maxCellsHigh int) string {
//
// Reference: https://sw.kovidgoyal.net/kitty/graphics-protocol/
func renderKitty(data []byte, maxCellsWide, maxCellsHigh int) string {
// The Kitty graphics protocol with f=100 expects a PNG payload; it
// has no JPEG/GIF decoder. Feeding it non-PNG bytes makes kitty/
// ghostty reserve the cell rectangle but paint nothing, leaving an
// empty box. Re-encode anything that is not already PNG to PNG so
// the image actually renders regardless of its source format.
data = kittyEnsurePNG(data)
b64 := base64.StdEncoding.EncodeToString(data)
const chunk = 4096
var sb strings.Builder

View file

@ -2,9 +2,13 @@ package tui
import (
"bytes"
"encoding/base64"
"image"
"image/color"
"image/jpeg"
"image/png"
"regexp"
"strings"
"testing"
)
@ -23,6 +27,68 @@ func testPNG(t *testing.T, w, h int) []byte {
return buf.Bytes()
}
// testJPEG returns a w×h JPEG (quality 90) filled with an opaque
// gradient: red follows x, green follows y, blue is fixed at 128.
// It fails the test if encoding fails.
func testJPEG(t *testing.T, w, h int) []byte {
	t.Helper()
	canvas := image.NewRGBA(image.Rect(0, 0, w, h))
	for row := 0; row < h; row++ {
		for col := 0; col < w; col++ {
			canvas.SetRGBA(col, row, color.RGBA{R: uint8(col), G: uint8(row), B: 128, A: 255})
		}
	}
	encoded := new(bytes.Buffer)
	if err := jpeg.Encode(encoded, canvas, &jpeg.Options{Quality: 90}); err != nil {
		t.Fatal(err)
	}
	return encoded.Bytes()
}
// kittyPayloadRE matches one kitty graphics escape (ESC _ G <control
// keys> ; <payload> ESC \) and captures the payload.
var kittyPayloadRE = regexp.MustCompile(`\x1b_G[^;]*;([^\x1b]*)\x1b\\`)

// kittyDecodePayload collects the base64 payload of every kitty
// graphics escape in seq, concatenates the chunks in order and returns
// the decoded bytes. It fails the test when seq holds no such escape
// or the combined payload is not valid base64.
func kittyDecodePayload(t *testing.T, seq string) []byte {
	t.Helper()
	chunks := kittyPayloadRE.FindAllStringSubmatch(seq, -1)
	if len(chunks) == 0 {
		t.Fatalf("no kitty payload chunks found in sequence")
	}
	parts := make([]string, 0, len(chunks))
	for _, chunk := range chunks {
		parts = append(parts, chunk[1])
	}
	decoded, err := base64.StdEncoding.DecodeString(strings.Join(parts, ""))
	if err != nil {
		t.Fatalf("decode base64 payload: %v", err)
	}
	return decoded
}
func TestRenderKittyReencodesJPEGToPNG(t *testing.T) {
// A JPEG handed to the kitty renderer must be re-encoded to PNG,
// because kitty's f=100 path only decodes PNG. Otherwise the
// terminal reserves the cell rectangle but paints nothing, which is
// the empty-box symptom.
jpg := testJPEG(t, 40, 30)
seq := renderKitty(jpg, 20, 10)
if seq == "" {
t.Fatal("renderKitty returned empty sequence")
}
raw := kittyDecodePayload(t, seq)
pngMagic := []byte{0x89, 'P', 'N', 'G', 0x0D, 0x0A, 0x1A, 0x0A}
if len(raw) < 8 || !bytes.Equal(raw[:8], pngMagic) {
t.Fatalf("kitty payload is not PNG; got prefix %x", raw[:min(8, len(raw))])
}
}
// TestRenderKittyLeavesPNGUntouched checks that a PNG input reaches the
// kitty payload byte-for-byte: decoding the emitted sequence must give
// back exactly the bytes that were passed in.
func TestRenderKittyLeavesPNGUntouched(t *testing.T) {
	original := testPNG(t, 40, 30)
	payload := kittyDecodePayload(t, renderKitty(original, 20, 10))
	if bytes.Equal(payload, original) {
		return
	}
	t.Fatal("PNG payload was needlessly re-encoded")
}
func TestRowsForInlineImageRoundsUp(t *testing.T) {
t.Setenv("ZOT_CELL_ASPECT", "")
data := testPNG(t, 100, 51)

View file

@ -14,6 +14,7 @@ type Key struct {
Ctrl bool
Alt bool
Shift bool
Super bool
}
type KeyKind int
@ -44,6 +45,7 @@ const (
KeyCtrlW
KeyCtrlO
KeyPaste
KeyPasteClipboard
KeyMouseWheelUp
KeyMouseWheelDown
KeyUnknown
@ -90,6 +92,8 @@ func (r *Reader) Read() (Key, error) {
return Key{Kind: KeyCtrlW}, nil
case b == 0x0f:
return Key{Kind: KeyCtrlO}, nil
case b == 0x16:
return Key{Kind: KeyPasteClipboard, Ctrl: true}, nil
case b == '\r', b == '\n':
return Key{Kind: KeyEnter}, nil
case b == '\t':
@ -242,7 +246,7 @@ func (r *Reader) dispatchCSI(params string, final byte) Key {
return Key{Kind: KeyUnknown}
}
shift, alt := parseCSIModifiers(params)
shift, alt, super := parseCSIModifiers(params)
if final == 'u' {
if key, ok := parseCSIU(params); ok {
return key
@ -255,13 +259,13 @@ func (r *Reader) dispatchCSI(params string, final byte) Key {
}
switch final {
case 'A':
return Key{Kind: KeyUp, Alt: alt, Shift: shift}
return Key{Kind: KeyUp, Alt: alt, Shift: shift, Super: super}
case 'B':
return Key{Kind: KeyDown, Alt: alt, Shift: shift}
return Key{Kind: KeyDown, Alt: alt, Shift: shift, Super: super}
case 'C':
return Key{Kind: KeyRight, Alt: alt, Shift: shift}
return Key{Kind: KeyRight, Alt: alt, Shift: shift, Super: super}
case 'D':
return Key{Kind: KeyLeft, Alt: alt, Shift: shift}
return Key{Kind: KeyLeft, Alt: alt, Shift: shift, Super: super}
case 'H':
return Key{Kind: KeyHome}
case 'F':
@ -284,23 +288,20 @@ func (r *Reader) dispatchCSI(params string, final byte) Key {
return Key{Kind: KeyUnknown}
}
// parseCSIModifiers reads the modifier value found after the last ';'
// in params and reports which modifier flags it encodes. Every flag is
// false when params is empty, contains no ';', ends with ';', or the
// modifier field cannot be parsed as a number.
func parseCSIModifiers(params string) (shift, alt bool) {
func parseCSIModifiers(params string) (shift, alt, super bool) {
if params == "" {
return false, false
return false, false, false
}
i := strings.LastIndexByte(params, ';')
if i < 0 || i+1 >= len(params) {
return false, false
return false, false, false
}
mod, err := strconv.Atoi(params[i+1:])
if err != nil {
return false, false
mod, ok := parseModifierParam(params[i+1:])
if !ok {
return false, false, false
}
// Xterm-style modifier values are 1 plus a bitmask:
// 2=Shift, 3=Alt, 4=Shift+Alt, 5=Ctrl, 6=Shift+Ctrl,
// 7=Alt+Ctrl, 8=Shift+Alt+Ctrl.
bits := mod - 1
return bits&1 != 0, bits&2 != 0
shift, alt, _, super = modifierBits(mod)
return shift, alt, super
}
func parseCSIU(params string) (Key, bool) {
@ -314,7 +315,9 @@ func parseCSIU(params string) (Key, bool) {
}
mod := 1
if len(parts) >= 2 {
if mod, err = strconv.Atoi(parts[1]); err != nil {
var ok bool
mod, ok = parseModifierParam(parts[1])
if !ok {
return Key{}, false
}
}
@ -326,8 +329,8 @@ func parseModifyOtherKeys(params string) (Key, bool) {
if len(parts) != 3 || parts[0] != "27" {
return Key{}, false
}
mod, err := strconv.Atoi(parts[1])
if err != nil {
mod, ok := parseModifierParam(parts[1])
if !ok {
return Key{}, false
}
code, err := strconv.Atoi(parts[2])
@ -337,27 +340,40 @@ func parseModifyOtherKeys(params string) (Key, bool) {
return keyFromModifiedCode(code, mod)
}
func keyFromModifiedCode(code, mod int) (Key, bool) {
// parseModifierParam converts a modifier field of an escape sequence to
// its integer value. Anything from the first ':' onward (a sub-field
// such as the event type in "9:3") is dropped before parsing. The
// second result is false, and the value 0, when the remaining text is
// not a valid integer.
func parseModifierParam(s string) (int, bool) {
	field, _, _ := strings.Cut(s, ":")
	value, err := strconv.Atoi(field)
	if err == nil {
		return value, true
	}
	return 0, false
}
// modifierBits splits a modifier value, encoded as 1 plus a bitmask,
// into individual flags: bit 1 is shift, bit 2 is alt, bit 4 is ctrl,
// and super is reported when bit 8 or bit 32 is set.
// NOTE(review): the kitty keyboard protocol documents bit 16 as hyper
// and bit 32 as meta; confirm that folding bit 32 (and not bit 16)
// into super is the intended behavior.
func modifierBits(mod int) (shift, alt, ctrl, super bool) {
bits := mod - 1
shift := bits&1 != 0
alt := bits&2 != 0
ctrl := bits&4 != 0
return bits&1 != 0, bits&2 != 0, bits&4 != 0, bits&8 != 0 || bits&32 != 0
}
func keyFromModifiedCode(code, mod int) (Key, bool) {
shift, alt, ctrl, super := modifierBits(mod)
// Kitty keyboard protocol (CSI ... u) reports control keys as their
// codepoints: Esc=27, Enter=13, Tab=9, Backspace=127. Without the
// enhanced-mode handling these arrive as raw bytes; with it enabled
// they come through here, so map them back to their dedicated keys.
switch code {
case 13:
return Key{Kind: KeyEnter, Shift: shift, Alt: alt, Ctrl: ctrl}, true
return Key{Kind: KeyEnter, Shift: shift, Alt: alt, Ctrl: ctrl, Super: super}, true
case 27:
return Key{Kind: KeyEsc, Shift: shift, Alt: alt, Ctrl: ctrl}, true
return Key{Kind: KeyEsc, Shift: shift, Alt: alt, Ctrl: ctrl, Super: super}, true
case 9:
if shift {
return Key{Kind: KeyShiftTab, Alt: alt, Ctrl: ctrl}, true
return Key{Kind: KeyShiftTab, Alt: alt, Ctrl: ctrl, Super: super}, true
}
return Key{Kind: KeyTab, Shift: shift, Alt: alt, Ctrl: ctrl}, true
return Key{Kind: KeyTab, Shift: shift, Alt: alt, Ctrl: ctrl, Super: super}, true
case 127, 8:
return Key{Kind: KeyBackspace, Shift: shift, Alt: alt, Ctrl: ctrl}, true
return Key{Kind: KeyBackspace, Shift: shift, Alt: alt, Ctrl: ctrl, Super: super}, true
}
if ctrl {
switch code {
@ -379,8 +395,13 @@ func keyFromModifiedCode(code, mod int) (Key, bool) {
return Key{Kind: KeyCtrlW, Shift: shift, Alt: alt, Ctrl: true}, true
case 'o', 'O':
return Key{Kind: KeyCtrlO, Shift: shift, Alt: alt, Ctrl: true}, true
case 'v', 'V':
return Key{Kind: KeyPasteClipboard, Shift: shift, Alt: alt, Ctrl: true}, true
}
}
if code >= '0' && code <= '9' {
return Key{Kind: KeyRune, Rune: rune(code), Shift: shift, Alt: alt, Ctrl: ctrl, Super: super}, true
}
return Key{}, false
}

View file

@ -23,6 +23,48 @@ func TestReaderParsesCSIUCtrlC(t *testing.T) {
}
}
// TestReaderParsesCSIUCtrlNumber feeds the CSI u sequence for codepoint
// 49 ('1') with modifier 5 and expects a rune key '1' with Ctrl set.
func TestReaderParsesCSIUCtrlNumber(t *testing.T) {
	got := readKey(t, "\x1b[49;5u")
	if got.Kind == KeyRune && got.Rune == '1' && got.Ctrl {
		return
	}
	t.Fatalf("Read kind=%v rune=%q ctrl=%v, want ctrl+1", got.Kind, got.Rune, got.Ctrl)
}
// TestReaderParsesCSIUSuperNumber feeds the CSI u sequence for codepoint
// 50 ('2') with modifier 9 and expects a rune key '2' with Super set.
func TestReaderParsesCSIUSuperNumber(t *testing.T) {
	got := readKey(t, "\x1b[50;9u")
	if got.Kind == KeyRune && got.Rune == '2' && got.Super {
		return
	}
	t.Fatalf("Read kind=%v rune=%q super=%v, want super+2", got.Kind, got.Rune, got.Super)
}
// TestReaderParsesCSIUSuperNumberWithEventType covers a modifier field
// that carries a ':'-separated suffix ("9:3"): the key must still
// decode as rune '3' with Super set.
func TestReaderParsesCSIUSuperNumberWithEventType(t *testing.T) {
	got := readKey(t, "\x1b[51;9:3u")
	if got.Kind == KeyRune && got.Rune == '3' && got.Super {
		return
	}
	t.Fatalf("Read kind=%v rune=%q super=%v, want super+3", got.Kind, got.Rune, got.Super)
}
// TestReaderParsesCSIUHyperNumberAsSuper feeds codepoint 52 ('4') with
// modifier 33 and expects a rune key '4' that is reported with Super
// set.
// NOTE(review): modifier 33 is 1 + 32; the kitty keyboard protocol
// documents bit 32 as meta and bit 16 as hyper, so confirm whether
// "hyper" in the test name and message is the intended wording.
func TestReaderParsesCSIUHyperNumberAsSuper(t *testing.T) {
	got := readKey(t, "\x1b[52;33u")
	if got.Kind == KeyRune && got.Rune == '4' && got.Super {
		return
	}
	t.Fatalf("Read kind=%v rune=%q super=%v, want hyper+4 as super", got.Kind, got.Rune, got.Super)
}
// TestReaderParsesRawCtrlVAsClipboardPaste sends the single raw byte
// 0x16 and expects a clipboard-paste key with Ctrl set.
func TestReaderParsesRawCtrlVAsClipboardPaste(t *testing.T) {
	got := readKey(t, "\x16")
	if got.Kind == KeyPasteClipboard && got.Ctrl {
		return
	}
	t.Fatalf("Read kind=%v ctrl=%v, want ctrl+v clipboard paste", got.Kind, got.Ctrl)
}
// TestReaderParsesCSIUCtrlVAsClipboardPaste sends the CSI u sequence for
// codepoint 118 ('v') with modifier 5 and expects the same
// clipboard-paste key, with Ctrl set, as the raw control byte yields.
func TestReaderParsesCSIUCtrlVAsClipboardPaste(t *testing.T) {
	got := readKey(t, "\x1b[118;5u")
	if got.Kind == KeyPasteClipboard && got.Ctrl {
		return
	}
	t.Fatalf("Read kind=%v ctrl=%v, want enhanced ctrl+v clipboard paste", got.Kind, got.Ctrl)
}
func TestReaderParsesModifyOtherKeysCtrlC(t *testing.T) {
k := readKey(t, "\x1b[27;5;99~")
if k.Kind != KeyCtrlC || !k.Ctrl {