mirror of
https://github.com/patriceckhart/zot.git
synced 2026-06-26 21:36:31 +02:00
add configurable thinking level
This commit is contained in:
parent
d6a30a5967
commit
37ef90bbb3
16 changed files with 555 additions and 72 deletions
|
|
@ -141,7 +141,7 @@ zot --help
|
|||
| `--base-url <url>` | Override the provider base URL (tests, self-hosted). |
|
||||
| `--system-prompt <text>` | Replace the default system prompt for this run (also overrides `$ZOT_HOME/SYSTEM.md`). |
|
||||
| `--append-system-prompt <text>` | Append text to the system prompt (repeatable). |
|
||||
| `--reasoning low\|medium\|high` | Enable reasoning on supported models. |
|
||||
| `--reasoning off\|minimum\|low\|medium\|high\|maximum` | Set thinking level on supported models. |
|
||||
| `-c`, `--continue` | Resume the latest session for this cwd. |
|
||||
| `-r`, `--resume` | Pick a session to resume. |
|
||||
| `--session <path>` | Resume a specific session file. |
|
||||
|
|
@ -290,10 +290,11 @@ Background subagents that run alongside your main session. Each one is a separat
|
|||
|
||||
### `/settings`
|
||||
|
||||
Opens a dialog with every persistent toggle. `up`/`down` to navigate, `enter` or `space` to flip the selected row, `esc` to close. Changes are written to `$ZOT_HOME/config.json` and take effect on the next turn (no restart needed). Current toggles:
|
||||
Opens a dialog with every persistent setting. `up`/`down` to navigate, `enter` or `space` to change the selected row, `esc` to close. Changes are written to `$ZOT_HOME/config.json` and take effect on the next turn (no restart needed). Current settings:
|
||||
|
||||
- **render images when supported** — draw screenshots / `read`-returned images inline using the terminal's image protocol, or fall back to a text placeholder. Auto-detected from `TERM_PROGRAM`; the toggle overrides the detection. The row is greyed out and forced off on terminals that don't speak any image protocol.
|
||||
- **auto-swarm** — let the main agent spawn background sub-agents in parallel via a built-in `swarm_spawn` tool. Off by default. When on, the tool is registered with the running agent, the system prompt gains a short addendum telling the model to delegate independent sub-tasks proactively, and zot watches every sub-agent the main agent spawns. As soon as the last sub-agent in a batch finishes its initial task, an `[auto-swarm update]` message is injected back into the chat with each agent's status / task / transcript tail, so the main agent can summarise the collective outcome. Flipping off mid-session removes the tool from the live agent and strips the addendum on the next turn — the model stops trying to delegate. See `/swarm` for the dashboard that lets you monitor, message, kill, or remove the spawned agents.
|
||||
- **thinking level** — choose the reasoning depth for supported models: off (no reasoning), minimum (~1k tokens), low (~2k), medium (~8k), high (~16k), maximum (~32k). The change is persisted to `config.json` and applied to the running agent's next model call.
|
||||
|
||||
### `/skills`
|
||||
|
||||
|
|
@ -468,7 +469,7 @@ Use `/login` and pick **api key** to paste an AI Studio key. zot probes `/v1beta
|
|||
|
||||
> **Free-tier rate limits.** AI Studio's free tier has tight per-minute and per-day caps that vary by model: `gemini-2.5-pro` is the strictest (a few requests per minute, ~50 per day), Flash and Flash-Lite are far more generous. If a Pro turn 429s with `"You exceeded your current quota"` while Flash on the same key still works, you've hit the Pro free-tier RPD. Either switch to Flash for agent loops, or [enable billing](https://aistudio.google.com/app/apikey) on your AI Studio project to flip the same key from free to pay-as-you-go pricing (`$1.25/M` input, `$10/M` output for Pro).
|
||||
|
||||
Reasoning levels (`--reasoning low|medium|high`) map differently per generation: 2.5 family uses `thinkingBudget` token budgets per model (Pro caps at 32k, Flash at 24k); Gemini 3.x uses the `thinkingLevel` enum (`MINIMAL`/`LOW`/`MEDIUM`/`HIGH`), with Gemini-3-Pro pinned to `LOW` minimum and `HIGH` for any "medium" or "high" request. 2.0-family models have no thinking config at all.
|
||||
Reasoning levels (`--reasoning off|minimum|low|medium|high|maximum`, also configurable in `/settings` as **thinking level**) map differently per generation. Budget-based providers use roughly 1k/2k/8k/16k/32k thinking tokens for minimum/low/medium/high/maximum, with provider/model caps applied (Gemini 2.5 Pro caps at 32k; Flash at 24k). Gemini 3.x uses the `thinkingLevel` enum (`MINIMAL`/`LOW`/`MEDIUM`/`HIGH`), with Gemini-3-Pro pinned to `LOW` minimum and `HIGH` for any "medium" or higher request. Effort-based OpenAI-compatible chat providers map minimum to `low`, low/medium directly, and high/maximum to `high`; the Codex/Responses backend maps maximum to `xhigh` where supported. `off` sends no reasoning config. 2.0-family Gemini models have no thinking config at all.
|
||||
|
||||
You can add additional Gemini model IDs to `models.json` under the `google` provider.
|
||||
|
||||
|
|
|
|||
|
|
@ -196,10 +196,10 @@ func ParseArgs(in []string) (Args, error) {
|
|||
return a, err
|
||||
}
|
||||
switch strings.ToLower(v) {
|
||||
case "", "low", "medium", "high":
|
||||
case "", "off", "minimum", "minimal", "low", "medium", "high", "maximum", "max":
|
||||
a.Reasoning = strings.ToLower(v)
|
||||
default:
|
||||
return a, fmt.Errorf("--reasoning must be low|medium|high")
|
||||
return a, fmt.Errorf("--reasoning must be off|minimum|low|medium|high|maximum")
|
||||
}
|
||||
case "--session":
|
||||
v, err := want(&i, arg)
|
||||
|
|
@ -360,7 +360,7 @@ func PrintHelp(version string) {
|
|||
row{"--model ID", "model id (see --list-models)"},
|
||||
row{"--api-key KEY", "api key for this run (env / auth.json fallback)"},
|
||||
row{"--base-url URL", "override provider api base url"},
|
||||
row{"--reasoning low|medium|high", "enable reasoning on supported models"},
|
||||
row{"--reasoning off|minimum|low|medium|high|maximum", "set thinking level on supported models"},
|
||||
)
|
||||
section("prompt and session flags",
|
||||
row{"--system-prompt TEXT", "replace the default system prompt"},
|
||||
|
|
|
|||
|
|
@ -426,7 +426,7 @@ func Resolve(args Args, requireCred bool) (Resolved, error) {
|
|||
ZotDocsDir: docsDir,
|
||||
})
|
||||
|
||||
reasoning := firstNonEmpty(args.Reasoning, cfg.Reasoning)
|
||||
reasoning := provider.NormalizeReasoning(firstNonEmpty(args.Reasoning, cfg.Reasoning))
|
||||
|
||||
max := args.MaxSteps // 0 = unlimited
|
||||
|
||||
|
|
|
|||
|
|
@ -221,6 +221,7 @@ type chatCacheKey struct {
|
|||
type SettingsStore interface {
|
||||
SetInlineImages(enabled bool) error
|
||||
SetAutoSwarm(enabled bool) error
|
||||
SetReasoning(level string) error
|
||||
}
|
||||
|
||||
type Interactive struct {
|
||||
|
|
@ -1065,6 +1066,7 @@ func (i *Interactive) redraw() {
|
|||
Theme: i.cfg.Theme,
|
||||
Provider: i.cfg.Provider,
|
||||
Model: i.cfg.Model,
|
||||
Reasoning: i.cfg.Reasoning,
|
||||
Busy: i.busy,
|
||||
BusyPrefix: busyPrefix,
|
||||
CWD: i.cfg.CWD,
|
||||
|
|
@ -1662,7 +1664,7 @@ func (i *Interactive) handleKey(ctx context.Context, k tui.Key) (done bool) {
|
|||
}
|
||||
act := i.settingsDialog.HandleKey(k)
|
||||
if act.Toggle {
|
||||
i.applySettingToggle(act.Key, act.Value)
|
||||
i.applySettingChange(act)
|
||||
}
|
||||
i.invalidate()
|
||||
return false
|
||||
|
|
@ -2503,6 +2505,27 @@ func (i *Interactive) openSettingsDialog() {
|
|||
autoSwarmHint = "swarm supervisor not available in this mode"
|
||||
}
|
||||
|
||||
reasoningOptions := []settingsOption{
|
||||
{value: "", label: "off", desc: "no reasoning"},
|
||||
{value: "minimum", label: "minimum", desc: "very brief (~1k tokens)"},
|
||||
{value: "low", label: "low", desc: "light (~2k tokens)"},
|
||||
{value: "medium", label: "medium", desc: "moderate (~8k tokens)"},
|
||||
{value: "high", label: "high", desc: "deep (~16k tokens)"},
|
||||
{value: "maximum", label: "maximum", desc: "highest (~32k tokens)"},
|
||||
}
|
||||
reasoning := provider.NormalizeReasoning(i.cfg.Reasoning)
|
||||
reasoningChoice := 0
|
||||
for idx, opt := range reasoningOptions {
|
||||
if opt.value == reasoning {
|
||||
reasoningChoice = idx
|
||||
break
|
||||
}
|
||||
}
|
||||
reasoningHint := ""
|
||||
if m, err := provider.FindModel(i.cfg.Provider, i.cfg.Model); err == nil && !m.Reasoning {
|
||||
reasoningHint = "current model does not support thinking"
|
||||
}
|
||||
|
||||
i.settingsDialog.Open([]settingsItem{
|
||||
{
|
||||
key: "inline_images_enabled",
|
||||
|
|
@ -2520,9 +2543,25 @@ func (i *Interactive) openSettingsDialog() {
|
|||
disabled: autoSwarmDisabled,
|
||||
hint: autoSwarmHint,
|
||||
},
|
||||
{
|
||||
key: "reasoning",
|
||||
label: "thinking level",
|
||||
desc: "reasoning depth for thinking-capable models",
|
||||
options: reasoningOptions,
|
||||
choice: reasoningChoice,
|
||||
hint: reasoningHint,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
func (i *Interactive) applySettingChange(act settingsAction) {
|
||||
if act.Key == "reasoning" {
|
||||
i.applyReasoningSetting(act.StringValue)
|
||||
return
|
||||
}
|
||||
i.applySettingToggle(act.Key, act.Value)
|
||||
}
|
||||
|
||||
func (i *Interactive) applySettingToggle(key string, value bool) {
|
||||
// Every setting toggle forces a full repaint at the end — same
|
||||
// effect as the user pressing Ctrl+L — so any per-setting visual
|
||||
|
|
@ -2579,6 +2618,36 @@ func (i *Interactive) applySettingToggle(key string, value bool) {
|
|||
}
|
||||
}
|
||||
|
||||
func (i *Interactive) applyReasoningSetting(level string) {
|
||||
defer func() {
|
||||
if i.rend != nil {
|
||||
i.rend.Clear()
|
||||
}
|
||||
i.invalidate()
|
||||
}()
|
||||
level = provider.NormalizeReasoning(level)
|
||||
i.cfg.Reasoning = level
|
||||
if i.cfg.SettingsStore != nil {
|
||||
if err := i.cfg.SettingsStore.SetReasoning(level); err != nil {
|
||||
i.mu.Lock()
|
||||
i.statusErr = "settings: " + err.Error()
|
||||
i.mu.Unlock()
|
||||
return
|
||||
}
|
||||
}
|
||||
i.mu.Lock()
|
||||
if i.agent != nil {
|
||||
i.agent.Reasoning = level
|
||||
}
|
||||
label := level
|
||||
if label == "" {
|
||||
label = "off"
|
||||
}
|
||||
i.statusOK = "thinking level " + label
|
||||
i.statusErr = ""
|
||||
i.mu.Unlock()
|
||||
}
|
||||
|
||||
// buildStudyPrompt returns the canned prompt the /study command
|
||||
// submits to the agent.
|
||||
//
|
||||
|
|
|
|||
|
|
@ -5,9 +5,11 @@ import (
|
|||
)
|
||||
|
||||
type settingsDialog struct {
|
||||
active bool
|
||||
items []settingsItem
|
||||
cursor int
|
||||
active bool
|
||||
items []settingsItem
|
||||
cursor int
|
||||
selecting bool
|
||||
optionCursor int
|
||||
}
|
||||
|
||||
type settingsItem struct {
|
||||
|
|
@ -15,15 +17,24 @@ type settingsItem struct {
|
|||
label string
|
||||
desc string
|
||||
value bool
|
||||
options []settingsOption
|
||||
choice int
|
||||
disabled bool
|
||||
hint string
|
||||
}
|
||||
|
||||
// settingsOption is one selectable choice of a settings row that offers a
// list of options instead of an on/off toggle.
type settingsOption struct {
|
||||
// value is reported in settingsAction.StringValue when the option is selected.
value string
|
||||
// label is the text displayed for the option in the dialog.
label string
|
||||
// desc, when non-empty, is rendered as a muted line after the label in the option list.
desc string
|
||||
}
|
||||
|
||||
type settingsAction struct {
|
||||
Toggle bool
|
||||
Key string
|
||||
Value bool
|
||||
Close bool
|
||||
Toggle bool
|
||||
Key string
|
||||
Value bool
|
||||
StringValue string
|
||||
Close bool
|
||||
}
|
||||
|
||||
func newSettingsDialog() *settingsDialog { return &settingsDialog{} }
|
||||
|
|
@ -34,14 +45,22 @@ func (d *settingsDialog) Open(items []settingsItem) bool {
|
|||
}
|
||||
d.items = items
|
||||
d.cursor = 0
|
||||
d.selecting = false
|
||||
d.optionCursor = 0
|
||||
d.active = true
|
||||
return true
|
||||
}
|
||||
|
||||
func (d *settingsDialog) Close() { d.active = false }
|
||||
func (d *settingsDialog) Close() {
|
||||
d.active = false
|
||||
d.selecting = false
|
||||
}
|
||||
func (d *settingsDialog) Active() bool { return d != nil && d.active }
|
||||
|
||||
func (d *settingsDialog) HandleKey(k tui.Key) settingsAction {
|
||||
if d.selecting {
|
||||
return d.handleOptionKey(k)
|
||||
}
|
||||
switch k.Kind {
|
||||
case tui.KeyUp:
|
||||
if d.cursor > 0 {
|
||||
|
|
@ -64,6 +83,29 @@ func (d *settingsDialog) HandleKey(k tui.Key) settingsAction {
|
|||
return settingsAction{}
|
||||
}
|
||||
|
||||
func (d *settingsDialog) handleOptionKey(k tui.Key) settingsAction {
|
||||
it := d.items[d.cursor]
|
||||
switch k.Kind {
|
||||
case tui.KeyUp:
|
||||
if d.optionCursor > 0 {
|
||||
d.optionCursor--
|
||||
}
|
||||
case tui.KeyDown:
|
||||
if d.optionCursor < len(it.options)-1 {
|
||||
d.optionCursor++
|
||||
}
|
||||
case tui.KeyEsc:
|
||||
d.selecting = false
|
||||
case tui.KeyEnter:
|
||||
return d.selectCurrentOption()
|
||||
case tui.KeyRune:
|
||||
if k.Rune == ' ' {
|
||||
return d.selectCurrentOption()
|
||||
}
|
||||
}
|
||||
return settingsAction{}
|
||||
}
|
||||
|
||||
func (d *settingsDialog) toggleCurrent() settingsAction {
|
||||
if len(d.items) == 0 {
|
||||
d.Close()
|
||||
|
|
@ -73,24 +115,61 @@ func (d *settingsDialog) toggleCurrent() settingsAction {
|
|||
if it.disabled {
|
||||
return settingsAction{}
|
||||
}
|
||||
if len(it.options) > 0 {
|
||||
d.optionCursor = it.choice
|
||||
if d.optionCursor < 0 || d.optionCursor >= len(it.options) {
|
||||
d.optionCursor = 0
|
||||
}
|
||||
d.selecting = true
|
||||
return settingsAction{}
|
||||
}
|
||||
it.value = !it.value
|
||||
d.items[d.cursor] = it
|
||||
return settingsAction{Toggle: true, Key: it.key, Value: it.value}
|
||||
}
|
||||
|
||||
func (d *settingsDialog) selectCurrentOption() settingsAction {
|
||||
if len(d.items) == 0 {
|
||||
d.Close()
|
||||
return settingsAction{Close: true}
|
||||
}
|
||||
it := d.items[d.cursor]
|
||||
if len(it.options) == 0 {
|
||||
d.selecting = false
|
||||
return settingsAction{}
|
||||
}
|
||||
if d.optionCursor < 0 || d.optionCursor >= len(it.options) {
|
||||
d.optionCursor = 0
|
||||
}
|
||||
it.choice = d.optionCursor
|
||||
d.items[d.cursor] = it
|
||||
d.selecting = false
|
||||
return settingsAction{Toggle: true, Key: it.key, StringValue: it.options[it.choice].value}
|
||||
}
|
||||
|
||||
func (d *settingsDialog) Render(th tui.Theme, width int) []string {
|
||||
if !d.Active() {
|
||||
return nil
|
||||
}
|
||||
if d.selecting {
|
||||
return d.renderOptions(th, width)
|
||||
}
|
||||
var lines []string
|
||||
lines = append(lines, frameHeader(th, "settings", width))
|
||||
lines = append(lines, th.FG256(th.Muted, "toggle with enter/space, esc to close:"))
|
||||
lines = append(lines, th.FG256(th.Muted, "change with enter/space, esc to close:"))
|
||||
for i, it := range d.items {
|
||||
box := "[ ]"
|
||||
if it.value {
|
||||
box = "[x]"
|
||||
}
|
||||
plain := " " + box + " " + it.label
|
||||
if len(it.options) > 0 {
|
||||
box = "[>]"
|
||||
if it.choice < 0 || it.choice >= len(it.options) {
|
||||
it.choice = 0
|
||||
}
|
||||
plain = " " + box + " " + it.label + ": " + it.options[it.choice].label
|
||||
}
|
||||
if it.hint != "" {
|
||||
plain += " " + th.FG256(th.Muted, "("+it.hint+")")
|
||||
}
|
||||
|
|
@ -108,3 +187,33 @@ func (d *settingsDialog) Render(th tui.Theme, width int) []string {
|
|||
lines = append(lines, frameRule(th, width))
|
||||
return lines
|
||||
}
|
||||
|
||||
func (d *settingsDialog) renderOptions(th tui.Theme, width int) []string {
|
||||
if len(d.items) == 0 || d.cursor < 0 || d.cursor >= len(d.items) {
|
||||
d.selecting = false
|
||||
return d.Render(th, width)
|
||||
}
|
||||
it := d.items[d.cursor]
|
||||
lines := []string{frameHeader(th, "settings: "+it.label, width)}
|
||||
if it.desc != "" {
|
||||
lines = append(lines, th.FG256(th.Muted, it.desc))
|
||||
}
|
||||
lines = append(lines, th.FG256(th.Muted, "select with enter/space, esc to go back:"))
|
||||
for idx, opt := range it.options {
|
||||
marker := " "
|
||||
if idx == it.choice {
|
||||
marker = "✓ "
|
||||
}
|
||||
plain := " " + marker + opt.label
|
||||
if idx == d.optionCursor {
|
||||
lines = append(lines, th.PadHighlight(plain, width))
|
||||
} else {
|
||||
lines = append(lines, plain)
|
||||
}
|
||||
if opt.desc != "" {
|
||||
lines = append(lines, th.FG256(th.Muted, " "+opt.desc))
|
||||
}
|
||||
}
|
||||
lines = append(lines, frameRule(th, width))
|
||||
return lines
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
package agent
|
||||
|
||||
import "github.com/patriceckhart/zot/internal/provider"
|
||||
|
||||
type configSettingsStore struct{}
|
||||
|
||||
func (configSettingsStore) SetInlineImages(enabled bool) error {
|
||||
|
|
@ -20,6 +22,15 @@ func (configSettingsStore) SetAutoSwarm(enabled bool) error {
|
|||
return SaveConfig(cfg)
|
||||
}
|
||||
|
||||
func (configSettingsStore) SetReasoning(level string) error {
|
||||
cfg, err := LoadConfig()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
cfg.Reasoning = provider.NormalizeReasoning(level)
|
||||
return SaveConfig(cfg)
|
||||
}
|
||||
|
||||
// AutoSwarmEnabled reads the current auto-swarm flag from config.
|
||||
// Used by the swarm_spawn tool at call time to gate execution.
|
||||
func AutoSwarmEnabled() bool {
|
||||
|
|
|
|||
|
|
@ -249,10 +249,18 @@ func (c *anthropicClient) buildRequest(req Request) (*anthRequest, error) {
|
|||
if req.Reasoning != "" && m.Reasoning {
|
||||
budget := anthropicReasoningBudget(req.Reasoning)
|
||||
if budget > 0 {
|
||||
// Reasoning requires max_tokens > budget. Keep at least a small
|
||||
// answer budget while respecting the model's advertised output cap.
|
||||
const minAnswerTokens = 1024
|
||||
if m.MaxOutput > minAnswerTokens && budget >= m.MaxOutput {
|
||||
budget = m.MaxOutput - minAnswerTokens
|
||||
}
|
||||
out.Thinking = &anthThinking{Type: "enabled", BudgetTokens: budget}
|
||||
// Reasoning requires max_tokens > budget.
|
||||
if out.MaxTokens <= budget {
|
||||
out.MaxTokens = budget + 4096
|
||||
out.MaxTokens = budget + minAnswerTokens
|
||||
if m.MaxOutput > 0 && out.MaxTokens > m.MaxOutput {
|
||||
out.MaxTokens = m.MaxOutput
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -361,16 +369,7 @@ func markLastBlockEphemeral(blocks []interface{}) {
|
|||
}
|
||||
|
||||
func anthropicReasoningBudget(level string) int {
|
||||
switch strings.ToLower(level) {
|
||||
case "low":
|
||||
return 2048
|
||||
case "medium":
|
||||
return 8192
|
||||
case "high":
|
||||
return 16384
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
return ReasoningBudget(level)
|
||||
}
|
||||
|
||||
func filterAnthAssistantContent(blocks []Content) []Content {
|
||||
|
|
|
|||
|
|
@ -354,7 +354,7 @@ func convertGemToolResultParts(blocks []Content) []gemPart {
|
|||
// generation: 2.5 family uses thinkingBudget (tokens), 3.x uses
|
||||
// thinkingLevel (enum). Returns nil when the level is unrecognised.
|
||||
func geminiThinkingConfig(modelID, level string) *gemThinkingConfig {
|
||||
level = strings.ToLower(level)
|
||||
level = NormalizeReasoning(level)
|
||||
id := strings.ToLower(modelID)
|
||||
|
||||
// Gemini 3.x: enum-based thinkingLevel. Pro can't go below LOW.
|
||||
|
|
@ -362,6 +362,12 @@ func geminiThinkingConfig(modelID, level string) *gemThinkingConfig {
|
|||
isPro := strings.Contains(id, "-pro")
|
||||
var lvl string
|
||||
switch level {
|
||||
case "minimum":
|
||||
if isPro {
|
||||
lvl = "LOW"
|
||||
} else {
|
||||
lvl = "MINIMAL"
|
||||
}
|
||||
case "low":
|
||||
lvl = "LOW"
|
||||
case "medium":
|
||||
|
|
@ -370,7 +376,7 @@ func geminiThinkingConfig(modelID, level string) *gemThinkingConfig {
|
|||
} else {
|
||||
lvl = "MEDIUM"
|
||||
}
|
||||
case "high":
|
||||
case "high", "maximum":
|
||||
lvl = "HIGH"
|
||||
default:
|
||||
return nil
|
||||
|
|
@ -379,33 +385,18 @@ func geminiThinkingConfig(modelID, level string) *gemThinkingConfig {
|
|||
}
|
||||
|
||||
// Gemini 2.5 family: token-budget per-model.
|
||||
var budget int
|
||||
budget := ReasoningBudget(level)
|
||||
switch {
|
||||
case strings.Contains(id, "2.5-pro"):
|
||||
switch level {
|
||||
case "low":
|
||||
budget = 2048
|
||||
case "medium":
|
||||
budget = 8192
|
||||
case "high":
|
||||
if budget > 32768 {
|
||||
budget = 32768
|
||||
}
|
||||
case strings.Contains(id, "2.5-flash-lite"):
|
||||
switch level {
|
||||
case "low":
|
||||
budget = 2048
|
||||
case "medium":
|
||||
budget = 8192
|
||||
case "high":
|
||||
if budget > 24576 {
|
||||
budget = 24576
|
||||
}
|
||||
case strings.Contains(id, "2.5-flash"):
|
||||
switch level {
|
||||
case "low":
|
||||
budget = 2048
|
||||
case "medium":
|
||||
budget = 8192
|
||||
case "high":
|
||||
if budget > 24576 {
|
||||
budget = 24576
|
||||
}
|
||||
default:
|
||||
|
|
|
|||
|
|
@ -264,7 +264,9 @@ func TestGeminiThinkingConfig(t *testing.T) {
|
|||
{"gemini-3-pro", "low", "LOW", 0},
|
||||
{"gemini-3-pro", "medium", "HIGH", 0}, // Pro can't go below LOW; medium → HIGH
|
||||
{"gemini-3-flash", "medium", "MEDIUM", 0},
|
||||
{"gemini-2.5-pro", "high", "", 32768},
|
||||
{"gemini-2.5-pro", "high", "", 16384},
|
||||
{"gemini-2.5-pro", "maximum", "", 32768},
|
||||
{"gemini-2.5-flash", "minimum", "", 1024},
|
||||
{"gemini-2.5-flash", "low", "", 2048},
|
||||
{"gemini-2.0-flash", "high", "", 0}, // 2.0 has no thinking → nil
|
||||
}
|
||||
|
|
|
|||
|
|
@ -185,8 +185,8 @@ func (c *openaiClient) buildRequest(req Request) (*oaiRequest, error) {
|
|||
}
|
||||
if m.Reasoning {
|
||||
out.MaxCompletionTok = &maxTok
|
||||
if req.Reasoning != "" {
|
||||
out.ReasoningEffort = strings.ToLower(req.Reasoning)
|
||||
if effort := OpenAIReasoningEffort(req.Reasoning); effort != "" {
|
||||
out.ReasoningEffort = effort
|
||||
}
|
||||
} else {
|
||||
out.MaxTokens = &maxTok
|
||||
|
|
|
|||
|
|
@ -131,16 +131,21 @@ type codexTool struct {
|
|||
Parameters json.RawMessage `json:"parameters"`
|
||||
}
|
||||
|
||||
// codexReasoningConfig is the value of the "reasoning" field of codexRequest.
type codexReasoningConfig struct {
|
||||
// Effort is serialised as "effort" and omitted from the JSON when empty.
Effort string `json:"effort,omitempty"`
|
||||
}
|
||||
|
||||
type codexRequest struct {
|
||||
Model string `json:"model"`
|
||||
Store bool `json:"store"`
|
||||
Stream bool `json:"stream"`
|
||||
Instructions string `json:"instructions,omitempty"`
|
||||
Input []any `json:"input"`
|
||||
Tools []codexTool `json:"tools,omitempty"`
|
||||
ToolChoice string `json:"tool_choice,omitempty"`
|
||||
ParallelToolCalls bool `json:"parallel_tool_calls"`
|
||||
Include []string `json:"include,omitempty"`
|
||||
Model string `json:"model"`
|
||||
Store bool `json:"store"`
|
||||
Stream bool `json:"stream"`
|
||||
Instructions string `json:"instructions,omitempty"`
|
||||
Input []any `json:"input"`
|
||||
Tools []codexTool `json:"tools,omitempty"`
|
||||
ToolChoice string `json:"tool_choice,omitempty"`
|
||||
ParallelToolCalls bool `json:"parallel_tool_calls"`
|
||||
Include []string `json:"include,omitempty"`
|
||||
Reasoning *codexReasoningConfig `json:"reasoning,omitempty"`
|
||||
}
|
||||
|
||||
// ---- Request building ----
|
||||
|
|
@ -163,6 +168,11 @@ func (c *codexClient) buildRequest(req Request) (*codexRequest, error) {
|
|||
ParallelToolCalls: true,
|
||||
Include: []string{"reasoning.encrypted_content"},
|
||||
}
|
||||
if m.Reasoning {
|
||||
if effort := OpenAICodexReasoningEffort(req.Reasoning); effort != "" {
|
||||
body.Reasoning = &codexReasoningConfig{Effort: effort}
|
||||
}
|
||||
}
|
||||
if len(req.Tools) > 0 {
|
||||
body.ToolChoice = "auto"
|
||||
for _, t := range req.Tools {
|
||||
|
|
|
|||
|
|
@ -216,9 +216,10 @@ type Request struct {
|
|||
Tools []Tool
|
||||
MaxTokens int
|
||||
Temperature *float32
|
||||
// Reasoning is "", "low", "medium", or "high".
|
||||
// Anthropic maps it to extended-thinking budget tokens.
|
||||
// OpenAI maps it to reasoning_effort.
|
||||
// Reasoning is "", "minimum", "low", "medium", "high", or "maximum".
|
||||
// Empty disables reasoning. Budget-based providers map these to roughly
|
||||
// 1k/2k/8k/16k/32k thinking tokens; effort-based providers map them onto
|
||||
// their closest supported reasoning_effort values.
|
||||
Reasoning string
|
||||
}
|
||||
|
||||
|
|
|
|||
78
internal/provider/reasoning.go
Normal file
78
internal/provider/reasoning.go
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
package provider
|
||||
|
||||
import "strings"
|
||||
|
||||
// NormalizeReasoning maps a user-supplied thinking level onto zot's canonical
// spelling. Input is trimmed and lower-cased first.
//
// The result is "" (reasoning disabled) for "", "off", "none", "no", "false"
// and "disabled"; otherwise one of "minimum", "low", "medium", "high" or
// "maximum" for the recognised names and their short aliases. Unrecognised
// input is returned in its trimmed, lower-cased form.
func NormalizeReasoning(level string) string {
	cleaned := strings.ToLower(strings.TrimSpace(level))
	switch cleaned {
	case "", "off", "none", "no", "false", "disabled":
		return ""
	case "min", "minimal", "minimum":
		return "minimum"
	case "med", "medium":
		return "medium"
	case "hi", "high":
		return "high"
	case "max", "maximum":
		return "maximum"
	}
	// "low" has no aliases and unknown values pass through as cleaned.
	return cleaned
}
|
||||
|
||||
// ReasoningBudget returns zot's approximate token budget for thinking-capable
|
||||
// providers that accept explicit budgets.
|
||||
func ReasoningBudget(level string) int {
|
||||
switch NormalizeReasoning(level) {
|
||||
case "minimum":
|
||||
return 1024
|
||||
case "low":
|
||||
return 2048
|
||||
case "medium":
|
||||
return 8192
|
||||
case "high":
|
||||
return 16384
|
||||
case "maximum":
|
||||
return 32768
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
// OpenAIReasoningEffort maps zot's six-level setting onto the effort enum
|
||||
// accepted by OpenAI-compatible chat-completions endpoints.
|
||||
func OpenAIReasoningEffort(level string) string {
|
||||
switch NormalizeReasoning(level) {
|
||||
case "minimum", "low":
|
||||
// Many OpenAI-compatible endpoints only accept low/medium/high.
|
||||
// Use low for zot's minimum instead of the newer minimal enum.
|
||||
return "low"
|
||||
case "medium":
|
||||
return "medium"
|
||||
case "high", "maximum":
|
||||
return "high"
|
||||
default:
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
// OpenAICodexReasoningEffort maps zot levels onto the ChatGPT/Codex
|
||||
// Responses backend enum. That backend rejects "minimal" and uses
|
||||
// "xhigh" for the highest tier on recent GPT-5.x models.
|
||||
func OpenAICodexReasoningEffort(level string) string {
|
||||
switch NormalizeReasoning(level) {
|
||||
case "minimum", "low":
|
||||
return "low"
|
||||
case "medium":
|
||||
return "medium"
|
||||
case "high":
|
||||
return "high"
|
||||
case "maximum":
|
||||
return "xhigh"
|
||||
default:
|
||||
return ""
|
||||
}
|
||||
}
|
||||
36
internal/provider/reasoning_test.go
Normal file
36
internal/provider/reasoning_test.go
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
package provider
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestReasoningEffortMappings(t *testing.T) {
|
||||
cases := []struct {
|
||||
level string
|
||||
openai string
|
||||
codex string
|
||||
budget int
|
||||
normalized string
|
||||
}{
|
||||
{"off", "", "", 0, ""},
|
||||
{"minimum", "low", "low", 1024, "minimum"},
|
||||
{"minimal", "low", "low", 1024, "minimum"},
|
||||
{"low", "low", "low", 2048, "low"},
|
||||
{"medium", "medium", "medium", 8192, "medium"},
|
||||
{"high", "high", "high", 16384, "high"},
|
||||
{"maximum", "high", "xhigh", 32768, "maximum"},
|
||||
{"max", "high", "xhigh", 32768, "maximum"},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
if got := NormalizeReasoning(tc.level); got != tc.normalized {
|
||||
t.Errorf("NormalizeReasoning(%q)=%q want %q", tc.level, got, tc.normalized)
|
||||
}
|
||||
if got := OpenAIReasoningEffort(tc.level); got != tc.openai {
|
||||
t.Errorf("OpenAIReasoningEffort(%q)=%q want %q", tc.level, got, tc.openai)
|
||||
}
|
||||
if got := OpenAICodexReasoningEffort(tc.level); got != tc.codex {
|
||||
t.Errorf("OpenAICodexReasoningEffort(%q)=%q want %q", tc.level, got, tc.codex)
|
||||
}
|
||||
if got := ReasoningBudget(tc.level); got != tc.budget {
|
||||
t.Errorf("ReasoningBudget(%q)=%d want %d", tc.level, got, tc.budget)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -57,6 +57,135 @@ func TestStatusBarNoCWD(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestStatusBarThinkingLevelBetweenModelAndStats(t *testing.T) {
|
||||
lines := StatusBar(StatusBarParams{
|
||||
Theme: Dark,
|
||||
Provider: "openai-codex",
|
||||
Model: "gpt-5.5",
|
||||
Reasoning: "minimum",
|
||||
CWD: "/tmp/x",
|
||||
Usage: provider.Usage{
|
||||
InputTokens: 4_300_000,
|
||||
OutputTokens: 2,
|
||||
},
|
||||
Cols: 500,
|
||||
})
|
||||
if len(lines) != 2 {
|
||||
t.Fatalf("want 2 lines, got %d: %q", len(lines), lines)
|
||||
}
|
||||
plain := stripANSI(lines[0])
|
||||
modelIdx := strings.Index(plain, "(openai-codex) gpt-5.5")
|
||||
thinkingIdx := strings.Index(plain, "thinking: minimal")
|
||||
statsIdx := strings.Index(plain, "↑4.3M")
|
||||
if modelIdx < 0 || thinkingIdx < 0 || statsIdx < 0 {
|
||||
t.Fatalf("line should contain model, thinking level, and stats, got %q", plain)
|
||||
}
|
||||
if !(modelIdx < thinkingIdx && thinkingIdx < statsIdx) {
|
||||
t.Fatalf("thinking level should sit between model and stats, got %q", plain)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStatusBarNarrowKeepsModelAndThinkingTogetherWhenTheyFit(t *testing.T) {
|
||||
lines := StatusBar(StatusBarParams{
|
||||
Theme: Dark,
|
||||
Provider: "openai-codex",
|
||||
Model: "gpt-5.5",
|
||||
Reasoning: "maximum",
|
||||
CWD: "/tmp/x",
|
||||
Usage: provider.Usage{
|
||||
CostUSD: 0,
|
||||
},
|
||||
Subscription: true,
|
||||
ContextUsed: 100,
|
||||
ContextMax: 1_000_000,
|
||||
Cols: 64,
|
||||
})
|
||||
if len(lines) != 3 {
|
||||
t.Fatalf("narrow status with model+thinking fit: want 3 lines, got %d: %q", len(lines), lines)
|
||||
}
|
||||
plain := make([]string, len(lines))
|
||||
for i, line := range lines {
|
||||
plain[i] = stripANSI(line)
|
||||
}
|
||||
if !strings.Contains(plain[0], "(openai-codex) gpt-5.5 thinking: maximum") {
|
||||
t.Fatalf("line 1 should contain model and thinking level, got %q", plain[0])
|
||||
}
|
||||
if !strings.Contains(plain[1], "$0.000 (sub)") || strings.Contains(plain[1], "thinking level") {
|
||||
t.Fatalf("line 2 should contain only stats, got %q", plain[1])
|
||||
}
|
||||
if !strings.Contains(plain[2], "/tmp/x") {
|
||||
t.Fatalf("line 3 should contain cwd, got %q", plain[2])
|
||||
}
|
||||
}
|
||||
|
||||
func TestStatusBarNarrowSplitsAfterThinkingLevel(t *testing.T) {
|
||||
lines := StatusBar(StatusBarParams{
|
||||
Theme: Dark,
|
||||
Provider: "openai-codex",
|
||||
Model: "gpt-5.5",
|
||||
Reasoning: "minimum",
|
||||
CWD: "/tmp/x",
|
||||
Usage: provider.Usage{
|
||||
InputTokens: 4_300_000,
|
||||
OutputTokens: 2,
|
||||
},
|
||||
Cols: 40,
|
||||
})
|
||||
if len(lines) != 4 {
|
||||
t.Fatalf("narrow status with thinking: want 4 lines, got %d: %q", len(lines), lines)
|
||||
}
|
||||
plain := make([]string, len(lines))
|
||||
for i, line := range lines {
|
||||
plain[i] = stripANSI(line)
|
||||
}
|
||||
if !strings.Contains(plain[0], "(openai-codex) gpt-5.5") {
|
||||
t.Fatalf("line 1 should contain model info, got %q", plain[0])
|
||||
}
|
||||
if !strings.Contains(plain[1], "thinking: minimal") || strings.Contains(plain[1], "↑4.3M") {
|
||||
t.Fatalf("line 2 should contain only thinking level, got %q", plain[1])
|
||||
}
|
||||
if !strings.Contains(plain[2], "↑4.3M ↓2") {
|
||||
t.Fatalf("line 3 should contain stats, got %q", plain[2])
|
||||
}
|
||||
if !strings.Contains(plain[3], "/tmp/x") {
|
||||
t.Fatalf("line 4 should contain cwd, got %q", plain[3])
|
||||
}
|
||||
}
|
||||
|
||||
func TestStatusBarVeryNarrowSplitsAfterThinkingLevel(t *testing.T) {
|
||||
lines := StatusBar(StatusBarParams{
|
||||
Theme: Dark,
|
||||
Provider: "openai-codex",
|
||||
Model: "gpt-5.5",
|
||||
Reasoning: "minimum",
|
||||
CWD: "/tmp/x",
|
||||
Usage: provider.Usage{
|
||||
InputTokens: 4_300_000,
|
||||
OutputTokens: 2,
|
||||
},
|
||||
Cols: 32,
|
||||
})
|
||||
if len(lines) != 4 {
|
||||
t.Fatalf("narrow status with thinking: want 4 lines, got %d: %q", len(lines), lines)
|
||||
}
|
||||
plain := make([]string, len(lines))
|
||||
for i, line := range lines {
|
||||
plain[i] = stripANSI(line)
|
||||
}
|
||||
if !strings.Contains(plain[0], "(openai-codex) gpt-5.5") {
|
||||
t.Fatalf("line 1 should contain model info, got %q", plain[0])
|
||||
}
|
||||
if !strings.Contains(plain[1], "thinking: minimal") {
|
||||
t.Fatalf("line 2 should contain thinking level, got %q", plain[1])
|
||||
}
|
||||
if !strings.Contains(plain[2], "↑4.3M ↓2") {
|
||||
t.Fatalf("line 3 should contain stats, got %q", plain[2])
|
||||
}
|
||||
if !strings.Contains(plain[3], "/tmp/x") {
|
||||
t.Fatalf("line 4 should contain cwd, got %q", plain[3])
|
||||
}
|
||||
}
|
||||
|
||||
func TestStatusBarNoYoloTagPrecedesCWD(t *testing.T) {
|
||||
lines := StatusBar(StatusBarParams{
|
||||
Theme: Dark,
|
||||
|
|
|
|||
|
|
@ -1973,6 +1973,7 @@ type StatusBarParams struct {
|
|||
Theme Theme
|
||||
Provider string
|
||||
Model string
|
||||
Reasoning string // "" means thinking off
|
||||
Busy bool
|
||||
BusyPrefix string // spinner + funny line when busy
|
||||
CWD string
|
||||
|
|
@ -2069,7 +2070,20 @@ func StatusBar(p StatusBarParams) []string {
|
|||
const pad = " " // 2 spaces
|
||||
|
||||
left := fmt.Sprintf("(%s) %s", p.Provider, p.Model)
|
||||
middle := strings.Join(stats, " ")
|
||||
thinking := thinkingLevelLabel(p.Reasoning)
|
||||
thinkingText := ""
|
||||
if thinking != "" {
|
||||
thinkingText = "thinking: " + thinking
|
||||
}
|
||||
statsText := strings.Join(stats, " ")
|
||||
middleParts := make([]string, 0, 2)
|
||||
if thinkingText != "" {
|
||||
middleParts = append(middleParts, thinkingText)
|
||||
}
|
||||
if statsText != "" {
|
||||
middleParts = append(middleParts, statsText)
|
||||
}
|
||||
middle := strings.Join(middleParts, " ")
|
||||
|
||||
var leftBuilder strings.Builder
|
||||
if p.BusyPrefix != "" {
|
||||
|
|
@ -2122,8 +2136,7 @@ func StatusBar(p StatusBarParams) []string {
|
|||
modelLine := pad + th.FG256(th.Muted, left)
|
||||
lines := []string{busyLine}
|
||||
if middle != "" && visibleWidth(modelLine+pad+th.FG256(th.Muted, middle)) > p.Cols {
|
||||
lines = append(lines, modelLine)
|
||||
lines = append(lines, pad+th.FG256(th.Muted, middle))
|
||||
lines = appendWrappedStatusLines(lines, th, pad, left, thinkingText, statsText, p.Cols)
|
||||
} else {
|
||||
var infoBuilder strings.Builder
|
||||
infoBuilder.WriteString(modelLine)
|
||||
|
|
@ -2144,10 +2157,8 @@ func StatusBar(p StatusBarParams) []string {
|
|||
// avoids the terminal's hard wrap cutting the stats or pushing cwd
|
||||
// into an awkward position on small widths.
|
||||
if p.Cols > 0 && p.BusyPrefix == "" && middle != "" && visibleWidth(primary) > p.Cols {
|
||||
lines := []string{
|
||||
pad + th.FG256(th.Muted, left),
|
||||
pad + th.FG256(th.Muted, middle),
|
||||
}
|
||||
var lines []string
|
||||
lines = appendWrappedStatusLines(lines, th, pad, left, thinkingText, statsText, p.Cols)
|
||||
if cwd != "" {
|
||||
lines = append(lines, pad+th.FG256(th.Muted, cwd))
|
||||
}
|
||||
|
|
@ -2164,6 +2175,42 @@ func StatusBar(p StatusBarParams) []string {
|
|||
return []string{primary, cwdRendered}
|
||||
}
|
||||
|
||||
func appendWrappedStatusLines(lines []string, th Theme, pad, modelText, thinkingText, statsText string, cols int) []string {
|
||||
modelLine := pad + th.FG256(th.Muted, modelText)
|
||||
if thinkingText == "" {
|
||||
lines = append(lines, modelLine)
|
||||
if statsText != "" {
|
||||
lines = append(lines, pad+th.FG256(th.Muted, statsText))
|
||||
}
|
||||
return lines
|
||||
}
|
||||
|
||||
modelThinkingPlain := pad + modelText + pad + thinkingText
|
||||
if cols <= 0 || visibleWidth(modelThinkingPlain) <= cols {
|
||||
lines = append(lines, pad+th.FG256(th.Muted, modelText+pad+thinkingText))
|
||||
} else {
|
||||
lines = append(lines, modelLine)
|
||||
lines = append(lines, pad+th.FG256(th.Muted, thinkingText))
|
||||
}
|
||||
if statsText != "" {
|
||||
lines = append(lines, pad+th.FG256(th.Muted, statsText))
|
||||
}
|
||||
return lines
|
||||
}
|
||||
|
||||
// thinkingLevelLabel converts a reasoning level string into the label used
// for display. The input is trimmed and lower-cased before matching:
//
//   - "", "off", "none", "no", "false" and "disabled" yield "" (no label);
//   - "minimum", "minimal" and "min" yield "minimal";
//   - "maximum", "max" and "xhigh" yield "maximum";
//   - anything else is returned in its normalised (trimmed, lower-case) form.
func thinkingLevelLabel(level string) string {
	normalized := strings.ToLower(strings.TrimSpace(level))

	switch normalized {
	case "", "off", "none", "no", "false", "disabled":
		return ""
	case "minimum", "minimal", "min":
		return "minimal"
	case "maximum", "max", "xhigh":
		return "maximum"
	}
	return normalized
}
|
||||
|
||||
// contextUsage renders the "N%/ctxMax" fragment, returning the
|
||||
// rendered string plus the colour to wrap it in.
|
||||
func contextUsage(th Theme, used, max int) (string, int) {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue