add configurable thinking level

2026-06-26 21:36:31 +02:00 · 2026-05-26 18:07:33 +02:00 · 2026-05-26 18:07:33 +02:00 · 37ef90bbb3
commit 37ef90bbb3
parent d6a30a5967
16 changed files with 555 additions and 72 deletions
--- a/README.md
+++ b/README.md
@ -141,7 +141,7 @@ zot --help
 | `--base-url <url>` | Override the provider base URL (tests, self-hosted). |
 | `--system-prompt <text>` | Replace the default system prompt for this run (also overrides `$ZOT_HOME/SYSTEM.md`). |
 | `--append-system-prompt <text>` | Append text to the system prompt (repeatable). |
-| `--reasoning low\|medium\|high` | Enable reasoning on supported models. |
+| `--reasoning off\|minimum\|low\|medium\|high\|maximum` | Set thinking level on supported models. |
 | `-c`, `--continue` | Resume the latest session for this cwd. |
 | `-r`, `--resume` | Pick a session to resume. |
 | `--session <path>` | Resume a specific session file. |
@ -290,10 +290,11 @@ Background subagents that run alongside your main session. Each one is a separat

 ### `/settings`

-Opens a dialog with every persistent toggle. `up`/`down` to navigate, `enter` or `space` to flip the selected row, `esc` to close. Changes are written to `$ZOT_HOME/config.json` and take effect on the next turn (no restart needed). Current toggles:
+Opens a dialog with every persistent setting. `up`/`down` to navigate, `enter` or `space` to change the selected row, `esc` to close. Changes are written to `$ZOT_HOME/config.json` and take effect on the next turn (no restart needed). Current settings:

 - **render images when supported** — draw screenshots / `read`-returned images inline using the terminal's image protocol, or fall back to a text placeholder. Auto-detected from `TERM_PROGRAM`; the toggle overrides the detection. The row is greyed out and forced off on terminals that don't speak any image protocol.
 - **auto-swarm** — let the main agent spawn background sub-agents in parallel via a built-in `swarm_spawn` tool. Off by default. When on, the tool is registered with the running agent, the system prompt gains a short addendum telling the model to delegate independent sub-tasks proactively, and zot watches every sub-agent the main agent spawns. As soon as the last sub-agent in a batch finishes its initial task, an `[auto-swarm update]` message is injected back into the chat with each agent's status / task / transcript tail, so the main agent can summarise the collective outcome. Flipping off mid-session removes the tool from the live agent and strips the addendum on the next turn — the model stops trying to delegate. See `/swarm` for the dashboard that lets you monitor, message, kill, or remove the spawned agents.
+- **thinking level** — choose reasoning for supported models: off (no reasoning), minimum (~1k tokens), low (~2k), medium (~8k), high (~16k), maximum (~32k). The change is persisted to `config.json` and applied to the running agent's next model call.

 ### `/skills`

@ -468,7 +469,7 @@ Use `/login` and pick **api key** to paste an AI Studio key. zot probes `/v1beta

 > **Free-tier rate limits.** AI Studio's free tier has tight per-minute and per-day caps that vary by model: `gemini-2.5-pro` is the strictest (a few requests per minute, ~50 per day), Flash and Flash-Lite are far more generous. If a Pro turn 429s with `"You exceeded your current quota"` while Flash on the same key still works, you've hit the Pro free-tier RPD. Either switch to Flash for agent loops, or [enable billing](https://aistudio.google.com/app/apikey) on your AI Studio project to flip the same key from free to pay-as-you-go pricing (`$1.25/M` input, `$10/M` output for Pro).

-Reasoning levels (`--reasoning low|medium|high`) map differently per generation: 2.5 family uses `thinkingBudget` token budgets per model (Pro caps at 32k, Flash at 24k); Gemini 3.x uses the `thinkingLevel` enum (`MINIMAL`/`LOW`/`MEDIUM`/`HIGH`), with Gemini-3-Pro pinned to `LOW` minimum and `HIGH` for any "medium" or "high" request. 2.0-family models have no thinking config at all.
+Reasoning levels (`--reasoning off|minimum|low|medium|high|maximum`, also configurable in `/settings` as **thinking level**) map differently per generation. Budget-based providers use roughly 1k/2k/8k/16k/32k thinking tokens for minimum/low/medium/high/maximum, with provider/model caps applied (Gemini 2.5 Pro caps at 32k; Flash at 24k). Gemini 3.x uses the `thinkingLevel` enum (`MINIMAL`/`LOW`/`MEDIUM`/`HIGH`), with Gemini-3-Pro pinned to `LOW` minimum and `HIGH` for any "medium" or higher request. Effort-based OpenAI-compatible chat providers map minimum to `low`, low/medium directly, and high/maximum to `high`; the Codex/Responses backend maps maximum to `xhigh` where supported. `off` sends no reasoning config. 2.0-family Gemini models have no thinking config at all.

 You can add additional Gemini model IDs to `models.json` under the `google` provider.

--- a/internal/agent/args.go
+++ b/internal/agent/args.go
@ -196,10 +196,10 @@ func ParseArgs(in []string) (Args, error) {
 				return a, err
 			}
 			switch strings.ToLower(v) {
-			case "", "low", "medium", "high":
+			case "", "off", "minimum", "minimal", "low", "medium", "high", "maximum", "max":
 				a.Reasoning = strings.ToLower(v)
 			default:
-				return a, fmt.Errorf("--reasoning must be low|medium|high")
+				return a, fmt.Errorf("--reasoning must be off|minimum|low|medium|high|maximum")
 			}
 		case "--session":
 			v, err := want(&i, arg)
@ -360,7 +360,7 @@ func PrintHelp(version string) {
 		row{"--model ID", "model id (see --list-models)"},
 		row{"--api-key KEY", "api key for this run (env / auth.json fallback)"},
 		row{"--base-url URL", "override provider api base url"},
-		row{"--reasoning low|medium|high", "enable reasoning on supported models"},
+		row{"--reasoning off|minimum|low|medium|high|maximum", "set thinking level on supported models"},
 	)
 	section("prompt and session flags",
 		row{"--system-prompt TEXT", "replace the default system prompt"},
--- a/internal/agent/build.go
+++ b/internal/agent/build.go
@ -426,7 +426,7 @@ func Resolve(args Args, requireCred bool) (Resolved, error) {
 		ZotDocsDir: docsDir,
 	})

-	reasoning := firstNonEmpty(args.Reasoning, cfg.Reasoning)
+	reasoning := provider.NormalizeReasoning(firstNonEmpty(args.Reasoning, cfg.Reasoning))

 	max := args.MaxSteps // 0 = unlimited

--- a/internal/agent/modes/interactive.go
+++ b/internal/agent/modes/interactive.go
@ -221,6 +221,7 @@ type chatCacheKey struct {
 type SettingsStore interface {
 	SetInlineImages(enabled bool) error
 	SetAutoSwarm(enabled bool) error
+	SetReasoning(level string) error
 }

 type Interactive struct {
@ -1065,6 +1066,7 @@ func (i *Interactive) redraw() {
 		Theme:          i.cfg.Theme,
 		Provider:       i.cfg.Provider,
 		Model:          i.cfg.Model,
+		Reasoning:      i.cfg.Reasoning,
 		Busy:           i.busy,
 		BusyPrefix:     busyPrefix,
 		CWD:            i.cfg.CWD,
@ -1662,7 +1664,7 @@ func (i *Interactive) handleKey(ctx context.Context, k tui.Key) (done bool) {
 		}
 		act := i.settingsDialog.HandleKey(k)
 		if act.Toggle {
-			i.applySettingToggle(act.Key, act.Value)
+			i.applySettingChange(act)
 		}
 		i.invalidate()
 		return false
@ -2503,6 +2505,27 @@ func (i *Interactive) openSettingsDialog() {
 		autoSwarmHint = "swarm supervisor not available in this mode"
 	}

+	reasoningOptions := []settingsOption{
+		{value: "", label: "off", desc: "no reasoning"},
+		{value: "minimum", label: "minimum", desc: "very brief (~1k tokens)"},
+		{value: "low", label: "low", desc: "light (~2k tokens)"},
+		{value: "medium", label: "medium", desc: "moderate (~8k tokens)"},
+		{value: "high", label: "high", desc: "deep (~16k tokens)"},
+		{value: "maximum", label: "maximum", desc: "highest (~32k tokens)"},
+	}
+	reasoning := provider.NormalizeReasoning(i.cfg.Reasoning)
+	reasoningChoice := 0
+	for idx, opt := range reasoningOptions {
+		if opt.value == reasoning {
+			reasoningChoice = idx
+			break
+		}
+	}
+	reasoningHint := ""
+	if m, err := provider.FindModel(i.cfg.Provider, i.cfg.Model); err == nil && !m.Reasoning {
+		reasoningHint = "current model does not support thinking"
+	}
+
 	i.settingsDialog.Open([]settingsItem{
 		{
 			key:      "inline_images_enabled",
@ -2520,9 +2543,25 @@ func (i *Interactive) openSettingsDialog() {
 			disabled: autoSwarmDisabled,
 			hint:     autoSwarmHint,
 		},
+		{
+			key:     "reasoning",
+			label:   "thinking level",
+			desc:    "reasoning depth for thinking-capable models",
+			options: reasoningOptions,
+			choice:  reasoningChoice,
+			hint:    reasoningHint,
+		},
 	})
 }

+// applySettingChange routes an action emitted by the settings dialog to
+// the handler for its key: the "reasoning" row carries a string value,
+// every other row is handled as a boolean toggle.
+func (i *Interactive) applySettingChange(act settingsAction) {
+	switch act.Key {
+	case "reasoning":
+		i.applyReasoningSetting(act.StringValue)
+	default:
+		i.applySettingToggle(act.Key, act.Value)
+	}
+}
+
 func (i *Interactive) applySettingToggle(key string, value bool) {
 	// Every setting toggle forces a full repaint at the end — same
 	// effect as the user pressing Ctrl+L — so any per-setting visual
@ -2579,6 +2618,36 @@ func (i *Interactive) applySettingToggle(key string, value bool) {
 	}
 }

+// applyReasoningSetting persists a new thinking level and applies it to
+// the running session. level is canonicalised with
+// provider.NormalizeReasoning first, so "" means thinking is off.
+//
+// The in-memory config and the live agent are only updated once the
+// settings store has accepted the value. On a persistence error the
+// session keeps its previous level and the error is put in statusErr, so
+// the UI (which reads i.cfg.Reasoning on redraw) never shows a level the
+// agent is not actually using.
+func (i *Interactive) applyReasoningSetting(level string) {
+	// Force a full repaint whatever the outcome so the new status text
+	// is drawn.
+	defer func() {
+		if i.rend != nil {
+			i.rend.Clear()
+		}
+		i.invalidate()
+	}()
+	level = provider.NormalizeReasoning(level)
+	if i.cfg.SettingsStore != nil {
+		if err := i.cfg.SettingsStore.SetReasoning(level); err != nil {
+			i.mu.Lock()
+			i.statusErr = "settings: " + err.Error()
+			i.mu.Unlock()
+			return
+		}
+	}
+	// Persisted (or no store configured): switch config and agent together.
+	i.cfg.Reasoning = level
+	i.mu.Lock()
+	if i.agent != nil {
+		i.agent.Reasoning = level
+	}
+	// The canonical form of "off" is the empty string; show a readable name.
+	label := level
+	if label == "" {
+		label = "off"
+	}
+	i.statusOK = "thinking level " + label
+	i.statusErr = ""
+	i.mu.Unlock()
+}
+
 // buildStudyPrompt returns the canned prompt the /study command
 // submits to the agent.
 //
--- a/internal/agent/modes/settings_dialog.go
+++ b/internal/agent/modes/settings_dialog.go
@ -5,9 +5,11 @@ import (
 )

 type settingsDialog struct {
-	active bool
-	items  []settingsItem
-	cursor int
+	active       bool
+	items        []settingsItem
+	cursor       int
+	selecting    bool
+	optionCursor int
 }

 type settingsItem struct {
@ -15,15 +17,24 @@ type settingsItem struct {
 	label    string
 	desc     string
 	value    bool
+	options  []settingsOption
+	choice   int
 	disabled bool
 	hint     string
 }

+type settingsOption struct {
+	value string // reported as settingsAction.StringValue when this option is chosen
+	label string // short name shown in the option list and on the settings row
+	desc  string // optional muted line rendered under the label
+}
+
 type settingsAction struct {
-	Toggle bool
-	Key    string
-	Value  bool
-	Close  bool
+	Toggle      bool
+	Key         string
+	Value       bool
+	StringValue string
+	Close       bool
 }

 func newSettingsDialog() *settingsDialog { return &settingsDialog{} }
@ -34,14 +45,22 @@ func (d *settingsDialog) Open(items []settingsItem) bool {
 	}
 	d.items = items
 	d.cursor = 0
+	d.selecting = false
+	d.optionCursor = 0
 	d.active = true
 	return true
 }

-func (d *settingsDialog) Close()       { d.active = false }
+func (d *settingsDialog) Close() {
+	d.active = false
+	d.selecting = false
+}
 func (d *settingsDialog) Active() bool { return d != nil && d.active }

 func (d *settingsDialog) HandleKey(k tui.Key) settingsAction {
+	if d.selecting {
+		return d.handleOptionKey(k)
+	}
 	switch k.Kind {
 	case tui.KeyUp:
 		if d.cursor > 0 {
@ -64,6 +83,29 @@ func (d *settingsDialog) HandleKey(k tui.Key) settingsAction {
 	return settingsAction{}
 }

+// handleOptionKey processes a key while the option list of the current
+// item is open. Up/Down move the option cursor within the list, Esc
+// returns to the main settings list without changing anything, and
+// Enter or Space commits the highlighted option via selectCurrentOption.
+// Every other key is ignored and yields an empty action.
+func (d *settingsDialog) handleOptionKey(k tui.Key) settingsAction {
+	// Same guard as renderOptions: without a valid current item there is
+	// nothing to pick from, so leave selection mode instead of indexing
+	// d.items out of range.
+	if d.cursor < 0 || d.cursor >= len(d.items) {
+		d.selecting = false
+		return settingsAction{}
+	}
+	it := d.items[d.cursor]
+	switch k.Kind {
+	case tui.KeyUp:
+		if d.optionCursor > 0 {
+			d.optionCursor--
+		}
+	case tui.KeyDown:
+		if d.optionCursor < len(it.options)-1 {
+			d.optionCursor++
+		}
+	case tui.KeyEsc:
+		d.selecting = false
+	case tui.KeyEnter:
+		return d.selectCurrentOption()
+	case tui.KeyRune:
+		if k.Rune == ' ' {
+			return d.selectCurrentOption()
+		}
+	}
+	return settingsAction{}
+}
+
 func (d *settingsDialog) toggleCurrent() settingsAction {
 	if len(d.items) == 0 {
 		d.Close()
@ -73,24 +115,61 @@ func (d *settingsDialog) toggleCurrent() settingsAction {
 	if it.disabled {
 		return settingsAction{}
 	}
+	if len(it.options) > 0 {
+		d.optionCursor = it.choice
+		if d.optionCursor < 0 || d.optionCursor >= len(it.options) {
+			d.optionCursor = 0
+		}
+		d.selecting = true
+		return settingsAction{}
+	}
 	it.value = !it.value
 	d.items[d.cursor] = it
 	return settingsAction{Toggle: true, Key: it.key, Value: it.value}
 }

+// selectCurrentOption commits the option under the option cursor as the
+// current item's choice and leaves selection mode. It returns a Toggle
+// action carrying the item's key and the chosen option's value. With no
+// items it closes the dialog; for an item without options it only
+// leaves selection mode.
+func (d *settingsDialog) selectCurrentOption() settingsAction {
+	if len(d.items) == 0 {
+		d.Close()
+		return settingsAction{Close: true}
+	}
+	cur := &d.items[d.cursor]
+	// Every remaining path ends selection mode.
+	d.selecting = false
+	if len(cur.options) == 0 {
+		return settingsAction{}
+	}
+	if d.optionCursor < 0 || d.optionCursor >= len(cur.options) {
+		d.optionCursor = 0
+	}
+	cur.choice = d.optionCursor
+	return settingsAction{Toggle: true, Key: cur.key, StringValue: cur.options[cur.choice].value}
+}
+
 func (d *settingsDialog) Render(th tui.Theme, width int) []string {
 	if !d.Active() {
 		return nil
 	}
+	if d.selecting {
+		return d.renderOptions(th, width)
+	}
 	var lines []string
 	lines = append(lines, frameHeader(th, "settings", width))
-	lines = append(lines, th.FG256(th.Muted, "toggle with enter/space, esc to close:"))
+	lines = append(lines, th.FG256(th.Muted, "change with enter/space, esc to close:"))
 	for i, it := range d.items {
 		box := "[ ]"
 		if it.value {
 			box = "[x]"
 		}
 		plain := "  " + box + " " + it.label
+		if len(it.options) > 0 {
+			box = "[>]"
+			if it.choice < 0 || it.choice >= len(it.options) {
+				it.choice = 0
+			}
+			plain = "  " + box + " " + it.label + ": " + it.options[it.choice].label
+		}
 		if it.hint != "" {
 			plain += "  " + th.FG256(th.Muted, "("+it.hint+")")
 		}
@ -108,3 +187,33 @@ func (d *settingsDialog) Render(th tui.Theme, width int) []string {
 	lines = append(lines, frameRule(th, width))
 	return lines
 }
+
+// renderOptions draws the option list for the item under the cursor: a
+// header naming the item, its description (when set), a key hint, then
+// one row per option. The stored choice is marked with a check mark and
+// the row under the option cursor is highlighted. If the item cursor is
+// out of range it leaves selection mode and renders the main list.
+func (d *settingsDialog) renderOptions(th tui.Theme, width int) []string {
+	if d.cursor < 0 || d.cursor >= len(d.items) {
+		d.selecting = false
+		return d.Render(th, width)
+	}
+	item := d.items[d.cursor]
+	muted := func(s string) string { return th.FG256(th.Muted, s) }
+
+	out := []string{frameHeader(th, "settings: "+item.label, width)}
+	if item.desc != "" {
+		out = append(out, muted(item.desc))
+	}
+	out = append(out, muted("select with enter/space, esc to go back:"))
+	for n, o := range item.options {
+		row := "    " + o.label
+		if n == item.choice {
+			row = "  ✓ " + o.label
+		}
+		if n == d.optionCursor {
+			row = th.PadHighlight(row, width)
+		}
+		out = append(out, row)
+		if o.desc != "" {
+			out = append(out, muted("      "+o.desc))
+		}
+	}
+	return append(out, frameRule(th, width))
+}
--- a/internal/agent/settings_store.go
+++ b/internal/agent/settings_store.go
@ -1,5 +1,7 @@
 package agent

+import "github.com/patriceckhart/zot/internal/provider"
+
 type configSettingsStore struct{}

 func (configSettingsStore) SetInlineImages(enabled bool) error {
@ -20,6 +22,15 @@ func (configSettingsStore) SetAutoSwarm(enabled bool) error {
 	return SaveConfig(cfg)
 }

+func (configSettingsStore) SetReasoning(level string) error {
+	cfg, err := LoadConfig()
+	if err != nil {
+		return err
+	}
+	cfg.Reasoning = provider.NormalizeReasoning(level) // store the canonical level; "" means off
+	return SaveConfig(cfg)
+}
+
 // AutoSwarmEnabled reads the current auto-swarm flag from config.
 // Used by the swarm_spawn tool at call time to gate execution.
 func AutoSwarmEnabled() bool {
--- a/internal/provider/anthropic.go
+++ b/internal/provider/anthropic.go
@ -249,10 +249,18 @@ func (c *anthropicClient) buildRequest(req Request) (*anthRequest, error) {
 	if req.Reasoning != "" && m.Reasoning {
 		budget := anthropicReasoningBudget(req.Reasoning)
 		if budget > 0 {
+			// Reasoning requires max_tokens > budget. Keep at least a small
+			// answer budget while respecting the model's advertised output cap.
+			const minAnswerTokens = 1024
+			if m.MaxOutput > minAnswerTokens && budget >= m.MaxOutput {
+				budget = m.MaxOutput - minAnswerTokens
+			}
 			out.Thinking = &anthThinking{Type: "enabled", BudgetTokens: budget}
-			// Reasoning requires max_tokens > budget.
 			if out.MaxTokens <= budget {
-				out.MaxTokens = budget + 4096
+				out.MaxTokens = budget + minAnswerTokens
+				if m.MaxOutput > 0 && out.MaxTokens > m.MaxOutput {
+					out.MaxTokens = m.MaxOutput
+				}
 			}
 		}
 	}
@ -361,16 +369,7 @@ func markLastBlockEphemeral(blocks []interface{}) {
 }

 func anthropicReasoningBudget(level string) int {
-	switch strings.ToLower(level) {
-	case "low":
-		return 2048
-	case "medium":
-		return 8192
-	case "high":
-		return 16384
-	default:
-		return 0
-	}
+	return ReasoningBudget(level)
 }

 func filterAnthAssistantContent(blocks []Content) []Content {
--- a/internal/provider/gemini.go
+++ b/internal/provider/gemini.go
@ -354,7 +354,7 @@ func convertGemToolResultParts(blocks []Content) []gemPart {
 // generation: 2.5 family uses thinkingBudget (tokens), 3.x uses
 // thinkingLevel (enum). Returns nil when the level is unrecognised.
 func geminiThinkingConfig(modelID, level string) *gemThinkingConfig {
-	level = strings.ToLower(level)
+	level = NormalizeReasoning(level)
 	id := strings.ToLower(modelID)

 	// Gemini 3.x: enum-based thinkingLevel. Pro can't go below LOW.
@ -362,6 +362,12 @@ func geminiThinkingConfig(modelID, level string) *gemThinkingConfig {
 		isPro := strings.Contains(id, "-pro")
 		var lvl string
 		switch level {
+		case "minimum":
+			if isPro {
+				lvl = "LOW"
+			} else {
+				lvl = "MINIMAL"
+			}
 		case "low":
 			lvl = "LOW"
 		case "medium":
@ -370,7 +376,7 @@ func geminiThinkingConfig(modelID, level string) *gemThinkingConfig {
 			} else {
 				lvl = "MEDIUM"
 			}
-		case "high":
+		case "high", "maximum":
 			lvl = "HIGH"
 		default:
 			return nil
@ -379,33 +385,18 @@ func geminiThinkingConfig(modelID, level string) *gemThinkingConfig {
 	}

 	// Gemini 2.5 family: token-budget per-model.
-	var budget int
+	budget := ReasoningBudget(level)
 	switch {
 	case strings.Contains(id, "2.5-pro"):
-		switch level {
-		case "low":
-			budget = 2048
-		case "medium":
-			budget = 8192
-		case "high":
+		if budget > 32768 {
 			budget = 32768
 		}
 	case strings.Contains(id, "2.5-flash-lite"):
-		switch level {
-		case "low":
-			budget = 2048
-		case "medium":
-			budget = 8192
-		case "high":
+		if budget > 24576 {
 			budget = 24576
 		}
 	case strings.Contains(id, "2.5-flash"):
-		switch level {
-		case "low":
-			budget = 2048
-		case "medium":
-			budget = 8192
-		case "high":
+		if budget > 24576 {
 			budget = 24576
 		}
 	default:
--- a/internal/provider/gemini_test.go
+++ b/internal/provider/gemini_test.go
@ -264,7 +264,9 @@ func TestGeminiThinkingConfig(t *testing.T) {
 		{"gemini-3-pro", "low", "LOW", 0},
 		{"gemini-3-pro", "medium", "HIGH", 0}, // Pro can't go below LOW; medium → HIGH
 		{"gemini-3-flash", "medium", "MEDIUM", 0},
-		{"gemini-2.5-pro", "high", "", 32768},
+		{"gemini-2.5-pro", "high", "", 16384},
+		{"gemini-2.5-pro", "maximum", "", 32768},
+		{"gemini-2.5-flash", "minimum", "", 1024},
 		{"gemini-2.5-flash", "low", "", 2048},
 		{"gemini-2.0-flash", "high", "", 0}, // 2.0 has no thinking → nil
 	}
--- a/internal/provider/openai.go
+++ b/internal/provider/openai.go
@ -185,8 +185,8 @@ func (c *openaiClient) buildRequest(req Request) (*oaiRequest, error) {
 	}
 	if m.Reasoning {
 		out.MaxCompletionTok = &maxTok
-		if req.Reasoning != "" {
-			out.ReasoningEffort = strings.ToLower(req.Reasoning)
+		if effort := OpenAIReasoningEffort(req.Reasoning); effort != "" {
+			out.ReasoningEffort = effort
 		}
 	} else {
 		out.MaxTokens = &maxTok
--- a/internal/provider/openai_codex.go
+++ b/internal/provider/openai_codex.go
@ -131,16 +131,21 @@ type codexTool struct {
 	Parameters  json.RawMessage `json:"parameters"`
 }

+type codexReasoningConfig struct {
+	Effort string `json:"effort,omitempty"` // effort tier from OpenAICodexReasoningEffort; left out of the JSON when empty
+}
+
 type codexRequest struct {
-	Model             string      `json:"model"`
-	Store             bool        `json:"store"`
-	Stream            bool        `json:"stream"`
-	Instructions      string      `json:"instructions,omitempty"`
-	Input             []any       `json:"input"`
-	Tools             []codexTool `json:"tools,omitempty"`
-	ToolChoice        string      `json:"tool_choice,omitempty"`
-	ParallelToolCalls bool        `json:"parallel_tool_calls"`
-	Include           []string    `json:"include,omitempty"`
+	Model             string                `json:"model"`
+	Store             bool                  `json:"store"`
+	Stream            bool                  `json:"stream"`
+	Instructions      string                `json:"instructions,omitempty"`
+	Input             []any                 `json:"input"`
+	Tools             []codexTool           `json:"tools,omitempty"`
+	ToolChoice        string                `json:"tool_choice,omitempty"`
+	ParallelToolCalls bool                  `json:"parallel_tool_calls"`
+	Include           []string              `json:"include,omitempty"`
+	Reasoning         *codexReasoningConfig `json:"reasoning,omitempty"`
 }

 // ---- Request building ----
@ -163,6 +168,11 @@ func (c *codexClient) buildRequest(req Request) (*codexRequest, error) {
 		ParallelToolCalls: true,
 		Include:           []string{"reasoning.encrypted_content"},
 	}
+	if m.Reasoning {
+		if effort := OpenAICodexReasoningEffort(req.Reasoning); effort != "" {
+			body.Reasoning = &codexReasoningConfig{Effort: effort}
+		}
+	}
 	if len(req.Tools) > 0 {
 		body.ToolChoice = "auto"
 		for _, t := range req.Tools {
--- a/internal/provider/provider.go
+++ b/internal/provider/provider.go
@ -216,9 +216,10 @@ type Request struct {
 	Tools       []Tool
 	MaxTokens   int
 	Temperature *float32
-	// Reasoning is "", "low", "medium", or "high".
-	// Anthropic maps it to extended-thinking budget tokens.
-	// OpenAI maps it to reasoning_effort.
+	// Reasoning is "", "minimum", "low", "medium", "high", or "maximum".
+	// Empty disables reasoning. Budget-based providers map these to roughly
+	// 1k/2k/8k/16k/32k thinking tokens; effort-based providers map them onto
+	// their closest supported reasoning_effort values.
 	Reasoning string
 }

--- a/internal/provider/reasoning.go
+++ b/internal/provider/reasoning.go
@ -0,0 +1,78 @@
+package provider
+
+import "strings"
+
+// NormalizeReasoning maps a user-supplied thinking level onto zot's
+// canonical names: "" (off), "minimum", "low", "medium", "high" or
+// "maximum". Matching ignores case and surrounding whitespace. A value
+// that is not a known alias is returned trimmed and lower-cased.
+func NormalizeReasoning(level string) string {
+	key := strings.ToLower(strings.TrimSpace(level))
+	switch key {
+	case "", "off", "none", "no", "false", "disabled":
+		return ""
+	case "min", "minimal", "minimum":
+		return "minimum"
+	case "med", "medium":
+		return "medium"
+	case "hi", "high":
+		return "high"
+	case "max", "maximum":
+		return "maximum"
+	}
+	// "low" has no aliases and unknown values pass through unchanged.
+	return key
+}
+
+// ReasoningBudget returns zot's approximate token budget for thinking-capable
+// providers that accept explicit budgets.
+func ReasoningBudget(level string) int {
+	switch NormalizeReasoning(level) {
+	case "minimum":
+		return 1024
+	case "low":
+		return 2048
+	case "medium":
+		return 8192
+	case "high":
+		return 16384
+	case "maximum":
+		return 32768
+	default:
+		return 0
+	}
+}
+
+// OpenAIReasoningEffort translates a zot thinking level into the
+// reasoning effort sent to OpenAI-compatible chat-completions endpoints.
+// Off and unrecognised levels yield "".
+func OpenAIReasoningEffort(level string) string {
+	switch lvl := NormalizeReasoning(level); lvl {
+	case "low", "medium", "high":
+		return lvl
+	case "minimum":
+		// Many OpenAI-compatible endpoints only accept low/medium/high,
+		// so zot's minimum is sent as low rather than the newer minimal.
+		return "low"
+	case "maximum":
+		return "high"
+	}
+	return ""
+}
+
+// OpenAICodexReasoningEffort translates a zot thinking level into the
+// effort enum of the ChatGPT/Codex Responses backend. That backend
+// rejects "minimal" and uses "xhigh" for the highest tier on recent
+// GPT-5.x models. Off and unrecognised levels yield "".
+func OpenAICodexReasoningEffort(level string) string {
+	switch lvl := NormalizeReasoning(level); lvl {
+	case "low", "medium", "high":
+		return lvl
+	case "minimum":
+		return "low"
+	case "maximum":
+		return "xhigh"
+	}
+	return ""
+}
--- a/internal/provider/reasoning_test.go
+++ b/internal/provider/reasoning_test.go
@ -0,0 +1,36 @@
+package provider
+
+import "testing"
+
+func TestReasoningEffortMappings(t *testing.T) {
+	cases := []struct {
+		level      string // input as a user would type it
+		openai     string // expected OpenAIReasoningEffort result
+		codex      string // expected OpenAICodexReasoningEffort result
+		budget     int    // expected ReasoningBudget result
+		normalized string // expected NormalizeReasoning result
+	}{
+		{"off", "", "", 0, ""},
+		{"minimum", "low", "low", 1024, "minimum"},
+		{"minimal", "low", "low", 1024, "minimum"},
+		{"low", "low", "low", 2048, "low"},
+		{"medium", "medium", "medium", 8192, "medium"},
+		{"high", "high", "high", 16384, "high"},
+		{"maximum", "high", "xhigh", 32768, "maximum"}, // only the Codex mapping has a tier above "high"
+		{"max", "high", "xhigh", 32768, "maximum"},
+	}
+	for _, tc := range cases {
+		if got := NormalizeReasoning(tc.level); got != tc.normalized {
+			t.Errorf("NormalizeReasoning(%q)=%q want %q", tc.level, got, tc.normalized)
+		}
+		if got := OpenAIReasoningEffort(tc.level); got != tc.openai {
+			t.Errorf("OpenAIReasoningEffort(%q)=%q want %q", tc.level, got, tc.openai)
+		}
+		if got := OpenAICodexReasoningEffort(tc.level); got != tc.codex {
+			t.Errorf("OpenAICodexReasoningEffort(%q)=%q want %q", tc.level, got, tc.codex)
+		}
+		if got := ReasoningBudget(tc.level); got != tc.budget {
+			t.Errorf("ReasoningBudget(%q)=%d want %d", tc.level, got, tc.budget)
+		}
+	}
+}
--- a/internal/tui/statusbar_test.go
+++ b/internal/tui/statusbar_test.go
@ -57,6 +57,135 @@ func TestStatusBarNoCWD(t *testing.T) {
 	}
 }

+func TestStatusBarThinkingLevelBetweenModelAndStats(t *testing.T) {
+	lines := StatusBar(StatusBarParams{
+		Theme:     Dark,
+		Provider:  "openai-codex",
+		Model:     "gpt-5.5",
+		Reasoning: "minimum", // expected to render as "thinking: minimal"
+		CWD:       "/tmp/x",
+		Usage: provider.Usage{
+			InputTokens:  4_300_000,
+			OutputTokens: 2,
+		},
+		Cols: 500, // wide enough that model, thinking level and stats share line 1
+	})
+	if len(lines) != 2 {
+		t.Fatalf("want 2 lines, got %d: %q", len(lines), lines)
+	}
+	plain := stripANSI(lines[0])
+	modelIdx := strings.Index(plain, "(openai-codex) gpt-5.5")
+	thinkingIdx := strings.Index(plain, "thinking: minimal")
+	statsIdx := strings.Index(plain, "↑4.3M")
+	if modelIdx < 0 || thinkingIdx < 0 || statsIdx < 0 {
+		t.Fatalf("line should contain model, thinking level, and stats, got %q", plain)
+	}
+	if !(modelIdx < thinkingIdx && thinkingIdx < statsIdx) { // order on the line: model, thinking, stats
+		t.Fatalf("thinking level should sit between model and stats, got %q", plain)
+	}
+}
+
+// TestStatusBarNarrowKeepsModelAndThinkingTogetherWhenTheyFit checks the
+// wrapped layout at a width where "(provider) model  thinking: level"
+// still fits on one row but the stats do not: model and thinking level
+// share line 1, the stats get line 2 and the cwd line 3.
+func TestStatusBarNarrowKeepsModelAndThinkingTogetherWhenTheyFit(t *testing.T) {
+	lines := StatusBar(StatusBarParams{
+		Theme:     Dark,
+		Provider:  "openai-codex",
+		Model:     "gpt-5.5",
+		Reasoning: "maximum",
+		CWD:       "/tmp/x",
+		Usage: provider.Usage{
+			CostUSD: 0,
+		},
+		Subscription: true,
+		ContextUsed:  100,
+		ContextMax:   1_000_000,
+		Cols:         64,
+	})
+	if len(lines) != 3 {
+		t.Fatalf("narrow status with model+thinking fit: want 3 lines, got %d: %q", len(lines), lines)
+	}
+	plain := make([]string, len(lines))
+	for i, line := range lines {
+		plain[i] = stripANSI(line)
+	}
+	if !strings.Contains(plain[0], "(openai-codex) gpt-5.5  thinking: maximum") {
+		t.Fatalf("line 1 should contain model and thinking level, got %q", plain[0])
+	}
+	// The status bar renders the level as "thinking: <level>", so that is
+	// the prefix that must be absent from the stats row.
+	if !strings.Contains(plain[1], "$0.000 (sub)") || strings.Contains(plain[1], "thinking:") {
+		t.Fatalf("line 2 should contain only stats, got %q", plain[1])
+	}
+	if !strings.Contains(plain[2], "/tmp/x") {
+		t.Fatalf("line 3 should contain cwd, got %q", plain[2])
+	}
+}
+
+func TestStatusBarNarrowSplitsAfterThinkingLevel(t *testing.T) {
+	lines := StatusBar(StatusBarParams{
+		Theme:     Dark,
+		Provider:  "openai-codex",
+		Model:     "gpt-5.5",
+		Reasoning: "minimum", // expected to render as "thinking: minimal"
+		CWD:       "/tmp/x",
+		Usage: provider.Usage{
+			InputTokens:  4_300_000,
+			OutputTokens: 2,
+		},
+		Cols: 40, // too narrow for model and thinking level on one row
+	})
+	if len(lines) != 4 { // model, thinking level, stats, cwd: one row each
+		t.Fatalf("narrow status with thinking: want 4 lines, got %d: %q", len(lines), lines)
+	}
+	plain := make([]string, len(lines))
+	for i, line := range lines {
+		plain[i] = stripANSI(line)
+	}
+	if !strings.Contains(plain[0], "(openai-codex) gpt-5.5") {
+		t.Fatalf("line 1 should contain model info, got %q", plain[0])
+	}
+	if !strings.Contains(plain[1], "thinking: minimal") || strings.Contains(plain[1], "↑4.3M") {
+		t.Fatalf("line 2 should contain only thinking level, got %q", plain[1])
+	}
+	if !strings.Contains(plain[2], "↑4.3M ↓2") {
+		t.Fatalf("line 3 should contain stats, got %q", plain[2])
+	}
+	if !strings.Contains(plain[3], "/tmp/x") {
+		t.Fatalf("line 4 should contain cwd, got %q", plain[3])
+	}
+}
+
+func TestStatusBarVeryNarrowSplitsAfterThinkingLevel(t *testing.T) {
+	lines := StatusBar(StatusBarParams{
+		Theme:     Dark,
+		Provider:  "openai-codex",
+		Model:     "gpt-5.5",
+		Reasoning: "minimum", // expected to render as "thinking: minimal"
+		CWD:       "/tmp/x",
+		Usage: provider.Usage{
+			InputTokens:  4_300_000,
+			OutputTokens: 2,
+		},
+		Cols: 32, // narrower than the 40-column case; the same four-row layout is expected
+	})
+	if len(lines) != 4 { // model, thinking level, stats, cwd: one row each
+		t.Fatalf("narrow status with thinking: want 4 lines, got %d: %q", len(lines), lines)
+	}
+	plain := make([]string, len(lines))
+	for i, line := range lines {
+		plain[i] = stripANSI(line)
+	}
+	if !strings.Contains(plain[0], "(openai-codex) gpt-5.5") {
+		t.Fatalf("line 1 should contain model info, got %q", plain[0])
+	}
+	if !strings.Contains(plain[1], "thinking: minimal") {
+		t.Fatalf("line 2 should contain thinking level, got %q", plain[1])
+	}
+	if !strings.Contains(plain[2], "↑4.3M ↓2") {
+		t.Fatalf("line 3 should contain stats, got %q", plain[2])
+	}
+	if !strings.Contains(plain[3], "/tmp/x") {
+		t.Fatalf("line 4 should contain cwd, got %q", plain[3])
+	}
+}
+
 func TestStatusBarNoYoloTagPrecedesCWD(t *testing.T) {
 	lines := StatusBar(StatusBarParams{
 		Theme:    Dark,
--- a/internal/tui/view.go
+++ b/internal/tui/view.go
@ -1973,6 +1973,7 @@ type StatusBarParams struct {
 	Theme      Theme
 	Provider   string
 	Model      string
+	Reasoning  string // "" means thinking off
 	Busy       bool
 	BusyPrefix string // spinner + funny line when busy
 	CWD        string
@ -2069,7 +2070,20 @@ func StatusBar(p StatusBarParams) []string {
 	const pad = "  " // 2 spaces

 	left := fmt.Sprintf("(%s) %s", p.Provider, p.Model)
-	middle := strings.Join(stats, " ")
+	thinking := thinkingLevelLabel(p.Reasoning)
+	thinkingText := ""
+	if thinking != "" {
+		thinkingText = "thinking: " + thinking
+	}
+	statsText := strings.Join(stats, " ")
+	middleParts := make([]string, 0, 2)
+	if thinkingText != "" {
+		middleParts = append(middleParts, thinkingText)
+	}
+	if statsText != "" {
+		middleParts = append(middleParts, statsText)
+	}
+	middle := strings.Join(middleParts, "  ")

 	var leftBuilder strings.Builder
 	if p.BusyPrefix != "" {
@ -2122,8 +2136,7 @@ func StatusBar(p StatusBarParams) []string {
 		modelLine := pad + th.FG256(th.Muted, left)
 		lines := []string{busyLine}
 		if middle != "" && visibleWidth(modelLine+pad+th.FG256(th.Muted, middle)) > p.Cols {
-			lines = append(lines, modelLine)
-			lines = append(lines, pad+th.FG256(th.Muted, middle))
+			lines = appendWrappedStatusLines(lines, th, pad, left, thinkingText, statsText, p.Cols)
 		} else {
 			var infoBuilder strings.Builder
 			infoBuilder.WriteString(modelLine)
@ -2144,10 +2157,8 @@ func StatusBar(p StatusBarParams) []string {
 	// avoids the terminal's hard wrap cutting the stats or pushing cwd
 	// into an awkward position on small widths.
 	if p.Cols > 0 && p.BusyPrefix == "" && middle != "" && visibleWidth(primary) > p.Cols {
-		lines := []string{
-			pad + th.FG256(th.Muted, left),
-			pad + th.FG256(th.Muted, middle),
-		}
+		var lines []string
+		lines = appendWrappedStatusLines(lines, th, pad, left, thinkingText, statsText, p.Cols)
 		if cwd != "" {
 			lines = append(lines, pad+th.FG256(th.Muted, cwd))
 		}
@ -2164,6 +2175,42 @@ func StatusBar(p StatusBarParams) []string {
 	return []string{primary, cwdRendered}
 }

+// appendWrappedStatusLines appends the wrapped form of the status row to
+// lines and returns the result. The model text always comes first. The
+// thinking text, when present, shares the model's row if both fit within
+// cols (or cols is not positive) and otherwise gets its own row. The
+// stats text, when present, always follows on a separate row. Every row
+// is prefixed with pad and drawn in the theme's muted colour.
+func appendWrappedStatusLines(lines []string, th Theme, pad, modelText, thinkingText, statsText string, cols int) []string {
+	muted := func(s string) string { return pad + th.FG256(th.Muted, s) }
+	joined := modelText + pad + thinkingText
+	switch {
+	case thinkingText == "":
+		lines = append(lines, muted(modelText))
+	case cols <= 0 || visibleWidth(pad+joined) <= cols:
+		lines = append(lines, muted(joined))
+	default:
+		lines = append(lines, muted(modelText), muted(thinkingText))
+	}
+	if statsText != "" {
+		lines = append(lines, muted(statsText))
+	}
+	return lines
+}
+
+// thinkingLevelLabel returns the text shown after "thinking: " in the
+// status bar for a reasoning level, or "" when thinking is off (in which
+// case the status bar omits the segment). Matching ignores case and
+// surrounding whitespace and accepts the same short aliases as
+// provider.NormalizeReasoning, so "med" and "hi" are shown as "medium"
+// and "high" rather than verbatim. "minimum" is displayed as "minimal";
+// "xhigh" is displayed as "maximum". Unknown values are shown trimmed
+// and lower-cased.
+func thinkingLevelLabel(level string) string {
+	key := strings.ToLower(strings.TrimSpace(level))
+	switch key {
+	case "", "off", "none", "no", "false", "disabled":
+		return ""
+	case "minimum", "minimal", "min":
+		return "minimal"
+	case "medium", "med":
+		return "medium"
+	case "high", "hi":
+		return "high"
+	case "maximum", "max", "xhigh":
+		return "maximum"
+	default:
+		return key
+	}
+}
+
 // contextUsage renders the "N%/ctxMax" fragment, returning the
 // rendered string plus the colour to wrap it in.
 func contextUsage(th Theme, used, max int) (string, int) {