Merge remote-tracking branch 'origin/main' into pr-24

2026-06-26 21:36:31 +02:00 · 2026-06-09 19:22:05 +02:00 · 2026-06-09 19:22:05 +02:00 · b68008327d
commit b68008327d
parent bd648be324 c2c9a5ea28
5 changed files with 100 additions and 0 deletions
--- a/packages/agent/build.go
+++ b/packages/agent/build.go
@ -32,6 +32,13 @@ type Resolved struct {
 	MaxSteps     int
 	Sandbox      *tools.Sandbox

+	// MaxOutput is the resolved model's maximum output-token budget
+	// (from the catalog). Passed to the agent so each turn requests
+	// the model's full output capacity instead of the provider's
+	// conservative default (e.g. Bedrock's 4096, which truncates
+	// long writes/edits with stopReason=length).
+	MaxOutput int
+
 	// SkillTool is the on-demand skill loader registered with the
 	// agent's tool registry, or nil if no SKILL.md files were
 	// discovered. Exposed so the tui can list / preview skills.
@ -501,6 +508,7 @@ func Resolve(args Args, requireCred bool) (Resolved, error) {
 		ToolSummary:      summaries,
 		SystemPrompt:     sys,
 		MaxSteps:         max,
+		MaxOutput:        resolvedModel.MaxOutput,
 		Sandbox:          sandbox,
 		SkillTool:        skillTool,
 		systemAppend:     append_,
@ -768,6 +776,7 @@ func (r *Resolved) UseSandbox(s *tools.Sandbox) {
 func (r Resolved) NewAgent() *core.Agent {
 	a := core.NewAgent(r.NewClient(), r.Model, r.SystemPrompt, r.ToolRegistry)
 	a.MaxSteps = r.MaxSteps
+	a.MaxTokens = r.MaxOutput
 	a.Reasoning = r.Reasoning
 	return a
 }
--- a/packages/agent/modes/interactive.go
+++ b/packages/agent/modes/interactive.go
@ -4656,6 +4656,17 @@ func (i *Interactive) handleEvent(ev core.AgentEvent) {
 			i.statusOK = "cancelled"
 			return
 		}
+		if e.Stop == provider.StopLength {
+			// The model hit its output-token cap mid-response, so the
+			// reply (often a long write/edit) is truncated. Surface it
+			// explicitly; otherwise the turn just ends and reads like
+			// the UI gave up. When the catalog supplies a MaxOutput, the
+			// agent already requests that full budget, so the response
+			// genuinely exceeded that ceiling; ask the user to continue.
+			i.statusErr = "response hit the model's output-token limit and was cut off, ask it to continue"
+			i.statusOK = ""
+			return
+		}
 		// Don't surface mid-loop stream errors as a red banner here.
 		// EvTurnEnd fires after every step in a multi-step tool loop,
 		// so a transient 503 / network blip would briefly paint a red
--- a/packages/core/agent.go
+++ b/packages/core/agent.go
@ -22,6 +22,14 @@ type Agent struct {
 	MaxSteps  int
 	Reasoning string

+	// MaxTokens caps the model's output tokens per turn. Zero leaves
+	// the field unset on the provider request, letting each provider
+	// apply its own default (which can be conservative, e.g. Bedrock
+	// defaults to 4096, truncating long writes/edits). Hosts populate
+	// this from the resolved model's MaxOutput so large single-turn
+	// responses aren't silently cut off with stopReason=length.
+	MaxTokens int
+
 	// BeforeToolExecute, if set, is called immediately before each
 	// tool runs. Returning (allowed=false, reason) short-circuits
 	// the call with an error result containing reason. Optionally,
@ -515,6 +523,7 @@ func (a *Agent) oneTurn(ctx context.Context, sink func(AgentEvent)) (provider.St
 		Messages:  repairToolUseResultPairs(a.Messages()),
 		Tools:     a.Tools.Specs(),
 		Reasoning: a.Reasoning,
+		MaxTokens: a.MaxTokens,
 	}
 	stream, err := a.Client.Stream(ctx, req)
 	if err != nil {
--- a/packages/core/agent_retry_test.go
+++ b/packages/core/agent_retry_test.go
@ -109,3 +109,38 @@ func TestAgentDropsPartialAssistantBeforeRetry(t *testing.T) {
 		t.Fatalf("final assistant text = %q; want recovered", got)
 	}
 }
+
+// captureClient records the last Request it received so tests can
+// assert what the agent put on the wire.
+type captureClient struct {
+	lastReq provider.Request
+}
+
+func (c *captureClient) Name() string { return "capture" }
+
+func (c *captureClient) Stream(ctx context.Context, req provider.Request) (<-chan provider.Event, error) {
+	c.lastReq = req
+	out := make(chan provider.Event, 3)
+	go func() {
+		defer close(out)
+		out <- provider.EventStart{Provider: "capture", Model: req.Model}
+		out <- provider.EventDone{Stop: provider.StopEnd, Message: provider.Message{
+			Role:    provider.RoleAssistant,
+			Content: []provider.Content{provider.TextBlock{Text: "ok"}},
+		}}
+	}()
+	return out, nil
+}
+
+func TestAgentPropagatesMaxTokens(t *testing.T) {
+	client := &captureClient{}
+	a := NewAgent(client, "fake-model", "system", Registry{})
+	a.MaxTokens = 64000
+
+	if err := a.Prompt(context.Background(), "hello", nil, nil); err != nil {
+		t.Fatalf("Prompt returned %v", err)
+	}
+	if client.lastReq.MaxTokens != 64000 {
+		t.Fatalf("request MaxTokens = %d; want 64000 (Agent.MaxTokens not propagated)", client.lastReq.MaxTokens)
+	}
+}
--- a/packages/provider/amazon_bedrock_test.go
+++ b/packages/provider/amazon_bedrock_test.go
@ -163,6 +163,42 @@ func TestBedrockModelSupportsCaching(t *testing.T) {
 	}
 }

+func TestBedrockBuildRequestMaxTokens(t *testing.T) {
+	client := &bedrockClient{region: "us-east-1"}
+
+	// A non-zero MaxTokens must flow through to InferenceConfig so the
+	// model gets its full output budget. This is the regression guard
+	// for long writes/edits being truncated at Bedrock's 4096 default.
+	req, err := client.buildRequest(Request{
+		Model:     "anthropic.claude-sonnet-4-5-20250929-v1:0",
+		MaxTokens: 64000,
+		Messages: []Message{
+			{Role: RoleUser, Content: []Content{TextBlock{Text: "hello"}}},
+		},
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+	if req.InferenceConfig.MaxTokens != 64000 {
+		t.Errorf("MaxTokens = %d, want 64000", req.InferenceConfig.MaxTokens)
+	}
+
+	// Zero still falls back to the conservative provider default so an
+	// unset budget never sends maxTokens:0 (which Bedrock rejects).
+	reqZero, err := client.buildRequest(Request{
+		Model: "anthropic.claude-sonnet-4-5-20250929-v1:0",
+		Messages: []Message{
+			{Role: RoleUser, Content: []Content{TextBlock{Text: "hello"}}},
+		},
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+	if reqZero.InferenceConfig.MaxTokens != 4096 {
+		t.Errorf("zero MaxTokens default = %d, want 4096", reqZero.InferenceConfig.MaxTokens)
+	}
+}
+
 func TestBedrockBuildRequestCachingClaudeModel(t *testing.T) {
 	// A Claude model (PriceCacheWrite > 0) should get cachePoint markers
 	// in the system array and on the last user message.