Merge remote-tracking branch 'origin/main' into pr-24

This commit is contained in:
patriceckhart 2026-06-09 19:22:05 +02:00
commit b68008327d
5 changed files with 100 additions and 0 deletions

View file

@ -32,6 +32,13 @@ type Resolved struct {
MaxSteps int
Sandbox *tools.Sandbox
// MaxOutput is the resolved model's maximum output-token budget
// (from the catalog). Passed to the agent so each turn requests
// the model's full output capacity instead of the provider's
// conservative default (e.g. Bedrock's 4096, which truncates
// long writes/edits with stopReason=length).
MaxOutput int
// SkillTool is the on-demand skill loader registered with the
// agent's tool registry, or nil if no SKILL.md files were
// discovered. Exposed so the tui can list / preview skills.
@ -501,6 +508,7 @@ func Resolve(args Args, requireCred bool) (Resolved, error) {
ToolSummary: summaries,
SystemPrompt: sys,
MaxSteps: max,
MaxOutput: resolvedModel.MaxOutput,
Sandbox: sandbox,
SkillTool: skillTool,
systemAppend: append_,
@ -768,6 +776,7 @@ func (r *Resolved) UseSandbox(s *tools.Sandbox) {
// NewAgent constructs a core.Agent wired to a fresh client, the resolved
// model, system prompt and tool registry, then copies the per-run limits
// (step cap, output-token budget) and reasoning setting onto it.
func (r Resolved) NewAgent() *core.Agent {
	agent := core.NewAgent(r.NewClient(), r.Model, r.SystemPrompt, r.ToolRegistry)
	agent.Reasoning = r.Reasoning
	agent.MaxSteps = r.MaxSteps
	// Hand the resolved MaxOutput to the agent as its MaxTokens value.
	agent.MaxTokens = r.MaxOutput
	return agent
}

View file

@ -4656,6 +4656,17 @@ func (i *Interactive) handleEvent(ev core.AgentEvent) {
i.statusOK = "cancelled"
return
}
if e.Stop == provider.StopLength {
// The model hit its output-token cap mid-response, so the
// reply (often a long write/edit) is truncated. Surface it
// explicitly, otherwise the turn just ends and reads like
// the UI gave up. The agent already requests the model's
// full MaxOutput budget, so this means the response genuinely
// exceeded that ceiling; ask the user to continue.
i.statusErr = "response hit the model's output-token limit and was cut off, ask it to continue"
i.statusOK = ""
return
}
// Don't surface mid-loop stream errors as a red banner here.
// EvTurnEnd fires after every step in a multi-step tool loop,
// so a transient 503 / network blip would briefly paint a red

View file

@ -22,6 +22,14 @@ type Agent struct {
MaxSteps int
Reasoning string
// MaxTokens caps the model's output tokens per turn. Zero leaves
// the field unset on the provider request, letting each provider
// apply its own default (which can be conservative, e.g. Bedrock
// defaults to 4096, truncating long writes/edits). Hosts populate
// this from the resolved model's MaxOutput so large single-turn
// responses aren't silently cut off with stopReason=length.
MaxTokens int
// BeforeToolExecute, if set, is called immediately before each
// tool runs. Returning (allowed=false, reason) short-circuits
// the call with an error result containing reason. Optionally,
@ -515,6 +523,7 @@ func (a *Agent) oneTurn(ctx context.Context, sink func(AgentEvent)) (provider.St
Messages: repairToolUseResultPairs(a.Messages()),
Tools: a.Tools.Specs(),
Reasoning: a.Reasoning,
MaxTokens: a.MaxTokens,
}
stream, err := a.Client.Stream(ctx, req)
if err != nil {

View file

@ -109,3 +109,38 @@ func TestAgentDropsPartialAssistantBeforeRetry(t *testing.T) {
t.Fatalf("final assistant text = %q; want recovered", got)
}
}
// captureClient is a test client that records the last Request it
// received so tests can assert what the agent put on the wire.
type captureClient struct {
// lastReq is the most recent Request handed to Stream; every Stream
// call overwrites it.
lastReq provider.Request
}
// Name returns the fixed identifier "capture" for this test client.
func (c *captureClient) Name() string {
	const providerName = "capture"
	return providerName
}
// Stream remembers req as the most recent request and replies with a
// canned two-event stream: an EventStart, then an EventDone carrying
// StopEnd and a single assistant text block ("ok"). The returned channel
// is closed after both events are sent. ctx is not consulted, and the
// returned error is always nil.
func (c *captureClient) Stream(ctx context.Context, req provider.Request) (<-chan provider.Event, error) {
	c.lastReq = req

	reply := provider.Message{
		Role:    provider.RoleAssistant,
		Content: []provider.Content{provider.TextBlock{Text: "ok"}},
	}

	// The buffer is larger than the two events sent, so neither send
	// blocks waiting for a reader.
	events := make(chan provider.Event, 3)
	go func() {
		defer close(events)
		events <- provider.EventStart{Provider: "capture", Model: req.Model}
		events <- provider.EventDone{Stop: provider.StopEnd, Message: reply}
	}()
	return events, nil
}
// TestAgentPropagatesMaxTokens checks that a MaxTokens value set on the
// Agent shows up unchanged on the Request the client receives.
func TestAgentPropagatesMaxTokens(t *testing.T) {
	recorder := &captureClient{}
	agent := NewAgent(recorder, "fake-model", "system", Registry{})
	agent.MaxTokens = 64000

	err := agent.Prompt(context.Background(), "hello", nil, nil)
	if err != nil {
		t.Fatalf("Prompt returned %v", err)
	}

	// The recorder keeps the last request it was handed; inspect it.
	if got := recorder.lastReq.MaxTokens; got != 64000 {
		t.Fatalf("request MaxTokens = %d; want 64000 (Agent.MaxTokens not propagated)", got)
	}
}

View file

@ -163,6 +163,42 @@ func TestBedrockModelSupportsCaching(t *testing.T) {
}
}
func TestBedrockBuildRequestMaxTokens(t *testing.T) {
client := &bedrockClient{region: "us-east-1"}
// A non-zero MaxTokens must flow through to InferenceConfig so the
// model gets its full output budget. This is the regression guard
// for long writes/edits being truncated at Bedrock's 4096 default.
req, err := client.buildRequest(Request{
Model: "anthropic.claude-sonnet-4-5-20250929-v1:0",
MaxTokens: 64000,
Messages: []Message{
{Role: RoleUser, Content: []Content{TextBlock{Text: "hello"}}},
},
})
if err != nil {
t.Fatal(err)
}
if req.InferenceConfig.MaxTokens != 64000 {
t.Errorf("MaxTokens = %d, want 64000", req.InferenceConfig.MaxTokens)
}
// Zero still falls back to the conservative provider default so an
// unset budget never sends maxTokens:0 (which Bedrock rejects).
reqZero, err := client.buildRequest(Request{
Model: "anthropic.claude-sonnet-4-5-20250929-v1:0",
Messages: []Message{
{Role: RoleUser, Content: []Content{TextBlock{Text: "hello"}}},
},
})
if err != nil {
t.Fatal(err)
}
if reqZero.InferenceConfig.MaxTokens != 4096 {
t.Errorf("zero MaxTokens default = %d, want 4096", reqZero.InferenceConfig.MaxTokens)
}
}
func TestBedrockBuildRequestCachingClaudeModel(t *testing.T) {
// A Claude model (PriceCacheWrite > 0) should get cachePoint markers
// in the system array and on the last user message.