mirror of
https://github.com/patriceckhart/zot.git
synced 2026-06-26 21:36:31 +02:00
Merge remote-tracking branch 'origin/main' into pr-24
This commit is contained in:
commit
b68008327d
5 changed files with 100 additions and 0 deletions
|
|
@@ -32,6 +32,13 @@ type Resolved struct {
|
|||
MaxSteps int
|
||||
Sandbox *tools.Sandbox
|
||||
|
||||
// MaxOutput is the resolved model's maximum output-token budget
|
||||
// (from the catalog). Passed to the agent so each turn requests
|
||||
// the model's full output capacity instead of the provider's
|
||||
// conservative default (e.g. Bedrock's 4096, which truncates
|
||||
// long writes/edits with stopReason=length).
|
||||
MaxOutput int
|
||||
|
||||
// SkillTool is the on-demand skill loader registered with the
|
||||
// agent's tool registry, or nil if no SKILL.md files were
|
||||
// discovered. Exposed so the tui can list / preview skills.
|
||||
|
|
@@ -501,6 +508,7 @@ func Resolve(args Args, requireCred bool) (Resolved, error) {
|
|||
ToolSummary: summaries,
|
||||
SystemPrompt: sys,
|
||||
MaxSteps: max,
|
||||
MaxOutput: resolvedModel.MaxOutput,
|
||||
Sandbox: sandbox,
|
||||
SkillTool: skillTool,
|
||||
systemAppend: append_,
|
||||
|
|
@@ -768,6 +776,7 @@ func (r *Resolved) UseSandbox(s *tools.Sandbox) {
|
|||
func (r Resolved) NewAgent() *core.Agent {
|
||||
a := core.NewAgent(r.NewClient(), r.Model, r.SystemPrompt, r.ToolRegistry)
|
||||
a.MaxSteps = r.MaxSteps
|
||||
a.MaxTokens = r.MaxOutput
|
||||
a.Reasoning = r.Reasoning
|
||||
return a
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -4656,6 +4656,17 @@ func (i *Interactive) handleEvent(ev core.AgentEvent) {
|
|||
i.statusOK = "cancelled"
|
||||
return
|
||||
}
|
||||
if e.Stop == provider.StopLength {
|
||||
// The model hit its output-token cap mid-response, so the
|
||||
// reply (often a long write/edit) is truncated. Surface it
|
||||
// explicitly, otherwise the turn just ends and reads like
|
||||
// the UI gave up. The agent already requests the model's
|
||||
// full MaxOutput budget, so this means the response genuinely
|
||||
// exceeded that ceiling; ask the user to continue.
|
||||
i.statusErr = "response hit the model's output-token limit and was cut off, ask it to continue"
|
||||
i.statusOK = ""
|
||||
return
|
||||
}
|
||||
// Don't surface mid-loop stream errors as a red banner here.
|
||||
// EvTurnEnd fires after every step in a multi-step tool loop,
|
||||
// so a transient 503 / network blip would briefly paint a red
|
||||
|
|
|
|||
|
|
@@ -22,6 +22,14 @@ type Agent struct {
|
|||
MaxSteps int
|
||||
Reasoning string
|
||||
|
||||
// MaxTokens caps the model's output tokens per turn. Zero leaves
|
||||
// the field unset on the provider request, letting each provider
|
||||
// apply its own default (which can be conservative, e.g. Bedrock
|
||||
// defaults to 4096, truncating long writes/edits). Hosts populate
|
||||
// this from the resolved model's MaxOutput so large single-turn
|
||||
// responses aren't silently cut off with stopReason=length.
|
||||
MaxTokens int
|
||||
|
||||
// BeforeToolExecute, if set, is called immediately before each
|
||||
// tool runs. Returning (allowed=false, reason) short-circuits
|
||||
// the call with an error result containing reason. Optionally,
|
||||
|
|
@@ -515,6 +523,7 @@ func (a *Agent) oneTurn(ctx context.Context, sink func(AgentEvent)) (provider.St
|
|||
Messages: repairToolUseResultPairs(a.Messages()),
|
||||
Tools: a.Tools.Specs(),
|
||||
Reasoning: a.Reasoning,
|
||||
MaxTokens: a.MaxTokens,
|
||||
}
|
||||
stream, err := a.Client.Stream(ctx, req)
|
||||
if err != nil {
|
||||
|
|
|
|||
|
|
@@ -109,3 +109,38 @@ func TestAgentDropsPartialAssistantBeforeRetry(t *testing.T) {
|
|||
t.Fatalf("final assistant text = %q; want recovered", got)
|
||||
}
|
||||
}
|
||||
|
||||
// captureClient records the last Request it received so tests can
|
||||
// assert what the agent put on the wire.
|
||||
type captureClient struct {
|
||||
lastReq provider.Request
|
||||
}
|
||||
|
||||
// Name reports the fixed provider name used by this test client.
func (c *captureClient) Name() string {
	return "capture"
}
|
||||
|
||||
func (c *captureClient) Stream(ctx context.Context, req provider.Request) (<-chan provider.Event, error) {
|
||||
c.lastReq = req
|
||||
out := make(chan provider.Event, 3)
|
||||
go func() {
|
||||
defer close(out)
|
||||
out <- provider.EventStart{Provider: "capture", Model: req.Model}
|
||||
out <- provider.EventDone{Stop: provider.StopEnd, Message: provider.Message{
|
||||
Role: provider.RoleAssistant,
|
||||
Content: []provider.Content{provider.TextBlock{Text: "ok"}},
|
||||
}}
|
||||
}()
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func TestAgentPropagatesMaxTokens(t *testing.T) {
|
||||
client := &captureClient{}
|
||||
a := NewAgent(client, "fake-model", "system", Registry{})
|
||||
a.MaxTokens = 64000
|
||||
|
||||
if err := a.Prompt(context.Background(), "hello", nil, nil); err != nil {
|
||||
t.Fatalf("Prompt returned %v", err)
|
||||
}
|
||||
if client.lastReq.MaxTokens != 64000 {
|
||||
t.Fatalf("request MaxTokens = %d; want 64000 (Agent.MaxTokens not propagated)", client.lastReq.MaxTokens)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -163,6 +163,42 @@ func TestBedrockModelSupportsCaching(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestBedrockBuildRequestMaxTokens(t *testing.T) {
|
||||
client := &bedrockClient{region: "us-east-1"}
|
||||
|
||||
// A non-zero MaxTokens must flow through to InferenceConfig so the
|
||||
// model gets its full output budget. This is the regression guard
|
||||
// for long writes/edits being truncated at Bedrock's 4096 default.
|
||||
req, err := client.buildRequest(Request{
|
||||
Model: "anthropic.claude-sonnet-4-5-20250929-v1:0",
|
||||
MaxTokens: 64000,
|
||||
Messages: []Message{
|
||||
{Role: RoleUser, Content: []Content{TextBlock{Text: "hello"}}},
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if req.InferenceConfig.MaxTokens != 64000 {
|
||||
t.Errorf("MaxTokens = %d, want 64000", req.InferenceConfig.MaxTokens)
|
||||
}
|
||||
|
||||
// Zero still falls back to the conservative provider default so an
|
||||
// unset budget never sends maxTokens:0 (which Bedrock rejects).
|
||||
reqZero, err := client.buildRequest(Request{
|
||||
Model: "anthropic.claude-sonnet-4-5-20250929-v1:0",
|
||||
Messages: []Message{
|
||||
{Role: RoleUser, Content: []Content{TextBlock{Text: "hello"}}},
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if reqZero.InferenceConfig.MaxTokens != 4096 {
|
||||
t.Errorf("zero MaxTokens default = %d, want 4096", reqZero.InferenceConfig.MaxTokens)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBedrockBuildRequestCachingClaudeModel(t *testing.T) {
|
||||
// A Claude model (PriceCacheWrite > 0) should get cachePoint markers
|
||||
// in the system array and on the last user message.
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue