mirror of
https://github.com/patriceckhart/zot.git
synced 2026-06-26 21:36:31 +02:00
fix(provider): clamp max_tokens to fit OpenRouter provider context window
- OpenRouter currently rejects requests where input + max_output exceeds the serving provider's context limit (which may be tighter than the model-level value); - to avoid this, use the smaller of ContextWindow and MaxOutput as the cap, with a 4096-token input reserve
This commit is contained in:
parent
3d031dde26
commit
1425e68636
2 changed files with 27 additions and 0 deletions
|
|
@ -286,6 +286,8 @@ func DiscoverOpenRouter(ctx context.Context, baseURL string) ([]Model, error) {
|
|||
ctxWin := d.ContextLength
|
||||
if ctxWin == 0 {
|
||||
ctxWin = d.TopProvider.ContextLength
|
||||
} else if d.TopProvider.ContextLength > 0 && d.TopProvider.ContextLength < ctxWin {
|
||||
ctxWin = d.TopProvider.ContextLength
|
||||
}
|
||||
maxOut := 0
|
||||
if d.TopProvider.MaxCompletionTokens != nil {
|
||||
|
|
|
|||
|
|
@ -183,6 +183,31 @@ func (c *openaiClient) buildRequest(req Request) (*oaiRequest, error) {
|
|||
if maxTok <= 0 {
|
||||
maxTok = m.MaxOutput
|
||||
}
|
||||
// Clamp max_tokens so output + minimum input fits within the context
|
||||
// window. Some providers (OpenRouter) enforce input + max_output <=
|
||||
// context_length and reject requests where the total exceeds it.
|
||||
// This reservation ensures basic user input (system prompt + first
|
||||
// message + tool definitions) has room.
|
||||
//
|
||||
// Use the smaller of ContextWindow and MaxOutput as the cap because
|
||||
// some providers (OpenRouter) report inflated model-level context
|
||||
// windows (e.g. 1000000) while the serving provider enforces a much
|
||||
// tighter limit (e.g. 262144). When both are wrong we still land on
|
||||
// the output cap, which is the actual limit reported by the provider.
|
||||
if m.ContextWindow > 0 && m.MaxOutput > 0 {
|
||||
ctxCap := m.ContextWindow
|
||||
if m.MaxOutput < ctxCap {
|
||||
ctxCap = m.MaxOutput
|
||||
}
|
||||
const reserve = 4096
|
||||
clamped := ctxCap - reserve
|
||||
if clamped < 1024 {
|
||||
clamped = 1024
|
||||
}
|
||||
if maxTok > clamped {
|
||||
maxTok = clamped
|
||||
}
|
||||
}
|
||||
if m.Reasoning {
|
||||
if maxTok > 0 {
|
||||
out.MaxCompletionTok = &maxTok
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue