fix(provider): clamp max_tokens to fit OpenRouter provider context window

- OpenRouter currently rejects requests where input + max_output exceeds the serving provider's context limit (which may be tighter than the model-level value)
- use the smaller of ContextWindow and MaxOutput as the cap, with a 4096-token input reserve
2026-06-26 21:36:31 +02:00 · 2026-06-09 23:25:37 +08:00 · 2026-06-09 23:25:37 +08:00 · 1425e68636
commit 1425e68636
parent 3d031dde26
2 changed files with 27 additions and 0 deletions
--- a/packages/provider/discover.go
+++ b/packages/provider/discover.go
@ -286,6 +286,8 @@ func DiscoverOpenRouter(ctx context.Context, baseURL string) ([]Model, error) {
 		ctxWin := d.ContextLength
 		if ctxWin == 0 {
 			ctxWin = d.TopProvider.ContextLength
+		} else if d.TopProvider.ContextLength > 0 && d.TopProvider.ContextLength < ctxWin {
+			ctxWin = d.TopProvider.ContextLength
 		}
 		maxOut := 0
 		if d.TopProvider.MaxCompletionTokens != nil {
--- a/packages/provider/openai.go
+++ b/packages/provider/openai.go
@ -183,6 +183,31 @@ func (c *openaiClient) buildRequest(req Request) (*oaiRequest, error) {
 	if maxTok <= 0 {
 		maxTok = m.MaxOutput
 	}
+	// Clamp max_tokens so output + minimum input fits within the context
+	// window. Some providers (OpenRouter) enforce input + max_output <=
+	// context_length and reject requests where the total exceeds it.
+	// This reservation ensures basic user input (system prompt + first
+	// message + tool definitions) has room.
+	//
+	// Use the smaller of ContextWindow and MaxOutput as the cap because
+	// some providers (OpenRouter) report inflated model-level context
+	// windows (e.g. 1000000) while the serving provider enforces a much
+	// tighter limit (e.g. 262144). When both are wrong we still land on
+	// the output cap, which is the actual limit reported by the provider.
+	if m.ContextWindow > 0 && m.MaxOutput > 0 {
+		ctxCap := m.ContextWindow
+		if m.MaxOutput < ctxCap {
+			ctxCap = m.MaxOutput
+		}
+		const reserve = 4096
+		clamped := ctxCap - reserve
+		if clamped < 1024 {
+			clamped = 1024
+		}
+		if maxTok > clamped {
+			maxTok = clamped
+		}
+	}
 	if m.Reasoning {
 		if maxTok > 0 {
 			out.MaxCompletionTok = &maxTok