From d8976c94df4c9305fed46b535bf54c8129be23fd Mon Sep 17 00:00:00 2001
From: patriceckhart
Date: Thu, 4 Jun 2026 19:25:16 +0200
Subject: [PATCH] Send Bedrock inference-profile IDs for on-demand models

Newer Bedrock models (Anthropic Claude 4.x, DeepSeek) reject invocation
by their bare foundation-model ID with on-demand throughput, demanding a
cross-region inference-profile ID instead (HTTP 400). Rewrite such IDs
at request time by prepending the region-matched geo prefix
(us/eu/apac/us-gov), so selecting anthropic.claude-sonnet-4-5-... in a
us-east-1 setup invokes us.anthropic.claude-sonnet-4-5-...

Already-prefixed IDs, ARNs, and families that don't need a profile are
left untouched, preserving explicit choices and custom application
inference profiles. IDs are also left untouched in regions that have no
known geo mapping, rather than guessing a prefix.
---
 packages/provider/amazon_bedrock.go      | 98 +++++++++++++++++++++++-
 packages/provider/amazon_bedrock_test.go | 33 ++++++++
 2 files changed, 130 insertions(+), 1 deletion(-)

diff --git a/packages/provider/amazon_bedrock.go b/packages/provider/amazon_bedrock.go
index 56fb36e..f953cd5 100644
--- a/packages/provider/amazon_bedrock.go
+++ b/packages/provider/amazon_bedrock.go
@@ -285,6 +285,101 @@ func (c *bedrockClient) buildRequest(req Request) (*bedrockRequest, error) {
 	return out, nil
 }
 
+// resolveBedrockInferenceProfileID maps a bare foundation-model ID to
+// its region-matched cross-region inference-profile ID.
+//
+// Several newer Bedrock models (Anthropic Claude 4.x, DeepSeek, etc.)
+// cannot be invoked with on-demand throughput by their plain
+// foundation-model ID; Bedrock returns HTTP 400 demanding "the ID or
+// ARN of an inference profile that contains this model". The profile
+// ID is the same model ID with a geographic prefix (us/eu/apac/...).
+//
+// We only rewrite IDs that (a) lack an existing geo prefix and (b)
+// belong to a model family that requires a profile. IDs that already
+// carry a prefix (e.g. "eu.anthropic...", "global.anthropic...") or
+// fully-qualified ARNs are returned unchanged, so explicit user
+// choices and custom application inference profiles still work.
+func resolveBedrockInferenceProfileID(modelID, region string) string {
+	if modelID == "" {
+		return modelID
+	}
+	// ARNs are already inference-profile references; leave untouched.
+	if strings.HasPrefix(modelID, "arn:") {
+		return modelID
+	}
+	// Already geo-prefixed (us. / eu. / apac. / ap. / us-gov. / global.)?
+	if bedrockHasGeoPrefix(modelID) {
+		return modelID
+	}
+	if !bedrockRequiresInferenceProfile(modelID) {
+		return modelID
+	}
+	prefix := bedrockGeoPrefixForRegion(region)
+	if prefix == "" {
+		return modelID
+	}
+	return prefix + "." + modelID
+}
+
+// bedrockGeoPrefixes are the cross-region inference-profile geo
+// prefixes Bedrock uses. A model ID that starts with one of these
+// (followed by a dot) is already a profile reference.
+var bedrockGeoPrefixes = []string{"us-gov", "us", "eu", "apac", "ap", "global", "au"}
+
+func bedrockHasGeoPrefix(modelID string) bool {
+	for _, p := range bedrockGeoPrefixes {
+		if strings.HasPrefix(modelID, p+".") {
+			return true
+		}
+	}
+	return false
+}
+
+// bedrockRequiresInferenceProfile reports whether a bare
+// foundation-model ID is one of the families AWS only exposes through
+// a cross-region inference profile for on-demand throughput.
+//
+// NOTE(review): the "anthropic.claude-" match also covers older Claude
+// IDs (claude-v2, claude-instant, 3.x); confirm each of those has a
+// geo-prefixed profile before relying on the rewrite for them.
+func bedrockRequiresInferenceProfile(modelID string) bool {
+	switch {
+	case strings.HasPrefix(modelID, "anthropic.claude-"):
+		return true
+	case strings.HasPrefix(modelID, "deepseek."):
+		return true
+	default:
+		return false
+	}
+}
+
+// bedrockGeoPrefixForRegion maps an AWS region to the geo prefix used
+// by its cross-region inference profiles. An empty region is treated
+// as the us geography. Returns "" when the region has no known
+// mapping, in which case the model ID is left unchanged.
+func bedrockGeoPrefixForRegion(region string) string {
+	switch {
+	case region == "":
+		return "us"
+	case strings.HasPrefix(region, "us-gov-"):
+		return "us-gov"
+	case strings.HasPrefix(region, "us-"):
+		return "us"
+	case strings.HasPrefix(region, "eu-"):
+		return "eu"
+	case strings.HasPrefix(region, "ap-"):
+		return "apac"
+	case strings.HasPrefix(region, "ca-"):
+		return "us"
+	case strings.HasPrefix(region, "sa-"):
+		return "us"
+	default:
+		// Unknown geography (me-, af-, il-, cn-, ...): don't guess a
+		// prefix; the caller sends the bare model ID unchanged.
+		return ""
+	}
+}
+
 func (c *bedrockClient) Stream(ctx context.Context, req Request) (<-chan Event, error) {
 	wire, err := c.buildRequest(req)
 	if err != nil {
@@ -294,7 +389,8 @@ func (c *bedrockClient) Stream(ctx context.Context, req Request) (<-chan Event,
 	if err != nil {
 		return nil, err
 	}
-	url := c.baseURL + "/model/" + req.Model + "/converse-stream"
+	modelID := resolveBedrockInferenceProfileID(req.Model, c.region)
+	url := c.baseURL + "/model/" + modelID + "/converse-stream"
 	httpReq, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(body))
 	if err != nil {
 		return nil, err
diff --git a/packages/provider/amazon_bedrock_test.go b/packages/provider/amazon_bedrock_test.go
index 0703ea5..7241cfd 100644
--- a/packages/provider/amazon_bedrock_test.go
+++ b/packages/provider/amazon_bedrock_test.go
@@ -81,3 +81,36 @@ func TestReadAWSCredentialsFile(t *testing.T) {
 		t.Logf("no aws creds available (expected on CI): %v", err)
 	}
 }
+
+func TestResolveBedrockInferenceProfileID(t *testing.T) {
+	cases := []struct {
+		model  string
+		region string
+		want   string
+	}{
+		// Bare Anthropic foundation IDs get the region-matched prefix.
+		{"anthropic.claude-sonnet-4-5-20250929-v1:0", "us-east-1", "us.anthropic.claude-sonnet-4-5-20250929-v1:0"},
+		{"anthropic.claude-sonnet-4-5-20250929-v1:0", "eu-central-1", "eu.anthropic.claude-sonnet-4-5-20250929-v1:0"},
+		{"anthropic.claude-opus-4-6-v1", "ap-southeast-2", "apac.anthropic.claude-opus-4-6-v1"},
+		{"anthropic.claude-opus-4-6-v1", "us-gov-west-1", "us-gov.anthropic.claude-opus-4-6-v1"},
+		{"deepseek.r1-v1:0", "eu-west-1", "eu.deepseek.r1-v1:0"},
+		// Empty region defaults to us.
+		{"anthropic.claude-opus-4-6-v1", "", "us.anthropic.claude-opus-4-6-v1"},
+		// Regions with no known geo mapping leave the ID untouched.
+		{"anthropic.claude-opus-4-6-v1", "me-central-1", "anthropic.claude-opus-4-6-v1"},
+		// Already-prefixed IDs are left untouched.
+		{"eu.anthropic.claude-sonnet-4-5-20250929-v1:0", "us-east-1", "eu.anthropic.claude-sonnet-4-5-20250929-v1:0"},
+		{"global.anthropic.claude-opus-4-6-v1", "us-east-1", "global.anthropic.claude-opus-4-6-v1"},
+		{"us.anthropic.claude-opus-4-6-v1", "eu-central-1", "us.anthropic.claude-opus-4-6-v1"},
+		// ARNs are passed through verbatim.
+		{"arn:aws:bedrock:us-east-1:123:inference-profile/us.anthropic.claude-opus-4-6-v1", "eu-west-1", "arn:aws:bedrock:us-east-1:123:inference-profile/us.anthropic.claude-opus-4-6-v1"},
+		// Families that don't need a profile are untouched.
+		{"amazon.nova-pro-v1:0", "us-east-1", "amazon.nova-pro-v1:0"},
+		{"", "us-east-1", ""},
+	}
+	for _, c := range cases {
+		if got := resolveBedrockInferenceProfileID(c.model, c.region); got != c.want {
+			t.Errorf("resolveBedrockInferenceProfileID(%q, %q) = %q; want %q", c.model, c.region, got, c.want)
+		}
+	}
+}