Send Bedrock inference-profile IDs for on-demand models

Newer Bedrock models (Anthropic Claude 4.x, DeepSeek) reject invocation
by their bare foundation-model ID with on-demand throughput, demanding a
cross-region inference-profile ID instead (HTTP 400). Rewrite such IDs
at request time by prepending the region-matched geo prefix
(us/eu/apac/us-gov), so selecting anthropic.claude-sonnet-4-5-... in a
us-east-1 setup invokes us.anthropic.claude-sonnet-4-5-...

Already-prefixed IDs, ARNs, and families that don't need a profile are
left untouched, preserving explicit choices and custom application
inference profiles.
This commit is contained in:
patriceckhart 2026-06-04 19:25:16 +02:00
parent 4bcdf8804b
commit d8976c94df
2 changed files with 121 additions and 1 deletions

View file

@ -285,6 +285,94 @@ func (c *bedrockClient) buildRequest(req Request) (*bedrockRequest, error) {
return out, nil
}
// resolveBedrockInferenceProfileID turns a bare foundation-model ID
// into the cross-region inference-profile ID for the given region.
//
// Some newer Bedrock models (Anthropic Claude 4.x, DeepSeek, etc.)
// cannot be called with on-demand throughput through their plain
// foundation-model ID: Bedrock answers HTTP 400 and asks for "the ID
// or ARN of an inference profile that contains this model". The
// profile ID is the model ID with a geographic prefix in front
// (us/eu/apac/...).
//
// The ID is returned unchanged when any of the following holds, so
// explicit user choices and custom application inference profiles
// keep working:
//   - it is empty;
//   - it is an ARN (starts with "arn:");
//   - it already carries a geo prefix (e.g. "eu.anthropic...",
//     "global.anthropic...");
//   - its model family does not require a profile;
//   - no geo prefix is available for region.
//
// Otherwise the result is "<prefix>.<modelID>".
func resolveBedrockInferenceProfileID(modelID, region string) string {
	// The cases are evaluated top to bottom; each one is a reason to
	// pass the ID through verbatim.
	switch {
	case modelID == "":
		return modelID
	case strings.HasPrefix(modelID, "arn:"):
		// ARNs are already inference-profile references.
		return modelID
	case bedrockHasGeoPrefix(modelID):
		// us. / eu. / apac. / ap. / us-gov. / global. and friends.
		return modelID
	case !bedrockRequiresInferenceProfile(modelID):
		return modelID
	}

	if geo := bedrockGeoPrefixForRegion(region); geo != "" {
		return geo + "." + modelID
	}
	return modelID
}
// bedrockGeoPrefixes lists the geo prefixes Bedrock uses for
// cross-region inference profiles. A model ID that begins with one of
// them, immediately followed by a dot, is already a profile reference.
var bedrockGeoPrefixes = []string{"us-gov", "us", "eu", "apac", "ap", "global", "au"}

// bedrockHasGeoPrefix reports whether modelID starts with any entry of
// bedrockGeoPrefixes followed by ".".
func bedrockHasGeoPrefix(modelID string) bool {
	found := false
	// Stop scanning as soon as one prefix matches.
	for i := 0; i < len(bedrockGeoPrefixes) && !found; i++ {
		found = strings.HasPrefix(modelID, bedrockGeoPrefixes[i]+".")
	}
	return found
}
// bedrockRequiresInferenceProfile reports whether a bare
// foundation-model ID belongs to a family that, for on-demand
// throughput, is only reachable through a cross-region inference
// profile. The check is purely prefix-based: IDs starting with
// "anthropic.claude-" or "deepseek." qualify, everything else does not.
func bedrockRequiresInferenceProfile(modelID string) bool {
	return strings.HasPrefix(modelID, "anthropic.claude-") ||
		strings.HasPrefix(modelID, "deepseek.")
}
// bedrockGeoPrefixForRegion maps an AWS region to the geo prefix used
// by its cross-region inference profiles.
//
// Mapping, by region-name prefix:
//   - "us-gov-*"                 -> "us-gov"
//   - "", "us-*", "ca-*", "sa-*" -> "us" (an empty region defaults to us)
//   - "eu-*"                     -> "eu"
//   - "ap-*"                     -> "apac"
//
// Any other region returns "", signalling that there is no known
// mapping; the caller then leaves the model ID unchanged instead of
// sending a profile ID that is not matched to the region.
//
// NOTE(review): every "ap-*" region yields "apac" here even though
// narrower prefixes such as "au" are recognised elsewhere in this
// file — confirm against the Bedrock inference-profile region table.
func bedrockGeoPrefixForRegion(region string) string {
	switch {
	case strings.HasPrefix(region, "us-gov-"):
		// Must precede the plain "us-" case, which it would also match.
		return "us-gov"
	case region == "",
		strings.HasPrefix(region, "us-"),
		strings.HasPrefix(region, "ca-"),
		strings.HasPrefix(region, "sa-"):
		return "us"
	case strings.HasPrefix(region, "eu-"):
		return "eu"
	case strings.HasPrefix(region, "ap-"):
		return "apac"
	default:
		// Unknown geography: report "no mapping" rather than guessing.
		return ""
	}
}
func (c *bedrockClient) Stream(ctx context.Context, req Request) (<-chan Event, error) {
wire, err := c.buildRequest(req)
if err != nil {
@ -294,7 +382,8 @@ func (c *bedrockClient) Stream(ctx context.Context, req Request) (<-chan Event,
if err != nil {
return nil, err
}
url := c.baseURL + "/model/" + req.Model + "/converse-stream"
modelID := resolveBedrockInferenceProfileID(req.Model, c.region)
url := c.baseURL + "/model/" + modelID + "/converse-stream"
httpReq, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(body))
if err != nil {
return nil, err

View file

@ -81,3 +81,34 @@ func TestReadAWSCredentialsFile(t *testing.T) {
t.Logf("no aws creds available (expected on CI): %v", err)
}
}
// TestResolveBedrockInferenceProfileID runs a table of (model, region)
// pairs through resolveBedrockInferenceProfileID and compares each
// result with the expected ID. It covers prefixing of bare IDs, the
// empty-region default, and pass-through of already-prefixed IDs,
// ARNs, unaffected families and the empty string.
func TestResolveBedrockInferenceProfileID(t *testing.T) {
	const (
		sonnet   = "anthropic.claude-sonnet-4-5-20250929-v1:0"
		opus     = "anthropic.claude-opus-4-6-v1"
		deepseek = "deepseek.r1-v1:0"
		nova     = "amazon.nova-pro-v1:0"
		arn      = "arn:aws:bedrock:us-east-1:123:inference-profile/us.anthropic.claude-opus-4-6-v1"
	)

	type scenario struct {
		model, region, want string
	}
	table := []scenario{
		// Bare foundation IDs pick up the prefix matching the region.
		{model: sonnet, region: "us-east-1", want: "us." + sonnet},
		{model: sonnet, region: "eu-central-1", want: "eu." + sonnet},
		{model: opus, region: "ap-southeast-2", want: "apac." + opus},
		{model: opus, region: "us-gov-west-1", want: "us-gov." + opus},
		{model: deepseek, region: "eu-west-1", want: "eu." + deepseek},

		// With no region configured the prefix falls back to us.
		{model: opus, region: "", want: "us." + opus},

		// IDs that already carry a prefix come back as given.
		{model: "eu." + sonnet, region: "us-east-1", want: "eu." + sonnet},
		{model: "global." + opus, region: "us-east-1", want: "global." + opus},
		{model: "us." + opus, region: "eu-central-1", want: "us." + opus},

		// ARNs are never rewritten.
		{model: arn, region: "eu-west-1", want: arn},

		// Families that need no profile, and the empty ID, are untouched.
		{model: nova, region: "us-east-1", want: nova},
		{model: "", region: "us-east-1", want: ""},
	}

	for i := range table {
		sc := table[i]
		got := resolveBedrockInferenceProfileID(sc.model, sc.region)
		if got != sc.want {
			t.Errorf("resolveBedrockInferenceProfileID(%q, %q) = %q; want %q", sc.model, sc.region, got, sc.want)
		}
	}
}