mirror of
https://github.com/patriceckhart/zot.git
synced 2026-06-27 05:46:34 +02:00
Send Bedrock inference-profile IDs for on-demand models
Newer Bedrock models (Anthropic Claude 4.x, DeepSeek) reject invocation by their bare foundation-model ID with on-demand throughput, demanding a cross-region inference-profile ID instead (HTTP 400). Rewrite such IDs at request time by prepending the region-matched geo prefix (us/eu/apac/us-gov), so selecting anthropic.claude-sonnet-4-5-... in a us-east-1 setup invokes us.anthropic.claude-sonnet-4-5-... Already-prefixed IDs, ARNs, and families that don't need a profile are left untouched, preserving explicit choices and custom application inference profiles.
This commit is contained in:
parent
4bcdf8804b
commit
d8976c94df
2 changed files with 121 additions and 1 deletions
|
|
@@ -285,6 +285,94 @@ func (c *bedrockClient) buildRequest(req Request) (*bedrockRequest, error) {
|
|||
return out, nil
|
||||
}
|
||||
|
||||
// resolveBedrockInferenceProfileID maps a bare foundation-model ID to
|
||||
// its region-matched cross-region inference-profile ID.
|
||||
//
|
||||
// Several newer Bedrock models (Anthropic Claude 4.x, DeepSeek, etc.)
|
||||
// cannot be invoked with on-demand throughput by their plain
|
||||
// foundation-model ID; Bedrock returns HTTP 400 demanding "the ID or
|
||||
// ARN of an inference profile that contains this model". The profile
|
||||
// ID is the same model ID with a geographic prefix (us/eu/apac/...).
|
||||
//
|
||||
// We only rewrite IDs that (a) lack an existing geo prefix and (b)
|
||||
// belong to a model family that requires a profile. IDs that already
|
||||
// carry a prefix (e.g. "eu.anthropic...", "global.anthropic...") or
|
||||
// fully-qualified ARNs are returned unchanged, so explicit user
|
||||
// choices and custom application inference profiles still work.
|
||||
func resolveBedrockInferenceProfileID(modelID, region string) string {
|
||||
if modelID == "" {
|
||||
return modelID
|
||||
}
|
||||
// ARNs are already inference-profile references; leave untouched.
|
||||
if strings.HasPrefix(modelID, "arn:") {
|
||||
return modelID
|
||||
}
|
||||
// Already geo-prefixed (us. / eu. / apac. / ap. / us-gov. / global.)?
|
||||
if bedrockHasGeoPrefix(modelID) {
|
||||
return modelID
|
||||
}
|
||||
if !bedrockRequiresInferenceProfile(modelID) {
|
||||
return modelID
|
||||
}
|
||||
prefix := bedrockGeoPrefixForRegion(region)
|
||||
if prefix == "" {
|
||||
return modelID
|
||||
}
|
||||
return prefix + "." + modelID
|
||||
}
|
||||
|
||||
// bedrockGeoPrefixes lists the geographic prefixes Bedrock puts in
// front of a model ID to form a cross-region inference-profile ID.
// An ID beginning with one of these plus a dot is treated as already
// being a profile reference.
var bedrockGeoPrefixes = []string{
	"us-gov",
	"us",
	"eu",
	"apac",
	"ap",
	"global",
	"au",
}

// bedrockHasGeoPrefix reports whether modelID starts with one of the
// bedrockGeoPrefixes entries immediately followed by a dot.
func bedrockHasGeoPrefix(modelID string) bool {
	for i := 0; i < len(bedrockGeoPrefixes); i++ {
		geo := bedrockGeoPrefixes[i]
		if !strings.HasPrefix(modelID, geo) {
			continue
		}
		// The prefix only counts when the very next character is the
		// separating dot ("us." matches, "usa." does not).
		if strings.HasPrefix(modelID[len(geo):], ".") {
			return true
		}
	}
	return false
}
|
||||
|
||||
// bedrockRequiresInferenceProfile reports whether a bare
// foundation-model ID belongs to a family that is treated as needing a
// cross-region inference profile for on-demand throughput. The match is
// purely by ID prefix: "anthropic.claude-" or "deepseek.".
func bedrockRequiresInferenceProfile(modelID string) bool {
	return strings.HasPrefix(modelID, "anthropic.claude-") ||
		strings.HasPrefix(modelID, "deepseek.")
}
|
||||
|
||||
// bedrockGeoPrefixForRegion maps an AWS region to the geo prefix used
// by its cross-region inference profiles.
//
// Mapping:
//   - ""          -> "us"      (no region configured; default to US)
//   - "us-gov-*"  -> "us-gov"
//   - "us-*", "ca-*", "sa-*" -> "us"
//   - "eu-*"      -> "eu"
//   - "ap-*"      -> "apac"
//
// Any other region returns "", which tells the caller to leave the
// model ID unchanged instead of guessing a geography the region may
// not belong to.
func bedrockGeoPrefixForRegion(region string) string {
	switch {
	case region == "":
		return "us"
	// Must be tested before the generic "us-" case, which it also matches.
	case strings.HasPrefix(region, "us-gov-"):
		return "us-gov"
	case strings.HasPrefix(region, "us-"),
		strings.HasPrefix(region, "ca-"),
		strings.HasPrefix(region, "sa-"):
		return "us"
	case strings.HasPrefix(region, "eu-"):
		return "eu"
	case strings.HasPrefix(region, "ap-"):
		return "apac"
	default:
		// Unknown mapping: report it rather than silently picking "us".
		return ""
	}
}
|
||||
|
||||
func (c *bedrockClient) Stream(ctx context.Context, req Request) (<-chan Event, error) {
|
||||
wire, err := c.buildRequest(req)
|
||||
if err != nil {
|
||||
|
|
@@ -294,7 +382,8 @@ func (c *bedrockClient) Stream(ctx context.Context, req Request) (<-chan Event,
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
url := c.baseURL + "/model/" + req.Model + "/converse-stream"
|
||||
modelID := resolveBedrockInferenceProfileID(req.Model, c.region)
|
||||
url := c.baseURL + "/model/" + modelID + "/converse-stream"
|
||||
httpReq, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(body))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
|
|
|||
|
|
@@ -81,3 +81,34 @@ func TestReadAWSCredentialsFile(t *testing.T) {
|
|||
t.Logf("no aws creds available (expected on CI): %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveBedrockInferenceProfileID(t *testing.T) {
|
||||
cases := []struct {
|
||||
model string
|
||||
region string
|
||||
want string
|
||||
}{
|
||||
// Bare Anthropic foundation IDs get the region-matched prefix.
|
||||
{"anthropic.claude-sonnet-4-5-20250929-v1:0", "us-east-1", "us.anthropic.claude-sonnet-4-5-20250929-v1:0"},
|
||||
{"anthropic.claude-sonnet-4-5-20250929-v1:0", "eu-central-1", "eu.anthropic.claude-sonnet-4-5-20250929-v1:0"},
|
||||
{"anthropic.claude-opus-4-6-v1", "ap-southeast-2", "apac.anthropic.claude-opus-4-6-v1"},
|
||||
{"anthropic.claude-opus-4-6-v1", "us-gov-west-1", "us-gov.anthropic.claude-opus-4-6-v1"},
|
||||
{"deepseek.r1-v1:0", "eu-west-1", "eu.deepseek.r1-v1:0"},
|
||||
// Empty region defaults to us.
|
||||
{"anthropic.claude-opus-4-6-v1", "", "us.anthropic.claude-opus-4-6-v1"},
|
||||
// Already-prefixed IDs are left untouched.
|
||||
{"eu.anthropic.claude-sonnet-4-5-20250929-v1:0", "us-east-1", "eu.anthropic.claude-sonnet-4-5-20250929-v1:0"},
|
||||
{"global.anthropic.claude-opus-4-6-v1", "us-east-1", "global.anthropic.claude-opus-4-6-v1"},
|
||||
{"us.anthropic.claude-opus-4-6-v1", "eu-central-1", "us.anthropic.claude-opus-4-6-v1"},
|
||||
// ARNs are passed through verbatim.
|
||||
{"arn:aws:bedrock:us-east-1:123:inference-profile/us.anthropic.claude-opus-4-6-v1", "eu-west-1", "arn:aws:bedrock:us-east-1:123:inference-profile/us.anthropic.claude-opus-4-6-v1"},
|
||||
// Families that don't need a profile are untouched.
|
||||
{"amazon.nova-pro-v1:0", "us-east-1", "amazon.nova-pro-v1:0"},
|
||||
{"", "us-east-1", ""},
|
||||
}
|
||||
for _, c := range cases {
|
||||
if got := resolveBedrockInferenceProfileID(c.model, c.region); got != c.want {
|
||||
t.Errorf("resolveBedrockInferenceProfileID(%q, %q) = %q; want %q", c.model, c.region, got, c.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue