Skip to content

Commit df099b9

Browse files
vercel-ai-sdk[bot], heiwen, and aayush-kapoor
authored
Backport: feat(bedrock): add support for service tier for model inference (#14024)
This is an automated backport of #13242 to the release-v6.0 branch. FYI @heiwen Co-authored-by: Heinrich Wendel <heinrich.wendel@gmail.com> Co-authored-by: Aayush Kapoor <83492835+aayush-kapoor@users.noreply.github.com> Co-authored-by: Aayush Kapoor <aayushkapoor34@gmail.com>
1 parent 35036d1 commit df099b9

File tree

8 files changed

+181
-0
lines changed

8 files changed

+181
-0
lines changed

.changeset/hip-suits-tie.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@ai-sdk/amazon-bedrock": patch
3+
---
4+
5+
feat(bedrock): add support for service tier for model inference

content/providers/01-ai-sdk-providers/08-amazon-bedrock.mdx

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,37 @@ console.log(amazonResult.text); // text response
514514
See [AI SDK UI: Chatbot](/docs/ai-sdk-ui/chatbot#reasoning) for more details
515515
on how to integrate reasoning into your chatbot.
516516

517+
## Service Tiers
518+
519+
Amazon Bedrock supports selecting an inference service tier per request via the `serviceTier` provider option.
520+
521+
```ts
522+
import {
523+
bedrock,
524+
type AmazonBedrockLanguageModelOptions,
525+
} from '@ai-sdk/amazon-bedrock';
526+
import { generateText } from 'ai';
527+
528+
const result = await generateText({
529+
model: bedrock('us.anthropic.claude-sonnet-4-20250514-v1:0'),
530+
prompt: 'Summarize this support ticket backlog.',
531+
providerOptions: {
532+
bedrock: {
533+
serviceTier: 'priority',
534+
} satisfies AmazonBedrockLanguageModelOptions,
535+
},
536+
});
537+
```
538+
539+
Supported values are:
540+
541+
- `reserved`
542+
- `priority`
543+
- `default`
544+
- `flex`
545+
546+
See the [Amazon Bedrock service tiers documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/service-tiers-inference.html) for model availability and behavior.
547+
517548
## Extended Context Window
518549

519550
Claude Sonnet 4 models on Amazon Bedrock support an extended context window of up to 1 million tokens when using the `context-1m-2025-08-07` beta feature.
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import {
2+
bedrock,
3+
type AmazonBedrockLanguageModelOptions,
4+
} from '@ai-sdk/amazon-bedrock';
5+
import { generateText } from 'ai';
6+
import { run } from '../../lib/run';
7+
8+
run(async () => {
9+
const result = await generateText({
10+
model: bedrock('openai.gpt-oss-120b-1:0'),
11+
prompt: 'Invent a new holiday and describe its traditions.',
12+
providerOptions: {
13+
bedrock: {
14+
serviceTier: 'flex',
15+
} satisfies AmazonBedrockLanguageModelOptions,
16+
},
17+
});
18+
19+
console.log(result.text);
20+
console.log();
21+
console.log('Token usage:', result.usage);
22+
console.log('Finish reason:', result.finishReason);
23+
});

packages/amazon-bedrock/src/bedrock-api-types.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ export interface BedrockConverseInput {
1313
};
1414
additionalModelRequestFields?: Record<string, unknown>;
1515
additionalModelResponseFieldPaths?: string[];
16+
serviceTier?: {
17+
type: string;
18+
};
1619
guardrailConfig?:
1720
| BedrockGuardrailConfiguration
1821
| BedrockGuardrailStreamConfiguration

packages/amazon-bedrock/src/bedrock-chat-language-model.test.ts

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2137,6 +2137,41 @@ describe('doStream', () => {
21372137
});
21382138
});
21392139

2140+
it('should pass serviceTier provider option in stream requests', async () => {
2141+
setupMockEventStreamHandler();
2142+
server.urls[streamUrl].response = {
2143+
type: 'stream-chunks',
2144+
chunks: [
2145+
JSON.stringify({
2146+
messageStop: {
2147+
stopReason: 'stop_sequence',
2148+
},
2149+
}) + '\n',
2150+
],
2151+
};
2152+
2153+
await model.doStream({
2154+
prompt: TEST_PROMPT,
2155+
includeRawChunks: false,
2156+
providerOptions: {
2157+
bedrock: {
2158+
serviceTier: 'priority',
2159+
},
2160+
},
2161+
});
2162+
2163+
const requestBody = await server.calls[0].requestBodyJson;
2164+
2165+
expect(requestBody).toMatchObject({
2166+
serviceTier: {
2167+
type: 'priority',
2168+
},
2169+
});
2170+
expect(
2171+
requestBody.additionalModelRequestFields?.serviceTier,
2172+
).toBeUndefined();
2173+
});
2174+
21402175
it('should handle JSON response format in streaming', async () => {
21412176
setupMockEventStreamHandler();
21422177
prepareChunksFixtureResponse('bedrock-json-tool.1');
@@ -4231,6 +4266,30 @@ describe('doGenerate', () => {
42314266
});
42324267
});
42334268

4269+
it('should pass serviceTier provider option in generate requests', async () => {
4270+
prepareJsonFixtureResponse('bedrock-text');
4271+
4272+
await model.doGenerate({
4273+
prompt: TEST_PROMPT,
4274+
providerOptions: {
4275+
bedrock: {
4276+
serviceTier: 'priority',
4277+
},
4278+
},
4279+
});
4280+
4281+
const requestBody = await server.calls[0].requestBodyJson;
4282+
4283+
expect(requestBody).toMatchObject({
4284+
serviceTier: {
4285+
type: 'priority',
4286+
},
4287+
});
4288+
expect(
4289+
requestBody.additionalModelRequestFields?.serviceTier,
4290+
).toBeUndefined();
4291+
});
4292+
42344293
it('maps maxReasoningEffort for Nova without thinking (generate)', async () => {
42354294
server.urls[novaGenerateUrl].response = {
42364295
type: 'json-value',

packages/amazon-bedrock/src/bedrock-chat-language-model.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,7 @@ export class BedrockChatLanguageModel implements LanguageModelV3 {
368368
const {
369369
reasoningConfig: _,
370370
additionalModelRequestFields: __,
371+
serviceTier: ___,
371372
...filteredBedrockOptions
372373
} = providerOptions?.bedrock || {};
373374

@@ -387,6 +388,11 @@ export class BedrockChatLanguageModel implements LanguageModelV3 {
387388
...(Object.keys(inferenceConfig).length > 0 && {
388389
inferenceConfig,
389390
}),
391+
...(bedrockOptions.serviceTier != null && {
392+
serviceTier: {
393+
type: bedrockOptions.serviceTier,
394+
},
395+
}),
390396
...filteredBedrockOptions,
391397
...(toolConfig.tools !== undefined && toolConfig.tools.length > 0
392398
? { toolConfig }
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import { describe, expect, it } from 'vitest';
2+
import {
3+
amazonBedrockLanguageModelOptions,
4+
AmazonBedrockLanguageModelOptions,
5+
} from './bedrock-chat-options';
6+
7+
describe('amazonBedrockLanguageModelOptions', () => {
8+
describe('serviceTier', () => {
9+
it('accepts valid service tier values', () => {
10+
const validValues = ['reserved', 'priority', 'default', 'flex'] as const;
11+
12+
validValues.forEach(value => {
13+
const result = amazonBedrockLanguageModelOptions.safeParse({
14+
serviceTier: value,
15+
});
16+
17+
expect(result.success).toBe(true);
18+
expect(result.data?.serviceTier).toBe(value);
19+
});
20+
});
21+
22+
it('rejects invalid service tier values', () => {
23+
const invalidValues = ['on-demand', 'auto', 'standard', '', 'PRIORITY'];
24+
25+
invalidValues.forEach(value => {
26+
const result = amazonBedrockLanguageModelOptions.safeParse({
27+
serviceTier: value,
28+
});
29+
30+
expect(result.success).toBe(false);
31+
});
32+
});
33+
});
34+
35+
describe('type inference', () => {
36+
it('infers AmazonBedrockLanguageModelOptions type correctly', () => {
37+
const options: AmazonBedrockLanguageModelOptions = {
38+
serviceTier: 'priority',
39+
};
40+
41+
expect(options.serviceTier).toBe('priority');
42+
});
43+
});
44+
});

packages/amazon-bedrock/src/bedrock-chat-options.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,16 @@ export const amazonBedrockLanguageModelOptions = z.object({
122122
* Anthropic beta features to enable
123123
*/
124124
anthropicBeta: z.array(z.string()).optional(),
125+
/**
126+
* Service tier for the request.
127+
* @see https://docs.aws.amazon.com/bedrock/latest/userguide/service-tiers-inference.html
128+
*
129+
* - 'reserved': Uses provisioned throughput capacity
130+
* - 'priority': Prioritizes low-latency inference when capacity is available
131+
* - 'default': Standard on-demand tier
132+
* - 'flex': Lower-cost tier for flexible latency workloads
133+
*/
134+
serviceTier: z.enum(['reserved', 'priority', 'default', 'flex']).optional(),
125135
});
126136

127137
export type AmazonBedrockLanguageModelOptions = z.infer<

0 commit comments

Comments (0)