Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -568,12 +568,22 @@ The bot store feature allows users to share and discover custom bots. You can co
- **enableBotStoreReplicas**: Controls whether standby replicas are enabled for the OpenSearch Serverless collection used by bot store (default: `false`). Setting it to `true` improves availability but increases costs, while `false` reduces costs but may affect availability.
> **Important**: You can't update this property after the collection is already created. If you attempt to modify this property, the collection continues to use the original value.

### Cross-region inference
### Cross-region and Global inference

[Cross-region inference](https://docs.aws.amazon.com/bedrock/latest/userguide/inference-profiles-support.html) allows Amazon Bedrock to dynamically route model inference requests across multiple AWS regions, enhancing throughput and resilience during peak demand periods. To configure, edit `cdk.json`.
[Cross-region and Global inference](https://docs.aws.amazon.com/bedrock/latest/userguide/inference-profiles-support.html)
allows Amazon Bedrock to dynamically route model inference requests across
multiple AWS regions, enhancing throughput and resilience during peak demand
periods. Global inference routes the requests to the optimal region based on
latency and availability anywhere in the world, while cross-region inference
routes requests only within the same geography (for example, within the US).
Some service control policies (SCPs) may restrict one or the other, or both,
so you can configure them independently. By default, both are enabled.

To configure, change the following settings in `cdk.json` or `parameters.ts`:

```json
"enableBedrockCrossRegionInference": true
"enableBedrockGlobalInference": false,
"enableBedrockCrossRegionInference": false,
```

### Lambda SnapStart
Expand Down
13 changes: 9 additions & 4 deletions backend/app/bedrock.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@
logger.setLevel(logging.INFO)

BEDROCK_REGION = os.environ.get("BEDROCK_REGION", "us-east-1")
ENABLE_BEDROCK_GLOBAL_INFERENCE = (
os.environ.get("ENABLE_BEDROCK_GLOBAL_INFERENCE", "false") == "true"
)
ENABLE_BEDROCK_CROSS_REGION_INFERENCE = (
os.environ.get("ENABLE_BEDROCK_CROSS_REGION_INFERENCE", "false") == "true"
)
Expand Down Expand Up @@ -979,23 +982,25 @@ def get_regional_inference_profile_id(

def get_model_id(
model: type_model_name,
enable_global: bool = ENABLE_BEDROCK_GLOBAL_INFERENCE,
enable_cross_region: bool = ENABLE_BEDROCK_CROSS_REGION_INFERENCE,
bedrock_region: str = BEDROCK_REGION,
) -> str:
base_model_id = BASE_MODEL_IDS.get(model)
if not base_model_id:
raise ValueError(f"Unsupported model: {model}")

if enable_cross_region:
# 1. First, try to use global inference profile if available
# 1. First, try to use global inference profile if enabled and available
if enable_global:
global_profile_id = get_global_inference_profile_id(model, bedrock_region)
if global_profile_id:
logger.info(
f"Using global inference profile: {global_profile_id} for model '{model}'"
)
return global_profile_id

# 2. Fallback to regional cross-region inference profile if available
# 2. Fallback to regional cross-region inference profile if enabled and available
if enable_cross_region:
regional_profile_id = get_regional_inference_profile_id(model, bedrock_region)
if regional_profile_id:
logger.info(
Expand All @@ -1007,6 +1012,6 @@ def get_model_id(
f"Region '{bedrock_region}' does not support cross-region inference for model '{model}'."
)

# 3. No cross-region inference
# 3. Use standalone model (no global or cross-region inference)
logger.info(f"Using local model ID: {base_model_id} for model '{model}'")
return base_model_id
1 change: 1 addition & 0 deletions cdk/bin/bedrock-chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ const chat = new BedrockChatStack(
selfSignUpEnabled: params.selfSignUpEnabled,
documentBucket: bedrockRegionResources.documentBucket,
enableRagReplicas: params.enableRagReplicas,
enableBedrockGlobalInference: params.enableBedrockGlobalInference,
enableBedrockCrossRegionInference: params.enableBedrockCrossRegionInference,
enableLambdaSnapStart: params.enableLambdaSnapStart,
alternateDomainName: params.alternateDomainName,
Expand Down
5 changes: 5 additions & 0 deletions cdk/lib/bedrock-chat-stack.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ export interface BedrockChatStackProps extends StackProps {
readonly enableIpV6: boolean;
readonly documentBucket: Bucket;
readonly enableRagReplicas: boolean;
readonly enableBedrockGlobalInference: boolean;
readonly enableBedrockCrossRegionInference: boolean;
readonly enableLambdaSnapStart: boolean;
readonly enableBotStore: boolean;
Expand Down Expand Up @@ -219,6 +220,8 @@ export class BedrockChatStack extends cdk.Stack {
bedrockCustomBotProject: bedrockCustomBotCodebuild.project,
usageAnalysis,
largeMessageBucket,
enableBedrockGlobalInference:
props.enableBedrockGlobalInference,
enableBedrockCrossRegionInference:
props.enableBedrockCrossRegionInference,
enableLambdaSnapStart: props.enableLambdaSnapStart,
Expand Down Expand Up @@ -269,6 +272,8 @@ export class BedrockChatStack extends cdk.Stack {
bedrockRegion: props.bedrockRegion,
largeMessageBucket,
documentBucket: props.documentBucket,
enableBedrockGlobalInference:
props.enableBedrockGlobalInference,
enableBedrockCrossRegionInference:
props.enableBedrockCrossRegionInference,
enableLambdaSnapStart: props.enableLambdaSnapStart,
Expand Down
3 changes: 3 additions & 0 deletions cdk/lib/constructs/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ export interface ApiProps {
readonly apiPublishProject: codebuild.IProject;
readonly bedrockCustomBotProject: codebuild.IProject;
readonly usageAnalysis?: UsageAnalysis;
readonly enableBedrockGlobalInference: boolean;
readonly enableBedrockCrossRegionInference: boolean;
readonly enableLambdaSnapStart: boolean;
readonly openSearchEndpoint?: string;
Expand Down Expand Up @@ -262,6 +263,8 @@ export class Api extends Construct {
props.usageAnalysis?.ddbExportTable.tableName || "",
USAGE_ANALYSIS_WORKGROUP: props.usageAnalysis?.workgroupName || "",
USAGE_ANALYSIS_OUTPUT_LOCATION: usageAnalysisOutputLocation,
ENABLE_BEDROCK_GLOBAL_INFERENCE:
props.enableBedrockGlobalInference.toString(),
ENABLE_BEDROCK_CROSS_REGION_INFERENCE:
props.enableBedrockCrossRegionInference.toString(),
GLOBAL_AVAILABLE_MODELS: props.globalAvailableModels
Expand Down
3 changes: 3 additions & 0 deletions cdk/lib/constructs/websocket.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ export interface WebSocketProps {
readonly websocketSessionTable: ITable;
readonly largeMessageBucket: s3.IBucket;
readonly accessLogBucket?: s3.Bucket;
readonly enableBedrockGlobalInference: boolean;
readonly enableBedrockCrossRegionInference: boolean;
readonly enableLambdaSnapStart: boolean;
}
Expand Down Expand Up @@ -126,6 +127,8 @@ export class WebSocket extends Construct {
LARGE_MESSAGE_BUCKET: props.largeMessageBucket.bucketName,
LARGE_PAYLOAD_SUPPORT_BUCKET: largePayloadSupportBucket.bucketName,
WEBSOCKET_SESSION_TABLE_NAME: props.websocketSessionTable.tableName,
ENABLE_BEDROCK_GLOBAL_INFERENCE:
props.enableBedrockGlobalInference.toString(),
ENABLE_BEDROCK_CROSS_REGION_INFERENCE:
props.enableBedrockCrossRegionInference.toString(),
},
Expand Down
7 changes: 7 additions & 0 deletions cdk/lib/utils/parameter-models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ const BaseParametersSchema = z.object({

// Bedrock configuration
bedrockRegion: z.string().default("us-east-1"),
enableBedrockGlobalInference: z.boolean().default(true),
enableBedrockCrossRegionInference: z.boolean().default(true),
});

Expand Down Expand Up @@ -232,6 +233,9 @@ export function resolveBedrockChatParameters(
"publishedApiAllowedIpV6AddressRanges"
),
enableRagReplicas: app.node.tryGetContext("enableRagReplicas"),
enableBedrockGlobalInference: app.node.tryGetContext(
"enableBedrockGlobalInference"
),
enableBedrockCrossRegionInference: app.node.tryGetContext(
"enableBedrockCrossRegionInference"
),
Expand Down Expand Up @@ -303,6 +307,9 @@ export function resolveApiPublishParameters(): ApiPublishParameters {
envName: getEnvVar("ENV_NAME"),
envPrefix: getEnvVar("ENV_PREFIX"),
bedrockRegion: getEnvVar("BEDROCK_REGION"),
enableBedrockGlobalInference: getEnvVar(
"ENABLE_BEDROCK_GLOBAL_INFERENCE"
),
enableBedrockCrossRegionInference: getEnvVar(
"ENABLE_BEDROCK_CROSS_REGION_INFERENCE"
),
Expand Down