@@ -75,6 +75,31 @@ export interface InstanceProductionVariantProps extends ProductionVariantProps {
75
75
readonly instanceType ?: InstanceType ;
76
76
}
77
77
78
/**
 * Construction properties for a serverless production variant.
 */
export interface ServerlessProductionVariantProps extends ProductionVariantProps {
  /**
   * Upper bound on the number of invocations the serverless endpoint processes concurrently.
   *
   * Valid range: 1-200
   */
  readonly maxConcurrency: number;

  /**
   * Memory allocated to the serverless endpoint, in MB. Only 1 GB steps are accepted:
   * 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
   */
  readonly memorySizeInMB: number;

  /**
   * Number of concurrent invocations that are kept provisioned and ready to respond.
   *
   * Valid range: 1-200, and must be less than or equal to `maxConcurrency`.
   *
   * @default - none
   */
  readonly provisionedConcurrency?: number;
}
102
+
78
103
/**
79
104
* Represents common attributes of all production variant types (e.g., instance, serverless) once
80
105
* associated to an EndpointConfig.
@@ -119,6 +144,26 @@ export interface InstanceProductionVariant extends ProductionVariant {
119
144
readonly instanceType : InstanceType ;
120
145
}
121
146
147
/**
 * A serverless production variant after it has been associated with an EndpointConfig.
 *
 * @internal
 */
interface ServerlessProductionVariant extends ProductionVariant {
  /**
   * Upper bound on the number of invocations the serverless endpoint processes concurrently.
   */
  readonly maxConcurrency: number;

  /**
   * Memory size of the serverless endpoint, in MB.
   */
  readonly memorySizeInMB: number;

  /**
   * Number of concurrent invocations kept provisioned and ready to respond, if any.
   */
  readonly provisionedConcurrency?: number;
}
166
+
122
167
/**
123
168
* Construction properties for a SageMaker EndpointConfig.
124
169
*/
@@ -142,9 +187,21 @@ export interface EndpointConfigProps {
142
187
* A list of instance production variants. You can always add more variants later by calling
143
188
* `EndpointConfig#addInstanceProductionVariant`.
144
189
*
190
+ * Cannot be specified if `serverlessProductionVariant` is specified.
191
+ *
145
192
* @default - none
146
193
*/
147
194
readonly instanceProductionVariants ?: InstanceProductionVariantProps [ ] ;
195
+
196
+ /**
197
+ * A serverless production variant. Serverless endpoints automatically launch compute resources
198
+ * and scale them in and out depending on traffic.
199
+ *
200
+ * Cannot be specified if `instanceProductionVariants` is specified.
201
+ *
202
+ * @default - none
203
+ */
204
+ readonly serverlessProductionVariant ?: ServerlessProductionVariantProps ;
148
205
}
149
206
150
207
/**
@@ -207,6 +264,7 @@ export class EndpointConfig extends cdk.Resource implements IEndpointConfig {
207
264
public readonly endpointConfigName : string ;
208
265
209
266
private readonly instanceProductionVariantsByName : { [ key : string ] : InstanceProductionVariant } = { } ;
267
+ private serverlessProductionVariant ?: ServerlessProductionVariant ;
210
268
211
269
constructor ( scope : Construct , id : string , props : EndpointConfigProps = { } ) {
212
270
super ( scope , id , {
@@ -215,13 +273,22 @@ export class EndpointConfig extends cdk.Resource implements IEndpointConfig {
215
273
// Enhanced CDK Analytics Telemetry
216
274
addConstructMetadata ( this , props ) ;
217
275
276
+ // Validate mutual exclusivity
277
+ if ( props . instanceProductionVariants && props . serverlessProductionVariant ) {
278
+ throw new Error ( 'Cannot specify both instanceProductionVariants and serverlessProductionVariant. Choose one variant type.' ) ;
279
+ }
280
+
218
281
( props . instanceProductionVariants || [ ] ) . map ( p => this . addInstanceProductionVariant ( p ) ) ;
219
282
283
+ if ( props . serverlessProductionVariant ) {
284
+ this . addServerlessProductionVariant ( props . serverlessProductionVariant ) ;
285
+ }
286
+
220
287
// create the endpoint configuration resource
221
288
const endpointConfig = new CfnEndpointConfig ( this , 'EndpointConfig' , {
222
289
kmsKeyId : ( props . encryptionKey ) ? props . encryptionKey . keyRef . keyArn : undefined ,
223
290
endpointConfigName : this . physicalName ,
224
- productionVariants : cdk . Lazy . any ( { produce : ( ) => this . renderInstanceProductionVariants ( ) } ) ,
291
+ productionVariants : cdk . Lazy . any ( { produce : ( ) => this . renderProductionVariants ( ) } ) ,
225
292
} ) ;
226
293
this . endpointConfigName = this . getResourceNameAttribute ( endpointConfig . attrEndpointConfigName ) ;
227
294
this . endpointConfigArn = this . getResourceArnAttribute ( endpointConfig . ref , {
@@ -238,6 +305,9 @@ export class EndpointConfig extends cdk.Resource implements IEndpointConfig {
238
305
*/
239
306
@MethodMetadata ( )
240
307
public addInstanceProductionVariant ( props : InstanceProductionVariantProps ) : void {
308
+ if ( this . serverlessProductionVariant ) {
309
+ throw new Error ( 'Cannot add instance production variant when serverless production variant is already configured' ) ;
310
+ }
241
311
if ( props . variantName in this . instanceProductionVariantsByName ) {
242
312
throw new Error ( `There is already a Production Variant with name '${ props . variantName } '` ) ;
243
313
}
@@ -252,6 +322,30 @@ export class EndpointConfig extends cdk.Resource implements IEndpointConfig {
252
322
} ;
253
323
}
254
324
325
/**
 * Add serverless production variant to the endpoint configuration.
 *
 * Throws if instance production variants have already been added, if a serverless
 * variant has already been added, or if `props` fails validation.
 *
 * @param props The properties of a serverless production variant to add.
 */
@MethodMetadata()
public addServerlessProductionVariant(props: ServerlessProductionVariantProps): void {
  if (Object.keys(this.instanceProductionVariantsByName).length > 0) {
    throw new Error('Cannot add serverless production variant when instance production variants are already configured');
  }
  if (this.serverlessProductionVariant) {
    throw new Error('Cannot add more than one serverless production variant per endpoint configuration');
  }
  this.validateServerlessProductionVariantProps(props);
  this.serverlessProductionVariant = {
    // `??` rather than `||`: an explicit weight of 0 passes validation and must
    // not be silently replaced by the default.
    initialVariantWeight: props.initialVariantWeight ?? 1.0,
    maxConcurrency: props.maxConcurrency,
    memorySizeInMB: props.memorySizeInMB,
    modelName: props.model.modelName,
    provisionedConcurrency: props.provisionedConcurrency,
    variantName: props.variantName,
  };
}
348
+
255
349
/**
256
350
* Get instance production variants associated with endpoint configuration.
257
351
*
@@ -276,10 +370,21 @@ export class EndpointConfig extends cdk.Resource implements IEndpointConfig {
276
370
}
277
371
278
372
/**
 * Check the overall set of production variants configured on this endpoint configuration.
 *
 * Throws when no variant is configured, when instance and serverless variants are
 * both present, or when more than 10 instance variants are configured.
 */
private validateProductionVariants(): void {
  const instanceVariantCount = this._instanceProductionVariants.length;
  const serverlessConfigured = this.serverlessProductionVariant !== undefined;

  // at least one variant of either kind is required
  if (instanceVariantCount === 0 && !serverlessConfigured) {
    throw new Error('Must configure at least 1 production variant');
  }

  // the two variant kinds are mutually exclusive
  if (instanceVariantCount > 0 && serverlessConfigured) {
    throw new Error('Cannot configure both instance and serverless production variants');
  }

  // upper limit on instance variants
  if (instanceVariantCount > 10) {
    throw new Error('Can\'t have more than 10 production variants');
  }
}
@@ -310,11 +415,65 @@ export class EndpointConfig extends cdk.Resource implements IEndpointConfig {
310
415
}
311
416
}
312
417
418
/**
 * Validate the properties of a serverless production variant.
 *
 * Every violated constraint is collected and reported together in a single error.
 * Numeric checks are skipped for values that are unresolved tokens: their concrete
 * value is not known at synthesis time, so comparing the encoded placeholder
 * against a range would reject otherwise valid input.
 *
 * @param props The serverless production variant properties to validate.
 * @throws Error if any constraint is violated.
 */
private validateServerlessProductionVariantProps(props: ServerlessProductionVariantProps): void {
  const errors: string[] = [];
  const { initialVariantWeight, maxConcurrency, memorySizeInMB, provisionedConcurrency } = props;
  const isConcrete = (value: number): boolean => !cdk.Token.isUnresolved(value);

  // check variant weight is not negative
  if (initialVariantWeight !== undefined && isConcrete(initialVariantWeight) && initialVariantWeight < 0) {
    errors.push('Cannot have negative variant weight');
  }

  // check maxConcurrency range
  if (isConcrete(maxConcurrency) && (maxConcurrency < 1 || maxConcurrency > 200)) {
    errors.push('maxConcurrency must be between 1 and 200');
  }

  // check memorySizeInMB valid values (1GB increments from 1024 to 6144)
  const validMemorySizes = [1024, 2048, 3072, 4096, 5120, 6144];
  if (isConcrete(memorySizeInMB) && !validMemorySizes.includes(memorySizeInMB)) {
    errors.push(`memorySizeInMB must be one of: ${validMemorySizes.join(', ')} MB`);
  }

  // check provisionedConcurrency range and relationship to maxConcurrency
  if (provisionedConcurrency !== undefined && isConcrete(provisionedConcurrency)) {
    if (provisionedConcurrency < 1 || provisionedConcurrency > 200) {
      errors.push('provisionedConcurrency must be between 1 and 200');
    }
    // the comparison is only meaningful when both sides are concrete numbers
    if (isConcrete(maxConcurrency) && provisionedConcurrency > maxConcurrency) {
      errors.push('provisionedConcurrency cannot be greater than maxConcurrency');
    }
  }

  // check environment compatibility with model
  const model = props.model;
  if (!sameEnv(model.env.account, this.env.account)) {
    errors.push(`Cannot use model in account ${model.env.account} for endpoint configuration in account ${this.env.account}`);
  } else if (!sameEnv(model.env.region, this.env.region)) {
    errors.push(`Cannot use model in region ${model.env.region} for endpoint configuration in region ${this.env.region}`);
  }

  if (errors.length > 0) {
    throw new Error(`Invalid Serverless Production Variant Props: ${errors.join(EOL)}`);
  }
}
459
+
460
/**
 * Render the configured production variants, whichever kind (instance or serverless) is in use.
 *
 * The overall variant configuration is validated before anything is rendered.
 */
private renderProductionVariants(): CfnEndpointConfig.ProductionVariantProperty[] {
  this.validateProductionVariants();

  return this.serverlessProductionVariant
    ? this.renderServerlessProductionVariant()
    : this.renderInstanceProductionVariants();
}
472
+
313
473
/**
314
474
* Render the list of instance production variants.
315
475
*/
316
476
private renderInstanceProductionVariants ( ) : CfnEndpointConfig . ProductionVariantProperty [ ] {
317
- this . validateProductionVariants ( ) ;
318
477
return this . _instanceProductionVariants . map ( v => ( {
319
478
acceleratorType : v . acceleratorType ?. toString ( ) ,
320
479
initialInstanceCount : v . initialInstanceCount ,
@@ -324,4 +483,25 @@ export class EndpointConfig extends cdk.Resource implements IEndpointConfig {
324
483
variantName : v . variantName ,
325
484
} ) ) ;
326
485
}
486
+
487
/**
 * Render the serverless production variant as a single-element list.
 *
 * Returns an empty list when no serverless variant has been configured.
 */
private renderServerlessProductionVariant(): CfnEndpointConfig.ProductionVariantProperty[] {
  const serverless = this.serverlessProductionVariant;
  if (!serverless) {
    return [];
  }

  const {
    initialVariantWeight,
    modelName,
    variantName,
    maxConcurrency,
    memorySizeInMB,
    provisionedConcurrency,
  } = serverless;

  return [{
    initialVariantWeight,
    modelName,
    variantName,
    serverlessConfig: {
      maxConcurrency,
      // the CloudFormation property is spelled `memorySizeInMb`
      memorySizeInMb: memorySizeInMB,
      provisionedConcurrency,
    },
  }];
}
327
507
}
0 commit comments