Skip to content

Commit 26cb186

Browse files
clumsy and azzhipa authored
feat: add aws_g6e instances (#969)
Co-authored-by: Alexander Jipa <azzhipa@amazon.com>
1 parent 53933e3 commit 26cb186

File tree

2 files changed

+134
-0
lines changed

2 files changed

+134
-0
lines changed

torchx/specs/named_resources_aws.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,80 @@ def aws_g5_48xlarge() -> Resource:
254254
)
255255

256256

257+
def aws_g6e_xlarge() -> Resource:
    """Return the named resource describing a ``g6e.xlarge`` instance.

    Returns:
        A ``Resource`` with ``cpu=4``, ``gpu=1``, ``memMB=32 * GiB`` and the
        ``K8S_ITYPE`` capability set to ``"g6e.xlarge"``.
    """
    capabilities = {K8S_ITYPE: "g6e.xlarge"}
    return Resource(cpu=4, gpu=1, memMB=32 * GiB, capabilities=capabilities)
264+
265+
266+
def aws_g6e_2xlarge() -> Resource:
    """Return the named resource describing a ``g6e.2xlarge`` instance.

    Returns:
        A ``Resource`` with ``cpu=8``, ``gpu=1``, ``memMB=64 * GiB`` and the
        ``K8S_ITYPE`` capability set to ``"g6e.2xlarge"``.
    """
    capabilities = {K8S_ITYPE: "g6e.2xlarge"}
    return Resource(cpu=8, gpu=1, memMB=64 * GiB, capabilities=capabilities)
273+
274+
275+
def aws_g6e_4xlarge() -> Resource:
    """Return the named resource describing a ``g6e.4xlarge`` instance.

    Returns:
        A ``Resource`` with ``cpu=16``, ``gpu=1``, ``memMB=128 * GiB`` and the
        ``K8S_ITYPE`` capability set to ``"g6e.4xlarge"``.
    """
    capabilities = {K8S_ITYPE: "g6e.4xlarge"}
    return Resource(cpu=16, gpu=1, memMB=128 * GiB, capabilities=capabilities)
282+
283+
284+
def aws_g6e_8xlarge() -> Resource:
    """Return the named resource describing a ``g6e.8xlarge`` instance.

    Returns:
        A ``Resource`` with ``cpu=32``, ``gpu=1``, ``memMB=256 * GiB`` and the
        ``K8S_ITYPE`` capability set to ``"g6e.8xlarge"``.
    """
    capabilities = {K8S_ITYPE: "g6e.8xlarge"}
    return Resource(cpu=32, gpu=1, memMB=256 * GiB, capabilities=capabilities)
291+
292+
293+
def aws_g6e_16xlarge() -> Resource:
    """Return the named resource describing a ``g6e.16xlarge`` instance.

    Returns:
        A ``Resource`` with ``cpu=64``, ``gpu=1``, ``memMB=512 * GiB`` and the
        ``K8S_ITYPE`` capability set to ``"g6e.16xlarge"``.
    """
    capabilities = {K8S_ITYPE: "g6e.16xlarge"}
    return Resource(cpu=64, gpu=1, memMB=512 * GiB, capabilities=capabilities)
300+
301+
302+
def aws_g6e_12xlarge() -> Resource:
    """Return the named resource describing a ``g6e.12xlarge`` instance.

    Returns:
        A ``Resource`` with ``cpu=48``, ``gpu=4``, ``memMB=384 * GiB`` and the
        ``K8S_ITYPE`` capability set to ``"g6e.12xlarge"``.
    """
    capabilities = {K8S_ITYPE: "g6e.12xlarge"}
    return Resource(cpu=48, gpu=4, memMB=384 * GiB, capabilities=capabilities)
309+
310+
311+
def aws_g6e_24xlarge() -> Resource:
    """Return the named resource describing a ``g6e.24xlarge`` instance.

    Returns:
        A ``Resource`` with ``cpu=96``, ``gpu=4``, ``memMB=768 * GiB``, the
        ``K8S_ITYPE`` capability set to ``"g6e.24xlarge"`` and a ``devices``
        mapping of ``{EFA_DEVICE: 2}``.
    """
    capabilities = {K8S_ITYPE: "g6e.24xlarge"}
    efa_devices = {EFA_DEVICE: 2}
    return Resource(
        cpu=96,
        gpu=4,
        memMB=768 * GiB,
        capabilities=capabilities,
        devices=efa_devices,
    )
319+
320+
321+
def aws_g6e_48xlarge() -> Resource:
    """Return the named resource describing a ``g6e.48xlarge`` instance.

    Returns:
        A ``Resource`` with ``cpu=192``, ``gpu=8``, ``memMB=1536 * GiB``, the
        ``K8S_ITYPE`` capability set to ``"g6e.48xlarge"`` and a ``devices``
        mapping of ``{EFA_DEVICE: 4}``.
    """
    capabilities = {K8S_ITYPE: "g6e.48xlarge"}
    efa_devices = {EFA_DEVICE: 4}
    return Resource(
        cpu=192,
        gpu=8,
        memMB=1536 * GiB,
        capabilities=capabilities,
        devices=efa_devices,
    )
329+
330+
257331
def aws_trn1_2xlarge() -> Resource:
258332
return Resource(
259333
cpu=8,
@@ -299,6 +373,14 @@ def aws_trn1_32xlarge() -> Resource:
299373
"aws_g5.12xlarge": aws_g5_12xlarge,
300374
"aws_g5.24xlarge": aws_g5_24xlarge,
301375
"aws_g5.48xlarge": aws_g5_48xlarge,
376+
"aws_g6e.xlarge": aws_g6e_xlarge,
377+
"aws_g6e.2xlarge": aws_g6e_2xlarge,
378+
"aws_g6e.4xlarge": aws_g6e_4xlarge,
379+
"aws_g6e.8xlarge": aws_g6e_8xlarge,
380+
"aws_g6e.16xlarge": aws_g6e_16xlarge,
381+
"aws_g6e.12xlarge": aws_g6e_12xlarge,
382+
"aws_g6e.24xlarge": aws_g6e_24xlarge,
383+
"aws_g6e.48xlarge": aws_g6e_48xlarge,
302384
"aws_trn1.2xlarge": aws_trn1_2xlarge,
303385
"aws_trn1.32xlarge": aws_trn1_32xlarge,
304386
}

torchx/specs/test/named_resources_aws_test.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,14 @@
2323
aws_g5_4xlarge,
2424
aws_g5_8xlarge,
2525
aws_g5_xlarge,
26+
aws_g6e_12xlarge,
27+
aws_g6e_16xlarge,
28+
aws_g6e_24xlarge,
29+
aws_g6e_2xlarge,
30+
aws_g6e_48xlarge,
31+
aws_g6e_4xlarge,
32+
aws_g6e_8xlarge,
33+
aws_g6e_xlarge,
2634
aws_m5_2xlarge,
2735
aws_p3_16xlarge,
2836
aws_p3_2xlarge,
@@ -87,6 +95,50 @@ def test_aws_p5(self) -> None:
8795
self.assertEqual(2048 * GiB, p5.memMB)
8896
self.assertEqual({EFA_DEVICE: 32}, p5.devices)
8997

98+
def test_aws_g6e(self) -> None:
    """Verify cpu, gpu and memMB for every g6e size, plus devices where set."""
    # Build every resource before asserting anything, in the original order.
    resources = [
        aws_g6e_xlarge(),
        aws_g6e_2xlarge(),
        aws_g6e_4xlarge(),
        aws_g6e_8xlarge(),
        aws_g6e_16xlarge(),
        aws_g6e_12xlarge(),
        aws_g6e_24xlarge(),
        aws_g6e_48xlarge(),
    ]
    # One row per resource above:
    # (cpu, gpu, multiplier applied to GiB for memMB, devices or None).
    # ``None`` means the devices mapping is not checked for that size.
    expectations = [
        (4, 1, 32, None),
        (8, 1, 64, None),
        (16, 1, 128, None),
        (32, 1, 256, None),
        (64, 1, 512, None),
        (48, 4, 384, None),
        (96, 4, 768, {EFA_DEVICE: 2}),
        (192, 8, 1536, {EFA_DEVICE: 4}),
    ]

    for resource, (cpu, gpu, mem_factor, devices) in zip(resources, expectations):
        self.assertEqual(cpu, resource.cpu)
        self.assertEqual(gpu, resource.gpu)
        self.assertEqual(mem_factor * GiB, resource.memMB)
        if devices is not None:
            self.assertEqual(devices, resource.devices)
141+
90142
def test_aws_g4dn(self) -> None:
91143
g4d = aws_g4dn_xlarge()
92144
self.assertEqual(4, g4d.cpu)

0 commit comments

Comments
 (0)