Skip to content

Commit e43621c

Browse files
authored
[chore] add mi355x BF16 peak flops (#1909)
1 parent 4db8f6e commit e43621c

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

torchtitan/tools/utils.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ def collect(reason: str, generation: int = 1):
         logger.info("[GC] %s took %.2f seconds", reason, time.monotonic() - begin)


-# hardcoded BF16 type peak flops for NVIDIA A100, H100, H200, B200 GPU and AMD MI250, MI300X, AMD MI325X and Intel PVC
+# hardcoded BF16 type peak flops for NVIDIA A100, H100, H200, B200 GPU and AMD MI250, MI300X, MI325X, MI355X and Intel PVC
 def get_peak_flops(device_name: str) -> int:
     try:
         # Run the lspci command and capture the output
@@ -97,6 +97,9 @@ def get_peak_flops(device_name: str) -> int:
     elif "B200" in device_name:
         # data from https://nvdam.widen.net/s/wwnsxrhm2w/blackwell-datasheet-3384703
         return 2.25e15
+    elif "MI355X" in device_name:
+        # MI355X data from https://www.amd.com/en/products/accelerators/instinct/mi350/mi355x.html
+        return 2500e12
     elif "MI300X" in device_name or "MI325X" in device_name:
         # MI300X data from https://www.amd.com/en/products/accelerators/instinct/mi300/mi300x.html
         # MI325X data from https://www.amd.com/en/products/accelerators/instinct/mi300/mi325x.html

0 commit comments

Comments
 (0)