Skip to content

Commit 635ffe7

Browse files
q10 authored and facebook-github-bot committed
Migrate CUDA benchmark over to reusable workflow (#4707)
Summary:
X-link: facebookresearch/FBGEMM#1735
Pull Request resolved: #4707
Reviewed By: cthi
Differential Revision: D80308055
Pulled By: q10
fbshipit-source-id: ebfb90779937102240888c2ff42a72b7af516f31
1 parent 2a0567b commit 635ffe7

File tree

3 files changed

+50
-97
lines changed

3 files changed

+50
-97
lines changed

.github/workflows/_fbgemm_gpu_cuda_test.yml

Lines changed: 30 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -43,9 +43,15 @@ on:
4343
type: string
4444
required: false
4545
default: ""
46+
run-target:
47+
description: Run target (test, benchmark)
48+
type: string
49+
required: true
50+
default: test
4651
secrets:
4752
PYPI_TOKEN:
48-
required: true
53+
# The PyPI token is only needed if publishing the artifact to PyPI is desired
54+
required: false
4955

5056
jobs:
5157
# Download the built artifact from GHA, test on GPU, and push to PyPI
@@ -145,11 +151,33 @@ jobs:
145151
run: . $PRELUDE; install_fbgemm_gpu_wheel $BUILD_ENV *.whl
146152

147153
- name: Test with PyTest
154+
if: ${{ inputs.run-target == 'test' }}
148155
timeout-minutes: 60
149156
run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV
150157

158+
- name: Run FBGEMM_GPU Benchmark
159+
if: ${{ inputs.run-target == 'benchmark' }}
160+
timeout-minutes: 40
161+
run: . $PRELUDE; run_tbe_microbench $BUILD_ENV
162+
163+
- name: Upload Benchmark Traces as GHA Artifacts
164+
if: ${{ inputs.run-target == 'benchmark' }}
165+
uses: actions/upload-artifact@v4
166+
with:
167+
name: fbgemm_gpu_traces_${{ matrix.host-machine.arch }}_${{ matrix.compiler }}_py${{ matrix.python-version }}_cu${{ matrix.cuda-version }}.zip
168+
path: fbgemm_gpu/bench/*.json
169+
if-no-files-found: error
170+
151171
- name: Push Wheel to PyPI
152172
if: ${{ inputs.publish-to-pypi && matrix.cuda-version == inputs.cuda-version-publish }}
153173
env:
154174
PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
155-
run: . $PRELUDE; publish_to_pypi $BUILD_ENV "$PYPI_TOKEN" *.whl
175+
run: |
176+
. $PRELUDE;
177+
178+
if [[ -z "$PYPI_TOKEN" ]]; then
179+
echo "PYPI_TOKEN is not set!" >&2
180+
exit 1
181+
fi
182+
183+
publish_to_pypi $BUILD_ENV "$PYPI_TOKEN" *.whl

.github/workflows/fbgemm_gpu_benchmark_cuda.yml

Lines changed: 19 additions & 90 deletions
Original file line number | Diff line number | Diff line change
@@ -48,93 +48,22 @@ jobs:
4848

4949
benchmark:
5050
needs: build
51-
runs-on: ${{ matrix.host-machine.instance }}
52-
defaults:
53-
run:
54-
shell: bash
55-
env:
56-
PRELUDE: .github/scripts/setup_env.bash
57-
BUILD_ENV: build_binary
58-
BUILD_VARIANT: cuda
59-
BUILD_CUDA_VERSION: ${{ matrix.cuda-version }}
60-
ENFORCE_CUDA_DEVICE: 1
61-
strategy:
62-
fail-fast: false
63-
matrix:
64-
build-target: [ "default" ]
65-
host-machine: [
66-
{ arch: x86, instance: "linux.g5.4xlarge.nvidia.gpu" },
67-
# TODO: Enable when A100 machine queues are reasonably small enough for doing per-PR CI
68-
# https://hud.pytorch.org/metrics
69-
# { arch: x86, instance: "linux.gcp.a100" },
70-
]
71-
python-version: [ "3.13" ]
72-
cuda-version: [ "12.8.1" ]
73-
compiler: [ "gcc" ]
74-
75-
steps:
76-
# Cannot upgrade to actions/checkout@v4 yet because GLIBC on the instance is too old
77-
- name: Checkout the Repository
78-
uses: actions/checkout@v4
79-
with:
80-
submodules: true
81-
82-
- name: Download Wheel Artifact from GHA
83-
# Cannot upgrade to actions/download-artifact@v4 yet because GLIBC on the instance is too old
84-
uses: actions/download-artifact@v4
85-
with:
86-
name: fbgemm_${{ matrix.build-target }}_${{ matrix.host-machine.arch }}_${{ matrix.compiler }}_py${{ matrix.python-version }}_cu${{ matrix.cuda-version }}.whl
87-
88-
# Use PyTorch test infrastructure action - https://github.com/pytorch/test-infra/blob/main/.github/actions/setup-nvidia/action.yml
89-
- name: Install NVIDIA Drivers and NVIDIA-Docker Runtime
90-
uses: pytorch/test-infra/.github/actions/setup-nvidia@main
91-
92-
- name: Display System Info
93-
run: . $PRELUDE; print_system_info; print_ec2_info
94-
95-
- name: Display GPU Info
96-
run: . $PRELUDE; print_gpu_info
97-
98-
- name: Setup Miniconda
99-
run: . $PRELUDE; setup_miniconda $HOME/miniconda
100-
101-
- name: Create Conda Environment
102-
run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}
103-
104-
- name: Install Build Tools
105-
run: . $PRELUDE; install_build_tools $BUILD_ENV
106-
107-
- name: Install C/C++ Compilers for Updated LIBGCC
108-
# NOTE: gcc is required for torch dynamo to work properly, as some of
109-
# the compilation flags used by torch dynamo are gcc-specific:
110-
#
111-
# clang-16: error: unknown argument: '-fno-tree-loop-vectorize'
112-
run: . $PRELUDE; install_cxx_compiler $BUILD_ENV gcc
113-
114-
- name: Install CUDA
115-
run: . $PRELUDE; install_cuda $BUILD_ENV ${{ matrix.cuda-version }}
116-
117-
# Install via PIP to avoid defaulting to the CPU variant if the GPU variant of the day is not ready
118-
- name: Install PyTorch Nightly
119-
run: . $PRELUDE; install_pytorch_pip $BUILD_ENV ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.pytorch-channel-version) || 'nightly' }} cuda/${{ matrix.cuda-version }}
120-
121-
- name: Collect PyTorch Environment Info
122-
if: ${{ success() || failure() }}
123-
run: if . $PRELUDE && which conda; then collect_pytorch_env_info $BUILD_ENV; fi
124-
125-
- name: Prepare FBGEMM_GPU Build
126-
run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV
127-
128-
- name: Install FBGEMM_GPU Wheel
129-
run: . $PRELUDE; install_fbgemm_gpu_wheel $BUILD_ENV *.whl
130-
131-
- name: Run FBGEMM_GPU Benchmark
132-
timeout-minutes: 40
133-
run: . $PRELUDE; run_tbe_microbench $BUILD_ENV
134-
135-
- name: Upload Benchmark Traces as GHA Artifacts
136-
uses: actions/upload-artifact@v4
137-
with:
138-
name: fbgemm_gpu_traces_${{ matrix.host-machine.arch }}_${{ matrix.compiler }}_py${{ matrix.python-version }}_cu${{ matrix.cuda-version }}.zip
139-
path: fbgemm_gpu/bench/*.json
140-
if-no-files-found: error
51+
uses: ./.github/workflows/_fbgemm_gpu_cuda_test.yml
52+
with:
53+
matrix: >-
54+
{
55+
"build-target": [ "default" ],
56+
"host-machine": [
57+
{ "arch": "x86", "instance": "linux.g5.4xlarge.nvidia.gpu" },
58+
],
59+
"python-version": [ "3.13" ],
60+
"cuda-version": [ "12.8.1" ],
61+
"compiler": [ "gcc" ],
62+
}
63+
repo-ref: ${{ github.ref }}
64+
pytorch-channel-version: ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.pytorch-channel-version) || 'nightly' }}
65+
run-target: benchmark
66+
extra-env: >-
67+
{
68+
"ENFORCE_CUDA_DEVICE": 1
69+
}

fbgemm_gpu/test/tbe/ssd/ssd_split_tbe_training_test.py

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -27,11 +27,7 @@
2727
)
2828
from fbgemm_gpu.split_table_batched_embeddings_ops_training import RESParams
2929
from fbgemm_gpu.tbe.ssd import SSDTableBatchedEmbeddingBags
30-
from fbgemm_gpu.tbe.utils import (
31-
b_indices,
32-
get_table_batched_offsets_from_dense,
33-
round_up,
34-
)
30+
from fbgemm_gpu.tbe.utils import b_indices, get_table_batched_offsets_from_dense
3531
from hypothesis import assume, given, settings, Verbosity
3632
from torch import distributed as dist
3733

0 commit comments

Comments
 (0)