Skip to content

Commit 473ec22

Browse files
author
Hanbin Hu
committed
Add support for examples under one GPU without NCCL
1 parent 115b909 commit 473ec22

File tree

5 files changed

+14
-9
lines changed

5 files changed

+14
-9
lines changed

examples/pytorch_benchmark.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,8 @@
6969
bf.init()
7070

7171
if args.cuda:
72-
torch.cuda.set_device(bf.local_rank())
72+
device_id = bf.local_rank() if bf.nccl_built() else bf.local_rank() % torch.cuda.device_count()
73+
torch.cuda.set_device(device_id)
7374
cudnn.benchmark = True
7475

7576
# Set up standard model.

examples/pytorch_mnist.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -79,7 +79,8 @@
7979

8080
if args.cuda:
8181
# Bluefog: pin GPU to local rank.
82-
torch.cuda.set_device(bf.local_rank())
82+
device_id = bf.local_rank() if bf.nccl_built() else bf.local_rank() % torch.cuda.device_count()
83+
torch.cuda.set_device(device_id)
8384
torch.cuda.manual_seed(args.seed)
8485

8586

examples/pytorch_resnet.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -103,7 +103,8 @@
103103
if args.cuda:
104104
print("using cuda.")
105105
# Bluefog: pin GPU to local rank.
106-
torch.cuda.set_device(bf.local_rank())
106+
device_id = bf.local_rank() if bf.nccl_built() else bf.local_rank() % torch.cuda.device_count()
107+
torch.cuda.set_device(device_id)
107108
torch.cuda.manual_seed(args.seed)
108109
else:
109110
print("using cpu")

test/torch_ops_test.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -61,10 +61,11 @@ def convert_cpu_fp16_to_fp32(self, *values):
6161

6262
def cast_and_place(self, tensor, dtype):
6363
if dtype.is_cuda:
64-
if bf.local_size() > torch.cuda.device_count():
64+
if bf.nccl_built() and bf.local_size() > torch.cuda.device_count():
6565
raise EnvironmentError(
66-
"Cannot run number of processes in one machine are more than device count")
67-
return tensor.cuda(bf.local_rank()).type(dtype)
66+
"Cannot run number of processes in one machine more than GPU device count"
67+
" in NCCL environment")
68+
return tensor.cuda(bf.local_rank() % torch.cuda.device_count()).type(dtype)
6869
return tensor.type(dtype)
6970

7071
def test_broadcast(self):

test/torch_win_ops_test.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -54,10 +54,11 @@ def tearDown(self):
5454
@staticmethod
5555
def cast_and_place(tensor, dtype):
5656
if dtype.is_cuda:
57-
if bf.local_size() > torch.cuda.device_count():
57+
if bf.nccl_built() and bf.local_size() > torch.cuda.device_count():
5858
raise EnvironmentError(
59-
"Cannot run number of processes in one machine are more than device count")
60-
return tensor.cuda(bf.local_rank()).type(dtype)
59+
"Cannot run number of processes in one machine more than GPU device count"
60+
" in NCCL environment")
61+
return tensor.cuda(bf.local_rank() % torch.cuda.device_count()).type(dtype)
6162
return tensor.type(dtype)
6263

6364
def test_win_create_and_sync_and_free(self):

0 commit comments

Comments (0)