Skip to content

Commit 10cd37f

Browse files
author
sandyhouse
committed
update, test=develop
1 parent 8afeb6b commit 10cd37f

File tree

2 files changed

+8
-5
lines changed

2 files changed

+8
-5
lines changed

python/paddle/distributed/fleet/base/role_maker.py

+4-1
Original file line number | Diff line number | Diff line change
@@ -171,6 +171,7 @@ def init(rank, nodes, role):
171171

172172
def _init_http(self, ip, port, prefix, start_http_server, http_server_d):
173173
def __start_kv_server(http_server_d, size_d):
174+
print("start http_server: {}, {}".format(port, size_d))
174175
from paddle.distributed.fleet.utils.http_server import KVServer
175176
http_server = KVServer(port, size_d)
176177
http_server.start()
@@ -183,6 +184,7 @@ def __start_kv_server(http_server_d, size_d):
183184
def init_kv_server(http_server_d):
184185
worker_key = prefix + '_' + 'worker'
185186
size_d = {worker_key: self._worker_num, }
187+
print("worker_key:{}, size: {}".format(worker_key, size_d))
186188

187189
http_server_d["running"] = True
188190
# child process for http server
@@ -202,7 +204,7 @@ def init(rank, nodes, role):
202204
gloo.set_iface(self._iface)
203205
gloo.set_timeout_seconds(self._init_timeout_seconds,
204206
self._run_timeout_seconds)
205-
gloo.set_http_store(ip, port, role)
207+
gloo.set_http_store(ip, port, 'worker')
206208
ep = ":".join([ip, str(port)])
207209
wait_server_ready([ep])
208210
gloo.init()
@@ -211,6 +213,7 @@ def init(rank, nodes, role):
211213
port = int(port)
212214

213215
if start_http_server:
216+
print("to start http_server")
214217
http_server = init_kv_server(http_server_d)
215218

216219
if self._role == Role.WORKER:

python/paddle/fluid/tests/unittests/test_fleet_rolemaker_new.py

+4-4
Original file line number | Diff line number | Diff line change
@@ -274,7 +274,7 @@ def test_fs_gloo4(self):
274274
print("skip gloo UT on MacOS/Win")
275275
return
276276

277-
os.environ["TRAINING_ROLE"] = "PSERVER"
277+
os.environ["TRAINING_ROLE"] = "WORKER"
278278
os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001"
279279
os.environ["POD_IP"] = "127.0.0.1"
280280
os.environ["PADDLE_PORT"] = "36001"
@@ -284,7 +284,7 @@ def test_fs_gloo4(self):
284284
os.environ["PADDLE_GLOO_RENDEZVOUS"] = "3"
285285
os.environ["PADDLE_GLOO_HTTP_ENDPOINT"] = "127.0.0.1:30019"
286286

287-
role = role_maker.PaddleCloudRoleMaker()
287+
role = role_maker.PaddleCloudRoleMaker(is_collective=True)
288288
role._generate_role()
289289
import time
290290
time.sleep(3)
@@ -532,7 +532,7 @@ def test_fs_gloo4(self):
532532
print("skip gloo UT on MacOS/Win")
533533
return
534534

535-
os.environ["TRAINING_ROLE"] = "PSERVER"
535+
os.environ["TRAINING_ROLE"] = "WORKER"
536536
os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001"
537537
os.environ["POD_IP"] = "127.0.0.1"
538538
os.environ["PADDLE_PORT"] = "36001"
@@ -542,7 +542,7 @@ def test_fs_gloo4(self):
542542
os.environ["PADDLE_GLOO_RENDEZVOUS"] = "3"
543543
os.environ["PADDLE_GLOO_HTTP_ENDPOINT"] = "127.0.0.1:30019"
544544

545-
role = role_maker.PaddleCloudRoleMaker()
545+
role = role_maker.PaddleCloudRoleMaker(is_collective=True)
546546
role._generate_role()
547547
import time
548548
time.sleep(3)

0 commit comments

Comments
 (0)