
Commit 244e754

refine optimizer create accumulators (#50188)
* refine optimizer create accumulators
* refine

1 parent eb8353a · commit 244e754

File tree

10 files changed: +33 -0 lines changed
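Every file below applies the same guard to _create_accumulators: a parameter whose accumulators have already been registered is skipped, tracked by name in an _already_create_accumulater set. The snippet below is a minimal sketch of that pattern, not Paddle code; the ToyOptimizer class, its _add_accumulator stub, and the plain string parameter names are illustrative assumptions.

# Hypothetical minimal sketch of the guard introduced in this commit.
class ToyOptimizer:
    def __init__(self):
        self._accumulators = {}                   # stand-in for accumulator tensors
        self._already_create_accumulater = set()  # parameter names already handled

    def _add_accumulator(self, acc_name, param_name):
        self._accumulators[(acc_name, param_name)] = 0.0

    def _create_accumulators(self, parameters):
        for p in parameters:                      # p is just a name string here
            if p in self._already_create_accumulater:
                continue                          # accumulators already exist, skip
            self._add_accumulator("moment1", p)
            self._add_accumulator("moment2", p)
            self._already_create_accumulater.add(p)

In the real optimizers the key is p.name, and in the multi-precision branches the accumulators are attached to the master weight while the guard still records the original parameter's name.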

python/paddle/optimizer/adadelta.py (+3)

@@ -145,8 +145,11 @@ def _create_accumulators(self, block, parameters):
             parameters = parameters.get('params')

         for p in parameters:
+            if p.name in self._already_create_accumulater:
+                continue
             self._add_accumulator(self._avg_squared_grad_acc_str, p)
             self._add_accumulator(self._avg_squared_update_acc_str, p)
+            self._already_create_accumulater.add(p.name)

     def _append_optimize_op(self, block, param_and_grad):
         if isinstance(param_and_grad, dict):

python/paddle/optimizer/adagrad.py (+3)

@@ -139,11 +139,14 @@ def _create_accumulators(self, block, parameters):
             parameters = self._update_param_group(parameters)

         for p in parameters:
+            if p.name in self._already_create_accumulater:
+                continue
             self._add_accumulator(
                 self._moment_acc_str,
                 p,
                 fill_value=self.initial_accumulator_value,
             )
+            self._already_create_accumulater.add(p.name)

     def _append_optimize_op(self, block, param_and_grad):
         assert isinstance(block, framework.Block)

python/paddle/optimizer/adam.py (+4)

@@ -317,9 +317,12 @@ def _create_accumulators(self, block, parameters):

         # Create accumulator tensors for first and second moments
         for p in parameters:
+            if p.name in self._already_create_accumulater:
+                continue
             if self._multi_precision and self._is_dtype_fp16_or_bf16(p.dtype):
                 master_p = self._create_master_weight(p)
                 self._add_moments_pows(master_p)
+                self._already_create_accumulater.add(p.name)
                 continue
             if (
                 self._is_dtype_fp16_or_bf16(p.dtype)
@@ -330,6 +333,7 @@ def _create_accumulators(self, block, parameters):
                     "Consider using multi_precision=True option of the Adam optimizer."
                 )
             self._add_moments_pows(p)
+            self._already_create_accumulater.add(p.name)

     def _append_optimize_op(self, block, param_and_grad):
         assert isinstance(block, framework.Block)

python/paddle/optimizer/adamax.py (+3)

@@ -176,6 +176,8 @@ def _create_accumulators(self, block, parameters):

         # Create accumulator tensors for first moment and infinity norm
         for p in parameters:
+            if p.name in self._already_create_accumulater:
+                continue
             self._add_accumulator(self._moment_acc_str, p)
             self._add_accumulator(self._inf_norm_acc_str, p)
             self._add_accumulator(
@@ -184,6 +186,7 @@ def _create_accumulators(self, block, parameters):
                 fill_value=self._beta1,
                 shape=[1],
             )
+            self._already_create_accumulater.add(p.name)

     def _append_optimize_op(self, block, param_and_grad):
         assert isinstance(block, framework.Block)

python/paddle/optimizer/adamw.py (+5)

@@ -281,6 +281,7 @@ def __init__(
         self._use_multi_tensor = None
         self.regularization = None
         self._auxiliary_vars = {}
+        self._already_create_accumulater = set()

     def _set_auxiliary_var(self, key, val):
         self._auxiliary_vars[key] = val
@@ -422,9 +423,12 @@ def _create_accumulators(self, block, parameters):

         # Create accumulator tensors for first and second moments
         for p in parameters:
+            if p.name in self._already_create_accumulater:
+                continue
             if self._multi_precision and self._is_dtype_fp16_or_bf16(p.dtype):
                 master_p = self._create_master_weight(p)
                 self._add_moments_pows(master_p)
+                self._already_create_accumulater.add(p.name)
                 continue
             if (
                 self._is_dtype_fp16_or_bf16(p.dtype)
@@ -435,6 +439,7 @@ def _create_accumulators(self, block, parameters):
                     "Consider using multi_precision=True option of the Adam optimizer."
                 )
             self._add_moments_pows(p)
+            self._already_create_accumulater.add(p.name)

     def _append_optimize_op(self, block, param_and_grad):
         assert isinstance(block, framework.Block)

python/paddle/optimizer/lamb.py (+4)

@@ -190,11 +190,15 @@ def _create_accumulators(self, block, parameters):

         # Create accumulator tensors for first and second moments
         for p in parameters:
+            if p.name in self._already_create_accumulater:
+                continue
             if self._multi_precision and p.dtype == core.VarDesc.VarType.FP16:
                 master_p = self._create_master_weight(p)
                 self._add_moments_pows(master_p)
+                self._already_create_accumulater.add(p.name)
             else:
                 self._add_moments_pows(p)
+                self._already_create_accumulater.add(p.name)

     def _get_accumulator(self, name, param):
         """Utility function to fetch an accumulator for a parameter

python/paddle/optimizer/momentum.py (+4)

@@ -270,9 +270,12 @@ def _create_accumulators(self, block, parameters):
             parameters = self._update_param_group(parameters)

         for p in parameters:
+            if p.name in self._already_create_accumulater:
+                continue
             if self._multi_precision and p.dtype == core.VarDesc.VarType.FP16:
                 master_p = self._create_master_weight(p)
                 self._add_accumulator(self._velocity_acc_str, master_p)
+                self._already_create_accumulater.add(p.name)
                 continue
             if (
                 p.dtype == core.VarDesc.VarType.FP16
@@ -283,6 +286,7 @@ def _create_accumulators(self, block, parameters):
                     "Consider using multi_precision=True option of the Momentum optimizer."
                 )
             self._add_accumulator(self._velocity_acc_str, p)
+            self._already_create_accumulater.add(p.name)

     def _create_regularization_of_grad(self, param, grad, regularization=None):
         """Create and add backward regularization Operators

python/paddle/optimizer/optimizer.py (+1)

@@ -275,6 +275,7 @@ def __init__(

         self._param_dict = self._create_multi_tensor_dict()
         self._auxiliary_vars = {}
+        self._already_create_accumulater = set()

     def _set_auxiliary_var(self, key, val):
         self._auxiliary_vars[key] = val
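With the set initialized once in the base Optimizer.__init__ (and again in AdamW.__init__, which sets up its own state, as shown in the adamw.py hunk above), repeated calls to _create_accumulators over the same parameters no longer try to re-create their accumulators. A quick check against the hypothetical ToyOptimizer sketch near the top of this page:

opt = ToyOptimizer()
for _ in range(3):                             # accumulator creation requested repeatedly
    opt._create_accumulators(["w", "b"])
opt._create_accumulators(["w", "b", "new_w"])  # only the new parameter is added
assert len(opt._accumulators) == 6             # 3 params x 2 accumulators each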

python/paddle/optimizer/rmsprop.py (+3)

@@ -199,9 +199,12 @@ def _create_accumulators(self, block, parameters):
             parameters = parameters.get('params')

         for p in parameters:
+            if p.name in self._already_create_accumulater:
+                continue
             self._add_accumulator(self._momentum_acc_str, p)
             self._add_accumulator(self._mean_square_acc_str, p)
             self._add_accumulator(self._mean_grad_acc_str, p)
+            self._already_create_accumulater.add(p.name)

     def _append_optimize_op(self, block, param_and_grad):
         if not isinstance(block, framework.Block):

python/paddle/optimizer/sgd.py (+3)

@@ -129,8 +129,11 @@ def _create_accumulators(self, block, parameters):

         # Create accumulator tensors for first and second moments
         for p in parameters:
+            if p.name in self._already_create_accumulater:
+                continue
             if self._multi_precision and p.dtype == core.VarDesc.VarType.FP16:
                 master_p = self._create_master_weight(p)
+                self._already_create_accumulater.add(p.name)
                 continue
             if (
                 p.dtype == core.VarDesc.VarType.FP16
