@@ -281,7 +281,6 @@ def __init__(
         self._use_multi_tensor = None
         self.regularization = None
         self._auxiliary_vars = {}
-        self._already_create_accumulater = set()
 
     def _set_auxiliary_var(self, key, val):
         self._auxiliary_vars[key] = val
@@ -423,12 +422,9 @@ def _create_accumulators(self, block, parameters):
 
         # Create accumulator tensors for first and second moments
         for p in parameters:
-            if p.name in self._already_create_accumulater:
-                continue
             if self._multi_precision and self._is_dtype_fp16_or_bf16(p.dtype):
                 master_p = self._create_master_weight(p)
                 self._add_moments_pows(master_p)
-                self._already_create_accumulater.add(p.name)
                 continue
             if (
                 self._is_dtype_fp16_or_bf16(p.dtype)
@@ -439,7 +435,6 @@ def _create_accumulators(self, block, parameters):
                     "Consider using multi_precision=True option of the Adam optimizer."
                 )
             self._add_moments_pows(p)
-            self._already_create_accumulater.add(p.name)
 
     def _append_optimize_op(self, block, param_and_grad):
         assert isinstance(block, framework.Block)