Fix dygraph unique name bug #17592

Merged · 2 commits · May 24, 2019
17 changes: 9 additions & 8 deletions python/paddle/fluid/layer_helper_base.py
@@ -85,19 +85,19 @@ def __norm_op(x,
block=self.startup_program.global_block()):
if out is None:
out = block.create_var(
name=unique_name.generate(".".join(
name=unique_name.generate_with_ignorable_key(".".join(
[self.name, 'weight_norm_norm'])),
dtype=dtype,
persistable=False)
abs_out = block.create_var(
name=unique_name.generate(".".join(
name=unique_name.generate_with_ignorable_key(".".join(
[self.name, 'weight_norm_abs'])),
dtype=dtype,
persistable=False)
block.append_op(
type='abs', inputs={'X': x}, outputs={'Out': abs_out})
pow_out = block.create_var(
name=unique_name.generate(".".join(
name=unique_name.generate_with_ignorable_key(".".join(
[self.name, 'weight_norm_pow'])),
dtype=dtype,
persistable=False)
@@ -107,7 +107,7 @@ def __norm_op(x,
outputs={'Out': pow_out},
attrs={'factor': float(p)})
sum_out = block.create_var(
name=unique_name.generate(".".join(
name=unique_name.generate_with_ignorable_key(".".join(
[self.name, 'weight_norm_sum'])),
dtype=dtype,
persistable=False)
@@ -133,7 +133,7 @@ def __reshape_op(x,
block=self.startup_program.global_block()):
if out is None:
out = block.create_var(
name=unique_name.generate(".".join(
name=unique_name.generate_with_ignorable_key(".".join(
[self.name, 'weight_norm_reshape'])),
dtype=dtype,
persistable=False)
@@ -150,7 +150,7 @@ def __transpose_op(x,
block=self.startup_program.global_block()):
if out is None:
out = block.create_var(
name=unique_name.generate(".".join(
name=unique_name.generate_with_ignorable_key(".".join(
[self.name, 'weight_norm_transpose'])),
dtype=dtype,
persistable=False)
@@ -168,7 +168,7 @@ def __norm_except_dim(x,
"""Computes the norm over all dimensions except dim"""
if out is None:
out = block.create_var(
name=unique_name.generate(".".join(
name=unique_name.generate_with_ignorable_key(".".join(
[self.name, 'weight_norm_norm'])),
dtype=dtype,
persistable=False)
@@ -327,7 +327,8 @@ def create_variable_for_type_inference(self, dtype, stop_gradient=False):
infer_var_type.
"""
return self.main_program.current_block().create_var(
name=unique_name.generate(".".join([self.name, 'tmp'])),
name=unique_name.generate_with_ignorable_key(".".join(
[self.name, 'tmp'])),
dtype=dtype,
type=core.VarDesc.VarType.LOD_TENSOR,
persistable=False,
3 changes: 2 additions & 1 deletion python/paddle/fluid/layers/collective.py
@@ -33,7 +33,8 @@ def _allreduce(x, out=None, reduce_type="sum", sync_mode=False):

if out is None:
out = helper.create_variable(
name=unique_name.generate(".".join([x.name, 'tmp'])),
name=unique_name.generate_with_ignorable_key(".".join(
[x.name, 'tmp'])),
shape=x.shape,
dtype=x.dtype,
type=x.type,
15 changes: 9 additions & 6 deletions python/paddle/fluid/layers/control_flow.py
@@ -391,7 +391,7 @@ def memory(self,
raise ValueError(
"if init is None, memory at least need shape and batch_ref")
parent_block = self._parent_block()
- var_name = unique_name.generate("@".join(
+ var_name = unique_name.generate_with_ignorable_key("@".join(
[self.helper.name, "memory_boot"]))
boot_var = parent_block.create_var(
name=var_name,
@@ -414,7 +414,8 @@ def memory(self,
return self.memory(init=boot_var)
else:
pre_mem = self.helper.create_variable(
name=unique_name.generate("@".join([self.helper.name, "mem"])),
name=unique_name.generate_with_ignorable_key("@".join(
[self.helper.name, "mem"])),
dtype=init.dtype,
shape=init.shape)
self.memories[pre_mem.name] = StaticRNNMemoryLink(
@@ -1559,11 +1560,13 @@ def input(self, x):
if id(x) not in self.input_table:
parent_block = self._parent_block()
out_true = parent_block.create_var(
- name=unique_name.generate('ifelse_input' + self.helper.name),
+ name=unique_name.generate_with_ignorable_key('ifelse_input' +
+     self.helper.name),
dtype=x.dtype)

out_false = parent_block.create_var(
- name=unique_name.generate('ifelse_input' + self.helper.name),
+ name=unique_name.generate_with_ignorable_key('ifelse_input' +
+     self.helper.name),
dtype=x.dtype)
parent_block.append_op(
type='split_lod_tensor',
@@ -1605,7 +1608,7 @@ def output(self, *outs):
raise TypeError("Each output should be a variable")
# create outside tensor
outside_out = parent_block.create_var(
name=unique_name.generate("_".join(
name=unique_name.generate_with_ignorable_key("_".join(
[self.helper.name, 'output'])),
dtype=each_out.dtype)
out_table.append(outside_out)
@@ -1998,7 +2001,7 @@ def output(self, *outputs):
parent_block = self._parent_block_()
for each in outputs:
outside_array = parent_block.create_var(
name=unique_name.generate("_".join(
name=unique_name.generate_with_ignorable_key("_".join(
[self.helper.name, "output_array", each.name])),
type=core.VarDesc.VarType.LOD_TENSOR_ARRAY,
dtype=each.dtype)
2 changes: 1 addition & 1 deletion python/paddle/fluid/layers/device.py
@@ -30,7 +30,7 @@
def get_places(device_count=None, device_type=None):
helper = LayerHelper('get_places', **locals())
out_places = helper.create_variable(
- name=unique_name.generate(helper.name + ".out"))
+ name=unique_name.generate_with_ignorable_key(helper.name + ".out"))
attrs = dict()
if device_count is not None:
attrs['device_count'] = int(device_count)
6 changes: 4 additions & 2 deletions python/paddle/fluid/layers/nn.py
@@ -9758,7 +9758,8 @@ def clip(x, min, max, name=None):
helper = LayerHelper("clip", **locals())

if name is None:
name = unique_name.generate(".".join([helper.name, 'tmp']))
name = unique_name.generate_with_ignorable_key(".".join(
[helper.name, 'tmp']))

out = helper.create_variable(
type=x.type, name=name, dtype=x.dtype, persistable=False)
@@ -9797,7 +9798,8 @@ def clip_by_norm(x, max_norm, name=None):
helper = LayerHelper("clip_by_norm", **locals())

if name is None:
name = unique_name.generate(".".join([helper.name, 'tmp']))
name = unique_name.generate_with_ignorable_key(".".join(
[helper.name, 'tmp']))

out = helper.create_variable(
type=x.type, name=name, dtype=x.dtype, persistable=False)
6 changes: 4 additions & 2 deletions python/paddle/fluid/optimizer.py
@@ -839,7 +839,8 @@ def _clip_by_norm(self, x, max_norm, name=None):
helper = LayerHelper("dgc_clip_by_norm_op", **args)

if name is None:
name = unique_name.generate(".".join([helper.name, 'tmp']))
name = unique_name.generate_with_ignorable_key(".".join(
[helper.name, 'tmp']))

out = helper.create_variable(
type=x.type, name=name, dtype=x.dtype, persistable=False)
@@ -1922,7 +1923,8 @@ def __init__(self,
).all_parameters():
if param.do_model_average != False:
grad = param.block.create_var(
name=unique_name.generate(".".join([param.name, 'tmp'])),
name=unique_name.generate_with_ignorable_key(".".join(
[param.name, 'tmp'])),
dtype=param.dtype,
persistable=False,
stop_gradient=True)
23 changes: 23 additions & 0 deletions python/paddle/fluid/unique_name.py
@@ -58,6 +58,29 @@ def generate(key):
return generator(key)


# FIXME(zjl): The previous naming rule in static graph mode would
# cause a memory leak in dygraph mode, because it uses keys such as
# `conv_0.tmp`, and in dygraph mode `conv_i` keeps increasing as more
# batches are run. Thus, the keys grow without bound:
# `conv_0.tmp`, `conv_1.tmp`, ....
# We have not found a better way to fix this bug in dygraph mode. In TF,
# variable names are meaningless in eager execution mode, and in
# PyTorch there are no variable names at all. Maybe we should
# discard variable names in dygraph mode.
#
# Another concern is saving/loading inference models. Usually, a user
# saves the model in static graph mode and loads it in dygraph
# mode. Therefore, we currently keep the variable names of Parameters.
#
# Please fix me if a better method is found.
def generate_with_ignorable_key(key):
    from .framework import in_dygraph_mode
    if in_dygraph_mode():
        key = "tmp"

    return generator(key)


def switch(new_generator=None):
global generator
old = generator
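For context on why the key matters, here is a minimal, self-contained sketch of the leak described in the FIXME above. It is not Paddle code: ToyNameGenerator is a hypothetical stand-in for the module-level generator, which (roughly) keeps one counter per key and returns names of the form key_counter.

from collections import defaultdict


class ToyNameGenerator(object):
    """Hypothetical stand-in for the name generator: one counter per key,
    producing names of the form `<key>_<counter>`."""

    def __init__(self):
        self.ids = defaultdict(int)

    def __call__(self, key):
        name = "%s_%d" % (key, self.ids[key])
        self.ids[key] += 1
        return name


# Old rule: the key embeds the per-layer prefix. In dygraph mode that
# prefix (conv_0, conv_1, ...) changes every batch, so the counter dict
# gains a new entry per batch and never stops growing.
old_gen = ToyNameGenerator()
for batch_id in range(1000):
    old_gen("conv_%d.tmp" % batch_id)
assert len(old_gen.ids) == 1000  # unbounded growth -> memory leak

# New rule (generate_with_ignorable_key): in dygraph mode the key is
# collapsed to "tmp", so the dict holds a single entry no matter how
# many batches run.
new_gen = ToyNameGenerator()
for batch_id in range(1000):
    new_gen("tmp")
assert len(new_gen.ids) == 1  # bounded

Note that only temporary/intermediate variables switch to the ignorable key in this PR; Parameters keep their real names, which, per the FIXME, is what lets a model saved in static graph mode still be loaded in dygraph mode.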