Support Clip in param_attr #6729

Merged (2 commits, Dec 19, 2017)
3 changes: 2 additions & 1 deletion python/paddle/v2/fluid/__init__.py
@@ -16,12 +16,13 @@
from param_attr import ParamAttr
from data_feeder import DataFeeder
from core import LoDTensor, CPUPlace, GPUPlace
import clip

Contributor:

I have a concern: we use a relative import for clip here, and commonly used module names such as clip and io may conflict with other packages.

Collaborator Author:

Python can rename any module on import, so a name clash is easy to avoid.
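For example (a minimal sketch; the alias and the clip threshold are illustrative, not part of this PR):

from paddle.v2.fluid import clip as fluid_clip  # rename on import to avoid any other top-level clip package

clip_attr = fluid_clip.GradientClipByValue(max=10.0)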

Tensor = LoDTensor
__all__ = framework.__all__ + executor.__all__ + [
'io', 'initializer', 'layers', 'nets', 'optimizer', 'backward',
'regularizer', 'LoDTensor', 'CPUPlace', 'GPUPlace', 'Tensor', 'ParamAttr'
'DataFeeder'
'DataFeeder', 'clip'
]


Expand Down
61 changes: 61 additions & 0 deletions python/paddle/v2/fluid/clip.py
@@ -0,0 +1,61 @@
import functools
import layers

__all__ = ['GradientClipByValue', 'append_gradient_clip_ops']


class BaseGradientClipAttr(object):
    def process_context(self, context, p_g):
Member:
Should add a comment explaining how process_context is meant to be used.

        raise NotImplementedError()

    def create_operators(self, param, grad):
        raise NotImplementedError()


class NullGradientClipAttr(BaseGradientClipAttr):
    def process_context(self, context, p_g):
        pass

    def create_operators(self, param, grad):
        return param, grad


class GradientClipByValue(BaseGradientClipAttr):
    def __init__(self, max, min=None):
        max = float(max)
        if min is None:
            min = -max
        else:
            min = float(min)
        self.max = max
        self.min = min

    def process_context(self, context, p_g):
        pass

    def create_operators(self, param, grad):
        new_grad = layers.clip(x=grad, min=self.min, max=self.max)
        return param, new_grad


def append_gradient_clip_ops(param_grad):
    context = dict()
    create_op_callbacks = []
    for p, g in param_grad:
        clip_attr = getattr(p, 'clip_attr', NullGradientClipAttr())
        if clip_attr is None:
            clip_attr = NullGradientClipAttr()
        if not isinstance(clip_attr, BaseGradientClipAttr):
            raise TypeError(
                "clip attribute should be an instance of BaseGradientClipAttr")

        clip_attr.process_context(context=context, p_g=param_grad)
        create_op_callbacks.append(
            functools.partial(
                clip_attr.create_operators, param=p, grad=g))

    return [each_callback() for each_callback in create_op_callbacks]


ClipByValue = GradientClipByValue
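To make the intended protocol concrete (and as a partial answer to the review comment above asking for documentation of process_context), here is a minimal, dependency-free sketch. DemoClipAttr and the plain-float "gradients" are illustrative stand-ins, not Paddle code:

import functools


class DemoClipAttr(object):
    """Illustrative stand-in for a BaseGradientClipAttr subclass."""

    def __init__(self, max_value):
        self.max_value = max_value

    def process_context(self, context, p_g):
        # Phase 1: called once per (param, grad) pair with a dict shared
        # across all pairs, so an attribute can accumulate cross-parameter
        # state here (e.g. for a future global-norm clip).
        context['pairs_seen'] = context.get('pairs_seen', 0) + 1

    def create_operators(self, param, grad):
        # Phase 2: called only after phase 1 has run for every pair; real
        # fluid code would append clip ops, here we just clamp a float.
        return param, max(-self.max_value, min(grad, self.max_value))


param_grad = [('w', 3.5), ('b', -7.0)]
context = dict()
callbacks = []
for p, g in param_grad:
    attr = DemoClipAttr(max_value=1.0)
    attr.process_context(context=context, p_g=param_grad)
    callbacks.append(functools.partial(attr.create_operators, param=p, grad=g))

print([cb() for cb in callbacks])  # [('w', 1.0), ('b', -1.0)]

Deferring create_operators through functools.partial is what lets every process_context call finish before any clip operator is emitted, mirroring append_gradient_clip_ops above.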
3 changes: 3 additions & 0 deletions python/paddle/v2/fluid/framework.py
@@ -704,6 +704,7 @@ def copy_param_info_from(self, other):
trainable=p.trainable,
optimize_attr=p.optimize_attr,
regularizer=p.regularizer,
clip_attr=p.clip_attr,
name=v.name)
self.vars[new_p.name] = new_p

@@ -866,6 +867,8 @@ def __init__(self, block, shape, dtype, **kwargs):

self.regularizer = kwargs.get('regularizer', None)

self.clip_attr = kwargs.get('clip_attr', None)


# program is a global instance.
_main_program_ = Program()
2 changes: 1 addition & 1 deletion python/paddle/v2/fluid/layers/nn.py
@@ -761,7 +761,7 @@ def conv2d_transpose(input,
return out


def sequence_expand(x, y, main_program=None, startup_program=None):
def sequence_expand(x, y):
"""Sequence Expand Layer. This layer will expand the input variable **x**
according to LoD information of **y**. And the following examples will
explain how sequence_expand works:
5 changes: 5 additions & 0 deletions python/paddle/v2/fluid/optimizer.py
@@ -6,6 +6,7 @@
from initializer import Constant
from layer_helper import LayerHelper
from regularizer import append_regularization_ops
from clip import append_gradient_clip_ops

__all__ = ['SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad']

@@ -197,9 +198,13 @@ def minimize(self,
`create_optimization_pass()` into one.
"""
params_grads = append_backward_ops(loss, parameter_list, no_grad_set)

params_grads = append_gradient_clip_ops(params_grads)

# Add regularization if any
params_grads = append_regularization_ops(params_grads,
self.regularization)

optimize_ops = self.create_optimization_pass(params_grads, loss,
startup_program)
return optimize_ops
9 changes: 7 additions & 2 deletions python/paddle/v2/fluid/param_attr.py
@@ -1,19 +1,23 @@
from initializer import Initializer, Xavier, Constant
from regularizer import WeightDecayRegularizer

__all__ = ['ParamAttr']


class ParamAttr(object):
def __init__(self,
name=None,
initializer=None,
learning_rate=1.0,
regularizer=None,
trainable=True):
trainable=True,
clip=None):
self.name = name
self.initializer = initializer
self.learning_rate = learning_rate
self.regularizer = regularizer
self.trainable = trainable
self.clip = clip

def set_default_initializer(self, initializer):
if initializer is None:
@@ -56,7 +60,8 @@ def to_kwargs(self, with_initializer=False):
'name': self.name,
'learning_rate': self.learning_rate,
'regularizer': self.regularizer,
'trainable': self.trainable
'trainable': self.trainable,
'clip_attr': self.clip
}
if with_initializer:
kwargs['initializer'] = self.initializer
@@ -11,7 +11,9 @@
hidden1 = fluid.layers.fc(input=image,
size=128,
act='relu',
param_attr=regularizer)
param_attr=fluid.ParamAttr(
regularizer=regularizer,
clip=fluid.clip.ClipByValue(10)))
hidden2 = fluid.layers.fc(input=hidden1,
size=64,
act='relu',
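For context, an end-to-end sketch of how the new attribute is meant to be used, mirroring the test change above; the data shapes, layer sizes, and choice of SGD are illustrative, and the call signatures assume the fluid API of this period:

import paddle.v2.fluid as fluid

image = fluid.layers.data(name='image', shape=[784], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')

hidden = fluid.layers.fc(
    input=image,
    size=128,
    act='relu',
    param_attr=fluid.ParamAttr(clip=fluid.clip.GradientClipByValue(10.0)))
predict = fluid.layers.fc(input=hidden, size=10, act='softmax')

cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(x=cost)

# minimize() now clips gradients (per each parameter's clip_attr) before
# regularization and the optimizer update are appended.
fluid.optimizer.SGD(learning_rate=0.01).minimize(avg_cost)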