Commit 2e79999

Merge pull request #5805 from abhinavarora/msra
Implementing the MSRA initializer for rectifier units
2 parents e28157d + 4a2b0ae commit 2e79999

File tree

2 files changed: +187 -0 lines changed


python/paddle/v2/fluid/initializer.py

Lines changed: 83 additions & 0 deletions
@@ -285,3 +285,86 @@ def __call__(self, var, block):
                })
        var.op = op
        return op


class MSRAInitializer(Initializer):
    """Implements the MSRA initializer a.k.a. Kaiming Initializer

    This class implements the weight initialization from the paper
    Delving Deep into Rectifiers: Surpassing Human-Level Performance on
    ImageNet Classification[1] by Kaiming He, Xiangyu Zhang, Shaoqing Ren
    and Jian Sun. This is a robust initialization method that particularly
    considers the rectifier nonlinearities. In case of Uniform distribution,
    the range is [-x, x], where x = sqrt(6 / fan_in). In case of Normal
    distribution, the mean is 0 and the standard deviation
    is sqrt(2 / fan_in).

    References:
        [1] Delving Deep into Rectifiers: Surpassing Human-Level Performance
            on ImageNet Classification
            (https://arxiv.org/abs/1502.01852)
    """

    def __init__(self, uniform=True, fan_in=None, seed=0):
        """Constructor for MSRAInitializer

        Args:
            uniform: whether to use uniform or normal distribution
            fan_in: fan_in for MSRAInitializer. If None, it is
                    inferred from the variable.
            seed: random seed

        Note: It is recommended to set fan_in to None for most cases.
        """
        assert uniform is not None
        assert seed is not None
        super(MSRAInitializer, self).__init__()
        self._uniform = uniform
        self._fan_in = fan_in
        self._seed = seed

    def __call__(self, var, block):
        """Add MSRA initialization ops for a variable

        Args:
            var: Variable that needs to be initialized
            block: The block in which initialization ops
                   should be added

        Returns:
            the initialization op
        """
        assert isinstance(var, framework.Variable)
        assert isinstance(block, framework.Block)
        f_in, f_out = self._compute_fans(var)

        # If fan_in is passed, use it
        fan_in = f_in if self._fan_in is None else self._fan_in

        if self._uniform:
            limit = np.sqrt(6.0 / float(fan_in))
            op = block.prepend_op(
                type="uniform_random",
                outputs={"Out": var},
                attrs={
                    "shape": var.shape,
                    "data_type": int(var.data_type),
                    "min": -limit,
                    "max": limit,
                    "seed": self._seed
                })
        else:
            std = np.sqrt(2.0 / float(fan_in))
            op = block.prepend_op(
                type="gaussian_random",
                outputs={"Out": var},
                attrs={
                    "shape": var.shape,
                    "data_type": int(var.data_type),
                    "mean": 0.0,
                    "std": std,
                    "seed": self._seed
                })
        var.op = op
        return op
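
For reference, a minimal usage sketch of the new initializer, mirroring the create_parameter pattern exercised by the tests below. The import paths are assumed from the file locations in this commit, and the parameter names (fc_w, conv_w) are illustrative only; they are not part of this change.

import paddle.v2.fluid.framework as framework
import paddle.v2.fluid.initializer as initializer

program = framework.Program()
block = program.global_block()

# Uniform MSRA/Kaiming init: prepends a uniform_random op to the block with
# min/max = -/+ sqrt(6 / fan_in), where fan_in is inferred from the shape.
fc_w = block.create_parameter(
    dtype="float32",
    shape=[5, 10],
    lod_level=0,
    name="fc_w",
    initializer=initializer.MSRAInitializer())

# Normal MSRA init: prepends a gaussian_random op with mean 0.0 and
# std = sqrt(2 / fan_in).
conv_w = block.create_parameter(
    dtype="float32",
    shape=[5, 10, 15, 20],
    lod_level=0,
    name="conv_w",
    initializer=initializer.MSRAInitializer(uniform=False))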

python/paddle/v2/fluid/tests/test_initializer.py

Lines changed: 104 additions & 0 deletions
@@ -223,5 +223,109 @@ def test_xavier_initializer_supplied_arguments(self):
        self.assertEqual(init_op.attr('seed'), 134)


class TestMSRAInitializer(unittest.TestCase):
    def test_uniform_msra_initializer(self):
        """Test MSRA initializer with uniform distribution for
        matrix multiply.
        """
        program = framework.Program()
        block = program.global_block()
        param = block.create_parameter(
            dtype="float32",
            shape=[5, 10],
            lod_level=0,
            name="param",
            initializer=initializer.MSRAInitializer())
        self.assertEqual(len(block.ops), 1)
        init_op = block.ops[0]
        self.assertEqual(init_op.type, 'uniform_random')
        limit = np.sqrt(6.0 / param.shape[0])
        self.assertAlmostEqual(init_op.attr('min'), -limit, delta=DELTA)
        self.assertAlmostEqual(init_op.attr('max'), limit, delta=DELTA)
        self.assertEqual(init_op.attr('seed'), 0)

    def test_uniform_msra_initializer_conv(self):
        """Test MSRA initializer with uniform distribution for
        convolutions.
        """
        program = framework.Program()
        block = program.global_block()
        param = block.create_parameter(
            dtype="float32",
            shape=[5, 10, 15, 20],
            lod_level=0,
            name="param",
            initializer=initializer.MSRAInitializer())
        self.assertEqual(len(block.ops), 1)
        init_op = block.ops[0]
        self.assertEqual(init_op.type, 'uniform_random')
        receptive_field_size = float(15 * 20)
        limit = np.sqrt(6.0 / (param.shape[1] * receptive_field_size))
        self.assertAlmostEqual(init_op.attr('min'), -limit, delta=DELTA)
        self.assertAlmostEqual(init_op.attr('max'), limit, delta=DELTA)
        self.assertEqual(init_op.attr('seed'), 0)

    def test_normal_msra_initializer(self):
        """Test MSRA initializer with normal distribution for
        matrix multiply.
        """
        program = framework.Program()
        block = program.global_block()
        param = block.create_parameter(
            dtype="float32",
            shape=[5, 10],
            lod_level=0,
            name="param",
            initializer=initializer.MSRAInitializer(uniform=False))
        self.assertEqual(len(block.ops), 1)
        init_op = block.ops[0]
        self.assertEqual(init_op.type, 'gaussian_random')
        std = np.sqrt(2.0 / param.shape[0])
        self.assertAlmostEqual(init_op.attr('mean'), 0.0, delta=DELTA)
        self.assertAlmostEqual(init_op.attr('std'), std, delta=DELTA)
        self.assertEqual(init_op.attr('seed'), 0)

    def test_normal_msra_initializer_conv(self):
        """Test MSRA initializer with normal distribution for
        convolutions.
        """
        program = framework.Program()
        block = program.global_block()
        param = block.create_parameter(
            dtype="float32",
            shape=[5, 10, 15, 20],
            lod_level=0,
            name="param",
            initializer=initializer.MSRAInitializer(uniform=False))
        self.assertEqual(len(block.ops), 1)
        init_op = block.ops[0]
        self.assertEqual(init_op.type, 'gaussian_random')
        receptive_field_size = float(15 * 20)
        std = np.sqrt(2.0 / (param.shape[1] * receptive_field_size))
        self.assertAlmostEqual(init_op.attr('mean'), 0.0, delta=DELTA)
        self.assertAlmostEqual(init_op.attr('std'), std, delta=DELTA)
        self.assertEqual(init_op.attr('seed'), 0)

    def test_msra_initializer_supplied_arguments(self):
        """Test the MSRA initializer with supplied arguments
        """
        program = framework.Program()
        block = program.global_block()
        block.create_parameter(
            dtype="float32",
            shape=[5, 10],
            lod_level=0,
            name="param",
            initializer=initializer.MSRAInitializer(
                fan_in=12, seed=134))
        self.assertEqual(len(block.ops), 1)
        init_op = block.ops[0]
        self.assertEqual(init_op.type, 'uniform_random')
        limit = np.sqrt(6.0 / 12)
        self.assertAlmostEqual(init_op.attr('min'), -limit, delta=DELTA)
        self.assertAlmostEqual(init_op.attr('max'), limit, delta=DELTA)
        self.assertEqual(init_op.attr('seed'), 134)


if __name__ == '__main__':
    unittest.main()
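
As a sanity check on the attribute values asserted above, a small standalone NumPy sketch reproducing the fan-in convention the tests assume. The actual _compute_fans helper lives in the Initializer base class and is not part of this diff; expected_fan_in below is a hypothetical stand-in for illustration.

import numpy as np

def expected_fan_in(shape):
    # Fan-in convention implied by the test expectations: 2-D weights use the
    # first dimension; 4-D filters use input channels * receptive field size.
    if len(shape) == 2:
        return shape[0]
    receptive_field_size = int(np.prod(shape[2:]))
    return shape[1] * receptive_field_size

# Matrix-multiply weight [5, 10]: fan_in = 5.
print(np.sqrt(6.0 / expected_fan_in([5, 10])))          # uniform limit ~= 1.0954
print(np.sqrt(2.0 / expected_fan_in([5, 10])))          # normal std    ~= 0.6325

# Convolution filter [5, 10, 15, 20]: fan_in = 10 * 15 * 20 = 3000.
print(np.sqrt(6.0 / expected_fan_in([5, 10, 15, 20])))  # uniform limit ~= 0.0447
print(np.sqrt(2.0 / expected_fan_in([5, 10, 15, 20])))  # normal std    ~= 0.0258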
