@@ -151,7 +151,7 @@ def embedding(input, size, is_sparse=False, param_attr=None, dtype='float32'):

Args:
input(Variable): Input to the function
- size(tuple|list|None): Shape of the look up table parameter
+ size(tuple|list|None): Shape of the look up table parameter
is_sparse(bool): Boolean flag that specifying whether the input is sparse
param_attr(ParamAttr): Parameters for this layer
dtype(np.dtype|core.DataType|str): The type of data : float32, float_16, int etc
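For readers skimming the hunk above, a minimal usage sketch of this `embedding` signature follows. The vocabulary size, embedding width, and variable names are illustrative assumptions by the editor, not part of the patch, and the `paddle.v2.fluid` import path is assumed for the fluid code base of this era.

```python
import paddle.v2.fluid as fluid  # import path assumed for this version of fluid

# Illustrative sizes only.
VOCAB_SIZE, EMB_DIM = 10000, 32

# Integer word ids; lod_level=1 marks a variable-length sequence input.
words = fluid.layers.data(name='word_ids', shape=[1], dtype='int64', lod_level=1)

# `size` is the shape of the look-up table parameter: [vocabulary size, embedding width].
emb = fluid.layers.embedding(input=words, size=[VOCAB_SIZE, EMB_DIM],
                             dtype='float32', is_sparse=True)
```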
@@ -366,9 +366,9 @@ def cross_entropy(input, label, **kwargs):

1) One-hot cross-entropy:
`soft_label = False`, `Label[i, 0]` indicates the class index for sample i:
-
+
.. math::
-
+
Y[i] = -\log(X[i, Label[i]])

2) Soft-label cross-entropy:
@@ -386,15 +386,15 @@ def cross_entropy(input, label, **kwargs):
As a special case of 2), when each row of 'label' has only one
non-zero element which is equal to 1, soft-label cross-entropy degenerates
to a one-hot cross-entropy with one-hot label representation.
-
+
Args:
- input (Variable|list): a 2-D tensor with shape [N x D], where N is the
- batch size and D is the number of classes. This input is a probability
+ input (Variable|list): a 2-D tensor with shape [N x D], where N is the
+ batch size and D is the number of classes. This input is a probability
computed by the previous operator, which is almost always the result
of a softmax operator.
- label (Variable|list): the ground truth which is a 2-D tensor. When
- `soft_label` is set to `False`, `label` is a tensor<int64> with shape
- [N x 1]. When `soft_label` is set to `True`, `label` is a
+ label (Variable|list): the ground truth which is a 2-D tensor. When
+ `soft_label` is set to `False`, `label` is a tensor<int64> with shape
+ [N x 1]. When `soft_label` is set to `True`, `label` is a
tensor<float/double> with shape [N x D].
soft_label (bool, via `**kwargs`): a flag indicating whether to interpretate
the given labels as soft labels, default `False`.
@@ -403,7 +403,7 @@ def cross_entropy(input, label, **kwargs):
A 2-D tensor with shape [N x 1], the cross entropy loss.

Raises:
- `ValueError`: 1) the 1st dimension of `input` and `label` are not equal; 2) when \
+ `ValueError`: 1) the 1st dimension of `input` and `label` are not equal; 2) when \
`soft_label == True`, and the 2nd dimension of `input` and `label` are not \
equal; 3) when `soft_label == False`, and the 2nd dimension of `label` is not 1.
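To make the two label modes in this docstring concrete, here is a small NumPy sketch of the math it describes (an editor's illustration, not the operator's kernel); the sample values are made up.

```python
import numpy as np

def cross_entropy_np(x, label, soft_label=False):
    """NumPy sketch of the two formulas above; x is an [N x D] probability matrix."""
    if not soft_label:
        # One-hot mode: label is [N x 1] int64 class indices, Y[i] = -log(X[i, Label[i]]).
        idx = label.reshape(-1).astype(np.int64)
        return -np.log(x[np.arange(x.shape[0]), idx]).reshape(-1, 1)
    # Soft-label mode: label is [N x D], each row a distribution over the classes.
    return -np.sum(label * np.log(x), axis=1, keepdims=True)

probs = np.array([[0.7, 0.2, 0.1],
                  [0.1, 0.8, 0.1]])
hard = np.array([[0], [1]])                    # class indices
soft = np.array([[1.0, 0.0, 0.0],
                 [0.0, 0.5, 0.5]])             # per-class probabilities
print(cross_entropy_np(probs, hard))           # [[-log 0.7], [-log 0.8]]
print(cross_entropy_np(probs, soft, soft_label=True))
```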
@@ -727,9 +727,9 @@ def _get_default_param_initializer():

def sequence_pool(input, pool_type, **kwargs):
"""
- This function add the operator for sequence pooling.
- It pools features of all time-steps of each instance, and is applied
- on top of the input using pool_type mentioned in the parameters.
+ This function add the operator for sequence pooling.
+ It pools features of all time-steps of each instance, and is applied
+ on top of the input using pool_type mentioned in the parameters.

It supports four pool_type:
@@ -758,7 +758,7 @@ def sequence_pool(input, pool_type, **kwargs):

Args:
input(variable): The input variable which is a LoDTensor.
- pool_type (string): The pooling type of sequence_pool.
+ pool_type (string): The pooling type of sequence_pool.
It supports average, sum, sqrt and max.

Returns:
@@ -768,7 +768,7 @@ def sequence_pool(input, pool_type, **kwargs):

.. code-block:: python

- x = fluid.layers.data(name='x', shape=[7, 1],
+ x = fluid.layers.data(name='x', shape=[7, 1],
dtype='float32', lod_level=1)
avg_x = fluid.layers.sequence_pool(input=x, pool_type='average')
sum_x = fluid.layers.sequence_pool(input=x, pool_type='sum')
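As a rough illustration of what pooling over a LoD tensor does, the following NumPy sketch mimics the four pool types for the [7, 1] input above; the offsets [0, 2, 5, 7] are an assumed example LoD chosen by the editor, not something fixed by the layer.

```python
import numpy as np

x = np.arange(1.0, 8.0).reshape(7, 1)    # 7 time-steps, feature width 1
lod = [0, 2, 5, 7]                        # assumed offsets: sequences of length 2, 3 and 2

def seq_pool_np(data, offsets, pool_type):
    outs = []
    for start, end in zip(offsets[:-1], offsets[1:]):
        seq = data[start:end]
        if pool_type == 'average':
            outs.append(seq.mean(axis=0))
        elif pool_type == 'sum':
            outs.append(seq.sum(axis=0))
        elif pool_type == 'sqrt':
            outs.append(seq.sum(axis=0) / np.sqrt(len(seq)))  # sum scaled by sqrt(length)
        elif pool_type == 'max':
            outs.append(seq.max(axis=0))
    return np.stack(outs)                 # one row per sequence: shape [num_sequences, 1]

print(seq_pool_np(x, lod, 'average'))     # [[1.5], [4.0], [6.5]]
```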
@@ -816,7 +816,7 @@ def sequence_first_step(input, **kwargs):

.. code-block:: python

- x = fluid.layers.data(name='x', shape=[7, 1],
+ x = fluid.layers.data(name='x', shape=[7, 1],
dtype='float32', lod_level=1)
x_first_step = fluid.layers.sequence_first_step(input=x)
"""
@@ -849,7 +849,7 @@ def sequence_last_step(input, **kwargs):

.. code-block:: python

- x = fluid.layers.data(name='x', shape=[7, 1],
+ x = fluid.layers.data(name='x', shape=[7, 1],
dtype='float32', lod_level=1)
x_last_step = fluid.layers.sequence_last_step(input=x)
"""
@@ -1168,25 +1168,26 @@ def lstm_unit(x_t,

.. math::

- i_t & = \sigma(W_{x_i}x_{t} + W_{h_i}h_{t-1} + W_{c_i}c_{t-1} + b_i)
+ i_t & = \sigma(W_{x_i}x_{t} + W_{h_i}h_{t-1} + b_i)

- f_t & = \sigma(W_{x_f}x_{t} + W_{h_f}h_{t-1} + W_{c_f}c_{t-1} + b_f)
+ f_t & = \sigma(W_{x_f}x_{t} + W_{h_f}h_{t-1} + b_f)

- c_t & = f_tc_{t-1} + i_t tanh (W_{x_c}x_t+ W_{h_c}h_{t-1} + b_c)
+ c_t & = f_tc_{t-1} + i_t tanh (W_{x_c}x_t + W_{h_c}h_{t-1} + b_c)

- o_t & = \sigma(W_{x_o}x_{t} + W_{h_o}h_{t-1} + W_{c_o}c_t + b_o)
+ o_t & = \sigma(W_{x_o}x_{t} + W_{h_o}h_{t-1} + b_o)

h_t & = o_t tanh(c_t)

- The inputs of lstm unit includes :math:`x_t`, :math:`h_{t-1}` and
- :math:`c_{t-1}`. The implementation separates the linear transformation
- and non-linear transformation apart. Here, we take :math:`i_t` as an
- example. The linear transformation is applied by calling a `fc` layer and
- the equation is:
+ The inputs of lstm unit include :math:`x_t`, :math:`h_{t-1}` and
+ :math:`c_{t-1}`. The 2nd dimensions of :math:`h_{t-1}` and :math:`c_{t-1}`
+ should be same. The implementation separates the linear transformation and
+ non-linear transformation apart. Here, we take :math:`i_t` as an example.
+ The linear transformation is applied by calling a `fc` layer and the
+ equation is:

.. math::

- L_{i_t} = W_{x_i}x_{t} + W_{h_i}h_{t-1} + W_{c_i}c_{t-1} + b_i
+ L_{i_t} = W_{x_i}x_{t} + W_{h_i}h_{t-1} + b_i

The non-linear transformation is applied by calling `lstm_unit_op` and the
equation is:
@@ -1213,14 +1214,15 @@ def lstm_unit(x_t,
Raises:
ValueError: The ranks of **x_t**, **hidden_t_prev** and **cell_t_prev**\
not be 2 or the 1st dimensions of **x_t**, **hidden_t_prev** \
- and **cell_t_prev** not be the same.
+ and **cell_t_prev** not be the same or the 2nd dimensions of \
+ **hidden_t_prev** and **cell_t_prev** not be the same.

Examples:

.. code-block:: python

x_t = fluid.layers.fc(input=x_t_data, size=10)
- prev_hidden = fluid.layers.fc(input=prev_hidden_data, size=20)
+ prev_hidden = fluid.layers.fc(input=prev_hidden_data, size=30)
prev_cell = fluid.layers.fc(input=prev_cell_data, size=30)
hidden_value, cell_value = fluid.layers.lstm_unit(x_t=x_t,
hidden_t_prev=prev_hidden,
@@ -1239,7 +1241,11 @@ def lstm_unit(x_t,

if x_t.shape[0] != hidden_t_prev.shape[0] or x_t.shape[
0] != cell_t_prev.shape[0]:
- raise ValueError("The 1s dimension of x_t, hidden_t_prev and "
+ raise ValueError("The 1s dimensions of x_t, hidden_t_prev and "
+ "cell_t_prev must be the same.")
+
+ if hidden_t_prev.shape[1] != cell_t_prev.shape[1]:
+ raise ValueError("The 2nd dimensions of hidden_t_prev and "
"cell_t_prev must be the same.")

if bias_attr is None:
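For readers who want to see the split between the linear and non-linear parts described in this docstring, here is a NumPy sketch written from the (corrected) equations above; the i/f/c/o split order and the sizes are illustrative assumptions by the editor, not the operator kernel's actual layout.

```python
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstm_unit_np(linear, cell_t_prev):
    """linear: [N, 4*D] result of the fc layer over x_t and h_{t-1}; split order assumed."""
    i_part, f_part, c_part, o_part = np.split(linear, 4, axis=1)
    i_t = sigmoid(i_part)                              # input gate
    f_t = sigmoid(f_part)                              # forget gate
    c_t = f_t * cell_t_prev + i_t * np.tanh(c_part)    # new cell state
    o_t = sigmoid(o_part)                              # output gate
    h_t = o_t * np.tanh(c_t)                           # new hidden state
    return h_t, c_t

N, D = 2, 30                          # batch size 2, hidden size 30, as in the example above
linear = np.random.randn(N, 4 * D)    # stands in for fc(x_t, hidden_t_prev)
c_prev = np.zeros((N, D))
h_t, c_t = lstm_unit_np(linear, c_prev)
print(h_t.shape, c_t.shape)           # (2, 30) (2, 30)
```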
@@ -1268,17 +1274,17 @@ def lstm_unit(x_t,

def reduce_sum(input, dim=None, keep_dim=False):
"""
- Computes the sum of tensor elements over the given dimension.
+ Computes the sum of tensor elements over the given dimension.

Args:
input (Variable): The input variable which is a Tensor or LoDTensor.
- dim (int|None): The dimension along which the sum is performed. If
- :attr:`None`, sum all elements of :attr:`input` and return a
- Tensor variable with a single element, otherwise must be in the
- range :math:`[-rank(input), rank(input))`. If :math:`dim < 0`,
+ dim (int|None): The dimension along which the sum is performed. If
+ :attr:`None`, sum all elements of :attr:`input` and return a
+ Tensor variable with a single element, otherwise must be in the
+ range :math:`[-rank(input), rank(input))`. If :math:`dim < 0`,
the dimension to reduce is :math:`rank + dim`.
- keep_dim (bool): Whether to reserve the reduced dimension in the
- output Tensor. The result tensor will have one fewer dimension
+ keep_dim (bool): Whether to reserve the reduced dimension in the
+ output Tensor. The result tensor will have one fewer dimension
than the :attr:`input` unless :attr:`keep_dim` is true.

Returns:
@@ -1312,17 +1318,17 @@ def reduce_sum(input, dim=None, keep_dim=False):

def reduce_mean(input, dim=None, keep_dim=False):
"""
- Computes the mean of tensor elements over the given dimension.
+ Computes the mean of tensor elements over the given dimension.

Args:
input (Variable): The input variable which is a Tensor or LoDTensor.
- dim (int|None): The dimension along which the mean is computed. If
- :attr:`None`, compute the mean over all elements of :attr:`input`
- and return a Tensor variable with a single element, otherwise
- must be in the range :math:`[-rank(input), rank(input))`. If
+ dim (int|None): The dimension along which the mean is computed. If
+ :attr:`None`, compute the mean over all elements of :attr:`input`
+ and return a Tensor variable with a single element, otherwise
+ must be in the range :math:`[-rank(input), rank(input))`. If
:math:`dim < 0`, the dimension to reduce is :math:`rank + dim`.
- keep_dim (bool): Whether to reserve the reduced dimension in the
- output Tensor. The result tensor will have one fewer dimension
+ keep_dim (bool): Whether to reserve the reduced dimension in the
+ output Tensor. The result tensor will have one fewer dimension
than the :attr:`input` unless :attr:`keep_dim` is true.

Returns:
@@ -1356,22 +1362,22 @@ def reduce_mean(input, dim=None, keep_dim=False):

def reduce_max(input, dim=None, keep_dim=False):
"""
- Computes the maximum of tensor elements over the given dimension.
+ Computes the maximum of tensor elements over the given dimension.

Args:
input (Variable): The input variable which is a Tensor or LoDTensor.
- dim (int|None): The dimension along which the maximum is computed.
- If :attr:`None`, compute the maximum over all elements of
- :attr:`input` and return a Tensor variable with a single element,
- otherwise must be in the range :math:`[-rank(input), rank(input))`.
+ dim (int|None): The dimension along which the maximum is computed.
+ If :attr:`None`, compute the maximum over all elements of
+ :attr:`input` and return a Tensor variable with a single element,
+ otherwise must be in the range :math:`[-rank(input), rank(input))`.
If :math:`dim < 0`, the dimension to reduce is :math:`rank + dim`.
- keep_dim (bool): Whether to reserve the reduced dimension in the
- output Tensor. The result tensor will have one fewer dimension
+ keep_dim (bool): Whether to reserve the reduced dimension in the
+ output Tensor. The result tensor will have one fewer dimension
than the :attr:`input` unless :attr:`keep_dim` is true.

Returns:
Variable: The reduced Tensor variable.
-
+
Examples:
.. code-block:: python
@@ -1400,22 +1406,22 @@ def reduce_max(input, dim=None, keep_dim=False):

def reduce_min(input, dim=None, keep_dim=False):
"""
- Computes the minimum of tensor elements over the given dimension.
+ Computes the minimum of tensor elements over the given dimension.

Args:
input (Variable): The input variable which is a Tensor or LoDTensor.
- dim (int|None): The dimension along which the minimum is computed.
- If :attr:`None`, compute the minimum over all elements of
- :attr:`input` and return a Tensor variable with a single element,
- otherwise must be in the range :math:`[-rank(input), rank(input))`.
+ dim (int|None): The dimension along which the minimum is computed.
+ If :attr:`None`, compute the minimum over all elements of
+ :attr:`input` and return a Tensor variable with a single element,
+ otherwise must be in the range :math:`[-rank(input), rank(input))`.
If :math:`dim < 0`, the dimension to reduce is :math:`rank + dim`.
- keep_dim (bool): Whether to reserve the reduced dimension in the
- output Tensor. The result tensor will have one fewer dimension
+ keep_dim (bool): Whether to reserve the reduced dimension in the
+ output Tensor. The result tensor will have one fewer dimension
than the :attr:`input` unless :attr:`keep_dim` is true.

Returns:
Variable: The reduced Tensor variable.
-
+
Examples:
.. code-block:: python
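The `dim` and `keep_dim` arguments documented for reduce_sum, reduce_mean, reduce_max and reduce_min above all follow the same convention; here is a short NumPy sketch of that behaviour, with made-up values, for readers comparing against the docstrings.

```python
import numpy as np

x = np.array([[0.2, 0.3, 0.5, 0.9],
              [0.1, 0.2, 0.6, 0.7]])

print(np.sum(x))                               # dim=None: reduce everything to one value, 3.5
print(np.sum(x, axis=0))                       # dim=0: [0.3, 0.5, 1.1, 1.6]
print(np.sum(x, axis=-1))                      # dim=-1 reduces axis rank + dim = 1: [1.9, 1.6]
print(np.sum(x, axis=1, keepdims=True).shape)  # keep_dim=True keeps the reduced axis: (2, 1)
```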