@@ -2988,8 +2988,8 @@ def spp_layer(input,
2988
2988
A layer performs spatial pyramid pooling.
2989
2989
2990
2990
Reference:
2991
- Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition
2992
- https://arxiv.org/abs/1406.4729
2991
+ `Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition
2992
+ <https://arxiv.org/abs/1406.4729>`_
2993
2993
2994
2994
The example usage is:
2995
2995
@@ -3090,8 +3090,8 @@ def img_cmrnorm_layer(input,
3090
3090
Response normalization across feature maps.
3091
3091
3092
3092
Reference:
3093
- ImageNet Classification with Deep Convolutional Neural Networks
3094
- http://www.cs.toronto.edu/~fritz/absps/imagenet.pdf
3093
+ `ImageNet Classification with Deep Convolutional Neural Networks
3094
+ <http://www.cs.toronto.edu/~fritz/absps/imagenet.pdf>`_
3095
3095
3096
3096
The example usage is:
3097
3097
@@ -3157,9 +3157,9 @@ def batch_norm_layer(input,
3157
3157
y_i &\\ gets \\ gamma \\ hat{x_i} + \\ beta \\ qquad &//\ scale\ and\ shift
3158
3158
3159
3159
Reference:
3160
- Batch Normalization: Accelerating Deep Network Training by Reducing
3160
+ `Batch Normalization: Accelerating Deep Network Training by Reducing
3161
3161
Internal Covariate Shift
3162
- http://arxiv.org/abs/1502.03167
3162
+ <http://arxiv.org/abs/1502.03167>`_
3163
3163
3164
3164
The example usage is:
3165
3165
@@ -5416,10 +5416,10 @@ def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None):
5416
5416
to be divided by groups.
5417
5417
5418
5418
Reference:
5419
- Maxout Networks
5420
- http://www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf
5421
- Multi-digit Number Recognition from Street View Imagery using Deep Convolutional Neural Networks
5422
- https://arxiv.org/pdf/1312.6082v4.pdf
5419
+ `Maxout Networks
5420
+ <http://www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf>`_
5421
+ `Multi-digit Number Recognition from Street View Imagery using Deep Convolutional Neural Networks
5422
+ <https://arxiv.org/pdf/1312.6082v4.pdf>`_
5423
5423
5424
5424
.. math::
5425
5425
y_{si+j} = \max_k x_{gsi + sk + j}
@@ -5484,9 +5484,9 @@ def ctc_layer(input,
5484
5484
alignment between the inputs and the target labels is unknown.
5485
5485
5486
5486
Reference:
5487
- Connectionist Temporal Classification: Labelling Unsegmented Sequence Data
5487
+ `Connectionist Temporal Classification: Labelling Unsegmented Sequence Data
5488
5488
with Recurrent Neural Networks
5489
- http://machinelearning.wustl.edu/mlpapers/paper_files/icml2006_GravesFGS06.pdf
5489
+ <http://machinelearning.wustl.edu/mlpapers/paper_files/icml2006_GravesFGS06.pdf>`_
5490
5490
5491
5491
Note:
5492
5492
Considering the 'blank' label needed by CTC, you need to use (num_classes + 1)
@@ -5558,9 +5558,9 @@ def warp_ctc_layer(input,
5558
5558
install it to :code:`third_party/install/warpctc` directory.
5559
5559
5560
5560
Reference:
5561
- Connectionist Temporal Classification: Labelling Unsegmented Sequence Data
5561
+ `Connectionist Temporal Classification: Labelling Unsegmented Sequence Data
5562
5562
with Recurrent Neural Networks
5563
- http://machinelearning.wustl.edu/mlpapers/paper_files/icml2006_GravesFGS06.pdf
5563
+ <http://machinelearning.wustl.edu/mlpapers/paper_files/icml2006_GravesFGS06.pdf>`_
5564
5564
5565
5565
Note:
5566
5566
- Let num_classes represent the category number. Considering the 'blank'
@@ -5780,8 +5780,8 @@ def nce_layer(input,
5780
5780
Noise-contrastive estimation.
5781
5781
5782
5782
Reference:
5783
- A fast and simple algorithm for training neural probabilistic language
5784
- models. https://www.cs.toronto.edu/~amnih/papers/ncelm.pdf
5783
+ `A fast and simple algorithm for training neural probabilistic language
5784
+ models <https://www.cs.toronto.edu/~amnih/papers/ncelm.pdf>`_
5785
5785
5786
5786
The example usage is:
5787
5787
@@ -5896,8 +5896,8 @@ def rank_cost(left,
5896
5896
A cost Layer for learning to rank using gradient descent.
5897
5897
5898
5898
Reference:
5899
- Learning to Rank using Gradient Descent
5900
- http://research.microsoft.com/en-us/um/people/cburges/papers/ICML_ranking.pdf
5899
+ `Learning to Rank using Gradient Descent
5900
+ <http://research.microsoft.com/en-us/um/people/cburges/papers/ICML_ranking.pdf>`_
5901
5901
5902
5902
.. math::
5903
5903
@@ -6432,8 +6432,8 @@ def smooth_l1_cost(input, label, name=None, coeff=1.0, layer_attr=None):
6432
6432
smooth_{L1}(x) = \\ begin{cases} 0.5x^2& \\ text{if} \\ |x| < 1 \\ \\ |x|-0.5& \\ text{otherwise} \end{cases}
6433
6433
6434
6434
Reference:
6435
- Fast R-CNN
6436
- https://arxiv.org/pdf/1504.08083v2.pdf
6435
+ `Fast R-CNN
6436
+ <https://arxiv.org/pdf/1504.08083v2.pdf>`_
6437
6437
6438
6438
The example usage is:
6439
6439
@@ -6639,8 +6639,8 @@ def prelu_layer(input,
6639
6639
The Parametric ReLU activation that activates outputs with a learnable weight.
6640
6640
6641
6641
Reference:
6642
- Delving Deep into Rectifiers: Surpassing Human-Level Performance on
6643
- ImageNet Classification http://arxiv.org/pdf/1502.01852v1.pdf
6642
+ `Delving Deep into Rectifiers: Surpassing Human-Level Performance on
6643
+ ImageNet Classification <http://arxiv.org/pdf/1502.01852v1.pdf>`_
6644
6644
6645
6645
.. math::
6646
6646
z_i &\\ quad if \\ quad z_i > 0 \\ \\
@@ -6736,8 +6736,8 @@ def gated_unit_layer(input,
6736
6736
product between :math:`X'` and :math:`\sigma` is finally returned.
6737
6737
6738
6738
Reference:
6739
- Language Modeling with Gated Convolutional Networks
6740
- https://arxiv.org/abs/1612.08083
6739
+ `Language Modeling with Gated Convolutional Networks
6740
+ <https://arxiv.org/abs/1612.08083>`_
6741
6741
6742
6742
.. math::
6743
6743
y=\\ text{act}(X \cdot W + b)\otimes \sigma(X \cdot V + c)
0 commit comments