@@ -9,7 +9,7 @@ template <typename Dtype, bool has_scale, bool shared>
9
9
__global__ void normalize_kernel_no_across_spatial (const int size_in_channel, const int n,const int channels, \
10
10
const Dtype* scale, const Dtype* bottom_data, Dtype* top_data, const float eps, const int p){
11
11
12
- CUDA_KERNEL_LOOP (index , size_in_channel){
12
+ CUDA_KERNEL_LOOP (index , size_in_channel * n ){
13
13
float sqr_sum = 0 .f ;
14
14
int num_index=index /size_in_channel;
15
15
int index_in_channel=index %size_in_channel;
@@ -26,17 +26,16 @@ __global__ void normalize_kernel_no_across_spatial(const int size_in_channel, co
26
26
if (p == 1 ) {
27
27
norm = 1 .f / (sqr_sum + eps);
28
28
} else {
29
- norm = 1 .f / ( sqrtf (sqr_sum) + eps);
29
+ norm = 1 .f / sqrtf (sqr_sum + eps);
30
30
}
31
- Dtype has_scale_norm=scale[0 ]*norm;
32
31
for (int i = 0 ; i < channels; ++i) {
33
32
if (has_scale) {
34
33
if (shared) {
35
34
top_data[data_index + i * size_in_channel] = \
36
- bottom_data[data_index + i * size_in_channel] * scale[0 ] * has_scale_norm ;
35
+ bottom_data[data_index + i * size_in_channel] * scale[0 ] * norm ;
37
36
} else {
38
37
top_data[data_index + i * size_in_channel] = \
39
- bottom_data[data_index + i * size_in_channel] * scale[i] * has_scale_norm ;
38
+ bottom_data[data_index + i * size_in_channel] * scale[i] * norm ;
40
39
}
41
40
} else {
42
41
top_data[data_index + i * size_in_channel] = \
0 commit comments