|
11 | 11 |
|
12 | 12 | #pragma once
|
13 | 13 | #include <string>
|
| 14 | +#include <vector> |
14 | 15 | #include "paddle/fluid/framework/op_registry.h"
|
15 | 16 | #include "paddle/fluid/operators/math/math_function.h"
|
16 | 17 |
|
@@ -57,32 +58,71 @@ static void BilinearInterpolation(const Tensor& input, Tensor* output,
|
57 | 58 | auto input_t = EigenTensor<T, 4>::From(input);
|
58 | 59 | auto output_t = EigenTensor<T, 4>::From(*output);
|
59 | 60 | bool align_flag = (align_mode == 0 && !align_corners);
|
60 |
| - for (int k = 0; k < out_h; k++) { // loop for images |
| 61 | + |
| 62 | + std::vector<int> vy_n, vy_s; |
| 63 | + std::vector<float> vd_n, vd_s; |
| 64 | + vy_n.reserve(out_h); |
| 65 | + vy_s.reserve(out_h); |
| 66 | + vd_n.reserve(out_h); |
| 67 | + vd_s.reserve(out_h); |
| 68 | +#ifdef PADDLE_WITH_MKLML |
| 69 | +#pragma omp parallel for |
| 70 | +#endif |
| 71 | + for (int k = 0; k < out_h; k++) { |
61 | 72 | int y_n = align_flag ? static_cast<int>(ratio_h * (k + 0.5) - 0.5)
|
62 | 73 | : static_cast<int>(ratio_h * k);
|
63 | 74 | y_n = (y_n > 0) ? y_n : 0;
|
64 | 75 | int y_s = (y_n + 1) < (in_h - 1) ? (y_n + 1) : (in_h - 1);
|
65 | 76 | float d_n =
|
66 | 77 | align_flag ? ratio_h * (k + 0.5) - 0.5 - y_n : ratio_h * k - y_n;
|
67 | 78 | float d_s = 1.f - d_n;
|
| 79 | + { |
| 80 | + vy_n[k] = y_n; |
| 81 | + vy_s[k] = y_s; |
| 82 | + vd_n[k] = d_n; |
| 83 | + vd_s[k] = d_s; |
| 84 | + } |
| 85 | + } |
68 | 86 |
|
69 |
| - for (int l = 0; l < out_w; l++) { |
70 |
| - int x_w = (align_mode == 0 && !align_corners) |
71 |
| - ? static_cast<int>(ratio_w * (l + 0.5) - 0.5) |
72 |
| - : static_cast<int>(ratio_w * l); |
73 |
| - x_w = (x_w > 0) ? x_w : 0; |
74 |
| - int x_e = (x_w + 1) < (in_w - 1) ? (x_w + 1) : (in_w - 1); |
75 |
| - float d_w = |
76 |
| - align_flag ? ratio_w * (l + 0.5) - 0.5 - x_w : ratio_w * l - x_w; |
77 |
| - float d_e = 1.f - d_w; |
| 87 | + std::vector<int> vx_w, vx_e; |
| 88 | + std::vector<float> vd_w, vd_e; |
| 89 | + vx_w.reserve(out_w); |
| 90 | + vx_e.reserve(out_w); |
| 91 | + vd_w.reserve(out_w); |
| 92 | + vd_e.reserve(out_w); |
| 93 | +#ifdef PADDLE_WITH_MKLML |
| 94 | +#pragma omp parallel for |
| 95 | +#endif |
| 96 | + for (int l = 0; l < out_w; l++) { |
| 97 | + int x_w = (align_mode == 0 && !align_corners) |
| 98 | + ? static_cast<int>(ratio_w * (l + 0.5) - 0.5) |
| 99 | + : static_cast<int>(ratio_w * l); |
| 100 | + x_w = (x_w > 0) ? x_w : 0; |
| 101 | + int x_e = (x_w + 1) < (in_w - 1) ? (x_w + 1) : (in_w - 1); |
| 102 | + float d_w = |
| 103 | + align_flag ? ratio_w * (l + 0.5) - 0.5 - x_w : ratio_w * l - x_w; |
| 104 | + float d_e = 1.f - d_w; |
| 105 | + { |
| 106 | + vx_w[l] = x_w; |
| 107 | + vx_e[l] = x_e; |
| 108 | + vd_w[l] = d_w; |
| 109 | + vd_e[l] = d_e; |
| 110 | + } |
| 111 | + } |
78 | 112 |
|
79 |
| - for (int i = 0; i < n; i++) { // loop for batches |
80 |
| - for (int j = 0; j < c; j++) { // loop for channels |
| 113 | +#ifdef PADDLE_WITH_MKLML |
| 114 | +#pragma omp parallel for collapse(4) |
| 115 | +#endif |
| 116 | + for (int i = 0; i < n; i++) { // loop for batches |
| 117 | + for (int j = 0; j < c; j++) { // loop for channels |
| 118 | + for (int k = 0; k < out_h; k++) { // loop for images |
| 119 | + for (int l = 0; l < out_w; l++) { |
81 | 120 | // bilinear interpolation
|
82 |
| - output_t(i, j, k, l) = input_t(i, j, y_n, x_w) * d_s * d_e + |
83 |
| - input_t(i, j, y_s, x_w) * d_n * d_e + |
84 |
| - input_t(i, j, y_n, x_e) * d_s * d_w + |
85 |
| - input_t(i, j, y_s, x_e) * d_n * d_w; |
| 121 | + T out_t = input_t(i, j, vy_n[k], vx_w[l]) * vd_s[k] * vd_e[l] + |
| 122 | + input_t(i, j, vy_s[k], vx_w[l]) * vd_n[k] * vd_e[l] + |
| 123 | + input_t(i, j, vy_n[k], vx_e[l]) * vd_s[k] * vd_w[l] + |
| 124 | + input_t(i, j, vy_s[k], vx_e[l]) * vd_n[k] * vd_w[l]; |
| 125 | + output_t(i, j, k, l) = out_t; |
86 | 126 | }
|
87 | 127 | }
|
88 | 128 | }
|
|
0 commit comments