
Commit 9653144

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into support_logical_operants
2 parents: 9f95f79 + 2e6e188

File tree: 192 files changed, +4871 -1217 lines


AUTHORS.md (+1)

@@ -31,6 +31,7 @@ This is an incomplete list of authors of [Paddle](https://github.com/PaddlePaddl
 | helinwang | He-Lin Wang |
 | heliqi | Li-Qi He |
 | houj04 | HOU Jue |
+| HulekJakub | Jakub Hulek |
 | jacquesqiao | Long-Fei Qiao |
 | [jakpiase](https://raw.githubusercontent.com/jakpiase/Paddle/new_paddle_intel_authors/img/img.jpg) | Jakub Piasecki |
 | [jczaja](https://raw.githubusercontent.com/jakpiase/Paddle/new_paddle_intel_authors/img/img.jpg) | Jacek Czaja |

cmake/external/xpu.cmake (+1 -1)

@@ -7,7 +7,7 @@ set(XPU_PROJECT "extern_xpu")
 set(XPU_API_LIB_NAME "libxpuapi.so")
 set(XPU_RT_LIB_NAME "libxpurt.so")
 
-set(XPU_BASE_DATE "20230220")
+set(XPU_BASE_DATE "20230227")
 set(XPU_XCCL_BASE_VERSION "1.0.10")
 
 if(NOT DEFINED XPU_BASE_URL)

paddle/fluid/framework/details/nan_inf_utils_detail.cu (+27 -6)

@@ -174,15 +174,19 @@ __device__ T BlockReduce(T value) {
 
 __device__ void BlockReduceNumNanInfAndWrite(const int64_t num_nan,
                                              const int64_t num_inf,
+                                             const int64_t num_zero,
                                              int64_t offset,
                                              int64_t* num_nan_ptr,
-                                             int64_t* num_inf_ptr) {
+                                             int64_t* num_inf_ptr,
+                                             int64_t* num_zero_ptr) {
   int64_t block_num_nan = BlockReduce<int64_t, 2>(num_nan);
   int64_t block_num_inf = BlockReduce<int64_t, 2>(num_inf);
+  int64_t block_num_zero = BlockReduce<int64_t, 2>(num_zero);
 
   if (threadIdx.x == 0) {
     num_nan_ptr[offset] = block_num_nan;
     num_inf_ptr[offset] = block_num_inf;
+    num_zero_ptr[offset] = block_num_zero;
   }
 }
 
@@ -233,13 +237,15 @@ __global__ void FindNanInfAndBlockMaxMin(const T* value_ptr,
                                          const int64_t numel,
                                          int64_t* block_num_nan_ptr,
                                          int64_t* block_num_inf_ptr,
+                                         int64_t* block_num_zero_ptr,
                                          MT* tensor_block_max_ptr,
                                          MT* tensor_block_min_ptr,
                                          MT* tensor_block_mean_ptr) {
   int64_t i = threadIdx.x + blockIdx.x * blockDim.x;
 
   int64_t num_nan = 0;
   int64_t num_inf = 0;
+  int64_t num_zero = 0;
 
   MT max_value = static_cast<MT>(i < numel ? value_ptr[i] : value_ptr[0]);
   MT min_value = static_cast<MT>(i < numel ? value_ptr[i] : value_ptr[0]);
@@ -256,10 +262,18 @@
     } else if (isinf(value)) {
       num_inf += 1;
     }
+    if (value == static_cast<MT>(0)) {
+      num_zero += 1;
+    }
   }
 
-  BlockReduceNumNanInfAndWrite(
-      num_nan, num_inf, blockIdx.x, block_num_nan_ptr, block_num_inf_ptr);
+  BlockReduceNumNanInfAndWrite(num_nan,
+                               num_inf,
+                               num_zero,
+                               blockIdx.x,
+                               block_num_nan_ptr,
+                               block_num_inf_ptr,
+                               block_num_zero_ptr);
 
   BlockReduceMaxMinAndWrite<MT>(max_value,
                                 min_value,
@@ -273,6 +287,7 @@ __global__ void FindNanInfAndBlockMaxMin(const T* value_ptr,
 template <typename T, typename MT>
 __global__ void FindGlobalMaxMinAndPrint(const int64_t* block_num_nan_ptr,
                                          const int64_t* block_num_inf_ptr,
+                                         const int64_t* block_num_zero_ptr,
                                          const MT* tensor_block_max_ptr,
                                          const MT* tensor_block_min_ptr,
                                          const MT* tensor_block_mean_ptr,
@@ -283,11 +298,13 @@ __global__ void FindGlobalMaxMinAndPrint(const int64_t* block_num_nan_ptr,
   if (blockIdx.x == 0 && threadIdx.x == 0) {
     int64_t num_nan = 0;
     int64_t num_inf = 0;
+    int64_t num_zero = 0;
 
     // numel_max_min <= 128
    for (int64_t i = 0; i < numel_max_min; ++i) {
       num_nan += block_num_nan_ptr[i];
       num_inf += block_num_inf_ptr[i];
+      num_zero += block_num_zero_ptr[i];
     }
 
     MT max_value = static_cast<MT>(0);
@@ -314,6 +331,7 @@ __global__ void FindGlobalMaxMinAndPrint(const int64_t* block_num_nan_ptr,
                                            numel,
                                            num_nan,
                                            num_inf,
+                                           num_zero,
                                            max_value,
                                            min_value,
                                            mean_value,
@@ -451,11 +469,12 @@ void TensorCheckerVisitor<phi::GPUContext>::apply(
 
   int64_t numel_max_min = blocks;
 
-  phi::DenseTensor block_num_nan_inf;
-  block_num_nan_inf.Resize({static_cast<int64_t>(2 * numel_max_min)});
+  phi::DenseTensor block_num_nan_inf_zero;
+  block_num_nan_inf_zero.Resize({static_cast<int64_t>(3 * numel_max_min)});
   int64_t* block_num_nan_ptr =
-      dev_ctx->template Alloc<int64_t>(&block_num_nan_inf);
+      dev_ctx->template Alloc<int64_t>(&block_num_nan_inf_zero);
   int64_t* block_num_inf_ptr = block_num_nan_ptr + numel_max_min;
+  int64_t* block_num_zero_ptr = block_num_inf_ptr + numel_max_min;
 
   phi::DenseTensor tensor_block_max_min;
   tensor_block_max_min.Resize({static_cast<int64_t>(3 * numel_max_min)});
@@ -468,6 +487,7 @@ void TensorCheckerVisitor<phi::GPUContext>::apply(
       tensor.numel(),
       block_num_nan_ptr,
       block_num_inf_ptr,
+      block_num_zero_ptr,
       tensor_block_max_ptr,
       tensor_block_min_ptr,
       tensor_block_mean_ptr);
@@ -476,6 +496,7 @@ void TensorCheckerVisitor<phi::GPUContext>::apply(
   FindGlobalMaxMinAndPrint<T, MT>
       <<<1, 1, 0, dev_ctx->stream()>>>(block_num_nan_ptr,
                                        block_num_inf_ptr,
+                                       block_num_zero_ptr,
                                        tensor_block_max_ptr,
                                        tensor_block_min_ptr,
                                        tensor_block_mean_ptr,
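
The GPU change keeps the file's existing two-stage pattern: each thread tallies its own elements, each block reduces its tallies and writes one partial count per blockIdx.x, and the single-thread FindGlobalMaxMinAndPrint kernel sums the partials. The three per-block counters share one int64 allocation of size 3 * numel_max_min, split by pointer arithmetic. Below is a minimal standalone sketch of the per-block counting stage, assuming CUB's cub::BlockReduce in place of Paddle's in-house BlockReduce helper; the kernel name CountZerosPerBlock is hypothetical.

#include <cstdint>
#include <cub/cub.cuh>

// Hedged sketch, not Paddle's code: counts zeros with the same
// tally-then-block-reduce shape as FindNanInfAndBlockMaxMin above.
template <int kBlockDim>
__global__ void CountZerosPerBlock(const float* data,
                                   int64_t numel,
                                   int64_t* block_num_zero_ptr) {
  using BlockReduce = cub::BlockReduce<int64_t, kBlockDim>;
  __shared__ typename BlockReduce::TempStorage temp_storage;

  // Each thread tallies its own slice of the tensor (grid-stride loop).
  int64_t num_zero = 0;
  for (int64_t i = blockIdx.x * static_cast<int64_t>(blockDim.x) + threadIdx.x;
       i < numel;
       i += static_cast<int64_t>(gridDim.x) * blockDim.x) {
    if (data[i] == 0.0f) {
      num_zero += 1;
    }
  }

  // Reduce the per-thread tallies within the block; only thread 0 holds
  // the valid aggregate and writes this block's partial count.
  int64_t block_num_zero = BlockReduce(temp_storage).Sum(num_zero);
  if (threadIdx.x == 0) {
    block_num_zero_ptr[blockIdx.x] = block_num_zero;
  }
}

A launch such as CountZerosPerBlock<256><<<blocks, 256, 0, stream>>>(data, numel, partials), followed by a one-thread summation kernel in the role FindGlobalMaxMinAndPrint plays here, reproduces the flow above; the final serial sum stays cheap because the number of partials is capped (the diff notes numel_max_min <= 128).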

paddle/fluid/framework/details/nan_inf_utils_detail.h (+29 -13)

@@ -69,33 +69,39 @@ HOSTDEVICE void PrintForDifferentLevel(const char* debug_info,
                                        int64_t numel,
                                        int64_t num_nan,
                                        int64_t num_inf,
+                                       int64_t num_zero,
                                        MT max_value,
                                        MT min_value,
                                        MT mean_value,
                                        int check_nan_inf_level) {
   if (num_nan > 0 || num_inf > 0) {
     printf(
         "[PRECISION] [ERROR] in %s, numel=%lld, num_nan=%lld, "
-        "num_inf=%lld, max=%e, min=%e, mean=%e\n",
+        "num_inf=%lld, num_zero=%lld, max=%e, min=%e, mean=%e\n",
         debug_info,
-        static_cast<long long>(numel),    // NOLINT
-        static_cast<long long>(num_nan),  // NOLINT
-        static_cast<long long>(num_inf),  // NOLINT
+        static_cast<long long>(numel),     // NOLINT
+        static_cast<long long>(num_nan),   // NOLINT
+        static_cast<long long>(num_inf),   // NOLINT
+        static_cast<long long>(num_zero),  // NOLINT
         static_cast<float>(max_value),
         static_cast<float>(min_value),
         static_cast<float>(mean_value));
     if (check_nan_inf_level == 0) {
 #if defined(__NVCC__) || defined(__HIPCC__)
       PADDLE_ENFORCE(false,
-                     "There are NAN or INF (num_nan=%ld, num_inf=%lld) in %s.",
-                     static_cast<long long>(num_nan),  // NOLINT
-                     static_cast<long long>(num_inf),  // NOLINT
+                     "There are NAN or INF (num_nan=%ld, num_inf=%lld, "
+                     "num_zero=%lld) in %s.",
+                     static_cast<long long>(num_nan),   // NOLINT
+                     static_cast<long long>(num_inf),   // NOLINT
+                     static_cast<long long>(num_zero),  // NOLINT
                      debug_info);
 #else
       PADDLE_THROW(platform::errors::PreconditionNotMet(
-          "There are NAN or INF (num_nan=%lld, num_inf=%lld) in %s.",
-          static_cast<long long>(num_nan),  // NOLINT
-          static_cast<long long>(num_inf),  // NOLINT
+          "There are NAN or INF (num_nan=%lld, num_inf=%lld, num_zero=%lld) in "
+          "%s.",
+          static_cast<long long>(num_nan),   // NOLINT
+          static_cast<long long>(num_inf),   // NOLINT
+          static_cast<long long>(num_zero),  // NOLINT
           debug_info));
 #endif
     }
@@ -114,6 +120,7 @@ void PrintForDifferentLevelFile(const char* debug_info,
                                 int64_t numel,
                                 int64_t num_nan,
                                 int64_t num_inf,
+                                int64_t num_zero,
                                 MT max_value,
                                 MT min_value,
                                 MT mean_value,
@@ -136,9 +143,10 @@
 
   if (num_nan > 0 || num_inf > 0) {
     outfile << "[PRECISION] [ERROR] in " << debug_info
-            << ", numel=" << static_cast<long long>(numel)      // NOLINT
-            << ", num_nan=" << static_cast<long long>(num_nan)  // NOLINT
-            << ", num_inf=" << static_cast<long long>(num_inf)  // NOLINT
+            << ", numel=" << static_cast<long long>(numel)        // NOLINT
+            << ", num_nan=" << static_cast<long long>(num_nan)    // NOLINT
+            << ", num_inf=" << static_cast<long long>(num_inf)    // NOLINT
+            << ", num_zero=" << static_cast<long long>(num_zero)  // NOLINT
            << ", max=" << static_cast<float>(max_value)
            << ", min=" << static_cast<float>(min_value)
            << ", mean=" << static_cast<float>(mean_value) << std::endl;
@@ -200,6 +208,7 @@ static void CheckNanInfCpuImpl(const T* value_ptr,
 
   std::vector<int64_t> thread_num_nan(num_threads, 0);
   std::vector<int64_t> thread_num_inf(num_threads, 0);
+  std::vector<int64_t> thread_num_zero(num_threads, 0);
   std::vector<MT> thread_min_value(num_threads, static_cast<MT>(value_ptr[0]));
   std::vector<MT> thread_max_value(num_threads, static_cast<MT>(value_ptr[0]));
   std::vector<MT> thread_mean_value(num_threads, static_cast<MT>(0));
@@ -230,17 +239,22 @@
       } else if (std::isinf(value)) {
         thread_num_inf[tid] += 1;
       }
+      if (value == 0) {
+        thread_num_zero[tid] += 1;
+      }
     }
   }
 
   int64_t num_nan = 0;
   int64_t num_inf = 0;
+  int64_t num_zero = 0;
   MT min_value = thread_min_value[0];
   MT max_value = thread_max_value[0];
   MT mean_value = static_cast<MT>(0);
   for (int i = 0; i < num_threads; ++i) {
     num_nan += thread_num_nan[i];
     num_inf += thread_num_inf[i];
+    num_zero += thread_num_zero[i];
     min_value = std::min(thread_min_value[i], min_value);
     max_value = std::max(thread_max_value[i], max_value);
     mean_value += thread_mean_value[i];
@@ -254,6 +268,7 @@ static void CheckNanInfCpuImpl(const T* value_ptr,
                             numel,
                             num_nan,
                             num_inf,
+                            num_zero,
                             max_value,
                             min_value,
                             mean_value,
@@ -266,6 +281,7 @@ static void CheckNanInfCpuImpl(const T* value_ptr,
                               numel,
                               num_nan,
                               num_inf,
+                              num_zero,
                               max_value,
                               min_value,
                               mean_value,
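
The CPU path applies the same tally-then-merge idea with one counter slot per thread, so the scan loop needs no atomics or locks, and the merge over num_threads slots runs serially afterwards. A minimal sketch of that scheme, assuming OpenMP supplies the thread pool and thread ids (the real CheckNanInfCpuImpl's threading setup may differ) and using the hypothetical name CountNanInfZeroCpu:

#include <cmath>
#include <cstdint>
#include <vector>
#include <omp.h>

// Hedged sketch, not Paddle's code: per-thread tally slots followed by a
// serial merge, mirroring CheckNanInfCpuImpl's counting scheme above.
void CountNanInfZeroCpu(const float* data,
                        int64_t numel,
                        int64_t* num_nan_out,
                        int64_t* num_inf_out,
                        int64_t* num_zero_out) {
  const int num_threads = omp_get_max_threads();
  std::vector<int64_t> thread_num_nan(num_threads, 0);
  std::vector<int64_t> thread_num_inf(num_threads, 0);
  std::vector<int64_t> thread_num_zero(num_threads, 0);

#pragma omp parallel
  {
    const int tid = omp_get_thread_num();
#pragma omp for
    for (int64_t i = 0; i < numel; ++i) {
      const float value = data[i];
      if (std::isnan(value)) {
        thread_num_nan[tid] += 1;
      } else if (std::isinf(value)) {
        thread_num_inf[tid] += 1;
      }
      // Independent check, as in the diff: the zero count sits outside the
      // NaN/Inf else-if chain.
      if (value == 0.0f) {
        thread_num_zero[tid] += 1;
      }
    }
  }

  // Serial merge over num_threads slots; cheap compared to the scan.
  int64_t num_nan = 0, num_inf = 0, num_zero = 0;
  for (int i = 0; i < num_threads; ++i) {
    num_nan += thread_num_nan[i];
    num_inf += thread_num_inf[i];
    num_zero += thread_num_zero[i];
  }
  *num_nan_out = num_nan;
  *num_inf_out = num_inf;
  *num_zero_out = num_zero;
}

Note that, as in the diff's error messages, a tensor with zeros but no NaN/Inf triggers no report: num_zero is extra diagnostic context printed only when num_nan > 0 or num_inf > 0.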

paddle/fluid/framework/ir/CMakeLists.txt (+1 -1)

@@ -215,7 +215,7 @@ if(WITH_XPU)
   cc_library(
     xpu_quant_utils
     SRCS xpu/quant_utils.cc
-    DEPS pass)
+    DEPS pass phi)
   cc_library(
     xpu_pass_utils
     SRCS xpu/pass_utils.cc
