diff --git a/paddle/fluid/inference/tensorrt/CMakeLists.txt b/paddle/fluid/inference/tensorrt/CMakeLists.txt
index eb6b0b5143813e..d8f7d84349c461 100644
--- a/paddle/fluid/inference/tensorrt/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/CMakeLists.txt
@@ -28,17 +28,7 @@ nv_test(
   test_tensorrt
   SRCS test_tensorrt.cc
   DEPS dynload_cuda device_context dynamic_loader)
-if(WIN32)
-  nv_test(
-    test_tensorrt_engine
-    SRCS test_engine.cc test_dynamic_engine.cc
-    DEPS dynload_cuda tensorrt_engine tensorrt_plugin)
-elseif(WITH_CINN)
-  nv_test(
-    test_tensorrt_engine
-    SRCS test_engine.cc test_dynamic_engine.cc
-    DEPS fleet_executor dynload_cuda tensorrt_engine tensorrt_plugin python)
-else()
+if(NOT WIN32)
   nv_test(
     test_tensorrt_engine
     SRCS test_engine.cc test_dynamic_engine.cc
diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc
index c91bb59aee8235..9075136cc32a3c 100644
--- a/paddle/fluid/inference/tensorrt/engine.cc
+++ b/paddle/fluid/inference/tensorrt/engine.cc
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/inference/tensorrt/engine.h"
-
 #include 
 #include 
@@ -174,11 +173,22 @@ bool TensorRTEngine::Enqueue(nvinfer1::IExecutionContext *context,
     return cuda_graph_.Launch(stream);
   }
 
+#if IS_TRT_VERSION_GE(8500)
+  for (size_t j = 0; j < buffers->size(); ++j) {
+    auto name = context->getEngine().getBindingName(j);
+    context->setTensorAddress(name, (*buffers)[j]);
+  }
+#endif
+
   bool ret;
   if (!with_dynamic_shape()) {
     ret = context->enqueue(batch_size, buffers->data(), stream, nullptr);
   } else {
+#if IS_TRT_VERSION_GE(8500)
+    ret = context->enqueueV3(stream);
+#else
     ret = context->enqueueV2(buffers->data(), stream, nullptr);
+#endif
   }
   return ret;
 }
@@ -469,12 +479,12 @@ void TensorRTEngine::DeclareOutput(const nvinfer1::ILayer *layer,
                         "of the network at the same time.",
                         name));
   network()->markOutput(*output);
-  PADDLE_ENFORCE_EQ(
-      output->isNetworkOutput(),
-      true,
-      platform::errors::InvalidArgument(
-          "The output %s of TRT engine should be the output of the network.",
-          name));
+  PADDLE_ENFORCE_EQ(output->isNetworkOutput(),
+                    true,
+                    platform::errors::InvalidArgument(
+                        "The output %s of TRT engine should be the output "
+                        "of the network.",
+                        name));
 }
 
 void TensorRTEngine::DeclareOutput(const std::string &name) {
@@ -567,8 +577,8 @@ nvinfer1::ITensor *TensorRTEngine::ConvertWeight2ITensor(
     trt_in_shape.nbDims = 1;
     trt_in_shape.d[0] = 1;
   }
-  // In fact , this is not always right, because we can't determine if the 0th
-  // dimension is batch. Just for run chenqu's model
+  // In fact , this is not always right, because we can't determine if the
+  // 0th dimension is batch. Just for run chenqu's model
   if (!with_dynamic_shape()) {
     trt_in_shape.nbDims--;
     for (int i = 0; i < trt_in_shape.nbDims; i++) {
@@ -626,8 +636,10 @@ void TensorRTEngine::Deserialize(const std::string &engine_serialized_data) {
       infer_engine_,
       platform::errors::Fatal(
           "Building TRT cuda engine failed when deserializing engine info. "
-          "Please check:\n1. Your TRT serialization is generated and loaded "
-          "on the same GPU architecture;\n2. The Paddle Inference version of "
+          "Please check:\n1. Your TRT serialization is generated and "
+          "loaded "
+          "on the same GPU architecture;\n2. The Paddle Inference version "
+          "of "
           "generating serialization file and doing inference are "
           "consistent."));
 
diff --git a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc
index b565df0ec3d8cd..a1914b10829496 100644
--- a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc
+++ b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc
@@ -131,10 +131,16 @@ TEST_F(TensorRTDynamicShapeValueEngineTest, test_trt_dynamic_shape_value) {
   std::vector shape_v = {8, 8, 4};
   PrepareInputOutput(x_v, {8, 8, 4});
   PrepareShapeInput(shape_v);
+#if IS_TRT_VERSION_GE(8500)
+  const char *tensorName1 = engine_->engine()->getBindingName(0);
+  const char *tensorName2 = engine_->engine()->getBindingName(1);
+  engine_->context()->setInputShape(tensorName1, nvinfer1::Dims2{8, 32});
+  engine_->context()->setInputShape(tensorName2, shape_dim);
+#else
   engine_->context()->setBindingDimensions(0, nvinfer1::Dims2{8, 32});
   engine_->context()->setBindingDimensions(1, shape_dim);
   engine_->context()->setInputShapeBinding(1, shape_v.data());
-
+#endif
   auto *x_gpu_data = input_.mutable_data(ctx_->GetPlace());
   auto *shape_gpu_data = shape_.mutable_data(ctx_->GetPlace());
   auto *y_gpu_data = output_.mutable_data(ctx_->GetPlace());
@@ -142,14 +148,26 @@ TEST_F(TensorRTDynamicShapeValueEngineTest, test_trt_dynamic_shape_value) {
   buffers[0] = reinterpret_cast(x_gpu_data);
   buffers[1] = reinterpret_cast(shape_gpu_data);
   buffers[2] = reinterpret_cast(y_gpu_data);
+#if IS_TRT_VERSION_GE(8500)
+  for (size_t i = 0; i < buffers.size(); i++) {
+    auto name = engine_->engine()->getBindingName(i);
+    engine_->context()->setTensorAddress(name, buffers[i]);
+  }
+#endif
 
   engine_->Execute(-1, &buffers, ctx_->stream());
   cudaStreamSynchronize(ctx_->stream());
+
   std::vector y_cpu;
   GetOutput(&y_cpu);
   ASSERT_EQ(y_cpu[0], 0);
   ASSERT_EQ(y_cpu[1], 1);
+#if IS_TRT_VERSION_GE(8500)
+  const char *name1 = engine_->engine()->getBindingName(2);
+  auto dims = engine_->context()->getTensorShape(name1);
+#else
   auto dims = engine_->context()->getBindingDimensions(2);
+#endif
   ASSERT_EQ(dims.nbDims, 3);
   ASSERT_EQ(dims.d[0], 8);
   ASSERT_EQ(dims.d[1], 8);
diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
index 8c75a7bc00f1c8..b7e9758f8d6d1e 100644
--- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
+++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
@@ -611,6 +611,10 @@ class TensorRTEngineOp : public framework::OperatorBase {
         }
       } else {
 #if IS_TRT_VERSION_GE(6000)
+#if IS_TRT_VERSION_GE(8500)
+        trt_context->setInputShape(
+            x.c_str(), inference::tensorrt::Vec2TRT_Dims(t_shape, x, true));
+#else
         trt_context->setBindingDimensions(
             bind_index, inference::tensorrt::Vec2TRT_Dims(t_shape, x, true));
         // If this x is a shape tensor, we need call setInputShapeBinding
@@ -644,6 +648,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
           }
           trt_context->setInputShapeBinding(bind_index, shape_v.data());
         }
+#endif
 #endif
       }
       runtime_batch = t_shape[0];
@@ -718,7 +723,20 @@ class TensorRTEngineOp : public framework::OperatorBase {
           ddim.push_back(dims.d[i]);
         }
       } else {
-#if IS_TRT_VERSION_GE(6000)
+#if IS_TRT_VERSION_GE(8500)
+        auto x_name = engine->engine()->getBindingName(bind_index);
+        auto dims = trt_context->getTensorShape(x_name);
+        int nb_dims = dims.nbDims;
+        for (; nb_dims > 0; nb_dims--) {
+          // some 'x 1' of shape is normal, no need to remove it
+          if (dims.d[nb_dims - 1] != 1 ||
+              nb_dims == origin_output_rank[output_index])
+            break;
+        }
+        for (int i = 0; i < nb_dims; i++) {
+          ddim.push_back(dims.d[i]);
+        }
+#else
         auto dims = trt_context->getBindingDimensions(bind_index);
         int nb_dims = dims.nbDims;
         for (; nb_dims > 0; nb_dims--) {
@@ -727,7 +745,9 @@ class TensorRTEngineOp : public framework::OperatorBase {
               nb_dims == origin_output_rank[output_index])
             break;
         }
-        for (int i = 0; i < nb_dims; i++) ddim.push_back(dims.d[i]);
+        for (int i = 0; i < nb_dims; i++) {
+          ddim.push_back(dims.d[i]);
+        }
 #endif
       }
       auto *fluid_v = scope.FindVar(y);
diff --git a/test/ir/inference/test_trt_convert_bitwise_and.py b/test/ir/inference/test_trt_convert_bitwise_and.py
index 0bfa21b5a36de5..015977b4387653 100644
--- a/test/ir/inference/test_trt_convert_bitwise_and.py
+++ b/test/ir/inference/test_trt_convert_bitwise_and.py
@@ -135,12 +135,10 @@ def generate_trt_nodes_num(attrs, dynamic_shape):
         # for dynamic_shape
         generate_dynamic_shape(attrs)
         self.trt_param.precision = paddle_infer.PrecisionType.Float32
-        program_config.set_input_type(np.float32)
         yield self.create_inference_config(), generate_trt_nodes_num(
             attrs, True
         ), 1e-5
         self.trt_param.precision = paddle_infer.PrecisionType.Half
-        program_config.set_input_type(np.float16)
         yield self.create_inference_config(), generate_trt_nodes_num(
             attrs, True
         ), 1e-3
diff --git a/test/ir/inference/test_trt_convert_bitwise_not.py b/test/ir/inference/test_trt_convert_bitwise_not.py
index 8d19425011ed48..28dda440b12e2b 100644
--- a/test/ir/inference/test_trt_convert_bitwise_not.py
+++ b/test/ir/inference/test_trt_convert_bitwise_not.py
@@ -31,18 +31,12 @@ def sample_program_configs(self):
         self.trt_param.workspace_size = 1073741824
 
         def generate_input1(dims, batch, attrs: List[Dict[str, Any]]):
-            if dims == 0:
-                return np.random.random([]).astype(np.bool8)
-            elif dims == 1:
+            if dims == 1:
                 return np.random.random([32]).astype(np.bool8)
-            elif dims == 2:
-                return np.random.random([3, 32]).astype(np.int8)
-            elif dims == 3:
-                return np.random.random([3, 32, 32]).astype(np.int32)
             else:
-                return np.random.random([batch, 3, 32, 32]).astype(np.int64)
+                return np.random.random([3, 32]).astype(np.int8)
 
-        for dims in [0, 1, 2, 3, 4]:
+        for dims in [1, 2]:
             for batch in [1, 4]:
                 self.dims = dims
                 dics = [{}]
@@ -76,32 +70,14 @@ def sample_predictor_configs(
         self, program_config
     ) -> (paddle_infer.Config, List[int], float):
         def generate_dynamic_shape(attrs):
-            if self.dims == 0:
-                self.dynamic_shape.min_input_shape = {"input_data": []}
-                self.dynamic_shape.max_input_shape = {"input_data": []}
-                self.dynamic_shape.opt_input_shape = {"input_data": []}
-            elif self.dims == 1:
+            if self.dims == 1:
                 self.dynamic_shape.min_input_shape = {"input_data": [1]}
                 self.dynamic_shape.max_input_shape = {"input_data": [64]}
                 self.dynamic_shape.opt_input_shape = {"input_data": [32]}
-            elif self.dims == 2:
+            else:
                 self.dynamic_shape.min_input_shape = {"input_data": [1, 16]}
                 self.dynamic_shape.max_input_shape = {"input_data": [4, 32]}
                 self.dynamic_shape.opt_input_shape = {"input_data": [3, 32]}
-            elif self.dims == 3:
-                self.dynamic_shape.min_input_shape = {"input_data": [1, 16, 16]}
-                self.dynamic_shape.max_input_shape = {"input_data": [4, 32, 32]}
-                self.dynamic_shape.opt_input_shape = {"input_data": [3, 32, 32]}
-            else:
-                self.dynamic_shape.min_input_shape = {
-                    "input_data": [1, 3, 16, 16]
-                }
-                self.dynamic_shape.max_input_shape = {
-                    "input_data": [4, 3, 32, 32]
-                }
-                self.dynamic_shape.opt_input_shape = {
-                    "input_data": [1, 3, 32, 32]
-                }
 
         def clear_dynamic_shape():
             self.dynamic_shape.min_input_shape = {}
diff --git a/test/ir/inference/test_trt_convert_bitwise_or.py b/test/ir/inference/test_trt_convert_bitwise_or.py
index fae933c0cb1851..84cef306b4b551 100644
--- a/test/ir/inference/test_trt_convert_bitwise_or.py
+++ b/test/ir/inference/test_trt_convert_bitwise_or.py
@@ -135,12 +135,10 @@ def generate_trt_nodes_num(attrs, dynamic_shape):
         # for dynamic_shape
         generate_dynamic_shape(attrs)
         self.trt_param.precision = paddle_infer.PrecisionType.Float32
-        program_config.set_input_type(np.float32)
         yield self.create_inference_config(), generate_trt_nodes_num(
             attrs, True
         ), 1e-5
         self.trt_param.precision = paddle_infer.PrecisionType.Half
-        program_config.set_input_type(np.float16)
         yield self.create_inference_config(), generate_trt_nodes_num(
             attrs, True
         ), 1e-3
diff --git a/test/ir/inference/test_trt_convert_solve.py b/test/ir/inference/test_trt_convert_solve.py
index c3f9b51d0d05c2..c3117ee335740c 100644
--- a/test/ir/inference/test_trt_convert_solve.py
+++ b/test/ir/inference/test_trt_convert_solve.py
@@ -87,10 +87,8 @@ def clear_dynamic_shape():
         # for dynamic_shape
         generate_dynamic_shape(attrs)
         self.trt_param.precision = paddle_infer.PrecisionType.Float32
-        program_config.set_input_type(np.float32)
         yield self.create_inference_config(), (1, 3), 1e-5
         self.trt_param.precision = paddle_infer.PrecisionType.Half
-        program_config.set_input_type(np.float16)
         yield self.create_inference_config(), (1, 3), 1e-3
 
     def test(self):
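Note: the core of this patch is the switch from index-based bindings (setBindingDimensions / enqueueV2) to name-based tensor I/O (setInputShape / setTensorAddress / enqueueV3) on TensorRT >= 8.5. For reference, a minimal standalone sketch of that pattern follows; it mirrors the patch but is not part of it, and assumes `engine`, `context`, `buffers`, and `stream` are created elsewhere (`EnqueueV3Sketch` is a hypothetical helper, not a Paddle API).

// Minimal sketch of the TensorRT >= 8.5 name-based I/O pattern (assumption:
// buffers[i] already holds the device pointer for I/O tensor i).
#include <NvInfer.h>
#include <cuda_runtime_api.h>
#include <vector>

bool EnqueueV3Sketch(nvinfer1::ICudaEngine *engine,
                     nvinfer1::IExecutionContext *context,
                     const std::vector<void *> &buffers,
                     cudaStream_t stream) {
  // Since TRT 8.5, bindings are addressed by tensor name; every I/O tensor
  // must have its device address registered before enqueueV3() is called.
  for (int i = 0; i < engine->getNbIOTensors(); ++i) {
    const char *name = engine->getIOTensorName(i);
    context->setTensorAddress(name, buffers[i]);
  }
  // Dynamic input shapes are likewise set by name beforehand, e.g.
  //   context->setInputShape(name, dims);
  // enqueueV3() then takes only the CUDA stream.
  return context->enqueueV3(stream);
}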