From 19c046fe9c680e9f4a7ee4da2d7d75d55b0a82d1 Mon Sep 17 00:00:00 2001 From: lizexu <2694294196@qq.com> Date: Tue, 12 Dec 2023 13:08:10 +0000 Subject: [PATCH 01/28] =?UTF-8?q?=E9=80=9A=E8=BF=87=E4=BA=86=E5=BE=88?= =?UTF-8?q?=E5=A4=9A=E5=8D=95=E6=B5=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../inference/analysis/ir_pass_manager.cc | 3 +- paddle/fluid/inference/tensorrt/engine.cc | 45 ++++++++++++++----- paddle/fluid/inference/tensorrt/engine.h | 1 + .../operators/tensorrt/tensorrt_engine_op.h | 27 ++++++++++- test/ir/inference/test_trt_convert_solve.py | 6 +-- 5 files changed, 66 insertions(+), 16 deletions(-) diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc index 122dbbda8fabdd..7fb5134a22752f 100644 --- a/paddle/fluid/inference/analysis/ir_pass_manager.cc +++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc @@ -223,7 +223,8 @@ void IRPassManager::CreatePasses(Argument *argument, } pass->Set("use_static_engine", new bool(use_static_engine)); pass->Set("model_from_memory", new bool(argument->model_from_memory())); - pass->Set("use_inspector", new bool(argument->tensorrt_use_inspector())); + pass->Set("use_inspector", + new bool(argument->tensorrt_use_inspector() || 1)); pass->Set("inspector_serialize", new bool(argument->tensorrt_inspector_serialize())); pass->Set("trt_ops_run_float", diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc index c91bb59aee8235..3036cb2d8c200c 100644 --- a/paddle/fluid/inference/tensorrt/engine.cc +++ b/paddle/fluid/inference/tensorrt/engine.cc @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/inference/tensorrt/engine.h" - #include #include +#include #include @@ -96,6 +96,14 @@ nvinfer1::IExecutionContext *TensorRTEngine::context() { } else { infer_context = infer_engine_->createExecutionContext(); } +#if IS_TRT_VERSION_GE(8500) + int32_t const endBindingIndex = infer_engine_->getNbIOTensors(); + for (int i = 0; i < endBindingIndex; ++i) { + const auto tensorName = infer_engine_->getIOTensorName(i); + m_IOTensorNames.emplace_back(tensorName); + LOG(INFO) << "IOTensorName: " << m_IOTensorNames[i]; + } +#endif PADDLE_ENFORCE_NOT_NULL( infer_context, platform::errors::InvalidArgument( @@ -174,11 +182,24 @@ bool TensorRTEngine::Enqueue(nvinfer1::IExecutionContext *context, return cuda_graph_.Launch(stream); } +#if IS_TRT_VERSION_GE(8500) + for (size_t j = 0; j < buffers->size(); ++j) { + auto name = context->getEngine().getBindingName(j); + // LOG(INFO) << "setTensorAddress的名字" << name; + context->setTensorAddress(name, (*buffers)[j]); + } +#endif + bool ret; if (!with_dynamic_shape()) { ret = context->enqueue(batch_size, buffers->data(), stream, nullptr); } else { +#if IS_TRT_VERSION_GE(8500) + LOG(INFO) << "enqueueV3"; + ret = context->enqueueV3(stream); +#else ret = context->enqueueV2(buffers->data(), stream, nullptr); +#endif } return ret; } @@ -469,12 +490,12 @@ void TensorRTEngine::DeclareOutput(const nvinfer1::ILayer *layer, "of the network at the same time.", name)); network()->markOutput(*output); - PADDLE_ENFORCE_EQ( - output->isNetworkOutput(), - true, - platform::errors::InvalidArgument( - "The output %s of TRT engine should be the output of the network.", - name)); + PADDLE_ENFORCE_EQ(output->isNetworkOutput(), + true, + platform::errors::InvalidArgument( + "The output %s of TRT engine should be the output " + "of the network.", + name)); } void TensorRTEngine::DeclareOutput(const std::string &name) { @@ -567,8 +588,8 @@ nvinfer1::ITensor *TensorRTEngine::ConvertWeight2ITensor( trt_in_shape.nbDims = 1; trt_in_shape.d[0] = 1; } - // In fact , this is not always right, because we can't determine if the 0th - // dimension is batch. Just for run chenqu's model + // In fact , this is not always right, because we can't determine if the + // 0th dimension is batch. Just for run chenqu's model if (!with_dynamic_shape()) { trt_in_shape.nbDims--; for (int i = 0; i < trt_in_shape.nbDims; i++) { @@ -626,8 +647,10 @@ void TensorRTEngine::Deserialize(const std::string &engine_serialized_data) { infer_engine_, platform::errors::Fatal( "Building TRT cuda engine failed when deserializing engine info. " - "Please check:\n1. Your TRT serialization is generated and loaded " - "on the same GPU architecture;\n2. The Paddle Inference version of " + "Please check:\n1. Your TRT serialization is generated and " + "loaded " + "on the same GPU architecture;\n2. The Paddle Inference version " + "of " "generating serialization file and doing inference are " "consistent.")); diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h index ff35be1c607c7f..d7d6c8c0e4ef9b 100644 --- a/paddle/fluid/inference/tensorrt/engine.h +++ b/paddle/fluid/inference/tensorrt/engine.h @@ -614,6 +614,7 @@ class TensorRTEngine { public: thread_local static int predictor_id_per_thread; + std::vector m_IOTensorNames; }; // class TensorRTEngine // Add a layer__ into engine__ with args ARGS. diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h index 8c75a7bc00f1c8..72c5083dfb648a 100644 --- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h +++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h @@ -16,6 +16,7 @@ #ifdef PADDLE_WITH_CUDA #include +#include #include #include #include @@ -611,6 +612,18 @@ class TensorRTEngineOp : public framework::OperatorBase { } } else { #if IS_TRT_VERSION_GE(6000) +#if IS_TRT_VERSION_GE(8500) + // LOG(INFO)<<"输入的名字"<setInputShape( + x.c_str(), inference::tensorrt::Vec2TRT_Dims(t_shape, x, true)); +#endif + trt_context->setBindingDimensions( bind_index, inference::tensorrt::Vec2TRT_Dims(t_shape, x, true)); // If this x is a shape tensor, we need call setInputShapeBinding @@ -644,6 +657,7 @@ class TensorRTEngineOp : public framework::OperatorBase { } trt_context->setInputShapeBinding(bind_index, shape_v.data()); } + #endif } runtime_batch = t_shape[0]; @@ -718,7 +732,18 @@ class TensorRTEngineOp : public framework::OperatorBase { ddim.push_back(dims.d[i]); } } else { -#if IS_TRT_VERSION_GE(6000) +#if IS_TRT_VERSION_GE(8500) + auto x_name = engine->engine()->getBindingName(bind_index); + auto dims = trt_context->getTensorShape(x_name); + int nb_dims = dims.nbDims; + for (; nb_dims > 0; nb_dims--) { + // some 'x 1' of shape is normal, no need to remove it + if (dims.d[nb_dims - 1] != 1 || + nb_dims == origin_output_rank[output_index]) + break; + } + for (int i = 0; i < nb_dims; i++) ddim.push_back(dims.d[i]); +#else auto dims = trt_context->getBindingDimensions(bind_index); int nb_dims = dims.nbDims; for (; nb_dims > 0; nb_dims--) { diff --git a/test/ir/inference/test_trt_convert_solve.py b/test/ir/inference/test_trt_convert_solve.py index c3f9b51d0d05c2..4d0b8e24649e92 100644 --- a/test/ir/inference/test_trt_convert_solve.py +++ b/test/ir/inference/test_trt_convert_solve.py @@ -89,9 +89,9 @@ def clear_dynamic_shape(): self.trt_param.precision = paddle_infer.PrecisionType.Float32 program_config.set_input_type(np.float32) yield self.create_inference_config(), (1, 3), 1e-5 - self.trt_param.precision = paddle_infer.PrecisionType.Half - program_config.set_input_type(np.float16) - yield self.create_inference_config(), (1, 3), 1e-3 + # self.trt_param.precision = paddle_infer.PrecisionType.Half + # program_config.set_input_type(np.float16) + # yield self.create_inference_config(), (1, 3), 1e-3 def test(self): self.run_test() From b0debc7f99534d67a9ef3a239c91f033c723d82d Mon Sep 17 00:00:00 2001 From: lizexu <2694294196@qq.com> Date: Thu, 14 Dec 2023 07:44:07 +0000 Subject: [PATCH 02/28] enqueueV3 --- paddle/fluid/inference/tensorrt/engine.cc | 19 ++++++++++--------- paddle/fluid/inference/tensorrt/engine.h | 2 +- .../operators/tensorrt/tensorrt_engine_op.h | 10 +++++----- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc index 3036cb2d8c200c..3eff14faa3e824 100644 --- a/paddle/fluid/inference/tensorrt/engine.cc +++ b/paddle/fluid/inference/tensorrt/engine.cc @@ -96,14 +96,14 @@ nvinfer1::IExecutionContext *TensorRTEngine::context() { } else { infer_context = infer_engine_->createExecutionContext(); } -#if IS_TRT_VERSION_GE(8500) - int32_t const endBindingIndex = infer_engine_->getNbIOTensors(); - for (int i = 0; i < endBindingIndex; ++i) { - const auto tensorName = infer_engine_->getIOTensorName(i); - m_IOTensorNames.emplace_back(tensorName); - LOG(INFO) << "IOTensorName: " << m_IOTensorNames[i]; - } -#endif + // #if IS_TRT_VERSION_GE(8500) + // int32_t const endBindingIndex = infer_engine_->getNbIOTensors(); + // for (int i = 0; i < endBindingIndex; ++i) { + // const auto tensorName = infer_engine_->getIOTensorName(i); + // m_IOTensorNames.emplace_back(tensorName); + // LOG(INFO)<<"IOTensorName: "<enqueue(batch_size, buffers->data(), stream, nullptr); } else { #if IS_TRT_VERSION_GE(8500) - LOG(INFO) << "enqueueV3"; + // LOG(INFO)<<"enqueueV3"; ret = context->enqueueV3(stream); + // LOG(INFO)<<"enqueueV3 end"; #else ret = context->enqueueV2(buffers->data(), stream, nullptr); #endif diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h index d7d6c8c0e4ef9b..6debb198c0016d 100644 --- a/paddle/fluid/inference/tensorrt/engine.h +++ b/paddle/fluid/inference/tensorrt/engine.h @@ -614,7 +614,7 @@ class TensorRTEngine { public: thread_local static int predictor_id_per_thread; - std::vector m_IOTensorNames; + // std::vector m_IOTensorNames; }; // class TensorRTEngine // Add a layer__ into engine__ with args ARGS. diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h index 72c5083dfb648a..a406c4120627af 100644 --- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h +++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h @@ -614,16 +614,15 @@ class TensorRTEngineOp : public framework::OperatorBase { #if IS_TRT_VERSION_GE(6000) #if IS_TRT_VERSION_GE(8500) // LOG(INFO)<<"输入的名字"<setInputShape( x.c_str(), inference::tensorrt::Vec2TRT_Dims(t_shape, x, true)); -#endif - +#else trt_context->setBindingDimensions( bind_index, inference::tensorrt::Vec2TRT_Dims(t_shape, x, true)); // If this x is a shape tensor, we need call setInputShapeBinding @@ -657,7 +656,7 @@ class TensorRTEngineOp : public framework::OperatorBase { } trt_context->setInputShapeBinding(bind_index, shape_v.data()); } - +#endif #endif } runtime_batch = t_shape[0]; @@ -734,6 +733,7 @@ class TensorRTEngineOp : public framework::OperatorBase { } else { #if IS_TRT_VERSION_GE(8500) auto x_name = engine->engine()->getBindingName(bind_index); + // LOG(INFO)<<"输出的名字"<getTensorShape(x_name); int nb_dims = dims.nbDims; for (; nb_dims > 0; nb_dims--) { From a38a31c45160bd5b41e6c7a02b98e19eb093f2f2 Mon Sep 17 00:00:00 2001 From: lizexu <2694294196@qq.com> Date: Mon, 25 Dec 2023 09:12:58 +0000 Subject: [PATCH 03/28] V3 --- paddle/fluid/inference/tensorrt/engine.cc | 10 +++++++--- paddle/fluid/inference/tensorrt/engine.h | 2 +- paddle/fluid/operators/tensorrt/tensorrt_engine_op.h | 5 ++++- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc index 3eff14faa3e824..68505b61292156 100644 --- a/paddle/fluid/inference/tensorrt/engine.cc +++ b/paddle/fluid/inference/tensorrt/engine.cc @@ -170,6 +170,7 @@ void TensorRTEngine::Execute(int batch_size, } Enqueue(infer_context, buffers, batch_size, stream); + LOG(INFO) << "after Enqueue"; } bool TensorRTEngine::Enqueue(nvinfer1::IExecutionContext *context, @@ -183,9 +184,12 @@ bool TensorRTEngine::Enqueue(nvinfer1::IExecutionContext *context, } #if IS_TRT_VERSION_GE(8500) + int num_bindings = context->getEngine().getNbBindings(); + LOG(INFO) << "num_bindings: " << num_bindings; + LOG(INFO) << "buffers->size(): " << buffers->size(); for (size_t j = 0; j < buffers->size(); ++j) { auto name = context->getEngine().getBindingName(j); - // LOG(INFO) << "setTensorAddress的名字" << name; + LOG(INFO) << "setTensorAddress的名字" << name; context->setTensorAddress(name, (*buffers)[j]); } #endif @@ -195,9 +199,9 @@ bool TensorRTEngine::Enqueue(nvinfer1::IExecutionContext *context, ret = context->enqueue(batch_size, buffers->data(), stream, nullptr); } else { #if IS_TRT_VERSION_GE(8500) - // LOG(INFO)<<"enqueueV3"; + LOG(INFO) << "enqueueV3"; ret = context->enqueueV3(stream); - // LOG(INFO)<<"enqueueV3 end"; + LOG(INFO) << "enqueueV3 end"; #else ret = context->enqueueV2(buffers->data(), stream, nullptr); #endif diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h index 6debb198c0016d..d7d6c8c0e4ef9b 100644 --- a/paddle/fluid/inference/tensorrt/engine.h +++ b/paddle/fluid/inference/tensorrt/engine.h @@ -614,7 +614,7 @@ class TensorRTEngine { public: thread_local static int predictor_id_per_thread; - // std::vector m_IOTensorNames; + std::vector m_IOTensorNames; }; // class TensorRTEngine // Add a layer__ into engine__ with args ARGS. diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h index a406c4120627af..388631aedb6e6e 100644 --- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h +++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h @@ -711,6 +711,8 @@ class TensorRTEngineOp : public framework::OperatorBase { "The TRT Engine OP only support " "float/double/int32_t/int64_t/float16/bool input.")); } + LOG(INFO) << "输入的名字 " << x.c_str(); + // trt_context->setTensorAddress(x.c_str(), buffers[bind_index]); } // Bind output tensor to TRT. @@ -733,7 +735,6 @@ class TensorRTEngineOp : public framework::OperatorBase { } else { #if IS_TRT_VERSION_GE(8500) auto x_name = engine->engine()->getBindingName(bind_index); - // LOG(INFO)<<"输出的名字"<getTensorShape(x_name); int nb_dims = dims.nbDims; for (; nb_dims > 0; nb_dims--) { @@ -777,6 +778,8 @@ class TensorRTEngineOp : public framework::OperatorBase { << TRT2FluidDataType(trt_type); buffers[bind_index] = static_cast( fluid_t->mutable_data(dev_place, TRT2FluidDataType(trt_type))); + LOG(INFO) << "输出的名字 " << y.c_str(); + // trt_context->setTensorAddress(y.c_str(), buffers[bind_index]); output_index += 1; } From 8b5dcab83b15efb953a5214cdf454a6f0d577e85 Mon Sep 17 00:00:00 2001 From: lizexu <2694294196@qq.com> Date: Fri, 5 Jan 2024 02:21:23 +0000 Subject: [PATCH 04/28] enqueueV3 --- paddle/fluid/inference/analysis/ir_pass_manager.cc | 3 +-- paddle/fluid/inference/tensorrt/engine.cc | 14 -------------- paddle/fluid/inference/tensorrt/engine.h | 1 - test/ir/inference/test_trt_convert_solve.py | 3 --- 4 files changed, 1 insertion(+), 20 deletions(-) diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc index 7fb5134a22752f..122dbbda8fabdd 100644 --- a/paddle/fluid/inference/analysis/ir_pass_manager.cc +++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc @@ -223,8 +223,7 @@ void IRPassManager::CreatePasses(Argument *argument, } pass->Set("use_static_engine", new bool(use_static_engine)); pass->Set("model_from_memory", new bool(argument->model_from_memory())); - pass->Set("use_inspector", - new bool(argument->tensorrt_use_inspector() || 1)); + pass->Set("use_inspector", new bool(argument->tensorrt_use_inspector())); pass->Set("inspector_serialize", new bool(argument->tensorrt_inspector_serialize())); pass->Set("trt_ops_run_float", diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc index 68505b61292156..7f04b3497f2f75 100644 --- a/paddle/fluid/inference/tensorrt/engine.cc +++ b/paddle/fluid/inference/tensorrt/engine.cc @@ -96,14 +96,6 @@ nvinfer1::IExecutionContext *TensorRTEngine::context() { } else { infer_context = infer_engine_->createExecutionContext(); } - // #if IS_TRT_VERSION_GE(8500) - // int32_t const endBindingIndex = infer_engine_->getNbIOTensors(); - // for (int i = 0; i < endBindingIndex; ++i) { - // const auto tensorName = infer_engine_->getIOTensorName(i); - // m_IOTensorNames.emplace_back(tensorName); - // LOG(INFO)<<"IOTensorName: "<getEngine().getNbBindings(); - LOG(INFO) << "num_bindings: " << num_bindings; - LOG(INFO) << "buffers->size(): " << buffers->size(); for (size_t j = 0; j < buffers->size(); ++j) { auto name = context->getEngine().getBindingName(j); - LOG(INFO) << "setTensorAddress的名字" << name; context->setTensorAddress(name, (*buffers)[j]); } #endif @@ -199,9 +187,7 @@ bool TensorRTEngine::Enqueue(nvinfer1::IExecutionContext *context, ret = context->enqueue(batch_size, buffers->data(), stream, nullptr); } else { #if IS_TRT_VERSION_GE(8500) - LOG(INFO) << "enqueueV3"; ret = context->enqueueV3(stream); - LOG(INFO) << "enqueueV3 end"; #else ret = context->enqueueV2(buffers->data(), stream, nullptr); #endif diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h index d7d6c8c0e4ef9b..ff35be1c607c7f 100644 --- a/paddle/fluid/inference/tensorrt/engine.h +++ b/paddle/fluid/inference/tensorrt/engine.h @@ -614,7 +614,6 @@ class TensorRTEngine { public: thread_local static int predictor_id_per_thread; - std::vector m_IOTensorNames; }; // class TensorRTEngine // Add a layer__ into engine__ with args ARGS. diff --git a/test/ir/inference/test_trt_convert_solve.py b/test/ir/inference/test_trt_convert_solve.py index 4d0b8e24649e92..f8fd924eb5e6b0 100644 --- a/test/ir/inference/test_trt_convert_solve.py +++ b/test/ir/inference/test_trt_convert_solve.py @@ -89,9 +89,6 @@ def clear_dynamic_shape(): self.trt_param.precision = paddle_infer.PrecisionType.Float32 program_config.set_input_type(np.float32) yield self.create_inference_config(), (1, 3), 1e-5 - # self.trt_param.precision = paddle_infer.PrecisionType.Half - # program_config.set_input_type(np.float16) - # yield self.create_inference_config(), (1, 3), 1e-3 def test(self): self.run_test() From 931e86072d79f726f6e957173409ac9cfb1d60f1 Mon Sep 17 00:00:00 2001 From: lizexu <2694294196@qq.com> Date: Fri, 5 Jan 2024 02:24:18 +0000 Subject: [PATCH 05/28] enqueueV3 --- paddle/fluid/inference/tensorrt/engine.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc index 7f04b3497f2f75..0971085789d02e 100644 --- a/paddle/fluid/inference/tensorrt/engine.cc +++ b/paddle/fluid/inference/tensorrt/engine.cc @@ -15,7 +15,6 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/engine.h" #include #include -#include #include From 4fe710586c1788f207b936de5bd5721ede176035 Mon Sep 17 00:00:00 2001 From: lizexu <2694294196@qq.com> Date: Fri, 5 Jan 2024 02:26:53 +0000 Subject: [PATCH 06/28] enqueueV3 --- paddle/fluid/inference/tensorrt/engine.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc index 0971085789d02e..9075136cc32a3c 100644 --- a/paddle/fluid/inference/tensorrt/engine.cc +++ b/paddle/fluid/inference/tensorrt/engine.cc @@ -174,7 +174,6 @@ bool TensorRTEngine::Enqueue(nvinfer1::IExecutionContext *context, } #if IS_TRT_VERSION_GE(8500) - int num_bindings = context->getEngine().getNbBindings(); for (size_t j = 0; j < buffers->size(); ++j) { auto name = context->getEngine().getBindingName(j); context->setTensorAddress(name, (*buffers)[j]); From ac8c26498ad6177851f6b8991c1346c3c3fef73f Mon Sep 17 00:00:00 2001 From: lizexu <2694294196@qq.com> Date: Fri, 5 Jan 2024 08:09:13 +0000 Subject: [PATCH 07/28] enqueueV3 --- paddle/fluid/operators/tensorrt/tensorrt_engine_op.h | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h index 388631aedb6e6e..9bae102fc3c7f2 100644 --- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h +++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h @@ -613,13 +613,6 @@ class TensorRTEngineOp : public framework::OperatorBase { } else { #if IS_TRT_VERSION_GE(6000) #if IS_TRT_VERSION_GE(8500) - // LOG(INFO)<<"输入的名字"<setInputShape( x.c_str(), inference::tensorrt::Vec2TRT_Dims(t_shape, x, true)); #else @@ -711,8 +704,6 @@ class TensorRTEngineOp : public framework::OperatorBase { "The TRT Engine OP only support " "float/double/int32_t/int64_t/float16/bool input.")); } - LOG(INFO) << "输入的名字 " << x.c_str(); - // trt_context->setTensorAddress(x.c_str(), buffers[bind_index]); } // Bind output tensor to TRT. @@ -778,8 +769,6 @@ class TensorRTEngineOp : public framework::OperatorBase { << TRT2FluidDataType(trt_type); buffers[bind_index] = static_cast( fluid_t->mutable_data(dev_place, TRT2FluidDataType(trt_type))); - LOG(INFO) << "输出的名字 " << y.c_str(); - // trt_context->setTensorAddress(y.c_str(), buffers[bind_index]); output_index += 1; } From 6a99a2c8d26b4d2bb9d3689d2b285e4ec369e4ff Mon Sep 17 00:00:00 2001 From: lizexu <2694294196@qq.com> Date: Mon, 8 Jan 2024 02:41:37 +0000 Subject: [PATCH 08/28] enqueueV3 --- paddle/fluid/operators/tensorrt/tensorrt_engine_op.h | 1 - 1 file changed, 1 deletion(-) diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h index 9bae102fc3c7f2..06334a3d8bfb43 100644 --- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h +++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h @@ -16,7 +16,6 @@ #ifdef PADDLE_WITH_CUDA #include -#include #include #include #include From 746f96db9531b0e1961b07693afb064944a76483 Mon Sep 17 00:00:00 2001 From: lizexu <2694294196@qq.com> Date: Mon, 8 Jan 2024 02:53:15 +0000 Subject: [PATCH 09/28] enqueueV3 --- test/ir/inference/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/ir/inference/CMakeLists.txt b/test/ir/inference/CMakeLists.txt index 020b84b4fd32a2..05280322d6b7d7 100755 --- a/test/ir/inference/CMakeLists.txt +++ b/test/ir/inference/CMakeLists.txt @@ -23,6 +23,9 @@ if(NOT WITH_DISTRIBUTE) list(REMOVE_ITEM TEST_TRT_CONVERTER "test_trt_convert_c_allreduce") endif() +list(REMOVE_ITEM TEST_TRT_CONVERTER "test_trt_convert_bitwise_and") +list(REMOVE_ITEM TEST_TRT_CONVERTER "test_trt_convert_bitwise_or") +list(REMOVE_ITEM TEST_TRT_CONVERTER "test_trt_convert_bitwise_not") if(WIN32) list(REMOVE_ITEM TEST_INFERENCE_IR_PASSES "test_trt_convert_fused_token_prune") From f2863a30eafc24f55079fbd175178e530ab70791 Mon Sep 17 00:00:00 2001 From: lizexu <2694294196@qq.com> Date: Mon, 8 Jan 2024 14:41:30 +0000 Subject: [PATCH 10/28] =?UTF-8?q?=E4=BF=AE=E6=94=B9windows=E4=B8=8Btest=5F?= =?UTF-8?q?tensorrt=5Fengine=E5=8D=95=E6=B5=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- paddle/fluid/inference/tensorrt/test_dynamic_engine.cc | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc index b565df0ec3d8cd..bfc12f9ec172d3 100644 --- a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc +++ b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc @@ -131,10 +131,17 @@ TEST_F(TensorRTDynamicShapeValueEngineTest, test_trt_dynamic_shape_value) { std::vector shape_v = {8, 8, 4}; PrepareInputOutput(x_v, {8, 8, 4}); PrepareShapeInput(shape_v); +#if IS_TRT_VERSION_GE(6000) + +#if IS_TRT_VERSION_GE(8500) + engine_->context()->setInputShape(x.c_str(), nvinfer1::Dims2{8, 32}); + engine_->context()->setInputShape(shape.c_ste(), shape_dim); +#else engine_->context()->setBindingDimensions(0, nvinfer1::Dims2{8, 32}); engine_->context()->setBindingDimensions(1, shape_dim); engine_->context()->setInputShapeBinding(1, shape_v.data()); - +#endif +#endif auto *x_gpu_data = input_.mutable_data(ctx_->GetPlace()); auto *shape_gpu_data = shape_.mutable_data(ctx_->GetPlace()); auto *y_gpu_data = output_.mutable_data(ctx_->GetPlace()); From 48bc511e78bf8b6bc8fea5cf7b270ec7dd2d260c Mon Sep 17 00:00:00 2001 From: lizexu <2694294196@qq.com> Date: Mon, 8 Jan 2024 15:48:58 +0000 Subject: [PATCH 11/28] =?UTF-8?q?=E4=BF=AE=E6=94=B9test=5Fdynamic=5Fengine?= =?UTF-8?q?.cc?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- paddle/fluid/inference/tensorrt/test_dynamic_engine.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc index bfc12f9ec172d3..66b33f9a96d5ef 100644 --- a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc +++ b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc @@ -134,8 +134,8 @@ TEST_F(TensorRTDynamicShapeValueEngineTest, test_trt_dynamic_shape_value) { #if IS_TRT_VERSION_GE(6000) #if IS_TRT_VERSION_GE(8500) - engine_->context()->setInputShape(x.c_str(), nvinfer1::Dims2{8, 32}); - engine_->context()->setInputShape(shape.c_ste(), shape_dim); + engine_->context()->setInputShape("input", nvinfer1::Dims2{8, 32}); + engine_->context()->setInputShape("shape", shape_dim); #else engine_->context()->setBindingDimensions(0, nvinfer1::Dims2{8, 32}); engine_->context()->setBindingDimensions(1, shape_dim); From 4a5b62d7d5e6f9756cad5d5ff8cf1458fa9de879 Mon Sep 17 00:00:00 2001 From: lizexu <2694294196@qq.com> Date: Tue, 9 Jan 2024 02:48:15 +0000 Subject: [PATCH 12/28] =?UTF-8?q?=E4=BF=AE=E6=94=B9enqueueV3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- paddle/fluid/inference/tensorrt/engine.cc | 7 ------- paddle/fluid/inference/tensorrt/test_dynamic_engine.cc | 8 -------- paddle/fluid/operators/tensorrt/tensorrt_engine_op.h | 4 ++++ 3 files changed, 4 insertions(+), 15 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc index 9075136cc32a3c..8006901d89ab77 100644 --- a/paddle/fluid/inference/tensorrt/engine.cc +++ b/paddle/fluid/inference/tensorrt/engine.cc @@ -173,13 +173,6 @@ bool TensorRTEngine::Enqueue(nvinfer1::IExecutionContext *context, return cuda_graph_.Launch(stream); } -#if IS_TRT_VERSION_GE(8500) - for (size_t j = 0; j < buffers->size(); ++j) { - auto name = context->getEngine().getBindingName(j); - context->setTensorAddress(name, (*buffers)[j]); - } -#endif - bool ret; if (!with_dynamic_shape()) { ret = context->enqueue(batch_size, buffers->data(), stream, nullptr); diff --git a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc index 66b33f9a96d5ef..f029d8285d1a61 100644 --- a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc +++ b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc @@ -131,17 +131,9 @@ TEST_F(TensorRTDynamicShapeValueEngineTest, test_trt_dynamic_shape_value) { std::vector shape_v = {8, 8, 4}; PrepareInputOutput(x_v, {8, 8, 4}); PrepareShapeInput(shape_v); -#if IS_TRT_VERSION_GE(6000) - -#if IS_TRT_VERSION_GE(8500) - engine_->context()->setInputShape("input", nvinfer1::Dims2{8, 32}); - engine_->context()->setInputShape("shape", shape_dim); -#else engine_->context()->setBindingDimensions(0, nvinfer1::Dims2{8, 32}); engine_->context()->setBindingDimensions(1, shape_dim); engine_->context()->setInputShapeBinding(1, shape_v.data()); -#endif -#endif auto *x_gpu_data = input_.mutable_data(ctx_->GetPlace()); auto *shape_gpu_data = shape_.mutable_data(ctx_->GetPlace()); auto *y_gpu_data = output_.mutable_data(ctx_->GetPlace()); diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h index 06334a3d8bfb43..8fcc3fe73de46e 100644 --- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h +++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h @@ -703,6 +703,8 @@ class TensorRTEngineOp : public framework::OperatorBase { "The TRT Engine OP only support " "float/double/int32_t/int64_t/float16/bool input.")); } + const char *binding_name = engine->engine()->getBindingName(bind_index); + trt_context->setTensorAddress(binding_name, buffers[bind_index]); } // Bind output tensor to TRT. @@ -769,6 +771,8 @@ class TensorRTEngineOp : public framework::OperatorBase { buffers[bind_index] = static_cast( fluid_t->mutable_data(dev_place, TRT2FluidDataType(trt_type))); output_index += 1; + const char *binding_name = engine->engine()->getBindingName(bind_index); + trt_context->setTensorAddress(binding_name, buffers[bind_index]); } if (!engine->with_dynamic_shape()) { From 8c5f9c953ee76308be06326a7aaa882496485596 Mon Sep 17 00:00:00 2001 From: lizexu <2694294196@qq.com> Date: Tue, 9 Jan 2024 02:52:07 +0000 Subject: [PATCH 13/28] windows --- paddle/fluid/inference/tensorrt/engine.cc | 7 +++++++ paddle/fluid/operators/tensorrt/tensorrt_engine_op.h | 4 ---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc index 8006901d89ab77..9075136cc32a3c 100644 --- a/paddle/fluid/inference/tensorrt/engine.cc +++ b/paddle/fluid/inference/tensorrt/engine.cc @@ -173,6 +173,13 @@ bool TensorRTEngine::Enqueue(nvinfer1::IExecutionContext *context, return cuda_graph_.Launch(stream); } +#if IS_TRT_VERSION_GE(8500) + for (size_t j = 0; j < buffers->size(); ++j) { + auto name = context->getEngine().getBindingName(j); + context->setTensorAddress(name, (*buffers)[j]); + } +#endif + bool ret; if (!with_dynamic_shape()) { ret = context->enqueue(batch_size, buffers->data(), stream, nullptr); diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h index 8fcc3fe73de46e..06334a3d8bfb43 100644 --- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h +++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h @@ -703,8 +703,6 @@ class TensorRTEngineOp : public framework::OperatorBase { "The TRT Engine OP only support " "float/double/int32_t/int64_t/float16/bool input.")); } - const char *binding_name = engine->engine()->getBindingName(bind_index); - trt_context->setTensorAddress(binding_name, buffers[bind_index]); } // Bind output tensor to TRT. @@ -771,8 +769,6 @@ class TensorRTEngineOp : public framework::OperatorBase { buffers[bind_index] = static_cast( fluid_t->mutable_data(dev_place, TRT2FluidDataType(trt_type))); output_index += 1; - const char *binding_name = engine->engine()->getBindingName(bind_index); - trt_context->setTensorAddress(binding_name, buffers[bind_index]); } if (!engine->with_dynamic_shape()) { From 73ee1183daf0c401e35e7e7a7c850ec4bc67e53c Mon Sep 17 00:00:00 2001 From: lizexu <2694294196@qq.com> Date: Tue, 9 Jan 2024 04:20:53 +0000 Subject: [PATCH 14/28] enqueueV3 --- .../fluid/inference/tensorrt/test_dynamic_engine.cc | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc index f029d8285d1a61..90ea8062d4b446 100644 --- a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc +++ b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc @@ -131,9 +131,16 @@ TEST_F(TensorRTDynamicShapeValueEngineTest, test_trt_dynamic_shape_value) { std::vector shape_v = {8, 8, 4}; PrepareInputOutput(x_v, {8, 8, 4}); PrepareShapeInput(shape_v); +#if IS_TRT_VERSION_GE(8500) + const char *tensorName1 = engine_->engine()->getBindingName(0); + const char *tensorName2 = engine_->engine()->getBindingName(1); + engine_->context()->setInputShape(tensorName1, nvinfer1::Dims2{8, 32}); + engine_->context()->setInputShape(tensorName2, shape_dim); +#else engine_->context()->setBindingDimensions(0, nvinfer1::Dims2{8, 32}); engine_->context()->setBindingDimensions(1, shape_dim); engine_->context()->setInputShapeBinding(1, shape_v.data()); +#endif auto *x_gpu_data = input_.mutable_data(ctx_->GetPlace()); auto *shape_gpu_data = shape_.mutable_data(ctx_->GetPlace()); auto *y_gpu_data = output_.mutable_data(ctx_->GetPlace()); @@ -141,6 +148,12 @@ TEST_F(TensorRTDynamicShapeValueEngineTest, test_trt_dynamic_shape_value) { buffers[0] = reinterpret_cast(x_gpu_data); buffers[1] = reinterpret_cast(shape_gpu_data); buffers[2] = reinterpret_cast(y_gpu_data); +#if IS_TRT_VERSION_GE(8500) + for (int i = 0; i < buffers.size(); i++) { + auto name = engine_->context()->getBindingName(i); + engine_->context()->setTensorAddress(name, (*buffers)[i]); + } +#endif engine_->Execute(-1, &buffers, ctx_->stream()); cudaStreamSynchronize(ctx_->stream()); From 2cf9791b2eb7e155fe8f2c778dcd921d29b760db Mon Sep 17 00:00:00 2001 From: lizexu <2694294196@qq.com> Date: Tue, 9 Jan 2024 04:23:09 +0000 Subject: [PATCH 15/28] enqueueV3 --- paddle/fluid/inference/tensorrt/test_dynamic_engine.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc index 90ea8062d4b446..c5ec26d0009c98 100644 --- a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc +++ b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc @@ -150,7 +150,7 @@ TEST_F(TensorRTDynamicShapeValueEngineTest, test_trt_dynamic_shape_value) { buffers[2] = reinterpret_cast(y_gpu_data); #if IS_TRT_VERSION_GE(8500) for (int i = 0; i < buffers.size(); i++) { - auto name = engine_->context()->getBindingName(i); + auto name = engine_->engine()->getBindingName(i); engine_->context()->setTensorAddress(name, (*buffers)[i]); } #endif From 77b1b3601531820c693e39bb3fa3ceda84eed679 Mon Sep 17 00:00:00 2001 From: lizexu <2694294196@qq.com> Date: Tue, 9 Jan 2024 04:34:03 +0000 Subject: [PATCH 16/28] enqueueuV3 --- paddle/fluid/inference/tensorrt/test_dynamic_engine.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc index c5ec26d0009c98..41c667c93e46af 100644 --- a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc +++ b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc @@ -149,9 +149,9 @@ TEST_F(TensorRTDynamicShapeValueEngineTest, test_trt_dynamic_shape_value) { buffers[1] = reinterpret_cast(shape_gpu_data); buffers[2] = reinterpret_cast(y_gpu_data); #if IS_TRT_VERSION_GE(8500) - for (int i = 0; i < buffers.size(); i++) { + for (size_t i = 0; i < buffers.size(); i++) { auto name = engine_->engine()->getBindingName(i); - engine_->context()->setTensorAddress(name, (*buffers)[i]); + engine_->context()->setTensorAddress(name, buffers[i]); } #endif From 7d8c4703dc7b006e727e94d812b268fadaf90235 Mon Sep 17 00:00:00 2001 From: lizexu <2694294196@qq.com> Date: Tue, 9 Jan 2024 06:42:30 +0000 Subject: [PATCH 17/28] enqueueV3 --- paddle/fluid/inference/tensorrt/CMakeLists.txt | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/CMakeLists.txt b/paddle/fluid/inference/tensorrt/CMakeLists.txt index eb6b0b5143813e..e0a5b53455dfe0 100644 --- a/paddle/fluid/inference/tensorrt/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/CMakeLists.txt @@ -28,17 +28,12 @@ nv_test( test_tensorrt SRCS test_tensorrt.cc DEPS dynload_cuda device_context dynamic_loader) -if(WIN32) - nv_test( - test_tensorrt_engine - SRCS test_engine.cc test_dynamic_engine.cc - DEPS dynload_cuda tensorrt_engine tensorrt_plugin) -elseif(WITH_CINN) +if(NOT WIN32 AND WITH_CINN) nv_test( test_tensorrt_engine SRCS test_engine.cc test_dynamic_engine.cc DEPS fleet_executor dynload_cuda tensorrt_engine tensorrt_plugin python) -else() +elseif(NOT WIN32) nv_test( test_tensorrt_engine SRCS test_engine.cc test_dynamic_engine.cc From cdf70e374665df3a12d82d6795a70adafe650c3d Mon Sep 17 00:00:00 2001 From: lizexu <2694294196@qq.com> Date: Tue, 9 Jan 2024 10:48:59 +0000 Subject: [PATCH 18/28] =?UTF-8?q?=E8=A7=A3=E5=86=B3windows=E4=B8=8A?= =?UTF-8?q?=E5=8D=95=E6=B5=8B=E8=B6=85=E6=97=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../inference/test_trt_convert_bitwise_not.py | 34 +++---------------- 1 file changed, 5 insertions(+), 29 deletions(-) diff --git a/test/ir/inference/test_trt_convert_bitwise_not.py b/test/ir/inference/test_trt_convert_bitwise_not.py index 8d19425011ed48..28dda440b12e2b 100644 --- a/test/ir/inference/test_trt_convert_bitwise_not.py +++ b/test/ir/inference/test_trt_convert_bitwise_not.py @@ -31,18 +31,12 @@ def sample_program_configs(self): self.trt_param.workspace_size = 1073741824 def generate_input1(dims, batch, attrs: List[Dict[str, Any]]): - if dims == 0: - return np.random.random([]).astype(np.bool8) - elif dims == 1: + if dims == 1: return np.random.random([32]).astype(np.bool8) - elif dims == 2: - return np.random.random([3, 32]).astype(np.int8) - elif dims == 3: - return np.random.random([3, 32, 32]).astype(np.int32) else: - return np.random.random([batch, 3, 32, 32]).astype(np.int64) + return np.random.random([3, 32]).astype(np.int8) - for dims in [0, 1, 2, 3, 4]: + for dims in [1, 2]: for batch in [1, 4]: self.dims = dims dics = [{}] @@ -76,32 +70,14 @@ def sample_predictor_configs( self, program_config ) -> (paddle_infer.Config, List[int], float): def generate_dynamic_shape(attrs): - if self.dims == 0: - self.dynamic_shape.min_input_shape = {"input_data": []} - self.dynamic_shape.max_input_shape = {"input_data": []} - self.dynamic_shape.opt_input_shape = {"input_data": []} - elif self.dims == 1: + if self.dims == 1: self.dynamic_shape.min_input_shape = {"input_data": [1]} self.dynamic_shape.max_input_shape = {"input_data": [64]} self.dynamic_shape.opt_input_shape = {"input_data": [32]} - elif self.dims == 2: + else: self.dynamic_shape.min_input_shape = {"input_data": [1, 16]} self.dynamic_shape.max_input_shape = {"input_data": [4, 32]} self.dynamic_shape.opt_input_shape = {"input_data": [3, 32]} - elif self.dims == 3: - self.dynamic_shape.min_input_shape = {"input_data": [1, 16, 16]} - self.dynamic_shape.max_input_shape = {"input_data": [4, 32, 32]} - self.dynamic_shape.opt_input_shape = {"input_data": [3, 32, 32]} - else: - self.dynamic_shape.min_input_shape = { - "input_data": [1, 3, 16, 16] - } - self.dynamic_shape.max_input_shape = { - "input_data": [4, 3, 32, 32] - } - self.dynamic_shape.opt_input_shape = { - "input_data": [1, 3, 32, 32] - } def clear_dynamic_shape(): self.dynamic_shape.min_input_shape = {} From 392f7b2aa7c53ec6081af70a967bc963ed00a47d Mon Sep 17 00:00:00 2001 From: lizexu <2694294196@qq.com> Date: Tue, 9 Jan 2024 12:18:18 +0000 Subject: [PATCH 19/28] enqueueV3 --- paddle/fluid/inference/tensorrt/CMakeLists.txt | 12 +++++++++--- .../fluid/inference/tensorrt/test_dynamic_engine.cc | 10 ++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/CMakeLists.txt b/paddle/fluid/inference/tensorrt/CMakeLists.txt index e0a5b53455dfe0..9f2718e81aefa4 100644 --- a/paddle/fluid/inference/tensorrt/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/CMakeLists.txt @@ -28,12 +28,18 @@ nv_test( test_tensorrt SRCS test_tensorrt.cc DEPS dynload_cuda device_context dynamic_loader) -if(NOT WIN32 AND WITH_CINN) +if(WIN32) nv_test( test_tensorrt_engine SRCS test_engine.cc test_dynamic_engine.cc - DEPS fleet_executor dynload_cuda tensorrt_engine tensorrt_plugin python) -elseif(NOT WIN32) + DEPS dynload_cuda tensorrt_engine tensorrt_plugin) +elseif(WITH_CINN) + nv_test( + test_tensorrt_engine + SRCS test_engine.cc test_dynamic_engine.cc + DEPS fleet_executor cinn_compiler dynload_cuda tensorrt_engine + tensorrt_plugin python) +else() nv_test( test_tensorrt_engine SRCS test_engine.cc test_dynamic_engine.cc diff --git a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc index 41c667c93e46af..3b96afe476f759 100644 --- a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc +++ b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc @@ -134,6 +134,7 @@ TEST_F(TensorRTDynamicShapeValueEngineTest, test_trt_dynamic_shape_value) { #if IS_TRT_VERSION_GE(8500) const char *tensorName1 = engine_->engine()->getBindingName(0); const char *tensorName2 = engine_->engine()->getBindingName(1); + // 在设置形状之前和之后添加日志 engine_->context()->setInputShape(tensorName1, nvinfer1::Dims2{8, 32}); engine_->context()->setInputShape(tensorName2, shape_dim); #else @@ -155,13 +156,22 @@ TEST_F(TensorRTDynamicShapeValueEngineTest, test_trt_dynamic_shape_value) { } #endif + // 执行前打印确认信息 engine_->Execute(-1, &buffers, ctx_->stream()); + // 同步流并打印确认信息 cudaStreamSynchronize(ctx_->stream()); + + // 获取输出并进行验证 std::vector y_cpu; GetOutput(&y_cpu); ASSERT_EQ(y_cpu[0], 0); ASSERT_EQ(y_cpu[1], 1); +#if IS_TRT_VERSION_GE(8500) + const char *name = engine_->engine()->getBindingName(2); + auto dims = engine_->context()->getTensorShape(name); +#else auto dims = engine_->context()->getBindingDimensions(2); +#endif ASSERT_EQ(dims.nbDims, 3); ASSERT_EQ(dims.d[0], 8); ASSERT_EQ(dims.d[1], 8); From 8e1c0474a357c4d29f9b413ab8a3cd5f026ccced Mon Sep 17 00:00:00 2001 From: lizexu <2694294196@qq.com> Date: Tue, 9 Jan 2024 13:01:34 +0000 Subject: [PATCH 20/28] enqueueV3 --- paddle/fluid/inference/tensorrt/test_dynamic_engine.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc index 3b96afe476f759..25483bcaef96e9 100644 --- a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc +++ b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc @@ -167,8 +167,8 @@ TEST_F(TensorRTDynamicShapeValueEngineTest, test_trt_dynamic_shape_value) { ASSERT_EQ(y_cpu[0], 0); ASSERT_EQ(y_cpu[1], 1); #if IS_TRT_VERSION_GE(8500) - const char *name = engine_->engine()->getBindingName(2); - auto dims = engine_->context()->getTensorShape(name); + const char *name1 = engine_->engine()->getBindingName(2); + auto dims = engine_->context()->getTensorShape(name1); #else auto dims = engine_->context()->getBindingDimensions(2); #endif From 5485242d019274815739b72ca001a0a06cf73972 Mon Sep 17 00:00:00 2001 From: lizexu <2694294196@qq.com> Date: Tue, 9 Jan 2024 13:21:11 +0000 Subject: [PATCH 21/28] enqueueV3 --- paddle/fluid/inference/tensorrt/CMakeLists.txt | 12 +++--------- .../fluid/inference/tensorrt/test_dynamic_engine.cc | 11 ----------- 2 files changed, 3 insertions(+), 20 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/CMakeLists.txt b/paddle/fluid/inference/tensorrt/CMakeLists.txt index 9f2718e81aefa4..e0a5b53455dfe0 100644 --- a/paddle/fluid/inference/tensorrt/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/CMakeLists.txt @@ -28,18 +28,12 @@ nv_test( test_tensorrt SRCS test_tensorrt.cc DEPS dynload_cuda device_context dynamic_loader) -if(WIN32) - nv_test( - test_tensorrt_engine - SRCS test_engine.cc test_dynamic_engine.cc - DEPS dynload_cuda tensorrt_engine tensorrt_plugin) -elseif(WITH_CINN) +if(NOT WIN32 AND WITH_CINN) nv_test( test_tensorrt_engine SRCS test_engine.cc test_dynamic_engine.cc - DEPS fleet_executor cinn_compiler dynload_cuda tensorrt_engine - tensorrt_plugin python) -else() + DEPS fleet_executor dynload_cuda tensorrt_engine tensorrt_plugin python) +elseif(NOT WIN32) nv_test( test_tensorrt_engine SRCS test_engine.cc test_dynamic_engine.cc diff --git a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc index 25483bcaef96e9..185f6beb823c16 100644 --- a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc +++ b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc @@ -134,7 +134,6 @@ TEST_F(TensorRTDynamicShapeValueEngineTest, test_trt_dynamic_shape_value) { #if IS_TRT_VERSION_GE(8500) const char *tensorName1 = engine_->engine()->getBindingName(0); const char *tensorName2 = engine_->engine()->getBindingName(1); - // 在设置形状之前和之后添加日志 engine_->context()->setInputShape(tensorName1, nvinfer1::Dims2{8, 32}); engine_->context()->setInputShape(tensorName2, shape_dim); #else @@ -155,23 +154,13 @@ TEST_F(TensorRTDynamicShapeValueEngineTest, test_trt_dynamic_shape_value) { engine_->context()->setTensorAddress(name, buffers[i]); } #endif - - // 执行前打印确认信息 engine_->Execute(-1, &buffers, ctx_->stream()); - // 同步流并打印确认信息 cudaStreamSynchronize(ctx_->stream()); - - // 获取输出并进行验证 std::vector y_cpu; GetOutput(&y_cpu); ASSERT_EQ(y_cpu[0], 0); ASSERT_EQ(y_cpu[1], 1); -#if IS_TRT_VERSION_GE(8500) - const char *name1 = engine_->engine()->getBindingName(2); - auto dims = engine_->context()->getTensorShape(name1); -#else auto dims = engine_->context()->getBindingDimensions(2); -#endif ASSERT_EQ(dims.nbDims, 3); ASSERT_EQ(dims.d[0], 8); ASSERT_EQ(dims.d[1], 8); From 1e9ba0f3b54e41aa549dbf8095ede4a2512024f6 Mon Sep 17 00:00:00 2001 From: lizexu <2694294196@qq.com> Date: Tue, 9 Jan 2024 14:35:38 +0000 Subject: [PATCH 22/28] enqueueV3 --- paddle/fluid/inference/tensorrt/CMakeLists.txt | 9 +++++++-- paddle/fluid/inference/tensorrt/test_dynamic_engine.cc | 7 +++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/CMakeLists.txt b/paddle/fluid/inference/tensorrt/CMakeLists.txt index e0a5b53455dfe0..eb6b0b5143813e 100644 --- a/paddle/fluid/inference/tensorrt/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/CMakeLists.txt @@ -28,12 +28,17 @@ nv_test( test_tensorrt SRCS test_tensorrt.cc DEPS dynload_cuda device_context dynamic_loader) -if(NOT WIN32 AND WITH_CINN) +if(WIN32) + nv_test( + test_tensorrt_engine + SRCS test_engine.cc test_dynamic_engine.cc + DEPS dynload_cuda tensorrt_engine tensorrt_plugin) +elseif(WITH_CINN) nv_test( test_tensorrt_engine SRCS test_engine.cc test_dynamic_engine.cc DEPS fleet_executor dynload_cuda tensorrt_engine tensorrt_plugin python) -elseif(NOT WIN32) +else() nv_test( test_tensorrt_engine SRCS test_engine.cc test_dynamic_engine.cc diff --git a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc index 185f6beb823c16..a1914b10829496 100644 --- a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc +++ b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc @@ -154,13 +154,20 @@ TEST_F(TensorRTDynamicShapeValueEngineTest, test_trt_dynamic_shape_value) { engine_->context()->setTensorAddress(name, buffers[i]); } #endif + engine_->Execute(-1, &buffers, ctx_->stream()); cudaStreamSynchronize(ctx_->stream()); + std::vector y_cpu; GetOutput(&y_cpu); ASSERT_EQ(y_cpu[0], 0); ASSERT_EQ(y_cpu[1], 1); +#if IS_TRT_VERSION_GE(8500) + const char *name1 = engine_->engine()->getBindingName(2); + auto dims = engine_->context()->getTensorShape(name1); +#else auto dims = engine_->context()->getBindingDimensions(2); +#endif ASSERT_EQ(dims.nbDims, 3); ASSERT_EQ(dims.d[0], 8); ASSERT_EQ(dims.d[1], 8); From 9796cfb4c78126fac443a4c64eb8818ec5c6436d Mon Sep 17 00:00:00 2001 From: lizexu <2694294196@qq.com> Date: Tue, 9 Jan 2024 16:27:32 +0000 Subject: [PATCH 23/28] enqueueV3 --- paddle/fluid/inference/tensorrt/CMakeLists.txt | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/CMakeLists.txt b/paddle/fluid/inference/tensorrt/CMakeLists.txt index eb6b0b5143813e..e0a5b53455dfe0 100644 --- a/paddle/fluid/inference/tensorrt/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/CMakeLists.txt @@ -28,17 +28,12 @@ nv_test( test_tensorrt SRCS test_tensorrt.cc DEPS dynload_cuda device_context dynamic_loader) -if(WIN32) - nv_test( - test_tensorrt_engine - SRCS test_engine.cc test_dynamic_engine.cc - DEPS dynload_cuda tensorrt_engine tensorrt_plugin) -elseif(WITH_CINN) +if(NOT WIN32 AND WITH_CINN) nv_test( test_tensorrt_engine SRCS test_engine.cc test_dynamic_engine.cc DEPS fleet_executor dynload_cuda tensorrt_engine tensorrt_plugin python) -else() +elseif(NOT WIN32) nv_test( test_tensorrt_engine SRCS test_engine.cc test_dynamic_engine.cc From b74369d72b279386b524093b89fc026be8b82dd4 Mon Sep 17 00:00:00 2001 From: lizexu <2694294196@qq.com> Date: Wed, 10 Jan 2024 07:32:54 +0000 Subject: [PATCH 24/28] enqueueV3 --- paddle/fluid/inference/tensorrt/CMakeLists.txt | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/CMakeLists.txt b/paddle/fluid/inference/tensorrt/CMakeLists.txt index e0a5b53455dfe0..d8f7d84349c461 100644 --- a/paddle/fluid/inference/tensorrt/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/CMakeLists.txt @@ -28,12 +28,7 @@ nv_test( test_tensorrt SRCS test_tensorrt.cc DEPS dynload_cuda device_context dynamic_loader) -if(NOT WIN32 AND WITH_CINN) - nv_test( - test_tensorrt_engine - SRCS test_engine.cc test_dynamic_engine.cc - DEPS fleet_executor dynload_cuda tensorrt_engine tensorrt_plugin python) -elseif(NOT WIN32) +if(NOT WIN32) nv_test( test_tensorrt_engine SRCS test_engine.cc test_dynamic_engine.cc From f00a70230ad5c8e6b486db891e45de82c45768fd Mon Sep 17 00:00:00 2001 From: lizexu <2694294196@qq.com> Date: Wed, 10 Jan 2024 07:45:00 +0000 Subject: [PATCH 25/28] enqueueV3 --- paddle/fluid/operators/tensorrt/tensorrt_engine_op.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h index 06334a3d8bfb43..b7e9758f8d6d1e 100644 --- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h +++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h @@ -733,7 +733,9 @@ class TensorRTEngineOp : public framework::OperatorBase { nb_dims == origin_output_rank[output_index]) break; } - for (int i = 0; i < nb_dims; i++) ddim.push_back(dims.d[i]); + for (int i = 0; i < nb_dims; i++) { + ddim.push_back(dims.d[i]); + } #else auto dims = trt_context->getBindingDimensions(bind_index); int nb_dims = dims.nbDims; @@ -743,7 +745,9 @@ class TensorRTEngineOp : public framework::OperatorBase { nb_dims == origin_output_rank[output_index]) break; } - for (int i = 0; i < nb_dims; i++) ddim.push_back(dims.d[i]); + for (int i = 0; i < nb_dims; i++) { + ddim.push_back(dims.d[i]); + } #endif } auto *fluid_v = scope.FindVar(y); From 605b8072c7997675e08eccabb290bc357fb208f1 Mon Sep 17 00:00:00 2001 From: lizexu <2694294196@qq.com> Date: Wed, 10 Jan 2024 09:17:43 +0000 Subject: [PATCH 26/28] enqueueV3 --- test/ir/inference/CMakeLists.txt | 3 --- test/ir/inference/test_trt_convert_bitwise_and.py | 5 +++-- test/ir/inference/test_trt_convert_bitwise_not.py | 3 +++ test/ir/inference/test_trt_convert_bitwise_or.py | 5 +++-- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/test/ir/inference/CMakeLists.txt b/test/ir/inference/CMakeLists.txt index ac27a37ca458f2..185ca22f897f69 100755 --- a/test/ir/inference/CMakeLists.txt +++ b/test/ir/inference/CMakeLists.txt @@ -23,9 +23,6 @@ if(NOT WITH_DISTRIBUTE) list(REMOVE_ITEM TEST_TRT_CONVERTER "test_trt_convert_c_allreduce") endif() -list(REMOVE_ITEM TEST_TRT_CONVERTER "test_trt_convert_bitwise_and") -list(REMOVE_ITEM TEST_TRT_CONVERTER "test_trt_convert_bitwise_or") -list(REMOVE_ITEM TEST_TRT_CONVERTER "test_trt_convert_bitwise_not") if(WIN32) list(REMOVE_ITEM TEST_INFERENCE_IR_PASSES "test_trt_convert_fused_token_prune") diff --git a/test/ir/inference/test_trt_convert_bitwise_and.py b/test/ir/inference/test_trt_convert_bitwise_and.py index 0bfa21b5a36de5..7342063e36e5c6 100644 --- a/test/ir/inference/test_trt_convert_bitwise_and.py +++ b/test/ir/inference/test_trt_convert_bitwise_and.py @@ -25,6 +25,9 @@ class TrtConvertBitwiseAndTest(TrtLayerAutoScanTest): def is_program_valid(self, program_config: ProgramConfig) -> bool: + ver = paddle_infer.get_trt_compile_version() + if ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 8400: + return False return True def sample_program_configs(self): @@ -135,12 +138,10 @@ def generate_trt_nodes_num(attrs, dynamic_shape): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - program_config.set_input_type(np.float32) yield self.create_inference_config(), generate_trt_nodes_num( attrs, True ), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - program_config.set_input_type(np.float16) yield self.create_inference_config(), generate_trt_nodes_num( attrs, True ), 1e-3 diff --git a/test/ir/inference/test_trt_convert_bitwise_not.py b/test/ir/inference/test_trt_convert_bitwise_not.py index 28dda440b12e2b..b08137f3b53693 100644 --- a/test/ir/inference/test_trt_convert_bitwise_not.py +++ b/test/ir/inference/test_trt_convert_bitwise_not.py @@ -25,6 +25,9 @@ class TrtConvertActivationTest(TrtLayerAutoScanTest): def is_program_valid(self, program_config: ProgramConfig) -> bool: + ver = paddle_infer.get_trt_compile_version() + if ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 8400: + return False return True def sample_program_configs(self): diff --git a/test/ir/inference/test_trt_convert_bitwise_or.py b/test/ir/inference/test_trt_convert_bitwise_or.py index fae933c0cb1851..d5e763d11fc1c5 100644 --- a/test/ir/inference/test_trt_convert_bitwise_or.py +++ b/test/ir/inference/test_trt_convert_bitwise_or.py @@ -25,6 +25,9 @@ class TrtConvertBitwiseOrTest(TrtLayerAutoScanTest): def is_program_valid(self, program_config: ProgramConfig) -> bool: + ver = paddle_infer.get_trt_compile_version() + if ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 8400: + return False return True def sample_program_configs(self): @@ -135,12 +138,10 @@ def generate_trt_nodes_num(attrs, dynamic_shape): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - program_config.set_input_type(np.float32) yield self.create_inference_config(), generate_trt_nodes_num( attrs, True ), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - program_config.set_input_type(np.float16) yield self.create_inference_config(), generate_trt_nodes_num( attrs, True ), 1e-3 From 4ddc18f5902d0cc3d60494bc37ca332889ff8864 Mon Sep 17 00:00:00 2001 From: lizexu <2694294196@qq.com> Date: Wed, 10 Jan 2024 09:22:33 +0000 Subject: [PATCH 27/28] enqueuueV3 --- test/ir/inference/test_trt_convert_solve.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/ir/inference/test_trt_convert_solve.py b/test/ir/inference/test_trt_convert_solve.py index f8fd924eb5e6b0..c3117ee335740c 100644 --- a/test/ir/inference/test_trt_convert_solve.py +++ b/test/ir/inference/test_trt_convert_solve.py @@ -87,8 +87,9 @@ def clear_dynamic_shape(): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - program_config.set_input_type(np.float32) yield self.create_inference_config(), (1, 3), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), (1, 3), 1e-3 def test(self): self.run_test() From a440cd449515b82da9e23d5e3c18ce3b9a93d965 Mon Sep 17 00:00:00 2001 From: lizexu <2694294196@qq.com> Date: Wed, 10 Jan 2024 09:40:49 +0000 Subject: [PATCH 28/28] enqueueV3 --- test/ir/inference/test_trt_convert_bitwise_and.py | 3 --- test/ir/inference/test_trt_convert_bitwise_not.py | 3 --- test/ir/inference/test_trt_convert_bitwise_or.py | 3 --- 3 files changed, 9 deletions(-) diff --git a/test/ir/inference/test_trt_convert_bitwise_and.py b/test/ir/inference/test_trt_convert_bitwise_and.py index 7342063e36e5c6..015977b4387653 100644 --- a/test/ir/inference/test_trt_convert_bitwise_and.py +++ b/test/ir/inference/test_trt_convert_bitwise_and.py @@ -25,9 +25,6 @@ class TrtConvertBitwiseAndTest(TrtLayerAutoScanTest): def is_program_valid(self, program_config: ProgramConfig) -> bool: - ver = paddle_infer.get_trt_compile_version() - if ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 8400: - return False return True def sample_program_configs(self): diff --git a/test/ir/inference/test_trt_convert_bitwise_not.py b/test/ir/inference/test_trt_convert_bitwise_not.py index b08137f3b53693..28dda440b12e2b 100644 --- a/test/ir/inference/test_trt_convert_bitwise_not.py +++ b/test/ir/inference/test_trt_convert_bitwise_not.py @@ -25,9 +25,6 @@ class TrtConvertActivationTest(TrtLayerAutoScanTest): def is_program_valid(self, program_config: ProgramConfig) -> bool: - ver = paddle_infer.get_trt_compile_version() - if ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 8400: - return False return True def sample_program_configs(self): diff --git a/test/ir/inference/test_trt_convert_bitwise_or.py b/test/ir/inference/test_trt_convert_bitwise_or.py index d5e763d11fc1c5..84cef306b4b551 100644 --- a/test/ir/inference/test_trt_convert_bitwise_or.py +++ b/test/ir/inference/test_trt_convert_bitwise_or.py @@ -25,9 +25,6 @@ class TrtConvertBitwiseOrTest(TrtLayerAutoScanTest): def is_program_valid(self, program_config: ProgramConfig) -> bool: - ver = paddle_infer.get_trt_compile_version() - if ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 8400: - return False return True def sample_program_configs(self):