
Commit 6b10c0e

[Inference] save_optimized_model_pass support tensorrt (#55893)
* fix cudnn 8.7+ bug on cudnnConvolutionBiasActivationForward
* save_optimized_model_pass support tensorrt
* update
* update
* fix compile
* update
* fix ut timeout
1 parent 68b0cf9 commit 6b10c0e

20 files changed: +916 -923 lines

paddle/fluid/framework/ir/auto_mixed_precision_pass.cc

Lines changed: 1 addition & 1 deletion
@@ -249,7 +249,7 @@ void AutoMixedPrecisionPass::Init(Graph* graph) const {
   subgraphes_[i] = graph->GetSubGraph(i);
   all_op_nodes_[i] = TopologySortOperations(*subgraphes_[i]);
   VLOG(4) << "subgraph " << i << " has " << all_op_nodes_[i].size()
-          << "op nodes";
+          << " op nodes";
   for (auto* var_node : subgraphes_[i]->Nodes()) {
     if (!var_node->IsVar()) continue;

paddle/fluid/framework/naive_executor.cc

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,6 @@ void NaiveExecutor::Run() {
6464
VLOG(4) << std::this_thread::get_id() << " run "
6565
<< op->DebugStringEx(scope_) << " on scope " << scope_;
6666
op->SetIsCalledByExecutor(false);
67-
#ifdef PADDLE_WITH_NVTX
68-
platform::CudaNvtxRangePush(op->Type() + "|" + op->OutputVars(true).front(),
69-
platform::NvtxRangeColor::Green);
70-
#endif
7167

7268
for (auto &func : input_hookfuncs_) {
7369
func(op.get(), scope_);
@@ -77,7 +73,14 @@ void NaiveExecutor::Run() {
7773
op->SetOutputHooks(output_hookfuncs_);
7874
}
7975

76+
#ifdef PADDLE_WITH_NVTX
77+
platform::CudaNvtxRangePush(op->Type() + "|" + op->OutputVars(true).front(),
78+
platform::NvtxRangeColor::Green);
79+
#endif
8080
op->Run(*scope_, place_);
81+
#ifdef PADDLE_WITH_NVTX
82+
platform::CudaNvtxRangePop();
83+
#endif
8184

8285
// Update the shared_holder so that only records the max one.
8386
if (reuse_cache_.count(op.get())) {
@@ -105,9 +108,6 @@ void NaiveExecutor::Run() {
105108
}
106109
}
107110

108-
#ifdef PADDLE_WITH_NVTX
109-
platform::CudaNvtxRangePop();
110-
#endif
111111
for (auto &func : output_hookfuncs_) {
112112
func(op.get(), scope_);
113113
}
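
These hunks narrow the NVTX range so it brackets only op->Run() rather than the whole loop body including the hook callbacks, giving per-op profiler ranges that measure just operator execution. A minimal, self-contained sketch of the same scoping idea using an RAII guard; the guard class and the stub push/pop functions below are illustrative, not part of this commit:

#include <iostream>
#include <string>

// Stand-ins for platform::CudaNvtxRangePush/Pop; the real calls in the diff
// above are only compiled under PADDLE_WITH_NVTX.
void NvtxRangePush(const std::string& name) { std::cout << "push " << name << "\n"; }
void NvtxRangePop() { std::cout << "pop\n"; }

// Hypothetical RAII guard: the range pops when the guard leaves scope, so it
// brackets exactly the statements it encloses, here only the operator run.
class NvtxRangeGuard {
 public:
  explicit NvtxRangeGuard(const std::string& name) { NvtxRangePush(name); }
  ~NvtxRangeGuard() { NvtxRangePop(); }
  NvtxRangeGuard(const NvtxRangeGuard&) = delete;
  NvtxRangeGuard& operator=(const NvtxRangeGuard&) = delete;
};

int main() {
  // hooks would run outside the range, as in the new code
  {
    NvtxRangeGuard guard("relu|relu_out");  // op->Type() + "|" + first output
    // op->Run(*scope_, place_);            // the only work inside the range
  }
  return 0;
}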

paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc

Lines changed: 107 additions & 99 deletions
(Large diff; contents not rendered.)

paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc

Lines changed: 2 additions & 2 deletions
@@ -14,6 +14,7 @@

 #include "paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h"

+#include <memory>
 #include <string>

 #include "paddle/fluid/framework/executor.h"
@@ -63,8 +64,7 @@ void IrGraphBuildPass::RunImpl(Argument *argument) {
                                        "set."));
   }

-  auto graph = std::unique_ptr<framework::ir::Graph>(
-      new framework::ir::Graph(argument->main_program()));
+  auto graph = std::make_unique<framework::ir::Graph>(argument->main_program());
   argument->SetMainGraph(graph.release());
   auto *scope_ptr = argument->scope_ptr();
   PADDLE_ENFORCE_NOT_NULL(scope_ptr,
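
Several files in this commit replace ptr.reset(new T(...)) with std::make_unique<T>(...) (C++14, hence the <memory> include added above). make_unique names the type once and cannot leak if a sibling argument expression throws. A small stand-alone sketch of the pattern; the Graph type here is a stand-in, not Paddle's:

#include <memory>
#include <string>

// Stand-in for framework::ir::Graph; only the construction pattern matters.
struct Graph {
  explicit Graph(std::string program) : program_(std::move(program)) {}
  std::string program_;
};

int main() {
  // Before: the type is named twice and a raw `new` is exposed.
  auto g1 = std::unique_ptr<Graph>(new Graph("main_program"));
  // After: one type mention, exception-safe inside larger expressions.
  auto g2 = std::make_unique<Graph>("main_program");
  return 0;
}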

paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc

Lines changed: 5 additions & 14 deletions
@@ -24,16 +24,6 @@ namespace inference {
 namespace analysis {

 void SaveOptimizedModelPass::SaveOptimizedModel(Argument* argument) {
-  if (!argument->save_optimized_model()) {
-    LOG(WARNING) << "save_optim_cache_model is turned off, skip "
-                    "save_optimized_model_pass";
-    return;
-  }
-  if (!argument->enable_ir_optim()) {
-    LOG(WARNING) << "ir_optim is turned off, skip save_optimized_model_pass";
-    return;
-  }
-
   std::string model_opt_cache_dir = argument->optim_cache_dir();
   if (!model_opt_cache_dir.empty()) {
     if (!PathExists(model_opt_cache_dir)) {
@@ -55,9 +45,11 @@ void SaveOptimizedModelPass::SaveOptimizedModel(Argument* argument) {
   auto* graph = argument->main_graph_ptr();

   framework::ProgramDesc optimized_program_desc;
+
   // NOTE(liuyuanle): If the following line of code is not added, an error
   // [SegmentFault] may occur!
   optimized_program_desc.CopyFrom(*argument->main_program().Proto());
+
   framework::ir::GraphToProgram(*graph, &optimized_program_desc);

   auto IsPersistable = [](const framework::VarDesc* var) {
@@ -133,11 +125,10 @@ void SaveOptimizedModelPass::SaveOptimizedModel(Argument* argument) {
 }

 void SaveOptimizedModelPass::RunImpl(Argument* argument) {
-  // TODO(inference): Support trt.
-  if (argument->use_xpu() ||
-      (argument->use_gpu() && !argument->use_tensorrt())) {
-    SaveOptimizedModel(argument);
+  if (!argument->save_optimized_model() || !argument->enable_ir_optim()) {
+    return;
   }
+  SaveOptimizedModel(argument);
 }

 std::string SaveOptimizedModelPass::repr() const {
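
This is the behavioral core of the commit: the old RunImpl ran the pass only for XPU, or for GPU without TensorRT, while the new version gates purely on the save_optimized_model and enable_ir_optim switches, so GPU + TensorRT configurations now save their optimized model too (and the warnings become silent early returns). A condensed before/after sketch; Argument is reduced to a plain struct for illustration:

#include <iostream>

// Reduced stand-in for analysis::Argument, for illustration only.
struct Argument {
  bool save_optimized_model = true;
  bool enable_ir_optim = true;
  bool use_xpu = false;
  bool use_gpu = true;
  bool use_tensorrt = true;
};

void SaveOptimizedModel(Argument* arg) { std::cout << "saving optimized model\n"; }

// Old gate: XPU, or GPU without TensorRT; GPU + TensorRT never saved.
void RunImplOld(Argument* arg) {
  if (arg->use_xpu || (arg->use_gpu && !arg->use_tensorrt)) {
    SaveOptimizedModel(arg);
  }
}

// New gate: only the explicit switches matter, so GPU + TensorRT saves too.
void RunImplNew(Argument* arg) {
  if (!arg->save_optimized_model || !arg->enable_ir_optim) {
    return;
  }
  SaveOptimizedModel(arg);
}

int main() {
  Argument arg;      // a GPU + TensorRT configuration
  RunImplOld(&arg);  // prints nothing: the old gate excluded TensorRT
  RunImplNew(&arg);  // prints "saving optimized model"
  return 0;
}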

paddle/fluid/inference/api/analysis_predictor.cc

Lines changed: 9 additions & 4 deletions
@@ -641,7 +641,7 @@ bool AnalysisPredictor::PrepareProgram(
 }

 bool AnalysisPredictor::CreateExecutor() {
-  executor_.reset(new paddle::framework::NaiveExecutor(place_));
+  executor_ = std::make_unique<paddle::framework::NaiveExecutor>(place_);
   return true;
 }

@@ -1341,7 +1341,7 @@ bool AnalysisPredictor::GetFetch(std::vector<paddle::Tensor> *outputs,
 void AnalysisPredictor::PrepareArgument() {
   VLOG(3) << "AnalysisPredictor::PrepareArgument";
   // Init std::unique_ptr argument_.
-  argument_.reset(new Argument);
+  argument_ = std::make_unique<Argument>();
   argument_->SetUseGPU(config_.use_gpu());
   argument_->SetUseCutlass(config_.use_cutlass_);
   argument_->SetUseFcPadding(config_.use_fc_padding());

@@ -1570,7 +1570,8 @@ void AnalysisPredictor::PrepareArgument() {

   if (!config_.ir_optim()) {
     argument_->SetEnableIrOptim(false);
-    if (config_.enable_gpu_mixed_) {
+    if (config_.enable_gpu_mixed_ &&
+        model_precision_ == phi::DataType::FLOAT32) {
       argument_->SetEnableIrOptim(true);
       pass_builder->ClearPasses();
       pass_builder->AppendPass("auto_mixed_precision_pass");

@@ -1886,6 +1887,10 @@ AnalysisPredictor::GetInputTypes() {
       input_type[name] = paddle_infer::DataType::UINT8;
     } else if (dtype == paddle::framework::proto::VarType::INT8) {
       input_type[name] = paddle_infer::DataType::INT8;
+    } else if (dtype == paddle::framework::proto::VarType::FP64) {
+      input_type[name] = paddle_infer::DataType::FLOAT64;
+    } else if (dtype == paddle::framework::proto::VarType::BOOL) {
+      input_type[name] = paddle_infer::DataType::BOOL;
     } else {
       PADDLE_THROW(paddle::platform::errors::Unimplemented(
           "Unsupported data type `%s` when get input dtype ", dtype));

@@ -2609,7 +2614,7 @@ AnalysisPredictor::~AnalysisPredictor() {
 #ifdef PADDLE_WITH_TENSORRT
   if (config_.trt_engine_memory_sharing()) {
     inference::Singleton<inference::tensorrt::TRTEngineManager>::Global()
-        .releaseContextMemory(predictor_id_);
+        .ReleaseContextMemory(predictor_id_);
   }
 #endif
 }
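
GetInputTypes() gains FP64 and BOOL branches. For a growing else-if ladder like this, a table-driven mapping is a common alternative; the sketch below shows that shape with stand-in enums rather than Paddle's actual proto::VarType and paddle_infer::DataType values:

#include <iostream>
#include <unordered_map>

// Stand-ins for paddle::framework::proto::VarType::Type and
// paddle_infer::DataType; the real enums live in Paddle's headers.
enum class VarType { FP32, INT64, INT32, UINT8, INT8, FP64, BOOL };
enum class DataType { FLOAT32, INT64, INT32, UINT8, INT8, FLOAT64, BOOL };

// One lookup table instead of a growing else-if ladder; a miss is reported to
// the caller, mirroring the PADDLE_THROW fallback in the diff above.
bool ToInferDataType(VarType in, DataType* out) {
  static const std::unordered_map<VarType, DataType> kMap = {
      {VarType::FP32, DataType::FLOAT32}, {VarType::INT64, DataType::INT64},
      {VarType::INT32, DataType::INT32},  {VarType::UINT8, DataType::UINT8},
      {VarType::INT8, DataType::INT8},    {VarType::FP64, DataType::FLOAT64},
      {VarType::BOOL, DataType::BOOL},
  };
  auto it = kMap.find(in);
  if (it == kMap.end()) return false;
  *out = it->second;
  return true;
}

int main() {
  DataType dt;
  std::cout << ToInferDataType(VarType::FP64, &dt) << "\n";  // 1: now mapped
  return 0;
}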

paddle/fluid/inference/tensorrt/convert/op_converter.h

Lines changed: 5 additions & 10 deletions
@@ -167,7 +167,7 @@ class OpConverter {
                                    op_desc.Type()));

     it->SetEngine(engine);
-    engine->SetScope(scope);
+    engine->SetScope(&scope);
     it->SetBlockDesc(block);
     (*it)(op, scope, test_mode);

@@ -301,7 +301,7 @@ class OpConverter {
       nvinfer1::DataType in_dtype = FluidDataType2TRT(var->GetDataType());
       if (engine->precision() == phi::DataType::FLOAT16 &&
           in_dtype == nvinfer1::DataType::kFLOAT &&
-          engine->EnableLowPrecisionIO()) {
+          engine->LowPrecisionIOEnabled()) {
         in_dtype = nvinfer1::DataType::kHALF;
       }

@@ -360,7 +360,7 @@ class OpConverter {
       nvinfer1::DataType out_dtype = FluidDataType2TRT(var->GetDataType());
       if (engine->precision() == phi::DataType::FLOAT16 &&
           out_dtype == nvinfer1::DataType::kFLOAT &&
-          engine->EnableLowPrecisionIO()) {
+          engine->LowPrecisionIOEnabled()) {
         out_dtype = nvinfer1::DataType::kHALF;
       }
       engine->DeclareOutput(output, out_dtype);

@@ -470,7 +470,7 @@ class OpConverter {
       auto shape = newShape->getDimensions();
       shuffle->setReshapeDimensions(shape);
     }
-    if (name != "") {
+    if (!name.empty()) {
       shuffle->setName(name.c_str());
     }
     return shuffle->getOutput(0);

@@ -481,7 +481,7 @@ class OpConverter {
                           const std::string& name = "") {
     auto* shuffle = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
     shuffle->setReshapeDimensions(shape);
-    if (name != "") {
+    if (!name.empty()) {
       shuffle->setName(name.c_str());
     }
     return shuffle->getOutput(0);

@@ -774,11 +774,6 @@ class OpConverter {
   bool test_mode_;

  private:
-  // registered op converter map, whose key is the fluid op type, and value is
-  // the pointer position of corresponding OpConverter class.
-  std::unordered_map<std::string, OpConverter*> converters_;
-  // fluid inference scope
-  framework::Scope* scope_{nullptr};
   std::mutex mut_;
 };
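
The renamed accessor (the setter-sounding EnableLowPrecisionIO becomes LowPrecisionIOEnabled) guards the same demotion logic in two places, once for declared inputs and once for outputs. That duplicated condition could live in one helper; a sketch with reduced stand-in enums rather than the real nvinfer1/phi types:

#include <iostream>

// Reduced stand-ins for phi::DataType and nvinfer1::DataType.
enum class Precision { FLOAT32, FLOAT16 };
enum class TrtDType { kFLOAT, kHALF, kINT32 };

// Mirrors the condition repeated in both hunks: demote FP32 I/O to FP16 only
// when the engine itself runs FP16 and low-precision I/O was requested.
TrtDType AdjustIODType(TrtDType dtype,
                       Precision engine_precision,
                       bool low_precision_io_enabled) {
  if (engine_precision == Precision::FLOAT16 && dtype == TrtDType::kFLOAT &&
      low_precision_io_enabled) {
    return TrtDType::kHALF;
  }
  return dtype;
}

int main() {
  TrtDType in = AdjustIODType(TrtDType::kFLOAT, Precision::FLOAT16, true);
  std::cout << (in == TrtDType::kHALF) << "\n";  // 1: demoted to half
  return 0;
}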

paddle/fluid/inference/tensorrt/convert/test_custom_plugin_creater.cc

Lines changed: 10 additions & 10 deletions
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include <gtest/gtest.h>  // NOLINT
+#include <memory>

 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"

@@ -95,7 +96,11 @@ TEST(CustomPluginCreater, StaticShapePlugin) {

   // init trt engine
   std::unique_ptr<TensorRTEngine> engine_;
-  engine_.reset(new TensorRTEngine(5, 1 << 15));
+
+  TensorRTEngine::ConstructionParams params;
+  params.max_batch_size = 5;
+  params.max_workspace_size = 1 << 15;
+  engine_ = std::make_unique<TensorRTEngine>(params);
   engine_->InitNetwork();

   engine_->DeclareInput(

@@ -173,15 +178,10 @@ TEST(CustomPluginCreater, DynamicShapePlugin) {
   std::map<std::string, std::vector<int>> optim_input_shape = {
       {"x", {1, 2, 5, 5}}};

-  engine_.reset(new TensorRTEngine(5,
-                                   1 << 15,
-                                   phi::DataType::FLOAT32,
-                                   nullptr,
-                                   0,
-                                   true,
-                                   min_input_shape,
-                                   max_input_shape,
-                                   optim_input_shape));
+  TensorRTEngine::ConstructionParams params;
+  params.max_batch_size = 5;
+  params.max_workspace_size = 1 << 15;
+  engine_ = std::make_unique<TensorRTEngine>(params);
   engine_->InitNetwork();

   LOG(INFO) << "with_dynamic_shape " << engine_->with_dynamic_shape();
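
TensorRTEngine construction now goes through a ConstructionParams struct rather than a positional constructor that had grown to nine arguments (see the DynamicShapePlugin hunk above). Callers set only the fields they care about, and new options no longer break existing call sites. A self-contained sketch of the idiom; the Engine class and its field set are reduced for illustration:

#include <iostream>
#include <memory>

// Reduced sketch of the named-parameter-struct idiom used by
// TensorRTEngine::ConstructionParams; the field set is illustrative only.
class Engine {
 public:
  struct ConstructionParams {
    int max_batch_size = 1;
    size_t max_workspace_size = 1 << 30;
    bool with_dynamic_shape = false;
  };

  explicit Engine(const ConstructionParams& params) : params_(params) {}
  int max_batch_size() const { return params_.max_batch_size; }

 private:
  ConstructionParams params_;
};

int main() {
  // Callers set only the fields they care about, by name; adding a new option
  // later does not disturb existing call sites.
  Engine::ConstructionParams params;
  params.max_batch_size = 5;
  params.max_workspace_size = 1 << 15;
  auto engine = std::make_unique<Engine>(params);
  std::cout << engine->max_batch_size() << "\n";  // 5
  return 0;
}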

paddle/fluid/inference/tensorrt/convert/test_op_converter.cc

Lines changed: 6 additions & 1 deletion
@@ -14,6 +14,8 @@ limitations under the License. */

 #include <gtest/gtest.h>  // NOLINT

+#include <memory>
+
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"

@@ -28,7 +30,10 @@ TEST(OpConverter, ConvertBlock) {

   // init trt engine
   std::unique_ptr<TensorRTEngine> engine_;
-  engine_.reset(new TensorRTEngine(5, 1 << 15));
+  TensorRTEngine::ConstructionParams params;
+  params.max_batch_size = 5;
+  params.max_workspace_size = 1 << 15;
+  engine_ = std::make_unique<TensorRTEngine>(params);
   engine_->InitNetwork();

   engine_->DeclareInput(

paddle/fluid/inference/tensorrt/convert/ut_helper.h

Lines changed: 5 additions & 2 deletions
@@ -88,7 +88,10 @@ class TRTConvertValidation {
     PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream_),
                       0,
                       platform::errors::External("cudaStreamCreate error."));
-    engine_.reset(new TensorRTEngine(max_batch_size, workspace_size));
+    TensorRTEngine::ConstructionParams params;
+    params.max_batch_size = max_batch_size;
+    params.max_workspace_size = workspace_size;
+    engine_ = std::make_unique<TensorRTEngine>(params);
     engine_->InitNetwork();
   }

@@ -155,7 +158,7 @@ class TRTConvertValidation {
     engine_->FreezeNetwork();

     // Declare outputs.
-    op_desc_.reset(new framework::OpDesc(desc, nullptr));
+    op_desc_ = std::make_unique<framework::OpDesc>(desc, nullptr);
   }

   // We use the set 'neglected_output' here, because some Ops like batch norm,
