Refine TRT interface and add common models UT #13624

Merged: 2 commits, Sep 30, 2018
10 changes: 7 additions & 3 deletions paddle/fluid/inference/analysis/analyzer_tester.cc
@@ -37,12 +37,16 @@ TEST(Analyzer, analysis_without_tensorrt) {
TEST(Analyzer, analysis_with_tensorrt) {
FLAGS_IA_enable_tensorrt_subgraph_engine = true;
Argument argument;
argument.Set<int>("minimum_subgraph_size", new int(0));
argument.Set<int>("max_batch_size", new int(3));
argument.Set<int>("workspace_size", new int(1 << 20));
argument.Set<std::string>("precision_mode", new std::string("FP32"));
argument.fluid_model_dir.reset(new std::string(FLAGS_inference_model_dir));
Analyzer analyser;
analyser.Run(&argument);
}

-void TestWord2vecPrediction(const std::string &model_path) {
+void TestWord2vecPrediction(const std::string& model_path) {
NativeConfig config;
config.model_dir = model_path;
config.use_gpu = false;
@@ -73,8 +77,8 @@ void TestWord2vecPrediction(const std::string &model_path) {
// The outputs' buffers are in CPU memory.
for (size_t i = 0; i < std::min(5UL, num_elements); i++) {
LOG(INFO) << "data: "
-          << static_cast<float *>(outputs.front().data.data())[i];
-PADDLE_ENFORCE(static_cast<float *>(outputs.front().data.data())[i],
+          << static_cast<float*>(outputs.front().data.data())[i];
+PADDLE_ENFORCE(static_cast<float*>(outputs.front().data.data())[i],
result[i]);
}
}
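Note on the Argument::Set/Get pattern above: attributes are registered under string keys with heap-allocated values whose ownership passes to the Argument. A minimal sketch of how such a type-erased attribute map can work (the storage details are an assumption for illustration, not Paddle's actual implementation):

#include <cassert>
#include <map>
#include <memory>
#include <string>

class AttrMapSketch {
 public:
  template <typename T>
  void Set(const std::string& key, T* value) {
    // Takes ownership; the deleter remembers the concrete type T.
    attrs_[key] = std::shared_ptr<void>(
        value, [](void* p) { delete static_cast<T*>(p); });
  }

  template <typename T>
  T& Get(const std::string& key) {
    assert(attrs_.count(key));  // no cross-type check: Get<T> must match Set<T>
    return *static_cast<T*>(attrs_.at(key).get());
  }

 private:
  std::map<std::string, std::shared_ptr<void>> attrs_;
};

// Usage mirroring the test above:
//   AttrMapSketch m;
//   m.Set<int>("max_batch_size", new int(3));
//   int mbs = m.Get<int>("max_batch_size");  // 3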
@@ -97,8 +97,10 @@ void DataFlowGraphToFluidPass::AddFluidOp(Node *node) {
}
}

-void CreateTrtEngineOp(Node *node, const DataFlowGraph &graph,
+void CreateTrtEngineOp(Node *node, Argument *argument,
framework::proto::BlockDesc *block) {
+PADDLE_ENFORCE(argument->main_dfg.get());
+const DataFlowGraph &graph = *(argument->main_dfg);
static int counter{0};
PADDLE_ENFORCE(node->IsFunctionBlock());
framework::OpDesc desc;
@@ -204,7 +206,10 @@ void CreateTrtEngineOp(Node *node, const DataFlowGraph &graph,

PADDLE_ENFORCE(!block->vars().empty(), "the block has no var-desc");
// Set attrs

SetAttr(desc.Proto(), "subgraph", block->SerializeAsString());
SetAttr(desc.Proto(), "max_batch_size", argument->Get<int>("max_batch_size"));
SetAttr(desc.Proto(), "workspace_size", argument->Get<int>("workspace_size"));
SetAttr(desc.Proto(), "engine_uniq_key", "trt-" + std::to_string(counter++));
SetAttr(desc.Proto(), "parameters", ExtractParameters(graph.nodes.nodes()));
SetAttr(desc.Proto(), "output_name_mapping", output_mapping);
@@ -248,7 +253,7 @@ void DataFlowGraphToFluidPass::AddEngineOp(Node *node) {
*block_desc.Proto()->mutable_vars() =
argument_->origin_program_desc->blocks(0).vars();
PADDLE_ENFORCE(!block_desc.Proto()->vars().empty());
-CreateTrtEngineOp(node, *argument_->main_dfg, block_desc.Proto());
+CreateTrtEngineOp(node, argument_, block_desc.Proto());
auto *main_block = desc_->mutable_blocks(framework::kRootBlockIndex);
auto *op = main_block->add_ops();
PADDLE_ENFORCE(!node->pb_msg().empty(), "failed to set desc for block");
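For readers tracing the refactor: the per-engine limits now travel with the op desc instead of process-wide gflags. The round trip, pieced together from the hunks in this PR:

// analysis side (this pass) stamps the limits onto the engine op:
//   SetAttr(desc.Proto(), "max_batch_size", argument->Get<int>("max_batch_size"));
//   SetAttr(desc.Proto(), "workspace_size", argument->Get<int>("workspace_size"));
//
// runtime side (tensorrt_engine_op.h, further down) reads them back:
//   int max_batch_size = context.Attr<int>("max_batch_size");
//   int workspace_size = context.Attr<int>("workspace_size");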
2 changes: 2 additions & 0 deletions paddle/fluid/inference/analysis/subgraph_splitter.cc
@@ -309,6 +309,8 @@ void SubGraphFuse::operator()() { ReplaceNodesWithSubGraphs(); }
void SubGraphFuse::ReplaceNodesWithSubGraphs() {
auto subgraphs = SubGraphSplitter(graph_, node_inside_subgraph_teller_)();
for (auto &subgraph : subgraphs) {
+if (subgraph.size() <= argument_->Get<int>("minimum_subgraph_size"))
+  continue;
std::unordered_set<Node *> subgraph_uniq(subgraph.begin(), subgraph.end());
// replace this sub-graph with the first node. Two steps: 1. Create a Block
// Node that contains this subgraph 2. Mark the nodes inside the sub-graph
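A quick, self-contained illustration of the threshold semantics (hypothetical sizes; note the check uses <=, so a subgraph must be strictly larger than minimum_subgraph_size to be fused):

#include <cstddef>
#include <iostream>

// Mirrors: if (subgraph.size() <= argument_->Get<int>("minimum_subgraph_size")) continue;
bool WouldFuse(std::size_t subgraph_size, int minimum_subgraph_size) {
  return subgraph_size > static_cast<std::size_t>(minimum_subgraph_size);
}

int main() {
  std::cout << WouldFuse(3, 3) << "\n";  // 0: a 3-node subgraph is skipped at threshold 3
  std::cout << WouldFuse(4, 3) << "\n";  // 1: a 4-node subgraph becomes a TRT engine op
  return 0;
}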
9 changes: 7 additions & 2 deletions paddle/fluid/inference/analysis/subgraph_splitter.h
@@ -20,6 +20,7 @@ limitations under the License. */

#include <vector>

#include "paddle/fluid/inference/analysis/argument.h"
#include "paddle/fluid/inference/analysis/data_flow_graph.h"
#include "paddle/fluid/inference/analysis/node.h"

@@ -63,8 +64,11 @@ class SubGraphFuse {
public:
using NodeInsideSubgraphTeller = SubGraphSplitter::NodeInsideSubgraphTeller;

-SubGraphFuse(DataFlowGraph *graph, const NodeInsideSubgraphTeller &teller)
-    : graph_(graph), node_inside_subgraph_teller_(teller) {}
+SubGraphFuse(DataFlowGraph *graph, const NodeInsideSubgraphTeller &teller,
+             Argument *argument)
+    : graph_(graph),
+      node_inside_subgraph_teller_(teller),
+      argument_(argument) {}

// The main method which run all the logic.
void operator()();
@@ -76,6 +80,7 @@ class SubGraphFuse {
private:
DataFlowGraph *graph_;
NodeInsideSubgraphTeller node_inside_subgraph_teller_;
+Argument *argument_;
};

} // namespace analysis
4 changes: 3 additions & 1 deletion paddle/fluid/inference/analysis/subgraph_splitter_tester.cc
@@ -66,10 +66,12 @@ TEST(SubGraphSplitter, Split) {
TEST(SubGraphSplitter, Fuse) {
auto desc = LoadProgramDesc(FLAGS_inference_model_dir + "/__model__");
auto dfg = ProgramDescToDFG(desc);
+Argument argument;
+argument.Set<int>("minimum_subgraph_size", new int(3));

size_t count0 = dfg.nodes.size();

-SubGraphFuse fuse(&dfg, teller);
+SubGraphFuse fuse(&dfg, teller, &argument);
fuse();

int count1 = 0;
2 changes: 1 addition & 1 deletion paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc
@@ -24,7 +24,7 @@ TensorRTSubGraphPass::TensorRTSubGraphPass(
: node_inside_subgraph_teller_(teller) {}

void TensorRTSubGraphPass::Run(DataFlowGraph *graph) {
-SubGraphFuse(graph, node_inside_subgraph_teller_)();
+SubGraphFuse(graph, node_inside_subgraph_teller_, argument_)();
VLOG(4) << "debug info "
<< graph->HumanReadableInfo(false /*show_values*/,
true /*show_functions*/);
6 changes: 5 additions & 1 deletion paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h
@@ -33,7 +33,10 @@ class TensorRTSubGraphPass : public DataFlowGraphPass {

explicit TensorRTSubGraphPass(const NodeInsideSubgraphTeller& teller);

-bool Initialize(Argument* argument) override { return true; }
+bool Initialize(Argument* argument) override {
+  argument_ = argument;
+  return true;
+}

// This class gets a sub-graph as input and determines whether to transform
// this sub-graph into TensorRT.
@@ -46,6 +49,7 @@ class TensorRTSubGraphPass : public DataFlowGraphPass {

private:
NodeInsideSubgraphTeller node_inside_subgraph_teller_;
+Argument* argument_;
};

} // namespace analysis
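The cached raw pointer is only safe if the Argument outlives the pass. A minimal sketch of the assumed call order (the driver below is illustrative, not the real Analyzer):

// Hypothetical driver: the same Argument stays alive across Initialize() and Run().
void RunTrtSubGraphPassSketch(Argument* argument, DataFlowGraph* graph) {
  TensorRTSubGraphPass pass(
      [](const Node* node) { return true; });  // illustrative teller: fuse everything
  pass.Initialize(argument);  // caches argument_
  pass.Run(graph);            // SubGraphFuse reads "minimum_subgraph_size" via argument_
}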
@@ -36,6 +36,10 @@ TEST(TensorRTSubGraphPass, main) {
};

Argument argument(FLAGS_inference_model_dir);
argument.Set<int>("minimum_subgraph_size", new int(0));
argument.Set<int>("max_batch_size", new int(3));
argument.Set<int>("workspace_size", new int(1 << 20));
argument.Set<std::string>("precision_mode", new std::string("FP32"));

DFG_GraphvizDrawPass::Config config{FLAGS_dot_dir, "origin"};
DFG_GraphvizDrawPass::Config config1{FLAGS_dot_dir, "fusion"};
10 changes: 8 additions & 2 deletions paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc
@@ -35,8 +35,6 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
bool Init(const std::shared_ptr<framework::Scope>& parent_scope) {
FLAGS_IA_enable_tensorrt_subgraph_engine = true;
VLOG(3) << "Predictor::init()";
-FLAGS_tensorrt_max_batch_size = config_.max_batch_size;
-FLAGS_tensorrt_workspace_size = config_.workspace_size;
if (config_.use_gpu) {
place_ = paddle::platform::CUDAPlace(config_.device);
} else {
@@ -92,6 +90,14 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
void OptimizeInferenceProgram() {
// Analyze inference_program
Argument argument;

argument.Set<int>("minimum_subgraph_size",
new int(config_.minimum_subgraph_size));
argument.Set<int>("max_batch_size", new int(config_.max_batch_size));
argument.Set<int>("workspace_size", new int(config_.workspace_size));
argument.Set<std::string>("precision_mode",
new std::string(config_.precision_mode));

if (!config_.model_dir.empty()) {
argument.fluid_model_dir.reset(new std::string(config_.model_dir));
} else {
8 changes: 8 additions & 0 deletions paddle/fluid/inference/api/paddle_inference_api.h
@@ -194,6 +194,14 @@ struct MixedRTConfig : public NativeConfig {
// For workspace_size, refer it from here:
// https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#troubleshooting
int workspace_size{1 << 30};
+// We transform the ops that can be converted into TRT layers and aggregate
+// them into subgraphs for TRT execution. This variable controls the minimum
+// number of nodes such a subgraph must contain to be fused; the default is 3.
+int minimum_subgraph_size = 3;
+// Reserved configuration.
+// Only "FP32" is supported now; "FP16" and "INT8" will be supported later.
+std::string precision_mode = "FP32";
};

// NOTE WIP, not stable yet.
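A minimal usage sketch of the extended config (values are illustrative; only fields visible in this header are assumed):

#include <string>
#include "paddle/fluid/inference/api/paddle_inference_api.h"

paddle::contrib::MixedRTConfig MakeTrtConfig(const std::string& model_dir) {
  paddle::contrib::MixedRTConfig config;
  config.model_dir = model_dir;
  config.use_gpu = true;
  config.device = 0;
  config.max_batch_size = 4;         // hard upper bound compiled into the engine
  config.workspace_size = 1 << 28;   // 256 MB of TRT scratch space
  config.minimum_subgraph_size = 3;  // skip fusing subgraphs of 3 nodes or fewer
  config.precision_mode = "FP32";    // the only mode honored for now
  return config;
}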
10 changes: 10 additions & 0 deletions paddle/fluid/inference/tests/api/CMakeLists.txt
@@ -85,3 +85,13 @@ if (WITH_ANAKIN AND WITH_MKL) # only needed in CI
DEPS inference_anakin_api_shared dynload_cuda SERIAL)
endif()
endif()

+if(WITH_GPU AND TENSORRT_FOUND)
+  set(TRT_MODEL_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/trt")
+  if (NOT EXISTS ${TRT_MODEL_INSTALL_DIR})
+    inference_download_and_uncompress(${TRT_MODEL_INSTALL_DIR} ${INFERENCE_URL}/tensorrt_test "trt_test_models.tar.gz")
+  endif()
+  cc_test(test_trt_models SRCS trt_models_tester.cc
+          ARGS --dirname=${TRT_MODEL_INSTALL_DIR}/trt_test_models
+          DEPS paddle_inference_tensorrt_subgraph_engine)
+endif()
106 changes: 106 additions & 0 deletions paddle/fluid/inference/tests/api/trt_models_tester.cc
@@ -0,0 +1,106 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <gflags/gflags.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"

namespace paddle {
using paddle::contrib::MixedRTConfig;

DEFINE_string(dirname, "", "Directory of the inference model.");

NativeConfig GetConfigNative() {
NativeConfig config;
config.model_dir = FLAGS_dirname;
// LOG(INFO) << "dirname " << config.model_dir;
config.fraction_of_gpu_memory = 0.45;
config.use_gpu = true;
config.device = 0;
return config;
}

MixedRTConfig GetConfigTRT() {
MixedRTConfig config;
config.model_dir = FLAGS_dirname;
config.use_gpu = true;
config.fraction_of_gpu_memory = 0.2;
config.device = 0;
config.max_batch_size = 3;
return config;
}

void CompareTensorRTWithFluid(int batch_size, std::string model_dirname) {
NativeConfig config0 = GetConfigNative();
config0.model_dir = model_dirname;

MixedRTConfig config1 = GetConfigTRT();
config1.model_dir = model_dirname;
config1.max_batch_size = batch_size;

auto predictor0 =
CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config0);
auto predictor1 =
CreatePaddlePredictor<MixedRTConfig,
PaddleEngineKind::kAutoMixedTensorRT>(config1);
// Prepare inputs
int height = 224;
int width = 224;
float *data = new float[batch_size * 3 * height * width];
memset(data, 0, sizeof(float) * (batch_size * 3 * height * width));
data[0] = 1.0f;

// Wrap the input buffer in a PaddleTensor
PaddleTensor tensor;
tensor.name = "input_0";
tensor.shape = std::vector<int>({batch_size, 3, height, width});
tensor.data = PaddleBuf(static_cast<void *>(data),
sizeof(float) * (batch_size * 3 * height * width));
tensor.dtype = PaddleDType::FLOAT32;
std::vector<PaddleTensor> paddle_tensor_feeds(1, tensor);

// Prepare outputs
std::vector<PaddleTensor> outputs0;
std::vector<PaddleTensor> outputs1;
CHECK(predictor0->Run(paddle_tensor_feeds, &outputs0));

CHECK(predictor1->Run(paddle_tensor_feeds, &outputs1, batch_size));

// Get output.
ASSERT_EQ(outputs0.size(), 1UL);
ASSERT_EQ(outputs1.size(), 1UL);

const size_t num_elements = outputs0.front().data.length() / sizeof(float);
const size_t num_elements1 = outputs1.front().data.length() / sizeof(float);
EXPECT_EQ(num_elements, num_elements1);

auto *data0 = static_cast<float *>(outputs0.front().data.data());
auto *data1 = static_cast<float *>(outputs1.front().data.data());

ASSERT_GT(num_elements, 0UL);
for (size_t i = 0; i < std::min(num_elements, num_elements1); i++) {
EXPECT_NEAR(data0[i], data1[i], 1e-3);
}
delete[] data;  // PaddleBuf wrapped this buffer without taking ownership
}

TEST(trt_models_test, main) {
std::vector<std::string> infer_models = {"mobilenet", "resnet50",
"resnext50"};
for (auto &model_dir : infer_models) {
CompareTensorRTWithFluid(1, FLAGS_dirname + "/" + model_dir);
}
}
} // namespace paddle
4 changes: 2 additions & 2 deletions paddle/fluid/operators/tensorrt_engine_op.cc
@@ -22,8 +22,6 @@
namespace paddle {

DEFINE_int32(tensorrt_engine_batch_size, 1, "the batch_size of TensorRT");
-DEFINE_int32(tensorrt_max_batch_size, 1, "TensorRT maximum batch size");
-DEFINE_int32(tensorrt_workspace_size, 16 << 20, "TensorRT workspace size");

namespace operators {

@@ -34,6 +32,8 @@ class TensorRTEngineOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput("Ys", "A list of outputs").AsDuplicable();
AddAttr<std::string>("subgraph", "the subgraph.");
AddAttr<std::string>("engine_uniq_key", "unique key for the TRT engine.");
AddAttr<int>("max_batch_size", "the maximum batch size.");
AddAttr<int>("workspace_size", "the workspace size.");
AddComment("TensorRT engine operator.");
}
};
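A small sketch of what per-op attributes buy over the removed global flags (hypothetical: two engine ops in one program, each with its own limits, which a single process-wide gflag could not express):

// Hypothetical op descs; attribute names match the ones registered above.
framework::OpDesc trt_op_a, trt_op_b;
trt_op_a.SetAttr("max_batch_size", 1);
trt_op_a.SetAttr("workspace_size", 1 << 20);  // a small 1 MB engine
trt_op_b.SetAttr("max_batch_size", 16);
trt_op_b.SetAttr("workspace_size", 1 << 28);  // a large 256 MB engine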
13 changes: 6 additions & 7 deletions paddle/fluid/operators/tensorrt_engine_op.h
@@ -28,8 +28,6 @@
namespace paddle {

DECLARE_int32(tensorrt_engine_batch_size);
-DECLARE_int32(tensorrt_max_batch_size);
-DECLARE_int32(tensorrt_workspace_size);

namespace operators {

@@ -92,14 +90,14 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto engine_name = context.Attr<std::string>("engine_uniq_key");
+int max_batch_size = context.Attr<int>("max_batch_size");
if (!Singleton<TRT_EngineManager>::Global().HasEngine(engine_name)) {
Prepare(context);
}
auto* engine = Singleton<TRT_EngineManager>::Global().Get(engine_name);
auto input_names = context.op().Inputs("Xs");
PADDLE_ENFORCE(!input_names.empty(), "should pass more than one inputs");
-PADDLE_ENFORCE_LE(FLAGS_tensorrt_engine_batch_size,
-                  FLAGS_tensorrt_max_batch_size);
+PADDLE_ENFORCE_LE(FLAGS_tensorrt_engine_batch_size, max_batch_size);

std::vector<std::string> output_maps =
context.Attr<std::vector<std::string>>("output_name_mapping");
@@ -173,8 +171,9 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
// Get the ProgramDesc and pass to convert.
framework::proto::BlockDesc block_desc;
block_desc.ParseFromString(context.Attr<std::string>("subgraph"));
-int max_batch = FLAGS_tensorrt_max_batch_size;
-auto max_workspace = FLAGS_tensorrt_workspace_size;
+int max_batch_size = context.Attr<int>("max_batch_size");
+int workspace_size = context.Attr<int>("workspace_size");

auto params = context.Attr<std::vector<std::string>>("parameters");
std::unordered_set<std::string> parameters;
for (const auto& param : params) {
@@ -186,7 +185,7 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {

// TODO(Superjomn) replace this with a different stream
auto* engine = Singleton<TRT_EngineManager>::Global().Create(
-max_batch, max_workspace, nullptr /*engine hold its own stream*/,
+max_batch_size, workspace_size, nullptr /*engine hold its own stream*/,
context.Attr<std::string>("engine_uniq_key"),
boost::get<platform::CUDAPlace>(context.GetPlace()).device);

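To summarize the refactor, the path a limit such as max_batch_size now takes, assembled from the hunks above:

// MixedRTConfig.max_batch_size                          (user-facing API)
//   -> argument.Set<int>("max_batch_size", ...)         (api_tensorrt_subgraph_engine.cc)
//     -> SetAttr(desc.Proto(), "max_batch_size", ...)   (CreateTrtEngineOp)
//       -> context.Attr<int>("max_batch_size")          (TensorRTEngineKernel)
//         -> TRT_EngineManager::Create(max_batch_size, workspace_size, ...)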