From ef006ce12f2b9539e4b0a50e3fe76f10e47fc0b5 Mon Sep 17 00:00:00 2001 From: Shixiaowei02 <39303645+Shixiaowei02@users.noreply.github.com> Date: Fri, 17 May 2019 11:13:00 +0000 Subject: [PATCH 01/15] update anakin-engine interfaces for content-dnn test=develop --- cmake/anakin_subgraph.cmake | 5 +- paddle/fluid/inference/CMakeLists.txt | 6 +- paddle/fluid/inference/anakin/engine.cc | 2 +- paddle/fluid/inference/api/CMakeLists.txt | 12 + .../fluid/inference/api/api_anakin_engine.cc | 221 ++++++++++++--- .../fluid/inference/api/api_anakin_engine.h | 27 +- paddle/fluid/inference/api/helper.h | 5 +- .../inference/api/paddle_anakin_config.h | 10 +- .../inference/api/paddle_inference_api.h | 2 +- .../tests/api/anakin_mobilenet_tester.cc | 4 +- .../inference/tests/api/anakin_rnn1_tester.cc | 4 +- .../inference/tests/api/anakin_rnn2_tester.cc | 261 ++++++++++++++++++ 12 files changed, 494 insertions(+), 65 deletions(-) create mode 100644 paddle/fluid/inference/tests/api/anakin_rnn2_tester.cc diff --git a/cmake/anakin_subgraph.cmake b/cmake/anakin_subgraph.cmake index 4a7d32a63553d..b39f4477666b7 100644 --- a/cmake/anakin_subgraph.cmake +++ b/cmake/anakin_subgraph.cmake @@ -26,7 +26,10 @@ endif() if(ANAKIN_FOUND) message(STATUS "Current ANAKIN header is ${ANAKIN_INCLUDE_DIR}/anakin_config.h. ") include_directories(${ANAKIN_ROOT}/include) - include_directories(${ANAKIN_ROOT}/include/saber) + include_directories(${ANAKIN_ROOT}/saber) + include_directories(${ANAKIN_ROOT}/saber/core/) + include_directories(${ANAKIN_ROOT}/saber/funcs/impl/x86/) + include_directories(${ANAKIN_ROOT}/saber/funcs/impl/cuda/base/cuda_c/) link_directories(${ANAKIN_ROOT}) add_definitions(-DPADDLE_WITH_ANAKIN) endif() diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt index 5e0be5d445eae..395336938b10e 100644 --- a/paddle/fluid/inference/CMakeLists.txt +++ b/paddle/fluid/inference/CMakeLists.txt @@ -43,11 +43,15 @@ if(WITH_MKLDNN) endif() set(STATIC_INFERENCE_APIS paddle_fluid_api paddle_inference_api analysis_predictor) +if (ANAKIN_FOUND AND WITH_MKL) + set(ANAKIN_SHARED_INFERENCE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/api/api_anakin_engine.cc) +endif() set(SHARED_INFERENCE_SRCS io.cc ${CMAKE_CURRENT_SOURCE_DIR}/api/api.cc ${CMAKE_CURRENT_SOURCE_DIR}/api/api_impl.cc ${CMAKE_CURRENT_SOURCE_DIR}/api/analysis_predictor.cc ${mkldnn_quantizer_src} - ${CMAKE_CURRENT_SOURCE_DIR}/api/details/zero_copy_tensor.cc) + ${CMAKE_CURRENT_SOURCE_DIR}/api/details/zero_copy_tensor.cc + ${ANAKIN_SHARED_INFERENCE_SRCS}) if(WIN32) sep_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_APIS} zero_copy_tensor reset_tensor_array diff --git a/paddle/fluid/inference/anakin/engine.cc b/paddle/fluid/inference/anakin/engine.cc index ba044c9401a5f..ad9315e8501f5 100644 --- a/paddle/fluid/inference/anakin/engine.cc +++ b/paddle/fluid/inference/anakin/engine.cc @@ -114,7 +114,7 @@ void AnakinEngine::Execute( template void AnakinEngine::Freeze() { - PADDLE_ENFORCE(graph_->Freeze_v3(), "Freeze anakin subgraph."); + PADDLE_ENFORCE(graph_->Freeze(), "Freeze anakin subgraph."); } template diff --git a/paddle/fluid/inference/api/CMakeLists.txt b/paddle/fluid/inference/api/CMakeLists.txt index a73f160dfcfe9..6202a6fde4d1c 100644 --- a/paddle/fluid/inference/api/CMakeLists.txt +++ b/paddle/fluid/inference/api/CMakeLists.txt @@ -71,3 +71,15 @@ if (WITH_ANAKIN AND WITH_MKL) # only needed in CI anakin_target(inference_anakin_api) anakin_target(inference_anakin_api_shared) endif() + +if (ANAKIN_FOUND AND WITH_MKL) + cc_library(inference_anakin_api SRCS api.cc api_anakin_engine.cc DEPS mklml zero_copy_tensor_dummy device_context) + target_link_libraries(inference_anakin_api anakin anakin_saber_common cudnn mklml_intel) + cc_library(inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc DEPS zero_copy_tensor_dummy device_context) + target_link_libraries(inference_anakin_api_shared anakin anakin_saber_common cudnn mklml_intel) + function(anakin_target target_name) + target_compile_options(${target_name} BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS}) + endfunction() + anakin_target(inference_anakin_api) + anakin_target(inference_anakin_api_shared) +endif() diff --git a/paddle/fluid/inference/api/api_anakin_engine.cc b/paddle/fluid/inference/api/api_anakin_engine.cc index 2c4894fd887f2..798b299d512c0 100644 --- a/paddle/fluid/inference/api/api_anakin_engine.cc +++ b/paddle/fluid/inference/api/api_anakin_engine.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/inference/api/api_anakin_engine.h" +#include "paddle/fluid/inference/api/paddle_api.h" #ifdef PADDLE_WITH_CUDA #include @@ -35,37 +36,52 @@ using paddle::contrib::AnakinConfig; template PaddleInferenceAnakinPredictor::PaddleInferenceAnakinPredictor( - const contrib::AnakinConfig &config) { - CHECK(Init(config)); + const contrib::AnakinConfig &config) + : config_(config) { + CHECK(Init()); } template <> PaddleInferenceAnakinPredictor::PaddleInferenceAnakinPredictor( - const contrib::AnakinConfig &config) { + const contrib::AnakinConfig &config) + : config_(config) { omp_set_dynamic(0); omp_set_num_threads(1); mkl_set_num_threads(1); - CHECK(Init(config)); + CHECK(Init()); } template -bool PaddleInferenceAnakinPredictor::Init( - const contrib::AnakinConfig &config) { - if (!(graph_.load(config.model_file))) { - VLOG(3) << "fail to load graph from " << config.model_file; +bool PaddleInferenceAnakinPredictor::Init() { + anakin::Env::env_init(config_.max_stream); + if (!ctx_p_) { + ctx_p_ = std::make_shared>( + config_.device_id, config_.data_stream_id, config_.compute_stream_id); + } + if (!graph_p_) { + graph_p_ = std::make_shared< + anakin::graph::Graph>(); + } + if (!(graph_p_->load(config_.model_file))) { + LOG(INFO) << "fail to load graph from " << config_.model_file; return false; } - auto inputs = graph_.get_ins(); + auto inputs = graph_p_->get_ins(); for (auto &input_str : inputs) { - graph_.ResetBatchSize(input_str, config.max_batch_size); - max_batch_size_ = config.max_batch_size; + if (config_.init_inputs_shape.find(input_str) == + config_.init_inputs_shape.end()) { + LOG(INFO) << input_str << " is not implemented."; + return false; + } + std::vector shape = config_.init_inputs_shape.find(input_str)->second; + graph_p_->Reshape(input_str, shape); } // optimization for graph - if (!(graph_.Optimize())) { + if (!(graph_p_->Optimize())) { return false; } // construct executer if (executor_p_ == nullptr) { - executor_p_ = new anakin::Net(graph_, true); + executor_p_ = new anakin::Net(*graph_p_, true); } return true; } @@ -74,29 +90,133 @@ template bool PaddleInferenceAnakinPredictor::Run( const std::vector &inputs, std::vector *output_data, int batch_size) { + if (config_.re_allocable) { + return this->RunImpl(inputs, output_data); + } else { + // Run inputs data that exceeds batch size in batches. + // 1. Reassign the batch size. + if (batch_size == -1) { + if (!inputs[0].lod.empty()) { + batch_size = inputs[0].lod[0].size() - 1; + } else { + batch_size = inputs[0].shape[0]; + } + } + // 2. If the data don't need to be batched, run it directly. + if (batch_size <= config_.init_batch_size) { + return this->RunImpl(inputs, output_data); + } + // 3. Check the batch size and define temporary variables. + std::vector cur_inputs; + std::vector outputs_master; + std::vector> outputs_vec; + for (const auto &input : inputs) { + if (!input.lod.empty()) { + if (input.lod.size() != 1) { + return false; + } + if (input.lod[0].size() - 1 != batch_size) { + return false; + } + } else { + LOG(INFO) << "Non-lod mode to be implemented."; + return false; + } + PaddleTensor tensor; + tensor.name = input.name; + tensor.dtype = PaddleDType::FLOAT32; + cur_inputs.push_back(tensor); + } + for (auto output : *output_data) { + PaddleTensor tensor; + tensor.name = output.name; + outputs_master.push_back(tensor); + } + // 4. Batch execution. + for (size_t start_batch = 0; start_batch < batch_size;) { + auto end_batch = start_batch + config_.init_batch_size; + if (end_batch > batch_size) { + end_batch = batch_size; + } + auto cur_outputs = outputs_master; + for (size_t i = 0; i < inputs.size(); i++) { + auto start = inputs[i].lod[0][start_batch]; + auto end = inputs[i].lod[0][end_batch]; + std::vector offsets; + for (size_t j = start_batch; j <= end_batch; j++) { + offsets.push_back(inputs[i].lod[0][j] - + inputs[i].lod[0][start_batch]); + } + auto mem_start = static_cast(inputs[i].data.data()) + start; + cur_inputs[i].data = + PaddleBuf(mem_start, (end - start) * sizeof(float)); + cur_inputs[i].lod = std::vector>({offsets}); + cur_inputs[i].shape = + std::vector({static_cast(end - start), 1, 1, 1}); + } + if (!this->RunImpl(cur_inputs, &cur_outputs)) { + return false; + } + outputs_vec.push_back(cur_outputs); + start_batch = end_batch; + } + // 5. Copy the results to contiguous memory. + // Assume that each batch has the same final outputs size. + auto count = [](const std::vector &v) { + int cnt = 1; + for_each(v.begin(), v.end(), [&cnt](int n) { cnt *= n; }); + return cnt; + }; + for (size_t i = 0; i < output_data->size(); i++) { + std::vector shape = outputs_vec[i][0].shape; + shape[0] = batch_size; + int total_cnt = count(shape); + (*output_data)[i].shape = shape; + (*output_data)[i].data.Resize(total_cnt * sizeof(float)); + float *addr = static_cast((*output_data)[i].data.data()); + for (const auto &single_out : outputs_vec) { + int cnt = count(single_out[i].shape); + memcpy(addr, single_out[i].data.data(), cnt * sizeof(float)); + addr += cnt; + } + } + } + return true; +} + +template +bool PaddleInferenceAnakinPredictor::RunImpl( + const std::vector &inputs, + std::vector *output_data) { for (const auto &input : inputs) { if (input.dtype != PaddleDType::FLOAT32) { - VLOG(3) << "Only support float type inputs. " << input.name - << "'s type is not float"; + LOG(INFO) << "Only support float type inputs. " << input.name + << "'s type is not float"; return false; } auto d_tensor_in_p = executor_p_->get_in(input.name); auto net_shape = d_tensor_in_p->shape(); if (net_shape.size() != input.shape.size()) { - VLOG(3) << " input " << input.name - << "'s shape size should be equal to that of net"; + LOG(INFO) << " input " << input.name + << "'s shape size should be equal to that of net"; return false; } int sum = 1; for_each(input.shape.begin(), input.shape.end(), [&](int n) { sum *= n; }); if (sum > net_shape.count()) { - graph_.Reshape(input.name, input.shape); - delete executor_p_; - executor_p_ = new anakin::Net(graph_, true); - d_tensor_in_p = executor_p_->get_in(input.name); + if (config_.re_allocable) { + graph_p_->Reshape(input.name, input.shape); + delete executor_p_; + executor_p_ = + new anakin::Net(*graph_p_, true); + d_tensor_in_p = executor_p_->get_in(input.name); + } else { + LOG(INFO) << "Run failed because Anakin was expected not to reallocate " + "memory."; + return false; + } } - anakin::saber::Shape tmp_shape; for (auto s : input.shape) { tmp_shape.push_back(s); @@ -105,26 +225,26 @@ bool PaddleInferenceAnakinPredictor::Run( if (input.lod.size() > 0) { if (input.lod.size() > 1) { - VLOG(3) << " input lod first dim should <=1, but you set " - << input.lod.size(); + LOG(INFO) << " input lod first dim should <=1, but you set " + << input.lod.size(); return false; } - std::vector offset(input.lod[0].begin(), input.lod[0].end()); + std::vector lod(input.lod[0].begin(), input.lod[0].end()); + std::vector> offset({lod}); d_tensor_in_p->set_seq_offset(offset); - VLOG(3) << "offset.size(): " << offset.size(); - for (int i = 0; i < offset.size(); i++) { - VLOG(3) << offset[i]; + VLOG(3) << "offset.size(): " << offset[0].size(); + for (int i = 0; i < offset[0].size(); i++) { + VLOG(3) << offset[0][i]; } } - float *d_data_p = d_tensor_in_p->mutable_data(); - + float *d_data_p = static_cast(d_tensor_in_p->mutable_data()); #ifdef PADDLE_WITH_CUDA if (std::is_same::value) { if (cudaMemcpy(d_data_p, static_cast(input.data.data()), d_tensor_in_p->valid_size() * sizeof(float), cudaMemcpyHostToDevice) != 0) { - VLOG(3) << "copy data from CPU to GPU error"; + LOG(INFO) << "copy data from CPU to GPU error"; return false; } } @@ -141,7 +261,7 @@ bool PaddleInferenceAnakinPredictor::Run( #endif if (output_data->empty()) { - VLOG(3) << "At least one output should be set with tensors' names."; + LOG(INFO) << "At least one output should be set with tensors' names."; return false; } for (auto &output : *output_data) { @@ -157,7 +277,7 @@ bool PaddleInferenceAnakinPredictor::Run( if (cudaMemcpy(output.data.data(), tensor->mutable_data(), tensor->valid_size() * sizeof(float), cudaMemcpyDeviceToHost) != 0) { - VLOG(3) << "copy data from GPU to CPU error"; + LOG(INFO) << "copy data from GPU to CPU error"; return false; } } @@ -171,8 +291,22 @@ bool PaddleInferenceAnakinPredictor::Run( } template -anakin::Net - &PaddleInferenceAnakinPredictor::get_executer() { +bool PaddleInferenceAnakinPredictor::ResetConfig( + const AnakinConfig &config) { + config_ = config; + return true; +} + +template +anakin::Net + &PaddleInferenceAnakinPredictor::ResetExecuter( + std::shared_ptr> + graph_p) { + graph_p_ = graph_p; + ctx_p_ = std::make_shared>( + config_.device_id, config_.data_stream_id, config_.compute_stream_id); + executor_p_ = new anakin::Net(*graph_p_, true); return *executor_p_; } @@ -182,18 +316,19 @@ template std::unique_ptr PaddleInferenceAnakinPredictor::Clone() { VLOG(3) << "Anakin Predictor::clone"; + std::unique_lock lock(mutex_); std::unique_ptr cls( new PaddleInferenceAnakinPredictor()); // construct executer from other graph auto anakin_predictor_p = dynamic_cast *>(cls.get()); if (!anakin_predictor_p) { - VLOG(3) << "fail to call Init"; + LOG(INFO) << "fail to call Init"; return nullptr; } - anakin_predictor_p->get_executer().init(graph_); - - return std::move(cls); + anakin_predictor_p->ResetConfig(config_); + anakin_predictor_p->ResetExecuter(graph_p_); + return cls; } #ifdef PADDLE_WITH_CUDA @@ -223,7 +358,7 @@ CreatePaddlePredictor( new PaddleInferenceAnakinPredictor(config)); return x; } else { - VLOG(3) << "Anakin Predictor create on unknown platform."; + LOG(INFO) << "Anakin Predictor create on unknown platform."; return nullptr; } } @@ -260,7 +395,7 @@ void DisplayOpTimer(executor_t *net_executor, int epoch) { template PaddleInferenceAnakinPredictor::~PaddleInferenceAnakinPredictor() { #ifdef PADDLE_ANAKIN_ENABLE_OP_TIMER - DisplayOpTimer(executor_p_, max_batch_size_); + DisplayOpTimer(executor_p_, config_.init_batch_size); #endif delete executor_p_; executor_p_ = nullptr; diff --git a/paddle/fluid/inference/api/api_anakin_engine.h b/paddle/fluid/inference/api/api_anakin_engine.h index e14d93de2c41f..3eeea853af57d 100644 --- a/paddle/fluid/inference/api/api_anakin_engine.h +++ b/paddle/fluid/inference/api/api_anakin_engine.h @@ -19,6 +19,7 @@ limitations under the License. */ #pragma once +#include #include #include "framework/core/net/net.h" @@ -34,7 +35,7 @@ using contrib::AnakinConfig; template class PaddleInferenceAnakinPredictor : public PaddlePredictor { public: - PaddleInferenceAnakinPredictor() {} + PaddleInferenceAnakinPredictor() : config_() {} explicit PaddleInferenceAnakinPredictor(const AnakinConfig& config); @@ -45,21 +46,25 @@ class PaddleInferenceAnakinPredictor : public PaddlePredictor { int batch_size = -1) override; std::unique_ptr Clone() override; - - anakin::Net& - get_executer(); + bool ResetConfig(const AnakinConfig& config); + anakin::Net& + ResetExecuter( + std::shared_ptr> + graph_p); ~PaddleInferenceAnakinPredictor() override; private: - bool Init(const AnakinConfig& config); - - anakin::graph::Graph - graph_; - anakin::Net* - executor_p_{nullptr}; + bool Init(); + bool RunImpl(const std::vector& inputs, + std::vector* output_data); + std::mutex mutex_; AnakinConfig config_; - int max_batch_size_{0}; + std::shared_ptr> ctx_p_; + std::shared_ptr> + graph_p_; + anakin::Net* + executor_p_{nullptr}; }; } // namespace paddle diff --git a/paddle/fluid/inference/api/helper.h b/paddle/fluid/inference/api/helper.h index c89dd41e0a628..daf84a04a8755 100644 --- a/paddle/fluid/inference/api/helper.h +++ b/paddle/fluid/inference/api/helper.h @@ -57,9 +57,12 @@ static int GetUniqueId() { } static void split(const std::string &str, char sep, - std::vector *pieces) { + std::vector *pieces, bool ignore_null = true) { pieces->clear(); if (str.empty()) { + if (!ignore_null) { + pieces->push_back(str); + } return; } size_t pos = 0; diff --git a/paddle/fluid/inference/api/paddle_anakin_config.h b/paddle/fluid/inference/api/paddle_anakin_config.h index 0e91c2624bed4..7c32a28c324c3 100644 --- a/paddle/fluid/inference/api/paddle_anakin_config.h +++ b/paddle/fluid/inference/api/paddle_anakin_config.h @@ -14,6 +14,7 @@ #pragma once #include +#include #include #include #include @@ -25,9 +26,14 @@ namespace contrib { // Configurations for Anakin engine. struct AnakinConfig : public PaddlePredictor::Config { enum TargetType { NVGPU = 0, X86 }; - int device; + int device_id{0}; std::string model_file; - int max_batch_size{-1}; + std::map> init_inputs_shape; + int init_batch_size{-1}; + bool re_allocable{true}; + int max_stream{4}; + int data_stream_id{0}; + int compute_stream_id{0}; TargetType target_type; }; diff --git a/paddle/fluid/inference/api/paddle_inference_api.h b/paddle/fluid/inference/api/paddle_inference_api.h index 1785bd520a17d..2906a4926f7ca 100644 --- a/paddle/fluid/inference/api/paddle_inference_api.h +++ b/paddle/fluid/inference/api/paddle_inference_api.h @@ -28,6 +28,6 @@ limitations under the License. */ #include "paddle_analysis_config.h" // NOLINT #include "paddle_api.h" // NOLINT -#ifdef WITH_ANAKIN +#if (defined WITH_ANAKIN) || (defined PADDLE_WITH_ANAKIN) #include "paddle_anakin_config.h" // NOLINT #endif diff --git a/paddle/fluid/inference/tests/api/anakin_mobilenet_tester.cc b/paddle/fluid/inference/tests/api/anakin_mobilenet_tester.cc index cf97f064beddb..48689486af4fc 100644 --- a/paddle/fluid/inference/tests/api/anakin_mobilenet_tester.cc +++ b/paddle/fluid/inference/tests/api/anakin_mobilenet_tester.cc @@ -27,8 +27,8 @@ contrib::AnakinConfig GetConfig() { // using AnakinConfig::X86 if you need to use cpu to do inference config.target_type = contrib::AnakinConfig::NVGPU; config.model_file = FLAGS_model; - config.device = 0; - config.max_batch_size = 1; + config.device_id = 0; + config.init_batch_size = 1; return config; } diff --git a/paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc b/paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc index da42688f29f04..db01cfebcb2b3 100644 --- a/paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc +++ b/paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc @@ -100,8 +100,8 @@ contrib::AnakinConfig GetConfig() { // using AnakinConfig::X86 if you need to use cpu to do inference config.target_type = contrib::AnakinConfig::X86; config.model_file = FLAGS_model; - config.device = 0; - config.max_batch_size = 1000; // the max number of token + config.device_id = 0; + config.init_batch_size = 1000; // the max number of token return config; } diff --git a/paddle/fluid/inference/tests/api/anakin_rnn2_tester.cc b/paddle/fluid/inference/tests/api/anakin_rnn2_tester.cc new file mode 100644 index 0000000000000..27abaa530b36f --- /dev/null +++ b/paddle/fluid/inference/tests/api/anakin_rnn2_tester.cc @@ -0,0 +1,261 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include +#include "paddle/fluid/inference/api/helper.h" +#include "paddle/fluid/inference/api/paddle_inference_api.h" + +#define BUFFER_SIZE (10000) +#define COMPARE_OUTPUTS (1) +#define PRINT_INPUTS (0) + +DEFINE_string(model, "", "Directory of the inference model."); +DEFINE_string(datapath, "", "Path of the dataset."); +DEFINE_string(truthpath, "", "Path of the dataset."); +DEFINE_int32(batch_size, 1, "Batch size per execution."); +DEFINE_int32(repeats, 1, "Number of iterations."); +DEFINE_int32( + start_line, 0, + "The starting line of the text file read (this line will be read)."); +DEFINE_int32(end_line, 1000000, + "The ending line of the text file read (this line will be read)."); +DEFINE_int32(init_batch_size, 40, + "Max batch size for Anakin memory allocation."); +DEFINE_int32(threads_num, 2, "Threads num for Anakin."); + +class Data { + public: + Data(std::string file_name, size_t batch_size, size_t start = 0, + size_t end = 1000000) + : _batch_size(batch_size), _total_length(0), _inputs_size(6) { + _file.open(file_name); + _file.seekg(_file.end); + _total_length = _file.tellg(); + _file.seekg(_file.beg); + read_file_to_vec(start, end); + reset_current_line(); + } + void reset_current_line(); + const std::vector& get_lines(); + void read_file_to_vec(const size_t start, const size_t end); + int get_next_batches(std::vector>* inputs, + std::vector>* seq_offsets); + + private: + std::fstream _file; + int _batch_size; + size_t _total_length; + size_t _inputs_size; + std::vector _lines; + size_t _current_line; +}; + +void Data::read_file_to_vec(const size_t start, const size_t end) { + std::string line; + size_t count = 0; + _lines.clear(); + while (std::getline(_file, line)) { + if (count >= start && count <= end) { + _lines.push_back(line); + } + count++; + } +} + +const std::vector& Data::get_lines() { return _lines; } + +void Data::reset_current_line() { _current_line = 0; } + +int Data::get_next_batches(std::vector>* data, + std::vector>* offsets) { + data->clear(); + offsets->clear(); + data->resize(_inputs_size); + offsets->resize(_inputs_size); + for (auto& offset : *offsets) { + offset.push_back(0); + } + + int seq_num = -1; + int pre_query_index = -1; + while (_current_line < _lines.size()) { + int cur_query_index = -1; + std::vector line; + paddle::inference::split(_lines[_current_line], ';', &line); + for (size_t i = 0; i < line.size(); i++) { + std::vector float_v; + paddle::inference::split_to_float(line[i], ' ', &float_v); + if (i == 0) { + cur_query_index = float_v[0]; + if (pre_query_index != -1 && cur_query_index != pre_query_index) { + return seq_num; + } + seq_num++; + _current_line++; + } else { + if (float_v.size() == 0) { + float_v.push_back(-1); + } + (*data)[i - 1].insert((*data)[i - 1].end(), float_v.begin(), + float_v.end()); + (*offsets)[i - 1].push_back((*offsets)[i - 1][seq_num] + + float_v.size()); + } + } + if (seq_num + 1 >= _batch_size) { + return seq_num; + } else { + pre_query_index = cur_query_index; + } + } + return seq_num; +} + +namespace paddle { + +contrib::AnakinConfig GetConfig() { + contrib::AnakinConfig config; + + std::map> init_inputs_shape; + init_inputs_shape["q_basic"] = std::vector({1000, 1, 1, 1}); + init_inputs_shape["q_bigram0"] = std::vector({1000, 1, 1, 1}); + init_inputs_shape["pt_basic"] = std::vector({2000, 1, 1, 1}); + init_inputs_shape["pa_basic"] = std::vector({4000, 1, 1, 1}); + init_inputs_shape["pa_bigram0"] = std::vector({4000, 1, 1, 1}); + init_inputs_shape["pt_bigram0"] = std::vector({2000, 1, 1, 1}); + + // using AnakinConfig::X86 if you need to use cpu to do inference + config.target_type = contrib::AnakinConfig::NVGPU; + config.model_file = FLAGS_model; + config.device_id = 0; + config.init_batch_size = FLAGS_init_batch_size; + config.init_inputs_shape = init_inputs_shape; + config.re_allocable = false; + return config; +} + +void single_test(PaddlePredictor* predictor_master) { + auto predictor = predictor_master->Clone(); + + Data data(FLAGS_datapath, FLAGS_batch_size, FLAGS_start_line, FLAGS_end_line); + + std::vector> inputs; + std::vector> seq_offsets; + std::vector compare_outputs; + + const std::vector input_names{"q_basic", "q_bigram0", + "pt_basic", "pt_bigram0", + "pa_basic", "pa_bigram0"}; + std::vector input_tensors; + std::vector output_tensors; + for (auto& name : input_names) { + PaddleTensor tensor; + tensor.name = name; + tensor.dtype = PaddleDType::FLOAT32; + input_tensors.push_back(tensor); + } + + PaddleTensor tensor_out; + tensor_out.name = "save_infer_model/scale_0"; + tensor_out.shape = std::vector({}); + tensor_out.data = PaddleBuf(); + tensor_out.dtype = PaddleDType::FLOAT32; + output_tensors.push_back(tensor_out); + + inference::Timer timer; + for (int i = 0; i < FLAGS_repeats; i++) { + data.reset_current_line(); + size_t count = 0; + float time_sum = 0; + while (data.get_next_batches(&inputs, &seq_offsets) >= 0) { +#if PRINT_INPUTS + for (size_t i = 0; i < inputs.size(); i++) { + LOG(INFO) << "data " << i; + for (size_t j = 0; j < inputs[i].size(); j++) { + LOG(INFO) << j << ": " << inputs[i][j]; + } + for (auto j : seq_offsets[i]) { + LOG(INFO) << "offsets: " << i << ": " << j; + } + } +#endif + for (size_t j = 0; j < input_tensors.size(); j++) { + input_tensors[j].data = + PaddleBuf(&inputs[j][0], inputs[j].size() * sizeof(float)); + input_tensors[j].lod = + std::vector>({seq_offsets[j]}); + input_tensors[j].shape = + std::vector({static_cast(inputs[j].size()), 1, 1, 1}); + } + timer.tic(); + predictor->Run(input_tensors, &output_tensors); + float time = timer.toc(); +#if COMPARE_OUTPUTS + float* data_o = static_cast(output_tensors[0].data.data()); + LOG(INFO) << "outputs[0].data.size() = " + << output_tensors[0].data.length() / sizeof(float); + size_t sum = 1; + for_each(output_tensors[0].shape.begin(), output_tensors[0].shape.end(), + [&](int n) { sum *= n; }); + for (size_t j = 0; j < sum; ++j) { + LOG(INFO) << "output[" << j << "]: " << data_o[j]; + compare_outputs.push_back(data_o[j]); + } +#endif + LOG(INFO) << "Single Time: " << time; + count++; + if (count > 10) { + time_sum += timer.toc(); + } + } + inference::PrintTime(FLAGS_batch_size, FLAGS_repeats, 1, 0, + time_sum / (count - 10)); +#if COMPARE_OUTPUTS + Data data(FLAGS_truthpath, 1); + const std::vector truth_vals = data.get_lines(); + for (size_t j = 0; j < truth_vals.size(); j++) { + float truth = std::atof(truth_vals[j].c_str()); + float compa = compare_outputs[j]; + float diff = std::abs(truth - compa); + LOG(INFO) << "[DIFF " << j << " ] " << diff; + if (diff > 0.0001) { + LOG(FATAL) << "The result is wrong!"; + } + } + LOG(INFO) << "The result is correct!"; +#endif + } +} +} // namespace paddle + +int main(int argc, char** argv) { + google::ParseCommandLineFlags(&argc, &argv, true); + std::vector threads; + + auto config = paddle::GetConfig(); + config.data_stream_id = 0; + config.compute_stream_id = 0; + std::unique_ptr predictor_master = + paddle::CreatePaddlePredictor(config); + + for (int i = 0; i < FLAGS_threads_num; i++) { + threads.push_back(std::thread(paddle::single_test, predictor_master.get())); + } + for (auto& t : threads) { + t.join(); + } + return 0; +} From e395c5196b2710cc523a2cb5ea57d709bae56b4d Mon Sep 17 00:00:00 2001 From: Shixiaowei02 <39303645+Shixiaowei02@users.noreply.github.com> Date: Mon, 20 May 2019 03:35:05 +0000 Subject: [PATCH 02/15] support only-gpu mode of Anakin modify eltwise parse test=develop --- .../inference/anakin/convert/elementwise.cc | 2 +- .../inference/anakin/convert/op_converter.h | 27 +++++++++------- .../anakin/convert/test_activation_op.cc | 32 ------------------- .../anakin/convert/test_affine_channel_op.cc | 7 ++-- .../anakin/convert/test_batch_norm_op.cc | 8 ++--- .../anakin/convert/test_concat_op.cc | 8 ++--- .../anakin/convert/test_conv2d_op.cc | 8 ++--- .../anakin/convert/test_dropout_op.cc | 7 ++-- .../anakin/convert/test_elementwise_op.cc | 10 ++---- .../inference/anakin/convert/test_fc_op.cc | 7 ++-- .../anakin/convert/test_flatten_op.cc | 7 ++-- .../anakin/convert/test_pool2d_op.cc | 8 ++--- .../inference/anakin/convert/test_relu_op.cc | 5 --- .../anakin/convert/test_reshape_op.cc | 8 ++--- .../anakin/convert/test_softmax_op.cc | 8 ++--- .../inference/anakin/convert/test_split_op.cc | 7 ++-- .../inference/anakin/convert/test_sum_op.cc | 7 ++-- .../anakin/convert/test_transpose_op.cc | 7 ++-- .../inference/anakin/convert/ut_helper.h | 8 ++--- paddle/fluid/inference/anakin/engine.cc | 4 +-- paddle/fluid/inference/anakin/engine.h | 4 ++- .../inference/anakin/test_anakin_engine.cc | 1 - .../ir_passes/anakin_subgraph_pass.cc | 6 +++- .../fluid/inference/api/api_anakin_engine.cc | 11 +++++++ .../fluid/operators/anakin/anakin_engine_op.h | 4 +++ 25 files changed, 74 insertions(+), 137 deletions(-) diff --git a/paddle/fluid/inference/anakin/convert/elementwise.cc b/paddle/fluid/inference/anakin/convert/elementwise.cc index dd32baa0b9001..d221f26e11934 100644 --- a/paddle/fluid/inference/anakin/convert/elementwise.cc +++ b/paddle/fluid/inference/anakin/convert/elementwise.cc @@ -60,7 +60,7 @@ void ElementwiseMulOpConverter::operator()( auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front(); this->engine_->AddOp(op_name, "Eltwise", {x_name, y_name}, {out_name}); - std::string elementwise_type = "Prod"; + std::string elementwise_type = "Mul"; this->engine_->template AddOpAttr(op_name, "type", elementwise_type); std::vector coeff = {1.0, 1.0}; diff --git a/paddle/fluid/inference/anakin/convert/op_converter.h b/paddle/fluid/inference/anakin/convert/op_converter.h index a6ae51bd4b1c6..1058e744bca9c 100644 --- a/paddle/fluid/inference/anakin/convert/op_converter.h +++ b/paddle/fluid/inference/anakin/convert/op_converter.h @@ -153,11 +153,12 @@ template class AnakinOpConverter<::anakin::saber::NV, ::anakin::Precision::FP32>; template class AnakinOpConverter<::anakin::saber::NV, ::anakin::Precision::INT8>; - +#ifdef ANAKIN_X86_PLACE template class AnakinOpConverter<::anakin::saber::X86, ::anakin::Precision::FP32>; template class AnakinOpConverter<::anakin::saber::X86, ::anakin::Precision::INT8>; +#endif } // namespace anakin } // namespace inference } // namespace paddle @@ -203,16 +204,16 @@ template class AnakinOpConverter<::anakin::saber::X86, CPU, ::anakin::saber::X86, precision_type__, \ ::anakin::Precision::precision_type__) -#ifdef PADDLE_WITH_CUDA +#if defined(PADDLE_WITH_CUDA) && defined(ANAKIN_X86_PLACE) #define REGISTER_ANAKIN_OP_CONVERTER(op_type__, Converter__) \ REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__, FP32); \ REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__, INT8); \ REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__, FP32); \ REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__, INT8) -#else -#define REGISTER_ANAKIN_OP_CONVERTER(op_type__, Converter__) \ - REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__, FP32); \ - REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__, INT8) +#elif defined(PADDLE_WITH_CUDA) +#define REGISTER_ANAKIN_OP_CONVERTER(op_type__, Converter__) \ + REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__, FP32); \ + REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__, INT8) #endif #define USE_ANAKIN_CONVERTER_BASE(op_type__, place_type__, precision_type__) \ @@ -221,12 +222,16 @@ template class AnakinOpConverter<::anakin::saber::X86, __attribute__((unused)) = \ Touch_anakin_##op_type__##_##place_type__##_##precision_type__(); +#if defined(PADDLE_WITH_CUDA) && defined(ANAKIN_X86_PLACE) +#define USE_ANAKIN_CONVERTER(op_type__) \ + USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA, FP32) \ + USE_ANAKIN_CONVERTER_BASE(op_type__, CPU, FP32) +#define USE_INT8_ANAKIN_CONVERTER(op_type__) \ + USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA, INT8) \ + USE_ANAKIN_CONVERTER_BASE(op_type__, CPU, INT8) +#elif defined(PADDLE_WITH_CUDA) #define USE_ANAKIN_CONVERTER(op_type__) \ USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA, FP32) #define USE_INT8_ANAKIN_CONVERTER(op_type__) \ USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA, INT8) - -#define USE_CPU_ANAKIN_CONVERTER(op_type__) \ - USE_ANAKIN_CONVERTER_BASE(op_type__, CPU, FP32) -#define USE_CPU_INT8_ANAKIN_CONVERTER(op_type__) \ - USE_ANAKIN_CONVERTER_BASE(op_type__, CPU, INT8) +#endif diff --git a/paddle/fluid/inference/anakin/convert/test_activation_op.cc b/paddle/fluid/inference/anakin/convert/test_activation_op.cc index 4f898252d2798..5ac8b45882f51 100644 --- a/paddle/fluid/inference/anakin/convert/test_activation_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_activation_op.cc @@ -77,32 +77,6 @@ TEST(swish_op, gpu) { } #endif -/* -TEST(sigm_op, cpu) { - platform::CPUPlace cpu_place; - platform::CPUDeviceContext ctx(cpu_place); - test_activation_op<::anakin::saber::X86>("sigmoid", ctx, false); -} - -TEST(tanh_op, cpu) { - platform::CPUPlace cpu_place; - platform::CPUDeviceContext ctx(cpu_place); - test_activation_op<::anakin::saber::X86>("tanh", ctx, false); -} - -TEST(relu6_op, cpu) { - platform::CPUPlace cpu_place; - platform::CPUDeviceContext ctx(cpu_place); - test_activation_op<::anakin::saber::X86>("relu6", ctx, false); -} - -TEST(swish_op, cpu) { - platform::CPUPlace cpu_place; - platform::CPUDeviceContext ctx(cpu_place); - test_activation_op<::anakin::saber::X86>("swish", ctx, false); -} -*/ - } // namespace anakin } // namespace inference } // namespace paddle @@ -112,13 +86,7 @@ USE_OP(tanh); USE_OP(relu6); USE_OP(swish); -USE_CPU_ANAKIN_CONVERTER(sigmoid); -USE_CPU_ANAKIN_CONVERTER(tanh); -USE_CPU_ANAKIN_CONVERTER(relu6); -USE_CPU_ANAKIN_CONVERTER(swish); -#ifdef PADDLE_WITH_CUDA USE_ANAKIN_CONVERTER(sigmoid); USE_ANAKIN_CONVERTER(tanh); USE_ANAKIN_CONVERTER(relu6); USE_ANAKIN_CONVERTER(swish); -#endif diff --git a/paddle/fluid/inference/anakin/convert/test_affine_channel_op.cc b/paddle/fluid/inference/anakin/convert/test_affine_channel_op.cc index f6399387aa264..008537dc8a5a8 100644 --- a/paddle/fluid/inference/anakin/convert/test_affine_channel_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_affine_channel_op.cc @@ -57,19 +57,16 @@ TEST(affine_channel_op, gpu) { test_affine_channel_op<::anakin::saber::NV>(ctx, true); } #endif - +#ifdef ANAKIN_X86_PLACE TEST(affine_channel_op, cpu) { platform::CPUPlace cpu_place; platform::CPUDeviceContext ctx(cpu_place); test_affine_channel_op<::anakin::saber::X86>(ctx, false); } - +#endif } // namespace anakin } // namespace inference } // namespace paddle USE_OP(affine_channel); -USE_CPU_ANAKIN_CONVERTER(affine_channel); -#ifdef PADDLE_WITH_CUDA USE_ANAKIN_CONVERTER(affine_channel); -#endif diff --git a/paddle/fluid/inference/anakin/convert/test_batch_norm_op.cc b/paddle/fluid/inference/anakin/convert/test_batch_norm_op.cc index c008ef1bd5ee2..edba90235fac0 100644 --- a/paddle/fluid/inference/anakin/convert/test_batch_norm_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_batch_norm_op.cc @@ -73,19 +73,15 @@ TEST(batch_norm_op, gpu) { test_batchnorm_op<::anakin::saber::NV>(ctx, true); } #endif - +#ifdef ANAKIN_X86_PLACE TEST(batch_norm_op, cpu) { platform::CPUPlace cpu_place; platform::CPUDeviceContext ctx(cpu_place); test_batchnorm_op<::anakin::saber::X86>(ctx, false); } - +#endif } // namespace anakin } // namespace inference } // namespace paddle USE_OP(batch_norm); -USE_CPU_ANAKIN_CONVERTER(batch_norm); - -#ifdef PADDLE_WITH_CUDA USE_ANAKIN_CONVERTER(batch_norm); -#endif diff --git a/paddle/fluid/inference/anakin/convert/test_concat_op.cc b/paddle/fluid/inference/anakin/convert/test_concat_op.cc index 42dfbeb5cdc40..6870260c86587 100644 --- a/paddle/fluid/inference/anakin/convert/test_concat_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_concat_op.cc @@ -53,19 +53,15 @@ TEST(concat_op, gpu) { test_concat_op<::anakin::saber::NV>(ctx, true); } #endif - +#ifdef ANAKIN_X86_PLACE TEST(concat_op, cpu) { platform::CPUPlace cpu_place; platform::CPUDeviceContext ctx(cpu_place); test_concat_op<::anakin::saber::X86>(ctx, false); } - +#endif } // namespace anakin } // namespace inference } // namespace paddle USE_OP(concat); -USE_CPU_ANAKIN_CONVERTER(concat); - -#ifdef PADDLE_WITH_CUDA USE_ANAKIN_CONVERTER(concat); -#endif diff --git a/paddle/fluid/inference/anakin/convert/test_conv2d_op.cc b/paddle/fluid/inference/anakin/convert/test_conv2d_op.cc index e95e11c4f9688..723a348b12e3b 100644 --- a/paddle/fluid/inference/anakin/convert/test_conv2d_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_conv2d_op.cc @@ -60,20 +60,16 @@ TEST(conv2d_op, gpu) { test_conv2d_op<::anakin::saber::NV>(ctx, true); } #endif - +#ifdef ANAKIN_X86_PLACE TEST(conv2d_op, cpu) { platform::CPUPlace cpu_place; platform::CPUDeviceContext ctx(cpu_place); test_conv2d_op<::anakin::saber::X86>(ctx, false); } - +#endif } // namespace anakin } // namespace inference } // namespace paddle USE_OP(conv2d); -USE_CPU_ANAKIN_CONVERTER(conv2d); - -#ifdef PADDLE_WITH_CUDA USE_ANAKIN_CONVERTER(conv2d); -#endif diff --git a/paddle/fluid/inference/anakin/convert/test_dropout_op.cc b/paddle/fluid/inference/anakin/convert/test_dropout_op.cc index ae27e27ded5d9..83792676a0044 100644 --- a/paddle/fluid/inference/anakin/convert/test_dropout_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_dropout_op.cc @@ -54,19 +54,16 @@ TEST(dropout_op, gpu) { test_dropout_op<::anakin::saber::NV>(ctx, true); } #endif - +#ifdef ANAKIN_X86_PLACE TEST(dropout_op, cpu) { platform::CPUPlace cpu_place; platform::CPUDeviceContext ctx(cpu_place); test_dropout_op<::anakin::saber::X86>(ctx, false); } - +#endif } // namespace anakin } // namespace inference } // namespace paddle USE_OP(dropout); -USE_CPU_ANAKIN_CONVERTER(dropout); -#ifdef PADDLE_WITH_CUDA USE_ANAKIN_CONVERTER(dropout); -#endif diff --git a/paddle/fluid/inference/anakin/convert/test_elementwise_op.cc b/paddle/fluid/inference/anakin/convert/test_elementwise_op.cc index bff75294908aa..ee128c1ec9ad6 100644 --- a/paddle/fluid/inference/anakin/convert/test_elementwise_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_elementwise_op.cc @@ -59,29 +59,23 @@ TEST(elementwise_op, native_mul_gpu) { test_elementwise_op<::anakin::saber::NV>("elementwise_mul", ctx, true); } #endif - +#ifdef ANAKIN_X86_PLACE TEST(elementwise_op, native_add_cpu) { platform::CPUPlace cpu_place; platform::CPUDeviceContext ctx(cpu_place); test_elementwise_op<::anakin::saber::X86>("elementwise_add", ctx, false); } - TEST(elementwise_op, native_mul_cpu) { platform::CPUPlace cpu_place; platform::CPUDeviceContext ctx(cpu_place); test_elementwise_op<::anakin::saber::X86>("elementwise_mul", ctx, false); } - +#endif } // namespace anakin } // namespace inference } // namespace paddle USE_OP(elementwise_add); USE_OP(elementwise_mul); -#ifdef PADDLE_WITH_CUDA USE_ANAKIN_CONVERTER(elementwise_add); USE_ANAKIN_CONVERTER(elementwise_mul); -#endif - -USE_CPU_ANAKIN_CONVERTER(elementwise_add); -USE_CPU_ANAKIN_CONVERTER(elementwise_mul); diff --git a/paddle/fluid/inference/anakin/convert/test_fc_op.cc b/paddle/fluid/inference/anakin/convert/test_fc_op.cc index a24c809c02213..3e68d8fed6a66 100644 --- a/paddle/fluid/inference/anakin/convert/test_fc_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_fc_op.cc @@ -49,19 +49,16 @@ TEST(mul_op, gpu) { test_mul_op<::anakin::saber::NV>(ctx, true); } #endif - +#ifdef ANAKIN_X86_PLACE TEST(mul_op, cpu) { platform::CPUPlace cpu_place; platform::CPUDeviceContext ctx(cpu_place); test_mul_op<::anakin::saber::X86>(ctx, false); } - +#endif } // namespace anakin } // namespace inference } // namespace paddle USE_OP(mul); -USE_CPU_ANAKIN_CONVERTER(fc); -#ifdef PADDLE_WITH_CUDA USE_ANAKIN_CONVERTER(fc); -#endif diff --git a/paddle/fluid/inference/anakin/convert/test_flatten_op.cc b/paddle/fluid/inference/anakin/convert/test_flatten_op.cc index 5765f5ebd1f2a..5e4cfdabfd7ca 100644 --- a/paddle/fluid/inference/anakin/convert/test_flatten_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_flatten_op.cc @@ -48,20 +48,17 @@ TEST(flatten_op, gpu) { test_flatten_op<::anakin::saber::NV>(ctx, true); } #endif - +#ifdef ANAKIN_X86_PLACE TEST(flatten_op, cpu) { platform::CPUPlace cpu_place; platform::CPUDeviceContext ctx(cpu_place); test_flatten_op<::anakin::saber::X86>(ctx, false); } - +#endif } // namespace anakin } // namespace inference } // namespace paddle USE_OP(reshape); USE_OP_ITSELF(flatten); -USE_CPU_ANAKIN_CONVERTER(flatten); -#ifdef PADDLE_WITH_CUDA USE_ANAKIN_CONVERTER(flatten); -#endif diff --git a/paddle/fluid/inference/anakin/convert/test_pool2d_op.cc b/paddle/fluid/inference/anakin/convert/test_pool2d_op.cc index 90503b1fbba81..9b23b5b93df16 100644 --- a/paddle/fluid/inference/anakin/convert/test_pool2d_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_pool2d_op.cc @@ -87,7 +87,7 @@ TEST(Pool2dOpConverter, avg_ceil_test) { test_pool2d<::anakin::saber::NV>(ctx, true, false, true, "avg"); } #endif - +#ifdef ANAKIN_X86_PLACE TEST(Pool2dOpConverter, normal_cpu) { platform::CPUPlace cpu_place; platform::CPUDeviceContext ctx(cpu_place); @@ -110,14 +110,10 @@ TEST(Pool2dOpConverter, avg_ceil_test_cpu) { platform::CPUDeviceContext ctx(cpu_place); test_pool2d<::anakin::saber::X86>(ctx, false, false, true, "avg"); } - +#endif } // namespace anakin } // namespace inference } // namespace paddle USE_OP(pool2d); -USE_CPU_ANAKIN_CONVERTER(pool2d); - -#ifdef PADDLE_WITH_CUDA USE_ANAKIN_CONVERTER(pool2d); -#endif diff --git a/paddle/fluid/inference/anakin/convert/test_relu_op.cc b/paddle/fluid/inference/anakin/convert/test_relu_op.cc index 3f22479651965..eb6429f3383d2 100644 --- a/paddle/fluid/inference/anakin/convert/test_relu_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_relu_op.cc @@ -66,10 +66,5 @@ TEST(leaky_relu_op, gpu) { USE_OP(relu); USE_OP(leaky_relu); -USE_CPU_ANAKIN_CONVERTER(relu); -USE_CPU_ANAKIN_CONVERTER(leaky_relu); - -#ifdef PADDLE_WITH_CUDA USE_ANAKIN_CONVERTER(relu); USE_ANAKIN_CONVERTER(leaky_relu); -#endif diff --git a/paddle/fluid/inference/anakin/convert/test_reshape_op.cc b/paddle/fluid/inference/anakin/convert/test_reshape_op.cc index e102bd3ac3ea0..b1be42e542ce0 100644 --- a/paddle/fluid/inference/anakin/convert/test_reshape_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_reshape_op.cc @@ -81,7 +81,7 @@ TEST(reshape2_op, gpu) { test_reshape2_op<::anakin::saber::NV>(ctx, true); } #endif - +#ifdef ANAKIN_X86_PLACE TEST(reshape1_op, cpu) { platform::CPUPlace cpu_place; platform::CPUDeviceContext ctx(cpu_place); @@ -93,14 +93,10 @@ TEST(reshape2_op, cpu) { platform::CPUDeviceContext ctx(cpu_place); test_reshape2_op<::anakin::saber::X86>(ctx, false); } - +#endif } // namespace anakin } // namespace inference } // namespace paddle USE_OP(reshape); -USE_CPU_ANAKIN_CONVERTER(reshape); - -#ifdef PADDLE_WITH_CUDA USE_ANAKIN_CONVERTER(reshape); -#endif diff --git a/paddle/fluid/inference/anakin/convert/test_softmax_op.cc b/paddle/fluid/inference/anakin/convert/test_softmax_op.cc index de0b18fdbfd5f..1a324739d9853 100644 --- a/paddle/fluid/inference/anakin/convert/test_softmax_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_softmax_op.cc @@ -48,20 +48,16 @@ TEST(softmax_op, gpu) { test_softmax_op<::anakin::saber::NV>(ctx, true); } #endif - +#ifdef ANAKIN_X86_PLACE TEST(relu_op, cpu) { platform::CPUPlace cpu_place; platform::CPUDeviceContext ctx(cpu_place); test_softmax_op<::anakin::saber::X86>(ctx, false); } - +#endif } // namespace anakin } // namespace inference } // namespace paddle USE_OP(softmax); -USE_CPU_ANAKIN_CONVERTER(softmax); - -#ifdef PADDLE_WITH_CUDA USE_ANAKIN_CONVERTER(softmax); -#endif diff --git a/paddle/fluid/inference/anakin/convert/test_split_op.cc b/paddle/fluid/inference/anakin/convert/test_split_op.cc index 9a42ffd853bb0..f9ef54fdcacec 100644 --- a/paddle/fluid/inference/anakin/convert/test_split_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_split_op.cc @@ -92,7 +92,7 @@ TEST(split_op, test_different_shape_axis3_batch1) { platform::CUDADeviceContext ctx(gpu_place); AnakinSliceTest<::anakin::saber::NV, 3>(ctx, true, {1, 3, 2, 3}, {2, 1}); } - +#ifdef ANAKIN_X86_PLACE TEST(split_op, test_different_shape_axis1_batch1_cpu) { platform::CPUPlace cpu_place; platform::CPUDeviceContext ctx(cpu_place); @@ -110,13 +110,10 @@ TEST(split_op, test_different_shape_axis3_batch1_cpu) { platform::CPUDeviceContext ctx(cpu_place); AnakinSliceTest<::anakin::saber::X86, 3>(ctx, false, {1, 3, 2, 4}, {2, 2}); } - +#endif } // namespace anakin } // namespace inference } // namespace paddle USE_OP(split); -USE_CPU_ANAKIN_CONVERTER(split); -#ifdef PADDLE_WITH_CUDA USE_ANAKIN_CONVERTER(split); -#endif diff --git a/paddle/fluid/inference/anakin/convert/test_sum_op.cc b/paddle/fluid/inference/anakin/convert/test_sum_op.cc index 65f67ebd12989..9d26430ea68c5 100644 --- a/paddle/fluid/inference/anakin/convert/test_sum_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_sum_op.cc @@ -49,19 +49,16 @@ TEST(sum_op, gpu) { test_sum_op<::anakin::saber::NV>(ctx, true); } #endif - +#ifdef ANAKIN_X86_PLACE TEST(sum_op, cpu) { platform::CPUPlace cpu_place; platform::CPUDeviceContext ctx(cpu_place); test_sum_op<::anakin::saber::X86>(ctx, false); } - +#endif } // namespace anakin } // namespace inference } // namespace paddle USE_OP(sum); -USE_CPU_ANAKIN_CONVERTER(sum); -#ifdef PADDLE_WITH_CUDA USE_ANAKIN_CONVERTER(sum); -#endif diff --git a/paddle/fluid/inference/anakin/convert/test_transpose_op.cc b/paddle/fluid/inference/anakin/convert/test_transpose_op.cc index 51b69dfbb08b7..466e2f1a49f21 100644 --- a/paddle/fluid/inference/anakin/convert/test_transpose_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_transpose_op.cc @@ -79,7 +79,7 @@ TEST(transpose2_op, gpu) { test_transpose2_op<::anakin::saber::NV>(ctx, true); } #endif - +#ifdef ANAKIN_X86_PLACE TEST(transpose1_op, cpu) { platform::CPUPlace cpu_place; platform::CPUDeviceContext ctx(cpu_place); @@ -91,13 +91,10 @@ TEST(transpose2_op, cpu) { platform::CPUDeviceContext ctx(cpu_place); test_transpose2_op<::anakin::saber::X86>(ctx, false); } - +#endif } // namespace anakin } // namespace inference } // namespace paddle USE_OP(transpose); -USE_CPU_ANAKIN_CONVERTER(transpose); -#ifdef PADDLE_WITH_CUDA USE_ANAKIN_CONVERTER(transpose); -#endif diff --git a/paddle/fluid/inference/anakin/convert/ut_helper.h b/paddle/fluid/inference/anakin/convert/ut_helper.h index 2f8f953892c39..92441f2560f32 100644 --- a/paddle/fluid/inference/anakin/convert/ut_helper.h +++ b/paddle/fluid/inference/anakin/convert/ut_helper.h @@ -33,7 +33,6 @@ limitations under the License. */ #include "paddle/fluid/platform/enforce.h" using anakin::Precision; -using anakin::saber::X86; namespace paddle { namespace inference { @@ -215,13 +214,14 @@ class AnakinConvertValidation { template class AnakinConvertValidation<::anakin::saber::NV, ::anakin::Precision::FP32>; -template class AnakinConvertValidation<::anakin::saber::X86, - ::anakin::Precision::FP32>; - template class AnakinConvertValidation<::anakin::saber::NV, ::anakin::Precision::INT8>; +#ifdef ANAKIN_X86_PLACE +template class AnakinConvertValidation<::anakin::saber::X86, + ::anakin::Precision::FP32>; template class AnakinConvertValidation<::anakin::saber::X86, ::anakin::Precision::INT8>; +#endif } // namespace anakin } // namespace inference } // namespace paddle diff --git a/paddle/fluid/inference/anakin/engine.cc b/paddle/fluid/inference/anakin/engine.cc index 529a859458a98..2cc49220b99be 100644 --- a/paddle/fluid/inference/anakin/engine.cc +++ b/paddle/fluid/inference/anakin/engine.cc @@ -186,14 +186,14 @@ template class AnakinEngine<::anakin::saber::NV, ::anakin::Precision::INT8>; template class AnakinEngineManager<::anakin::saber::NV, ::anakin::Precision::INT8>; #endif - +#ifdef ANAKIN_X86_PLACE template class AnakinEngine<::anakin::saber::X86, ::anakin::Precision::FP32>; template class AnakinEngineManager<::anakin::saber::X86, ::anakin::Precision::FP32>; template class AnakinEngine<::anakin::saber::X86, ::anakin::Precision::INT8>; template class AnakinEngineManager<::anakin::saber::X86, ::anakin::Precision::INT8>; - +#endif // template class AnakinEngine<::anakin::saber::X86, ::anakin::Precision::FP32>; } // namespace anakin } // namespace inference diff --git a/paddle/fluid/inference/anakin/engine.h b/paddle/fluid/inference/anakin/engine.h index fb40f56511ba2..d4f8737c4f50e 100644 --- a/paddle/fluid/inference/anakin/engine.h +++ b/paddle/fluid/inference/anakin/engine.h @@ -24,7 +24,9 @@ #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/inference/engine.h" #include "paddle/fluid/inference/utils/singleton.h" - +#ifdef EXIT // NOLINT +#undef EXIT // NOLINT +#endif // NOLINT #include "framework/core/net/net.h" #include "framework/core/types.h" #include "framework/graph/graph.h" diff --git a/paddle/fluid/inference/anakin/test_anakin_engine.cc b/paddle/fluid/inference/anakin/test_anakin_engine.cc index 422f415a5db62..3c8a33ec60f8a 100644 --- a/paddle/fluid/inference/anakin/test_anakin_engine.cc +++ b/paddle/fluid/inference/anakin/test_anakin_engine.cc @@ -22,7 +22,6 @@ limitations under the License. */ using anakin::AK_FLOAT; using anakin::Precision; using anakin::saber::NV; -using anakin::saber::X86; using anakin::saber::Shape; using anakin::PBlock; using anakin::PTuple; diff --git a/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc index 9586ce3e6b014..a6c6f33cf779f 100644 --- a/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc +++ b/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc @@ -226,7 +226,6 @@ void AnakinSubgraphPass::CreateAnakinEngine( auto max_batch_size = Get("max_batch_size"); auto max_input_shape = Get>>("max_input_shape"); - bool auto_config_layout = Get("auto_config_layout"); if (use_gpu) { #ifdef PADDLE_WITH_CUDA inference::Singleton< @@ -235,11 +234,14 @@ void AnakinSubgraphPass::CreateAnakinEngine( max_input_shape, program_inputs, false, engine_key); #endif } else { +#ifdef ANAKIN_X86_PLACE + bool auto_config_layout = Get("auto_config_layout"); inference::Singleton< anakin::AnakinEngineManager<::anakin::saber::X86, PrecisionT>>::Global() .Create(true, Get("gpu_device_id"), max_batch_size, max_input_shape, program_inputs, auto_config_layout, engine_key); +#endif } auto *scope = param_scope(); @@ -258,6 +260,7 @@ void AnakinSubgraphPass::CreateAnakinEngine( param_set, output_mapping, anakin_engine); #endif } else { +#ifdef ANAKIN_X86_PLACE auto *anakin_engine = inference::Singleton>::Global() @@ -268,6 +271,7 @@ void AnakinSubgraphPass::CreateAnakinEngine( &block_desc_temp, scope, std::vector(input_names.begin(), input_names.end()), param_set, output_mapping, anakin_engine); +#endif } } diff --git a/paddle/fluid/inference/api/api_anakin_engine.cc b/paddle/fluid/inference/api/api_anakin_engine.cc index 798b299d512c0..165dad7140eca 100644 --- a/paddle/fluid/inference/api/api_anakin_engine.cc +++ b/paddle/fluid/inference/api/api_anakin_engine.cc @@ -40,6 +40,7 @@ PaddleInferenceAnakinPredictor::PaddleInferenceAnakinPredictor( : config_(config) { CHECK(Init()); } +#ifdef ANAKIN_X86_PLACE template <> PaddleInferenceAnakinPredictor::PaddleInferenceAnakinPredictor( const contrib::AnakinConfig &config) @@ -49,6 +50,7 @@ PaddleInferenceAnakinPredictor::PaddleInferenceAnakinPredictor( mkl_set_num_threads(1); CHECK(Init()); } +#endif template bool PaddleInferenceAnakinPredictor::Init() { anakin::Env::env_init(config_.max_stream); @@ -249,10 +251,12 @@ bool PaddleInferenceAnakinPredictor::RunImpl( } } #endif +#ifdef ANAKIN_X86_PLACE if (std::is_same::value) { memcpy(d_data_p, static_cast(input.data.data()), d_tensor_in_p->valid_size() * sizeof(float)); } +#endif } #ifdef PADDLE_WITH_CUDA cudaDeviceSynchronize(); @@ -282,10 +286,12 @@ bool PaddleInferenceAnakinPredictor::RunImpl( } } #endif +#ifdef ANAKIN_X86_PLACE if (std::is_same::value) { memcpy(output.data.data(), tensor->mutable_data(), tensor->valid_size() * sizeof(float)); } +#endif } return true; } @@ -334,7 +340,9 @@ PaddleInferenceAnakinPredictor::Clone() { #ifdef PADDLE_WITH_CUDA template class PaddleInferenceAnakinPredictor; #endif +#ifdef ANAKIN_X86_PLACE template class PaddleInferenceAnakinPredictor; +#endif // A factory to help create difference predictor. template <> @@ -353,13 +361,16 @@ CreatePaddlePredictor( return nullptr; #endif } else if (config.target_type == contrib::AnakinConfig::X86) { +#ifdef ANAKIN_X86_PLACE VLOG(3) << "Anakin Predictor create on [ Intel X86 ]."; std::unique_ptr x( new PaddleInferenceAnakinPredictor(config)); return x; +#else } else { LOG(INFO) << "Anakin Predictor create on unknown platform."; return nullptr; +#endif } } diff --git a/paddle/fluid/operators/anakin/anakin_engine_op.h b/paddle/fluid/operators/anakin/anakin_engine_op.h index 11c394c76cd98..b4aaa228693c8 100644 --- a/paddle/fluid/operators/anakin/anakin_engine_op.h +++ b/paddle/fluid/operators/anakin/anakin_engine_op.h @@ -119,11 +119,15 @@ class AnakinEngineOp : public framework::OperatorBase { engine->Execute(inputs, outputs, stream); #endif } else { +#ifdef ANAKIN_X86_PLACE auto *engine = inference::Singleton>::Global() .Get(engine_key_); engine->Execute(inputs, outputs); +#else + LOG(FATAL) << "Unknown Platform for AnakinEngine!"; +#endif } } }; From 641dc0d2d07c7278f01fd3be0e4553c08f945ca2 Mon Sep 17 00:00:00 2001 From: Shixiaowei02 <39303645+Shixiaowei02@users.noreply.github.com> Date: Sat, 1 Jun 2019 04:06:12 +0000 Subject: [PATCH 03/15] modification for thread-safe test=develop --- .../fluid/inference/api/api_anakin_engine.cc | 44 ++++++++++++------- .../fluid/inference/api/api_anakin_engine.h | 7 ++- 2 files changed, 33 insertions(+), 18 deletions(-) mode change 100644 => 100755 paddle/fluid/inference/api/api_anakin_engine.h diff --git a/paddle/fluid/inference/api/api_anakin_engine.cc b/paddle/fluid/inference/api/api_anakin_engine.cc index 165dad7140eca..6e3c50293375b 100644 --- a/paddle/fluid/inference/api/api_anakin_engine.cc +++ b/paddle/fluid/inference/api/api_anakin_engine.cc @@ -33,6 +33,10 @@ namespace paddle { using paddle::contrib::AnakinConfig; +template +extern std::mutex PaddleInferenceAnakinPredictor::mutex_; +template +extern std::once_flag PaddleInferenceAnakinPredictor::init_anakin_; template PaddleInferenceAnakinPredictor::PaddleInferenceAnakinPredictor( @@ -52,8 +56,26 @@ PaddleInferenceAnakinPredictor::PaddleInferenceAnakinPredictor( } #endif template +bool PaddleInferenceAnakinPredictor::InitEnv() { + std::call_once(init_anakin_, [this]() { + anakin::Env::env_init(config_.max_stream); + }); + return true; +} +template +bool PaddleInferenceAnakinPredictor::InitNet() { + std::unique_lock lock(mutex_); + if (executor_p_ == nullptr) { + executor_p_ = new anakin::Net(*graph_p_, true); + } + return true; +} +template bool PaddleInferenceAnakinPredictor::Init() { - anakin::Env::env_init(config_.max_stream); + if (!InitEnv()) { + return false; + } if (!ctx_p_) { ctx_p_ = std::make_shared>( config_.device_id, config_.data_stream_id, config_.compute_stream_id); @@ -80,11 +102,7 @@ bool PaddleInferenceAnakinPredictor::Init() { if (!(graph_p_->Optimize())) { return false; } - // construct executer - if (executor_p_ == nullptr) { - executor_p_ = new anakin::Net(*graph_p_, true); - } + InitNet(); return true; } @@ -209,9 +227,7 @@ bool PaddleInferenceAnakinPredictor::RunImpl( if (config_.re_allocable) { graph_p_->Reshape(input.name, input.shape); delete executor_p_; - executor_p_ = - new anakin::Net(*graph_p_, true); + InitNet(); d_tensor_in_p = executor_p_->get_in(input.name); } else { LOG(INFO) << "Run failed because Anakin was expected not to reallocate " @@ -311,8 +327,7 @@ anakin::Net graph_p_ = graph_p; ctx_p_ = std::make_shared>( config_.device_id, config_.data_stream_id, config_.compute_stream_id); - executor_p_ = new anakin::Net(*graph_p_, true); + InitNet(); return *executor_p_; } @@ -322,7 +337,6 @@ template std::unique_ptr PaddleInferenceAnakinPredictor::Clone() { VLOG(3) << "Anakin Predictor::clone"; - std::unique_lock lock(mutex_); std::unique_ptr cls( new PaddleInferenceAnakinPredictor()); // construct executer from other graph @@ -366,12 +380,10 @@ CreatePaddlePredictor( std::unique_ptr x( new PaddleInferenceAnakinPredictor(config)); return x; -#else - } else { - LOG(INFO) << "Anakin Predictor create on unknown platform."; - return nullptr; #endif } + LOG(INFO) << "Anakin Predictor create on unknown platform."; + return nullptr; } #ifdef PADDLE_ANAKIN_ENABLE_OP_TIMER diff --git a/paddle/fluid/inference/api/api_anakin_engine.h b/paddle/fluid/inference/api/api_anakin_engine.h old mode 100644 new mode 100755 index 3eeea853af57d..87973d0d082a0 --- a/paddle/fluid/inference/api/api_anakin_engine.h +++ b/paddle/fluid/inference/api/api_anakin_engine.h @@ -35,7 +35,7 @@ using contrib::AnakinConfig; template class PaddleInferenceAnakinPredictor : public PaddlePredictor { public: - PaddleInferenceAnakinPredictor() : config_() {} + PaddleInferenceAnakinPredictor() : config_() { InitEnv(); } explicit PaddleInferenceAnakinPredictor(const AnakinConfig& config); @@ -56,9 +56,12 @@ class PaddleInferenceAnakinPredictor : public PaddlePredictor { private: bool Init(); + bool InitEnv(); + bool InitNet(); bool RunImpl(const std::vector& inputs, std::vector* output_data); - std::mutex mutex_; + static std::mutex mutex_; + static std::once_flag init_anakin_; AnakinConfig config_; std::shared_ptr> ctx_p_; std::shared_ptr> From 83f79a3e1fefe751dfdd117d18e7688401ac6e12 Mon Sep 17 00:00:00 2001 From: Shixiaowei02 <39303645+Shixiaowei02@users.noreply.github.com> Date: Mon, 3 Jun 2019 12:54:58 +0000 Subject: [PATCH 04/15] Integrated template instance test=develop --- paddle/fluid/inference/api/CMakeLists.txt | 6 +- .../fluid/inference/api/api_anakin_engine.cc | 214 +++++++----------- .../fluid/inference/api/api_anakin_engine.h | 10 +- 3 files changed, 90 insertions(+), 140 deletions(-) diff --git a/paddle/fluid/inference/api/CMakeLists.txt b/paddle/fluid/inference/api/CMakeLists.txt index 5c7e7d4f575d3..cc23c2232632b 100644 --- a/paddle/fluid/inference/api/CMakeLists.txt +++ b/paddle/fluid/inference/api/CMakeLists.txt @@ -73,9 +73,9 @@ if (WITH_ANAKIN AND WITH_MKL) # only needed in CI endif() if (ANAKIN_FOUND AND WITH_MKL) - cc_library(inference_anakin_api SRCS api.cc api_anakin_engine.cc DEPS mklml zero_copy_tensor_dummy device_context) - target_link_libraries(inference_anakin_api anakin anakin_saber_common cudnn mklml_intel) - cc_library(inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc DEPS zero_copy_tensor_dummy device_context) + cc_library(inference_anakin_api SRCS api.cc api_anakin_engine.cc) + target_link_libraries(inference_anakin_api anakin anakin_saber_common mklml_intel) + cc_library(inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc) target_link_libraries(inference_anakin_api_shared anakin anakin_saber_common cudnn mklml_intel) function(anakin_target target_name) target_compile_options(${target_name} BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS}) diff --git a/paddle/fluid/inference/api/api_anakin_engine.cc b/paddle/fluid/inference/api/api_anakin_engine.cc index 6e3c50293375b..15e80a510b750 100644 --- a/paddle/fluid/inference/api/api_anakin_engine.cc +++ b/paddle/fluid/inference/api/api_anakin_engine.cc @@ -42,70 +42,69 @@ template PaddleInferenceAnakinPredictor::PaddleInferenceAnakinPredictor( const contrib::AnakinConfig &config) : config_(config) { - CHECK(Init()); + InitPredictor(); } -#ifdef ANAKIN_X86_PLACE -template <> -PaddleInferenceAnakinPredictor::PaddleInferenceAnakinPredictor( - const contrib::AnakinConfig &config) - : config_(config) { - omp_set_dynamic(0); - omp_set_num_threads(1); - mkl_set_num_threads(1); - CHECK(Init()); -} -#endif template -bool PaddleInferenceAnakinPredictor::InitEnv() { +void PaddleInferenceAnakinPredictor::InitEnv() { std::call_once(init_anakin_, [this]() { anakin::Env::env_init(config_.max_stream); +#ifdef ANAKIN_X86_PLACE + omp_set_dynamic(0); + omp_set_num_threads(1); + mkl_set_num_threads(1); +#endif }); - return true; } template -bool PaddleInferenceAnakinPredictor::InitNet() { +void PaddleInferenceAnakinPredictor::InitNet() { std::unique_lock lock(mutex_); if (executor_p_ == nullptr) { executor_p_ = new anakin::Net(*graph_p_, true); } - return true; } template -bool PaddleInferenceAnakinPredictor::Init() { - if (!InitEnv()) { - return false; - } - if (!ctx_p_) { - ctx_p_ = std::make_shared>( - config_.device_id, config_.data_stream_id, config_.compute_stream_id); - } - if (!graph_p_) { - graph_p_ = std::make_shared< - anakin::graph::Graph>(); - } +void PaddleInferenceAnakinPredictor::SetContext() { + ctx_p_ = std::make_shared>( + config_.device_id, config_.data_stream_id, config_.compute_stream_id); +} +template +void PaddleInferenceAnakinPredictor::InitGraph() { + graph_p_ = + std::make_shared>(); if (!(graph_p_->load(config_.model_file))) { - LOG(INFO) << "fail to load graph from " << config_.model_file; - return false; + LOG(FATAL) << "fail to load graph from " << config_.model_file; } auto inputs = graph_p_->get_ins(); for (auto &input_str : inputs) { if (config_.init_inputs_shape.find(input_str) == config_.init_inputs_shape.end()) { - LOG(INFO) << input_str << " is not implemented."; - return false; + LOG(FATAL) << input_str << " is not implemented."; } std::vector shape = config_.init_inputs_shape.find(input_str)->second; graph_p_->Reshape(input_str, shape); } - // optimization for graph - if (!(graph_p_->Optimize())) { - return false; +} +template +void PaddleInferenceAnakinPredictor::OptimizeGraph() { + if (!graph_p_->Optimize()) { + LOG(FATAL) << "Graph optimization error."; } +} +template +void PaddleInferenceAnakinPredictor::InitPredictor() { + InitEnv(); + SetContext(); + InitGraph(); + OptimizeGraph(); InitNet(); - return true; } - +template +void PaddleInferenceAnakinPredictor::Predict() { + anakin::TargetWrapper::device_sync(); + executor_p_->prediction(); + anakin::TargetWrapper::device_sync(); +} template bool PaddleInferenceAnakinPredictor::Run( const std::vector &inputs, @@ -203,23 +202,20 @@ bool PaddleInferenceAnakinPredictor::Run( } return true; } - template bool PaddleInferenceAnakinPredictor::RunImpl( const std::vector &inputs, std::vector *output_data) { for (const auto &input : inputs) { if (input.dtype != PaddleDType::FLOAT32) { - LOG(INFO) << "Only support float type inputs. " << input.name - << "'s type is not float"; - return false; + LOG(FATAL) << "Only support float type inputs. " << input.name + << "'s type is not float"; } - auto d_tensor_in_p = executor_p_->get_in(input.name); - auto net_shape = d_tensor_in_p->shape(); + auto d_tensor_p = executor_p_->get_in(input.name); + auto net_shape = d_tensor_p->shape(); if (net_shape.size() != input.shape.size()) { - LOG(INFO) << " input " << input.name - << "'s shape size should be equal to that of net"; - return false; + LOG(FATAL) << " input " << input.name + << "'s shape size should be equal to that of net"; } int sum = 1; for_each(input.shape.begin(), input.shape.end(), [&](int n) { sum *= n; }); @@ -228,97 +224,62 @@ bool PaddleInferenceAnakinPredictor::RunImpl( graph_p_->Reshape(input.name, input.shape); delete executor_p_; InitNet(); - d_tensor_in_p = executor_p_->get_in(input.name); + d_tensor_p = executor_p_->get_in(input.name); } else { - LOG(INFO) << "Run failed because Anakin was expected not to reallocate " - "memory."; - return false; + LOG(FATAL) + << "Run failed because Anakin was expected not to reallocate " + "memory."; } } - anakin::saber::Shape tmp_shape; + std::vector tmp_shape; for (auto s : input.shape) { tmp_shape.push_back(s); } - d_tensor_in_p->reshape(tmp_shape); + auto *data = static_cast(input.data.data()); + anakin::saber::Tensor::Host_type> + h_tensor(data, typename anakin::DefaultHostType::Host_type(), 0, + tmp_shape); + d_tensor_p->reshape(tmp_shape); if (input.lod.size() > 0) { if (input.lod.size() > 1) { - LOG(INFO) << " input lod first dim should <=1, but you set " - << input.lod.size(); - return false; + LOG(FATAL) << " input lod first dim should <=1, but you set " + << input.lod.size(); } std::vector lod(input.lod[0].begin(), input.lod[0].end()); std::vector> offset({lod}); - d_tensor_in_p->set_seq_offset(offset); + d_tensor_p->set_seq_offset(offset); VLOG(3) << "offset.size(): " << offset[0].size(); for (int i = 0; i < offset[0].size(); i++) { VLOG(3) << offset[0][i]; } } - - float *d_data_p = static_cast(d_tensor_in_p->mutable_data()); -#ifdef PADDLE_WITH_CUDA - if (std::is_same::value) { - if (cudaMemcpy(d_data_p, static_cast(input.data.data()), - d_tensor_in_p->valid_size() * sizeof(float), - cudaMemcpyHostToDevice) != 0) { - LOG(INFO) << "copy data from CPU to GPU error"; - return false; - } - } -#endif -#ifdef ANAKIN_X86_PLACE - if (std::is_same::value) { - memcpy(d_data_p, static_cast(input.data.data()), - d_tensor_in_p->valid_size() * sizeof(float)); - } -#endif + d_tensor_p->copy_from(h_tensor); } -#ifdef PADDLE_WITH_CUDA - cudaDeviceSynchronize(); - executor_p_->prediction(); - cudaDeviceSynchronize(); -#endif - + Predict(); if (output_data->empty()) { - LOG(INFO) << "At least one output should be set with tensors' names."; - return false; + LOG(FATAL) << "At least one output should be set with tensors' names."; } for (auto &output : *output_data) { - auto *tensor = executor_p_->get_out(output.name); - output.shape = tensor->valid_shape(); - if (output.data.length() < tensor->valid_size() * sizeof(float)) { - output.data.Resize(tensor->valid_size() * sizeof(float)); + auto *d_tensor_p = executor_p_->get_out(output.name); + output.shape = d_tensor_p->valid_shape(); + if (output.data.length() < d_tensor_p->valid_size() * sizeof(float)) { + output.data.Resize(d_tensor_p->valid_size() * sizeof(float)); } - -#if PADDLE_WITH_CUDA - if (std::is_same::value) { - // Copy data from GPU -> CPU - if (cudaMemcpy(output.data.data(), tensor->mutable_data(), - tensor->valid_size() * sizeof(float), - cudaMemcpyDeviceToHost) != 0) { - LOG(INFO) << "copy data from GPU to CPU error"; - return false; - } - } -#endif -#ifdef ANAKIN_X86_PLACE - if (std::is_same::value) { - memcpy(output.data.data(), tensor->mutable_data(), - tensor->valid_size() * sizeof(float)); - } -#endif + auto *data = static_cast(output.data.data()); + anakin::saber::Tensor::Host_type> + h_tensor(data, typename anakin::DefaultHostType::Host_type(), 0, + d_tensor_p->valid_shape()); + h_tensor.copy_from(*d_tensor_p); } return true; } - template bool PaddleInferenceAnakinPredictor::ResetConfig( const AnakinConfig &config) { config_ = config; return true; } - template anakin::Net &PaddleInferenceAnakinPredictor::ResetExecuter( @@ -330,7 +291,6 @@ anakin::Net InitNet(); return *executor_p_; } - // the cloned new Predictor of anakin share the same net weights from original // Predictor template @@ -343,8 +303,7 @@ PaddleInferenceAnakinPredictor::Clone() { auto anakin_predictor_p = dynamic_cast *>(cls.get()); if (!anakin_predictor_p) { - LOG(INFO) << "fail to call Init"; - return nullptr; + LOG(FATAL) << "fail to call Init"; } anakin_predictor_p->ResetConfig(config_); anakin_predictor_p->ResetExecuter(graph_p_); @@ -363,36 +322,26 @@ template <> std::unique_ptr CreatePaddlePredictor( const contrib::AnakinConfig &config) { - VLOG(3) << "Anakin Predictor create."; - if (config.target_type == contrib::AnakinConfig::NVGPU) { #ifdef PADDLE_WITH_CUDA - VLOG(3) << "Anakin Predictor create on [ NVIDIA GPU ]."; - std::unique_ptr x( + if (config.target_type == contrib::AnakinConfig::NV) { + return std::unique_ptr( new PaddleInferenceAnakinPredictor(config)); - return x; -#else - LOG(ERROR) << "AnakinConfig::NVGPU could not used in ONLY-CPU environment"; - return nullptr; + } #endif - } else if (config.target_type == contrib::AnakinConfig::X86) { #ifdef ANAKIN_X86_PLACE - VLOG(3) << "Anakin Predictor create on [ Intel X86 ]."; - std::unique_ptr x( + if (config.target_type == contrib::AnakinConfig::X86) { + return std::unique_ptr( new PaddleInferenceAnakinPredictor(config)); - return x; -#endif } - LOG(INFO) << "Anakin Predictor create on unknown platform."; +#endif + LOG(FATAL) << "Anakin Predictor create on unknown platform."; return nullptr; } - -#ifdef PADDLE_ANAKIN_ENABLE_OP_TIMER template -using executor_t = - anakin::Net; - -template -void DisplayOpTimer(executor_t *net_executor, int epoch) { +void DisplayOpTimer(anakin::Net *net_executor, + int epoch) { +#ifdef PADDLE_ANAKIN_ENABLE_OP_TIMER std::vector op_time = net_executor->get_op_time(); auto exec_funcs = net_executor->get_exec_funcs(); auto op_param = net_executor->get_op_param(); @@ -412,14 +361,11 @@ void DisplayOpTimer(executor_t *net_executor, int epoch) { for (auto it = op_map.begin(); it != op_map.end(); ++it) { LOG(INFO) << it->first << " " << (it->second) / epoch << " ms"; } -} #endif - +} template PaddleInferenceAnakinPredictor::~PaddleInferenceAnakinPredictor() { -#ifdef PADDLE_ANAKIN_ENABLE_OP_TIMER DisplayOpTimer(executor_p_, config_.init_batch_size); -#endif delete executor_p_; executor_p_ = nullptr; } diff --git a/paddle/fluid/inference/api/api_anakin_engine.h b/paddle/fluid/inference/api/api_anakin_engine.h index 87973d0d082a0..5d6050a1cf01b 100755 --- a/paddle/fluid/inference/api/api_anakin_engine.h +++ b/paddle/fluid/inference/api/api_anakin_engine.h @@ -55,11 +55,15 @@ class PaddleInferenceAnakinPredictor : public PaddlePredictor { ~PaddleInferenceAnakinPredictor() override; private: - bool Init(); - bool InitEnv(); - bool InitNet(); + void InitPredictor(); + void InitEnv(); + void InitGraph(); + void OptimizeGraph(); + void InitNet(); + void SetContext(); bool RunImpl(const std::vector& inputs, std::vector* output_data); + void Predict(); static std::mutex mutex_; static std::once_flag init_anakin_; AnakinConfig config_; From 89bfbe84039bfc9ae7f968a93fb8f5b1f8b64feb Mon Sep 17 00:00:00 2001 From: Shixiaowei02 <39303645+Shixiaowei02@users.noreply.github.com> Date: Tue, 4 Jun 2019 11:51:03 +0000 Subject: [PATCH 05/15] increase template parameters test=develop --- .../fluid/inference/api/api_anakin_engine.cc | 117 +++++++++--------- .../fluid/inference/api/api_anakin_engine.h | 18 ++- .../inference/api/paddle_anakin_config.h | 2 +- 3 files changed, 68 insertions(+), 69 deletions(-) mode change 100755 => 100644 paddle/fluid/inference/api/api_anakin_engine.h diff --git a/paddle/fluid/inference/api/api_anakin_engine.cc b/paddle/fluid/inference/api/api_anakin_engine.cc index 15e80a510b750..16d998f0e23ec 100644 --- a/paddle/fluid/inference/api/api_anakin_engine.cc +++ b/paddle/fluid/inference/api/api_anakin_engine.cc @@ -33,21 +33,21 @@ namespace paddle { using paddle::contrib::AnakinConfig; -template -extern std::mutex PaddleInferenceAnakinPredictor::mutex_; -template -extern std::once_flag PaddleInferenceAnakinPredictor::init_anakin_; +template +extern std::mutex PaddleInferenceAnakinPredictor::mutex_; +template +extern std::once_flag PaddleInferenceAnakinPredictor::init_anakin_; -template -PaddleInferenceAnakinPredictor::PaddleInferenceAnakinPredictor( +template +PaddleInferenceAnakinPredictor::PaddleInferenceAnakinPredictor( const contrib::AnakinConfig &config) : config_(config) { InitPredictor(); } -template -void PaddleInferenceAnakinPredictor::InitEnv() { +template +void PaddleInferenceAnakinPredictor::InitEnv() { std::call_once(init_anakin_, [this]() { - anakin::Env::env_init(config_.max_stream); + anakin::Env::env_init(config_.max_stream); #ifdef ANAKIN_X86_PLACE omp_set_dynamic(0); omp_set_num_threads(1); @@ -55,23 +55,24 @@ void PaddleInferenceAnakinPredictor::InitEnv() { #endif }); } -template -void PaddleInferenceAnakinPredictor::InitNet() { +template +void PaddleInferenceAnakinPredictor::InitNet() { std::unique_lock lock(mutex_); if (executor_p_ == nullptr) { - executor_p_ = new anakin::Net(*graph_p_, true); + executor_p_ = + new anakin::Net( + *graph_p_, true); } } -template -void PaddleInferenceAnakinPredictor::SetContext() { - ctx_p_ = std::make_shared>( +template +void PaddleInferenceAnakinPredictor::SetContext() { + ctx_p_ = std::make_shared>( config_.device_id, config_.data_stream_id, config_.compute_stream_id); } -template -void PaddleInferenceAnakinPredictor::InitGraph() { +template +void PaddleInferenceAnakinPredictor::InitGraph() { graph_p_ = - std::make_shared>(); + std::make_shared>(); if (!(graph_p_->load(config_.model_file))) { LOG(FATAL) << "fail to load graph from " << config_.model_file; } @@ -85,28 +86,28 @@ void PaddleInferenceAnakinPredictor::InitGraph() { graph_p_->Reshape(input_str, shape); } } -template -void PaddleInferenceAnakinPredictor::OptimizeGraph() { +template +void PaddleInferenceAnakinPredictor::OptimizeGraph() { if (!graph_p_->Optimize()) { LOG(FATAL) << "Graph optimization error."; } } -template -void PaddleInferenceAnakinPredictor::InitPredictor() { +template +void PaddleInferenceAnakinPredictor::InitPredictor() { InitEnv(); SetContext(); InitGraph(); OptimizeGraph(); InitNet(); } -template -void PaddleInferenceAnakinPredictor::Predict() { - anakin::TargetWrapper::device_sync(); +template +void PaddleInferenceAnakinPredictor::Predict() { + anakin::TargetWrapper::device_sync(); executor_p_->prediction(); - anakin::TargetWrapper::device_sync(); + anakin::TargetWrapper::device_sync(); } -template -bool PaddleInferenceAnakinPredictor::Run( +template +bool PaddleInferenceAnakinPredictor::Run( const std::vector &inputs, std::vector *output_data, int batch_size) { if (config_.re_allocable) { @@ -202,8 +203,8 @@ bool PaddleInferenceAnakinPredictor::Run( } return true; } -template -bool PaddleInferenceAnakinPredictor::RunImpl( +template +bool PaddleInferenceAnakinPredictor::RunImpl( const std::vector &inputs, std::vector *output_data) { for (const auto &input : inputs) { @@ -236,8 +237,8 @@ bool PaddleInferenceAnakinPredictor::RunImpl( tmp_shape.push_back(s); } auto *data = static_cast(input.data.data()); - anakin::saber::Tensor::Host_type> - h_tensor(data, typename anakin::DefaultHostType::Host_type(), 0, + anakin::saber::Tensor::Host_type> + h_tensor(data, typename anakin::DefaultHostType::Host_type(), 0, tmp_shape); d_tensor_p->reshape(tmp_shape); @@ -267,41 +268,39 @@ bool PaddleInferenceAnakinPredictor::RunImpl( output.data.Resize(d_tensor_p->valid_size() * sizeof(float)); } auto *data = static_cast(output.data.data()); - anakin::saber::Tensor::Host_type> - h_tensor(data, typename anakin::DefaultHostType::Host_type(), 0, + anakin::saber::Tensor::Host_type> + h_tensor(data, typename anakin::DefaultHostType::Host_type(), 0, d_tensor_p->valid_shape()); h_tensor.copy_from(*d_tensor_p); } return true; } -template -bool PaddleInferenceAnakinPredictor::ResetConfig( +template +bool PaddleInferenceAnakinPredictor::ResetConfig( const AnakinConfig &config) { config_ = config; return true; } -template -anakin::Net - &PaddleInferenceAnakinPredictor::ResetExecuter( - std::shared_ptr> - graph_p) { +template +anakin::Net &PaddleInferenceAnakinPredictor::ResetExecuter( + std::shared_ptr> graph_p) { graph_p_ = graph_p; - ctx_p_ = std::make_shared>( + ctx_p_ = std::make_shared>( config_.device_id, config_.data_stream_id, config_.compute_stream_id); InitNet(); return *executor_p_; } // the cloned new Predictor of anakin share the same net weights from original // Predictor -template +template std::unique_ptr -PaddleInferenceAnakinPredictor::Clone() { +PaddleInferenceAnakinPredictor::Clone() { VLOG(3) << "Anakin Predictor::clone"; std::unique_ptr cls( - new PaddleInferenceAnakinPredictor()); + new PaddleInferenceAnakinPredictor()); // construct executer from other graph auto anakin_predictor_p = - dynamic_cast *>(cls.get()); + dynamic_cast *>(cls.get()); if (!anakin_predictor_p) { LOG(FATAL) << "fail to call Init"; } @@ -311,10 +310,12 @@ PaddleInferenceAnakinPredictor::Clone() { } #ifdef PADDLE_WITH_CUDA -template class PaddleInferenceAnakinPredictor; +template class PaddleInferenceAnakinPredictor< + anakin::NV, anakin::Precision::FP32, ::anakin::OpRunType::ASYNC>; #endif #ifdef ANAKIN_X86_PLACE -template class PaddleInferenceAnakinPredictor; +template class PaddleInferenceAnakinPredictor< + anakin::X86, anakin::Precision::FP32, ::anakin::OpRunType::ASYNC>; #endif // A factory to help create difference predictor. @@ -325,22 +326,22 @@ CreatePaddlePredictor( #ifdef PADDLE_WITH_CUDA if (config.target_type == contrib::AnakinConfig::NV) { return std::unique_ptr( - new PaddleInferenceAnakinPredictor(config)); + new PaddleInferenceAnakinPredictor(config)); } #endif #ifdef ANAKIN_X86_PLACE if (config.target_type == contrib::AnakinConfig::X86) { return std::unique_ptr( - new PaddleInferenceAnakinPredictor(config)); + new PaddleInferenceAnakinPredictor(config)); } #endif LOG(FATAL) << "Anakin Predictor create on unknown platform."; return nullptr; } -template -void DisplayOpTimer(anakin::Net *net_executor, - int epoch) { +template +void DisplayOpTimer(anakin::Net *net_executor, int epoch) { #ifdef PADDLE_ANAKIN_ENABLE_OP_TIMER std::vector op_time = net_executor->get_op_time(); auto exec_funcs = net_executor->get_exec_funcs(); @@ -363,9 +364,9 @@ void DisplayOpTimer(anakin::Net -PaddleInferenceAnakinPredictor::~PaddleInferenceAnakinPredictor() { - DisplayOpTimer(executor_p_, config_.init_batch_size); +template +PaddleInferenceAnakinPredictor::~PaddleInferenceAnakinPredictor() { + DisplayOpTimer(executor_p_, config_.init_batch_size); delete executor_p_; executor_p_ = nullptr; } diff --git a/paddle/fluid/inference/api/api_anakin_engine.h b/paddle/fluid/inference/api/api_anakin_engine.h old mode 100755 new mode 100644 index 5d6050a1cf01b..47f07726a1f75 --- a/paddle/fluid/inference/api/api_anakin_engine.h +++ b/paddle/fluid/inference/api/api_anakin_engine.h @@ -31,8 +31,10 @@ limitations under the License. */ namespace paddle { using contrib::AnakinConfig; +using anakin::Precision; +using anakin::OpRunType; -template +template class PaddleInferenceAnakinPredictor : public PaddlePredictor { public: PaddleInferenceAnakinPredictor() : config_() { InitEnv(); } @@ -47,10 +49,8 @@ class PaddleInferenceAnakinPredictor : public PaddlePredictor { std::unique_ptr Clone() override; bool ResetConfig(const AnakinConfig& config); - anakin::Net& - ResetExecuter( - std::shared_ptr> - graph_p); + anakin::Net& ResetExecuter( + std::shared_ptr> graph_p); ~PaddleInferenceAnakinPredictor() override; @@ -67,11 +67,9 @@ class PaddleInferenceAnakinPredictor : public PaddlePredictor { static std::mutex mutex_; static std::once_flag init_anakin_; AnakinConfig config_; - std::shared_ptr> ctx_p_; - std::shared_ptr> - graph_p_; - anakin::Net* - executor_p_{nullptr}; + std::shared_ptr> ctx_p_; + std::shared_ptr> graph_p_; + anakin::Net* executor_p_{nullptr}; }; } // namespace paddle diff --git a/paddle/fluid/inference/api/paddle_anakin_config.h b/paddle/fluid/inference/api/paddle_anakin_config.h index 7c32a28c324c3..6981b1b911c28 100644 --- a/paddle/fluid/inference/api/paddle_anakin_config.h +++ b/paddle/fluid/inference/api/paddle_anakin_config.h @@ -25,7 +25,7 @@ namespace paddle { namespace contrib { // Configurations for Anakin engine. struct AnakinConfig : public PaddlePredictor::Config { - enum TargetType { NVGPU = 0, X86 }; + enum TargetType { NV = 0, X86 }; int device_id{0}; std::string model_file; std::map> init_inputs_shape; From 0de3225d170d91a40d7d36d842e3a919f49f879c Mon Sep 17 00:00:00 2001 From: Shixiaowei02 <39303645+Shixiaowei02@users.noreply.github.com> Date: Wed, 5 Jun 2019 02:53:51 +0000 Subject: [PATCH 06/15] support MLU predictor test=develop --- paddle/fluid/inference/api/CMakeLists.txt | 6 +- .../fluid/inference/api/api_anakin_engine.cc | 161 ++++++++++-------- .../fluid/inference/api/api_anakin_engine.h | 52 ++++-- .../inference/api/paddle_anakin_config.h | 10 +- .../inference/tests/api/anakin_mlu_tester.cc | 98 +++++++++++ 5 files changed, 237 insertions(+), 90 deletions(-) create mode 100644 paddle/fluid/inference/tests/api/anakin_mlu_tester.cc diff --git a/paddle/fluid/inference/api/CMakeLists.txt b/paddle/fluid/inference/api/CMakeLists.txt index cc23c2232632b..b0f8a81c5580e 100644 --- a/paddle/fluid/inference/api/CMakeLists.txt +++ b/paddle/fluid/inference/api/CMakeLists.txt @@ -72,11 +72,11 @@ if (WITH_ANAKIN AND WITH_MKL) # only needed in CI anakin_target(inference_anakin_api_shared) endif() -if (ANAKIN_FOUND AND WITH_MKL) +if (ANAKIN_FOUND) cc_library(inference_anakin_api SRCS api.cc api_anakin_engine.cc) - target_link_libraries(inference_anakin_api anakin anakin_saber_common mklml_intel) + target_link_libraries(inference_anakin_api anakin anakin_saber_common) cc_library(inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc) - target_link_libraries(inference_anakin_api_shared anakin anakin_saber_common cudnn mklml_intel) + target_link_libraries(inference_anakin_api_shared anakin anakin_saber_common) function(anakin_target target_name) target_compile_options(${target_name} BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS}) endfunction() diff --git a/paddle/fluid/inference/api/api_anakin_engine.cc b/paddle/fluid/inference/api/api_anakin_engine.cc index 16d998f0e23ec..adb3e8e50c0a9 100644 --- a/paddle/fluid/inference/api/api_anakin_engine.cc +++ b/paddle/fluid/inference/api/api_anakin_engine.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,20 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/inference/api/api_anakin_engine.h" -#include "paddle/fluid/inference/api/paddle_api.h" - -#ifdef PADDLE_WITH_CUDA -#include -#endif - -#include -#include #include #include #include #include +#include "paddle/fluid/inference/api/api_anakin_engine.h" +#include "paddle/fluid/inference/api/paddle_api.h" + #include "framework/core/net/net.h" #include "framework/operators/ops.h" #include "saber/funcs/timer.h" @@ -38,79 +32,66 @@ extern std::mutex PaddleInferenceAnakinPredictor::mutex_; template extern std::once_flag PaddleInferenceAnakinPredictor::init_anakin_; -template -PaddleInferenceAnakinPredictor::PaddleInferenceAnakinPredictor( - const contrib::AnakinConfig &config) - : config_(config) { - InitPredictor(); -} template void PaddleInferenceAnakinPredictor::InitEnv() { - std::call_once(init_anakin_, [this]() { - anakin::Env::env_init(config_.max_stream); -#ifdef ANAKIN_X86_PLACE - omp_set_dynamic(0); - omp_set_num_threads(1); - mkl_set_num_threads(1); -#endif + std::call_once(this->init_anakin_, [this]() { + anakin::Env::env_init(this->config_.max_stream); }); } template void PaddleInferenceAnakinPredictor::InitNet() { - std::unique_lock lock(mutex_); - if (executor_p_ == nullptr) { - executor_p_ = - new anakin::Net( - *graph_p_, true); - } + std::unique_lock lock(this->mutex_); + this->executor_p_ = new anakin::Net(*this->graph_p_, true); } template void PaddleInferenceAnakinPredictor::SetContext() { - ctx_p_ = std::make_shared>( - config_.device_id, config_.data_stream_id, config_.compute_stream_id); + this->ctx_p_ = std::make_shared>( + this->config_.device_id, this->config_.data_stream_id, + this->config_.compute_stream_id); } template void PaddleInferenceAnakinPredictor::InitGraph() { - graph_p_ = + this->graph_p_ = std::make_shared>(); - if (!(graph_p_->load(config_.model_file))) { - LOG(FATAL) << "fail to load graph from " << config_.model_file; + if (!(this->graph_p_->load(this->config_.model_file))) { + LOG(FATAL) << "fail to load graph from " << this->config_.model_file; } - auto inputs = graph_p_->get_ins(); + auto inputs = this->graph_p_->get_ins(); for (auto &input_str : inputs) { - if (config_.init_inputs_shape.find(input_str) == - config_.init_inputs_shape.end()) { + if (this->config_.init_inputs_shape.find(input_str) == + this->config_.init_inputs_shape.end()) { LOG(FATAL) << input_str << " is not implemented."; } - std::vector shape = config_.init_inputs_shape.find(input_str)->second; - graph_p_->Reshape(input_str, shape); + std::vector shape = + this->config_.init_inputs_shape.find(input_str)->second; + this->graph_p_->Reshape(input_str, shape); } } template void PaddleInferenceAnakinPredictor::OptimizeGraph() { - if (!graph_p_->Optimize()) { + if (!this->graph_p_->Optimize()) { LOG(FATAL) << "Graph optimization error."; } } template void PaddleInferenceAnakinPredictor::InitPredictor() { - InitEnv(); - SetContext(); - InitGraph(); - OptimizeGraph(); - InitNet(); + this->InitEnv(); + this->SetContext(); + this->InitGraph(); + this->OptimizeGraph(); + this->InitNet(); } template void PaddleInferenceAnakinPredictor::Predict() { anakin::TargetWrapper::device_sync(); - executor_p_->prediction(); + this->executor_p_->prediction(); anakin::TargetWrapper::device_sync(); } template bool PaddleInferenceAnakinPredictor::Run( const std::vector &inputs, std::vector *output_data, int batch_size) { - if (config_.re_allocable) { + if (this->config_.re_allocable) { return this->RunImpl(inputs, output_data); } else { // Run inputs data that exceeds batch size in batches. @@ -123,7 +104,7 @@ bool PaddleInferenceAnakinPredictor::Run( } } // 2. If the data don't need to be batched, run it directly. - if (batch_size <= config_.init_batch_size) { + if (batch_size <= this->config_.init_batch_size) { return this->RunImpl(inputs, output_data); } // 3. Check the batch size and define temporary variables. @@ -154,7 +135,7 @@ bool PaddleInferenceAnakinPredictor::Run( } // 4. Batch execution. for (size_t start_batch = 0; start_batch < batch_size;) { - auto end_batch = start_batch + config_.init_batch_size; + auto end_batch = start_batch + this->config_.init_batch_size; if (end_batch > batch_size) { end_batch = batch_size; } @@ -212,7 +193,7 @@ bool PaddleInferenceAnakinPredictor::RunImpl( LOG(FATAL) << "Only support float type inputs. " << input.name << "'s type is not float"; } - auto d_tensor_p = executor_p_->get_in(input.name); + auto d_tensor_p = this->executor_p_->get_in(input.name); auto net_shape = d_tensor_p->shape(); if (net_shape.size() != input.shape.size()) { LOG(FATAL) << " input " << input.name @@ -221,11 +202,11 @@ bool PaddleInferenceAnakinPredictor::RunImpl( int sum = 1; for_each(input.shape.begin(), input.shape.end(), [&](int n) { sum *= n; }); if (sum > net_shape.count()) { - if (config_.re_allocable) { - graph_p_->Reshape(input.name, input.shape); - delete executor_p_; - InitNet(); - d_tensor_p = executor_p_->get_in(input.name); + if (this->config_.re_allocable) { + this->graph_p_->Reshape(input.name, input.shape); + delete this->executor_p_; + this->InitNet(); + d_tensor_p = this->executor_p_->get_in(input.name); } else { LOG(FATAL) << "Run failed because Anakin was expected not to reallocate " @@ -257,12 +238,12 @@ bool PaddleInferenceAnakinPredictor::RunImpl( } d_tensor_p->copy_from(h_tensor); } - Predict(); + this->Predict(); if (output_data->empty()) { LOG(FATAL) << "At least one output should be set with tensors' names."; } for (auto &output : *output_data) { - auto *d_tensor_p = executor_p_->get_out(output.name); + auto *d_tensor_p = this->executor_p_->get_out(output.name); output.shape = d_tensor_p->valid_shape(); if (output.data.length() < d_tensor_p->valid_size() * sizeof(float)) { output.data.Resize(d_tensor_p->valid_size() * sizeof(float)); @@ -278,17 +259,18 @@ bool PaddleInferenceAnakinPredictor::RunImpl( template bool PaddleInferenceAnakinPredictor::ResetConfig( const AnakinConfig &config) { - config_ = config; + this->config_ = config; return true; } template anakin::Net &PaddleInferenceAnakinPredictor::ResetExecuter( std::shared_ptr> graph_p) { - graph_p_ = graph_p; - ctx_p_ = std::make_shared>( - config_.device_id, config_.data_stream_id, config_.compute_stream_id); - InitNet(); - return *executor_p_; + this->graph_p_ = graph_p; + this->ctx_p_ = std::make_shared>( + this->config_.device_id, this->config_.data_stream_id, + this->config_.compute_stream_id); + this->InitNet(); + return *this->executor_p_; } // the cloned new Predictor of anakin share the same net weights from original // Predictor @@ -304,11 +286,40 @@ PaddleInferenceAnakinPredictor::Clone() { if (!anakin_predictor_p) { LOG(FATAL) << "fail to call Init"; } - anakin_predictor_p->ResetConfig(config_); - anakin_predictor_p->ResetExecuter(graph_p_); + anakin_predictor_p->ResetConfig(this->config_); + anakin_predictor_p->ResetExecuter(this->graph_p_); return cls; } +#ifdef ANAKIN_MLU_PLACE +template +void PaddleInferenceAnakinMLUPredictor::SetContext() { + this->ctx_p_ = std::make_shared>( + this->config_.device_id, this->config_.data_stream_id, + this->config_.compute_stream_id); + this->ctx_p_->set_model_parallel(this->config_.model_parallel); + this->ctx_p_->set_fusion(this->config_.op_fuse); +} +template +void PaddleInferenceAnakinMLUPredictor::OptimizeGraph() { + if (!this->graph_p_->fusion_optimize(this->config_.op_fuse)) { + LOG(FATAL) << "Graph optimization error."; + } +} +template +void PaddleInferenceAnakinMLUPredictor::InitNet() { + std::unique_lock lock(this->mutex_); + this->executor_p_ = new anakin::Net(); + this->executor_p_->fusion_init(*this->graph_p_, this->ctx_p_, true); +} +template +void PaddleInferenceAnakinMLUPredictor::Predict() { + anakin::TargetWrapper::device_sync(); + this->executor_p_->fusion_prediction(); + anakin::TargetWrapper::device_sync(); +} +#endif + #ifdef PADDLE_WITH_CUDA template class PaddleInferenceAnakinPredictor< anakin::NV, anakin::Precision::FP32, ::anakin::OpRunType::ASYNC>; @@ -317,6 +328,10 @@ template class PaddleInferenceAnakinPredictor< template class PaddleInferenceAnakinPredictor< anakin::X86, anakin::Precision::FP32, ::anakin::OpRunType::ASYNC>; #endif +#ifdef ANAKIN_MLU_PLACE +template class PaddleInferenceAnakinMLUPredictor; +#endif // A factory to help create difference predictor. template <> @@ -324,7 +339,7 @@ std::unique_ptr CreatePaddlePredictor( const contrib::AnakinConfig &config) { #ifdef PADDLE_WITH_CUDA - if (config.target_type == contrib::AnakinConfig::NV) { + if (config.target_type == contrib::AnakinConfig::NVGPU) { return std::unique_ptr( new PaddleInferenceAnakinPredictor(config)); @@ -336,6 +351,14 @@ CreatePaddlePredictor( new PaddleInferenceAnakinPredictor(config)); } +#endif +#ifdef ANAKIN_MLU_PLACE + if (config.target_type == contrib::AnakinConfig::MLU) { + return std::unique_ptr( + new PaddleInferenceAnakinMLUPredictor( + config)); + } #endif LOG(FATAL) << "Anakin Predictor create on unknown platform."; return nullptr; @@ -366,9 +389,9 @@ void DisplayOpTimer(anakin::Net *net_executor, int epoch) { } template PaddleInferenceAnakinPredictor::~PaddleInferenceAnakinPredictor() { - DisplayOpTimer(executor_p_, config_.init_batch_size); - delete executor_p_; - executor_p_ = nullptr; + DisplayOpTimer(this->executor_p_, this->config_.init_batch_size); + delete this->executor_p_; + this->executor_p_ = nullptr; } } // namespace paddle diff --git a/paddle/fluid/inference/api/api_anakin_engine.h b/paddle/fluid/inference/api/api_anakin_engine.h index 47f07726a1f75..c19477a8aafe1 100644 --- a/paddle/fluid/inference/api/api_anakin_engine.h +++ b/paddle/fluid/inference/api/api_anakin_engine.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -37,9 +37,12 @@ using anakin::OpRunType; template class PaddleInferenceAnakinPredictor : public PaddlePredictor { public: - PaddleInferenceAnakinPredictor() : config_() { InitEnv(); } + PaddleInferenceAnakinPredictor() : config_() { this->InitEnv(); } - explicit PaddleInferenceAnakinPredictor(const AnakinConfig& config); + explicit PaddleInferenceAnakinPredictor(const AnakinConfig& config) + : config_(config) { + this->InitPredictor(); + } // NOTE Unlike the native engine, the buffers of anakin engine's output_data // should be allocated first. @@ -48,28 +51,45 @@ class PaddleInferenceAnakinPredictor : public PaddlePredictor { int batch_size = -1) override; std::unique_ptr Clone() override; - bool ResetConfig(const AnakinConfig& config); - anakin::Net& ResetExecuter( + virtual bool ResetConfig(const AnakinConfig& config); + virtual anakin::Net& ResetExecuter( std::shared_ptr> graph_p); + void InitPredictor(); ~PaddleInferenceAnakinPredictor() override; - private: - void InitPredictor(); - void InitEnv(); - void InitGraph(); - void OptimizeGraph(); - void InitNet(); - void SetContext(); - bool RunImpl(const std::vector& inputs, - std::vector* output_data); - void Predict(); static std::mutex mutex_; - static std::once_flag init_anakin_; AnakinConfig config_; std::shared_ptr> ctx_p_; std::shared_ptr> graph_p_; anakin::Net* executor_p_{nullptr}; + + void InitEnv(); + void InitGraph(); + virtual void OptimizeGraph(); + virtual void InitNet(); + virtual void SetContext(); + virtual void Predict(); + + private: + bool RunImpl(const std::vector& inputs, + std::vector* output_data); + static std::once_flag init_anakin_; }; +#ifdef ANAKIN_MLU_PLACE +template +class PaddleInferenceAnakinMLUPredictor + : public PaddleInferenceAnakinPredictor { + public: + explicit PaddleInferenceAnakinMLUPredictor(const AnakinConfig& config) { + this->ResetConfig(config); + this->InitPredictor(); + } + void SetContext() override; + void OptimizeGraph() override; + void InitNet() override; + void Predict() override; +}; +#endif } // namespace paddle diff --git a/paddle/fluid/inference/api/paddle_anakin_config.h b/paddle/fluid/inference/api/paddle_anakin_config.h index 6981b1b911c28..7c0e2f06ff417 100644 --- a/paddle/fluid/inference/api/paddle_anakin_config.h +++ b/paddle/fluid/inference/api/paddle_anakin_config.h @@ -1,4 +1,4 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -25,7 +25,7 @@ namespace paddle { namespace contrib { // Configurations for Anakin engine. struct AnakinConfig : public PaddlePredictor::Config { - enum TargetType { NV = 0, X86 }; + enum TargetType { NVGPU = 0, X86, MLU }; int device_id{0}; std::string model_file; std::map> init_inputs_shape; @@ -35,6 +35,12 @@ struct AnakinConfig : public PaddlePredictor::Config { int data_stream_id{0}; int compute_stream_id{0}; TargetType target_type; +#ifdef ANAKIN_MLU_PLACE + int model_parallel{8}; + int data_parallel{1}; + bool op_fuse{false}; + bool sparse{false}; +#endif }; } // namespace contrib diff --git a/paddle/fluid/inference/tests/api/anakin_mlu_tester.cc b/paddle/fluid/inference/tests/api/anakin_mlu_tester.cc new file mode 100644 index 0000000000000..8094c744fef50 --- /dev/null +++ b/paddle/fluid/inference/tests/api/anakin_mlu_tester.cc @@ -0,0 +1,98 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "paddle/fluid/inference/api/paddle_inference_api.h" + +DEFINE_string(model, "", "Directory of the inference model."); + +namespace paddle { + +contrib::AnakinConfig Config() { + // Determine the use of memory here. + std::map> init_inputs_shape; + init_inputs_shape["input_0"] = std::vector({1, 3, 112, 112}); + + contrib::AnakinConfig config; + config.target_type = contrib::AnakinConfig::MLU; + config.model_file = FLAGS_model; + config.init_inputs_shape = init_inputs_shape; + + // Determine the device execution context. + config.device_id = 0; + config.data_stream_id = 0; + config.compute_stream_id = 0; + + // Set re_allocable and op_fuse TRUE. + config.re_allocable = true; + config.op_fuse = true; + + return config; +} + +void single_test() { + // 1. Defining basic data structures. + auto config = paddle::Config(); + auto predictor = + paddle::CreatePaddlePredictor(config); + + // 2. Define the data structure of the predictor inputs and outputs. + std::vector input_tensors; + std::vector output_tensors; + + // 3. Define and fill the inputs tensor. + int num = 1; + int channel = 3; + int height = 112; + int width = 112; + std::vector input(num * channel * height * width, 1); + std::vector> inputs({input}); + const std::vector input_names{"input_0"}; + for (auto& name : input_names) { + paddle::PaddleTensor tensor; + tensor.name = name; + tensor.dtype = PaddleDType::FLOAT32; + input_tensors.push_back(tensor); + } + for (size_t j = 0; j < input_tensors.size(); j++) { + input_tensors[j].data = + paddle::PaddleBuf(&inputs[j][0], inputs[j].size() * sizeof(float)); + // The shape of each execution can be changed. + input_tensors[j].shape = std::vector({num, channel, height, width}); + } + + // 4. Set the output placeholder of predictor. + PaddleTensor predict_out, score_out; + predict_out.name = "landmark_predict_out"; + score_out.name = "landmark_score_out"; + output_tensors.push_back(predict_out); + output_tensors.push_back(score_out); + + // 5. Execution predict. + predictor->Run(input_tensors, &output_tensors); + + // 6. Take out the output data. + for (auto out : output_tensors) { + float* data_o = static_cast(out.data.data()); + LOG(INFO) << out.name << " size = " << out.data.length() / sizeof(float); + } +} +} // namespace paddle + +int main(int argc, char** argv) { + google::ParseCommandLineFlags(&argc, &argv, true); + paddle::single_test(); + return 0; +} From 0ea5684c372295d3003e35e8360dbc4904650592 Mon Sep 17 00:00:00 2001 From: Shixiaowei02 <39303645+Shixiaowei02@users.noreply.github.com> Date: Wed, 5 Jun 2019 05:57:59 +0000 Subject: [PATCH 07/15] update anakin cmake files test=develop --- CMakeLists.txt | 2 +- cmake/anakin_subgraph.cmake | 11 ++-- paddle/fluid/framework/ir/CMakeLists.txt | 2 +- paddle/fluid/inference/CMakeLists.txt | 4 +- .../analysis/ir_passes/CMakeLists.txt | 2 +- paddle/fluid/inference/api/CMakeLists.txt | 50 ++++++++----------- paddle/fluid/operators/CMakeLists.txt | 2 +- 7 files changed, 33 insertions(+), 40 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bd113a9ec8a9e..002c42a4c24db 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -194,8 +194,8 @@ include(configure) # add paddle env configuration if(WITH_GPU) include(cuda) include(tensorrt) - include(anakin_subgraph) endif() +include(anakin_subgraph) if(WIN32 OR APPLE OR NOT WITH_GPU OR ON_INFER) set(WITH_DGC OFF) diff --git a/cmake/anakin_subgraph.cmake b/cmake/anakin_subgraph.cmake index b5437e776d31e..eb7bce9f3b7a9 100644 --- a/cmake/anakin_subgraph.cmake +++ b/cmake/anakin_subgraph.cmake @@ -1,7 +1,3 @@ -if(NOT WITH_GPU) - return() -endif() - set(ANAKIN_ROOT "/usr" CACHE PATH "ANAKIN ROOT") find_path(ANAKIN_INCLUDE_DIR anakin_config.h PATHS ${ANAKIN_ROOT} ${ANAKIN_ROOT}/include @@ -16,9 +12,7 @@ find_library(ANAKIN_LIBRARY NAMES libanakin_saber_common.so libanakin.so DOC "Path to ANAKIN library.") if(ANAKIN_INCLUDE_DIR AND ANAKIN_LIBRARY) - if(WITH_DSO) set(ANAKIN_FOUND ON) - endif(WITH_DSO) else() set(ANAKIN_FOUND OFF) endif() @@ -31,3 +25,8 @@ if(ANAKIN_FOUND) link_directories(${ANAKIN_ROOT}) add_definitions(-DPADDLE_WITH_ANAKIN) endif() + +if(ANAKIN_FOUND AND WITH_GPU AND WITH_DSO) + message(STATUS "Compile with anakin subgraph.") + set(ANAKIN_SUBGRAPH ON) +endif() diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index 032fcbedf49cb..d2d6fc9c92691 100644 --- a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ -77,7 +77,7 @@ pass_library(quant_conv2d_dequant_fuse_pass inference) pass_library(fillconstant_elementwisemul_fuse inference) pass_library(shuffle_channel_detect_pass inference) -if(ANAKIN_FOUND) +if(ANAKIN_SUBGRAPH) pass_library(simplify_anakin_priorbox_detection_out_pass inference) endif() diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt index 395336938b10e..44eaf90371d58 100644 --- a/paddle/fluid/inference/CMakeLists.txt +++ b/paddle/fluid/inference/CMakeLists.txt @@ -17,7 +17,7 @@ if (TENSORRT_FOUND) add_subdirectory(tensorrt) endif() -if (ANAKIN_FOUND) +if (ANAKIN_SUBGRAPH) add_subdirectory(anakin) endif() @@ -43,7 +43,7 @@ if(WITH_MKLDNN) endif() set(STATIC_INFERENCE_APIS paddle_fluid_api paddle_inference_api analysis_predictor) -if (ANAKIN_FOUND AND WITH_MKL) +if (ANAKIN_FOUND) set(ANAKIN_SHARED_INFERENCE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/api/api_anakin_engine.cc) endif() set(SHARED_INFERENCE_SRCS diff --git a/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt b/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt index 05a3d7ddfdb08..ddadbc6df4aa3 100644 --- a/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt +++ b/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt @@ -15,7 +15,7 @@ if (WITH_GPU AND TENSORRT_FOUND) set(INFER_IR_PASSES ${INFER_IR_PASSES} tensorrt_subgraph_pass CACHE INTERNAL "") endif() -if (ANAKIN_FOUND) +if (ANAKIN_SUBGRAPH) cc_library(anakin_subgraph_pass SRCS anakin_subgraph_pass.cc DEPS subgraph_detector anakin_op_teller) set(analysis_deps ${analysis_deps} diff --git a/paddle/fluid/inference/api/CMakeLists.txt b/paddle/fluid/inference/api/CMakeLists.txt index b0f8a81c5580e..d1df9c7918025 100644 --- a/paddle/fluid/inference/api/CMakeLists.txt +++ b/paddle/fluid/inference/api/CMakeLists.txt @@ -27,16 +27,16 @@ if(WITH_GPU AND TENSORRT_FOUND) set(inference_deps ${inference_deps} tensorrt_engine tensorrt_converter) endif() -if (ANAKIN_FOUND) +if (ANAKIN_SUBGRAPH) set(inference_deps ${inference_deps} anakin_op_converter anakin_engine) endif() add_subdirectory(details) if(WITH_MKLDNN) - set(mkldnn_quantizer_src mkldnn_quantizer.cc) - set(mkldnn_quantizer_cfg mkldnn_quantizer_config) - cc_library(${mkldnn_quantizer_cfg} SRCS mkldnn_quantizer_config.cc DEPS lod_tensor paddle_pass_builder) + set(mkldnn_quantizer_src mkldnn_quantizer.cc) + set(mkldnn_quantizer_cfg mkldnn_quantizer_config) + cc_library(${mkldnn_quantizer_cfg} SRCS mkldnn_quantizer_config.cc DEPS lod_tensor paddle_pass_builder) endif() cc_library(analysis_config SRCS analysis_config.cc DEPS ${mkldnn_quantizer_cfg} lod_tensor paddle_pass_builder) @@ -48,9 +48,7 @@ cc_library(paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS paddle_pass_builder zero_copy_tensor reset_tensor_array) -cc_test(test_paddle_inference_api - SRCS api_tester.cc - DEPS paddle_inference_api) +cc_test(test_paddle_inference_api SRCS api_tester.cc DEPS paddle_inference_api) if(WITH_TESTING) inference_base_test(test_api_impl SRCS api_impl_tester.cc DEPS ${inference_deps} @@ -61,25 +59,21 @@ endif() cc_test(test_analysis_predictor SRCS analysis_predictor_tester.cc DEPS analysis_predictor benchmark ${inference_deps} ARGS --dirname=${WORD2VEC_MODEL_DIR}) -if (WITH_ANAKIN AND WITH_MKL) # only needed in CI - # compile the libinference_anakin_api.a and anakin.so. - cc_library(inference_anakin_api SRCS api.cc api_anakin_engine.cc DEPS anakin_shared anakin_saber mklml zero_copy_tensor_dummy device_context) - cc_library(inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc DEPS anakin_shared anakin_saber zero_copy_tensor_dummy device_context) - function(anakin_target target_name) - target_compile_options(${target_name} BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS}) - endfunction() - anakin_target(inference_anakin_api) - anakin_target(inference_anakin_api_shared) -endif() - -if (ANAKIN_FOUND) - cc_library(inference_anakin_api SRCS api.cc api_anakin_engine.cc) - target_link_libraries(inference_anakin_api anakin anakin_saber_common) - cc_library(inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc) - target_link_libraries(inference_anakin_api_shared anakin anakin_saber_common) - function(anakin_target target_name) - target_compile_options(${target_name} BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS}) - endfunction() - anakin_target(inference_anakin_api) - anakin_target(inference_anakin_api_shared) +if(ANAKIN_FOUND) + if (ANAKIN_MLU AND NOT WITH_GPU AND NOT ANAKIN_X86) + message(STATUS "Compile with anakin mlu place.") + add_definitions(-DANAKIN_MLU_PLACE) + elseif(ANAKIN_X86) + message(STATUS "Compile with anakin x86 place.") + add_definitions(-DANAKIN_X86_PLACE) + endif() + cc_library(inference_anakin_api SRCS api.cc api_anakin_engine.cc) + target_link_libraries(inference_anakin_api anakin anakin_saber_common) + cc_library(inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc) + target_link_libraries(inference_anakin_api_shared anakin anakin_saber_common) + function(anakin_target target_name) + target_compile_options(${target_name} BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS}) + endfunction() + anakin_target(inference_anakin_api) + anakin_target(inference_anakin_api_shared) endif() diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index 6e8d6f459c511..9a95b11408c35 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -34,7 +34,7 @@ if (WITH_GPU AND TENSORRT_FOUND) add_subdirectory(tensorrt) endif() -if (ANAKIN_FOUND) +if (ANAKIN_SUBGRAPH) add_subdirectory(anakin) endif() From 1b7e5d468db3c44aab05735bffc11752cf630948 Mon Sep 17 00:00:00 2001 From: Shixiaowei02 Date: Wed, 5 Jun 2019 19:24:00 +0800 Subject: [PATCH 08/15] update TargetWrapper::set_device --- paddle/fluid/inference/api/api_anakin_engine.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/paddle/fluid/inference/api/api_anakin_engine.cc b/paddle/fluid/inference/api/api_anakin_engine.cc index adb3e8e50c0a9..dd3e4c79a8207 100644 --- a/paddle/fluid/inference/api/api_anakin_engine.cc +++ b/paddle/fluid/inference/api/api_anakin_engine.cc @@ -35,6 +35,7 @@ extern std::once_flag PaddleInferenceAnakinPredictor::init_anakin_; template void PaddleInferenceAnakinPredictor::InitEnv() { std::call_once(this->init_anakin_, [this]() { + anakin::TargetWrapper::set_device(this->config_.device_id); anakin::Env::env_init(this->config_.max_stream); }); } From 5a7f452d271584e575fece3ee90197c7bf610c4b Mon Sep 17 00:00:00 2001 From: Shixiaowei02 <39303645+Shixiaowei02@users.noreply.github.com> Date: Thu, 6 Jun 2019 05:26:43 +0000 Subject: [PATCH 09/15] update the initialization of anakin subgraph test=develop --- paddle/fluid/inference/anakin/engine.cc | 24 ++++++++++++------- paddle/fluid/inference/anakin/engine.h | 5 ++-- paddle/fluid/inference/api/analysis_config.cc | 1 + 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/paddle/fluid/inference/anakin/engine.cc b/paddle/fluid/inference/anakin/engine.cc index 2cc49220b99be..d7da976696c31 100644 --- a/paddle/fluid/inference/anakin/engine.cc +++ b/paddle/fluid/inference/anakin/engine.cc @@ -32,18 +32,26 @@ namespace paddle { namespace inference { namespace anakin { +template +extern std::once_flag + AnakinEngine::init_anakin_; + template AnakinEngine::AnakinEngine( bool need_summary, int device, int max_batch_size, std::map> max_input_shape, std::vector program_inputs, bool auto_config_layout) - : graph_(new AnakinGraphT()), - net_(new AnakinNetT(need_summary)) { - device_ = device; - max_batch_size_ = max_batch_size; - max_input_shape_ = max_input_shape; - program_inputs_ = program_inputs; - auto_config_layout_ = auto_config_layout; + : device_(device), + max_batch_size_(max_batch_size), + max_input_shape_(max_input_shape), + program_inputs_(program_inputs), + auto_config_layout_(auto_config_layout) { + std::call_once(init_anakin_, [this]() { + ::anakin::TargetWrapper::set_device(device_); + ::anakin::Env::env_init(); + }); + graph_.reset(new AnakinGraphT()); + net_.reset(new AnakinNetT(need_summary)); } template @@ -102,7 +110,7 @@ void AnakinEngine::BindInput( anakin_input = net_->get_in(input.first); } anakin_input->reshape(fluid_input_shape); - ::anakin::saber::Tensor tmp_anakin_tensor(data, TargetT(), 0, + ::anakin::saber::Tensor tmp_anakin_tensor(data, TargetT(), device_, fluid_input_shape); anakin_input->copy_from(tmp_anakin_tensor); } diff --git a/paddle/fluid/inference/anakin/engine.h b/paddle/fluid/inference/anakin/engine.h index d4f8737c4f50e..e62bb82fd1240 100644 --- a/paddle/fluid/inference/anakin/engine.h +++ b/paddle/fluid/inference/anakin/engine.h @@ -116,12 +116,13 @@ class AnakinEngine { private: bool initialized_{false}; + int device_; int max_batch_size_; std::map> max_input_shape_; - int device_; + std::vector program_inputs_; std::unique_ptr graph_; std::unique_ptr net_; - std::vector program_inputs_; + static std::once_flag init_anakin_; std::unordered_map tensor_scales_; // Always be false in gpu mode but true in most cpu cases. bool auto_config_layout_; diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc index 8b940b67e3f9c..cfd2df2f8c037 100644 --- a/paddle/fluid/inference/api/analysis_config.cc +++ b/paddle/fluid/inference/api/analysis_config.cc @@ -342,6 +342,7 @@ float AnalysisConfig::fraction_of_gpu_memory_for_pool() const { // Get the GPU memory details and calculate the fraction of memory for the // GPU memory pool. size_t gpu_used, gpu_available; + platform::SetDeviceId(device_id_); platform::GpuMemoryUsage(&gpu_used, &gpu_available); double total_gpu_memory = (gpu_used + gpu_available) / 1024. / 1024.; float fraction_of_gpu_memory = From 325d0cc45a02d5d30e296594a2d4d4b016ea529d Mon Sep 17 00:00:00 2001 From: Shixiaowei02 <39303645+Shixiaowei02@users.noreply.github.com> Date: Thu, 6 Jun 2019 06:58:17 +0000 Subject: [PATCH 10/15] use the default constructor of base class test=develop --- paddle/fluid/inference/anakin/engine.cc | 7 +++---- paddle/fluid/inference/api/api_anakin_engine.cc | 2 +- paddle/fluid/inference/api/api_anakin_engine.h | 4 ++-- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/paddle/fluid/inference/anakin/engine.cc b/paddle/fluid/inference/anakin/engine.cc index d7da976696c31..13f16c4c8982e 100644 --- a/paddle/fluid/inference/anakin/engine.cc +++ b/paddle/fluid/inference/anakin/engine.cc @@ -46,10 +46,9 @@ AnakinEngine::AnakinEngine( max_input_shape_(max_input_shape), program_inputs_(program_inputs), auto_config_layout_(auto_config_layout) { - std::call_once(init_anakin_, [this]() { - ::anakin::TargetWrapper::set_device(device_); - ::anakin::Env::env_init(); - }); + ::anakin::TargetWrapper::set_device(device_); + std::call_once(init_anakin_, + [this]() { ::anakin::Env::env_init(); }); graph_.reset(new AnakinGraphT()); net_.reset(new AnakinNetT(need_summary)); } diff --git a/paddle/fluid/inference/api/api_anakin_engine.cc b/paddle/fluid/inference/api/api_anakin_engine.cc index dd3e4c79a8207..63d23321ab41e 100644 --- a/paddle/fluid/inference/api/api_anakin_engine.cc +++ b/paddle/fluid/inference/api/api_anakin_engine.cc @@ -34,8 +34,8 @@ extern std::once_flag PaddleInferenceAnakinPredictor::init_anakin_; template void PaddleInferenceAnakinPredictor::InitEnv() { + anakin::TargetWrapper::set_device(this->config_.device_id); std::call_once(this->init_anakin_, [this]() { - anakin::TargetWrapper::set_device(this->config_.device_id); anakin::Env::env_init(this->config_.max_stream); }); } diff --git a/paddle/fluid/inference/api/api_anakin_engine.h b/paddle/fluid/inference/api/api_anakin_engine.h index c19477a8aafe1..32f8def63c090 100644 --- a/paddle/fluid/inference/api/api_anakin_engine.h +++ b/paddle/fluid/inference/api/api_anakin_engine.h @@ -37,7 +37,7 @@ using anakin::OpRunType; template class PaddleInferenceAnakinPredictor : public PaddlePredictor { public: - PaddleInferenceAnakinPredictor() : config_() { this->InitEnv(); } + PaddleInferenceAnakinPredictor() = default; explicit PaddleInferenceAnakinPredictor(const AnakinConfig& config) : config_(config) { @@ -79,7 +79,7 @@ class PaddleInferenceAnakinPredictor : public PaddlePredictor { #ifdef ANAKIN_MLU_PLACE template -class PaddleInferenceAnakinMLUPredictor +class PaddleInferenceAnakinMLUPredictor final : public PaddleInferenceAnakinPredictor { public: explicit PaddleInferenceAnakinMLUPredictor(const AnakinConfig& config) { From d332127d17bcd5b834450444c67fd8063e1fab90 Mon Sep 17 00:00:00 2001 From: Shixiaowei02 <39303645+Shixiaowei02@users.noreply.github.com> Date: Tue, 11 Jun 2019 03:35:32 +0000 Subject: [PATCH 11/15] load model from buffer with length test=develop --- paddle/fluid/inference/api/api_anakin_engine.cc | 9 +++++++-- paddle/fluid/inference/api/paddle_anakin_config.h | 2 ++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/inference/api/api_anakin_engine.cc b/paddle/fluid/inference/api/api_anakin_engine.cc index 63d23321ab41e..5cff95665bd5a 100644 --- a/paddle/fluid/inference/api/api_anakin_engine.cc +++ b/paddle/fluid/inference/api/api_anakin_engine.cc @@ -54,8 +54,13 @@ template void PaddleInferenceAnakinPredictor::InitGraph() { this->graph_p_ = std::make_shared>(); - if (!(this->graph_p_->load(this->config_.model_file))) { - LOG(FATAL) << "fail to load graph from " << this->config_.model_file; + if (!this->config_.model_file.empty()) { + this->graph_p_->load(this->config_.model_file); + } else if (this->config_.model_buf_p) { + this->graph_p_->load(this->config_.model_buf_p, + this->config_.model_buf_len); + } else { + LOG(FATAL) << "Model load error."; } auto inputs = this->graph_p_->get_ins(); for (auto &input_str : inputs) { diff --git a/paddle/fluid/inference/api/paddle_anakin_config.h b/paddle/fluid/inference/api/paddle_anakin_config.h index 7c0e2f06ff417..44abba0c8000d 100644 --- a/paddle/fluid/inference/api/paddle_anakin_config.h +++ b/paddle/fluid/inference/api/paddle_anakin_config.h @@ -34,6 +34,8 @@ struct AnakinConfig : public PaddlePredictor::Config { int max_stream{4}; int data_stream_id{0}; int compute_stream_id{0}; + char* model_buf_p{nullptr}; + size_t model_buf_len{0}; TargetType target_type; #ifdef ANAKIN_MLU_PLACE int model_parallel{8}; From cd5c89855fb5b2842e4c0d4c4c2b2b59d2beec8e Mon Sep 17 00:00:00 2001 From: Shixiaowei02 <39303645+Shixiaowei02@users.noreply.github.com> Date: Wed, 12 Jun 2019 02:40:46 +0000 Subject: [PATCH 12/15] modify the access level of class test=develop --- paddle/fluid/inference/api/api_anakin_engine.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/paddle/fluid/inference/api/api_anakin_engine.h b/paddle/fluid/inference/api/api_anakin_engine.h index 32f8def63c090..0f0d7febe2ed7 100644 --- a/paddle/fluid/inference/api/api_anakin_engine.h +++ b/paddle/fluid/inference/api/api_anakin_engine.h @@ -58,18 +58,18 @@ class PaddleInferenceAnakinPredictor : public PaddlePredictor { ~PaddleInferenceAnakinPredictor() override; - static std::mutex mutex_; - AnakinConfig config_; - std::shared_ptr> ctx_p_; - std::shared_ptr> graph_p_; - anakin::Net* executor_p_{nullptr}; - + protected: void InitEnv(); void InitGraph(); virtual void OptimizeGraph(); virtual void InitNet(); virtual void SetContext(); virtual void Predict(); + static std::mutex mutex_; + AnakinConfig config_; + std::shared_ptr> ctx_p_; + std::shared_ptr> graph_p_; + anakin::Net* executor_p_{nullptr}; private: bool RunImpl(const std::vector& inputs, From 0b00dadc89eef49579ef295a9cd232975e56986f Mon Sep 17 00:00:00 2001 From: Shixiaowei02 <39303645+Shixiaowei02@users.noreply.github.com> Date: Wed, 19 Jun 2019 13:56:51 +0000 Subject: [PATCH 13/15] support anakin for bitmain arch test=develop --- paddle/fluid/inference/api/CMakeLists.txt | 5 +++ .../fluid/inference/api/api_anakin_engine.cc | 36 ++++++++++++++++++- .../fluid/inference/api/api_anakin_engine.h | 15 ++++++++ .../inference/api/paddle_anakin_config.h | 2 +- 4 files changed, 56 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/inference/api/CMakeLists.txt b/paddle/fluid/inference/api/CMakeLists.txt index d1df9c7918025..88d2a06b80e5c 100644 --- a/paddle/fluid/inference/api/CMakeLists.txt +++ b/paddle/fluid/inference/api/CMakeLists.txt @@ -60,9 +60,14 @@ cc_test(test_analysis_predictor SRCS analysis_predictor_tester.cc DEPS analysis_ ARGS --dirname=${WORD2VEC_MODEL_DIR}) if(ANAKIN_FOUND) + # Do not turn warnings into errors. + set_source_files_properties(api.cc api_anakin_engine.cc PROPERTIES COMPILE_FLAGS "-Wno-error") if (ANAKIN_MLU AND NOT WITH_GPU AND NOT ANAKIN_X86) message(STATUS "Compile with anakin mlu place.") add_definitions(-DANAKIN_MLU_PLACE) + elseif(ANAKIN_BM AND NOT WITH_GPU AND NOT ANAKIN_X86) + message(STATUS "Compile with anakin bm place.") + add_definitions(-DANAKIN_BM_PLACE) elseif(ANAKIN_X86) message(STATUS "Compile with anakin x86 place.") add_definitions(-DANAKIN_X86_PLACE) diff --git a/paddle/fluid/inference/api/api_anakin_engine.cc b/paddle/fluid/inference/api/api_anakin_engine.cc index 5cff95665bd5a..936b175f3c3ea 100644 --- a/paddle/fluid/inference/api/api_anakin_engine.cc +++ b/paddle/fluid/inference/api/api_anakin_engine.cc @@ -34,10 +34,10 @@ extern std::once_flag PaddleInferenceAnakinPredictor::init_anakin_; template void PaddleInferenceAnakinPredictor::InitEnv() { - anakin::TargetWrapper::set_device(this->config_.device_id); std::call_once(this->init_anakin_, [this]() { anakin::Env::env_init(this->config_.max_stream); }); + anakin::TargetWrapper::set_device(this->config_.device_id); } template void PaddleInferenceAnakinPredictor::InitNet() { @@ -194,6 +194,7 @@ template bool PaddleInferenceAnakinPredictor::RunImpl( const std::vector &inputs, std::vector *output_data) { + anakin::TargetWrapper::set_device(this->config_.device_id); for (const auto &input : inputs) { if (input.dtype != PaddleDType::FLOAT32) { LOG(FATAL) << "Only support float type inputs. " << input.name @@ -326,6 +327,27 @@ void PaddleInferenceAnakinMLUPredictor::Predict() { } #endif +#ifdef ANAKIN_BM_PLACE +template +void PaddleInferenceAnakinBMPredictor::OptimizeGraph() { + if (!this->graph_p_->fusion_optimize()) { + LOG(FATAL) << "Graph optimization error."; + } +} +template +void PaddleInferenceAnakinBMPredictor::InitNet() { + std::unique_lock lock(this->mutex_); + this->executor_p_ = new anakin::Net(); + this->executor_p_->fusion_init(*this->graph_p_, this->ctx_p_, true); +} +template +void PaddleInferenceAnakinBMPredictor::Predict() { + anakin::TargetWrapper::device_sync(); + this->executor_p_->fusion_prediction(); + anakin::TargetWrapper::device_sync(); +} +#endif + #ifdef PADDLE_WITH_CUDA template class PaddleInferenceAnakinPredictor< anakin::NV, anakin::Precision::FP32, ::anakin::OpRunType::ASYNC>; @@ -338,6 +360,10 @@ template class PaddleInferenceAnakinPredictor< template class PaddleInferenceAnakinMLUPredictor; #endif +#ifdef ANAKIN_BM_PLACE +template class PaddleInferenceAnakinBMPredictor; +#endif // A factory to help create difference predictor. template <> @@ -365,6 +391,14 @@ CreatePaddlePredictor( ::anakin::OpRunType::SYNC>( config)); } +#endif +#ifdef ANAKIN_BM_PLACE + if (config.target_type == contrib::AnakinConfig::BM) { + return std::unique_ptr( + new PaddleInferenceAnakinBMPredictor( + config)); + } #endif LOG(FATAL) << "Anakin Predictor create on unknown platform."; return nullptr; diff --git a/paddle/fluid/inference/api/api_anakin_engine.h b/paddle/fluid/inference/api/api_anakin_engine.h index 0f0d7febe2ed7..17a4c9fa4f6a9 100644 --- a/paddle/fluid/inference/api/api_anakin_engine.h +++ b/paddle/fluid/inference/api/api_anakin_engine.h @@ -92,4 +92,19 @@ class PaddleInferenceAnakinMLUPredictor final void Predict() override; }; #endif + +#ifdef ANAKIN_BM_PLACE +template +class PaddleInferenceAnakinBMPredictor final + : public PaddleInferenceAnakinPredictor { + public: + explicit PaddleInferenceAnakinBMPredictor(const AnakinConfig& config) { + this->ResetConfig(config); + this->InitPredictor(); + } + void OptimizeGraph() override; + void InitNet() override; + void Predict() override; +}; +#endif } // namespace paddle diff --git a/paddle/fluid/inference/api/paddle_anakin_config.h b/paddle/fluid/inference/api/paddle_anakin_config.h index 44abba0c8000d..e9af13f526a23 100644 --- a/paddle/fluid/inference/api/paddle_anakin_config.h +++ b/paddle/fluid/inference/api/paddle_anakin_config.h @@ -25,7 +25,7 @@ namespace paddle { namespace contrib { // Configurations for Anakin engine. struct AnakinConfig : public PaddlePredictor::Config { - enum TargetType { NVGPU = 0, X86, MLU }; + enum TargetType { NVGPU = 0, X86, MLU, BM }; int device_id{0}; std::string model_file; std::map> init_inputs_shape; From 396cc14298dc789256f441283954aff2e29460fa Mon Sep 17 00:00:00 2001 From: Shixiaowei02 Date: Mon, 8 Jul 2019 11:32:17 +0800 Subject: [PATCH 14/15] remove files --- .../tests/api/anakin_mobilenet_tester.cc | 67 ----- .../inference/tests/api/anakin_rnn1_tester.cc | 246 ------------------ 2 files changed, 313 deletions(-) delete mode 100644 paddle/fluid/inference/tests/api/anakin_mobilenet_tester.cc delete mode 100644 paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc diff --git a/paddle/fluid/inference/tests/api/anakin_mobilenet_tester.cc b/paddle/fluid/inference/tests/api/anakin_mobilenet_tester.cc deleted file mode 100644 index 48689486af4fc..0000000000000 --- a/paddle/fluid/inference/tests/api/anakin_mobilenet_tester.cc +++ /dev/null @@ -1,67 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include - -#include "gflags/gflags.h" -#include "paddle/fluid/inference/api/paddle_inference_api.h" - -DEFINE_string(model, "", "Directory of the inference model(mobile_v2)."); - -namespace paddle { - -contrib::AnakinConfig GetConfig() { - contrib::AnakinConfig config; - // using AnakinConfig::X86 if you need to use cpu to do inference - config.target_type = contrib::AnakinConfig::NVGPU; - config.model_file = FLAGS_model; - config.device_id = 0; - config.init_batch_size = 1; - return config; -} - -TEST(inference, anakin) { - auto config = GetConfig(); - auto predictor = - CreatePaddlePredictor( - config); - - float data[1 * 3 * 224 * 224] = {1.0f}; - PaddleTensor tensor; - tensor.name = "input_0"; - tensor.shape = std::vector({1, 3, 224, 224}); - tensor.data = PaddleBuf(data, sizeof(data)); - tensor.dtype = PaddleDType::FLOAT32; - - // For simplicity, we set all the slots with the same data. - std::vector paddle_tensor_feeds(1, tensor); - - PaddleTensor tensor_out; - tensor_out.name = "prob_out"; - tensor_out.shape = std::vector({}); - tensor_out.data = PaddleBuf(); - tensor_out.dtype = PaddleDType::FLOAT32; - - std::vector outputs(1, tensor_out); - - ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &outputs)); - - float* data_o = static_cast(outputs[0].data.data()); - for (size_t j = 0; j < outputs[0].data.length(); ++j) { - LOG(INFO) << "output[" << j << "]: " << data_o[j]; - } -} - -} // namespace paddle diff --git a/paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc b/paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc deleted file mode 100644 index db01cfebcb2b3..0000000000000 --- a/paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc +++ /dev/null @@ -1,246 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -#include -#include -#include -#include // NOLINT -#include -#include "paddle/fluid/inference/api/helper.h" -#include "paddle/fluid/inference/api/paddle_inference_api.h" -#include "utils/logger/logger.h" - -DEFINE_string(model, "", "Directory of the inference model."); -DEFINE_string(datapath, "", "Path of the dataset."); -DEFINE_int32(batch_size, 1, "batch size."); -DEFINE_int32(repeat, 1, "Running the inference program repeat times."); - -class Data { - public: - Data(std::string file_name, int batch_size) - : _batch_size(batch_size), _total_length(0) { - _file.open(file_name); - _file.seekg(_file.end); - _total_length = _file.tellg(); - _file.seekg(_file.beg); - } - void get_batch_data(std::vector>& fea, // NOLINT - std::vector>& week_fea, // NOLINT - std::vector>& time_fea, // NOLINT - std::vector& seq_offset); // NOLINT - - private: - std::fstream _file; - int _total_length; - int _batch_size; -}; - -void Data::get_batch_data( - std::vector>& fea, // NOLINT - std::vector>& week_fea, // NOLINT - std::vector>& time_fea, // NOLINT - std::vector& seq_offset) { // NOLINT - int seq_num = 0; - long unsigned int cum = 0; // NOLINT - - char buf[10000]; - seq_offset.clear(); - seq_offset.push_back(0); - fea.clear(); - week_fea.clear(); - time_fea.clear(); - while (_file.getline(buf, 10000)) { - std::vector data_vec; - paddle::inference::split(buf, ':', &data_vec); - - std::vector seq; - paddle::inference::split(data_vec[0], '|', &seq); - - for (auto link : seq) { - std::vector vec; - paddle::inference::split_to_float(link, ',', &vec); - fea.push_back(vec); - } - - std::vector vec_w; - paddle::inference::split_to_float(data_vec[2], ',', &vec_w); - week_fea.push_back(vec_w); - - std::vector vec_t; - paddle::inference::split_to_float(data_vec[1], ',', &vec_t); - time_fea.push_back(vec_t); - - cum += seq.size(); - seq_offset.push_back(cum); - - seq_num++; - if (seq_num >= _batch_size) { - break; - } - } -} - -namespace paddle { - -contrib::AnakinConfig GetConfig() { - contrib::AnakinConfig config; - // using AnakinConfig::X86 if you need to use cpu to do inference - config.target_type = contrib::AnakinConfig::X86; - config.model_file = FLAGS_model; - config.device_id = 0; - config.init_batch_size = 1000; // the max number of token - return config; -} - -void set_tensor(std::string name, std::vector shape, - std::vector& vec) { // NOLINT - int sum = 1; - std::for_each(shape.begin(), shape.end(), [&](int n) { sum *= n; }); - float* data = new float[sum]; - PaddleTensor tensor; - tensor.name = name; - tensor.shape = shape; - tensor.data = PaddleBuf(data, sum); - tensor.dtype = PaddleDType::FLOAT32; - vec.push_back(tensor); -} - -void single_test() { - auto config = GetConfig(); - auto predictor = - CreatePaddlePredictor( - config); - - int max_batch_size = 1000; - std::string feature_file = FLAGS_datapath; - Data map_data(feature_file, FLAGS_batch_size); - std::vector> fea; - std::vector> week_fea; - std::vector> time_fea; - std::vector seq_offset; // NOLINT - - paddle::PaddleTensor tensor_0, tensor_1, tensor_2; - tensor_0.name = "input_0"; - tensor_1.name = "input_4"; - tensor_2.name = "input_5"; - - PaddleTensor tensor_out; - tensor_out.name = "final_output.tmp_1_gout"; - tensor_out.shape = std::vector({}); - tensor_out.data = PaddleBuf(); - tensor_out.dtype = PaddleDType::FLOAT32; - - std::vector inputs; - std::vector outputs(1, tensor_out); - - int data_0_dim = 38; - int data_1_dim = 10; - int data_2_dim = 10; - float data_0[max_batch_size * data_0_dim]; // NOLINT - float data_1[max_batch_size * data_1_dim]; // NOLINT - float data_2[max_batch_size * data_2_dim]; // NOLINT - - int count = 0; - while (true) { - if (count++ > 0) break; // only run the first batch in ci. - seq_offset.clear(); - map_data.get_batch_data(fea, week_fea, time_fea, seq_offset); - if (seq_offset.size() <= 1) { - LOG(FATAL) << "seq_offset.size() <= 1, exit."; - break; - } - - std::vector> seq_offset_vec; // NOLINT - seq_offset_vec.push_back(seq_offset); - tensor_0.lod = seq_offset_vec; - - int p_shape_0[] = {(int)fea.size(), 1, 1, data_0_dim}; // NOLINT - int p_shape_1[] = {(int)week_fea.size(), data_1_dim, 1, 1}; // NOLINT - int p_shape_2[] = {(int)time_fea.size(), data_2_dim, 1, 1}; // NOLINT - - std::vector shape_0(p_shape_0, p_shape_0 + 4); - std::vector shape_1(p_shape_1, p_shape_1 + 4); - std::vector shape_2(p_shape_2, p_shape_2 + 4); - - tensor_0.shape = shape_0; - tensor_1.shape = shape_1; - tensor_2.shape = shape_2; - - for (int i = 0; i < fea.size(); i++) { - memcpy(data_0 + i * data_0_dim, &fea[i][0], sizeof(float) * data_0_dim); - } - for (int i = 0; i < week_fea.size(); i++) { - memcpy(data_1 + i * data_1_dim, &week_fea[i][0], - sizeof(float) * data_1_dim); - } - for (int i = 0; i < time_fea.size(); i++) { - memcpy(data_2 + i * data_2_dim, &time_fea[i][0], - sizeof(float) * data_2_dim); - } - - tensor_0.data = - paddle::PaddleBuf(data_0, fea.size() * sizeof(float) * data_0_dim); - tensor_1.data = - paddle::PaddleBuf(data_1, week_fea.size() * sizeof(float) * data_1_dim); - tensor_2.data = - paddle::PaddleBuf(data_2, time_fea.size() * sizeof(float) * data_2_dim); - - tensor_0.dtype = paddle::PaddleDType::FLOAT32; - tensor_1.dtype = paddle::PaddleDType::FLOAT32; - tensor_2.dtype = paddle::PaddleDType::FLOAT32; - - inputs.clear(); - inputs.push_back(tensor_1); - inputs.push_back(tensor_2); - inputs.push_back(tensor_0); - - paddle::inference::Timer timer; - timer.tic(); - for (int i = 0; i < FLAGS_repeat; i++) predictor->Run(inputs, &outputs); - - paddle::inference::PrintTime(FLAGS_batch_size, FLAGS_repeat, 1, 0, - timer.toc() / FLAGS_repeat); - LOG(INFO) << "sequence_length = " << seq_offset[seq_offset.size() - 1]; - - float* data_o = static_cast(outputs[0].data.data()); - VLOG(3) << "outputs[0].data.length() = " << outputs[0].data.length(); - for (size_t j = 0; j < outputs[0].data.length(); ++j) { - VLOG(3) << "output[" << j << "]: " << data_o[j]; - } - } -} -} // namespace paddle - -int main(int argc, char** argv) { - google::ParseCommandLineFlags(&argc, &argv, true); - logger::init(argv[0]); - - paddle::single_test(); - /* multi-threads - std::vector threads; - int num = 1; - for (int i = 0; i < num; i++) { - LOG(INFO) << " thread id : " << i; - threads.emplace_back(paddle::single_test); - } - for (int i = 0; i < num; i++) { - threads[i].join(); - } - threads.clear(); - */ - - return 0; -} From 57c5dfb011945da4d18995172b0910315cbc19b8 Mon Sep 17 00:00:00 2001 From: Shixiaowei02 <39303645+Shixiaowei02@users.noreply.github.com> Date: Mon, 8 Jul 2019 07:07:31 +0000 Subject: [PATCH 15/15] checkout cmakelists test=develop --- CMakeLists.txt | 1 - cmake/anakin_subgraph.cmake | 13 +++++++++++++ paddle/fluid/inference/api/CMakeLists.txt | 10 ---------- paddle/fluid/inference/api/api_anakin_engine.cc | 5 +++-- 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8efd10c7973ed..29e24ff724db3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -191,7 +191,6 @@ if(WITH_GPU) include(cuda) include(tensorrt) endif() -include(anakin_subgraph) if(WIN32 OR APPLE OR NOT WITH_GPU OR ON_INFER) set(WITH_DGC OFF) diff --git a/cmake/anakin_subgraph.cmake b/cmake/anakin_subgraph.cmake index eb7bce9f3b7a9..177f34438d6ac 100644 --- a/cmake/anakin_subgraph.cmake +++ b/cmake/anakin_subgraph.cmake @@ -26,6 +26,19 @@ if(ANAKIN_FOUND) add_definitions(-DPADDLE_WITH_ANAKIN) endif() +if(ANAKIN_FOUND) + if (ANAKIN_MLU AND NOT WITH_GPU AND NOT ANAKIN_X86) + message(STATUS "Compile with anakin mlu place.") + add_definitions(-DANAKIN_MLU_PLACE) + elseif(ANAKIN_BM AND NOT WITH_GPU AND NOT ANAKIN_X86) + message(STATUS "Compile with anakin bm place.") + add_definitions(-DANAKIN_BM_PLACE) + elseif(ANAKIN_X86) + message(STATUS "Compile with anakin x86 place.") + add_definitions(-DANAKIN_X86_PLACE) + endif() +endif() + if(ANAKIN_FOUND AND WITH_GPU AND WITH_DSO) message(STATUS "Compile with anakin subgraph.") set(ANAKIN_SUBGRAPH ON) diff --git a/paddle/fluid/inference/api/CMakeLists.txt b/paddle/fluid/inference/api/CMakeLists.txt index bf9f5eb86991e..033224c0f59cb 100644 --- a/paddle/fluid/inference/api/CMakeLists.txt +++ b/paddle/fluid/inference/api/CMakeLists.txt @@ -70,16 +70,6 @@ cc_test(test_analysis_predictor SRCS analysis_predictor_tester.cc DEPS analysis_ if(ANAKIN_FOUND) # Do not turn warnings into errors. set_source_files_properties(api.cc api_anakin_engine.cc PROPERTIES COMPILE_FLAGS "-Wno-error") - if (ANAKIN_MLU AND NOT WITH_GPU AND NOT ANAKIN_X86) - message(STATUS "Compile with anakin mlu place.") - add_definitions(-DANAKIN_MLU_PLACE) - elseif(ANAKIN_BM AND NOT WITH_GPU AND NOT ANAKIN_X86) - message(STATUS "Compile with anakin bm place.") - add_definitions(-DANAKIN_BM_PLACE) - elseif(ANAKIN_X86) - message(STATUS "Compile with anakin x86 place.") - add_definitions(-DANAKIN_X86_PLACE) - endif() cc_library(inference_anakin_api SRCS api.cc api_anakin_engine.cc) target_link_libraries(inference_anakin_api anakin anakin_saber_common) cc_library(inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc) diff --git a/paddle/fluid/inference/api/api_anakin_engine.cc b/paddle/fluid/inference/api/api_anakin_engine.cc index 936b175f3c3ea..3a7ad2d2db897 100644 --- a/paddle/fluid/inference/api/api_anakin_engine.cc +++ b/paddle/fluid/inference/api/api_anakin_engine.cc @@ -66,7 +66,7 @@ void PaddleInferenceAnakinPredictor::InitGraph() { for (auto &input_str : inputs) { if (this->config_.init_inputs_shape.find(input_str) == this->config_.init_inputs_shape.end()) { - LOG(FATAL) << input_str << " is not implemented."; + LOG(FATAL) << input_str << " should be set in init_inputs_shape."; } std::vector shape = this->config_.init_inputs_shape.find(input_str)->second; @@ -400,7 +400,8 @@ CreatePaddlePredictor( config)); } #endif - LOG(FATAL) << "Anakin Predictor create on unknown platform."; + LOG(FATAL) << "Anakin Predictor create on unknown platform: " + << config.target_type; return nullptr; } template