Skip to content

add inplace logic into new_executor #35618

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Sep 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 0 additions & 21 deletions paddle/fluid/framework/details/share_tensor_buffer_functor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,27 +35,6 @@ namespace paddle {
namespace framework {
namespace details {

// TODO(zjl): support SelectedRows
// Returns a read-only view of the LoDTensor held by `var`.
// Throws InvalidArgument (reporting the actual runtime type) for any
// variable that does not hold a LoDTensor.
static inline const Tensor &GetTensorFromVar(const Variable *var) {
  // Guard clause: bail out early on unsupported variable types.
  if (!var->IsType<LoDTensor>()) {
    PADDLE_THROW(platform::errors::InvalidArgument(
        "Variable must be type of LoDTensor, but received %s.",
        framework::ToTypeName(var->Type())));
  }
  return var->Get<LoDTensor>();
}

// Returns a mutable pointer to the LoDTensor held by `var`.
// Throws InvalidArgument (reporting the actual runtime type) for any
// variable that does not hold a LoDTensor.
static inline Tensor *GetMutableTensorFromVar(Variable *var) {
  // Guard clause: bail out early on unsupported variable types.
  if (!var->IsType<LoDTensor>()) {
    PADDLE_THROW(platform::errors::InvalidArgument(
        "Variable must be type of LoDTensor, but received %s.",
        framework::ToTypeName(var->Type())));
  }
  return var->GetMutable<LoDTensor>();
}

ShareTensorBufferFunctor::ShareTensorBufferFunctor(
Scope *scope, size_t scope_idx, const std::string &op_type,
const std::vector<const ir::MemOptVarInfo *> &in_var_infos,
Expand Down
21 changes: 21 additions & 0 deletions paddle/fluid/framework/details/share_tensor_buffer_functor.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,27 @@ namespace paddle {
namespace framework {
namespace details {

// TODO(zjl): support SelectedRows
// NOTE(review): `static inline` on a header-defined function gives each
// translation unit its own copy; plain `inline` would share one definition —
// confirm against the project's convention before changing linkage.
// Returns a read-only view of the LoDTensor held by `var`; throws
// InvalidArgument (with the actual runtime type) otherwise.
static inline const Tensor &GetTensorFromVar(const Variable *var) {
  if (!var->IsType<LoDTensor>()) {
    // Unsupported variable type: fail loudly with the observed type name.
    PADDLE_THROW(platform::errors::InvalidArgument(
        "Variable must be type of LoDTensor, but received %s.",
        framework::ToTypeName(var->Type())));
  }
  return var->Get<LoDTensor>();
}

// Returns a mutable pointer to the LoDTensor held by `var`; throws
// InvalidArgument (with the actual runtime type) otherwise.
static inline Tensor *GetMutableTensorFromVar(Variable *var) {
  if (!var->IsType<LoDTensor>()) {
    // Unsupported variable type: fail loudly with the observed type name.
    PADDLE_THROW(platform::errors::InvalidArgument(
        "Variable must be type of LoDTensor, but received %s.",
        framework::ToTypeName(var->Type())));
  }
  return var->GetMutable<LoDTensor>();
}

// NOTE(paddle-dev): ShareTensorBufferFunctor is responsible for
// performing memory reuse in run-time. ShareTensorBufferOpHandle
// is only a wrapper of ShareTensorBufferFunctor.
Expand Down
50 changes: 50 additions & 0 deletions paddle/fluid/framework/new_executor/interpretercore.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@

#include <unordered_set>

#include "paddle/fluid/framework/details/share_tensor_buffer_functor.h"

DEFINE_bool(new_executor_use_inplace, true, "Use inplace in new executor");

namespace paddle {
namespace framework {

Expand Down Expand Up @@ -192,6 +196,41 @@ void InterpreterCore::Convert() {
gc_event_.emplace_back(vec_instruction_[i].execution_ctx_.get()->GetPlace(),
platform::GenerateDeviceEventFlag());
}

if (FLAGS_new_executor_use_inplace) {
BuildInplace();
}
}

// Scans every instruction and, for ops that register an inplace inference
// rule, records (input var, output var) pairs eligible for buffer sharing.
// A pair is recorded only when the input variable feeds exactly one op
// (so no other consumer still needs its buffer) and both variables are
// present in the global scope.
void InterpreterCore::BuildInplace() {
  for (auto& instr : vec_instruction_) {
    const auto& op_info = instr.kernel_func_.operator_base_->Info();
    if (!op_info.infer_inplace_) {
      continue;  // This op declares no inplace rule.
    }

    // The inplace inference may differ between CPU and GPU placements.
    bool on_gpu = platform::is_gpu_place(instr.dev_ctx_->GetPlace());
    auto in_to_outs = op_info.infer_inplace_(on_gpu);

    for (auto& in_out : in_to_outs) {
      auto in_it = instr.input_index_.find(in_out.first);
      if (in_it == instr.input_index_.end()) {
        continue;
      }
      auto in_id = in_it->second[0];
      // Only reuse the buffer when this variable is consumed by a single
      // op; otherwise another consumer could observe the overwrite.
      if (input_var2op_info_[in_id].size() != 1) {
        continue;
      }
      auto out_it = instr.output_index_.find(in_out.second);
      if (out_it == instr.output_index_.end()) {
        continue;
      }
      auto in_var = global_scope_->var_list[in_id];
      auto out_var = global_scope_->var_list[out_it->second[0]];
      if (in_var && out_var) {
        instr.vec_inplace_in_to_out_.emplace_back(in_var, out_var);
      }
    }
  }
}

void InterpreterCore::BuildAndCacheInstructionCtx(
Expand Down Expand Up @@ -243,6 +282,17 @@ void InterpreterCore::RunInstruction(const Instruction& instr_node) {
instr_node.kernel_func_.operator_base_)
->InferShape(instr_node.infershape_ctx_.get());

if (FLAGS_new_executor_use_inplace) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

底层share了buffer之后,Variable的ref count也要增加; 因为out和input share了数据,如果out被其他op使用,这个input的数据就不能够提前释放

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

底层share了buffer之后,Variable的ref count也要增加; 因为out和input share了数据,如果out被其他op使用,这个input的数据就不能够提前释放

这个不会的。share在Instruction前执行,此时In、Out分别持有share_ptr的holder。Instruction执行后,In交给GC后,GC只会减去share_ptr的一个RefCount,Out还能够继续正常持有holder。

for (auto& pair : instr_node.vec_inplace_in_to_out_) {
const auto& in = paddle::framework::details::GetTensorFromVar(pair.first);
auto* out =
paddle::framework::details::GetMutableTensorFromVar(pair.second);
if (in.dims() == out->dims()) {
out->ShareBufferWith(in);
}
}
}

instr_node.kernel_func_.compute_func_(*instr_node.execution_ctx_.get());
}

Expand Down
2 changes: 2 additions & 0 deletions paddle/fluid/framework/new_executor/interpretercore.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ class InterpreterCore {
const VariableScope& var_scope,
const platform::Place& place);

void BuildInplace();

void RunInstruction(const Instruction& instr_node);

void ExecuteInstructionList(const std::vector<Instruction>& vec_instr,
Expand Down
2 changes: 2 additions & 0 deletions paddle/fluid/framework/new_executor/new_executor_defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ struct Instruction {
std::vector<EventInter> output_events_;

platform::DeviceContext* dev_ctx_; // not owned

std::vector<std::pair<Variable*, Variable*>> vec_inplace_in_to_out_;
};

enum class OpFuncType {
Expand Down
1 change: 1 addition & 0 deletions python/paddle/fluid/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ def __bootstrap__():
'sort_sum_gradient',
'max_inplace_grad_add',
'apply_pass_to_program',
'new_executor_use_inplace',
]
if 'Darwin' not in sysstr:
read_env_flags.append('use_pinned_memory')
Expand Down