Fix/Fix memory leak in dygraph #17394

Merged
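As the diff below shows, VarBase now owns its framework::Variable through std::unique_ptr instead of a raw pointer with manual delete, borrowers switch to var_.get(), PyLayer's Python-call path returns std::unique_ptr<framework::Variable>, and the blocking NewVarBase copy waits on both the source and destination device contexts after TensorCopySync. A minimal sketch of the ownership pattern only, using placeholder names (Variable, VarHolder) rather than the real Paddle classes:

```cpp
#include <memory>
#include <string>

// Illustrative stand-in for framework::Variable; not the real class.
struct Variable {
  std::string payload;
};

// Sketch of the ownership change: the holder owns its Variable through
// std::unique_ptr, so the destructor needs no manual `delete`.
class VarHolder {
 public:
  explicit VarHolder(std::unique_ptr<Variable> var) : var_(std::move(var)) {}

  // Non-owning access for code that only reads or mutates the Variable,
  // mirroring the var_.get() calls introduced by this diff.
  Variable* get() const { return var_.get(); }

 private:
  std::unique_ptr<Variable> var_;  // released automatically with the holder
};

int main() {
  VarHolder holder(std::unique_ptr<Variable>(new Variable{"grad"}));
  holder.get()->payload = "updated";
  // When holder goes out of scope the Variable is freed automatically,
  // so no cleanup path can be skipped and leak the allocation.
  return 0;
}
```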
57 changes: 30 additions & 27 deletions paddle/fluid/imperative/layer.cc
@@ -112,13 +112,13 @@ void AddGradBySort(BackwardSumMap* bck_map, VarBase* target) {
return a.first > b.first;
});
for (auto& var_pair : current.second) {
Variable* origin_grad = target->var_;
Variable* grad_to_add = var_pair.second->var_;
Variable* origin_grad = target->var_.get();
Variable* grad_to_add = var_pair.second->var_.get();
VLOG(2) << "add origin_grad: " << target->Name();
VLOG(2) << "added grad: " << var_pair.second->Name()
<< " trace id is: " << var_pair.first;
AddTo(grad_to_add, origin_grad, current.first);
delete grad_to_add;
delete var_pair.second;
var_pair.second = nullptr;
}
}
@@ -132,8 +132,8 @@ class Autograd {
return;
}
VLOG(3) << "start autograd";
bck_map = new BackwardSumMap();
grad_ref = new GradientRef();
bck_map = BackwardSumMap();
grad_ref = GradientRef();
Collaborator:
These two lines are unnecessary. Besides, it does not seem necessary for bck_map and grad_ref to be data members of Autograd; just make them temporary variables inside the function.
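A minimal sketch of this suggestion, assuming simplified placeholder definitions for BackwardSumMap, GradientRef, OpBase, and the traversal loop (the real Paddle types and the ApplyGrad signature are more involved):

```cpp
#include <deque>
#include <map>
#include <utility>
#include <vector>

// Simplified placeholder types for illustration only.
struct VarBase;
using BackwardSumMap = std::map<int, std::vector<std::pair<int, VarBase*>>>;
using GradientRef = std::map<VarBase*, int>;  // grad var -> reference count

struct OpBase {
  void ApplyGrad(BackwardSumMap* bck_map, GradientRef* grad_ref) {
    // ... accumulate gradients and update reference counts ...
    (void)bck_map;
    (void)grad_ref;
  }
};

void RunBackward(std::deque<OpBase*> ready) {
  // Local to one backward pass: not data members, no new/delete; both maps
  // are destroyed automatically when the function returns.
  BackwardSumMap bck_map;
  GradientRef grad_ref;

  while (!ready.empty()) {
    OpBase* ready_op = ready.front();
    ready.pop_front();
    ready_op->ApplyGrad(&bck_map, &grad_ref);
  }
}

int main() { RunBackward({}); }
```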

std::deque<OpBase*> ready;
ready.push_back(var->PreOp());

@@ -144,7 +144,7 @@ class Autograd {
OpBase* ready_op = ready.front();
ready.pop_front();
std::map<std::string, std::vector<VarBase*>> input_grads =
ready_op->ApplyGrad(bck_map, grad_ref, bck_stratedy);
ready_op->ApplyGrad(&bck_map, &grad_ref, bck_stratedy);

for (auto it = input_grads.rbegin(); it != input_grads.rend(); ++it) {
const std::vector<VarBase*>& ingrads = it->second;
@@ -185,12 +185,12 @@ class Autograd {
for (const auto& map : candidate->grad_output_vars_) {
for (const auto& it : map) {
for (const auto& vb : it.second) {
if (grad_ref->find(vb) == grad_ref->end()) {
grad_ref->insert(std::make_pair(vb, 1));
if (grad_ref.find(vb) == grad_ref.end()) {
grad_ref.insert(std::make_pair(vb, 1));
Collaborator:
std::map::operator[] value-initializes the mapped value when it inserts a missing key, so a primitive counter starts at zero. You can just write ++grad_ref[vb];
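A small self-contained check of that point, with grad_ref simplified here to std::map<const void*, int> (the real key type is VarBase*):

```cpp
#include <cassert>
#include <map>

int main() {
  std::map<const void*, int> grad_ref;  // grad var -> reference count
  int dummy = 0;
  const void* vb = &dummy;

  // operator[] inserts a value-initialized int (0) for a missing key,
  // so the explicit find/insert branch is unnecessary:
  ++grad_ref[vb];  // first encounter: 0 -> 1
  ++grad_ref[vb];  // second encounter: 1 -> 2

  assert(grad_ref.at(vb) == 2);
  return 0;
}
```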

} else {
// add ref count by 1 when we find grad_var can be generated by
// one grad_op
grad_ref->at(vb) += 1;
grad_ref.at(vb) += 1;
}
}
}
@@ -213,8 +213,8 @@ class Autograd {
return ret;
}

BackwardSumMap* bck_map;
GradientRef* grad_ref;
BackwardSumMap bck_map;
GradientRef grad_ref;
Collaborator:
No need to be data members.

};

std::unique_ptr<VarBase> VarBase::NewVarBase(const platform::Place& dst_place,
@@ -230,14 +230,16 @@ std::unique_ptr<VarBase> VarBase::NewVarBase(const platform::Place& dst_place,
new_var->var_->GetMutable<framework::LoDTensor>();
tensor->set_lod(var_->Get<framework::LoDTensor>().lod());

framework::TensorCopy(var_->Get<framework::LoDTensor>(), dst_place, tensor);
if (blocking) {
platform::DeviceContext* dev_ctx =
platform::DeviceContext* dst_dev_ctx =
platform::DeviceContextPool::Instance().Get(dst_place);

framework::TensorCopySync(var_->Get<framework::LoDTensor>(), dst_place,
tensor);

dev_ctx->Wait();
platform::DeviceContext* src_dev_ctx =
platform::DeviceContextPool::Instance().Get(
var_->Get<framework::LoDTensor>().place());
dst_dev_ctx->Wait();
src_dev_ctx->Wait();
} else {
framework::TensorCopy(var_->Get<framework::LoDTensor>(), dst_place, tensor);
}
@@ -326,7 +328,7 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad(
PADDLE_ENFORCE_NOT_NULL(grad_inp->var_, "op %s input %s nullptr",
grad_op_desc->Type(), grad_inp->Name());

grad_invars.emplace_back(grad_inp->var_);
grad_invars.emplace_back(grad_inp->var_.get());
}
}

@@ -337,7 +339,7 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad(
PADDLE_ENFORCE_NOT_NULL(grad_out->var_, "op %s output %s nullptr",
grad_op_desc->Type(), grad_out->Name());

grad_outvars.emplace_back(grad_out->var_);
grad_outvars.emplace_back(grad_out->var_.get());
}
}

@@ -396,13 +398,13 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad(
grad_ref->at(origin_outputs[i])--;
}
} else {
framework::Variable* grad = outputs[i]->var_;
framework::Variable* orig_grad = origin_outputs[i]->var_;
framework::Variable* grad = outputs[i]->var_.get();
framework::Variable* orig_grad = origin_outputs[i]->var_.get();
VLOG(2) << "AddTo Called with orig_grad is: "
<< origin_outputs[i]->name_ << " Grad to be added is "
<< outputs[i]->name_;
AddTo(grad, orig_grad, place_);
delete grad;
delete outputs[i];
}
}
}
@@ -453,7 +455,7 @@ void PyLayer::RegisterFunc(int func_id, const py::object& py_func) {

int PyLayer::NumFuncs() { return py_funcs_.size(); }

std::vector<framework::Variable*> PyLayer::Apply(
std::vector<std::unique_ptr<framework::Variable>> PyLayer::Apply(
int func_id, const std::vector<VarBase*>& inputs) {
PADDLE_ENFORCE(py_funcs_.find(func_id) != py_funcs_.end());
return CallPythonFunc(py_funcs_[func_id], inputs);
@@ -470,13 +472,13 @@ std::vector<VarBase*> PyLayer::ApplyGrad(int func_id,
outs.emplace_back(new VarBase(
string::Sprintf("%s_out_%d", framework::GradVarName(PyLayer::kFwdOut),
i),
rets[i], nullptr, true));
std::move(rets[i]), nullptr, true));
}

return outs;
}

std::vector<framework::Variable*> PyLayer::CallPythonFunc(
std::vector<std::unique_ptr<framework::Variable>> PyLayer::CallPythonFunc(
const py::object& callable, const std::vector<VarBase*>& ins) {
py::gil_scoped_acquire guard;
py::tuple in_args(ins.size());
@@ -490,19 +492,20 @@ std::vector<framework::Variable*> PyLayer::CallPythonFunc(
auto ret = callable(in_args);
auto ret_tuple = py::cast<py::tuple>(ret);
size_t ret_num = py::len(ret_tuple);
std::vector<framework::Variable*> outs;
std::vector<std::unique_ptr<framework::Variable>> outs;
outs.reserve(ret_num);
VLOG(3) << "pyfunc out " << ret_num;
for (size_t i = 0; i < ret_num; ++i) {
try {
auto* py_out_tensor = py::cast<framework::LoDTensor*>(ret_tuple[i]);
PADDLE_ENFORCE_NOT_NULL(py_out_tensor,
"Output tensor %d should not be nullptr", i);
auto* var = new framework::Variable();
auto var =
std::unique_ptr<framework::Variable>(new framework::Variable());
auto* tensor = var->GetMutable<framework::LoDTensor>();
tensor->ShareDataWith(*py_out_tensor);
tensor->set_lod(py_out_tensor->lod());
outs.emplace_back(var);
outs.emplace_back(std::move(var));
} catch (py::cast_error&) {
PADDLE_THROW("The %d-th output must be LoDTensor", i);
}
33 changes: 15 additions & 18 deletions paddle/fluid/imperative/layer.h
@@ -115,12 +115,14 @@ class OpBase;
class VarBase {
public:
// Internal interface, create VarBase from exist variable
VarBase(const std::string& name, framework::Variable* var, VarBase* grad,
bool stop_gradient)
VarBase(const std::string& name, std::unique_ptr<framework::Variable> var,
VarBase* grad, bool stop_gradient)
: VarBase(name, var->Get<framework::LoDTensor>().type(),
var->Get<framework::LoDTensor>().dims(),
var->Get<framework::LoDTensor>().place(), var, grad,
stop_gradient, false) {}
var->Get<framework::LoDTensor>().place(), nullptr, grad,
stop_gradient, false) {
var_ = std::move(var);
}

// Python interface
VarBase(const std::string& name, const framework::proto::VarType::Type dtype,
@@ -140,19 +142,19 @@ class VarBase {
// TODO(minqiyang): need support SelectedRows
VarBase(const std::string& name, framework::proto::VarType::Type dtype,
const framework::DDim& shape, const platform::Place& place,
framework::Variable* var, VarBase* grad, bool stop_gradient,
bool persistable)
std::unique_ptr<framework::Variable> var, VarBase* grad,
bool stop_gradient, bool persistable)
: name_(name),
type_(framework::proto::VarType::LOD_TENSOR),
var_(var),
var_(std::move(var)),
grads_(grad),
stop_gradient_(stop_gradient),
persistable_(persistable),
pre_op_(nullptr),
pre_op_out_name_(),
pre_op_out_idx_(-1) {
if (!var_) {
var_ = new framework::Variable();
var_.reset(new framework::Variable());
}
auto tensor = var_->GetMutable<framework::LoDTensor>();
tensor->Resize(shape);
@@ -163,11 +165,6 @@ class VarBase {

public:
virtual ~VarBase() {
if (var_) {
delete var_;
var_ = nullptr;
}

if (grads_) {
delete grads_;
grads_ = nullptr;
@@ -261,7 +258,7 @@ class VarBase {
framework::proto::VarType::Type type_;
platform::Place place_;

framework::Variable* var_;
std::unique_ptr<framework::Variable> var_;
VarBase* grads_;

private:
@@ -369,8 +366,8 @@ class Layer {
public:
virtual ~Layer() {}

virtual std::vector<VarBase> Forward(const std::vector<VarBase>& inputs) {
std::vector<VarBase> vars;
virtual std::vector<VarBase*> Forward(const std::vector<VarBase*>& inputs) {
std::vector<VarBase*> vars;
return vars;
}
};
@@ -386,14 +383,14 @@ class PyLayer {

static int NumFuncs();

static std::vector<framework::Variable*> Apply(
static std::vector<std::unique_ptr<framework::Variable>> Apply(
int func_id, const std::vector<VarBase*>& inputs);

static std::vector<VarBase*> ApplyGrad(int func_id,
const std::vector<VarBase*>& inputs);

private:
static std::vector<framework::Variable*> CallPythonFunc(
static std::vector<std::unique_ptr<framework::Variable>> CallPythonFunc(
const py::object& callable, const std::vector<VarBase*>& ins);
};

12 changes: 5 additions & 7 deletions paddle/fluid/imperative/tracer.cc
@@ -153,7 +153,7 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
PADDLE_ENFORCE_NOT_NULL(inp->var_, "op %s input %s nullptr", op->Type(),
inp->Name());

invars.emplace_back(inp->var_);
invars.emplace_back(inp->var_.get());
if (!stop_gradient) {
current_vars_map[inp->Name()] = inp;
}
@@ -171,7 +171,7 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
outvars.reserve(outputs.size());
for (size_t i = 0U; i < outputs.size(); ++i) {
VarBase* out = outputs[i];
outvars.emplace_back(out->var_);
outvars.emplace_back(out->var_.get());
out->TrackPreOp(op, it.first, i, stop_gradient);
if (!stop_gradient) {
current_vars_map[out->Name()] = out;
@@ -294,17 +294,15 @@ std::vector<VarBase*> Tracer::PyTrace(OpBase* op,

op->input_vars_[PyLayer::kFwdInp] = inputs;

std::vector<framework::Variable*> ret_vars =
std::vector<std::unique_ptr<framework::Variable>> ret_vars =
PyLayer::Apply(op->forward_id_, inputs);

op->TrackPreOp(PyLayer::kFwdInp, inputs);

std::vector<VarBase*>& outputs = op->output_vars_[PyLayer::kFwdOut];
outputs.reserve(ret_vars.size());
for (size_t i = 0U; i != ret_vars.size(); ++i) {
framework::Variable* v = ret_vars[i];
VarBase* out = new VarBase(string::Sprintf("%s_out_%d", op->Type(), i), v,
nullptr, stop_gradient);
VarBase* out = new VarBase(string::Sprintf("%s_out_%d", op->Type(), i),
std::move(ret_vars[i]), nullptr, stop_gradient);
outputs.emplace_back(out);
out->TrackPreOp(op, PyLayer::kFwdOut, i, stop_gradient);
}
6 changes: 3 additions & 3 deletions paddle/fluid/pybind/imperative.h
@@ -28,9 +28,9 @@ class Layer : public imperative::Layer {
public:
using imperative::Layer::Layer; // Inherit constructors

std::vector<imperative::VarBase> Forward(
const std::vector<imperative::VarBase>& inputs) override {
PYBIND11_OVERLOAD(std::vector<imperative::VarBase>, Layer, Forward,
std::vector<imperative::VarBase*> Forward(
const std::vector<imperative::VarBase*>& inputs) override {
PYBIND11_OVERLOAD(std::vector<imperative::VarBase*>, Layer, Forward,
inputs); // NOLINT
}
};
10 changes: 5 additions & 5 deletions paddle/fluid/pybind/pybind.cc
@@ -237,7 +237,8 @@ PYBIND11_MODULE(core, m) {
return new_var.release();
},
py::return_value_policy::take_ownership)
.def("value", [](const imperative::VarBase &self) { return self.var_; },
.def("value",
[](const imperative::VarBase &self) { return self.var_.get(); },
py::return_value_policy::reference)
.def_property("name", &imperative::VarBase::Name,
&imperative::VarBase::SetName)
@@ -285,7 +286,7 @@ PYBIND11_MODULE(core, m) {
py::class_<imperative::Layer, Layer /* <--- trampoline*/> layer(m, "Layer");
layer.def(py::init<>())
.def("forward", [](imperative::Layer &self,
const std::vector<imperative::VarBase> &inputs) {
const std::vector<imperative::VarBase *> &inputs) {
return self.Forward(inputs);
});

@@ -299,10 +300,9 @@ PYBIND11_MODULE(core, m) {
std::vector<imperative::VarBase *> outputs;
outputs.reserve(ret_vars.size());
for (size_t i = 0U; i != ret_vars.size(); ++i) {
framework::Variable *v = ret_vars[i];
// TODO(minqiyang): use unique_name generator to set a name
outputs.emplace_back(
new imperative::VarBase("", v, nullptr, true));
outputs.emplace_back(new imperative::VarBase(
"", std::move(ret_vars[i]), nullptr, true));
}

return outputs;