Commit d7df4e5

Fix/Fix memory leak in dygraph (#17394)
* test=develop, add gradient sort backward strategy
* test=develop, fix test by add FLAGS_cudnn_deterministic on new tests
* test=develop, fix memory leak in dygraph mode
* test=develop, fix memory leak in dygraph mode
* test=develop, polish code
* test=develop, polish code
* test=develop, polish code
1 parent: 728bbaa
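The commit's two recurring fixes are: (1) the backward pass allocated its bookkeeping maps with `new` on every call and never freed them, which is replaced by stack-allocated locals, and (2) `VarBase` held an owning raw `framework::Variable*` and relied on a hand-written `delete`, which is replaced by `std::unique_ptr`. A minimal standalone sketch of both idioms, with hypothetical `RunBackwardPass`/`Holder` names rather than the real Paddle classes:

```cpp
#include <map>
#include <memory>
#include <string>

// (1) Before: a map allocated with `new` on every call and never deleted
//     leaks once per backward pass. A stack-allocated local ties its
//     lifetime to the call and needs no cleanup.
void RunBackwardPass() {
  std::map<std::string, int> sum_map;  // was: auto* sum_map = new std::map<...>();
  sum_map["grad@x"] += 1;
}  // sum_map is destroyed automatically here

// (2) Before: an owning raw pointer that depended on a hand-written delete
//     in the destructor (and leaked on any path that skipped it). Holding
//     the resource in std::unique_ptr makes destruction automatic.
class Holder {
 public:
  explicit Holder(std::unique_ptr<int> value) : value_(std::move(value)) {}
  // No user-written destructor needed: unique_ptr releases the int itself.
  int* raw() const { return value_.get(); }  // non-owning view, like var_.get()

 private:
  std::unique_ptr<int> value_;  // was: int* value_;
};

int main() {
  RunBackwardPass();
  Holder h(std::unique_ptr<int>(new int(42)));
  return *h.raw() == 42 ? 0 : 1;
}
```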

File tree: 5 files changed (+61 −66 lines)


paddle/fluid/imperative/layer.cc

Lines changed: 25 additions & 27 deletions
@@ -112,8 +112,8 @@ void AddGradBySort(BackwardSumMap* bck_map, VarBase* target) {
               return a.first > b.first;
             });
   for (auto& var_pair : current.second) {
-    Variable* origin_grad = target->var_;
-    Variable* grad_to_add = var_pair.second->var_;
+    Variable* origin_grad = target->var_.get();
+    Variable* grad_to_add = var_pair.second->var_.get();
     VLOG(2) << "add origin_grad: " << target->Name();
     VLOG(2) << "added grad: " << var_pair.second->Name()
             << " trace id is: " << var_pair.first;
@@ -132,19 +132,19 @@ class Autograd {
       return;
     }
     VLOG(3) << "start autograd";
-    bck_map = new BackwardSumMap();
-    grad_ref = new GradientRef();
+    BackwardSumMap bck_map;
+    GradientRef grad_ref;
     std::deque<OpBase*> ready;
     ready.push_back(var->PreOp());

     std::map<OpBase*, int> dep_counts =
-        ComputeDepCounts(var->PreOp(), bck_stratedy);
+        ComputeDepCounts(var->PreOp(), bck_stratedy, &grad_ref);

     while (!ready.empty()) {
       OpBase* ready_op = ready.front();
       ready.pop_front();
       std::map<std::string, std::vector<VarBase*>> input_grads =
-          ready_op->ApplyGrad(bck_map, grad_ref, bck_stratedy);
+          ready_op->ApplyGrad(&bck_map, &grad_ref, bck_stratedy);

       for (auto it = input_grads.rbegin(); it != input_grads.rend(); ++it) {
         const std::vector<VarBase*>& ingrads = it->second;
@@ -171,7 +171,13 @@ class Autograd {

  private:
   std::map<OpBase*, int> ComputeDepCounts(
-      OpBase* op, const detail::BackwardStrategy& bck_stratedy) {
+      OpBase* op, const detail::BackwardStrategy& bck_stratedy,
+      GradientRef* grad_ref) {
+    if (bck_stratedy.sorted_sum_gradient_) {
+      PADDLE_ENFORCE_NOT_NULL(grad_ref,
+                              "grad_ref should not be null when "
+                              "using sorted grad backward strategy");
+    }
     std::map<OpBase*, int> ret;

     std::deque<OpBase*> queue;
@@ -185,13 +191,7 @@ class Autograd {
       for (const auto& map : candidate->grad_output_vars_) {
         for (const auto& it : map) {
           for (const auto& vb : it.second) {
-            if (grad_ref->find(vb) == grad_ref->end()) {
-              grad_ref->insert(std::make_pair(vb, 1));
-            } else {
-              // add ref count by 1 when we find grad_var can be generated by
-              // one grad_op
-              grad_ref->at(vb) += 1;
-            }
+            ++(*grad_ref)[vb];
           }
         }
       }
@@ -212,9 +212,6 @@ class Autograd {
     }
     return ret;
   }
-
-  BackwardSumMap* bck_map;
-  GradientRef* grad_ref;
 };

 std::unique_ptr<VarBase> VarBase::NewVarBase(const platform::Place& dst_place,
@@ -324,7 +321,7 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad(
         PADDLE_ENFORCE_NOT_NULL(grad_inp->var_, "op %s input %s nullptr",
                                 grad_op_desc->Type(), grad_inp->Name());

-        grad_invars.emplace_back(grad_inp->var_);
+        grad_invars.emplace_back(grad_inp->var_.get());
       }
     }

@@ -335,7 +332,7 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad(
         PADDLE_ENFORCE_NOT_NULL(grad_out->var_, "op %s output %s nullptr",
                                 grad_op_desc->Type(), grad_out->Name());

-        grad_outvars.emplace_back(grad_out->var_);
+        grad_outvars.emplace_back(grad_out->var_.get());
       }
     }

@@ -394,8 +391,8 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad(
           grad_ref->at(origin_outputs[i])--;
         }
       } else {
-        framework::Variable* grad = outputs[i]->var_;
-        framework::Variable* orig_grad = origin_outputs[i]->var_;
+        framework::Variable* grad = outputs[i]->var_.get();
+        framework::Variable* orig_grad = origin_outputs[i]->var_.get();
         VLOG(2) << "AddTo Called with orig_grad is: "
                 << origin_outputs[i]->name_ << " Grad to be added is "
                 << outputs[i]->name_;
@@ -451,7 +448,7 @@ void PyLayer::RegisterFunc(int func_id, const py::object& py_func) {

 int PyLayer::NumFuncs() { return py_funcs_.size(); }

-std::vector<framework::Variable*> PyLayer::Apply(
+std::vector<std::unique_ptr<framework::Variable>> PyLayer::Apply(
     int func_id, const std::vector<VarBase*>& inputs) {
   PADDLE_ENFORCE(py_funcs_.find(func_id) != py_funcs_.end());
   return CallPythonFunc(py_funcs_[func_id], inputs);
@@ -468,13 +465,13 @@ std::vector<VarBase*> PyLayer::ApplyGrad(int func_id,
     outs.emplace_back(new VarBase(
         string::Sprintf("%s_out_%d", framework::GradVarName(PyLayer::kFwdOut),
                         i),
-        rets[i], nullptr, true));
+        std::move(rets[i]), nullptr, true));
   }

   return outs;
 }

-std::vector<framework::Variable*> PyLayer::CallPythonFunc(
+std::vector<std::unique_ptr<framework::Variable>> PyLayer::CallPythonFunc(
     const py::object& callable, const std::vector<VarBase*>& ins) {
   py::gil_scoped_acquire guard;
   py::tuple in_args(ins.size());
@@ -488,19 +485,20 @@ std::vector<framework::Variable*> PyLayer::CallPythonFunc(
   auto ret = callable(in_args);
   auto ret_tuple = py::cast<py::tuple>(ret);
   size_t ret_num = py::len(ret_tuple);
-  std::vector<framework::Variable*> outs;
+  std::vector<std::unique_ptr<framework::Variable>> outs;
   outs.reserve(ret_num);
   VLOG(3) << "pyfunc out " << ret_num;
   for (size_t i = 0; i < ret_num; ++i) {
     try {
       auto* py_out_tensor = py::cast<framework::LoDTensor*>(ret_tuple[i]);
       PADDLE_ENFORCE_NOT_NULL(py_out_tensor,
                               "Output tensor %d should not be nullptr", i);
-      auto* var = new framework::Variable();
+      auto var =
+          std::unique_ptr<framework::Variable>(new framework::Variable());
       auto* tensor = var->GetMutable<framework::LoDTensor>();
       tensor->ShareDataWith(*py_out_tensor);
       tensor->set_lod(py_out_tensor->lod());
-      outs.emplace_back(var);
+      outs.emplace_back(std::move(var));
     } catch (py::cast_error&) {
       PADDLE_THROW("The %d-th output must be LoDTensor", i);
     }
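One detail of the layer.cc change above: the old find/insert/else block that maintained the gradient reference count collapses to `++(*grad_ref)[vb]`, because `std::map::operator[]` value-initializes a missing entry (zero for an integer count) before the increment. A minimal standalone sketch, using a `std::string` key in place of the real `VarBase*` key:

```cpp
#include <cassert>
#include <map>
#include <string>

using GradientRef = std::map<std::string, int>;

// Equivalent to the removed find/insert/at branch: operator[] inserts the key
// with a value-initialized count (0) if it is absent, then we increment it.
void AddRef(GradientRef* grad_ref, const std::string& var) {
  ++(*grad_ref)[var];
}

int main() {
  GradientRef refs;
  AddRef(&refs, "x@GRAD");
  AddRef(&refs, "x@GRAD");
  AddRef(&refs, "y@GRAD");
  assert(refs["x@GRAD"] == 2);
  assert(refs["y@GRAD"] == 1);
  return 0;
}
```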

paddle/fluid/imperative/layer.h

Lines changed: 22 additions & 24 deletions
@@ -14,15 +14,16 @@

 #pragma once

-// clang-format off
-#include "paddle/fluid/framework/python_headers.h"
-// clang-format on
-
 #include <map>            // NOLINT
-#include <string>         // NOLINT
-#include <vector>         // NOLINT
 #include <memory>         // NOLINT
+#include <string>         // NOLINT
 #include <unordered_map>  // NOLINT
+#include <utility>
+#include <vector>         // NOLINT
+
+// clang-format off
+#include "paddle/fluid/framework/python_headers.h"
+// clang-format on

 #include "paddle/fluid/framework/op_desc.h"
 #include "paddle/fluid/framework/operator.h"
@@ -115,12 +116,14 @@ class OpBase;
 class VarBase {
  public:
   // Internal interface, create VarBase from exist variable
-  VarBase(const std::string& name, framework::Variable* var, VarBase* grad,
-          bool stop_gradient)
+  VarBase(const std::string& name, std::unique_ptr<framework::Variable> var,
+          VarBase* grad, bool stop_gradient)
       : VarBase(name, var->Get<framework::LoDTensor>().type(),
                 var->Get<framework::LoDTensor>().dims(),
-                var->Get<framework::LoDTensor>().place(), var, grad,
-                stop_gradient, false) {}
+                var->Get<framework::LoDTensor>().place(), nullptr, grad,
+                stop_gradient, false) {
+    var_ = std::move(var);
+  }

   // Python interface
   VarBase(const std::string& name, const framework::proto::VarType::Type dtype,
@@ -140,19 +143,19 @@ class VarBase {
   // TODO(minqiyang): need support SelectedRows
   VarBase(const std::string& name, framework::proto::VarType::Type dtype,
           const framework::DDim& shape, const platform::Place& place,
-          framework::Variable* var, VarBase* grad, bool stop_gradient,
-          bool persistable)
+          std::unique_ptr<framework::Variable> var, VarBase* grad,
+          bool stop_gradient, bool persistable)
       : name_(name),
         type_(framework::proto::VarType::LOD_TENSOR),
-        var_(var),
+        var_(std::move(var)),
         grads_(grad),
         stop_gradient_(stop_gradient),
         persistable_(persistable),
         pre_op_(nullptr),
         pre_op_out_name_(),
         pre_op_out_idx_(-1) {
     if (!var_) {
-      var_ = new framework::Variable();
+      var_.reset(new framework::Variable());
     }
     auto tensor = var_->GetMutable<framework::LoDTensor>();
     tensor->Resize(shape);
@@ -163,11 +166,6 @@ class VarBase {

  public:
   virtual ~VarBase() {
-    if (var_) {
-      delete var_;
-      var_ = nullptr;
-    }
-
     if (grads_) {
       delete grads_;
       grads_ = nullptr;
@@ -261,7 +259,7 @@ class VarBase {
   framework::proto::VarType::Type type_;
   platform::Place place_;

-  framework::Variable* var_;
+  std::unique_ptr<framework::Variable> var_;
   VarBase* grads_;

  private:
@@ -369,8 +367,8 @@ class Layer {
  public:
   virtual ~Layer() {}

-  virtual std::vector<VarBase> Forward(const std::vector<VarBase>& inputs) {
-    std::vector<VarBase> vars;
+  virtual std::vector<VarBase*> Forward(const std::vector<VarBase*>& inputs) {
+    std::vector<VarBase*> vars;
     return vars;
   }
 };
@@ -386,14 +384,14 @@ class PyLayer {

   static int NumFuncs();

-  static std::vector<framework::Variable*> Apply(
+  static std::vector<std::unique_ptr<framework::Variable>> Apply(
       int func_id, const std::vector<VarBase*>& inputs);

   static std::vector<VarBase*> ApplyGrad(int func_id,
                                          const std::vector<VarBase*>& inputs);

  private:
-  static std::vector<framework::Variable*> CallPythonFunc(
+  static std::vector<std::unique_ptr<framework::Variable>> CallPythonFunc(
       const py::object& callable, const std::vector<VarBase*>& ins);
 };

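A subtle point in the new `VarBase` constructor taking `std::unique_ptr<framework::Variable>`: the delegating constructor still reads the tensor's type, dims, and place from `var`, so it forwards `nullptr` to the delegate and only transfers ownership afterwards with `var_ = std::move(var)` in the body. Evaluating `std::move(var)` and `var->Get<...>()` in the same argument list would have unspecified order and could dereference a moved-from pointer. A minimal sketch with hypothetical `Var`/`Tensor` types:

```cpp
#include <memory>
#include <string>

struct Tensor {
  int dims;
};

class Var {
 public:
  // Delegating constructor: read what we need from `payload` first, pass the
  // metadata on, and only take ownership in the constructor body. Passing
  // std::move(payload) and payload->dims in the same argument list would have
  // unspecified evaluation order.
  Var(const std::string& name, std::unique_ptr<Tensor> payload)
      : Var(name, payload->dims, nullptr) {
    payload_ = std::move(payload);
  }

  Var(const std::string& name, int dims, std::unique_ptr<Tensor> payload)
      : name_(name), dims_(dims), payload_(std::move(payload)) {
    if (!payload_) {
      payload_.reset(new Tensor{dims});  // mirrors var_.reset(new Variable())
    }
  }

  const Tensor* tensor() const { return payload_.get(); }

 private:
  std::string name_;
  int dims_ = 0;
  std::unique_ptr<Tensor> payload_;
};

int main() {
  Var v("x", std::unique_ptr<Tensor>(new Tensor{3}));
  return v.tensor()->dims == 3 ? 0 : 1;
}
```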

paddle/fluid/imperative/tracer.cc

Lines changed: 6 additions & 7 deletions
@@ -18,6 +18,7 @@
 #include <set>
 #include <unordered_map>
 #include <unordered_set>
+#include <utility>

 #include "paddle/fluid/framework/var_type_inference.h"
 #include "paddle/fluid/operators/math/math_function.h"
@@ -153,7 +154,7 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
       PADDLE_ENFORCE_NOT_NULL(inp->var_, "op %s input %s nullptr", op->Type(),
                               inp->Name());

-      invars.emplace_back(inp->var_);
+      invars.emplace_back(inp->var_.get());
       if (!stop_gradient) {
         current_vars_map[inp->Name()] = inp;
       }
@@ -171,7 +172,7 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
     outvars.reserve(outputs.size());
     for (size_t i = 0U; i < outputs.size(); ++i) {
       VarBase* out = outputs[i];
-      outvars.emplace_back(out->var_);
+      outvars.emplace_back(out->var_.get());
       out->TrackPreOp(op, it.first, i, stop_gradient);
       if (!stop_gradient) {
         current_vars_map[out->Name()] = out;
@@ -294,17 +295,15 @@ std::vector<VarBase*> Tracer::PyTrace(OpBase* op,

   op->input_vars_[PyLayer::kFwdInp] = inputs;

-  std::vector<framework::Variable*> ret_vars =
+  std::vector<std::unique_ptr<framework::Variable>> ret_vars =
       PyLayer::Apply(op->forward_id_, inputs);
-
   op->TrackPreOp(PyLayer::kFwdInp, inputs);

   std::vector<VarBase*>& outputs = op->output_vars_[PyLayer::kFwdOut];
   outputs.reserve(ret_vars.size());
   for (size_t i = 0U; i != ret_vars.size(); ++i) {
-    framework::Variable* v = ret_vars[i];
-    VarBase* out = new VarBase(string::Sprintf("%s_out_%d", op->Type(), i), v,
-                               nullptr, stop_gradient);
+    VarBase* out = new VarBase(string::Sprintf("%s_out_%d", op->Type(), i),
+                               std::move(ret_vars[i]), nullptr, stop_gradient);
     outputs.emplace_back(out);
     out->TrackPreOp(op, PyLayer::kFwdOut, i, stop_gradient);
   }
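In `Tracer::PyTrace`, `ret_vars` is now a vector of `std::unique_ptr<framework::Variable>`, and each element is handed to the new `VarBase` with `std::move(ret_vars[i])`; the moved-from slot keeps a null pointer, which is harmless because the vector is a local that is dropped right after the loop. A minimal sketch of that hand-off, with a hypothetical `Consume` function standing in for the `VarBase` constructor:

```cpp
#include <cassert>
#include <memory>
#include <utility>
#include <vector>

// Takes ownership of one produced value, as the new VarBase constructor does.
int Consume(std::unique_ptr<int> value) { return *value; }

int main() {
  std::vector<std::unique_ptr<int>> produced;
  produced.emplace_back(new int(1));
  produced.emplace_back(new int(2));

  int sum = 0;
  for (size_t i = 0; i != produced.size(); ++i) {
    // std::move transfers ownership out of the vector element; the element
    // itself stays in the vector but now holds nullptr.
    sum += Consume(std::move(produced[i]));
    assert(produced[i] == nullptr);
  }
  assert(sum == 3);
  return 0;
}  // `produced` is destroyed here; the moved-from elements free nothing
```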

paddle/fluid/pybind/imperative.h

Lines changed: 3 additions & 3 deletions
@@ -28,9 +28,9 @@ class Layer : public imperative::Layer {
  public:
   using imperative::Layer::Layer;  // Inherit constructors

-  std::vector<imperative::VarBase> Forward(
-      const std::vector<imperative::VarBase>& inputs) override {
-    PYBIND11_OVERLOAD(std::vector<imperative::VarBase>, Layer, Forward,
+  std::vector<imperative::VarBase*> Forward(
+      const std::vector<imperative::VarBase*>& inputs) override {
+    PYBIND11_OVERLOAD(std::vector<imperative::VarBase*>, Layer, Forward,
                       inputs);  // NOLINT
   }
 };
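The trampoline's `Forward` signature switches from passing and returning `VarBase` by value to `VarBase*`. A likely reason (the commit message does not spell it out): once `VarBase` owns its `framework::Variable` through a `std::unique_ptr` member, its implicit copy constructor is deleted, so containers of `VarBase` copies no longer compile. A minimal sketch with a hypothetical `VarLike` class:

```cpp
#include <memory>
#include <vector>

class VarLike {
 public:
  VarLike() : payload_(new int(0)) {}

 private:
  std::unique_ptr<int> payload_;  // implicitly deletes the copy constructor
};

int main() {
  VarLike a;
  // VarLike b = a;               // would not compile: copy ctor is deleted
  std::vector<VarLike*> ptrs{&a};  // passing pointers sidesteps the copy
  return ptrs.size() == 1 ? 0 : 1;
}
```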

paddle/fluid/pybind/pybind.cc

Lines changed: 5 additions & 5 deletions
@@ -237,7 +237,8 @@ PYBIND11_MODULE(core, m) {
             return new_var.release();
           },
           py::return_value_policy::take_ownership)
-      .def("value", [](const imperative::VarBase &self) { return self.var_; },
+      .def("value",
+           [](const imperative::VarBase &self) { return self.var_.get(); },
           py::return_value_policy::reference)
       .def_property("name", &imperative::VarBase::Name,
                     &imperative::VarBase::SetName)
@@ -285,7 +286,7 @@ PYBIND11_MODULE(core, m) {
   py::class_<imperative::Layer, Layer /* <--- trampoline*/> layer(m, "Layer");
   layer.def(py::init<>())
       .def("forward", [](imperative::Layer &self,
-                         const std::vector<imperative::VarBase> &inputs) {
+                         const std::vector<imperative::VarBase *> &inputs) {
         return self.Forward(inputs);
       });

@@ -299,10 +300,9 @@ PYBIND11_MODULE(core, m) {
         std::vector<imperative::VarBase *> outputs;
         outputs.reserve(ret_vars.size());
         for (size_t i = 0U; i != ret_vars.size(); ++i) {
-          framework::Variable *v = ret_vars[i];
           // TODO(minqiyang): use unique_name generator to set a name
-          outputs.emplace_back(
-              new imperative::VarBase("", v, nullptr, true));
+          outputs.emplace_back(new imperative::VarBase(
+              "", std::move(ret_vars[i]), nullptr, true));
         }

         return outputs;
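In the `value` binding, the lambda now returns `self.var_.get()` while keeping `py::return_value_policy::reference`, so Python receives a borrowed pointer and the `unique_ptr` inside `VarBase` stays the sole owner. A minimal pybind11 sketch of the same pattern, with hypothetical `Owner`/`Payload` types standing in for the real Paddle classes:

```cpp
#include <pybind11/pybind11.h>

#include <memory>

namespace py = pybind11;

struct Payload {
  int value = 7;
};

struct Owner {
  std::unique_ptr<Payload> payload{new Payload};
};

PYBIND11_MODULE(owner_demo, m) {
  py::class_<Payload>(m, "Payload")
      .def_readonly("value", &Payload::value);
  py::class_<Owner>(m, "Owner")
      .def(py::init<>())
      // .get() hands Python a borrowed pointer; the reference policy tells
      // pybind11 not to take ownership or delete it from the Python side.
      .def("payload",
           [](const Owner& self) { return self.payload.get(); },
           py::return_value_policy::reference);
}
```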
