PaddlePaddle · sneaxiy · Jun 18, 2019 · Jun 13, 2019 · Jun 14, 2019 · Jun 14, 2019
diff --git a/paddle/fluid/imperative/CMakeLists.txt b/paddle/fluid/imperative/CMakeLists.txt
@@ -1,5 +1,7 @@
+# Builds flags.cc as its own library. It is declared outside the WITH_PYTHON
+# guard below and depends only on gflags.
+cc_library(imperative_flag SRCS flags.cc DEPS gflags) 
+
 if(WITH_PYTHON)
-cc_library(layer SRCS layer.cc DEPS proto_desc operator device_context blas pybind profiler)
+cc_library(layer SRCS layer.cc DEPS proto_desc operator device_context blas pybind profiler imperative_flag)
 cc_library(tracer SRCS tracer.cc DEPS proto_desc device_context pybind profiler)
 cc_library(engine SRCS engine.cc)
 cc_library(imperative_profiler SRCS profiler.cc)

diff --git a/paddle/fluid/imperative/flags.cc b/paddle/fluid/imperative/flags.cc
@@ -0,0 +1,30 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/imperative/flags.h"
+#include "gflags/gflags.h"
+
+// Runtime flag holding the dygraph debug level. The default is 0, which
+// IsDebugEnabled() below treats as "debugging disabled".
+DEFINE_uint64(dygraph_debug, 0,
+              "Debug level of dygraph. This flag is not "
+              "open to users");
+
+namespace paddle {
+namespace imperative {
+
+// Reports whether dygraph debugging is on, i.e. whether the dygraph_debug
+// flag currently holds a non-zero level.
+bool IsDebugEnabled() {
+  const uint64_t level = FLAGS_dygraph_debug;
+  return level > 0;
+}
+
+// Returns the current value of the dygraph_debug flag.
+uint64_t GetDebugLevel() {
+  const uint64_t level = FLAGS_dygraph_debug;
+  return level;
+}
+
+}  // namespace imperative
+}  // namespace paddle
diff --git a/paddle/fluid/imperative/flags.h b/paddle/fluid/imperative/flags.h
@@ -0,0 +1,26 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <cstdint>
+
+namespace paddle {
+namespace imperative {
+
+// Returns true when the dygraph debug level is non-zero.
+bool IsDebugEnabled();
+
+// Returns the current dygraph debug level.
+uint64_t GetDebugLevel();
+
+}  // namespace imperative
+}  // namespace paddle
diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc
@@ -34,6 +34,27 @@
 namespace paddle {
 namespace imperative {
 
+// Records one occurrence of `name` while holding the mutex. The container is
+// a multiset, so inserting an already-present name keeps both entries.
+void ThreadSafeNameSet::Insert(const std::string& name) {
+  std::lock_guard<std::mutex> lock(mtx_);
+  set_.emplace(name);
+}
+
+// Drops a single occurrence of `name` while holding the mutex. The name is
+// expected to be present; PADDLE_ENFORCE checks that before erasing.
+void ThreadSafeNameSet::Remove(const std::string& name) {
+  std::lock_guard<std::mutex> lock(mtx_);
+  const auto pos = set_.find(name);
+  PADDLE_ENFORCE(pos != set_.end(), "%s does not exist", name);
+  // Erase by iterator so only one of possibly several equal names is removed.
+  set_.erase(pos);
+}
+
+// Returns a copy of every stored name, taken while holding the mutex.
+// Duplicates are kept and the order follows the multiset's iteration order.
+std::vector<std::string> ThreadSafeNameSet::Names() const {
+  std::lock_guard<std::mutex> lock(mtx_);
+  std::vector<std::string> snapshot;
+  snapshot.reserve(set_.size());
+  snapshot.insert(snapshot.end(), set_.begin(), set_.end());
+  return snapshot;
+}
+
+// Out-of-class definition of the static name set declared in VarBase.
+ThreadSafeNameSet VarBase::name_set_;
+
+// Returns a copy of the names currently held in VarBase::name_set_.
+std::vector<std::string> VarBase::AliveVarNames() {
+  return name_set_.Names();
+}
+
 using framework::Variable;
 
 namespace detail {

diff --git a/paddle/fluid/imperative/layer.h b/paddle/fluid/imperative/layer.h
@@ -14,8 +14,11 @@
 
 #pragma once
 
-#include <map>            // NOLINT
-#include <memory>         // NOLINT
+#include <cstdint>
+#include <map>     // NOLINT
+#include <memory>  // NOLINT
+#include <mutex>   // NOLINT
+#include <set>
 #include <string>         // NOLINT
 #include <unordered_map>  // NOLINT
 #include <utility>
@@ -34,6 +37,7 @@
 #include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/imperative/backward_strategy.h"
 #include "paddle/fluid/imperative/type_defs.h"
+#include "paddle/fluid/imperative/flags.h"
 
 namespace paddle {
 namespace imperative {
@@ -108,13 +112,28 @@ class PreparedOp {
 
 class OpBase;
 
+// A multiset of names guarded by an internal mutex. The member functions are
+// defined in layer.cc and each one locks mtx_ for its whole duration.
+class ThreadSafeNameSet {
+ public:
+  // Adds one occurrence of `name`.
+  void Insert(const std::string& name);
+
+  // Removes one occurrence of `name`; the name is enforced to be present.
+  void Remove(const std::string& name);
+
+  // Returns a copy of all names currently stored (duplicates included).
+  std::vector<std::string> Names() const;
+
+ private:
+  // multiset rather than set: the same name may be stored more than once.
+  std::multiset<std::string> set_;
+  // mutable so that the const method Names() can lock it.
+  mutable std::mutex mtx_;
+};
+
 /* The wrapper for Variable which holds a Variable and a VarBase of its
  * gradient. This object should be managed totally by Python intepreter.
  *
  * Nearly all interface should be implemented in C++.
  */
 class VarBase {
  public:
+  // Returns the names currently recorded in name_set_. Names are recorded at
+  // construction and removed at destruction, and only while IsDebugEnabled()
+  // is true.
+  static std::vector<std::string> AliveVarNames();
+
   // Internal interface, create VarBase from exist variable
   VarBase(const std::string& name, std::unique_ptr<framework::Variable> var,
           VarBase* grad, bool stop_gradient)
@@ -180,13 +199,20 @@ class VarBase {
     }
     VLOG(8) << "create varbase: " << name_ << " type: " << dtype
             << " place: " << place << "Stop gradient: " << stop_gradient_;
+
+    if (IsDebugEnabled()) {
+      name_set_.Insert(name_);
+    }
   }
 
  public:
+  // Resets the pre-op bookkeeping and, when debugging is enabled, removes
+  // this variable's name from name_set_.
   virtual ~VarBase() {
     pre_op_ = nullptr;
     pre_op_out_idx_ = -1;
     VLOG(8) << "destruct varbase: " << name_;
+    // NOTE(review): Remove() enforces that name_ is still registered. The
+    // flag is re-read here rather than remembered from construction, and
+    // SetName() can change name_ without touching name_set_ -- confirm that
+    // neither can make the enforce fail inside this destructor.
+    if (IsDebugEnabled()) {
+      name_set_.Remove(name_);
+    }
   }
 
+  // Overwrites name_ only; the static name_set_ is not updated here.
   inline void SetName(const std::string& name) { name_ = name; }
@@ -297,6 +323,9 @@ class VarBase {
   OpBase* pre_op_;
   std::string pre_op_out_name_;
   int pre_op_out_idx_;
+
+  // Names of VarBase objects registered while debug mode is enabled (inserted
+  // by the constructor, removed by the destructor). Used to check for memory
+  // leaks via AliveVarNames().
+  static ThreadSafeNameSet name_set_;
 };
 
 /* The wrapper for OpDesc which holds a OpDesc and a OpDesc of its

diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc
@@ -194,8 +194,13 @@ void BindImperative(pybind11::module *m_ptr) {
 
   m.def("stop_imperative_gperf_profiler", []() { imperative::StopProfile(); });
 
+  m.def("_is_dygraph_debug_enabled",
+        []() { return imperative::IsDebugEnabled(); });
+  m.def("_dygraph_debug_level", []() { return imperative::GetDebugLevel(); });
+
   py::class_<imperative::VarBase, std::shared_ptr<imperative::VarBase>>(
       m, "VarBase", R"DOC()DOC")
+      .def_static("_alive_vars", &imperative::VarBase::AliveVarNames)
       .def(
           py::init<const std::string &, paddle::framework::proto::VarType::Type,
                    const std::vector<int64_t>, const paddle::platform::CPUPlace,

diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py
@@ -142,7 +142,7 @@ def __bootstrap__():
         'print_sub_graph_dir', 'pe_profile_fname', 'inner_op_parallelism',
         'enable_parallel_graph', 'fuse_parameter_groups_size',
         'multiple_of_cupti_buffer_size', 'fuse_parameter_memory_size',
-        'tracer_profile_fname'
+        'tracer_profile_fname', 'dygraph_debug'
     ]
     if 'Darwin' not in sysstr:
         read_env_flags.append('use_pinned_memory')

diff --git a/python/paddle/fluid/core.py b/python/paddle/fluid/core.py
@@ -57,6 +57,8 @@
         from .core_avx import _set_eager_deletion_mode
         from .core_avx import _set_fuse_parameter_group_size
         from .core_avx import _set_fuse_parameter_memory_size
+        from .core_avx import _is_dygraph_debug_enabled
+        from .core_avx import _dygraph_debug_level
     except ImportError:
         sys.stderr.write(
             'WARNING: Can not import avx core. You may not build with AVX, '
@@ -78,6 +80,8 @@
         from .core_noavx import _set_eager_deletion_mode
         from .core_noavx import _set_fuse_parameter_group_size
         from .core_noavx import _set_fuse_parameter_memory_size
+        from .core_noavx import _is_dygraph_debug_enabled
+        from .core_noavx import _dygraph_debug_level
     except ImportError as error:
         sys.exit("Error: Can not load core_noavx.* ." +
                  error.__class__.__name__)

diff --git a/python/paddle/fluid/dygraph/base.py b/python/paddle/fluid/dygraph/base.py
@@ -14,10 +14,12 @@
 from ..wrapped_decorator import signature_safe_contextmanager, wrap_decorator
 import contextlib
 import numpy as np
+import os
 
 from paddle.fluid import core
 from paddle.fluid import framework
 from .tracer import Tracer
+import logging
 
 __all__ = [
     'enabled',
@@ -136,6 +138,21 @@ def guard(place=None):
                     yield
 
 
+def _print_debug_msg():
+    if not core._is_dygraph_debug_enabled():
+        logging.warn(
+            'Debug mode is not enabled. Please set FLAGS_dygraph_debug=1 to enable debug'
+        )
+        return
+
+    unique_name_size = len(framework.unique_name.generator.ids)
+    tracer_var_size = len(framework._dygraph_tracer()._vars)
+    alive_cpp_var_size = len(core.VarBase._alive_vars())
+    logging.warn(
+        'unique_name num: {}, tracer vars num: {}, alive cpp vars num: {}'
+        .format(unique_name_size, tracer_var_size, alive_cpp_var_size))
+
+
 def to_variable(value, block=None, name=None):
     """
     This function will create a variable from ndarray

diff --git a/python/paddle/fluid/dygraph/learning_rate_scheduler.py b/python/paddle/fluid/dygraph/learning_rate_scheduler.py
@@ -60,7 +60,7 @@ def create_lr_var(self, lr):
             shape=[1],
             value=float(lr),
             dtype=self.dtype,
-            persistable=True)
+            persistable=False)
         return lr
 
     def step(self):

diff --git a/python/paddle/fluid/unique_name.py b/python/paddle/fluid/unique_name.py
@@ -79,15 +79,15 @@ def generate(key):
 
 # FIXME(zjl): The previous naming rule in static graph would
 # cause memory leak in dygraph mode. It is because the previous
-# nameing rule would use `conv_0.tmp` as the key, and in dygraph
+# naming rule would use `conv_0.tmp` as the key, and in dygraph
 # mode, `conv_i` increases as batch increases. Thus, keys would
 # increase in a way like `conv_0.tmp`, `conv_1.tmp`, .... 
 # Not find a better way to fix this bug in dygraph mode. In TF,
 # variable name is meaningless in eager execution mode, and in
 # PyTorch, there is no variable name at all. Maybe we should
 # discard variable name in dygraph mode.
 #
-# Another concern is that save/load inference. Usually, user
+# Another concern is the save/load interfaces. Usually, a user
 # would save model in static graph mode, and load it in dygraph
 # mode. Therefore, we keep the variable name of Parameter currently.
 #