Commit 0c39b97

Sand3r- authored and luotao1 committed
[MKL-DNN] Add Fully Connected Op for inference only (#15226)

* fuse mul and elementwise add to fc
* Reimplement the FC forward operator
* Fix FC MKLDNN integration by transposing weights
* Add FC MKLDNN Pass test=develop
* FC MKLDNN Pass: change memcpy to std::copy
* Fix MKLDNN FC handling of mismatch input and weights dims
* Lower tolerance for MKL-DNN in resnet50 test test=develop
* Adjust FC to support MKLDNN Op placement test=develop
* Adjust Placement Op to set use_mkldnn attribute for graph test=develop
* MKLDNN FC: fix weights format so that gemm version is called test=develop
* FC MKLDNN: Remove tolerance decrease from tester_helper
* FC MKL-DNN: Refactor the code, change input reorder to weight reorder
* MKL-DNN FC: Introduce operator caching test=develop
* FC MKL-DNN: Fix the tensor type in ExpectedKernelType test=develop
* FC MKL-DNN: fix style changes test=develop
* FC MKL-DNN: fallback to native on non-supported dim sizes test=develop
* FC MKLDNN: fix CMake paths test=develop
* FC MKLDNN: Refine placement pass graph mkldnn attribute test=develop
* Fix Transpiler error for fuse_conv_eltwise test=develop
* Fix missing STL includes in files test=develop
* FC MKL-DNN: Enable new output size computation; also, refine pass to comply with newest interface test=develop
* FC MKL-DNN: enable only when fc_mkldnn_pass is enabled
* FC MKL-DNN: Allow Weights to use oi or io format
* FC MKL-DNN: Adjust UT to work with correct dims test=develop
* Enable MKL DEBUG for resnet50 analyzer test=develop
* FC MKL-DNN: Improve Hashing function test=develop
* FC MKL-DNN: Fix shape for fc weights in transpiler
* FC MKL-DNN: Update input pointer in re-used fc primitive
* Add log for not handling fc fuse for unsupported dims test=develop
* FC MKL-DNN: Move transpose from pass to Op Kernel test=develop
* FC MKL-DNN: Disable transpose in unit test test=develop
* FC MKL-DNN: Remove fc_mkldnn_pass from default list
* Correct Flag for fake data analyzer tests test=develop
* FC MKL-DNN: Add comment about fc mkldnn pass disablement test=develop
* FC MKL-DNN: Disable fc in int8 tests test=develop
1 parent 21138eb commit 0c39b97

20 files changed: +502 −286 lines

cmake/generic.cmake

Lines changed: 1 addition & 1 deletion
@@ -385,7 +385,7 @@ function(cc_test TARGET_NAME)
      set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cpu_deterministic=true)
      set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true)
      set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_limit_of_tmp_allocation=4294967296) # 4G
-     set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true)
+     set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true ${MKL_DEBUG_FLAG})
      # No unit test should exceed 10 minutes.
      set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 600)
    endif()
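
Note: ${MKL_DEBUG_FLAG} expands to nothing unless a caller defines it; the inference tests' CMakeLists.txt later in this commit sets it to MKL_DEBUG_CPU_TYPE=7 inside a wrapper function before the call chain reaches cc_test. A minimal sketch of that interaction (the wrapper function name here is hypothetical; cc_test and the flag value are the ones used in this commit):

    # Hypothetical wrapper. Variables set inside a CMake function are visible to the
    # functions it calls, so MKL_DEBUG_FLAG reaches cc_test() and lands in the test's
    # ENVIRONMENT property only for tests created through this wrapper.
    function(my_mkl_debug_cc_test TARGET_NAME)
      set(MKL_DEBUG_FLAG MKL_DEBUG_CPU_TYPE=7)  # same value as in the analyzer tests below
      cc_test(${TARGET_NAME} SRCS ${ARGN})
    endfunction()

    # Tests created directly with cc_test() leave MKL_DEBUG_FLAG undefined, so the
    # appended ${MKL_DEBUG_FLAG} expands to nothing and their environment is unchanged.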

paddle/fluid/framework/ir/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -88,6 +88,7 @@ if(WITH_MKLDNN)
    pass_library(conv_brelu_mkldnn_fuse_pass inference mkldnn)
    pass_library(conv_concat_relu_mkldnn_fuse_pass inference mkldnn)
    pass_library(conv_elementwise_add_mkldnn_fuse_pass inference mkldnn)
+   pass_library(fc_mkldnn_pass inference mkldnn)
    pass_library(cpu_quantize_placement_pass base mkldnn)
    pass_library(cpu_quantize_pass inference mkldnn)
    pass_library(cpu_quantize_squash_pass inference mkldnn)

paddle/fluid/framework/ir/fc_fuse_pass.cc

Lines changed: 2 additions & 0 deletions
@@ -13,6 +13,7 @@
  // limitations under the License.

  #include "paddle/fluid/framework/ir/fc_fuse_pass.h"
+ #include <memory>
  #include <string>
  #include <unordered_set>
  #include <vector>
@@ -80,6 +81,7 @@ void FCFusePass::ApplyImpl(ir::Graph* graph) const {
    }

    desc.SetType("fc");
+
    auto fc_node = g->CreateOpNode(&desc);  // OpDesc will be copied.
    GraphSafeRemoveNodes(graph, {mul, elementwise_add, mul_out});

paddle/fluid/framework/ir/graph_pattern_detector.cc

Lines changed: 30 additions & 0 deletions
@@ -14,7 +14,10 @@

  #include <algorithm>
  #include <array>
+ #include <memory>
  #include <string>
+ #include <unordered_map>
+ #include <unordered_set>
  #include <vector>

  #include "paddle/fluid/framework/ir/graph_helper.h"
@@ -896,6 +899,33 @@ PDNode *patterns::FC::operator()(paddle::framework::ir::PDNode *x,
    }
  }

+ PDNode *patterns::FCMKLDNN::operator()(paddle::framework::ir::PDNode *x,
+                                        bool with_bias) {
+   // Create shared nodes.
+   x->assert_is_op_input("fc", "Input");
+
+   auto *fc_op = pattern->NewNode(fc_repr())->assert_is_op("fc");
+   // Create variables
+   // Filter
+   auto *fc_weight_var = pattern->NewNode(weights_repr())
+                             ->AsInput()
+                             ->assert_is_persistable_var()
+                             ->assert_is_op_input("fc", "W");
+   // Bias
+   auto *fc_bias_var = pattern->NewNode(bias_repr())
+                           ->AsInput()
+                           ->assert_is_persistable_var()
+                           ->assert_is_op_input("fc", "Bias");
+   // Output
+   auto *fc_out_var = pattern->NewNode(output_repr())
+                          ->AsOutput()
+                          ->assert_is_op_output("fc", "Out")
+                          ->assert_is_only_output_of_op("fc");
+
+   fc_op->LinksFrom({x, fc_weight_var, fc_bias_var}).LinksTo({fc_out_var});
+   return fc_out_var;
+ }
+
  PDNode *patterns::Embedding::operator()(PDNode *x) {
    x->assert_is_op_input("lookup_table", "Ids");
    auto *lookup_table_op =

paddle/fluid/framework/ir/graph_pattern_detector.h

Lines changed: 19 additions & 0 deletions
@@ -517,6 +517,25 @@ struct FC : public PatternBase {
    PATTERN_DECL_NODE(Out);
  };

+ // MKL-DNN's FC with bias
+ // op: fc
+ // named node:
+ //   fc
+ //   w, bias, output
+ struct FCMKLDNN : public PatternBase {
+   FCMKLDNN(PDPattern* pattern, const std::string& name_scope)
+       : PatternBase(pattern, name_scope, "fc_mkldnn") {}
+
+   PDNode* operator()(PDNode* x, bool with_bias);
+
+   // declare operator node's name
+   PATTERN_DECL_NODE(fc);
+   // declare variable node's name
+   PATTERN_DECL_NODE(weights);
+   PATTERN_DECL_NODE(bias);
+   PATTERN_DECL_NODE(output);
+ };
+
  // Embedding
  struct Embedding : public PatternBase {
    Embedding(PDPattern* pattern, const std::string& name_scope)

paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.cc (new file)

Lines changed: 77 additions & 0 deletions
@@ -0,0 +1,77 @@
+ // Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ //
+ // Licensed under the Apache License, Version 2.0 (the "License");
+ // you may not use this file except in compliance with the License.
+ // You may obtain a copy of the License at
+ //
+ //     http://www.apache.org/licenses/LICENSE-2.0
+ //
+ // Unless required by applicable law or agreed to in writing, software
+ // distributed under the License is distributed on an "AS IS" BASIS,
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ // See the License for the specific language governing permissions and
+ // limitations under the License.
+
+ #include "paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.h"
+ #include <algorithm>
+ #include <memory>
+ #include <string>
+ #include <vector>
+ #include "paddle/fluid/framework/eigen.h"
+ #include "paddle/fluid/framework/lod_tensor.h"
+ #include "paddle/fluid/platform/enforce.h"
+
+ namespace paddle {
+ namespace framework {
+ namespace ir {
+
+ void FCMKLDNNPass::ApplyImpl(ir::Graph* graph) const {
+   PADDLE_ENFORCE(graph);
+   Init("fc_mkldnn_pass", graph);
+
+   auto* scope = param_scope();
+   PADDLE_ENFORCE(scope);
+
+   GraphPatternDetector gpd;
+   auto* x = gpd.mutable_pattern()
+                 ->NewNode("fc_mkldnn_pass/x")
+                 ->AsInput()
+                 ->assert_is_op_input("fc", "Input");
+   patterns::FCMKLDNN fc_pattern(gpd.mutable_pattern(), "fc_mkldnn_pass");
+   fc_pattern(x, true /*with bias*/);
+
+   int found_fc_count = 0;
+   auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
+                      Graph* g) {
+     VLOG(4) << "Handle FC MKL-DNN pass";
+     if (!(graph->Has("use_mkldnn") && graph->Get<bool>("use_mkldnn"))) {
+       VLOG(3) << "do not perform fc fuse";
+       return;
+     }
+     GET_IR_NODE_FROM_SUBGRAPH(fc, fc, fc_pattern);
+     GET_IR_NODE_FROM_SUBGRAPH(weights, weights, fc_pattern);
+     GET_IR_NODE_FROM_SUBGRAPH(bias, bias, fc_pattern);
+     GET_IR_NODE_FROM_SUBGRAPH(output, output, fc_pattern);
+
+     OpDesc* desc = fc->Op();
+     auto in_size = fc->inputs[0]->Var()->GetShape().size();
+     if (in_size != 2 && in_size != 4) {
+       VLOG(3) << "Do not enable FC MKL-DNN for dimensions different than 2 & 4";
+       return;
+     }
+     desc->SetAttr("use_mkldnn", true);
+     PADDLE_ENFORCE(subgraph.count(x));
+
+     found_fc_count++;
+   };
+
+   gpd(graph, handler);
+
+   AddStatis(found_fc_count);
+ }
+
+ }  // namespace ir
+ }  // namespace framework
+ }  // namespace paddle
+
+ REGISTER_PASS(fc_mkldnn_pass, paddle::framework::ir::FCMKLDNNPass);

paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.h (new file)

Lines changed: 38 additions & 0 deletions
@@ -0,0 +1,38 @@
+ // Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ //
+ // Licensed under the Apache License, Version 2.0 (the "License");
+ // you may not use this file except in compliance with the License.
+ // You may obtain a copy of the License at
+ //
+ //     http://www.apache.org/licenses/LICENSE-2.0
+ //
+ // Unless required by applicable law or agreed to in writing, software
+ // distributed under the License is distributed on an "AS IS" BASIS,
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ // See the License for the specific language governing permissions and
+ // limitations under the License.
+ #pragma once
+ #include <memory>
+ #include "paddle/fluid/framework/ir/fuse_pass_base.h"
+ #include "paddle/fluid/framework/ir/graph.h"
+ #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
+ #include "paddle/fluid/framework/ir/pass.h"
+
+ namespace paddle {
+ namespace framework {
+ namespace ir {
+
+ /*
+  * Transpose weights of FC to comply with MKL-DNN interface
+  */
+ class FCMKLDNNPass : public FusePassBase {
+  public:
+   virtual ~FCMKLDNNPass() {}
+
+  protected:
+   void ApplyImpl(ir::Graph* graph) const;
+ };
+
+ }  // namespace ir
+ }  // namespace framework
+ }  // namespace paddle

paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.cc

Lines changed: 4 additions & 0 deletions
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
  limitations under the License. */

  #include "paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.h"
+ #include <memory>
  #include <string>
  #include <unordered_set>

@@ -24,6 +25,9 @@ void MKLDNNPlacementPass::ApplyImpl(ir::Graph* graph) const {
    VLOG(3) << "Applies MKL-DNN placement strategy.";
    const auto& op_types_list =
        Get<std::unordered_set<std::string>>("mkldnn_enabled_op_types");
+   if (!graph->Has("use_mkldnn")) {
+     graph->Set<bool>("use_mkldnn", new bool(true));
+   }
    for (const Node* n : graph->Nodes()) {
      if (n->IsOp()) {
        auto* op = n->Op();

paddle/fluid/inference/api/paddle_pass_builder.cc

Lines changed: 13 additions & 10 deletions
@@ -146,16 +146,19 @@ void CpuPassStrategy::EnableMKLDNN() {
    if (!use_mkldnn_) {
      passes_.insert(passes_.begin(), "mkldnn_placement_pass");

-     for (auto &pass : std::vector<std::string>(
-              {"depthwise_conv_mkldnn_pass",    //
-               "conv_bn_fuse_pass",             // Execute BN passes again to
-               "conv_eltwiseadd_bn_fuse_pass",  // preserve correct pass order
-               "conv_bias_mkldnn_fuse_pass",    //
-               "conv3d_bias_mkldnn_fuse_pass",  //
-               "conv_elementwise_add_mkldnn_fuse_pass",
-               "conv_concat_relu_mkldnn_fuse_pass",
-               "conv_relu_mkldnn_fuse_pass",  //
-               "conv_brelu_mkldnn_fuse_pass"})) {
+     for (auto &pass : std::vector<std::string>({
+              "depthwise_conv_mkldnn_pass",    //
+              "conv_bn_fuse_pass",             // Execute BN passes again to
+              "conv_eltwiseadd_bn_fuse_pass",  // preserve correct pass order
+              "conv_bias_mkldnn_fuse_pass",    //
+              "conv3d_bias_mkldnn_fuse_pass",  //
+              "conv_elementwise_add_mkldnn_fuse_pass",
+              "conv_concat_relu_mkldnn_fuse_pass",
+              "conv_relu_mkldnn_fuse_pass",  //
+              "conv_brelu_mkldnn_fuse_pass",  //
+              // Disabled due to topology-dependent speed-up
+              // "fc_mkldnn_pass"
+          })) {
        passes_.push_back(pass);
      }
    }
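
Since "fc_mkldnn_pass" stays commented out of the default MKL-DNN pass list above, an application has to opt in per predictor. A minimal sketch of that opt-in, assuming the usual AnalysisConfig workflow (EnableMKLDNN and pass_builder()->AppendPass are the calls used by the tester changes below; the header path, SetModel, and CreatePaddlePredictor are assumptions, not part of this diff):

    #include <memory>
    #include <string>
    #include "paddle/fluid/inference/api/paddle_inference_api.h"  // header path assumed

    // Build a CPU predictor that uses the new FC MKL-DNN kernel.
    std::unique_ptr<paddle::PaddlePredictor> MakeFcMkldnnPredictor(
        const std::string& model_dir) {
      paddle::AnalysisConfig cfg;
      cfg.SetModel(model_dir);
      cfg.EnableMKLDNN();  // inserts mkldnn_placement_pass, which also sets the graph-level
                           // "use_mkldnn" attribute that fc_mkldnn_pass checks (see diffs above)
      cfg.pass_builder()->AppendPass("fc_mkldnn_pass");  // explicit opt-in, as in the testers below
      return paddle::CreatePaddlePredictor(cfg);         // factory call assumed
    }

The order matters: appending fc_mkldnn_pass only takes effect when mkldnn_placement_pass (added by EnableMKLDNN) runs first, because the new pass bails out unless the graph carries the "use_mkldnn" attribute.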

paddle/fluid/inference/tests/api/CMakeLists.txt

Lines changed: 7 additions & 5 deletions
@@ -33,8 +33,10 @@ function(inference_analysis_api_int8_test target model_dir data_dir filename)
                 --paddle_num_threads=${CPU_NUM_THREADS_ON_CI}
                 --iterations=2)
  endfunction()
-
- function(inference_analysis_api_test_with_fake_data target install_dir filename model_name)
+ function(inference_analysis_api_test_with_fake_data target install_dir filename model_name mkl_debug)
+   if(mkl_debug)
+     set(MKL_DEBUG_FLAG MKL_DEBUG_CPU_TYPE=7)
+   endif()
    download_model(${install_dir} ${model_name})
    inference_analysis_test(${target} SRCS ${filename}
        EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
@@ -143,15 +145,15 @@ inference_analysis_api_test_with_refer_result(test_analyzer_mobilenet_transpose

  # googlenet
  inference_analysis_api_test_with_fake_data(test_analyzer_googlenet
-     "${INFERENCE_DEMO_INSTALL_DIR}/googlenet" analyzer_resnet50_tester.cc "googlenet.tar.gz")
+     "${INFERENCE_DEMO_INSTALL_DIR}/googlenet" analyzer_resnet50_tester.cc "googlenet.tar.gz" false)

  # resnet50
  inference_analysis_api_test_with_fake_data(test_analyzer_resnet50
-     "${INFERENCE_DEMO_INSTALL_DIR}/resnet50" analyzer_resnet50_tester.cc "resnet50_model.tar.gz")
+     "${INFERENCE_DEMO_INSTALL_DIR}/resnet50" analyzer_resnet50_tester.cc "resnet50_model.tar.gz" true)

  # mobilenet with depthwise_conv op
  inference_analysis_api_test_with_fake_data(test_analyzer_mobilenet_depthwise_conv
-     "${INFERENCE_DEMO_INSTALL_DIR}/mobilenet_depthwise_conv" analyzer_resnet50_tester.cc "mobilenet_model.tar.gz")
+     "${INFERENCE_DEMO_INSTALL_DIR}/mobilenet_depthwise_conv" analyzer_resnet50_tester.cc "mobilenet_model.tar.gz" false)

  # int8 image classification tests
  if(WITH_MKLDNN)

paddle/fluid/inference/tests/api/analyzer_bert_tester.cc

Lines changed: 1 addition & 0 deletions
@@ -152,6 +152,7 @@ void profile(bool use_mkldnn = false) {

    if (use_mkldnn) {
      config.EnableMKLDNN();
+     config.pass_builder()->AppendPass("fc_mkldnn_pass");
    }

    std::vector<std::vector<PaddleTensor>> outputs;

paddle/fluid/inference/tests/api/analyzer_dam_tester.cc

Lines changed: 2 additions & 1 deletion
@@ -200,8 +200,9 @@ void profile(bool use_mkldnn = false) {
      cfg.EnableMKLDNN();
      // Enable all the mkldnn supported ops except conv3d in dam
      std::unordered_set<std::string> op_list = {"softmax", "elementwise_add",
-                                                "relu"};
+                                                "relu", "fc"};
      cfg.SetMKLDNNOp(op_list);
+     cfg.pass_builder()->AppendPass("fc_mkldnn_pass");
    }

    std::vector<std::vector<PaddleTensor>> outputs;

paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc

Lines changed: 1 addition & 0 deletions
@@ -100,6 +100,7 @@ void profile(bool use_mkldnn = false) {

    if (use_mkldnn) {
      cfg.EnableMKLDNN();
+     cfg.pass_builder()->AppendPass("fc_mkldnn_pass");
    }

    std::vector<std::vector<PaddleTensor>> input_slots_all;

paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc

Lines changed: 2 additions & 0 deletions
@@ -48,6 +48,7 @@ void profile(bool use_mkldnn = false) {

    if (use_mkldnn) {
      cfg.EnableMKLDNN();
+     cfg.pass_builder()->AppendPass("fc_mkldnn_pass");
    }
    std::vector<std::vector<PaddleTensor>> outputs;

@@ -79,6 +80,7 @@ void compare(bool use_mkldnn = false) {
    SetConfig(&cfg);
    if (use_mkldnn) {
      cfg.EnableMKLDNN();
+     cfg.pass_builder()->AppendPass("fc_mkldnn_pass");
    }

    std::vector<std::vector<PaddleTensor>> input_slots_all;

paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc

Lines changed: 1 addition & 0 deletions
@@ -149,6 +149,7 @@ void SetConfig(AnalysisConfig *cfg, bool use_mkldnn = false) {
    }
    if (use_mkldnn) {
      cfg->EnableMKLDNN();
+     cfg->pass_builder()->AppendPass("fc_mkldnn_pass");
    }
    // Enable seqpool_concat_fuse_pass, disabled by default since it takes much
    // time

paddle/fluid/inference/tests/api/analyzer_transformer_tester.cc

Lines changed: 1 addition & 0 deletions
@@ -189,6 +189,7 @@ void profile(bool use_mkldnn = false) {
    std::vector<std::vector<PaddleTensor>> outputs;
    if (use_mkldnn) {
      cfg.EnableMKLDNN();
+     cfg.pass_builder()->AppendPass("fc_mkldnn_pass");
    }

    std::vector<std::vector<PaddleTensor>> input_slots_all;

paddle/fluid/inference/tests/api/analyzer_vis_tester.cc

Lines changed: 1 addition & 0 deletions
@@ -85,6 +85,7 @@ void profile(bool use_mkldnn = false) {
    SetConfig(&cfg);
    if (use_mkldnn) {
      cfg.EnableMKLDNN();
+     cfg.pass_builder()->AppendPass("fc_mkldnn_pass");
    }
    // cfg.pass_builder()->TurnOnDebug();
    std::vector<std::vector<PaddleTensor>> outputs;
