Skip to content

Commit 2731ead

Browse files
AndSonder authored and wz1qqx committed
【静态图性能优化】图依赖信息复用 (PaddlePaddle#55389)
* add share api for DependencyBuilder
* add judge codes for sharing build results
* add ShareBuildResultsFrom
* update ShareDependencyFrom
* fix error
* add share codes
* fix memory error
* update according review
* update notes
* fix code style
* remove const_cast
* fix code style
1 parent dbc52a2 commit 2731ead

10 files changed

+155
-41
lines changed

paddle/fluid/framework/new_executor/interpreter/dependency_builder.cc

Lines changed: 38 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -60,18 +60,26 @@ const std::string StringizeDownstreamMap(
6060
return oss.str();
6161
}
6262

63+
DependencyBuilder::DependencyBuilder()
64+
: is_build_(false), instructions_(nullptr) {
65+
op_downstream_map_ = std::make_shared<std::map<size_t, std::set<size_t>>>();
66+
op_happens_before_ = std::make_shared<std::vector<std::vector<bool>>>();
67+
}
68+
6369
const std::map<size_t, std::set<size_t>>& DependencyBuilder::Build(
6470
const std::vector<Instruction>& instructions) {
6571
if (is_build_) {
66-
return op_downstream_map_;
72+
return *op_downstream_map_;
6773
}
6874

75+
std::tie(op_downstream_map_, op_happens_before_) = GetDependency();
76+
6977
instructions_ = &instructions;
7078
op_num_ = instructions_->size();
7179

7280
ops_before_.assign(op_num_, {});
7381
ops_behind_.assign(op_num_, {});
74-
op_happens_before_.assign(op_num_, std::vector<bool>(op_num_, false));
82+
op_happens_before_->assign(op_num_, std::vector<bool>(op_num_, false));
7583

7684
BuildDownstreamMap();
7785
VLOG(6) << "Finish BuildDownstreamMap";
@@ -97,13 +105,24 @@ const std::map<size_t, std::set<size_t>>& DependencyBuilder::Build(
97105
VLOG(6) << "Finish AddDependencyForReadOp";
98106

99107
VLOG(6) << "Finish build dependency";
100-
VLOG(8) << "downstream count: " << CountDownstreamMap(op_downstream_map_);
108+
VLOG(8) << "downstream count: " << CountDownstreamMap(*op_downstream_map_);
101109
VLOG(8) << "downstream_map: " << std::endl
102-
<< StringizeDownstreamMap(op_downstream_map_);
110+
<< StringizeDownstreamMap(*op_downstream_map_);
103111

104112
is_build_ = true;
105113

106-
return op_downstream_map_;
114+
return *op_downstream_map_;
115+
}
116+
117+
std::tuple<std::shared_ptr<std::map<size_t, std::set<size_t>>>,
118+
std::shared_ptr<std::vector<std::vector<bool>>>>
119+
DependencyBuilder::GetDependency() const {
120+
return std::make_tuple(op_downstream_map_, op_happens_before_);
121+
}
122+
123+
void DependencyBuilder::ShareDependencyFrom(const DependencyBuilder& src) {
124+
std::tie(op_downstream_map_, op_happens_before_) = src.GetDependency();
125+
is_build_ = true;
107126
}
108127

109128
const std::map<size_t, std::set<size_t>>& DependencyBuilder::OpDownstreamMap()
@@ -113,7 +132,7 @@ const std::map<size_t, std::set<size_t>>& DependencyBuilder::OpDownstreamMap()
113132
true,
114133
phi::errors::Unavailable(
115134
"DependencyBuilder is not yet built, call Build() firstly."));
116-
return op_downstream_map_;
135+
return *op_downstream_map_;
117136
}
118137

119138
void DependencyBuilder::AddDependencyForCoalesceTensorOp() {
@@ -268,8 +287,8 @@ void DependencyBuilder::AddDependencyForRandomOp() {
268287
void DependencyBuilder::AddDependencyForReadOp() {
269288
std::vector<bool> is_startup_ops(op_num_, true);
270289
for (size_t op_idx = 0; op_idx < op_num_; ++op_idx) {
271-
auto it = op_downstream_map_.find(op_idx);
272-
if (it != op_downstream_map_.end()) {
290+
auto it = op_downstream_map_->find(op_idx);
291+
if (it != op_downstream_map_->end()) {
273292
for (size_t downstream_op_idx : it->second) {
274293
is_startup_ops[downstream_op_idx] = false;
275294
}
@@ -320,8 +339,7 @@ void DependencyBuilder::AddDownstreamOp(size_t prior_op_idx,
320339
posterior_op_idx,
321340
posterior_op_idx,
322341
prior_op_idx));
323-
324-
std::set<size_t>& downstream_ops = op_downstream_map_[prior_op_idx];
342+
std::set<size_t>& downstream_ops = (*op_downstream_map_)[prior_op_idx];
325343
// NOTE(Ruibiao): Here the downstream map shrinking is best-effort, therefore
326344
// ShrinkDownstreamMap after BuildDownstreamMap is still helpful. For example,
327345
// a->c will not be shrinked in the following case: AddDownstreamOp(a, b) ->
@@ -342,8 +360,8 @@ void DependencyBuilder::AddDownstreamOp(size_t prior_op_idx,
342360

343361
auto update_op_happen_before = [this](size_t prior_op_idx,
344362
size_t posterior_op_idx) {
345-
if (!op_happens_before_[prior_op_idx][posterior_op_idx]) {
346-
op_happens_before_[prior_op_idx][posterior_op_idx] = true;
363+
if (!(*op_happens_before_)[prior_op_idx][posterior_op_idx]) {
364+
(*op_happens_before_)[prior_op_idx][posterior_op_idx] = true;
347365
ops_before_[posterior_op_idx].push_back(prior_op_idx);
348366
ops_behind_[prior_op_idx].push_back(posterior_op_idx);
349367
}
@@ -377,8 +395,8 @@ void DependencyBuilder::BuildDownstreamMap() {
377395
std::map<size_t, size_t>(); // # map from variable to recent write op.
378396
auto op2dependences =
379397
std::map<size_t,
380-
std::set<size_t>>(); //# map from op to the dependence list,
381-
// op must run after the dependence.
398+
std::set<size_t>>(); // # map from op to the dependence list,
399+
// op must run after the dependence.
382400
std::set<size_t>
383401
remove_duplicate; // remove the duplicate between inputs and outputs
384402

@@ -497,15 +515,15 @@ void DependencyBuilder::ShrinkDownstreamMap() {
497515
// shrink, find the downstream op that has no other op in the
498516
// downstream list happens before it
499517
for (size_t i = 0; i < op_num_; ++i) {
500-
if (op_downstream_map_.find(i) == op_downstream_map_.end()) {
518+
if (op_downstream_map_->find(i) == op_downstream_map_->end()) {
501519
continue;
502520
}
503521

504522
std::set<size_t> minumum_nexts;
505-
for (size_t item : op_downstream_map_.at(i)) {
523+
for (size_t item : op_downstream_map_->at(i)) {
506524
bool not_after_any = true;
507525
// find the op that is not executed after any
508-
for (size_t other_item : op_downstream_map_.at(i)) {
526+
for (size_t other_item : op_downstream_map_->at(i)) {
509527
if (OpHappensBefore(other_item, item)) {
510528
VLOG(8) << "happens_before: " << other_item << "->" << item
511529
<< ", so skip " << item;
@@ -520,12 +538,12 @@ void DependencyBuilder::ShrinkDownstreamMap() {
520538
}
521539
// NOTE(Ruibiao): op_happens_before will not be changed when shrink
522540
// dowstream map
523-
op_downstream_map_.at(i) = minumum_nexts;
541+
(*op_downstream_map_)[i] = minumum_nexts;
524542
}
525543
VLOG(8) << "Finish shrink downstream map";
526-
VLOG(8) << "downstream count: " << CountDownstreamMap(op_downstream_map_);
544+
VLOG(8) << "downstream count: " << CountDownstreamMap(*op_downstream_map_);
527545
VLOG(8) << "downstream_map: " << std::endl
528-
<< StringizeDownstreamMap(op_downstream_map_);
546+
<< StringizeDownstreamMap(*op_downstream_map_);
529547
}
530548

531549
/// ======================== ///

paddle/fluid/framework/new_executor/interpreter/dependency_builder.h

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,23 +34,29 @@ namespace interpreter {
3434

3535
class DependencyBuilder {
3636
public:
37-
DependencyBuilder() : is_build_(false), instructions_(nullptr) {}
37+
DependencyBuilder();
3838

3939
// build op dependencies and return the mapping from op to its downstream-op
4040
// set
4141
const std::map<size_t, std::set<size_t>>& Build(
4242
const std::vector<Instruction>& instructions);
4343

44+
std::tuple<std::shared_ptr<std::map<size_t, std::set<size_t>>>,
45+
std::shared_ptr<std::vector<std::vector<bool>>>>
46+
GetDependency() const;
47+
4448
const std::map<size_t, std::set<size_t>>& OpDownstreamMap() const;
4549

4650
bool OpHappensBefore(size_t prior_op_idx, size_t posterior_op_idx) const {
4751
PADDLE_ENFORCE_GE(
48-
op_happens_before_.size(),
52+
op_happens_before_->size(),
4953
0,
5054
phi::errors::Unavailable("op_happen_before is not yet built"));
51-
return op_happens_before_.at(prior_op_idx).at(posterior_op_idx);
55+
return op_happens_before_->at(prior_op_idx).at(posterior_op_idx);
5256
}
5357

58+
void ShareDependencyFrom(const DependencyBuilder& src);
59+
5460
private:
5561
void AddDependencyForCoalesceTensorOp();
5662
void AddDependencyForCommunicationOp();
@@ -76,13 +82,13 @@ class DependencyBuilder {
7682
std::vector<std::vector<size_t>> ops_behind_;
7783

7884
// op_downstream_map_ is the mapping from op to its downstream-op set, that is
79-
// to say, op_downstream_map_[i] == {a, b, c} means op[a], op[b] and op[c]
85+
// to say, (*op_downstream_map_)[i] == {a, b, c} means op[a], op[b] and op[c]
8086
// depend on op[i] directly.
81-
std::map<size_t, std::set<size_t>> op_downstream_map_;
87+
std::shared_ptr<std::map<size_t, std::set<size_t>>> op_downstream_map_;
8288

8389
// op_happens_before_ is a matrix form of ops_before_ and ops_behind_, it is
8490
// used to speed up the query.
85-
std::vector<std::vector<bool>> op_happens_before_;
91+
std::shared_ptr<std::vector<std::vector<bool>>> op_happens_before_;
8692
};
8793

8894
// /// ======================== ///

paddle/fluid/framework/new_executor/interpreter_base_impl.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,8 @@ class InterpreterBaseImpl {
8080

8181
virtual void ShareWorkQueueFrom(InterpreterBaseImpl* src) = 0;
8282

83+
virtual void ShareBuildResultsFrom(const InterpreterBaseImpl& src) = 0;
84+
8385
virtual void SetCopyProgram(std::shared_ptr<ProgramDesc> prog) = 0;
8486

8587
virtual void SetSkipGcVars(const std::set<std::string>& skip_gc_vars) = 0;
@@ -97,6 +99,11 @@ class InterpreterBaseImpl {
9799
virtual const platform::Place& GetPlace() const = 0;
98100

99101
virtual void SetOutputHooks(const std::vector<HookFunc>& hookfuncs) = 0;
102+
103+
virtual const interpreter::DependencyBuilder& GetDependencyBuilder()
104+
const = 0;
105+
106+
virtual std::shared_ptr<std::vector<size_t>> GetDependencyCount() const = 0;
100107
};
101108

102109
inline void SetDeviceId(const platform::Place& place) {

paddle/fluid/framework/new_executor/interpretercore.cc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,12 @@ void InterpreterCore::ShareWorkQueueFrom(std::shared_ptr<InterpreterCore> src) {
8181
impl_->ShareWorkQueueFrom(const_cast<InterpreterBaseImpl*>(src->Impl()));
8282
}
8383

84+
void InterpreterCore::ShareBuildResultsFrom(
85+
std::shared_ptr<InterpreterCore> src) {
86+
// ShareBuildResultsFrom required const InterpreterBaseImpl& src as input
87+
impl_->ShareBuildResultsFrom(*src->Impl());
88+
}
89+
8490
void InterpreterCore::SetCopyProgram(std::shared_ptr<ProgramDesc> prog) {
8591
impl_->SetCopyProgram(prog);
8692
}

paddle/fluid/framework/new_executor/interpretercore.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ class InterpreterCore {
5656

5757
void ShareWorkQueueFrom(std::shared_ptr<InterpreterCore> src);
5858

59+
void ShareBuildResultsFrom(std::shared_ptr<InterpreterCore> src);
60+
5961
void SetCopyProgram(std::shared_ptr<ProgramDesc> prog);
6062

6163
void SetSkipGcVars(const std::set<std::string>& skip_gc_vars);

paddle/fluid/framework/new_executor/new_ir_interpreter.cc

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,24 @@ void NewIRInterpreter::ShareWorkQueueFrom(InterpreterBaseImpl* src) {
351351
<< ") to InterpreterCore(" << this << ")";
352352
}
353353

354+
void NewIRInterpreter::ShareBuildResultsFrom(const InterpreterBaseImpl& src) {
355+
PADDLE_THROW(platform::errors::Unimplemented(
356+
"ShareBuildResultsFrom is not implemented in NewIRInterpreter."));
357+
}
358+
359+
// op dependences
360+
const interpreter::DependencyBuilder& NewIRInterpreter::GetDependencyBuilder()
361+
const {
362+
PADDLE_THROW(platform::errors::Unimplemented(
363+
"GetDependencyBuilder is not implemented in NewIRInterpreter."));
364+
}
365+
366+
std::shared_ptr<std::vector<size_t>> NewIRInterpreter::GetDependencyCount()
367+
const {
368+
PADDLE_THROW(platform::errors::Unimplemented(
369+
"GetDependencyCount is not implemented in NewIRInterpreter."));
370+
}
371+
354372
bool NewIRInterpreter::BuildInplaceCheckVarIsOnlyInput(
355373
const std::vector<std::vector<size_t>>& input_var2op, size_t var_index) {
356374
if (!var_scope_.VarDesc(var_index)) {

paddle/fluid/framework/new_executor/new_ir_interpreter.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,13 @@ class NewIRInterpreter : public InterpreterBaseImpl {
5353

5454
void ShareWorkQueueFrom(InterpreterBaseImpl* src) override;
5555

56+
void ShareBuildResultsFrom(const InterpreterBaseImpl& src) override;
57+
58+
// op dependences
59+
const interpreter::DependencyBuilder& GetDependencyBuilder() const override;
60+
61+
std::shared_ptr<std::vector<size_t>> GetDependencyCount() const override;
62+
5663
void SetCopyProgram(std::shared_ptr<ProgramDesc> prog) override;
5764

5865
void SetSkipGcVars(const std::set<std::string>& skip_gc_vars) override;

0 commit comments

Comments
 (0)