Commit bed700f

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into zyf_slice

2 parents d10c4be + a957734
76 files changed, +4951 −1816 lines


cmake/external/xbyak.cmake (+1 −1)

@@ -44,7 +44,7 @@ ExternalProject_Add(
   DEPENDS ""
   PREFIX ${XBYAK_PREFIX_DIR}
   SOURCE_DIR ${XBYAK_SOURCE_DIR}
-  # UPDATE_COMMAND ""
+  UPDATE_COMMAND ""
   CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${XBYAK_INSTALL_ROOT}
              -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
   CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${XBYAK_INSTALL_ROOT}
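
A note on this one-line change: in ExternalProject_Add, passing an empty string as a step command replaces that step with a no-op, so uncommenting UPDATE_COMMAND "" stops CMake from re-running the update step (a fetch/re-checkout for git-sourced projects) on every build. A minimal sketch of the pattern, with a hypothetical target name and pin:

include(ExternalProject)
ExternalProject_Add(
  extern_xbyak                 # hypothetical target name
  GIT_REPOSITORY https://github.com/herumi/xbyak.git
  GIT_TAG        v5.81         # hypothetical pinned tag
  UPDATE_COMMAND ""            # empty string: skip the update step entirely
  CONFIGURE_COMMAND ""         # xbyak is header-only, so these can be no-ops too
  BUILD_COMMAND ""
  INSTALL_COMMAND "")

With the update step disabled, the pinned checkout is reused as-is, which keeps incremental builds deterministic.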

paddle/fluid/framework/distributed_strategy.proto (+2)

@@ -43,6 +43,8 @@ message ShardingConfig {
   optional bool pp_allreduce_in_optimize = 10 [ default = false ];
   optional int32 pp_degree = 11 [ default = 1 ];
   optional bool optimize_cast = 12 [ default = false ];
+  // Optimizer sharding. Temporary plans and may be deprecated
+  optional bool _dp_as_optimizer_sharding = 13 [ default = false ];
 }
 
 message HybridConfig {
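
The leading underscore and the inline comment both mark _dp_as_optimizer_sharding as an internal, experimental knob rather than public API. As a hedged sketch of how the generated code would expose it (protoc derives accessors by prefixing the verbatim field name, hence the double underscore; the C++ namespace here is an assumption):

#include "paddle/fluid/framework/distributed_strategy.pb.h"  // generated header

int main() {
  paddle::fleet::ShardingConfig cfg;        // namespace is an assumption
  cfg.set__dp_as_optimizer_sharding(true);  // set_ + _dp_as_optimizer_sharding
  return cfg._dp_as_optimizer_sharding() ? 0 : 1;
}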

paddle/fluid/framework/fleet/ps_gpu_wrapper.cc (+5 −1)

@@ -235,13 +235,15 @@ void PSGPUWrapper::BuildTask(std::shared_ptr<HeterContext> gpu_task) {
 
   timeline.Start();
   std::vector<std::vector<std::pair<uint64_t, char*>>> pass_values;
-  uint16_t pass_id = 0;
 
   bool record_status = false;
+#ifdef PADDLE_WITH_PSLIB
+  uint16_t pass_id = 0;
   if (multi_node_) {
     record_status = fleet_ptr->pslib_ptr_->_worker_ptr->take_sparse_record(
         table_id_, pass_id, pass_values);
   }
+#endif
   auto build_func = [device_num, record_status, &pass_values, &local_keys,
                      &local_ptr, &device_keys, &device_vals,
                      &device_mutex](int i) {
@@ -260,6 +262,7 @@ void PSGPUWrapper::BuildTask(std::shared_ptr<HeterContext> gpu_task) {
       task_keys[shard].push_back(local_keys[i][j]);
       task_ptrs[shard].push_back(local_ptr[i][j]);
     }
+#ifdef PADDLE_WITH_PSLIB
     if (record_status) {
       size_t local_keys_size = local_keys.size();
       size_t pass_values_size = pass_values.size();
@@ -275,6 +278,7 @@ void PSGPUWrapper::BuildTask(std::shared_ptr<HeterContext> gpu_task) {
       }
     }
   }
+#endif
   for (int dev = 0; dev < device_num; dev++) {
     device_mutex[dev]->lock();

paddle/fluid/framework/ir/simplify_with_basic_ops_pass.cc (+1 −1)

@@ -157,7 +157,7 @@ bool SimplifyWithBasicOpsPass::SimplifyDropout(
     float scale =
         1.0f - BOOST_GET_CONST(float, dropout_op_desc->GetAttr("dropout_prob"));
 
-    framework::OpDesc new_op_desc;
+    framework::OpDesc new_op_desc(dropout_op_desc->Block());
     new_op_desc.SetType("scale");
     new_op_desc.SetInput("X", {dropout_x->Name()});
     new_op_desc.SetOutput("Out", {dropout_out->Name()});
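
The fix matters because a default-constructed OpDesc is not attached to any BlockDesc, so later code that calls Block() on the replacement scale op can dereference null; passing dropout_op_desc->Block() attaches the new op to the same block as the op it replaces. A minimal sketch of the pitfall, with simplified stand-ins for framework::OpDesc and framework::BlockDesc:

#include <cassert>

struct BlockDesc {};  // simplified stand-in

class OpDesc {        // simplified stand-in
 public:
  OpDesc() : block_(nullptr) {}
  explicit OpDesc(BlockDesc* block) : block_(block) {}
  BlockDesc* Block() const { return block_; }
 private:
  BlockDesc* block_;
};

int main() {
  BlockDesc block;
  OpDesc dropout(&block);
  OpDesc detached;                   // old code: no owning block
  OpDesc attached(dropout.Block());  // new code: inherits dropout's block
  assert(detached.Block() == nullptr);  // any Block()->... here would crash
  assert(attached.Block() == &block);
  return 0;
}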

paddle/fluid/framework/new_executor/interpretercore.cc (+24 −2)

@@ -188,6 +188,8 @@ void InterpreterCore::Convert() {
     BuildAndCacheInstructionCtx(&vec_instruction_[i], *global_scope_, place_);
   }
 
+  BuildSkipShareLoDInfo();
+
   for (size_t i = 0; i < vec_instruction_.size(); ++i) {
     gc_event_.emplace_back(vec_instruction_[i].execution_ctx_.get()->GetPlace(),
                            platform::GenerateDeviceEventFlag());
@@ -225,8 +227,8 @@ void InterpreterCore::BuildAndCacheInstructionCtx(
   instr_node->runtime_ctx_->inputs.swap(ins_map);
   instr_node->runtime_ctx_->outputs.swap(outs_map);
 
-  instr_node->infershape_ctx_.reset(
-      new RuntimeInferShapeContext(*op_base, *instr_node->runtime_ctx_.get()));
+  instr_node->infershape_ctx_.reset(new InterpretercoreInferShapeContext(
+      *op_base, *instr_node->runtime_ctx_.get()));
 
   auto* dev_ctx = instr_node->dev_ctx_;
   Scope scope;
@@ -235,6 +237,26 @@ void InterpreterCore::BuildAndCacheInstructionCtx(
       *op_base, scope, *dev_ctx, *instr_node->runtime_ctx_.get()));
 }
 
+void InterpreterCore::BuildSkipShareLoDInfo() {
+  for (size_t i = 0; i < vec_instruction_.size(); ++i) {
+    bool can_skip_lod = true;
+    for (auto& input : vec_instruction_[i].runtime_ctx_.get()->inputs) {
+      for (auto& var : input.second) {
+        if (var->IsType<LoDTensor>()) {
+          if (var->Get<LoDTensor>().lod().size() != 0) {
+            can_skip_lod = false;
+            break;
+          }
+        } else {
+          can_skip_lod = false;
+          break;
+        }
+      }
+    }
+    vec_instruction_[i].infershape_ctx_.get()->SetSkipLoD(can_skip_lod);
+  }
+}
+
 void InterpreterCore::RunInstruction(const Instruction& instr_node) {
   VLOG(3) << "RunInstruction: "
           << instr_node.kernel_func_.operator_base_->Type();
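
BuildSkipShareLoDInfo runs once, after the instruction list is built: an instruction may skip LoD propagation during InferShape only if every one of its input variables is a LoDTensor whose LoD is empty; any non-LoDTensor input or non-empty LoD forces the flag off. A standalone sketch of the predicate (variable and container types are simplified stand-ins):

#include <map>
#include <string>
#include <vector>

// Simplified stand-in: a variable either holds a LoDTensor (with a
// possibly empty LoD) or some other type.
struct Variable {
  bool is_lod_tensor = true;
  std::vector<std::vector<size_t>> lod;  // empty => nothing to share
};

bool CanSkipShareLoD(
    const std::map<std::string, std::vector<Variable>>& inputs) {
  for (const auto& kv : inputs) {
    for (const auto& var : kv.second) {
      if (!var.is_lod_tensor || !var.lod.empty()) return false;
    }
  }
  return true;
}

Precomputing this flag per instruction moves the check out of the per-step InferShape path, which the new executor runs on every instruction of every iteration.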

paddle/fluid/framework/new_executor/interpretercore.h (+2)

@@ -68,6 +68,8 @@ class InterpreterCore {
 
   void AddFetch(const std::vector<std::string>& fetch_names);
 
+  void BuildSkipShareLoDInfo();
+
   bool is_build_;
 
   const platform::Place& place_;

paddle/fluid/framework/new_executor/interpretercore_util.cc (+3 −3)

@@ -206,7 +206,7 @@ void build_op_func_list(const platform::Place& place,
     RuntimeContext runtime_context({}, {});
     runtime_context.inputs.swap(ins_map);
     runtime_context.outputs.swap(outs_map);
-    RuntimeInferShapeContext infer_shape_ctx(*op_base, runtime_context);
+    InterpretercoreInferShapeContext infer_shape_ctx(*op_base, runtime_context);
     static_cast<const framework::OperatorWithKernel*>(op_base)->InferShape(
         &infer_shape_ctx);
     auto kernels_iter = all_op_kernels.find(op->Type());
@@ -320,8 +320,8 @@ void build_op_func_list(const platform::Place& place,
       RuntimeContext copy_runtime_context({}, {});
       copy_runtime_context.inputs.swap(copy_ins_value_map);
       copy_runtime_context.outputs.swap(copy_outs_value_map);
-      RuntimeInferShapeContext copy_infer_shape_ctx(*copy_op,
-                                                    copy_runtime_context);
+      InterpretercoreInferShapeContext copy_infer_shape_ctx(
+          *copy_op, copy_runtime_context);
       static_cast<const framework::OperatorWithKernel*>(copy_op)
           ->InferShape(&copy_infer_shape_ctx);
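
Together with the interpretercore.cc change above, every InferShape call in the new executor now goes through InterpretercoreInferShapeContext, the executor's own context type, which is what carries the SetSkipLoD hook set by BuildSkipShareLoDInfo. A hypothetical sketch of what such a hook enables (only SetSkipLoD appears in the diff; the class shape and the ShareLoD short-circuit are assumptions):

// Hypothetical sketch, not the real class.
class InterpretercoreInferShapeContext {
 public:
  void SetSkipLoD(bool skip) { can_skip_lod_ = skip; }

  void ShareLoD(/* input/output variable handles elided */) {
    if (can_skip_lod_) return;  // fast path: no LoD to copy at all
    // ... actual LoD propagation elided ...
  }

 private:
  bool can_skip_lod_ = false;
};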
