Skip to content

Commit ffd75d1

Browse files
[PIR slice]Change deep_copy to share_buffer in eager backward (#72966)
* Change deep_copy to share_buffer in eager backward
* Share grad_out_tensor only for shared_buffer IO
1 parent 317a3de commit ffd75d1

File tree

1 file changed

+24
-3
lines changed

1 file changed

+24
-3
lines changed

paddle/fluid/eager/backward.cc

Lines changed: 24 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -192,9 +192,30 @@ std::vector<paddle::Tensor> RunBackward(
192192
// Feed given tensor if it's provided
193193
VLOG(3) << "Fill grad input tensor " << i << "with give grad tensor";
194194

195-
// Deep copy
196-
node_input_buffers_dict[grad_node]->CopyValueFromTensor(
197-
input_info.first, input_info.second, grad_tensors[i]);
195+
bool use_shared_buffer = false;
196+
// Check if inputs and outputs are equal in size and share the same buffer
197+
if (tensors.size() == inputs.size() &&
198+
tensors[i].numel() == inputs[i].numel()) {
199+
auto output_tensor =
200+
std::dynamic_pointer_cast<phi::DenseTensor>(tensors[i].impl());
201+
auto input_tensor =
202+
std::dynamic_pointer_cast<phi::DenseTensor>(inputs[i].impl());
203+
use_shared_buffer = output_tensor->IsSharedBufferWith(*input_tensor);
204+
}
205+
206+
if (use_shared_buffer) {
207+
// Share buffer with given grad_tensor
208+
paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize>
209+
inputs_grad_tensors;
210+
inputs_grad_tensors.push_back({grad_tensors[i]});
211+
auto grad_holder = GradTensorHolder(std::move(inputs_grad_tensors));
212+
node_input_buffers_dict[grad_node] =
213+
std::make_unique<GradTensorHolder>(grad_holder);
214+
} else {
215+
// Deep copy
216+
node_input_buffers_dict[grad_node]->CopyValueFromTensor(
217+
input_info.first, input_info.second, grad_tensors[i]);
218+
}
198219
} else {
199220
VLOG(3) << "Fill grad input tensor " << i << " with 1.0";
200221
// Initialize tensor with 1.0

0 commit comments

Comments (0)