@@ -192,9 +192,30 @@ std::vector<paddle::Tensor> RunBackward(
       // Feed given tensor if it's provided
       VLOG(3) << "Fill grad input tensor " << i << " with given grad tensor";
 
-      // Deep copy
-      node_input_buffers_dict[grad_node]->CopyValueFromTensor(
-          input_info.first, input_info.second, grad_tensors[i]);
+      bool use_shared_buffer = false;
+      // Check if inputs and outputs are equal in size and share the same buffer
+      if (tensors.size() == inputs.size() &&
+          tensors[i].numel() == inputs[i].numel()) {
+        auto output_tensor =
+            std::dynamic_pointer_cast<phi::DenseTensor>(tensors[i].impl());
+        auto input_tensor =
+            std::dynamic_pointer_cast<phi::DenseTensor>(inputs[i].impl());
+        use_shared_buffer = output_tensor->IsSharedBufferWith(*input_tensor);
+      }
+
+      if (use_shared_buffer) {
+        // Share buffer with given grad_tensor
+        paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize>
+            inputs_grad_tensors;
+        inputs_grad_tensors.push_back({grad_tensors[i]});
+        auto grad_holder = GradTensorHolder(std::move(inputs_grad_tensors));
+        node_input_buffers_dict[grad_node] =
+            std::make_unique<GradTensorHolder>(grad_holder);
+      } else {
+        // Deep copy
+        node_input_buffers_dict[grad_node]->CopyValueFromTensor(
+            input_info.first, input_info.second, grad_tensors[i]);
+      }
     } else {
       VLOG(3) << "Fill grad input tensor " << i << " with 1.0";
       // Initialize tensor with 1.0
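
For readers tracing the control flow above, here is a minimal standalone sketch of the fast path this hunk introduces: when the output tensor shares its storage with the input (an in-place op), the grad holder can alias the user-provided grad tensor instead of deep-copying it. The `Tensor`, `GradHolder`, and `FeedGrad` names below are simplified stand-ins invented for illustration, not the real paddle/phi classes; only the branch logic mirrors the change.

```cpp
// Simplified analogue of the shared-buffer check in the diff above.
#include <iostream>
#include <memory>
#include <vector>

struct Tensor {
  std::shared_ptr<std::vector<float>> buffer;  // storage, possibly shared
  size_t numel() const { return buffer ? buffer->size() : 0; }
  // Analogue of phi::DenseTensor::IsSharedBufferWith: same underlying storage.
  bool IsSharedBufferWith(const Tensor& other) const {
    return buffer && buffer == other.buffer;
  }
};

// Stand-in for GradTensorHolder: holds either an alias or a deep copy.
struct GradHolder {
  Tensor grad;
};

GradHolder FeedGrad(const Tensor& output, const Tensor& input,
                    const Tensor& grad_tensor) {
  // Same test as the diff: equal element count and shared storage means the
  // forward op ran in place, so the holder may alias grad_tensor directly.
  bool use_shared_buffer =
      output.numel() == input.numel() && output.IsSharedBufferWith(input);
  if (use_shared_buffer) {
    return GradHolder{Tensor{grad_tensor.buffer}};  // share buffer, O(1)
  }
  // Otherwise deep copy, mirroring CopyValueFromTensor.
  return GradHolder{
      Tensor{std::make_shared<std::vector<float>>(*grad_tensor.buffer)}};
}

int main() {
  auto storage = std::make_shared<std::vector<float>>(4, 1.0f);
  Tensor input{storage}, output{storage};  // in-place op: shared buffer
  Tensor grad{std::make_shared<std::vector<float>>(4, 0.5f)};
  GradHolder holder = FeedGrad(output, input, grad);
  std::cout << (holder.grad.buffer == grad.buffer) << "\n";  // prints 1
}
```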