From 93d0faf9260ec92c59815901c8903d75e3966f6b Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Thu, 13 Mar 2025 09:02:03 +0000 Subject: [PATCH 1/5] support 0-Size Tensor --- .../fluid/distributed/collective/reducer.cc | 2 +- .../eager/accumulation/accumulation_node.cc | 2 +- .../manual/eager_manual/nodes/conv2d_nodes.cc | 26 ++--- .../eager_manual/nodes/multiply_node.cc | 32 +++--- .../nodes/sync_batch_norm_node.cc | 28 +++--- .../forwards/fused_attention_fwd_func.cc | 78 +++++++-------- ...as_dropout_residual_layer_norm_fwd_func.cc | 18 ++-- .../forwards/fused_feedforward_fwd_func.cc | 84 ++++++++-------- .../forwards/fused_gate_attention_fwd_func.cc | 98 ++++++++++--------- .../custom_operator/custom_operator_node.cc | 4 +- paddle/fluid/eager/pylayer/py_layer_node.cc | 4 +- paddle/fluid/eager/tensor_wrapper.h | 2 +- .../eager/to_static/run_program_op_func.h | 2 +- paddle/fluid/eager/utils.cc | 8 +- paddle/fluid/framework/executor_cache.cc | 2 +- .../new_executor/collect_shape_manager.cc | 5 +- .../instruction/instruction_base.cc | 8 +- .../new_executor/interpreter/static_build.cc | 8 +- .../pir_adaptor/pir_adaptor_util.cc | 8 +- paddle/fluid/framework/operator.cc | 4 +- .../fluid/inference/api/analysis_predictor.cc | 8 +- .../operators/controlflow/fetch_v2_op.cc | 4 +- .../general/constant_folding_pass.cc | 2 +- .../general/remove_shadow_feed_pass.cc | 2 +- .../composite_double_backward_api.h | 26 ++--- paddle/fluid/pybind/eager.cc | 2 +- paddle/fluid/pybind/eager_custom_python_api.h | 2 +- paddle/fluid/pybind/eager_functions.cc | 8 +- paddle/fluid/pybind/eager_method.cc | 24 ++--- paddle/fluid/pybind/tensor_py.h | 2 +- paddle/phi/api/lib/api_gen_utils.cc | 6 +- paddle/phi/core/device_context.cc | 4 +- paddle/phi/core/selected_rows_impl.cc | 2 +- paddle/phi/core/tensor_array.cc | 2 +- 34 files changed, 266 insertions(+), 251 deletions(-) diff --git a/paddle/fluid/distributed/collective/reducer.cc b/paddle/fluid/distributed/collective/reducer.cc index cf6d8211eedd14..4140c89635c184 100644 --- a/paddle/fluid/distributed/collective/reducer.cc +++ b/paddle/fluid/distributed/collective/reducer.cc @@ -983,7 +983,7 @@ void EagerReducer::MarkGroupReady(size_t group_index) { bool EagerReducer::HasGrad(size_t var_index) { auto grad = egr::EagerUtils::mutable_grad(tensors_[var_index]); - if (grad && grad->initialized()) { + if (grad && grad->has_allocation()) { return true; } else { return false; diff --git a/paddle/fluid/eager/accumulation/accumulation_node.cc b/paddle/fluid/eager/accumulation/accumulation_node.cc index e1f10500047199..17423751a71b2a 100644 --- a/paddle/fluid/eager/accumulation/accumulation_node.cc +++ b/paddle/fluid/eager/accumulation/accumulation_node.cc @@ -35,7 +35,7 @@ static void CopyOrAddTensor(paddle::Tensor* tensor, VLOG(3) << "Move Tensor ptr: " << t.impl(); *tensor = t; } else { - if (!tensor->defined() || !tensor->initialized()) { + if (!tensor->defined() || !tensor->has_allocation()) { // Simply copy tensor->impl VLOG(3) << "Move Tensor ptr: " << t.impl(); *tensor = t; diff --git a/paddle/fluid/eager/api/manual/eager_manual/nodes/conv2d_nodes.cc b/paddle/fluid/eager/api/manual/eager_manual/nodes/conv2d_nodes.cc index 1fc70d9c637115..01eeeed54318cf 100644 --- a/paddle/fluid/eager/api/manual/eager_manual/nodes/conv2d_nodes.cc +++ b/paddle/fluid/eager/api/manual/eager_manual/nodes/conv2d_nodes.cc @@ -128,8 +128,9 @@ Conv2dGradNodeFinal::operator()( auto& grad_input = returns[0][0]; egr::AutogradMeta* grad_input_autograd_meta = - returns[0][0].initialized() ? 
egr::EagerUtils::autograd_meta(&grad_input) - : nullptr; + returns[0][0].has_allocation() + ? egr::EagerUtils::autograd_meta(&grad_input) + : nullptr; if (grad_input_autograd_meta) grad_input_autograd_meta->SetStopGradient(false); VLOG(3) << "Conv2dGradNodeFinal grad_input_autograd_meta: " @@ -137,8 +138,9 @@ Conv2dGradNodeFinal::operator()( auto& grad_filter = returns[1][0]; egr::AutogradMeta* grad_filter_autograd_meta = - returns[1][0].initialized() ? egr::EagerUtils::autograd_meta(&grad_filter) - : nullptr; + returns[1][0].has_allocation() + ? egr::EagerUtils::autograd_meta(&grad_filter) + : nullptr; if (grad_filter_autograd_meta) grad_filter_autograd_meta->SetStopGradient(false); VLOG(3) << "Conv2dGradNodeFinal grad_filter_autograd_meta: " @@ -268,14 +270,14 @@ Conv2dDoubleGradNodeFinal::operator()( auto& grad_input_grad = hooked_grads[0][0]; paddle::optional grad_input_grad_optional; - if (grad_input_grad.initialized()) + if (grad_input_grad.has_allocation()) grad_input_grad_optional = paddle::make_optional(grad_input_grad); auto& grad_filter_grad = hooked_grads[1][0]; paddle::optional grad_filter_grad_optional; - if (grad_filter_grad.initialized()) + if (grad_filter_grad.has_allocation()) grad_filter_grad_optional = paddle::make_optional(grad_filter_grad); @@ -339,21 +341,23 @@ Conv2dDoubleGradNodeFinal::operator()( auto& input_grad = returns[0][0]; egr::AutogradMeta* input_grad_autograd_meta = - returns[0][0].initialized() ? egr::EagerUtils::autograd_meta(&input_grad) - : nullptr; + returns[0][0].has_allocation() + ? egr::EagerUtils::autograd_meta(&input_grad) + : nullptr; if (input_grad_autograd_meta) input_grad_autograd_meta->SetStopGradient(false); auto& filter_grad = returns[1][0]; egr::AutogradMeta* filter_grad_autograd_meta = - returns[1][0].initialized() ? egr::EagerUtils::autograd_meta(&filter_grad) - : nullptr; + returns[1][0].has_allocation() + ? egr::EagerUtils::autograd_meta(&filter_grad) + : nullptr; if (filter_grad_autograd_meta) filter_grad_autograd_meta->SetStopGradient(false); auto& grad_out_grad = returns[2][0]; egr::AutogradMeta* grad_out_grad_autograd_meta = - returns[2][0].initialized() + returns[2][0].has_allocation() ? egr::EagerUtils::autograd_meta(&grad_out_grad) : nullptr; if (grad_out_grad_autograd_meta) diff --git a/paddle/fluid/eager/api/manual/eager_manual/nodes/multiply_node.cc b/paddle/fluid/eager/api/manual/eager_manual/nodes/multiply_node.cc index cfb41ea40397a1..7a80264bf0834a 100644 --- a/paddle/fluid/eager/api/manual/eager_manual/nodes/multiply_node.cc +++ b/paddle/fluid/eager/api/manual/eager_manual/nodes/multiply_node.cc @@ -170,14 +170,14 @@ MultiplyGradNode::operator()( auto& grad_x = returns[0][0]; egr::AutogradMeta* grad_x_autograd_meta = - returns[0][0].initialized() ? egr::EagerUtils::autograd_meta(&grad_x) - : nullptr; + returns[0][0].has_allocation() ? egr::EagerUtils::autograd_meta(&grad_x) + : nullptr; if (grad_x_autograd_meta) grad_x_autograd_meta->SetStopGradient(false); auto& grad_y = returns[1][0]; egr::AutogradMeta* grad_y_autograd_meta = - returns[1][0].initialized() ? egr::EagerUtils::autograd_meta(&grad_y) - : nullptr; + returns[1][0].has_allocation() ? 
egr::EagerUtils::autograd_meta(&grad_y) + : nullptr; if (grad_y_autograd_meta) grad_y_autograd_meta->SetStopGradient(false); // Create Grad Node @@ -299,14 +299,14 @@ MultiplyDoubleGradNode::operator()( auto& fwd_grad_grad_x = hooked_grads[0][0]; paddle::optional fwd_grad_grad_x_optional; - if (fwd_grad_grad_x.initialized()) + if (fwd_grad_grad_x.has_allocation()) fwd_grad_grad_x_optional = paddle::make_optional(fwd_grad_grad_x); auto& fwd_grad_grad_y = hooked_grads[1][0]; paddle::optional fwd_grad_grad_y_optional; - if (fwd_grad_grad_y.initialized()) + if (fwd_grad_grad_y.has_allocation()) fwd_grad_grad_y_optional = paddle::make_optional(fwd_grad_grad_y); @@ -339,7 +339,7 @@ MultiplyDoubleGradNode::operator()( // Inplace Check bool can_be_inplaced = false; - if (fwd_grad_grad_x.initialized()) { + if (fwd_grad_grad_x.has_allocation()) { VLOG(10) << fwd_grad_grad_x.name() << "(grad_x_grad) use_count: " << fwd_grad_grad_x.impl().use_count(); if (fwd_grad_grad_x.impl().use_count() == 1 || @@ -450,19 +450,19 @@ MultiplyDoubleGradNode::operator()( auto& grad_x = returns[0][0]; egr::AutogradMeta* grad_x_autograd_meta = - returns[0][0].initialized() ? egr::EagerUtils::autograd_meta(&grad_x) - : nullptr; + returns[0][0].has_allocation() ? egr::EagerUtils::autograd_meta(&grad_x) + : nullptr; if (grad_x_autograd_meta) grad_x_autograd_meta->SetStopGradient(false); auto& grad_y = returns[1][0]; egr::AutogradMeta* grad_y_autograd_meta = - returns[1][0].initialized() ? egr::EagerUtils::autograd_meta(&grad_y) - : nullptr; + returns[1][0].has_allocation() ? egr::EagerUtils::autograd_meta(&grad_y) + : nullptr; if (grad_y_autograd_meta) grad_y_autograd_meta->SetStopGradient(false); auto& grad_grad_out = returns[2][0]; egr::AutogradMeta* grad_grad_out_autograd_meta = - returns[2][0].initialized() + returns[2][0].has_allocation() ? egr::EagerUtils::autograd_meta(&grad_grad_out) : nullptr; if (grad_grad_out_autograd_meta) @@ -638,14 +638,14 @@ MultiplyGradNode::operator()( auto& x_grad = returns[0][0]; egr::AutogradMeta* x_grad_autograd_meta = - returns[0][0].initialized() ? egr::EagerUtils::autograd_meta(&x_grad) - : nullptr; + returns[0][0].has_allocation() ? egr::EagerUtils::autograd_meta(&x_grad) + : nullptr; if (x_grad_autograd_meta) x_grad_autograd_meta->SetStopGradient(false); auto& y_grad = returns[1][0]; egr::AutogradMeta* y_grad_autograd_meta = - returns[1][0].initialized() ? egr::EagerUtils::autograd_meta(&y_grad) - : nullptr; + returns[1][0].has_allocation() ? egr::EagerUtils::autograd_meta(&y_grad) + : nullptr; if (y_grad_autograd_meta) y_grad_autograd_meta->SetStopGradient(false); // Create Grad Node diff --git a/paddle/fluid/eager/api/manual/eager_manual/nodes/sync_batch_norm_node.cc b/paddle/fluid/eager/api/manual/eager_manual/nodes/sync_batch_norm_node.cc index 0f84bb18621d51..8bb82d29252f49 100644 --- a/paddle/fluid/eager/api/manual/eager_manual/nodes/sync_batch_norm_node.cc +++ b/paddle/fluid/eager/api/manual/eager_manual/nodes/sync_batch_norm_node.cc @@ -175,21 +175,23 @@ SyncBatchNormGradNode::operator()( auto& x_grad = returns[0][0]; egr::AutogradMeta* x_grad_autograd_meta = - returns[0][0].initialized() ? egr::EagerUtils::autograd_meta(&x_grad) - : nullptr; + returns[0][0].has_allocation() ? egr::EagerUtils::autograd_meta(&x_grad) + : nullptr; if (x_grad_autograd_meta) x_grad_autograd_meta->SetStopGradient(false); auto& scale_grad = returns[3][0]; egr::AutogradMeta* scale_grad_autograd_meta = - returns[3][0].initialized() ? 
egr::EagerUtils::autograd_meta(&scale_grad) - : nullptr; + returns[3][0].has_allocation() + ? egr::EagerUtils::autograd_meta(&scale_grad) + : nullptr; if (scale_grad_autograd_meta) scale_grad_autograd_meta->SetStopGradient(false); auto& bias_grad = returns[4][0]; egr::AutogradMeta* bias_grad_autograd_meta = - returns[4][0].initialized() ? egr::EagerUtils::autograd_meta(&bias_grad) - : nullptr; + returns[4][0].has_allocation() + ? egr::EagerUtils::autograd_meta(&bias_grad) + : nullptr; if (bias_grad_autograd_meta) bias_grad_autograd_meta->SetStopGradient(false); // Create Grad Node @@ -409,21 +411,23 @@ SyncBatchNormGradNode::operator()( auto& x_grad = returns[0][0]; egr::AutogradMeta* x_grad_autograd_meta = - returns[0][0].initialized() ? egr::EagerUtils::autograd_meta(&x_grad) - : nullptr; + returns[0][0].has_allocation() ? egr::EagerUtils::autograd_meta(&x_grad) + : nullptr; if (x_grad_autograd_meta) x_grad_autograd_meta->SetStopGradient(false); auto& scale_grad = returns[3][0]; egr::AutogradMeta* scale_grad_autograd_meta = - returns[3][0].initialized() ? egr::EagerUtils::autograd_meta(&scale_grad) - : nullptr; + returns[3][0].has_allocation() + ? egr::EagerUtils::autograd_meta(&scale_grad) + : nullptr; if (scale_grad_autograd_meta) scale_grad_autograd_meta->SetStopGradient(false); auto& bias_grad = returns[4][0]; egr::AutogradMeta* bias_grad_autograd_meta = - returns[4][0].initialized() ? egr::EagerUtils::autograd_meta(&bias_grad) - : nullptr; + returns[4][0].has_allocation() + ? egr::EagerUtils::autograd_meta(&bias_grad) + : nullptr; if (bias_grad_autograd_meta) bias_grad_autograd_meta->SetStopGradient(false); // Create Grad Node diff --git a/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_attention_fwd_func.cc b/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_attention_fwd_func.cc index 56723ff1ab0f10..958714f3eb26e9 100644 --- a/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_attention_fwd_func.cc +++ b/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_attention_fwd_func.cc @@ -64,15 +64,15 @@ fused_attention_dygraph_function( paddle::small_vector, egr::kSlotSmallVectorSize> amp_tensors_vector = {{X}, {QKVW}, {OutLinearW}}; - if (LnScale.initialized()) amp_tensors_vector.push_back({LnScale}); - if (LnBias.initialized()) amp_tensors_vector.push_back({LnBias}); - if (QKVBias.initialized()) amp_tensors_vector.push_back({QKVBias}); - if (CacheKV.initialized()) amp_tensors_vector.push_back({CacheKV}); - if (SrcMask.initialized()) amp_tensors_vector.push_back({SrcMask}); - if (OutLinearBias.initialized()) + if (LnScale.has_allocation()) amp_tensors_vector.push_back({LnScale}); + if (LnBias.has_allocation()) amp_tensors_vector.push_back({LnBias}); + if (QKVBias.has_allocation()) amp_tensors_vector.push_back({QKVBias}); + if (CacheKV.has_allocation()) amp_tensors_vector.push_back({CacheKV}); + if (SrcMask.has_allocation()) amp_tensors_vector.push_back({SrcMask}); + if (OutLinearBias.has_allocation()) amp_tensors_vector.push_back({OutLinearBias}); - if (Ln2Scale.initialized()) amp_tensors_vector.push_back({Ln2Scale}); - if (Ln2Bias.initialized()) amp_tensors_vector.push_back({Ln2Bias}); + if (Ln2Scale.has_allocation()) amp_tensors_vector.push_back({Ln2Scale}); + if (Ln2Bias.has_allocation()) amp_tensors_vector.push_back({Ln2Bias}); auto amp_dst_dtype = paddle::imperative::GetAmpDestDtype( "fused_attention", amp_tensors_vector); @@ -83,43 +83,43 @@ fused_attention_dygraph_function( auto NEW_OutLinearW = egr::AmpAutoCast( "OutLinearW", OutLinearW, 
amp_dst_dtype, "fused_attention"); auto NEW_LnScale = - ((LnScale.initialized()) + ((LnScale.has_allocation()) ? egr::AmpAutoCast( "LnScale", LnScale, amp_dst_dtype, "fused_attention") : LnScale); auto NEW_LnBias = - ((LnBias.initialized()) + ((LnBias.has_allocation()) ? egr::AmpAutoCast( "LnBias", LnBias, amp_dst_dtype, "fused_attention") : LnBias); auto NEW_QKVBias = - ((QKVBias.initialized()) + ((QKVBias.has_allocation()) ? egr::AmpAutoCast( "QKVBias", QKVBias, amp_dst_dtype, "fused_attention") : QKVBias); auto NEW_CacheKV = - ((CacheKV.initialized()) + ((CacheKV.has_allocation()) ? egr::AmpAutoCast( "CacheKV", CacheKV, amp_dst_dtype, "fused_attention") : CacheKV); auto NEW_SrcMask = - ((SrcMask.initialized()) + ((SrcMask.has_allocation()) ? egr::AmpAutoCast( "SrcMask", SrcMask, amp_dst_dtype, "fused_attention") : SrcMask); auto NEW_OutLinearBias = - ((OutLinearBias.initialized()) ? egr::AmpAutoCast("OutLinearBias", - OutLinearBias, - amp_dst_dtype, - "fused_attention") - : OutLinearBias); + ((OutLinearBias.has_allocation()) ? egr::AmpAutoCast("OutLinearBias", + OutLinearBias, + amp_dst_dtype, + "fused_attention") + : OutLinearBias); auto NEW_Ln2Scale = - ((Ln2Scale.initialized()) + ((Ln2Scale.has_allocation()) ? egr::AmpAutoCast( "Ln2Scale", Ln2Scale, amp_dst_dtype, "fused_attention") : Ln2Scale); auto NEW_Ln2Bias = - ((Ln2Bias.initialized()) + ((Ln2Bias.has_allocation()) ? egr::AmpAutoCast( "Ln2Bias", Ln2Bias, amp_dst_dtype, "fused_attention") : Ln2Bias); @@ -147,21 +147,21 @@ fused_attention_dygraph_function( {{"X", egr::EagerUtils::TrySyncToVars(X)}, {"QKVW", egr::EagerUtils::TrySyncToVars(QKVW)}, {"OutLinearW", egr::EagerUtils::TrySyncToVars(OutLinearW)}}; - if (LnScale.initialized()) + if (LnScale.has_allocation()) ins["LnScale"] = egr::EagerUtils::TrySyncToVars(LnScale); - if (LnBias.initialized()) + if (LnBias.has_allocation()) ins["LnBias"] = egr::EagerUtils::TrySyncToVars(LnBias); - if (QKVBias.initialized()) + if (QKVBias.has_allocation()) ins["QKVBias"] = egr::EagerUtils::TrySyncToVars(QKVBias); - if (CacheKV.initialized()) + if (CacheKV.has_allocation()) ins["CacheKV"] = egr::EagerUtils::TrySyncToVars(CacheKV); - if (SrcMask.initialized()) + if (SrcMask.has_allocation()) ins["SrcMask"] = egr::EagerUtils::TrySyncToVars(SrcMask); - if (OutLinearBias.initialized()) + if (OutLinearBias.has_allocation()) ins["OutLinearBias"] = egr::EagerUtils::TrySyncToVars(OutLinearBias); - if (Ln2Scale.initialized()) + if (Ln2Scale.has_allocation()) ins["Ln2Scale"] = egr::EagerUtils::TrySyncToVars(Ln2Scale); - if (Ln2Bias.initialized()) + if (Ln2Bias.has_allocation()) ins["Ln2Bias"] = egr::EagerUtils::TrySyncToVars(Ln2Bias); std::map>> outs = @@ -417,7 +417,7 @@ fused_attention_dygraph_function( grad_node->SetGradOutMeta(QKVW, 3); grad_node->SetGradOutMeta(OutLinearW, 7); - if (QKVBias.initialized()) { + if (QKVBias.has_allocation()) { grad_node->SetTensorWrapper_QKVBias(QKVBias); grad_node->SetTensorWrapper_QKVBiasOut(QKVBiasOut); grad_node->SetGradOutMeta(QKVBias, 4); @@ -431,7 +431,7 @@ fused_attention_dygraph_function( grad_node->SetGradOutMeta(QKVBiasOut, 11); } - if (SrcMask.initialized()) { + if (SrcMask.has_allocation()) { grad_node->SetTensorWrapper_SrcMask(SrcMask); grad_node->SetTensorWrapper_SrcMaskOut(SrcMaskOut); @@ -444,21 +444,21 @@ fused_attention_dygraph_function( grad_node->SetGradOutMeta(SrcMaskOut, 12); } - if (OutLinearBias.initialized()) { + if (OutLinearBias.has_allocation()) { grad_node->SetTensorWrapper_OutLinearBias(OutLinearBias); 
grad_node->SetGradOutMeta(OutLinearBias, 8); } if (pre_layer_norm) { - if (LnScale.initialized()) { + if (LnScale.has_allocation()) { grad_node->SetTensorWrapper_LnScale(LnScale); grad_node->SetGradOutMeta(LnScale, 1); } - if (LnBias.initialized()) { + if (LnBias.has_allocation()) { grad_node->SetTensorWrapper_LnBias(LnBias); grad_node->SetGradOutMeta(LnBias, 2); } - if (LnOut.initialized()) { + if (LnOut.has_allocation()) { grad_node->SetTensorWrapper_LnOut(LnOut); auto LnOut_accumulation_node = @@ -469,18 +469,18 @@ fused_attention_dygraph_function( LnOut_accumulation_node->SetGradInMeta(LnOut, 0); grad_node->SetGradOutMeta(LnOut, 13); } - if (LnMean.initialized()) { + if (LnMean.has_allocation()) { grad_node->SetTensorWrapper_LnMean(LnMean); } - if (LnVariance.initialized()) { + if (LnVariance.has_allocation()) { grad_node->SetTensorWrapper_LnVariance(LnVariance); } } else { - if (Ln2Scale.initialized()) { + if (Ln2Scale.has_allocation()) { grad_node->SetTensorWrapper_Ln2Scale(Ln2Scale); grad_node->SetGradOutMeta(Ln2Scale, 9); } - if (Ln2Bias.initialized()) { + if (Ln2Bias.has_allocation()) { grad_node->SetTensorWrapper_Ln2Bias(Ln2Bias); grad_node->SetGradOutMeta(Ln2Bias, 10); } @@ -557,7 +557,7 @@ fused_attention_dygraph_function( SoftmaxOut_accumulation_node->SetGradInMeta(SoftmaxOut, 0); grad_node->SetGradOutMeta(SoftmaxOut, 19); - if (AttnDropoutOut.initialized()) { + if (AttnDropoutOut.has_allocation()) { auto AttnDropoutOut_accumulation_node = std::make_shared( p_autograd_AttnDropoutOut); diff --git a/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_bias_dropout_residual_layer_norm_fwd_func.cc b/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_bias_dropout_residual_layer_norm_fwd_func.cc index 9174fe7ad38292..c901d9c66f8ebd 100644 --- a/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_bias_dropout_residual_layer_norm_fwd_func.cc +++ b/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_bias_dropout_residual_layer_norm_fwd_func.cc @@ -45,9 +45,9 @@ fused_bias_dropout_residual_layer_norm_dygraph_function( paddle::small_vector, egr::kSlotSmallVectorSize> amp_tensors_vector = {{X}, {Residual}}; - if (Bias.initialized()) amp_tensors_vector.push_back({Bias}); - if (LnScale.initialized()) amp_tensors_vector.push_back({LnScale}); - if (LnBias.initialized()) amp_tensors_vector.push_back({LnBias}); + if (Bias.has_allocation()) amp_tensors_vector.push_back({Bias}); + if (LnScale.has_allocation()) amp_tensors_vector.push_back({LnScale}); + if (LnBias.has_allocation()) amp_tensors_vector.push_back({LnBias}); auto amp_dst_dtype = paddle::imperative::GetAmpDestDtype( "fused_bias_dropout_residual_layer_norm", amp_tensors_vector); @@ -60,21 +60,21 @@ fused_bias_dropout_residual_layer_norm_dygraph_function( amp_dst_dtype, "fused_bias_dropout_residual_layer_norm"); auto NEW_Bias = - ((Bias.initialized()) + ((Bias.has_allocation()) ? egr::AmpAutoCast("Bias", Bias, amp_dst_dtype, "fused_bias_dropout_residual_layer_norm") : Bias); auto NEW_LnScale = - ((LnScale.initialized()) + ((LnScale.has_allocation()) ? egr::AmpAutoCast("LnScale", LnScale, amp_dst_dtype, "fused_bias_dropout_residual_layer_norm") : LnScale); auto NEW_LnBias = - ((LnBias.initialized()) + ((LnBias.has_allocation()) ? 
egr::AmpAutoCast("LnBias", LnBias, amp_dst_dtype, @@ -93,10 +93,10 @@ fused_bias_dropout_residual_layer_norm_dygraph_function( std::map>> ins = {{"X", egr::EagerUtils::TrySyncToVars(X)}, {"Residual", egr::EagerUtils::TrySyncToVars(Residual)}}; - if (Bias.initialized()) ins["Bias"] = egr::EagerUtils::TrySyncToVars(Bias); - if (LnScale.initialized()) + if (Bias.has_allocation()) ins["Bias"] = egr::EagerUtils::TrySyncToVars(Bias); + if (LnScale.has_allocation()) ins["LnScale"] = egr::EagerUtils::TrySyncToVars(LnScale); - if (LnBias.initialized()) + if (LnBias.has_allocation()) ins["LnBias"] = egr::EagerUtils::TrySyncToVars(LnBias); std::map>> outs = diff --git a/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_feedforward_fwd_func.cc b/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_feedforward_fwd_func.cc index f1ae95cbe8ab2a..49a37b042335ec 100644 --- a/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_feedforward_fwd_func.cc +++ b/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_feedforward_fwd_func.cc @@ -54,16 +54,18 @@ fused_feedforward_dygraph_function( paddle::small_vector, egr::kSlotSmallVectorSize> amp_tensors_vector = {{X}, {Linear1Weight}, {Linear2Weight}}; - if (Dropout1Seed.initialized()) + if (Dropout1Seed.has_allocation()) amp_tensors_vector.push_back({Dropout1Seed}); - if (Dropout2Seed.initialized()) + if (Dropout2Seed.has_allocation()) amp_tensors_vector.push_back({Dropout2Seed}); - if (Linear1Bias.initialized()) amp_tensors_vector.push_back({Linear1Bias}); - if (Linear2Bias.initialized()) amp_tensors_vector.push_back({Linear2Bias}); - if (Ln1Scale.initialized()) amp_tensors_vector.push_back({Ln1Scale}); - if (Ln1Bias.initialized()) amp_tensors_vector.push_back({Ln1Bias}); - if (Ln2Scale.initialized()) amp_tensors_vector.push_back({Ln2Scale}); - if (Ln2Bias.initialized()) amp_tensors_vector.push_back({Ln2Bias}); + if (Linear1Bias.has_allocation()) + amp_tensors_vector.push_back({Linear1Bias}); + if (Linear2Bias.has_allocation()) + amp_tensors_vector.push_back({Linear2Bias}); + if (Ln1Scale.has_allocation()) amp_tensors_vector.push_back({Ln1Scale}); + if (Ln1Bias.has_allocation()) amp_tensors_vector.push_back({Ln1Bias}); + if (Ln2Scale.has_allocation()) amp_tensors_vector.push_back({Ln2Scale}); + if (Ln2Bias.has_allocation()) amp_tensors_vector.push_back({Ln2Bias}); auto amp_dst_dtype = paddle::imperative::GetAmpDestDtype( "fused_feedforward", amp_tensors_vector); @@ -74,46 +76,46 @@ fused_feedforward_dygraph_function( auto NEW_Linear2Weight = egr::AmpAutoCast( "Linear2Weight", Linear2Weight, amp_dst_dtype, "fused_feedforward"); auto NEW_Dropout1Seed = - ((Dropout1Seed.initialized()) ? egr::AmpAutoCast("Dropout1Seed", - Dropout1Seed, - amp_dst_dtype, - "fused_feedforward") - : Dropout1Seed); + ((Dropout1Seed.has_allocation()) ? egr::AmpAutoCast("Dropout1Seed", + Dropout1Seed, + amp_dst_dtype, + "fused_feedforward") + : Dropout1Seed); auto NEW_Dropout2Seed = - ((Dropout2Seed.initialized()) ? egr::AmpAutoCast("Dropout2Seed", - Dropout2Seed, - amp_dst_dtype, - "fused_feedforward") - : Dropout2Seed); + ((Dropout2Seed.has_allocation()) ? egr::AmpAutoCast("Dropout2Seed", + Dropout2Seed, + amp_dst_dtype, + "fused_feedforward") + : Dropout2Seed); auto NEW_Linear1Bias = - ((Linear1Bias.initialized()) ? egr::AmpAutoCast("Linear1Bias", - Linear1Bias, - amp_dst_dtype, - "fused_feedforward") - : Linear1Bias); + ((Linear1Bias.has_allocation()) ? 
egr::AmpAutoCast("Linear1Bias", + Linear1Bias, + amp_dst_dtype, + "fused_feedforward") + : Linear1Bias); auto NEW_Linear2Bias = - ((Linear2Bias.initialized()) ? egr::AmpAutoCast("Linear2Bias", - Linear2Bias, - amp_dst_dtype, - "fused_feedforward") - : Linear2Bias); + ((Linear2Bias.has_allocation()) ? egr::AmpAutoCast("Linear2Bias", + Linear2Bias, + amp_dst_dtype, + "fused_feedforward") + : Linear2Bias); auto NEW_Ln1Scale = - ((Ln1Scale.initialized()) + ((Ln1Scale.has_allocation()) ? egr::AmpAutoCast( "Ln1Scale", Ln1Scale, amp_dst_dtype, "fused_feedforward") : Ln1Scale); auto NEW_Ln1Bias = - ((Ln1Bias.initialized()) + ((Ln1Bias.has_allocation()) ? egr::AmpAutoCast( "Ln1Bias", Ln1Bias, amp_dst_dtype, "fused_feedforward") : Ln1Bias); auto NEW_Ln2Scale = - ((Ln2Scale.initialized()) + ((Ln2Scale.has_allocation()) ? egr::AmpAutoCast( "Ln2Scale", Ln2Scale, amp_dst_dtype, "fused_feedforward") : Ln2Scale); auto NEW_Ln2Bias = - ((Ln2Bias.initialized()) + ((Ln2Bias.has_allocation()) ? egr::AmpAutoCast( "Ln2Bias", Ln2Bias, amp_dst_dtype, "fused_feedforward") : Ln2Bias); @@ -141,21 +143,21 @@ fused_feedforward_dygraph_function( {{"X", egr::EagerUtils::TrySyncToVars(X)}, {"Linear1Weight", egr::EagerUtils::TrySyncToVars(Linear1Weight)}, {"Linear2Weight", egr::EagerUtils::TrySyncToVars(Linear2Weight)}}; - if (Dropout1Seed.initialized()) + if (Dropout1Seed.has_allocation()) ins["Dropout1Seed"] = egr::EagerUtils::TrySyncToVars(Dropout1Seed); - if (Dropout2Seed.initialized()) + if (Dropout2Seed.has_allocation()) ins["Dropout2Seed"] = egr::EagerUtils::TrySyncToVars(Dropout2Seed); - if (Linear1Bias.initialized()) + if (Linear1Bias.has_allocation()) ins["Linear1Bias"] = egr::EagerUtils::TrySyncToVars(Linear1Bias); - if (Linear2Bias.initialized()) + if (Linear2Bias.has_allocation()) ins["Linear2Bias"] = egr::EagerUtils::TrySyncToVars(Linear2Bias); - if (Ln1Scale.initialized()) + if (Ln1Scale.has_allocation()) ins["Ln1Scale"] = egr::EagerUtils::TrySyncToVars(Ln1Scale); - if (Ln1Bias.initialized()) + if (Ln1Bias.has_allocation()) ins["Ln1Bias"] = egr::EagerUtils::TrySyncToVars(Ln1Bias); - if (Ln2Scale.initialized()) + if (Ln2Scale.has_allocation()) ins["Ln2Scale"] = egr::EagerUtils::TrySyncToVars(Ln2Scale); - if (Ln2Bias.initialized()) + if (Ln2Bias.has_allocation()) ins["Ln2Bias"] = egr::EagerUtils::TrySyncToVars(Ln2Bias); std::map>> outs = @@ -351,7 +353,7 @@ fused_feedforward_dygraph_function( grad_node->SetTensorWrapper_Ln2Variance(Ln2Variance); } - if (Linear2Bias.initialized()) { + if (Linear2Bias.has_allocation()) { grad_node->SetTensorWrapper_Linear2Bias(Linear2Bias); grad_node->SetGradOutMeta(Linear2Bias, 6); } diff --git a/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_gate_attention_fwd_func.cc b/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_gate_attention_fwd_func.cc index 0b4bc68438aa22..76a972db730275 100644 --- a/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_gate_attention_fwd_func.cc +++ b/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_gate_attention_fwd_func.cc @@ -54,15 +54,17 @@ fused_gate_attention_dygraph_function( paddle::small_vector, egr::kSlotSmallVectorSize> amp_tensors_vector = { {Query}, {SrcMask}, {OutLinearWeight}, {OutLinearBias}}; - if (Key.initialized()) amp_tensors_vector.push_back({Key}); - if (QueryWeight.initialized()) amp_tensors_vector.push_back({QueryWeight}); - if (KeyWeight.initialized()) amp_tensors_vector.push_back({KeyWeight}); - if (ValueWeight.initialized()) amp_tensors_vector.push_back({ValueWeight}); - if 
(QKVWeight.initialized()) amp_tensors_vector.push_back({QKVWeight}); - if (NonbatchedBias.initialized()) + if (Key.has_allocation()) amp_tensors_vector.push_back({Key}); + if (QueryWeight.has_allocation()) + amp_tensors_vector.push_back({QueryWeight}); + if (KeyWeight.has_allocation()) amp_tensors_vector.push_back({KeyWeight}); + if (ValueWeight.has_allocation()) + amp_tensors_vector.push_back({ValueWeight}); + if (QKVWeight.has_allocation()) amp_tensors_vector.push_back({QKVWeight}); + if (NonbatchedBias.has_allocation()) amp_tensors_vector.push_back({NonbatchedBias}); - if (GateWeight.initialized()) amp_tensors_vector.push_back({GateWeight}); - if (GateBias.initialized()) amp_tensors_vector.push_back({GateBias}); + if (GateWeight.has_allocation()) amp_tensors_vector.push_back({GateWeight}); + if (GateBias.has_allocation()) amp_tensors_vector.push_back({GateBias}); auto amp_dst_dtype = paddle::imperative::GetAmpDestDtype( "fused_gate_attention", amp_tensors_vector); @@ -70,7 +72,7 @@ fused_gate_attention_dygraph_function( auto NEW_Query = egr::AmpAutoCast("Query", Query, amp_dst_dtype, "fused_gate_attention"); auto NEW_Key = - ((Key.initialized()) + ((Key.has_allocation()) ? ((Query.data() == Key.data()) ? NEW_Query : egr::AmpAutoCast( @@ -85,44 +87,44 @@ fused_gate_attention_dygraph_function( "fused_gate_attention"); auto NEW_OutLinearBias = egr::AmpAutoCast( "OutLinearBias", OutLinearBias, amp_dst_dtype, "fused_gate_attention"); - auto NEW_QueryWeight = - ((QueryWeight.initialized()) ? egr::AmpAutoCast("QueryWeight", - QueryWeight, - amp_dst_dtype, - "fused_gate_attention") - : QueryWeight); + auto NEW_QueryWeight = ((QueryWeight.has_allocation()) + ? egr::AmpAutoCast("QueryWeight", + QueryWeight, + amp_dst_dtype, + "fused_gate_attention") + : QueryWeight); auto NEW_KeyWeight = - ((KeyWeight.initialized()) ? egr::AmpAutoCast("KeyWeight", - KeyWeight, - amp_dst_dtype, - "fused_gate_attention") - : KeyWeight); - auto NEW_ValueWeight = - ((ValueWeight.initialized()) ? egr::AmpAutoCast("ValueWeight", - ValueWeight, - amp_dst_dtype, - "fused_gate_attention") - : ValueWeight); + ((KeyWeight.has_allocation()) ? egr::AmpAutoCast("KeyWeight", + KeyWeight, + amp_dst_dtype, + "fused_gate_attention") + : KeyWeight); + auto NEW_ValueWeight = ((ValueWeight.has_allocation()) + ? egr::AmpAutoCast("ValueWeight", + ValueWeight, + amp_dst_dtype, + "fused_gate_attention") + : ValueWeight); auto NEW_QKVWeight = - ((QKVWeight.initialized()) ? egr::AmpAutoCast("QKVWeight", - QKVWeight, - amp_dst_dtype, - "fused_gate_attention") - : QKVWeight); - auto NEW_NonbatchedBias = ((NonbatchedBias.initialized()) + ((QKVWeight.has_allocation()) ? egr::AmpAutoCast("QKVWeight", + QKVWeight, + amp_dst_dtype, + "fused_gate_attention") + : QKVWeight); + auto NEW_NonbatchedBias = ((NonbatchedBias.has_allocation()) ? egr::AmpAutoCast("NonbatchedBias", NonbatchedBias, amp_dst_dtype, "fused_gate_attention") : NonbatchedBias); - auto NEW_GateWeight = - ((GateWeight.initialized()) ? egr::AmpAutoCast("GateWeight", - GateWeight, - amp_dst_dtype, - "fused_gate_attention") - : GateWeight); + auto NEW_GateWeight = ((GateWeight.has_allocation()) + ? egr::AmpAutoCast("GateWeight", + GateWeight, + amp_dst_dtype, + "fused_gate_attention") + : GateWeight); auto NEW_GateBias = - ((GateBias.initialized()) + ((GateBias.has_allocation()) ? 
egr::AmpAutoCast( "GateBias", GateBias, amp_dst_dtype, "fused_gate_attention") : GateBias); @@ -152,7 +154,7 @@ fused_gate_attention_dygraph_function( {"SrcMask", egr::EagerUtils::TrySyncToVars(SrcMask)}, {"OutLinearWeight", egr::EagerUtils::TrySyncToVars(OutLinearWeight)}, {"OutLinearBias", egr::EagerUtils::TrySyncToVars(OutLinearBias)}}; - if (Key.initialized()) { + if (Key.has_allocation()) { if (Query.data() == Key.data()) { ins["Key"] = ins["Query"]; } else { @@ -160,19 +162,19 @@ fused_gate_attention_dygraph_function( } } - if (QueryWeight.initialized()) + if (QueryWeight.has_allocation()) ins["QueryWeight"] = egr::EagerUtils::TrySyncToVars(QueryWeight); - if (KeyWeight.initialized()) + if (KeyWeight.has_allocation()) ins["KeyWeight"] = egr::EagerUtils::TrySyncToVars(KeyWeight); - if (ValueWeight.initialized()) + if (ValueWeight.has_allocation()) ins["ValueWeight"] = egr::EagerUtils::TrySyncToVars(ValueWeight); - if (QKVWeight.initialized()) + if (QKVWeight.has_allocation()) ins["QKVWeight"] = egr::EagerUtils::TrySyncToVars(QKVWeight); - if (NonbatchedBias.initialized()) + if (NonbatchedBias.has_allocation()) ins["NonbatchedBias"] = egr::EagerUtils::TrySyncToVars(NonbatchedBias); - if (GateWeight.initialized()) + if (GateWeight.has_allocation()) ins["GateWeight"] = egr::EagerUtils::TrySyncToVars(GateWeight); - if (GateBias.initialized()) + if (GateBias.has_allocation()) ins["GateBias"] = egr::EagerUtils::TrySyncToVars(GateBias); std::map>> outs = @@ -370,7 +372,7 @@ fused_gate_attention_dygraph_function( grad_node->SetTensorWrapper_GateOut(GateOut); } - if (NonbatchedBias.initialized()) { + if (NonbatchedBias.has_allocation()) { grad_node->SetTensorWrapper_NonbatchedBias(NonbatchedBias); grad_node->SetGradOutMeta(NonbatchedBias, 6); } diff --git a/paddle/fluid/eager/custom_operator/custom_operator_node.cc b/paddle/fluid/eager/custom_operator/custom_operator_node.cc index b6833bba1d620f..c856f333645336 100644 --- a/paddle/fluid/eager/custom_operator/custom_operator_node.cc +++ b/paddle/fluid/eager/custom_operator/custom_operator_node.cc @@ -204,7 +204,7 @@ RunCustomOpNode::operator()(paddle::small_vector, VLOG(6) << "Prepare Grad inputs"; for (auto& in : tmp_ins) { for (auto& tensor : in) { - if (tensor.initialized() && tensor.is_dense_tensor() && + if (tensor.has_allocation() && tensor.is_dense_tensor() && !std::dynamic_pointer_cast(tensor.impl()) ->meta() .is_contiguous()) { @@ -262,7 +262,7 @@ RunCustomOpNode::operator()(paddle::small_vector, if (ctx.OutputRangeAt(i).first + 1 == ctx.OutputRangeAt(i).second) { paddle::Tensor* out_tensor = ctx.MutableOutputAt(ctx.OutputRangeAt(i).first); - if (!out_tensor->initialized()) { + if (!out_tensor->has_allocation()) { PADDLE_ENFORCE( paddle::framework::detail::IsOptionalVar( grad_outputs_names.at(i)) || diff --git a/paddle/fluid/eager/pylayer/py_layer_node.cc b/paddle/fluid/eager/pylayer/py_layer_node.cc index 921a57c14af962..86d5a9f6df89c8 100644 --- a/paddle/fluid/eager/pylayer/py_layer_node.cc +++ b/paddle/fluid/eager/pylayer/py_layer_node.cc @@ -61,7 +61,7 @@ GradNodePyLayer::operator()( if (ctx->forward_output_tensor_is_duplicable[i]) { PyObject* pylist = PyList_New((Py_ssize_t)grads[i].size()); for (size_t j = 0; j < grads[i].size(); j++) { - if (ctx->materialize_grads && !grads[i][j].initialized()) { + if (ctx->materialize_grads && !grads[i][j].has_allocation()) { if (forward_outputs_is_dist_meta_[i][j]) { paddle::Tensor dist_tensor; dist_tensor.set_impl(std::make_shared( @@ -103,7 +103,7 @@ GradNodePyLayer::operator()( } 
PyTuple_SET_ITEM(backward_args, i, pylist); } else { - if (ctx->materialize_grads && !grads[i][0].initialized()) { + if (ctx->materialize_grads && !grads[i][0].has_allocation()) { if (forward_outputs_is_dist_meta_[i][0]) { paddle::Tensor dist_tensor; dist_tensor.set_impl(std::make_shared( diff --git a/paddle/fluid/eager/tensor_wrapper.h b/paddle/fluid/eager/tensor_wrapper.h index ae04cd88a2e413..1bc7985e2cebbe 100644 --- a/paddle/fluid/eager/tensor_wrapper.h +++ b/paddle/fluid/eager/tensor_wrapper.h @@ -102,7 +102,7 @@ class TensorWrapper { unpack_hook_ = egr::SavedTensorsHooks::GetInstance().GetUnPackHook(); packed_value_ = (*pack_hook)(tensor); } else if (egr::SavedTensorsHooks::GetInstance().IsEnable() && - tensor.initialized() && tensor.is_dist_tensor()) { + tensor.has_allocation() && tensor.is_dist_tensor()) { intermediate_tensor_.set_impl( std::make_shared( tensor.dims(), diff --git a/paddle/fluid/eager/to_static/run_program_op_func.h b/paddle/fluid/eager/to_static/run_program_op_func.h index fae45da4e77f31..523dfd7d76873f 100644 --- a/paddle/fluid/eager/to_static/run_program_op_func.h +++ b/paddle/fluid/eager/to_static/run_program_op_func.h @@ -155,7 +155,7 @@ pir_filter_no_need_buffer_input_var_in_backward( no_need_buffers_values.end(), forward_inputs_values[i]) != no_need_buffers_values.end()) { auto& tensor = filter_x[i]; - if (tensor.initialized() && tensor.is_dense_tensor()) { + if (tensor.has_allocation() && tensor.is_dense_tensor()) { auto copied_dense_tensor = std::make_shared( *std::dynamic_pointer_cast(tensor.impl())); garbages->emplace_back(copied_dense_tensor->MoveMemoryHolder()); diff --git a/paddle/fluid/eager/utils.cc b/paddle/fluid/eager/utils.cc index c632860a5e7639..4603b3cefc8f82 100644 --- a/paddle/fluid/eager/utils.cc +++ b/paddle/fluid/eager/utils.cc @@ -556,7 +556,7 @@ void EagerUtils::FillZeroForEmptyOptionalGradInput( const std::vector& grad_in_metas) { for (size_t i = 0; i < in_grads->size(); i++) { paddle::Tensor& grad = (*in_grads)[i]; - if (!grad.initialized() && grad_in_metas[i].HasTensorMeta()) { + if (!grad.has_allocation() && grad_in_metas[i].HasTensorMeta()) { if (grad_in_metas[i].IsDistMeta()) { grad.set_impl(std::make_shared( grad_in_metas[i].DistTensorGlobalDims(), @@ -591,7 +591,7 @@ void EagerUtils::FillZeroForEmptyOptionalGradOutput( continue; } paddle::Tensor& grad = (*output_grads)[i]; - if (!grad.initialized() && grad_output_metas[i].HasTensorMeta()) { + if (!grad.has_allocation() && grad_output_metas[i].HasTensorMeta()) { if (grad.defined() && grad.is_selected_rows()) { continue; } @@ -624,7 +624,7 @@ void EagerUtils::FillZeroForEmptyOptionalGradOutput( void EagerUtils::FillZeroForEmptyGradInput(paddle::Tensor* in_grad, const GradSlotMeta& grad_in_meta) { - if (!in_grad->initialized()) { + if (!in_grad->has_allocation()) { PADDLE_ENFORCE( grad_in_meta.HasTensorMeta(), common::errors::Fatal( @@ -663,7 +663,7 @@ void EagerUtils::FillZeroForEmptyGradInput(paddle::Tensor* in_grad, void EagerUtils::FillZeroForEmptyOptionalGradInput( paddle::Tensor* in_grad, const GradSlotMeta& grad_in_meta) { - if (!in_grad->initialized() && grad_in_meta.HasTensorMeta()) { + if (!in_grad->has_allocation() && grad_in_meta.HasTensorMeta()) { const auto& tensor_meta = grad_in_meta.GetTensorMeta(); if (grad_in_meta.IsDistMeta()) { in_grad->set_impl(std::make_shared( diff --git a/paddle/fluid/framework/executor_cache.cc b/paddle/fluid/framework/executor_cache.cc index 3a1892be4606d3..046768b42955fb 100644 --- a/paddle/fluid/framework/executor_cache.cc +++ 
b/paddle/fluid/framework/executor_cache.cc
@@ -365,7 +365,7 @@ std::unique_ptr<::pir::Program> ConstructBackwardIrProgram(
     if (scope->FindVar(var_name)) {
      auto tensor = scope->FindVar(var_name)->Get<phi::DenseTensor>();
      phi::AllocationType p = place.GetType();
-      if (tensor.initialized()) {
+      if (tensor.has_allocation()) {
        p = tensor.place().GetType();
      }
diff --git a/paddle/fluid/framework/new_executor/collect_shape_manager.cc b/paddle/fluid/framework/new_executor/collect_shape_manager.cc
index d0b94ca9c5d68c..3a60409665a26c 100644
--- a/paddle/fluid/framework/new_executor/collect_shape_manager.cc
+++ b/paddle/fluid/framework/new_executor/collect_shape_manager.cc
@@ -56,8 +56,9 @@ void CollectShapeManager::CollectShapeInfo(
   }
   auto tensor = var->Get<phi::DenseTensor>();
-  if (!tensor.initialized() && !instr->NoNeedBuffer().count(input.first)) {
-    VLOG(3) << "input tensor is initialized: " << (tensor.initialized());
+  if (!tensor.has_allocation() && !instr->NoNeedBuffer().count(input.first)) {
+    VLOG(3) << "input tensor has_allocation: "
+            << (tensor.has_allocation());
     VLOG(3) << "input tensor is no need buffer:"
             << instr->NoNeedBuffer().count(input.first);
     VLOG(3) << "input tensor is not initialized and not no need buffer, jump "
diff --git a/paddle/fluid/framework/new_executor/instruction/instruction_base.cc b/paddle/fluid/framework/new_executor/instruction/instruction_base.cc
index 0acd423665fb0e..8ba8d33ff16eef 100644
--- a/paddle/fluid/framework/new_executor/instruction/instruction_base.cc
+++ b/paddle/fluid/framework/new_executor/instruction/instruction_base.cc
@@ -63,13 +63,13 @@ static std::string GetDtype(const Scope& scope, const std::string& name) {
   if (var->IsType<phi::DenseTensor>()) {
     const phi::DenseTensor& tensor = var->Get<phi::DenseTensor>();
-    if (UNLIKELY(!tensor.IsInitialized())) {
+    if (UNLIKELY(!tensor.has_allocation())) {
       return "";
     }
     return DataTypeToString(framework::TransToProtoVarType(tensor.dtype()));
   } else if (var->IsType<phi::SelectedRows>()) {
     auto tensor = var->Get<phi::SelectedRows>().value();
-    if (UNLIKELY(!tensor.IsInitialized())) {
+    if (UNLIKELY(!tensor.has_allocation())) {
       return "uninited";
     } else {
       return DataTypeToString(framework::TransToProtoVarType(tensor.dtype()));
@@ -94,13 +94,13 @@ static std::string GetPlace(const Scope& scope, const std::string& name) {
   if (var->IsType<phi::DenseTensor>()) {
     const phi::DenseTensor& tensor = var->Get<phi::DenseTensor>();
-    if (UNLIKELY(!tensor.IsInitialized())) {
+    if (UNLIKELY(!tensor.has_allocation())) {
       return "";
     }
     return to_string(tensor.place());
   } else if (var->IsType<phi::SelectedRows>()) {
     auto tensor = var->Get<phi::SelectedRows>().value();
-    if (UNLIKELY(!tensor.IsInitialized())) {
+    if (UNLIKELY(!tensor.has_allocation())) {
       return "uninited";
     } else {
       return to_string(tensor.place());
diff --git a/paddle/fluid/framework/new_executor/interpreter/static_build.cc b/paddle/fluid/framework/new_executor/interpreter/static_build.cc
index 09a899d50eaf95..9e530761b76245 100644
--- a/paddle/fluid/framework/new_executor/interpreter/static_build.cc
+++ b/paddle/fluid/framework/new_executor/interpreter/static_build.cc
@@ -76,13 +76,13 @@ static VarMetaInfo GetVarMetaInfo(const Scope& scope, const std::string& name) {
   if (var->IsType<phi::DenseTensor>()) {
     const phi::DenseTensor& tensor = var->Get<phi::DenseTensor>();
-    if (!UNLIKELY(!tensor.IsInitialized())) {
+    if (!UNLIKELY(!tensor.has_allocation())) {
       dtype = tensor.dtype();
       place = tensor.place();
     }
   } else if (var->IsType<phi::SelectedRows>()) {
     auto tensor = var->Get<phi::SelectedRows>().value();
-    if (!UNLIKELY(!tensor.IsInitialized())) {
+    if (!UNLIKELY(!tensor.has_allocation())) {
       dtype = tensor.dtype();
       place = tensor.place();
     }
@@ -323,13 +323,13 @@ void FakeInitializeTensor(const
phi::DeviceContext& dev_ctx, common::errors::InvalidArgument( "The tensor to fake initialize should not be null.")); - if (tensor->initialized() && place == tensor->place() && + if (tensor->has_allocation() && place == tensor->place() && dtype == tensor->dtype() && tensor->layout() == layout) { return; } // set place - if (tensor->initialized()) { // avoid overwriting valid data + if (tensor->has_allocation()) { // avoid overwriting valid data phi::DeviceContext* dev_ctx_for_copy = nullptr; if (place.GetType() != AllocationType::CPU) { dev_ctx_for_copy = phi::DeviceContextPool::Instance().Get(place); diff --git a/paddle/fluid/framework/new_executor/pir_adaptor/pir_adaptor_util.cc b/paddle/fluid/framework/new_executor/pir_adaptor/pir_adaptor_util.cc index 7f6864c63c3604..7ecef0a54751f5 100644 --- a/paddle/fluid/framework/new_executor/pir_adaptor/pir_adaptor_util.cc +++ b/paddle/fluid/framework/new_executor/pir_adaptor/pir_adaptor_util.cc @@ -314,13 +314,13 @@ void DeepCopyVariable(const Variable* src_var, // have holder. In this case we only do set_meta but not copy Tensor. if (src_tensor.numel() == 0) { tmp_dst_tensor->set_meta(src_tensor.meta()); - if (src_tensor.IsInitialized()) { + if (src_tensor.has_allocation()) { tmp_dst_tensor->ResetHolder( ::phi::memory_utils::AllocShared(src_tensor.place(), 0u)); } return; } - if (!src_tensor.initialized()) { + if (!src_tensor.has_allocation()) { if (is_optional) { (*dst_var) = nullptr; return; @@ -341,7 +341,7 @@ void DeepCopyVariable(const Variable* src_var, dst_t->set_meta(src_t.meta()); return; } - if (!src_slr.initialized()) { + if (!src_slr.has_allocation()) { if (is_optional) { (*dst_var) = nullptr; return; @@ -354,7 +354,7 @@ void DeepCopyVariable(const Variable* src_var, } else if (src_var->IsType()) { auto src_tensor_array = src_var->Get(); auto* dst_tensor_array = (*dst_var)->GetMutable(); - if (!src_tensor_array.initialized()) { + if (!src_tensor_array.has_allocation()) { if (is_optional) { (*dst_var) = nullptr; return; diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 71ede3b01f11c1..c22b513f491c02 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -135,13 +135,13 @@ static std::string GetDtype(const Scope& scope, const std::string& name) { return "strings"; } else if (var->IsType()) { const phi::SparseCooTensor& tensor = var->Get(); - if (UNLIKELY(!tensor.initialized())) { + if (UNLIKELY(!tensor.has_allocation())) { return ""; } return DataTypeToString(framework::TransToProtoVarType(tensor.dtype())); } else if (var->IsType()) { const phi::SparseCsrTensor& tensor = var->Get(); - if (UNLIKELY(!tensor.initialized())) { + if (UNLIKELY(!tensor.has_allocation())) { return ""; } return DataTypeToString(framework::TransToProtoVarType(tensor.dtype())); diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index c9d5b7cfc6635a..75e9acc28ba7cb 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -3235,7 +3235,7 @@ void AnalysisPredictor::RegisterOutputHook( auto *var = scope->FindVar(var_name); if (!var || !var->IsType()) continue; auto dense_tensor = var->Get(); - if (!dense_tensor.initialized()) continue; + if (!dense_tensor.has_allocation()) continue; auto tensor = paddle::Tensor( std::make_shared(dense_tensor), var_name); for (auto &hookfunc : this->output_hookfuncs_) { @@ -3256,7 +3256,7 @@ void AnalysisPredictor::RegisterOutputHook( 
auto *var = scope->FindVar(var_name); if (!var || !var->IsType()) continue; auto dense_tensor = var->Get(); - if (!dense_tensor.initialized()) continue; + if (!dense_tensor.has_allocation()) continue; auto tensor = paddle::Tensor( std::make_shared(dense_tensor), var_name); for (auto &hookfunc : this->output_hookfuncs_) { @@ -3282,7 +3282,7 @@ void AnalysisPredictor::RegisterInputHook(const InputTensorHookFunc &hookfunc) { auto *var = scope->FindVar(var_name); if (!var || !var->IsType()) continue; auto dense_tensor = var->Get(); - if (!dense_tensor.initialized()) continue; + if (!dense_tensor.has_allocation()) continue; auto tensor = paddle::Tensor( std::make_shared(dense_tensor), var_name); for (auto &hookfunc : this->input_hookfuncs_) { @@ -3303,7 +3303,7 @@ void AnalysisPredictor::RegisterInputHook(const InputTensorHookFunc &hookfunc) { auto *var = scope->FindVar(var_name); if (!var || !var->IsType()) continue; auto dense_tensor = var->Get(); - if (!dense_tensor.initialized()) continue; + if (!dense_tensor.has_allocation()) continue; auto tensor = paddle::Tensor( std::make_shared(dense_tensor), var_name); for (auto &hookfunc : this->input_hookfuncs_) { diff --git a/paddle/fluid/operators/controlflow/fetch_v2_op.cc b/paddle/fluid/operators/controlflow/fetch_v2_op.cc index e4f5918792383c..efc51b38d4d763 100644 --- a/paddle/fluid/operators/controlflow/fetch_v2_op.cc +++ b/paddle/fluid/operators/controlflow/fetch_v2_op.cc @@ -96,7 +96,7 @@ class FetchV2Op : public framework::OperatorWithKernel { } } else if (fetch_var->IsType()) { auto &src_item = fetch_var->Get(); - if (!src_item.initialized()) { + if (!src_item.has_allocation()) { return phi::KernelKey(framework::proto::VarType::FP32, phi::CPUPlace()); } } else { @@ -169,7 +169,7 @@ class FetchV2Kernel { } } else if (fetch_var->IsType()) { auto &src_item = fetch_var->Get(); - if (!src_item.initialized()) { + if (!src_item.has_allocation()) { return; } fetch_list->at(col) = src_item; diff --git a/paddle/fluid/pir/transforms/general/constant_folding_pass.cc b/paddle/fluid/pir/transforms/general/constant_folding_pass.cc index 66669e276ee2ae..910fad8d0701b3 100644 --- a/paddle/fluid/pir/transforms/general/constant_folding_pass.cc +++ b/paddle/fluid/pir/transforms/general/constant_folding_pass.cc @@ -187,7 +187,7 @@ class ConstantFoldingPattern : public pir::RewritePattern { if (use_parameter_op) { if (output_var->IsType()) { auto* output_tensor = output_var->GetMutable(); - if (output_tensor->IsInitialized() && + if (output_tensor->has_allocation() && output_tensor->place().GetType() != place_.GetType()) { phi::DenseTensor temp_tensor; temp_tensor.Resize(output_tensor->dims()); diff --git a/paddle/fluid/pir/transforms/general/remove_shadow_feed_pass.cc b/paddle/fluid/pir/transforms/general/remove_shadow_feed_pass.cc index fd8d4aea8c2d02..2d08a9537c0aa9 100644 --- a/paddle/fluid/pir/transforms/general/remove_shadow_feed_pass.cc +++ b/paddle/fluid/pir/transforms/general/remove_shadow_feed_pass.cc @@ -50,7 +50,7 @@ phi::Place GetVarPlace(const paddle::framework::Variable *var, const phi::Place &exe_place) { phi::Place place; auto &tensor = var->Get(); - if (tensor.initialized()) { + if (tensor.has_allocation()) { place = tensor.place(); } else { place = exe_place; diff --git a/paddle/fluid/prim/api/composite_backward/composite_double_backward_api.h b/paddle/fluid/prim/api/composite_backward/composite_double_backward_api.h index 623e2987fbc9c5..609c1ed20bf9cf 100644 --- a/paddle/fluid/prim/api/composite_backward/composite_double_backward_api.h +++ 
b/paddle/fluid/prim/api/composite_backward/composite_double_backward_api.h @@ -593,37 +593,39 @@ void matmul_double_grad(const Tensor& x, } // recover the original dim of output (delete 1) - std::vector dx_dims = dx.initialized() ? common::vectorize(dx.dims()) - : std::vector({}); - std::vector dy_dims = dy.initialized() ? common::vectorize(dy.dims()) - : std::vector({}); - std::vector ddout_dims = ddout.initialized() + std::vector dx_dims = dx.has_allocation() + ? common::vectorize(dx.dims()) + : std::vector({}); + std::vector dy_dims = dy.has_allocation() + ? common::vectorize(dy.dims()) + : std::vector({}); + std::vector ddout_dims = ddout.has_allocation() ? common::vectorize(ddout.dims()) : std::vector({}); if (x_ndim == 1 && y_ndim == 1) { - if (dx.initialized() && dx_dims[0] == 1) { + if (dx.has_allocation() && dx_dims[0] == 1) { dx = reshape(dx, IntArray(x_dims)); } - if (dy.initialized() && dy_dims.back() == 1) { + if (dy.has_allocation() && dy_dims.back() == 1) { dy = reshape(dy, IntArray(y_dims)); } - if (ddout.initialized() && ddout_dims == std::vector({1, 1})) { + if (ddout.has_allocation() && ddout_dims == std::vector({1, 1})) { ddout = reshape(ddout, IntArray(std::vector({1}))); } } else if (x_ndim == 1) { - if (dx.initialized() && dx_dims[0] == 1) { + if (dx.has_allocation() && dx_dims[0] == 1) { dx = reshape(dx, IntArray(x_dims)); } - if (ddout.initialized() && ddout_dims[0] == 1) { + if (ddout.has_allocation() && ddout_dims[0] == 1) { ddout = reshape(ddout, IntArray(std::vector( {ddout_dims.cbegin() + 1, ddout_dims.cend()}))); } } else if (y_ndim == 1) { - if (dy.initialized() && dy_dims.back() == 1) { + if (dy.has_allocation() && dy_dims.back() == 1) { dy = reshape(dy, IntArray(y_dims)); } - if (ddout.initialized() && ddout_dims.back() == 1) { + if (ddout.has_allocation() && ddout_dims.back() == 1) { ddout = reshape(ddout, IntArray(std::vector( {ddout_dims.cbegin(), diff --git a/paddle/fluid/pybind/eager.cc b/paddle/fluid/pybind/eager.cc index dba28787c1acf6..8f4551fdeb72bc 100644 --- a/paddle/fluid/pybind/eager.cc +++ b/paddle/fluid/pybind/eager.cc @@ -257,7 +257,7 @@ void InitDistTensorWithTensor(TensorObject* self, VLOG(4) << "Same place, do ShareDataWith for DistTensor."; } else { std::shared_ptr tensor; - if (src.initialized()) { + if (src.has_allocation()) { tensor = std::static_pointer_cast( src.copy_to(place, true).impl()); } else { diff --git a/paddle/fluid/pybind/eager_custom_python_api.h b/paddle/fluid/pybind/eager_custom_python_api.h index 4eadf73330c0ff..a1d0e4730e871e 100644 --- a/paddle/fluid/pybind/eager_custom_python_api.h +++ b/paddle/fluid/pybind/eager_custom_python_api.h @@ -35,7 +35,7 @@ static PyObject *eager_api_linear(PyObject *self, tstate = PyEval_SaveThread(); - if (bias.is_dist_tensor() || bias.initialized()) { + if (bias.is_dist_tensor() || bias.has_allocation()) { const phi::distributed::ProcessMesh *mesh = nullptr; if (InputsContainDistTensor(&mesh, x, weight, bias)) { ConvertAllInputsToDistTensor(mesh, x, weight, bias); diff --git a/paddle/fluid/pybind/eager_functions.cc b/paddle/fluid/pybind/eager_functions.cc index 381c3fb14737ef..e74b4e67f8a8e5 100644 --- a/paddle/fluid/pybind/eager_functions.cc +++ b/paddle/fluid/pybind/eager_functions.cc @@ -244,7 +244,7 @@ PyObject* eager_api_get_all_grads(PyObject* self, ret.emplace_back(paddle::Tensor()); continue; } - if (meta && meta->Grad().initialized()) { + if (meta && meta->Grad().has_allocation()) { ret.emplace_back(meta->Grad()); } else { ret.emplace_back(paddle::Tensor()); @@ -265,7 +265,7 
@@ PyObject* eager_api_get_grads_lists(PyObject* self,
   for (auto& tensor : tensor_list) {
     VLOG(6) << "Get grad for tensor: " << tensor.name();
     auto meta = egr::EagerUtils::nullable_autograd_meta(tensor);
-    if (meta && meta->Grad().initialized()) {
+    if (meta && meta->Grad().has_allocation()) {
       auto& grad = meta->Grad();
       switch (grad.dtype()) {
         case phi::DataType::FLOAT16:
@@ -727,7 +727,7 @@ PyObject* eager_api_run_custom_op(PyObject* self,
     if (ctx.OutputRangeAt(i).first + 1 == ctx.OutputRangeAt(i).second) {
       paddle::Tensor* out_tensor =
           ctx.MutableOutputAt(ctx.OutputRangeAt(i).first);
-      if (!out_tensor->initialized()) {
+      if (!out_tensor->has_allocation()) {
         PADDLE_ENFORCE(
             paddle::framework::detail::IsOptionalVar(outputs.at(i)) ||
                 out_tensor->is_dist_tensor(),
@@ -1353,7 +1353,7 @@ static PyObject* eager_api_set_master_grads(PyObject* self,
                    common::errors::Fatal("Detected nullptr grad"
                                          "Please check if you have manually cleared"
                                          "the grad inside autograd_meta"));
-    if (((*grad).initialized() || (*grad).is_dist_tensor()) &&
+    if (((*grad).has_allocation() || (*grad).is_dist_tensor()) &&
         ((*grad).dtype() == phi::DataType::FLOAT16 ||
          (*grad).dtype() == phi::DataType::BFLOAT16)) {
       auto master_grad =
diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc
index f3cb62afe32d53..07c27fc566b3f6 100644
--- a/paddle/fluid/pybind/eager_method.cc
+++ b/paddle/fluid/pybind/eager_method.cc
@@ -89,7 +89,7 @@ Py_ssize_t GetSliceIndexFromPyObject(PyObject* obj) {
   VLOG(6) << "Call GetSliceIndexFromTensor in Eager";
   paddle::Tensor tensor = CastPyArg2Tensor(obj, 0);
   PADDLE_ENFORCE_EQ(
-      tensor.initialized(),
+      tensor.has_allocation(),
       true,
       common::errors::InvalidArgument(
           "We can only support initialized tensor in slice, however we got "
@@ -691,7 +691,7 @@ static PyObject* tensor_method_copy_(TensorObject* self,
   bool blocking = CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 1), 1);
   VLOG(6) << "Start Copy Tensor " << src_tensor.name() << " to "
           << self->tensor.name();
-  if (!self->tensor.initialized()) {
+  if (!self->tensor.has_allocation()) {
     eager_gil_scoped_release guard;
     EagerSetDeviceId();
@@ -1402,12 +1402,12 @@ static PyObject* tensor_method__get_tensor_from_selected_rows(
   auto* selected_rows =
       static_cast<phi::SelectedRows*>(self->tensor.impl().get());
-  PADDLE_ENFORCE(selected_rows->initialized(),
-                 common::errors::Fatal("SelectedRows must be initialized."));
+  PADDLE_ENFORCE(selected_rows->has_allocation(),
+                 common::errors::Fatal("SelectedRows must have an allocation."));
   auto* dense_tensor =
       static_cast<phi::DenseTensor*>(selected_rows->mutable_value());
-  VLOG(4) << "dense_tensor: " << dense_tensor->IsInitialized();
+  VLOG(4) << "dense_tensor: " << dense_tensor->has_allocation();
   auto t = paddle::Tensor(egr::Controller::Instance().GenerateUniqueName());
   t.set_impl(std::make_shared<phi::DenseTensor>(*dense_tensor));
@@ -1757,7 +1757,7 @@ static PyObject* tensor__setitem_dygraph(TensorObject* self,
     // Release gil and do tracing
     py::gil_scoped_release release;
     // use inplace set_value_ operator
-    if (value_tensor.initialized()) {
+    if (value_tensor.has_allocation()) {
       if (self->tensor.dtype() != value_tensor.dtype()) {
         if (egr::Controller::Instance().GetAMPLevel() !=
             paddle::imperative::AmpLevel::O0) {
@@ -1852,7 +1852,7 @@ static PyObject* tensor__setitem_dygraph(TensorObject* self,
     // Release gil and do tracing
     py::gil_scoped_release release;
-    if (value_tensor.initialized()) {
+    if (value_tensor.has_allocation()) {
       if (self->tensor.dtype() != value_tensor.dtype()) {
         if (egr::Controller::Instance().GetAMPLevel() !=
paddle::imperative::AmpLevel::O0) { @@ -2180,7 +2180,7 @@ static PyObject* tensor__copy_gradient_from(TensorObject* self, PyObject* kwargs) { EAGER_TRY auto src = CastPyArg2Tensor(PyTuple_GET_ITEM(args, 0), 0); - if (self->tensor.initialized()) { + if (self->tensor.has_allocation()) { PADDLE_ENFORCE_EQ(self->tensor.dtype(), src.dtype(), common::errors::PreconditionNotMet( @@ -2198,7 +2198,7 @@ static PyObject* tensor__copy_gradient_from(TensorObject* self, VLOG(6) << "Tensor copy gradient from: " << src.name(); auto* p_grad = egr::EagerUtils::mutable_grad(self->tensor); if (p_grad) { - PADDLE_ENFORCE_EQ(src.initialized(), + PADDLE_ENFORCE_EQ(src.has_allocation(), true, common::errors::InvalidArgument( "Tensor %s has not been initialized", src.name())); @@ -3177,11 +3177,11 @@ static PyObject* tensor__grad_ivar(TensorObject* self, EAGER_TRY VLOG(6) << "Get grad for tensor: " << self->tensor.name(); auto meta = egr::EagerUtils::nullable_autograd_meta(self->tensor); - VLOG(6) << meta << " initialized: " << meta->Grad().initialized(); - if (meta && meta->Grad().initialized()) { + VLOG(6) << meta << " has_allocation: " << meta->Grad().has_allocation(); + if (meta && meta->Grad().has_allocation()) { return ToPyObject(meta->Grad()); } else { - if (meta && !meta->Grad().initialized() && meta->Grad().impl() && + if (meta && !meta->Grad().has_allocation() && meta->Grad().impl() && meta->Grad().is_dist_tensor()) { return ToPyObject(meta->Grad(), false); } diff --git a/paddle/fluid/pybind/tensor_py.h b/paddle/fluid/pybind/tensor_py.h index 9c60552d87e69b..e11d90a22b8639 100644 --- a/paddle/fluid/pybind/tensor_py.h +++ b/paddle/fluid/pybind/tensor_py.h @@ -985,7 +985,7 @@ inline phi::DenseTensor *PySliceTensor(const phi::DenseTensor &self, inline py::array TensorToPyArray(const phi::DenseTensor &tensor, py::object copy = py::none()) { - if (!tensor.IsInitialized()) { + if (!tensor.has_allocation()) { return py::array(); } bool is_gpu_tensor = phi::is_gpu_place(tensor.place()); diff --git a/paddle/phi/api/lib/api_gen_utils.cc b/paddle/phi/api/lib/api_gen_utils.cc index 5c9e1a2435e465..781f9e2ab655b1 100644 --- a/paddle/phi/api/lib/api_gen_utils.cc +++ b/paddle/phi/api/lib/api_gen_utils.cc @@ -250,7 +250,7 @@ std::vector SetKernelOutput(std::vector* out) { } phi::SelectedRows* SetSelectedRowsKernelOutput(Tensor* out) { - if (!out->initialized()) { + if (!out->defined()) { auto select_rows = std::make_shared(); out->set_impl(select_rows); return select_rows.get(); @@ -262,7 +262,7 @@ phi::TensorBase* SetSparseKernelOutput(Tensor* out, TensorType type) { if (!out) { return nullptr; } - if (!out->initialized()) { + if (!out->defined()) { if (type == TensorType::SPARSE_COO) { auto sparse_tensor = std::make_shared( phi::DenseTensor(), phi::DenseTensor(), phi::DDim{-1}); @@ -286,7 +286,7 @@ phi::TensorBase* SetSparseKernelOutput(Tensor* out, TensorType type) { } phi::TensorBase* SetStringsKernelOutput(Tensor* out, TensorType type) { - if (!out->initialized()) { + if (!out->defined()) { if (type == TensorType::STRING_TENSOR) { if (out->impl() == nullptr) { auto strings_tensor = std::make_shared(); diff --git a/paddle/phi/core/device_context.cc b/paddle/phi/core/device_context.cc index ccc70ea8217866..6f5978e0729f1f 100644 --- a/paddle/phi/core/device_context.cc +++ b/paddle/phi/core/device_context.cc @@ -156,7 +156,7 @@ struct DeviceContext::Impl { ClearHolder(tensor); } } else { - if (tensor->initialized() && tensor->place() != place) { + if (tensor->has_allocation() && tensor->place() != place) { 
diff --git a/paddle/phi/core/device_context.cc b/paddle/phi/core/device_context.cc
index ccc70ea8217866..6f5978e0729f1f 100644
--- a/paddle/phi/core/device_context.cc
+++ b/paddle/phi/core/device_context.cc
@@ -156,7 +156,7 @@ struct DeviceContext::Impl {
       ClearHolder(tensor);
     }
   } else {
-    if (tensor->initialized() && tensor->place() != place) {
+    if (tensor->has_allocation() && tensor->place() != place) {
       ClearHolder(tensor);
     }
   }
@@ -213,7 +213,7 @@ struct DeviceContext::Impl {
       ClearHolder(tensor);
     }
   } else {
-    if (tensor->initialized() && tensor->place() != CPUPlace()) {
+    if (tensor->has_allocation() && tensor->place() != CPUPlace()) {
       ClearHolder(tensor);
     }
   }
diff --git a/paddle/phi/core/selected_rows_impl.cc b/paddle/phi/core/selected_rows_impl.cc
index be389e5e3355ca..762625cc05a1cd 100644
--- a/paddle/phi/core/selected_rows_impl.cc
+++ b/paddle/phi/core/selected_rows_impl.cc
@@ -176,7 +176,7 @@ void SelectedRowsImpl::Get(const phi::DenseTensor& ids,
                            bool auto_grown,
                            bool is_test) {
   PADDLE_ENFORCE_EQ(
-      value->IsInitialized(),
+      value->has_allocation(),
       true,
       common::errors::InvalidArgument("The value tensor is not initialized."));
   if (ids.numel() == 0) {
diff --git a/paddle/phi/core/tensor_array.cc b/paddle/phi/core/tensor_array.cc
index 6bdfe132db1c27..3ee8407d513f84 100644
--- a/paddle/phi/core/tensor_array.cc
+++ b/paddle/phi/core/tensor_array.cc
@@ -44,7 +44,7 @@ bool TensorArray::initialized() const {
   }
 
   for (auto const& tensor : tensors_) {
-    if (!tensor.initialized()) {
+    if (!tensor.has_allocation()) {
       return false;
     }
  }
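The DeviceContext hunks close out PATCH 1/5 by extending holder reuse to 0-size tensors: storage on the wrong place has to be cleared before re-allocation even when it is zero bytes, because the holder still records where it was allocated. The decision logic, reduced to a sketch (the real code calls the private Impl::ClearHolder; the helper below is hypothetical and the predicate semantics are assumed as above):

    #include "paddle/phi/core/dense_tensor.h"

    // True when Alloc must drop the current holder first. Keying on
    // has_allocation() makes the place check cover zero-byte holders too.
    bool NeedsHolderClear(const phi::DenseTensor& tensor,
                          const phi::Place& place) {
      return tensor.has_allocation() && tensor.place() != place;
    }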
From cf5694eb00dd963eaeb6660d11a5536869c3c6cd Mon Sep 17 00:00:00 2001
From: Wang Huan
Date: Fri, 14 Mar 2025 01:16:49 +0000
Subject: [PATCH 2/5] refine

---
 .../composite_double_backward_api.h           | 26 +++++++++-------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/paddle/fluid/prim/api/composite_backward/composite_double_backward_api.h b/paddle/fluid/prim/api/composite_backward/composite_double_backward_api.h
index 609c1ed20bf9cf..623e2987fbc9c5 100644
--- a/paddle/fluid/prim/api/composite_backward/composite_double_backward_api.h
+++ b/paddle/fluid/prim/api/composite_backward/composite_double_backward_api.h
@@ -593,39 +593,37 @@ void matmul_double_grad(const Tensor& x,
   }
 
   // recover the original dim of output (delete 1)
-  std::vector<int64_t> dx_dims = dx.has_allocation()
-                                     ? common::vectorize(dx.dims())
-                                     : std::vector<int64_t>({});
-  std::vector<int64_t> dy_dims = dy.has_allocation()
-                                     ? common::vectorize(dy.dims())
-                                     : std::vector<int64_t>({});
-  std::vector<int64_t> ddout_dims = ddout.has_allocation()
-                                        ? common::vectorize(ddout.dims())
-                                        : std::vector<int64_t>({});
+  std::vector<int64_t> dx_dims = dx.initialized() ? common::vectorize(dx.dims())
+                                                  : std::vector<int64_t>({});
+  std::vector<int64_t> dy_dims = dy.initialized() ? common::vectorize(dy.dims())
+                                                  : std::vector<int64_t>({});
+  std::vector<int64_t> ddout_dims = ddout.initialized()
+                                        ? common::vectorize(ddout.dims())
+                                        : std::vector<int64_t>({});
   if (x_ndim == 1 && y_ndim == 1) {
-    if (dx.has_allocation() && dx_dims[0] == 1) {
+    if (dx.initialized() && dx_dims[0] == 1) {
       dx = reshape(dx, IntArray(x_dims));
     }
-    if (dy.has_allocation() && dy_dims.back() == 1) {
+    if (dy.initialized() && dy_dims.back() == 1) {
       dy = reshape(dy, IntArray(y_dims));
     }
-    if (ddout.has_allocation() && ddout_dims == std::vector<int64_t>({1, 1})) {
+    if (ddout.initialized() && ddout_dims == std::vector<int64_t>({1, 1})) {
       ddout = reshape(ddout, IntArray(std::vector<int64_t>({1})));
     }
   } else if (x_ndim == 1) {
-    if (dx.has_allocation() && dx_dims[0] == 1) {
+    if (dx.initialized() && dx_dims[0] == 1) {
       dx = reshape(dx, IntArray(x_dims));
     }
-    if (ddout.has_allocation() && ddout_dims[0] == 1) {
+    if (ddout.initialized() && ddout_dims[0] == 1) {
       ddout = reshape(ddout,
                       IntArray(std::vector<int64_t>(
                           {ddout_dims.cbegin() + 1, ddout_dims.cend()})));
     }
   } else if (y_ndim == 1) {
-    if (dy.has_allocation() && dy_dims.back() == 1) {
+    if (dy.initialized() && dy_dims.back() == 1) {
       dy = reshape(dy, IntArray(y_dims));
     }
-    if (ddout.has_allocation() && ddout_dims.back() == 1) {
+    if (ddout.initialized() && ddout_dims.back() == 1) {
       ddout = reshape(ddout,
                       IntArray(std::vector<int64_t>(
                           {ddout_dims.cbegin(),
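PATCH 2/5 walks matmul_double_grad back to initialized(). In the composite rule the predicate is not about storage: it asks whether a grad result was actually produced before its dims() are read, and the same test has to gate the later reshape so the two stay consistent. The pattern, lifted from the hunk above with the extraction-stripped template arguments restored by hand:

    // Query dims() only behind the predicate that also guards the
    // reshape; an absent dx yields an empty dim vector instead.
    std::vector<int64_t> dx_dims = dx.initialized()
                                       ? common::vectorize(dx.dims())
                                       : std::vector<int64_t>({});
    if (dx.initialized() && dx_dims[0] == 1) {
      dx = reshape(dx, IntArray(x_dims));
    }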
From 5483213331a7d8e7e4d722dd544b568f2a3cc378 Mon Sep 17 00:00:00 2001
From: Wang Huan
Date: Fri, 14 Mar 2025 02:26:07 +0000
Subject: [PATCH 3/5] refine

---
 paddle/fluid/distributed/collective/reducer.cc | 2 +-
 paddle/fluid/pybind/eager.cc                   | 2 +-
 paddle/phi/api/lib/api_gen_utils.cc            | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/paddle/fluid/distributed/collective/reducer.cc b/paddle/fluid/distributed/collective/reducer.cc
index 4140c89635c184..cf6d8211eedd14 100644
--- a/paddle/fluid/distributed/collective/reducer.cc
+++ b/paddle/fluid/distributed/collective/reducer.cc
@@ -983,7 +983,7 @@ void EagerReducer::MarkGroupReady(size_t group_index) {
 
 bool EagerReducer::HasGrad(size_t var_index) {
   auto grad = egr::EagerUtils::mutable_grad(tensors_[var_index]);
-  if (grad && grad->has_allocation()) {
+  if (grad && grad->initialized()) {
     return true;
   } else {
     return false;
diff --git a/paddle/fluid/pybind/eager.cc b/paddle/fluid/pybind/eager.cc
index 8f4551fdeb72bc..dba28787c1acf6 100644
--- a/paddle/fluid/pybind/eager.cc
+++ b/paddle/fluid/pybind/eager.cc
@@ -257,7 +257,7 @@ void InitDistTensorWithTensor(TensorObject* self,
     VLOG(4) << "Same place, do ShareDataWith for DistTensor.";
   } else {
     std::shared_ptr<phi::DenseTensor> tensor;
-    if (src.has_allocation()) {
+    if (src.initialized()) {
       tensor = std::static_pointer_cast<phi::DenseTensor>(
           src.copy_to(place, true).impl());
     } else {
diff --git a/paddle/phi/api/lib/api_gen_utils.cc b/paddle/phi/api/lib/api_gen_utils.cc
index 781f9e2ab655b1..5c9e1a2435e465 100644
--- a/paddle/phi/api/lib/api_gen_utils.cc
+++ b/paddle/phi/api/lib/api_gen_utils.cc
@@ -250,7 +250,7 @@ std::vector<phi::DenseTensor*> SetKernelOutput(std::vector<Tensor>* out) {
 }
 
 phi::SelectedRows* SetSelectedRowsKernelOutput(Tensor* out) {
-  if (!out->defined()) {
+  if (!out->initialized()) {
     auto select_rows = std::make_shared<phi::SelectedRows>();
     out->set_impl(select_rows);
     return select_rows.get();
@@ -262,7 +262,7 @@ phi::TensorBase* SetSparseKernelOutput(Tensor* out, TensorType type) {
   if (!out) {
     return nullptr;
   }
-  if (!out->defined()) {
+  if (!out->initialized()) {
     if (type == TensorType::SPARSE_COO) {
       auto sparse_tensor = std::make_shared<phi::SparseCooTensor>(
           phi::DenseTensor(), phi::DenseTensor(), phi::DDim{-1});
@@ -286,7 +286,7 @@ phi::TensorBase* SetSparseKernelOutput(Tensor* out, TensorType type) {
 }
 
 phi::TensorBase* SetStringsKernelOutput(Tensor* out, TensorType type) {
-  if (!out->defined()) {
+  if (!out->initialized()) {
     if (type == TensorType::STRING_TENSOR) {
       if (out->impl() == nullptr) {
         auto strings_tensor = std::make_shared<phi::StringTensor>();
From f003a1a38e1a297a1a99ad8a97617facf8bc2d9c Mon Sep 17 00:00:00 2001
From: Wang Huan
Date: Fri, 14 Mar 2025 02:43:39 +0000
Subject: [PATCH 4/5] refine

---
 paddle/fluid/pybind/eager_method.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc
index 07c27fc566b3f6..a3cb6725f7e26f 100644
--- a/paddle/fluid/pybind/eager_method.cc
+++ b/paddle/fluid/pybind/eager_method.cc
@@ -691,7 +691,7 @@ static PyObject* tensor_method_copy_(TensorObject* self,
   bool blocking = CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 1), 1);
   VLOG(6) << "Start Copy Tensor " << src_tensor.name() << " to "
           << self->tensor.name();
-  if (!self->tensor.has_allocation()) {
+  if (!self->tensor.initialized()) {
     eager_gil_scoped_release guard;
     EagerSetDeviceId();
@@ -1757,7 +1757,7 @@ static PyObject* tensor__setitem_dygraph(TensorObject* self,
     // Release gil and do tracing
     py::gil_scoped_release release;
     // use inplace set_value_ operator
-    if (value_tensor.has_allocation()) {
+    if (value_tensor.initialized()) {
       if (self->tensor.dtype() != value_tensor.dtype()) {
         if (egr::Controller::Instance().GetAMPLevel() !=
             paddle::imperative::AmpLevel::O0) {
@@ -1852,7 +1852,7 @@ static PyObject* tensor__setitem_dygraph(TensorObject* self,
     // Release gil and do tracing
     py::gil_scoped_release release;
-    if (value_tensor.has_allocation()) {
+    if (value_tensor.initialized()) {
       if (self->tensor.dtype() != value_tensor.dtype()) {
         if (egr::Controller::Instance().GetAMPLevel() !=
             paddle::imperative::AmpLevel::O0) {
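PATCH 3/5 and PATCH 4/5 revert the call sites whose real question is "does the source carry data to read or copy?" A 0-size tensor may own a zero-byte allocation, but it still has nothing to copy, so initialized() is the intended guard for copies, __setitem__, and grad detection in the reducer. A usage sketch (copy_to is the public Tensor API the diff above itself uses; the helper name is hypothetical):

    // Copy only when src has readable data; 0-size or empty sources
    // leave dst untouched, matching the reverted call sites above.
    void MaybeCopyTo(const paddle::Tensor& src,
                     paddle::Tensor* dst,
                     const phi::Place& place) {
      if (src.initialized()) {
        *dst = src.copy_to(place, /*blocking=*/true);
      }
    }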
From e82591df3cb41baf5b44a9acd0c14471edc6361b Mon Sep 17 00:00:00 2001
From: Wang Huan
Date: Fri, 14 Mar 2025 08:01:48 +0000
Subject: [PATCH 5/5] refine

---
 paddle/fluid/eager/utils.cc | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/paddle/fluid/eager/utils.cc b/paddle/fluid/eager/utils.cc
index 4603b3cefc8f82..c632860a5e7639 100644
--- a/paddle/fluid/eager/utils.cc
+++ b/paddle/fluid/eager/utils.cc
@@ -556,7 +556,7 @@ void EagerUtils::FillZeroForEmptyOptionalGradInput(
     const std::vector<GradSlotMeta>& grad_in_metas) {
   for (size_t i = 0; i < in_grads->size(); i++) {
     paddle::Tensor& grad = (*in_grads)[i];
-    if (!grad.has_allocation() && grad_in_metas[i].HasTensorMeta()) {
+    if (!grad.initialized() && grad_in_metas[i].HasTensorMeta()) {
       if (grad_in_metas[i].IsDistMeta()) {
         grad.set_impl(std::make_shared<phi::distributed::DistTensor>(
             grad_in_metas[i].DistTensorGlobalDims(),
@@ -591,7 +591,7 @@ void EagerUtils::FillZeroForEmptyOptionalGradOutput(
       continue;
     }
     paddle::Tensor& grad = (*output_grads)[i];
-    if (!grad.has_allocation() && grad_output_metas[i].HasTensorMeta()) {
+    if (!grad.initialized() && grad_output_metas[i].HasTensorMeta()) {
       if (grad.defined() && grad.is_selected_rows()) {
         continue;
       }
@@ -624,7 +624,7 @@ void EagerUtils::FillZeroForEmptyOptionalGradOutput(
 
 void EagerUtils::FillZeroForEmptyGradInput(paddle::Tensor* in_grad,
                                            const GradSlotMeta& grad_in_meta) {
-  if (!in_grad->has_allocation()) {
+  if (!in_grad->initialized()) {
     PADDLE_ENFORCE(
         grad_in_meta.HasTensorMeta(),
         common::errors::Fatal(
@@ -663,7 +663,7 @@ void EagerUtils::FillZeroForEmptyGradInput(paddle::Tensor* in_grad,
 
 void EagerUtils::FillZeroForEmptyOptionalGradInput(
     paddle::Tensor* in_grad, const GradSlotMeta& grad_in_meta) {
-  if (!in_grad->has_allocation() && grad_in_meta.HasTensorMeta()) {
+  if (!in_grad->initialized() && grad_in_meta.HasTensorMeta()) {
     const auto& tensor_meta = grad_in_meta.GetTensorMeta();
     if (grad_in_meta.IsDistMeta()) {
       in_grad->set_impl(std::make_shared<phi::distributed::DistTensor>(