Commit 7735abf

Support fp8
1 parent e81a049 commit 7735abf

26 files changed: +688 −5 lines

paddle/fluid/framework/data_type_transform.cc

Lines changed: 4 additions & 0 deletions
@@ -194,6 +194,10 @@ void TransDataType(const phi::DenseTensor& in,
     case proto::VarType::FP32:
      framework::VisitDataType(dst_type, CastDataType<float>(in, out, ctx));
      break;
+    case proto::VarType::FP8:
+      framework::VisitDataType(
+          dst_type, CastDataType<platform::float8_e4m3>(in, out, ctx));
+      break;
     case proto::VarType::FP64:
      framework::VisitDataType(dst_type, CastDataType<double>(in, out, ctx));
      break;
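
For reference, the 8-bit e4m3 layout that CastDataType<platform::float8_e4m3> casts to and from packs 1 sign bit, 4 exponent bits (bias 7), and 3 mantissa bits. Below is a minimal, standalone decoding sketch assuming the common e4m3fn convention (no infinities, a single NaN bit pattern); it is illustrative only and independent of the phi::dtype::float8_e4m3 implementation.

```cpp
#include <cmath>
#include <cstdint>

// Decode one e4m3fn byte into a float (assumptions: bias 7, S.1111.111 is the
// only NaN encoding, exponent 0 marks subnormals). Not Paddle's implementation.
float DecodeE4M3(uint8_t bits) {
  const int sign = (bits >> 7) & 0x1;
  const int exp = (bits >> 3) & 0xF;
  const int mant = bits & 0x7;
  float value;
  if (exp == 0xF && mant == 0x7) {
    value = NAN;  // no +/-inf in e4m3fn; this pattern is NaN
  } else if (exp == 0) {
    value = std::ldexp(static_cast<float>(mant) / 8.0f, -6);  // subnormal
  } else {
    value = std::ldexp(1.0f + static_cast<float>(mant) / 8.0f, exp - 7);
  }
  return sign != 0 ? -value : value;
}
```

With this layout the largest finite magnitude is 448, which is why fp8 pipelines typically pair casts like the one above with a scaling factor.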

paddle/fluid/framework/framework.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ message VarType {
     BF16 = 22;
     COMPLEX64 = 23;
     COMPLEX128 = 24;
+    FP8 = 32;

     // Other types that may need additional descriptions
     LOD_TENSOR = 7;

paddle/fluid/platform/float8_e4m3.h

Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/phi/common/float8_e4m3.h"
+
+namespace paddle {
+namespace platform {
+using float8_e4m3 = phi::dtype::float8_e4m3;
+using namespace phi::dtype;  // NOLINT
+}  // namespace platform
+}  // namespace paddle
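
The new header only aliases the phi type into paddle::platform for fluid-side code. A hedged usage sketch, assuming phi::dtype::float8_e4m3 exposes float conversions in the same style as phi::dtype::float16 and bfloat16 (an assumption, not something this diff shows):

```cpp
#include "paddle/fluid/platform/float8_e4m3.h"

// Assumption: float8_e4m3 can be constructed from float and cast back,
// mirroring phi::dtype::float16. Round-tripping quantizes to e4m3 precision.
void Fp8RoundTripExample() {
  paddle::platform::float8_e4m3 x(1.5f);
  float back = static_cast<float>(x);  // 1.5f is exactly representable in e4m3
  (void)back;
}
```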

paddle/fluid/pybind/protobuf.cc

Lines changed: 1 addition & 0 deletions
@@ -290,6 +290,7 @@ void BindVarDesc(pybind11::module *m) {
       .value("INT64", pd::proto::VarType::INT64)
       .value("FP16", pd::proto::VarType::FP16)
       .value("FP32", pd::proto::VarType::FP32)
+      .value("FP8", pd::proto::VarType::FP8)
       .value("FP64", pd::proto::VarType::FP64)
       .value("BF16", pd::proto::VarType::BF16)
       .value("COMPLEX64", pd::proto::VarType::COMPLEX64)

paddle/fluid/pybind/pybind.cc

Lines changed: 2 additions & 0 deletions
@@ -77,6 +77,7 @@ limitations under the License. */
 #include "paddle/fluid/memory/allocation/allocator_strategy.h"
 #include "paddle/fluid/platform/bfloat16.h"
 #include "paddle/fluid/platform/float16.h"
+#include "paddle/fluid/platform/float8_e4m3.h"
 #include "paddle/fluid/prim/utils/utils.h"
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 #include "paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_v2.h"

@@ -2979,6 +2980,7 @@ All parameter, weight, gradient are variables in Paddle.
       .value("COMPLEX128", phi::DataType::COMPLEX128)
       .value("FLOAT16", phi::DataType::FLOAT16)
       .value("BFLOAT16", phi::DataType::BFLOAT16)
+      .value("FLOAT8", phi::DataType::FLOAT8)
       .export_values();

 #if defined(PADDLE_WITH_PSLIB) && !defined(PADDLE_WITH_HETERPS)

paddle/fluid/pybind/tensor.cc

Lines changed: 33 additions & 0 deletions
@@ -68,6 +68,7 @@ limitations under the License. */
 #include "paddle/fluid/imperative/amp_auto_cast.h"
 #include "paddle/fluid/imperative/layer.h"
 #include "paddle/fluid/memory/allocation/allocator_strategy.h"
+#include "paddle/fluid/memory/memcpy.h"
 #ifdef PADDLE_WITH_CUDA
 #include "paddle/fluid/memory/allocation/cuda_ipc_allocator.h"
 #endif

@@ -203,6 +204,28 @@ static void TensorCopyFrom(phi::DenseTensor *dst,
   }
 }

+template <typename PlaceType>
+static void TensorCopyFromPaddleTensor(phi::DenseTensor *dst,
+                                       const paddle::Tensor &src,
+                                       const PlaceType &place,
+                                       int64_t batch_size) {
+  // paddle::memory::Copy(dst->place(),
+  //                      dst->Holder()->ptr(),
+  //                      place,
+  //                      src.data(),
+  //                      src.numel());
+
+#if defined(PADDLE_WITH_CUDA)
+  if (dst->place() == phi::GPUPlace() && place == phi::GPUPlace()) {
+    cudaMemcpy(
+        dst->Holder()->ptr(), src.data(), src.size(), cudaMemcpyDeviceToDevice);
+  } else if (dst->place() == phi::CPUPlace() && place == phi::GPUPlace()) {
+    cudaMemcpy(
+        dst->Holder()->ptr(), src.data(), src.size(), cudaMemcpyDeviceToHost);
+  }
+#endif
+}
+
 void BindTensor(pybind11::module &m) {  // NOLINT
   using namespace paddle::framework;    // NOLINT
   py::class_<phi::DenseTensor> framework_tensor(

@@ -349,6 +372,16 @@ void BindTensor(pybind11::module &m) {  // NOLINT
       py::arg("tensor"),
       py::arg("place"),
       py::arg("batch_size") = -1)
+      .def("_copy_from_paddle_tensor",
+           &TensorCopyFromPaddleTensor<paddle::platform::Place>,
+           py::arg("tensor"),
+           py::arg("place"),
+           py::arg("batch_size") = -1)
+      .def("_copy_from_paddle_tensor",
+           &TensorCopyFromPaddleTensor<paddle::platform::CUDAPlace>,
+           py::arg("tensor"),
+           py::arg("place"),
+           py::arg("batch_size") = -1)
       .def("set",
            SetTensorFromPyArray<paddle::platform::CPUPlace>,
            py::arg("array"),

paddle/phi/common/data_type.h

Lines changed: 9 additions & 0 deletions
@@ -18,6 +18,7 @@ limitations under the License. */
 #include "paddle/phi/common/bfloat16.h"
 #include "paddle/phi/common/complex.h"
 #include "paddle/phi/common/float16.h"
+#include "paddle/phi/common/float8_e4m3.h"
 #include "paddle/utils/test_macros.h"

 namespace phi {

@@ -33,6 +34,7 @@ using complex128 = ::phi::dtype::complex<double>;
 using float16 = ::phi::dtype::float16;
 using bfloat16 = ::phi::dtype::bfloat16;
 using pstring = ::phi::dtype::pstring;
+using float8 = ::phi::dtype::float8_e4m3;

 // The enum value are consistent with jit/property.proto
 enum class TEST_API DataType {

@@ -70,6 +72,7 @@ enum class TEST_API DataType {
   // This format has 1 sign bit, 8 exponent bits, and 7 mantissa bits.
   BFLOAT16,

+  FLOAT8,
   NUM_DATA_TYPES,
   // See Note [ Why we need ALL in basic kernel key member? ]
   ALL_DTYPE = UNDEFINED,

@@ -80,6 +83,7 @@ inline size_t SizeOf(DataType data_type) {
     case DataType::BOOL:
     case DataType::UINT8:
     case DataType::INT8:
+    case DataType::FLOAT8:
      return 1;
     case DataType::BFLOAT16:
     case DataType::FLOAT16:

@@ -120,6 +124,7 @@ inline size_t SizeOf(DataType data_type) {
   _(int64_t, DataType::INT64)               \
   _(uint64_t, DataType::UINT64)             \
   _(bfloat16, DataType::BFLOAT16)           \
+  _(float8, DataType::FLOAT8)               \
   _(float16, DataType::FLOAT16)             \
   _(float, DataType::FLOAT32)               \
   _(double, DataType::FLOAT64)              \

@@ -188,6 +193,9 @@ inline std::ostream& operator<<(std::ostream& os, DataType dtype) {
     case DataType::BFLOAT16:
       os << "bfloat16";
       break;
+    case DataType::FLOAT8:
+      os << "float8";
+      break;
     case DataType::FLOAT16:
       os << "float16";
       break;

@@ -262,6 +270,7 @@ using bfloat16 = phi::bfloat16;
 using complex64 = phi::complex64;
 using complex128 = phi::complex128;
 using float16 = phi::float16;
+using float8 = phi::float8;
 using pstring = phi::pstring;

 }  // namespace paddle
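
A minimal usage sketch of the data_type.h additions above, assuming the phi headers are on the include path:

```cpp
#include <iostream>

#include "paddle/phi/common/data_type.h"

int main() {
  // FLOAT8 is a one-byte type, so the extended SizeOf reports 1.
  std::cout << phi::SizeOf(phi::DataType::FLOAT8) << std::endl;  // prints 1
  // The new operator<< case prints the lowercase name.
  std::cout << phi::DataType::FLOAT8 << std::endl;  // prints float8
  return 0;
}
```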
