Skip to content

[PHI]Add new Tensor type and migrate save_combine kernel #47856

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Dec 12, 2022
34 changes: 34 additions & 0 deletions cmake/operators.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,36 @@ function(find_register FILENAME PATTERN OUTPUT)
PARENT_SCOPE)
endfunction()

function(find_phi_register FILENAME ADD_PATH)
# find the kernel name, backend and layout of PD_REGISTER_KERNEL(kernel_name, backend, layout, ...)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

下面find的是PD_REGISTER_KERNEL?注释是不是不太match

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

3q

# Scan FILENAME for a PD_REGISTER_KERNEL(...) invocation and, when one is
# found, append the matching PD_DECLARE_KERNEL(...) line to the file ADD_PATH.
# Nothing is returned to the caller; the only effect is the file(APPEND).
#
# NOTE(review): options / oneValueArgs / multiValueArgs are set but never read
# in this function body - they look like leftovers from a
# cmake_parse_arguments template; confirm before removing.
set(options "")
set(oneValueArgs "")
set(multiValueArgs "")
file(READ ${FILENAME} CONTENT)

# string(REGEX MATCH ...) stores only the first match, so at most one
# registration per source file is picked up. The pattern captures
# "PD_REGISTER_KERNEL(<name>,<backend>,<LAYOUT>" and tolerates whitespace,
# backslashes and an optional lowercase "// ..." marker (such as the
# "//cuda_only" tag stripped below) between the kernel name and the backend.
string(
REGEX
MATCH
"PD_REGISTER_KERNEL\\([ \t\r\n]*[a-z0-9_]*,[[ \\\t\r\n\/]*[a-z0-9_]*]?[ \\\t\r\n]*[a-zA-Z]*,[ \\\t\r\n]*[A-Z_]*"
register
"${CONTENT}")
# An empty match means the file registers no PHI kernel; leave ADD_PATH alone.
if(NOT register STREQUAL "")
# Drop the macro prefix so only "name, backend, layout" remains.
string(REPLACE "PD_REGISTER_KERNEL(" "" register "${register}")
# Commas become semicolons, turning the text into a CMake list.
string(REPLACE "," ";" register "${register}")
# Remove spaces, backslashes, tabs and line breaks left around the arguments.
string(REGEX REPLACE "[ \\\t\r\n]+" "" register "${register}")
# Remove the "//cuda_only" marker that may precede the backend argument.
string(REGEX REPLACE "//cuda_only" "" register "${register}")
# List layout after the clean-up: 0 = kernel name, 1 = backend, 2 = layout.
list(GET register 0 kernel_name)
list(GET register 1 kernel_backend)
list(GET register 2 kernel_layout)

# Emit one declaration line for the kernel that was found.
file(
APPEND ${ADD_PATH}
"PD_DECLARE_KERNEL(${kernel_name}, ${kernel_backend}, ${kernel_layout});\n"
)
endif()
endfunction()

function(op_library TARGET)
# op_library is a function to create op library. The interface is same as
# cc_library. But it handle split GPU/CPU code and link some common library
Expand Down Expand Up @@ -371,6 +401,8 @@ function(op_library TARGET)
foreach(cc_src ${cc_srcs})
# pybind USE_OP_ITSELF
set(op_name "")
# Add PHI Kernel Registry Message
find_phi_register(${cc_src} ${pybind_file})
find_register(${cc_src} "REGISTER_OPERATOR" op_name)
if(NOT ${op_name} EQUAL "")
file(APPEND ${pybind_file} "USE_OP_ITSELF(${op_name});\n")
Expand Down Expand Up @@ -408,6 +440,8 @@ function(op_library TARGET)
# message("cu_srcs ${cu_srcs}")
foreach(cu_src ${cu_srcs})
set(op_name "")
# Add PHI Kernel Registry Message
find_phi_register(${cu_src} ${pybind_file})
find_register(${cu_src} "REGISTER_OP_CUDA_KERNEL" op_name)
if(NOT ${op_name} EQUAL "")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, CUDA);\n")
Expand Down
3 changes: 2 additions & 1 deletion paddle/fluid/framework/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,8 @@ cc_test(
cc_library(
var_type_traits
SRCS var_type_traits.cc
DEPS framework_proto scope tensor_array sparse_coo_tensor sparse_csr_tensor)
DEPS framework_proto scope tensor_array sparse_coo_tensor sparse_csr_tensor
extended_tensor)
if(WITH_GPU)
target_link_libraries(var_type_traits dynload_cuda)
endif()
Expand Down
14 changes: 14 additions & 0 deletions paddle/fluid/framework/operator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2930,6 +2930,9 @@ void OperatorWithKernel::BuildPhiKernelContext(
need_prepare_phi_data_ = true;
tensor_in = &(var->Get<framework::LoDTensorArray>());
phi_kernel_context->EmplaceBackInputWithoutSetRange(tensor_in);
} else if (var->IsType<framework::Vocab>()) {
tensor_in = &(var->Get<framework::Vocab>());
phi_kernel_context->EmplaceBackInputWithoutSetRange(tensor_in);
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported input `%s` type when call pt kernel.",
Expand Down Expand Up @@ -2979,6 +2982,16 @@ void OperatorWithKernel::BuildPhiKernelContext(
// Note: If the input LoDTensorArray size is 0, the output
// LoDTensorArray is also 0
phi_kernel_context->EmplaceBackOutputWithoutSetRange(tensor_out);
} else if (var->template IsType<phi::CPlusString>()) {
tensor_out = var->template GetMutable<phi::CPlusString>();
phi_kernel_context->EmplaceBackOutputWithoutSetRange(tensor_out);
} else if (!var->IsInitialized()) {
// The following is for RAW type of var
if (output_defs[i].type_index ==
std::type_index(typeid(phi::CPlusString*))) {
tensor_out = var->template GetMutable<phi::CPlusString>();
}
phi_kernel_context->EmplaceBackOutputWithoutSetRange(tensor_out);
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported output `%s` type when call pt kernel.",
Expand Down Expand Up @@ -3078,6 +3091,7 @@ void OperatorWithKernel::BuildPhiKernelContext(
}
}
break;

case phi::AttributeType::SCALARS: {
PADDLE_ENFORCE_NE(
attr_iter,
Expand Down
74 changes: 72 additions & 2 deletions paddle/fluid/framework/string_array.h
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,83 @@ limitations under the License. */
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/phi/core/cplus_string.h"
#include "paddle/phi/core/extended_tensor.h"

namespace paddle {
namespace framework {

using String = std::string;
/// \brief A vocabulary table mapping wide-string tokens to 32-bit integer
/// ids, derived from phi::ExtendedTensor and registered with
/// phi::TypeInfoTraits under the name "Vocab".
///
/// The class is a thin wrapper over an std::unordered_map and forwards the
/// subset of the map interface that is declared below.
class Vocab : public phi::ExtendedTensor,
              public phi::TypeInfoTraits<phi::TensorBase, Vocab> {
 private:
  /// Underlying storage type; kept private so the public signatures still
  /// expose the plain std::unordered_map types.
  using Table = std::unordered_map<std::wstring, std::int32_t>;

 public:
  Vocab() = default;

  Vocab(const Vocab& other) = default;

  Vocab(Vocab&& other) = default;

  Vocab& operator=(const Vocab& other) = default;

  Vocab& operator=(Vocab&& other) = default;

  /// \brief Replaces the stored table with a copy of a plain map.
  /// \param other The map whose entries are copied into this Vocab.
  /// \return A reference to this object.
  Vocab& operator=(const Table& other) {
    data_ = other;
    return *this;
  }

  /// \brief Destroy the Vocab and release exclusive resources.
  virtual ~Vocab() = default;

  /// \brief Returns the name of the class for type traits.
  /// \return The name of the class.
  static const char* name() { return "Vocab"; }

  /// \brief Number of entries currently stored.
  size_t size() const { return data_.size(); }

  /// \brief Removes every entry.
  void clear() { data_.clear(); }

  /// \brief Inserts (key, value); forwards to std::unordered_map::emplace,
  /// so an already-present key is left untouched.
  void emplace(const std::wstring& key, std::int32_t value) {
    data_.emplace(key, value);
  }

  /// \brief Looks up the id of `key`; forwards to std::unordered_map::at,
  /// which throws std::out_of_range when the key is absent.
  std::int32_t at(const std::wstring& key) { return data_.at(key); }

  /// \brief Const overload of at().
  std::int32_t at(const std::wstring& key) const { return data_.at(key); }

  /// \brief Returns an iterator to the entry for `key`, or end() if absent.
  Table::iterator find(const std::wstring& key) { return data_.find(key); }

  /// \brief Const overload of find().
  Table::const_iterator find(const std::wstring& key) const {
    return data_.find(key);
  }

  /// \brief Iterator to the first stored entry.
  Table::iterator begin() { return data_.begin(); }

  /// \brief Const overload of begin().
  Table::const_iterator begin() const { return data_.begin(); }

  /// \brief Past-the-end iterator.
  Table::iterator end() { return data_.end(); }

  /// \brief Const overload of end().
  Table::const_iterator end() const { return data_.end(); }

 private:
  Table data_;
};

using String = phi::CPlusString;
using Strings = std::vector<std::string>;
using Vocab = std::unordered_map<std::wstring, std::int32_t>;

// Convert the std::string type to the std::wstring type.
bool ConvertStrToWstr(const std::string& src, std::wstring* res);
Expand Down
4 changes: 2 additions & 2 deletions paddle/fluid/framework/variable_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ namespace framework {
TEST(Variable, GetMutable) {
std::unique_ptr<Variable> v(new Variable());

auto* t = v->GetMutable<std::string>();
auto* t = v->GetMutable<String>()->Get();
*t = "1234";

const auto& tt = v->Get<std::string>();
const auto& tt = v->Get<String>().Get();
EXPECT_EQ("1234", tt);

try {
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/imperative/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ cc_library(
cc_library(
var_helper
SRCS var_helper.cc
DEPS tensor selected_rows)
DEPS tensor selected_rows extended_tensor)
if(WITH_XPU)
cc_library(
prepared_operator
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/jit/layer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ std::vector<std::string> Layer::FunctionNames() const {

PD_SPECIALZE_ATTRIBUTE_TYPE(int)
PD_SPECIALZE_ATTRIBUTE_TYPE(float)
PD_SPECIALZE_ATTRIBUTE_TYPE(std::string)
PD_SPECIALZE_ATTRIBUTE_TYPE(framework::String)
PD_SPECIALZE_ATTRIBUTE_TYPE(std::vector<int>)
PD_SPECIALZE_ATTRIBUTE_TYPE(std::vector<float>)
PD_SPECIALZE_ATTRIBUTE_TYPE(std::vector<std::string>)
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/jit/layer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ TEST(CpuLayerTest, Construct) {
int ds = layer.Attribute<int>("down_sampling");
EXPECT_EQ(ds, 4);

std::string fstr = layer.Attribute<std::string>("fstr");
std::string fstr = *(layer.Attribute<framework::String>("fstr").Get());
EXPECT_STREQ(fstr.c_str(), "save str property");

std::vector<int> ints = layer.Attribute<std::vector<int>>("ints");
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/jit/property.cc
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ std::unordered_map<std::string, std::shared_ptr<Variable>> Property::Values() {
*var->GetMutable<int>() = static_cast<int>(GetInt64(n));
break;
case ValueProto::STRING:
*var->GetMutable<std::string>() = GetString(n);
*var->GetMutable<paddle::framework::String>() = GetString(n);
break;
case ValueProto::FLOATS:
*var->GetMutable<std::vector<float>>() = GetFloats(n);
Expand Down
4 changes: 2 additions & 2 deletions paddle/fluid/operators/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ unset(OP_LIBRARY CACHE)
set(pybind_file ${PADDLE_BINARY_DIR}/paddle/fluid/pybind/pybind.h.tmp CACHE INTERNAL "pybind.h file")
set(pybind_file_prune ${PADDLE_BINARY_DIR}/paddle/fluid/pybind/pybind.h.prune CACHE INTERNAL "pybind.h file")
set(pybind_file_final ${PADDLE_BINARY_DIR}/paddle/fluid/pybind/pybind.h)
file(WRITE ${pybind_file} "// Generated by the paddle/fluid/operators/CMakeLists.txt. DO NOT EDIT!\n\n")
file(WRITE ${pybind_file} "#include \"paddle/phi/core/kernel_registry.h\" // Generated by the paddle/fluid/operators/CMakeLists.txt. DO NOT EDIT!\n\n")

add_subdirectory(math)
add_subdirectory(controlflow)
Expand Down Expand Up @@ -109,7 +109,7 @@ register_operators(EXCLUDES py_layer_op py_func_op warpctc_op dgc_op load_combin
op_library(run_program_op SRCS run_program_op.cc run_program_op.cu.cc run_program_op_npu.cc DEPS executor_cache ${OP_HEADER_DEPS})
target_link_libraries(run_program_op cuda_graph_with_memory_pool)
op_library(quantize_linear_op DEPS phi)
op_library(save_combine_op DEPS string_array)
op_library(save_combine_op DEPS string_array phi)
op_library(load_combine_op DEPS string_array)

if (WITH_GPU OR WITH_ROCM)
Expand Down
30 changes: 23 additions & 7 deletions paddle/fluid/operators/save_combine_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ limitations under the License. */

#include <string>

#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/common/bfloat16.h"
#include "paddle/phi/core/kernel_registry.h"

namespace paddle {
namespace operators {

Expand Down Expand Up @@ -102,10 +106,22 @@ REGISTER_OPERATOR(save_combine,
ops::SaveCombineOpProtoMaker,
ops::SaveCombineOpInferVarType);

REGISTER_OP_CPU_KERNEL(
save_combine,
ops::SaveCombineOpKernel<phi::CPUContext, float>,
ops::SaveCombineOpKernel<phi::CPUContext, double>,
ops::SaveCombineOpKernel<phi::CPUContext, paddle::platform::bfloat16>,
ops::SaveCombineOpKernel<phi::CPUContext, int>,
ops::SaveCombineOpKernel<phi::CPUContext, int64_t>);
// Registers paddle::operators::SaveCombineTensorKernel under the kernel name
// `save_combine_tensor` with backend CPU and layout ALL_LAYOUT, listing int,
// int64_t, float, double and phi::dtype::bfloat16 as data types.
// NOTE(review): the trailing {} is left empty; presumably it is the macro's
// hook for per-kernel argument customization - confirm against the macro.
PD_REGISTER_KERNEL(save_combine_tensor,
CPU,
ALL_LAYOUT,
paddle::operators::SaveCombineTensorKernel,
int,
int64_t,
float,
double,
phi::dtype::bfloat16) {}

// Registers paddle::operators::SaveCombineVocabKernel under the kernel name
// `save_combine_vocab` with backend CPU and layout ALL_LAYOUT, listing int,
// int64_t, float, double and phi::dtype::bfloat16 as data types.
// NOTE(review): the trailing {} is left empty; presumably it is the macro's
// hook for per-kernel argument customization - confirm against the macro.
PD_REGISTER_KERNEL(save_combine_vocab,
CPU,
ALL_LAYOUT,
paddle::operators::SaveCombineVocabKernel,
int,
int64_t,
float,
double,
phi::dtype::bfloat16) {}
50 changes: 31 additions & 19 deletions paddle/fluid/operators/save_combine_op.cu
Original file line number Diff line number Diff line change
@@ -1,23 +1,35 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这种不需要改文件日期,以文件创建日期为基准

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/operators/save_combine_op.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"

namespace ops = paddle::operators;
// Registers paddle::operators::SaveCombineTensorKernel under the kernel name
// `save_combine_tensor` with backend GPU and layout ALL_LAYOUT, listing int,
// int64_t, float and double as data types (no bfloat16 entry in this list).
// NOTE(review): the trailing {} is left empty; presumably it is the macro's
// hook for per-kernel argument customization - confirm against the macro.
PD_REGISTER_KERNEL(save_combine_tensor,
GPU,
ALL_LAYOUT,
paddle::operators::SaveCombineTensorKernel,
int,
int64_t,
float,
double) {}

REGISTER_OP_CUDA_KERNEL(save_combine,
ops::SaveCombineOpKernel<phi::GPUContext, float>,
ops::SaveCombineOpKernel<phi::GPUContext, double>,
ops::SaveCombineOpKernel<phi::GPUContext, int>,
ops::SaveCombineOpKernel<phi::GPUContext, int64_t>);
// Registers paddle::operators::SaveCombineVocabKernel under the kernel name
// `save_combine_vocab` with backend GPU and layout ALL_LAYOUT, listing int,
// int64_t, float and double as data types (no bfloat16 entry in this list).
// NOTE(review): the trailing {} is left empty; presumably it is the macro's
// hook for per-kernel argument customization - confirm against the macro.
PD_REGISTER_KERNEL(save_combine_vocab,
GPU,
ALL_LAYOUT,
paddle::operators::SaveCombineVocabKernel,
int,
int64_t,
float,
double) {}
Loading