From 295efada0ae046f6d29973f8a6ae307ebbda8123 Mon Sep 17 00:00:00 2001
From: lvyongkang
Date: Wed, 17 Jul 2024 10:32:38 +0000
Subject: [PATCH] add nvtx event for profiling

---
// Review notes. This text sits between the "---" separator and the diffstat,
// so it is neither part of the commit message nor part of any hunk.
//
// * phi_kernel_instruction.cc: adds the profiler/event_tracing.h include and,
//   inside PhiKernelInstruction::Run(), constructs a platform::RecordEvent
//   (TracerEventType::UserDefined, third constructor argument 1) in the scope
//   of the infer_meta_ call ("PhiKernelInstruction::infermeta") and, inside a
//   new brace scope, in the scope of the phi kernel call
//   ("PhiKernelInstruction::kernel launch"). The first hunk also removes the
//   blank line that separated the phi and pir include groups.
// * pir_interpreter.cc: wraps instr_node->Run() in a brace scope together with
//   a RecordEvent named "InstrRun" (UserDefined, third argument 10).
// * op_translator.cc: adds TopPSamplingOpTranscriber, whose
//   HandleNonexistentAttribute override stores seed = -1 and k = 0 (both
//   Int32Attribute) and mode = "truncated" (StrAttribute) in the attribute
//   map, and registers it in OpTranslator() under "top_p_sampling".
//   Presumably the override runs only for attributes missing from the
//   translated op -- TODO confirm against OpTranscriber.
//
// NOTE(review): the subject mentions only profiling events; the op_translator
// change looks unrelated -- confirm whether it belongs in a separate commit.
// NOTE(review): the two events inside Run() pass 1 as the third argument while
// "InstrRun" passes 10 -- confirm that the difference is intentional.

 .../instruction/phi_kernel_instruction.cc        | 13 +++++++++++--
 .../framework/new_executor/pir_interpreter.cc    |  6 +++++-
 .../fluid/ir_adaptor/translator/op_translator.cc | 15 +++++++++++++++
 3 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/paddle/fluid/framework/new_executor/instruction/phi_kernel_instruction.cc b/paddle/fluid/framework/new_executor/instruction/phi_kernel_instruction.cc
index 504b3eaf48fc30..25340c08c9ddf7 100644
--- a/paddle/fluid/framework/new_executor/instruction/phi_kernel_instruction.cc
+++ b/paddle/fluid/framework/new_executor/instruction/phi_kernel_instruction.cc
@@ -24,10 +24,10 @@
 #include "paddle/fluid/pir/dialect/operator/utils/op_yaml_info_parser.h"
 #include "paddle/fluid/platform/collective_helper.h"
 #include "paddle/fluid/platform/device_context.h"
+#include "paddle/fluid/platform/profiler/event_tracing.h"
 #include "paddle/phi/core/infermeta_utils.h"
 #include "paddle/phi/core/meta_tensor.h"
 #include "paddle/phi/core/type_defs.h"
-
 #include "paddle/pir/include/core/builtin_attribute.h"
 #include "paddle/pir/include/core/operation.h"
 #include "paddle/pir/include/core/value.h"
@@ -178,11 +178,20 @@ PhiKernelInstruction::~PhiKernelInstruction() { delete phi_kernel_; }
 void PhiKernelInstruction::Run() {
   VLOG(6) << "Begin run op " << phi_op_name_ << " infer meta.";
   if (infer_meta_interface_) {
+    platform::RecordEvent record_event("PhiKernelInstruction::infermeta",
+                                       platform::TracerEventType::UserDefined,
+                                       1);
     infer_meta_interface_->infer_meta_(&(infer_meta_context_));
   }
   VLOG(6) << "End run op " << phi_op_name_ << " infer meta.";
   VLOG(6) << "Begin run op " << phi_op_name_ << " kernel.";
-  (*(phi_kernel_))(&(kernel_context_));
+  {
+    platform::RecordEvent record_event("PhiKernelInstruction::kernel launch",
+                                       platform::TracerEventType::UserDefined,
+                                       1);
+    (*(phi_kernel_))(&(kernel_context_));
+  }
+
   VLOG(6) << "End run op " << phi_op_name_ << " kernel.";
 }
 
diff --git a/paddle/fluid/framework/new_executor/pir_interpreter.cc b/paddle/fluid/framework/new_executor/pir_interpreter.cc
index 145600114edcd9..7fc1afc24786a5 100644
--- a/paddle/fluid/framework/new_executor/pir_interpreter.cc
+++ b/paddle/fluid/framework/new_executor/pir_interpreter.cc
@@ -1880,7 +1880,11 @@ void PirInterpreter::RunInstructionBase(InstructionBase* instr_node) {
     }
 
     if (!instr_node->IsArtificial()) {
-      instr_node->Run();
+      {
+        platform::RecordEvent record(
+            "InstrRun", platform::TracerEventType::UserDefined, 10);
+        instr_node->Run();
+      }
 
       if (FLAGS_benchmark) {
         instr_node->DeviceContext().Wait();
diff --git a/paddle/fluid/ir_adaptor/translator/op_translator.cc b/paddle/fluid/ir_adaptor/translator/op_translator.cc
index 9406dc309e4c9f..a32e0de2d0d331 100644
--- a/paddle/fluid/ir_adaptor/translator/op_translator.cc
+++ b/paddle/fluid/ir_adaptor/translator/op_translator.cc
@@ -3179,6 +3179,20 @@ struct RepeatInterLeaveGradOpTranscriber : public OpTranscriber {
   }
 };
 
+struct TopPSamplingOpTranscriber : public OpTranscriber {
+  void HandleNonexistentAttribute(pir::IrContext* ctx,
+                                  pir::AttributeMap* attribute_map,
+                                  const OpAttributeInfo& info) override {
+    if (info.name == "seed") {
+      (*attribute_map)[info.name] = pir::Int32Attribute::get(ctx, -1);
+    } else if (info.name == "k") {
+      (*attribute_map)[info.name] = pir::Int32Attribute::get(ctx, 0);
+    } else if (info.name == "mode") {
+      (*attribute_map)[info.name] = pir::StrAttribute::get(ctx, "truncated");
+    }
+  }
+};
+
 struct FusedElemwiseAddActivationOpTranscriber : public OpTranscriber {
   void HandleNonexistentAttribute(pir::IrContext* ctx,
                                   pir::AttributeMap* attribute_map,
@@ -3629,6 +3643,7 @@ OpTranslator::OpTranslator() {
   special_handlers["slice"] = SliceOpTranscriber();
   special_handlers["split"] = SplitOpTranscriber();
   special_handlers["sum"] = AddNOpTranscriber();
+  special_handlers["top_p_sampling"] = TopPSamplingOpTranscriber();
   special_handlers["tril_triu"] = TrilAndTriuOpTranscriber();
   special_handlers["tril_triu_grad"] = TrilAndTriuGradOpTranscriber();
   special_handlers["matmul"] = LegacyMatmulOpTranscriber();