33
33
#include " paddle/fluid/platform/cuda_graph_with_memory_pool.h"
34
34
#include " paddle/phi/backends/device_manager.h"
35
35
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
36
+ #include " paddle/fluid/framework/io/save_load_tensor.h"
36
37
#include " paddle/fluid/platform/device/gpu/nccl_helper.h"
37
38
#include " paddle/phi/core/distributed/comm_context_manager.h"
38
39
#include " paddle/phi/core/distributed/nccl_comm_context.h"
@@ -42,7 +43,8 @@ PHI_DECLARE_bool(dynamic_static_unified_comm);
42
43
43
44
PD_DECLARE_bool (enable_host_event_recorder_hook);
44
45
PD_DECLARE_bool (log_memory_stats);
45
-
46
+ PHI_DECLARE_string (static_runtime_data_save_path);
47
+ PHI_DECLARE_bool (save_static_runtime_data);
46
48
namespace paddle {
47
49
namespace framework {
48
50
@@ -1053,6 +1055,44 @@ void ProgramInterpreter::RunOperator(const Instruction& instr_node) {
1053
1055
}
1054
1056
}
1055
1057
1058
+ if (op_with_kernel != nullptr && FLAGS_save_static_runtime_data) {
1059
+ VLOG (6 ) << " start to save paddle variable" ;
1060
+ auto root_path = FLAGS_static_runtime_data_save_path;
1061
+ for (auto & vname : op->InputVars ()) {
1062
+ auto * var = local_scope->FindVar (vname);
1063
+ if (var == nullptr ) continue ;
1064
+ const phi::DenseTensor* tensor{nullptr };
1065
+ if (var->IsType <phi::DenseTensor>()) {
1066
+ tensor = &var->Get <phi::DenseTensor>();
1067
+ } else {
1068
+ VLOG (6 ) << vname << " is not DenseTensor" ;
1069
+ continue ;
1070
+ }
1071
+ if (!tensor->IsInitialized ()) continue ;
1072
+ paddle::framework::SaveTensor (
1073
+ *tensor,
1074
+ root_path + " /saved_tensors/" + op->Type () + " -input-" + vname,
1075
+ false );
1076
+ }
1077
+ for (auto & vname : op->OutputVars (true )) {
1078
+ auto * var = local_scope->FindVar (vname);
1079
+ if (var == nullptr ) continue ;
1080
+ const phi::DenseTensor* tensor{nullptr };
1081
+ if (var->IsType <phi::DenseTensor>()) {
1082
+ tensor = &var->Get <phi::DenseTensor>();
1083
+ } else {
1084
+ VLOG (6 ) << vname << " is not DenseTensor" ;
1085
+ continue ;
1086
+ }
1087
+ if (!tensor->IsInitialized ()) continue ;
1088
+ paddle::framework::SaveTensor (
1089
+ *tensor,
1090
+ root_path + " /saved_tensors/" + op->Type () + " -output-" + vname,
1091
+ false );
1092
+ }
1093
+ VLOG (6 ) << " end save paddle variable" ;
1094
+ }
1095
+
1056
1096
// for debug nan/inf
1057
1097
if (op_with_kernel != nullptr && FLAGS_check_nan_inf) {
1058
1098
VLOG (4 ) << " Check nan/inf" ;
0 commit comments