PaddlePaddle · danleifeng · Feb 2, 2023 · Feb 1, 2023 · Feb 1, 2023 · Feb 1, 2023
diff --git a/python/paddle/fluid/tests/unittests/ps/CMakeLists.txt b/python/paddle/fluid/tests/unittests/ps/CMakeLists.txt
@@ -9,3 +9,7 @@ foreach(TEST_OP ${TEST_OPS})
   list(APPEND TEST_OPS ${TEST_OP})
   set_tests_properties(${TEST_OP} PROPERTIES TIMEOUT 50)
 endforeach()
+
+if(WITH_PSCORE) # tag test_gpubox_ps with its run-type label only when WITH_PSCORE is set
+  set_property(TEST test_gpubox_ps PROPERTY LABELS "RUN_TYPE=GPUPS")
+endif()
diff --git a/python/paddle/fluid/tests/unittests/ps/config_gpubox.yaml b/python/paddle/fluid/tests/unittests/ps/config_gpubox.yaml
@@ -0,0 +1,55 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# workspace
+#workspace: "models/rank/dnn"
+
+
+runner:  # run settings: training keys first, then dataset keys, then infer_* keys
+  train_data_dir: "data/sample_data/train"  # NOTE(review): download_criteo_data.sh stages its file under train_data/ instead — confirm
+  train_reader_path: "criteo_reader" # importlib format
+  use_gpu: True
+  use_auc: False
+  train_batch_size: 32
+  epochs: 3
+  print_interval: 10
+  model_save_path: "output_model_dnn_queue"
+
+  sync_mode: "gpubox"
+  thread_num: 30
+  reader_type: "InmemoryDataset"  # options: DataLoader / QueueDataset / RecDataset / InmemoryDataset
+  pipe_command: "python3.7 dataset_generator_criteo.py"  # command line for the Reader script; pins the python3.7 interpreter
+  dataset_debug: False
+  split_file_list: False
+
+  infer_batch_size: 2
+  infer_reader_path: "criteo_reader" # importlib format
+  test_data_dir: "data/sample_data/train"  # same directory as train_data_dir
+  infer_load_path: "output_model_dnn_queue"  # same path as model_save_path
+  infer_start_epoch: 0
+  infer_end_epoch: 3  # same value as epochs
+# Hyper-parameters of the user-defined network.
+hyper_parameters:
+  # Optimizer configuration.
+  optimizer:
+    class: Adam
+    learning_rate: 0.001
+    strategy: async
+  # User-defined <key, value> pairs.
+  sparse_inputs_slots: 27  # equals the sparse slot count in dataset_generator_criteo.py ("click" + 1..26) — presumably must stay in sync; confirm
+  sparse_feature_number: 1024
+  sparse_feature_dim: 9
+  dense_input_dim: 13  # equals dense_slots_shape ([13]) in dataset_generator_criteo.py — presumably must stay in sync; confirm
+  fc_sizes: [512, 256, 128, 32]
+  distributed_embedding: 0
diff --git a/python/paddle/fluid/tests/unittests/ps/dataset_generator_criteo.py b/python/paddle/fluid/tests/unittests/ps/dataset_generator_criteo.py
@@ -0,0 +1,86 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+
+import paddle.distributed.fleet as fleet
+
+logging.basicConfig(
+    format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO
+)
+logger = logging.getLogger(__name__)
+
+
+class Reader(fleet.MultiSlotDataGenerator):
+    def init(self):
+        padding = 0
+        sparse_slots = "click 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26"
+        self.sparse_slots = sparse_slots.strip().split(" ")
+        self.dense_slots = ["dense_feature"]
+        self.dense_slots_shape = [13]
+        self.slots = self.sparse_slots + self.dense_slots
+        self.slot2index = {}
+        self.visit = {}
+        for i in range(len(self.slots)):
+            self.slot2index[self.slots[i]] = i
+            self.visit[self.slots[i]] = False
+        self.padding = padding
+        logger.info("pipe init success")
+
+    def line_process(self, line):
+        line = line.strip().split(" ")
+        output = [(i, []) for i in self.slots]
+        for i in line:
+            slot_feasign = i.split(":")
+            slot = slot_feasign[0]
+            if slot not in self.slots:
+                continue
+            if slot in self.sparse_slots:
+                feasign = int(slot_feasign[1])
+            else:
+                feasign = float(slot_feasign[1])
+            output[self.slot2index[slot]][1].append(feasign)
+            self.visit[slot] = True
+        for i in self.visit:
+            slot = i
+            if not self.visit[slot]:
+                if i in self.dense_slots:
+                    output[self.slot2index[i]][1].extend(
+                        [self.padding]
+                        * self.dense_slots_shape[self.slot2index[i]]
+                    )
+                else:
+                    output[self.slot2index[i]][1].extend([self.padding])
+            else:
+                self.visit[slot] = False
+
+        return output
+        # return [label] + sparse_feature + [dense_feature]
+
+    def generate_sample(self, line):
+        r"Dataset Generator"
+
+        def reader():
+            output_dict = self.line_process(line)
+            # {key, value} dict format: {'labels': [1], 'sparse_slot1': [2, 3], 'sparse_slot2': [4, 5, 6, 8], 'dense_slot': [1,2,3,4]}
+            # dict must match static_model.create_feed()
+            yield output_dict
+
+        return reader
+
+
+if __name__ == "__main__":
+
+    r = Reader()
+    r.init()
+    r.run_from_stdin()
diff --git a/python/paddle/fluid/tests/unittests/ps/download_criteo_data.sh b/python/paddle/fluid/tests/unittests/ps/download_criteo_data.sh
@@ -0,0 +1,17 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Download the sample file and stage it under ./train_data; stop at the first failure.
+wget --no-check-certificate https://paddlerec.bj.bcebos.com/benchmark/sample_train.txt \
+  && mkdir -p train_data && mv sample_train.txt train_data/
diff --git a/python/paddle/fluid/tests/unittests/ps/gpubox_run.sh b/python/paddle/fluid/tests/unittests/ps/gpubox_run.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Make sure ./log exists; the pserver and trainer loops redirect their output into it.
+if [ ! -d "./log" ]; then
+  mkdir ./log && echo "Create log folder to store running log"
+fi
+
+export FLAGS_LAUNCH_BARRIER=0
+export PADDLE_TRAINER_ID=0  # re-exported per trainer inside the trainer loop below
+export PADDLE_PSERVER_NUMS=1  # iteration count of the pserver loop below
+export PADDLE_TRAINERS=1  # iteration count of the trainer loop below
+export PADDLE_TRAINERS_NUM=${PADDLE_TRAINERS}
+export POD_IP=127.0.0.1
+
+# Parameter-server endpoint; change both lines below to a free port if 29011 is occupied.
+export PADDLE_PSERVERS_IP_PORT_LIST="127.0.0.1:29011"
+export PADDLE_PSERVER_PORT_ARRAY=(29011)  # indexed by the pserver loop; NOTE(review): bash does not pass arrays to child processes — confirm nothing downstream needs it
+
+# GPU ids to use; adjust to the devices on your machine (this default lists eight).
+export FLAGS_selected_gpus="0,1,2,3,4,5,6,7"
+# Two-GPU alternative: export FLAGS_selected_gpus="0,1"
+
+# Python entry script launched for both the PSERVER and TRAINER roles below.
+# Alternative script: SC="gpubox_ps_trainer.py"
+SC="static_gpubox_trainer.py"
+
+# run pserver
+export TRAINING_ROLE=PSERVER
+for((i=0;i<$PADDLE_PSERVER_NUMS;i++))
+do
+    cur_port=${PADDLE_PSERVER_PORT_ARRAY[$i]}
+    echo "PADDLE WILL START PSERVER "$cur_port
+    export PADDLE_PORT=${cur_port}
+    python3.7 -u $SC &> ./log/pserver.$i.log &
+done
+
+# Run the trainers in the foreground, one after another, each with its own
+# PADDLE_TRAINER_ID and log file ./log/worker.<id>.log.
+export TRAINING_ROLE=TRAINER
+for ((trainer_id = 0; trainer_id < $PADDLE_TRAINERS; trainer_id++)); do
+    echo "PADDLE WILL START Trainer "$trainer_id
+    export PADDLE_TRAINER_ID=$trainer_id
+    python3.7 -u $SC &> ./log/worker.$trainer_id.log
+done
+
+echo "Training log stored in ./log/"