
Commit 2ad6078

update docs according to testing results
1 parent: fe4090f

File tree: 8 files changed, +23 −20 lines

- examples/gpt/README.md
- examples/gpt/hybrid_parallel/README.md
- examples/gpt/hybrid_parallel/run.sh
- examples/gpt/hybrid_parallel/run_pretrain.py
- examples/gpt/single/run.sh
- examples/gpt/single/run_pretrain.py
- fleetx/data/data_tools/cpp/Makefile
- requirements.txt

examples/gpt/README.md

+1 −4

@@ -28,21 +28,18 @@ GPT-[2](https://cdn.openai.com/better-language-models/language_models_are_unsupe
 - regex
 - colorlog
 - colorama
-- cached_path >= 1.1.5
 - omegaconf
 - sentencepiece >= 0.1.94
 - tqdm
 - visualdl
-- paddlepaddle-gpu >= 2.2rc
 - pybind11
 - lac (optional)
 - zstandard (optional)
 
 **Installation command**
 ```shell
-pip install regex colorlog colorama cached_path omegaconf sentencepiece tqdm visualdl pybind11 lac zstandard
+python -m pip install regex colorlog colorama omegaconf sentencepiece tqdm visualdl
 ```
-Note: PaddlePaddle >= 2.2rc (or the latest develop build) is required; see the Paddle [official site](https://www.paddlepaddle.org.cn) for installation instructions.
 
 ### Data preparation
 
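The updated install command leaves out the optional packages from the dependency list above. A minimal sketch, assuming the optional `lac` and `zstandard` features are actually wanted:

```shell
# Optional dependencies from the README list; install only if needed.
python -m pip install lac zstandard
```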

examples/gpt/hybrid_parallel/README.md

+14 −4

@@ -215,18 +215,28 @@ python -m paddle.distributed.launch --log_dir $log_dir --devices "0,1,2,3,4,5,6,
 
 ### Multi-node training
 
-To train large models on more machines, run the launch command on every node that takes part in training. Taking 2-node group-sharded parallel training of the 6.7B model as an example, the launch command is:
+To train large models on more machines, set the master node's IP/port on every participating node before running the launch command (the master IP can simply be the IP of any one of the machines used for training).
+
+Taking 2-node group-sharded parallel training of the 6.7B model as an example, the launch command is:
 
 ```shell
+master_ip=<master node IP>
+master_port=<an available free port>
+
 log_dir=log_sharding16
-python -m paddle.distributed.launch --log_dir $log_dir --master=10.10.1.1:49178 --nnodes=2 --devices "0,1,2,3,4,5,6,7" run_pretrain.py \
+python -m paddle.distributed.launch --log_dir $log_dir --master=$master_ip:$master_port --nnodes=2 --devices "0,1,2,3,4,5,6,7" run_pretrain.py \
     -c ./configs_6.7B_sharding16.yaml
 ```
 
-To run 16-node hybrid-parallel training of the 175B model, since there are many nodes you may consider using an `ssh` script or `mpirun` to distribute the command across nodes; the launch command is:
+To run 16-node hybrid-parallel training of the 175B model, the launch command is:
 
 ```shell
+master_ip=<master node IP>
+master_port=<an available free port>
+
 log_dir=log_mp8_pp16
-mpirun python -m paddle.distributed.launch --log_dir $log_dir --master=10.10.1.1:49178 --nnodes=16 --devices "0,1,2,3,4,5,6,7" run_pretrain.py \
+python -m paddle.distributed.launch --log_dir $log_dir --master=$master_ip:$master_port --nnodes=16 --devices "0,1,2,3,4,5,6,7" run_pretrain.py \
     -c ./configs_175B_mp8_pp16.yaml
 ```
+
+When there are many nodes, consider using an `ssh` script or `mpirun` to distribute the launch command across the nodes.
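The note above suggests `ssh` or `mpirun` for fanning the launch command out to many nodes. Below is a minimal ssh-based sketch; `hosts.txt`, the repository path, and the concrete master IP/port values are placeholder assumptions, not part of the documented workflow:

```shell
# Run the same launch command on every node listed in hosts.txt (one IP per line);
# all nodes join the same 16-node job through the shared master address.
master_ip=10.10.1.1      # IP of any one of the training nodes (placeholder)
master_port=49178        # a free port on that node (placeholder)
while read -r host; do
  ssh -n "$host" "cd /path/to/repo/examples/gpt/hybrid_parallel && \
    python -m paddle.distributed.launch --log_dir log_mp8_pp16 \
      --master=$master_ip:$master_port --nnodes=16 --devices '0,1,2,3,4,5,6,7' \
      run_pretrain.py -c ./configs_175B_mp8_pp16.yaml" &
done < hosts.txt
wait
```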

examples/gpt/hybrid_parallel/run.sh

+0 −2

@@ -12,8 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-export PYTHONPATH=$PYTHONPATH:../../../
-
 log_dir=log_hybrid
 rm -rf $log_dir
 
examples/gpt/hybrid_parallel/run_pretrain.py

+3 −3

@@ -18,17 +18,17 @@
 import random
 import time
 import sys
-sys.path.append("..")
-from examples.gpt.tools import parse_args, parse_yaml
-
 import numpy as np
+
 import paddle
 from paddle.distributed import fleet
 from paddle.distributed.fleet.meta_parallel import get_rng_state_tracker
 from paddle.distributed.sharding import group_sharded_parallel
 from paddle.fluid.dygraph.parallel import sync_params_buffers
 from paddle.distributed.fleet.utils.hybrid_parallel_util import fused_allreduce_gradients
 
+sys.path.append("../../../")
+from examples.gpt.tools import parse_args, parse_yaml
 from fleetx.datasets.gpt import create_pretrained_dataset, get_train_data_file
 from fleetx.data.tokenizers import GPTTokenizer
 from fleetx.utils import logger

examples/gpt/single/run.sh

+0 −2

@@ -12,8 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-export PYTHONPATH=$PYTHONPATH:../../../
-
 # 345M
 python run_pretrain.py -c ./configs_345m_single_card.yaml
 
examples/gpt/single/run_pretrain.py

+3 −3

@@ -18,11 +18,11 @@
 import random
 import time
 import sys
-sys.path.append("..")
-from examples.gpt.tools import parse_args, parse_yaml
-
 import numpy as np
+
 import paddle
+sys.path.append("../../../")
+from examples.gpt.tools import parse_args, parse_yaml
 from fleetx.models.gpt_model.modeling import GPTModel, GPTForPretraining, GPTPretrainingCriterion
 from fleetx.datasets.gpt import create_pretrained_dataset, get_train_data_file
 from fleetx.data.tokenizers import GPTTokenizer
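A note on the relative path: `sys.path.append("../../../")` is resolved against the current working directory, so with the `PYTHONPATH` export removed from `run.sh` the scripts assume they are launched from their own example directory. A minimal sketch of the assumed usage:

```shell
# Launch from the example directory so "../../../" points at the repository root
# and the examples.gpt.tools import (and the fleetx imports, if fleetx is not
# installed as a package) resolve without setting PYTHONPATH.
cd examples/gpt/single
bash run.sh
```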

fleetx/data/data_tools/cpp/Makefile

+1 −1

@@ -1,5 +1,5 @@
 CXXFLAGS += -O3 -Wall -shared -std=c++11 -fPIC -fdiagnostics-color
-CPPFLAGS += $(shell python3 -m pybind11 --includes)
+CPPFLAGS += $(shell python -m pybind11 --includes)
 LIBNAME = fast_index_map_helpers
 LIBEXT = ".so"
 
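The `python -m pybind11 --includes` helper used above prints the compiler include flags for the active interpreter's headers and the installed pybind11 headers, so the Makefile now builds against whatever `python` resolves to. A quick check of what `CPPFLAGS` will receive (the paths in the comment are only illustrative):

```shell
# Prints something along the lines of:
#   -I/usr/include/python3.9 -I/usr/lib/python3.9/site-packages/pybind11/include
python -m pybind11 --includes
```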

requirements.txt

+1 −1

@@ -1,6 +1,6 @@
 regex
 colorlog
 colorama
-inspect
 omegaconf
 tqdm
+pybind11
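With `inspect` removed (it is a Python standard-library module rather than an installable pip package) and `pybind11` added, the file should install cleanly. A minimal usage sketch from the repository root:

```shell
# Install the listed runtime dependencies.
python -m pip install -r requirements.txt
```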
