From 7e988a812943a0eb0ec26a08d7c84e9a7e5cd41a Mon Sep 17 00:00:00 2001
From: xxa <1829994704@qq.com>
Date: Sun, 22 Sep 2024 21:45:18 +0800
Subject: [PATCH 01/13] fix-docs

---
 .../api_difference/Tensor/torch.Tensor.new_tensor.md      | 2 +-
 .../api_difference/cuda/torch.cuda.Stream__upper.md       | 7 ++++---
 .../api_difference/cuda/torch.cuda.stream.md              | 4 ++--
 .../api_difference/nn/torch.nn.GRU.md                     | 6 +++++-
 .../api_difference/nn/torch.nn.LSTM.md                    | 6 +++++-
 .../api_difference/nn/torch.nn.Module.named_buffers.md    | 8 ++++----
 .../api_difference/nn/torch.nn.Module.named_modules.md    | 8 ++++----
 .../api_difference/nn/torch.nn.Module.named_parameters.md | 8 ++++----
 .../api_difference/nn/torch.nn.RNN.md                     | 6 +++++-
 .../api_difference/nn/torch.nn.Transformer.md             | 5 +++--
 .../api_difference/nn/torch.nn.TransformerDecoderLayer.md | 3 ++-
 .../api_difference/nn/torch.nn.TransformerEncoderLayer.md | 4 ++--
 ...scale.nn.model_parallel.layers.ColumnParallelLinear.md | 4 ++--
 ...airscale.nn.model_parallel.layers.RowParallelLinear.md | 4 ++--
 .../transformers/transformers.AddedToken.md               | 8 ++++----
 .../transformers/transformers.PreTrainedTokenizer.md      | 4 ++--
 16 files changed, 51 insertions(+), 36 deletions(-)

diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/Tensor/torch.Tensor.new_tensor.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/Tensor/torch.Tensor.new_tensor.md
index 18372308fd7..73a0f951f78 100644
--- a/docs/guides/model_convert/convert_from_pytorch/api_difference/Tensor/torch.Tensor.new_tensor.md
+++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/Tensor/torch.Tensor.new_tensor.md
@@ -3,7 +3,7 @@
 ### [torch.Tensor.new_tensor](https://pytorch.org/docs/stable/generated/torch.Tensor.new_tensor.html#torch-tensor-new-tensor)
 
 ```python
-torch.Tensor.new_tensor(data, *, dtype=None, device=None, requires_grad=False)
+torch.Tensor.new_tensor(data, *, dtype=None, device=None, requires_grad=False, layout=torch.strided, pin_memory=False)
 ```
 
 ### [paddle.to_tensor](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api/paddle/to_tensor_cn.html)
diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/cuda/torch.cuda.Stream__upper.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/cuda/torch.cuda.Stream__upper.md
index 59f90d86273..84e14a94d0e 100644
--- a/docs/guides/model_convert/convert_from_pytorch/api_difference/cuda/torch.cuda.Stream__upper.md
+++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/cuda/torch.cuda.Stream__upper.md
@@ -6,10 +6,10 @@
 torch.cuda.Stream(device=None, priority=0, **kwargs)
 ```
 
-### [paddle.device.cuda.Stream](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api/paddle/device/cuda/Stream_cn.html)
+### [paddle.device.Stream](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.6/api/paddle/device/Stream_cn.html#stream)
 
 ```python
-paddle.device.cuda.Stream(device=None, priority=None)
+paddle.device.Stream(device=None, priority=None, blocking=False)
 ```
 
 两者功能一致，参数用法不一致，具体如下：
@@ -20,6 +20,7 @@ paddle.device.cuda.Stream(device=None, priority=None)
 | -------- | ------------ | ----------------------------------------------------------------------------------------- |
 | device   | device       | 希望分配 stream 的设备。                                                                  |
 | priority | priority     | stream 的优先级，PyTorch 取值范围为-1、0，Paddle 的取值范围为 1、2，需要转写。 |
+| - | blocking     | stream 是否同步执行，PyTorch 无此参数，Paddle 保持默认即可。 |
 
 ### 转写示例
 
@@ -34,5 +35,5 @@ y = torch.cuda.Stream(priority=default_priority)
 # Paddle 写法
 high_priority = 1
 default_priority = 2
-y = paddle.device.cuda.Stream(priority=default_priority)
+y = paddle.device.Stream(priority=default_priority)
 ```
diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/cuda/torch.cuda.stream.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/cuda/torch.cuda.stream.md
index 20eb5a8d268..5a714ea0383 100644
--- a/docs/guides/model_convert/convert_from_pytorch/api_difference/cuda/torch.cuda.stream.md
+++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/cuda/torch.cuda.stream.md
@@ -6,10 +6,10 @@
 torch.cuda.stream(stream)
 ```
 
-### [paddle.device.cuda.stream_guard](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api/paddle/device/cuda/stream_guard_cn.html)
+### [paddle.device.stream_guard](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.6/api/paddle/device/stream_guard_cn.html#stream-guard)
 
 ```python
-paddle.device.cuda.stream_guard(stream)
+paddle.device.stream_guard(stream)
 ```
 
 功能一致，参数完全一致，具体如下：
diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.GRU.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.GRU.md
index 23f0fea94e9..69cc517c03a 100644
--- a/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.GRU.md
+++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.GRU.md
@@ -7,7 +7,9 @@ torch.nn.GRU(input_size,
              bias=True,
              batch_first=False,
              dropout=0,
-             bidirectional=False)
+             bidirectional=False,
+             device=None,
+             dtype=None)
 ```
 
 ### [paddle.nn.GRU](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api/paddle/nn/GRU_cn.html#gru)
@@ -36,6 +38,8 @@ paddle.nn.GRU(input_size,
 | batch_first   | time_major   | PyTorch 表示 batch size 是否为第一维，PaddlePaddle 表示 time steps 是否为第一维，它们的意义相反。需要转写。  |
 | dropout   | dropout   | 表示 dropout 概率。  |
 | bidirectional | direction    | PyTorch 表示是否进行双向 GRU，Paddle 使用字符串表示是双向 GRU（`bidirectional`）还是单向 GRU（`forward`）。 |
+| device   | -   | 指定 Tensor 的设备，Paddle 无此参数，一般对网络训练结果影响不大，可直接删除。  |
+| dtype   | -   | Tensor 的所需数据类型，Paddle 无此参数，一般对网络训练结果影响不大，可直接删除。 |
 | -             |weight_ih_attr| weight_ih 的参数， PyTorch 无此参数， Paddle 保持默认即可。  |
 | -             |weight_hh_attr| weight_hh 的参数，  PyTorch 无此参数， Paddle 保持默认即可。  |
 
diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.LSTM.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.LSTM.md
index a139fc71f3c..014f18a2d19 100644
--- a/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.LSTM.md
+++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.LSTM.md
@@ -9,7 +9,9 @@ torch.nn.LSTM(input_size,
               batch_first=False,
               dropout=0,
               bidirectional=False,
-              proj_size=0)
+              proj_size=0,
+              device=None,
+              dtype=None)
 ```
 
 ### [paddle.nn.LSTM](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api/paddle/nn/LSTM_cn.html#lstm)
@@ -42,6 +44,8 @@ paddle.nn.LSTM(input_size,
 | dropout   | dropout   | 表示 dropout 概率。  |
 | bidirectional | direction    | PyTorch 表示是否进行双向，Paddle 使用字符串表示是双向 LSTM（`bidirectional`）还是单向 LSTM（`forward`）|
 | proj_size     | proj_size            | 表示 LSTM 后将 `hidden state` 映射到对应的大小。 |
+| device   | -   | 指定 Tensor 的设备，Paddle 无此参数，一般对网络训练结果影响不大，可直接删除。  |
+| dtype   | -   | Tensor 的所需数据类型，Paddle 无此参数，一般对网络训练结果影响不大，可直接删除。 |
 | -             |weight_ih_attr| weight_ih 的参数，PyTorch 无此参数，Paddle 保持默认即可。  |
 | -             |weight_hh_attr| weight_hh 的参数，PyTorch 无此参数，Paddle 保持默认即可。  |
 
diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_buffers.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_buffers.md
index 04968e47f62..37e16a1bd25 100644
--- a/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_buffers.md
+++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_buffers.md
@@ -1,4 +1,4 @@
-## [ 仅参数名不一致 ]torch.nn.Module.named_buffers
+## [ torch 参数更多 ]torch.nn.Module.named_buffers
 
 ### [torch.nn.Module.named_buffers](https://pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module.named_buffers)
 
@@ -9,10 +9,10 @@ torch.nn.Module.named_buffers(prefix='', recurse=True, remove_duplicate=True)
 ### [paddle.nn.Layer.named_buffers](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api/paddle/nn/Layer_cn.html#named-buffers-prefix-include-sublayers-true)
 
 ```python
-paddle.nn.Layer.named_buffers(prefix='', include_sublayers=True, remove_duplicate=True)
+paddle.nn.Layer.named_buffers(prefix='', include_sublayers=True)
 ```
 
-两者功能一致且参数用法一致，仅参数名不一致，具体如下：
+PyTorch 相比 Paddle 支持更多其他参数，具体如下：
 
 ### 参数映射
 
@@ -20,4 +20,4 @@ paddle.nn.Layer.named_buffers(prefix='', include_sublayers=True, remove_duplicat
 | -------------- | ------------ | ------------------------------------------------------------- |
 | prefix         | prefix       | 在所有参数名称前加的前缀。                                            |
 | recurse        | include_sublayers     | 生成该模块和所有子模块的缓冲区，仅参数名不一致。                               |
-| remove_duplicate   | remove_duplicate  | 是否删除结果中重复的模块实例。                                        |
+| remove_duplicate   | -  | 是否删除结果中重复的模块实例，Paddle 无此参数，暂无转写方式。                                        |
diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_modules.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_modules.md
index cf06b001f77..8a920b30e78 100644
--- a/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_modules.md
+++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_modules.md
@@ -1,4 +1,4 @@
-## [paddle 参数更多]torch.nn.Module.named_modules
+## [ torch 参数更多 ]torch.nn.Module.named_modules
 
 ### [torch.nn.Module.named_modules](https://pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module.named_modules)
 
@@ -9,10 +9,10 @@ torch.nn.Module.named_modules(memo=None, prefix='', remove_duplicate=True)
 ### [paddle.nn.Layer.named_sublayers](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api/paddle/nn/Layer_cn.html#named-sublayers-prefix-include-self-false-layers-set-none)
 
 ```python
-paddle.nn.Layer.named_sublayers(prefix='', include_self=False, layers_set=None, remove_duplicate=True)
+paddle.nn.Layer.named_sublayers(prefix='', include_self=False, layers_set=None)
 ```
 
-Paddle 相比 PyTorch 支持更多其他参数，具体如下：
+PyTorch 相比 Paddle 支持更多其他参数，具体如下：
 
 ### 参数映射
 
@@ -20,5 +20,5 @@ Paddle 相比 PyTorch 支持更多其他参数，具体如下：
 | -------------- | ------------ | ------------------------------------------------------------- |
 | memo          | layers_set   | 用来记录已经加入结果的子层的集合，仅参数名不一致。                               |
 | prefix   | prefix  | 在所有参数名称前加的前缀。                                            |
-| remove_duplicate   | remove_duplicate  | 是否删除结果中重复的模块实例。                                            |
+| remove_duplicate   | -  | 是否删除结果中重复的模块实例，Paddle 无此参数，暂无转写方式。                                            |
 | -         | include_self      | 是否包含该层自身，PyTorch 无此参数，Paddle 保持默认即可。                                                |
diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_parameters.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_parameters.md
index ce8d7dd8211..31e4fd8977d 100644
--- a/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_parameters.md
+++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_parameters.md
@@ -1,4 +1,4 @@
-## [ 仅参数名不一致 ]torch.nn.Module.named_parameters
+## [ torch 参数更多 ]torch.nn.Module.named_parameters
 
 ### [torch.nn.Module.named_parameters](https://pytorch.org/docs/stable/generated/torch.nn.Module.html?highlight=torch+nn+module+named_parameters#torch.nn.Module.named_parameters)
 
@@ -9,10 +9,10 @@ torch.nn.Module.named_parameters(prefix='', recurse=True, remove_duplicate=True)
 ### [paddle.nn.Layer.named_parameters](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api/paddle/nn/Layer_cn.html#named-parameters-prefix-include-sublayers-true)
 
 ```python
-paddle.nn.Layer.named_parameters(prefix='', include_sublayers=True, remove_duplicate=True)
+paddle.nn.Layer.named_parameters(prefix='', include_sublayers=True)
 ```
 
-两者功能一致且参数用法一致，仅参数名不一致，具体如下：
+PyTorch 相比 Paddle 支持更多其他参数，具体如下：
 
 ### 参数映射
 
@@ -20,4 +20,4 @@ paddle.nn.Layer.named_parameters(prefix='', include_sublayers=True, remove_dupli
 | -------------- | ------------ | ------------------------------------------------------------- |
 | prefix   | prefix  | 在所有参数名称前加的前缀。                                            |
 | recurse   | include_sublayers  | 生成该模块和所有子模块的参数, 仅参数名不一致。                                            |
-| remove_duplicate   | remove_duplicate  | 是否删除结果中的重复参数。                                        |
+| remove_duplicate   | -  | 是否删除结果中的重复参数，Paddle 无此参数，暂无转写方式。                                       |
diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.RNN.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.RNN.md
index 3e49cdb8af0..aab6b8ea854 100644
--- a/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.RNN.md
+++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.RNN.md
@@ -8,7 +8,9 @@ torch.nn.RNN(input_size,
              bias=True,
              batch_first=False,
              dropout=0,
-             bidirectional=False)
+             bidirectional=False,
+             device=None,
+             dtype=None)
 ```
 
 ### [paddle.nn.SimpleRNN](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api/paddle/nn/SimpleRNN_cn.html#simplernn)
@@ -29,6 +31,8 @@ paddle.nn.SimpleRNN(input_size, hidden_size, num_layers=1, activation='tanh', di
 | batch_first   | time_major   | PyTorch 表示 batch size 是否为第一维，PaddlePaddle 表示 time steps 是否为第一维，它们的意义相反。需要转写。  |
 | dropout   | dropout   | 表示 dropout 概率。  |
 | bidirectional | direction    | PyTorch 表示是否进行双向 RNN，Paddle 使用字符串表示是双向 RNN（`bidirectional`）还是单向 RNN（`forward`）。 |
+| device   | -   | 指定 Tensor 的设备，Paddle 无此参数，一般对网络训练结果影响不大，可直接删除。  |
+| dtype   | -   | Tensor 的所需数据类型，Paddle 无此参数，一般对网络训练结果影响不大，可直接删除。 |
 | -             |weight_ih_attr| weight_ih 的参数， PyTorch 无此参数， Paddle 保持默认即可。  |
 | -             |weight_hh_attr| weight_hh 的参数，  PyTorch 无此参数， Paddle 保持默认即可。  |
 
diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Transformer.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Transformer.md
index f6990ea928d..b3fa7545edb 100644
--- a/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Transformer.md
+++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Transformer.md
@@ -3,7 +3,7 @@
 ### [torch.nn.Transformer](https://pytorch.org/docs/stable/generated/torch.nn.Transformer.html#torch.nn.Transformer)
 
 ```python
-torch.nn.Transformer(d_model=512, nhead=8, num_encoder_layers=6, num_decoder_layers=6, dim_feedforward=2048, dropout=0.1, activation=<function relu>, custom_encoder=None, custom_decoder=None, layer_norm_eps=1e-05, batch_first=False, norm_first=False, device=None, dtype=None)
+torch.nn.Transformer(d_model=512, nhead=8, num_encoder_layers=6, num_decoder_layers=6, dim_feedforward=2048, dropout=0.1, activation=<function relu>, custom_encoder=None, custom_decoder=None, layer_norm_eps=1e-05, batch_first=False, norm_first=False, bias=True, device=None, dtype=None)
 ```
 
 ### [paddle.nn.Transformer](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api/paddle/nn/Transformer_cn.html)
@@ -30,12 +30,13 @@ PyTorch 相比 Paddle 支持更多其他参数，具体如下：
 | layer_norm_eps     | -                  | 层 normalization 组件的 eps 值，Paddle 无此参数，暂无转写方式。                     |
 | batch_first        | -                  | 表示输入数据的第 0 维是否代表 batch_size，Paddle 无此参数，暂无转写方式。           |
 | norm_first         | normalize_before   | 是否 LayerNorms 操作在 attention 和 feedforward 前，仅参数名不一致。                |
+| bias                 | bias_attr          | 指定偏置参数属性的对象，仅参数名不一致。                     |
 | device             | -                  | Tensor 的设备，Paddle 无此参数，需要转写。                                      |
 | dtype              | -                  | Tensor 的数据类型，Paddle 无此参数，需要转写。                                  |
 | -                  | attn_dropout       | 多头自注意力机制中对注意力目标的随机失活率，PyTorch 无此参数，Paddle 保持默认即可。 |
 | -                  | act_dropout        | 前馈神经网络的激活函数后的 dropout，PyTorch 无此参数，Paddle 保持默认即可。         |
 | -                  | weight_attr        | 指定权重参数属性的对象，PyTorch 无此参数，Paddle 保持默认即可。                     |
-| -                  | bias_attr          | 指定偏置参数属性的对象，PyTorch 无此参数，Paddle 保持默认即可。                     |
+
 
 ### 转写示例
 
diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.TransformerDecoderLayer.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.TransformerDecoderLayer.md
index 01a879d962d..190c2f268e2 100644
--- a/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.TransformerDecoderLayer.md
+++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.TransformerDecoderLayer.md
@@ -10,6 +10,7 @@ torch.nn.TransformerDecoderLayer(d_model,
                                  layer_norm_eps=1e-05,
                                  batch_first=False,
                                  norm_first=False,
+                                 bias=True,
                                  device=None,
                                  dtype=None)
 ```
@@ -42,7 +43,7 @@ PyTorch 相比 Paddle 支持更多其他参数，具体如下：
 | layer_norm_eps | layer_norm_eps       | layer normalization 层的 eps 值。  |
 | batch_first     | -      | 输入和输出 tensor 的 shape，Paddle 无此参数，暂无转写方式  |
 | norm_first             | normalize_before  | 设置对每个子层的输入输出的处理。如果为 True，则对每个子层的输入进行层标准化（Layer Normalization），对每个子层的输出进行 dropout 和残差连接（residual connection）。否则（即为 False），则对每个子层的输入不进行处理，只对每个子层的输出进行 dropout、残差连接（residual connection）和层标准化（Layer Normalization）。默认值：False。  仅参数名不一致|
+| bias                 | bias_attr          | 指定偏置参数属性的对象，仅参数名不一致。                     |
 | device        | -            | 设备类型，Paddle 无此参数，一般对网络训练结果影响不大，可直接删除。        |
 | dtype         | -            | 参数类型，Paddle 无此参数，一般对网络训练结果影响不大，可直接删除。        |
 | -             | weight_attr  | 指定权重参数的属性，PyTorch 无此参数，Paddle 保持默认即可。 |
-| -             | bias_attr    | 指定偏置参数的属性, PyTorch 无此参数，Paddle 保持默认即可。 |
diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.TransformerEncoderLayer.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.TransformerEncoderLayer.md
index 3aa7056e51b..28712c6a4ca 100644
--- a/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.TransformerEncoderLayer.md
+++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.TransformerEncoderLayer.md
@@ -3,7 +3,7 @@
 ### [torch.nn.TransformerEncoderLayer](https://pytorch.org/docs/stable/generated/torch.nn.TransformerEncoderLayer.html#torch.nn.TransformerEncoderLayer)
 
 ```python
-torch.nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward=2048, dropout=0.1, activation=<function relu>, layer_norm_eps=1e-05, batch_first=False, norm_first=False, device=None, dtype=None)
+torch.nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward=2048, dropout=0.1, activation=<function relu>, layer_norm_eps=1e-05, batch_first=False, norm_first=False, bias=True, device=None, dtype=None)
 ```
 
 ### [paddle.nn.TransformerEncoderLayer](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api/paddle/nn/TransformerEncoderLayer_cn.html)
@@ -26,12 +26,12 @@ PyTorch 相比 Paddle 支持更多其他参数，具体如下：
 | layer_norm_eps  | layer_norm_eps   | 层 normalization 组件的 eps 值。                                                  |
 | batch_first     | -                | 表示输入数据的第 0 维是否代表 batch_size，Paddle 无此参数，暂无转写方式。           |
 | norm_first      | normalize_before | 是否 LayerNorms 操作在 attention 和 feedforward 前，仅参数名不一致。                |
+| bias                 | bias_attr          | 指定偏置参数属性的对象，仅参数名不一致。                     |
 | device          | -                | Tensor 的设备，Paddle 无此参数，需要转写。                                      |
 | dtype           | -                | Tensor 的数据类型，Paddle 无此参数，需要转写。                                  |
 | -               | attn_dropout     | 多头自注意力机制中对注意力目标的随机失活率，PyTorch 无此参数，Paddle 保持默认即可。 |
 | -               | act_dropout      | 前馈神经网络的激活函数后的 dropout，PyTorch 无此参数，Paddle 保持默认即可。         |
 | -               | weight_attr      | 指定权重参数属性的对象，PyTorch 无此参数，Paddle 保持默认即可。                     |
-| -               | bias_attr        | 指定偏置参数属性的对象，PyTorch 无此参数，Paddle 保持默认即可。                     |
 
 ### 转写示例
 
diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ColumnParallelLinear.md b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ColumnParallelLinear.md
index c448b7e25b7..2f29477d94e 100644
--- a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ColumnParallelLinear.md
+++ b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ColumnParallelLinear.md
@@ -5,10 +5,10 @@
 ```python
 fairscale.nn.model_parallel.layers.ColumnParallelLinear(in_features: int, out_features: int, bias: bool = True, gather_output: bool = True, init_method: Callable[[torch.Tensor], torch.Tensor] = init.xavier_normal_, stride: int = 1, keep_master_weight_for_test: bool = False)
 ```
-### [paddle.distributed.meta_parallel.parallel_layers.mp_layers.ColumnParallelLinear](https://github.com/PaddlePaddle/Paddle/blob/016766cc89fabc10181453ce70b701dd8ed019f6/python/paddle/distributed/fleet/layers/mpu/mp_layers.py#L153)
+### [paddle.distributed.fleet.meta_parallel.ColumnParallelLinear](https://github.com/PaddlePaddle/Paddle/blob/016766cc89fabc10181453ce70b701dd8ed019f6/python/paddle/distributed/fleet/layers/mpu/mp_layers.py#L153)
 
 ```python
-paddle.distributed.meta_parallel.parallel_layers.mp_layers.ColumnParallelLinear(in_features, out_features, weight_attr=None, has_bias=None, gather_output=True, fuse_matmul_bias=False, mp_group=None, name=None)
+paddle.distributed.fleet.meta_parallel.ColumnParallelLinear(in_features, out_features, weight_attr=None, has_bias=None, gather_output=True, fuse_matmul_bias=False, mp_group=None, name=None)
 ```
 
 PyTorch 相比 Paddle 支持更多其他参数，具体如下：
diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.RowParallelLinear.md b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.RowParallelLinear.md
index e9feb84aa3f..c520b00846f 100644
--- a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.RowParallelLinear.md
+++ b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.RowParallelLinear.md
@@ -6,10 +6,10 @@
 fairscale.nn.model_parallel.layers.RowParallelLinear(in_features: int, out_features: int, bias: bool = True, input_is_parallel: bool = False, init_method: Callable[[torch.Tensor], torch.Tensor] = init.xavier_normal_, stride: int = 1, keep_master_weight_for_test: bool = False)
 ```
 
-### [paddle.distributed.meta_parallel.parallel_layers.mp_layers.RowParallelLinear](https://github.com/PaddlePaddle/Paddle/blob/016766cc89fabc10181453ce70b701dd8ed019f6/python/paddle/distributed/fleet/layers/mpu/mp_layers.py#L291)
+### [paddle.distributed.fleet.meta_parallel.RowParallelLinear](https://github.com/PaddlePaddle/Paddle/blob/016766cc89fabc10181453ce70b701dd8ed019f6/python/paddle/distributed/fleet/layers/mpu/mp_layers.py#L291)
 
 ```python
-paddle.distributed.meta_parallel.parallel_layers.mp_layers.RowParallelLinear(in_features, out_features, weight_attr=None, has_bias=True, input_is_parallel=False, fuse_matmul_bias=False, mp_group=None, name=None)
+paddle.distributed.fleet.meta_parallel.RowParallelLinear(in_features, out_features, weight_attr=None, has_bias=True, input_is_parallel=False, fuse_matmul_bias=False, mp_group=None, name=None)
 ```
 
 PyTorch 相比 Paddle 支持更多其他参数，具体如下：
diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/transformers/transformers.AddedToken.md b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/transformers/transformers.AddedToken.md
index 9eff3049393..7f828763d9b 100644
--- a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/transformers/transformers.AddedToken.md
+++ b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/transformers/transformers.AddedToken.md
@@ -1,4 +1,4 @@
-## [参数完全一致]transformers.AddedToken
+## [ torch 参数更多 ]transformers.AddedToken
 
 ### [transformers.AddedToken](https://github.com/huggingface/transformers/blob/d625294d79341662784495551abdf45e6cb9372f/src/transformers/tokenization_utils_base.py#L84)
 
@@ -9,10 +9,10 @@ transformers.AddedToken(content: str, single_word=False, lstrip=False, rstrip=Fa
 ### [paddlenlp.transformers.AddedToken](https://github.com/PaddlePaddle/PaddleNLP/blob/e336e78c338d2514ee6c937982ce5d8c960b85ff/paddlenlp/transformers/tokenizer_utils_base.py#L48)
 
 ```python
-paddlenlp.transformers.AddedToken(content: str = field(default_factory=str), single_word: bool = False， lstrip: bool = False, rstrip: bool = False, normalized: bool = True, special: bool = True)
+paddlenlp.transformers.AddedToken(content: str = field(default_factory=str), single_word: bool = False, lstrip: bool = False, rstrip: bool = False, normalized: bool = True)
 ```
 
-功能一致，参数完全一致，具体如下：
+PyTorch 相比 Paddle 支持更多其他参数，具体如下：
 
 ### 参数映射
 
@@ -22,5 +22,5 @@ paddlenlp.transformers.AddedToken(content: str = field(default_factory=str), sin
 | single_word  | single_word  | token 是否视为独立的词。 |
 | lstrip       | lstrip       | 是否移除左侧空白符。    |
 | rstrip       | rstrip       | 是否移除左侧空白符。    |
-| special      | special      | 是否有特殊的处理方式。  |
+| special      | -      | 是否有特殊的处理方式，Paddle 暂不支持该参数。  |
 | normalized   | normalized  | 是否进行规范化处理。  |
diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/transformers/transformers.PreTrainedTokenizer.md b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/transformers/transformers.PreTrainedTokenizer.md
index 4266dcf7d49..cd59dfb578a 100644
--- a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/transformers/transformers.PreTrainedTokenizer.md
+++ b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/transformers/transformers.PreTrainedTokenizer.md
@@ -6,10 +6,10 @@
 transformers.PreTrainedTokenizer(**kwargs)
 ```
 
-### [paddlenlp.transformers.PreTrainedTokenizer](https://github.com/PaddlePaddle/PaddleNLP/blob/e336e78c338d2514ee6c937982ce5d8c960b85ff/paddlenlp/transformers/tokenizer_utils.py#L881)
+### [paddlenlp.transformers.PretrainedTokenizer](https://github.com/PaddlePaddle/PaddleNLP/blob/e336e78c338d2514ee6c937982ce5d8c960b85ff/paddlenlp/transformers/tokenizer_utils.py#L881)
 
 ```python
-paddlenlp.transformers.PreTrainedTokenizer(**kwargs)
+paddlenlp.transformers.PretrainedTokenizer(**kwargs)
 ```
 
 功能一致，参数完全一致，具体如下：

From 6bbf69b67b37cb62599943e581cfbdbb89bd62c8 Mon Sep 17 00:00:00 2001
From: xxa <1829994704@qq.com>
Date: Tue, 24 Sep 2024 16:12:16 +0800
Subject: [PATCH 02/13] fix-docs2

---
 .../distributions/torch.distributions.Binomial.md               | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/distributions/torch.distributions.Binomial.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/distributions/torch.distributions.Binomial.md
index c89f508cd1d..4a4e481ebaa 100644
--- a/docs/guides/model_convert/convert_from_pytorch/api_difference/distributions/torch.distributions.Binomial.md
+++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/distributions/torch.distributions.Binomial.md
@@ -22,7 +22,7 @@ PyTorch 相比 Paddle 支持更多其他参数，具体如下：
 
 | PyTorch       | PaddlePaddle | 备注                                                         |
 | ------------- | ------ | ------------------------------------------------------------ |
-| total_count        | total_count      | 样本大小。                         |
+| total_count        | total_count      | 样本大小，当torch不指定时，paddle应设置该值为1。                         |
 | probs           | probs      | 每次伯努利实验中事件发生的概率。         |
 | logits         | -  | 采样 1 的 log-odds，Paddle 无此参数，暂无转写方式。 |
 | validate_args        | -      | 是否添加验证环节。Paddle 无此参数，一般对训练结果影响不大，可直接删除。 |

From d8658962c78c7bddc888e4ae2f5edde9b12c44a6 Mon Sep 17 00:00:00 2001
From: xxa <1829994704@qq.com>
Date: Tue, 24 Sep 2024 17:14:12 +0800
Subject: [PATCH 03/13] fix-docs2

---
 .../distributions/torch.distributions.Binomial.md               | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/distributions/torch.distributions.Binomial.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/distributions/torch.distributions.Binomial.md
index 4a4e481ebaa..6ae5d016935 100644
--- a/docs/guides/model_convert/convert_from_pytorch/api_difference/distributions/torch.distributions.Binomial.md
+++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/distributions/torch.distributions.Binomial.md
@@ -22,7 +22,7 @@ PyTorch 相比 Paddle 支持更多其他参数，具体如下：
 
 | PyTorch       | PaddlePaddle | 备注                                                         |
 | ------------- | ------ | ------------------------------------------------------------ |
-| total_count        | total_count      | 样本大小，当torch不指定时，paddle应设置该值为1。                         |
+| total_count        | total_count      | 样本大小，当torch不指定时，Paddle 应设置该值为1。                         |
 | probs           | probs      | 每次伯努利实验中事件发生的概率。         |
 | logits         | -  | 采样 1 的 log-odds，Paddle 无此参数，暂无转写方式。 |
 | validate_args        | -      | 是否添加验证环节。Paddle 无此参数，一般对训练结果影响不大，可直接删除。 |

From 1d80e56109b249e57c3698cff78d7eeb2dc8edf1 Mon Sep 17 00:00:00 2001
From: xxa <1829994704@qq.com>
Date: Tue, 24 Sep 2024 19:24:15 +0800
Subject: [PATCH 04/13] fix-docs2

---
 .../distributions/torch.distributions.Binomial.md               | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/distributions/torch.distributions.Binomial.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/distributions/torch.distributions.Binomial.md
index 6ae5d016935..38012b3b6fb 100644
--- a/docs/guides/model_convert/convert_from_pytorch/api_difference/distributions/torch.distributions.Binomial.md
+++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/distributions/torch.distributions.Binomial.md
@@ -22,7 +22,7 @@ PyTorch 相比 Paddle 支持更多其他参数，具体如下：
 
 | PyTorch       | PaddlePaddle | 备注                                                         |
 | ------------- | ------ | ------------------------------------------------------------ |
-| total_count        | total_count      | 样本大小，当torch不指定时，Paddle 应设置该值为1。                         |
+| total_count        | total_count      | 样本大小，当 PyTorch 不指定时，Paddle 应设置该值为 1。                         |
 | probs           | probs      | 每次伯努利实验中事件发生的概率。         |
 | logits         | -  | 采样 1 的 log-odds，Paddle 无此参数，暂无转写方式。 |
 | validate_args        | -      | 是否添加验证环节。Paddle 无此参数，一般对训练结果影响不大，可直接删除。 |

From 53e25aa2f3cdee2f0d9c48834cf7e6d6b56d7016 Mon Sep 17 00:00:00 2001
From: xxa <1829994704@qq.com>
Date: Thu, 26 Sep 2024 16:38:06 +0800
Subject: [PATCH 05/13] fix-docs2

---
 .../api_difference/Tensor/torch.Tensor.new_tensor.md            | 2 +-
 .../api_difference/cuda/torch.cuda.Stream__upper.md             | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/Tensor/torch.Tensor.new_tensor.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/Tensor/torch.Tensor.new_tensor.md
index 73a0f951f78..18372308fd7 100644
--- a/docs/guides/model_convert/convert_from_pytorch/api_difference/Tensor/torch.Tensor.new_tensor.md
+++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/Tensor/torch.Tensor.new_tensor.md
@@ -3,7 +3,7 @@
 ### [torch.Tensor.new_tensor](https://pytorch.org/docs/stable/generated/torch.Tensor.new_tensor.html#torch-tensor-new-tensor)
 
 ```python
-torch.Tensor.new_tensor(data, *, dtype=None, device=None, requires_grad=False, layout=torch.strided, pin_memory=False)
+torch.Tensor.new_tensor(data, *, dtype=None, device=None, requires_grad=False)
 ```
 
 ### [paddle.to_tensor](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api/paddle/to_tensor_cn.html)
diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/cuda/torch.cuda.Stream__upper.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/cuda/torch.cuda.Stream__upper.md
index 84e14a94d0e..37dd537c81a 100644
--- a/docs/guides/model_convert/convert_from_pytorch/api_difference/cuda/torch.cuda.Stream__upper.md
+++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/cuda/torch.cuda.Stream__upper.md
@@ -20,7 +20,7 @@ paddle.device.Stream(device=None, priority=None, blocking=False)
 | -------- | ------------ | ----------------------------------------------------------------------------------------- |
 | device   | device       | 希望分配 stream 的设备。                                                                  |
 | priority | priority     | stream 的优先级，PyTorch 取值范围为-1、0，Paddle 的取值范围为 1、2，需要转写。 |
-| - | blocking     | stream 是否同步执行。 |
+| - | blocking     | stream 是否同步执行，Paddle保持默认值即可。 |
 
 ### 转写示例
 

From 96c1db16497a8bc474cc15edbb1183579b53b088 Mon Sep 17 00:00:00 2001
From: xxa <1829994704@qq.com>
Date: Thu, 26 Sep 2024 17:11:43 +0800
Subject: [PATCH 06/13] fix-docs2

---
 .../api_difference/cuda/torch.cuda.Stream__upper.md             | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/cuda/torch.cuda.Stream__upper.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/cuda/torch.cuda.Stream__upper.md
index 37dd537c81a..58aae66cde8 100644
--- a/docs/guides/model_convert/convert_from_pytorch/api_difference/cuda/torch.cuda.Stream__upper.md
+++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/cuda/torch.cuda.Stream__upper.md
@@ -20,7 +20,7 @@ paddle.device.Stream(device=None, priority=None, blocking=False)
 | -------- | ------------ | ----------------------------------------------------------------------------------------- |
 | device   | device       | 希望分配 stream 的设备。                                                                  |
 | priority | priority     | stream 的优先级，PyTorch 取值范围为-1、0，Paddle 的取值范围为 1、2，需要转写。 |
-| - | blocking     | stream 是否同步执行，Paddle保持默认值即可。 |
+| - | blocking     | stream 是否同步执行，Paddle 保持默认值即可。 |
 
 ### 转写示例
 

From 11c922b88c09511a9ccb3169d66a74dc9b6d01e3 Mon Sep 17 00:00:00 2001
From: xxa <1829994704@qq.com>
Date: Thu, 26 Sep 2024 22:50:30 +0800
Subject: [PATCH 07/13] fix-docs2

---
 .../api_difference/nn/torch.nn.Module.named_buffers.md |  8 ++++----
 .../api_difference/nn/torch.nn.Module.named_modules.md | 10 +++++-----
 .../nn/torch.nn.Module.named_parameters.md             |  8 ++++----
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_buffers.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_buffers.md
index 37e16a1bd25..2cca04d4082 100644
--- a/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_buffers.md
+++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_buffers.md
@@ -1,4 +1,4 @@
-## [torch 参数更多 ]torch.nn.Module.named_buffers
+## [仅参数名不一致]torch.nn.Module.named_buffers
 
 ### [torch.nn.Module.named_buffers](https://pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module.named_buffers)
 
@@ -9,10 +9,10 @@ torch.nn.Module.named_buffers(prefix='', recurse=True, remove_duplicate=True)
 ### [paddle.nn.Layer.named_buffers](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api/paddle/nn/Layer_cn.html#named-buffers-prefix-include-sublayers-true)
 
 ```python
-paddle.nn.Layer.named_buffers(prefix='', include_sublayers=True)
+paddle.nn.Layer.named_buffers(prefix='', include_sublayers=True, remove_duplicate=True)
 ```
 
-PyTorch 相比 Paddle 支持更多其他参数，具体如下：
+两者功能一致且参数用法一致，仅参数名不一致，具体如下：
 
 ### 参数映射
 
@@ -20,4 +20,4 @@ PyTorch 相比 Paddle 支持更多其他参数，具体如下：
 | -------------- | ------------ | ------------------------------------------------------------- |
 | prefix         | prefix       | 在所有参数名称前加的前缀。                                            |
 | recurse        | include_sublayers     | 生成该模块和所有子模块的缓冲区，仅参数名不一致。                               |
-| remove_duplicate   | -  | 是否删除结果中重复的模块实例，Paddle 暂无转写方式。                                        |
+| remove_duplicate   | remove_duplicate  | 是否删除结果中重复的缓冲区。 |
diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_modules.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_modules.md
index 8a920b30e78..7b3555ef602 100644
--- a/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_modules.md
+++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_modules.md
@@ -1,4 +1,4 @@
-## [torch 参数更多 ]torch.nn.Module.named_modules
+## [paddle 参数更多]torch.nn.Module.named_modules
 
 ### [torch.nn.Module.named_modules](https://pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module.named_modules)
 
@@ -9,10 +9,10 @@ torch.nn.Module.named_modules(memo=None, prefix='', remove_duplicate=True)
 ### [paddle.nn.Layer.named_sublayers](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api/paddle/nn/Layer_cn.html#named-sublayers-prefix-include-self-false-layers-set-none)
 
 ```python
-paddle.nn.Layer.named_sublayers(prefix='', include_self=False, layers_set=None)
+paddle.nn.Layer.named_sublayers(prefix='', include_self=False, layers_set=None, remove_duplicate=True)
 ```
 
-PyTorch 相比 Paddle 支持更多其他参数，具体如下：
+其中 Paddle 相比 PyTorch 支持更多其他参数，具体如下：
 
 ### 参数映射
 
@@ -20,5 +20,5 @@ PyTorch 相比 Paddle 支持更多其他参数，具体如下：
 | -------------- | ------------ | ------------------------------------------------------------- |
 | memo          | layers_set   | 用来记录已经加入结果的子层的集合，仅参数名不一致。                               |
 | prefix   | prefix  | 在所有参数名称前加的前缀。                                            |
-| remove_duplicate   | -  | 是否删除结果中重复的模块实例，Paddle 无此参数，暂无转写方式。                                            |
-| -         | include_self      | 是否包含该层自身，PyTorch 无此参数，Paddle 保持默认即可。                                                |
+| remove_duplicate   | remove_duplicate  | 是否删除结果中重复的模块实例。                                            |
+| -         | include_self      | 是否包含该层自身，PyTorch 无此参数，Paddle 设置为True。                                                |
diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_parameters.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_parameters.md
index 31e4fd8977d..e7af6286449 100644
--- a/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_parameters.md
+++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_parameters.md
@@ -1,4 +1,4 @@
-## [torch 参数更多 ]torch.nn.Module.named_parameters
+## [仅参数名不一致]torch.nn.Module.named_parameters
 
 ### [torch.nn.Module.named_parameters](https://pytorch.org/docs/stable/generated/torch.nn.Module.html?highlight=torch+nn+module+named_parameters#torch.nn.Module.named_parameters)
 
@@ -9,10 +9,10 @@ torch.nn.Module.named_parameters(prefix='', recurse=True, remove_duplicate=True)
 ### [paddle.nn.Layer.named_parameters](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api/paddle/nn/Layer_cn.html#named-parameters-prefix-include-sublayers-true)
 
 ```python
-paddle.nn.Layer.named_parameters(prefix='', include_sublayers=True)
+paddle.nn.Layer.named_parameters(prefix='', include_sublayers=True, remove_duplicate=True)
 ```
 
-PyTorch 相比 Paddle 支持更多其他参数，具体如下：
+两者功能一致且参数用法一致，仅参数名不一致，具体如下：
 
 ### 参数映射
 
@@ -20,4 +20,4 @@ PyTorch 相比 Paddle 支持更多其他参数，具体如下：
 | -------------- | ------------ | ------------------------------------------------------------- |
 | prefix   | prefix  | 在所有参数名称前加的前缀。                                            |
 | recurse   | include_sublayers  | 生成该模块和所有子模块的参数, 仅参数名不一致。                                            |
-| remove_duplicate   | -  | 是否删除结果中的重复参数，Paddle 无此参数，暂无转写方式。                                       |
+| remove_duplicate   | remove_duplicate  | 是否删除结果中的重复参数。|

From 26d4b49f08c7e79096927ff93234217250ce26fa Mon Sep 17 00:00:00 2001
From: xxa <1829994704@qq.com>
Date: Fri, 27 Sep 2024 14:14:01 +0800
Subject: [PATCH 08/13] fix-docs2

---
 .../api_difference/cuda/torch.cuda.Stream__upper.md            | 3 +--
 .../api_difference/nn/torch.nn.Module.named_modules.md         | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/cuda/torch.cuda.Stream__upper.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/cuda/torch.cuda.Stream__upper.md
index 58aae66cde8..47f256938b0 100644
--- a/docs/guides/model_convert/convert_from_pytorch/api_difference/cuda/torch.cuda.Stream__upper.md
+++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/cuda/torch.cuda.Stream__upper.md
@@ -9,7 +9,7 @@ torch.cuda.Stream(device=None, priority=0, **kwargs)
 ### [paddle.device.Stream](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.6/api/paddle/device/Stream_cn.html#stream)
 
 ```python
-paddle.device.Stream(device=None, priority=None, blocking=False)
+paddle.device.Stream(device=None, priority=None)
 ```
 
 两者功能一致，参数用法不一致，具体如下：
@@ -20,7 +20,6 @@ paddle.device.Stream(device=None, priority=None, blocking=False)
 | -------- | ------------ | ----------------------------------------------------------------------------------------- |
 | device   | device       | 希望分配 stream 的设备。                                                                  |
 | priority | priority     | stream 的优先级，PyTorch 取值范围为-1、0，Paddle 的取值范围为 1、2，需要转写。 |
-| - | blocking     | stream 是否同步执行，Paddle 保持默认值即可。 |
 
 ### 转写示例
 
diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_modules.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_modules.md
index 7b3555ef602..9ce925313d0 100644
--- a/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_modules.md
+++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_modules.md
@@ -21,4 +21,4 @@ paddle.nn.Layer.named_sublayers(prefix='', include_self=False, layers_set=None,
 | memo          | layers_set   | 用来记录已经加入结果的子层的集合，仅参数名不一致。                               |
 | prefix   | prefix  | 在所有参数名称前加的前缀。                                            |
 | remove_duplicate   | remove_duplicate  | 是否删除结果中重复的模块实例。                                            |
-| -         | include_self      | 是否包含该层自身，PyTorch 无此参数，Paddle 设置为True。                                                |
+| -         | include_self      | 是否包含该层自身，PyTorch 无此参数，Paddle 设置为 True。                                                |

From e488314b1ffd0ba48d1e8618637a9faa210e5da5 Mon Sep 17 00:00:00 2001
From: xxa <1829994704@qq.com>
Date: Fri, 27 Sep 2024 20:56:33 +0800
Subject: [PATCH 09/13] fix-docs2

---
 .../fairscale.nn.model_parallel.layers.ParallelEmbedding.md  | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ParallelEmbedding.md b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ParallelEmbedding.md
index cff1f581eaf..33bc3b7088e 100644
--- a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ParallelEmbedding.md
+++ b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ParallelEmbedding.md
@@ -5,10 +5,10 @@
 ```python
 fairscale.nn.model_parallel.layers.ParallelEmbedding(num_embeddings: int, embedding_dim: int ,padding_idx: Optional[int] = None, max_norm: Optional[float] = None, norm_type: float = 2.0, scale_grad_by_freq: bool = False, sparse: bool = False, init_method: Callable[[torch.Tensor], torch.Tensor] = init.xavier_normal_, keep_master_weight_for_test: bool = False)
 ```
-### [paddle.distributed.meta_parallel.parallel_layers.mp_layers.VocabParallelEmbedding](https://github.com/PaddlePaddle/Paddle/blob/016766cc89fabc10181453ce70b701dd8ed019f6/python/paddle/distributed/fleet/layers/mpu/mp_layers.py#L37)
+### [paddle.distributed.fleet.meta_parallel.VocabParallelEmbedding](https://github.com/PaddlePaddle/Paddle/blob/016766cc89fabc10181453ce70b701dd8ed019f6/python/paddle/distributed/fleet/layers/mpu/mp_layers.py#L37)
 
 ```python
-paddle.distributed.meta_parallel.parallel_layers.mp_layers.VocabParallelEmbedding(num_embeddings, embedding_dim, weight_attr=None, mp_group=None, name=None)
+paddle.distributed.fleet.meta_parallel.VocabParallelEmbedding(num_embeddings, embedding_dim, weight_attr=None, mp_group=None, name=None)
 ```
 
 两者功能大体一致，但内部实现细节不一样，ParallelEmbedding 的切分方向沿着 embedding 方向，VocabParallelEmbedding 的切分方向沿着 vocab(词汇表)方向，故在多卡训练时，load 参数时需手动修改以匹配参数切分方式的不同。
@@ -28,3 +28,4 @@ paddle.distributed.meta_parallel.parallel_layers.mp_layers.VocabParallelEmbeddin
 | keep_master_weight_for_test  | -              | 返回主参数用于测试，Paddle 无此参数，一般对网络训练结果影响不大，可直接删除。 |
 | -                            | mp_group       | 模型并行组，PyTorch 无此参数，Paddle 保持默认即可。 |
 | -                            | name           | 网络层名称，PyTorch 无此参数，Paddle 保持默认即可。 |
+| -                            | weight_attr           | 指定权重参数属性，PyTorch 无此参数，Paddle 设置为paddle.nn.initializer.Constant(0)。 |

From a5636934d056cef577b76ce518ef30c0cb6e6e59 Mon Sep 17 00:00:00 2001
From: xxa <1829994704@qq.com>
Date: Fri, 27 Sep 2024 21:02:50 +0800
Subject: [PATCH 10/13] fix-docs2

---
 .../api_difference/nn/torch.nn.Module.named_modules.md        | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_modules.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_modules.md
index 9ce925313d0..c74c7f831ba 100644
--- a/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_modules.md
+++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_modules.md
@@ -12,7 +12,7 @@ torch.nn.Module.named_modules(memo=None, prefix='', remove_duplicate=True)
 paddle.nn.Layer.named_sublayers(prefix='', include_self=False, layers_set=None, remove_duplicate=True)
 ```
 
-其中 Paddle 相比 PyTorch 支持更多其他参数，具体如下：
+Paddle 相比 PyTorch 支持更多其他参数，具体如下：
 
 ### 参数映射
 
@@ -21,4 +21,4 @@ paddle.nn.Layer.named_sublayers(prefix='', include_self=False, layers_set=None,
 | memo          | layers_set   | 用来记录已经加入结果的子层的集合，仅参数名不一致。                               |
 | prefix   | prefix  | 在所有参数名称前加的前缀。                                            |
 | remove_duplicate   | remove_duplicate  | 是否删除结果中重复的模块实例。                                            |
-| -         | include_self      | 是否包含该层自身，PyTorch 无此参数，Paddle 设置为 True。                                                |
+| -         | include_self      | 是否包含该层自身，PyTorch 无此参数，Paddle 需设为 True 才与 Pytorch 一致。           |
\ No newline at end of file

From 07441f8caa497b2d08f8e28f39c81f61ae7064af Mon Sep 17 00:00:00 2001
From: xxa <1829994704@qq.com>
Date: Fri, 27 Sep 2024 21:07:25 +0800
Subject: [PATCH 11/13] fix-docs2

---
 .../api_difference/nn/torch.nn.Module.named_modules.md          | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_modules.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_modules.md
index c74c7f831ba..f48655393b4 100644
--- a/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_modules.md
+++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/nn/torch.nn.Module.named_modules.md
@@ -21,4 +21,4 @@ Paddle 相比 PyTorch 支持更多其他参数，具体如下：
 | memo          | layers_set   | 用来记录已经加入结果的子层的集合，仅参数名不一致。                               |
 | prefix   | prefix  | 在所有参数名称前加的前缀。                                            |
 | remove_duplicate   | remove_duplicate  | 是否删除结果中重复的模块实例。                                            |
-| -         | include_self      | 是否包含该层自身，PyTorch 无此参数，Paddle 需设为 True 才与 Pytorch 一致。           |
\ No newline at end of file
+| -         | include_self      | 是否包含该层自身，PyTorch 无此参数，Paddle 需设为 True 才与 PyTorch 一致。           |

From 3669cb5c9b22c304a8d6bd20f600be12b8cbffd8 Mon Sep 17 00:00:00 2001
From: xxa <1829994704@qq.com>
Date: Sun, 29 Sep 2024 16:49:13 +0800
Subject: [PATCH 12/13] fix-docs2

---
 .../fairscale.nn.model_parallel.layers.ParallelEmbedding.md     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ParallelEmbedding.md b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ParallelEmbedding.md
index 33bc3b7088e..0c957cd0a84 100644
--- a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ParallelEmbedding.md
+++ b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ParallelEmbedding.md
@@ -28,4 +28,4 @@ paddle.distributed.fleet.meta_parallel.VocabParallelEmbedding(num_embeddings, em
 | keep_master_weight_for_test  | -              | 返回主参数用于测试，Paddle 无此参数，一般对网络训练结果影响不大，可直接删除。 |
 | -                            | mp_group       | 模型并行组，PyTorch 无此参数，Paddle 保持默认即可。 |
 | -                            | name           | 网络层名称，PyTorch 无此参数，Paddle 保持默认即可。 |
-| -                            | weight_attr           | 指定权重参数属性，PyTorch 无此参数，Paddle 设置为paddle.nn.initializer.Constant(0)。 |
+| -                            | weight_attr           | 指定权重参数属性，PyTorch 无此参数，Paddle 设置为 paddle.nn.initializer.Constant(0)。 |

From 311b89f8725e1601d92f794adabf1e85e112b7a5 Mon Sep 17 00:00:00 2001
From: xxa <1829994704@qq.com>
Date: Sun, 29 Sep 2024 16:57:36 +0800
Subject: [PATCH 13/13] fix-docs2

---
 .../api_difference/cuda/torch.cuda.Stream__upper.md             | 2 +-
 .../api_difference/cuda/torch.cuda.stream.md                    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/cuda/torch.cuda.Stream__upper.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/cuda/torch.cuda.Stream__upper.md
index 47f256938b0..cae6c35d932 100644
--- a/docs/guides/model_convert/convert_from_pytorch/api_difference/cuda/torch.cuda.Stream__upper.md
+++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/cuda/torch.cuda.Stream__upper.md
@@ -6,7 +6,7 @@
 torch.cuda.Stream(device=None, priority=0, **kwargs)
 ```
 
-### [paddle.device.Stream](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.6/api/paddle/device/Stream_cn.html#stream)
+### [paddle.device.Stream](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api/paddle/device/Stream_cn.html#stream)
 
 ```python
 paddle.device.Stream(device=None, priority=None)
diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/cuda/torch.cuda.stream.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/cuda/torch.cuda.stream.md
index 5a714ea0383..c1546b8116f 100644
--- a/docs/guides/model_convert/convert_from_pytorch/api_difference/cuda/torch.cuda.stream.md
+++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/cuda/torch.cuda.stream.md
@@ -6,7 +6,7 @@
 torch.cuda.stream(stream)
 ```
 
-### [paddle.device.stream_guard](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.6/api/paddle/device/stream_guard_cn.html#stream-guard)
+### [paddle.device.stream_guard](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api/paddle/device/stream_guard_cn.html#stream-guard)
 
 ```python
 paddle.device.stream_guard(stream)