Skip to content

Commit 1ecff72

Browse files
authored
[PRETRAIN] Change some hyper parameters of ernie-1.0 (#1344)
* fix ernie: Normal -> TruncatedNormal, smaller steps. * fix by setting attn_mask to -1e4
1 parent 868e7a2 commit 1ecff72

File tree

3 files changed

+11
-7
lines changed

3 files changed

+11
-7
lines changed

examples/language_model/data_tools/ernie_dataset.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -203,6 +203,8 @@ def pad_and_convert_to_numpy(tokens, tokentypes, masked_positions,
203203
# Padding mask.
204204
padding_mask_np = np.array(
205205
[1] * num_tokens + [0] * padding_length, dtype=np.float32)
206+
padding_mask_np = (1 - padding_mask_np) * -1e4
207+
206208
padding_mask_np = padding_mask_np.reshape([1, 1, -1])
207209
# Lables and loss mask.
208210
labels = [-1] * max_seq_length

examples/language_model/ernie-1.0/run_gb512_s400.sh renamed to examples/language_model/ernie-1.0/run_gb512_s200.sh

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -30,12 +30,12 @@ PYTHONPATH=../../../ python -u -m paddle.distributed.launch \
3030
--use_recompute false \
3131
--max_lr 0.0001 \
3232
--min_lr 0.00001 \
33-
--max_steps 4000000 \
33+
--max_steps 2000000 \
3434
--save_steps 50000 \
3535
--checkpoint_steps 5000 \
36-
--decay_steps 3960000 \
36+
--decay_steps 1980000 \
3737
--weight_decay 0.01 \
38-
--warmup_rate 0.0025 \
38+
--warmup_rate 0.005 \
3939
--grad_clip 1.0 \
4040
--logging_freq 20\
4141
--num_workers 2 \

paddlenlp/transformers/ernie/modeling.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -293,8 +293,9 @@ def __init__(self,
293293
super(ErnieModel, self).__init__()
294294
self.pad_token_id = pad_token_id
295295
self.initializer_range = initializer_range
296-
weight_attr = paddle.ParamAttr(initializer=nn.initializer.Normal(
297-
mean=0.0, std=self.initializer_range))
296+
weight_attr = paddle.ParamAttr(
297+
initializer=nn.initializer.TruncatedNormal(
298+
mean=0.0, std=self.initializer_range))
298299
self.embeddings = ErnieEmbeddings(
299300
vocab_size, hidden_size, hidden_dropout_prob,
300301
max_position_embeddings, type_vocab_size, pad_token_id, weight_attr)
@@ -683,8 +684,9 @@ class ErnieForPretraining(ErniePretrainedModel):
683684
def __init__(self, ernie):
684685
super(ErnieForPretraining, self).__init__()
685686
self.ernie = ernie
686-
weight_attr = paddle.ParamAttr(initializer=nn.initializer.Normal(
687-
mean=0.0, std=self.ernie.initializer_range))
687+
weight_attr = paddle.ParamAttr(
688+
initializer=nn.initializer.TruncatedNormal(
689+
mean=0.0, std=self.ernie.initializer_range))
688690
self.cls = ErniePretrainingHeads(
689691
self.ernie.config["hidden_size"],
690692
self.ernie.config["vocab_size"],

0 commit comments

Comments
 (0)