
Commit 1f446ff

[GPT-3] Add GPT configs 89B and 175B (#2504)
* Add configs for 89B and 175B
1 parent d99b333 commit 1f446ff

File tree

3 files changed: +66 -0 lines changed

examples/language_model/gpt-3/dygraph/modeling.py

Lines changed: 30 additions & 0 deletions
@@ -552,6 +552,36 @@ class GPTPretrainedModel(PretrainedModel):
             "num_partitions": 1,
             "use_recompute": False,
         },
+        "gpt3-89B-en": {  # 89B
+            "vocab_size": 51200,
+            "hidden_size": 12288,
+            "num_hidden_layers": 48,
+            "num_attention_heads": 96,
+            "intermediate_size": 49152,
+            "hidden_act": "gelu",
+            "hidden_dropout_prob": 0.1,
+            "attention_probs_dropout_prob": 0.1,
+            "max_position_embeddings": 1024,
+            "type_vocab_size": 1,  # no use
+            "initializer_range": 0.02,
+            "eos_token_id": 50256,
+            "eol_token_id": 198,
+        },
+        "gpt3-175B-en": {  # 175B
+            "vocab_size": 51200,
+            "hidden_size": 12288,
+            "num_hidden_layers": 96,
+            "num_attention_heads": 96,
+            "intermediate_size": 49152,
+            "hidden_act": "gelu",
+            "hidden_dropout_prob": 0.1,
+            "attention_probs_dropout_prob": 0.1,
+            "max_position_embeddings": 1024,
+            "type_vocab_size": 1,  # no use
+            "initializer_range": 0.02,
+            "eos_token_id": 50256,
+            "eol_token_id": 198,
+        },
         "gpt3-13B-en": {  # 13B
             "vocab_size": 50304,
             "hidden_size": 5120,

examples/language_model/gpt-3/static/modeling.py

Lines changed: 30 additions & 0 deletions
@@ -627,6 +627,36 @@ class GPTPretrainedModel(PretrainedModel):
             "bos_token_id": 0,
             "eol_token_id": 3,
         },
+        "gpt3-89B-en": {  # 89B
+            "vocab_size": 51200,
+            "hidden_size": 12288,
+            "num_hidden_layers": 48,
+            "num_attention_heads": 96,
+            "intermediate_size": 49152,
+            "hidden_act": "gelu",
+            "hidden_dropout_prob": 0.1,
+            "attention_probs_dropout_prob": 0.1,
+            "max_position_embeddings": 1024,
+            "type_vocab_size": 1,  # no use
+            "initializer_range": 0.02,
+            "eos_token_id": 50256,
+            "eol_token_id": 198,
+        },
+        "gpt3-175B-en": {  # 175B
+            "vocab_size": 51200,
+            "hidden_size": 12288,
+            "num_hidden_layers": 96,
+            "num_attention_heads": 96,
+            "intermediate_size": 49152,
+            "hidden_act": "gelu",
+            "hidden_dropout_prob": 0.1,
+            "attention_probs_dropout_prob": 0.1,
+            "max_position_embeddings": 1024,
+            "type_vocab_size": 1,  # no use
+            "initializer_range": 0.02,
+            "eos_token_id": 50256,
+            "eol_token_id": 198,
+        },
         "gpt3-13B-en": {  # 13B
             "vocab_size": 50304,
             "hidden_size": 5120,

paddlenlp/transformers/gpt/tokenizer.py

Lines changed: 6 additions & 0 deletions
@@ -323,6 +323,8 @@ class GPTTokenizer(PretrainedTokenizer):
     gpt_merges_link = "http://bj.bcebos.com/paddlenlp/models/transformers/gpt/gpt-en-merges.txt"
     pretrained_resource_files_map = {
         "vocab_file": {
+            "gpt3-175B-en": gpt_vocab_link,
+            "gpt3-89B-en": gpt_vocab_link,
             "gpt3-13B-en": gpt_vocab_link,
             "gpt3-1.3B-en": gpt_vocab_link,
             "gpt2-xl-en": gpt_vocab_link,
@@ -332,6 +334,8 @@ class GPTTokenizer(PretrainedTokenizer):
             "gpt2-small-en": gpt_vocab_link,
         },
         "merges_file": {
+            "gpt3-175B-en": gpt_merges_link,
+            "gpt3-89B-en": gpt_merges_link,
             "gpt3-13B-en": gpt_merges_link,
             "gpt3-1.3B-en": gpt_merges_link,
             "gpt2-xl-en": gpt_merges_link,
@@ -342,6 +346,8 @@ class GPTTokenizer(PretrainedTokenizer):
         }
     }
     pretrained_init_configuration = {
+        "gpt3-175B-en": {},
+        "gpt3-89B-en": {},
         "gpt3-13B-en": {},
         "gpt3-1.3B-en": {},
         "gpt2-xl-en": {},
