File tree — 3 files changed: +66 −0 lines changed

- examples/language_model/gpt-3
- paddlenlp/transformers/gpt

3 files changed, +66 −0 lines changed
@@ -552,6 +552,36 @@ class GPTPretrainedModel(PretrainedModel):
         "num_partitions": 1,
         "use_recompute": False,
     },
+    "gpt3-89B-en": {  # 89B
+        "vocab_size": 51200,
+        "hidden_size": 12288,
+        "num_hidden_layers": 48,
+        "num_attention_heads": 96,
+        "intermediate_size": 49152,
+        "hidden_act": "gelu",
+        "hidden_dropout_prob": 0.1,
+        "attention_probs_dropout_prob": 0.1,
+        "max_position_embeddings": 1024,
+        "type_vocab_size": 1,  # no use
+        "initializer_range": 0.02,
+        "eos_token_id": 50256,
+        "eol_token_id": 198,
+    },
+    "gpt3-175B-en": {  # 175B
+        "vocab_size": 51200,
+        "hidden_size": 12288,
+        "num_hidden_layers": 96,
+        "num_attention_heads": 96,
+        "intermediate_size": 49152,
+        "hidden_act": "gelu",
+        "hidden_dropout_prob": 0.1,
+        "attention_probs_dropout_prob": 0.1,
+        "max_position_embeddings": 1024,
+        "type_vocab_size": 1,  # no use
+        "initializer_range": 0.02,
+        "eos_token_id": 50256,
+        "eol_token_id": 198,
+    },
     "gpt3-13B-en": {  # 13B
         "vocab_size": 50304,
         "hidden_size": 5120,
@@ -627,6 +627,36 @@ class GPTPretrainedModel(PretrainedModel):
         "bos_token_id": 0,
         "eol_token_id": 3,
     },
+    "gpt3-89B-en": {  # 89B
+        "vocab_size": 51200,
+        "hidden_size": 12288,
+        "num_hidden_layers": 48,
+        "num_attention_heads": 96,
+        "intermediate_size": 49152,
+        "hidden_act": "gelu",
+        "hidden_dropout_prob": 0.1,
+        "attention_probs_dropout_prob": 0.1,
+        "max_position_embeddings": 1024,
+        "type_vocab_size": 1,  # no use
+        "initializer_range": 0.02,
+        "eos_token_id": 50256,
+        "eol_token_id": 198,
+    },
+    "gpt3-175B-en": {  # 175B
+        "vocab_size": 51200,
+        "hidden_size": 12288,
+        "num_hidden_layers": 96,
+        "num_attention_heads": 96,
+        "intermediate_size": 49152,
+        "hidden_act": "gelu",
+        "hidden_dropout_prob": 0.1,
+        "attention_probs_dropout_prob": 0.1,
+        "max_position_embeddings": 1024,
+        "type_vocab_size": 1,  # no use
+        "initializer_range": 0.02,
+        "eos_token_id": 50256,
+        "eol_token_id": 198,
+    },
     "gpt3-13B-en": {  # 13B
         "vocab_size": 50304,
         "hidden_size": 5120,
@@ -323,6 +323,8 @@ class GPTTokenizer(PretrainedTokenizer):
     gpt_merges_link = "http://bj.bcebos.com/paddlenlp/models/transformers/gpt/gpt-en-merges.txt"
     pretrained_resource_files_map = {
         "vocab_file": {
+            "gpt3-175B-en": gpt_vocab_link,
+            "gpt3-89B-en": gpt_vocab_link,
             "gpt3-13B-en": gpt_vocab_link,
             "gpt3-1.3B-en": gpt_vocab_link,
             "gpt2-xl-en": gpt_vocab_link,
@@ -332,6 +334,8 @@ class GPTTokenizer(PretrainedTokenizer):
             "gpt2-small-en": gpt_vocab_link,
         },
         "merges_file": {
+            "gpt3-175B-en": gpt_merges_link,
+            "gpt3-89B-en": gpt_merges_link,
             "gpt3-13B-en": gpt_merges_link,
             "gpt3-1.3B-en": gpt_merges_link,
             "gpt2-xl-en": gpt_merges_link,
@@ -342,6 +346,8 @@ class GPTTokenizer(PretrainedTokenizer):
         }
     }
     pretrained_init_configuration = {
+        "gpt3-175B-en": {},
+        "gpt3-89B-en": {},
         "gpt3-13B-en": {},
         "gpt3-1.3B-en": {},
         "gpt2-xl-en": {},
You can’t perform that action at this time.
0 commit comments