1
- # -*- coding:utf-8 -*-
2
- from __future__ import absolute_import
3
- from __future__ import division
4
- from __future__ import print_function
5
-
6
1
import math
7
2
import os
8
3
9
- import paddle .fluid as fluid
10
4
import paddlehub as hub
11
- from paddlehub .common .paddle_helper import add_vars_prefix
12
- from paddlehub .module .module import moduleinfo , serving
13
-
14
- from emotion_detection_textcnn .net import textcnn_net
15
- from emotion_detection_textcnn .processor import load_vocab , preprocess , postprocess
16
-
17
-
18
- @moduleinfo (
19
- name = "emotion_detection_textcnn" ,
20
- version = "1.2.0" ,
21
- summary = "Baidu's open-source Emotion Detection Model(TextCNN)." ,
22
- author = "baidu-nlp" ,
23
- author_email = "" ,
24
- type = "nlp/sentiment_analysis" )
5
+ from .processor import load_vocab
6
+ from .processor import postprocess
7
+ from .processor import preprocess
8
+ from paddlehub .module .module import moduleinfo
9
+ from paddlehub .module .module import serving
10
+
11
+
12
+ @moduleinfo (name = "emotion_detection_textcnn" ,
13
+ version = "1.3.0" ,
14
+ summary = "Baidu's open-source Emotion Detection Model(TextCNN)." ,
15
+ author = "baidu-nlp" ,
16
+ author_email = "" ,
17
+ type = "nlp/sentiment_analysis" )
25
18
class EmotionDetectionTextCNN (hub .NLPPredictionModule ):
19
+
26
20
def _initialize (self ):
27
21
"""
28
22
initialize with the necessary elements
@@ -45,111 +39,6 @@ def word_seg_module(self):
45
39
self ._word_seg_module = hub .Module (name = "lac" )
46
40
return self ._word_seg_module
47
41
48
    def context(self, trainable=False, max_seq_len=128, num_slots=1):
        """
        Get the input, output and program of the pretrained emotion_detection_textcnn.

        Args:
            trainable(bool): Whether to fine-tune the pretrained parameters of emotion_detection_textcnn or not.
            max_seq_len (int): It will limit the total sequence returned so that it has a maximum length.
            num_slots(int): Number of data inputs fed to the model, selected from the following options:

                - 1(default): There's only one data input to the model, e.g. the module is used for text classification task.
                - 2: There are two data inputs to the model, e.g. the module is used for text matching task (point-wise).
                - 3: There are three data inputs to the model, e.g. the module is used for text matching task (pair-wise).

        Returns:
            inputs(dict): the input variables of emotion_detection_textcnn (words)
            outputs(dict): the output variables of input words (word embeddings and label probabilities);
                the sentence embedding and sequence length of the first input text.
            main_program(Program): the main_program of emotion_detection_textcnn with pretrained parameters
        """
        assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots
        main_program = fluid.Program()
        startup_program = fluid.Program()
        with fluid.program_guard(main_program, startup_program):
            # First slot: token ids plus explicit sequence lengths (padded, not LoD).
            text_1 = fluid.layers.data(name="text", shape=[-1, max_seq_len, 1], dtype="int64", lod_level=0)
            seq_len = fluid.layers.data(name="seq_len", shape=[1], dtype='int64', lod_level=0)
            seq_len_used = fluid.layers.squeeze(seq_len, axes=[1])

            # Add embedding layer. All slots share this parameter ("embedding_0.w_0").
            w_param_attrs = fluid.ParamAttr(
                name="embedding_0.w_0", initializer=fluid.initializer.TruncatedNormal(scale=0.02), trainable=trainable)
            dict_dim = 240466  # vocabulary size; last id (dict_dim - 1) is the padding index
            emb_1 = fluid.layers.embedding(
                input=text_1,
                size=[dict_dim, 128],
                is_sparse=True,
                padding_idx=dict_dim - 1,
                dtype='float32',
                param_attr=w_param_attrs)
            emb_1_name = emb_1.name
            data_list = [text_1]
            emb_name_list = [emb_1_name]

            # Add the TextCNN layer (class probabilities + sentence feature for slot 1).
            pred, fc = textcnn_net(emb_1, seq_len_used)
            pred_name = pred.name
            fc_name = fc.name

            # Optional second slot: embedding only (e.g. point-wise text matching).
            if num_slots > 1:
                text_2 = fluid.data(name='text_2', shape=[-1, max_seq_len], dtype='int64', lod_level=0)
                emb_2 = fluid.embedding(
                    input=text_2,
                    size=[dict_dim, 128],
                    is_sparse=True,
                    padding_idx=dict_dim - 1,
                    dtype='float32',
                    param_attr=w_param_attrs)
                emb_2_name = emb_2.name
                data_list.append(text_2)
                emb_name_list.append(emb_2_name)

            # Optional third slot: embedding only (e.g. pair-wise text matching).
            if num_slots > 2:
                text_3 = fluid.data(name='text_3', shape=[-1, max_seq_len], dtype='int64', lod_level=0)
                emb_3 = fluid.embedding(
                    input=text_3,
                    size=[dict_dim, 128],
                    is_sparse=True,
                    padding_idx=dict_dim - 1,
                    dtype='float32',
                    param_attr=w_param_attrs)
                emb_3_name = emb_3.name
                data_list.append(text_3)
                emb_name_list.append(emb_3_name)

            # Prefix internal variables with "@HUB_<name>@" so this program can be
            # composed with other programs without variable-name collisions; the
            # feed variables keep their original names.
            variable_names = filter(lambda v: v not in ['text', 'text_2', 'text_3', "seq_len"],
                                    list(main_program.global_block().vars.keys()))
            prefix_name = "@HUB_{}@".format(self.name)
            add_vars_prefix(program=main_program, prefix=prefix_name, vars=variable_names)

            for param in main_program.global_block().iter_parameters():
                param.trainable = trainable

            place = fluid.CPUPlace()
            exe = fluid.Executor(place)

            # Load the emotion_detection_textcnn pretrained model.
            def if_exist(var):
                # Only load variables that have a checkpoint file on disk.
                return os.path.exists(os.path.join(self.pretrained_model_path, var.name))

            fluid.io.load_vars(exe, self.pretrained_model_path, predicate=if_exist)

            inputs = {'seq_len': seq_len}
            # Outputs are looked up by their prefixed names in the global block.
            outputs = {
                "class_probs": main_program.global_block().vars[prefix_name + pred_name],
                "sentence_feature": main_program.global_block().vars[prefix_name + fc_name]
            }
            for index, data in enumerate(data_list):
                if index == 0:
                    inputs['text'] = data
                    outputs['emb'] = main_program.global_block().vars[prefix_name + emb_name_list[0]]
                else:
                    inputs['text_%s' % (index + 1)] = data
                    outputs['emb_%s' % (index + 1)] = main_program.global_block().vars[prefix_name +
                                                                                       emb_name_list[index]]
        return inputs, outputs, main_program
152
-
153
42
@serving
154
43
def emotion_classify (self , texts = [], data = {}, use_gpu = False , batch_size = 1 ):
155
44
"""
@@ -208,22 +97,3 @@ def get_labels(self):
208
97
"""
209
98
self .labels = {"positive" : 2 , "negative" : 0 , "neutral" : 1 }
210
99
return self .labels
211
-
212
-
213
if __name__ == "__main__":
    # Quick manual smoke test: build the pretrained program, then run inference
    # on a few sample sentences and dump every field of each prediction.
    module = EmotionDetectionTextCNN()

    inputs, outputs, main_program = module.context(num_slots=3)
    print(inputs)
    print(outputs)

    # Data to be predicted
    test_text = ["今天天气真好", "湿纸巾是干垃圾", "别来吵我"]

    predictions = module.emotion_classify(data={"text": test_text}, batch_size=2)
    for prediction in predictions:
        for field in ('text', 'emotion_label', 'emotion_key', 'positive_probs', 'negative_probs', 'neutral_probs'):
            print(prediction[field])
0 commit comments