Commit 751f30b

update emotion_detection_textcnn (#2105)
1 parent beec7ed commit 751f30b

4 files changed, +28 -182 lines changed

modules/text/sentiment_analysis/emotion_detection_textcnn/README.md (+13 -9)
@@ -1,7 +1,7 @@
 # emotion_detection_textcnn
 
 |模型名称|emotion_detection_textcnn|
-| :--- | :---: | 
+| :--- | :---: |
 |类别|文本-情感分析|
 |网络|TextCNN|
 |数据集|百度自建数据集|
@@ -25,7 +25,7 @@
 - ### 1、环境依赖
 
   - paddlepaddle >= 1.8.0
-  
+
   - paddlehub >= 1.8.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst)
 
 - ### 2、安装
@@ -51,12 +51,12 @@
   - ```shell
     $ hub run emotion_detection_textcnn --input_file test.txt
     ```
-  
+
   - test.txt 存放待预测文本, 如:
     > 这家餐厅很好吃
-  
+
     > 这部电影真的很差劲
-  
+
   - 通过命令行方式实现文字识别模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst)
 
 - ### 2、预测代码示例
@@ -75,12 +75,12 @@
     print(result['positive_probs'])
     print(result['neutral_probs'])
     print(result['negative_probs'])
-  
+
     # 今天天气真好 2 positive 0.9267 0.0714 0.0019
     # 湿纸巾是干垃圾 1 neutral 0.0062 0.9896 0.0042
     # 别来吵我 0 negative 0.0732 0.1477 0.7791
     ```
-  
+
 - ### 3、API
 
   - ```python
@@ -184,7 +184,11 @@
 * 1.2.0
 
   模型升级,支持用于文本分类,文本匹配等各种任务迁移学习
-  
+
+* 1.3.0
+
+  移除 Fluid API
+
   - ```shell
-    $ hub install emotion_detection_textcnn==1.2.0
+    $ hub install emotion_detection_textcnn==1.3.0
     ```
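The prediction example this README documents is only partially visible in the hunk above. For context, here is a minimal end-to-end sketch assembled from the snippets in this diff; the `texts=` keyword is an assumption, and per the `emotion_classify` signature shown in module.py below, the older `data={"text": [...]}` form is also accepted:

```python
import paddlehub as hub

# Load the module from PaddleHub's model registry.
module = hub.Module(name="emotion_detection_textcnn")

# Sentences taken from the example output shown in the README hunk above.
test_text = ["今天天气真好", "湿纸巾是干垃圾", "别来吵我"]

# Run inference; use_gpu and batch_size mirror the defaults of emotion_classify.
results = module.emotion_classify(texts=test_text, use_gpu=False, batch_size=1)

for result in results:
    print(result['text'])
    print(result['emotion_label'])   # e.g. 2
    print(result['emotion_key'])     # e.g. positive
    print(result['positive_probs'])
    print(result['neutral_probs'])
    print(result['negative_probs'])
```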
modules/text/sentiment_analysis/emotion_detection_textcnn/module.py (+14 -144)
@@ -1,28 +1,22 @@
-# -*- coding:utf-8 -*-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
 import math
 import os
 
-import paddle.fluid as fluid
 import paddlehub as hub
-from paddlehub.common.paddle_helper import add_vars_prefix
-from paddlehub.module.module import moduleinfo, serving
-
-from emotion_detection_textcnn.net import textcnn_net
-from emotion_detection_textcnn.processor import load_vocab, preprocess, postprocess
-
-
-@moduleinfo(
-    name="emotion_detection_textcnn",
-    version="1.2.0",
-    summary="Baidu's open-source Emotion Detection Model(TextCNN).",
-    author="baidu-nlp",
-    author_email="",
-    type="nlp/sentiment_analysis")
+from .processor import load_vocab
+from .processor import postprocess
+from .processor import preprocess
+from paddlehub.module.module import moduleinfo
+from paddlehub.module.module import serving
+
+
+@moduleinfo(name="emotion_detection_textcnn",
+            version="1.3.0",
+            summary="Baidu's open-source Emotion Detection Model(TextCNN).",
+            author="baidu-nlp",
+            author_email="",
+            type="nlp/sentiment_analysis")
 class EmotionDetectionTextCNN(hub.NLPPredictionModule):
+
     def _initialize(self):
         """
         initialize with the necessary elements
@@ -45,111 +39,6 @@ def word_seg_module(self):
         self._word_seg_module = hub.Module(name="lac")
         return self._word_seg_module
 
-    def context(self, trainable=False, max_seq_len=128, num_slots=1):
-        """
-        Get the input ,output and program of the pretrained emotion_detection_textcnn
-
-        Args:
-             trainable(bool): Whether fine-tune the pretrained parameters of emotion_detection_textcnn or not.
-             max_seq_len (int): It will limit the total sequence returned so that it has a maximum length.
-             num_slots(int): It's number of data inputted to the model, selectted as following options:
-
-                 - 1(default): There's only one data to be feeded in the model, e.g. the module is used for text classification task.
-                 - 2: There are two data to be feeded in the model, e.g. the module is used for text matching task (point-wise).
-                 - 3: There are three data to be feeded in the model, e.g. the module is used for text matching task (pair-wise).
-
-        Returns:
-             inputs(dict): the input variables of emotion_detection_textcnn (words)
-             outputs(dict): the output variables of input words (word embeddings and label probilities);
-                 the sentence embedding and sequence length of the first input text.
-             main_program(Program): the main_program of emotion_detection_textcnn with pretrained prameters
-        """
-        assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots
-        main_program = fluid.Program()
-        startup_program = fluid.Program()
-        with fluid.program_guard(main_program, startup_program):
-            text_1 = fluid.layers.data(name="text", shape=[-1, max_seq_len, 1], dtype="int64", lod_level=0)
-            seq_len = fluid.layers.data(name="seq_len", shape=[1], dtype='int64', lod_level=0)
-            seq_len_used = fluid.layers.squeeze(seq_len, axes=[1])
-
-            # Add embedding layer.
-            w_param_attrs = fluid.ParamAttr(
-                name="embedding_0.w_0", initializer=fluid.initializer.TruncatedNormal(scale=0.02), trainable=trainable)
-            dict_dim = 240466
-            emb_1 = fluid.layers.embedding(
-                input=text_1,
-                size=[dict_dim, 128],
-                is_sparse=True,
-                padding_idx=dict_dim - 1,
-                dtype='float32',
-                param_attr=w_param_attrs)
-            emb_1_name = emb_1.name
-            data_list = [text_1]
-            emb_name_list = [emb_1_name]
-
-            # Add lstm layer.
-            pred, fc = textcnn_net(emb_1, seq_len_used)
-            pred_name = pred.name
-            fc_name = fc.name
-
-            if num_slots > 1:
-                text_2 = fluid.data(name='text_2', shape=[-1, max_seq_len], dtype='int64', lod_level=0)
-                emb_2 = fluid.embedding(
-                    input=text_2,
-                    size=[dict_dim, 128],
-                    is_sparse=True,
-                    padding_idx=dict_dim - 1,
-                    dtype='float32',
-                    param_attr=w_param_attrs)
-                emb_2_name = emb_2.name
-                data_list.append(text_2)
-                emb_name_list.append(emb_2_name)
-
-            if num_slots > 2:
-                text_3 = fluid.data(name='text_3', shape=[-1, max_seq_len], dtype='int64', lod_level=0)
-                emb_3 = fluid.embedding(
-                    input=text_3,
-                    size=[dict_dim, 128],
-                    is_sparse=True,
-                    padding_idx=dict_dim - 1,
-                    dtype='float32',
-                    param_attr=w_param_attrs)
-                emb_3_name = emb_3.name
-                data_list.append(text_3)
-                emb_name_list.append(emb_3_name)
-
-            variable_names = filter(lambda v: v not in ['text', 'text_2', 'text_3', "seq_len"],
-                                    list(main_program.global_block().vars.keys()))
-            prefix_name = "@HUB_{}@".format(self.name)
-            add_vars_prefix(program=main_program, prefix=prefix_name, vars=variable_names)
-
-            for param in main_program.global_block().iter_parameters():
-                param.trainable = trainable
-
-            place = fluid.CPUPlace()
-            exe = fluid.Executor(place)
-
-            # Load the emotion_detection_textcnn pretrained model.
-            def if_exist(var):
-                return os.path.exists(os.path.join(self.pretrained_model_path, var.name))
-
-            fluid.io.load_vars(exe, self.pretrained_model_path, predicate=if_exist)
-
-            inputs = {'seq_len': seq_len}
-            outputs = {
-                "class_probs": main_program.global_block().vars[prefix_name + pred_name],
-                "sentence_feature": main_program.global_block().vars[prefix_name + fc_name]
-            }
-            for index, data in enumerate(data_list):
-                if index == 0:
-                    inputs['text'] = data
-                    outputs['emb'] = main_program.global_block().vars[prefix_name + emb_name_list[0]]
-                else:
-                    inputs['text_%s' % (index + 1)] = data
-                    outputs['emb_%s' % (index + 1)] = main_program.global_block().vars[prefix_name +
-                                                                                       emb_name_list[index]]
-        return inputs, outputs, main_program
-
     @serving
     def emotion_classify(self, texts=[], data={}, use_gpu=False, batch_size=1):
         """
@@ -208,22 +97,3 @@ def get_labels(self):
         """
         self.labels = {"positive": 2, "negative": 0, "neutral": 1}
         return self.labels
-
-
-if __name__ == "__main__":
-    emotion_detection_textcnn = EmotionDetectionTextCNN()
-    inputs, outputs, main_program = emotion_detection_textcnn.context(num_slots=3)
-    print(inputs)
-    print(outputs)
-    # Data to be predicted
-    test_text = ["今天天气真好", "湿纸巾是干垃圾", "别来吵我"]
-
-    input_dict = {"text": test_text}
-    results = emotion_detection_textcnn.emotion_classify(data=input_dict, batch_size=2)
-    for result in results:
-        print(result['text'])
-        print(result['emotion_label'])
-        print(result['emotion_key'])
-        print(result['positive_probs'])
-        print(result['negative_probs'])
-        print(result['neutral_probs'])
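With the Fluid-based `context()` entry point and the inline `__main__` demo removed, the remaining programmatic entry point in this module is the `@serving`-decorated `emotion_classify` method shown above. A minimal client sketch for calling it through PaddleHub Serving, assuming the service was started with `hub serving start -m emotion_detection_textcnn`; the port 8866 and the JSON payload shape follow PaddleHub's usual serving conventions and are not part of this diff:

```python
import json

import requests

# Assumed endpoint: PaddleHub Serving typically exposes /predict/<module_name> on port 8866.
url = "http://127.0.0.1:8866/predict/emotion_detection_textcnn"

# Texts reuse the examples from the deleted __main__ block above.
payload = {"texts": ["今天天气真好", "湿纸巾是干垃圾", "别来吵我"]}

response = requests.post(url=url,
                         headers={"Content-Type": "application/json"},
                         data=json.dumps(payload))
print(response.json())
```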

modules/text/sentiment_analysis/emotion_detection_textcnn/net.py (-28)

This file was deleted.

modules/text/sentiment_analysis/emotion_detection_textcnn/processor.py (+1 -1)

@@ -1,5 +1,5 @@
-# -*- coding:utf-8 -*-
 import io
+
 import numpy as np
 
 