
update emotion_detection_textcnn #2105

Merged
merged 1 commit on Nov 7, 2022
@@ -1,7 +1,7 @@
# emotion_detection_textcnn

|Model Name|emotion_detection_textcnn|
| :--- | :---: |
| :--- | :---: |
|Category|Text - Sentiment Analysis|
|Network|TextCNN|
|Dataset|Baidu's self-built dataset|
@@ -25,7 +25,7 @@
- ### 1、Environment Dependencies

- paddlepaddle >= 1.8.0

- paddlehub >= 1.8.0 | [How to install PaddleHub](../../../../docs/docs_ch/get_start/installation.rst)

- ### 2、Installation
@@ -51,12 +51,12 @@
- ```shell
$ hub run emotion_detection_textcnn --input_file test.txt
```

- test.txt stores the text to be predicted, for example:
> 这家餐厅很好吃

> 这部电影真的很差劲

- This invokes the model from the command line; for more information, see [PaddleHub Command-Line Usage](../../../../docs/docs_ch/tutorial/cmd_usage.rst)

- ### 2、Prediction Code Example
@@ -75,12 +75,12 @@
print(result['positive_probs'])
print(result['neutral_probs'])
print(result['negative_probs'])

# 今天天气真好 2 positive 0.9267 0.0714 0.0019
# 湿纸巾是干垃圾 1 neutral 0.0062 0.9896 0.0042
# 别来吵我 0 negative 0.0732 0.1477 0.7791
```
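For context, the snippet above belongs to a longer example that this diff folds away. A minimal, self-contained sketch of that usage, assuming the standard `hub.Module` loading pattern (the folded lines are not shown in this PR), looks like:

```python
import paddlehub as hub

# Load the module by name; this assumes the standard PaddleHub loading pattern.
module = hub.Module(name="emotion_detection_textcnn")

# Texts to classify (the same samples as in the expected output above).
test_text = ["今天天气真好", "湿纸巾是干垃圾", "别来吵我"]

# emotion_classify is the module's prediction API; its signature appears later in this diff.
results = module.emotion_classify(texts=test_text, use_gpu=False, batch_size=1)

for result in results:
    print(result['text'])
    print(result['emotion_label'])
    print(result['emotion_key'])
    print(result['positive_probs'])
    print(result['neutral_probs'])
    print(result['negative_probs'])
```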

- ### 3、API

- ```python
@@ -184,7 +184,11 @@
* 1.2.0

Upgraded the model to support transfer learning for various tasks such as text classification and text matching


* 1.3.0

Removed the Fluid API

- ```shell
$ hub install emotion_detection_textcnn==1.2.0
$ hub install emotion_detection_textcnn==1.3.0
```
158 changes: 14 additions & 144 deletions modules/text/sentiment_analysis/emotion_detection_textcnn/module.py
@@ -1,28 +1,22 @@
# -*- coding:utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import os

import paddle.fluid as fluid
import paddlehub as hub
from paddlehub.common.paddle_helper import add_vars_prefix
from paddlehub.module.module import moduleinfo, serving

from emotion_detection_textcnn.net import textcnn_net
from emotion_detection_textcnn.processor import load_vocab, preprocess, postprocess


@moduleinfo(
name="emotion_detection_textcnn",
version="1.2.0",
summary="Baidu's open-source Emotion Detection Model(TextCNN).",
author="baidu-nlp",
author_email="",
type="nlp/sentiment_analysis")
from .processor import load_vocab
from .processor import postprocess
from .processor import preprocess
from paddlehub.module.module import moduleinfo
from paddlehub.module.module import serving


@moduleinfo(name="emotion_detection_textcnn",
version="1.3.0",
summary="Baidu's open-source Emotion Detection Model(TextCNN).",
author="baidu-nlp",
author_email="",
type="nlp/sentiment_analysis")
class EmotionDetectionTextCNN(hub.NLPPredictionModule):

def _initialize(self):
"""
initialize with the necessary elements
@@ -45,111 +39,6 @@ def word_seg_module(self):
self._word_seg_module = hub.Module(name="lac")
return self._word_seg_module

def context(self, trainable=False, max_seq_len=128, num_slots=1):
"""
Get the inputs, outputs and program of the pretrained emotion_detection_textcnn

Args:
trainable(bool): Whether to fine-tune the pretrained parameters of emotion_detection_textcnn or not.
max_seq_len (int): Limits the total returned sequence to a maximum length.
num_slots(int): The number of data inputs fed to the model, selected from the following options:

- 1(default): Only one data input is fed to the model, e.g. when the module is used for a text classification task.
- 2: Two data inputs are fed to the model, e.g. when the module is used for a text matching task (point-wise).
- 3: Three data inputs are fed to the model, e.g. when the module is used for a text matching task (pair-wise).

Returns:
inputs(dict): the input variables of emotion_detection_textcnn (words)
outputs(dict): the output variables of the input words (word embeddings and label probabilities);
the sentence embedding and sequence length of the first input text.
main_program(Program): the main_program of emotion_detection_textcnn with pretrained parameters
"""
assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots
main_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(main_program, startup_program):
text_1 = fluid.layers.data(name="text", shape=[-1, max_seq_len, 1], dtype="int64", lod_level=0)
seq_len = fluid.layers.data(name="seq_len", shape=[1], dtype='int64', lod_level=0)
seq_len_used = fluid.layers.squeeze(seq_len, axes=[1])

# Add embedding layer.
w_param_attrs = fluid.ParamAttr(
name="embedding_0.w_0", initializer=fluid.initializer.TruncatedNormal(scale=0.02), trainable=trainable)
dict_dim = 240466
emb_1 = fluid.layers.embedding(
input=text_1,
size=[dict_dim, 128],
is_sparse=True,
padding_idx=dict_dim - 1,
dtype='float32',
param_attr=w_param_attrs)
emb_1_name = emb_1.name
data_list = [text_1]
emb_name_list = [emb_1_name]

# Add the textcnn layer.
pred, fc = textcnn_net(emb_1, seq_len_used)
pred_name = pred.name
fc_name = fc.name

if num_slots > 1:
text_2 = fluid.data(name='text_2', shape=[-1, max_seq_len], dtype='int64', lod_level=0)
emb_2 = fluid.embedding(
input=text_2,
size=[dict_dim, 128],
is_sparse=True,
padding_idx=dict_dim - 1,
dtype='float32',
param_attr=w_param_attrs)
emb_2_name = emb_2.name
data_list.append(text_2)
emb_name_list.append(emb_2_name)

if num_slots > 2:
text_3 = fluid.data(name='text_3', shape=[-1, max_seq_len], dtype='int64', lod_level=0)
emb_3 = fluid.embedding(
input=text_3,
size=[dict_dim, 128],
is_sparse=True,
padding_idx=dict_dim - 1,
dtype='float32',
param_attr=w_param_attrs)
emb_3_name = emb_3.name
data_list.append(text_3)
emb_name_list.append(emb_3_name)

variable_names = filter(lambda v: v not in ['text', 'text_2', 'text_3', "seq_len"],
list(main_program.global_block().vars.keys()))
prefix_name = "@HUB_{}@".format(self.name)
add_vars_prefix(program=main_program, prefix=prefix_name, vars=variable_names)

for param in main_program.global_block().iter_parameters():
param.trainable = trainable

place = fluid.CPUPlace()
exe = fluid.Executor(place)

# Load the emotion_detection_textcnn pretrained model.
def if_exist(var):
return os.path.exists(os.path.join(self.pretrained_model_path, var.name))

fluid.io.load_vars(exe, self.pretrained_model_path, predicate=if_exist)

inputs = {'seq_len': seq_len}
outputs = {
"class_probs": main_program.global_block().vars[prefix_name + pred_name],
"sentence_feature": main_program.global_block().vars[prefix_name + fc_name]
}
for index, data in enumerate(data_list):
if index == 0:
inputs['text'] = data
outputs['emb'] = main_program.global_block().vars[prefix_name + emb_name_list[0]]
else:
inputs['text_%s' % (index + 1)] = data
outputs['emb_%s' % (index + 1)] = main_program.global_block().vars[prefix_name +
emb_name_list[index]]
return inputs, outputs, main_program

@serving
def emotion_classify(self, texts=[], data={}, use_gpu=False, batch_size=1):
"""
@@ -208,22 +97,3 @@ def get_labels(self):
"""
self.labels = {"positive": 2, "negative": 0, "neutral": 1}
return self.labels


if __name__ == "__main__":
emotion_detection_textcnn = EmotionDetectionTextCNN()
inputs, outputs, main_program = emotion_detection_textcnn.context(num_slots=3)
print(inputs)
print(outputs)
# Data to be predicted
test_text = ["今天天气真好", "湿纸巾是干垃圾", "别来吵我"]

input_dict = {"text": test_text}
results = emotion_detection_textcnn.emotion_classify(data=input_dict, batch_size=2)
for result in results:
print(result['text'])
print(result['emotion_label'])
print(result['emotion_key'])
print(result['positive_probs'])
print(result['negative_probs'])
print(result['neutral_probs'])
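The standalone `__main__` demo above is removed by this PR, but `emotion_classify` remains decorated with `@serving`, so the module can still be queried through PaddleHub Serving. The sketch below is illustrative only and is not part of this PR; the payload keys mirror `emotion_classify`'s parameters, and the exact endpoint and request format may vary across PaddleHub versions.

```python
# Assumed standard PaddleHub Serving workflow (start the server first):
#   hub serving start -m emotion_detection_textcnn
import json

import requests

# Payload keys mirror emotion_classify's parameters (texts, batch_size).
# This is an assumption, not something shown in this PR.
data = {"texts": ["今天天气真好", "别来吵我"], "batch_size": 1}
url = "http://127.0.0.1:8866/predict/emotion_detection_textcnn"
headers = {"Content-Type": "application/json"}

response = requests.post(url=url, headers=headers, data=json.dumps(data))
print(response.json())
```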
28 changes: 0 additions & 28 deletions modules/text/sentiment_analysis/emotion_detection_textcnn/net.py

This file was deleted.

@@ -1,5 +1,5 @@
# -*- coding:utf-8 -*-
import io

import numpy as np

