1
- # -*- coding:utf-8 -*-
2
- from __future__ import absolute_import
3
- from __future__ import division
4
- from __future__ import print_function
5
-
6
1
import math
7
2
import os
8
3
9
- import paddle .fluid as fluid
10
4
import paddlehub as hub
11
- from paddlehub .common .paddle_helper import add_vars_prefix
12
- from paddlehub .module .module import moduleinfo , serving
13
-
14
- from emotion_detection_textcnn .net import textcnn_net
15
- from emotion_detection_textcnn .processor import load_vocab , preprocess , postprocess
16
-
17
-
18
- @moduleinfo (
19
- name = "emotion_detection_textcnn" ,
20
- version = "1.2.0" ,
21
- summary = "Baidu's open-source Emotion Detection Model(TextCNN)." ,
22
- author = "baidu-nlp" ,
23
- author_email = "" ,
24
- type = "nlp/sentiment_analysis" )
5
+ from .processor import load_vocab
6
+ from .processor import postprocess
7
+ from .processor import preprocess
8
+ from paddlehub .module .module import moduleinfo
9
+ from paddlehub .module .module import serving
10
+
11
+
12
+ @moduleinfo (name = "emotion_detection_textcnn" ,
13
+ version = "1.3.0" ,
14
+ summary = "Baidu's open-source Emotion Detection Model(TextCNN)." ,
15
+ author = "baidu-nlp" ,
16
+ author_email = "" ,
17
+ type = "nlp/sentiment_analysis" )
25
18
class EmotionDetectionTextCNN (hub .NLPPredictionModule ):
19
+
26
20
def _initialize (self ):
27
21
"""
28
22
initialize with the necessary elements
@@ -45,111 +39,6 @@ def word_seg_module(self):
45
39
self ._word_seg_module = hub .Module (name = "lac" )
46
40
return self ._word_seg_module
47
41
48
    def context(self, trainable=False, max_seq_len=128, num_slots=1):
        """
        Get the input, output and program of the pretrained emotion_detection_textcnn.

        Args:
            trainable(bool): Whether to fine-tune the pretrained parameters of emotion_detection_textcnn or not.
            max_seq_len (int): It will limit the total sequence returned so that it has a maximum length.
            num_slots(int): Number of data inputs fed to the model, selected from the following options:

                - 1(default): There's only one data input to the model, e.g. the module is used for text classification task.
                - 2: There are two data inputs to the model, e.g. the module is used for text matching task (point-wise).
                - 3: There are three data inputs to the model, e.g. the module is used for text matching task (pair-wise).

        Returns:
            inputs(dict): the input variables of emotion_detection_textcnn (words)
            outputs(dict): the output variables of input words (word embeddings and label probabilities);
                the sentence embedding and sequence length of the first input text.
            main_program(Program): the main_program of emotion_detection_textcnn with pretrained parameters
        """
        assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots
        main_program = fluid.Program()
        startup_program = fluid.Program()
        with fluid.program_guard(main_program, startup_program):
            # First slot: token ids plus explicit sequence lengths (padded, not LoD).
            text_1 = fluid.layers.data(name="text", shape=[-1, max_seq_len, 1], dtype="int64", lod_level=0)
            seq_len = fluid.layers.data(name="seq_len", shape=[1], dtype='int64', lod_level=0)
            seq_len_used = fluid.layers.squeeze(seq_len, axes=[1])

            # Add embedding layer. All slots share this parameter ("embedding_0.w_0").
            w_param_attrs = fluid.ParamAttr(
                name="embedding_0.w_0", initializer=fluid.initializer.TruncatedNormal(scale=0.02), trainable=trainable)
            dict_dim = 240466  # vocabulary size; last id (dict_dim - 1) is the padding index
            emb_1 = fluid.layers.embedding(
                input=text_1,
                size=[dict_dim, 128],
                is_sparse=True,
                padding_idx=dict_dim - 1,
                dtype='float32',
                param_attr=w_param_attrs)
            emb_1_name = emb_1.name
            data_list = [text_1]
            emb_name_list = [emb_1_name]

            # Add the TextCNN layer (class probabilities + sentence feature for slot 1).
            pred, fc = textcnn_net(emb_1, seq_len_used)
            pred_name = pred.name
            fc_name = fc.name

            # Optional second slot: embedding only (e.g. point-wise text matching).
            if num_slots > 1:
                text_2 = fluid.data(name='text_2', shape=[-1, max_seq_len], dtype='int64', lod_level=0)
                emb_2 = fluid.embedding(
                    input=text_2,
                    size=[dict_dim, 128],
                    is_sparse=True,
                    padding_idx=dict_dim - 1,
                    dtype='float32',
                    param_attr=w_param_attrs)
                emb_2_name = emb_2.name
                data_list.append(text_2)
                emb_name_list.append(emb_2_name)

            # Optional third slot: embedding only (e.g. pair-wise text matching).
            if num_slots > 2:
                text_3 = fluid.data(name='text_3', shape=[-1, max_seq_len], dtype='int64', lod_level=0)
                emb_3 = fluid.embedding(
                    input=text_3,
                    size=[dict_dim, 128],
                    is_sparse=True,
                    padding_idx=dict_dim - 1,
                    dtype='float32',
                    param_attr=w_param_attrs)
                emb_3_name = emb_3.name
                data_list.append(text_3)
                emb_name_list.append(emb_3_name)

            # Prefix internal variables with "@HUB_<name>@" so this program can be
            # composed with other programs without variable-name collisions; the
            # feed variables keep their original names.
            variable_names = filter(lambda v: v not in ['text', 'text_2', 'text_3', "seq_len"],
                                    list(main_program.global_block().vars.keys()))
            prefix_name = "@HUB_{}@".format(self.name)
            add_vars_prefix(program=main_program, prefix=prefix_name, vars=variable_names)

            for param in main_program.global_block().iter_parameters():
                param.trainable = trainable

            place = fluid.CPUPlace()
            exe = fluid.Executor(place)

            # Load the emotion_detection_textcnn pretrained model.
            def if_exist(var):
                # Only load variables that have a checkpoint file on disk.
                return os.path.exists(os.path.join(self.pretrained_model_path, var.name))

            fluid.io.load_vars(exe, self.pretrained_model_path, predicate=if_exist)

            inputs = {'seq_len': seq_len}
            # Outputs are looked up by their prefixed names in the global block.
            outputs = {
                "class_probs": main_program.global_block().vars[prefix_name + pred_name],
                "sentence_feature": main_program.global_block().vars[prefix_name + fc_name]
            }
            for index, data in enumerate(data_list):
                if index == 0:
                    inputs['text'] = data
                    outputs['emb'] = main_program.global_block().vars[prefix_name + emb_name_list[0]]
                else:
                    inputs['text_%s' % (index + 1)] = data
                    outputs['emb_%s' % (index + 1)] = main_program.global_block().vars[prefix_name +
                                                                                       emb_name_list[index]]
        return inputs, outputs, main_program
152
-
153
42
@serving
154
43
def emotion_classify (self , texts = [], data = {}, use_gpu = False , batch_size = 1 ):
155
44
"""
@@ -208,22 +97,3 @@ def get_labels(self):
208
97
"""
209
98
self .labels = {"positive" : 2 , "negative" : 0 , "neutral" : 1 }
210
99
return self .labels
211
-
212
-
213
if __name__ == "__main__":
    # Quick manual smoke test: build the pretrained program, then run inference
    # on a few sample sentences and dump every field of each prediction.
    module = EmotionDetectionTextCNN()

    inputs, outputs, main_program = module.context(num_slots=3)
    print(inputs)
    print(outputs)

    # Data to be predicted
    test_text = ["今天天气真好", "湿纸巾是干垃圾", "别来吵我"]

    predictions = module.emotion_classify(data={"text": test_text}, batch_size=2)
    for prediction in predictions:
        for field in ('text', 'emotion_label', 'emotion_key', 'positive_probs', 'negative_probs', 'neutral_probs'):
            print(prediction[field])
0 commit comments