Skip to content

Commit 1226702

Browse files
authored
请教条和谣言检测 (#1211)
* 微博谣言预测 * 请教条 * Update module.py * Update module.py * Update module.py * Update README.md * Update README.md
1 parent 045e4e2 commit 1226702

File tree

11 files changed

+1422
-0
lines changed

11 files changed

+1422
-0
lines changed
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
## 概述
2+
3+
4+
Rumor_prediction是预测语句是否为谣言的模型。
5+
6+
## 命令行预测
7+
8+
```shell
9+
$ hub run Rumor_prediction --input_text='兴仁县今天抢小孩没抢走,把孩子母亲捅了一刀,看见这车的注意了,真事,车牌号辽HFM055!!!!!赶紧散播! 都别带孩子出去瞎转悠了 尤其别让老人自己带孩子出去 太危险了 注意了!!!!辽HFM055北京现代朗动,在各学校门口抢小孩!!!110已经 证实!!全市通缉!!'
10+
```
11+
12+
## API
13+
14+
```python
15+
def Rumor(texts, use_gpu=False):
16+
```
17+
18+
预测API,预测语句是否为谣言。
19+
20+
**参数**
21+
22+
* texts (list\[str\]): 想要预测是否为谣言的语句;
23+
* use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA\_VISIBLE\_DEVICES环境变量**
24+
25+
**返回**
26+
27+
* results (list[dict]): 预测结果的列表,列表中每一个元素为 dict,各字段为:
28+
29+
- content(str):输入文本内容
30+
- prediction(str):预测结果
31+
- probability(float):预测结果概率
32+
33+
**代码示例**
34+
35+
```python
36+
import paddlehub as hub
37+
38+
module = hub.Module(name="Rumor_prediction")
39+
40+
test_texts = ['兴仁县今天抢小孩没抢走,把孩子母亲捅了一刀,看见这车的注意了,真事,车牌号辽HFM055!!!!!赶紧散播! 都别带孩子出去瞎转悠了 尤其别让老人自己带孩子出去 太危险了 注意了!!!!辽HFM055北京现代朗动,在各学校门口抢小孩!!!110已经 证实!!全市通缉!!']
41+
results = module.Rumor(texts=test_texts, use_gpu=True)
42+
print(results)
43+
```
44+
45+
46+
### 依赖
47+
48+
paddlepaddle >= 2.0.0rc1
49+
50+
paddlehub >= 2.0.0rc0

modules/text/text_generation/Rumor_prediction/dict.txt

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
# coding:utf-8
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License"
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
import argparse
15+
import ast
16+
import os
17+
import math
18+
import six
19+
import time
20+
from pathlib import Path
21+
22+
from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
23+
from paddlehub.module.module import runnable, serving, moduleinfo
24+
from paddlehub.io.parser import txt_parser
25+
from paddlehub.compat.module.nlp_module import DataFormatError
26+
import numpy as np
27+
import paddle
28+
import paddlehub as hub
29+
30+
@moduleinfo(
    name="Rumor_prediction",
    version="1.0.0",
    type="nlp/semantic_model",
    summary=
    "Is the input text prediction a rumor",
    author="彭兆帅,郑博培",
    author_email="1084667371@qq.com,2733821739@qq.com")
class Rumorprediction(hub.Module):
    """PaddleHub module that classifies Chinese text as rumor / non-rumor."""

    def _initialize(self):
        """
        Initialize with the necessary elements.
        """
        # Path of the exported static-graph inference model bundled with the module.
        self.default_pretrained_model_path = os.path.join(self.directory, "infer_model")

    def Rumor(self, texts, use_gpu=False):
        """
        Predict whether each input text is a rumor.

        Args:
            texts (list[str]): texts to classify.
            use_gpu (bool): whether to run inference on GPU
                (requires CUDA_VISIBLE_DEVICES to be set by the caller).

        Returns:
            list[dict]: one dict per input text with keys:
                - content (str): the original input text
                - prediction (str): predicted label ('谣言' or '非谣言')
                - probability (float): probability of the predicted label
        """
        # Load the character->id vocabulary once for all texts
        # (the original re-read and re-parsed the file per input text).
        # NOTE(review): eval() on the dict file assumes the bundled dict.txt
        # is trusted; never point this at untrusted input.
        with open(self.directory + '/dict.txt', 'r', encoding='utf-8') as f_data:
            dict_txt = dict(eval(f_data.readlines()[0]))

        def get_data(sentence):
            # Convert one text into a list of integer character ids.
            data = []
            for s in sentence:
                # Map out-of-vocabulary characters to the <unk> token.
                if s not in dict_txt:
                    s = '<unk>'
                data.append(int(dict_txt[s]))
            return data

        data = [get_data(text) for text in texts]
        # LoD base shape: length of each converted sequence.
        base_shape = [[len(c) for c in data]]
        paddle.enable_static()
        place = paddle.CUDAPlace(0) if use_gpu else paddle.CPUPlace()
        exe = paddle.static.Executor(place)
        exe.run(paddle.static.default_startup_program())
        [infer_program, feeded_var_names, target_var] = paddle.fluid.io.load_inference_model(
            dirname=self.default_pretrained_model_path, executor=exe)
        # Build the LoD tensor holding all input sequences.
        tensor_words = paddle.fluid.create_lod_tensor(data, base_shape, place)
        # Run inference.
        result = exe.run(program=infer_program,
                         feed={feeded_var_names[0]: tensor_words},
                         fetch_list=target_var)
        # Class labels, indexed by predicted label id.
        names = ['谣言', '非谣言']

        results = []
        # For each input, take the label with the highest probability.
        for i in range(len(data)):
            lab = np.argsort(result)[0][i][-1]
            results.append({
                'content': texts[i],
                'prediction': names[lab],
                'probability': result[0][i][lab]
            })

        return results

    def add_module_config_arg(self):
        """
        Add the command config options.
        """
        self.arg_config_group.add_argument(
            '--use_gpu',
            type=ast.literal_eval,
            default=False,
            help="whether use GPU for prediction")

    def add_module_input_arg(self):
        """
        Add the command input options.
        """
        self.arg_input_group.add_argument(
            '--input_text',
            type=str,
            default=None,
            help="input_text is str")

    @runnable
    def run_cmd(self, argvs):
        """
        Run as a command (`hub run Rumor_prediction --input_text=...`).
        """
        self.parser = argparse.ArgumentParser(
            description='Run the %s module.' % self.name,
            prog='hub run %s' % self.name,
            usage='%(prog)s',
            add_help=True)

        self.arg_input_group = self.parser.add_argument_group(
            title="Input options", description="Input data. Required")
        self.arg_config_group = self.parser.add_argument_group(
            title="Config options",
            description=
            "Run configuration for controlling module behavior, optional.")

        self.add_module_config_arg()
        self.add_module_input_arg()

        args = self.parser.parse_args(argvs)
        input_text = [args.input_text]
        results = self.Rumor(
            texts=input_text, use_gpu=args.use_gpu)

        return results
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
## 概述
2+
3+
4+
ernie_gen_leave是基于ERNIE-GEN进行微调的模型,该模型的主要功能为生成请假条。输入一个关键词,给出你的请假理由。
5+
6+
## 命令行预测
7+
8+
```shell
9+
$ hub run ernie_gen_leave --input_text="理由" --use_gpu True --beam_width 5
10+
```
11+
12+
## API
13+
14+
```python
15+
def generate(texts, use_gpu=False, beam_width=5):
16+
```
17+
18+
预测API,输入关键字给出请假理由。
19+
20+
**参数**
21+
22+
* texts (list\[str\]): 请假关键字;
23+
* use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA\_VISIBLE\_DEVICES环境变量**
24+
* beam\_width: beam search宽度,决定输出的请假理由数量。
25+
26+
**返回**
27+
28+
* results (list\[list\]\[str\]): 输出请假理由。
29+
30+
**代码示例**
31+
32+
```python
33+
import paddlehub as hub
34+
35+
module = hub.Module(name="ernie_gen_leave")
36+
37+
test_texts = ["理由"]
38+
results = module.generate(texts=test_texts, use_gpu=False, beam_width=2)
39+
for result in results:
40+
print(result)
41+
```
42+
43+
44+
## 查看代码
45+
46+
https://github.com/PaddlePaddle/PaddleHub/tree/release/v2.0.0-rc/modules/text/text_generation/ernie_gen_leave
47+
48+
### 依赖
49+
50+
paddlepaddle >= 2.0.0rc1
51+
52+
paddlehub >= 2.0.0rc0

0 commit comments

Comments
 (0)