
Commit 75edf33

Fix datas to data (#2410)
* fix datas to data
* update code
* change all_data to texts in triton deploy
1 parent 0e76981 commit 75edf33
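
This commit is purely a set of variable renames: the non-standard plural `datas` becomes `data` throughout (plus removal of one unused list, and `datas` → `texts` in the Triton client); no behavior changes. As a quick orientation, the recurring pattern being renamed looks like the following minimal sketch — the file name here is hypothetical, the real paths are built from `path` or `input_prefix` in the diffs below:

import numpy as np

# np.load on an .npz archive returns a dict-like NpzFile
process_data = np.load("corpus_idx.npz")   # hypothetical file name
sizes = process_data["lens"]               # per-sentence lengths
doc_idx = process_data["docs"]             # document index boundaries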

20 files changed: +89 -90 lines changed


applications/neural_search/recall/domain_adaptive_pretraining/data_tools/dataset_utils.py (mode change 100644 → 100755, +4 -4)

@@ -52,10 +52,10 @@ def __init__(self, path, skip_warmup=False):
 
         self._token_ids = np.load(
             path + "_ids.npy", mmap_mode="r", allow_pickle=True)
-        process_datas = np.load(path + "_idx.npz")
-        self._sizes = process_datas["lens"]
-        self._pointers = process_datas["sents"]
-        self._doc_idx = process_datas["docs"]
+        process_data = np.load(path + "_idx.npz")
+        self._sizes = process_data["lens"]
+        self._pointers = process_data["sents"]
+        self._doc_idx = process_data["docs"]
 
     def __getstate__(self):
         return self._path

examples/information_extraction/DuEE/duee_1_postprocess.py (mode change 100644 → 100755, +6 -6)

@@ -24,16 +24,16 @@
 def predict_data_process(trigger_file, role_file, schema_file, save_path):
     """predict_data_process"""
     pred_ret = []
-    trigger_datas = read_by_lines(trigger_file)
+    trigger_data = read_by_lines(trigger_file)
     role_data = read_by_lines(role_file)
-    schema_datas = read_by_lines(schema_file)
+    schema_data = read_by_lines(schema_file)
     print("trigger predict {} load from {}".format(
-        len(trigger_datas), trigger_file))
+        len(trigger_data), trigger_file))
     print("role predict {} load from {}".format(len(role_data), role_file))
-    print("schema {} load from {}".format(len(schema_datas), schema_file))
+    print("schema {} load from {}".format(len(schema_data), schema_file))
 
     schema = {}
-    for s in schema_datas:
+    for s in schema_data:
         d_json = json.loads(s)
         schema[d_json["event_type"]] = [r["role"] for r in d_json["role_list"]]
 

@@ -50,7 +50,7 @@ def predict_data_process(trigger_file, role_file, schema_file, save_path):
             role_ret[role_type].append("".join(r["text"]))
         sent_role_mapping[d_json["id"]] = role_ret
 
-    for d in trigger_datas:
+    for d in trigger_data:
         d_json = json.loads(d)
         t_ret = extract_result(d_json["text"], d_json["pred"]["labels"])
         pred_event_types = list(set([t["type"] for t in t_ret]))

examples/language_model/chinesebert/utils.py (mode change 100644 → 100755, +2 -2)

@@ -210,7 +210,7 @@ def load_ds(datafiles):
         MapDataset
     '''
 
-    datas = []
+    data = []
 
     def read(ds_file):
         with open(ds_file, 'r', encoding='utf-8') as fp:

@@ -229,7 +229,7 @@ def read(ds_file):
 
 
 def load_ds_xnli(datafiles):
-    datas = []
+    data = []
 
     def read(ds_file):
         with open(ds_file, 'r', encoding='utf-8') as fp:

examples/language_model/elmo/elmo.py (mode change 100644 → 100755, +3 -3)

@@ -338,10 +338,10 @@ def encode(self, sentences: List[List[str]]):
         Each sentence is a list of tokens without <s> or </s>, e.g.
         [['The', 'first', 'sentence', '.'], ['Second', '.']]
         """
-        batch_datas = create_batches(sentences, self._batch_size, self._vocab,
-                                     self._max_seq_len)
+        batch_data = create_batches(sentences, self._batch_size, self._vocab,
+                                    self._max_seq_len)
         embeddings = []
-        for data in batch_datas:
+        for data in batch_data:
             ids, ids_reverse, seq_lens = data
             # [batch_size, num_lstm_layers + 1, max_seq_len, projection_dim * 2]
             outputs = self._elmo([ids, ids_reverse])

examples/language_model/gpt-3/dygraph/dataset.py (mode change 100644 → 100755, +5 -5)

@@ -301,10 +301,10 @@ def create_pretrained_dataset(args,
         logger.warning(
             "You are using compatible dataset, please make new dataset as the readme!"
         )
-        process_datas = np.load(
+        process_data = np.load(
             input_prefix + "_ids.npz", mmap_mode="r+", allow_pickle=True)
-        sample_ids = process_datas["ids"]
-        sample_lens = process_datas["lens"].astype("int32")
+        sample_ids = process_data["ids"]
+        sample_lens = process_data["lens"].astype("int32")
     else:
         for suffix in ["_ids.npy", "_idx.npz"]:
             if not os.path.isfile(input_prefix + suffix):

@@ -314,10 +314,10 @@ def create_pretrained_dataset(args,
             input_prefix + "_ids.npy", mmap_mode="r", allow_pickle=True)
         # All documment ids, extend as 1-D array.
 
-        process_datas = np.load(input_prefix + "_idx.npz")
+        process_data = np.load(input_prefix + "_idx.npz")
         # The len(sample_lens) num of docs
         # The sum(sample_lens) should equal len(sample_ids)
-        sample_lens = process_datas["lens"]
+        sample_lens = process_data["lens"]
 
     splits = get_train_valid_test_split_(args.split, len(sample_lens))
     assert len(sample_lens) >= splits[
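
The comments in the second hunk above state the invariant the `_idx.npz` index encodes: the per-document lengths partition the flat token-id array loaded from `_ids.npy`. A hypothetical sanity check (not part of this commit or the repository code) would read:

# Hypothetical check only: lengths in "_idx.npz" must sum to the
# number of token ids loaded from "_ids.npy".
assert int(np.sum(sample_lens)) == len(sample_ids)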

examples/language_model/gpt-3/static/dataset.py (mode change 100644 → 100755, +5 -5)

@@ -302,10 +302,10 @@ def create_pretrained_dataset(args,
         logger.warning(
             "You are using compatible dataset, please make new dataset as the readme!"
         )
-        process_datas = np.load(
+        process_data = np.load(
             input_prefix + "_ids.npz", mmap_mode="r+", allow_pickle=True)
-        sample_ids = process_datas["ids"]
-        sample_lens = process_datas["lens"].astype("int32")
+        sample_ids = process_data["ids"]
+        sample_lens = process_data["lens"].astype("int32")
     else:
         for suffix in ["_ids.npy", "_idx.npz"]:
             if not os.path.isfile(input_prefix + suffix):

@@ -315,10 +315,10 @@ def create_pretrained_dataset(args,
             input_prefix + "_ids.npy", mmap_mode="r", allow_pickle=True)
         # All documment ids, extend as 1-D array.
 
-        process_datas = np.load(input_prefix + "_idx.npz")
+        process_data = np.load(input_prefix + "_idx.npz")
         # The len(sample_lens) num of docs
         # The sum(sample_lens) should equal len(sample_ids)
-        sample_lens = process_datas["lens"]
+        sample_lens = process_data["lens"]
 
     splits = get_train_valid_test_split_(args.split, len(sample_lens))
     assert len(sample_lens) >= splits[

examples/language_model/moe/dygraph/dataset.py (mode change 100644 → 100755, +3 -3)

@@ -248,12 +248,12 @@ def create_pretrained_dataset(args,
         "The distributed run, total device num:{}, distinct dataflow num:{}.".
         format(device_world_size, data_world_size))
 
-    process_datas = np.load(input_path, mmap_mode="r+", allow_pickle=True)
+    process_data = np.load(input_path, mmap_mode="r+", allow_pickle=True)
     # All documment ids, extend as 1-D array.
-    sample_ids = process_datas["ids"]
+    sample_ids = process_data["ids"]
     # The len(sample_lens) num of docs
     # The sum(sample_lens) should equal len(sample_ids)
-    sample_lens = process_datas["lens"]
+    sample_lens = process_data["lens"]
 
     splits = get_train_valid_test_split_(args.split, len(sample_lens))
     assert len(sample_lens) >= splits[

examples/text_graph/erniesage/data/graph_reader.py (mode change 100644 → 100755, +3 -3)

@@ -54,7 +54,7 @@ def construct(tensors):
         """
         graph_num = 1
         start_len = 0
-        datas = []
+        data = []
         graph_list = []
         for graph in range(graph_num):
             graph_list.append(

@@ -64,7 +64,7 @@ def construct(tensors):
             start_len += 2
 
         for i in range(start_len, len(tensors)):
-            datas.append(tensors[i])
-        return graph_list, datas
+            data.append(tensors[i])
+        return graph_list, data
 
     return construct

examples/text_graph/erniesage/models/model.py (mode change 100644 → 100755, +3 -3)

@@ -46,17 +46,17 @@ def __init__(self, ernie, config):
         self.encoder = Encoder.factory(self.config_file, self.ernie)
         self.loss_func = LossFactory(self.config_file)
 
-    def forward(self, graphs, datas):
+    def forward(self, graphs, data):
         """Forward function of link prediction task.
 
         Args:
             graphs (Graph List): the Graph list.
-            datas (Tensor List): other input of the model.
+            data (Tensor List): other input of the model.
 
         Returns:
             Tensor: loss and output tensors.
         """
-        term_ids, user_index, pos_item_index, neg_item_index, user_real_index, pos_item_real_index = datas
+        term_ids, user_index, pos_item_index, neg_item_index, user_real_index, pos_item_real_index = data
         # encoder model
         outputs = self.encoder(graphs, term_ids,
                                [user_index, pos_item_index, neg_item_index])

model_zoo/electra/deploy/python/predict.py (mode change 100644 → 100755, +4 -4)

@@ -157,7 +157,7 @@ def predict(args, sentences=[], paths=[]):
     predictor = create_paddle_predictor(config)
 
     start_time = time.time()
-    output_datas = []
+    output_data = []
     count = 0
     for i, sen in enumerate(predicted_input):
         sen = np.array(sen).astype("int64")

@@ -176,9 +176,9 @@ def predict(args, sentences=[], paths=[]):
         output_names = predictor.get_output_names()
         # get output pointer and copy data(nd.array)
         output_tensor = predictor.get_output_tensor(output_names[0])
-        output_data = output_tensor.copy_to_cpu()
-        output_res = np.argmax(output_data, axis=1).tolist()
-        output_datas.append(output_res)
+        predict_data = output_tensor.copy_to_cpu()
+        output_res = np.argmax(predict_data, axis=1).tolist()
+        output_data.append(output_res)
 
         print("===== batch {} =====".format(i))
         for j in range(len(predicted_sens[i])):

model_zoo/electra/deploy/serving/client.py (mode change 100644 → 100755, -1)

@@ -146,7 +146,6 @@ def predict(args, sentences=[], paths=[]):
         predicted_data, tokenizer, args.max_seq_length, args.batch_size)
 
     start_time = time.time()
-    output_datas = []
     count = 0
     for i, sen in enumerate(predicted_input):
         sen = np.array(sen).astype("int64")

model_zoo/ernie-1.0/data_tools/dataset_utils.py (mode change 100644 → 100755, +3 -3)

@@ -158,12 +158,12 @@ def __init__(self, path, skip_warmup=False):
 
         self._token_ids = np.load(
             path + "_ids.npy", mmap_mode="r", allow_pickle=True)
-        process_datas = np.load(path + "_idx.npz")
-        self._sizes = process_datas["lens"]
+        process_data = np.load(path + "_idx.npz")
+        self._sizes = process_data["lens"]
         self._pointers = np.empty(len(self._sizes) + 1, dtype=np.int64)
         self._pointers[0] = 0
         np.cumsum(self._sizes, out=self._pointers[1:])
-        self._doc_idx = process_datas["docs"]
+        self._doc_idx = process_data["docs"]
 
     def __getstate__(self):
         return self._path

model_zoo/ernie-3.0/deploy/paddle2onnx/ernie_predictor.py (+4 -4)

@@ -58,18 +58,18 @@ def infer(self, input_dict: dict):
         return result
 
 
-def token_cls_print_ret(infer_result, input_datas):
+def token_cls_print_ret(infer_result, input_data):
     rets = infer_result["value"]
     for i, ret in enumerate(rets):
-        print("input data:", input_datas[i])
+        print("input data:", input_data[i])
         print("The model detects all entities:")
         for iterm in ret:
             print("entity:", iterm["entity"], " label:", iterm["label"],
                   " pos:", iterm["pos"])
         print("-----------------------------")
 
 
-def seq_cls_print_ret(infer_result, input_datas):
+def seq_cls_print_ret(infer_result, input_data):
     label_list = [
         "news_story", "news_culture", "news_entertainment", "news_sports",
         "news_finance", "news_house", "news_car", "news_edu", "news_tech",

@@ -79,7 +79,7 @@ def seq_cls_print_ret(infer_result, input_datas):
     label = infer_result["label"].squeeze().tolist()
     confidence = infer_result["confidence"].squeeze().tolist()
     for i, ret in enumerate(infer_result):
-        print("input data:", input_datas[i])
+        print("input data:", input_data[i])
         print("seq cls result:")
         print("label:", label_list[label[i]], " confidence:", confidence[i])
         print("-----------------------------")

model_zoo/ernie-3.0/deploy/python/ernie_predictor.py (+4 -4)

@@ -143,18 +143,18 @@ def infer(self, input_dict: dict):
         return result
 
 
-def token_cls_print_ret(infer_result, input_datas):
+def token_cls_print_ret(infer_result, input_data):
     rets = infer_result["value"]
     for i, ret in enumerate(rets):
-        print("input data:", input_datas[i])
+        print("input data:", input_data[i])
         print("The model detects all entities:")
         for iterm in ret:
             print("entity:", iterm["entity"], " label:", iterm["label"],
                   " pos:", iterm["pos"])
         print("-----------------------------")
 
 
-def seq_cls_print_ret(infer_result, input_datas):
+def seq_cls_print_ret(infer_result, input_data):
     label_list = [
         "news_story", "news_culture", "news_entertainment", "news_sports",
         "news_finance", "news_house", "news_car", "news_edu", "news_tech",

@@ -164,7 +164,7 @@ def seq_cls_print_ret(infer_result, input_datas):
     label = infer_result["label"].squeeze().tolist()
     confidence = infer_result["confidence"].squeeze().tolist()
     for i, ret in enumerate(infer_result):
-        print("input data:", input_datas[i])
+        print("input data:", input_data[i])
         print("seq cls result:")
         print("label:", label_list[label[i]], " confidence:", confidence[i])
         print("-----------------------------")

model_zoo/ernie-3.0/deploy/serving/seq_cls_rpc_client.py (mode change 100644 → 100755, +3 -3)

@@ -41,14 +41,14 @@ def test_tnews_dataset(client):
     idx = 0
     batch_size = 32
     while idx < len(dev_ds):
-        datas = []
+        data = []
         label = []
         for i in range(batch_size):
             if idx + i >= len(dev_ds):
                 break
-            datas.append(dev_ds[idx + i]["sentence"])
+            data.append(dev_ds[idx + i]["sentence"])
             label.append(dev_ds[idx + i]["label"])
-        batches.append(datas)
+        batches.append(data)
         labels.append(np.array(label))
         idx += batch_size
     """

model_zoo/ernie-3.0/deploy/serving/token_cls_rpc_client.py (mode change 100644 → 100755, +2 -2)

@@ -18,9 +18,9 @@
 import json
 
 
-def print_ret(rets, input_datas):
+def print_ret(rets, input_data):
     for i, ret in enumerate(rets):
-        print("input data:", input_datas[i])
+        print("input data:", input_data[i])
         print("The model detects all entities:")
         for iterm in ret:
             print("entity:", iterm["entity"], " label:", iterm["label"],

model_zoo/ernie-3.0/deploy/triton/seq_cls_grpc_client.py (+6 -6)

@@ -102,14 +102,14 @@ def test_tnews_dataset(runner):
     idx = 0
     batch_size = 32
     while idx < len(dev_ds):
-        datas = []
+        data = []
         label = []
        for i in range(batch_size):
             if idx + i >= len(dev_ds):
                 break
-            datas.append(dev_ds[idx + i]["sentence"])
+            data.append(dev_ds[idx + i]["sentence"])
             label.append(dev_ds[idx + i]["label"])
-        batches.append(datas)
+        batches.append(data)
         labels.append(np.array(label))
         idx += batch_size
 

@@ -126,12 +126,12 @@ def test_tnews_dataset(runner):
     model_version = "1"
     url = "localhost:8001"
     runner = SyncGRPCTritonRunner(url, model_name, model_version)
-    datas = [["你家拆迁,要钱还是要房?答案一目了然", "军嫂探亲拧包入住,部队家属临时来队房标准有了规定,全面落实!"],
+    texts = [["你家拆迁,要钱还是要房?答案一目了然", "军嫂探亲拧包入住,部队家属临时来队房标准有了规定,全面落实!"],
             ["区块链投资心得,能做到就不会亏钱", ]]
 
-    for data in datas:
+    for text in texts:
         # input format:[input1, input2 ... inputn], n = len(self._input_names)
-        result = runner.Run([data])
+        result = runner.Run([text])
         print(result)
 
     test_tnews_dataset(runner)
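
For readability, here is a minimal sketch of the input shape the renamed loop above assumes; the sentences are illustrative placeholders, not the ones used in the client:

# texts is a list of batches; each batch is a list of raw sentences.
texts = [
    ["sentence one", "sentence two"],   # batch of two sentences
    ["sentence three"],                 # batch of one sentence
]
for text in texts:
    # one entry per model input name; this client feeds a single text input
    result = runner.Run([text])
    print(result)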

model_zoo/gpt/dataset.py (mode change 100644 → 100755, +5 -5)

@@ -300,10 +300,10 @@ def create_pretrained_dataset(
         logger.warning(
             "You are using compatible dataset, please make new dataset as the readme!"
         )
-        process_datas = np.load(
+        process_data = np.load(
             input_prefix + "_ids.npz", mmap_mode="r+", allow_pickle=True)
-        sample_ids = process_datas["ids"]
-        sample_lens = process_datas["lens"].astype("int32")
+        sample_ids = process_data["ids"]
+        sample_lens = process_data["lens"].astype("int32")
     else:
         for suffix in ["_ids.npy", "_idx.npz"]:
             if not os.path.isfile(input_prefix + suffix):

@@ -313,10 +313,10 @@ def create_pretrained_dataset(
             input_prefix + "_ids.npy", mmap_mode="r", allow_pickle=True)
         # All documment ids, extend as 1-D array.
 
-        process_datas = np.load(input_prefix + "_idx.npz")
+        process_data = np.load(input_prefix + "_idx.npz")
         # The len(sample_lens) num of docs
         # The sum(sample_lens) should equal len(sample_ids)
-        sample_lens = process_datas["lens"]
+        sample_lens = process_data["lens"]
 
     splits = get_train_valid_test_split_(args.split, len(sample_lens))
     assert len(sample_lens) >= splits[
