Skip to content

Commit 8555600

Browse files
authored
Merge branch 'develop' into develop
2 parents 007b27d + 51fd7c9 commit 8555600

File tree

1 file changed

+234
-0
lines changed

1 file changed

+234
-0
lines changed
Lines changed: 234 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,234 @@
1+
# Inference Protocols
2+
3+
C++ Serving基于BRPC进行服务构建,支持BRPC、GRPC、RESTful请求。请求数据为protobuf格式,详见`core/general-server/proto/general_model_service.proto`。本文介绍构建请求以及解析结果的方法。
4+
5+
## Tensor
6+
7+
Tensor可以装载多种类型的数据,是Request和Response的基础单元。Tensor的具体定义如下:
8+
9+
```protobuf
10+
message Tensor {
11+
// VarType: INT64
12+
repeated int64 int64_data = 1;
13+
14+
// VarType: FP32
15+
repeated float float_data = 2;
16+
17+
// VarType: INT32
18+
repeated int32 int_data = 3;
19+
20+
// VarType: FP64
21+
repeated double float64_data = 4;
22+
23+
// VarType: UINT32
24+
repeated uint32 uint32_data = 5;
25+
26+
// VarType: BOOL
27+
repeated bool bool_data = 6;
28+
29+
// (No support)VarType: COMPLEX64, 2x represents the real part, 2x+1
30+
// represents the imaginary part
31+
repeated float complex64_data = 7;
32+
33+
// (No support)VarType: COMPLEX128, 2x represents the real part, 2x+1
34+
// represents the imaginary part
35+
repeated double complex128_data = 8;
36+
37+
// VarType: STRING
38+
repeated string data = 9;
39+
40+
// Element types:
41+
// 0 => INT64
42+
// 1 => FP32
43+
// 2 => INT32
44+
// 3 => FP64
45+
// 4 => INT16
46+
// 5 => FP16
47+
// 6 => BF16
48+
// 7 => UINT8
49+
// 8 => INT8
50+
// 9 => BOOL
51+
// 10 => COMPLEX64
52+
// 11 => COMPLEX128
53+
// 20 => STRING
54+
int32 elem_type = 10;
55+
56+
// Shape of the tensor, including batch dimensions.
57+
repeated int32 shape = 11;
58+
59+
// Level of data(LOD), support variable length data, only for fetch tensor
60+
// currently.
61+
repeated int32 lod = 12;
62+
63+
// Correspond to the variable 'name' in the model description prototxt.
64+
string name = 13;
65+
66+
// Correspond to the variable 'alias_name' in the model description prototxt.
67+
string alias_name = 14; // get from the Model prototxt
68+
69+
// VarType: FP16, INT16, INT8, BF16, UINT8
70+
bytes tensor_content = 15;
71+
};
72+
```
73+
74+
- elem_type:数据类型,当前支持FLOAT32, INT64, INT32, UINT8, INT8, FLOAT16
75+
76+
|elem_type|类型|
77+
|---------|----|
78+
|0|INT64|
79+
|1|FLOAT32|
80+
|2|INT32|
81+
|3|FP64|
82+
|4|INT16|
83+
|5|FP16|
84+
|6|BF16|
85+
|7|UINT8|
86+
|8|INT8|
87+
88+
- shape:数据维度
89+
- lod:lod信息,LoD(Level-of-Detail) Tensor是Paddle的高级特性,是对Tensor的一种扩充,用于支持更自由的数据输入。详见[LOD](../LOD_CN.md)
90+
- name/alias_name: 名称及别名,与模型配置对应
91+
92+
### 构建FLOAT32数据Tensor
93+
94+
```C
95+
// 原始数据
96+
std::vector<float> float_data;
97+
Tensor *tensor = new Tensor;
98+
// 设置维度,可以设置多维
99+
for (uint32_t j = 0; j < float_shape.size(); ++j) {
100+
tensor->add_shape(float_shape[j]);
101+
}
102+
// 设置LOD信息
103+
for (uint32_t j = 0; j < float_lod.size(); ++j) {
104+
tensor->add_lod(float_lod[j]);
105+
}
106+
// 设置类型、名称及别名
107+
tensor->set_elem_type(1);
108+
tensor->set_name(name);
109+
tensor->set_alias_name(alias_name);
110+
// 拷贝数据
111+
int total_number = float_data.size();
112+
tensor->mutable_float_data()->Resize(total_number, 0);
113+
memcpy(tensor->mutable_float_data()->mutable_data(), float_data.data(), total_number * sizeof(float));
114+
```
115+
116+
### 构建INT8数据Tensor
117+
118+
```C
119+
// 原始数据
120+
std::string string_data;
121+
Tensor *tensor = new Tensor;
122+
for (uint32_t j = 0; j < string_shape.size(); ++j) {
123+
tensor->add_shape(string_shape[j]);
124+
}
125+
for (uint32_t j = 0; j < string_lod.size(); ++j) {
126+
tensor->add_lod(string_lod[j]);
127+
}
128+
tensor->set_elem_type(8);
129+
tensor->set_name(name);
130+
tensor->set_alias_name(alias_name);
131+
tensor->set_tensor_content(string_data);
132+
```
133+
134+
## Request
135+
136+
Request为客户端需要发送的请求数据,其以Tensor为基础数据单元,并包含了额外的请求信息。定义如下:
137+
138+
```protobuf
139+
message Request {
140+
repeated Tensor tensor = 1;
141+
repeated string fetch_var_names = 2;
142+
bool profile_server = 3;
143+
uint64 log_id = 4;
144+
};
145+
```
146+
147+
- fetch_var_names: 需要获取的输出数据名称,在GeneralResponseOP会根据该列表进行过滤。请参考模型文件serving_client_conf.prototxt中的`fetch_var`字段下的`alias_name`
148+
- profile_server: 调试参数,打开时会输出性能信息
149+
- log_id: 请求ID
150+
151+
### 构建Request
152+
153+
当使用BRPC或GRPC进行请求时,使用protobuf形式数据,构建方式如下:
154+
155+
```C
156+
Request req;
157+
req.set_log_id(log_id);
158+
for (auto &name : fetch_name) {
159+
req.add_fetch_var_names(name);
160+
}
161+
// 添加Tensor
162+
Tensor *tensor = req.add_tensor();
163+
...
164+
```
165+
166+
当使用RESTful请求时,可以使用JSON形式数据,具体格式如下:
167+
168+
```JSON
169+
{"tensor":[{"float_data":[0.0137,-0.1136,0.2553,-0.0692,0.0582,-0.0727,-0.1583,-0.0584,0.6283,0.4919,0.1856,0.0795,-0.0332],"elem_type":1,"name":"x","alias_name":"x","shape":[1,13]}],"fetch_var_names":["price"],"log_id":0}
170+
```
171+
172+
## Response
173+
174+
Response为服务端返回给客户端的结果,包含了Tensor数据、错误码、错误信息等。定义如下:
175+
176+
```protobuf
177+
message Response {
178+
repeated ModelOutput outputs = 1;
179+
repeated int64 profile_time = 2;
180+
// Error code
181+
int32 err_no = 3;
182+
183+
// Error messages
184+
string err_msg = 4;
185+
};
186+
187+
message ModelOutput {
188+
repeated Tensor tensor = 1;
189+
string engine_name = 2;
190+
}
191+
```
192+
193+
- profile_time:当设置request->set_profile_server(true)时,会返回性能信息
194+
- err_no:错误码,详见`core/predictor/common/constant.h`
195+
- err_msg:错误信息,详见`core/predictor/common/constant.h`
196+
- engine_name:输出节点名称
197+
198+
|err_no|err_msg|
199+
|---------|----|
200+
|0|OK|
201+
|-5000|"Paddle Serving Framework Internal Error."|
202+
|-5001|"Paddle Serving Memory Alloc Error."|
203+
|-5002|"Paddle Serving Array Overflow Error."|
204+
|-5100|"Paddle Serving Op Inference Error."|
205+
206+
### 读取Response数据
207+
208+
```C
209+
uint32_t model_num = res.outputs_size();
210+
for (uint32_t m_idx = 0; m_idx < model_num; ++m_idx) {
211+
const auto &output = res.outputs(m_idx);
std::string engine_name = output.engine_name();
212+
int idx = 0;
213+
// 读取tensor维度
214+
int shape_size = output.tensor(idx).shape_size();
215+
for (int i = 0; i < shape_size; ++i) {
216+
shape[i] = output.tensor(idx).shape(i);
217+
}
218+
// 读取LOD信息
219+
int lod_size = output.tensor(idx).lod_size();
220+
if (lod_size > 0) {
221+
lod.resize(lod_size);
222+
for (int i = 0; i < lod_size; ++i) {
223+
lod[i] = output.tensor(idx).lod(i);
224+
}
225+
}
226+
// 读取float数据
227+
int size = output.tensor(idx).float_data_size();
228+
float_data = std::vector<float>(
229+
output.tensor(idx).float_data().begin(),
230+
output.tensor(idx).float_data().begin() + size);
231+
// 读取int8数据
232+
string_data = output.tensor(idx).tensor_content();
233+
}
234+
```

0 commit comments

Comments
 (0)