|
| 1 | +# Inference Protocols |
| 2 | + |
| 3 | +C++ Serving基于BRPC进行服务构建,支持BRPC、GRPC、RESTful请求。请求数据为protobuf格式,详见`core/general-server/proto/general_model_service.proto`。本文介绍构建请求以及解析结果的方法。 |
| 4 | + |
| 5 | +## Tensor |
| 6 | + |
| 7 | +Tensor可以装载多种类型的数据,是Request和Response的基础单元。Tensor的具体定义如下: |
| 8 | + |
| 9 | +```protobuf |
| 10 | +message Tensor { |
| 11 | + // VarType: INT64 |
| 12 | + repeated int64 int64_data = 1; |
| 13 | +
|
| 14 | + // VarType: FP32 |
| 15 | + repeated float float_data = 2; |
| 16 | +
|
| 17 | + // VarType: INT32 |
| 18 | + repeated int32 int_data = 3; |
| 19 | +
|
| 20 | + // VarType: FP64 |
| 21 | + repeated double float64_data = 4; |
| 22 | +
|
| 23 | + // VarType: UINT32 |
| 24 | + repeated uint32 uint32_data = 5; |
| 25 | +
|
| 26 | + // VarType: BOOL |
| 27 | + repeated bool bool_data = 6; |
| 28 | +
|
| 29 | + // (No support)VarType: COMPLEX64, 2x represents the real part, 2x+1 |
| 30 | + // represents the imaginary part |
| 31 | + repeated float complex64_data = 7; |
| 32 | +
|
| 33 | + // (No support)VarType: COMPLEX128, 2x represents the real part, 2x+1 |
| 34 | + // represents the imaginary part |
| 35 | + repeated double complex128_data = 8; |
| 36 | +
|
| 37 | + // VarType: STRING |
| 38 | + repeated string data = 9; |
| 39 | +
|
| 40 | + // Element types: |
| 41 | + // 0 => INT64 |
| 42 | + // 1 => FP32 |
| 43 | + // 2 => INT32 |
| 44 | + // 3 => FP64 |
| 45 | + // 4 => INT16 |
| 46 | + // 5 => FP16 |
| 47 | + // 6 => BF16 |
| 48 | + // 7 => UINT8 |
| 49 | + // 8 => INT8 |
| 50 | + // 9 => BOOL |
| 51 | + // 10 => COMPLEX64 |
| 52 | + // 11 => COMPLEX128 |
| 53 | + // 20 => STRING |
| 54 | + int32 elem_type = 10; |
| 55 | +
|
| 56 | + // Shape of the tensor, including batch dimensions. |
| 57 | + repeated int32 shape = 11; |
| 58 | +
|
| 59 | + // Level of data(LOD), support variable length data, only for fetch tensor |
| 60 | + // currently. |
| 61 | + repeated int32 lod = 12; |
| 62 | +
|
| 63 | + // Correspond to the variable 'name' in the model description prototxt. |
| 64 | + string name = 13; |
| 65 | +
|
| 66 | + // Correspond to the variable 'alias_name' in the model description prototxt. |
| 67 | + string alias_name = 14; // get from the Model prototxt |
| 68 | +
|
| 69 | + // VarType: FP16, INT16, INT8, BF16, UINT8 |
| 70 | + bytes tensor_content = 15; |
| 71 | +}; |
| 72 | +``` |
| 73 | + |
| 74 | +- elem_type:数据类型,当前支持FLOAT32, INT64, INT32, UINT8, INT8, FLOAT16 |
| 75 | + |
| 76 | +|elem_type|类型| |
| 77 | +|---------|----| |
| 78 | +|0|INT64| |
| 79 | +|1|FLOAT32| |
| 80 | +|2|INT32| |
| 81 | +|3|FP64| |
| 82 | +|4|INT16| |
| 83 | +|5|FP16| |
| 84 | +|6|BF16| |
| 85 | +|7|UINT8| |
| 86 | +|8|INT8| |
| 87 | + |
| 88 | +- shape:数据维度 |
| 89 | +- lod:lod信息,LoD(Level-of-Detail) Tensor是Paddle的高级特性,是对Tensor的一种扩充,用于支持更自由的数据输入。详见[LOD](../LOD_CN.md) |
| 90 | +- name/alias_name: 名称及别名,与模型配置对应 |
| 91 | + |
| 92 | +### 构建FLOAT32数据Tensor |
| 93 | + |
| 94 | +```C |
| 95 | +// 原始数据 |
| 96 | +std::vector<float> float_data; |
| 97 | +Tensor *tensor = new Tensor; |
| 98 | +// 设置维度,可以设置多维 |
| 99 | +for (uint32_t j = 0; j < float_shape.size(); ++j) { |
| 100 | + tensor->add_shape(float_shape[j]); |
| 101 | +} |
| 102 | +// 设置LOD信息 |
| 103 | +for (uint32_t j = 0; j < float_lod.size(); ++j) { |
| 104 | + tensor->add_lod(float_lod[j]); |
| 105 | +} |
| 106 | +// 设置类型、名称及别名 |
| 107 | +tensor->set_elem_type(1); |
| 108 | +tensor->set_name(name); |
| 109 | +tensor->set_alias_name(alias_name); |
| 110 | +// 拷贝数据 |
| 111 | +int total_number = float_data.size(); |
| 112 | +tensor->mutable_float_data()->Resize(total_number, 0); |
| 113 | +memcpy(tensor->mutable_float_data()->mutable_data(), float_data.data(), total_number * sizeof(float)); |
| 114 | +``` |
| 115 | +
|
| 116 | +### 构建INT8数据Tensor |
| 117 | +
|
| 118 | +```C |
| 119 | +// 原始数据 |
| 120 | +std::string string_data; |
| 121 | +Tensor *tensor = new Tensor; |
| 122 | +for (uint32_t j = 0; j < string_shape.size(); ++j) { |
| 123 | + tensor->add_shape(string_shape[j]); |
| 124 | +} |
| 125 | +for (uint32_t j = 0; j < string_lod.size(); ++j) { |
| 126 | + tensor->add_lod(string_lod[j]); |
| 127 | +} |
| 128 | +tensor->set_elem_type(8); |
| 129 | +tensor->set_name(name); |
| 130 | +tensor->set_alias_name(alias_name); |
| 131 | +tensor->set_tensor_content(string_data); |
| 132 | +``` |
| 133 | + |
| 134 | +## Request |
| 135 | + |
| 136 | +Request为客户端需要发送的请求数据,其以Tensor为基础数据单元,并包含了额外的请求信息。定义如下: |
| 137 | + |
| 138 | +```protobuf |
| 139 | +message Request { |
| 140 | + repeated Tensor tensor = 1; |
| 141 | + repeated string fetch_var_names = 2; |
| 142 | + bool profile_server = 3; |
| 143 | + uint64 log_id = 4; |
| 144 | +}; |
| 145 | +``` |
| 146 | + |
| 147 | +- fetch_var_names: 需要获取的输出数据名称,在GeneralResponseOP会根据该列表进行过滤。请参考模型文件serving_client_conf.prototxt中的`fetch_var`字段下的`alias_name`。 |
| 148 | +- profile_server: 调试参数,打开时会输出性能信息 |
| 149 | +- log_id: 请求ID |
| 150 | + |
| 151 | +### 构建Request |
| 152 | + |
| 153 | +当使用BRPC或GRPC进行请求时,使用protobuf形式数据,构建方式如下: |
| 154 | + |
| 155 | +```C |
| 156 | +Request req; |
| 157 | +req.set_log_id(log_id); |
| 158 | +for (auto &name : fetch_name) { |
| 159 | + req.add_fetch_var_names(name); |
| 160 | +} |
| 161 | +// 添加Tensor |
| 162 | +Tensor *tensor = req.add_tensor(); |
| 163 | +... |
| 164 | +``` |
| 165 | + |
| 166 | +当使用RESTful请求时,可以使用JSON形式数据,具体格式如下: |
| 167 | + |
| 168 | +```JSON |
| 169 | +{"tensor":[{"float_data":[0.0137,-0.1136,0.2553,-0.0692,0.0582,-0.0727,-0.1583,-0.0584,0.6283,0.4919,0.1856,0.0795,-0.0332],"elem_type":1,"name":"x","alias_name":"x","shape":[1,13]}],"fetch_var_names":["price"],"log_id":0} |
| 170 | +``` |
| 171 | + |
| 172 | +## Response |
| 173 | + |
| 174 | +Response为服务端返回给客户端的结果,包含了Tensor数据、错误码、错误信息等。定义如下: |
| 175 | + |
| 176 | +```protobuf |
| 177 | +message Response { |
| 178 | + repeated ModelOutput outputs = 1; |
| 179 | + repeated int64 profile_time = 2; |
| 180 | + // Error code |
| 181 | + int32 err_no = 3; |
| 182 | +
|
| 183 | + // Error messages |
| 184 | + string err_msg = 4; |
| 185 | +}; |
| 186 | +
|
| 187 | +message ModelOutput { |
| 188 | + repeated Tensor tensor = 1; |
| 189 | + string engine_name = 2; |
| 190 | +} |
| 191 | +``` |
| 192 | + |
| 193 | +- profile_time:当设置request->set_profile_server(true)时,会返回性能信息 |
| 194 | +- err_no:错误码,详见`core/predictor/common/constant.h` |
| 195 | +- err_msg:错误信息,详见`core/predictor/common/constant.h` |
| 196 | +- engine_name:输出节点名称 |
| 197 | + |
| 198 | +|err_no|err_msg| |
| 199 | +|---------|----| |
| 200 | +|0|OK| |
| 201 | +|-5000|"Paddle Serving Framework Internal Error."| |
| 202 | +|-5001|"Paddle Serving Memory Alloc Error."| |
| 203 | +|-5002|"Paddle Serving Array Overflow Error."| |
| 204 | +|-5100|"Paddle Serving Op Inference Error."| |
| 205 | + |
| 206 | +### 读取Response数据 |
| 207 | + |
| 208 | +```C |
| 209 | +uint32_t model_num = res.outputs_size(); |
| 210 | +for (uint32_t m_idx = 0; m_idx < model_num; ++m_idx) { |
| 211 | + const auto &output = res.outputs(m_idx); std::string engine_name = output.engine_name(); |
| 212 | + int idx = 0; |
| 213 | + // 读取tensor维度 |
| 214 | + int shape_size = output.tensor(idx).shape_size(); |
| 215 | + for (int i = 0; i < shape_size; ++i) { |
| 216 | + shape[i] = output.tensor(idx).shape(i); |
| 217 | + } |
| 218 | + // 读取LOD信息 |
| 219 | + int lod_size = output.tensor(idx).lod_size(); |
| 220 | + if (lod_size > 0) { |
| 221 | + lod.resize(lod_size); |
| 222 | + for (int i = 0; i < lod_size; ++i) { |
| 223 | + lod[i] = output.tensor(idx).lod(i); |
| 224 | + } |
| 225 | + } |
| 226 | + // 读取float数据 |
| 227 | + int size = output.tensor(idx).float_data_size(); |
| 228 | + float_data = std::vector<float>( |
| 229 | + output.tensor(idx).float_data().begin(), |
| 230 | + output.tensor(idx).float_data().begin() + size); |
| 231 | + // 读取int8数据 |
| 232 | + string_data = output.tensor(idx).tensor_content(); |
| 233 | +} |
| 234 | +``` |
0 commit comments