Skip to content

Commit e0da721

Browse files
authored
Merge pull request #258 from HydrogenSulfate/dev_tipc_cpp
Release PP-TSM, PP-TSN models' CPP inference TIPC
2 parents 7ae1699 + bcc0bfd commit e0da721

File tree

10 files changed

+443
-44
lines changed

10 files changed

+443
-44
lines changed

deploy/cpp_infer/readme.md

+4-2
Original file line numberDiff line numberDiff line change
@@ -267,8 +267,10 @@ PaddleVideo模型部署。
267267
​ 以example_video_dir下的样例视频`example01.avi`为输入视频为例,最终屏幕上会输出检测结果如下。
268268

269269
```bash
270-
I1125 08:10:42.753679 13955 main.cpp:88] The predict video: ./example_video_dir/example01.avi
271-
5 archery score: 0.999556
270+
[./inference/ppTSM]
271+
[./deploy/cpp_infer/example_video_dir]
272+
total videos num: 1
273+
./example_video_dir/example01.avi class: 5 archery score: 0.999556
272274
I1125 08:10:45.834288 13955 autolog.h:50] ----------------------- Config info -----------------------
273275
I1125 08:10:45.834458 13955 autolog.h:51] runtime_device: cpu
274276
I1125 08:10:45.834467 13955 autolog.h:52] ir_optim: True

deploy/cpp_infer/readme_en.md

+4-2
Original file line numberDiff line numberDiff line change
@@ -266,8 +266,10 @@ More parameters are as follows:
266266
​ Take the sample video `example01.avi` under example_video_dir as the input video as an example, the final screen will output the detection results as follows.
267267
268268
```bash
269-
I1125 08:10:42.753679 13955 main.cpp:88] The predict video: ./example_video_dir/example01.avi
270-
5 archery score: 0.999556
269+
[./inference/ppTSM]
270+
[./deploy/cpp_infer/example_video_dir]
271+
total videos num: 1
272+
./example_video_dir/example01.avi class: 5 archery score: 0.999556
271273
I1125 08:10:45.834288 13955 autolog.h:50] ----------------------- Config info -----------------------
272274
I1125 08:10:45.834458 13955 autolog.h:51] runtime_device: cpu
273275
I1125 08:10:45.834467 13955 autolog.h:52] ir_optim: True

deploy/cpp_infer/src/video_rec.cpp

+53-31
Original file line numberDiff line numberDiff line change
@@ -64,11 +64,11 @@ namespace PaddleVideo
6464
// 3. Normalization(inplace operation)
6565
for (int i = 0; i < real_batch_num; ++i)
6666
{
67-
for (int j = 0; j < num_views; ++j)
67+
for (int j = 0; j < this->num_seg; ++j)
6868
{
69-
for (int k = 0; k < this->num_seg; ++k)
69+
for (int k = 0; k < num_views; ++k)
7070
{
71-
this->normalize_op_.Run(&crop_frames[i * num_views * this->num_seg + j * this->num_seg + k], this->mean_, this->scale_, this->is_scale_);
71+
this->normalize_op_.Run(&crop_frames[i * num_views * this->num_seg + j * num_views + k], this->mean_, this->scale_, this->is_scale_);
7272
}
7373
}
7474
}
@@ -80,11 +80,11 @@ namespace PaddleVideo
8080
input = std::vector<float>(real_batch_num * num_views * this->num_seg * crop_frames[0].rows * crop_frames[0].cols * rc, 0.0f);
8181
for (int i = 0; i < real_batch_num; ++i)
8282
{
83-
for (int j = 0; j < num_views; ++j)
83+
for (int j = 0; j < this->num_seg; ++j)
8484
{
85-
for (int k = 0; k < this->num_seg; ++k)
85+
for (int k = 0; k < num_views; ++k)
8686
{
87-
this->permute_op_.Run(&crop_frames[i * num_views * this->num_seg + j * this->num_seg + k], input.data() + (i * num_views * this->num_seg + j * this->num_seg + k) * (rh * rw * rc));
87+
this->permute_op_.Run(&crop_frames[i * num_views * this->num_seg + j * num_views + k], input.data() + (i * num_views * this->num_seg + j * num_views + k) * (rh * rw * rc));
8888
}
8989
}
9090
}
@@ -115,11 +115,11 @@ namespace PaddleVideo
115115
// 3. Normalization(inplace operation)
116116
for (int i = 0; i < real_batch_num; ++i)
117117
{
118-
for (int j = 0; j < num_views; ++j)
118+
for (int j = 0; j < this->num_seg; ++j)
119119
{
120-
for (int k = 0; k < this->num_seg; ++k)
120+
for (int k = 0; k < num_views; ++k)
121121
{
122-
this->normalize_op_.Run(&crop_frames[i * num_views * this->num_seg + j * this->num_seg + k], this->mean_, this->scale_, this->is_scale_);
122+
this->normalize_op_.Run(&crop_frames[i * this->num_seg * num_views + j * num_views + k], this->mean_, this->scale_, this->is_scale_);
123123
}
124124
}
125125
}
@@ -128,11 +128,15 @@ namespace PaddleVideo
128128
int rh = crop_frames[0].rows;
129129
int rw = crop_frames[0].cols;
130130
int rc = crop_frames[0].channels();
131-
for (int i = 0; i < this->num_seg; ++i)
131+
input = std::vector<float>(real_batch_num * this->num_seg * num_views * crop_frames[0].rows * crop_frames[0].cols * rc, 0.0f);
132+
for (int i = 0; i < real_batch_num; ++i)
132133
{
133-
for (int j = 0; j < num_views; ++j)
134+
for (int j = 0; j < this->num_seg; ++j)
134135
{
135-
this->permute_op_.Run(&crop_frames[i * num_views + j], input.data() + (i * num_views + j) * rh * rw * rc);
136+
for (int k = 0; k < num_views; ++k)
137+
{
138+
this->permute_op_.Run(&crop_frames[i * this->num_seg * num_views + j * num_views + k], input.data() + (i * this->num_seg * num_views + j * num_views + k) * (rh * rw * rc));
139+
}
136140
}
137141
}
138142
}
@@ -213,29 +217,47 @@ namespace PaddleVideo
213217
{
214218
precision = paddle_infer::Config::Precision::kHalf;
215219
}
216-
if (this->precision_ == "int8")
220+
else if (this->precision_ == "int8")
217221
{
218222
precision = paddle_infer::Config::Precision::kInt8;
219223
}
220-
config.EnableTensorRtEngine(
221-
1 << 20, 10, 3,
222-
precision,
223-
false, false);
224-
// std::map<std::string, std::vector<int>> min_input_shape =
225-
// {
226-
// {"x", {1, 1, 3, 224, 224}}
227-
// };
228-
// std::map<std::string, std::vector<int>> max_input_shape =
229-
// {
230-
// {"x", {4, 1 * this->num_seg, 3, 224, 224}}
231-
// };
232-
// std::map<std::string, std::vector<int>> opt_input_shape =
233-
// {
234-
// {"x", {1, 1 * this->num_seg, 3, 224, 224}}
235-
// };
236224

237-
// config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
238-
// opt_input_shape);
225+
if (this->inference_model_name == "ppTSM" || this->inference_model_name == "TSM")
226+
{
227+
config.EnableTensorRtEngine(
228+
1 << 20, this->rec_batch_num * this->num_seg * 1, 3,
229+
precision,
230+
false, false);
231+
}
232+
else if(this->inference_model_name == "ppTSN" || this->inference_model_name == "TSN")
233+
{
234+
config.EnableTensorRtEngine(
235+
1 << 20, this->rec_batch_num * this->num_seg * 10, 3,
236+
precision,
237+
false, false);
238+
}
239+
else
240+
{
241+
config.EnableTensorRtEngine(
242+
1 << 20, this->rec_batch_num, 3,
243+
precision,
244+
false, false);
245+
}
246+
// std::map<std::string, std::vector<int>> min_input_shape =
247+
// {
248+
// {"data_batch", {1, 1, 1, 1, 1}}
249+
// };
250+
// std::map<std::string, std::vector<int>> max_input_shape =
251+
// {
252+
// {"data_batch", {10, this->num_seg, 3, 224, 224}}
253+
// };
254+
// std::map<std::string, std::vector<int>> opt_input_shape =
255+
// {
256+
// {"data_batch", {this->rec_batch_num, this->num_seg, 3, 224, 224}}
257+
// };
258+
259+
// config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
260+
// opt_input_shape);
239261
}
240262
}
241263
else

test_tipc/README.md

+1
Original file line numberDiff line numberDiff line change
@@ -115,3 +115,4 @@ bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/PP-TSM/PP-TSM_
115115
## 4. 开始测试
116116
各功能测试中涉及混合精度、裁剪、量化等训练相关,及mkldnn、Tensorrt等多种预测相关参数配置,请点击下方相应链接了解更多细节和使用教程:
117117
- [test_train_inference_python 使用](docs/test_train_inference_python.md) :测试基于Python的模型训练、评估、推理等基本功能。
118+
- [test_inference_cpp 使用](docs/test_inference_cpp.md) :测试基于C++的模型推理功能。
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
===========================cpp_infer_params===========================
2+
model_name:PP-TSM
3+
use_opencv:True
4+
infer_model:./inference/ppTSM
5+
infer_quant:False
6+
inference:./deploy/cpp_infer/build/ppvideo rec
7+
--use_gpu:True|False
8+
--enable_mkldnn:True|False
9+
--cpu_threads:1|6
10+
--rec_batch_num:1
11+
--use_tensorrt:False|True
12+
--precision:fp32|fp16
13+
--rec_model_dir:
14+
--video_dir:./deploy/cpp_infer/example_video_dir
15+
--inference_model_name:ppTSM
16+
--benchmark:True
17+
--char_list_file:data/k400/Kinetics-400_label_list.txt
18+
--num_seg:8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
===========================cpp_infer_params===========================
2+
model_name:PP-TSN
3+
use_opencv:True
4+
infer_model:./inference/ppTSN
5+
infer_quant:False
6+
inference:./deploy/cpp_infer/build/ppvideo rec
7+
--use_gpu:True|False
8+
--enable_mkldnn:True|False
9+
--cpu_threads:1|6
10+
--rec_batch_num:1
11+
--use_tensorrt:False|True
12+
--precision:fp32|fp16
13+
--rec_model_dir:
14+
--video_dir:./deploy/cpp_infer/example_video_dir
15+
--inference_model_name:ppTSN
16+
--benchmark:True
17+
--char_list_file:data/k400/Kinetics-400_label_list.txt
18+
--num_seg:25

test_tipc/docs/test_inference_cpp.md

+95
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
# C++预测功能测试
2+
3+
C++预测功能测试的主程序为`test_inference_cpp.sh`,可以测试基于C++预测库的模型推理功能。
4+
5+
## 1. 测试结论汇总
6+
7+
基于训练是否使用量化,进行本测试的模型可以分为`正常模型`和`量化模型`(TODO),这两类模型对应的C++预测功能汇总如下:
8+
9+
| 模型类型 |device | batchsize | tensorrt | mkldnn | cpu多线程 |
10+
| ---- | ---- | ---- | :----: | :----: | :----: |
11+
| 正常模型 | GPU | 1/6 | fp32/fp16 | - | - |
12+
| 正常模型 | CPU | 1/6 | - | fp32 | 支持 |
13+
14+
## 2. 测试流程
15+
运行环境配置请参考[文档](./install.md)的内容配置TIPC的运行环境。
16+
17+
### 2.1 功能测试
18+
先运行`prepare.sh`准备数据和模型,然后运行`test_inference_cpp.sh`进行测试,最终在```test_tipc/output```目录下生成`cpp_infer_*.log`后缀的日志文件。
19+
20+
```bash
21+
bash test_tipc/prepare.sh test_tipc/configs/PP-TSM/PP-TSM_infer_cpp.txt 'cpp_infer'
22+
```
23+
```bash
24+
# 用法1:
25+
bash test_tipc/test_inference_cpp.sh test_tipc/configs/PP-TSM/PP-TSM_infer_cpp.txt
26+
# 用法2: 指定GPU卡预测,第三个传入参数为GPU卡号
27+
bash test_tipc/test_inference_cpp.sh test_tipc/configs/PP-TSM/PP-TSM_infer_cpp.txt 1
28+
```
29+
30+
运行预测指令后,在`test_tipc/output`文件夹下自动会保存运行日志,包括以下文件:
31+
32+
```shell
33+
test_tipc/output/PP-TSM/
34+
├── results_cpp.log # 运行指令状态的日志
35+
├── cpp_infer_cpu_usemkldnn_False_threads_1_precision_fp32_batchsize_1.log # CPU上不开启Mkldnn,线程数设置为1,测试batch_size=1条件下的预测运行日志
36+
├── cpp_infer_cpu_usemkldnn_False_threads_6_precision_fp32_batchsize_1.log # CPU上不开启Mkldnn,线程数设置为6,测试batch_size=1条件下的预测运行日志
37+
├── cpp_infer_gpu_usetrt_False_precision_fp32_batchsize_1.log # GPU上不开启TensorRT,测试batch_size=1的fp32精度预测日志
38+
├── cpp_infer_gpu_usetrt_True_precision_fp16_batchsize_1.log # GPU上开启TensorRT,测试batch_size=1的fp16精度预测日志
39+
......
40+
```
41+
其中results_cpp.log中包含了每条指令的运行状态,如果运行成功会输出:
42+
43+
```
44+
Run successfully with command - ./deploy/cpp_infer/build/ppvideo rec --use_gpu=True --use_tensorrt=False --precision=fp32 --rec_model_dir=./inference/ppTSM --rec_batch_num=1 --video_dir=./deploy/cpp_infer/example_video_dir --benchmark=True --inference_model_name=ppTSM --char_list_file=data/k400/Kinetics-400_label_list.txt --num_seg=8 > ./test_tipc/output/PP-TSM/cpp_infer_gpu_usetrt_False_precision_fp32_batchsize_1.log 2>&1
45+
......
46+
```
47+
如果运行失败,会输出:
48+
```
49+
Run failed with command - ./deploy/cpp_infer/build/ppvideo rec --use_gpu=False --enable_mkldnn=False --cpu_threads=1 --rec_model_dir=./inference/ppTSM --rec_batch_num=1 --video_dir=./deploy/cpp_infer/example_video_dir --benchmark=True --inference_model_name=ppTSM --char_list_file=data/k400/Kinetics-400_label_list.txt --num_seg=8 > ./test_tipc/output/PP-TSM/cpp_infer_cpu_usemkldnn_False_threads_1_precision_fp32_batchsize_1.log 2>&1
50+
......
51+
```
52+
可以很方便的根据results_cpp.log中的内容判定哪一个指令运行错误。
53+
54+
55+
### 2.2 精度测试
56+
57+
使用compare_results.py脚本比较模型预测的结果是否符合预期,主要步骤包括:
58+
- 提取预测输出文本的结果
59+
- 提取本地参考输出文本结果
60+
- 比较上述两个结果是否符合精度预期,误差大于设置阈值时会报错。
61+
62+
#### 使用方式
63+
运行命令:
64+
```shell
65+
python3.7 test_tipc/compare_results.py --gt_file "test_tipc/results/PP-TSM_CPP/cpp_ppvideo_PP-TSM_results_fp*.txt" --log_file "test_tipc/output/PP-TSM/cpp_infer_*.log" --atol=1e-3 --rtol=1e-3
66+
```
67+
68+
参数介绍:
69+
- gt_file: 指向事先保存好的预测结果路径,支持*.txt 结尾,会自动索引*.txt格式的文件,文件默认保存在test_tipc/results/ 文件夹下
70+
- log_file: 指向运行test_tipc/test_inference_cpp.sh 脚本的infer模式保存的预测日志,预测日志中打印的有预测结果,比如:预测文本,类别等等,同样支持cpp_infer_*.log格式传入
71+
- atol: 设置的绝对误差
72+
- rtol: 设置的相对误差
73+
74+
#### 运行结果
75+
76+
正常运行输出示例:
77+
```bash
78+
Assert allclose passed! The results of cpp_infer_cpu_usemkldnn_True_threads_1_precision_fp32_batchsize_1.log and test_tipc/results/PP-TSN_CPP/cpp_ppvideo_PP-TSN_results_fp32.txt are consistent!
79+
Assert allclose passed! The results of cpp_infer_cpu_usemkldnn_False_threads_1_precision_fp32_batchsize_1.log and test_tipc/results/PP-TSN_CPP/cpp_ppvideo_PP-TSN_results_fp32.txt are consistent!
80+
Assert allclose passed! The results of cpp_infer_gpu_usetrt_True_precision_fp16_batchsize_1.log and test_tipc/results/PP-TSN_CPP/cpp_ppvideo_PP-TSN_results_fp16.txt are consistent!
81+
Assert allclose passed! The results of cpp_infer_gpu_usetrt_False_precision_fp32_batchsize_1.log and test_tipc/results/PP-TSN_CPP/cpp_ppvideo_PP-TSN_results_fp32.txt are consistent!
82+
Assert allclose passed! The results of cpp_infer_cpu_usemkldnn_True_threads_6_precision_fp32_batchsize_1.log and test_tipc/results/PP-TSN_CPP/cpp_ppvideo_PP-TSN_results_fp32.txt are consistent!
83+
Assert allclose passed! The results of cpp_infer_cpu_usemkldnn_False_threads_6_precision_fp32_batchsize_1.log and test_tipc/results/PP-TSN_CPP/cpp_ppvideo_PP-TSN_results_fp32.txt are consistent!
84+
Assert allclose passed! The results of cpp_infer_gpu_usetrt_True_precision_fp32_batchsize_1.log and test_tipc/results/PP-TSN_CPP/cpp_ppvideo_PP-TSN_results_fp32.txt are consistent!
85+
```
86+
87+
出现不一致结果时的运行输出示例:
88+
```bash
89+
ValueError: The results of cpp_infer_cpu_usemkldnn_True_threads_1_precision_fp32_batchsize_1.log and the results of test_tipc/results/PP-TSM_CPP/cpp_ppvideo_PP-TSM_results_fp32.txt are inconsistent!
90+
```
91+
92+
93+
## 3. 更多教程
94+
95+
本文档为功能测试用,更详细的C++预测使用教程请参考:[服务器端C++预测](../../deploy/cpp_infer/readme.md)

test_tipc/docs/test_train_inference_python.md

+6-6
Original file line numberDiff line numberDiff line change
@@ -74,13 +74,13 @@ Linux端基础训练预测功能测试的主程序为`test_train_inference_pytho
7474
bash test_tipc/test_train_inference_python.sh test_tipc/configs/PP-TSM/PP-TSM_train_infer_python.txt 'lite_train_lite_infer'
7575
```
7676
77-
- 模式2:lite_train_whole_infer,使用少量数据训练,一定量数据预测,用于验证训练后的模型执行预测,预测速度是否合理;
77+
- 模式2:**lite_train_whole_infer**,使用少量数据训练,一定量数据预测,用于验证训练后的模型执行预测,预测速度是否合理;
7878
```shell
7979
bash test_tipc/prepare.sh test_tipc/configs/PP-TSM/PP-TSM_train_infer_python.txt 'lite_train_whole_infer'
8080
bash test_tipc/test_train_inference_python.sh test_tipc/configs/PP-TSM/PP-TSM_train_infer_python.txt 'lite_train_whole_infer'
8181
```
8282
83-
- 模式3:whole_infer,不训练,全量数据预测,走通开源模型评估、动转静,检查inference model预测时间和精度;
83+
- 模式3:**whole_infer**,不训练,全量数据预测,走通开源模型评估、动转静,检查inference model预测时间和精度;
8484
```shell
8585
bash test_tipc/prepare.sh test_tipc/configs/PP-TSM/PP-TSM_train_infer_python.txt 'whole_infer'
8686
# 用法1:
@@ -89,14 +89,14 @@ Linux端基础训练预测功能测试的主程序为`test_train_inference_pytho
8989
bash test_tipc/test_train_inference_python.sh test_tipc/configs/PP-TSM/PP-TSM_train_infer_python.txt 'whole_infer' '1'
9090
```
9191
92-
- 模式4:whole_train_whole_infer,CE: 全量数据训练,全量数据预测,验证模型训练精度,预测精度,预测速度;
92+
- 模式4:**whole_train_whole_infer**: 全量数据训练,全量数据预测,验证模型训练精度,预测精度,预测速度;
9393
```shell
9494
bash test_tipc/prepare.sh test_tipc/configs/PP-TSM/PP-TSM_train_infer_python.txt 'whole_train_whole_infer'
9595
bash test_tipc/test_train_inference_python.sh test_tipc/configs/PP-TSM/PP-TSM_train_infer_python.txt 'whole_train_whole_infer'
9696
```
9797
9898
99-
最终在tests/output目录下生成.log后缀的日志文件
99+
最终在`tests/output/model_name`目录下生成.log后缀的日志文件
100100
101101
102102
### 2.3 精度测试
@@ -109,12 +109,12 @@ Linux端基础训练预测功能测试的主程序为`test_train_inference_pytho
109109
#### 使用方式
110110
运行命令:
111111
```shell
112-
python3.7 test_tipc/compare_results.py --gt_file=test_tipc/results/python_*.txt --log_file=test_tipc/output/python_*.log --atol=1e-3 --rtol=1e-3
112+
python3.7 test_tipc/compare_results.py --gt_file="test_tipc/results/python_*.txt" --log_file="test_tipc/output/python_*.log" --atol=1e-3 --rtol=1e-3
113113
```
114114

115115
参数介绍:
116116
- gt_file: 指向事先保存好的预测结果路径,支持*.txt 结尾,会自动索引*.txt格式的文件,文件默认保存在test_tipc/results/ 文件夹下
117-
- log_file: 指向运行test_tipc/test_train_inference_python.sh 脚本的infer模式保存的预测日志,预测日志中打印的有预测结果,比如:文本框,预测文本,类别等等,同样支持python_infer_*.log格式传入
117+
- log_file: 指向运行test_tipc/test_train_inference_python.sh 脚本的infer模式保存的预测日志,预测日志中打印的有预测结果,比如:预测文本,类别等等,同样支持python_infer_*.log格式传入
118118
- atol: 设置的绝对误差
119119
- rtol: 设置的相对误差
120120

0 commit comments

Comments
 (0)