add mot_pose_demo; sync with det benchmark codes

zhiboniu · zhiboniu · commit d80dc589c94b · 2021-05-20T03:47:41.000Z
diff --git a/README_cn.md b/README_cn.md
@@ -17,6 +17,7 @@ PaddleDetection模块化地实现了多种主流目标检测算法，提供了
 
 <div align="center">
   <img src="static/docs/images/football.gif" width='800'/>
+  <img src="docs/images/mot_pose_demo_640x360.gif" width='800'/>
 </div>
 
 ### 产品动态
diff --git a/deploy/python/keypoint_det_unite_infer.py b/deploy/python/keypoint_det_unite_infer.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 import os
-
 from PIL import Image
 import cv2
 import numpy as np
@@ -52,7 +51,7 @@ def get_person_from_rect(images, results):
     org_rects = []
     for rect in valid_rects:
         rect_image, new_rect, org_rect = expand_crop(images, rect)
-        if rect_image is None:
+        if rect_image is None or rect_image.size == 0:
             continue
         image_buff.append([rect_image, new_rect])
         org_rects.append(org_rect)
@@ -113,13 +112,13 @@ def topdown_unite_predict_video(detector, topdown_keypoint_detector, camera_id):
         os.makedirs(FLAGS.output_dir)
     out_path = os.path.join(FLAGS.output_dir, video_name)
     writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
-    index = 1
+    index = 0
     while (1):
         ret, frame = capture.read()
         if not ret:
             break
-        print('detect frame:%d' % (index))
         index += 1
+        print('detect frame:%d' % (index))
 
         frame2 = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
         results = detector.predict(frame2, FLAGS.det_threshold)
@@ -136,7 +135,7 @@ def topdown_unite_predict_video(detector, topdown_keypoint_detector, camera_id):
         keypoint_res = {}
         keypoint_res['keypoint'] = [
             np.vstack(keypoint_vector), np.vstack(score_vector)
-        ]
+        ] if len(keypoint_vector) > 0 else [[], []]
         keypoint_res['bbox'] = rect_vecotr
         im = draw_pose(
             frame,
@@ -189,8 +188,6 @@ def main():
         # predict from image
         img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file)
         topdown_unite_predict(detector, topdown_keypoint_detector, img_list)
-        detector.det_times.info(average=True)
-        topdown_keypoint_detector.det_times.info(average=True)
 
 
 if __name__ == '__main__':
diff --git a/deploy/python/keypoint_infer.py b/deploy/python/keypoint_infer.py
@@ -28,7 +28,8 @@
 from keypoint_visualize import draw_pose
 from paddle.inference import Config
 from paddle.inference import create_predictor
-from utils import argsparser, Timer, get_current_memory_mb, LoggerHelper
+from utils import argsparser, Timer, get_current_memory_mb
+from benchmark_utils import PaddleInferBenchmark
 from infer import get_test_images, print_arguments
 
 # Global dictionary
@@ -66,7 +67,7 @@ def __init__(self,
                  cpu_threads=1,
                  enable_mkldnn=False):
         self.pred_config = pred_config
-        self.predictor = load_predictor(
+        self.predictor, self.config = load_predictor(
             model_dir,
             run_mode=run_mode,
             min_subgraph_size=self.pred_config.min_subgraph_size,
@@ -129,15 +130,15 @@ def predict(self, image, threshold=0.5, warmup=0, repeats=1):
                             MaskRCNN's results include 'masks': np.ndarray:
                             shape: [N, im_h, im_w]
         '''
-        self.det_times.preprocess_time.start()
+        self.det_times.preprocess_time_s.start()
         inputs = self.preprocess(image)
         np_boxes, np_masks = None, None
         input_names = self.predictor.get_input_names()
 
         for i in range(len(input_names)):
             input_tensor = self.predictor.get_input_handle(input_names[i])
             input_tensor.copy_from_cpu(inputs[input_names[i]])
-        self.det_times.preprocess_time.end()
+        self.det_times.preprocess_time_s.end()
         for i in range(warmup):
             self.predictor.run()
             output_names = self.predictor.get_output_names()
@@ -152,7 +153,7 @@ def predict(self, image, threshold=0.5, warmup=0, repeats=1):
                     inds_k.copy_to_cpu()
                 ]
 
-        self.det_times.inference_time.start()
+        self.det_times.inference_time_s.start()
         for i in range(repeats):
             self.predictor.run()
             output_names = self.predictor.get_output_names()
@@ -166,12 +167,12 @@ def predict(self, image, threshold=0.5, warmup=0, repeats=1):
                     masks_tensor.copy_to_cpu(), heat_k.copy_to_cpu(),
                     inds_k.copy_to_cpu()
                 ]
-        self.det_times.inference_time.end(repeats=repeats)
+        self.det_times.inference_time_s.end(repeats=repeats)
 
-        self.det_times.postprocess_time.start()
+        self.det_times.postprocess_time_s.start()
         results = self.postprocess(
             np_boxes, np_masks, inputs, threshold=threshold)
-        self.det_times.postprocess_time.end()
+        self.det_times.postprocess_time_s.end()
         self.det_times.img_num += 1
         return results
 
@@ -318,7 +319,7 @@ def load_predictor(model_dir,
     # disable feed, fetch OP, needed by zero_copy_run
     config.switch_use_feed_fetch_ops(False)
     predictor = create_predictor(config)
-    return predictor
+    return predictor, config
 
 
 def predict_image(detector, image_list):
@@ -347,7 +348,8 @@ def predict_video(detector, camera_id):
         video_name = 'output.mp4'
     else:
         capture = cv2.VideoCapture(FLAGS.video_file)
-        video_name = os.path.basename(os.path.split(FLAGS.video_file)[-1])
+        video_name = os.path.splitext(os.path.basename(FLAGS.video_file))[
+            0] + '.mp4'
     fps = 30
     width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
     height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
@@ -407,9 +409,22 @@ def main():
                 'gpu_rss': detector.gpu_mem / len(img_list),
                 'gpu_util': detector.gpu_util * 100 / len(img_list)
             }
-            det_logger = LoggerHelper(
-                FLAGS, detector.det_times.report(average=True), mems)
-            det_logger.report()
+
+            perf_info = detector.det_times.report(average=True)
+            model_dir = FLAGS.model_dir
+            mode = FLAGS.run_mode
+            model_info = {
+                'model_name': model_dir.strip('/').split('/')[-1],
+                'precision': mode.split('_')[-1]
+            }
+            data_info = {
+                'batch_size': 1,
+                'shape': "dynamic_shape",
+                'data_num': perf_info['img_num']
+            }
+            det_log = PaddleInferBenchmark(detector.config, model_info,
+                                           data_info, perf_info, mems)
+            det_log('KeyPoint')
 
 
 if __name__ == '__main__':
diff --git a/deploy/python/keypoint_visualize.py b/deploy/python/keypoint_visualize.py
@@ -19,11 +19,6 @@
 import math
 
 
-def map_coco_to_personlab(keypoints):
-    permute = [0, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3]
-    return keypoints[:, permute, :]
-
-
 def draw_pose(imgfile,
               results,
               visual_thread=0.6,
@@ -39,9 +34,9 @@ def draw_pose(imgfile,
                      'for example: `pip install matplotlib`.')
         raise e
 
-    EDGES = [(0, 14), (0, 13), (0, 4), (0, 1), (14, 16), (13, 15), (4, 10),
-             (1, 7), (10, 11), (7, 8), (11, 12), (8, 9), (4, 5), (1, 2), (5, 6),
-             (2, 3)]
+    EDGES = [(0, 1), (0, 2), (1, 3), (2, 4), (3, 5), (4, 6), (5, 7), (6, 8),
+             (7, 9), (8, 10), (5, 11), (6, 12), (11, 13), (12, 14), (13, 15),
+             (14, 16), (11, 12)]
     NUM_EDGES = len(EDGES)
 
     colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], \
@@ -52,33 +47,35 @@ def draw_pose(imgfile,
 
     img = cv2.imread(imgfile) if type(imgfile) == str else imgfile
     skeletons, scores = results['keypoint']
+    color_set = results['colors'] if 'colors' in results else None
 
     if 'bbox' in results:
         bboxs = results['bbox']
-        for idx, rect in enumerate(bboxs):
+        for j, rect in enumerate(bboxs):
             xmin, ymin, xmax, ymax = rect
-            cv2.rectangle(img, (xmin, ymin), (xmax, ymax), colors[0], 1)
+            color = colors[0] if color_set is None else colors[color_set[j] %
+                                                               len(colors)]
+            cv2.rectangle(img, (xmin, ymin), (xmax, ymax), color, 1)
 
     canvas = img.copy()
     for i in range(17):
-        rgba = np.array(cmap(1 - i / 17. - 1. / 34))
-        rgba[0:3] *= 255
         for j in range(len(skeletons)):
             if skeletons[j][i, 2] < visual_thread:
                 continue
+            color = colors[i] if color_set is None else colors[color_set[j] %
+                                                               len(colors)]
             cv2.circle(
                 canvas,
                 tuple(skeletons[j][i, 0:2].astype('int32')),
                 2,
-                colors[i],
+                color,
                 thickness=-1)
 
     to_plot = cv2.addWeighted(img, 0.3, canvas, 0.7, 0)
     fig = matplotlib.pyplot.gcf()
 
     stickwidth = 2
 
-    skeletons = map_coco_to_personlab(skeletons)
     for i in range(NUM_EDGES):
         for j in range(len(skeletons)):
             edge = EDGES[i]
@@ -96,7 +93,9 @@ def draw_pose(imgfile,
             polygon = cv2.ellipse2Poly((int(mY), int(mX)),
                                        (int(length / 2), stickwidth),
                                        int(angle), 0, 360, 1)
-            cv2.fillConvexPoly(cur_canvas, polygon, colors[i])
+            color = colors[i] if color_set is None else colors[color_set[j] %
+                                                               len(colors)]
+            cv2.fillConvexPoly(cur_canvas, polygon, color)
             canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)
     if returnimg:
         return canvas
diff --git a/docs/images/mot_pose_demo_640x360.gif b/docs/images/mot_pose_demo_640x360.gif