tinypose3d && modelzoo (#7844)

zhiboniu · web-flow · commit 716755d24ace · 2023-03-02T14:06:47.000+08:00
* metro con reverse

tinypose3d fix

readme modelzoo

* fix tinypose3d
diff --git a/configs/pose3d/README.md b/configs/pose3d/README.md
@@ -24,12 +24,12 @@
 
 PaddleDetection 中提供了两种3D Pose算法（稀疏关键点），分别是适用于服务器端的大模型Metro3D和移动端的TinyPose3D。其中Metro3D基于[End-to-End Human Pose and Mesh Reconstruction with Transformers](https://arxiv.org/abs/2012.09760)进行了稀疏化改造，TinyPose3D是在TinyPose基础上修改输出3D关键点。
 
-## 模型推荐（待补充）
+## 模型推荐
 
-|模型|适用场景|human3.6m精度|模型下载|
-|:--:|:--:|:--:|:--:|
-|Metro3D|服务器端|-|-|
-|TinyPose3D|移动端|-|-|
+|模型|适用场景|human3.6m精度(14关键点)|human3.6m精度(17关键点)|模型下载|
+|:--:|:--:|:--:|:--:|:--:|
+|Metro3D|服务器端|56.014|46.619|[metro3d_24kpts.pdparams](https://bj.bcebos.com/v1/paddledet/models/pose3d/metro3d_24kpts.pdparams)|
+|TinyPose3D|移动端|86.381|71.223|[tinypose3d_human36M.pdparams](https://bj.bcebos.com/v1/paddledet/models/pose3d/tinypose3d_human36M.pdparams)|
 
 注：
 1. 训练数据基于 [MeshTransformer](https://github.com/microsoft/MeshTransformer) 中的训练数据。
@@ -137,13 +137,14 @@ CUDA_VISIBLE_DEVICES=0 python3 tools/infer.py -c configs/pose3d/metro3d_24kpts.y
 
   我们的训练数据提供了大量的低精度自动生成式的数据，用户可以在此数据训练的基础上，标注自己高精度的目标动作数据进行finetune，即可得到相对稳定较好的模型。
 
-  我们在医疗康复高精度数据上的训练效果展示如下
+  我们在医疗康复高精度数据上的训练效果展示如下  [高清视频](https://user-images.githubusercontent.com/31800336/218949226-22e6ab25-facb-4cc6-8eca-38d4bfd973e5.mp4)
 
   <div align="center">
-    <img src="https://user-images.githubusercontent.com/31800336/218949226-22e6ab25-facb-4cc6-8eca-38d4bfd973e5.mp4" width='600'/>
+    <img src="https://user-images.githubusercontent.com/31800336/221747019-ceacfd64-e218-476b-a369-c6dc259816b2.gif" width='600'/>
   </div>
 
 
+
 ## 引用
 
 ```
diff --git a/configs/pose3d/tinypose3d_human36M.yml b/configs/pose3d/tinypose3d_human36M.yml
@@ -13,13 +13,12 @@ train_width: &train_width 128
 trainsize: &trainsize [*train_width, *train_height]
 
 #####model
-architecture: TinyPose3DHRNet
+architecture: TinyPose3DHRHeatmapNet
 pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/keypoint/tinypose_128x96.pdparams
 
-TinyPose3DHRNet:
+TinyPose3DHRHeatmapNet:
   backbone: LiteHRNet
   post_process: HR3DNetPostProcess
-  fc_channel: 1024
   num_joints: *num_joints
   width: &width 40
   loss: Pose3DLoss
@@ -56,17 +55,17 @@ OptimizerBuilder:
 #####data
 TrainDataset:
   !Pose3DDataset
-    dataset_dir: Human3.6M
-    image_dirs: ["Images"]
-    anno_list:  ['Human3.6m_train.json']
+    dataset_dir: dataset/traindata/
+    image_dirs: ["human3.6m"]
+    anno_list:  ['pose3d/Human3.6m_train.json']
     num_joints: *num_joints
     test_mode: False
 
 EvalDataset:
   !Pose3DDataset
-    dataset_dir: Human3.6M
-    image_dirs: ["Images"]
-    anno_list: ['Human3.6m_valid.json']
+    dataset_dir: dataset/traindata/
+    image_dirs: ["human3.6m"]
+    anno_list:  ['pose3d/Human3.6m_valid.json']
     num_joints: *num_joints
     test_mode: True
 
diff --git a/ppdet/data/source/pose3d_cmb.py b/ppdet/data/source/pose3d_cmb.py
@@ -11,9 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
-"""
-this code is base on https://github.com/open-mmlab/mmpose
-"""
+
 import os
 import cv2
 import numpy as np
@@ -80,7 +78,7 @@ def get_mask(self, mvm_percent=0.3):
             mjm_mask[indices, :] = 0.0
         # return mjm_mask
 
-        num_joints = 1
+        num_joints = 10
         mvm_mask = np.ones((num_joints, 1)).astype(np.float)
         if self.test_mode == False:
             num_vertices = num_joints
diff --git a/ppdet/metrics/pose3d_metrics.py b/ppdet/metrics/pose3d_metrics.py
@@ -137,11 +137,6 @@ def all_gather(data):
 
 
 class Pose3DEval(object):
-    """refer to
-        https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
-        Copyright (c) Microsoft, under the MIT License.
-    """
-
     def __init__(self, output_eval, save_prediction_only=False):
         super(Pose3DEval, self).__init__()
         self.output_eval = output_eval
diff --git a/ppdet/modeling/architectures/keypoint_hrnet.py b/ppdet/modeling/architectures/keypoint_hrnet.py
@@ -46,7 +46,7 @@ def __init__(self,
                  use_dark=True):
         """
         HRNet network, see https://arxiv.org/abs/1902.09212
-
+ 
         Args:
             backbone (nn.Layer): backbone instance
             post_process (object): `HRNetPostProcess` instance
@@ -132,10 +132,10 @@ def __init__(self, use_dark=True):
 
     def get_max_preds(self, heatmaps):
         '''get predictions from score maps
-
+ 
         Args:
             heatmaps: numpy.ndarray([batch_size, num_joints, height, width])
-
+ 
         Returns:
             preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
             maxvals: numpy.ndarray([batch_size, num_joints, 2]), the maximum confidence of the keypoints
@@ -220,12 +220,12 @@ def dark_postprocess(self, hm, coords, kernelsize):
     def get_final_preds(self, heatmaps, center, scale, kernelsize=3):
         """the highest heatvalue location with a quarter offset in the
         direction from the highest response to the second highest response.
-
+ 
         Args:
             heatmaps (numpy.ndarray): The predicted heatmaps
             center (numpy.ndarray): The boxes center
             scale (numpy.ndarray): The scale factor
-
+ 
         Returns:
             preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
             maxvals: numpy.ndarray([batch_size, num_joints, 1]), the maximum confidence of the keypoints
@@ -341,10 +341,7 @@ def __init__(
         self.deploy = False
         self.num_joints = num_joints
 
-        self.final_conv = L.Conv2d(width, num_joints, 1, 1, 0, bias=True)
-        # for heatmap output
-        self.final_conv_new = L.Conv2d(
-            width, num_joints * 32, 1, 1, 0, bias=True)
+        self.final_conv = L.Conv2d(width, num_joints * 32, 1, 1, 0, bias=True)
 
     @classmethod
     def from_config(cls, cfg, *args, **kwargs):
@@ -356,20 +353,19 @@ def from_config(cls, cfg, *args, **kwargs):
     def _forward(self):
         feats = self.backbone(self.inputs)  # feats:[[batch_size, 40, 32, 24]]
 
-        hrnet_outputs = self.final_conv_new(feats[0])
+        hrnet_outputs = self.final_conv(feats[0])
         res = soft_argmax(hrnet_outputs, self.num_joints)
-
-        if self.training:
-            return self.loss(res, self.inputs)
-        else:  # export model need
-            return res
+        return res
 
     def get_loss(self):
-        return self._forward()
+        pose3d = self._forward()
+        loss = self.loss(pose3d, None, self.inputs)
+        outputs = {'loss': loss}
+        return outputs
 
     def get_pred(self):
         res_lst = self._forward()
-        outputs = {'keypoint': res_lst}
+        outputs = {'pose3d': res_lst}
         return outputs
 
     def flip_back(self, output_flipped, matched_parts):
@@ -427,16 +423,23 @@ def from_config(cls, cfg, *args, **kwargs):
         return {'backbone': backbone, }
 
     def _forward(self):
-        feats = self.backbone(self.inputs)  # feats:[[batch_size, 40, 32, 24]]
+        '''
+        self.inputs is a dict
+        '''
+        feats = self.backbone(
+            self.inputs)  # feats:[[batch_size, 40, width/4, height/4]]
+
+        hrnet_outputs = self.final_conv(
+            feats[0])  # hrnet_outputs: [batch_size, num_joints*32,32,32]
 
-        hrnet_outputs = self.final_conv(feats[0])
         flatten_res = self.flatten(
-            hrnet_outputs)  #  [batch_size, 24, (height/4)*(width/4)]
+            hrnet_outputs)  # [batch_size,num_joints*32,32*32]
+
         res = self.fc1(flatten_res)
         res = self.act1(res)
         res = self.fc2(res)
         res = self.act2(res)
-        res = self.fc3(res)  # [batch_size, 24, 3]
+        res = self.fc3(res)
 
         if self.training:
             return self.loss(res, self.inputs)
@@ -448,7 +451,7 @@ def get_loss(self):
 
     def get_pred(self):
         res_lst = self._forward()
-        outputs = {'keypoint': res_lst}
+        outputs = {'pose3d': res_lst}
         return outputs
 
     def flip_back(self, output_flipped, matched_parts):
diff --git a/ppdet/modeling/architectures/pose3d_metro.py b/ppdet/modeling/architectures/pose3d_metro.py
@@ -53,7 +53,7 @@ def __init__(
             trans_encoder='',
             loss='Pose3DLoss', ):
         """
-        METRO network, see https://arxiv.org/abs/
+        Modified from METRO network, see https://arxiv.org/abs/2012.09760
 
         Args:
             backbone (nn.Layer): backbone instance
@@ -65,7 +65,7 @@ def __init__(
         self.deploy = False
 
         self.trans_encoder = trans_encoder
-        self.conv_learn_tokens = paddle.nn.Conv1D(49, num_joints + 1, 1)
+        self.conv_learn_tokens = paddle.nn.Conv1D(49, num_joints + 10, 1)
         self.cam_param_fc = paddle.nn.Linear(3, 2)
 
     @classmethod