diff --git a/configs/body/3d_kpt_mview_rgb_img/voxelpose/campus/voxelpose_prn32x32x32_cpn80x80x20_campus_cam3.py b/configs/body/3d_kpt_mview_rgb_img/voxelpose/campus/voxelpose_prn32x32x32_cpn80x80x20_campus_cam3.py index 80d88cbf03..ad95c7b2ae 100644 --- a/configs/body/3d_kpt_mview_rgb_img/voxelpose/campus/voxelpose_prn32x32x32_cpn80x80x20_campus_cam3.py +++ b/configs/body/3d_kpt_mview_rgb_img/voxelpose/campus/voxelpose_prn32x32x32_cpn80x80x20_campus_cam3.py @@ -94,6 +94,7 @@ type='DetectAndRegress', backbone=None, pretrained=None, + keypoint_head=None, human_detector=dict( type='VoxelCenterDetector', image_size=image_size, diff --git a/configs/body/3d_kpt_mview_rgb_img/voxelpose/campus/voxelpose_prn64x64x64_cpn80x80x20_campus_cam3.py b/configs/body/3d_kpt_mview_rgb_img/voxelpose/campus/voxelpose_prn64x64x64_cpn80x80x20_campus_cam3.py index 11df9dc3a5..183e3cbb23 100644 --- a/configs/body/3d_kpt_mview_rgb_img/voxelpose/campus/voxelpose_prn64x64x64_cpn80x80x20_campus_cam3.py +++ b/configs/body/3d_kpt_mview_rgb_img/voxelpose/campus/voxelpose_prn64x64x64_cpn80x80x20_campus_cam3.py @@ -94,6 +94,7 @@ model = dict( type='DetectAndRegress', backbone=None, + keypoint_head=None, pretrained=None, human_detector=dict( type='VoxelCenterDetector', diff --git a/configs/body/3d_kpt_mview_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.md b/configs/body/3d_kpt_mview_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.md index b473572d59..4164438469 100644 --- a/configs/body/3d_kpt_mview_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.md +++ b/configs/body/3d_kpt_mview_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.md @@ -34,4 +34,4 @@ Results on CMU Panoptic dataset. | Arch | mAP | mAR | MPJPE | Recall@500mm | ckpt | log | | :--------------------------------------------------------- | :---: | :---: | :---: | :----------: | :--------------------------------------------------------: | :-------------------------------------------------------: | -| [prn64_cpn80_res50](/configs/body/3d_kpt_mview_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.py) | 97.31 | 97.99 | 17.57 | 99.85 | [ckpt](https://download.openmmlab.com/mmpose/body3d/voxelpose/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5-545c150e_20211103.pth) | [log](https://download.openmmlab.com/mmpose/body3d/voxelpose/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5_20211103.log.json) | +| [prn64_cpn80_res50](/configs/body/3d_kpt_mview_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.py) | 97.15 | 97.70 | 17.09 | 99.25 | [ckpt](https://download.openmmlab.com/mmpose/body3d/voxelpose/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5-358648cb_20230118.pth) | [log](https://download.openmmlab.com/mmpose/body3d/voxelpose/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5_20230118.log.json) | diff --git a/configs/body/3d_kpt_mview_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.py b/configs/body/3d_kpt_mview_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.py index 90996e1eef..8dd02a8183 100644 --- a/configs/body/3d_kpt_mview_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.py +++ b/configs/body/3d_kpt_mview_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.py @@ -65,44 +65,30 @@ subset='validation')) # model settings -backbone = dict( - type='AssociativeEmbedding', - pretrained=None, - backbone=dict(type='ResNet', depth=50), - keypoint_head=dict( - type='DeconvHead', - in_channels=2048, - out_channels=num_joints, - num_deconv_layers=3, - num_deconv_filters=(256, 256, 256), - num_deconv_kernels=(4, 4, 4), - loss_keypoint=dict( - type='MultiLossFactory', - num_joints=15, - num_stages=1, - ae_loss_type='exp', - with_ae_loss=[False], - push_loss_factor=[0.001], - pull_loss_factor=[0.001], - with_heatmaps_loss=[True], - heatmaps_loss_factor=[1.0], - )), - train_cfg=dict(), - test_cfg=dict( - num_joints=num_joints, - nms_kernel=None, - nms_padding=None, - tag_per_joint=None, - max_num_people=None, - detection_threshold=None, - tag_threshold=None, - use_detection_val=None, - ignore_too_much=None, +backbone = dict(type='ResNet', depth=50) +keypoint_head = dict( + type='DeconvHead', + in_channels=2048, + out_channels=num_joints, + num_deconv_layers=3, + num_deconv_filters=(256, 256, 256), + num_deconv_kernels=(4, 4, 4), + loss_keypoint=dict( + type='MultiLossFactory', + num_joints=15, + num_stages=1, + ae_loss_type='exp', + with_ae_loss=[False], + push_loss_factor=[0.001], + pull_loss_factor=[0.001], + with_heatmaps_loss=[True], + heatmaps_loss_factor=[1.0], )) model = dict( type='DetectAndRegress', backbone=backbone, + keypoint_head=keypoint_head, pretrained='checkpoints/resnet_50_deconv.pth.tar', human_detector=dict( type='VoxelCenterDetector', diff --git a/configs/body/3d_kpt_mview_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.yml b/configs/body/3d_kpt_mview_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.yml index 8b5e57897f..8a67e35190 100644 --- a/configs/body/3d_kpt_mview_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.yml +++ b/configs/body/3d_kpt_mview_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.yml @@ -15,8 +15,8 @@ Models: Results: - Dataset: CMU Panoptic Metrics: - MPJPE: 17.57 - mAP: 97.31 - mAR: 97.99 + MPJPE: 17.09 + mAP: 97.15 + mAR: 97.7 Task: Body 3D Keypoint - Weights: https://download.openmmlab.com/mmpose/body3d/voxelpose/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5-545c150e_20211103.pth + Weights: https://download.openmmlab.com/mmpose/body3d/voxelpose/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5-358648cb_20230118.pth diff --git a/configs/body/3d_kpt_mview_rgb_img/voxelpose/shelf/voxelpose_prn32x32x32_cpn48x48x12_shelf_cam5.py b/configs/body/3d_kpt_mview_rgb_img/voxelpose/shelf/voxelpose_prn32x32x32_cpn48x48x12_shelf_cam5.py index 0eec22a22a..e30cee5903 100644 --- a/configs/body/3d_kpt_mview_rgb_img/voxelpose/shelf/voxelpose_prn32x32x32_cpn48x48x12_shelf_cam5.py +++ b/configs/body/3d_kpt_mview_rgb_img/voxelpose/shelf/voxelpose_prn32x32x32_cpn48x48x12_shelf_cam5.py @@ -91,6 +91,7 @@ model = dict( type='DetectAndRegress', backbone=None, + keypoint_head=None, pretrained=None, human_detector=dict( type='VoxelCenterDetector', diff --git a/configs/body/3d_kpt_mview_rgb_img/voxelpose/shelf/voxelpose_prn64x64x64_cpn80x80x20_shelf_cam5.py b/configs/body/3d_kpt_mview_rgb_img/voxelpose/shelf/voxelpose_prn64x64x64_cpn80x80x20_shelf_cam5.py index 9e4b5c376d..f08a7f171e 100644 --- a/configs/body/3d_kpt_mview_rgb_img/voxelpose/shelf/voxelpose_prn64x64x64_cpn80x80x20_shelf_cam5.py +++ b/configs/body/3d_kpt_mview_rgb_img/voxelpose/shelf/voxelpose_prn64x64x64_cpn80x80x20_shelf_cam5.py @@ -92,6 +92,7 @@ type='DetectAndRegress', backbone=None, pretrained=None, + keypoint_head=None, human_detector=dict( type='VoxelCenterDetector', image_size=image_size, diff --git a/mmpose/datasets/datasets/base/kpt_3d_mview_rgb_img_direct_dataset.py b/mmpose/datasets/datasets/base/kpt_3d_mview_rgb_img_direct_dataset.py index d59deb2b84..b965ba4c04 100644 --- a/mmpose/datasets/datasets/base/kpt_3d_mview_rgb_img_direct_dataset.py +++ b/mmpose/datasets/datasets/base/kpt_3d_mview_rgb_img_direct_dataset.py @@ -6,7 +6,6 @@ import json_tricks as json import numpy as np -from scipy.io import loadmat from torch.utils.data import Dataset from mmpose.datasets import DatasetInfo @@ -249,8 +248,5 @@ def _load_files(self): assert osp.exists(self.gt_pose_db_file), f'gt_pose_db_file ' \ f"{self.gt_pose_db_file} doesn't exist, please check again" - gt = loadmat(self.gt_pose_db_file) - self.gt_pose_db = np.array(np.array( - gt['actor3D'].tolist()).tolist()).squeeze() - + self.gt_pose_db = np.load(self.gt_pose_db_file) self.num_persons = len(self.gt_pose_db) diff --git a/mmpose/models/detectors/multiview_pose.py b/mmpose/models/detectors/multiview_pose.py index cfa5e32ea4..a16ef7accf 100644 --- a/mmpose/models/detectors/multiview_pose.py +++ b/mmpose/models/detectors/multiview_pose.py @@ -14,7 +14,7 @@ from mmpose.core.post_processing.post_transforms import ( affine_transform_torch, get_affine_transform) from .. import builder -from ..builder import POSENETS +from ..builder import BACKBONES, HEADS, POSENETS from ..utils.misc import torch_meshgrid_ij from .base import BasePose @@ -138,7 +138,9 @@ class DetectAndRegress(BasePose): """DetectAndRegress approach for multiview human pose detection. Args: - backbone (ConfigDict): Dictionary to construct the 2D pose detector + backbone (ConfigDict): Dictionary to construct the backbone. + keypoint_head (ConfigDict): Dictionary to construct the 2d + keypoint head. human_detector (ConfigDict): dictionary to construct human detector pose_regressor (ConfigDict): dictionary to construct pose regressor train_cfg (ConfigDict): Config for training. Default: None. @@ -150,6 +152,7 @@ class DetectAndRegress(BasePose): def __init__(self, backbone, + keypoint_head, human_detector, pose_regressor, train_cfg=None, @@ -158,11 +161,16 @@ def __init__(self, freeze_2d=True): super(DetectAndRegress, self).__init__() if backbone is not None: - self.backbone = builder.build_posenet(backbone) - if self.training and pretrained is not None: - load_checkpoint(self.backbone, pretrained) + self.backbone = BACKBONES.build(backbone) else: self.backbone = None + if keypoint_head is not None: + self.keypoint_head = HEADS.build(keypoint_head) + else: + self.keypoint_head = None + + if self.training and pretrained is not None: + load_checkpoint(self, pretrained) self.freeze_2d = freeze_2d self.human_detector = builder.MODELS.build(human_detector) @@ -188,8 +196,11 @@ def train(self, mode=True): Module: self """ super().train(mode) - if mode and self.freeze_2d and self.backbone is not None: - self._freeze(self.backbone) + if mode and self.freeze_2d: + if self.backbone is not None: + self._freeze(self.backbone) + if self.keypoint_head is not None: + self._freeze(self.keypoint_head) return self @@ -283,6 +294,12 @@ def train_step(self, data_batch, optimizer, **kwargs): return outputs + def predict_heatmap(self, img): + output = self.backbone(img) + output = self.keypoint_head(output) + + return output + def forward_train(self, img, img_metas, @@ -331,7 +348,7 @@ def forward_train(self, feature_maps = [] assert isinstance(img, list) for img_ in img: - feature_maps.append(self.backbone.forward_dummy(img_)[0]) + feature_maps.append(self.predict_heatmap(img_)[0]) losses = dict() human_candidates, human_loss = self.human_detector.forward_train( @@ -351,8 +368,9 @@ def forward_train(self, heatmaps_tensor = torch.cat(feature_maps, dim=0) targets_tensor = torch.cat(targets, dim=0) masks_tensor = torch.cat(masks, dim=0) - losses_2d_ = self.backbone.get_loss(heatmaps_tensor, - targets_tensor, masks_tensor) + losses_2d_ = self.keypoint_head.get_loss(heatmaps_tensor, + targets_tensor, + masks_tensor) for k, v in losses_2d_.items(): losses_2d[k + '_2d'] = v losses.update(losses_2d) @@ -400,7 +418,7 @@ def forward_test( feature_maps = [] assert isinstance(img, list) for img_ in img: - feature_maps.append(self.backbone.forward_dummy(img_)[0]) + feature_maps.append(self.predict_heatmap(img_)[0]) human_candidates = self.human_detector.forward_test( None, img_metas, feature_maps) @@ -506,7 +524,7 @@ def forward_dummy(self, img, input_heatmaps=None, num_candidates=5): feature_maps = [] assert isinstance(img, list) for img_ in img: - feature_maps.append(self.backbone.forward_dummy(img_)[0]) + feature_maps.append(self.predict_heatmap(img_)[0]) _ = self.human_detector.forward_dummy(feature_maps) diff --git a/model-index.yml b/model-index.yml index ed2300dbd2..03c8811120 100644 --- a/model-index.yml +++ b/model-index.yml @@ -109,8 +109,8 @@ Import: - configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_dark_wflw.yml - configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_wflw.yml - configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/resnet_deepfashion.yml -- configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/resnet_deepfashion.yml - configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion2/resnet_deepfashion2.yml +- configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/resnet_deepfashion.yml - configs/hand/2d_kpt_sview_rgb_img/deeppose/onehand10k/resnet_onehand10k.yml - configs/hand/2d_kpt_sview_rgb_img/deeppose/panoptic2d/resnet_panoptic2d.yml - configs/hand/2d_kpt_sview_rgb_img/deeppose/rhd2d/resnet_rhd2d.yml diff --git a/tests/data/campus/actorsGT.mat b/tests/data/campus/actorsGT.mat deleted file mode 100644 index c73c8b0aca..0000000000 Binary files a/tests/data/campus/actorsGT.mat and /dev/null differ diff --git a/tests/data/campus/actorsGT.npy b/tests/data/campus/actorsGT.npy new file mode 100644 index 0000000000..c7be2a9f20 Binary files /dev/null and b/tests/data/campus/actorsGT.npy differ diff --git a/tests/data/shelf/actorsGT.mat b/tests/data/shelf/actorsGT.mat deleted file mode 100644 index e1530021bd..0000000000 Binary files a/tests/data/shelf/actorsGT.mat and /dev/null differ diff --git a/tests/data/shelf/actorsGT.npy b/tests/data/shelf/actorsGT.npy new file mode 100644 index 0000000000..45748c90e4 Binary files /dev/null and b/tests/data/shelf/actorsGT.npy differ diff --git a/tests/test_datasets/test_body3d_dataset.py b/tests/test_datasets/test_body3d_dataset.py index 1b41c20e21..f2337c592b 100644 --- a/tests/test_datasets/test_body3d_dataset.py +++ b/tests/test_datasets/test_body3d_dataset.py @@ -379,7 +379,7 @@ def test_body3dmview_direct_campus_dataset(): cam_file=f'{data_root}/calibration_campus.json', train_pose_db_file=f'{data_root}/panoptic_training_pose.pkl', test_pose_db_file=f'{data_root}/pred_campus_maskrcnn_hrnet_coco.pkl', - gt_pose_db_file=f'{data_root}/actorsGT.mat', + gt_pose_db_file=f'{data_root}/actorsGT.npy', ) test_data_cfg = dict( @@ -398,7 +398,7 @@ def test_body3dmview_direct_campus_dataset(): cam_file=f'{data_root}/calibration_campus.json', train_pose_db_file=f'{data_root}/panoptic_training_pose.pkl', test_pose_db_file=f'{data_root}/pred_campus_maskrcnn_hrnet_coco.pkl', - gt_pose_db_file=f'{data_root}/actorsGT.mat', + gt_pose_db_file=f'{data_root}/actorsGT.npy', ) # test when dataset_info is None @@ -507,7 +507,7 @@ def test_body3dmview_direct_shelf_dataset(): cam_file=f'{data_root}/calibration_shelf.json', train_pose_db_file=f'{data_root}/panoptic_training_pose.pkl', test_pose_db_file=f'{data_root}/pred_shelf_maskrcnn_hrnet_coco.pkl', - gt_pose_db_file=f'{data_root}/actorsGT.mat', + gt_pose_db_file=f'{data_root}/actorsGT.npy', ) test_data_cfg = dict( @@ -526,7 +526,7 @@ def test_body3dmview_direct_shelf_dataset(): cam_file=f'{data_root}/calibration_shelf.json', train_pose_db_file=f'{data_root}/panoptic_training_pose.pkl', test_pose_db_file=f'{data_root}/pred_shelf_maskrcnn_hrnet_coco.pkl', - gt_pose_db_file=f'{data_root}/actorsGT.mat', + gt_pose_db_file=f'{data_root}/actorsGT.npy', ) # test when dataset_info is None diff --git a/tests/test_models/test_multiview_pose.py b/tests/test_models/test_multiview_pose.py index d37ded8619..880b1dfe36 100644 --- a/tests/test_models/test_multiview_pose.py +++ b/tests/test_models/test_multiview_pose.py @@ -63,6 +63,7 @@ def test_voxelpose_forward(): model_cfg = dict( type='DetectAndRegress', backbone=None, + keypoint_head=None, human_detector=dict( type='VoxelCenterDetector', image_size=[960, 512],