open-mmlab · wukurua · May 3, 2023 · Apr 25, 2023 · May 3, 2023 · May 4, 2023
diff --git a/configs/animal_2d_keypoint/rtmpose/animalpose/rtmpose-animalpose.yml b/configs/animal_2d_keypoint/rtmpose/animalpose/rtmpose-animalpose.yml
@@ -0,0 +1,89 @@
+Models:
+- Config: configs/animal_2d_keypoint/rtmpose/animalpose/rtmpose-l_8xb64-210e_animalpose-256x256.py
+  In Collection: RTMPose
+  Metadata:
+    Architecture:
+      - RTMPose
+    Training Data: Animal-Pose
+  Name: rtmpose-l_8xb64-210e_animalpose-256x256
+  Results:
+  - Dataset: Animal-Pose
+    Metrics:
+      AP: 0.766
+      AP@0.5: 0.959
+      AP@0.75: 0.855
+      AP (M): 0.725
+      AP (L): 0.778
+      AR: 0.800
+      AR@0.5: 0.968
+      AR@0.75: 0.874
+      AR (M): 0.769
+      AR (L): 0.808
+    Task: Animal 2D Keypoint
+#  Weights: TODO replace placeholder (HRNet-W32 checkpoint, not RTMPose-l): https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_animalpose_256x256-1aa7f075_20210426.pth
+- Config: configs/animal_2d_keypoint/rtmpose/animalpose/rtmpose-m_8xb64-210e_animalpose-256x256.py
+  In Collection: RTMPose
+  Metadata:
+    Architecture:
+      - RTMPose
+    Training Data: Animal-Pose
+  Name: rtmpose-m_8xb64-210e_animalpose-256x256
+  Results:
+  - Dataset: Animal-Pose
+    Metrics:
+      AP: 0.598
+      AP@0.5: 0.896
+      AP@0.75: 0.653
+      AP (M): 0.596
+      AP (L): 0.603
+      AR: 0.642
+      AR@0.5: 0.900
+      AR@0.75: 0.699
+      AR (M): 0.660
+      AR (L): 0.641
+    Task: Animal 2D Keypoint
+#  Weights: TODO replace placeholder (HRNet-W32 checkpoint, not RTMPose-m): https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_animalpose_256x256-1aa7f075_20210426.pth
+- Config: configs/animal_2d_keypoint/rtmpose/animalpose/rtmpose-s_8xb64-210e_animalpose-256x256.py
+  In Collection: RTMPose
+  Metadata:
+    Architecture:
+      - RTMPose
+    Training Data: Animal-Pose
+  Name: rtmpose-s_8xb64-210e_animalpose-256x256
+  Results:
+  - Dataset: Animal-Pose
+    Metrics:
+      AP: 0.709
+      AP@0.5: 0.938
+      AP@0.75: 0.799
+      AP (M): 0.674
+      AP (L): 0.718
+      AR: 0.748
+      AR@0.5: 0.946
+      AR@0.75: 0.824
+      AR (M): 0.730
+      AR (L): 0.754
+    Task: Animal 2D Keypoint
+#  Weights: TODO replace placeholder (HRNet-W32 checkpoint, not RTMPose-s): https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_animalpose_256x256-1aa7f075_20210426.pth
+- Config: configs/animal_2d_keypoint/rtmpose/animalpose/rtmpose-t_8xb64-210e_animalpose-256x256.py
+  In Collection: RTMPose
+  Metadata:
+    Architecture:
+      - RTMPose
+    Training Data: Animal-Pose
+  Name: rtmpose-t_8xb64-210e_animalpose-256x256
+  Results:
+  - Dataset: Animal-Pose
+    Metrics:
+      AP: 0.680
+      AP@0.5: 0.927
+      AP@0.75: 0.770
+      AP (M): 0.657
+      AP (L): 0.688
+      AR: 0.718
+      AR@0.5: 0.934
+      AR@0.75: 0.792
+      AR (M): 0.712
+      AR (L): 0.721
+    Task: Animal 2D Keypoint
+#  Weights: TODO replace placeholder (HRNet-W32 checkpoint, not RTMPose-t): https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_animalpose_256x256-1aa7f075_20210426.pth
diff --git a/configs/animal_2d_keypoint/rtmpose/animalpose/rtmpose-l_8xb64-210e_animalpose-256x256.py b/configs/animal_2d_keypoint/rtmpose/animalpose/rtmpose-l_8xb64-210e_animalpose-256x256.py
@@ -0,0 +1,172 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 210
+base_lr = 5e-4
+num_workers = 8
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=23)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+    type='Adam',
+    lr=5e-4,
+))
+
+# learning rate
+batch_size = 64
+param_scheduler = [
+    dict(
+        type='LinearLR', begin=0, end=500, start_factor=0.001,
+        by_epoch=False),  # warm-up
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=210,
+        milestones=[170, 200],
+        gamma=0.1,
+        by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=batch_size)
+
+# hooks
+default_hooks = dict(
+    checkpoint=dict(save_best='animalpose/AP', rule='greater'))
+
+# codec settings: one SimCCLabel dict reused as head decoder and target encoder
+codec = dict(
+    type='SimCCLabel',
+    input_size=(256, 256),
+    sigma=(5.66, 5.66),
+    simcc_split_ratio=2.0,
+    normalize=False,
+    use_dark=False)
+
+# model settings: TopdownPoseEstimator with a CSPNeXt backbone and RTMCCHead
+model = dict(
+    type='TopdownPoseEstimator',
+    data_preprocessor=dict(
+        type='PoseDataPreprocessor',
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=True),
+    backbone=dict(
+        _scope_='mmdet',  # the type below comes from the mmdet scope
+        type='CSPNeXt',
+        arch='P5',
+        expand_ratio=0.5,
+        deepen_factor=1.,
+        widen_factor=1.,
+        out_indices=(4, ),
+        channel_attention=True,
+        norm_cfg=dict(type='SyncBN'),
+        act_cfg=dict(type='SiLU'),
+        init_cfg=dict(  # pretrained weights; URL names a 256x192 checkpoint
+            type='Pretrained',
+            prefix='backbone.',
+            checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
+            'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth'
+        )),
+    head=dict(
+        type='RTMCCHead',
+        in_channels=1024,
+        out_channels=20,  # presumably the keypoint count - confirm
+        input_size=codec['input_size'],
+        in_featuremap_size=(8, 8),  # presumably input_size / 32 - confirm
+        simcc_split_ratio=codec['simcc_split_ratio'],
+        final_layer_kernel_size=7,
+        gau_cfg=dict(
+            hidden_dims=256,
+            s=128,
+            expansion_factor=2,
+            dropout_rate=0.,
+            drop_path=0.,
+            act_fn='SiLU',
+            use_rel_bias=False,
+            pos_enc=False),
+        loss=dict(
+            type='KLDiscretLoss',
+            use_target_weight=True,
+            beta=10.,
+            label_softmax=True),
+        decoder=codec),  # same codec dict the training pipeline encodes with
+    test_cfg=dict(flip_test=True))
+
+# base dataset settings shared by the train/val/test dataloaders
+dataset_type = 'AnimalPoseDataset'
+data_mode = 'topdown'
+data_root = 'data/animalpose/'
+
+backend_args = dict(backend='local')  # NOTE(review): unused in this file
+
+# pipelines: training adds augmentation and target generation; val does not
+train_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='RandomFlip', direction='horizontal'),
+    dict(type='RandomHalfBody'),
+    dict(type='RandomBBoxTransform'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='GenerateTarget', encoder=codec),  # targets from the codec
+    dict(type='PackPoseInputs')
+]
+val_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='PackPoseInputs')
+]
+
+# data loaders: train/val/test all use batch_size and num_workers from above
+train_dataloader = dict(
+    batch_size=batch_size,
+    num_workers=num_workers,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/animalpose_train.json',
+        data_prefix=dict(img=''),
+        pipeline=train_pipeline,
+    ))
+val_dataloader = dict(
+    batch_size=batch_size,
+    num_workers=num_workers,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/animalpose_val.json',
+        data_prefix=dict(img=''),
+        test_mode=True,
+        pipeline=val_pipeline,
+    ))
+test_dataloader = dict(
+    batch_size=batch_size,
+    num_workers=num_workers,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/animalpose_test.json',
+        data_prefix=dict(img=''),
+        test_mode=True,
+        pipeline=val_pipeline,  # the test split reuses the val pipeline
+    ))
+
+# evaluators: CocoMetric on the annotation files the val/test loaders read
+val_evaluator = dict(
+    type='CocoMetric', ann_file=data_root + 'annotations/animalpose_val.json')
+test_evaluator = dict(
+    type='CocoMetric', ann_file=data_root + 'annotations/animalpose_test.json')