From e89664519d6c2cee77bb522ad7dd9b044e4893db Mon Sep 17 00:00:00 2001 From: yang <2112112157@edu.zjut.cn> Date: Thu, 29 Feb 2024 15:06:02 +0800 Subject: [PATCH] v1.0.0 --- configs/rtmdet/README.md | 83 +++++ .../cspnext_imagenet_pretrain/README.md | 53 +++ .../cspnext-s_8xb256-rsb-a1-600e_in1k.py | 67 ++++ .../cspnext-tiny_8xb256-rsb-a1-600e_in1k.py | 5 + configs/rtmdet/distillation/README.md | 146 ++++++++ .../kd_l_rtmdet_x_neck_300e_coco.py | 99 ++++++ .../kd_m_rtmdet_l_neck_300e_coco.py | 99 ++++++ .../kd_s_rtmdet_m_neck_300e_coco.py | 99 ++++++ .../kd_tiny_rtmdet_s_neck_300e_coco.py | 99 ++++++ configs/rtmdet/metafile.yml | 215 ++++++++++++ ...rtmdet-r_l_syncbn_fast_2xb4-36e_dota-ms.py | 30 ++ .../rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py | 331 ++++++++++++++++++ ...mdet-r_l_syncbn_fast_2xb4-aug-100e_dota.py | 168 +++++++++ ...cbn_fast_coco-pretrain_2xb4-36e_dota-ms.py | 20 ++ ...rtmdet-r_m_syncbn_fast_2xb4-36e_dota-ms.py | 33 ++ .../rtmdet-r_m_syncbn_fast_2xb4-36e_dota.py | 33 ++ .../rtmdet-r_s_fast_1xb8-36e_dota-ms.py | 38 ++ .../rotated/rtmdet-r_s_fast_1xb8-36e_dota.py | 38 ++ .../rtmdet-r_tiny_fast_1xb8-36e_dota-ms.py | 38 ++ .../rtmdet-r_tiny_fast_1xb8-36e_dota.py | 38 ++ ...tmdet-ins_s_syncbn_fast_8xb32-300e_coco.py | 31 ++ .../rtmdet_l_syncbn_fast_8xb32-300e_coco.py | 304 ++++++++++++++++ .../rtmdet_m_syncbn_fast_8xb32-300e_coco.py | 11 + .../rtmdet_s_syncbn_fast_8xb32-300e_coco.py | 92 +++++ .../rtmdet/rtmdet_tiny_fast_1xb12-40e_cat.py | 70 ++++ ...rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py | 58 +++ .../rtmdet_x_syncbn_fast_8xb32-300e_coco.py | 11 + 27 files changed, 2309 insertions(+) create mode 100644 configs/rtmdet/README.md create mode 100644 configs/rtmdet/cspnext_imagenet_pretrain/README.md create mode 100644 configs/rtmdet/cspnext_imagenet_pretrain/cspnext-s_8xb256-rsb-a1-600e_in1k.py create mode 100644 configs/rtmdet/cspnext_imagenet_pretrain/cspnext-tiny_8xb256-rsb-a1-600e_in1k.py create mode 100644 configs/rtmdet/distillation/README.md create 
mode 100644 configs/rtmdet/distillation/kd_l_rtmdet_x_neck_300e_coco.py create mode 100644 configs/rtmdet/distillation/kd_m_rtmdet_l_neck_300e_coco.py create mode 100644 configs/rtmdet/distillation/kd_s_rtmdet_m_neck_300e_coco.py create mode 100644 configs/rtmdet/distillation/kd_tiny_rtmdet_s_neck_300e_coco.py create mode 100644 configs/rtmdet/metafile.yml create mode 100644 configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota-ms.py create mode 100644 configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py create mode 100644 configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota.py create mode 100644 configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_coco-pretrain_2xb4-36e_dota-ms.py create mode 100644 configs/rtmdet/rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota-ms.py create mode 100644 configs/rtmdet/rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota.py create mode 100644 configs/rtmdet/rotated/rtmdet-r_s_fast_1xb8-36e_dota-ms.py create mode 100644 configs/rtmdet/rotated/rtmdet-r_s_fast_1xb8-36e_dota.py create mode 100644 configs/rtmdet/rotated/rtmdet-r_tiny_fast_1xb8-36e_dota-ms.py create mode 100644 configs/rtmdet/rotated/rtmdet-r_tiny_fast_1xb8-36e_dota.py create mode 100644 configs/rtmdet/rtmdet-ins_s_syncbn_fast_8xb32-300e_coco.py create mode 100644 configs/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py create mode 100644 configs/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco.py create mode 100644 configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py create mode 100644 configs/rtmdet/rtmdet_tiny_fast_1xb12-40e_cat.py create mode 100644 configs/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py create mode 100644 configs/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco.py diff --git a/configs/rtmdet/README.md b/configs/rtmdet/README.md new file mode 100644 index 0000000..94e8654 --- /dev/null +++ b/configs/rtmdet/README.md @@ -0,0 +1,83 @@ +# RTMDet: An Empirical Study of Designing Real-Time Object Detectors + 
+[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/rtmdet-an-empirical-study-of-designing-real/real-time-instance-segmentation-on-mscoco)](https://paperswithcode.com/sota/real-time-instance-segmentation-on-mscoco?p=rtmdet-an-empirical-study-of-designing-real) +[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/rtmdet-an-empirical-study-of-designing-real/object-detection-in-aerial-images-on-dota-1)](https://paperswithcode.com/sota/object-detection-in-aerial-images-on-dota-1?p=rtmdet-an-empirical-study-of-designing-real) +[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/rtmdet-an-empirical-study-of-designing-real/object-detection-in-aerial-images-on-hrsc2016)](https://paperswithcode.com/sota/object-detection-in-aerial-images-on-hrsc2016?p=rtmdet-an-empirical-study-of-designing-real) + + + +## Abstract + +In this paper, we aim to design an efficient real-time object detector that exceeds the YOLO series and is easily extensible for many object recognition tasks such as instance segmentation and rotated object detection. To obtain a more efficient model architecture, we explore an architecture that has compatible capacities in the backbone and neck, constructed by a basic building block that consists of large-kernel depth-wise convolutions. We further introduce soft labels when calculating matching costs in the dynamic label assignment to improve accuracy. Together with better training techniques, the resulting object detector, named RTMDet, achieves 52.8% AP on COCO with 300+ FPS on an NVIDIA 3090 GPU, outperforming the current mainstream industrial detectors. RTMDet achieves the best parameter-accuracy trade-off with tiny/small/medium/large/extra-large model sizes for various application scenarios, and obtains new state-of-the-art performance on real-time instance segmentation and rotated object detection. 
We hope the experimental results can provide new insights into designing versatile real-time object detectors for many object recognition tasks. + +
+ +
+ +
+ +RTMDet-l model structure +
+ +## Results and Models + +### Object Detection + +| Model | size | Params(M) | FLOPs(G) | TRT-FP16-Latency(ms) | box AP | TTA box AP | Config | Download | +| :------------: | :--: | :-------: | :------: | :------------------: | :---------: | :---------: | :---------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| RTMDet-tiny | 640 | 4.8 | 8.1 | 0.98 | 41.0 | 42.7 | [config](./rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117-dbb1dc83.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117.log.json) | +| RTMDet-tiny \* | 640 | 4.8 | 8.1 | 0.98 | 41.8 (+0.8) | 43.2 (+0.5) | [config](./distillation/kd_tiny_rtmdet_s_neck_300e_coco.py) | [model](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_tiny_rtmdet_s_neck_300e_coco/kd_tiny_rtmdet_s_neck_300e_coco_20230213_104240-e1e4197c.pth) \| [log](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_tiny_rtmdet_s_neck_300e_coco/kd_tiny_rtmdet_s_neck_300e_coco_20230213_104240-176901d8.json) | +| RTMDet-s | 640 | 8.89 | 14.8 | 1.22 | 44.6 | 45.8 | [config](./rtmdet_s_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329-0a8c901a.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329.log.json) | +| 
RTMDet-s \* | 640 | 8.89 | 14.8 | 1.22 | 45.7 (+1.1) | 47.3 (+1.5) | [config](./distillation/kd_s_rtmdet_m_neck_300e_coco.py) | [model](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_s_rtmdet_m_neck_300e_coco/kd_s_rtmdet_m_neck_300e_coco_20230220_140647-446ff003.pth) \| [log](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_s_rtmdet_m_neck_300e_coco/kd_s_rtmdet_m_neck_300e_coco_20230220_140647-89862269.json) | +| RTMDet-m | 640 | 24.71 | 39.27 | 1.62 | 49.3 | 50.9 | [config](./rtmdet_m_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952-40af4fe8.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952.log.json) | +| RTMDet-m \* | 640 | 24.71 | 39.27 | 1.62 | 50.2 (+0.9) | 51.9 (+1.0) | [config](./distillation/kd_m_rtmdet_l_neck_300e_coco.py) | [model](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_m_rtmdet_l_neck_300e_coco/kd_m_rtmdet_l_neck_300e_coco_20230220_141313-b806f503.pth) \| [log](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_m_rtmdet_l_neck_300e_coco/kd_m_rtmdet_l_neck_300e_coco_20230220_141313-bd028fd3.json) | +| RTMDet-l | 640 | 52.3 | 80.23 | 2.44 | 51.4 | 53.1 | [config](./rtmdet_l_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928-ee3abdc4.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928.log.json) | +| RTMDet-l \* | 640 | 52.3 | 80.23 | 2.44 | 52.3 (+0.9) | 53.7 (+0.6) | [config](./distillation/kd_l_rtmdet_x_neck_300e_coco.py) | 
[model](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_l_rtmdet_x_neck_300e_coco/kd_l_rtmdet_x_neck_300e_coco_20230220_141912-c9979722.pth) \| [log](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_l_rtmdet_x_neck_300e_coco/kd_l_rtmdet_x_neck_300e_coco_20230220_141912-c5c4e17b.json) | +| RTMDet-x | 640 | 94.86 | 141.67 | 3.10 | 52.8 | 54.2 | [config](./rtmdet_x_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345-b85cd476.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345.log.json) | + +**Note**: + +1. The inference speed of RTMDet is measured on an NVIDIA 3090 GPU with TensorRT 8.4.3, cuDNN 8.2.0, FP16, batch size=1, and without NMS. +2. For a fair comparison, the config of bbox postprocessing is changed to be consistent with YOLOv5/6/7 after [PR#9494](https://github.com/open-mmlab/mmdetection/pull/9494), bringing about 0.1~0.3% AP improvement. +3. `TTA` means that Test Time Augmentation. It's perform 3 multi-scaling transformations on the image, followed by 2 flipping transformations (flipping and not flipping). You only need to specify `--tta` when testing to enable. see [TTA](https://github.com/open-mmlab/mmyolo/blob/dev/docs/en/common_usage/tta.md) for details. +4. \* means checkpoints are trained with knowledge distillation. More details can be found in [RTMDet distillation](./distillation). + +### Rotated Object Detection + +RTMDet-R achieves state-of-the-art on various remote sensing datasets. 
+ +| Backbone | pretrain | Epoch | Batch Size | Aug | mmAP | mAP50 | mAP75 | Mem (GB) | Params(M) | FLOPS(G) | TRT-FP16-Latency(ms) | Config | Download | +| :---------: | :------: | :---: | :--------: | :-------------: | :---: | :---: | :---: | :------: | :-------: | :------: | :------------------: | :--------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| RTMDet-tiny | IN | 36 | 1xb8 | RR | 46.94 | 75.07 | 50.11 | 12.7 | 4.88 | 20.45 | 4.40 | [config](./rotated/rtmdet-r_tiny_fast_1xb8-36e_dota.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_tiny_fast_1xb8-36e_dota/rtmdet-r_tiny_fast_1xb8-36e_dota_20230228_162210-e8ccfb1c.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_tiny_fast_1xb8-36e_dota/rtmdet-r_tiny_fast_1xb8-36e_dota_20230228_162210.log.json) | +| RTMDet-s | IN | 36 | 1xb8 | RR | 48.99 | 77.33 | 52.65 | 16.6 | 8.86 | 37.62 | 4.86 | [config](./rotated/rtmdet-r_s_fast_1xb8-36e_dota.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_s_fast_1xb8-36e_dota/rtmdet-r_s_fast_1xb8-36e_dota_20230224_110307-3946a5aa.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_s_fast_1xb8-36e_dota/rtmdet-r_s_fast_1xb8-36e_dota_20230224_110307.log.json) | +| RTMDet-m | IN | 36 | 2xb4 | RR | 50.38 | 78.43 | 54.28 | 10.9 | 24.67 | 99.76 | 7.82 | [config](./rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota/rtmdet-r_m_syncbn_fast_2xb4-36e_dota_20230224_124237-29ae1619.pth) \| 
[log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota/rtmdet-r_m_syncbn_fast_2xb4-36e_dota_20230224_124237.log.json) | +| RTMDet-l | IN | 36 | 2xb4 | RR | 50.61 | 78.66 | 54.95 | 16.1 | 52.27 | 204.21 | 10.82 | [config](./rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota/rtmdet-r_l_syncbn_fast_2xb4-36e_dota_20230224_124544-38bc5f08.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota/rtmdet-r_l_syncbn_fast_2xb4-36e_dota_20230224_124544.log.json) | +| RTMDet-tiny | IN | 36 | 1xb8 | MS+RR | - | - | - | | 4.88 | 20.45 | 4.40 | [config](./rotated/rtmdet-r_tiny_fast_1xb8-36e_dota-ms.py) | \| | +| RTMDet-s | IN | 36 | 1xb8 | MS+RR | - | - | - | | 8.86 | 37.62 | 4.86 | [config](./rotated/rtmdet-r_s_fast_1xb8-36e_dota-ms.py) | \| | +| RTMDet-m | IN | 36 | 2xb4 | MS+RR | - | - | - | | 24.67 | 99.76 | 7.82 | [config](./rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota-ms.py) | \| | +| RTMDet-l | IN | 36 | 2xb4 | MS+RR | - | - | - | | 52.27 | 204.21 | 10.82 | [config](./rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota-ms.py) | \| | +| RTMDet-l | COCO | 36 | 2xb4 | MS+RR | - | - | - | | 52.27 | 204.21 | 10.82 | [config](./rotated/rtmdet-r_l_syncbn_fast_coco-pretrain_2xb4-36e_dota-ms.py) | \| | +| RTMDet-l | IN | 100 | 2xb4 | Mixup+Mosaic+RR | 55.05 | 80.14 | 61.32 | 19.6 | 52.27 | 204.21 | 10.82 | [config](./rotated/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota_20230224_124735-ed4ea966.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota_20230224_124735.log.json) | + +**Note**: + +1. Please follow doc to get start with rotated detection. 
[Rotated Object Detection](../../docs/zh_cn/tutorials/rotated_detection.md) +2. We follow the latest metrics from the DOTA evaluation server, original voc format mAP is now mAP50. +3. All models trained with image size 1024\*1024. +4. `IN` means ImageNet pretrain, `COCO` means COCO pretrain. +5. For Aug, RR means `RandomRotate`, MS means multi-scale augmentation in data prepare. +6. The inference speed here is measured on an NVIDIA 2080Ti GPU with TensorRT 8.4.3, cuDNN 8.2.0, FP16, batch size=1, and with NMS. +7. Currently, the training process of RTMDet-R tiny is unstable and may have 1% accuracy fluctuation, we will continue to investigate why. + +## Citation + +```latex +@misc{lyu2022rtmdet, + title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors}, + author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen}, + year={2022}, + eprint={2212.07784}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` diff --git a/configs/rtmdet/cspnext_imagenet_pretrain/README.md b/configs/rtmdet/cspnext_imagenet_pretrain/README.md new file mode 100644 index 0000000..2db5a50 --- /dev/null +++ b/configs/rtmdet/cspnext_imagenet_pretrain/README.md @@ -0,0 +1,53 @@ +# CSPNeXt ImageNet Pre-training + +In this folder, we provide the imagenet pre-training config of RTMDet's backbone CSPNeXt. + +## Requirements + +To train with these configs, please install [MMClassification 1.x](https://github.com/open-mmlab/mmclassification/tree/1.x) first. + +Install by MIM: + +```shell +mim install mmcls>=1.0.0rc0 +``` + +or install by pip: + +```shell +pip install mmcls>=1.0.0rc0 +``` + +## Prepare Dataset + +To pre-train on ImageNet, you need to prepare the dataset first. Please refer to the [guide](https://mmclassification.readthedocs.io/en/1.x/user_guides/dataset_prepare.html#imagenet). + +## How to Train + +You can use the classification config in the same way as the detection config. 
+ +For single-GPU training, run: + +```shell +python tools/train.py \ + ${CONFIG_FILE} \ + [optional arguments] +``` + +For multi-GPU training, run: + +```shell +bash ./tools/dist_train.sh \ + ${CONFIG_FILE} \ + ${GPU_NUM} \ + [optional arguments] +``` + +More details can be found in [user guides](https://mmdetection.readthedocs.io/en/3.x/user_guides/train.html). + +## Results and Models + +| Model | resolution | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Download | +| :----------: | :--------: | :-------: | :------: | :-------: | :-------: | :-----------------------------------------------------------------------------------------------------------------: | +| CSPNeXt-tiny | 224x224 | 2.73 | 0.339 | 69.44 | 89.45 | [model](https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth) | +| CSPNeXt-s | 224x224 | 4.89 | 0.664 | 74.41 | 92.23 | [model](https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth) | diff --git a/configs/rtmdet/cspnext_imagenet_pretrain/cspnext-s_8xb256-rsb-a1-600e_in1k.py b/configs/rtmdet/cspnext_imagenet_pretrain/cspnext-s_8xb256-rsb-a1-600e_in1k.py new file mode 100644 index 0000000..4281f9c --- /dev/null +++ b/configs/rtmdet/cspnext_imagenet_pretrain/cspnext-s_8xb256-rsb-a1-600e_in1k.py @@ -0,0 +1,67 @@ +_base_ = [ + 'mmcls::_base_/datasets/imagenet_bs256_rsb_a12.py', + 'mmcls::_base_/schedules/imagenet_bs2048_rsb.py', + 'mmcls::_base_/default_runtime.py' +] + +custom_imports = dict( + imports=['mmdet.models', 'mmyolo.models'], allow_failed_imports=False) + +model = dict( + type='ImageClassifier', + backbone=dict( + type='mmyolo.CSPNeXt', + arch='P5', + out_indices=(4, ), + expand_ratio=0.5, + deepen_factor=0.33, + widen_factor=0.5, + channel_attention=True, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='mmyolo.SiLU')), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='LinearClsHead', + num_classes=1000, + in_channels=512, + 
loss=dict( + type='LabelSmoothLoss', + label_smooth_val=0.1, + mode='original', + loss_weight=1.0), + topk=(1, 5)), + train_cfg=dict(augments=[ + dict(type='Mixup', alpha=0.2, num_classes=1000), + dict(type='CutMix', alpha=1.0, num_classes=1000) + ])) + +# dataset settings +train_dataloader = dict(sampler=dict(type='RepeatAugSampler', shuffle=True)) + +# schedule settings +optim_wrapper = dict( + optimizer=dict(weight_decay=0.01), + paramwise_cfg=dict(bias_decay_mult=0., norm_decay_mult=0.), +) + +param_scheduler = [ + # warm up learning rate scheduler + dict( + type='LinearLR', + start_factor=0.0001, + by_epoch=True, + begin=0, + end=5, + # update by iter + convert_to_iter_based=True), + # main learning rate scheduler + dict( + type='CosineAnnealingLR', + T_max=595, + eta_min=1.0e-6, + by_epoch=True, + begin=5, + end=600) +] + +train_cfg = dict(by_epoch=True, max_epochs=600) diff --git a/configs/rtmdet/cspnext_imagenet_pretrain/cspnext-tiny_8xb256-rsb-a1-600e_in1k.py b/configs/rtmdet/cspnext_imagenet_pretrain/cspnext-tiny_8xb256-rsb-a1-600e_in1k.py new file mode 100644 index 0000000..af3170b --- /dev/null +++ b/configs/rtmdet/cspnext_imagenet_pretrain/cspnext-tiny_8xb256-rsb-a1-600e_in1k.py @@ -0,0 +1,5 @@ +_base_ = './cspnext-s_8xb256-rsb-a1-600e_in1k.py' + +model = dict( + backbone=dict(deepen_factor=0.167, widen_factor=0.375), + head=dict(in_channels=384)) diff --git a/configs/rtmdet/distillation/README.md b/configs/rtmdet/distillation/README.md new file mode 100644 index 0000000..452a46c --- /dev/null +++ b/configs/rtmdet/distillation/README.md @@ -0,0 +1,146 @@ +# Distill RTM Detectors Based on MMRazor + +## Description + +To further improve the model accuracy while not introducing much additional +computation cost, we apply the feature-based distillation to the training phase +of these RTM detectors. 
In summary, our distillation strategy are threefold: + +(1) Inspired by [PKD](https://arxiv.org/abs/2207.02039), we first normalize +the intermediate feature maps to have zero mean and unit variances before calculating +the distillation loss. + +(2) Inspired by [CWD](https://arxiv.org/abs/2011.13256), we adopt the channel-wise +distillation paradigm, which can pay more attention to the most salient regions +of each channel. + +(3) Inspired by [DAMO-YOLO](https://arxiv.org/abs/2211.15444), the distillation +process is split into two stages. 1) The teacher distills the student at the +first stage (280 epochs) on strong mosaic domain. 2) The student finetunes itself +on no masaic domain at the second stage (20 epochs). + +## Results and Models + +| Location | Dataset | Teacher | Student | mAP | mAP(T) | mAP(S) | Config | Download | +| :------: | :-----: | :---------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------: | :---------: | :----: | :----: | :------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| FPN | COCO | [RTMDet-s](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py) | [RTMDet-tiny](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py) | 41.8 (+0.8) | 44.6 | 41.0 | 
[config](kd_tiny_rtmdet_s_neck_300e_coco.py) | [teacher](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329-0a8c901a.pth) \|[model](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_tiny_rtmdet_s_neck_300e_coco/kd_tiny_rtmdet_s_neck_300e_coco_20230213_104240-e1e4197c.pth) \| [log](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_tiny_rtmdet_s_neck_300e_coco/kd_tiny_rtmdet_s_neck_300e_coco_20230213_104240-176901d8.json) | +| FPN | COCO | [RTMDet-m](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco.py) | [RTMDet-s](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py) | 45.7 (+1.1) | 49.3 | 44.6 | [config](kd_s_rtmdet_m_neck_300e_coco.py) | [teacher](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952-40af4fe8.pth) \|[model](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_s_rtmdet_m_neck_300e_coco/kd_s_rtmdet_m_neck_300e_coco_20230220_140647-446ff003.pth) \| [log](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_s_rtmdet_m_neck_300e_coco/kd_s_rtmdet_m_neck_300e_coco_20230220_140647-89862269.json) | +| FPN | COCO | [RTMDet-l](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py) | [RTMDet-m](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco.py) | 50.2 (+0.9) | 51.4 | 49.3 | [config](kd_m_rtmdet_l_neck_300e_coco.py) | [teacher](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928-ee3abdc4.pth) \|[model](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_m_rtmdet_l_neck_300e_coco/kd_m_rtmdet_l_neck_300e_coco_20230220_141313-b806f503.pth) \| 
[log](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_m_rtmdet_l_neck_300e_coco/kd_m_rtmdet_l_neck_300e_coco_20230220_141313-bd028fd3.json) | +| FPN | COCO | [RTMDet-x](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco.py) | [RTMDet-l](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py) | 52.3 (+0.9) | 52.8 | 51.4 | [config](kd_l_rtmdet_x_neck_300e_coco.py) | [teacher](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345-b85cd476.pth) \|[model](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_l_rtmdet_x_neck_300e_coco/kd_l_rtmdet_x_neck_300e_coco_20230220_141912-c9979722.pth) \| [log](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_l_rtmdet_x_neck_300e_coco/kd_l_rtmdet_x_neck_300e_coco_20230220_141912-c5c4e17b.json) | + +## Usage + +### Prerequisites + +- [MMRazor dev-1.x](https://github.com/open-mmlab/mmrazor/tree/dev-1.x) + +Install MMRazor from source + +``` +git clone -b dev-1.x https://github.com/open-mmlab/mmrazor.git +cd mmrazor +# Install MMRazor +mim install -v -e . +``` + +### Training commands + +In MMYOLO's root directory, run the following command to train the RTMDet-tiny +with 8 GPUs, using RTMDet-s as the teacher: + +```bash +CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 PORT=29500 ./tools/dist_train.sh configs/rtmdet/distillation/kd_tiny_rtmdet_s_neck_300e_coco.py +``` + +### Testing commands + +In MMYOLO's root directory, run the following command to test the model: + +```bash +CUDA_VISIBLE_DEVICES=0 PORT=29500 ./tools/dist_test.sh configs/rtmdet/distillation/kd_tiny_rtmdet_s_neck_300e_coco.py ${CHECKPOINT_PATH} +``` + +### Getting student-only checkpoint + +After training, the checkpoint contains parameters for both student and teacher models. 
+Run the following command to convert it to student-only checkpoint: + +```bash +python ./tools/model_converters/convert_kd_ckpt_to_student.py ${CHECKPOINT_PATH} --out-path ${OUTPUT_CHECKPOINT_PATH} +``` + +## Configs + +Here we provide detection configs and models for MMRazor in MMYOLO. For clarify, +we take `./kd_tiny_rtmdet_s_neck_300e_coco.py` as an example to show how to +distill a RTM detector based on MMRazor. + +Here is the main part of `./kd_tiny_rtmdet_s_neck_300e_coco.py`. + +```shell +norm_cfg = dict(type='BN', affine=False, track_running_stats=False) + +distiller=dict( + type='ConfigurableDistiller', + student_recorders=dict( + fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'), + fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'), + fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv'), + ), + teacher_recorders=dict( + fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'), + fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'), + fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv')), + connectors=dict( + fpn0_s=dict(type='ConvModuleConnector', in_channel=96, + out_channel=128, bias=False, norm_cfg=norm_cfg, + act_cfg=None), + fpn0_t=dict( + type='NormConnector', in_channels=128, norm_cfg=norm_cfg), + fpn1_s=dict( + type='ConvModuleConnector', in_channel=96, + out_channel=128, bias=False, norm_cfg=norm_cfg, + act_cfg=None), + fpn1_t=dict( + type='NormConnector', in_channels=128, norm_cfg=norm_cfg), + fpn2_s=dict( + type='ConvModuleConnector', in_channel=96, + out_channel=128, bias=False, norm_cfg=norm_cfg, + act_cfg=None), + fpn2_t=dict( + type='NormConnector', in_channels=128, norm_cfg=norm_cfg)), + distill_losses=dict( + loss_fpn0=dict(type='ChannelWiseDivergence', loss_weight=1), + loss_fpn1=dict(type='ChannelWiseDivergence', loss_weight=1), + loss_fpn2=dict(type='ChannelWiseDivergence', loss_weight=1)), + loss_forward_mappings=dict( + loss_fpn0=dict( + 
preds_S=dict(from_student=True, recorder='fpn0', connector='fpn0_s'), + preds_T=dict(from_student=False, recorder='fpn0', connector='fpn0_t')), + loss_fpn1=dict( + preds_S=dict(from_student=True, recorder='fpn1', connector='fpn1_s'), + preds_T=dict(from_student=False, recorder='fpn1', connector='fpn1_t')), + loss_fpn2=dict( + preds_S=dict(from_student=True, recorder='fpn2', connector='fpn2_s'), + preds_T=dict(from_student=False, recorder='fpn2', connector='fpn2_t')))) + +``` + +`recorders` are used to record various intermediate results during the model forward. +In this example, they can help record the output of 3 `nn.Module` of the teacher +and the student. Details are list in [Recorder](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/docs/en/advanced_guides/recorder.md) and [MMRazor Distillation](https://zhuanlan.zhihu.com/p/596582609) (if users can read Chinese). + +`connectors` are adaptive layers which usually map teacher's and students features +to the same dimension. + +`distill_losses` are configs for multiple distill losses. + +`loss_forward_mappings` are mappings between distill loss forward arguments and records. + +In addition, the student finetunes itself on no masaic domain at the last 20 epochs, +so we add a new hook named `StopDistillHook` to stop distillation on time. 
+We need to add this hook to the `custom_hooks` list like this: + +```shell +custom_hooks = [..., dict(type='mmrazor.StopDistillHook', detach_epoch=280)] +``` diff --git a/configs/rtmdet/distillation/kd_l_rtmdet_x_neck_300e_coco.py b/configs/rtmdet/distillation/kd_l_rtmdet_x_neck_300e_coco.py new file mode 100644 index 0000000..2bab26a --- /dev/null +++ b/configs/rtmdet/distillation/kd_l_rtmdet_x_neck_300e_coco.py @@ -0,0 +1,99 @@ +_base_ = '../rtmdet_l_syncbn_fast_8xb32-300e_coco.py' + +teacher_ckpt = 'https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345-b85cd476.pth' # noqa: E501 + +norm_cfg = dict(type='BN', affine=False, track_running_stats=False) + +model = dict( + _delete_=True, + _scope_='mmrazor', + type='FpnTeacherDistill', + architecture=dict( + cfg_path='mmyolo::rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py'), + teacher=dict( + cfg_path='mmyolo::rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco.py'), + teacher_ckpt=teacher_ckpt, + distiller=dict( + type='ConfigurableDistiller', + # `recorders` are used to record various intermediate results during + # the model forward. + student_recorders=dict( + fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'), + fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'), + fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv'), + ), + teacher_recorders=dict( + fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'), + fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'), + fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv')), + # `connectors` are adaptive layers which usually map teacher's and + # students features to the same dimension. 
+ connectors=dict( + fpn0_s=dict( + type='ConvModuleConnector', + in_channel=256, + out_channel=320, + bias=False, + norm_cfg=norm_cfg, + act_cfg=None), + fpn0_t=dict( + type='NormConnector', in_channels=320, norm_cfg=norm_cfg), + fpn1_s=dict( + type='ConvModuleConnector', + in_channel=256, + out_channel=320, + bias=False, + norm_cfg=norm_cfg, + act_cfg=None), + fpn1_t=dict( + type='NormConnector', in_channels=320, norm_cfg=norm_cfg), + fpn2_s=dict( + type='ConvModuleConnector', + in_channel=256, + out_channel=320, + bias=False, + norm_cfg=norm_cfg, + act_cfg=None), + fpn2_t=dict( + type='NormConnector', in_channels=320, norm_cfg=norm_cfg)), + distill_losses=dict( + loss_fpn0=dict(type='ChannelWiseDivergence', loss_weight=1), + loss_fpn1=dict(type='ChannelWiseDivergence', loss_weight=1), + loss_fpn2=dict(type='ChannelWiseDivergence', loss_weight=1)), + # `loss_forward_mappings` are mappings between distill loss forward + # arguments and records. + loss_forward_mappings=dict( + loss_fpn0=dict( + preds_S=dict( + from_student=True, recorder='fpn0', connector='fpn0_s'), + preds_T=dict( + from_student=False, recorder='fpn0', connector='fpn0_t')), + loss_fpn1=dict( + preds_S=dict( + from_student=True, recorder='fpn1', connector='fpn1_s'), + preds_T=dict( + from_student=False, recorder='fpn1', connector='fpn1_t')), + loss_fpn2=dict( + preds_S=dict( + from_student=True, recorder='fpn2', connector='fpn2_s'), + preds_T=dict( + from_student=False, recorder='fpn2', + connector='fpn2_t'))))) + +find_unused_parameters = True + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + strict_load=False, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=_base_.max_epochs - _base_.num_epochs_stage2, + switch_pipeline=_base_.train_pipeline_stage2), + # stop distillation after the 280th epoch + dict(type='mmrazor.StopDistillHook', stop_epoch=280) +] diff --git 
a/configs/rtmdet/distillation/kd_m_rtmdet_l_neck_300e_coco.py b/configs/rtmdet/distillation/kd_m_rtmdet_l_neck_300e_coco.py new file mode 100644 index 0000000..f7d7f92 --- /dev/null +++ b/configs/rtmdet/distillation/kd_m_rtmdet_l_neck_300e_coco.py @@ -0,0 +1,99 @@ +_base_ = '../rtmdet_m_syncbn_fast_8xb32-300e_coco.py' + +teacher_ckpt = 'https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928-ee3abdc4.pth' # noqa: E501 + +norm_cfg = dict(type='BN', affine=False, track_running_stats=False) + +model = dict( + _delete_=True, + _scope_='mmrazor', + type='FpnTeacherDistill', + architecture=dict( + cfg_path='mmyolo::rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco.py'), + teacher=dict( + cfg_path='mmyolo::rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py'), + teacher_ckpt=teacher_ckpt, + distiller=dict( + type='ConfigurableDistiller', + # `recorders` are used to record various intermediate results during + # the model forward. + student_recorders=dict( + fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'), + fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'), + fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv'), + ), + teacher_recorders=dict( + fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'), + fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'), + fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv')), + # `connectors` are adaptive layers which usually map teacher's and + # students features to the same dimension. 
+ connectors=dict( + fpn0_s=dict( + type='ConvModuleConnector', + in_channel=192, + out_channel=256, + bias=False, + norm_cfg=norm_cfg, + act_cfg=None), + fpn0_t=dict( + type='NormConnector', in_channels=256, norm_cfg=norm_cfg), + fpn1_s=dict( + type='ConvModuleConnector', + in_channel=192, + out_channel=256, + bias=False, + norm_cfg=norm_cfg, + act_cfg=None), + fpn1_t=dict( + type='NormConnector', in_channels=256, norm_cfg=norm_cfg), + fpn2_s=dict( + type='ConvModuleConnector', + in_channel=192, + out_channel=256, + bias=False, + norm_cfg=norm_cfg, + act_cfg=None), + fpn2_t=dict( + type='NormConnector', in_channels=256, norm_cfg=norm_cfg)), + distill_losses=dict( + loss_fpn0=dict(type='ChannelWiseDivergence', loss_weight=1), + loss_fpn1=dict(type='ChannelWiseDivergence', loss_weight=1), + loss_fpn2=dict(type='ChannelWiseDivergence', loss_weight=1)), + # `loss_forward_mappings` are mappings between distill loss forward + # arguments and records. + loss_forward_mappings=dict( + loss_fpn0=dict( + preds_S=dict( + from_student=True, recorder='fpn0', connector='fpn0_s'), + preds_T=dict( + from_student=False, recorder='fpn0', connector='fpn0_t')), + loss_fpn1=dict( + preds_S=dict( + from_student=True, recorder='fpn1', connector='fpn1_s'), + preds_T=dict( + from_student=False, recorder='fpn1', connector='fpn1_t')), + loss_fpn2=dict( + preds_S=dict( + from_student=True, recorder='fpn2', connector='fpn2_s'), + preds_T=dict( + from_student=False, recorder='fpn2', + connector='fpn2_t'))))) + +find_unused_parameters = True + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + strict_load=False, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=_base_.max_epochs - _base_.num_epochs_stage2, + switch_pipeline=_base_.train_pipeline_stage2), + # stop distillation after the 280th epoch + dict(type='mmrazor.StopDistillHook', stop_epoch=280) +] diff --git 
a/configs/rtmdet/distillation/kd_s_rtmdet_m_neck_300e_coco.py b/configs/rtmdet/distillation/kd_s_rtmdet_m_neck_300e_coco.py new file mode 100644 index 0000000..99b5dc5 --- /dev/null +++ b/configs/rtmdet/distillation/kd_s_rtmdet_m_neck_300e_coco.py @@ -0,0 +1,99 @@ +_base_ = '../rtmdet_s_syncbn_fast_8xb32-300e_coco.py' + +teacher_ckpt = 'https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952-40af4fe8.pth' # noqa: E501 + +norm_cfg = dict(type='BN', affine=False, track_running_stats=False) + +model = dict( + _delete_=True, + _scope_='mmrazor', + type='FpnTeacherDistill', + architecture=dict( + cfg_path='mmyolo::rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py'), + teacher=dict( + cfg_path='mmyolo::rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco.py'), + teacher_ckpt=teacher_ckpt, + distiller=dict( + type='ConfigurableDistiller', + # `recorders` are used to record various intermediate results during + # the model forward. + student_recorders=dict( + fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'), + fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'), + fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv'), + ), + teacher_recorders=dict( + fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'), + fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'), + fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv')), + # `connectors` are adaptive layers which usually map teacher's and + # students features to the same dimension. 
+ connectors=dict( + fpn0_s=dict( + type='ConvModuleConnector', + in_channel=128, + out_channel=192, + bias=False, + norm_cfg=norm_cfg, + act_cfg=None), + fpn0_t=dict( + type='NormConnector', in_channels=192, norm_cfg=norm_cfg), + fpn1_s=dict( + type='ConvModuleConnector', + in_channel=128, + out_channel=192, + bias=False, + norm_cfg=norm_cfg, + act_cfg=None), + fpn1_t=dict( + type='NormConnector', in_channels=192, norm_cfg=norm_cfg), + fpn2_s=dict( + type='ConvModuleConnector', + in_channel=128, + out_channel=192, + bias=False, + norm_cfg=norm_cfg, + act_cfg=None), + fpn2_t=dict( + type='NormConnector', in_channels=192, norm_cfg=norm_cfg)), + distill_losses=dict( + loss_fpn0=dict(type='ChannelWiseDivergence', loss_weight=1), + loss_fpn1=dict(type='ChannelWiseDivergence', loss_weight=1), + loss_fpn2=dict(type='ChannelWiseDivergence', loss_weight=1)), + # `loss_forward_mappings` are mappings between distill loss forward + # arguments and records. + loss_forward_mappings=dict( + loss_fpn0=dict( + preds_S=dict( + from_student=True, recorder='fpn0', connector='fpn0_s'), + preds_T=dict( + from_student=False, recorder='fpn0', connector='fpn0_t')), + loss_fpn1=dict( + preds_S=dict( + from_student=True, recorder='fpn1', connector='fpn1_s'), + preds_T=dict( + from_student=False, recorder='fpn1', connector='fpn1_t')), + loss_fpn2=dict( + preds_S=dict( + from_student=True, recorder='fpn2', connector='fpn2_s'), + preds_T=dict( + from_student=False, recorder='fpn2', + connector='fpn2_t'))))) + +find_unused_parameters = True + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + strict_load=False, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=_base_.max_epochs - _base_.num_epochs_stage2, + switch_pipeline=_base_.train_pipeline_stage2), + # stop distillation after the 280th epoch + dict(type='mmrazor.StopDistillHook', stop_epoch=280) +] diff --git 
a/configs/rtmdet/distillation/kd_tiny_rtmdet_s_neck_300e_coco.py b/configs/rtmdet/distillation/kd_tiny_rtmdet_s_neck_300e_coco.py new file mode 100644 index 0000000..50c2358 --- /dev/null +++ b/configs/rtmdet/distillation/kd_tiny_rtmdet_s_neck_300e_coco.py @@ -0,0 +1,99 @@ +_base_ = '../rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py' + +teacher_ckpt = 'https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329-0a8c901a.pth' # noqa: E501 + +norm_cfg = dict(type='BN', affine=False, track_running_stats=False) + +model = dict( + _delete_=True, + _scope_='mmrazor', + type='FpnTeacherDistill', + architecture=dict( + cfg_path='mmyolo::rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py'), + teacher=dict( + cfg_path='mmyolo::rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py'), + teacher_ckpt=teacher_ckpt, + distiller=dict( + type='ConfigurableDistiller', + # `recorders` are used to record various intermediate results during + # the model forward. + student_recorders=dict( + fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'), + fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'), + fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv'), + ), + teacher_recorders=dict( + fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'), + fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'), + fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv')), + # `connectors` are adaptive layers which usually map teacher's and + # students features to the same dimension. 
+ connectors=dict( + fpn0_s=dict( + type='ConvModuleConnector', + in_channel=96, + out_channel=128, + bias=False, + norm_cfg=norm_cfg, + act_cfg=None), + fpn0_t=dict( + type='NormConnector', in_channels=128, norm_cfg=norm_cfg), + fpn1_s=dict( + type='ConvModuleConnector', + in_channel=96, + out_channel=128, + bias=False, + norm_cfg=norm_cfg, + act_cfg=None), + fpn1_t=dict( + type='NormConnector', in_channels=128, norm_cfg=norm_cfg), + fpn2_s=dict( + type='ConvModuleConnector', + in_channel=96, + out_channel=128, + bias=False, + norm_cfg=norm_cfg, + act_cfg=None), + fpn2_t=dict( + type='NormConnector', in_channels=128, norm_cfg=norm_cfg)), + distill_losses=dict( + loss_fpn0=dict(type='ChannelWiseDivergence', loss_weight=1), + loss_fpn1=dict(type='ChannelWiseDivergence', loss_weight=1), + loss_fpn2=dict(type='ChannelWiseDivergence', loss_weight=1)), + # `loss_forward_mappings` are mappings between distill loss forward + # arguments and records. + loss_forward_mappings=dict( + loss_fpn0=dict( + preds_S=dict( + from_student=True, recorder='fpn0', connector='fpn0_s'), + preds_T=dict( + from_student=False, recorder='fpn0', connector='fpn0_t')), + loss_fpn1=dict( + preds_S=dict( + from_student=True, recorder='fpn1', connector='fpn1_s'), + preds_T=dict( + from_student=False, recorder='fpn1', connector='fpn1_t')), + loss_fpn2=dict( + preds_S=dict( + from_student=True, recorder='fpn2', connector='fpn2_s'), + preds_T=dict( + from_student=False, recorder='fpn2', + connector='fpn2_t'))))) + +find_unused_parameters = True + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + strict_load=False, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=_base_.max_epochs - _base_.num_epochs_stage2, + switch_pipeline=_base_.train_pipeline_stage2), + # stop distillation after the 280th epoch + dict(type='mmrazor.StopDistillHook', stop_epoch=280) +] diff --git a/configs/rtmdet/metafile.yml 
b/configs/rtmdet/metafile.yml new file mode 100644 index 0000000..704a44b --- /dev/null +++ b/configs/rtmdet/metafile.yml @@ -0,0 +1,215 @@ +Collections: + - Name: RTMDet + Metadata: + Training Data: COCO + Training Techniques: + - AdamW + - Flat Cosine Annealing + Training Resources: 8x A100 GPUs + Architecture: + - CSPNeXt + - CSPNeXtPAFPN + README: configs/rtmdet/README.md + Code: + URL: https://github.com/open-mmlab/mmyolo/blob/main/mmyolo/models/detectors/yolo_detector.py#L12 + Version: v0.1.1 + - Name: Rotated_RTMDet + Metadata: + Training Data: DOTAv1.0 + Training Techniques: + - AdamW + - Flat Cosine Annealing + Training Resources: 1x A100 GPUs + Architecture: + - CSPNeXt + - CSPNeXtPAFPN + README: configs/rtmdet/README.md + Code: + URL: https://github.com/open-mmlab/mmyolo/blob/main/mmyolo/models/detectors/yolo_detector.py#L12 + Version: v0.1.1 + +Models: + - Name: rtmdet_tiny_syncbn_fast_8xb32-300e_coco + In Collection: RTMDet + Config: configs/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py + Metadata: + Training Memory (GB): 11.7 + Epochs: 300 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.0 + Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117-dbb1dc83.pth + + - Name: kd_tiny_rtmdet_s_neck_300e_coco + In Collection: RTMDet + Config: configs/rtmdet/distillation/kd_tiny_rtmdet_s_neck_300e_coco.py + Metadata: + Training Memory (GB): 11.9 + Epochs: 300 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.8 + Weights: https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_tiny_rtmdet_s_neck_300e_coco/kd_tiny_rtmdet_s_neck_300e_coco_20230213_104240-e1e4197c.pth + + - Name: rtmdet_s_syncbn_fast_8xb32-300e_coco + In Collection: RTMDet + Config: configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py + Metadata: + Training Memory (GB): 15.9 + Epochs: 300 + Results: + - Task: Object Detection + 
Dataset: COCO + Metrics: + box AP: 44.6 + Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329-0a8c901a.pth + + - Name: kd_s_rtmdet_m_neck_300e_coco + In Collection: RTMDet + Config: configs/rtmdet/distillation/kd_s_rtmdet_m_neck_300e_coco.py + Metadata: + Training Memory (GB): 16.3 + Epochs: 300 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 45.7 + Weights: https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_s_rtmdet_m_neck_300e_coco/kd_s_rtmdet_m_neck_300e_coco_20230220_140647-446ff003.pth + + - Name: rtmdet_m_syncbn_fast_8xb32-300e_coco + In Collection: RTMDet + Config: configs/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco.py + Metadata: + Training Memory (GB): 27.8 + Epochs: 300 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 49.3 + Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952-40af4fe8.pth + + - Name: kd_m_rtmdet_l_neck_300e_coco + In Collection: RTMDet + Config: configs/rtmdet/distillation/kd_m_rtmdet_l_neck_300e_coco.py + Metadata: + Training Memory (GB): 29.0 + Epochs: 300 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 50.2 + Weights: https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_m_rtmdet_l_neck_300e_coco/kd_m_rtmdet_l_neck_300e_coco_20230220_141313-b806f503.pth + + - Name: rtmdet_l_syncbn_fast_8xb32-300e_coco + In Collection: RTMDet + Config: configs/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py + Metadata: + Training Memory (GB): 43.2 + Epochs: 300 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 51.4 + Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928-ee3abdc4.pth + + - Name: kd_l_rtmdet_x_neck_300e_coco + In Collection: RTMDet + 
Config: configs/rtmdet/distillation/kd_l_rtmdet_x_neck_300e_coco.py + Metadata: + Training Memory (GB): 45.2 + Epochs: 300 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 52.3 + Weights: https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_l_rtmdet_x_neck_300e_coco/kd_l_rtmdet_x_neck_300e_coco_20230220_141912-c9979722.pth + + - Name: rtmdet_x_syncbn_fast_8xb32-300e_coco + In Collection: RTMDet + Config: configs/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco.py + Metadata: + Training Memory (GB): 63.4 + Epochs: 300 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 52.8 + Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345-b85cd476.pth + + - Name: rtmdet-r_tiny_fast_1xb8-36e_dota + In Collection: Rotated_RTMDet + Config: configs/rtmdet/rotated/rtmdet-r_tiny_fast_1xb8-36e_dota.py + Metadata: + Training Memory (GB): 12.7 + Epochs: 36 + Results: + - Task: Oriented Object Detection + Dataset: DOTAv1.0 + Metrics: + mAP: 75.07 + Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_tiny_fast_1xb8-36e_dota/rtmdet-r_tiny_fast_1xb8-36e_dota_20230228_162210-e8ccfb1c.pth + + - Name: rtmdet-r_s_fast_1xb8-36e_dota + In Collection: Rotated_RTMDet + Config: configs/rtmdet/rotated/rtmdet-r_s_fast_1xb8-36e_dota.py + Metadata: + Training Memory (GB): 16.6 + Epochs: 36 + Results: + - Task: Oriented Object Detection + Dataset: DOTAv1.0 + Metrics: + mAP: 77.33 + Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_s_fast_1xb8-36e_dota/rtmdet-r_s_fast_1xb8-36e_dota_20230224_110307-3946a5aa.pth + + - Name: rtmdet-r_m_syncbn_fast_2xb4-36e_dota + In Collection: Rotated_RTMDet + Config: configs/rtmdet/rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota.py + Metadata: + Training Resources: 2x A100 GPUs + Training Memory (GB): 10.9 + Epochs: 36 + Results: + - Task: Oriented Object Detection + Dataset: 
DOTAv1.0 + Metrics: + mAP: 78.43 + Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota/rtmdet-r_m_syncbn_fast_2xb4-36e_dota_20230224_124237-29ae1619.pth + + - Name: rtmdet-r_l_syncbn_fast_2xb4-36e_dota + In Collection: Rotated_RTMDet + Config: configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py + Metadata: + Training Resources: 2x A100 GPUs + Training Memory (GB): 16.1 + Epochs: 36 + Results: + - Task: Oriented Object Detection + Dataset: DOTAv1.0 + Metrics: + mAP: 78.66 + Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota/rtmdet-r_l_syncbn_fast_2xb4-36e_dota_20230224_124544-38bc5f08.pth + + - Name: rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota + In Collection: Rotated_RTMDet + Config: configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota.py + Metadata: + Training Resources: 2x A100 GPUs + Training Memory (GB): 19.6 + Epochs: 100 + Results: + - Task: Oriented Object Detection + Dataset: DOTAv1.0 + Metrics: + mAP: 80.14 + Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota_20230224_124735-ed4ea966.pth diff --git a/configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota-ms.py b/configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota-ms.py new file mode 100644 index 0000000..ef29a1d --- /dev/null +++ b/configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota-ms.py @@ -0,0 +1,30 @@ +_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py' + +# ========================modified parameters====================== +data_root = 'data/split_ms_dota/' +# Path of test images folder +test_data_prefix = 'test/images/' +# Submission dir for result submit +submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission' + +# =======================Unmodified in most cases================== +train_dataloader = dict(dataset=dict(data_root=data_root)) + 
+val_dataloader = dict(dataset=dict(data_root=data_root)) + +# Inference on val dataset +test_dataloader = val_dataloader + +# Inference on test dataset and format the output results +# for submission. Note: the test set has no annotation. +# test_dataloader = dict( +# dataset=dict( +# data_root=data_root, +# ann_file='', # test set has no annotation +# data_prefix=dict(img_path=test_data_prefix), +# pipeline=_base_.test_pipeline)) +# test_evaluator = dict( +# type='mmrotate.DOTAMetric', +# format_only=True, +# merge_patches=True, +# outfile_prefix=submission_dir) diff --git a/configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py b/configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py new file mode 100644 index 0000000..cbb2ae7 --- /dev/null +++ b/configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py @@ -0,0 +1,331 @@ +_base_ = '../../_base_/default_runtime.py' + +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-l_8xb256-rsb-a1-600e_in1k-6a760974.pth' # noqa + +# ========================Frequently modified parameters====================== +# -----data related----- +data_root = 'data/split_ss_dota/' +# Path of train annotation folder +train_ann_file = 'trainval/annfiles/' +train_data_prefix = 'trainval/images/' # Prefix of train image path +# Path of val annotation folder +val_ann_file = 'trainval/annfiles/' +val_data_prefix = 'trainval/images/' # Prefix of val image path +# Path of test images folder +test_data_prefix = 'test/images/' + +# Submission dir for result submit +submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission' + +num_classes = 15 # Number of classes for classification +# Batch size of a single GPU during training +train_batch_size_per_gpu = 4 +# Worker to pre-fetch data for each single GPU during training +train_num_workers = 8 +# persistent_workers must be False if num_workers is 0. 
+persistent_workers = True + +# -----train val related----- +# Base learning rate for optim_wrapper. Corresponding to 1xb8=8 bs +base_lr = 0.00025 # 0.004 / 16 +max_epochs = 36 # Maximum training epochs + +model_test_cfg = dict( + # The config of multi-label for multi-class prediction. + multi_label=True, + # Decode rbox with angle, For RTMDet-R, Defaults to True. + # When set to True, use rbox coder such as DistanceAnglePointCoder + # When set to False, use hbox coder such as DistancePointBBoxCoder + # different setting lead to different AP. + decode_with_angle=True, + # The number of boxes before NMS + nms_pre=30000, + score_thr=0.05, # Threshold to filter out boxes. + nms=dict(type='nms_rotated', iou_threshold=0.1), # NMS type and threshold + max_per_img=2000) # Max number of detections of each image + +# ========================Possible modified parameters======================== +# -----data related----- +img_scale = (1024, 1024) # width, height +# ratio for random rotate +random_rotate_ratio = 0.5 +# label ids for rect objs +rotate_rect_obj_labels = [9, 11] +# Dataset type, this will be used to define the dataset +dataset_type = 'YOLOv5DOTADataset' +# Batch size of a single GPU during validation +val_batch_size_per_gpu = 8 +# Worker to pre-fetch data for each single GPU during validation +val_num_workers = 8 + +# Config of batch shapes. Only on val. 
Not use in RTMDet-R +batch_shapes_cfg = None + +# -----model related----- +# The scaling factor that controls the depth of the network structure +deepen_factor = 1.0 +# The scaling factor that controls the width of the network structure +widen_factor = 1.0 +# Strides of multi-scale prior box +strides = [8, 16, 32] +# The angle definition for model +angle_version = 'le90' # le90, le135, oc are available options + +norm_cfg = dict(type='BN') # Normalization config + +# -----train val related----- +lr_start_factor = 1.0e-5 +dsl_topk = 13 # Number of bbox selected in each level +loss_cls_weight = 1.0 +loss_bbox_weight = 2.0 +qfl_beta = 2.0 # beta of QualityFocalLoss +weight_decay = 0.05 + +# Save model checkpoint and validation intervals +save_checkpoint_intervals = 1 +# The maximum checkpoints to keep. +max_keep_ckpts = 3 +# single-scale training is recommended to +# be turned on, which can speed up training. +env_cfg = dict(cudnn_benchmark=True) + +# ===============================Unmodified in most cases==================== +model = dict( + type='YOLODetector', + data_preprocessor=dict( + type='YOLOv5DetDataPreprocessor', + mean=[103.53, 116.28, 123.675], + std=[57.375, 57.12, 58.395], + bgr_to_rgb=False), + backbone=dict( + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=deepen_factor, + widen_factor=widen_factor, + channel_attention=True, + norm_cfg=norm_cfg, + act_cfg=dict(type='SiLU', inplace=True), + init_cfg=dict( + type='Pretrained', prefix='backbone.', checkpoint=checkpoint)), + neck=dict( + type='CSPNeXtPAFPN', + deepen_factor=deepen_factor, + widen_factor=widen_factor, + in_channels=[256, 512, 1024], + out_channels=256, + num_csp_blocks=3, + expand_ratio=0.5, + norm_cfg=norm_cfg, + act_cfg=dict(type='SiLU', inplace=True)), + bbox_head=dict( + type='RTMDetRotatedHead', + head_module=dict( + type='RTMDetRotatedSepBNHeadModule', + num_classes=num_classes, + widen_factor=widen_factor, + in_channels=256, + stacked_convs=2, + feat_channels=256, 
+ norm_cfg=norm_cfg, + act_cfg=dict(type='SiLU', inplace=True), + share_conv=True, + pred_kernel_size=1, + featmap_strides=strides), + prior_generator=dict( + type='mmdet.MlvlPointGenerator', offset=0, strides=strides), + bbox_coder=dict( + type='DistanceAnglePointCoder', angle_version=angle_version), + loss_cls=dict( + type='mmdet.QualityFocalLoss', + use_sigmoid=True, + beta=qfl_beta, + loss_weight=loss_cls_weight), + loss_bbox=dict( + type='mmrotate.RotatedIoULoss', + mode='linear', + loss_weight=loss_bbox_weight), + angle_version=angle_version, + # Used for angle encode and decode, similar to bbox coder + angle_coder=dict(type='mmrotate.PseudoAngleCoder'), + # If true, it will apply loss_bbox on horizontal box, and angle_loss + # needs to be specified. In this case the loss_bbox should use + # horizontal box loss e.g. IoULoss. Arg details can be seen in + # `docs/zh_cn/tutorials/rotated_detection.md` + use_hbbox_loss=False, + loss_angle=None), + train_cfg=dict( + assigner=dict( + type='BatchDynamicSoftLabelAssigner', + num_classes=num_classes, + topk=dsl_topk, + iou_calculator=dict(type='mmrotate.RBboxOverlaps2D'), + # RBboxOverlaps2D doesn't support batch input, use loop instead. 
+ batch_iou=False), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=model_test_cfg, +) + +train_pipeline = [ + dict(type='LoadImageFromFile', backend_args=_base_.backend_args), + dict(type='LoadAnnotations', with_bbox=True, box_type='qbox'), + dict( + type='mmrotate.ConvertBoxType', + box_type_mapping=dict(gt_bboxes='rbox')), + dict(type='mmdet.Resize', scale=img_scale, keep_ratio=True), + dict( + type='mmdet.RandomFlip', + prob=0.75, + direction=['horizontal', 'vertical', 'diagonal']), + dict( + type='mmrotate.RandomRotate', + prob=random_rotate_ratio, + angle_range=180, + rotate_type='mmrotate.Rotate', + rect_obj_labels=rotate_rect_obj_labels), + dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))), + dict(type='RegularizeRotatedBox', angle_version=angle_version), + dict(type='mmdet.PackDetInputs') +] + +val_pipeline = [ + dict(type='LoadImageFromFile', backend_args=_base_.backend_args), + dict(type='mmdet.Resize', scale=img_scale, keep_ratio=True), + dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))), + dict( + type='LoadAnnotations', + with_bbox=True, + box_type='qbox', + _scope_='mmdet'), + dict( + type='mmrotate.ConvertBoxType', + box_type_mapping=dict(gt_bboxes='rbox')), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] + +test_pipeline = [ + dict(type='LoadImageFromFile', backend_args=_base_.backend_args), + dict(type='mmdet.Resize', scale=img_scale, keep_ratio=True), + dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] + +train_dataloader = dict( + batch_size=train_batch_size_per_gpu, + num_workers=train_num_workers, + persistent_workers=persistent_workers, + pin_memory=True, + collate_fn=dict(type='yolov5_collate'), + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( 
+ type=dataset_type, + data_root=data_root, + ann_file=train_ann_file, + data_prefix=dict(img_path=train_data_prefix), + filter_cfg=dict(filter_empty_gt=True), + pipeline=train_pipeline)) + +val_dataloader = dict( + batch_size=val_batch_size_per_gpu, + num_workers=val_num_workers, + persistent_workers=persistent_workers, + pin_memory=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file=val_ann_file, + data_prefix=dict(img_path=val_data_prefix), + test_mode=True, + batch_shapes_cfg=batch_shapes_cfg, + pipeline=val_pipeline)) + +val_evaluator = dict(type='mmrotate.DOTAMetric', metric='mAP') + +# Inference on val dataset +test_dataloader = val_dataloader +test_evaluator = val_evaluator + +# Inference on test dataset and format the output results +# for submission. Note: the test set has no annotation. +# test_dataloader = dict( +# batch_size=val_batch_size_per_gpu, +# num_workers=val_num_workers, +# persistent_workers=True, +# drop_last=False, +# sampler=dict(type='DefaultSampler', shuffle=False), +# dataset=dict( +# type=dataset_type, +# data_root=data_root, +# data_prefix=dict(img_path=test_data_prefix), +# test_mode=True, +# batch_shapes_cfg=batch_shapes_cfg, +# pipeline=test_pipeline)) +# test_evaluator = dict( +# type='mmrotate.DOTAMetric', +# format_only=True, +# merge_patches=True, +# outfile_prefix=submission_dir) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=weight_decay), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=lr_start_factor, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 150 to 300 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + 
convert_to_iter_based=True), +] + +# hooks +default_hooks = dict( + checkpoint=dict( + type='CheckpointHook', + interval=save_checkpoint_intervals, + max_keep_ckpts=max_keep_ckpts, # only keep latest 3 checkpoints + save_best='auto')) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + strict_load=False, + priority=49) +] + +train_cfg = dict( + type='EpochBasedTrainLoop', + max_epochs=max_epochs, + val_interval=save_checkpoint_intervals) + +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') + +visualizer = dict(type='mmrotate.RotLocalVisualizer') diff --git a/configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota.py b/configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota.py new file mode 100644 index 0000000..dcafa55 --- /dev/null +++ b/configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota.py @@ -0,0 +1,168 @@ +_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py' + +# This config use longer schedule with Mixup, Mosaic and Random Rotate. + +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-l_8xb256-rsb-a1-600e_in1k-6a760974.pth' # noqa + +# ========================modified parameters====================== + +# Base learning rate for optim_wrapper. 
Corresponding to 1xb8=8 bs +base_lr = 0.00025 # 0.004 / 16 +lr_start_factor = 1.0e-5 +max_epochs = 100 # Maximum training epochs +# Change train_pipeline for final 10 epochs (stage 2) +num_epochs_stage2 = 10 + +img_scale = (1024, 1024) # width, height +# ratio range for random resize +random_resize_ratio_range = (0.1, 2.0) +# Cached images number in mosaic +mosaic_max_cached_images = 40 +# Number of cached images in mixup +mixup_max_cached_images = 20 +# ratio for random rotate +random_rotate_ratio = 0.5 +# label ids for rect objs +rotate_rect_obj_labels = [9, 11] + +# Save model checkpoint and validation intervals +save_checkpoint_intervals = 1 +# validation intervals in stage 2 +val_interval_stage2 = 1 +# The maximum checkpoints to keep. +max_keep_ckpts = 3 + +# Submission dir for result submit +submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission' + +# =======================Unmodified in most cases================== + +train_pipeline = [ + dict(type='LoadImageFromFile', backend_args=_base_.backend_args), + dict(type='LoadAnnotations', with_bbox=True, box_type='qbox'), + dict( + type='mmrotate.ConvertBoxType', + box_type_mapping=dict(gt_bboxes='rbox')), + dict( + type='Mosaic', + img_scale=img_scale, + use_cached=True, + max_cached_images=mosaic_max_cached_images, + pad_val=114.0), + dict( + type='mmdet.RandomResize', + # img_scale is (width, height) + scale=(img_scale[0] * 2, img_scale[1] * 2), + ratio_range=random_resize_ratio_range, + resize_type='mmdet.Resize', + keep_ratio=True), + dict( + type='mmrotate.RandomRotate', + prob=random_rotate_ratio, + angle_range=180, + rotate_type='mmrotate.Rotate', + rect_obj_labels=rotate_rect_obj_labels), + dict(type='mmdet.RandomCrop', crop_size=img_scale), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='mmdet.RandomFlip', + prob=0.75, + direction=['horizontal', 'vertical', 'diagonal']), + dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))), + dict( + type='YOLOv5MixUp', + 
use_cached=True, + max_cached_images=mixup_max_cached_images), + dict(type='mmdet.PackDetInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImageFromFile', backend_args=_base_.backend_args), + dict(type='LoadAnnotations', with_bbox=True, box_type='qbox'), + dict( + type='mmrotate.ConvertBoxType', + box_type_mapping=dict(gt_bboxes='rbox')), + dict( + type='mmdet.RandomResize', + scale=img_scale, + ratio_range=random_resize_ratio_range, + resize_type='mmdet.Resize', + keep_ratio=True), + dict( + type='mmrotate.RandomRotate', + prob=random_rotate_ratio, + angle_range=180, + rotate_type='mmrotate.Rotate', + rect_obj_labels=rotate_rect_obj_labels), + dict(type='mmdet.RandomCrop', crop_size=img_scale), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='mmdet.RandomFlip', + prob=0.75, + direction=['horizontal', 'vertical', 'diagonal']), + dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))), + dict(type='mmdet.PackDetInputs') +] + +train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=lr_start_factor, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 50 to 100 epoch (second half of max_epochs) + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# hooks +default_hooks = dict( + checkpoint=dict( + type='CheckpointHook', + interval=save_checkpoint_intervals, + max_keep_ckpts=max_keep_ckpts, # only keep latest 3 checkpoints + save_best='auto')) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + strict_load=False, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - num_epochs_stage2, + switch_pipeline=train_pipeline_stage2) +] + +train_cfg = dict( + type='EpochBasedTrainLoop', + max_epochs=max_epochs, + 
val_interval=save_checkpoint_intervals, + dynamic_intervals=[(max_epochs - num_epochs_stage2, val_interval_stage2)]) + +# Inference on test dataset and format the output results +# for submission. Note: the test set has no annotation. +# test_dataloader = dict( +# dataset=dict( +# data_root=_base_.data_root, +# ann_file='', # test set has no annotation +# data_prefix=dict(img_path=_base_.test_data_prefix), +# pipeline=_base_.test_pipeline)) +# test_evaluator = dict( +# type='mmrotate.DOTAMetric', +# format_only=True, +# merge_patches=True, +# outfile_prefix=submission_dir) diff --git a/configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_coco-pretrain_2xb4-36e_dota-ms.py b/configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_coco-pretrain_2xb4-36e_dota-ms.py new file mode 100644 index 0000000..1a9f50c --- /dev/null +++ b/configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_coco-pretrain_2xb4-36e_dota-ms.py @@ -0,0 +1,20 @@ +_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota-ms.py' + +load_from = 'https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928-ee3abdc4.pth' # noqa + +# Submission dir for result submit +submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission' + +# Inference on test dataset and format the output results +# for submission. Note: the test set has no annotation. 
+# test_dataloader = dict( +# dataset=dict( +# data_root=_base_.data_root, +# ann_file='', # test set has no annotation +# data_prefix=dict(img_path=_base_.test_data_prefix), +# pipeline=_base_.test_pipeline)) +# test_evaluator = dict( +# type='mmrotate.DOTAMetric', +# format_only=True, +# merge_patches=True, +# outfile_prefix=submission_dir) diff --git a/configs/rtmdet/rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota-ms.py b/configs/rtmdet/rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota-ms.py new file mode 100644 index 0000000..4be8605 --- /dev/null +++ b/configs/rtmdet/rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota-ms.py @@ -0,0 +1,33 @@ +_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota-ms.py' + +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-m_8xb256-rsb-a1-600e_in1k-ecb3bbd9.pth' # noqa + +# ========================modified parameters====================== +deepen_factor = 0.67 +widen_factor = 0.75 + +# Submission dir for result submit +submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission' + +# =======================Unmodified in most cases================== +model = dict( + backbone=dict( + deepen_factor=deepen_factor, + widen_factor=widen_factor, + init_cfg=dict(checkpoint=checkpoint)), + neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), + bbox_head=dict(head_module=dict(widen_factor=widen_factor))) + +# Inference on test dataset and format the output results +# for submission. Note: the test set has no annotation. 
+# test_dataloader = dict( +# dataset=dict( +# data_root=_base_.data_root, +# ann_file='', # test set has no annotation +# data_prefix=dict(img_path=_base_.test_data_prefix), +# pipeline=_base_.test_pipeline)) +# test_evaluator = dict( +# type='mmrotate.DOTAMetric', +# format_only=True, +# merge_patches=True, +# outfile_prefix=submission_dir) diff --git a/configs/rtmdet/rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota.py b/configs/rtmdet/rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota.py new file mode 100644 index 0000000..8df61cf --- /dev/null +++ b/configs/rtmdet/rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota.py @@ -0,0 +1,33 @@ +_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py' + +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-m_8xb256-rsb-a1-600e_in1k-ecb3bbd9.pth' # noqa + +# ========================modified parameters====================== +deepen_factor = 0.67 +widen_factor = 0.75 + +# Submission dir for result submit +submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission' + +# =======================Unmodified in most cases================== +model = dict( + backbone=dict( + deepen_factor=deepen_factor, + widen_factor=widen_factor, + init_cfg=dict(checkpoint=checkpoint)), + neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), + bbox_head=dict(head_module=dict(widen_factor=widen_factor))) + +# Inference on test dataset and format the output results +# for submission. Note: the test set has no annotation. 
+# test_dataloader = dict( +# dataset=dict( +# data_root=_base_.data_root, +# ann_file='', # test set has no annotation +# data_prefix=dict(img_path=_base_.test_data_prefix), +# pipeline=_base_.test_pipeline)) +# test_evaluator = dict( +# type='mmrotate.DOTAMetric', +# format_only=True, +# merge_patches=True, +# outfile_prefix=submission_dir) diff --git a/configs/rtmdet/rotated/rtmdet-r_s_fast_1xb8-36e_dota-ms.py b/configs/rtmdet/rotated/rtmdet-r_s_fast_1xb8-36e_dota-ms.py new file mode 100644 index 0000000..2b7b0b6 --- /dev/null +++ b/configs/rtmdet/rotated/rtmdet-r_s_fast_1xb8-36e_dota-ms.py @@ -0,0 +1,38 @@ +_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota-ms.py' + +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth' # noqa + +# ========================modified parameters====================== +deepen_factor = 0.33 +widen_factor = 0.5 + +# Batch size of a single GPU during training +train_batch_size_per_gpu = 8 + +# Submission dir for result submit +submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission' + +# =======================Unmodified in most cases================== +model = dict( + backbone=dict( + deepen_factor=deepen_factor, + widen_factor=widen_factor, + init_cfg=dict(checkpoint=checkpoint)), + neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), + bbox_head=dict(head_module=dict(widen_factor=widen_factor))) + +train_dataloader = dict(batch_size=train_batch_size_per_gpu) + +# Inference on test dataset and format the output results +# for submission. Note: the test set has no annotation. 
+# test_dataloader = dict( +# dataset=dict( +# data_root=_base_.data_root, +# ann_file='', # test set has no annotation +# data_prefix=dict(img_path=_base_.test_data_prefix), +# pipeline=_base_.test_pipeline)) +# test_evaluator = dict( +# type='mmrotate.DOTAMetric', +# format_only=True, +# merge_patches=True, +# outfile_prefix=submission_dir) diff --git a/configs/rtmdet/rotated/rtmdet-r_s_fast_1xb8-36e_dota.py b/configs/rtmdet/rotated/rtmdet-r_s_fast_1xb8-36e_dota.py new file mode 100644 index 0000000..d200dd7 --- /dev/null +++ b/configs/rtmdet/rotated/rtmdet-r_s_fast_1xb8-36e_dota.py @@ -0,0 +1,38 @@ +_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py' + +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth' # noqa + +# ========================modified parameters====================== +deepen_factor = 0.33 +widen_factor = 0.5 + +# Batch size of a single GPU during training +train_batch_size_per_gpu = 8 + +# Submission dir for result submit +submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission' + +# =======================Unmodified in most cases================== +model = dict( + backbone=dict( + deepen_factor=deepen_factor, + widen_factor=widen_factor, + init_cfg=dict(checkpoint=checkpoint)), + neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), + bbox_head=dict(head_module=dict(widen_factor=widen_factor))) + +train_dataloader = dict(batch_size=train_batch_size_per_gpu) + +# Inference on test dataset and format the output results +# for submission. Note: the test set has no annotation. 
+# test_dataloader = dict( +# dataset=dict( +# data_root=_base_.data_root, +# ann_file='', # test set has no annotation +# data_prefix=dict(img_path=_base_.test_data_prefix), +# pipeline=_base_.test_pipeline)) +# test_evaluator = dict( +# type='mmrotate.DOTAMetric', +# format_only=True, +# merge_patches=True, +# outfile_prefix=submission_dir) diff --git a/configs/rtmdet/rotated/rtmdet-r_tiny_fast_1xb8-36e_dota-ms.py b/configs/rtmdet/rotated/rtmdet-r_tiny_fast_1xb8-36e_dota-ms.py new file mode 100644 index 0000000..56bf038 --- /dev/null +++ b/configs/rtmdet/rotated/rtmdet-r_tiny_fast_1xb8-36e_dota-ms.py @@ -0,0 +1,38 @@ +_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota-ms.py' + +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth' # noqa + +# ========================modified parameters====================== +deepen_factor = 0.167 +widen_factor = 0.375 + +# Batch size of a single GPU during training +train_batch_size_per_gpu = 8 + +# Submission dir for result submit +submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission' + +# =======================Unmodified in most cases================== +model = dict( + backbone=dict( + deepen_factor=deepen_factor, + widen_factor=widen_factor, + init_cfg=dict(checkpoint=checkpoint)), + neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), + bbox_head=dict(head_module=dict(widen_factor=widen_factor))) + +train_dataloader = dict(batch_size=train_batch_size_per_gpu) + +# Inference on test dataset and format the output results +# for submission. Note: the test set has no annotation. 
+# test_dataloader = dict( +# dataset=dict( +# data_root=_base_.data_root, +# ann_file='', # test set has no annotation +# data_prefix=dict(img_path=_base_.test_data_prefix), +# pipeline=_base_.test_pipeline)) +# test_evaluator = dict( +# type='mmrotate.DOTAMetric', +# format_only=True, +# merge_patches=True, +# outfile_prefix=submission_dir) diff --git a/configs/rtmdet/rotated/rtmdet-r_tiny_fast_1xb8-36e_dota.py b/configs/rtmdet/rotated/rtmdet-r_tiny_fast_1xb8-36e_dota.py new file mode 100644 index 0000000..739a2de --- /dev/null +++ b/configs/rtmdet/rotated/rtmdet-r_tiny_fast_1xb8-36e_dota.py @@ -0,0 +1,38 @@ +_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py' + +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth' # noqa + +# ========================modified parameters====================== +deepen_factor = 0.167 +widen_factor = 0.375 + +# Batch size of a single GPU during training +train_batch_size_per_gpu = 8 + +# Submission dir for result submit +submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission' + +# =======================Unmodified in most cases================== +model = dict( + backbone=dict( + deepen_factor=deepen_factor, + widen_factor=widen_factor, + init_cfg=dict(checkpoint=checkpoint)), + neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), + bbox_head=dict(head_module=dict(widen_factor=widen_factor))) + +train_dataloader = dict(batch_size=train_batch_size_per_gpu) + +# Inference on test dataset and format the output results +# for submission. Note: the test set has no annotation. 
+# test_dataloader = dict( +# dataset=dict( +# data_root=_base_.data_root, +# ann_file='', # test set has no annotation +# data_prefix=dict(img_path=_base_.test_data_prefix), +# pipeline=_base_.test_pipeline)) +# test_evaluator = dict( +# type='mmrotate.DOTAMetric', +# format_only=True, +# merge_patches=True, +# outfile_prefix=submission_dir) diff --git a/configs/rtmdet/rtmdet-ins_s_syncbn_fast_8xb32-300e_coco.py b/configs/rtmdet/rtmdet-ins_s_syncbn_fast_8xb32-300e_coco.py new file mode 100644 index 0000000..279a799 --- /dev/null +++ b/configs/rtmdet/rtmdet-ins_s_syncbn_fast_8xb32-300e_coco.py @@ -0,0 +1,31 @@ +_base_ = './rtmdet_s_syncbn_fast_8xb32-300e_coco.py' + +widen_factor = 0.5 + +model = dict( + bbox_head=dict( + type='RTMDetInsSepBNHead', + head_module=dict( + type='RTMDetInsSepBNHeadModule', + use_sigmoid_cls=True, + widen_factor=widen_factor), + loss_mask=dict( + type='mmdet.DiceLoss', loss_weight=2.0, eps=5e-6, + reduction='mean')), + test_cfg=dict( + multi_label=True, + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.6), + max_per_img=100, + mask_thr_binary=0.5)) + +_base_.test_pipeline[-2] = dict( + type='LoadAnnotations', with_bbox=True, with_mask=True, _scope_='mmdet') + +val_dataloader = dict(dataset=dict(pipeline=_base_.test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(metric=['bbox', 'segm']) +test_evaluator = val_evaluator diff --git a/configs/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py b/configs/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py new file mode 100644 index 0000000..c36ac38 --- /dev/null +++ b/configs/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py @@ -0,0 +1,304 @@ +_base_ = ['../_base_/default_runtime.py', '../_base_/det_p5_tta.py'] + +# ========================Frequently modified parameters====================== +# -----data related----- +data_root = 'data/coco/' +# Path of train annotation file +train_ann_file = 'annotations/instances_train2017.json' 
+train_data_prefix = 'train2017/'  # Prefix of train image path +# Path of val annotation file +val_ann_file = 'annotations/instances_val2017.json' +val_data_prefix = 'val2017/'  # Prefix of val image path + +num_classes = 80  # Number of classes for classification +# Batch size of a single GPU during training +train_batch_size_per_gpu = 32 +# Worker to pre-fetch data for each single GPU during training +train_num_workers = 10 +# persistent_workers must be False if num_workers is 0. +persistent_workers = True + +# -----train val related----- +# Base learning rate for optim_wrapper. Corresponding to 8xb32=256 bs +base_lr = 0.004 +max_epochs = 300  # Maximum training epochs +# Change train_pipeline for final 20 epochs (stage 2) +num_epochs_stage2 = 20 + +model_test_cfg = dict( + # The config of multi-label for multi-class prediction. + multi_label=True, + # The number of boxes before NMS + nms_pre=30000, + score_thr=0.001,  # Threshold to filter out boxes. + nms=dict(type='nms', iou_threshold=0.65),  # NMS type and threshold + max_per_img=300)  # Max number of detections of each image + +# ========================Possible modified parameters======================== +# -----data related----- +img_scale = (640, 640)  # width, height +# ratio range for random resize +random_resize_ratio_range = (0.1, 2.0) +# Cached images number in mosaic +mosaic_max_cached_images = 40 +# Number of cached images in mixup +mixup_max_cached_images = 20 +# Dataset type, this will be used to define the dataset +dataset_type = 'YOLOv5CocoDataset' +# Batch size of a single GPU during validation +val_batch_size_per_gpu = 32 +# Worker to pre-fetch data for each single GPU during validation +val_num_workers = 10 + +# Config of batch shapes. Only on val. 
+batch_shapes_cfg = dict( + type='BatchShapePolicy', + batch_size=val_batch_size_per_gpu, + img_size=img_scale[0], + size_divisor=32, + extra_pad_ratio=0.5) + +# -----model related----- +# The scaling factor that controls the depth of the network structure +deepen_factor = 1.0 +# The scaling factor that controls the width of the network structure +widen_factor = 1.0 +# Strides of multi-scale prior box +strides = [8, 16, 32] + +norm_cfg = dict(type='BN') # Normalization config + +# -----train val related----- +lr_start_factor = 1.0e-5 +dsl_topk = 13 # Number of bbox selected in each level +loss_cls_weight = 1.0 +loss_bbox_weight = 2.0 +qfl_beta = 2.0 # beta of QualityFocalLoss +weight_decay = 0.05 + +# Save model checkpoint and validation intervals +save_checkpoint_intervals = 10 +# validation intervals in stage 2 +val_interval_stage2 = 1 +# The maximum checkpoints to keep. +max_keep_ckpts = 3 +# single-scale training is recommended to +# be turned on, which can speed up training. +env_cfg = dict(cudnn_benchmark=True) + +# ===============================Unmodified in most cases==================== +model = dict( + type='YOLODetector', + data_preprocessor=dict( + type='YOLOv5DetDataPreprocessor', + mean=[103.53, 116.28, 123.675], + std=[57.375, 57.12, 58.395], + bgr_to_rgb=False), + backbone=dict( + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=deepen_factor, + widen_factor=widen_factor, + channel_attention=True, + norm_cfg=norm_cfg, + act_cfg=dict(type='SiLU', inplace=True)), + neck=dict( + type='CSPNeXtPAFPN', + deepen_factor=deepen_factor, + widen_factor=widen_factor, + in_channels=[256, 512, 1024], + out_channels=256, + num_csp_blocks=3, + expand_ratio=0.5, + norm_cfg=norm_cfg, + act_cfg=dict(type='SiLU', inplace=True)), + bbox_head=dict( + type='RTMDetHead', + head_module=dict( + type='RTMDetSepBNHeadModule', + num_classes=num_classes, + in_channels=256, + stacked_convs=2, + feat_channels=256, + norm_cfg=norm_cfg, + act_cfg=dict(type='SiLU', 
inplace=True), + share_conv=True, + pred_kernel_size=1, + featmap_strides=strides), + prior_generator=dict( + type='mmdet.MlvlPointGenerator', offset=0, strides=strides), + bbox_coder=dict(type='DistancePointBBoxCoder'), + loss_cls=dict( + type='mmdet.QualityFocalLoss', + use_sigmoid=True, + beta=qfl_beta, + loss_weight=loss_cls_weight), + loss_bbox=dict(type='mmdet.GIoULoss', loss_weight=loss_bbox_weight)), + train_cfg=dict( + assigner=dict( + type='BatchDynamicSoftLabelAssigner', + num_classes=num_classes, + topk=dsl_topk, + iou_calculator=dict(type='mmdet.BboxOverlaps2D')), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=model_test_cfg, +) + +train_pipeline = [ + dict(type='LoadImageFromFile', backend_args=_base_.backend_args), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Mosaic', + img_scale=img_scale, + use_cached=True, + max_cached_images=mosaic_max_cached_images, + pad_val=114.0), + dict( + type='mmdet.RandomResize', + # img_scale is (width, height) + scale=(img_scale[0] * 2, img_scale[1] * 2), + ratio_range=random_resize_ratio_range, + resize_type='mmdet.Resize', + keep_ratio=True), + dict(type='mmdet.RandomCrop', crop_size=img_scale), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict(type='mmdet.RandomFlip', prob=0.5), + dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))), + dict( + type='YOLOv5MixUp', + use_cached=True, + max_cached_images=mixup_max_cached_images), + dict(type='mmdet.PackDetInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImageFromFile', backend_args=_base_.backend_args), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='mmdet.RandomResize', + scale=img_scale, + ratio_range=random_resize_ratio_range, + resize_type='mmdet.Resize', + keep_ratio=True), + dict(type='mmdet.RandomCrop', crop_size=img_scale), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict(type='mmdet.RandomFlip', prob=0.5), + dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))), + 
dict(type='mmdet.PackDetInputs') +] + +test_pipeline = [ + dict(type='LoadImageFromFile', backend_args=_base_.backend_args), + dict(type='YOLOv5KeepRatioResize', scale=img_scale), + dict( + type='LetterResize', + scale=img_scale, + allow_scale_up=False, + pad_val=dict(img=114)), + dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor', 'pad_param')) +] + +train_dataloader = dict( + batch_size=train_batch_size_per_gpu, + num_workers=train_num_workers, + persistent_workers=persistent_workers, + pin_memory=True, + collate_fn=dict(type='yolov5_collate'), + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file=train_ann_file, + data_prefix=dict(img=train_data_prefix), + filter_cfg=dict(filter_empty_gt=True, min_size=32), + pipeline=train_pipeline)) + +val_dataloader = dict( + batch_size=val_batch_size_per_gpu, + num_workers=val_num_workers, + persistent_workers=persistent_workers, + pin_memory=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file=val_ann_file, + data_prefix=dict(img=val_data_prefix), + test_mode=True, + batch_shapes_cfg=batch_shapes_cfg, + pipeline=test_pipeline)) + +test_dataloader = val_dataloader + +# Reduce evaluation time +val_evaluator = dict( + type='mmdet.CocoMetric', + proposal_nums=(100, 1, 10), + ann_file=data_root + val_ann_file, + metric='bbox') +test_evaluator = val_evaluator + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=weight_decay), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=lr_start_factor, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr 
from 150 to 300 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# hooks +default_hooks = dict( + checkpoint=dict( + type='CheckpointHook', + interval=save_checkpoint_intervals, + max_keep_ckpts=max_keep_ckpts # only keep latest 3 checkpoints + )) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + strict_load=False, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - num_epochs_stage2, + switch_pipeline=train_pipeline_stage2) +] + +train_cfg = dict( + type='EpochBasedTrainLoop', + max_epochs=max_epochs, + val_interval=save_checkpoint_intervals, + dynamic_intervals=[(max_epochs - num_epochs_stage2, val_interval_stage2)]) + +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') diff --git a/configs/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco.py b/configs/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco.py new file mode 100644 index 0000000..52576bf --- /dev/null +++ b/configs/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco.py @@ -0,0 +1,11 @@ +_base_ = './rtmdet_l_syncbn_fast_8xb32-300e_coco.py' + +# ========================modified parameters====================== +deepen_factor = 0.67 +widen_factor = 0.75 + +# =======================Unmodified in most cases================== +model = dict( + backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), + neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), + bbox_head=dict(head_module=dict(widen_factor=widen_factor))) diff --git a/configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py b/configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py new file mode 100644 index 0000000..8cead78 --- /dev/null +++ b/configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py @@ -0,0 +1,92 @@ +_base_ = './rtmdet_l_syncbn_fast_8xb32-300e_coco.py' +checkpoint = 
'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth' # noqa + +# ========================modified parameters====================== +deepen_factor = 0.33 +widen_factor = 0.5 +img_scale = _base_.img_scale + +# ratio range for random resize +random_resize_ratio_range = (0.5, 2.0) +# Number of cached images in mosaic +mosaic_max_cached_images = 40 +# Number of cached images in mixup +mixup_max_cached_images = 20 + +# =======================Unmodified in most cases================== +model = dict( + backbone=dict( + deepen_factor=deepen_factor, + widen_factor=widen_factor, + # Since the checkpoint includes CUDA:0 data, + # it must be forced to set map_location. + # Once checkpoint is fixed, it can be removed. + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint=checkpoint, + map_location='cpu')), + neck=dict( + deepen_factor=deepen_factor, + widen_factor=widen_factor, + ), + bbox_head=dict(head_module=dict(widen_factor=widen_factor))) + +train_pipeline = [ + dict(type='LoadImageFromFile', backend_args=_base_.backend_args), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Mosaic', + img_scale=img_scale, + use_cached=True, + max_cached_images=mosaic_max_cached_images, + pad_val=114.0), + dict( + type='mmdet.RandomResize', + # img_scale is (width, height) + scale=(img_scale[0] * 2, img_scale[1] * 2), + ratio_range=random_resize_ratio_range, # note + resize_type='mmdet.Resize', + keep_ratio=True), + dict(type='mmdet.RandomCrop', crop_size=img_scale), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict(type='mmdet.RandomFlip', prob=0.5), + dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))), + dict( + type='YOLOv5MixUp', + use_cached=True, + max_cached_images=mixup_max_cached_images), + dict(type='mmdet.PackDetInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImageFromFile', backend_args=_base_.backend_args), + dict(type='LoadAnnotations', with_bbox=True), + dict( 
+ type='mmdet.RandomResize', + scale=img_scale, + ratio_range=random_resize_ratio_range, # note + resize_type='mmdet.Resize', + keep_ratio=True), + dict(type='mmdet.RandomCrop', crop_size=img_scale), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict(type='mmdet.RandomFlip', prob=0.5), + dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))), + dict(type='mmdet.PackDetInputs') +] + +train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + strict_load=False, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=_base_.max_epochs - _base_.num_epochs_stage2, + switch_pipeline=train_pipeline_stage2) +] diff --git a/configs/rtmdet/rtmdet_tiny_fast_1xb12-40e_cat.py b/configs/rtmdet/rtmdet_tiny_fast_1xb12-40e_cat.py new file mode 100644 index 0000000..8d1182c --- /dev/null +++ b/configs/rtmdet/rtmdet_tiny_fast_1xb12-40e_cat.py @@ -0,0 +1,70 @@ +_base_ = 'rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py' + +data_root = './data/cat/' +class_name = ('cat', ) +num_classes = len(class_name) +metainfo = dict(classes=class_name, palette=[(20, 220, 60)]) + +num_epochs_stage2 = 5 + +max_epochs = 40 +train_batch_size_per_gpu = 12 +train_num_workers = 4 +val_batch_size_per_gpu = 1 +val_num_workers = 2 + +load_from = 'https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117-dbb1dc83.pth' # noqa + +model = dict( + backbone=dict(frozen_stages=4), + bbox_head=dict(head_module=dict(num_classes=num_classes)), + train_cfg=dict(assigner=dict(num_classes=num_classes))) + +train_dataloader = dict( + batch_size=train_batch_size_per_gpu, + num_workers=train_num_workers, + dataset=dict( + data_root=data_root, + metainfo=metainfo, + ann_file='annotations/trainval.json', + data_prefix=dict(img='images/'))) + +val_dataloader = dict( + 
 batch_size=val_batch_size_per_gpu, + num_workers=val_num_workers, + dataset=dict( + metainfo=metainfo, + data_root=data_root, + ann_file='annotations/test.json', + data_prefix=dict(img='images/'))) + +test_dataloader = val_dataloader + +param_scheduler = [ + dict( + type='LinearLR', + start_factor=_base_.lr_start_factor, + by_epoch=False, + begin=0, + end=30), + dict( + # use cosine lr from 20 to 40 epoch (second half of max_epochs) + type='CosineAnnealingLR', + eta_min=_base_.base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +_base_.custom_hooks[1].switch_epoch = max_epochs - num_epochs_stage2 + +val_evaluator = dict(ann_file=data_root + 'annotations/test.json') +test_evaluator = val_evaluator + +default_hooks = dict( + checkpoint=dict(interval=10, max_keep_ckpts=2, save_best='auto'), + logger=dict(type='LoggerHook', interval=5)) +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +# visualizer = dict(vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')]) # noqa diff --git a/configs/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py b/configs/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py new file mode 100644 index 0000000..257110d --- /dev/null +++ b/configs/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py @@ -0,0 +1,58 @@ +_base_ = './rtmdet_s_syncbn_fast_8xb32-300e_coco.py' +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth'  # noqa + +# ========================modified parameters====================== +deepen_factor = 0.167 +widen_factor = 0.375 +img_scale = _base_.img_scale + +# ratio range for random resize +random_resize_ratio_range = (0.5, 2.0) +# Number of cached images in mosaic +mosaic_max_cached_images = 20 +# Number of cached images in mixup +mixup_max_cached_images = 10 + +# =======================Unmodified in most cases================== +model = dict( + backbone=dict( + 
deepen_factor=deepen_factor, + widen_factor=widen_factor, + init_cfg=dict(checkpoint=checkpoint)), + neck=dict( + deepen_factor=deepen_factor, + widen_factor=widen_factor, + ), + bbox_head=dict(head_module=dict(widen_factor=widen_factor))) + +train_pipeline = [ + dict(type='LoadImageFromFile', backend_args=_base_.backend_args), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Mosaic', + img_scale=img_scale, + use_cached=True, + max_cached_images=mosaic_max_cached_images, # note + random_pop=False, # note + pad_val=114.0), + dict( + type='mmdet.RandomResize', + # img_scale is (width, height) + scale=(img_scale[0] * 2, img_scale[1] * 2), + ratio_range=random_resize_ratio_range, + resize_type='mmdet.Resize', + keep_ratio=True), + dict(type='mmdet.RandomCrop', crop_size=img_scale), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict(type='mmdet.RandomFlip', prob=0.5), + dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))), + dict( + type='YOLOv5MixUp', + use_cached=True, + random_pop=False, + max_cached_images=mixup_max_cached_images, + prob=0.5), + dict(type='mmdet.PackDetInputs') +] + +train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco.py b/configs/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco.py new file mode 100644 index 0000000..7fc9001 --- /dev/null +++ b/configs/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco.py @@ -0,0 +1,11 @@ +_base_ = './rtmdet_l_syncbn_fast_8xb32-300e_coco.py' + +# ========================modified parameters====================== +deepen_factor = 1.33 +widen_factor = 1.25 + +# =======================Unmodified in most cases================== +model = dict( + backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), + neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), + bbox_head=dict(head_module=dict(widen_factor=widen_factor)))