Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +70 -0
- head_extractor/assets/001.jpg +3 -0
- head_extractor/assets/001_head-black-bg.webp +0 -0
- head_extractor/assets/001_head-default.webp +0 -0
- head_extractor/assets/001_head-pad2square-false.webp +0 -0
- head_extractor/build/lib/head_extractor/__init__.py +6 -0
- head_extractor/build/lib/head_extractor/models/__init__.py +0 -0
- head_extractor/build/lib/head_extractor/models/depth_anything_large_mask2former_16xb1_160k_human_parsing_fashion_1024x1024.py +573 -0
- head_extractor/build/lib/head_extractor/processor.py +585 -0
- head_extractor/build/lib/mmdet/__init__.py +27 -0
- head_extractor/build/lib/mmdet/apis/__init__.py +9 -0
- head_extractor/build/lib/mmdet/apis/det_inferencer.py +652 -0
- head_extractor/build/lib/mmdet/apis/inference.py +372 -0
- head_extractor/build/lib/mmdet/configs/_base_/datasets/coco_detection.py +104 -0
- head_extractor/build/lib/mmdet/configs/_base_/datasets/coco_instance.py +106 -0
- head_extractor/build/lib/mmdet/configs/_base_/datasets/coco_instance_semantic.py +87 -0
- head_extractor/build/lib/mmdet/configs/_base_/datasets/coco_panoptic.py +105 -0
- head_extractor/build/lib/mmdet/configs/_base_/datasets/mot_challenge.py +101 -0
- head_extractor/build/lib/mmdet/configs/_base_/default_runtime.py +33 -0
- head_extractor/build/lib/mmdet/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py +220 -0
- head_extractor/build/lib/mmdet/configs/_base_/models/cascade_rcnn_r50_fpn.py +201 -0
- head_extractor/build/lib/mmdet/configs/_base_/models/faster_rcnn_r50_fpn.py +138 -0
- head_extractor/build/lib/mmdet/configs/_base_/models/mask_rcnn_r50_caffe_c4.py +158 -0
- head_extractor/build/lib/mmdet/configs/_base_/models/mask_rcnn_r50_fpn.py +154 -0
- head_extractor/build/lib/mmdet/configs/_base_/models/retinanet_r50_fpn.py +77 -0
- head_extractor/build/lib/mmdet/configs/_base_/schedules/schedule_1x.py +33 -0
- head_extractor/build/lib/mmdet/configs/_base_/schedules/schedule_2x.py +33 -0
- head_extractor/build/lib/mmdet/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py +13 -0
- head_extractor/build/lib/mmdet/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py +13 -0
- head_extractor/build/lib/mmdet/configs/common/lsj_100e_coco_detection.py +134 -0
- head_extractor/build/lib/mmdet/configs/common/lsj_100e_coco_instance.py +134 -0
- head_extractor/build/lib/mmdet/configs/common/lsj_200e_coco_detection.py +25 -0
- head_extractor/build/lib/mmdet/configs/common/lsj_200e_coco_instance.py +25 -0
- head_extractor/build/lib/mmdet/configs/common/ms_3x_coco.py +130 -0
- head_extractor/build/lib/mmdet/configs/common/ms_3x_coco_instance.py +136 -0
- head_extractor/build/lib/mmdet/configs/common/ms_90k_coco.py +151 -0
- head_extractor/build/lib/mmdet/configs/common/ms_poly_3x_coco_instance.py +138 -0
- head_extractor/build/lib/mmdet/configs/common/ms_poly_90k_coco_instance.py +153 -0
- head_extractor/build/lib/mmdet/configs/common/ssj_270_coco_instance.py +158 -0
- head_extractor/build/lib/mmdet/configs/common/ssj_scp_270k_coco_instance.py +70 -0
- head_extractor/build/lib/mmdet/configs/deformable_detr/deformable_detr_r50_16xb2_50e_coco.py +186 -0
- head_extractor/build/lib/mmdet/configs/deformable_detr/deformable_detr_refine_r50_16xb2_50e_coco.py +12 -0
- head_extractor/build/lib/mmdet/configs/deformable_detr/deformable_detr_refine_twostage_r50_16xb2_50e_coco.py +12 -0
- head_extractor/build/lib/mmdet/configs/detr/detr_r101_8xb2_500e_coco.py +13 -0
- head_extractor/build/lib/mmdet/configs/detr/detr_r18_8xb2_500e_coco.py +14 -0
- head_extractor/build/lib/mmdet/configs/detr/detr_r50_8xb2_150e_coco.py +182 -0
- head_extractor/build/lib/mmdet/configs/detr/detr_r50_8xb2_500e_coco.py +25 -0
- head_extractor/build/lib/mmdet/configs/dino/dino_4scale_r50_8xb2_12e_coco.py +190 -0
- head_extractor/build/lib/mmdet/configs/dino/dino_4scale_r50_8xb2_24e_coco.py +12 -0
- head_extractor/build/lib/mmdet/configs/dino/dino_4scale_r50_8xb2_36e_coco.py +12 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,73 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
head_extractor/assets/001.jpg filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
head_extractor/mmcv-2.1.0/build/lib.linux-x86_64-cpython-311/mmcv/_ext.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/.ninja_deps filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/active_rotated_filter_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/assign_score_withk_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/ball_query_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/bbox_overlaps_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/bezier_align_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/bias_act_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/border_align_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/box_iou_quadri_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/box_iou_rotated_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/carafe_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/carafe_naive_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/chamfer_distance_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/convex_iou.o filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/correlation_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/cudabind.o filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/deform_conv_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/deform_roi_pool_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/diff_iou_rotated_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/filtered_lrelu.o filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/focal_loss_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/furthest_point_sample_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/fused_bias_leakyrelu_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/fused_spconv_ops_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/gather_points_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/group_points_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/iou3d_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/knn_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 66 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/masked_conv2d_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 67 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/min_area_polygons.o filter=lfs diff=lfs merge=lfs -text
|
| 68 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/modulated_deform_conv_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 69 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/ms_deform_attn_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 70 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/nms_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 71 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/nms_quadri_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 72 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/nms_rotated_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 73 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/points_in_boxes_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 74 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/points_in_polygons_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 75 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/prroi_pool_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 76 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/psamask_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 77 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/riroi_align_rotated_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 78 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/roi_align_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 79 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/roi_align_rotated_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 80 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/roi_pool_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 81 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/roiaware_pool3d_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 82 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/roipoint_pool3d_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 83 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/rotated_feature_align_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 84 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/scatter_points_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 85 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/sparse_indice.o filter=lfs diff=lfs merge=lfs -text
|
| 86 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/sparse_maxpool.o filter=lfs diff=lfs merge=lfs -text
|
| 87 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/sparse_pool_ops_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 88 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/sparse_reordering.o filter=lfs diff=lfs merge=lfs -text
|
| 89 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/spconv_ops_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 90 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/stack_ball_query_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 91 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/stack_group_points_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 92 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/sync_bn_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 93 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/three_interpolate_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 94 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/three_nn_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 95 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/tin_shift_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 96 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/upfirdn2d_kernel.o filter=lfs diff=lfs merge=lfs -text
|
| 97 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/voxelization_cuda.o filter=lfs diff=lfs merge=lfs -text
|
| 98 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/deform_conv.o filter=lfs diff=lfs merge=lfs -text
|
| 99 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/modulated_deform_conv.o filter=lfs diff=lfs merge=lfs -text
|
| 100 |
+
head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/pybind.o filter=lfs diff=lfs merge=lfs -text
|
| 101 |
+
head_extractor/mmcv-2.1.0/docs/en/_static/community/3.png filter=lfs diff=lfs merge=lfs -text
|
| 102 |
+
head_extractor/mmcv-2.1.0/docs/en/_static/flow_raw_images.png filter=lfs diff=lfs merge=lfs -text
|
| 103 |
+
head_extractor/mmcv-2.1.0/docs/en/_static/flow_warp.png filter=lfs diff=lfs merge=lfs -text
|
| 104 |
+
head_extractor/mmcv-2.1.0/docs/en/_static/flow_warp_diff.png filter=lfs diff=lfs merge=lfs -text
|
| 105 |
+
head_extractor/mmcv-2.1.0/docs/en/_static/progress.gif filter=lfs diff=lfs merge=lfs -text
|
head_extractor/assets/001.jpg
ADDED
|
Git LFS Details
|
head_extractor/assets/001_head-black-bg.webp
ADDED
|
head_extractor/assets/001_head-default.webp
ADDED
|
head_extractor/assets/001_head-pad2square-false.webp
ADDED
|
head_extractor/build/lib/head_extractor/__init__.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .processor import ProcessorPipeline, TaskType
|
| 2 |
+
|
| 3 |
+
__version__ = "0.1.0"
|
| 4 |
+
|
| 5 |
+
# 让外部可以直接 from head_extractor import ProcessorPipeline
|
| 6 |
+
__all__ = ['ProcessorPipeline', 'TaskType']
|
head_extractor/build/lib/head_extractor/models/__init__.py
ADDED
|
File without changes
|
head_extractor/build/lib/head_extractor/models/depth_anything_large_mask2former_16xb1_160k_human_parsing_fashion_1024x1024.py
ADDED
|
@@ -0,0 +1,573 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
auto_scale_lr = dict(base_batch_size=16, enable=False)
|
| 2 |
+
backbone_embed_multi = dict(decay_mult=0.0, lr_mult=0.1)
|
| 3 |
+
backbone_norm_multi = dict(decay_mult=0.0, lr_mult=0.1)
|
| 4 |
+
crop_size = (
|
| 5 |
+
896,
|
| 6 |
+
896,
|
| 7 |
+
)
|
| 8 |
+
custom_keys = dict({
|
| 9 |
+
'backbone.dinov2':
|
| 10 |
+
dict(decay_mult=1.0, lr_mult=0.1),
|
| 11 |
+
'backbone.dinov2.blocks.0.norm':
|
| 12 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 13 |
+
'backbone.dinov2.blocks.1.norm':
|
| 14 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 15 |
+
'backbone.dinov2.blocks.10.norm':
|
| 16 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 17 |
+
'backbone.dinov2.blocks.11.norm':
|
| 18 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 19 |
+
'backbone.dinov2.blocks.12.norm':
|
| 20 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 21 |
+
'backbone.dinov2.blocks.13.norm':
|
| 22 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 23 |
+
'backbone.dinov2.blocks.14.norm':
|
| 24 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 25 |
+
'backbone.dinov2.blocks.15.norm':
|
| 26 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 27 |
+
'backbone.dinov2.blocks.16.norm':
|
| 28 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 29 |
+
'backbone.dinov2.blocks.17.norm':
|
| 30 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 31 |
+
'backbone.dinov2.blocks.18.norm':
|
| 32 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 33 |
+
'backbone.dinov2.blocks.19.norm':
|
| 34 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 35 |
+
'backbone.dinov2.blocks.2.norm':
|
| 36 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 37 |
+
'backbone.dinov2.blocks.20.norm':
|
| 38 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 39 |
+
'backbone.dinov2.blocks.21.norm':
|
| 40 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 41 |
+
'backbone.dinov2.blocks.22.norm':
|
| 42 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 43 |
+
'backbone.dinov2.blocks.23.norm':
|
| 44 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 45 |
+
'backbone.dinov2.blocks.3.norm':
|
| 46 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 47 |
+
'backbone.dinov2.blocks.4.norm':
|
| 48 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 49 |
+
'backbone.dinov2.blocks.5.norm':
|
| 50 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 51 |
+
'backbone.dinov2.blocks.6.norm':
|
| 52 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 53 |
+
'backbone.dinov2.blocks.7.norm':
|
| 54 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 55 |
+
'backbone.dinov2.blocks.8.norm':
|
| 56 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 57 |
+
'backbone.dinov2.blocks.9.norm':
|
| 58 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 59 |
+
'backbone.dinov2.norm':
|
| 60 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 61 |
+
'level_embed':
|
| 62 |
+
dict(decay_mult=0.0, lr_mult=1.0),
|
| 63 |
+
'pos_embed':
|
| 64 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 65 |
+
'query_embed':
|
| 66 |
+
dict(decay_mult=0.0, lr_mult=1.0),
|
| 67 |
+
'query_feat':
|
| 68 |
+
dict(decay_mult=0.0, lr_mult=1.0)
|
| 69 |
+
})
|
| 70 |
+
data_preprocessor = dict(
|
| 71 |
+
bgr_to_rgb=True,
|
| 72 |
+
mean=[
|
| 73 |
+
123.675,
|
| 74 |
+
116.28,
|
| 75 |
+
103.53,
|
| 76 |
+
],
|
| 77 |
+
pad_val=0,
|
| 78 |
+
seg_pad_val=255,
|
| 79 |
+
size=(
|
| 80 |
+
896,
|
| 81 |
+
896,
|
| 82 |
+
),
|
| 83 |
+
std=[
|
| 84 |
+
58.395,
|
| 85 |
+
57.12,
|
| 86 |
+
57.375,
|
| 87 |
+
],
|
| 88 |
+
type='SegDataPreProcessor')
|
| 89 |
+
data_root = '/mnt/data_ssd/limaopeng/limaopeng/segmentation/dataset/deep_fashion_10k'
|
| 90 |
+
dataset_type = 'HumanParsingDataset'
|
| 91 |
+
default_hooks = dict(
|
| 92 |
+
checkpoint=dict(
|
| 93 |
+
by_epoch=False,
|
| 94 |
+
interval=2000,
|
| 95 |
+
max_keep_ckpts=50,
|
| 96 |
+
save_best='mIoU',
|
| 97 |
+
type='CheckpointHook'),
|
| 98 |
+
logger=dict(interval=50, log_metric_by_epoch=False, type='LoggerHook'),
|
| 99 |
+
param_scheduler=dict(type='ParamSchedulerHook'),
|
| 100 |
+
sampler_seed=dict(type='DistSamplerSeedHook'),
|
| 101 |
+
timer=dict(type='IterTimerHook'),
|
| 102 |
+
visualization=dict(type='SegVisualizationHook'))
|
| 103 |
+
default_scope = 'mmseg'
|
| 104 |
+
embed_multi = dict(decay_mult=0.0, lr_mult=1.0)
|
| 105 |
+
env_cfg = dict(
|
| 106 |
+
cudnn_benchmark=True,
|
| 107 |
+
dist_cfg=dict(backend='nccl'),
|
| 108 |
+
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
|
| 109 |
+
find_unused_parameters = True
|
| 110 |
+
img_ratios = [
|
| 111 |
+
0.5,
|
| 112 |
+
0.75,
|
| 113 |
+
1.0,
|
| 114 |
+
1.25,
|
| 115 |
+
1.5,
|
| 116 |
+
1.75,
|
| 117 |
+
]
|
| 118 |
+
launcher = 'none'
|
| 119 |
+
load_from = '/mnt/data_ssd/limaopeng/limaopeng/segmentation/mmsegmentation/work_dirs/depth_anything_large_mask2former_16xb1_160k_human_parsing_896x896/best_mIoU_iter_110000.pth'
|
| 120 |
+
log_level = 'INFO'
|
| 121 |
+
log_processor = dict(by_epoch=False)
|
| 122 |
+
model = dict(
|
| 123 |
+
backbone=dict(
|
| 124 |
+
freeze=False,
|
| 125 |
+
# load_from='./checkpoints/depth_anything_vitl14.pth',
|
| 126 |
+
type='DINOv2',
|
| 127 |
+
version='large'),
|
| 128 |
+
data_preprocessor=dict(
|
| 129 |
+
bgr_to_rgb=True,
|
| 130 |
+
mean=[
|
| 131 |
+
123.675,
|
| 132 |
+
116.28,
|
| 133 |
+
103.53,
|
| 134 |
+
],
|
| 135 |
+
pad_val=0,
|
| 136 |
+
seg_pad_val=255,
|
| 137 |
+
size=(
|
| 138 |
+
896,
|
| 139 |
+
896,
|
| 140 |
+
),
|
| 141 |
+
std=[
|
| 142 |
+
58.395,
|
| 143 |
+
57.12,
|
| 144 |
+
57.375,
|
| 145 |
+
],
|
| 146 |
+
type='SegDataPreProcessor'),
|
| 147 |
+
decode_head=dict(
|
| 148 |
+
align_corners=False,
|
| 149 |
+
enforce_decoder_input_project=False,
|
| 150 |
+
feat_channels=1024,
|
| 151 |
+
in_channels=[
|
| 152 |
+
1024,
|
| 153 |
+
1024,
|
| 154 |
+
1024,
|
| 155 |
+
1024,
|
| 156 |
+
],
|
| 157 |
+
loss_boundary=dict(loss_weight=5.0, type='BoundaryLoss'),
|
| 158 |
+
loss_cls=dict(
|
| 159 |
+
class_weight=[
|
| 160 |
+
1.0,
|
| 161 |
+
1.0,
|
| 162 |
+
1.0,
|
| 163 |
+
1.0,
|
| 164 |
+
1.0,
|
| 165 |
+
1.0,
|
| 166 |
+
1.0,
|
| 167 |
+
1.0,
|
| 168 |
+
1.0,
|
| 169 |
+
1.0,
|
| 170 |
+
1.0,
|
| 171 |
+
1.0,
|
| 172 |
+
1.0,
|
| 173 |
+
1.0,
|
| 174 |
+
1.0,
|
| 175 |
+
1.0,
|
| 176 |
+
1.0,
|
| 177 |
+
1.0,
|
| 178 |
+
1.0,
|
| 179 |
+
1.0,
|
| 180 |
+
1.0,
|
| 181 |
+
1.0,
|
| 182 |
+
1.0,
|
| 183 |
+
1.0,
|
| 184 |
+
1.0,
|
| 185 |
+
1.0,
|
| 186 |
+
1.0,
|
| 187 |
+
1.0,
|
| 188 |
+
1.0,
|
| 189 |
+
1.0,
|
| 190 |
+
1.0,
|
| 191 |
+
1.0,
|
| 192 |
+
1.0,
|
| 193 |
+
1.0,
|
| 194 |
+
1.0,
|
| 195 |
+
1.0,
|
| 196 |
+
1.0,
|
| 197 |
+
1.0,
|
| 198 |
+
1.0,
|
| 199 |
+
1.0,
|
| 200 |
+
1.0,
|
| 201 |
+
1.0,
|
| 202 |
+
1.0,
|
| 203 |
+
0.1,
|
| 204 |
+
],
|
| 205 |
+
loss_weight=2.0,
|
| 206 |
+
reduction='mean',
|
| 207 |
+
type='mmdet.CrossEntropyLoss',
|
| 208 |
+
use_sigmoid=False),
|
| 209 |
+
loss_dice=dict(
|
| 210 |
+
activate=True,
|
| 211 |
+
eps=1.0,
|
| 212 |
+
loss_weight=5.0,
|
| 213 |
+
naive_dice=True,
|
| 214 |
+
reduction='mean',
|
| 215 |
+
type='mmdet.DiceLoss',
|
| 216 |
+
use_sigmoid=True),
|
| 217 |
+
loss_mask=dict(
|
| 218 |
+
loss_weight=5.0,
|
| 219 |
+
reduction='mean',
|
| 220 |
+
type='mmdet.CrossEntropyLoss',
|
| 221 |
+
use_sigmoid=True),
|
| 222 |
+
num_classes=43,
|
| 223 |
+
num_queries=200,
|
| 224 |
+
num_transformer_feat_level=3,
|
| 225 |
+
out_channels=1024,
|
| 226 |
+
pixel_decoder=dict(
|
| 227 |
+
act_cfg=dict(type='ReLU'),
|
| 228 |
+
encoder=dict(
|
| 229 |
+
init_cfg=None,
|
| 230 |
+
layer_cfg=dict(
|
| 231 |
+
ffn_cfg=dict(
|
| 232 |
+
act_cfg=dict(inplace=True, type='ReLU'),
|
| 233 |
+
embed_dims=1024,
|
| 234 |
+
feedforward_channels=4096,
|
| 235 |
+
ffn_drop=0.0,
|
| 236 |
+
num_fcs=2),
|
| 237 |
+
self_attn_cfg=dict(
|
| 238 |
+
batch_first=True,
|
| 239 |
+
dropout=0.0,
|
| 240 |
+
embed_dims=1024,
|
| 241 |
+
im2col_step=64,
|
| 242 |
+
init_cfg=None,
|
| 243 |
+
norm_cfg=None,
|
| 244 |
+
num_heads=32,
|
| 245 |
+
num_levels=3,
|
| 246 |
+
num_points=4)),
|
| 247 |
+
num_layers=6),
|
| 248 |
+
init_cfg=None,
|
| 249 |
+
norm_cfg=dict(num_groups=32, type='GN'),
|
| 250 |
+
num_outs=3,
|
| 251 |
+
positional_encoding=dict(normalize=True, num_feats=512),
|
| 252 |
+
type='mmdet.MSDeformAttnPixelDecoder'),
|
| 253 |
+
positional_encoding=dict(normalize=True, num_feats=512),
|
| 254 |
+
train_cfg=dict(
|
| 255 |
+
assigner=dict(
|
| 256 |
+
match_costs=[
|
| 257 |
+
dict(type='mmdet.ClassificationCost', weight=2.0),
|
| 258 |
+
dict(
|
| 259 |
+
type='mmdet.CrossEntropyLossCost',
|
| 260 |
+
use_sigmoid=True,
|
| 261 |
+
weight=5.0),
|
| 262 |
+
dict(
|
| 263 |
+
eps=1.0,
|
| 264 |
+
pred_act=True,
|
| 265 |
+
type='mmdet.DiceCost',
|
| 266 |
+
weight=5.0),
|
| 267 |
+
],
|
| 268 |
+
type='mmdet.HungarianAssigner'),
|
| 269 |
+
importance_sample_ratio=0.75,
|
| 270 |
+
num_points=12544,
|
| 271 |
+
oversample_ratio=3.0,
|
| 272 |
+
sampler=dict(type='mmdet.MaskPseudoSampler')),
|
| 273 |
+
transformer_decoder=dict(
|
| 274 |
+
init_cfg=None,
|
| 275 |
+
layer_cfg=dict(
|
| 276 |
+
cross_attn_cfg=dict(
|
| 277 |
+
attn_drop=0.0,
|
| 278 |
+
batch_first=True,
|
| 279 |
+
dropout_layer=None,
|
| 280 |
+
embed_dims=1024,
|
| 281 |
+
num_heads=32,
|
| 282 |
+
proj_drop=0.0),
|
| 283 |
+
ffn_cfg=dict(
|
| 284 |
+
act_cfg=dict(inplace=True, type='ReLU'),
|
| 285 |
+
add_identity=True,
|
| 286 |
+
dropout_layer=None,
|
| 287 |
+
embed_dims=1024,
|
| 288 |
+
feedforward_channels=4096,
|
| 289 |
+
ffn_drop=0.0,
|
| 290 |
+
num_fcs=2),
|
| 291 |
+
self_attn_cfg=dict(
|
| 292 |
+
attn_drop=0.0,
|
| 293 |
+
batch_first=True,
|
| 294 |
+
dropout_layer=None,
|
| 295 |
+
embed_dims=1024,
|
| 296 |
+
num_heads=32,
|
| 297 |
+
proj_drop=0.0)),
|
| 298 |
+
num_layers=9,
|
| 299 |
+
return_intermediate=True),
|
| 300 |
+
type='Mask2FormerHead'),
|
| 301 |
+
neck=dict(
|
| 302 |
+
embed_dim=1024, rescales=[
|
| 303 |
+
4,
|
| 304 |
+
2,
|
| 305 |
+
1,
|
| 306 |
+
0.5,
|
| 307 |
+
], type='Feature2Pyramid'),
|
| 308 |
+
test_cfg=dict(crop_size=(
|
| 309 |
+
896,
|
| 310 |
+
896,
|
| 311 |
+
), mode='slide', stride=(
|
| 312 |
+
426,
|
| 313 |
+
426,
|
| 314 |
+
)),
|
| 315 |
+
train_cfg=dict(),
|
| 316 |
+
type='EncoderDecoder')
|
| 317 |
+
num_classes = 43
|
| 318 |
+
optim_wrapper = dict(
|
| 319 |
+
clip_grad=dict(max_norm=0.01, norm_type=2),
|
| 320 |
+
optimizer=dict(
|
| 321 |
+
betas=(
|
| 322 |
+
0.9,
|
| 323 |
+
0.999,
|
| 324 |
+
),
|
| 325 |
+
eps=1e-08,
|
| 326 |
+
lr=3e-05,
|
| 327 |
+
type='AdamW',
|
| 328 |
+
weight_decay=0.05),
|
| 329 |
+
paramwise_cfg=dict(
|
| 330 |
+
custom_keys=dict({
|
| 331 |
+
'backbone.dinov2':
|
| 332 |
+
dict(decay_mult=1.0, lr_mult=0.1),
|
| 333 |
+
'backbone.dinov2.blocks.0.norm':
|
| 334 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 335 |
+
'backbone.dinov2.blocks.1.norm':
|
| 336 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 337 |
+
'backbone.dinov2.blocks.10.norm':
|
| 338 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 339 |
+
'backbone.dinov2.blocks.11.norm':
|
| 340 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 341 |
+
'backbone.dinov2.blocks.12.norm':
|
| 342 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 343 |
+
'backbone.dinov2.blocks.13.norm':
|
| 344 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 345 |
+
'backbone.dinov2.blocks.14.norm':
|
| 346 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 347 |
+
'backbone.dinov2.blocks.15.norm':
|
| 348 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 349 |
+
'backbone.dinov2.blocks.16.norm':
|
| 350 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 351 |
+
'backbone.dinov2.blocks.17.norm':
|
| 352 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 353 |
+
'backbone.dinov2.blocks.18.norm':
|
| 354 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 355 |
+
'backbone.dinov2.blocks.19.norm':
|
| 356 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 357 |
+
'backbone.dinov2.blocks.2.norm':
|
| 358 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 359 |
+
'backbone.dinov2.blocks.20.norm':
|
| 360 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 361 |
+
'backbone.dinov2.blocks.21.norm':
|
| 362 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 363 |
+
'backbone.dinov2.blocks.22.norm':
|
| 364 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 365 |
+
'backbone.dinov2.blocks.23.norm':
|
| 366 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 367 |
+
'backbone.dinov2.blocks.3.norm':
|
| 368 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 369 |
+
'backbone.dinov2.blocks.4.norm':
|
| 370 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 371 |
+
'backbone.dinov2.blocks.5.norm':
|
| 372 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 373 |
+
'backbone.dinov2.blocks.6.norm':
|
| 374 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 375 |
+
'backbone.dinov2.blocks.7.norm':
|
| 376 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 377 |
+
'backbone.dinov2.blocks.8.norm':
|
| 378 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 379 |
+
'backbone.dinov2.blocks.9.norm':
|
| 380 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 381 |
+
'backbone.dinov2.norm':
|
| 382 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 383 |
+
'level_embed':
|
| 384 |
+
dict(decay_mult=0.0, lr_mult=1.0),
|
| 385 |
+
'pos_embed':
|
| 386 |
+
dict(decay_mult=0.0, lr_mult=0.1),
|
| 387 |
+
'query_embed':
|
| 388 |
+
dict(decay_mult=0.0, lr_mult=1.0),
|
| 389 |
+
'query_feat':
|
| 390 |
+
dict(decay_mult=0.0, lr_mult=1.0)
|
| 391 |
+
}),
|
| 392 |
+
norm_decay_mult=0.0),
|
| 393 |
+
type='OptimWrapper')
|
| 394 |
+
# ---------------------------------------------------------------------------
# Optimizer: AdamW with a small fine-tuning learning rate. Note the
# paramwise_cfg in optim_wrapper (above) additionally scales the DINOv2
# backbone LR by 0.1 and disables weight decay on norm layers / embeddings.
# ---------------------------------------------------------------------------
optimizer = dict(
    betas=(
        0.9,
        0.999,
    ), eps=1e-08, lr=3e-05, type='AdamW', weight_decay=0.05)

# LR schedule: linear warmup for the first 1500 iters, then polynomial decay
# (power=0.9) down to 0 over the remaining iterations up to 300k.
param_scheduler = [
    dict(
        begin=0, by_epoch=False, end=1500, start_factor=1e-06,
        type='LinearLR'),
    dict(
        begin=1500,
        by_epoch=False,
        end=300000,
        eta_min=0.0,
        power=0.9,
        type='PolyLR'),
]
# Do not resume from a previous checkpoint by default.
resume = False
test_cfg = dict(type='TestLoop')

# Test loader: deterministic order, fixed 896x896 resize (aspect ratio NOT
# preserved, keep_ratio=False) so predictions align with the label maps.
test_dataloader = dict(
    batch_size=1,
    dataset=dict(
        data_prefix=dict(
            img_path='val20250512/images', seg_map_path='val20250512/labels'),
        data_root=
        '/mnt/data_ssd/limaopeng/limaopeng/segmentation/dataset/deep_fashion_10k',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(keep_ratio=False, scale=(
                896,
                896,
            ), type='Resize'),
            dict(reduce_zero_label=False, type='LoadAnnotations'),
            dict(type='PackSegInputs'),
        ],
        type='HumanParsingDataset'),
    num_workers=4,
    persistent_workers=True,
    sampler=dict(shuffle=False, type='DefaultSampler'))
# Report mean IoU on the test split.
test_evaluator = dict(
    iou_metrics=[
        'mIoU',
    ], type='IoUMetric')

# Standalone copy of the test-time pipeline (same transforms as the
# test_dataloader pipeline above).
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(keep_ratio=False, scale=(
        896,
        896,
    ), type='Resize'),
    dict(reduce_zero_label=False, type='LoadAnnotations'),
    dict(type='PackSegInputs'),
]

# Iteration-based training: 300k iterations, validating every 2000.
train_cfg = dict(
    max_iters=300000, type='IterBasedTrainLoop', val_interval=2000)

# Train loader: random scale jitter (0.2x-2.0x), random crop to 896x896,
# random rotation up to 45 degrees and photometric distortion for
# augmentation; InfiniteSampler pairs with the iteration-based loop.
train_dataloader = dict(
    batch_size=3,
    dataset=dict(
        data_prefix=dict(
            img_path='train20250512/images',
            seg_map_path='train20250512/labels'),
        data_root=
        '/mnt/data_ssd/limaopeng/limaopeng/segmentation/dataset/deep_fashion_10k',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations'),
            dict(
                keep_ratio=True,
                ratio_range=(
                    0.2,
                    2.0,
                ),
                scale=(
                    896,
                    896,
                ),
                type='RandomResize'),
            dict(
                cat_max_ratio=0.75, crop_size=(
                    896,
                    896,
                ), type='RandomCrop'),
            dict(keep_ratio=True, scale=(
                896,
                896,
            ), type='Resize'),
            dict(degree=45, prob=0.5, seg_pad_val=0, type='RandomRotate'),
            dict(type='PhotoMetricDistortion'),
            dict(type='PackSegInputs'),
        ],
        type='HumanParsingDataset'),
    num_workers=4,
    persistent_workers=True,
    sampler=dict(shuffle=True, type='InfiniteSampler'))

# Standalone copy of the training pipeline (mirrors train_dataloader above).
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(
        keep_ratio=True,
        ratio_range=(
            0.2,
            2.0,
        ),
        scale=(
            896,
            896,
        ),
        type='RandomResize'),
    dict(cat_max_ratio=0.75, crop_size=(
        896,
        896,
    ), type='RandomCrop'),
    dict(keep_ratio=True, scale=(
        896,
        896,
    ), type='Resize'),
    dict(degree=45, prob=0.5, seg_pad_val=0, type='RandomRotate'),
    dict(type='PhotoMetricDistortion'),
    dict(type='PackSegInputs'),
]

# Test-time augmentation: multi-scale (0.5x-1.75x) and horizontal-flip
# ensemble; RandomFlip with prob=0.0/1.0 yields the unflipped/flipped pair.
tta_model = dict(type='SegTTAModel')
tta_pipeline = [
    dict(backend_args=None, type='LoadImageFromFile'),
    dict(
        transforms=[
            [
                dict(keep_ratio=True, scale_factor=0.5, type='Resize'),
                dict(keep_ratio=True, scale_factor=0.75, type='Resize'),
                dict(keep_ratio=True, scale_factor=1.0, type='Resize'),
                dict(keep_ratio=True, scale_factor=1.25, type='Resize'),
                dict(keep_ratio=True, scale_factor=1.5, type='Resize'),
                dict(keep_ratio=True, scale_factor=1.75, type='Resize'),
            ],
            [
                dict(direction='horizontal', prob=0.0, type='RandomFlip'),
                dict(direction='horizontal', prob=1.0, type='RandomFlip'),
            ],
            [
                dict(type='LoadAnnotations'),
            ],
            [
                dict(type='PackSegInputs'),
            ],
        ],
        type='TestTimeAug'),
]

# Validation mirrors the test setup exactly (same split, pipeline, metric).
val_cfg = dict(type='ValLoop')
val_dataloader = dict(
    batch_size=1,
    dataset=dict(
        data_prefix=dict(
            img_path='val20250512/images', seg_map_path='val20250512/labels'),
        data_root=
        '/mnt/data_ssd/limaopeng/limaopeng/segmentation/dataset/deep_fashion_10k',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(keep_ratio=False, scale=(
                896,
                896,
            ), type='Resize'),
            dict(reduce_zero_label=False, type='LoadAnnotations'),
            dict(type='PackSegInputs'),
        ],
        type='HumanParsingDataset'),
    num_workers=4,
    persistent_workers=True,
    sampler=dict(shuffle=False, type='DefaultSampler'))
val_evaluator = dict(
    iou_metrics=[
        'mIoU',
    ], type='IoUMetric')

# Visualization: write results to local disk only.
vis_backends = [
    dict(type='LocalVisBackend'),
]
visualizer = dict(
    name='visualizer',
    type='SegLocalVisualizer',
    vis_backends=[
        dict(type='LocalVisBackend'),
    ])
# Output directory for logs and checkpoints. NOTE(review): "pasing"/"fasion"
# look like typos of "parsing"/"fashion" but the path is kept verbatim since
# other tooling may reference it.
work_dir = './work_dirs/depth_anything_large_mask2former_16xb1_160k_human_pasing_fasion_1024x1024_boundary_20250521'
|
head_extractor/build/lib/head_extractor/processor.py
ADDED
|
@@ -0,0 +1,585 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import numpy as np
|
| 3 |
+
from mmseg.apis import inference_model, init_model
|
| 4 |
+
from PIL import Image
|
| 5 |
+
import cv2
|
| 6 |
+
from enum import Enum
|
| 7 |
+
import importlib.resources
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
'''
|
| 11 |
+
Labels:
|
| 12 |
+
0: 'background' 1: 'top' 2: 'outer' 3: 'skirt'
|
| 13 |
+
4: 'dress' 5: 'pants' 6: 'leggings' 7: 'headwear'
|
| 14 |
+
8: 'eyeglass' 9: 'neckwear' 10: 'belt' 11: 'footwear'
|
| 15 |
+
12: 'bag' 13: 'hair' 14: 'face' 15: 'skin'
|
| 16 |
+
16: 'ring' 17: 'wrist_wearing' 18: 'socks' 19: 'gloves'
|
| 17 |
+
20: 'necklace' 21: 'rompers' 22: 'earrings' 23: 'tie'
|
| 18 |
+
24: Left_Foot
|
| 19 |
+
25: Left_Hand
|
| 20 |
+
26: Left_Lower_Arm
|
| 21 |
+
27: Left_Lower_Leg
|
| 22 |
+
28: Left_Upper_Arm
|
| 23 |
+
29: Left_Upper_Leg
|
| 24 |
+
30: Right_Foot
|
| 25 |
+
31: Right_Hand
|
| 26 |
+
32: Right_Lower_Arm
|
| 27 |
+
33: Right_Lower_Leg
|
| 28 |
+
34: Right_Upper_Arm
|
| 29 |
+
35: Right_Upper_Leg
|
| 30 |
+
36: Torso
|
| 31 |
+
'''
|
| 32 |
+
|
| 33 |
+
class PersonSeg:
    """Thin wrapper around an mmseg semantic-segmentation model for human parsing.

    Loads the model once at construction and exposes a single `process`
    call that returns the per-pixel class-id map.
    """

    def __init__(self, config_path, model_path, device='cuda'):
        # Build the mmseg model from a config file and checkpoint path.
        self.model = init_model(config_path, model_path, device=device)

    def process(self, image):
        """Run inference on one image and return the predicted label map.

        Args:
            image: input accepted by mmseg's ``inference_model`` (e.g. an
                image array or a file path — see mmseg docs).

        Returns:
            A 2-D array of integer class ids (the first/only sample's
            ``pred_sem_seg`` moved to CPU).
        """
        result = inference_model(self.model, image)
        pred_seg = result.pred_sem_seg.data.cpu().numpy()[0]
        return pred_seg
|
| 42 |
+
|
| 43 |
+
class TaskType(Enum):
    """Extraction targets supported by :class:`ProcessorPipeline`."""

    # Head/face region tasks
    face = "face"
    head = "head"
    head_plus_shoulders = "head_plus_shoulders"

    # Clothing-related tasks
    top_cloth = "top_cloth"
    bottom_cloth = "bottom_cloth"
    full_clothes = "full_clothes"

    # Whole-person task
    full_character = "full_character"
|
| 55 |
+
|
| 56 |
+
class ProcessorPipeline:
    """
    Extracts a mask for a requested content type (head, clothes, full
    character, ...) from a single image using a human-parsing model.
    """
    def __init__(self, seg_pipe: PersonSeg):
        self.seg_pipe = seg_pipe

    @classmethod
    def load(cls, device: str = 'cuda') -> "ProcessorPipeline":
        """
        Initialize the pipeline from the model config and checkpoint bundled
        inside the package. No external paths are required.
        """
        # Use importlib.resources to safely resolve files shipped with the package.
        with importlib.resources.path('head_extractor.models', 'depth_anything_large_mask2former_16xb1_160k_human_parsing_fashion_1024x1024.py') as config_path:
            with importlib.resources.path('head_extractor.models', 'ckpt.pth') as model_path:
                seg_pipe = PersonSeg(str(config_path), str(model_path), device=device)

        return cls(seg_pipe)

    def process(
        self,
        image: Image.Image,
        task_type: TaskType,
        long_edge: int = 1024
    ) -> tuple[np.ndarray, np.ndarray]:
        """
        Extract a mask for ``task_type`` from the image. The internal flow is
        NumPy-based for speed; both return values are arrays.

        Args:
            image: input image (PIL Image or NumPy array).
            task_type: which content to extract (e.g. head, face).
            long_edge (int): long-edge size the image is resized to before
                inference; smaller is faster.

        Returns:
            (resized input image as a NumPy array, mask as a uint8 NumPy array
            with values in 0-255)
        """
        # 1. Preprocess: normalize the input to an RGB NumPy array.
        if isinstance(image, Image.Image):
            image_np = np.array(image.convert("RGB"))
        else:  # assumed to already be a NumPy array
            image_np = image

        # Promote grayscale to 3 channels; drop an alpha channel if present.
        if len(image_np.shape) == 2:
            image_np = cv2.cvtColor(image_np, cv2.COLOR_GRAY2RGB)
        elif image_np.shape[2] == 4:
            image_np = cv2.cvtColor(image_np, cv2.COLOR_RGBA2RGB)

        processed_image_np = self.resize_long_edge(image_np, long_edge=long_edge)
        ori_h, ori_w = processed_image_np.shape[:2]

        # 2. Run segmentation (returns a per-pixel label map).
        pred_mask_map = self.seg_pipe.process(processed_image_np)

        if task_type == TaskType.head_plus_shoulders:
            # 2.1 Build the base "head" mask first.
            head_labels = [7, 8, 13, 14]  # headwear, eyeglass, hair, face
            head_mask = np.isin(pred_mask_map, head_labels).astype(np.float32)
            # Nearest-neighbor resize keeps the mask binary.
            head_mask = cv2.resize(head_mask, (ori_w, ori_h), interpolation=cv2.INTER_NEAREST)

            # 2.2 Compute the head bbox and expand it downward and sideways
            # to cover the shoulder area.
            rows = np.any(head_mask > 0, axis=1)
            cols = np.any(head_mask > 0, axis=0)
            if np.any(rows) and np.any(cols):
                rmin, rmax = np.where(rows)[0][[0, -1]]
                cmin, cmax = np.where(cols)[0][[0, -1]]
                h_box = max(1, rmax - rmin)
                w_box = max(1, cmax - cmin)

                down_ratio = 0.1  # downward expansion, relative to head bbox height
                side_ratio = 0.6  # sideways expansion, relative to head bbox width

                r2max = min(ori_h, rmax + int(h_box * down_ratio))
                c2min = max(0, cmin - int(w_box * side_ratio))
                c2max = min(ori_w, cmax + int(w_box * side_ratio))

                rect_mask = np.zeros((ori_h, ori_w), dtype=np.float32)
                rect_mask[rmin:r2max, c2min:c2max] = 1.0

                # 2.3 Inside the expanded rectangle, keep only person pixels
                # (filters out background).
                person_labels = list(range(1, 37))  # labels 1..36 are all person parts
                person_mask = np.isin(pred_mask_map, person_labels).astype(np.float32)
                person_mask = cv2.resize(person_mask, (ori_w, ori_h), interpolation=cv2.INTER_NEAREST)

                initial_mask = np.clip(head_mask + (person_mask * rect_mask), 0, 1)
            else:
                # No head detected: fall back to the (empty) head mask.
                initial_mask = head_mask
        else:
            # All other tasks: select the task's primary labels directly.
            labels_map = self._get_labels_for_task(task_type)
            primary_labels = labels_map['primary']
            initial_mask = np.isin(pred_mask_map, primary_labels).astype(np.float32)
            initial_mask = cv2.resize(initial_mask, (ori_w, ori_h), interpolation=cv2.INTER_NEAREST)

        # 3. Post-process (morphology strategy differs per task).
        final_mask_np = self._apply_task_specific_mask_processing(initial_mask, task_type, ori_h, ori_w)

        # 4. Return the resized image and a 0-255 uint8 mask.
        final_mask_uint8 = (final_mask_np * 255).astype(np.uint8)
        return processed_image_np, final_mask_uint8

    def _get_labels_for_task(self, task_type: TaskType) -> dict:
        """Return the parsing-label ids associated with a task type."""
        labels_map = {
            TaskType.face: { 'primary': [8, 14] },  # eyeglass, face
            TaskType.head: { 'primary': [7, 8, 13, 14] },  # headwear, eyeglass, hair, face
            TaskType.top_cloth: { 'primary': [1, 2] },  # top, outer
            TaskType.bottom_cloth: { 'primary': [3, 4, 5, 6] },  # skirt, dress, pants, leggings
            TaskType.full_clothes: { 'primary': [1, 2, 3, 4, 5, 6] },  # all clothes
            TaskType.full_character: { 'primary': list(range(1, 37)) },  # every person-related label
        }
        # Unknown task types yield an empty label set (-> empty mask).
        return labels_map.get(task_type, {'primary': []})

    def _apply_task_specific_mask_processing(self, mask: np.ndarray, task_type: TaskType, ori_h: int, ori_w: int) -> np.ndarray:
        """Apply task-specific morphological cleanup to a float mask in [0, 1]."""
        if task_type == TaskType.face:
            # Face task: simple dilation to loosen the boundary slightly.
            expand_kernel = 5
            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (expand_kernel, expand_kernel))
            mask = cv2.dilate((mask > 0.5).astype(np.float32), kernel)

        elif task_type == TaskType.head:
            # Head task: erode first (removes thin noise), then dilate.
            kernel = np.ones((7, 7), dtype=np.uint8)
            mask = cv2.erode(mask, kernel, iterations=1)

            expand_kernel = 11
            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (expand_kernel, expand_kernel))
            mask = cv2.dilate((mask > 0.5).astype(np.float32), kernel)

        elif task_type == TaskType.head_plus_shoulders:
            # Dilation biased toward vertical/side expansion compared to the
            # plain head task (taller kernel than wide).
            # Light erosion first to avoid ragged boundaries.
            erode_k = 5
            kernel = np.ones((erode_k, erode_k), dtype=np.uint8)
            mask = cv2.erode(mask, kernel, iterations=1)

            max_side = max(ori_h, ori_w)
            h_kernel = max(15, int(max_side * 0.05))  # taller
            w_kernel = max(11, int(max_side * 0.03))  # slightly narrower
            # Force odd kernel sizes (required for a symmetric structuring element).
            h_kernel = h_kernel // 2 * 2 + 1
            w_kernel = w_kernel // 2 * 2 + 1

            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (w_kernel, h_kernel))
            mask = cv2.dilate((mask > 0.5).astype(np.float32), kernel)

        if task_type in [TaskType.top_cloth, TaskType.bottom_cloth, TaskType.full_clothes, TaskType.full_character]:
            # Clothing/person tasks: dilation plus (nominal) blur.
            # NOTE(review): blur_kernel=1 makes the GaussianBlur effectively a
            # no-op — presumably intentional, but worth confirming.
            expand_ratio = 0.01
            max_side = max(ori_h, ori_w)
            blur_kernel = 1
            # Kernel scaled to image size, forced odd.
            expand_kernel = int(max_side * expand_ratio) // 2 * 2 + 1

            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (expand_kernel, expand_kernel))
            expanded = cv2.dilate((mask > 0.5).astype(np.uint8), kernel)

            blurred = cv2.GaussianBlur(
                expanded.astype(np.float32),
                (blur_kernel, blur_kernel),
                sigmaX=0,
            )
            # Renormalize to [0, 1]; epsilon guards against an all-zero mask.
            mask = np.clip(blurred / (blurred.max() + 1e-6), 0, 1)

        return mask

    @staticmethod
    def resize_long_edge(image_np: np.ndarray, long_edge=1024) -> np.ndarray:
        """Proportionally resize the image so its long edge is at most
        ``long_edge`` (OpenCV-based). Images already small enough are
        returned unchanged."""
        original_height, original_width = image_np.shape[:2]

        max_dimension = max(original_width, original_height)
        if max_dimension <= long_edge:
            return image_np

        ratio = long_edge / max_dimension
        new_width = int(original_width * ratio)
        new_height = int(original_height * ratio)

        # INTER_AREA is fast and high quality for downscaling.
        return cv2.resize(image_np, (new_width, new_height), interpolation=cv2.INTER_AREA)

    @staticmethod
    def _pad_to_square_np(image_np: np.ndarray, background_value: tuple) -> np.ndarray:
        """Center-pad a NumPy image to a square canvas filled with
        ``background_value`` (length must match the channel count)."""
        height, width = image_np.shape[:2]
        if width == height:
            return image_np

        max_dim = max(width, height)

        # Determine channel count (grayscale images have none).
        channels = image_np.shape[2] if len(image_np.shape) > 2 else 1

        # Create a correctly sized background canvas.
        padded_image = np.full((max_dim, max_dim, channels), background_value, dtype=image_np.dtype)

        paste_x = (max_dim - width) // 2
        paste_y = (max_dim - height) // 2

        padded_image[paste_y:paste_y+height, paste_x:paste_x+width] = image_np
        return padded_image

    @staticmethod
    def pad_to_square(image: Image.Image, background_color: tuple = (255, 255, 255)) -> Image.Image:
        """
        Center-pad a PIL image to a square.

        Args:
            image: input image
            background_color: fill color for the padding

        Returns:
            The square-padded image.
        """
        width, height = image.size
        if width == height:
            return image

        max_dim = max(width, height)
        padded_image = Image.new(image.mode, (max_dim, max_dim), background_color)
        paste_x = (max_dim - width) // 2
        paste_y = (max_dim - height) // 2
        padded_image.paste(image, (paste_x, paste_y))
        return padded_image

    def crop_image_by_mask(self, image: Image.Image, mask: Image.Image, padding: int = 20) -> Image.Image:
        """
        Crop the image to the bounding box of the mask's nonzero region.

        Args:
            image: original image
            mask: binary mask image
            padding: extra margin (pixels) around the bounding box

        Returns:
            The cropped image (the original image if the mask is empty).
        """
        # Convert to NumPy arrays.
        mask_np = np.array(mask)
        image_np = np.array(image)

        # Find the bounding box of nonzero mask pixels.
        rows = np.any(mask_np > 0, axis=1)
        cols = np.any(mask_np > 0, axis=0)

        if not np.any(rows) or not np.any(cols):
            # Empty mask: return the original image unchanged.
            return image

        # Bounding-box coordinates.
        rmin, rmax = np.where(rows)[0][[0, -1]]
        cmin, cmax = np.where(cols)[0][[0, -1]]

        # Add padding, clamped to image bounds.
        h, w = image_np.shape[:2]
        rmin = max(0, rmin - padding)
        rmax = min(h, rmax + padding + 1)
        cmin = max(0, cmin - padding)
        cmax = min(w, cmax + padding + 1)

        # Crop.
        cropped_image = image_np[rmin:rmax, cmin:cmax]

        return Image.fromarray(cropped_image)

    def _crop_image_and_mask_np(self, image_np: np.ndarray, mask_np: np.ndarray, padding: int = 20) -> tuple[np.ndarray, np.ndarray]:
        """Crop image and mask together to the mask's padded bounding box
        (NumPy in, NumPy out; inputs returned unchanged for an empty mask)."""
        rows = np.any(mask_np > 0, axis=1)
        cols = np.any(mask_np > 0, axis=0)

        if not np.any(rows) or not np.any(cols):
            return image_np, mask_np

        rmin, rmax = np.where(rows)[0][[0, -1]]
        cmin, cmax = np.where(cols)[0][[0, -1]]

        h, w = image_np.shape[:2]
        rmin = max(0, rmin - padding)
        rmax = min(h, rmax + padding + 1)
        cmin = max(0, cmin - padding)
        cmax = min(w, cmax + padding + 1)

        cropped_image_np = image_np[rmin:rmax, cmin:cmax]
        cropped_mask_np = mask_np[rmin:rmax, cmin:cmax]

        return cropped_image_np, cropped_mask_np

    def crop_image_and_mask(self, image: Image.Image, mask: Image.Image, padding: int = 20) -> tuple[Image.Image, Image.Image]:
        """Crop image and mask together (PIL variant), computing the bounding
        box once for both."""
        mask_np = np.array(mask)
        image_np = np.array(image)

        rows = np.any(mask_np > 0, axis=1)
        cols = np.any(mask_np > 0, axis=0)

        if not np.any(rows) or not np.any(cols):
            return image, mask

        rmin, rmax = np.where(rows)[0][[0, -1]]
        cmin, cmax = np.where(cols)[0][[0, -1]]

        h, w = image_np.shape[:2]
        rmin = max(0, rmin - padding)
        rmax = min(h, rmax + padding + 1)
        cmin = max(0, cmin - padding)
        cmax = min(w, cmax + padding + 1)

        cropped_image_np = image_np[rmin:rmax, cmin:cmax]
        cropped_mask_np = mask_np[rmin:rmax, cmin:cmax]

        return Image.fromarray(cropped_image_np), Image.fromarray(cropped_mask_np)

    def _apply_mask_to_image_np(self, image_np: np.ndarray, mask_np: np.ndarray, background_color: tuple) -> np.ndarray:
        """Composite the image over ``background_color`` using a 0-255 mask
        as the alpha (NumPy in, uint8 NumPy out)."""
        mask_normalized = mask_np.astype(np.float32) / 255.0
        background = np.full_like(image_np, background_color)
        result = image_np * mask_normalized[..., np.newaxis] + background * (1 - mask_normalized[..., np.newaxis])
        return result.astype(np.uint8)

    def apply_mask_to_image(self, image: Image.Image, mask: Image.Image, background_color: tuple = (255, 255, 255)) -> Image.Image:
        """
        Apply the mask to the image: areas outside the mask are replaced with
        the given background color.

        Args:
            image: original image
            mask: binary mask image
            background_color: background color (R, G, B)

        Returns:
            The masked image.
        """
        # Convert to NumPy arrays.
        image_np = np.array(image)
        mask_np = np.array(mask)

        # Normalize the mask to the 0-1 range.
        mask_normalized = mask_np.astype(np.float32) / 255.0

        # Build the background canvas.
        background = np.full_like(image_np, background_color)

        # Blend: masked area keeps the original image, the rest becomes background.
        result = image_np * mask_normalized[..., np.newaxis] + background * (1 - mask_normalized[..., np.newaxis])

        return Image.fromarray(result.astype(np.uint8))

    def extract_head(
        self,
        image: Image.Image,
        crop_padding: int = 10,
        background_color: tuple = (255, 255, 255),
        pad2square: bool = True,
        output_mode: str = 'RGB',
        long_edge: int = 1024,
        include_shoulders: bool = False
    ) -> Image.Image:
        """
        Extract the head region from the input image and return it cropped
        and (optionally) padded to a square.

        Args:
            image: input image (PIL.Image or np.ndarray).
            crop_padding: extra margin around the crop bounding box.
            background_color: background fill color when ``output_mode`` is 'RGB'.
            pad2square (bool): pad the final result to a square. Default True.
            output_mode (str): 'RGB' (solid background) or 'RGBA'
                (transparent background). Default 'RGB'.
            long_edge (int): long-edge size used before inference; smaller is
                faster but may reduce accuracy. Default 1024.
            include_shoulders (bool): also include the shoulder region.

        Returns:
            The processed head image (PIL.Image).

        Raises:
            ValueError: if ``output_mode`` is neither 'RGB' nor 'RGBA'.
        """
        # 1. Choose the task based on whether shoulders are requested.
        task = TaskType.head_plus_shoulders if include_shoulders else TaskType.head
        processed_image_np, head_mask_np = self.process(
            image=image,
            task_type=task,
            long_edge=long_edge
        )

        # 2. NumPy-based crop of image and mask together.
        face_cropped_np, mask_cropped_np = self._crop_image_and_mask_np(
            processed_image_np, head_mask_np, padding=crop_padding
        )

        # 3. Apply the mask according to the output mode (RGB/RGBA).
        output_mode = output_mode.upper()
        if output_mode == 'RGBA':
            # Build an RGBA image with the mask as its alpha channel.
            # Ensure the image is 3-channel first.
            if face_cropped_np.shape[2] == 4:
                face_cropped_np = face_cropped_np[:,:,:3]
            # Create the RGBA image.
            result_image_np = cv2.cvtColor(face_cropped_np, cv2.COLOR_RGB2RGBA)
            result_image_np[:, :, 3] = mask_cropped_np  # set the alpha channel

        elif output_mode == 'RGB':
            # NumPy-based mask compositing over a solid background.
            result_image_np = self._apply_mask_to_image_np(
                face_cropped_np,
                mask_cropped_np,
                background_color=background_color
            )
        else:
            raise ValueError("output_mode must be 'RGB' or 'RGBA'")

        # 4. Optional NumPy-based square padding.
        if pad2square:
            if output_mode == 'RGBA':
                pad_color = (255, 255, 255, 0)  # transparent padding
            else:  # RGB
                pad_color = background_color

            final_image_np = self._pad_to_square_np(
                result_image_np,
                background_value=pad_color
            )
        else:
            final_image_np = result_image_np

        # 5. Convert to a PIL Image only at the very end.
        if output_mode == 'RGBA':
            return Image.fromarray(final_image_np, 'RGBA')
        else:
            return Image.fromarray(final_image_np, 'RGB')


    def extract(
        self,
        # NOTE(review): the original annotation was ``TaskType.full_character``
        # (an enum *member*, not a type); corrected to the enum class.
        task_type: TaskType,
        image: Image.Image,
        crop_padding: int = 10,
        background_color: tuple = (255, 255, 255),
        pad2square: bool = True,
        output_mode: str = 'RGB',
        long_edge: int = 1024
    ) -> Image.Image:
        """
        Extract the region selected by ``task_type`` from the input image and
        return it cropped and (optionally) padded to a square.

        Args:
            task_type: which content to extract (see :class:`TaskType`).
            image: input image (PIL.Image or np.ndarray).
            crop_padding: extra margin around the crop bounding box.
            background_color: background fill color when ``output_mode`` is 'RGB'.
            pad2square (bool): pad the final result to a square. Default True.
            output_mode (str): 'RGB' (solid background) or 'RGBA'
                (transparent background). Default 'RGB'.
            long_edge (int): long-edge size used before inference; smaller is
                faster but may reduce accuracy. Default 1024.

        Returns:
            The processed image (PIL.Image).

        Raises:
            ValueError: if ``output_mode`` is neither 'RGB' nor 'RGBA'.
        """
        # 1. Run segmentation; results come back as NumPy arrays.
        processed_image_np, head_mask_np = self.process(
            image=image,
            task_type=task_type,
            long_edge=long_edge
        )

        # 2. NumPy-based crop of image and mask together.
        face_cropped_np, mask_cropped_np = self._crop_image_and_mask_np(
            processed_image_np, head_mask_np, padding=crop_padding
        )

        # 3. Apply the mask according to the output mode (RGB/RGBA).
        output_mode = output_mode.upper()
        if output_mode == 'RGBA':
            # Build an RGBA image with the mask as its alpha channel.
            # Ensure the image is 3-channel first.
            if face_cropped_np.shape[2] == 4:
                face_cropped_np = face_cropped_np[:,:,:3]
            # Create the RGBA image.
            result_image_np = cv2.cvtColor(face_cropped_np, cv2.COLOR_RGB2RGBA)
            result_image_np[:, :, 3] = mask_cropped_np  # set the alpha channel

        elif output_mode == 'RGB':
            # NumPy-based mask compositing over a solid background.
            result_image_np = self._apply_mask_to_image_np(
                face_cropped_np,
                mask_cropped_np,
                background_color=background_color
            )
        else:
            raise ValueError("output_mode must be 'RGB' or 'RGBA'")

        # 4. Optional NumPy-based square padding.
        if pad2square:
            if output_mode == 'RGBA':
                pad_color = (255, 255, 255, 0)  # transparent padding
            else:  # RGB
                pad_color = background_color

            final_image_np = self._pad_to_square_np(
                result_image_np,
                background_value=pad_color
            )
        else:
            final_image_np = result_image_np

        # 5. Convert to a PIL Image only at the very end.
        if output_mode == 'RGBA':
            return Image.fromarray(final_image_np, 'RGBA')
        else:
            return Image.fromarray(final_image_np, 'RGB')
|
| 558 |
+
|
| 559 |
+
if __name__ == '__main__':
    # Example of initializing and using the pipeline.
    print("Initializing pipeline from package resources...")
    pipeline = ProcessorPipeline.load()
    print("Pipeline initialized.")

    # Usage example (an input image must be provided):

    # Replace with your own image path.
    image_path = "001.jpg"
    if os.path.exists(image_path):
        print(f"Processing image: {image_path}")
        image = Image.open(image_path)

        print("正在提取头部...")
        extracted_head = pipeline.extract_head(image)

        # Save the final result.
        output_path = "output_head_extracted.png"
        extracted_head.save(output_path)

        print("\n处理完成!")
        print(f"已保存提取的头部图像至 '{output_path}'")

    else:
        print(f"示例图片未找到: {image_path}")
|
| 585 |
+
|
head_extractor/build/lib/mmdet/__init__.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
import mmcv
|
| 3 |
+
import mmengine
|
| 4 |
+
from mmengine.utils import digit_version
|
| 5 |
+
|
| 6 |
+
from .version import __version__, version_info
|
| 7 |
+
|
| 8 |
+
mmcv_minimum_version = '2.0.0rc4'
|
| 9 |
+
mmcv_maximum_version = '2.2.0'
|
| 10 |
+
mmcv_version = digit_version(mmcv.__version__)
|
| 11 |
+
|
| 12 |
+
mmengine_minimum_version = '0.7.1'
|
| 13 |
+
mmengine_maximum_version = '1.0.0'
|
| 14 |
+
mmengine_version = digit_version(mmengine.__version__)
|
| 15 |
+
|
| 16 |
+
assert (mmcv_version >= digit_version(mmcv_minimum_version)
|
| 17 |
+
and mmcv_version < digit_version(mmcv_maximum_version)), \
|
| 18 |
+
f'MMCV=={mmcv.__version__} is used but incompatible. ' \
|
| 19 |
+
f'Please install mmcv>={mmcv_minimum_version}, <{mmcv_maximum_version}.'
|
| 20 |
+
|
| 21 |
+
assert (mmengine_version >= digit_version(mmengine_minimum_version)
|
| 22 |
+
and mmengine_version < digit_version(mmengine_maximum_version)), \
|
| 23 |
+
f'MMEngine=={mmengine.__version__} is used but incompatible. ' \
|
| 24 |
+
f'Please install mmengine>={mmengine_minimum_version}, ' \
|
| 25 |
+
f'<{mmengine_maximum_version}.'
|
| 26 |
+
|
| 27 |
+
__all__ = ['__version__', 'version_info', 'digit_version']
|
head_extractor/build/lib/mmdet/apis/__init__.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
from .det_inferencer import DetInferencer
|
| 3 |
+
from .inference import (async_inference_detector, inference_detector,
|
| 4 |
+
inference_mot, init_detector, init_track_model)
|
| 5 |
+
|
| 6 |
+
__all__ = [
|
| 7 |
+
'init_detector', 'async_inference_detector', 'inference_detector',
|
| 8 |
+
'DetInferencer', 'inference_mot', 'init_track_model'
|
| 9 |
+
]
|
head_extractor/build/lib/mmdet/apis/det_inferencer.py
ADDED
|
@@ -0,0 +1,652 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
import copy
|
| 3 |
+
import os.path as osp
|
| 4 |
+
import warnings
|
| 5 |
+
from typing import Dict, Iterable, List, Optional, Sequence, Tuple, Union
|
| 6 |
+
|
| 7 |
+
import mmcv
|
| 8 |
+
import mmengine
|
| 9 |
+
import numpy as np
|
| 10 |
+
import torch.nn as nn
|
| 11 |
+
from mmcv.transforms import LoadImageFromFile
|
| 12 |
+
from mmengine.dataset import Compose
|
| 13 |
+
from mmengine.fileio import (get_file_backend, isdir, join_path,
|
| 14 |
+
list_dir_or_file)
|
| 15 |
+
from mmengine.infer.infer import BaseInferencer, ModelType
|
| 16 |
+
from mmengine.model.utils import revert_sync_batchnorm
|
| 17 |
+
from mmengine.registry import init_default_scope
|
| 18 |
+
from mmengine.runner.checkpoint import _load_checkpoint_to_model
|
| 19 |
+
from mmengine.visualization import Visualizer
|
| 20 |
+
from rich.progress import track
|
| 21 |
+
|
| 22 |
+
from mmdet.evaluation import INSTANCE_OFFSET
|
| 23 |
+
from mmdet.registry import DATASETS
|
| 24 |
+
from mmdet.structures import DetDataSample
|
| 25 |
+
from mmdet.structures.mask import encode_mask_results, mask2bbox
|
| 26 |
+
from mmdet.utils import ConfigType
|
| 27 |
+
from ..evaluation import get_classes
|
| 28 |
+
|
| 29 |
+
try:
|
| 30 |
+
from panopticapi.evaluation import VOID
|
| 31 |
+
from panopticapi.utils import id2rgb
|
| 32 |
+
except ImportError:
|
| 33 |
+
id2rgb = None
|
| 34 |
+
VOID = None
|
| 35 |
+
|
| 36 |
+
InputType = Union[str, np.ndarray]
|
| 37 |
+
InputsType = Union[InputType, Sequence[InputType]]
|
| 38 |
+
PredType = List[DetDataSample]
|
| 39 |
+
ImgType = Union[np.ndarray, Sequence[np.ndarray]]
|
| 40 |
+
|
| 41 |
+
IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif',
|
| 42 |
+
'.tiff', '.webp')
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
class DetInferencer(BaseInferencer):
|
| 46 |
+
"""Object Detection Inferencer.
|
| 47 |
+
|
| 48 |
+
Args:
|
| 49 |
+
model (str, optional): Path to the config file or the model name
|
| 50 |
+
defined in metafile. For example, it could be
|
| 51 |
+
"rtmdet-s" or 'rtmdet_s_8xb32-300e_coco' or
|
| 52 |
+
"configs/rtmdet/rtmdet_s_8xb32-300e_coco.py".
|
| 53 |
+
If model is not specified, user must provide the
|
| 54 |
+
`weights` saved by MMEngine which contains the config string.
|
| 55 |
+
Defaults to None.
|
| 56 |
+
weights (str, optional): Path to the checkpoint. If it is not specified
|
| 57 |
+
and model is a model name of metafile, the weights will be loaded
|
| 58 |
+
from metafile. Defaults to None.
|
| 59 |
+
device (str, optional): Device to run inference. If None, the available
|
| 60 |
+
device will be automatically used. Defaults to None.
|
| 61 |
+
scope (str, optional): The scope of the model. Defaults to mmdet.
|
| 62 |
+
palette (str): Color palette used for visualization. The order of
|
| 63 |
+
priority is palette -> config -> checkpoint. Defaults to 'none'.
|
| 64 |
+
show_progress (bool): Control whether to display the progress
|
| 65 |
+
bar during the inference process. Defaults to True.
|
| 66 |
+
"""
|
| 67 |
+
|
| 68 |
+
preprocess_kwargs: set = set()
|
| 69 |
+
forward_kwargs: set = set()
|
| 70 |
+
visualize_kwargs: set = {
|
| 71 |
+
'return_vis',
|
| 72 |
+
'show',
|
| 73 |
+
'wait_time',
|
| 74 |
+
'draw_pred',
|
| 75 |
+
'pred_score_thr',
|
| 76 |
+
'img_out_dir',
|
| 77 |
+
'no_save_vis',
|
| 78 |
+
}
|
| 79 |
+
postprocess_kwargs: set = {
|
| 80 |
+
'print_result',
|
| 81 |
+
'pred_out_dir',
|
| 82 |
+
'return_datasamples',
|
| 83 |
+
'no_save_pred',
|
| 84 |
+
}
|
| 85 |
+
|
| 86 |
+
def __init__(self,
|
| 87 |
+
model: Optional[Union[ModelType, str]] = None,
|
| 88 |
+
weights: Optional[str] = None,
|
| 89 |
+
device: Optional[str] = None,
|
| 90 |
+
scope: Optional[str] = 'mmdet',
|
| 91 |
+
palette: str = 'none',
|
| 92 |
+
show_progress: bool = True) -> None:
|
| 93 |
+
# A global counter tracking the number of images processed, for
|
| 94 |
+
# naming of the output images
|
| 95 |
+
self.num_visualized_imgs = 0
|
| 96 |
+
self.num_predicted_imgs = 0
|
| 97 |
+
self.palette = palette
|
| 98 |
+
init_default_scope(scope)
|
| 99 |
+
super().__init__(
|
| 100 |
+
model=model, weights=weights, device=device, scope=scope)
|
| 101 |
+
self.model = revert_sync_batchnorm(self.model)
|
| 102 |
+
self.show_progress = show_progress
|
| 103 |
+
|
| 104 |
+
def _load_weights_to_model(self, model: nn.Module,
|
| 105 |
+
checkpoint: Optional[dict],
|
| 106 |
+
cfg: Optional[ConfigType]) -> None:
|
| 107 |
+
"""Loading model weights and meta information from cfg and checkpoint.
|
| 108 |
+
|
| 109 |
+
Args:
|
| 110 |
+
model (nn.Module): Model to load weights and meta information.
|
| 111 |
+
checkpoint (dict, optional): The loaded checkpoint.
|
| 112 |
+
cfg (Config or ConfigDict, optional): The loaded config.
|
| 113 |
+
"""
|
| 114 |
+
|
| 115 |
+
if checkpoint is not None:
|
| 116 |
+
_load_checkpoint_to_model(model, checkpoint)
|
| 117 |
+
checkpoint_meta = checkpoint.get('meta', {})
|
| 118 |
+
# save the dataset_meta in the model for convenience
|
| 119 |
+
if 'dataset_meta' in checkpoint_meta:
|
| 120 |
+
# mmdet 3.x, all keys should be lowercase
|
| 121 |
+
model.dataset_meta = {
|
| 122 |
+
k.lower(): v
|
| 123 |
+
for k, v in checkpoint_meta['dataset_meta'].items()
|
| 124 |
+
}
|
| 125 |
+
elif 'CLASSES' in checkpoint_meta:
|
| 126 |
+
# < mmdet 3.x
|
| 127 |
+
classes = checkpoint_meta['CLASSES']
|
| 128 |
+
model.dataset_meta = {'classes': classes}
|
| 129 |
+
else:
|
| 130 |
+
warnings.warn(
|
| 131 |
+
'dataset_meta or class names are not saved in the '
|
| 132 |
+
'checkpoint\'s meta data, use COCO classes by default.')
|
| 133 |
+
model.dataset_meta = {'classes': get_classes('coco')}
|
| 134 |
+
else:
|
| 135 |
+
warnings.warn('Checkpoint is not loaded, and the inference '
|
| 136 |
+
'result is calculated by the randomly initialized '
|
| 137 |
+
'model!')
|
| 138 |
+
warnings.warn('weights is None, use COCO classes by default.')
|
| 139 |
+
model.dataset_meta = {'classes': get_classes('coco')}
|
| 140 |
+
|
| 141 |
+
# Priority: args.palette -> config -> checkpoint
|
| 142 |
+
if self.palette != 'none':
|
| 143 |
+
model.dataset_meta['palette'] = self.palette
|
| 144 |
+
else:
|
| 145 |
+
test_dataset_cfg = copy.deepcopy(cfg.test_dataloader.dataset)
|
| 146 |
+
# lazy init. We only need the metainfo.
|
| 147 |
+
test_dataset_cfg['lazy_init'] = True
|
| 148 |
+
metainfo = DATASETS.build(test_dataset_cfg).metainfo
|
| 149 |
+
cfg_palette = metainfo.get('palette', None)
|
| 150 |
+
if cfg_palette is not None:
|
| 151 |
+
model.dataset_meta['palette'] = cfg_palette
|
| 152 |
+
else:
|
| 153 |
+
if 'palette' not in model.dataset_meta:
|
| 154 |
+
warnings.warn(
|
| 155 |
+
'palette does not exist, random is used by default. '
|
| 156 |
+
'You can also set the palette to customize.')
|
| 157 |
+
model.dataset_meta['palette'] = 'random'
|
| 158 |
+
|
| 159 |
+
def _init_pipeline(self, cfg: ConfigType) -> Compose:
|
| 160 |
+
"""Initialize the test pipeline."""
|
| 161 |
+
pipeline_cfg = cfg.test_dataloader.dataset.pipeline
|
| 162 |
+
|
| 163 |
+
# For inference, the key of ``img_id`` is not used.
|
| 164 |
+
if 'meta_keys' in pipeline_cfg[-1]:
|
| 165 |
+
pipeline_cfg[-1]['meta_keys'] = tuple(
|
| 166 |
+
meta_key for meta_key in pipeline_cfg[-1]['meta_keys']
|
| 167 |
+
if meta_key != 'img_id')
|
| 168 |
+
|
| 169 |
+
load_img_idx = self._get_transform_idx(
|
| 170 |
+
pipeline_cfg, ('LoadImageFromFile', LoadImageFromFile))
|
| 171 |
+
if load_img_idx == -1:
|
| 172 |
+
raise ValueError(
|
| 173 |
+
'LoadImageFromFile is not found in the test pipeline')
|
| 174 |
+
pipeline_cfg[load_img_idx]['type'] = 'mmdet.InferencerLoader'
|
| 175 |
+
return Compose(pipeline_cfg)
|
| 176 |
+
|
| 177 |
+
def _get_transform_idx(self, pipeline_cfg: ConfigType,
|
| 178 |
+
name: Union[str, Tuple[str, type]]) -> int:
|
| 179 |
+
"""Returns the index of the transform in a pipeline.
|
| 180 |
+
|
| 181 |
+
If the transform is not found, returns -1.
|
| 182 |
+
"""
|
| 183 |
+
for i, transform in enumerate(pipeline_cfg):
|
| 184 |
+
if transform['type'] in name:
|
| 185 |
+
return i
|
| 186 |
+
return -1
|
| 187 |
+
|
| 188 |
+
def _init_visualizer(self, cfg: ConfigType) -> Optional[Visualizer]:
|
| 189 |
+
"""Initialize visualizers.
|
| 190 |
+
|
| 191 |
+
Args:
|
| 192 |
+
cfg (ConfigType): Config containing the visualizer information.
|
| 193 |
+
|
| 194 |
+
Returns:
|
| 195 |
+
Visualizer or None: Visualizer initialized with config.
|
| 196 |
+
"""
|
| 197 |
+
visualizer = super()._init_visualizer(cfg)
|
| 198 |
+
visualizer.dataset_meta = self.model.dataset_meta
|
| 199 |
+
return visualizer
|
| 200 |
+
|
| 201 |
+
def _inputs_to_list(self, inputs: InputsType) -> list:
|
| 202 |
+
"""Preprocess the inputs to a list.
|
| 203 |
+
|
| 204 |
+
Preprocess inputs to a list according to its type:
|
| 205 |
+
|
| 206 |
+
- list or tuple: return inputs
|
| 207 |
+
- str:
|
| 208 |
+
- Directory path: return all files in the directory
|
| 209 |
+
- other cases: return a list containing the string. The string
|
| 210 |
+
could be a path to file, a url or other types of string according
|
| 211 |
+
to the task.
|
| 212 |
+
|
| 213 |
+
Args:
|
| 214 |
+
inputs (InputsType): Inputs for the inferencer.
|
| 215 |
+
|
| 216 |
+
Returns:
|
| 217 |
+
list: List of input for the :meth:`preprocess`.
|
| 218 |
+
"""
|
| 219 |
+
if isinstance(inputs, str):
|
| 220 |
+
backend = get_file_backend(inputs)
|
| 221 |
+
if hasattr(backend, 'isdir') and isdir(inputs):
|
| 222 |
+
# Backends like HttpsBackend do not implement `isdir`, so only
|
| 223 |
+
# those backends that implement `isdir` could accept the inputs
|
| 224 |
+
# as a directory
|
| 225 |
+
filename_list = list_dir_or_file(
|
| 226 |
+
inputs, list_dir=False, suffix=IMG_EXTENSIONS)
|
| 227 |
+
inputs = [
|
| 228 |
+
join_path(inputs, filename) for filename in filename_list
|
| 229 |
+
]
|
| 230 |
+
|
| 231 |
+
if not isinstance(inputs, (list, tuple)):
|
| 232 |
+
inputs = [inputs]
|
| 233 |
+
|
| 234 |
+
return list(inputs)
|
| 235 |
+
|
| 236 |
+
def preprocess(self, inputs: InputsType, batch_size: int = 1, **kwargs):
|
| 237 |
+
"""Process the inputs into a model-feedable format.
|
| 238 |
+
|
| 239 |
+
Customize your preprocess by overriding this method. Preprocess should
|
| 240 |
+
return an iterable object, of which each item will be used as the
|
| 241 |
+
input of ``model.test_step``.
|
| 242 |
+
|
| 243 |
+
``BaseInferencer.preprocess`` will return an iterable chunked data,
|
| 244 |
+
which will be used in __call__ like this:
|
| 245 |
+
|
| 246 |
+
.. code-block:: python
|
| 247 |
+
|
| 248 |
+
def __call__(self, inputs, batch_size=1, **kwargs):
|
| 249 |
+
chunked_data = self.preprocess(inputs, batch_size, **kwargs)
|
| 250 |
+
for batch in chunked_data:
|
| 251 |
+
preds = self.forward(batch, **kwargs)
|
| 252 |
+
|
| 253 |
+
Args:
|
| 254 |
+
inputs (InputsType): Inputs given by user.
|
| 255 |
+
batch_size (int): batch size. Defaults to 1.
|
| 256 |
+
|
| 257 |
+
Yields:
|
| 258 |
+
Any: Data processed by the ``pipeline`` and ``collate_fn``.
|
| 259 |
+
"""
|
| 260 |
+
chunked_data = self._get_chunk_data(inputs, batch_size)
|
| 261 |
+
yield from map(self.collate_fn, chunked_data)
|
| 262 |
+
|
| 263 |
+
def _get_chunk_data(self, inputs: Iterable, chunk_size: int):
|
| 264 |
+
"""Get batch data from inputs.
|
| 265 |
+
|
| 266 |
+
Args:
|
| 267 |
+
inputs (Iterable): An iterable dataset.
|
| 268 |
+
chunk_size (int): Equivalent to batch size.
|
| 269 |
+
|
| 270 |
+
Yields:
|
| 271 |
+
list: batch data.
|
| 272 |
+
"""
|
| 273 |
+
inputs_iter = iter(inputs)
|
| 274 |
+
while True:
|
| 275 |
+
try:
|
| 276 |
+
chunk_data = []
|
| 277 |
+
for _ in range(chunk_size):
|
| 278 |
+
inputs_ = next(inputs_iter)
|
| 279 |
+
if isinstance(inputs_, dict):
|
| 280 |
+
if 'img' in inputs_:
|
| 281 |
+
ori_inputs_ = inputs_['img']
|
| 282 |
+
else:
|
| 283 |
+
ori_inputs_ = inputs_['img_path']
|
| 284 |
+
chunk_data.append(
|
| 285 |
+
(ori_inputs_,
|
| 286 |
+
self.pipeline(copy.deepcopy(inputs_))))
|
| 287 |
+
else:
|
| 288 |
+
chunk_data.append((inputs_, self.pipeline(inputs_)))
|
| 289 |
+
yield chunk_data
|
| 290 |
+
except StopIteration:
|
| 291 |
+
if chunk_data:
|
| 292 |
+
yield chunk_data
|
| 293 |
+
break
|
| 294 |
+
|
| 295 |
+
# TODO: Video and Webcam are currently not supported and
|
| 296 |
+
# may consume too much memory if your input folder has a lot of images.
|
| 297 |
+
# We will be optimized later.
|
| 298 |
+
def __call__(
|
| 299 |
+
self,
|
| 300 |
+
inputs: InputsType,
|
| 301 |
+
batch_size: int = 1,
|
| 302 |
+
return_vis: bool = False,
|
| 303 |
+
show: bool = False,
|
| 304 |
+
wait_time: int = 0,
|
| 305 |
+
no_save_vis: bool = False,
|
| 306 |
+
draw_pred: bool = True,
|
| 307 |
+
pred_score_thr: float = 0.3,
|
| 308 |
+
return_datasamples: bool = False,
|
| 309 |
+
print_result: bool = False,
|
| 310 |
+
no_save_pred: bool = True,
|
| 311 |
+
out_dir: str = '',
|
| 312 |
+
# by open image task
|
| 313 |
+
texts: Optional[Union[str, list]] = None,
|
| 314 |
+
# by open panoptic task
|
| 315 |
+
stuff_texts: Optional[Union[str, list]] = None,
|
| 316 |
+
# by GLIP and Grounding DINO
|
| 317 |
+
custom_entities: bool = False,
|
| 318 |
+
# by Grounding DINO
|
| 319 |
+
tokens_positive: Optional[Union[int, list]] = None,
|
| 320 |
+
**kwargs) -> dict:
|
| 321 |
+
"""Call the inferencer.
|
| 322 |
+
|
| 323 |
+
Args:
|
| 324 |
+
inputs (InputsType): Inputs for the inferencer.
|
| 325 |
+
batch_size (int): Inference batch size. Defaults to 1.
|
| 326 |
+
show (bool): Whether to display the visualization results in a
|
| 327 |
+
popup window. Defaults to False.
|
| 328 |
+
wait_time (float): The interval of show (s). Defaults to 0.
|
| 329 |
+
no_save_vis (bool): Whether to force not to save prediction
|
| 330 |
+
vis results. Defaults to False.
|
| 331 |
+
draw_pred (bool): Whether to draw predicted bounding boxes.
|
| 332 |
+
Defaults to True.
|
| 333 |
+
pred_score_thr (float): Minimum score of bboxes to draw.
|
| 334 |
+
Defaults to 0.3.
|
| 335 |
+
return_datasamples (bool): Whether to return results as
|
| 336 |
+
:obj:`DetDataSample`. Defaults to False.
|
| 337 |
+
print_result (bool): Whether to print the inference result w/o
|
| 338 |
+
visualization to the console. Defaults to False.
|
| 339 |
+
no_save_pred (bool): Whether to force not to save prediction
|
| 340 |
+
results. Defaults to True.
|
| 341 |
+
out_dir: Dir to save the inference results or
|
| 342 |
+
visualization. If left as empty, no file will be saved.
|
| 343 |
+
Defaults to ''.
|
| 344 |
+
texts (str | list[str]): Text prompts. Defaults to None.
|
| 345 |
+
stuff_texts (str | list[str]): Stuff text prompts of open
|
| 346 |
+
panoptic task. Defaults to None.
|
| 347 |
+
custom_entities (bool): Whether to use custom entities.
|
| 348 |
+
Defaults to False. Only used in GLIP and Grounding DINO.
|
| 349 |
+
**kwargs: Other keyword arguments passed to :meth:`preprocess`,
|
| 350 |
+
:meth:`forward`, :meth:`visualize` and :meth:`postprocess`.
|
| 351 |
+
Each key in kwargs should be in the corresponding set of
|
| 352 |
+
``preprocess_kwargs``, ``forward_kwargs``, ``visualize_kwargs``
|
| 353 |
+
and ``postprocess_kwargs``.
|
| 354 |
+
|
| 355 |
+
Returns:
|
| 356 |
+
dict: Inference and visualization results.
|
| 357 |
+
"""
|
| 358 |
+
(
|
| 359 |
+
preprocess_kwargs,
|
| 360 |
+
forward_kwargs,
|
| 361 |
+
visualize_kwargs,
|
| 362 |
+
postprocess_kwargs,
|
| 363 |
+
) = self._dispatch_kwargs(**kwargs)
|
| 364 |
+
|
| 365 |
+
ori_inputs = self._inputs_to_list(inputs)
|
| 366 |
+
|
| 367 |
+
if texts is not None and isinstance(texts, str):
|
| 368 |
+
texts = [texts] * len(ori_inputs)
|
| 369 |
+
if stuff_texts is not None and isinstance(stuff_texts, str):
|
| 370 |
+
stuff_texts = [stuff_texts] * len(ori_inputs)
|
| 371 |
+
|
| 372 |
+
# Currently only supports bs=1
|
| 373 |
+
tokens_positive = [tokens_positive] * len(ori_inputs)
|
| 374 |
+
|
| 375 |
+
if texts is not None:
|
| 376 |
+
assert len(texts) == len(ori_inputs)
|
| 377 |
+
for i in range(len(texts)):
|
| 378 |
+
if isinstance(ori_inputs[i], str):
|
| 379 |
+
ori_inputs[i] = {
|
| 380 |
+
'text': texts[i],
|
| 381 |
+
'img_path': ori_inputs[i],
|
| 382 |
+
'custom_entities': custom_entities,
|
| 383 |
+
'tokens_positive': tokens_positive[i]
|
| 384 |
+
}
|
| 385 |
+
else:
|
| 386 |
+
ori_inputs[i] = {
|
| 387 |
+
'text': texts[i],
|
| 388 |
+
'img': ori_inputs[i],
|
| 389 |
+
'custom_entities': custom_entities,
|
| 390 |
+
'tokens_positive': tokens_positive[i]
|
| 391 |
+
}
|
| 392 |
+
if stuff_texts is not None:
|
| 393 |
+
assert len(stuff_texts) == len(ori_inputs)
|
| 394 |
+
for i in range(len(stuff_texts)):
|
| 395 |
+
ori_inputs[i]['stuff_text'] = stuff_texts[i]
|
| 396 |
+
|
| 397 |
+
inputs = self.preprocess(
|
| 398 |
+
ori_inputs, batch_size=batch_size, **preprocess_kwargs)
|
| 399 |
+
|
| 400 |
+
results_dict = {'predictions': [], 'visualization': []}
|
| 401 |
+
for ori_imgs, data in (track(inputs, description='Inference')
|
| 402 |
+
if self.show_progress else inputs):
|
| 403 |
+
preds = self.forward(data, **forward_kwargs)
|
| 404 |
+
visualization = self.visualize(
|
| 405 |
+
ori_imgs,
|
| 406 |
+
preds,
|
| 407 |
+
return_vis=return_vis,
|
| 408 |
+
show=show,
|
| 409 |
+
wait_time=wait_time,
|
| 410 |
+
draw_pred=draw_pred,
|
| 411 |
+
pred_score_thr=pred_score_thr,
|
| 412 |
+
no_save_vis=no_save_vis,
|
| 413 |
+
img_out_dir=out_dir,
|
| 414 |
+
**visualize_kwargs)
|
| 415 |
+
results = self.postprocess(
|
| 416 |
+
preds,
|
| 417 |
+
visualization,
|
| 418 |
+
return_datasamples=return_datasamples,
|
| 419 |
+
print_result=print_result,
|
| 420 |
+
no_save_pred=no_save_pred,
|
| 421 |
+
pred_out_dir=out_dir,
|
| 422 |
+
**postprocess_kwargs)
|
| 423 |
+
results_dict['predictions'].extend(results['predictions'])
|
| 424 |
+
if results['visualization'] is not None:
|
| 425 |
+
results_dict['visualization'].extend(results['visualization'])
|
| 426 |
+
return results_dict
|
| 427 |
+
|
| 428 |
+
def visualize(self,
|
| 429 |
+
inputs: InputsType,
|
| 430 |
+
preds: PredType,
|
| 431 |
+
return_vis: bool = False,
|
| 432 |
+
show: bool = False,
|
| 433 |
+
wait_time: int = 0,
|
| 434 |
+
draw_pred: bool = True,
|
| 435 |
+
pred_score_thr: float = 0.3,
|
| 436 |
+
no_save_vis: bool = False,
|
| 437 |
+
img_out_dir: str = '',
|
| 438 |
+
**kwargs) -> Union[List[np.ndarray], None]:
|
| 439 |
+
"""Visualize predictions.
|
| 440 |
+
|
| 441 |
+
Args:
|
| 442 |
+
inputs (List[Union[str, np.ndarray]]): Inputs for the inferencer.
|
| 443 |
+
preds (List[:obj:`DetDataSample`]): Predictions of the model.
|
| 444 |
+
return_vis (bool): Whether to return the visualization result.
|
| 445 |
+
Defaults to False.
|
| 446 |
+
show (bool): Whether to display the image in a popup window.
|
| 447 |
+
Defaults to False.
|
| 448 |
+
wait_time (float): The interval of show (s). Defaults to 0.
|
| 449 |
+
draw_pred (bool): Whether to draw predicted bounding boxes.
|
| 450 |
+
Defaults to True.
|
| 451 |
+
pred_score_thr (float): Minimum score of bboxes to draw.
|
| 452 |
+
Defaults to 0.3.
|
| 453 |
+
no_save_vis (bool): Whether to force not to save prediction
|
| 454 |
+
vis results. Defaults to False.
|
| 455 |
+
img_out_dir (str): Output directory of visualization results.
|
| 456 |
+
If left as empty, no file will be saved. Defaults to ''.
|
| 457 |
+
|
| 458 |
+
Returns:
|
| 459 |
+
List[np.ndarray] or None: Returns visualization results only if
|
| 460 |
+
applicable.
|
| 461 |
+
"""
|
| 462 |
+
if no_save_vis is True:
|
| 463 |
+
img_out_dir = ''
|
| 464 |
+
|
| 465 |
+
if not show and img_out_dir == '' and not return_vis:
|
| 466 |
+
return None
|
| 467 |
+
|
| 468 |
+
if self.visualizer is None:
|
| 469 |
+
raise ValueError('Visualization needs the "visualizer" term'
|
| 470 |
+
'defined in the config, but got None.')
|
| 471 |
+
|
| 472 |
+
results = []
|
| 473 |
+
|
| 474 |
+
for single_input, pred in zip(inputs, preds):
|
| 475 |
+
if isinstance(single_input, str):
|
| 476 |
+
img_bytes = mmengine.fileio.get(single_input)
|
| 477 |
+
img = mmcv.imfrombytes(img_bytes)
|
| 478 |
+
img = img[:, :, ::-1]
|
| 479 |
+
img_name = osp.basename(single_input)
|
| 480 |
+
elif isinstance(single_input, np.ndarray):
|
| 481 |
+
img = single_input.copy()
|
| 482 |
+
img_num = str(self.num_visualized_imgs).zfill(8)
|
| 483 |
+
img_name = f'{img_num}.jpg'
|
| 484 |
+
else:
|
| 485 |
+
raise ValueError('Unsupported input type: '
|
| 486 |
+
f'{type(single_input)}')
|
| 487 |
+
|
| 488 |
+
out_file = osp.join(img_out_dir, 'vis',
|
| 489 |
+
img_name) if img_out_dir != '' else None
|
| 490 |
+
|
| 491 |
+
self.visualizer.add_datasample(
|
| 492 |
+
img_name,
|
| 493 |
+
img,
|
| 494 |
+
pred,
|
| 495 |
+
show=show,
|
| 496 |
+
wait_time=wait_time,
|
| 497 |
+
draw_gt=False,
|
| 498 |
+
draw_pred=draw_pred,
|
| 499 |
+
pred_score_thr=pred_score_thr,
|
| 500 |
+
out_file=out_file,
|
| 501 |
+
)
|
| 502 |
+
results.append(self.visualizer.get_image())
|
| 503 |
+
self.num_visualized_imgs += 1
|
| 504 |
+
|
| 505 |
+
return results
|
| 506 |
+
|
| 507 |
+
def postprocess(
|
| 508 |
+
self,
|
| 509 |
+
preds: PredType,
|
| 510 |
+
visualization: Optional[List[np.ndarray]] = None,
|
| 511 |
+
return_datasamples: bool = False,
|
| 512 |
+
print_result: bool = False,
|
| 513 |
+
no_save_pred: bool = False,
|
| 514 |
+
pred_out_dir: str = '',
|
| 515 |
+
**kwargs,
|
| 516 |
+
) -> Dict:
|
| 517 |
+
"""Process the predictions and visualization results from ``forward``
|
| 518 |
+
and ``visualize``.
|
| 519 |
+
|
| 520 |
+
This method should be responsible for the following tasks:
|
| 521 |
+
|
| 522 |
+
1. Convert datasamples into a json-serializable dict if needed.
|
| 523 |
+
2. Pack the predictions and visualization results and return them.
|
| 524 |
+
3. Dump or log the predictions.
|
| 525 |
+
|
| 526 |
+
Args:
|
| 527 |
+
preds (List[:obj:`DetDataSample`]): Predictions of the model.
|
| 528 |
+
visualization (Optional[np.ndarray]): Visualized predictions.
|
| 529 |
+
return_datasamples (bool): Whether to use Datasample to store
|
| 530 |
+
inference results. If False, dict will be used.
|
| 531 |
+
print_result (bool): Whether to print the inference result w/o
|
| 532 |
+
visualization to the console. Defaults to False.
|
| 533 |
+
no_save_pred (bool): Whether to force not to save prediction
|
| 534 |
+
results. Defaults to False.
|
| 535 |
+
pred_out_dir: Dir to save the inference results w/o
|
| 536 |
+
visualization. If left as empty, no file will be saved.
|
| 537 |
+
Defaults to ''.
|
| 538 |
+
|
| 539 |
+
Returns:
|
| 540 |
+
dict: Inference and visualization results with key ``predictions``
|
| 541 |
+
and ``visualization``.
|
| 542 |
+
|
| 543 |
+
- ``visualization`` (Any): Returned by :meth:`visualize`.
|
| 544 |
+
- ``predictions`` (dict or DataSample): Returned by
|
| 545 |
+
:meth:`forward` and processed in :meth:`postprocess`.
|
| 546 |
+
If ``return_datasamples=False``, it usually should be a
|
| 547 |
+
json-serializable dict containing only basic data elements such
|
| 548 |
+
as strings and numbers.
|
| 549 |
+
"""
|
| 550 |
+
if no_save_pred is True:
|
| 551 |
+
pred_out_dir = ''
|
| 552 |
+
|
| 553 |
+
result_dict = {}
|
| 554 |
+
results = preds
|
| 555 |
+
if not return_datasamples:
|
| 556 |
+
results = []
|
| 557 |
+
for pred in preds:
|
| 558 |
+
result = self.pred2dict(pred, pred_out_dir)
|
| 559 |
+
results.append(result)
|
| 560 |
+
elif pred_out_dir != '':
|
| 561 |
+
warnings.warn('Currently does not support saving datasample '
|
| 562 |
+
'when return_datasamples is set to True. '
|
| 563 |
+
'Prediction results are not saved!')
|
| 564 |
+
# Add img to the results after printing and dumping
|
| 565 |
+
result_dict['predictions'] = results
|
| 566 |
+
if print_result:
|
| 567 |
+
print(result_dict)
|
| 568 |
+
result_dict['visualization'] = visualization
|
| 569 |
+
return result_dict
|
| 570 |
+
|
| 571 |
+
# TODO: The data format and fields saved in json need further discussion.
|
| 572 |
+
# Maybe should include model name, timestamp, filename, image info etc.
|
| 573 |
+
def pred2dict(self,
|
| 574 |
+
data_sample: DetDataSample,
|
| 575 |
+
pred_out_dir: str = '') -> Dict:
|
| 576 |
+
"""Extract elements necessary to represent a prediction into a
|
| 577 |
+
dictionary.
|
| 578 |
+
|
| 579 |
+
It's better to contain only basic data elements such as strings and
|
| 580 |
+
numbers in order to guarantee it's json-serializable.
|
| 581 |
+
|
| 582 |
+
Args:
|
| 583 |
+
data_sample (:obj:`DetDataSample`): Predictions of the model.
|
| 584 |
+
pred_out_dir: Dir to save the inference results w/o
|
| 585 |
+
visualization. If left as empty, no file will be saved.
|
| 586 |
+
Defaults to ''.
|
| 587 |
+
|
| 588 |
+
Returns:
|
| 589 |
+
dict: Prediction results.
|
| 590 |
+
"""
|
| 591 |
+
is_save_pred = True
|
| 592 |
+
if pred_out_dir == '':
|
| 593 |
+
is_save_pred = False
|
| 594 |
+
|
| 595 |
+
if is_save_pred and 'img_path' in data_sample:
|
| 596 |
+
img_path = osp.basename(data_sample.img_path)
|
| 597 |
+
img_path = osp.splitext(img_path)[0]
|
| 598 |
+
out_img_path = osp.join(pred_out_dir, 'preds',
|
| 599 |
+
img_path + '_panoptic_seg.png')
|
| 600 |
+
out_json_path = osp.join(pred_out_dir, 'preds', img_path + '.json')
|
| 601 |
+
elif is_save_pred:
|
| 602 |
+
out_img_path = osp.join(
|
| 603 |
+
pred_out_dir, 'preds',
|
| 604 |
+
f'{self.num_predicted_imgs}_panoptic_seg.png')
|
| 605 |
+
out_json_path = osp.join(pred_out_dir, 'preds',
|
| 606 |
+
f'{self.num_predicted_imgs}.json')
|
| 607 |
+
self.num_predicted_imgs += 1
|
| 608 |
+
|
| 609 |
+
result = {}
|
| 610 |
+
if 'pred_instances' in data_sample:
|
| 611 |
+
masks = data_sample.pred_instances.get('masks')
|
| 612 |
+
pred_instances = data_sample.pred_instances.numpy()
|
| 613 |
+
result = {
|
| 614 |
+
'labels': pred_instances.labels.tolist(),
|
| 615 |
+
'scores': pred_instances.scores.tolist()
|
| 616 |
+
}
|
| 617 |
+
if 'bboxes' in pred_instances:
|
| 618 |
+
result['bboxes'] = pred_instances.bboxes.tolist()
|
| 619 |
+
if masks is not None:
|
| 620 |
+
if 'bboxes' not in pred_instances or pred_instances.bboxes.sum(
|
| 621 |
+
) == 0:
|
| 622 |
+
# Fake bbox, such as the SOLO.
|
| 623 |
+
bboxes = mask2bbox(masks.cpu()).numpy().tolist()
|
| 624 |
+
result['bboxes'] = bboxes
|
| 625 |
+
encode_masks = encode_mask_results(pred_instances.masks)
|
| 626 |
+
for encode_mask in encode_masks:
|
| 627 |
+
if isinstance(encode_mask['counts'], bytes):
|
| 628 |
+
encode_mask['counts'] = encode_mask['counts'].decode()
|
| 629 |
+
result['masks'] = encode_masks
|
| 630 |
+
|
| 631 |
+
if 'pred_panoptic_seg' in data_sample:
|
| 632 |
+
if VOID is None:
|
| 633 |
+
raise RuntimeError(
|
| 634 |
+
'panopticapi is not installed, please install it by: '
|
| 635 |
+
'pip install git+https://github.com/cocodataset/'
|
| 636 |
+
'panopticapi.git.')
|
| 637 |
+
|
| 638 |
+
pan = data_sample.pred_panoptic_seg.sem_seg.cpu().numpy()[0]
|
| 639 |
+
pan[pan % INSTANCE_OFFSET == len(
|
| 640 |
+
self.model.dataset_meta['classes'])] = VOID
|
| 641 |
+
pan = id2rgb(pan).astype(np.uint8)
|
| 642 |
+
|
| 643 |
+
if is_save_pred:
|
| 644 |
+
mmcv.imwrite(pan[:, :, ::-1], out_img_path)
|
| 645 |
+
result['panoptic_seg_path'] = out_img_path
|
| 646 |
+
else:
|
| 647 |
+
result['panoptic_seg'] = pan
|
| 648 |
+
|
| 649 |
+
if is_save_pred:
|
| 650 |
+
mmengine.dump(result, out_json_path)
|
| 651 |
+
|
| 652 |
+
return result
|
head_extractor/build/lib/mmdet/apis/inference.py
ADDED
|
@@ -0,0 +1,372 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
import copy
|
| 3 |
+
import warnings
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import Optional, Sequence, Union
|
| 6 |
+
|
| 7 |
+
import numpy as np
|
| 8 |
+
import torch
|
| 9 |
+
import torch.nn as nn
|
| 10 |
+
from mmcv.ops import RoIPool
|
| 11 |
+
from mmcv.transforms import Compose
|
| 12 |
+
from mmengine.config import Config
|
| 13 |
+
from mmengine.dataset import default_collate
|
| 14 |
+
from mmengine.model.utils import revert_sync_batchnorm
|
| 15 |
+
from mmengine.registry import init_default_scope
|
| 16 |
+
from mmengine.runner import load_checkpoint
|
| 17 |
+
|
| 18 |
+
from mmdet.registry import DATASETS
|
| 19 |
+
from mmdet.utils import ConfigType
|
| 20 |
+
from ..evaluation import get_classes
|
| 21 |
+
from ..registry import MODELS
|
| 22 |
+
from ..structures import DetDataSample, SampleList
|
| 23 |
+
from ..utils import get_test_pipeline_cfg
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def init_detector(
    config: Union[str, Path, Config],
    checkpoint: Optional[str] = None,
    palette: str = 'none',
    device: str = 'cuda:0',
    cfg_options: Optional[dict] = None,
) -> nn.Module:
    """Initialize a detector from config file.

    Args:
        config (str, :obj:`Path`, or :obj:`mmengine.Config`): Config file path,
            :obj:`Path`, or the config object.
        checkpoint (str, optional): Checkpoint path. If left as None, the model
            will not load any weights.
        palette (str): Color palette used for visualization. If palette
            is stored in checkpoint, use checkpoint's palette first, otherwise
            use externally passed palette. Currently, supports 'coco', 'voc',
            'citys' and 'random'. Defaults to none.
        device (str): The device where the anchors will be put on.
            Defaults to cuda:0.
        cfg_options (dict, optional): Options to override some settings in
            the used config.

    Returns:
        nn.Module: The constructed detector.

    Raises:
        TypeError: If ``config`` is neither a path nor a ``Config`` object.
    """
    if isinstance(config, (str, Path)):
        config = Config.fromfile(config)
    elif not isinstance(config, Config):
        raise TypeError('config must be a filename or Config object, '
                        f'but got {type(config)}')
    if cfg_options is not None:
        config.merge_from_dict(cfg_options)
    elif 'init_cfg' in config.model.backbone:
        # No explicit overrides: drop the backbone's init_cfg so building the
        # model does not load pretrained backbone weights at construction
        # time (the real weights are loaded from `checkpoint` below).
        config.model.backbone.init_cfg = None

    # Make string-typed entries in the config resolve against the correct
    # registry scope (defaults to 'mmdet').
    scope = config.get('default_scope', 'mmdet')
    if scope is not None:
        init_default_scope(config.get('default_scope', 'mmdet'))

    model = MODELS.build(config.model)
    # SyncBN requires a distributed launch; convert to plain BN for
    # single-process inference.
    model = revert_sync_batchnorm(model)
    if checkpoint is None:
        warnings.simplefilter('once')
        warnings.warn('checkpoint is None, use COCO classes by default.')
        model.dataset_meta = {'classes': get_classes('coco')}
    else:
        checkpoint = load_checkpoint(model, checkpoint, map_location='cpu')
        # Weights converted from elsewhere may not have meta fields.
        checkpoint_meta = checkpoint.get('meta', {})

        # save the dataset_meta in the model for convenience
        if 'dataset_meta' in checkpoint_meta:
            # mmdet 3.x, all keys should be lowercase
            model.dataset_meta = {
                k.lower(): v
                for k, v in checkpoint_meta['dataset_meta'].items()
            }
        elif 'CLASSES' in checkpoint_meta:
            # < mmdet 3.x stored class names under the uppercase key.
            classes = checkpoint_meta['CLASSES']
            model.dataset_meta = {'classes': classes}
        else:
            warnings.simplefilter('once')
            warnings.warn(
                'dataset_meta or class names are not saved in the '
                'checkpoint\'s meta data, use COCO classes by default.')
            model.dataset_meta = {'classes': get_classes('coco')}

    # Priority: args.palette -> config -> checkpoint
    if palette != 'none':
        model.dataset_meta['palette'] = palette
    else:
        test_dataset_cfg = copy.deepcopy(config.test_dataloader.dataset)
        # lazy init. We only need the metainfo.
        test_dataset_cfg['lazy_init'] = True
        metainfo = DATASETS.build(test_dataset_cfg).metainfo
        cfg_palette = metainfo.get('palette', None)
        if cfg_palette is not None:
            model.dataset_meta['palette'] = cfg_palette
        else:
            if 'palette' not in model.dataset_meta:
                warnings.warn(
                    'palette does not exist, random is used by default. '
                    'You can also set the palette to customize.')
                model.dataset_meta['palette'] = 'random'

    model.cfg = config  # save the config in the model for convenience
    model.to(device)
    model.eval()
    return model
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
# Accepted image inputs: a single file path or loaded array, or a sequence
# of either.
ImagesType = Union[str, np.ndarray, Sequence[str], Sequence[np.ndarray]]
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
def inference_detector(
    model: nn.Module,
    imgs: ImagesType,
    test_pipeline: Optional[Compose] = None,
    text_prompt: Optional[str] = None,
    custom_entities: bool = False,
) -> Union[DetDataSample, SampleList]:
    """Inference image(s) with the detector.

    Args:
        model (nn.Module): The loaded detector.
        imgs (str, ndarray, Sequence[str/ndarray]):
            Either image files or loaded images.
        test_pipeline (:obj:`Compose`): Test pipeline.

    Returns:
        :obj:`DetDataSample` or list[:obj:`DetDataSample`]:
        If imgs is a list or tuple, the same length list type results
        will be returned, otherwise return the detection results directly.
    """
    # Normalize to a list, remembering whether the caller passed one image.
    single_input = not isinstance(imgs, (list, tuple))
    if single_input:
        imgs = [imgs]

    cfg = model.cfg

    if test_pipeline is None:
        cfg = cfg.copy()
        test_pipeline = get_test_pipeline_cfg(cfg)
        if isinstance(imgs[0], np.ndarray):
            # Calling this method across libraries will result
            # in module unregistered error if not prefixed with mmdet.
            test_pipeline[0].type = 'mmdet.LoadImageFromNDArray'

        test_pipeline = Compose(test_pipeline)

    if model.data_preprocessor.device.type == 'cpu':
        # RoIPool has no CPU path; fail loudly instead of crashing later.
        assert not any(
            isinstance(m, RoIPool) for m in model.modules()
        ), 'CPU inference with RoIPool is not supported currently.'

    outputs = []
    for img in imgs:
        # prepare data
        # TODO: remove img_id.
        if isinstance(img, np.ndarray):
            sample = dict(img=img, img_id=0)
        else:
            sample = dict(img_path=img, img_id=0)

        if text_prompt:
            sample['text'] = text_prompt
            sample['custom_entities'] = custom_entities

        # build the data pipeline
        sample = test_pipeline(sample)

        # Wrap as a batch of one, the shape test_step expects.
        sample['inputs'] = [sample['inputs']]
        sample['data_samples'] = [sample['data_samples']]

        # forward the model
        with torch.no_grad():
            outputs.append(model.test_step(sample)[0])

    return outputs[0] if single_input else outputs
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
# TODO: Awaiting refactoring
async def async_inference_detector(model, imgs):
    """Async inference image(s) with the detector.

    Args:
        model (nn.Module): The loaded detector.
        imgs (str | ndarray | list): Either image files or loaded images.

    Returns:
        Awaitable detection results.
    """
    if not isinstance(imgs, (list, tuple)):
        imgs = [imgs]

    cfg = model.cfg

    if isinstance(imgs[0], np.ndarray):
        cfg = cfg.copy()
        # set loading pipeline type
        cfg.data.test.pipeline[0].type = 'LoadImageFromNDArray'

    # cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline)
    test_pipeline = Compose(cfg.data.test.pipeline)

    datas = []
    for img in imgs:
        # prepare data
        if isinstance(img, np.ndarray):
            # directly add img
            data = dict(img=img)
        else:
            # add information into dict
            data = dict(img_info=dict(filename=img), img_prefix=None)
        # build the data pipeline
        data = test_pipeline(data)
        datas.append(data)

    for m in model.modules():
        assert not isinstance(
            m,
            RoIPool), 'CPU inference with RoIPool is not supported currently.'

    # We don't restore `torch.is_grad_enabled()` value during concurrent
    # inference since execution can overlap
    torch.set_grad_enabled(False)
    # Fix: forward the whole accumulated batch. The previous code passed
    # `data` (the last loop item), silently discarding all other images.
    results = await model.aforward_test(datas, rescale=True)
    return results
|
| 246 |
+
|
| 247 |
+
|
| 248 |
+
def build_test_pipeline(cfg: ConfigType) -> ConfigType:
    """Build test_pipeline for mot/vis demo. In mot/vis infer, original
    test_pipeline should remove the "LoadImageFromFile" and
    "LoadTrackAnnotations".

    Args:
        cfg (ConfigDict): The loaded config.
    Returns:
        ConfigType: new test_pipeline
    """
    # remove the "LoadImageFromFile" and "LoadTrackAnnotations" in pipeline
    transform_broadcaster = cfg.test_dataloader.dataset.pipeline[0].copy()
    for transform in transform_broadcaster['transforms']:
        if transform['type'] == 'Resize':
            # NOTE(review): this replaces the whole transform *list* with the
            # single Resize transform dict (only Resize is kept). The `for`
            # iterator is already bound to the original list, so iteration is
            # unaffected and a later Resize entry would win. Matches upstream
            # mmdet, but confirm this is the intended behavior.
            transform_broadcaster['transforms'] = transform
    pack_track_inputs = cfg.test_dataloader.dataset.pipeline[-1].copy()
    test_pipeline = Compose([transform_broadcaster, pack_track_inputs])

    return test_pipeline
|
| 267 |
+
|
| 268 |
+
|
| 269 |
+
def inference_mot(model: nn.Module, img: np.ndarray, frame_id: int,
                  video_len: int) -> SampleList:
    """Inference image(s) with the mot model.

    Args:
        model (nn.Module): The loaded mot model.
        img (np.ndarray): Loaded image.
        frame_id (int): frame id.
        video_len (int): demo video length
    Returns:
        SampleList: The tracking data samples.
    """
    cfg = model.cfg
    # Package the single frame the way the tracking pipeline expects:
    # every field wrapped in a one-element list.
    packed = dict(
        img=[img.astype(np.float32)],
        frame_id=[frame_id],
        ori_shape=[img.shape[:2]],
        img_id=[frame_id + 1],
        ori_video_length=[video_len])

    pipeline = build_test_pipeline(cfg)
    packed = pipeline(packed)

    if not next(model.parameters()).is_cuda:
        # RoIPool has no CPU implementation.
        assert not any(
            isinstance(m, RoIPool) for m in model.modules()
        ), 'CPU inference with RoIPool is not supported currently.'

    # forward the model
    with torch.no_grad():
        batch = default_collate([packed])
        result = model.test_step(batch)[0]
    return result
|
| 303 |
+
|
| 304 |
+
|
| 305 |
+
def init_track_model(config: Union[str, Config],
                     checkpoint: Optional[str] = None,
                     detector: Optional[str] = None,
                     reid: Optional[str] = None,
                     device: str = 'cuda:0',
                     cfg_options: Optional[dict] = None) -> nn.Module:
    """Initialize a model from config file.

    Args:
        config (str or :obj:`mmengine.Config`): Config file path or the config
            object.
        checkpoint (Optional[str], optional): Checkpoint path. Defaults to
            None.
        detector (Optional[str], optional): Detector Checkpoint path, use in
            some tracking algorithms like sort. Defaults to None.
        reid (Optional[str], optional): Reid checkpoint path. use in
            some tracking algorithms like sort. Defaults to None.
        device (str, optional): The device that the model inferences on.
            Defaults to `cuda:0`.
        cfg_options (Optional[dict], optional): Options to override some
            settings in the used config. Defaults to None.

    Returns:
        nn.Module: The constructed model.

    Raises:
        TypeError: If ``config`` is neither a path nor a ``Config`` object.
    """
    if isinstance(config, str):
        config = Config.fromfile(config)
    elif not isinstance(config, Config):
        raise TypeError('config must be a filename or Config object, '
                        f'but got {type(config)}')
    if cfg_options is not None:
        config.merge_from_dict(cfg_options)

    model = MODELS.build(config.model)

    if checkpoint is not None:
        checkpoint = load_checkpoint(model, checkpoint, map_location='cpu')
        # Weights converted from elsewhere may not have meta fields.
        checkpoint_meta = checkpoint.get('meta', {})
        # save the dataset_meta in the model for convenience
        if 'dataset_meta' in checkpoint_meta:
            if 'CLASSES' in checkpoint_meta['dataset_meta']:
                # Normalize the pre-3.x uppercase key to the 3.x lowercase one.
                value = checkpoint_meta['dataset_meta'].pop('CLASSES')
                checkpoint_meta['dataset_meta']['classes'] = value
            model.dataset_meta = checkpoint_meta['dataset_meta']

    if detector is not None:
        # A full-model checkpoint already carries detector weights; loading
        # both would silently overwrite one set with the other.
        assert not (checkpoint and detector), \
            'Error: checkpoint and detector checkpoint cannot both exist'
        load_checkpoint(model.detector, detector, map_location='cpu')

    if reid is not None:
        # Same exclusivity rule as for the standalone detector weights.
        assert not (checkpoint and reid), \
            'Error: checkpoint and reid checkpoint cannot both exist'
        load_checkpoint(model.reid, reid, map_location='cpu')

    # Some methods don't load checkpoints or checkpoints don't contain
    # 'dataset_meta'
    # VIS need dataset_meta, MOT don't need dataset_meta
    if not hasattr(model, 'dataset_meta'):
        warnings.warn('dataset_meta or class names are missed, '
                      'use None by default.')
        model.dataset_meta = {'classes': None}

    model.cfg = config  # save the config in the model for convenience
    model.to(device)
    model.eval()
    return model
|
head_extractor/build/lib/mmdet/configs/_base_/datasets/coco_detection.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
# Base dataset config for COCO bbox detection ("new-style" mmengine config:
# registry types are referenced as imported classes rather than strings).
from mmcv.transforms import LoadImageFromFile
from mmengine.dataset.sampler import DefaultSampler

from mmdet.datasets import AspectRatioBatchSampler, CocoDataset
from mmdet.datasets.transforms import (LoadAnnotations, PackDetInputs,
                                       RandomFlip, Resize)
from mmdet.evaluation import CocoMetric

# dataset settings
dataset_type = CocoDataset
data_root = 'data/coco/'

# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)

# data_root = 's3://openmmlab/datasets/detection/coco/'

# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
backend_args = None

train_pipeline = [
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(type=LoadAnnotations, with_bbox=True),
    dict(type=Resize, scale=(1333, 800), keep_ratio=True),
    dict(type=RandomFlip, prob=0.5),
    dict(type=PackDetInputs)
]
test_pipeline = [
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(type=Resize, scale=(1333, 800), keep_ratio=True),
    # If you don't have a gt annotation, delete the pipeline
    dict(type=LoadAnnotations, with_bbox=True),
    dict(
        type=PackDetInputs,
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type=DefaultSampler, shuffle=True),
    batch_sampler=dict(type=AspectRatioBatchSampler),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='annotations/instances_train2017.json',
        data_prefix=dict(img='train2017/'),
        filter_cfg=dict(filter_empty_gt=True, min_size=32),
        pipeline=train_pipeline,
        backend_args=backend_args))
val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type=DefaultSampler, shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='annotations/instances_val2017.json',
        data_prefix=dict(img='val2017/'),
        test_mode=True,
        pipeline=test_pipeline,
        backend_args=backend_args))
test_dataloader = val_dataloader

val_evaluator = dict(
    type=CocoMetric,
    ann_file=data_root + 'annotations/instances_val2017.json',
    metric='bbox',
    format_only=False,
    backend_args=backend_args)
test_evaluator = val_evaluator

# inference on test dataset and
# format the output results for submission.
# test_dataloader = dict(
#     batch_size=1,
#     num_workers=2,
#     persistent_workers=True,
#     drop_last=False,
#     sampler=dict(type=DefaultSampler, shuffle=False),
#     dataset=dict(
#         type=dataset_type,
#         data_root=data_root,
#         ann_file=data_root + 'annotations/image_info_test-dev2017.json',
#         data_prefix=dict(img='test2017/'),
#         test_mode=True,
#         pipeline=test_pipeline))
# test_evaluator = dict(
#     type=CocoMetric,
#     metric='bbox',
#     format_only=True,
#     ann_file=data_root + 'annotations/image_info_test-dev2017.json',
#     outfile_prefix='./work_dirs/coco_detection/test')
|
head_extractor/build/lib/mmdet/configs/_base_/datasets/coco_instance.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
# Base dataset config for COCO instance segmentation (bbox + mask
# annotations; "new-style" config with class-object registry types).
from mmcv.transforms.loading import LoadImageFromFile
from mmengine.dataset.sampler import DefaultSampler

from mmdet.datasets.coco import CocoDataset
from mmdet.datasets.samplers.batch_sampler import AspectRatioBatchSampler
from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import LoadAnnotations
from mmdet.datasets.transforms.transforms import RandomFlip, Resize
from mmdet.evaluation.metrics.coco_metric import CocoMetric

# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'

# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)

# data_root = 's3://openmmlab/datasets/detection/coco/'

# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
backend_args = None

train_pipeline = [
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
    dict(type=Resize, scale=(1333, 800), keep_ratio=True),
    dict(type=RandomFlip, prob=0.5),
    dict(type=PackDetInputs)
]
test_pipeline = [
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(type=Resize, scale=(1333, 800), keep_ratio=True),
    # If you don't have a gt annotation, delete the pipeline
    dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
    dict(
        type=PackDetInputs,
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type=DefaultSampler, shuffle=True),
    batch_sampler=dict(type=AspectRatioBatchSampler),
    dataset=dict(
        type=CocoDataset,
        data_root=data_root,
        ann_file='annotations/instances_train2017.json',
        data_prefix=dict(img='train2017/'),
        filter_cfg=dict(filter_empty_gt=True, min_size=32),
        pipeline=train_pipeline,
        backend_args=backend_args))
val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type=DefaultSampler, shuffle=False),
    dataset=dict(
        type=CocoDataset,
        data_root=data_root,
        ann_file='annotations/instances_val2017.json',
        data_prefix=dict(img='val2017/'),
        test_mode=True,
        pipeline=test_pipeline,
        backend_args=backend_args))
test_dataloader = val_dataloader

val_evaluator = dict(
    type=CocoMetric,
    ann_file=data_root + 'annotations/instances_val2017.json',
    metric=['bbox', 'segm'],
    format_only=False,
    backend_args=backend_args)
test_evaluator = val_evaluator

# inference on test dataset and
# format the output results for submission.
# test_dataloader = dict(
#     batch_size=1,
#     num_workers=2,
#     persistent_workers=True,
#     drop_last=False,
#     sampler=dict(type=DefaultSampler, shuffle=False),
#     dataset=dict(
#         type=CocoDataset,
#         data_root=data_root,
#         ann_file=data_root + 'annotations/image_info_test-dev2017.json',
#         data_prefix=dict(img='test2017/'),
#         test_mode=True,
#         pipeline=test_pipeline))
# test_evaluator = dict(
#     type=CocoMetric,
#     metric=['bbox', 'segm'],
#     format_only=True,
#     ann_file=data_root + 'annotations/image_info_test-dev2017.json',
#     outfile_prefix='./work_dirs/coco_instance/test')
|
head_extractor/build/lib/mmdet/configs/_base_/datasets/coco_instance_semantic.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
# Base dataset config for COCO instance segmentation with additional
# semantic (stuff) maps: bbox + mask + seg annotations.
from mmcv.transforms.loading import LoadImageFromFile
from mmengine.dataset.sampler import DefaultSampler

from mmdet.datasets.coco import CocoDataset
from mmdet.datasets.samplers.batch_sampler import AspectRatioBatchSampler
from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import LoadAnnotations
from mmdet.datasets.transforms.transforms import RandomFlip, Resize
from mmdet.evaluation.metrics.coco_metric import CocoMetric

# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'

# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)

# data_root = 's3://openmmlab/datasets/detection/coco/'

# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
backend_args = None

train_pipeline = [
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(type=LoadAnnotations, with_bbox=True, with_mask=True, with_seg=True),
    dict(type=Resize, scale=(1333, 800), keep_ratio=True),
    dict(type=RandomFlip, prob=0.5),
    dict(type=PackDetInputs)
]
test_pipeline = [
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(type=Resize, scale=(1333, 800), keep_ratio=True),
    # If you don't have a gt annotation, delete the pipeline
    dict(type=LoadAnnotations, with_bbox=True, with_mask=True, with_seg=True),
    dict(
        type=PackDetInputs,
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]

train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type=DefaultSampler, shuffle=True),
    batch_sampler=dict(type=AspectRatioBatchSampler),
    dataset=dict(
        type=CocoDataset,
        data_root=data_root,
        ann_file='annotations/instances_train2017.json',
        data_prefix=dict(img='train2017/', seg='stuffthingmaps/train2017/'),
        filter_cfg=dict(filter_empty_gt=True, min_size=32),
        pipeline=train_pipeline,
        backend_args=backend_args))

val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type=DefaultSampler, shuffle=False),
    dataset=dict(
        type=CocoDataset,
        data_root=data_root,
        ann_file='annotations/instances_val2017.json',
        data_prefix=dict(img='val2017/'),
        test_mode=True,
        pipeline=test_pipeline,
        backend_args=backend_args))

test_dataloader = val_dataloader

val_evaluator = dict(
    type=CocoMetric,
    ann_file=data_root + 'annotations/instances_val2017.json',
    metric=['bbox', 'segm'],
    format_only=False,
    backend_args=backend_args)
test_evaluator = val_evaluator
|
head_extractor/build/lib/mmdet/configs/_base_/datasets/coco_panoptic.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
from mmcv.transforms.loading import LoadImageFromFile
|
| 3 |
+
from mmengine.dataset.sampler import DefaultSampler
|
| 4 |
+
|
| 5 |
+
from mmdet.datasets.coco_panoptic import CocoPanopticDataset
|
| 6 |
+
from mmdet.datasets.samplers.batch_sampler import AspectRatioBatchSampler
|
| 7 |
+
from mmdet.datasets.transforms.formatting import PackDetInputs
|
| 8 |
+
from mmdet.datasets.transforms.loading import LoadPanopticAnnotations
|
| 9 |
+
from mmdet.datasets.transforms.transforms import RandomFlip, Resize
|
| 10 |
+
from mmdet.evaluation.metrics.coco_panoptic_metric import CocoPanopticMetric
|
| 11 |
+
|
| 12 |
+
# dataset settings
|
| 13 |
+
dataset_type = 'CocoPanopticDataset'
|
| 14 |
+
data_root = 'data/coco/'
|
| 15 |
+
|
| 16 |
+
# Example to use different file client
|
| 17 |
+
# Method 1: simply set the data root and let the file I/O module
|
| 18 |
+
# automatically infer from prefix (not support LMDB and Memcache yet)
|
| 19 |
+
|
| 20 |
+
# data_root = 's3://openmmlab/datasets/detection/coco/'
|
| 21 |
+
|
| 22 |
+
# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
|
| 23 |
+
# backend_args = dict(
|
| 24 |
+
# backend='petrel',
|
| 25 |
+
# path_mapping=dict({
|
| 26 |
+
# './data/': 's3://openmmlab/datasets/detection/',
|
| 27 |
+
# 'data/': 's3://openmmlab/datasets/detection/'
|
| 28 |
+
# }))
|
| 29 |
+
backend_args = None
|
| 30 |
+
|
| 31 |
+
train_pipeline = [
|
| 32 |
+
dict(type=LoadImageFromFile, backend_args=backend_args),
|
| 33 |
+
dict(type=LoadPanopticAnnotations, backend_args=backend_args),
|
| 34 |
+
dict(type=Resize, scale=(1333, 800), keep_ratio=True),
|
| 35 |
+
dict(type=RandomFlip, prob=0.5),
|
| 36 |
+
dict(type=PackDetInputs)
|
| 37 |
+
]
|
| 38 |
+
test_pipeline = [
|
| 39 |
+
dict(type=LoadImageFromFile, backend_args=backend_args),
|
| 40 |
+
dict(type=Resize, scale=(1333, 800), keep_ratio=True),
|
| 41 |
+
dict(type=LoadPanopticAnnotations, backend_args=backend_args),
|
| 42 |
+
dict(
|
| 43 |
+
type=PackDetInputs,
|
| 44 |
+
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
|
| 45 |
+
'scale_factor'))
|
| 46 |
+
]
|
| 47 |
+
|
| 48 |
+
train_dataloader = dict(
|
| 49 |
+
batch_size=2,
|
| 50 |
+
num_workers=2,
|
| 51 |
+
persistent_workers=True,
|
| 52 |
+
sampler=dict(type=DefaultSampler, shuffle=True),
|
| 53 |
+
batch_sampler=dict(type=AspectRatioBatchSampler),
|
| 54 |
+
dataset=dict(
|
| 55 |
+
type=CocoPanopticDataset,
|
| 56 |
+
data_root=data_root,
|
| 57 |
+
ann_file='annotations/panoptic_train2017.json',
|
| 58 |
+
data_prefix=dict(
|
| 59 |
+
img='train2017/', seg='annotations/panoptic_train2017/'),
|
| 60 |
+
filter_cfg=dict(filter_empty_gt=True, min_size=32),
|
| 61 |
+
pipeline=train_pipeline,
|
| 62 |
+
backend_args=backend_args))
|
| 63 |
+
val_dataloader = dict(
|
| 64 |
+
batch_size=1,
|
| 65 |
+
num_workers=2,
|
| 66 |
+
persistent_workers=True,
|
| 67 |
+
drop_last=False,
|
| 68 |
+
sampler=dict(type=DefaultSampler, shuffle=False),
|
| 69 |
+
dataset=dict(
|
| 70 |
+
type=CocoPanopticDataset,
|
| 71 |
+
data_root=data_root,
|
| 72 |
+
ann_file='annotations/panoptic_val2017.json',
|
| 73 |
+
data_prefix=dict(img='val2017/', seg='annotations/panoptic_val2017/'),
|
| 74 |
+
test_mode=True,
|
| 75 |
+
pipeline=test_pipeline,
|
| 76 |
+
backend_args=backend_args))
|
| 77 |
+
test_dataloader = val_dataloader
|
| 78 |
+
|
| 79 |
+
val_evaluator = dict(
|
| 80 |
+
type=CocoPanopticMetric,
|
| 81 |
+
ann_file=data_root + 'annotations/panoptic_val2017.json',
|
| 82 |
+
seg_prefix=data_root + 'annotations/panoptic_val2017/',
|
| 83 |
+
backend_args=backend_args)
|
| 84 |
+
test_evaluator = val_evaluator
|
| 85 |
+
|
| 86 |
+
# inference on test dataset and
|
| 87 |
+
# format the output results for submission.
|
| 88 |
+
# test_dataloader = dict(
|
| 89 |
+
# batch_size=1,
|
| 90 |
+
# num_workers=1,
|
| 91 |
+
# persistent_workers=True,
|
| 92 |
+
# drop_last=False,
|
| 93 |
+
# sampler=dict(type=DefaultSampler, shuffle=False),
|
| 94 |
+
# dataset=dict(
|
| 95 |
+
# type=CocoPanopticDataset,
|
| 96 |
+
# data_root=data_root,
|
| 97 |
+
# ann_file='annotations/panoptic_image_info_test-dev2017.json',
|
| 98 |
+
# data_prefix=dict(img='test2017/'),
|
| 99 |
+
# test_mode=True,
|
| 100 |
+
# pipeline=test_pipeline))
|
| 101 |
+
# test_evaluator = dict(
|
| 102 |
+
# type=CocoPanopticMetric,
|
| 103 |
+
# format_only=True,
|
| 104 |
+
# ann_file=data_root + 'annotations/panoptic_image_info_test-dev2017.json',
|
| 105 |
+
# outfile_prefix='./work_dirs/coco_panoptic/test')
|
head_extractor/build/lib/mmdet/configs/_base_/datasets/mot_challenge.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
from mmcv.transforms import (LoadImageFromFile, RandomResize,
|
| 3 |
+
TransformBroadcaster)
|
| 4 |
+
|
| 5 |
+
from mmdet.datasets import MOTChallengeDataset
|
| 6 |
+
from mmdet.datasets.samplers import TrackImgSampler
|
| 7 |
+
from mmdet.datasets.transforms import (LoadTrackAnnotations, PackTrackInputs,
|
| 8 |
+
PhotoMetricDistortion, RandomCrop,
|
| 9 |
+
RandomFlip, Resize,
|
| 10 |
+
UniformRefFrameSample)
|
| 11 |
+
from mmdet.evaluation import MOTChallengeMetric
|
| 12 |
+
|
| 13 |
+
# dataset settings
|
| 14 |
+
dataset_type = MOTChallengeDataset
|
| 15 |
+
data_root = 'data/MOT17/'
|
| 16 |
+
img_scale = (1088, 1088)
|
| 17 |
+
|
| 18 |
+
backend_args = None
|
| 19 |
+
# data pipeline
|
| 20 |
+
train_pipeline = [
|
| 21 |
+
dict(
|
| 22 |
+
type=UniformRefFrameSample,
|
| 23 |
+
num_ref_imgs=1,
|
| 24 |
+
frame_range=10,
|
| 25 |
+
filter_key_img=True),
|
| 26 |
+
dict(
|
| 27 |
+
type=TransformBroadcaster,
|
| 28 |
+
share_random_params=True,
|
| 29 |
+
transforms=[
|
| 30 |
+
dict(type=LoadImageFromFile, backend_args=backend_args),
|
| 31 |
+
dict(type=LoadTrackAnnotations),
|
| 32 |
+
dict(
|
| 33 |
+
type=RandomResize,
|
| 34 |
+
scale=img_scale,
|
| 35 |
+
ratio_range=(0.8, 1.2),
|
| 36 |
+
keep_ratio=True,
|
| 37 |
+
clip_object_border=False),
|
| 38 |
+
dict(type=PhotoMetricDistortion)
|
| 39 |
+
]),
|
| 40 |
+
dict(
|
| 41 |
+
type=TransformBroadcaster,
|
| 42 |
+
# different cropped positions for different frames
|
| 43 |
+
share_random_params=False,
|
| 44 |
+
transforms=[
|
| 45 |
+
dict(type=RandomCrop, crop_size=img_scale, bbox_clip_border=False)
|
| 46 |
+
]),
|
| 47 |
+
dict(
|
| 48 |
+
type=TransformBroadcaster,
|
| 49 |
+
share_random_params=True,
|
| 50 |
+
transforms=[
|
| 51 |
+
dict(type=RandomFlip, prob=0.5),
|
| 52 |
+
]),
|
| 53 |
+
dict(type=PackTrackInputs)
|
| 54 |
+
]
|
| 55 |
+
|
| 56 |
+
test_pipeline = [
|
| 57 |
+
dict(
|
| 58 |
+
type=TransformBroadcaster,
|
| 59 |
+
transforms=[
|
| 60 |
+
dict(type=LoadImageFromFile, backend_args=backend_args),
|
| 61 |
+
dict(type=Resize, scale=img_scale, keep_ratio=True),
|
| 62 |
+
dict(type=LoadTrackAnnotations)
|
| 63 |
+
]),
|
| 64 |
+
dict(type=PackTrackInputs)
|
| 65 |
+
]
|
| 66 |
+
|
| 67 |
+
# dataloader
|
| 68 |
+
train_dataloader = dict(
|
| 69 |
+
batch_size=2,
|
| 70 |
+
num_workers=2,
|
| 71 |
+
persistent_workers=True,
|
| 72 |
+
sampler=dict(type=TrackImgSampler), # image-based sampling
|
| 73 |
+
dataset=dict(
|
| 74 |
+
type=dataset_type,
|
| 75 |
+
data_root=data_root,
|
| 76 |
+
visibility_thr=-1,
|
| 77 |
+
ann_file='annotations/half-train_cocoformat.json',
|
| 78 |
+
data_prefix=dict(img_path='train'),
|
| 79 |
+
metainfo=dict(classes=('pedestrian', )),
|
| 80 |
+
pipeline=train_pipeline))
|
| 81 |
+
val_dataloader = dict(
|
| 82 |
+
batch_size=1,
|
| 83 |
+
num_workers=2,
|
| 84 |
+
persistent_workers=True,
|
| 85 |
+
# Now we support two ways to test, image_based and video_based
|
| 86 |
+
# if you want to use video_based sampling, you can use as follows
|
| 87 |
+
# sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
|
| 88 |
+
sampler=dict(type=TrackImgSampler), # image-based sampling
|
| 89 |
+
dataset=dict(
|
| 90 |
+
type=dataset_type,
|
| 91 |
+
data_root=data_root,
|
| 92 |
+
ann_file='annotations/half-val_cocoformat.json',
|
| 93 |
+
data_prefix=dict(img_path='train'),
|
| 94 |
+
test_mode=True,
|
| 95 |
+
pipeline=test_pipeline))
|
| 96 |
+
test_dataloader = val_dataloader
|
| 97 |
+
|
| 98 |
+
# evaluator
|
| 99 |
+
val_evaluator = dict(
|
| 100 |
+
type=MOTChallengeMetric, metric=['HOTA', 'CLEAR', 'Identity'])
|
| 101 |
+
test_evaluator = val_evaluator
|
head_extractor/build/lib/mmdet/configs/_base_/default_runtime.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook,
|
| 3 |
+
LoggerHook, ParamSchedulerHook)
|
| 4 |
+
from mmengine.runner import LogProcessor
|
| 5 |
+
from mmengine.visualization import LocalVisBackend
|
| 6 |
+
|
| 7 |
+
from mmdet.engine.hooks import DetVisualizationHook
|
| 8 |
+
from mmdet.visualization import DetLocalVisualizer
|
| 9 |
+
|
| 10 |
+
default_scope = None
|
| 11 |
+
|
| 12 |
+
default_hooks = dict(
|
| 13 |
+
timer=dict(type=IterTimerHook),
|
| 14 |
+
logger=dict(type=LoggerHook, interval=50),
|
| 15 |
+
param_scheduler=dict(type=ParamSchedulerHook),
|
| 16 |
+
checkpoint=dict(type=CheckpointHook, interval=1),
|
| 17 |
+
sampler_seed=dict(type=DistSamplerSeedHook),
|
| 18 |
+
visualization=dict(type=DetVisualizationHook))
|
| 19 |
+
|
| 20 |
+
env_cfg = dict(
|
| 21 |
+
cudnn_benchmark=False,
|
| 22 |
+
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
|
| 23 |
+
dist_cfg=dict(backend='nccl'),
|
| 24 |
+
)
|
| 25 |
+
|
| 26 |
+
vis_backends = [dict(type=LocalVisBackend)]
|
| 27 |
+
visualizer = dict(
|
| 28 |
+
type=DetLocalVisualizer, vis_backends=vis_backends, name='visualizer')
|
| 29 |
+
log_processor = dict(type=LogProcessor, window_size=50, by_epoch=True)
|
| 30 |
+
|
| 31 |
+
log_level = 'INFO'
|
| 32 |
+
load_from = None
|
| 33 |
+
resume = False
|
head_extractor/build/lib/mmdet/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
from mmcv.ops import RoIAlign, nms
|
| 3 |
+
from torch.nn import BatchNorm2d
|
| 4 |
+
|
| 5 |
+
from mmdet.models.backbones.resnet import ResNet
|
| 6 |
+
from mmdet.models.data_preprocessors.data_preprocessor import \
|
| 7 |
+
DetDataPreprocessor
|
| 8 |
+
from mmdet.models.dense_heads.rpn_head import RPNHead
|
| 9 |
+
from mmdet.models.detectors.cascade_rcnn import CascadeRCNN
|
| 10 |
+
from mmdet.models.losses.cross_entropy_loss import CrossEntropyLoss
|
| 11 |
+
from mmdet.models.losses.smooth_l1_loss import SmoothL1Loss
|
| 12 |
+
from mmdet.models.necks.fpn import FPN
|
| 13 |
+
from mmdet.models.roi_heads.bbox_heads.convfc_bbox_head import \
|
| 14 |
+
Shared2FCBBoxHead
|
| 15 |
+
from mmdet.models.roi_heads.cascade_roi_head import CascadeRoIHead
|
| 16 |
+
from mmdet.models.roi_heads.mask_heads.fcn_mask_head import FCNMaskHead
|
| 17 |
+
from mmdet.models.roi_heads.roi_extractors.single_level_roi_extractor import \
|
| 18 |
+
SingleRoIExtractor
|
| 19 |
+
from mmdet.models.task_modules.assigners.max_iou_assigner import MaxIoUAssigner
|
| 20 |
+
from mmdet.models.task_modules.coders.delta_xywh_bbox_coder import \
|
| 21 |
+
DeltaXYWHBBoxCoder
|
| 22 |
+
from mmdet.models.task_modules.prior_generators.anchor_generator import \
|
| 23 |
+
AnchorGenerator
|
| 24 |
+
from mmdet.models.task_modules.samplers.random_sampler import RandomSampler
|
| 25 |
+
|
| 26 |
+
# model settings
|
| 27 |
+
model = dict(
|
| 28 |
+
type=CascadeRCNN,
|
| 29 |
+
data_preprocessor=dict(
|
| 30 |
+
type=DetDataPreprocessor,
|
| 31 |
+
mean=[123.675, 116.28, 103.53],
|
| 32 |
+
std=[58.395, 57.12, 57.375],
|
| 33 |
+
bgr_to_rgb=True,
|
| 34 |
+
pad_mask=True,
|
| 35 |
+
pad_size_divisor=32),
|
| 36 |
+
backbone=dict(
|
| 37 |
+
type=ResNet,
|
| 38 |
+
depth=50,
|
| 39 |
+
num_stages=4,
|
| 40 |
+
out_indices=(0, 1, 2, 3),
|
| 41 |
+
frozen_stages=1,
|
| 42 |
+
norm_cfg=dict(type=BatchNorm2d, requires_grad=True),
|
| 43 |
+
norm_eval=True,
|
| 44 |
+
style='pytorch',
|
| 45 |
+
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
|
| 46 |
+
neck=dict(
|
| 47 |
+
type=FPN,
|
| 48 |
+
in_channels=[256, 512, 1024, 2048],
|
| 49 |
+
out_channels=256,
|
| 50 |
+
num_outs=5),
|
| 51 |
+
rpn_head=dict(
|
| 52 |
+
type=RPNHead,
|
| 53 |
+
in_channels=256,
|
| 54 |
+
feat_channels=256,
|
| 55 |
+
anchor_generator=dict(
|
| 56 |
+
type=AnchorGenerator,
|
| 57 |
+
scales=[8],
|
| 58 |
+
ratios=[0.5, 1.0, 2.0],
|
| 59 |
+
strides=[4, 8, 16, 32, 64]),
|
| 60 |
+
bbox_coder=dict(
|
| 61 |
+
type=DeltaXYWHBBoxCoder,
|
| 62 |
+
target_means=[.0, .0, .0, .0],
|
| 63 |
+
target_stds=[1.0, 1.0, 1.0, 1.0]),
|
| 64 |
+
loss_cls=dict(
|
| 65 |
+
type=CrossEntropyLoss, use_sigmoid=True, loss_weight=1.0),
|
| 66 |
+
loss_bbox=dict(type=SmoothL1Loss, beta=1.0 / 9.0, loss_weight=1.0)),
|
| 67 |
+
roi_head=dict(
|
| 68 |
+
type=CascadeRoIHead,
|
| 69 |
+
num_stages=3,
|
| 70 |
+
stage_loss_weights=[1, 0.5, 0.25],
|
| 71 |
+
bbox_roi_extractor=dict(
|
| 72 |
+
type=SingleRoIExtractor,
|
| 73 |
+
roi_layer=dict(type=RoIAlign, output_size=7, sampling_ratio=0),
|
| 74 |
+
out_channels=256,
|
| 75 |
+
featmap_strides=[4, 8, 16, 32]),
|
| 76 |
+
bbox_head=[
|
| 77 |
+
dict(
|
| 78 |
+
type=Shared2FCBBoxHead,
|
| 79 |
+
in_channels=256,
|
| 80 |
+
fc_out_channels=1024,
|
| 81 |
+
roi_feat_size=7,
|
| 82 |
+
num_classes=80,
|
| 83 |
+
bbox_coder=dict(
|
| 84 |
+
type=DeltaXYWHBBoxCoder,
|
| 85 |
+
target_means=[0., 0., 0., 0.],
|
| 86 |
+
target_stds=[0.1, 0.1, 0.2, 0.2]),
|
| 87 |
+
reg_class_agnostic=True,
|
| 88 |
+
loss_cls=dict(
|
| 89 |
+
type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
|
| 90 |
+
loss_bbox=dict(type=SmoothL1Loss, beta=1.0, loss_weight=1.0)),
|
| 91 |
+
dict(
|
| 92 |
+
type=Shared2FCBBoxHead,
|
| 93 |
+
in_channels=256,
|
| 94 |
+
fc_out_channels=1024,
|
| 95 |
+
roi_feat_size=7,
|
| 96 |
+
num_classes=80,
|
| 97 |
+
bbox_coder=dict(
|
| 98 |
+
type=DeltaXYWHBBoxCoder,
|
| 99 |
+
target_means=[0., 0., 0., 0.],
|
| 100 |
+
target_stds=[0.05, 0.05, 0.1, 0.1]),
|
| 101 |
+
reg_class_agnostic=True,
|
| 102 |
+
loss_cls=dict(
|
| 103 |
+
type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
|
| 104 |
+
loss_bbox=dict(type=SmoothL1Loss, beta=1.0, loss_weight=1.0)),
|
| 105 |
+
dict(
|
| 106 |
+
type=Shared2FCBBoxHead,
|
| 107 |
+
in_channels=256,
|
| 108 |
+
fc_out_channels=1024,
|
| 109 |
+
roi_feat_size=7,
|
| 110 |
+
num_classes=80,
|
| 111 |
+
bbox_coder=dict(
|
| 112 |
+
type=DeltaXYWHBBoxCoder,
|
| 113 |
+
target_means=[0., 0., 0., 0.],
|
| 114 |
+
target_stds=[0.033, 0.033, 0.067, 0.067]),
|
| 115 |
+
reg_class_agnostic=True,
|
| 116 |
+
loss_cls=dict(
|
| 117 |
+
type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
|
| 118 |
+
loss_bbox=dict(type=SmoothL1Loss, beta=1.0, loss_weight=1.0))
|
| 119 |
+
],
|
| 120 |
+
mask_roi_extractor=dict(
|
| 121 |
+
type=SingleRoIExtractor,
|
| 122 |
+
roi_layer=dict(type=RoIAlign, output_size=14, sampling_ratio=0),
|
| 123 |
+
out_channels=256,
|
| 124 |
+
featmap_strides=[4, 8, 16, 32]),
|
| 125 |
+
mask_head=dict(
|
| 126 |
+
type=FCNMaskHead,
|
| 127 |
+
num_convs=4,
|
| 128 |
+
in_channels=256,
|
| 129 |
+
conv_out_channels=256,
|
| 130 |
+
num_classes=80,
|
| 131 |
+
loss_mask=dict(
|
| 132 |
+
type=CrossEntropyLoss, use_mask=True, loss_weight=1.0))),
|
| 133 |
+
# model training and testing settings
|
| 134 |
+
train_cfg=dict(
|
| 135 |
+
rpn=dict(
|
| 136 |
+
assigner=dict(
|
| 137 |
+
type=MaxIoUAssigner,
|
| 138 |
+
pos_iou_thr=0.7,
|
| 139 |
+
neg_iou_thr=0.3,
|
| 140 |
+
min_pos_iou=0.3,
|
| 141 |
+
match_low_quality=True,
|
| 142 |
+
ignore_iof_thr=-1),
|
| 143 |
+
sampler=dict(
|
| 144 |
+
type=RandomSampler,
|
| 145 |
+
num=256,
|
| 146 |
+
pos_fraction=0.5,
|
| 147 |
+
neg_pos_ub=-1,
|
| 148 |
+
add_gt_as_proposals=False),
|
| 149 |
+
allowed_border=0,
|
| 150 |
+
pos_weight=-1,
|
| 151 |
+
debug=False),
|
| 152 |
+
rpn_proposal=dict(
|
| 153 |
+
nms_pre=2000,
|
| 154 |
+
max_per_img=2000,
|
| 155 |
+
nms=dict(type=nms, iou_threshold=0.7),
|
| 156 |
+
min_bbox_size=0),
|
| 157 |
+
rcnn=[
|
| 158 |
+
dict(
|
| 159 |
+
assigner=dict(
|
| 160 |
+
type=MaxIoUAssigner,
|
| 161 |
+
pos_iou_thr=0.5,
|
| 162 |
+
neg_iou_thr=0.5,
|
| 163 |
+
min_pos_iou=0.5,
|
| 164 |
+
match_low_quality=False,
|
| 165 |
+
ignore_iof_thr=-1),
|
| 166 |
+
sampler=dict(
|
| 167 |
+
type=RandomSampler,
|
| 168 |
+
num=512,
|
| 169 |
+
pos_fraction=0.25,
|
| 170 |
+
neg_pos_ub=-1,
|
| 171 |
+
add_gt_as_proposals=True),
|
| 172 |
+
mask_size=28,
|
| 173 |
+
pos_weight=-1,
|
| 174 |
+
debug=False),
|
| 175 |
+
dict(
|
| 176 |
+
assigner=dict(
|
| 177 |
+
type=MaxIoUAssigner,
|
| 178 |
+
pos_iou_thr=0.6,
|
| 179 |
+
neg_iou_thr=0.6,
|
| 180 |
+
min_pos_iou=0.6,
|
| 181 |
+
match_low_quality=False,
|
| 182 |
+
ignore_iof_thr=-1),
|
| 183 |
+
sampler=dict(
|
| 184 |
+
type=RandomSampler,
|
| 185 |
+
num=512,
|
| 186 |
+
pos_fraction=0.25,
|
| 187 |
+
neg_pos_ub=-1,
|
| 188 |
+
add_gt_as_proposals=True),
|
| 189 |
+
mask_size=28,
|
| 190 |
+
pos_weight=-1,
|
| 191 |
+
debug=False),
|
| 192 |
+
dict(
|
| 193 |
+
assigner=dict(
|
| 194 |
+
type=MaxIoUAssigner,
|
| 195 |
+
pos_iou_thr=0.7,
|
| 196 |
+
neg_iou_thr=0.7,
|
| 197 |
+
min_pos_iou=0.7,
|
| 198 |
+
match_low_quality=False,
|
| 199 |
+
ignore_iof_thr=-1),
|
| 200 |
+
sampler=dict(
|
| 201 |
+
type=RandomSampler,
|
| 202 |
+
num=512,
|
| 203 |
+
pos_fraction=0.25,
|
| 204 |
+
neg_pos_ub=-1,
|
| 205 |
+
add_gt_as_proposals=True),
|
| 206 |
+
mask_size=28,
|
| 207 |
+
pos_weight=-1,
|
| 208 |
+
debug=False)
|
| 209 |
+
]),
|
| 210 |
+
test_cfg=dict(
|
| 211 |
+
rpn=dict(
|
| 212 |
+
nms_pre=1000,
|
| 213 |
+
max_per_img=1000,
|
| 214 |
+
nms=dict(type=nms, iou_threshold=0.7),
|
| 215 |
+
min_bbox_size=0),
|
| 216 |
+
rcnn=dict(
|
| 217 |
+
score_thr=0.05,
|
| 218 |
+
nms=dict(type=nms, iou_threshold=0.5),
|
| 219 |
+
max_per_img=100,
|
| 220 |
+
mask_thr_binary=0.5)))
|
head_extractor/build/lib/mmdet/configs/_base_/models/cascade_rcnn_r50_fpn.py
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
from mmcv.ops import RoIAlign, nms
|
| 3 |
+
from torch.nn import BatchNorm2d
|
| 4 |
+
|
| 5 |
+
from mmdet.models.backbones.resnet import ResNet
|
| 6 |
+
from mmdet.models.data_preprocessors.data_preprocessor import \
|
| 7 |
+
DetDataPreprocessor
|
| 8 |
+
from mmdet.models.dense_heads.rpn_head import RPNHead
|
| 9 |
+
from mmdet.models.detectors.cascade_rcnn import CascadeRCNN
|
| 10 |
+
from mmdet.models.losses.cross_entropy_loss import CrossEntropyLoss
|
| 11 |
+
from mmdet.models.losses.smooth_l1_loss import SmoothL1Loss
|
| 12 |
+
from mmdet.models.necks.fpn import FPN
|
| 13 |
+
from mmdet.models.roi_heads.bbox_heads.convfc_bbox_head import \
|
| 14 |
+
Shared2FCBBoxHead
|
| 15 |
+
from mmdet.models.roi_heads.cascade_roi_head import CascadeRoIHead
|
| 16 |
+
from mmdet.models.roi_heads.roi_extractors.single_level_roi_extractor import \
|
| 17 |
+
SingleRoIExtractor
|
| 18 |
+
from mmdet.models.task_modules.assigners.max_iou_assigner import MaxIoUAssigner
|
| 19 |
+
from mmdet.models.task_modules.coders.delta_xywh_bbox_coder import \
|
| 20 |
+
DeltaXYWHBBoxCoder
|
| 21 |
+
from mmdet.models.task_modules.prior_generators.anchor_generator import \
|
| 22 |
+
AnchorGenerator
|
| 23 |
+
from mmdet.models.task_modules.samplers.random_sampler import RandomSampler
|
| 24 |
+
|
| 25 |
+
# model settings
|
| 26 |
+
model = dict(
|
| 27 |
+
type=CascadeRCNN,
|
| 28 |
+
data_preprocessor=dict(
|
| 29 |
+
type=DetDataPreprocessor,
|
| 30 |
+
mean=[123.675, 116.28, 103.53],
|
| 31 |
+
std=[58.395, 57.12, 57.375],
|
| 32 |
+
bgr_to_rgb=True,
|
| 33 |
+
pad_size_divisor=32),
|
| 34 |
+
backbone=dict(
|
| 35 |
+
type=ResNet,
|
| 36 |
+
depth=50,
|
| 37 |
+
num_stages=4,
|
| 38 |
+
out_indices=(0, 1, 2, 3),
|
| 39 |
+
frozen_stages=1,
|
| 40 |
+
norm_cfg=dict(type=BatchNorm2d, requires_grad=True),
|
| 41 |
+
norm_eval=True,
|
| 42 |
+
style='pytorch',
|
| 43 |
+
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
|
| 44 |
+
neck=dict(
|
| 45 |
+
type=FPN,
|
| 46 |
+
in_channels=[256, 512, 1024, 2048],
|
| 47 |
+
out_channels=256,
|
| 48 |
+
num_outs=5),
|
| 49 |
+
rpn_head=dict(
|
| 50 |
+
type=RPNHead,
|
| 51 |
+
in_channels=256,
|
| 52 |
+
feat_channels=256,
|
| 53 |
+
anchor_generator=dict(
|
| 54 |
+
type=AnchorGenerator,
|
| 55 |
+
scales=[8],
|
| 56 |
+
ratios=[0.5, 1.0, 2.0],
|
| 57 |
+
strides=[4, 8, 16, 32, 64]),
|
| 58 |
+
bbox_coder=dict(
|
| 59 |
+
type=DeltaXYWHBBoxCoder,
|
| 60 |
+
target_means=[.0, .0, .0, .0],
|
| 61 |
+
target_stds=[1.0, 1.0, 1.0, 1.0]),
|
| 62 |
+
loss_cls=dict(
|
| 63 |
+
type=CrossEntropyLoss, use_sigmoid=True, loss_weight=1.0),
|
| 64 |
+
loss_bbox=dict(type=SmoothL1Loss, beta=1.0 / 9.0, loss_weight=1.0)),
|
| 65 |
+
roi_head=dict(
|
| 66 |
+
type=CascadeRoIHead,
|
| 67 |
+
num_stages=3,
|
| 68 |
+
stage_loss_weights=[1, 0.5, 0.25],
|
| 69 |
+
bbox_roi_extractor=dict(
|
| 70 |
+
type=SingleRoIExtractor,
|
| 71 |
+
roi_layer=dict(type=RoIAlign, output_size=7, sampling_ratio=0),
|
| 72 |
+
out_channels=256,
|
| 73 |
+
featmap_strides=[4, 8, 16, 32]),
|
| 74 |
+
bbox_head=[
|
| 75 |
+
dict(
|
| 76 |
+
type=Shared2FCBBoxHead,
|
| 77 |
+
in_channels=256,
|
| 78 |
+
fc_out_channels=1024,
|
| 79 |
+
roi_feat_size=7,
|
| 80 |
+
num_classes=80,
|
| 81 |
+
bbox_coder=dict(
|
| 82 |
+
type=DeltaXYWHBBoxCoder,
|
| 83 |
+
target_means=[0., 0., 0., 0.],
|
| 84 |
+
target_stds=[0.1, 0.1, 0.2, 0.2]),
|
| 85 |
+
reg_class_agnostic=True,
|
| 86 |
+
loss_cls=dict(
|
| 87 |
+
type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
|
| 88 |
+
loss_bbox=dict(type=SmoothL1Loss, beta=1.0, loss_weight=1.0)),
|
| 89 |
+
dict(
|
| 90 |
+
type=Shared2FCBBoxHead,
|
| 91 |
+
in_channels=256,
|
| 92 |
+
fc_out_channels=1024,
|
| 93 |
+
roi_feat_size=7,
|
| 94 |
+
num_classes=80,
|
| 95 |
+
bbox_coder=dict(
|
| 96 |
+
type=DeltaXYWHBBoxCoder,
|
| 97 |
+
target_means=[0., 0., 0., 0.],
|
| 98 |
+
target_stds=[0.05, 0.05, 0.1, 0.1]),
|
| 99 |
+
reg_class_agnostic=True,
|
| 100 |
+
loss_cls=dict(
|
| 101 |
+
type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
|
| 102 |
+
loss_bbox=dict(type=SmoothL1Loss, beta=1.0, loss_weight=1.0)),
|
| 103 |
+
dict(
|
| 104 |
+
type=Shared2FCBBoxHead,
|
| 105 |
+
in_channels=256,
|
| 106 |
+
fc_out_channels=1024,
|
| 107 |
+
roi_feat_size=7,
|
| 108 |
+
num_classes=80,
|
| 109 |
+
bbox_coder=dict(
|
| 110 |
+
type=DeltaXYWHBBoxCoder,
|
| 111 |
+
target_means=[0., 0., 0., 0.],
|
| 112 |
+
target_stds=[0.033, 0.033, 0.067, 0.067]),
|
| 113 |
+
reg_class_agnostic=True,
|
| 114 |
+
loss_cls=dict(
|
| 115 |
+
type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
|
| 116 |
+
loss_bbox=dict(type=SmoothL1Loss, beta=1.0, loss_weight=1.0))
|
| 117 |
+
]),
|
| 118 |
+
# model training and testing settings
|
| 119 |
+
train_cfg=dict(
|
| 120 |
+
rpn=dict(
|
| 121 |
+
assigner=dict(
|
| 122 |
+
type=MaxIoUAssigner,
|
| 123 |
+
pos_iou_thr=0.7,
|
| 124 |
+
neg_iou_thr=0.3,
|
| 125 |
+
min_pos_iou=0.3,
|
| 126 |
+
match_low_quality=True,
|
| 127 |
+
ignore_iof_thr=-1),
|
| 128 |
+
sampler=dict(
|
| 129 |
+
type=RandomSampler,
|
| 130 |
+
num=256,
|
| 131 |
+
pos_fraction=0.5,
|
| 132 |
+
neg_pos_ub=-1,
|
| 133 |
+
add_gt_as_proposals=False),
|
| 134 |
+
allowed_border=0,
|
| 135 |
+
pos_weight=-1,
|
| 136 |
+
debug=False),
|
| 137 |
+
rpn_proposal=dict(
|
| 138 |
+
nms_pre=2000,
|
| 139 |
+
max_per_img=2000,
|
| 140 |
+
nms=dict(type=nms, iou_threshold=0.7),
|
| 141 |
+
min_bbox_size=0),
|
| 142 |
+
rcnn=[
|
| 143 |
+
dict(
|
| 144 |
+
assigner=dict(
|
| 145 |
+
type=MaxIoUAssigner,
|
| 146 |
+
pos_iou_thr=0.5,
|
| 147 |
+
neg_iou_thr=0.5,
|
| 148 |
+
min_pos_iou=0.5,
|
| 149 |
+
match_low_quality=False,
|
| 150 |
+
ignore_iof_thr=-1),
|
| 151 |
+
sampler=dict(
|
| 152 |
+
type=RandomSampler,
|
| 153 |
+
num=512,
|
| 154 |
+
pos_fraction=0.25,
|
| 155 |
+
neg_pos_ub=-1,
|
| 156 |
+
add_gt_as_proposals=True),
|
| 157 |
+
pos_weight=-1,
|
| 158 |
+
debug=False),
|
| 159 |
+
dict(
|
| 160 |
+
assigner=dict(
|
| 161 |
+
type=MaxIoUAssigner,
|
| 162 |
+
pos_iou_thr=0.6,
|
| 163 |
+
neg_iou_thr=0.6,
|
| 164 |
+
min_pos_iou=0.6,
|
| 165 |
+
match_low_quality=False,
|
| 166 |
+
ignore_iof_thr=-1),
|
| 167 |
+
sampler=dict(
|
| 168 |
+
type=RandomSampler,
|
| 169 |
+
num=512,
|
| 170 |
+
pos_fraction=0.25,
|
| 171 |
+
neg_pos_ub=-1,
|
| 172 |
+
add_gt_as_proposals=True),
|
| 173 |
+
pos_weight=-1,
|
| 174 |
+
debug=False),
|
| 175 |
+
dict(
|
| 176 |
+
assigner=dict(
|
| 177 |
+
type=MaxIoUAssigner,
|
| 178 |
+
pos_iou_thr=0.7,
|
| 179 |
+
neg_iou_thr=0.7,
|
| 180 |
+
min_pos_iou=0.7,
|
| 181 |
+
match_low_quality=False,
|
| 182 |
+
ignore_iof_thr=-1),
|
| 183 |
+
sampler=dict(
|
| 184 |
+
type=RandomSampler,
|
| 185 |
+
num=512,
|
| 186 |
+
pos_fraction=0.25,
|
| 187 |
+
neg_pos_ub=-1,
|
| 188 |
+
add_gt_as_proposals=True),
|
| 189 |
+
pos_weight=-1,
|
| 190 |
+
debug=False)
|
| 191 |
+
]),
|
| 192 |
+
test_cfg=dict(
|
| 193 |
+
rpn=dict(
|
| 194 |
+
nms_pre=1000,
|
| 195 |
+
max_per_img=1000,
|
| 196 |
+
nms=dict(type=nms, iou_threshold=0.7),
|
| 197 |
+
min_bbox_size=0),
|
| 198 |
+
rcnn=dict(
|
| 199 |
+
score_thr=0.05,
|
| 200 |
+
nms=dict(type=nms, iou_threshold=0.5),
|
| 201 |
+
max_per_img=100)))
|
head_extractor/build/lib/mmdet/configs/_base_/models/faster_rcnn_r50_fpn.py
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
from mmcv.ops import RoIAlign, nms
|
| 3 |
+
from torch.nn import BatchNorm2d
|
| 4 |
+
|
| 5 |
+
from mmdet.models.backbones.resnet import ResNet
|
| 6 |
+
from mmdet.models.data_preprocessors.data_preprocessor import \
|
| 7 |
+
DetDataPreprocessor
|
| 8 |
+
from mmdet.models.dense_heads.rpn_head import RPNHead
|
| 9 |
+
from mmdet.models.detectors.faster_rcnn import FasterRCNN
|
| 10 |
+
from mmdet.models.losses.cross_entropy_loss import CrossEntropyLoss
|
| 11 |
+
from mmdet.models.losses.smooth_l1_loss import L1Loss
|
| 12 |
+
from mmdet.models.necks.fpn import FPN
|
| 13 |
+
from mmdet.models.roi_heads.bbox_heads.convfc_bbox_head import \
|
| 14 |
+
Shared2FCBBoxHead
|
| 15 |
+
from mmdet.models.roi_heads.roi_extractors.single_level_roi_extractor import \
|
| 16 |
+
SingleRoIExtractor
|
| 17 |
+
from mmdet.models.roi_heads.standard_roi_head import StandardRoIHead
|
| 18 |
+
from mmdet.models.task_modules.assigners.max_iou_assigner import MaxIoUAssigner
|
| 19 |
+
from mmdet.models.task_modules.coders.delta_xywh_bbox_coder import \
|
| 20 |
+
DeltaXYWHBBoxCoder
|
| 21 |
+
from mmdet.models.task_modules.prior_generators.anchor_generator import \
|
| 22 |
+
AnchorGenerator
|
| 23 |
+
from mmdet.models.task_modules.samplers.random_sampler import RandomSampler
|
| 24 |
+
|
| 25 |
+
# model settings
|
| 26 |
+
model = dict(
|
| 27 |
+
type=FasterRCNN,
|
| 28 |
+
data_preprocessor=dict(
|
| 29 |
+
type=DetDataPreprocessor,
|
| 30 |
+
mean=[123.675, 116.28, 103.53],
|
| 31 |
+
std=[58.395, 57.12, 57.375],
|
| 32 |
+
bgr_to_rgb=True,
|
| 33 |
+
pad_size_divisor=32),
|
| 34 |
+
backbone=dict(
|
| 35 |
+
type=ResNet,
|
| 36 |
+
depth=50,
|
| 37 |
+
num_stages=4,
|
| 38 |
+
out_indices=(0, 1, 2, 3),
|
| 39 |
+
frozen_stages=1,
|
| 40 |
+
norm_cfg=dict(type=BatchNorm2d, requires_grad=True),
|
| 41 |
+
norm_eval=True,
|
| 42 |
+
style='pytorch',
|
| 43 |
+
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
|
| 44 |
+
neck=dict(
|
| 45 |
+
type=FPN,
|
| 46 |
+
in_channels=[256, 512, 1024, 2048],
|
| 47 |
+
out_channels=256,
|
| 48 |
+
num_outs=5),
|
| 49 |
+
rpn_head=dict(
|
| 50 |
+
type=RPNHead,
|
| 51 |
+
in_channels=256,
|
| 52 |
+
feat_channels=256,
|
| 53 |
+
anchor_generator=dict(
|
| 54 |
+
type=AnchorGenerator,
|
| 55 |
+
scales=[8],
|
| 56 |
+
ratios=[0.5, 1.0, 2.0],
|
| 57 |
+
strides=[4, 8, 16, 32, 64]),
|
| 58 |
+
bbox_coder=dict(
|
| 59 |
+
type=DeltaXYWHBBoxCoder,
|
| 60 |
+
target_means=[.0, .0, .0, .0],
|
| 61 |
+
target_stds=[1.0, 1.0, 1.0, 1.0]),
|
| 62 |
+
loss_cls=dict(
|
| 63 |
+
type=CrossEntropyLoss, use_sigmoid=True, loss_weight=1.0),
|
| 64 |
+
loss_bbox=dict(type=L1Loss, loss_weight=1.0)),
|
| 65 |
+
roi_head=dict(
|
| 66 |
+
type=StandardRoIHead,
|
| 67 |
+
bbox_roi_extractor=dict(
|
| 68 |
+
type=SingleRoIExtractor,
|
| 69 |
+
roi_layer=dict(type=RoIAlign, output_size=7, sampling_ratio=0),
|
| 70 |
+
out_channels=256,
|
| 71 |
+
featmap_strides=[4, 8, 16, 32]),
|
| 72 |
+
bbox_head=dict(
|
| 73 |
+
type=Shared2FCBBoxHead,
|
| 74 |
+
in_channels=256,
|
| 75 |
+
fc_out_channels=1024,
|
| 76 |
+
roi_feat_size=7,
|
| 77 |
+
num_classes=80,
|
| 78 |
+
bbox_coder=dict(
|
| 79 |
+
type=DeltaXYWHBBoxCoder,
|
| 80 |
+
target_means=[0., 0., 0., 0.],
|
| 81 |
+
target_stds=[0.1, 0.1, 0.2, 0.2]),
|
| 82 |
+
reg_class_agnostic=False,
|
| 83 |
+
loss_cls=dict(
|
| 84 |
+
type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
|
| 85 |
+
loss_bbox=dict(type=L1Loss, loss_weight=1.0))),
|
| 86 |
+
# model training and testing settings
|
| 87 |
+
train_cfg=dict(
|
| 88 |
+
rpn=dict(
|
| 89 |
+
assigner=dict(
|
| 90 |
+
type=MaxIoUAssigner,
|
| 91 |
+
pos_iou_thr=0.7,
|
| 92 |
+
neg_iou_thr=0.3,
|
| 93 |
+
min_pos_iou=0.3,
|
| 94 |
+
match_low_quality=True,
|
| 95 |
+
ignore_iof_thr=-1),
|
| 96 |
+
sampler=dict(
|
| 97 |
+
type=RandomSampler,
|
| 98 |
+
num=256,
|
| 99 |
+
pos_fraction=0.5,
|
| 100 |
+
neg_pos_ub=-1,
|
| 101 |
+
add_gt_as_proposals=False),
|
| 102 |
+
allowed_border=-1,
|
| 103 |
+
pos_weight=-1,
|
| 104 |
+
debug=False),
|
| 105 |
+
rpn_proposal=dict(
|
| 106 |
+
nms_pre=2000,
|
| 107 |
+
max_per_img=1000,
|
| 108 |
+
nms=dict(type=nms, iou_threshold=0.7),
|
| 109 |
+
min_bbox_size=0),
|
| 110 |
+
rcnn=dict(
|
| 111 |
+
assigner=dict(
|
| 112 |
+
type=MaxIoUAssigner,
|
| 113 |
+
pos_iou_thr=0.5,
|
| 114 |
+
neg_iou_thr=0.5,
|
| 115 |
+
min_pos_iou=0.5,
|
| 116 |
+
match_low_quality=False,
|
| 117 |
+
ignore_iof_thr=-1),
|
| 118 |
+
sampler=dict(
|
| 119 |
+
type=RandomSampler,
|
| 120 |
+
num=512,
|
| 121 |
+
pos_fraction=0.25,
|
| 122 |
+
neg_pos_ub=-1,
|
| 123 |
+
add_gt_as_proposals=True),
|
| 124 |
+
pos_weight=-1,
|
| 125 |
+
debug=False)),
|
| 126 |
+
test_cfg=dict(
|
| 127 |
+
rpn=dict(
|
| 128 |
+
nms_pre=1000,
|
| 129 |
+
max_per_img=1000,
|
| 130 |
+
nms=dict(type=nms, iou_threshold=0.7),
|
| 131 |
+
min_bbox_size=0),
|
| 132 |
+
rcnn=dict(
|
| 133 |
+
score_thr=0.05,
|
| 134 |
+
nms=dict(type=nms, iou_threshold=0.5),
|
| 135 |
+
max_per_img=100)
|
| 136 |
+
# soft-nms is also supported for rcnn testing
|
| 137 |
+
# e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
|
| 138 |
+
))
|
head_extractor/build/lib/mmdet/configs/_base_/models/mask_rcnn_r50_caffe_c4.py
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
from mmcv.ops import RoIAlign, nms
|
| 3 |
+
from mmengine.model.weight_init import PretrainedInit
|
| 4 |
+
from torch.nn import BatchNorm2d
|
| 5 |
+
|
| 6 |
+
from mmdet.models.backbones.resnet import ResNet
|
| 7 |
+
from mmdet.models.data_preprocessors.data_preprocessor import \
|
| 8 |
+
DetDataPreprocessor
|
| 9 |
+
from mmdet.models.dense_heads.rpn_head import RPNHead
|
| 10 |
+
from mmdet.models.detectors.mask_rcnn import MaskRCNN
|
| 11 |
+
from mmdet.models.layers import ResLayer
|
| 12 |
+
from mmdet.models.losses.cross_entropy_loss import CrossEntropyLoss
|
| 13 |
+
from mmdet.models.losses.smooth_l1_loss import L1Loss
|
| 14 |
+
from mmdet.models.roi_heads.bbox_heads.bbox_head import BBoxHead
|
| 15 |
+
from mmdet.models.roi_heads.mask_heads.fcn_mask_head import FCNMaskHead
|
| 16 |
+
from mmdet.models.roi_heads.roi_extractors.single_level_roi_extractor import \
|
| 17 |
+
SingleRoIExtractor
|
| 18 |
+
from mmdet.models.roi_heads.standard_roi_head import StandardRoIHead
|
| 19 |
+
from mmdet.models.task_modules.assigners.max_iou_assigner import MaxIoUAssigner
|
| 20 |
+
from mmdet.models.task_modules.coders.delta_xywh_bbox_coder import \
|
| 21 |
+
DeltaXYWHBBoxCoder
|
| 22 |
+
from mmdet.models.task_modules.prior_generators.anchor_generator import \
|
| 23 |
+
AnchorGenerator
|
| 24 |
+
from mmdet.models.task_modules.samplers.random_sampler import RandomSampler
|
| 25 |
+
|
| 26 |
+
# model settings
|
| 27 |
+
norm_cfg = dict(type=BatchNorm2d, requires_grad=False)
|
| 28 |
+
# model settings
|
| 29 |
+
model = dict(
|
| 30 |
+
type=MaskRCNN,
|
| 31 |
+
data_preprocessor=dict(
|
| 32 |
+
type=DetDataPreprocessor,
|
| 33 |
+
mean=[103.530, 116.280, 123.675],
|
| 34 |
+
std=[1.0, 1.0, 1.0],
|
| 35 |
+
bgr_to_rgb=False,
|
| 36 |
+
pad_mask=True,
|
| 37 |
+
pad_size_divisor=32),
|
| 38 |
+
backbone=dict(
|
| 39 |
+
type=ResNet,
|
| 40 |
+
depth=50,
|
| 41 |
+
num_stages=3,
|
| 42 |
+
strides=(1, 2, 2),
|
| 43 |
+
dilations=(1, 1, 1),
|
| 44 |
+
out_indices=(2, ),
|
| 45 |
+
frozen_stages=1,
|
| 46 |
+
norm_cfg=dict(type=BatchNorm2d, requires_grad=True),
|
| 47 |
+
norm_eval=True,
|
| 48 |
+
style='caffe',
|
| 49 |
+
init_cfg=dict(
|
| 50 |
+
type=PretrainedInit,
|
| 51 |
+
checkpoint='open-mmlab://detectron2/resnet50_caffe')),
|
| 52 |
+
rpn_head=dict(
|
| 53 |
+
type=RPNHead,
|
| 54 |
+
in_channels=1024,
|
| 55 |
+
feat_channels=1024,
|
| 56 |
+
anchor_generator=dict(
|
| 57 |
+
type=AnchorGenerator,
|
| 58 |
+
scales=[2, 4, 8, 16, 32],
|
| 59 |
+
ratios=[0.5, 1.0, 2.0],
|
| 60 |
+
strides=[16]),
|
| 61 |
+
bbox_coder=dict(
|
| 62 |
+
type=DeltaXYWHBBoxCoder,
|
| 63 |
+
target_means=[.0, .0, .0, .0],
|
| 64 |
+
target_stds=[1.0, 1.0, 1.0, 1.0]),
|
| 65 |
+
loss_cls=dict(
|
| 66 |
+
type=CrossEntropyLoss, use_sigmoid=True, loss_weight=1.0),
|
| 67 |
+
loss_bbox=dict(type=L1Loss, loss_weight=1.0)),
|
| 68 |
+
roi_head=dict(
|
| 69 |
+
type=StandardRoIHead,
|
| 70 |
+
shared_head=dict(
|
| 71 |
+
type=ResLayer,
|
| 72 |
+
depth=50,
|
| 73 |
+
stage=3,
|
| 74 |
+
stride=2,
|
| 75 |
+
dilation=1,
|
| 76 |
+
style='caffe',
|
| 77 |
+
norm_cfg=norm_cfg,
|
| 78 |
+
norm_eval=True),
|
| 79 |
+
bbox_roi_extractor=dict(
|
| 80 |
+
type=SingleRoIExtractor,
|
| 81 |
+
roi_layer=dict(type=RoIAlign, output_size=14, sampling_ratio=0),
|
| 82 |
+
out_channels=1024,
|
| 83 |
+
featmap_strides=[16]),
|
| 84 |
+
bbox_head=dict(
|
| 85 |
+
type=BBoxHead,
|
| 86 |
+
with_avg_pool=True,
|
| 87 |
+
roi_feat_size=7,
|
| 88 |
+
in_channels=2048,
|
| 89 |
+
num_classes=80,
|
| 90 |
+
bbox_coder=dict(
|
| 91 |
+
type=DeltaXYWHBBoxCoder,
|
| 92 |
+
target_means=[0., 0., 0., 0.],
|
| 93 |
+
target_stds=[0.1, 0.1, 0.2, 0.2]),
|
| 94 |
+
reg_class_agnostic=False,
|
| 95 |
+
loss_cls=dict(
|
| 96 |
+
type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
|
| 97 |
+
loss_bbox=dict(type=L1Loss, loss_weight=1.0)),
|
| 98 |
+
mask_roi_extractor=None,
|
| 99 |
+
mask_head=dict(
|
| 100 |
+
type=FCNMaskHead,
|
| 101 |
+
num_convs=0,
|
| 102 |
+
in_channels=2048,
|
| 103 |
+
conv_out_channels=256,
|
| 104 |
+
num_classes=80,
|
| 105 |
+
loss_mask=dict(
|
| 106 |
+
type=CrossEntropyLoss, use_mask=True, loss_weight=1.0))),
|
| 107 |
+
# model training and testing settings
|
| 108 |
+
train_cfg=dict(
|
| 109 |
+
rpn=dict(
|
| 110 |
+
assigner=dict(
|
| 111 |
+
type=MaxIoUAssigner,
|
| 112 |
+
pos_iou_thr=0.7,
|
| 113 |
+
neg_iou_thr=0.3,
|
| 114 |
+
min_pos_iou=0.3,
|
| 115 |
+
match_low_quality=True,
|
| 116 |
+
ignore_iof_thr=-1),
|
| 117 |
+
sampler=dict(
|
| 118 |
+
type=RandomSampler,
|
| 119 |
+
num=256,
|
| 120 |
+
pos_fraction=0.5,
|
| 121 |
+
neg_pos_ub=-1,
|
| 122 |
+
add_gt_as_proposals=False),
|
| 123 |
+
allowed_border=0,
|
| 124 |
+
pos_weight=-1,
|
| 125 |
+
debug=False),
|
| 126 |
+
rpn_proposal=dict(
|
| 127 |
+
nms_pre=12000,
|
| 128 |
+
max_per_img=2000,
|
| 129 |
+
nms=dict(type=nms, iou_threshold=0.7),
|
| 130 |
+
min_bbox_size=0),
|
| 131 |
+
rcnn=dict(
|
| 132 |
+
assigner=dict(
|
| 133 |
+
type=MaxIoUAssigner,
|
| 134 |
+
pos_iou_thr=0.5,
|
| 135 |
+
neg_iou_thr=0.5,
|
| 136 |
+
min_pos_iou=0.5,
|
| 137 |
+
match_low_quality=False,
|
| 138 |
+
ignore_iof_thr=-1),
|
| 139 |
+
sampler=dict(
|
| 140 |
+
type=RandomSampler,
|
| 141 |
+
num=512,
|
| 142 |
+
pos_fraction=0.25,
|
| 143 |
+
neg_pos_ub=-1,
|
| 144 |
+
add_gt_as_proposals=True),
|
| 145 |
+
mask_size=14,
|
| 146 |
+
pos_weight=-1,
|
| 147 |
+
debug=False)),
|
| 148 |
+
test_cfg=dict(
|
| 149 |
+
rpn=dict(
|
| 150 |
+
nms_pre=6000,
|
| 151 |
+
max_per_img=1000,
|
| 152 |
+
nms=dict(type=nms, iou_threshold=0.7),
|
| 153 |
+
min_bbox_size=0),
|
| 154 |
+
rcnn=dict(
|
| 155 |
+
score_thr=0.05,
|
| 156 |
+
nms=dict(type=nms, iou_threshold=0.5),
|
| 157 |
+
max_per_img=100,
|
| 158 |
+
mask_thr_binary=0.5)))
|
head_extractor/build/lib/mmdet/configs/_base_/models/mask_rcnn_r50_fpn.py
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
from mmcv.ops import RoIAlign, nms
|
| 3 |
+
from mmengine.model.weight_init import PretrainedInit
|
| 4 |
+
from torch.nn import BatchNorm2d
|
| 5 |
+
|
| 6 |
+
from mmdet.models.backbones.resnet import ResNet
|
| 7 |
+
from mmdet.models.data_preprocessors.data_preprocessor import \
|
| 8 |
+
DetDataPreprocessor
|
| 9 |
+
from mmdet.models.dense_heads.rpn_head import RPNHead
|
| 10 |
+
from mmdet.models.detectors.mask_rcnn import MaskRCNN
|
| 11 |
+
from mmdet.models.losses.cross_entropy_loss import CrossEntropyLoss
|
| 12 |
+
from mmdet.models.losses.smooth_l1_loss import L1Loss
|
| 13 |
+
from mmdet.models.necks.fpn import FPN
|
| 14 |
+
from mmdet.models.roi_heads.bbox_heads.convfc_bbox_head import \
|
| 15 |
+
Shared2FCBBoxHead
|
| 16 |
+
from mmdet.models.roi_heads.mask_heads.fcn_mask_head import FCNMaskHead
|
| 17 |
+
from mmdet.models.roi_heads.roi_extractors.single_level_roi_extractor import \
|
| 18 |
+
SingleRoIExtractor
|
| 19 |
+
from mmdet.models.roi_heads.standard_roi_head import StandardRoIHead
|
| 20 |
+
from mmdet.models.task_modules.assigners.max_iou_assigner import MaxIoUAssigner
|
| 21 |
+
from mmdet.models.task_modules.coders.delta_xywh_bbox_coder import \
|
| 22 |
+
DeltaXYWHBBoxCoder
|
| 23 |
+
from mmdet.models.task_modules.prior_generators.anchor_generator import \
|
| 24 |
+
AnchorGenerator
|
| 25 |
+
from mmdet.models.task_modules.samplers.random_sampler import RandomSampler
|
| 26 |
+
|
| 27 |
+
# model settings
|
| 28 |
+
model = dict(
|
| 29 |
+
type=MaskRCNN,
|
| 30 |
+
data_preprocessor=dict(
|
| 31 |
+
type=DetDataPreprocessor,
|
| 32 |
+
mean=[123.675, 116.28, 103.53],
|
| 33 |
+
std=[58.395, 57.12, 57.375],
|
| 34 |
+
bgr_to_rgb=True,
|
| 35 |
+
pad_mask=True,
|
| 36 |
+
pad_size_divisor=32),
|
| 37 |
+
backbone=dict(
|
| 38 |
+
type=ResNet,
|
| 39 |
+
depth=50,
|
| 40 |
+
num_stages=4,
|
| 41 |
+
out_indices=(0, 1, 2, 3),
|
| 42 |
+
frozen_stages=1,
|
| 43 |
+
norm_cfg=dict(type=BatchNorm2d, requires_grad=True),
|
| 44 |
+
norm_eval=True,
|
| 45 |
+
style='pytorch',
|
| 46 |
+
init_cfg=dict(
|
| 47 |
+
type=PretrainedInit, checkpoint='torchvision://resnet50')),
|
| 48 |
+
neck=dict(
|
| 49 |
+
type=FPN,
|
| 50 |
+
in_channels=[256, 512, 1024, 2048],
|
| 51 |
+
out_channels=256,
|
| 52 |
+
num_outs=5),
|
| 53 |
+
rpn_head=dict(
|
| 54 |
+
type=RPNHead,
|
| 55 |
+
in_channels=256,
|
| 56 |
+
feat_channels=256,
|
| 57 |
+
anchor_generator=dict(
|
| 58 |
+
type=AnchorGenerator,
|
| 59 |
+
scales=[8],
|
| 60 |
+
ratios=[0.5, 1.0, 2.0],
|
| 61 |
+
strides=[4, 8, 16, 32, 64]),
|
| 62 |
+
bbox_coder=dict(
|
| 63 |
+
type=DeltaXYWHBBoxCoder,
|
| 64 |
+
target_means=[.0, .0, .0, .0],
|
| 65 |
+
target_stds=[1.0, 1.0, 1.0, 1.0]),
|
| 66 |
+
loss_cls=dict(
|
| 67 |
+
type=CrossEntropyLoss, use_sigmoid=True, loss_weight=1.0),
|
| 68 |
+
loss_bbox=dict(type=L1Loss, loss_weight=1.0)),
|
| 69 |
+
roi_head=dict(
|
| 70 |
+
type=StandardRoIHead,
|
| 71 |
+
bbox_roi_extractor=dict(
|
| 72 |
+
type=SingleRoIExtractor,
|
| 73 |
+
roi_layer=dict(type=RoIAlign, output_size=7, sampling_ratio=0),
|
| 74 |
+
out_channels=256,
|
| 75 |
+
featmap_strides=[4, 8, 16, 32]),
|
| 76 |
+
bbox_head=dict(
|
| 77 |
+
type=Shared2FCBBoxHead,
|
| 78 |
+
in_channels=256,
|
| 79 |
+
fc_out_channels=1024,
|
| 80 |
+
roi_feat_size=7,
|
| 81 |
+
num_classes=80,
|
| 82 |
+
bbox_coder=dict(
|
| 83 |
+
type=DeltaXYWHBBoxCoder,
|
| 84 |
+
target_means=[0., 0., 0., 0.],
|
| 85 |
+
target_stds=[0.1, 0.1, 0.2, 0.2]),
|
| 86 |
+
reg_class_agnostic=False,
|
| 87 |
+
loss_cls=dict(
|
| 88 |
+
type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
|
| 89 |
+
loss_bbox=dict(type=L1Loss, loss_weight=1.0)),
|
| 90 |
+
mask_roi_extractor=dict(
|
| 91 |
+
type=SingleRoIExtractor,
|
| 92 |
+
roi_layer=dict(type=RoIAlign, output_size=14, sampling_ratio=0),
|
| 93 |
+
out_channels=256,
|
| 94 |
+
featmap_strides=[4, 8, 16, 32]),
|
| 95 |
+
mask_head=dict(
|
| 96 |
+
type=FCNMaskHead,
|
| 97 |
+
num_convs=4,
|
| 98 |
+
in_channels=256,
|
| 99 |
+
conv_out_channels=256,
|
| 100 |
+
num_classes=80,
|
| 101 |
+
loss_mask=dict(
|
| 102 |
+
type=CrossEntropyLoss, use_mask=True, loss_weight=1.0))),
|
| 103 |
+
# model training and testing settings
|
| 104 |
+
train_cfg=dict(
|
| 105 |
+
rpn=dict(
|
| 106 |
+
assigner=dict(
|
| 107 |
+
type=MaxIoUAssigner,
|
| 108 |
+
pos_iou_thr=0.7,
|
| 109 |
+
neg_iou_thr=0.3,
|
| 110 |
+
min_pos_iou=0.3,
|
| 111 |
+
match_low_quality=True,
|
| 112 |
+
ignore_iof_thr=-1),
|
| 113 |
+
sampler=dict(
|
| 114 |
+
type=RandomSampler,
|
| 115 |
+
num=256,
|
| 116 |
+
pos_fraction=0.5,
|
| 117 |
+
neg_pos_ub=-1,
|
| 118 |
+
add_gt_as_proposals=False),
|
| 119 |
+
allowed_border=-1,
|
| 120 |
+
pos_weight=-1,
|
| 121 |
+
debug=False),
|
| 122 |
+
rpn_proposal=dict(
|
| 123 |
+
nms_pre=2000,
|
| 124 |
+
max_per_img=1000,
|
| 125 |
+
nms=dict(type=nms, iou_threshold=0.7),
|
| 126 |
+
min_bbox_size=0),
|
| 127 |
+
rcnn=dict(
|
| 128 |
+
assigner=dict(
|
| 129 |
+
type=MaxIoUAssigner,
|
| 130 |
+
pos_iou_thr=0.5,
|
| 131 |
+
neg_iou_thr=0.5,
|
| 132 |
+
min_pos_iou=0.5,
|
| 133 |
+
match_low_quality=True,
|
| 134 |
+
ignore_iof_thr=-1),
|
| 135 |
+
sampler=dict(
|
| 136 |
+
type=RandomSampler,
|
| 137 |
+
num=512,
|
| 138 |
+
pos_fraction=0.25,
|
| 139 |
+
neg_pos_ub=-1,
|
| 140 |
+
add_gt_as_proposals=True),
|
| 141 |
+
mask_size=28,
|
| 142 |
+
pos_weight=-1,
|
| 143 |
+
debug=False)),
|
| 144 |
+
test_cfg=dict(
|
| 145 |
+
rpn=dict(
|
| 146 |
+
nms_pre=1000,
|
| 147 |
+
max_per_img=1000,
|
| 148 |
+
nms=dict(type=nms, iou_threshold=0.7),
|
| 149 |
+
min_bbox_size=0),
|
| 150 |
+
rcnn=dict(
|
| 151 |
+
score_thr=0.05,
|
| 152 |
+
nms=dict(type=nms, iou_threshold=0.5),
|
| 153 |
+
max_per_img=100,
|
| 154 |
+
mask_thr_binary=0.5)))
|
head_extractor/build/lib/mmdet/configs/_base_/models/retinanet_r50_fpn.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
from mmcv.ops import nms
|
| 3 |
+
from torch.nn import BatchNorm2d
|
| 4 |
+
|
| 5 |
+
from mmdet.models import (FPN, DetDataPreprocessor, FocalLoss, L1Loss, ResNet,
|
| 6 |
+
RetinaHead, RetinaNet)
|
| 7 |
+
from mmdet.models.task_modules import (AnchorGenerator, DeltaXYWHBBoxCoder,
|
| 8 |
+
MaxIoUAssigner, PseudoSampler)
|
| 9 |
+
|
| 10 |
+
# model settings
|
| 11 |
+
model = dict(
|
| 12 |
+
type=RetinaNet,
|
| 13 |
+
data_preprocessor=dict(
|
| 14 |
+
type=DetDataPreprocessor,
|
| 15 |
+
mean=[123.675, 116.28, 103.53],
|
| 16 |
+
std=[58.395, 57.12, 57.375],
|
| 17 |
+
bgr_to_rgb=True,
|
| 18 |
+
pad_size_divisor=32),
|
| 19 |
+
backbone=dict(
|
| 20 |
+
type=ResNet,
|
| 21 |
+
depth=50,
|
| 22 |
+
num_stages=4,
|
| 23 |
+
out_indices=(0, 1, 2, 3),
|
| 24 |
+
frozen_stages=1,
|
| 25 |
+
norm_cfg=dict(type=BatchNorm2d, requires_grad=True),
|
| 26 |
+
norm_eval=True,
|
| 27 |
+
style='pytorch',
|
| 28 |
+
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
|
| 29 |
+
neck=dict(
|
| 30 |
+
type=FPN,
|
| 31 |
+
in_channels=[256, 512, 1024, 2048],
|
| 32 |
+
out_channels=256,
|
| 33 |
+
start_level=1,
|
| 34 |
+
add_extra_convs='on_input',
|
| 35 |
+
num_outs=5),
|
| 36 |
+
bbox_head=dict(
|
| 37 |
+
type=RetinaHead,
|
| 38 |
+
num_classes=80,
|
| 39 |
+
in_channels=256,
|
| 40 |
+
stacked_convs=4,
|
| 41 |
+
feat_channels=256,
|
| 42 |
+
anchor_generator=dict(
|
| 43 |
+
type=AnchorGenerator,
|
| 44 |
+
octave_base_scale=4,
|
| 45 |
+
scales_per_octave=3,
|
| 46 |
+
ratios=[0.5, 1.0, 2.0],
|
| 47 |
+
strides=[8, 16, 32, 64, 128]),
|
| 48 |
+
bbox_coder=dict(
|
| 49 |
+
type=DeltaXYWHBBoxCoder,
|
| 50 |
+
target_means=[.0, .0, .0, .0],
|
| 51 |
+
target_stds=[1.0, 1.0, 1.0, 1.0]),
|
| 52 |
+
loss_cls=dict(
|
| 53 |
+
type=FocalLoss,
|
| 54 |
+
use_sigmoid=True,
|
| 55 |
+
gamma=2.0,
|
| 56 |
+
alpha=0.25,
|
| 57 |
+
loss_weight=1.0),
|
| 58 |
+
loss_bbox=dict(type=L1Loss, loss_weight=1.0)),
|
| 59 |
+
# model training and testing settings
|
| 60 |
+
train_cfg=dict(
|
| 61 |
+
assigner=dict(
|
| 62 |
+
type=MaxIoUAssigner,
|
| 63 |
+
pos_iou_thr=0.5,
|
| 64 |
+
neg_iou_thr=0.4,
|
| 65 |
+
min_pos_iou=0,
|
| 66 |
+
ignore_iof_thr=-1),
|
| 67 |
+
sampler=dict(
|
| 68 |
+
type=PseudoSampler), # Focal loss should use PseudoSampler
|
| 69 |
+
allowed_border=-1,
|
| 70 |
+
pos_weight=-1,
|
| 71 |
+
debug=False),
|
| 72 |
+
test_cfg=dict(
|
| 73 |
+
nms_pre=1000,
|
| 74 |
+
min_bbox_size=0,
|
| 75 |
+
score_thr=0.05,
|
| 76 |
+
nms=dict(type=nms, iou_threshold=0.5),
|
| 77 |
+
max_per_img=100))
|
head_extractor/build/lib/mmdet/configs/_base_/schedules/schedule_1x.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
|
| 3 |
+
from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
|
| 4 |
+
from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
|
| 5 |
+
from torch.optim.sgd import SGD
|
| 6 |
+
|
| 7 |
+
# training schedule for 1x
|
| 8 |
+
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=12, val_interval=1)
|
| 9 |
+
val_cfg = dict(type=ValLoop)
|
| 10 |
+
test_cfg = dict(type=TestLoop)
|
| 11 |
+
|
| 12 |
+
# learning rate
|
| 13 |
+
param_scheduler = [
|
| 14 |
+
dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=500),
|
| 15 |
+
dict(
|
| 16 |
+
type=MultiStepLR,
|
| 17 |
+
begin=0,
|
| 18 |
+
end=12,
|
| 19 |
+
by_epoch=True,
|
| 20 |
+
milestones=[8, 11],
|
| 21 |
+
gamma=0.1)
|
| 22 |
+
]
|
| 23 |
+
|
| 24 |
+
# optimizer
|
| 25 |
+
optim_wrapper = dict(
|
| 26 |
+
type=OptimWrapper,
|
| 27 |
+
optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001))
|
| 28 |
+
|
| 29 |
+
# Default setting for scaling LR automatically
|
| 30 |
+
# - `enable` means enable scaling LR automatically
|
| 31 |
+
# or not by default.
|
| 32 |
+
# - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
|
| 33 |
+
auto_scale_lr = dict(enable=False, base_batch_size=16)
|
head_extractor/build/lib/mmdet/configs/_base_/schedules/schedule_2x.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
|
| 3 |
+
from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
|
| 4 |
+
from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
|
| 5 |
+
from torch.optim.sgd import SGD
|
| 6 |
+
|
| 7 |
+
# training schedule for 1x
|
| 8 |
+
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=24, val_interval=1)
|
| 9 |
+
val_cfg = dict(type=ValLoop)
|
| 10 |
+
test_cfg = dict(type=TestLoop)
|
| 11 |
+
|
| 12 |
+
# learning rate
|
| 13 |
+
param_scheduler = [
|
| 14 |
+
dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=500),
|
| 15 |
+
dict(
|
| 16 |
+
type=MultiStepLR,
|
| 17 |
+
begin=0,
|
| 18 |
+
end=24,
|
| 19 |
+
by_epoch=True,
|
| 20 |
+
milestones=[16, 22],
|
| 21 |
+
gamma=0.1)
|
| 22 |
+
]
|
| 23 |
+
|
| 24 |
+
# optimizer
|
| 25 |
+
optim_wrapper = dict(
|
| 26 |
+
type=OptimWrapper,
|
| 27 |
+
optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001))
|
| 28 |
+
|
| 29 |
+
# Default setting for scaling LR automatically
|
| 30 |
+
# - `enable` means enable scaling LR automatically
|
| 31 |
+
# or not by default.
|
| 32 |
+
# - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
|
| 33 |
+
auto_scale_lr = dict(enable=False, base_batch_size=16)
|
head_extractor/build/lib/mmdet/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
|
| 3 |
+
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
|
| 4 |
+
# mmcv >= 2.0.1
|
| 5 |
+
# mmengine >= 0.8.0
|
| 6 |
+
|
| 7 |
+
from mmengine.config import read_base
|
| 8 |
+
|
| 9 |
+
with read_base():
|
| 10 |
+
from .._base_.datasets.coco_instance import *
|
| 11 |
+
from .._base_.default_runtime import *
|
| 12 |
+
from .._base_.models.cascade_mask_rcnn_r50_fpn import *
|
| 13 |
+
from .._base_.schedules.schedule_1x import *
|
head_extractor/build/lib/mmdet/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
|
| 3 |
+
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
|
| 4 |
+
# mmcv >= 2.0.1
|
| 5 |
+
# mmengine >= 0.8.0
|
| 6 |
+
|
| 7 |
+
from mmengine.config import read_base
|
| 8 |
+
|
| 9 |
+
with read_base():
|
| 10 |
+
from .._base_.datasets.coco_detection import *
|
| 11 |
+
from .._base_.default_runtime import *
|
| 12 |
+
from .._base_.models.cascade_rcnn_r50_fpn import *
|
| 13 |
+
from .._base_.schedules.schedule_1x import *
|
head_extractor/build/lib/mmdet/configs/common/lsj_100e_coco_detection.py
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
|
| 3 |
+
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
|
| 4 |
+
# mmcv >= 2.0.1
|
| 5 |
+
# mmengine >= 0.8.0
|
| 6 |
+
|
| 7 |
+
from mmengine.config import read_base
|
| 8 |
+
|
| 9 |
+
with read_base():
|
| 10 |
+
from .._base_.default_runtime import *
|
| 11 |
+
|
| 12 |
+
from mmengine.dataset.sampler import DefaultSampler
|
| 13 |
+
from mmengine.optim import OptimWrapper
|
| 14 |
+
from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
|
| 15 |
+
from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
|
| 16 |
+
from torch.optim import SGD
|
| 17 |
+
|
| 18 |
+
from mmdet.datasets import CocoDataset, RepeatDataset
|
| 19 |
+
from mmdet.datasets.transforms.formatting import PackDetInputs
|
| 20 |
+
from mmdet.datasets.transforms.loading import (FilterAnnotations,
|
| 21 |
+
LoadAnnotations,
|
| 22 |
+
LoadImageFromFile)
|
| 23 |
+
from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
|
| 24 |
+
Pad, RandomCrop, RandomFlip,
|
| 25 |
+
RandomResize, Resize)
|
| 26 |
+
from mmdet.evaluation import CocoMetric
|
| 27 |
+
|
| 28 |
+
# dataset settings
|
| 29 |
+
dataset_type = CocoDataset
|
| 30 |
+
data_root = 'data/coco/'
|
| 31 |
+
image_size = (1024, 1024)
|
| 32 |
+
|
| 33 |
+
backend_args = None
|
| 34 |
+
|
| 35 |
+
train_pipeline = [
|
| 36 |
+
dict(type=LoadImageFromFile, backend_args=backend_args),
|
| 37 |
+
dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
|
| 38 |
+
dict(
|
| 39 |
+
type=RandomResize,
|
| 40 |
+
scale=image_size,
|
| 41 |
+
ratio_range=(0.1, 2.0),
|
| 42 |
+
keep_ratio=True),
|
| 43 |
+
dict(
|
| 44 |
+
type=RandomCrop,
|
| 45 |
+
crop_type='absolute_range',
|
| 46 |
+
crop_size=image_size,
|
| 47 |
+
recompute_bbox=True,
|
| 48 |
+
allow_negative_crop=True),
|
| 49 |
+
dict(type=FilterAnnotations, min_gt_bbox_wh=(1e-2, 1e-2)),
|
| 50 |
+
dict(type=RandomFlip, prob=0.5),
|
| 51 |
+
dict(type=PackDetInputs)
|
| 52 |
+
]
|
| 53 |
+
test_pipeline = [
|
| 54 |
+
dict(type=LoadImageFromFile, backend_args=backend_args),
|
| 55 |
+
dict(type=Resize, scale=(1333, 800), keep_ratio=True),
|
| 56 |
+
dict(type=LoadAnnotations, with_bbox=True),
|
| 57 |
+
dict(
|
| 58 |
+
type=PackDetInputs,
|
| 59 |
+
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
|
| 60 |
+
'scale_factor'))
|
| 61 |
+
]
|
| 62 |
+
|
| 63 |
+
# Use RepeatDataset to speed up training
|
| 64 |
+
train_dataloader = dict(
|
| 65 |
+
batch_size=2,
|
| 66 |
+
num_workers=2,
|
| 67 |
+
persistent_workers=True,
|
| 68 |
+
sampler=dict(type=DefaultSampler, shuffle=True),
|
| 69 |
+
dataset=dict(
|
| 70 |
+
type=RepeatDataset,
|
| 71 |
+
times=4, # simply change this from 2 to 16 for 50e - 400e training.
|
| 72 |
+
dataset=dict(
|
| 73 |
+
type=dataset_type,
|
| 74 |
+
data_root=data_root,
|
| 75 |
+
ann_file='annotations/instances_train2017.json',
|
| 76 |
+
data_prefix=dict(img='train2017/'),
|
| 77 |
+
filter_cfg=dict(filter_empty_gt=True, min_size=32),
|
| 78 |
+
pipeline=train_pipeline,
|
| 79 |
+
backend_args=backend_args)))
|
| 80 |
+
val_dataloader = dict(
|
| 81 |
+
batch_size=1,
|
| 82 |
+
num_workers=2,
|
| 83 |
+
persistent_workers=True,
|
| 84 |
+
drop_last=False,
|
| 85 |
+
sampler=dict(type=DefaultSampler, shuffle=False),
|
| 86 |
+
dataset=dict(
|
| 87 |
+
type=dataset_type,
|
| 88 |
+
data_root=data_root,
|
| 89 |
+
ann_file='annotations/instances_val2017.json',
|
| 90 |
+
data_prefix=dict(img='val2017/'),
|
| 91 |
+
test_mode=True,
|
| 92 |
+
pipeline=test_pipeline,
|
| 93 |
+
backend_args=backend_args))
|
| 94 |
+
test_dataloader = val_dataloader
|
| 95 |
+
|
| 96 |
+
val_evaluator = dict(
|
| 97 |
+
type=CocoMetric,
|
| 98 |
+
ann_file=data_root + 'annotations/instances_val2017.json',
|
| 99 |
+
metric=['bbox', 'segm'],
|
| 100 |
+
format_only=False,
|
| 101 |
+
backend_args=backend_args)
|
| 102 |
+
test_evaluator = val_evaluator
|
| 103 |
+
|
| 104 |
+
max_epochs = 25
|
| 105 |
+
|
| 106 |
+
train_cfg = dict(
|
| 107 |
+
type=EpochBasedTrainLoop, max_epochs=max_epochs, val_interval=5)
|
| 108 |
+
val_cfg = dict(type=ValLoop)
|
| 109 |
+
test_cfg = dict(type=TestLoop)
|
| 110 |
+
|
| 111 |
+
# optimizer assumes bs=64
|
| 112 |
+
optim_wrapper = dict(
|
| 113 |
+
type=OptimWrapper,
|
| 114 |
+
optimizer=dict(type=SGD, lr=0.1, momentum=0.9, weight_decay=0.00004))
|
| 115 |
+
|
| 116 |
+
# learning rate
|
| 117 |
+
param_scheduler = [
|
| 118 |
+
dict(type=LinearLR, start_factor=0.067, by_epoch=False, begin=0, end=500),
|
| 119 |
+
dict(
|
| 120 |
+
type=MultiStepLR,
|
| 121 |
+
begin=0,
|
| 122 |
+
end=max_epochs,
|
| 123 |
+
by_epoch=True,
|
| 124 |
+
milestones=[22, 24],
|
| 125 |
+
gamma=0.1)
|
| 126 |
+
]
|
| 127 |
+
|
| 128 |
+
# only keep latest 2 checkpoints
|
| 129 |
+
default_hooks.update(dict(checkpoint=dict(max_keep_ckpts=2)))
|
| 130 |
+
|
| 131 |
+
# NOTE: `auto_scale_lr` is for automatically scaling LR,
|
| 132 |
+
# USER SHOULD NOT CHANGE ITS VALUES.
|
| 133 |
+
# base_batch_size = (32 GPUs) x (2 samples per GPU)
|
| 134 |
+
auto_scale_lr = dict(base_batch_size=64)
|
head_extractor/build/lib/mmdet/configs/common/lsj_100e_coco_instance.py
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
|
| 3 |
+
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
|
| 4 |
+
# mmcv >= 2.0.1
|
| 5 |
+
# mmengine >= 0.8.0
|
| 6 |
+
|
| 7 |
+
from mmengine.config import read_base
|
| 8 |
+
|
| 9 |
+
with read_base():
|
| 10 |
+
from .._base_.default_runtime import *
|
| 11 |
+
|
| 12 |
+
from mmengine.dataset.sampler import DefaultSampler
|
| 13 |
+
from mmengine.optim import OptimWrapper
|
| 14 |
+
from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
|
| 15 |
+
from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
|
| 16 |
+
from torch.optim import SGD
|
| 17 |
+
|
| 18 |
+
from mmdet.datasets import CocoDataset, RepeatDataset
|
| 19 |
+
from mmdet.datasets.transforms.formatting import PackDetInputs
|
| 20 |
+
from mmdet.datasets.transforms.loading import (FilterAnnotations,
|
| 21 |
+
LoadAnnotations,
|
| 22 |
+
LoadImageFromFile)
|
| 23 |
+
from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
|
| 24 |
+
Pad, RandomCrop, RandomFlip,
|
| 25 |
+
RandomResize, Resize)
|
| 26 |
+
from mmdet.evaluation import CocoMetric
|
| 27 |
+
|
| 28 |
+
# dataset settings
|
| 29 |
+
dataset_type = CocoDataset
|
| 30 |
+
data_root = 'data/coco/'
|
| 31 |
+
image_size = (1024, 1024)
|
| 32 |
+
|
| 33 |
+
backend_args = None
|
| 34 |
+
|
| 35 |
+
train_pipeline = [
|
| 36 |
+
dict(type=LoadImageFromFile, backend_args=backend_args),
|
| 37 |
+
dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
|
| 38 |
+
dict(
|
| 39 |
+
type=RandomResize,
|
| 40 |
+
scale=image_size,
|
| 41 |
+
ratio_range=(0.1, 2.0),
|
| 42 |
+
keep_ratio=True),
|
| 43 |
+
dict(
|
| 44 |
+
type=RandomCrop,
|
| 45 |
+
crop_type='absolute_range',
|
| 46 |
+
crop_size=image_size,
|
| 47 |
+
recompute_bbox=True,
|
| 48 |
+
allow_negative_crop=True),
|
| 49 |
+
dict(type=FilterAnnotations, min_gt_bbox_wh=(1e-2, 1e-2)),
|
| 50 |
+
dict(type=RandomFlip, prob=0.5),
|
| 51 |
+
dict(type=PackDetInputs)
|
| 52 |
+
]
|
| 53 |
+
test_pipeline = [
|
| 54 |
+
dict(type=LoadImageFromFile, backend_args=backend_args),
|
| 55 |
+
dict(type=Resize, scale=(1333, 800), keep_ratio=True),
|
| 56 |
+
dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
|
| 57 |
+
dict(
|
| 58 |
+
type=PackDetInputs,
|
| 59 |
+
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
|
| 60 |
+
'scale_factor'))
|
| 61 |
+
]
|
| 62 |
+
|
| 63 |
+
# Use RepeatDataset to speed up training
|
| 64 |
+
train_dataloader = dict(
|
| 65 |
+
batch_size=2,
|
| 66 |
+
num_workers=2,
|
| 67 |
+
persistent_workers=True,
|
| 68 |
+
sampler=dict(type=DefaultSampler, shuffle=True),
|
| 69 |
+
dataset=dict(
|
| 70 |
+
type=RepeatDataset,
|
| 71 |
+
times=4, # simply change this from 2 to 16 for 50e - 400e training.
|
| 72 |
+
dataset=dict(
|
| 73 |
+
type=dataset_type,
|
| 74 |
+
data_root=data_root,
|
| 75 |
+
ann_file='annotations/instances_train2017.json',
|
| 76 |
+
data_prefix=dict(img='train2017/'),
|
| 77 |
+
filter_cfg=dict(filter_empty_gt=True, min_size=32),
|
| 78 |
+
pipeline=train_pipeline,
|
| 79 |
+
backend_args=backend_args)))
|
| 80 |
+
val_dataloader = dict(
|
| 81 |
+
batch_size=1,
|
| 82 |
+
num_workers=2,
|
| 83 |
+
persistent_workers=True,
|
| 84 |
+
drop_last=False,
|
| 85 |
+
sampler=dict(type=DefaultSampler, shuffle=False),
|
| 86 |
+
dataset=dict(
|
| 87 |
+
type=dataset_type,
|
| 88 |
+
data_root=data_root,
|
| 89 |
+
ann_file='annotations/instances_val2017.json',
|
| 90 |
+
data_prefix=dict(img='val2017/'),
|
| 91 |
+
test_mode=True,
|
| 92 |
+
pipeline=test_pipeline,
|
| 93 |
+
backend_args=backend_args))
|
| 94 |
+
test_dataloader = val_dataloader
|
| 95 |
+
|
| 96 |
+
val_evaluator = dict(
|
| 97 |
+
type=CocoMetric,
|
| 98 |
+
ann_file=data_root + 'annotations/instances_val2017.json',
|
| 99 |
+
metric=['bbox', 'segm'],
|
| 100 |
+
format_only=False,
|
| 101 |
+
backend_args=backend_args)
|
| 102 |
+
test_evaluator = val_evaluator
|
| 103 |
+
|
| 104 |
+
max_epochs = 25
|
| 105 |
+
|
| 106 |
+
train_cfg = dict(
|
| 107 |
+
type=EpochBasedTrainLoop, max_epochs=max_epochs, val_interval=5)
|
| 108 |
+
val_cfg = dict(type=ValLoop)
|
| 109 |
+
test_cfg = dict(type=TestLoop)
|
| 110 |
+
|
| 111 |
+
# optimizer assumes bs=64
|
| 112 |
+
optim_wrapper = dict(
|
| 113 |
+
type=OptimWrapper,
|
| 114 |
+
optimizer=dict(type=SGD, lr=0.1, momentum=0.9, weight_decay=0.00004))
|
| 115 |
+
|
| 116 |
+
# learning rate
|
| 117 |
+
param_scheduler = [
|
| 118 |
+
dict(type=LinearLR, start_factor=0.067, by_epoch=False, begin=0, end=500),
|
| 119 |
+
dict(
|
| 120 |
+
type=MultiStepLR,
|
| 121 |
+
begin=0,
|
| 122 |
+
end=max_epochs,
|
| 123 |
+
by_epoch=True,
|
| 124 |
+
milestones=[22, 24],
|
| 125 |
+
gamma=0.1)
|
| 126 |
+
]
|
| 127 |
+
|
| 128 |
+
# only keep latest 2 checkpoints
|
| 129 |
+
default_hooks.update(dict(checkpoint=dict(max_keep_ckpts=2)))
|
| 130 |
+
|
| 131 |
+
# NOTE: `auto_scale_lr` is for automatically scaling LR,
|
| 132 |
+
# USER SHOULD NOT CHANGE ITS VALUES.
|
| 133 |
+
# base_batch_size = (32 GPUs) x (2 samples per GPU)
|
| 134 |
+
auto_scale_lr = dict(base_batch_size=64)
|
head_extractor/build/lib/mmdet/configs/common/lsj_200e_coco_detection.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
|
| 3 |
+
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
|
| 4 |
+
# mmcv >= 2.0.1
|
| 5 |
+
# mmengine >= 0.8.0
|
| 6 |
+
|
| 7 |
+
from mmengine.config import read_base
|
| 8 |
+
|
| 9 |
+
with read_base():
|
| 10 |
+
from .lsj_100e_coco_detection import *
|
| 11 |
+
|
| 12 |
+
# 8x25=200e
|
| 13 |
+
train_dataloader.update(dict(dataset=dict(times=8)))
|
| 14 |
+
|
| 15 |
+
# learning rate
|
| 16 |
+
param_scheduler = [
|
| 17 |
+
dict(type=LinearLR, start_factor=0.067, by_epoch=False, begin=0, end=1000),
|
| 18 |
+
dict(
|
| 19 |
+
type=MultiStepLR,
|
| 20 |
+
begin=0,
|
| 21 |
+
end=25,
|
| 22 |
+
by_epoch=True,
|
| 23 |
+
milestones=[22, 24],
|
| 24 |
+
gamma=0.1)
|
| 25 |
+
]
|
head_extractor/build/lib/mmdet/configs/common/lsj_200e_coco_instance.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
|
| 3 |
+
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
|
| 4 |
+
# mmcv >= 2.0.1
|
| 5 |
+
# mmengine >= 0.8.0
|
| 6 |
+
|
| 7 |
+
from mmengine.config import read_base
|
| 8 |
+
|
| 9 |
+
with read_base():
|
| 10 |
+
from .lsj_100e_coco_instance import *
|
| 11 |
+
|
| 12 |
+
# 8x25=200e
|
| 13 |
+
train_dataloader.update(dict(dataset=dict(times=8)))
|
| 14 |
+
|
| 15 |
+
# learning rate
|
| 16 |
+
param_scheduler = [
|
| 17 |
+
dict(type=LinearLR, start_factor=0.067, by_epoch=False, begin=0, end=1000),
|
| 18 |
+
dict(
|
| 19 |
+
type=MultiStepLR,
|
| 20 |
+
begin=0,
|
| 21 |
+
end=25,
|
| 22 |
+
by_epoch=True,
|
| 23 |
+
milestones=[22, 24],
|
| 24 |
+
gamma=0.1)
|
| 25 |
+
]
|
head_extractor/build/lib/mmdet/configs/common/ms_3x_coco.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
|
| 3 |
+
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
|
| 4 |
+
# mmcv >= 2.0.1
|
| 5 |
+
# mmengine >= 0.8.0
|
| 6 |
+
|
| 7 |
+
from mmengine.config import read_base
|
| 8 |
+
|
| 9 |
+
with read_base():
|
| 10 |
+
from .._base_.default_runtime import *
|
| 11 |
+
|
| 12 |
+
from mmcv.transforms import RandomResize
|
| 13 |
+
from mmengine.dataset import RepeatDataset
|
| 14 |
+
from mmengine.dataset.sampler import DefaultSampler
|
| 15 |
+
from mmengine.optim import OptimWrapper
|
| 16 |
+
from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
|
| 17 |
+
from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
|
| 18 |
+
from torch.optim import SGD
|
| 19 |
+
|
| 20 |
+
from mmdet.datasets import AspectRatioBatchSampler, CocoDataset
|
| 21 |
+
from mmdet.datasets.transforms.formatting import PackDetInputs
|
| 22 |
+
from mmdet.datasets.transforms.loading import (LoadAnnotations,
|
| 23 |
+
LoadImageFromFile)
|
| 24 |
+
from mmdet.datasets.transforms.transforms import RandomFlip, Resize
|
| 25 |
+
from mmdet.evaluation import CocoMetric
|
| 26 |
+
|
| 27 |
+
# dataset settings
|
| 28 |
+
dataset_type = CocoDataset
|
| 29 |
+
data_root = 'data/coco/'
|
| 30 |
+
|
| 31 |
+
# Example to use different file client
|
| 32 |
+
# Method 1: simply set the data root and let the file I/O module
|
| 33 |
+
# automatically infer from prefix (not support LMDB and Memcache yet)
|
| 34 |
+
|
| 35 |
+
# data_root = 's3://openmmlab/datasets/detection/coco/'
|
| 36 |
+
|
| 37 |
+
# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
|
| 38 |
+
# backend_args = dict(
|
| 39 |
+
# backend='petrel',
|
| 40 |
+
# path_mapping=dict({
|
| 41 |
+
# './data/': 's3://openmmlab/datasets/detection/',
|
| 42 |
+
# 'data/': 's3://openmmlab/datasets/detection/'
|
| 43 |
+
# }))
|
| 44 |
+
backend_args = None
|
| 45 |
+
|
| 46 |
+
# In mstrain 3x config, img_scale=[(1333, 640), (1333, 800)],
|
| 47 |
+
# multiscale_mode='range'
|
| 48 |
+
train_pipeline = [
|
| 49 |
+
dict(type=LoadImageFromFile, backend_args=backend_args),
|
| 50 |
+
dict(type=LoadAnnotations, with_bbox=True),
|
| 51 |
+
dict(type=RandomResize, scale=[(1333, 640), (1333, 800)], keep_ratio=True),
|
| 52 |
+
dict(type=RandomFlip, prob=0.5),
|
| 53 |
+
dict(type=PackDetInputs)
|
| 54 |
+
]
|
| 55 |
+
test_pipeline = [
|
| 56 |
+
dict(type=LoadImageFromFile, backend_args=backend_args),
|
| 57 |
+
dict(type=Resize, scale=(1333, 800), keep_ratio=True),
|
| 58 |
+
dict(type=LoadAnnotations, with_bbox=True),
|
| 59 |
+
dict(
|
| 60 |
+
type=PackDetInputs,
|
| 61 |
+
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
|
| 62 |
+
'scale_factor'))
|
| 63 |
+
]
|
| 64 |
+
train_dataloader = dict(
|
| 65 |
+
batch_size=2,
|
| 66 |
+
num_workers=2,
|
| 67 |
+
persistent_workers=True,
|
| 68 |
+
pin_memory=True,
|
| 69 |
+
sampler=dict(type=DefaultSampler, shuffle=True),
|
| 70 |
+
batch_sampler=dict(type=AspectRatioBatchSampler),
|
| 71 |
+
dataset=dict(
|
| 72 |
+
type=RepeatDataset,
|
| 73 |
+
times=3,
|
| 74 |
+
dataset=dict(
|
| 75 |
+
type=dataset_type,
|
| 76 |
+
data_root=data_root,
|
| 77 |
+
ann_file='annotations/instances_train2017.json',
|
| 78 |
+
data_prefix=dict(img='train2017/'),
|
| 79 |
+
filter_cfg=dict(filter_empty_gt=True, min_size=32),
|
| 80 |
+
pipeline=train_pipeline,
|
| 81 |
+
backend_args=backend_args)))
|
| 82 |
+
val_dataloader = dict(
|
| 83 |
+
batch_size=1,
|
| 84 |
+
num_workers=2,
|
| 85 |
+
persistent_workers=True,
|
| 86 |
+
drop_last=False,
|
| 87 |
+
sampler=dict(type=DefaultSampler, shuffle=False),
|
| 88 |
+
dataset=dict(
|
| 89 |
+
type=dataset_type,
|
| 90 |
+
data_root=data_root,
|
| 91 |
+
ann_file='annotations/instances_val2017.json',
|
| 92 |
+
data_prefix=dict(img='val2017/'),
|
| 93 |
+
test_mode=True,
|
| 94 |
+
pipeline=test_pipeline,
|
| 95 |
+
backend_args=backend_args))
|
| 96 |
+
test_dataloader = val_dataloader
|
| 97 |
+
|
| 98 |
+
val_evaluator = dict(
|
| 99 |
+
type=CocoMetric,
|
| 100 |
+
ann_file=data_root + 'annotations/instances_val2017.json',
|
| 101 |
+
metric='bbox',
|
| 102 |
+
backend_args=backend_args)
|
| 103 |
+
test_evaluator = val_evaluator
|
| 104 |
+
|
| 105 |
+
# training schedule for 3x with `RepeatDataset`
|
| 106 |
+
train_cfg = dict(type=EpochBasedTrainLoop, max_iters=12, val_interval=1)
|
| 107 |
+
val_cfg = dict(type=ValLoop)
|
| 108 |
+
test_cfg = dict(type=TestLoop)
|
| 109 |
+
|
| 110 |
+
# learning rate
|
| 111 |
+
param_scheduler = [
|
| 112 |
+
dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=500),
|
| 113 |
+
dict(
|
| 114 |
+
type=MultiStepLR,
|
| 115 |
+
begin=0,
|
| 116 |
+
end=12,
|
| 117 |
+
by_epoch=False,
|
| 118 |
+
milestones=[9, 11],
|
| 119 |
+
gamma=0.1)
|
| 120 |
+
]
|
| 121 |
+
|
| 122 |
+
# optimizer
|
| 123 |
+
optim_wrapper = dict(
|
| 124 |
+
type=OptimWrapper,
|
| 125 |
+
optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001))
|
| 126 |
+
# Default setting for scaling LR automatically
|
| 127 |
+
# - `enable` means enable scaling LR automatically
|
| 128 |
+
# or not by default.
|
| 129 |
+
# - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
|
| 130 |
+
auto_scale_lr = dict(enable=False, base_batch_size=16)
|
head_extractor/build/lib/mmdet/configs/common/ms_3x_coco_instance.py
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
|
| 3 |
+
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
|
| 4 |
+
# mmcv >= 2.0.1
|
| 5 |
+
# mmengine >= 0.8.0
|
| 6 |
+
|
| 7 |
+
from mmengine.config import read_base
|
| 8 |
+
|
| 9 |
+
with read_base():
|
| 10 |
+
from .._base_.default_runtime import *
|
| 11 |
+
|
| 12 |
+
from mmcv.transforms import RandomChoiceResize
|
| 13 |
+
from mmengine.dataset import RepeatDataset
|
| 14 |
+
from mmengine.dataset.sampler import DefaultSampler, InfiniteSampler
|
| 15 |
+
from mmengine.optim import OptimWrapper
|
| 16 |
+
from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
|
| 17 |
+
from mmengine.runner.loops import IterBasedTrainLoop, TestLoop, ValLoop
|
| 18 |
+
from torch.optim import SGD
|
| 19 |
+
|
| 20 |
+
from mmdet.datasets import AspectRatioBatchSampler, CocoDataset
|
| 21 |
+
from mmdet.datasets.transforms.formatting import PackDetInputs
|
| 22 |
+
from mmdet.datasets.transforms.loading import (FilterAnnotations,
|
| 23 |
+
LoadAnnotations,
|
| 24 |
+
LoadImageFromFile)
|
| 25 |
+
from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
|
| 26 |
+
Pad, RandomCrop, RandomFlip,
|
| 27 |
+
RandomResize, Resize)
|
| 28 |
+
from mmdet.evaluation import CocoMetric
|
| 29 |
+
|
| 30 |
+
# dataset settings
|
| 31 |
+
dataset_type = CocoDataset
|
| 32 |
+
data_root = 'data/coco/'
|
| 33 |
+
|
| 34 |
+
# Example to use different file client
|
| 35 |
+
# Method 1: simply set the data root and let the file I/O module
|
| 36 |
+
# automatically infer from prefix (not support LMDB and Memcache yet)
|
| 37 |
+
|
| 38 |
+
# data_root = 's3://openmmlab/datasets/detection/coco/'
|
| 39 |
+
|
| 40 |
+
# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
|
| 41 |
+
# backend_args = dict(
|
| 42 |
+
# backend='petrel',
|
| 43 |
+
# path_mapping=dict({
|
| 44 |
+
# './data/': 's3://openmmlab/datasets/detection/',
|
| 45 |
+
# 'data/': 's3://openmmlab/datasets/detection/'
|
| 46 |
+
# }))
|
| 47 |
+
backend_args = None
|
| 48 |
+
|
| 49 |
+
train_pipeline = [
|
| 50 |
+
dict(type=LoadImageFromFile, backend_args=backend_args),
|
| 51 |
+
dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
|
| 52 |
+
dict(
|
| 53 |
+
type='RandomResize', scale=[(1333, 640), (1333, 800)],
|
| 54 |
+
keep_ratio=True),
|
| 55 |
+
dict(type=RandomFlip, prob=0.5),
|
| 56 |
+
dict(type=PackDetInputs)
|
| 57 |
+
]
|
| 58 |
+
test_pipeline = [
|
| 59 |
+
dict(type=LoadImageFromFile, backend_args=backend_args),
|
| 60 |
+
dict(type=Resize, scale=(1333, 800), keep_ratio=True),
|
| 61 |
+
dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
|
| 62 |
+
dict(
|
| 63 |
+
type=PackDetInputs,
|
| 64 |
+
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
|
| 65 |
+
'scale_factor'))
|
| 66 |
+
]
|
| 67 |
+
train_dataloader.update(
|
| 68 |
+
dict(
|
| 69 |
+
batch_size=2,
|
| 70 |
+
num_workers=2,
|
| 71 |
+
persistent_workers=True,
|
| 72 |
+
sampler=dict(type=DefaultSampler, shuffle=True),
|
| 73 |
+
batch_sampler=dict(type=AspectRatioBatchSampler),
|
| 74 |
+
dataset=dict(
|
| 75 |
+
type=RepeatDataset,
|
| 76 |
+
times=3,
|
| 77 |
+
dataset=dict(
|
| 78 |
+
type=dataset_type,
|
| 79 |
+
data_root=data_root,
|
| 80 |
+
ann_file='annotations/instances_train2017.json',
|
| 81 |
+
data_prefix=dict(img='train2017/'),
|
| 82 |
+
filter_cfg=dict(filter_empty_gt=True, min_size=32),
|
| 83 |
+
pipeline=train_pipeline,
|
| 84 |
+
backend_args=backend_args))))
|
| 85 |
+
val_dataloader.update(
|
| 86 |
+
dict(
|
| 87 |
+
batch_size=1,
|
| 88 |
+
num_workers=2,
|
| 89 |
+
persistent_workers=True,
|
| 90 |
+
drop_last=False,
|
| 91 |
+
sampler=dict(type=DefaultSampler, shuffle=False),
|
| 92 |
+
dataset=dict(
|
| 93 |
+
type=dataset_type,
|
| 94 |
+
data_root=data_root,
|
| 95 |
+
ann_file='annotations/instances_val2017.json',
|
| 96 |
+
data_prefix=dict(img='val2017/'),
|
| 97 |
+
test_mode=True,
|
| 98 |
+
pipeline=test_pipeline,
|
| 99 |
+
backend_args=backend_args)))
|
| 100 |
+
test_dataloader = val_dataloader
|
| 101 |
+
|
| 102 |
+
val_evaluator.update(
|
| 103 |
+
dict(
|
| 104 |
+
type=CocoMetric,
|
| 105 |
+
ann_file=data_root + 'annotations/instances_val2017.json',
|
| 106 |
+
metric='bbox',
|
| 107 |
+
backend_args=backend_args))
|
| 108 |
+
test_evaluator = val_evaluator
|
| 109 |
+
|
| 110 |
+
# training schedule for 3x with `RepeatDataset`
|
| 111 |
+
train_cfg.update(dict(type=EpochBasedTrainLoop, max_epochs=12, val_interval=1))
|
| 112 |
+
val_cfg.update(dict(type=ValLoop))
|
| 113 |
+
test_cfg.update(dict(type=TestLoop))
|
| 114 |
+
|
| 115 |
+
# learning rate
|
| 116 |
+
param_scheduler = [
|
| 117 |
+
dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=500),
|
| 118 |
+
dict(
|
| 119 |
+
type=MultiStepLR,
|
| 120 |
+
begin=0,
|
| 121 |
+
end=12,
|
| 122 |
+
by_epoch=False,
|
| 123 |
+
milestones=[9, 11],
|
| 124 |
+
gamma=0.1)
|
| 125 |
+
]
|
| 126 |
+
|
| 127 |
+
# optimizer
|
| 128 |
+
optim_wrapper.update(
|
| 129 |
+
dict(
|
| 130 |
+
type=OptimWrapper,
|
| 131 |
+
optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001)))
|
| 132 |
+
# Default setting for scaling LR automatically
|
| 133 |
+
# - `enable` means enable scaling LR automatically
|
| 134 |
+
# or not by default.
|
| 135 |
+
# - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
|
| 136 |
+
auto_scale_lr.update(dict(enable=False, base_batch_size=16))
|
head_extractor/build/lib/mmdet/configs/common/ms_90k_coco.py
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
|
| 3 |
+
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
|
| 4 |
+
# mmcv >= 2.0.1
|
| 5 |
+
# mmengine >= 0.8.0
|
| 6 |
+
|
| 7 |
+
from mmengine.config import read_base
|
| 8 |
+
|
| 9 |
+
with read_base():
|
| 10 |
+
from .._base_.default_runtime import *
|
| 11 |
+
|
| 12 |
+
from mmcv.transforms import RandomChoiceResize
|
| 13 |
+
from mmengine.dataset import RepeatDataset
|
| 14 |
+
from mmengine.dataset.sampler import DefaultSampler, InfiniteSampler
|
| 15 |
+
from mmengine.optim import OptimWrapper
|
| 16 |
+
from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
|
| 17 |
+
from mmengine.runner.loops import IterBasedTrainLoop, TestLoop, ValLoop
|
| 18 |
+
from torch.optim import SGD
|
| 19 |
+
|
| 20 |
+
from mmdet.datasets import AspectRatioBatchSampler, CocoDataset
|
| 21 |
+
from mmdet.datasets.transforms.formatting import PackDetInputs
|
| 22 |
+
from mmdet.datasets.transforms.loading import (FilterAnnotations,
|
| 23 |
+
LoadAnnotations,
|
| 24 |
+
LoadImageFromFile)
|
| 25 |
+
from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
|
| 26 |
+
Pad, RandomCrop, RandomFlip,
|
| 27 |
+
RandomResize, Resize)
|
| 28 |
+
from mmdet.evaluation import CocoMetric
|
| 29 |
+
|
| 30 |
+
# dataset settings
|
| 31 |
+
dataset_type = CocoDataset
|
| 32 |
+
data_root = 'data/coco/'
|
| 33 |
+
# Example to use different file client
|
| 34 |
+
# Method 1: simply set the data root and let the file I/O module
|
| 35 |
+
# automatically infer from prefix (not support LMDB and Memcache yet)
|
| 36 |
+
|
| 37 |
+
# data_root = 's3://openmmlab/datasets/detection/coco/'
|
| 38 |
+
|
| 39 |
+
# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
|
| 40 |
+
# backend_args = dict(
|
| 41 |
+
# backend='petrel',
|
| 42 |
+
# path_mapping=dict({
|
| 43 |
+
# './data/': 's3://openmmlab/datasets/detection/',
|
| 44 |
+
# 'data/': 's3://openmmlab/datasets/detection/'
|
| 45 |
+
# }))
|
| 46 |
+
backend_args = None
|
| 47 |
+
|
| 48 |
+
# Align with Detectron2
|
| 49 |
+
backend = 'pillow'
|
| 50 |
+
train_pipeline = [
|
| 51 |
+
dict(
|
| 52 |
+
type=LoadImageFromFile,
|
| 53 |
+
backend_args=backend_args,
|
| 54 |
+
imdecode_backend=backend),
|
| 55 |
+
dict(type=LoadAnnotations, with_bbox=True),
|
| 56 |
+
dict(
|
| 57 |
+
type=RandomChoiceResize,
|
| 58 |
+
scales=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
|
| 59 |
+
(1333, 768), (1333, 800)],
|
| 60 |
+
keep_ratio=True,
|
| 61 |
+
backend=backend),
|
| 62 |
+
dict(type=RandomFlip, prob=0.5),
|
| 63 |
+
dict(type=PackDetInputs)
|
| 64 |
+
]
|
| 65 |
+
test_pipeline = [
|
| 66 |
+
dict(
|
| 67 |
+
type=LoadImageFromFile,
|
| 68 |
+
backend_args=backend_args,
|
| 69 |
+
imdecode_backend=backend),
|
| 70 |
+
dict(type=Resize, scale=(1333, 800), keep_ratio=True, backend=backend),
|
| 71 |
+
dict(type=LoadAnnotations, with_bbox=True),
|
| 72 |
+
dict(
|
| 73 |
+
type=PackDetInputs,
|
| 74 |
+
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
|
| 75 |
+
'scale_factor'))
|
| 76 |
+
]
|
| 77 |
+
train_dataloader.update(
|
| 78 |
+
dict(
|
| 79 |
+
batch_size=2,
|
| 80 |
+
num_workers=2,
|
| 81 |
+
persistent_workers=True,
|
| 82 |
+
pin_memory=True,
|
| 83 |
+
sampler=dict(type=InfiniteSampler, shuffle=True),
|
| 84 |
+
batch_sampler=dict(type=AspectRatioBatchSampler),
|
| 85 |
+
dataset=dict(
|
| 86 |
+
type=dataset_type,
|
| 87 |
+
data_root=data_root,
|
| 88 |
+
ann_file='annotations/instances_train2017.json',
|
| 89 |
+
data_prefix=dict(img='train2017/'),
|
| 90 |
+
filter_cfg=dict(filter_empty_gt=True, min_size=32),
|
| 91 |
+
pipeline=train_pipeline,
|
| 92 |
+
backend_args=backend_args)))
|
| 93 |
+
val_dataloader.update(
|
| 94 |
+
dict(
|
| 95 |
+
batch_size=1,
|
| 96 |
+
num_workers=2,
|
| 97 |
+
persistent_workers=True,
|
| 98 |
+
drop_last=False,
|
| 99 |
+
pin_memory=True,
|
| 100 |
+
sampler=dict(type=DefaultSampler, shuffle=False),
|
| 101 |
+
dataset=dict(
|
| 102 |
+
type=dataset_type,
|
| 103 |
+
data_root=data_root,
|
| 104 |
+
ann_file='annotations/instances_val2017.json',
|
| 105 |
+
data_prefix=dict(img='val2017/'),
|
| 106 |
+
test_mode=True,
|
| 107 |
+
pipeline=test_pipeline,
|
| 108 |
+
backend_args=backend_args)))
|
| 109 |
+
test_dataloader = val_dataloader
|
| 110 |
+
|
| 111 |
+
val_evaluator.update(
|
| 112 |
+
dict(
|
| 113 |
+
type=CocoMetric,
|
| 114 |
+
ann_file=data_root + 'annotations/instances_val2017.json',
|
| 115 |
+
metric='bbox',
|
| 116 |
+
format_only=False,
|
| 117 |
+
backend_args=backend_args))
|
| 118 |
+
test_evaluator = val_evaluator
|
| 119 |
+
|
| 120 |
+
# training schedule for 90k
|
| 121 |
+
max_iter = 90000
|
| 122 |
+
train_cfg.update(
|
| 123 |
+
dict(type=IterBasedTrainLoop, max_iters=max_iter, val_interval=10000))
|
| 124 |
+
val_cfg.update(dict(type=ValLoop))
|
| 125 |
+
test_cfg.update(dict(type=TestLoop))
|
| 126 |
+
|
| 127 |
+
# learning rate
|
| 128 |
+
param_scheduler = [
|
| 129 |
+
dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=1000),
|
| 130 |
+
dict(
|
| 131 |
+
type=MultiStepLR,
|
| 132 |
+
begin=0,
|
| 133 |
+
end=max_iter,
|
| 134 |
+
by_epoch=False,
|
| 135 |
+
milestones=[60000, 80000],
|
| 136 |
+
gamma=0.1)
|
| 137 |
+
]
|
| 138 |
+
|
| 139 |
+
# optimizer
|
| 140 |
+
optim_wrapper.update(
|
| 141 |
+
dict(
|
| 142 |
+
type=OptimWrapper,
|
| 143 |
+
optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001)))
|
| 144 |
+
# Default setting for scaling LR automatically
|
| 145 |
+
# - `enable` means enable scaling LR automatically
|
| 146 |
+
# or not by default.
|
| 147 |
+
# - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
|
| 148 |
+
auto_scale_lr.update(dict(enable=False, base_batch_size=16))
|
| 149 |
+
|
| 150 |
+
default_hooks.update(dict(checkpoint=dict(by_epoch=False, interval=10000)))
|
| 151 |
+
log_processor.update(dict(by_epoch=False))
|
head_extractor/build/lib/mmdet/configs/common/ms_poly_3x_coco_instance.py
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.

# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0

from mmengine.config import read_base

with read_base():
    from .._base_.default_runtime import *

from mmcv.transforms import RandomChoiceResize
from mmengine.dataset import RepeatDataset
from mmengine.dataset.sampler import DefaultSampler, InfiniteSampler
from mmengine.optim import OptimWrapper
from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
# `EpochBasedTrainLoop` was referenced below but never imported (only
# `IterBasedTrainLoop` was), causing a NameError when the config is parsed.
from mmengine.runner.loops import (EpochBasedTrainLoop, IterBasedTrainLoop,
                                   TestLoop, ValLoop)
from torch.optim import SGD

from mmdet.datasets import AspectRatioBatchSampler, CocoDataset
from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import (FilterAnnotations,
                                               LoadAnnotations,
                                               LoadImageFromFile)
from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
                                                  Pad, RandomCrop, RandomFlip,
                                                  RandomResize, Resize)
from mmdet.evaluation import CocoMetric

# dataset settings
dataset_type = CocoDataset
data_root = 'data/coco/'
# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)

# data_root = 's3://openmmlab/datasets/detection/coco/'

# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
backend_args = None

# In mstrain 3x config, img_scale=[(1333, 640), (1333, 800)],
# multiscale_mode='range'
train_pipeline = [
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(
        type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
    # Use the imported class (pure-Python config style) instead of the
    # 'RandomResize' registry string used elsewhere, so the reference is
    # validated at import time like every other transform in this file.
    dict(
        type=RandomResize, scale=[(1333, 640), (1333, 800)],
        keep_ratio=True),
    dict(type=RandomFlip, prob=0.5),
    dict(type=PackDetInputs)
]
test_pipeline = [
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(type=Resize, scale=(1333, 800), keep_ratio=True),
    dict(
        type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
    dict(
        type=PackDetInputs,
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
train_dataloader.update(
    dict(
        batch_size=2,
        num_workers=2,
        persistent_workers=True,
        pin_memory=True,
        sampler=dict(type=DefaultSampler, shuffle=True),
        batch_sampler=dict(type=AspectRatioBatchSampler),
        # `RepeatDataset` is a wrapper: the COCO settings must live on the
        # wrapped inner dataset, not on the wrapper itself.  `times=3` gives
        # the 3x schedule (12 epochs over the repeated data ~= 36 epochs).
        dataset=dict(
            type=RepeatDataset,
            times=3,
            dataset=dict(
                type=dataset_type,
                data_root=data_root,
                ann_file='annotations/instances_train2017.json',
                data_prefix=dict(img='train2017/'),
                filter_cfg=dict(filter_empty_gt=True, min_size=32),
                pipeline=train_pipeline,
                backend_args=backend_args))))
val_dataloader.update(
    dict(
        batch_size=2,
        num_workers=2,
        persistent_workers=True,
        drop_last=False,
        pin_memory=True,
        sampler=dict(type=DefaultSampler, shuffle=False),
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='annotations/instances_val2017.json',
            data_prefix=dict(img='val2017/'),
            test_mode=True,
            pipeline=test_pipeline,
            backend_args=backend_args)))
test_dataloader = val_dataloader

val_evaluator.update(
    dict(
        type=CocoMetric,
        ann_file=data_root + 'annotations/instances_val2017.json',
        metric=['bbox', 'segm'],
        backend_args=backend_args))
test_evaluator = val_evaluator

# training schedule for 3x with `RepeatDataset`.
# `EpochBasedTrainLoop` takes `max_epochs`, not `max_iters`.
train_cfg.update(dict(type=EpochBasedTrainLoop, max_epochs=12, val_interval=1))
val_cfg.update(dict(type=ValLoop))
test_cfg.update(dict(type=TestLoop))

# learning rate: linear warmup for the first 500 iterations, then step decay
# at epochs 9 and 11.  The milestones are epoch counts, so the MultiStepLR
# stage must be epoch-based (`by_epoch=True`) to match the epoch-based loop.
param_scheduler = [
    dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=500),
    dict(
        type=MultiStepLR,
        begin=0,
        end=12,
        by_epoch=True,
        milestones=[9, 11],
        gamma=0.1)
]

# optimizer
optim_wrapper.update(
    dict(
        type=OptimWrapper,
        optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001)))
# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
#   or not by default.
# - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
auto_scale_lr.update(dict(enable=False, base_batch_size=16))
|
head_extractor/build/lib/mmdet/configs/common/ms_poly_90k_coco_instance.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.

# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0

# Common COCO instance-segmentation data/schedule config: multi-scale
# training with polygon masks, iteration-based 90k schedule.

from mmengine.config import read_base

with read_base():
    from .._base_.default_runtime import *

from mmcv.transforms import RandomChoiceResize
from mmengine.dataset import RepeatDataset
from mmengine.dataset.sampler import DefaultSampler, InfiniteSampler
from mmengine.optim import OptimWrapper
from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
from mmengine.runner.loops import IterBasedTrainLoop, TestLoop, ValLoop
from torch.optim import SGD

from mmdet.datasets import AspectRatioBatchSampler, CocoDataset
from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import (FilterAnnotations,
                                               LoadAnnotations,
                                               LoadImageFromFile)
from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
                                                  Pad, RandomCrop, RandomFlip,
                                                  RandomResize, Resize)
from mmdet.evaluation import CocoMetric

# dataset settings
dataset_type = CocoDataset
data_root = 'data/coco/'
# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)

# data_root = 's3://openmmlab/datasets/detection/coco/'

# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
backend_args = None

# Align with Detectron2: decode images with PIL so pixel values match the
# Detectron2 reference training setup.
backend = 'pillow'
train_pipeline = [
    dict(
        type=LoadImageFromFile,
        backend_args=backend_args,
        imdecode_backend=backend),
    # poly2mask=False keeps masks as polygons (required for `metric='segm'`
    # with polygon-style targets).
    dict(
        type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
    # Multi-scale training: short side sampled from 640-800, long side 1333.
    dict(
        type=RandomChoiceResize,
        scales=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
                (1333, 768), (1333, 800)],
        keep_ratio=True,
        backend=backend),
    dict(type=RandomFlip, prob=0.5),
    dict(type=PackDetInputs)
]
test_pipeline = [
    dict(
        type=LoadImageFromFile,
        backend_args=backend_args,
        imdecode_backend=backend),
    dict(type=Resize, scale=(1333, 800), keep_ratio=True, backend=backend),
    dict(
        type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
    dict(
        type=PackDetInputs,
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
train_dataloader.update(
    dict(
        batch_size=2,
        num_workers=2,
        persistent_workers=True,
        pin_memory=True,
        # InfiniteSampler pairs with the iteration-based training loop below.
        sampler=dict(type=InfiniteSampler, shuffle=True),
        batch_sampler=dict(type=AspectRatioBatchSampler),
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='annotations/instances_train2017.json',
            data_prefix=dict(img='train2017/'),
            filter_cfg=dict(filter_empty_gt=True, min_size=32),
            pipeline=train_pipeline,
            backend_args=backend_args)))
val_dataloader.update(
    dict(
        batch_size=1,
        num_workers=2,
        persistent_workers=True,
        drop_last=False,
        pin_memory=True,
        sampler=dict(type=DefaultSampler, shuffle=False),
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='annotations/instances_val2017.json',
            data_prefix=dict(img='val2017/'),
            test_mode=True,
            pipeline=test_pipeline,
            backend_args=backend_args)))
test_dataloader = val_dataloader

val_evaluator.update(
    dict(
        type=CocoMetric,
        ann_file=data_root + 'annotations/instances_val2017.json',
        metric=['bbox', 'segm'],
        format_only=False,
        backend_args=backend_args))
test_evaluator = val_evaluator

# training schedule for 90k iterations, validating every 10k.
max_iter = 90000
train_cfg.update(
    dict(type=IterBasedTrainLoop, max_iters=max_iter, val_interval=10000))
val_cfg.update(dict(type=ValLoop))
test_cfg.update(dict(type=TestLoop))

# learning rate: 1k-iteration linear warmup, then 10x decays at 60k and 80k.
param_scheduler = [
    dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=1000),
    dict(
        type=MultiStepLR,
        begin=0,
        end=max_iter,
        by_epoch=False,
        milestones=[60000, 80000],
        gamma=0.1)
]

# optimizer
optim_wrapper.update(
    dict(
        type=OptimWrapper,
        optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001)))
# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
#   or not by default.
# - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
auto_scale_lr.update(dict(enable=False, base_batch_size=16))

# Iteration-based bookkeeping: checkpoint and logging by iteration, not epoch.
default_hooks.update(dict(checkpoint=dict(by_epoch=False, interval=10000)))
log_processor.update(dict(by_epoch=False))
|
head_extractor/build/lib/mmdet/configs/common/ssj_270_coco_instance.py
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.

# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0

from mmengine.config import read_base

with read_base():
    from .._base_.default_runtime import *

from mmcv.transforms import RandomChoiceResize
from mmengine.dataset import RepeatDataset
from mmengine.dataset.sampler import DefaultSampler, InfiniteSampler
from mmengine.optim import OptimWrapper
from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
from mmengine.runner.loops import IterBasedTrainLoop, TestLoop, ValLoop
from torch.optim import SGD

from mmdet.datasets import AspectRatioBatchSampler, CocoDataset
from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import (FilterAnnotations,
                                               LoadAnnotations,
                                               LoadImageFromFile)
from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
                                                  Pad, RandomCrop, RandomFlip,
                                                  RandomResize, Resize)
from mmdet.evaluation import CocoMetric

# dataset settings
dataset_type = CocoDataset
data_root = 'data/coco/'
# Base resolution that SSJ jitters around.  This was referenced by the
# train pipeline below but never defined, which made the config fail with
# a NameError at parse time.
image_size = (1024, 1024)
# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)

# data_root = 's3://openmmlab/datasets/detection/coco/'

# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
backend_args = None

# Standard Scale Jittering (SSJ) resizes and crops an image
# with a resize range of 0.8 to 1.25 of the original image size.
train_pipeline = [
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
    dict(
        type=RandomResize,
        scale=image_size,
        ratio_range=(0.8, 1.25),
        keep_ratio=True),
    # Use the imported classes (pure-Python config style) instead of the
    # 'RandomCrop' / 'FilterAnnotations' registry strings for consistency
    # with the rest of this file.
    dict(
        type=RandomCrop,
        crop_type='absolute_range',
        crop_size=image_size,
        recompute_bbox=True,
        allow_negative_crop=True),
    dict(type=FilterAnnotations, min_gt_bbox_wh=(1e-2, 1e-2)),
    dict(type=RandomFlip, prob=0.5),
    dict(type=PackDetInputs)
]
test_pipeline = [
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(type=Resize, scale=(1333, 800), keep_ratio=True),
    dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
    dict(
        type=PackDetInputs,
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
train_dataloader.update(
    dict(
        batch_size=2,
        num_workers=2,
        persistent_workers=True,
        # InfiniteSampler pairs with the iteration-based loop below.
        sampler=dict(type=InfiniteSampler),
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='annotations/instances_train2017.json',
            data_prefix=dict(img='train2017/'),
            filter_cfg=dict(filter_empty_gt=True, min_size=32),
            pipeline=train_pipeline,
            backend_args=backend_args)))
val_dataloader.update(
    dict(
        batch_size=1,
        num_workers=2,
        persistent_workers=True,
        drop_last=False,
        sampler=dict(type=DefaultSampler, shuffle=False),
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='annotations/instances_val2017.json',
            data_prefix=dict(img='val2017/'),
            test_mode=True,
            pipeline=test_pipeline,
            backend_args=backend_args)))
test_dataloader = val_dataloader

# NOTE: the evaluator was previously configured twice (an `.update(...)`
# immediately followed by a full reassignment with identical settings);
# the reassignment clobbered any keys inherited from the base config, so
# only the `.update` form is kept.
val_evaluator.update(
    dict(
        type=CocoMetric,
        ann_file=data_root + 'annotations/instances_val2017.json',
        metric=['bbox', 'segm'],
        format_only=False,
        backend_args=backend_args))
test_evaluator = val_evaluator

# The model is trained by 270k iterations with batch_size 64,
# which is roughly equivalent to 144 epochs.

max_iter = 270000
train_cfg.update(
    dict(type=IterBasedTrainLoop, max_iters=max_iter, val_interval=10000))
val_cfg.update(dict(type=ValLoop))
test_cfg.update(dict(type=TestLoop))

# learning rate: 1k-iteration warmup, then 10x decays at 90%/95%/97.5%
# of the schedule.
param_scheduler = [
    dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=1000),
    dict(
        type=MultiStepLR,
        begin=0,
        end=max_iter,
        by_epoch=False,
        milestones=[243000, 256500, 263250],
        gamma=0.1)
]

# optimizer
optim_wrapper.update(
    dict(
        type=OptimWrapper,
        optimizer=dict(type=SGD, lr=0.1, momentum=0.9, weight_decay=0.00004)))
# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
#   or not by default.
# - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
auto_scale_lr.update(dict(base_batch_size=64))

default_hooks.update(dict(checkpoint=dict(by_epoch=False, interval=10000)))
log_processor.update(dict(by_epoch=False))
|
head_extractor/build/lib/mmdet/configs/common/ssj_scp_270k_coco_instance.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.

# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0

from mmengine.config import read_base

with read_base():
    from .ssj_270_coco_instance import *

from mmdet.datasets import MultiImageMixDataset
from mmdet.datasets.transforms import CopyPaste

# dataset settings
dataset_type = CocoDataset
data_root = 'data/coco/'
image_size = (1024, 1024)
# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)

# data_root = 's3://openmmlab/datasets/detection/coco/'

# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
backend_args = None

# Standard Scale Jittering (SSJ) resizes and crops an image
# with a resize range of 0.8 to 1.25 of the original image size.
# `load_pipeline` runs per source image, before Copy-Paste mixing.
load_pipeline = [
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
    dict(
        type=RandomResize,
        scale=image_size,
        ratio_range=(0.8, 1.25),
        keep_ratio=True),
    dict(
        type=RandomCrop,
        crop_type='absolute_range',
        crop_size=image_size,
        recompute_bbox=True,
        allow_negative_crop=True),
    dict(type=FilterAnnotations, min_gt_bbox_wh=(1e-2, 1e-2)),
    dict(type=RandomFlip, prob=0.5),
    dict(type=Pad, size=image_size),
]
# `train_pipeline` runs on the mixed sample produced by
# `MultiImageMixDataset` (Simple Copy-Paste augmentation).
train_pipeline = [
    dict(type=CopyPaste, max_num_pasted=100),
    dict(type=PackDetInputs)
]

# The `MultiImageMixDataset` wrapper must be the dataloader's *dataset*.
# Previously `type=MultiImageMixDataset` and `pipeline=` were placed on the
# dataloader dict itself, which is not a valid DataLoader configuration.
train_dataloader.update(
    dict(
        dataset=dict(
            type=MultiImageMixDataset,
            dataset=dict(
                type=dataset_type,
                data_root=data_root,
                ann_file='annotations/instances_train2017.json',
                data_prefix=dict(img='train2017/'),
                filter_cfg=dict(filter_empty_gt=True, min_size=32),
                pipeline=load_pipeline,
                backend_args=backend_args),
            pipeline=train_pipeline)))
|
head_extractor/build/lib/mmdet/configs/deformable_detr/deformable_detr_r50_16xb2_50e_coco.py
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.

# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0

# Deformable DETR with a ResNet-50 backbone, trained on COCO detection
# for 50 epochs with 16 GPUs x 2 samples per GPU.

from mmengine.config import read_base

with read_base():
    from .._base_.datasets.coco_detection import *
    from .._base_.default_runtime import *

from mmcv.transforms import LoadImageFromFile, RandomChoice, RandomChoiceResize
from mmengine.optim.optimizer import OptimWrapper
from mmengine.optim.scheduler import MultiStepLR
from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
from torch.optim.adamw import AdamW

from mmdet.datasets.transforms import (LoadAnnotations, PackDetInputs,
                                       RandomCrop, RandomFlip, Resize)
from mmdet.models.backbones import ResNet
from mmdet.models.data_preprocessors import DetDataPreprocessor
from mmdet.models.dense_heads import DeformableDETRHead
from mmdet.models.detectors import DeformableDETR
from mmdet.models.losses import FocalLoss, GIoULoss, L1Loss
from mmdet.models.necks import ChannelMapper
from mmdet.models.task_modules import (BBoxL1Cost, FocalLossCost,
                                       HungarianAssigner, IoUCost)

model = dict(
    type=DeformableDETR,
    num_queries=300,
    num_feature_levels=4,
    # Single-stage variant: refinement / two-stage are enabled by the
    # `refine` and `refine_twostage` child configs.
    with_box_refine=False,
    as_two_stage=False,
    data_preprocessor=dict(
        type=DetDataPreprocessor,
        # ImageNet RGB mean/std used by the torchvision-pretrained backbone.
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True,
        pad_size_divisor=1),
    backbone=dict(
        type=ResNet,
        depth=50,
        num_stages=4,
        # C3-C5 feature maps feed the neck (C2 is skipped).
        out_indices=(1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=False),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    neck=dict(
        type=ChannelMapper,
        in_channels=[512, 1024, 2048],
        kernel_size=1,
        out_channels=256,
        act_cfg=None,
        norm_cfg=dict(type='GN', num_groups=32),
        num_outs=4),
    encoder=dict(  # DeformableDetrTransformerEncoder
        num_layers=6,
        layer_cfg=dict(  # DeformableDetrTransformerEncoderLayer
            self_attn_cfg=dict(  # MultiScaleDeformableAttention
                embed_dims=256,
                batch_first=True),
            ffn_cfg=dict(
                embed_dims=256, feedforward_channels=1024, ffn_drop=0.1))),
    decoder=dict(  # DeformableDetrTransformerDecoder
        num_layers=6,
        return_intermediate=True,
        layer_cfg=dict(  # DeformableDetrTransformerDecoderLayer
            self_attn_cfg=dict(  # MultiheadAttention
                embed_dims=256,
                num_heads=8,
                dropout=0.1,
                batch_first=True),
            cross_attn_cfg=dict(  # MultiScaleDeformableAttention
                embed_dims=256,
                batch_first=True),
            ffn_cfg=dict(
                embed_dims=256, feedforward_channels=1024, ffn_drop=0.1)),
        post_norm_cfg=None),
    positional_encoding=dict(num_feats=128, normalize=True, offset=-0.5),
    bbox_head=dict(
        type=DeformableDETRHead,
        num_classes=80,
        sync_cls_avg_factor=True,
        loss_cls=dict(
            type=FocalLoss,
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=2.0),
        loss_bbox=dict(type=L1Loss, loss_weight=5.0),
        loss_iou=dict(type=GIoULoss, loss_weight=2.0)),
    # training and testing settings.  The match costs mirror the loss
    # weights so assignment and optimization agree.
    train_cfg=dict(
        assigner=dict(
            type=HungarianAssigner,
            match_costs=[
                dict(type=FocalLossCost, weight=2.0),
                dict(type=BBoxL1Cost, weight=5.0, box_format='xywh'),
                dict(type=IoUCost, iou_mode='giou', weight=2.0)
            ])),
    test_cfg=dict(max_per_img=100))

# train_pipeline, NOTE the img_scale and the Pad's size_divisor is different
# from the default setting in mmdet.
train_pipeline = [
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(type=LoadAnnotations, with_bbox=True),
    dict(type=RandomFlip, prob=0.5),
    # DETR-style augmentation: either plain multi-scale resize, or
    # resize -> random crop -> resize (chosen with equal probability).
    dict(
        type=RandomChoice,
        transforms=[
            [
                dict(
                    type=RandomChoiceResize,
                    scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
                            (608, 1333), (640, 1333), (672, 1333), (704, 1333),
                            (736, 1333), (768, 1333), (800, 1333)],
                    resize_type=Resize,
                    keep_ratio=True)
            ],
            [
                dict(
                    type=RandomChoiceResize,
                    # The ratio of all images in the train dataset is < 7;
                    # follow the original implementation.
                    scales=[(400, 4200), (500, 4200), (600, 4200)],
                    resize_type=Resize,
                    keep_ratio=True),
                dict(
                    type=RandomCrop,
                    crop_type='absolute_range',
                    crop_size=(384, 600),
                    allow_negative_crop=True),
                dict(
                    type=RandomChoiceResize,
                    scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
                            (608, 1333), (640, 1333), (672, 1333), (704, 1333),
                            (736, 1333), (768, 1333), (800, 1333)],
                    resize_type=Resize,
                    keep_ratio=True)
            ]
        ]),
    dict(type=PackDetInputs)
]
# DETR-family models train on empty images too (filter_empty_gt=False).
train_dataloader.update(
    dict(
        dataset=dict(
            filter_cfg=dict(filter_empty_gt=False), pipeline=train_pipeline)))

# optimizer: AdamW with gradient clipping; backbone and the deformable
# attention's sampling offsets / reference points use a 10x smaller LR.
optim_wrapper = dict(
    type=OptimWrapper,
    optimizer=dict(type=AdamW, lr=0.0002, weight_decay=0.0001),
    clip_grad=dict(max_norm=0.1, norm_type=2),
    paramwise_cfg=dict(
        custom_keys={
            'backbone': dict(lr_mult=0.1),
            'sampling_offsets': dict(lr_mult=0.1),
            'reference_points': dict(lr_mult=0.1)
        }))

# learning policy: 50 epochs, single 10x LR drop at epoch 40.
max_epochs = 50
train_cfg = dict(
    type=EpochBasedTrainLoop, max_epochs=max_epochs, val_interval=1)
val_cfg = dict(type=ValLoop)
test_cfg = dict(type=TestLoop)

param_scheduler = [
    dict(
        type=MultiStepLR,
        begin=0,
        end=max_epochs,
        by_epoch=True,
        milestones=[40],
        gamma=0.1)
]

# NOTE: `auto_scale_lr` is for automatically scaling LR,
# USER SHOULD NOT CHANGE ITS VALUES.
# base_batch_size = (16 GPUs) x (2 samples per GPU)
auto_scale_lr = dict(base_batch_size=32)
|
head_extractor/build/lib/mmdet/configs/deformable_detr/deformable_detr_refine_r50_16xb2_50e_coco.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.

# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0

from mmengine.config import read_base

# Inherit the full Deformable-DETR R50 50e config and only toggle the
# iterative bounding-box refinement flag.
with read_base():
    from .deformable_detr_r50_16xb2_50e_coco import *

# Enable box refinement: each decoder layer refines the boxes predicted by
# the previous layer instead of regressing from fixed reference points.
model.update(dict(with_box_refine=True))
|
head_extractor/build/lib/mmdet/configs/deformable_detr/deformable_detr_refine_twostage_r50_16xb2_50e_coco.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.

# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0

from mmengine.config import read_base

# Inherit the box-refinement variant (which already sets
# `with_box_refine=True`) and additionally enable the two-stage mode.
with read_base():
    from .deformable_detr_refine_r50_16xb2_50e_coco import *

# Two-stage Deformable DETR: encoder output proposals initialize the
# decoder queries instead of learned query embeddings.
model.update(dict(as_two_stage=True))
|
head_extractor/build/lib/mmdet/configs/detr/detr_r101_8xb2_500e_coco.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
from mmengine.config import read_base
|
| 3 |
+
from mmengine.model.weight_init import PretrainedInit
|
| 4 |
+
|
| 5 |
+
with read_base():
|
| 6 |
+
from .detr_r50_8xb2_500e_coco import *
|
| 7 |
+
|
| 8 |
+
model.update(
|
| 9 |
+
dict(
|
| 10 |
+
backbone=dict(
|
| 11 |
+
depth=101,
|
| 12 |
+
init_cfg=dict(
|
| 13 |
+
type=PretrainedInit, checkpoint='torchvision://resnet101'))))
|
head_extractor/build/lib/mmdet/configs/detr/detr_r18_8xb2_500e_coco.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
from mmengine.config import read_base
|
| 3 |
+
from mmengine.model.weight_init import PretrainedInit
|
| 4 |
+
|
| 5 |
+
with read_base():
|
| 6 |
+
from .detr_r50_8xb2_500e_coco import *
|
| 7 |
+
|
| 8 |
+
model.update(
|
| 9 |
+
dict(
|
| 10 |
+
backbone=dict(
|
| 11 |
+
depth=18,
|
| 12 |
+
init_cfg=dict(
|
| 13 |
+
type=PretrainedInit, checkpoint='torchvision://resnet18')),
|
| 14 |
+
neck=dict(in_channels=[512])))
|
head_extractor/build/lib/mmdet/configs/detr/detr_r50_8xb2_150e_coco.py
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
from mmcv.transforms import RandomChoice, RandomChoiceResize
|
| 3 |
+
from mmcv.transforms.loading import LoadImageFromFile
|
| 4 |
+
from mmengine.config import read_base
|
| 5 |
+
from mmengine.model.weight_init import PretrainedInit
|
| 6 |
+
from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
|
| 7 |
+
from mmengine.optim.scheduler.lr_scheduler import MultiStepLR
|
| 8 |
+
from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
|
| 9 |
+
from torch.nn.modules.activation import ReLU
|
| 10 |
+
from torch.nn.modules.batchnorm import BatchNorm2d
|
| 11 |
+
from torch.optim.adamw import AdamW
|
| 12 |
+
|
| 13 |
+
from mmdet.datasets.transforms import (LoadAnnotations, PackDetInputs,
|
| 14 |
+
RandomCrop, RandomFlip, Resize)
|
| 15 |
+
from mmdet.models import (DETR, ChannelMapper, DetDataPreprocessor, DETRHead,
|
| 16 |
+
ResNet)
|
| 17 |
+
from mmdet.models.losses.cross_entropy_loss import CrossEntropyLoss
|
| 18 |
+
from mmdet.models.losses.iou_loss import GIoULoss
|
| 19 |
+
from mmdet.models.losses.smooth_l1_loss import L1Loss
|
| 20 |
+
from mmdet.models.task_modules import (BBoxL1Cost, ClassificationCost,
|
| 21 |
+
HungarianAssigner, IoUCost)
|
| 22 |
+
|
| 23 |
+
with read_base():
|
| 24 |
+
from .._base_.datasets.coco_detection import *
|
| 25 |
+
from .._base_.default_runtime import *
|
| 26 |
+
|
| 27 |
+
model = dict(
|
| 28 |
+
type=DETR,
|
| 29 |
+
num_queries=100,
|
| 30 |
+
data_preprocessor=dict(
|
| 31 |
+
type=DetDataPreprocessor,
|
| 32 |
+
mean=[123.675, 116.28, 103.53],
|
| 33 |
+
std=[58.395, 57.12, 57.375],
|
| 34 |
+
bgr_to_rgb=True,
|
| 35 |
+
pad_size_divisor=1),
|
| 36 |
+
backbone=dict(
|
| 37 |
+
type=ResNet,
|
| 38 |
+
depth=50,
|
| 39 |
+
num_stages=4,
|
| 40 |
+
out_indices=(3, ),
|
| 41 |
+
frozen_stages=1,
|
| 42 |
+
norm_cfg=dict(type=BatchNorm2d, requires_grad=False),
|
| 43 |
+
norm_eval=True,
|
| 44 |
+
style='pytorch',
|
| 45 |
+
init_cfg=dict(
|
| 46 |
+
type=PretrainedInit, checkpoint='torchvision://resnet50')),
|
| 47 |
+
neck=dict(
|
| 48 |
+
type=ChannelMapper,
|
| 49 |
+
in_channels=[2048],
|
| 50 |
+
kernel_size=1,
|
| 51 |
+
out_channels=256,
|
| 52 |
+
act_cfg=None,
|
| 53 |
+
norm_cfg=None,
|
| 54 |
+
num_outs=1),
|
| 55 |
+
encoder=dict( # DetrTransformerEncoder
|
| 56 |
+
num_layers=6,
|
| 57 |
+
layer_cfg=dict( # DetrTransformerEncoderLayer
|
| 58 |
+
self_attn_cfg=dict( # MultiheadAttention
|
| 59 |
+
embed_dims=256,
|
| 60 |
+
num_heads=8,
|
| 61 |
+
dropout=0.1,
|
| 62 |
+
batch_first=True),
|
| 63 |
+
ffn_cfg=dict(
|
| 64 |
+
embed_dims=256,
|
| 65 |
+
feedforward_channels=2048,
|
| 66 |
+
num_fcs=2,
|
| 67 |
+
ffn_drop=0.1,
|
| 68 |
+
act_cfg=dict(type=ReLU, inplace=True)))),
|
| 69 |
+
decoder=dict( # DetrTransformerDecoder
|
| 70 |
+
num_layers=6,
|
| 71 |
+
layer_cfg=dict( # DetrTransformerDecoderLayer
|
| 72 |
+
self_attn_cfg=dict( # MultiheadAttention
|
| 73 |
+
embed_dims=256,
|
| 74 |
+
num_heads=8,
|
| 75 |
+
dropout=0.1,
|
| 76 |
+
batch_first=True),
|
| 77 |
+
cross_attn_cfg=dict( # MultiheadAttention
|
| 78 |
+
embed_dims=256,
|
| 79 |
+
num_heads=8,
|
| 80 |
+
dropout=0.1,
|
| 81 |
+
batch_first=True),
|
| 82 |
+
ffn_cfg=dict(
|
| 83 |
+
embed_dims=256,
|
| 84 |
+
feedforward_channels=2048,
|
| 85 |
+
num_fcs=2,
|
| 86 |
+
ffn_drop=0.1,
|
| 87 |
+
act_cfg=dict(type=ReLU, inplace=True))),
|
| 88 |
+
return_intermediate=True),
|
| 89 |
+
positional_encoding=dict(num_feats=128, normalize=True),
|
| 90 |
+
bbox_head=dict(
|
| 91 |
+
type=DETRHead,
|
| 92 |
+
num_classes=80,
|
| 93 |
+
embed_dims=256,
|
| 94 |
+
loss_cls=dict(
|
| 95 |
+
type=CrossEntropyLoss,
|
| 96 |
+
bg_cls_weight=0.1,
|
| 97 |
+
use_sigmoid=False,
|
| 98 |
+
loss_weight=1.0,
|
| 99 |
+
class_weight=1.0),
|
| 100 |
+
loss_bbox=dict(type=L1Loss, loss_weight=5.0),
|
| 101 |
+
loss_iou=dict(type=GIoULoss, loss_weight=2.0)),
|
| 102 |
+
# training and testing settings
|
| 103 |
+
train_cfg=dict(
|
| 104 |
+
assigner=dict(
|
| 105 |
+
type=HungarianAssigner,
|
| 106 |
+
match_costs=[
|
| 107 |
+
dict(type=ClassificationCost, weight=1.),
|
| 108 |
+
dict(type=BBoxL1Cost, weight=5.0, box_format='xywh'),
|
| 109 |
+
dict(type=IoUCost, iou_mode='giou', weight=2.0)
|
| 110 |
+
])),
|
| 111 |
+
test_cfg=dict(max_per_img=100))
|
| 112 |
+
|
| 113 |
+
# train_pipeline, NOTE the img_scale and the Pad's size_divisor is different
|
| 114 |
+
# from the default setting in mmdet.
|
| 115 |
+
train_pipeline = [
|
| 116 |
+
dict(type=LoadImageFromFile, backend_args=backend_args),
|
| 117 |
+
dict(type=LoadAnnotations, with_bbox=True),
|
| 118 |
+
dict(type=RandomFlip, prob=0.5),
|
| 119 |
+
dict(
|
| 120 |
+
type=RandomChoice,
|
| 121 |
+
transforms=[[
|
| 122 |
+
dict(
|
| 123 |
+
type=RandomChoiceResize,
|
| 124 |
+
resize_type=Resize,
|
| 125 |
+
scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
|
| 126 |
+
(608, 1333), (640, 1333), (672, 1333), (704, 1333),
|
| 127 |
+
(736, 1333), (768, 1333), (800, 1333)],
|
| 128 |
+
keep_ratio=True)
|
| 129 |
+
],
|
| 130 |
+
[
|
| 131 |
+
dict(
|
| 132 |
+
type=RandomChoiceResize,
|
| 133 |
+
resize_type=Resize,
|
| 134 |
+
scales=[(400, 1333), (500, 1333), (600, 1333)],
|
| 135 |
+
keep_ratio=True),
|
| 136 |
+
dict(
|
| 137 |
+
type=RandomCrop,
|
| 138 |
+
crop_type='absolute_range',
|
| 139 |
+
crop_size=(384, 600),
|
| 140 |
+
allow_negative_crop=True),
|
| 141 |
+
dict(
|
| 142 |
+
type=RandomChoiceResize,
|
| 143 |
+
resize_type=Resize,
|
| 144 |
+
scales=[(480, 1333), (512, 1333), (544, 1333),
|
| 145 |
+
(576, 1333), (608, 1333), (640, 1333),
|
| 146 |
+
(672, 1333), (704, 1333), (736, 1333),
|
| 147 |
+
(768, 1333), (800, 1333)],
|
| 148 |
+
keep_ratio=True)
|
| 149 |
+
]]),
|
| 150 |
+
dict(type=PackDetInputs)
|
| 151 |
+
]
|
| 152 |
+
train_dataloader.update(dataset=dict(pipeline=train_pipeline))
|
| 153 |
+
|
| 154 |
+
# optimizer
|
| 155 |
+
optim_wrapper = dict(
|
| 156 |
+
type=OptimWrapper,
|
| 157 |
+
optimizer=dict(type=AdamW, lr=0.0001, weight_decay=0.0001),
|
| 158 |
+
clip_grad=dict(max_norm=0.1, norm_type=2),
|
| 159 |
+
paramwise_cfg=dict(
|
| 160 |
+
custom_keys={'backbone': dict(lr_mult=0.1, decay_mult=1.0)}))
|
| 161 |
+
|
| 162 |
+
# learning policy
|
| 163 |
+
max_epochs = 150
|
| 164 |
+
train_cfg = dict(
|
| 165 |
+
type=EpochBasedTrainLoop, max_epochs=max_epochs, val_interval=1)
|
| 166 |
+
val_cfg = dict(type=ValLoop)
|
| 167 |
+
test_cfg = dict(type=TestLoop)
|
| 168 |
+
|
| 169 |
+
param_scheduler = [
|
| 170 |
+
dict(
|
| 171 |
+
type=MultiStepLR,
|
| 172 |
+
begin=0,
|
| 173 |
+
end=max_epochs,
|
| 174 |
+
by_epoch=True,
|
| 175 |
+
milestones=[100],
|
| 176 |
+
gamma=0.1)
|
| 177 |
+
]
|
| 178 |
+
|
| 179 |
+
# NOTE: `auto_scale_lr` is for automatically scaling LR,
|
| 180 |
+
# USER SHOULD NOT CHANGE ITS VALUES.
|
| 181 |
+
# base_batch_size = (8 GPUs) x (2 samples per GPU)
|
| 182 |
+
auto_scale_lr = dict(base_batch_size=16)
|
head_extractor/build/lib/mmdet/configs/detr/detr_r50_8xb2_500e_coco.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
from mmengine.config import read_base
|
| 3 |
+
from mmengine.optim.scheduler.lr_scheduler import MultiStepLR
|
| 4 |
+
from mmengine.runner.loops import EpochBasedTrainLoop
|
| 5 |
+
|
| 6 |
+
with read_base():
|
| 7 |
+
from .detr_r50_8xb2_150e_coco import *
|
| 8 |
+
|
| 9 |
+
# learning policy
|
| 10 |
+
max_epochs = 500
|
| 11 |
+
train_cfg.update(
|
| 12 |
+
type=EpochBasedTrainLoop, max_epochs=max_epochs, val_interval=10)
|
| 13 |
+
|
| 14 |
+
param_scheduler = [
|
| 15 |
+
dict(
|
| 16 |
+
type=MultiStepLR,
|
| 17 |
+
begin=0,
|
| 18 |
+
end=max_epochs,
|
| 19 |
+
by_epoch=True,
|
| 20 |
+
milestones=[334],
|
| 21 |
+
gamma=0.1)
|
| 22 |
+
]
|
| 23 |
+
|
| 24 |
+
# only keep latest 2 checkpoints
|
| 25 |
+
default_hooks.update(checkpoint=dict(max_keep_ckpts=2))
|
head_extractor/build/lib/mmdet/configs/dino/dino_4scale_r50_8xb2_12e_coco.py
ADDED
|
@@ -0,0 +1,190 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
from mmcv.transforms import RandomChoice, RandomChoiceResize
|
| 3 |
+
from mmcv.transforms.loading import LoadImageFromFile
|
| 4 |
+
from mmengine.config import read_base
|
| 5 |
+
from mmengine.model.weight_init import PretrainedInit
|
| 6 |
+
from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
|
| 7 |
+
from mmengine.optim.scheduler.lr_scheduler import MultiStepLR
|
| 8 |
+
from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
|
| 9 |
+
from torch.nn.modules.batchnorm import BatchNorm2d
|
| 10 |
+
from torch.nn.modules.normalization import GroupNorm
|
| 11 |
+
from torch.optim.adamw import AdamW
|
| 12 |
+
|
| 13 |
+
from mmdet.datasets.transforms import (LoadAnnotations, PackDetInputs,
|
| 14 |
+
RandomCrop, RandomFlip, Resize)
|
| 15 |
+
from mmdet.models import (DINO, ChannelMapper, DetDataPreprocessor, DINOHead,
|
| 16 |
+
ResNet)
|
| 17 |
+
from mmdet.models.losses.focal_loss import FocalLoss
|
| 18 |
+
from mmdet.models.losses.iou_loss import GIoULoss
|
| 19 |
+
from mmdet.models.losses.smooth_l1_loss import L1Loss
|
| 20 |
+
from mmdet.models.task_modules import (BBoxL1Cost, FocalLossCost,
|
| 21 |
+
HungarianAssigner, IoUCost)
|
| 22 |
+
|
| 23 |
+
with read_base():
|
| 24 |
+
from .._base_.datasets.coco_detection import *
|
| 25 |
+
from .._base_.default_runtime import *
|
| 26 |
+
|
| 27 |
+
model = dict(
|
| 28 |
+
type=DINO,
|
| 29 |
+
num_queries=900, # num_matching_queries
|
| 30 |
+
with_box_refine=True,
|
| 31 |
+
as_two_stage=True,
|
| 32 |
+
data_preprocessor=dict(
|
| 33 |
+
type=DetDataPreprocessor,
|
| 34 |
+
mean=[123.675, 116.28, 103.53],
|
| 35 |
+
std=[58.395, 57.12, 57.375],
|
| 36 |
+
bgr_to_rgb=True,
|
| 37 |
+
pad_size_divisor=1),
|
| 38 |
+
backbone=dict(
|
| 39 |
+
type=ResNet,
|
| 40 |
+
depth=50,
|
| 41 |
+
num_stages=4,
|
| 42 |
+
out_indices=(1, 2, 3),
|
| 43 |
+
frozen_stages=1,
|
| 44 |
+
norm_cfg=dict(type=BatchNorm2d, requires_grad=False),
|
| 45 |
+
norm_eval=True,
|
| 46 |
+
style='pytorch',
|
| 47 |
+
init_cfg=dict(
|
| 48 |
+
type=PretrainedInit, checkpoint='torchvision://resnet50')),
|
| 49 |
+
neck=dict(
|
| 50 |
+
type=ChannelMapper,
|
| 51 |
+
in_channels=[512, 1024, 2048],
|
| 52 |
+
kernel_size=1,
|
| 53 |
+
out_channels=256,
|
| 54 |
+
act_cfg=None,
|
| 55 |
+
norm_cfg=dict(type=GroupNorm, num_groups=32),
|
| 56 |
+
num_outs=4),
|
| 57 |
+
encoder=dict(
|
| 58 |
+
num_layers=6,
|
| 59 |
+
layer_cfg=dict(
|
| 60 |
+
self_attn_cfg=dict(embed_dims=256, num_levels=4,
|
| 61 |
+
dropout=0.0), # 0.1 for DeformDETR
|
| 62 |
+
ffn_cfg=dict(
|
| 63 |
+
embed_dims=256,
|
| 64 |
+
feedforward_channels=2048, # 1024 for DeformDETR
|
| 65 |
+
ffn_drop=0.0))), # 0.1 for DeformDETR
|
| 66 |
+
decoder=dict(
|
| 67 |
+
num_layers=6,
|
| 68 |
+
return_intermediate=True,
|
| 69 |
+
layer_cfg=dict(
|
| 70 |
+
self_attn_cfg=dict(embed_dims=256, num_heads=8,
|
| 71 |
+
dropout=0.0), # 0.1 for DeformDETR
|
| 72 |
+
cross_attn_cfg=dict(embed_dims=256, num_levels=4,
|
| 73 |
+
dropout=0.0), # 0.1 for DeformDETR
|
| 74 |
+
ffn_cfg=dict(
|
| 75 |
+
embed_dims=256,
|
| 76 |
+
feedforward_channels=2048, # 1024 for DeformDETR
|
| 77 |
+
ffn_drop=0.0)), # 0.1 for DeformDETR
|
| 78 |
+
post_norm_cfg=None),
|
| 79 |
+
positional_encoding=dict(
|
| 80 |
+
num_feats=128,
|
| 81 |
+
normalize=True,
|
| 82 |
+
offset=0.0, # -0.5 for DeformDETR
|
| 83 |
+
temperature=20), # 10000 for DeformDETR
|
| 84 |
+
bbox_head=dict(
|
| 85 |
+
type=DINOHead,
|
| 86 |
+
num_classes=80,
|
| 87 |
+
sync_cls_avg_factor=True,
|
| 88 |
+
loss_cls=dict(
|
| 89 |
+
type=FocalLoss,
|
| 90 |
+
use_sigmoid=True,
|
| 91 |
+
gamma=2.0,
|
| 92 |
+
alpha=0.25,
|
| 93 |
+
loss_weight=1.0), # 2.0 in DeformDETR
|
| 94 |
+
loss_bbox=dict(type=L1Loss, loss_weight=5.0),
|
| 95 |
+
loss_iou=dict(type=GIoULoss, loss_weight=2.0)),
|
| 96 |
+
dn_cfg=dict( # TODO: Move to model.train_cfg ?
|
| 97 |
+
label_noise_scale=0.5,
|
| 98 |
+
box_noise_scale=1.0, # 0.4 for DN-DETR
|
| 99 |
+
group_cfg=dict(dynamic=True, num_groups=None,
|
| 100 |
+
num_dn_queries=100)), # TODO: half num_dn_queries
|
| 101 |
+
# training and testing settings
|
| 102 |
+
train_cfg=dict(
|
| 103 |
+
assigner=dict(
|
| 104 |
+
type=HungarianAssigner,
|
| 105 |
+
match_costs=[
|
| 106 |
+
dict(type=FocalLossCost, weight=2.0),
|
| 107 |
+
dict(type=BBoxL1Cost, weight=5.0, box_format='xywh'),
|
| 108 |
+
dict(type=IoUCost, iou_mode='giou', weight=2.0)
|
| 109 |
+
])),
|
| 110 |
+
test_cfg=dict(max_per_img=300)) # 100 for DeformDETR
|
| 111 |
+
|
| 112 |
+
# train_pipeline, NOTE the img_scale and the Pad's size_divisor is different
|
| 113 |
+
# from the default setting in mmdet.
|
| 114 |
+
train_pipeline = [
|
| 115 |
+
dict(type=LoadImageFromFile, backend_args=backend_args),
|
| 116 |
+
dict(type=LoadAnnotations, with_bbox=True),
|
| 117 |
+
dict(type=RandomFlip, prob=0.5),
|
| 118 |
+
dict(
|
| 119 |
+
type=RandomChoice,
|
| 120 |
+
transforms=[
|
| 121 |
+
[
|
| 122 |
+
dict(
|
| 123 |
+
type=RandomChoiceResize,
|
| 124 |
+
resize_type=Resize,
|
| 125 |
+
scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
|
| 126 |
+
(608, 1333), (640, 1333), (672, 1333), (704, 1333),
|
| 127 |
+
(736, 1333), (768, 1333), (800, 1333)],
|
| 128 |
+
keep_ratio=True)
|
| 129 |
+
],
|
| 130 |
+
[
|
| 131 |
+
dict(
|
| 132 |
+
type=RandomChoiceResize,
|
| 133 |
+
resize_type=Resize,
|
| 134 |
+
# The radio of all image in train dataset < 7
|
| 135 |
+
# follow the original implement
|
| 136 |
+
scales=[(400, 4200), (500, 4200), (600, 4200)],
|
| 137 |
+
keep_ratio=True),
|
| 138 |
+
dict(
|
| 139 |
+
type=RandomCrop,
|
| 140 |
+
crop_type='absolute_range',
|
| 141 |
+
crop_size=(384, 600),
|
| 142 |
+
allow_negative_crop=True),
|
| 143 |
+
dict(
|
| 144 |
+
type=RandomChoiceResize,
|
| 145 |
+
resize_type=Resize,
|
| 146 |
+
scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
|
| 147 |
+
(608, 1333), (640, 1333), (672, 1333), (704, 1333),
|
| 148 |
+
(736, 1333), (768, 1333), (800, 1333)],
|
| 149 |
+
keep_ratio=True)
|
| 150 |
+
]
|
| 151 |
+
]),
|
| 152 |
+
dict(type=PackDetInputs)
|
| 153 |
+
]
|
| 154 |
+
train_dataloader.update(
|
| 155 |
+
dataset=dict(
|
| 156 |
+
filter_cfg=dict(filter_empty_gt=False), pipeline=train_pipeline))
|
| 157 |
+
|
| 158 |
+
# optimizer
|
| 159 |
+
optim_wrapper = dict(
|
| 160 |
+
type=OptimWrapper,
|
| 161 |
+
optimizer=dict(
|
| 162 |
+
type=AdamW,
|
| 163 |
+
lr=0.0001, # 0.0002 for DeformDETR
|
| 164 |
+
weight_decay=0.0001),
|
| 165 |
+
clip_grad=dict(max_norm=0.1, norm_type=2),
|
| 166 |
+
paramwise_cfg=dict(custom_keys={'backbone': dict(lr_mult=0.1)})
|
| 167 |
+
) # custom_keys contains sampling_offsets and reference_points in DeformDETR # noqa
|
| 168 |
+
|
| 169 |
+
# learning policy
|
| 170 |
+
max_epochs = 12
|
| 171 |
+
train_cfg = dict(
|
| 172 |
+
type=EpochBasedTrainLoop, max_epochs=max_epochs, val_interval=1)
|
| 173 |
+
|
| 174 |
+
val_cfg = dict(type=ValLoop)
|
| 175 |
+
test_cfg = dict(type=TestLoop)
|
| 176 |
+
|
| 177 |
+
param_scheduler = [
|
| 178 |
+
dict(
|
| 179 |
+
type=MultiStepLR,
|
| 180 |
+
begin=0,
|
| 181 |
+
end=max_epochs,
|
| 182 |
+
by_epoch=True,
|
| 183 |
+
milestones=[11],
|
| 184 |
+
gamma=0.1)
|
| 185 |
+
]
|
| 186 |
+
|
| 187 |
+
# NOTE: `auto_scale_lr` is for automatically scaling LR,
|
| 188 |
+
# USER SHOULD NOT CHANGE ITS VALUES.
|
| 189 |
+
# base_batch_size = (8 GPUs) x (2 samples per GPU)
|
| 190 |
+
auto_scale_lr = dict(base_batch_size=16)
|
head_extractor/build/lib/mmdet/configs/dino/dino_4scale_r50_8xb2_24e_coco.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
from mmengine.config import read_base
|
| 3 |
+
from mmengine.runner.loops import EpochBasedTrainLoop
|
| 4 |
+
|
| 5 |
+
with read_base():
|
| 6 |
+
from .dino_4scale_r50_8xb2_12e_coco import *
|
| 7 |
+
|
| 8 |
+
max_epochs = 24
|
| 9 |
+
train_cfg.update(
|
| 10 |
+
dict(type=EpochBasedTrainLoop, max_epochs=max_epochs, val_interval=1))
|
| 11 |
+
|
| 12 |
+
param_scheduler[0].update(dict(milestones=[20]))
|
head_extractor/build/lib/mmdet/configs/dino/dino_4scale_r50_8xb2_36e_coco.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
from mmengine.config import read_base
|
| 3 |
+
from mmengine.runner.loops import EpochBasedTrainLoop
|
| 4 |
+
|
| 5 |
+
with read_base():
|
| 6 |
+
from .dino_4scale_r50_8xb2_12e_coco import *
|
| 7 |
+
|
| 8 |
+
max_epochs = 36
|
| 9 |
+
train_cfg.update(
|
| 10 |
+
dict(type=EpochBasedTrainLoop, max_epochs=max_epochs, val_interval=1))
|
| 11 |
+
|
| 12 |
+
param_scheduler[0].update(dict(milestones=[30]))
|