codyshen commited on
Commit
6ed4a9c
·
verified ·
1 Parent(s): 3c4d98e

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +70 -0
  2. head_extractor/assets/001.jpg +3 -0
  3. head_extractor/assets/001_head-black-bg.webp +0 -0
  4. head_extractor/assets/001_head-default.webp +0 -0
  5. head_extractor/assets/001_head-pad2square-false.webp +0 -0
  6. head_extractor/build/lib/head_extractor/__init__.py +6 -0
  7. head_extractor/build/lib/head_extractor/models/__init__.py +0 -0
  8. head_extractor/build/lib/head_extractor/models/depth_anything_large_mask2former_16xb1_160k_human_parsing_fashion_1024x1024.py +573 -0
  9. head_extractor/build/lib/head_extractor/processor.py +585 -0
  10. head_extractor/build/lib/mmdet/__init__.py +27 -0
  11. head_extractor/build/lib/mmdet/apis/__init__.py +9 -0
  12. head_extractor/build/lib/mmdet/apis/det_inferencer.py +652 -0
  13. head_extractor/build/lib/mmdet/apis/inference.py +372 -0
  14. head_extractor/build/lib/mmdet/configs/_base_/datasets/coco_detection.py +104 -0
  15. head_extractor/build/lib/mmdet/configs/_base_/datasets/coco_instance.py +106 -0
  16. head_extractor/build/lib/mmdet/configs/_base_/datasets/coco_instance_semantic.py +87 -0
  17. head_extractor/build/lib/mmdet/configs/_base_/datasets/coco_panoptic.py +105 -0
  18. head_extractor/build/lib/mmdet/configs/_base_/datasets/mot_challenge.py +101 -0
  19. head_extractor/build/lib/mmdet/configs/_base_/default_runtime.py +33 -0
  20. head_extractor/build/lib/mmdet/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py +220 -0
  21. head_extractor/build/lib/mmdet/configs/_base_/models/cascade_rcnn_r50_fpn.py +201 -0
  22. head_extractor/build/lib/mmdet/configs/_base_/models/faster_rcnn_r50_fpn.py +138 -0
  23. head_extractor/build/lib/mmdet/configs/_base_/models/mask_rcnn_r50_caffe_c4.py +158 -0
  24. head_extractor/build/lib/mmdet/configs/_base_/models/mask_rcnn_r50_fpn.py +154 -0
  25. head_extractor/build/lib/mmdet/configs/_base_/models/retinanet_r50_fpn.py +77 -0
  26. head_extractor/build/lib/mmdet/configs/_base_/schedules/schedule_1x.py +33 -0
  27. head_extractor/build/lib/mmdet/configs/_base_/schedules/schedule_2x.py +33 -0
  28. head_extractor/build/lib/mmdet/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py +13 -0
  29. head_extractor/build/lib/mmdet/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py +13 -0
  30. head_extractor/build/lib/mmdet/configs/common/lsj_100e_coco_detection.py +134 -0
  31. head_extractor/build/lib/mmdet/configs/common/lsj_100e_coco_instance.py +134 -0
  32. head_extractor/build/lib/mmdet/configs/common/lsj_200e_coco_detection.py +25 -0
  33. head_extractor/build/lib/mmdet/configs/common/lsj_200e_coco_instance.py +25 -0
  34. head_extractor/build/lib/mmdet/configs/common/ms_3x_coco.py +130 -0
  35. head_extractor/build/lib/mmdet/configs/common/ms_3x_coco_instance.py +136 -0
  36. head_extractor/build/lib/mmdet/configs/common/ms_90k_coco.py +151 -0
  37. head_extractor/build/lib/mmdet/configs/common/ms_poly_3x_coco_instance.py +138 -0
  38. head_extractor/build/lib/mmdet/configs/common/ms_poly_90k_coco_instance.py +153 -0
  39. head_extractor/build/lib/mmdet/configs/common/ssj_270_coco_instance.py +158 -0
  40. head_extractor/build/lib/mmdet/configs/common/ssj_scp_270k_coco_instance.py +70 -0
  41. head_extractor/build/lib/mmdet/configs/deformable_detr/deformable_detr_r50_16xb2_50e_coco.py +186 -0
  42. head_extractor/build/lib/mmdet/configs/deformable_detr/deformable_detr_refine_r50_16xb2_50e_coco.py +12 -0
  43. head_extractor/build/lib/mmdet/configs/deformable_detr/deformable_detr_refine_twostage_r50_16xb2_50e_coco.py +12 -0
  44. head_extractor/build/lib/mmdet/configs/detr/detr_r101_8xb2_500e_coco.py +13 -0
  45. head_extractor/build/lib/mmdet/configs/detr/detr_r18_8xb2_500e_coco.py +14 -0
  46. head_extractor/build/lib/mmdet/configs/detr/detr_r50_8xb2_150e_coco.py +182 -0
  47. head_extractor/build/lib/mmdet/configs/detr/detr_r50_8xb2_500e_coco.py +25 -0
  48. head_extractor/build/lib/mmdet/configs/dino/dino_4scale_r50_8xb2_12e_coco.py +190 -0
  49. head_extractor/build/lib/mmdet/configs/dino/dino_4scale_r50_8xb2_24e_coco.py +12 -0
  50. head_extractor/build/lib/mmdet/configs/dino/dino_4scale_r50_8xb2_36e_coco.py +12 -0
.gitattributes CHANGED
@@ -33,3 +33,73 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ head_extractor/assets/001.jpg filter=lfs diff=lfs merge=lfs -text
37
+ head_extractor/mmcv-2.1.0/build/lib.linux-x86_64-cpython-311/mmcv/_ext.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
38
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/.ninja_deps filter=lfs diff=lfs merge=lfs -text
39
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/active_rotated_filter_cuda.o filter=lfs diff=lfs merge=lfs -text
40
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/assign_score_withk_cuda.o filter=lfs diff=lfs merge=lfs -text
41
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/ball_query_cuda.o filter=lfs diff=lfs merge=lfs -text
42
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/bbox_overlaps_cuda.o filter=lfs diff=lfs merge=lfs -text
43
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/bezier_align_cuda.o filter=lfs diff=lfs merge=lfs -text
44
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/bias_act_cuda.o filter=lfs diff=lfs merge=lfs -text
45
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/border_align_cuda.o filter=lfs diff=lfs merge=lfs -text
46
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/box_iou_quadri_cuda.o filter=lfs diff=lfs merge=lfs -text
47
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/box_iou_rotated_cuda.o filter=lfs diff=lfs merge=lfs -text
48
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/carafe_cuda.o filter=lfs diff=lfs merge=lfs -text
49
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/carafe_naive_cuda.o filter=lfs diff=lfs merge=lfs -text
50
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/chamfer_distance_cuda.o filter=lfs diff=lfs merge=lfs -text
51
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/convex_iou.o filter=lfs diff=lfs merge=lfs -text
52
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/correlation_cuda.o filter=lfs diff=lfs merge=lfs -text
53
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/cudabind.o filter=lfs diff=lfs merge=lfs -text
54
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/deform_conv_cuda.o filter=lfs diff=lfs merge=lfs -text
55
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/deform_roi_pool_cuda.o filter=lfs diff=lfs merge=lfs -text
56
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/diff_iou_rotated_cuda.o filter=lfs diff=lfs merge=lfs -text
57
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/filtered_lrelu.o filter=lfs diff=lfs merge=lfs -text
58
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/focal_loss_cuda.o filter=lfs diff=lfs merge=lfs -text
59
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/furthest_point_sample_cuda.o filter=lfs diff=lfs merge=lfs -text
60
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/fused_bias_leakyrelu_cuda.o filter=lfs diff=lfs merge=lfs -text
61
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/fused_spconv_ops_cuda.o filter=lfs diff=lfs merge=lfs -text
62
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/gather_points_cuda.o filter=lfs diff=lfs merge=lfs -text
63
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/group_points_cuda.o filter=lfs diff=lfs merge=lfs -text
64
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/iou3d_cuda.o filter=lfs diff=lfs merge=lfs -text
65
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/knn_cuda.o filter=lfs diff=lfs merge=lfs -text
66
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/masked_conv2d_cuda.o filter=lfs diff=lfs merge=lfs -text
67
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/min_area_polygons.o filter=lfs diff=lfs merge=lfs -text
68
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/modulated_deform_conv_cuda.o filter=lfs diff=lfs merge=lfs -text
69
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/ms_deform_attn_cuda.o filter=lfs diff=lfs merge=lfs -text
70
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/nms_cuda.o filter=lfs diff=lfs merge=lfs -text
71
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/nms_quadri_cuda.o filter=lfs diff=lfs merge=lfs -text
72
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/nms_rotated_cuda.o filter=lfs diff=lfs merge=lfs -text
73
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/points_in_boxes_cuda.o filter=lfs diff=lfs merge=lfs -text
74
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/points_in_polygons_cuda.o filter=lfs diff=lfs merge=lfs -text
75
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/prroi_pool_cuda.o filter=lfs diff=lfs merge=lfs -text
76
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/psamask_cuda.o filter=lfs diff=lfs merge=lfs -text
77
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/riroi_align_rotated_cuda.o filter=lfs diff=lfs merge=lfs -text
78
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/roi_align_cuda.o filter=lfs diff=lfs merge=lfs -text
79
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/roi_align_rotated_cuda.o filter=lfs diff=lfs merge=lfs -text
80
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/roi_pool_cuda.o filter=lfs diff=lfs merge=lfs -text
81
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/roiaware_pool3d_cuda.o filter=lfs diff=lfs merge=lfs -text
82
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/roipoint_pool3d_cuda.o filter=lfs diff=lfs merge=lfs -text
83
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/rotated_feature_align_cuda.o filter=lfs diff=lfs merge=lfs -text
84
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/scatter_points_cuda.o filter=lfs diff=lfs merge=lfs -text
85
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/sparse_indice.o filter=lfs diff=lfs merge=lfs -text
86
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/sparse_maxpool.o filter=lfs diff=lfs merge=lfs -text
87
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/sparse_pool_ops_cuda.o filter=lfs diff=lfs merge=lfs -text
88
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/sparse_reordering.o filter=lfs diff=lfs merge=lfs -text
89
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/spconv_ops_cuda.o filter=lfs diff=lfs merge=lfs -text
90
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/stack_ball_query_cuda.o filter=lfs diff=lfs merge=lfs -text
91
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/stack_group_points_cuda.o filter=lfs diff=lfs merge=lfs -text
92
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/sync_bn_cuda.o filter=lfs diff=lfs merge=lfs -text
93
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/three_interpolate_cuda.o filter=lfs diff=lfs merge=lfs -text
94
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/three_nn_cuda.o filter=lfs diff=lfs merge=lfs -text
95
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/tin_shift_cuda.o filter=lfs diff=lfs merge=lfs -text
96
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/upfirdn2d_kernel.o filter=lfs diff=lfs merge=lfs -text
97
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/voxelization_cuda.o filter=lfs diff=lfs merge=lfs -text
98
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/deform_conv.o filter=lfs diff=lfs merge=lfs -text
99
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/modulated_deform_conv.o filter=lfs diff=lfs merge=lfs -text
100
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/pybind.o filter=lfs diff=lfs merge=lfs -text
101
+ head_extractor/mmcv-2.1.0/docs/en/_static/community/3.png filter=lfs diff=lfs merge=lfs -text
102
+ head_extractor/mmcv-2.1.0/docs/en/_static/flow_raw_images.png filter=lfs diff=lfs merge=lfs -text
103
+ head_extractor/mmcv-2.1.0/docs/en/_static/flow_warp.png filter=lfs diff=lfs merge=lfs -text
104
+ head_extractor/mmcv-2.1.0/docs/en/_static/flow_warp_diff.png filter=lfs diff=lfs merge=lfs -text
105
+ head_extractor/mmcv-2.1.0/docs/en/_static/progress.gif filter=lfs diff=lfs merge=lfs -text
head_extractor/assets/001.jpg ADDED

Git LFS Details

  • SHA256: 33562c08290fdd1576ebfe8da41bae3b8f7e21b7e0971ba0568d1fa259e1f409
  • Pointer size: 131 Bytes
  • Size of remote file: 113 kB
head_extractor/assets/001_head-black-bg.webp ADDED
head_extractor/assets/001_head-default.webp ADDED
head_extractor/assets/001_head-pad2square-false.webp ADDED
head_extractor/build/lib/head_extractor/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ from .processor import ProcessorPipeline, TaskType
2
+
3
+ __version__ = "0.1.0"
4
+
5
+ # 让外部可以直接 from head_extractor import ProcessorPipeline
6
+ __all__ = ['ProcessorPipeline', 'TaskType']
head_extractor/build/lib/head_extractor/models/__init__.py ADDED
File without changes
head_extractor/build/lib/head_extractor/models/depth_anything_large_mask2former_16xb1_160k_human_parsing_fashion_1024x1024.py ADDED
@@ -0,0 +1,573 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ auto_scale_lr = dict(base_batch_size=16, enable=False)
2
+ backbone_embed_multi = dict(decay_mult=0.0, lr_mult=0.1)
3
+ backbone_norm_multi = dict(decay_mult=0.0, lr_mult=0.1)
4
+ crop_size = (
5
+ 896,
6
+ 896,
7
+ )
8
+ custom_keys = dict({
9
+ 'backbone.dinov2':
10
+ dict(decay_mult=1.0, lr_mult=0.1),
11
+ 'backbone.dinov2.blocks.0.norm':
12
+ dict(decay_mult=0.0, lr_mult=0.1),
13
+ 'backbone.dinov2.blocks.1.norm':
14
+ dict(decay_mult=0.0, lr_mult=0.1),
15
+ 'backbone.dinov2.blocks.10.norm':
16
+ dict(decay_mult=0.0, lr_mult=0.1),
17
+ 'backbone.dinov2.blocks.11.norm':
18
+ dict(decay_mult=0.0, lr_mult=0.1),
19
+ 'backbone.dinov2.blocks.12.norm':
20
+ dict(decay_mult=0.0, lr_mult=0.1),
21
+ 'backbone.dinov2.blocks.13.norm':
22
+ dict(decay_mult=0.0, lr_mult=0.1),
23
+ 'backbone.dinov2.blocks.14.norm':
24
+ dict(decay_mult=0.0, lr_mult=0.1),
25
+ 'backbone.dinov2.blocks.15.norm':
26
+ dict(decay_mult=0.0, lr_mult=0.1),
27
+ 'backbone.dinov2.blocks.16.norm':
28
+ dict(decay_mult=0.0, lr_mult=0.1),
29
+ 'backbone.dinov2.blocks.17.norm':
30
+ dict(decay_mult=0.0, lr_mult=0.1),
31
+ 'backbone.dinov2.blocks.18.norm':
32
+ dict(decay_mult=0.0, lr_mult=0.1),
33
+ 'backbone.dinov2.blocks.19.norm':
34
+ dict(decay_mult=0.0, lr_mult=0.1),
35
+ 'backbone.dinov2.blocks.2.norm':
36
+ dict(decay_mult=0.0, lr_mult=0.1),
37
+ 'backbone.dinov2.blocks.20.norm':
38
+ dict(decay_mult=0.0, lr_mult=0.1),
39
+ 'backbone.dinov2.blocks.21.norm':
40
+ dict(decay_mult=0.0, lr_mult=0.1),
41
+ 'backbone.dinov2.blocks.22.norm':
42
+ dict(decay_mult=0.0, lr_mult=0.1),
43
+ 'backbone.dinov2.blocks.23.norm':
44
+ dict(decay_mult=0.0, lr_mult=0.1),
45
+ 'backbone.dinov2.blocks.3.norm':
46
+ dict(decay_mult=0.0, lr_mult=0.1),
47
+ 'backbone.dinov2.blocks.4.norm':
48
+ dict(decay_mult=0.0, lr_mult=0.1),
49
+ 'backbone.dinov2.blocks.5.norm':
50
+ dict(decay_mult=0.0, lr_mult=0.1),
51
+ 'backbone.dinov2.blocks.6.norm':
52
+ dict(decay_mult=0.0, lr_mult=0.1),
53
+ 'backbone.dinov2.blocks.7.norm':
54
+ dict(decay_mult=0.0, lr_mult=0.1),
55
+ 'backbone.dinov2.blocks.8.norm':
56
+ dict(decay_mult=0.0, lr_mult=0.1),
57
+ 'backbone.dinov2.blocks.9.norm':
58
+ dict(decay_mult=0.0, lr_mult=0.1),
59
+ 'backbone.dinov2.norm':
60
+ dict(decay_mult=0.0, lr_mult=0.1),
61
+ 'level_embed':
62
+ dict(decay_mult=0.0, lr_mult=1.0),
63
+ 'pos_embed':
64
+ dict(decay_mult=0.0, lr_mult=0.1),
65
+ 'query_embed':
66
+ dict(decay_mult=0.0, lr_mult=1.0),
67
+ 'query_feat':
68
+ dict(decay_mult=0.0, lr_mult=1.0)
69
+ })
70
+ data_preprocessor = dict(
71
+ bgr_to_rgb=True,
72
+ mean=[
73
+ 123.675,
74
+ 116.28,
75
+ 103.53,
76
+ ],
77
+ pad_val=0,
78
+ seg_pad_val=255,
79
+ size=(
80
+ 896,
81
+ 896,
82
+ ),
83
+ std=[
84
+ 58.395,
85
+ 57.12,
86
+ 57.375,
87
+ ],
88
+ type='SegDataPreProcessor')
89
+ data_root = '/mnt/data_ssd/limaopeng/limaopeng/segmentation/dataset/deep_fashion_10k'
90
+ dataset_type = 'HumanParsingDataset'
91
+ default_hooks = dict(
92
+ checkpoint=dict(
93
+ by_epoch=False,
94
+ interval=2000,
95
+ max_keep_ckpts=50,
96
+ save_best='mIoU',
97
+ type='CheckpointHook'),
98
+ logger=dict(interval=50, log_metric_by_epoch=False, type='LoggerHook'),
99
+ param_scheduler=dict(type='ParamSchedulerHook'),
100
+ sampler_seed=dict(type='DistSamplerSeedHook'),
101
+ timer=dict(type='IterTimerHook'),
102
+ visualization=dict(type='SegVisualizationHook'))
103
+ default_scope = 'mmseg'
104
+ embed_multi = dict(decay_mult=0.0, lr_mult=1.0)
105
+ env_cfg = dict(
106
+ cudnn_benchmark=True,
107
+ dist_cfg=dict(backend='nccl'),
108
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
109
+ find_unused_parameters = True
110
+ img_ratios = [
111
+ 0.5,
112
+ 0.75,
113
+ 1.0,
114
+ 1.25,
115
+ 1.5,
116
+ 1.75,
117
+ ]
118
+ launcher = 'none'
119
+ load_from = '/mnt/data_ssd/limaopeng/limaopeng/segmentation/mmsegmentation/work_dirs/depth_anything_large_mask2former_16xb1_160k_human_parsing_896x896/best_mIoU_iter_110000.pth'
120
+ log_level = 'INFO'
121
+ log_processor = dict(by_epoch=False)
122
+ model = dict(
123
+ backbone=dict(
124
+ freeze=False,
125
+ # load_from='./checkpoints/depth_anything_vitl14.pth',
126
+ type='DINOv2',
127
+ version='large'),
128
+ data_preprocessor=dict(
129
+ bgr_to_rgb=True,
130
+ mean=[
131
+ 123.675,
132
+ 116.28,
133
+ 103.53,
134
+ ],
135
+ pad_val=0,
136
+ seg_pad_val=255,
137
+ size=(
138
+ 896,
139
+ 896,
140
+ ),
141
+ std=[
142
+ 58.395,
143
+ 57.12,
144
+ 57.375,
145
+ ],
146
+ type='SegDataPreProcessor'),
147
+ decode_head=dict(
148
+ align_corners=False,
149
+ enforce_decoder_input_project=False,
150
+ feat_channels=1024,
151
+ in_channels=[
152
+ 1024,
153
+ 1024,
154
+ 1024,
155
+ 1024,
156
+ ],
157
+ loss_boundary=dict(loss_weight=5.0, type='BoundaryLoss'),
158
+ loss_cls=dict(
159
+ class_weight=[
160
+ 1.0,
161
+ 1.0,
162
+ 1.0,
163
+ 1.0,
164
+ 1.0,
165
+ 1.0,
166
+ 1.0,
167
+ 1.0,
168
+ 1.0,
169
+ 1.0,
170
+ 1.0,
171
+ 1.0,
172
+ 1.0,
173
+ 1.0,
174
+ 1.0,
175
+ 1.0,
176
+ 1.0,
177
+ 1.0,
178
+ 1.0,
179
+ 1.0,
180
+ 1.0,
181
+ 1.0,
182
+ 1.0,
183
+ 1.0,
184
+ 1.0,
185
+ 1.0,
186
+ 1.0,
187
+ 1.0,
188
+ 1.0,
189
+ 1.0,
190
+ 1.0,
191
+ 1.0,
192
+ 1.0,
193
+ 1.0,
194
+ 1.0,
195
+ 1.0,
196
+ 1.0,
197
+ 1.0,
198
+ 1.0,
199
+ 1.0,
200
+ 1.0,
201
+ 1.0,
202
+ 1.0,
203
+ 0.1,
204
+ ],
205
+ loss_weight=2.0,
206
+ reduction='mean',
207
+ type='mmdet.CrossEntropyLoss',
208
+ use_sigmoid=False),
209
+ loss_dice=dict(
210
+ activate=True,
211
+ eps=1.0,
212
+ loss_weight=5.0,
213
+ naive_dice=True,
214
+ reduction='mean',
215
+ type='mmdet.DiceLoss',
216
+ use_sigmoid=True),
217
+ loss_mask=dict(
218
+ loss_weight=5.0,
219
+ reduction='mean',
220
+ type='mmdet.CrossEntropyLoss',
221
+ use_sigmoid=True),
222
+ num_classes=43,
223
+ num_queries=200,
224
+ num_transformer_feat_level=3,
225
+ out_channels=1024,
226
+ pixel_decoder=dict(
227
+ act_cfg=dict(type='ReLU'),
228
+ encoder=dict(
229
+ init_cfg=None,
230
+ layer_cfg=dict(
231
+ ffn_cfg=dict(
232
+ act_cfg=dict(inplace=True, type='ReLU'),
233
+ embed_dims=1024,
234
+ feedforward_channels=4096,
235
+ ffn_drop=0.0,
236
+ num_fcs=2),
237
+ self_attn_cfg=dict(
238
+ batch_first=True,
239
+ dropout=0.0,
240
+ embed_dims=1024,
241
+ im2col_step=64,
242
+ init_cfg=None,
243
+ norm_cfg=None,
244
+ num_heads=32,
245
+ num_levels=3,
246
+ num_points=4)),
247
+ num_layers=6),
248
+ init_cfg=None,
249
+ norm_cfg=dict(num_groups=32, type='GN'),
250
+ num_outs=3,
251
+ positional_encoding=dict(normalize=True, num_feats=512),
252
+ type='mmdet.MSDeformAttnPixelDecoder'),
253
+ positional_encoding=dict(normalize=True, num_feats=512),
254
+ train_cfg=dict(
255
+ assigner=dict(
256
+ match_costs=[
257
+ dict(type='mmdet.ClassificationCost', weight=2.0),
258
+ dict(
259
+ type='mmdet.CrossEntropyLossCost',
260
+ use_sigmoid=True,
261
+ weight=5.0),
262
+ dict(
263
+ eps=1.0,
264
+ pred_act=True,
265
+ type='mmdet.DiceCost',
266
+ weight=5.0),
267
+ ],
268
+ type='mmdet.HungarianAssigner'),
269
+ importance_sample_ratio=0.75,
270
+ num_points=12544,
271
+ oversample_ratio=3.0,
272
+ sampler=dict(type='mmdet.MaskPseudoSampler')),
273
+ transformer_decoder=dict(
274
+ init_cfg=None,
275
+ layer_cfg=dict(
276
+ cross_attn_cfg=dict(
277
+ attn_drop=0.0,
278
+ batch_first=True,
279
+ dropout_layer=None,
280
+ embed_dims=1024,
281
+ num_heads=32,
282
+ proj_drop=0.0),
283
+ ffn_cfg=dict(
284
+ act_cfg=dict(inplace=True, type='ReLU'),
285
+ add_identity=True,
286
+ dropout_layer=None,
287
+ embed_dims=1024,
288
+ feedforward_channels=4096,
289
+ ffn_drop=0.0,
290
+ num_fcs=2),
291
+ self_attn_cfg=dict(
292
+ attn_drop=0.0,
293
+ batch_first=True,
294
+ dropout_layer=None,
295
+ embed_dims=1024,
296
+ num_heads=32,
297
+ proj_drop=0.0)),
298
+ num_layers=9,
299
+ return_intermediate=True),
300
+ type='Mask2FormerHead'),
301
+ neck=dict(
302
+ embed_dim=1024, rescales=[
303
+ 4,
304
+ 2,
305
+ 1,
306
+ 0.5,
307
+ ], type='Feature2Pyramid'),
308
+ test_cfg=dict(crop_size=(
309
+ 896,
310
+ 896,
311
+ ), mode='slide', stride=(
312
+ 426,
313
+ 426,
314
+ )),
315
+ train_cfg=dict(),
316
+ type='EncoderDecoder')
317
+ num_classes = 43
318
+ optim_wrapper = dict(
319
+ clip_grad=dict(max_norm=0.01, norm_type=2),
320
+ optimizer=dict(
321
+ betas=(
322
+ 0.9,
323
+ 0.999,
324
+ ),
325
+ eps=1e-08,
326
+ lr=3e-05,
327
+ type='AdamW',
328
+ weight_decay=0.05),
329
+ paramwise_cfg=dict(
330
+ custom_keys=dict({
331
+ 'backbone.dinov2':
332
+ dict(decay_mult=1.0, lr_mult=0.1),
333
+ 'backbone.dinov2.blocks.0.norm':
334
+ dict(decay_mult=0.0, lr_mult=0.1),
335
+ 'backbone.dinov2.blocks.1.norm':
336
+ dict(decay_mult=0.0, lr_mult=0.1),
337
+ 'backbone.dinov2.blocks.10.norm':
338
+ dict(decay_mult=0.0, lr_mult=0.1),
339
+ 'backbone.dinov2.blocks.11.norm':
340
+ dict(decay_mult=0.0, lr_mult=0.1),
341
+ 'backbone.dinov2.blocks.12.norm':
342
+ dict(decay_mult=0.0, lr_mult=0.1),
343
+ 'backbone.dinov2.blocks.13.norm':
344
+ dict(decay_mult=0.0, lr_mult=0.1),
345
+ 'backbone.dinov2.blocks.14.norm':
346
+ dict(decay_mult=0.0, lr_mult=0.1),
347
+ 'backbone.dinov2.blocks.15.norm':
348
+ dict(decay_mult=0.0, lr_mult=0.1),
349
+ 'backbone.dinov2.blocks.16.norm':
350
+ dict(decay_mult=0.0, lr_mult=0.1),
351
+ 'backbone.dinov2.blocks.17.norm':
352
+ dict(decay_mult=0.0, lr_mult=0.1),
353
+ 'backbone.dinov2.blocks.18.norm':
354
+ dict(decay_mult=0.0, lr_mult=0.1),
355
+ 'backbone.dinov2.blocks.19.norm':
356
+ dict(decay_mult=0.0, lr_mult=0.1),
357
+ 'backbone.dinov2.blocks.2.norm':
358
+ dict(decay_mult=0.0, lr_mult=0.1),
359
+ 'backbone.dinov2.blocks.20.norm':
360
+ dict(decay_mult=0.0, lr_mult=0.1),
361
+ 'backbone.dinov2.blocks.21.norm':
362
+ dict(decay_mult=0.0, lr_mult=0.1),
363
+ 'backbone.dinov2.blocks.22.norm':
364
+ dict(decay_mult=0.0, lr_mult=0.1),
365
+ 'backbone.dinov2.blocks.23.norm':
366
+ dict(decay_mult=0.0, lr_mult=0.1),
367
+ 'backbone.dinov2.blocks.3.norm':
368
+ dict(decay_mult=0.0, lr_mult=0.1),
369
+ 'backbone.dinov2.blocks.4.norm':
370
+ dict(decay_mult=0.0, lr_mult=0.1),
371
+ 'backbone.dinov2.blocks.5.norm':
372
+ dict(decay_mult=0.0, lr_mult=0.1),
373
+ 'backbone.dinov2.blocks.6.norm':
374
+ dict(decay_mult=0.0, lr_mult=0.1),
375
+ 'backbone.dinov2.blocks.7.norm':
376
+ dict(decay_mult=0.0, lr_mult=0.1),
377
+ 'backbone.dinov2.blocks.8.norm':
378
+ dict(decay_mult=0.0, lr_mult=0.1),
379
+ 'backbone.dinov2.blocks.9.norm':
380
+ dict(decay_mult=0.0, lr_mult=0.1),
381
+ 'backbone.dinov2.norm':
382
+ dict(decay_mult=0.0, lr_mult=0.1),
383
+ 'level_embed':
384
+ dict(decay_mult=0.0, lr_mult=1.0),
385
+ 'pos_embed':
386
+ dict(decay_mult=0.0, lr_mult=0.1),
387
+ 'query_embed':
388
+ dict(decay_mult=0.0, lr_mult=1.0),
389
+ 'query_feat':
390
+ dict(decay_mult=0.0, lr_mult=1.0)
391
+ }),
392
+ norm_decay_mult=0.0),
393
+ type='OptimWrapper')
394
+ optimizer = dict(
395
+ betas=(
396
+ 0.9,
397
+ 0.999,
398
+ ), eps=1e-08, lr=3e-05, type='AdamW', weight_decay=0.05)
399
+ param_scheduler = [
400
+ dict(
401
+ begin=0, by_epoch=False, end=1500, start_factor=1e-06,
402
+ type='LinearLR'),
403
+ dict(
404
+ begin=1500,
405
+ by_epoch=False,
406
+ end=300000,
407
+ eta_min=0.0,
408
+ power=0.9,
409
+ type='PolyLR'),
410
+ ]
411
+ resume = False
412
+ test_cfg = dict(type='TestLoop')
413
+ test_dataloader = dict(
414
+ batch_size=1,
415
+ dataset=dict(
416
+ data_prefix=dict(
417
+ img_path='val20250512/images', seg_map_path='val20250512/labels'),
418
+ data_root=
419
+ '/mnt/data_ssd/limaopeng/limaopeng/segmentation/dataset/deep_fashion_10k',
420
+ pipeline=[
421
+ dict(type='LoadImageFromFile'),
422
+ dict(keep_ratio=False, scale=(
423
+ 896,
424
+ 896,
425
+ ), type='Resize'),
426
+ dict(reduce_zero_label=False, type='LoadAnnotations'),
427
+ dict(type='PackSegInputs'),
428
+ ],
429
+ type='HumanParsingDataset'),
430
+ num_workers=4,
431
+ persistent_workers=True,
432
+ sampler=dict(shuffle=False, type='DefaultSampler'))
433
+ test_evaluator = dict(
434
+ iou_metrics=[
435
+ 'mIoU',
436
+ ], type='IoUMetric')
437
+ test_pipeline = [
438
+ dict(type='LoadImageFromFile'),
439
+ dict(keep_ratio=False, scale=(
440
+ 896,
441
+ 896,
442
+ ), type='Resize'),
443
+ dict(reduce_zero_label=False, type='LoadAnnotations'),
444
+ dict(type='PackSegInputs'),
445
+ ]
446
+ train_cfg = dict(
447
+ max_iters=300000, type='IterBasedTrainLoop', val_interval=2000)
448
+ train_dataloader = dict(
449
+ batch_size=3,
450
+ dataset=dict(
451
+ data_prefix=dict(
452
+ img_path='train20250512/images',
453
+ seg_map_path='train20250512/labels'),
454
+ data_root=
455
+ '/mnt/data_ssd/limaopeng/limaopeng/segmentation/dataset/deep_fashion_10k',
456
+ pipeline=[
457
+ dict(type='LoadImageFromFile'),
458
+ dict(type='LoadAnnotations'),
459
+ dict(
460
+ keep_ratio=True,
461
+ ratio_range=(
462
+ 0.2,
463
+ 2.0,
464
+ ),
465
+ scale=(
466
+ 896,
467
+ 896,
468
+ ),
469
+ type='RandomResize'),
470
+ dict(
471
+ cat_max_ratio=0.75, crop_size=(
472
+ 896,
473
+ 896,
474
+ ), type='RandomCrop'),
475
+ dict(keep_ratio=True, scale=(
476
+ 896,
477
+ 896,
478
+ ), type='Resize'),
479
+ dict(degree=45, prob=0.5, seg_pad_val=0, type='RandomRotate'),
480
+ dict(type='PhotoMetricDistortion'),
481
+ dict(type='PackSegInputs'),
482
+ ],
483
+ type='HumanParsingDataset'),
484
+ num_workers=4,
485
+ persistent_workers=True,
486
+ sampler=dict(shuffle=True, type='InfiniteSampler'))
487
+ train_pipeline = [
488
+ dict(type='LoadImageFromFile'),
489
+ dict(type='LoadAnnotations'),
490
+ dict(
491
+ keep_ratio=True,
492
+ ratio_range=(
493
+ 0.2,
494
+ 2.0,
495
+ ),
496
+ scale=(
497
+ 896,
498
+ 896,
499
+ ),
500
+ type='RandomResize'),
501
+ dict(cat_max_ratio=0.75, crop_size=(
502
+ 896,
503
+ 896,
504
+ ), type='RandomCrop'),
505
+ dict(keep_ratio=True, scale=(
506
+ 896,
507
+ 896,
508
+ ), type='Resize'),
509
+ dict(degree=45, prob=0.5, seg_pad_val=0, type='RandomRotate'),
510
+ dict(type='PhotoMetricDistortion'),
511
+ dict(type='PackSegInputs'),
512
+ ]
513
+ tta_model = dict(type='SegTTAModel')
514
+ tta_pipeline = [
515
+ dict(backend_args=None, type='LoadImageFromFile'),
516
+ dict(
517
+ transforms=[
518
+ [
519
+ dict(keep_ratio=True, scale_factor=0.5, type='Resize'),
520
+ dict(keep_ratio=True, scale_factor=0.75, type='Resize'),
521
+ dict(keep_ratio=True, scale_factor=1.0, type='Resize'),
522
+ dict(keep_ratio=True, scale_factor=1.25, type='Resize'),
523
+ dict(keep_ratio=True, scale_factor=1.5, type='Resize'),
524
+ dict(keep_ratio=True, scale_factor=1.75, type='Resize'),
525
+ ],
526
+ [
527
+ dict(direction='horizontal', prob=0.0, type='RandomFlip'),
528
+ dict(direction='horizontal', prob=1.0, type='RandomFlip'),
529
+ ],
530
+ [
531
+ dict(type='LoadAnnotations'),
532
+ ],
533
+ [
534
+ dict(type='PackSegInputs'),
535
+ ],
536
+ ],
537
+ type='TestTimeAug'),
538
+ ]
539
+ val_cfg = dict(type='ValLoop')
540
+ val_dataloader = dict(
541
+ batch_size=1,
542
+ dataset=dict(
543
+ data_prefix=dict(
544
+ img_path='val20250512/images', seg_map_path='val20250512/labels'),
545
+ data_root=
546
+ '/mnt/data_ssd/limaopeng/limaopeng/segmentation/dataset/deep_fashion_10k',
547
+ pipeline=[
548
+ dict(type='LoadImageFromFile'),
549
+ dict(keep_ratio=False, scale=(
550
+ 896,
551
+ 896,
552
+ ), type='Resize'),
553
+ dict(reduce_zero_label=False, type='LoadAnnotations'),
554
+ dict(type='PackSegInputs'),
555
+ ],
556
+ type='HumanParsingDataset'),
557
+ num_workers=4,
558
+ persistent_workers=True,
559
+ sampler=dict(shuffle=False, type='DefaultSampler'))
560
+ val_evaluator = dict(
561
+ iou_metrics=[
562
+ 'mIoU',
563
+ ], type='IoUMetric')
564
+ vis_backends = [
565
+ dict(type='LocalVisBackend'),
566
+ ]
567
+ visualizer = dict(
568
+ name='visualizer',
569
+ type='SegLocalVisualizer',
570
+ vis_backends=[
571
+ dict(type='LocalVisBackend'),
572
+ ])
573
+ work_dir = './work_dirs/depth_anything_large_mask2former_16xb1_160k_human_pasing_fasion_1024x1024_boundary_20250521'
head_extractor/build/lib/head_extractor/processor.py ADDED
@@ -0,0 +1,585 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ from mmseg.apis import inference_model, init_model
4
+ from PIL import Image
5
+ import cv2
6
+ from enum import Enum
7
+ import importlib.resources
8
+
9
+
10
+ '''
11
+ Labels:
12
+ 0: 'background' 1: 'top' 2: 'outer' 3: 'skirt'
13
+ 4: 'dress' 5: 'pants' 6: 'leggings' 7: 'headwear'
14
+ 8: 'eyeglass' 9: 'neckwear' 10: 'belt' 11: 'footwear'
15
+ 12: 'bag' 13: 'hair' 14: 'face' 15: 'skin'
16
+ 16: 'ring' 17: 'wrist_wearing' 18: 'socks' 19: 'gloves'
17
+ 20: 'necklace' 21: 'rompers' 22: 'earrings' 23: 'tie'
18
+ 24: Left_Foot
19
+ 25: Left_Hand
20
+ 26: Left_Lower_Arm
21
+ 27: Left_Lower_Leg
22
+ 28: Left_Upper_Arm
23
+ 29: Left_Upper_Leg
24
+ 30: Right_Foot
25
+ 31: Right_Hand
26
+ 32: Right_Lower_Arm
27
+ 33: Right_Lower_Leg
28
+ 34: Right_Upper_Arm
29
+ 35: Right_Upper_Leg
30
+ 36: Torso
31
+ '''
32
+
33
class PersonSeg:
    """Thin wrapper around an mmseg semantic-segmentation model."""

    def __init__(self, config_path, model_path, device='cuda'):
        """Build the segmentation model from a config file and a checkpoint."""
        self.model = init_model(config_path, model_path, device=device)

    def process(self, image):
        """Run inference on *image*.

        Returns:
            The per-pixel label map as a 2-D NumPy array (H, W).
        """
        seg_result = inference_model(self.model, image)
        return seg_result.pred_sem_seg.data.cpu().numpy()[0]
42
+
43
class TaskType(Enum):
    """Content types the pipeline can extract from a person image."""

    # Head-related tasks
    face = "face"
    head = "head"
    head_plus_shoulders = "head_plus_shoulders"

    # Clothing-related tasks
    top_cloth = "top_cloth"
    bottom_cloth = "bottom_cloth"
    full_clothes = "full_clothes"

    # Whole-body task
    full_character = "full_character"
55
+
56
class ProcessorPipeline:
    """Extracts a mask for a requested content type from a single image."""

    def __init__(self, seg_pipe: PersonSeg):
        # The human-parsing segmentor used by all extraction tasks.
        self.seg_pipe = seg_pipe

    @classmethod
    def load(cls, device: str = 'cuda') -> "ProcessorPipeline":
        """Build a pipeline from the config and checkpoint bundled in the package.

        No external paths are required; both files are resolved from the
        ``head_extractor.models`` package resources.

        Args:
            device: Device string forwarded to the segmentor (e.g. 'cuda').

        Returns:
            A ready-to-use ProcessorPipeline instance.
        """
        # ``importlib.resources.path`` is deprecated since Python 3.11;
        # ``files()`` + ``as_file()`` is the supported replacement and also
        # works when the package is shipped inside a zip archive.
        package = importlib.resources.files('head_extractor.models')
        cfg_res = package / 'depth_anything_large_mask2former_16xb1_160k_human_parsing_fashion_1024x1024.py'
        ckpt_res = package / 'ckpt.pth'
        with importlib.resources.as_file(cfg_res) as config_path, \
                importlib.resources.as_file(ckpt_res) as model_path:
            seg_pipe = PersonSeg(str(config_path), str(model_path), device=device)

        return cls(seg_pipe)
75
+
76
    def process(
        self,
        image: Image.Image,
        task_type: TaskType,
        long_edge: int = 1024
    ) -> tuple[np.ndarray, np.ndarray]:
        """Run segmentation and build the mask for ``task_type``.

        Args:
            image: Input image (PIL.Image or NumPy array).
            task_type: Which content to extract (e.g. head, clothes).
            long_edge (int): Long-side size the image is scaled down to
                before inference; smaller values are faster.

        Returns:
            Tuple of (processed image as ndarray, mask as uint8 ndarray
            with values in [0, 255]).
        """
        # 1. Normalize the input to an RGB NumPy array.
        if isinstance(image, Image.Image):
            image_np = np.array(image.convert("RGB"))
        else:  # assumed to already be a NumPy array
            image_np = image

        if len(image_np.shape) == 2:
            image_np = cv2.cvtColor(image_np, cv2.COLOR_GRAY2RGB)
        elif image_np.shape[2] == 4:
            image_np = cv2.cvtColor(image_np, cv2.COLOR_RGBA2RGB)

        processed_image_np = self.resize_long_edge(image_np, long_edge=long_edge)
        ori_h, ori_w = processed_image_np.shape[:2]

        # 2. Run the segmentor to get a per-pixel label map.
        pred_mask_map = self.seg_pipe.process(processed_image_np)

        if task_type == TaskType.head_plus_shoulders:
            # 2.1 Start from the plain "head" mask.
            head_labels = [7, 8, 13, 14]  # headwear, eyeglass, hair, face
            head_mask = np.isin(pred_mask_map, head_labels).astype(np.float32)
            head_mask = cv2.resize(head_mask, (ori_w, ori_h), interpolation=cv2.INTER_NEAREST)

            # 2.2 Compute the head bbox and extend it downward and sideways.
            rows = np.any(head_mask > 0, axis=1)
            cols = np.any(head_mask > 0, axis=0)
            if np.any(rows) and np.any(cols):
                rmin, rmax = np.where(rows)[0][[0, -1]]
                cmin, cmax = np.where(cols)[0][[0, -1]]
                h_box = max(1, rmax - rmin)
                w_box = max(1, cmax - cmin)

                down_ratio = 0.1  # downward extension, relative to head bbox height
                side_ratio = 0.6  # sideways extension, relative to head bbox width

                r2max = min(ori_h, rmax + int(h_box * down_ratio))
                c2min = max(0, cmin - int(w_box * side_ratio))
                c2max = min(ori_w, cmax + int(w_box * side_ratio))

                rect_mask = np.zeros((ori_h, ori_w), dtype=np.float32)
                rect_mask[rmin:r2max, c2min:c2max] = 1.0

                # 2.3 Inside the extended rectangle keep only person pixels
                # (filters out background).
                person_labels = list(range(1, 37))  # labels 1..36 are all person parts
                person_mask = np.isin(pred_mask_map, person_labels).astype(np.float32)
                person_mask = cv2.resize(person_mask, (ori_w, ori_h), interpolation=cv2.INTER_NEAREST)

                initial_mask = np.clip(head_mask + (person_mask * rect_mask), 0, 1)
            else:
                # No head found: fall back to the (empty) head mask.
                initial_mask = head_mask
        else:
            # All other tasks: select the labels mapped to the task.
            labels_map = self._get_labels_for_task(task_type)
            primary_labels = labels_map['primary']
            initial_mask = np.isin(pred_mask_map, primary_labels).astype(np.float32)
            initial_mask = cv2.resize(initial_mask, (ori_w, ori_h), interpolation=cv2.INTER_NEAREST)

        # 3. Post-process (task-specific morphology).
        final_mask_np = self._apply_task_specific_mask_processing(initial_mask, task_type, ori_h, ori_w)

        # 4. Return image and mask scaled to uint8.
        final_mask_uint8 = (final_mask_np * 255).astype(np.uint8)
        return processed_image_np, final_mask_uint8
156
+
157
+ def _get_labels_for_task(self, task_type: TaskType) -> dict:
158
+ """根据任务类型获取对应的标签映射"""
159
+ labels_map = {
160
+ TaskType.face: { 'primary': [8, 14] }, # eyeglass, face
161
+ TaskType.head: { 'primary': [7, 8, 13, 14] }, # headwear, eyeglass, hair, face
162
+ TaskType.top_cloth: { 'primary': [1, 2] }, # top, outer
163
+ TaskType.bottom_cloth: { 'primary': [3, 4, 5, 6] }, # skirt, dress, pants, leggings
164
+ TaskType.full_clothes: { 'primary': [1, 2, 3, 4, 5, 6] }, # all clothes
165
+ TaskType.full_character: { 'primary': list(range(1, 37)) }, # 包含所有人物相关部分
166
+ }
167
+ return labels_map.get(task_type, {'primary': []})
168
+
169
    def _apply_task_specific_mask_processing(self, mask: np.ndarray, task_type: TaskType, ori_h: int, ori_w: int) -> np.ndarray:
        """Apply task-specific morphological post-processing to ``mask``.

        ``mask`` is a float array in [0, 1]; the returned mask is also
        float in [0, 1].
        """
        if task_type == TaskType.face:
            # Face task: a small, plain dilation.
            expand_kernel = 5
            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (expand_kernel, expand_kernel))
            mask = cv2.dilate((mask > 0.5).astype(np.float32), kernel)

        elif task_type == TaskType.head:
            # Head task: erode first, then dilate with a larger kernel.
            kernel = np.ones((7, 7), dtype=np.uint8)
            mask = cv2.erode(mask, kernel, iterations=1)

            expand_kernel = 11
            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (expand_kernel, expand_kernel))
            mask = cv2.dilate((mask > 0.5).astype(np.float32), kernel)

        elif task_type == TaskType.head_plus_shoulders:
            # Dilation biased toward downward/sideways growth compared to
            # the head task (kernel height > kernel width).
            # Light erosion first to remove boundary fuzz.
            erode_k = 5
            kernel = np.ones((erode_k, erode_k), dtype=np.uint8)
            mask = cv2.erode(mask, kernel, iterations=1)

            max_side = max(ori_h, ori_w)
            h_kernel = max(15, int(max_side * 0.05))  # taller
            w_kernel = max(11, int(max_side * 0.03))  # slightly narrower
            # Force odd kernel sizes.
            h_kernel = h_kernel // 2 * 2 + 1
            w_kernel = w_kernel // 2 * 2 + 1

            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (w_kernel, h_kernel))
            mask = cv2.dilate((mask > 0.5).astype(np.float32), kernel)

        if task_type in [TaskType.top_cloth, TaskType.bottom_cloth, TaskType.full_clothes, TaskType.full_character]:
            # Clothing/full-body tasks: dilate then blur for soft edges.
            # NOTE(review): blur_kernel == 1 makes the GaussianBlur a no-op;
            # presumably a tuning leftover — confirm before changing.
            expand_ratio = 0.01
            max_side = max(ori_h, ori_w)
            blur_kernel = 1
            expand_kernel = int(max_side * expand_ratio) // 2 * 2 + 1  # odd size

            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (expand_kernel, expand_kernel))
            expanded = cv2.dilate((mask > 0.5).astype(np.uint8), kernel)

            blurred = cv2.GaussianBlur(
                expanded.astype(np.float32),
                (blur_kernel, blur_kernel),
                sigmaX=0,
            )
            # Renormalize to [0, 1]; epsilon guards against an all-zero mask.
            mask = np.clip(blurred / (blurred.max() + 1e-6), 0, 1)

        return mask
221
+
222
+ @staticmethod
223
+ def resize_long_edge(image_np: np.ndarray, long_edge=1024) -> np.ndarray:
224
+ """将图像等比例缩放到指定长边尺寸 (使用OpenCV)"""
225
+ original_height, original_width = image_np.shape[:2]
226
+
227
+ max_dimension = max(original_width, original_height)
228
+ if max_dimension <= long_edge:
229
+ return image_np
230
+
231
+ ratio = long_edge / max_dimension
232
+ new_width = int(original_width * ratio)
233
+ new_height = int(original_height * ratio)
234
+
235
+ # 使用cv2.INTER_AREA进行缩放,对于缩小图像效果较好且速度快
236
+ return cv2.resize(image_np, (new_width, new_height), interpolation=cv2.INTER_AREA)
237
+
238
+ @staticmethod
239
+ def _pad_to_square_np(image_np: np.ndarray, background_value: tuple) -> np.ndarray:
240
+ """将NumPy图像填充为正方形"""
241
+ height, width = image_np.shape[:2]
242
+ if width == height:
243
+ return image_np
244
+
245
+ max_dim = max(width, height)
246
+
247
+ # 根据通道数确定背景色
248
+ channels = image_np.shape[2] if len(image_np.shape) > 2 else 1
249
+
250
+ # 创建一个正确尺寸的背景板
251
+ padded_image = np.full((max_dim, max_dim, channels), background_value, dtype=image_np.dtype)
252
+
253
+ paste_x = (max_dim - width) // 2
254
+ paste_y = (max_dim - height) // 2
255
+
256
+ padded_image[paste_y:paste_y+height, paste_x:paste_x+width] = image_np
257
+ return padded_image
258
+
259
+ @staticmethod
260
+ def pad_to_square(image: Image.Image, background_color: tuple = (255, 255, 255)) -> Image.Image:
261
+ """
262
+ 将图像填充为正方形
263
+
264
+ Args:
265
+ image: 输入图像
266
+ background_color: 填充的背景颜色
267
+
268
+ Returns:
269
+ 填充为正方形的图像
270
+ """
271
+ width, height = image.size
272
+ if width == height:
273
+ return image
274
+
275
+ max_dim = max(width, height)
276
+ padded_image = Image.new(image.mode, (max_dim, max_dim), background_color)
277
+ paste_x = (max_dim - width) // 2
278
+ paste_y = (max_dim - height) // 2
279
+ padded_image.paste(image, (paste_x, paste_y))
280
+ return padded_image
281
+
282
+ def crop_image_by_mask(self, image: Image.Image, mask: Image.Image, padding: int = 20) -> Image.Image:
283
+ """
284
+ 根据mask裁剪图像,只保留mask覆盖的区域
285
+
286
+ Args:
287
+ image: 原始图像
288
+ mask: 二值mask图像
289
+ padding: 裁剪区域的边距扩展像素数
290
+
291
+ Returns:
292
+ 裁剪后的图像
293
+ """
294
+ # 转换为numpy数组
295
+ mask_np = np.array(mask)
296
+ image_np = np.array(image)
297
+
298
+ # 找到mask中非零像素的边界框
299
+ rows = np.any(mask_np > 0, axis=1)
300
+ cols = np.any(mask_np > 0, axis=0)
301
+
302
+ if not np.any(rows) or not np.any(cols):
303
+ # 如果mask为空,返回原图
304
+ return image
305
+
306
+ # 获取边界框坐标
307
+ rmin, rmax = np.where(rows)[0][[0, -1]]
308
+ cmin, cmax = np.where(cols)[0][[0, -1]]
309
+
310
+ # 添加padding并确保不超出图像边界
311
+ h, w = image_np.shape[:2]
312
+ rmin = max(0, rmin - padding)
313
+ rmax = min(h, rmax + padding + 1)
314
+ cmin = max(0, cmin - padding)
315
+ cmax = min(w, cmax + padding + 1)
316
+
317
+ # 裁剪图像
318
+ cropped_image = image_np[rmin:rmax, cmin:cmax]
319
+
320
+ return Image.fromarray(cropped_image)
321
+
322
+ def _crop_image_and_mask_np(self, image_np: np.ndarray, mask_np: np.ndarray, padding: int = 20) -> tuple[np.ndarray, np.ndarray]:
323
+ """根据mask同时裁剪NumPy图像和mask"""
324
+ rows = np.any(mask_np > 0, axis=1)
325
+ cols = np.any(mask_np > 0, axis=0)
326
+
327
+ if not np.any(rows) or not np.any(cols):
328
+ return image_np, mask_np
329
+
330
+ rmin, rmax = np.where(rows)[0][[0, -1]]
331
+ cmin, cmax = np.where(cols)[0][[0, -1]]
332
+
333
+ h, w = image_np.shape[:2]
334
+ rmin = max(0, rmin - padding)
335
+ rmax = min(h, rmax + padding + 1)
336
+ cmin = max(0, cmin - padding)
337
+ cmax = min(w, cmax + padding + 1)
338
+
339
+ cropped_image_np = image_np[rmin:rmax, cmin:cmax]
340
+ cropped_mask_np = mask_np[rmin:rmax, cmin:cmax]
341
+
342
+ return cropped_image_np, cropped_mask_np
343
+
344
+ def crop_image_and_mask(self, image: Image.Image, mask: Image.Image, padding: int = 20) -> tuple[Image.Image, Image.Image]:
345
+ """根据mask同时裁剪图像和mask,避免重复计算边界框"""
346
+ mask_np = np.array(mask)
347
+ image_np = np.array(image)
348
+
349
+ rows = np.any(mask_np > 0, axis=1)
350
+ cols = np.any(mask_np > 0, axis=0)
351
+
352
+ if not np.any(rows) or not np.any(cols):
353
+ return image, mask
354
+
355
+ rmin, rmax = np.where(rows)[0][[0, -1]]
356
+ cmin, cmax = np.where(cols)[0][[0, -1]]
357
+
358
+ h, w = image_np.shape[:2]
359
+ rmin = max(0, rmin - padding)
360
+ rmax = min(h, rmax + padding + 1)
361
+ cmin = max(0, cmin - padding)
362
+ cmax = min(w, cmax + padding + 1)
363
+
364
+ cropped_image_np = image_np[rmin:rmax, cmin:cmax]
365
+ cropped_mask_np = mask_np[rmin:rmax, cmin:cmax]
366
+
367
+ return Image.fromarray(cropped_image_np), Image.fromarray(cropped_mask_np)
368
+
369
+ def _apply_mask_to_image_np(self, image_np: np.ndarray, mask_np: np.ndarray, background_color: tuple) -> np.ndarray:
370
+ """将NumPy mask应用到NumPy图像上"""
371
+ mask_normalized = mask_np.astype(np.float32) / 255.0
372
+ background = np.full_like(image_np, background_color)
373
+ result = image_np * mask_normalized[..., np.newaxis] + background * (1 - mask_normalized[..., np.newaxis])
374
+ return result.astype(np.uint8)
375
+
376
+ def apply_mask_to_image(self, image: Image.Image, mask: Image.Image, background_color: tuple = (255, 255, 255)) -> Image.Image:
377
+ """
378
+ 将mask应用到图像上,mask外的区域设置为指定背景色
379
+
380
+ Args:
381
+ image: 原始图像
382
+ mask: 二值mask图像
383
+ background_color: 背景颜色 (R, G, B)
384
+
385
+ Returns:
386
+ 应用mask后的图像
387
+ """
388
+ # 转换为numpy数组
389
+ image_np = np.array(image)
390
+ mask_np = np.array(mask)
391
+
392
+ # 将mask归一化到0-1范围
393
+ mask_normalized = mask_np.astype(np.float32) / 255.0
394
+
395
+ # 创建背景
396
+ background = np.full_like(image_np, background_color)
397
+
398
+ # 应用mask:mask区域保持原图,其他区域为背景色
399
+ result = image_np * mask_normalized[..., np.newaxis] + background * (1 - mask_normalized[..., np.newaxis])
400
+
401
+ return Image.fromarray(result.astype(np.uint8))
402
+
403
    def extract_head(
        self,
        image: Image.Image,
        crop_padding: int = 10,
        background_color: tuple = (255, 255, 255),
        pad2square: bool = True,
        output_mode: str = 'RGB',
        long_edge: int = 1024,
        include_shoulders: bool = False
    ) -> Image.Image:
        """Extract the head region from the input image.

        Returns a cropped image, optionally padded to a square.

        Args:
            image: Input image (PIL.Image or np.ndarray).
            crop_padding: Extra margin added around the crop bounding box.
            background_color: Background fill color when ``output_mode`` is 'RGB'.
            pad2square (bool): Pad the final result to a square. Defaults to True.
            output_mode (str): Output image mode, 'RGB' (solid background) or
                'RGBA' (transparent background). Defaults to 'RGB'.
            long_edge (int): Long-side size used to scale the image before it
                is fed to the model; smaller is faster but may reduce accuracy.
                Defaults to 1024.
            include_shoulders (bool): Also include the shoulder area below and
                beside the head. Defaults to False.

        Returns:
            The processed head image (PIL.Image).
        """
        # 1. Pick the segmentation task based on the shoulders option.
        task = TaskType.head_plus_shoulders if include_shoulders else TaskType.head
        processed_image_np, head_mask_np = self.process(
            image=image,
            task_type=task,
            long_edge=long_edge
        )

        # 2. NumPy-based crop around the mask.
        face_cropped_np, mask_cropped_np = self._crop_image_and_mask_np(
            processed_image_np, head_mask_np, padding=crop_padding
        )

        # 3. Apply the mask according to the output mode (RGB/RGBA).
        output_mode = output_mode.upper()
        if output_mode == 'RGBA':
            # Build an image with an alpha channel.
            # First make sure the image has exactly 3 channels.
            if face_cropped_np.shape[2] == 4:
                face_cropped_np = face_cropped_np[:,:,:3]
            # Create the RGBA image.
            result_image_np = cv2.cvtColor(face_cropped_np, cv2.COLOR_RGB2RGBA)
            result_image_np[:, :, 3] = mask_cropped_np  # mask becomes the alpha channel

        elif output_mode == 'RGB':
            # NumPy-based mask application over a solid background.
            result_image_np = self._apply_mask_to_image_np(
                face_cropped_np,
                mask_cropped_np,
                background_color=background_color
            )
        else:
            raise ValueError("output_mode must be 'RGB' or 'RGBA'")

        # 4. Optional NumPy-based square padding.
        if pad2square:
            if output_mode == 'RGBA':
                pad_color = (255, 255, 255, 0)  # transparent background
            else:  # RGB
                pad_color = background_color

            final_image_np = self._pad_to_square_np(
                result_image_np,
                background_value=pad_color
            )
        else:
            final_image_np = result_image_np

        # 5. Convert to a PIL image only at the very end.
        if output_mode == 'RGBA':
            return Image.fromarray(final_image_np, 'RGBA')
        else:
            return Image.fromarray(final_image_np, 'RGB')
480
+
481
+
482
+ def extract(
483
+ self,
484
+ task_type: TaskType.full_character,
485
+ image: Image.Image,
486
+ crop_padding: int = 10,
487
+ background_color: tuple = (255, 255, 255),
488
+ pad2square: bool = True,
489
+ output_mode: str = 'RGB',
490
+ long_edge: int = 1024
491
+ ) -> Image.Image:
492
+ """
493
+ 从输入图像中提取头部区域,并返回一个裁剪、填充为正方形的图像。
494
+
495
+ Args:
496
+ image: 输入图像 (PIL.Image or np.ndarray).
497
+ crop_padding: 裁剪边界框的额外边距.
498
+ background_color: `output_mode` 为 'RGB' 时,用于填充背景的颜色.
499
+ pad2square (bool): 是否将最终结果填充为正方形. 默认为 True.
500
+ output_mode (str): 输出图像模式,可选 'RGB' (纯色背景) 或 'RGBA' (透明背景). 默认为 'RGB'.
501
+ long_edge (int): 送入模型前缩放的长边尺寸,值越小速度越快,但可能影响精度。默认为1024。
502
+
503
+ Returns:
504
+ 处理后的头部图像 (PIL.Image).
505
+ """
506
+ # 1. 运行分割��直接获取 NumPy 结果
507
+ processed_image_np, head_mask_np = self.process(
508
+ image=image,
509
+ task_type=task_type,
510
+ long_edge=long_edge
511
+ )
512
+
513
+ # 2. NumPy-based 裁剪
514
+ face_cropped_np, mask_cropped_np = self._crop_image_and_mask_np(
515
+ processed_image_np, head_mask_np, padding=crop_padding
516
+ )
517
+
518
+ # 3. 根据输出模式(RGB/RGBA)应用蒙版
519
+ output_mode = output_mode.upper()
520
+ if output_mode == 'RGBA':
521
+ # 创建一个带透明通道的RGBA图像
522
+ # 首先确保图像是3通道的
523
+ if face_cropped_np.shape[2] == 4:
524
+ face_cropped_np = face_cropped_np[:,:,:3]
525
+ # 创建RGBA图像
526
+ result_image_np = cv2.cvtColor(face_cropped_np, cv2.COLOR_RGB2RGBA)
527
+ result_image_np[:, :, 3] = mask_cropped_np # 设置alpha通道
528
+
529
+ elif output_mode == 'RGB':
530
+ # NumPy-based 蒙版应用
531
+ result_image_np = self._apply_mask_to_image_np(
532
+ face_cropped_np,
533
+ mask_cropped_np,
534
+ background_color=background_color
535
+ )
536
+ else:
537
+ raise ValueError("output_mode must be 'RGB' or 'RGBA'")
538
+
539
+ # 4. 可选:NumPy-based 填充
540
+ if pad2square:
541
+ if output_mode == 'RGBA':
542
+ pad_color = (255, 255, 255, 0) # 透明背景
543
+ else: # RGB
544
+ pad_color = background_color
545
+
546
+ final_image_np = self._pad_to_square_np(
547
+ result_image_np,
548
+ background_value=pad_color
549
+ )
550
+ else:
551
+ final_image_np = result_image_np
552
+
553
+ # 5. 仅在最后一步转换为 PIL Image
554
+ if output_mode == 'RGBA':
555
+ return Image.fromarray(final_image_np, 'RGBA')
556
+ else:
557
+ return Image.fromarray(final_image_np, 'RGB')
558
+
559
if __name__ == '__main__':
    # Demo: build the pipeline from the packaged model and run head
    # extraction on a sample image.
    print("Initializing pipeline from package resources...")
    pipe = ProcessorPipeline.load()
    print("Pipeline initialized.")

    # Path of the demo image — replace with your own picture.
    sample_path = "001.jpg"

    if not os.path.exists(sample_path):
        print(f"示例图片未找到: {sample_path}")
    else:
        print(f"Processing image: {sample_path}")
        source_image = Image.open(sample_path)

        print("正在提取头部...")
        head_image = pipe.extract_head(source_image)

        # Persist the final result.
        output_path = "output_head_extracted.png"
        head_image.save(output_path)

        print("\n处理完成!")
        print(f"已保存提取的头部图像至 '{output_path}'")
585
+
head_extractor/build/lib/mmdet/__init__.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import mmcv
3
+ import mmengine
4
+ from mmengine.utils import digit_version
5
+
6
+ from .version import __version__, version_info
7
+
8
# Supported dependency window for mmcv: inclusive lower bound,
# exclusive upper bound.
mmcv_minimum_version = '2.0.0rc4'
mmcv_maximum_version = '2.2.0'
mmcv_version = digit_version(mmcv.__version__)

# Supported dependency window for mmengine, same bound semantics.
mmengine_minimum_version = '0.7.1'
mmengine_maximum_version = '1.0.0'
mmengine_version = digit_version(mmengine.__version__)

# Fail fast at import time if an incompatible mmcv is installed.
assert (mmcv_version >= digit_version(mmcv_minimum_version)
        and mmcv_version < digit_version(mmcv_maximum_version)), \
    f'MMCV=={mmcv.__version__} is used but incompatible. ' \
    f'Please install mmcv>={mmcv_minimum_version}, <{mmcv_maximum_version}.'

# Fail fast at import time if an incompatible mmengine is installed.
assert (mmengine_version >= digit_version(mmengine_minimum_version)
        and mmengine_version < digit_version(mmengine_maximum_version)), \
    f'MMEngine=={mmengine.__version__} is used but incompatible. ' \
    f'Please install mmengine>={mmengine_minimum_version}, ' \
    f'<{mmengine_maximum_version}.'

__all__ = ['__version__', 'version_info', 'digit_version']
head_extractor/build/lib/mmdet/apis/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from .det_inferencer import DetInferencer
3
+ from .inference import (async_inference_detector, inference_detector,
4
+ inference_mot, init_detector, init_track_model)
5
+
6
# Public API re-exported by ``mmdet.apis``.
__all__ = [
    'init_detector', 'async_inference_detector', 'inference_detector',
    'DetInferencer', 'inference_mot', 'init_track_model'
]
head_extractor/build/lib/mmdet/apis/det_inferencer.py ADDED
@@ -0,0 +1,652 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import copy
3
+ import os.path as osp
4
+ import warnings
5
+ from typing import Dict, Iterable, List, Optional, Sequence, Tuple, Union
6
+
7
+ import mmcv
8
+ import mmengine
9
+ import numpy as np
10
+ import torch.nn as nn
11
+ from mmcv.transforms import LoadImageFromFile
12
+ from mmengine.dataset import Compose
13
+ from mmengine.fileio import (get_file_backend, isdir, join_path,
14
+ list_dir_or_file)
15
+ from mmengine.infer.infer import BaseInferencer, ModelType
16
+ from mmengine.model.utils import revert_sync_batchnorm
17
+ from mmengine.registry import init_default_scope
18
+ from mmengine.runner.checkpoint import _load_checkpoint_to_model
19
+ from mmengine.visualization import Visualizer
20
+ from rich.progress import track
21
+
22
+ from mmdet.evaluation import INSTANCE_OFFSET
23
+ from mmdet.registry import DATASETS
24
+ from mmdet.structures import DetDataSample
25
+ from mmdet.structures.mask import encode_mask_results, mask2bbox
26
+ from mmdet.utils import ConfigType
27
+ from ..evaluation import get_classes
28
+
29
# panopticapi is an optional dependency, only needed for panoptic
# segmentation output; fall back to None sentinels when it is absent.
try:
    from panopticapi.evaluation import VOID
    from panopticapi.utils import id2rgb
except ImportError:
    id2rgb = None
    VOID = None

# Type aliases used throughout the inferencer.
InputType = Union[str, np.ndarray]
InputsType = Union[InputType, Sequence[InputType]]
PredType = List[DetDataSample]
ImgType = Union[np.ndarray, Sequence[np.ndarray]]

# File suffixes accepted when a directory is given as input.
IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif',
                  '.tiff', '.webp')
43
+
44
+
45
class DetInferencer(BaseInferencer):
    """Object Detection Inferencer.

    Args:
        model (str, optional): Path to the config file or the model name
            defined in metafile. For example, it could be
            "rtmdet-s" or 'rtmdet_s_8xb32-300e_coco' or
            "configs/rtmdet/rtmdet_s_8xb32-300e_coco.py".
            If model is not specified, user must provide the
            `weights` saved by MMEngine which contains the config string.
            Defaults to None.
        weights (str, optional): Path to the checkpoint. If it is not specified
            and model is a model name of metafile, the weights will be loaded
            from metafile. Defaults to None.
        device (str, optional): Device to run inference. If None, the available
            device will be automatically used. Defaults to None.
        scope (str, optional): The scope of the model. Defaults to mmdet.
        palette (str): Color palette used for visualization. The order of
            priority is palette -> config -> checkpoint. Defaults to 'none'.
        show_progress (bool): Control whether to display the progress
            bar during the inference process. Defaults to True.
    """

    # Keyword-argument names accepted by each stage of ``__call__``;
    # BaseInferencer uses these sets to route user kwargs to the stages.
    preprocess_kwargs: set = set()
    forward_kwargs: set = set()
    visualize_kwargs: set = {
        'return_vis',
        'show',
        'wait_time',
        'draw_pred',
        'pred_score_thr',
        'img_out_dir',
        'no_save_vis',
    }
    postprocess_kwargs: set = {
        'print_result',
        'pred_out_dir',
        'return_datasamples',
        'no_save_pred',
    }

    def __init__(self,
                 model: Optional[Union[ModelType, str]] = None,
                 weights: Optional[str] = None,
                 device: Optional[str] = None,
                 scope: Optional[str] = 'mmdet',
                 palette: str = 'none',
                 show_progress: bool = True) -> None:
        # A global counter tracking the number of images processed, for
        # naming of the output images
        self.num_visualized_imgs = 0
        self.num_predicted_imgs = 0
        self.palette = palette
        # Register mmdet modules before the base class builds the model.
        init_default_scope(scope)
        super().__init__(
            model=model, weights=weights, device=device, scope=scope)
        # SyncBN only works in distributed training; convert to plain BN.
        self.model = revert_sync_batchnorm(self.model)
        self.show_progress = show_progress
103
+
104
    def _load_weights_to_model(self, model: nn.Module,
                               checkpoint: Optional[dict],
                               cfg: Optional[ConfigType]) -> None:
        """Loading model weights and meta information from cfg and checkpoint.

        Args:
            model (nn.Module): Model to load weights and meta information.
            checkpoint (dict, optional): The loaded checkpoint.
            cfg (Config or ConfigDict, optional): The loaded config.
        """

        if checkpoint is not None:
            _load_checkpoint_to_model(model, checkpoint)
            checkpoint_meta = checkpoint.get('meta', {})
            # save the dataset_meta in the model for convenience
            if 'dataset_meta' in checkpoint_meta:
                # mmdet 3.x, all keys should be lowercase
                model.dataset_meta = {
                    k.lower(): v
                    for k, v in checkpoint_meta['dataset_meta'].items()
                }
            elif 'CLASSES' in checkpoint_meta:
                # < mmdet 3.x
                classes = checkpoint_meta['CLASSES']
                model.dataset_meta = {'classes': classes}
            else:
                # No class information in the checkpoint at all.
                warnings.warn(
                    'dataset_meta or class names are not saved in the '
                    'checkpoint\'s meta data, use COCO classes by default.')
                model.dataset_meta = {'classes': get_classes('coco')}
        else:
            warnings.warn('Checkpoint is not loaded, and the inference '
                          'result is calculated by the randomly initialized '
                          'model!')
            warnings.warn('weights is None, use COCO classes by default.')
            model.dataset_meta = {'classes': get_classes('coco')}

        # Priority: args.palette -> config -> checkpoint
        if self.palette != 'none':
            model.dataset_meta['palette'] = self.palette
        else:
            test_dataset_cfg = copy.deepcopy(cfg.test_dataloader.dataset)
            # lazy init. We only need the metainfo.
            test_dataset_cfg['lazy_init'] = True
            metainfo = DATASETS.build(test_dataset_cfg).metainfo
            cfg_palette = metainfo.get('palette', None)
            if cfg_palette is not None:
                model.dataset_meta['palette'] = cfg_palette
            else:
                if 'palette' not in model.dataset_meta:
                    warnings.warn(
                        'palette does not exist, random is used by default. '
                        'You can also set the palette to customize.')
                    model.dataset_meta['palette'] = 'random'
158
+
159
    def _init_pipeline(self, cfg: ConfigType) -> Compose:
        """Initialize the test pipeline.

        Builds the data pipeline from the config's test dataloader, with the
        image-loading transform swapped for one that also accepts in-memory
        arrays.
        """
        pipeline_cfg = cfg.test_dataloader.dataset.pipeline

        # For inference, the key of ``img_id`` is not used.
        if 'meta_keys' in pipeline_cfg[-1]:
            pipeline_cfg[-1]['meta_keys'] = tuple(
                meta_key for meta_key in pipeline_cfg[-1]['meta_keys']
                if meta_key != 'img_id')

        load_img_idx = self._get_transform_idx(
            pipeline_cfg, ('LoadImageFromFile', LoadImageFromFile))
        if load_img_idx == -1:
            raise ValueError(
                'LoadImageFromFile is not found in the test pipeline')
        # InferencerLoader accepts paths as well as already-loaded ndarrays.
        pipeline_cfg[load_img_idx]['type'] = 'mmdet.InferencerLoader'
        return Compose(pipeline_cfg)
176
+
177
+ def _get_transform_idx(self, pipeline_cfg: ConfigType,
178
+ name: Union[str, Tuple[str, type]]) -> int:
179
+ """Returns the index of the transform in a pipeline.
180
+
181
+ If the transform is not found, returns -1.
182
+ """
183
+ for i, transform in enumerate(pipeline_cfg):
184
+ if transform['type'] in name:
185
+ return i
186
+ return -1
187
+
188
    def _init_visualizer(self, cfg: ConfigType) -> Optional[Visualizer]:
        """Initialize visualizers.

        Args:
            cfg (ConfigType): Config containing the visualizer information.

        Returns:
            Visualizer or None: Visualizer initialized with config.
        """
        visualizer = super()._init_visualizer(cfg)
        # Give the visualizer access to class names / palette for drawing.
        visualizer.dataset_meta = self.model.dataset_meta
        return visualizer
200
+
201
    def _inputs_to_list(self, inputs: InputsType) -> list:
        """Preprocess the inputs to a list.

        Preprocess inputs to a list according to its type:

        - list or tuple: return inputs
        - str:
            - Directory path: return all files in the directory
            - other cases: return a list containing the string. The string
              could be a path to file, a url or other types of string according
              to the task.

        Args:
            inputs (InputsType): Inputs for the inferencer.

        Returns:
            list: List of input for the :meth:`preprocess`.
        """
        if isinstance(inputs, str):
            backend = get_file_backend(inputs)
            if hasattr(backend, 'isdir') and isdir(inputs):
                # Backends like HttpsBackend do not implement `isdir`, so only
                # those backends that implement `isdir` could accept the inputs
                # as a directory
                filename_list = list_dir_or_file(
                    inputs, list_dir=False, suffix=IMG_EXTENSIONS)
                # Expand the directory into the full paths of its images.
                inputs = [
                    join_path(inputs, filename) for filename in filename_list
                ]

        if not isinstance(inputs, (list, tuple)):
            inputs = [inputs]

        return list(inputs)
235
+
236
def preprocess(self, inputs: InputsType, batch_size: int = 1, **kwargs):
    """Process the inputs into a model-feedable format.

    Chunks ``inputs`` into batches of ``batch_size`` and collates each
    chunk; every yielded item is a valid argument for
    ``model.test_step``. ``BaseInferencer.__call__`` consumes this
    generator like::

        chunked_data = self.preprocess(inputs, batch_size, **kwargs)
        for batch in chunked_data:
            preds = self.forward(batch, **kwargs)

    Args:
        inputs (InputsType): Inputs given by user.
        batch_size (int): batch size. Defaults to 1.

    Yields:
        Any: Data processed by the ``pipeline`` and ``collate_fn``.
    """
    for chunk in self._get_chunk_data(inputs, batch_size):
        yield self.collate_fn(chunk)
262
+
263
def _get_chunk_data(self, inputs: Iterable, chunk_size: int):
    """Get batch data from inputs.

    Each yielded chunk is a list of ``(original_input, pipeline_output)``
    pairs of length ``chunk_size``; the final chunk may be shorter.

    Args:
        inputs (Iterable): An iterable dataset.
        chunk_size (int): Equivalent to batch size.

    Yields:
        list: batch data.
    """
    inputs_iter = iter(inputs)
    while True:
        try:
            # Reset at the top of each outer iteration so a trailing
            # StopIteration only re-yields genuinely unconsumed items.
            chunk_data = []
            for _ in range(chunk_size):
                inputs_ = next(inputs_iter)
                if isinstance(inputs_, dict):
                    # Keep a handle on the raw image (or its path) for
                    # later visualization alongside the pipeline output.
                    if 'img' in inputs_:
                        ori_inputs_ = inputs_['img']
                    else:
                        ori_inputs_ = inputs_['img_path']
                    # deepcopy: the pipeline mutates its input dict.
                    chunk_data.append(
                        (ori_inputs_,
                         self.pipeline(copy.deepcopy(inputs_))))
                else:
                    chunk_data.append((inputs_, self.pipeline(inputs_)))
            yield chunk_data
        except StopIteration:
            # Flush the partially filled last chunk, if any.
            if chunk_data:
                yield chunk_data
            break
294
+
295
# TODO: Video and Webcam are currently not supported and
# may consume too much memory if your input folder has a lot of images.
# We will be optimized later.
def __call__(
        self,
        inputs: InputsType,
        batch_size: int = 1,
        return_vis: bool = False,
        show: bool = False,
        wait_time: int = 0,
        no_save_vis: bool = False,
        draw_pred: bool = True,
        pred_score_thr: float = 0.3,
        return_datasamples: bool = False,
        print_result: bool = False,
        no_save_pred: bool = True,
        out_dir: str = '',
        # by open image task
        texts: Optional[Union[str, list]] = None,
        # by open panoptic task
        stuff_texts: Optional[Union[str, list]] = None,
        # by GLIP and Grounding DINO
        custom_entities: bool = False,
        # by Grounding DINO
        tokens_positive: Optional[Union[int, list]] = None,
        **kwargs) -> dict:
    """Call the inferencer.

    Args:
        inputs (InputsType): Inputs for the inferencer.
        batch_size (int): Inference batch size. Defaults to 1.
        return_vis (bool): Whether to return the visualization result.
            Defaults to False.
        show (bool): Whether to display the visualization results in a
            popup window. Defaults to False.
        wait_time (float): The interval of show (s). Defaults to 0.
        no_save_vis (bool): Whether to force not to save prediction
            vis results. Defaults to False.
        draw_pred (bool): Whether to draw predicted bounding boxes.
            Defaults to True.
        pred_score_thr (float): Minimum score of bboxes to draw.
            Defaults to 0.3.
        return_datasamples (bool): Whether to return results as
            :obj:`DetDataSample`. Defaults to False.
        print_result (bool): Whether to print the inference result w/o
            visualization to the console. Defaults to False.
        no_save_pred (bool): Whether to force not to save prediction
            results. Defaults to True.
        out_dir: Dir to save the inference results or
            visualization. If left as empty, no file will be saved.
            Defaults to ''.
        texts (str | list[str]): Text prompts. Defaults to None.
        stuff_texts (str | list[str]): Stuff text prompts of open
            panoptic task. Defaults to None.
        custom_entities (bool): Whether to use custom entities.
            Defaults to False. Only used in GLIP and Grounding DINO.
        tokens_positive (int | list): Token spans for Grounding DINO.
            Defaults to None.
        **kwargs: Other keyword arguments passed to :meth:`preprocess`,
            :meth:`forward`, :meth:`visualize` and :meth:`postprocess`.
            Each key in kwargs should be in the corresponding set of
            ``preprocess_kwargs``, ``forward_kwargs``, ``visualize_kwargs``
            and ``postprocess_kwargs``.

    Returns:
        dict: Inference and visualization results with keys
        ``predictions`` and ``visualization``.
    """
    # Route **kwargs to the four pipeline stages by their declared
    # keyword sets.
    (
        preprocess_kwargs,
        forward_kwargs,
        visualize_kwargs,
        postprocess_kwargs,
    ) = self._dispatch_kwargs(**kwargs)

    ori_inputs = self._inputs_to_list(inputs)

    # A single text prompt is broadcast to every input image.
    if texts is not None and isinstance(texts, str):
        texts = [texts] * len(ori_inputs)
    if stuff_texts is not None and isinstance(stuff_texts, str):
        stuff_texts = [stuff_texts] * len(ori_inputs)

    # Currently only supports bs=1
    tokens_positive = [tokens_positive] * len(ori_inputs)

    # With text prompts, each input is repackaged as a dict carrying the
    # prompt metadata expected by open-vocabulary pipelines.
    if texts is not None:
        assert len(texts) == len(ori_inputs)
        for i in range(len(texts)):
            if isinstance(ori_inputs[i], str):
                ori_inputs[i] = {
                    'text': texts[i],
                    'img_path': ori_inputs[i],
                    'custom_entities': custom_entities,
                    'tokens_positive': tokens_positive[i]
                }
            else:
                ori_inputs[i] = {
                    'text': texts[i],
                    'img': ori_inputs[i],
                    'custom_entities': custom_entities,
                    'tokens_positive': tokens_positive[i]
                }
    if stuff_texts is not None:
        assert len(stuff_texts) == len(ori_inputs)
        for i in range(len(stuff_texts)):
            ori_inputs[i]['stuff_text'] = stuff_texts[i]

    inputs = self.preprocess(
        ori_inputs, batch_size=batch_size, **preprocess_kwargs)

    results_dict = {'predictions': [], 'visualization': []}
    # Each item from `preprocess` unpacks into the raw images of the
    # batch and the collated model input.
    for ori_imgs, data in (track(inputs, description='Inference')
                           if self.show_progress else inputs):
        preds = self.forward(data, **forward_kwargs)
        visualization = self.visualize(
            ori_imgs,
            preds,
            return_vis=return_vis,
            show=show,
            wait_time=wait_time,
            draw_pred=draw_pred,
            pred_score_thr=pred_score_thr,
            no_save_vis=no_save_vis,
            img_out_dir=out_dir,
            **visualize_kwargs)
        results = self.postprocess(
            preds,
            visualization,
            return_datasamples=return_datasamples,
            print_result=print_result,
            no_save_pred=no_save_pred,
            pred_out_dir=out_dir,
            **postprocess_kwargs)
        results_dict['predictions'].extend(results['predictions'])
        if results['visualization'] is not None:
            results_dict['visualization'].extend(results['visualization'])
    return results_dict
427
+
428
def visualize(self,
              inputs: InputsType,
              preds: PredType,
              return_vis: bool = False,
              show: bool = False,
              wait_time: int = 0,
              draw_pred: bool = True,
              pred_score_thr: float = 0.3,
              no_save_vis: bool = False,
              img_out_dir: str = '',
              **kwargs) -> Union[List[np.ndarray], None]:
    """Visualize predictions.

    Args:
        inputs (List[Union[str, np.ndarray]]): Inputs for the inferencer.
        preds (List[:obj:`DetDataSample`]): Predictions of the model.
        return_vis (bool): Whether to return the visualization result.
            Defaults to False.
        show (bool): Whether to display the image in a popup window.
            Defaults to False.
        wait_time (float): The interval of show (s). Defaults to 0.
        draw_pred (bool): Whether to draw predicted bounding boxes.
            Defaults to True.
        pred_score_thr (float): Minimum score of bboxes to draw.
            Defaults to 0.3.
        no_save_vis (bool): Whether to force not to save prediction
            vis results. Defaults to False.
        img_out_dir (str): Output directory of visualization results.
            If left as empty, no file will be saved. Defaults to ''.

    Returns:
        List[np.ndarray] or None: Returns visualization results only if
        applicable.

    Raises:
        ValueError: If no visualizer is configured, or an input is
            neither a path string nor an ndarray.
    """
    if no_save_vis is True:
        img_out_dir = ''

    # Nothing to show, save, or return: skip visualization entirely.
    if not show and img_out_dir == '' and not return_vis:
        return None

    if self.visualizer is None:
        # NOTE(review): the two string pieces concatenate without a
        # space ('term' + 'defined'); message text left unchanged here.
        raise ValueError('Visualization needs the "visualizer" term'
                         'defined in the config, but got None.')

    results = []

    for single_input, pred in zip(inputs, preds):
        if isinstance(single_input, str):
            img_bytes = mmengine.fileio.get(single_input)
            img = mmcv.imfrombytes(img_bytes)
            # mmcv decodes to BGR; flip channels to RGB for drawing.
            img = img[:, :, ::-1]
            img_name = osp.basename(single_input)
        elif isinstance(single_input, np.ndarray):
            img = single_input.copy()
            # Synthesize a zero-padded name for in-memory images.
            img_num = str(self.num_visualized_imgs).zfill(8)
            img_name = f'{img_num}.jpg'
        else:
            raise ValueError('Unsupported input type: '
                             f'{type(single_input)}')

        out_file = osp.join(img_out_dir, 'vis',
                            img_name) if img_out_dir != '' else None

        self.visualizer.add_datasample(
            img_name,
            img,
            pred,
            show=show,
            wait_time=wait_time,
            draw_gt=False,
            draw_pred=draw_pred,
            pred_score_thr=pred_score_thr,
            out_file=out_file,
        )
        results.append(self.visualizer.get_image())
        self.num_visualized_imgs += 1

    return results
506
+
507
def postprocess(
    self,
    preds: PredType,
    visualization: Optional[List[np.ndarray]] = None,
    return_datasamples: bool = False,
    print_result: bool = False,
    no_save_pred: bool = False,
    pred_out_dir: str = '',
    **kwargs,
) -> Dict:
    """Process the predictions and visualization results from ``forward``
    and ``visualize``.

    Responsibilities:

    1. Convert datasamples into a json-serializable dict if needed.
    2. Pack the predictions and visualization results and return them.
    3. Dump or log the predictions.

    Args:
        preds (List[:obj:`DetDataSample`]): Predictions of the model.
        visualization (Optional[np.ndarray]): Visualized predictions.
        return_datasamples (bool): Whether to use Datasample to store
            inference results. If False, dict will be used.
        print_result (bool): Whether to print the inference result w/o
            visualization to the console. Defaults to False.
        no_save_pred (bool): Whether to force not to save prediction
            results. Defaults to False.
        pred_out_dir: Dir to save the inference results w/o
            visualization. If left as empty, no file will be saved.
            Defaults to ''.

    Returns:
        dict: Inference and visualization results with key ``predictions``
        and ``visualization``.

        - ``visualization`` (Any): Returned by :meth:`visualize`.
        - ``predictions`` (dict or DataSample): Returned by
          :meth:`forward` and processed in :meth:`postprocess`.
          If ``return_datasamples=False``, it usually should be a
          json-serializable dict containing only basic data elements such
          as strings and numbers.
    """
    # Forcing no-save overrides any output directory.
    if no_save_pred is True:
        pred_out_dir = ''

    if not return_datasamples:
        # Serialize every datasample (and optionally dump it to disk).
        results = [self.pred2dict(pred, pred_out_dir) for pred in preds]
    else:
        results = preds
        if pred_out_dir != '':
            warnings.warn('Currently does not support saving datasample '
                          'when return_datasamples is set to True. '
                          'Prediction results are not saved!')

    # Print before attaching the (large) visualization arrays.
    result_dict = {'predictions': results}
    if print_result:
        print(result_dict)
    result_dict['visualization'] = visualization
    return result_dict
570
+
571
# TODO: The data format and fields saved in json need further discussion.
# Maybe should include model name, timestamp, filename, image info etc.
def pred2dict(self,
              data_sample: DetDataSample,
              pred_out_dir: str = '') -> Dict:
    """Extract elements necessary to represent a prediction into a
    dictionary.

    It's better to contain only basic data elements such as strings and
    numbers in order to guarantee it's json-serializable.

    Args:
        data_sample (:obj:`DetDataSample`): Predictions of the model.
        pred_out_dir: Dir to save the inference results w/o
            visualization. If left as empty, no file will be saved.
            Defaults to ''.

    Returns:
        dict: Prediction results.
    """
    is_save_pred = True
    if pred_out_dir == '':
        is_save_pred = False

    # Derive output paths from the source image name when available,
    # otherwise from a running counter.
    if is_save_pred and 'img_path' in data_sample:
        img_path = osp.basename(data_sample.img_path)
        img_path = osp.splitext(img_path)[0]
        out_img_path = osp.join(pred_out_dir, 'preds',
                                img_path + '_panoptic_seg.png')
        out_json_path = osp.join(pred_out_dir, 'preds', img_path + '.json')
    elif is_save_pred:
        out_img_path = osp.join(
            pred_out_dir, 'preds',
            f'{self.num_predicted_imgs}_panoptic_seg.png')
        out_json_path = osp.join(pred_out_dir, 'preds',
                                 f'{self.num_predicted_imgs}.json')
        self.num_predicted_imgs += 1

    result = {}
    if 'pred_instances' in data_sample:
        # Read masks before `.numpy()` so RLE encoding below can still
        # move them to CPU explicitly.
        masks = data_sample.pred_instances.get('masks')
        pred_instances = data_sample.pred_instances.numpy()
        result = {
            'labels': pred_instances.labels.tolist(),
            'scores': pred_instances.scores.tolist()
        }
        if 'bboxes' in pred_instances:
            result['bboxes'] = pred_instances.bboxes.tolist()
        if masks is not None:
            if 'bboxes' not in pred_instances or pred_instances.bboxes.sum(
            ) == 0:
                # Fake bbox, such as the SOLO.
                bboxes = mask2bbox(masks.cpu()).numpy().tolist()
                result['bboxes'] = bboxes
            encode_masks = encode_mask_results(pred_instances.masks)
            for encode_mask in encode_masks:
                # COCO RLE counts are bytes; decode for JSON output.
                if isinstance(encode_mask['counts'], bytes):
                    encode_mask['counts'] = encode_mask['counts'].decode()
            result['masks'] = encode_masks

    if 'pred_panoptic_seg' in data_sample:
        # `VOID` is None when panopticapi is missing (optional dep).
        if VOID is None:
            raise RuntimeError(
                'panopticapi is not installed, please install it by: '
                'pip install git+https://github.com/cocodataset/'
                'panopticapi.git.')

        pan = data_sample.pred_panoptic_seg.sem_seg.cpu().numpy()[0]
        # Map the "unknown class" ids (class index == num classes) to
        # the panopticapi VOID label before colorizing.
        pan[pan % INSTANCE_OFFSET == len(
            self.model.dataset_meta['classes'])] = VOID
        pan = id2rgb(pan).astype(np.uint8)

        if is_save_pred:
            # imwrite expects BGR; id2rgb produced RGB.
            mmcv.imwrite(pan[:, :, ::-1], out_img_path)
            result['panoptic_seg_path'] = out_img_path
        else:
            result['panoptic_seg'] = pan

    if is_save_pred:
        mmengine.dump(result, out_json_path)

    return result
head_extractor/build/lib/mmdet/apis/inference.py ADDED
@@ -0,0 +1,372 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import copy
3
+ import warnings
4
+ from pathlib import Path
5
+ from typing import Optional, Sequence, Union
6
+
7
+ import numpy as np
8
+ import torch
9
+ import torch.nn as nn
10
+ from mmcv.ops import RoIPool
11
+ from mmcv.transforms import Compose
12
+ from mmengine.config import Config
13
+ from mmengine.dataset import default_collate
14
+ from mmengine.model.utils import revert_sync_batchnorm
15
+ from mmengine.registry import init_default_scope
16
+ from mmengine.runner import load_checkpoint
17
+
18
+ from mmdet.registry import DATASETS
19
+ from mmdet.utils import ConfigType
20
+ from ..evaluation import get_classes
21
+ from ..registry import MODELS
22
+ from ..structures import DetDataSample, SampleList
23
+ from ..utils import get_test_pipeline_cfg
24
+
25
+
26
def init_detector(
    config: Union[str, Path, Config],
    checkpoint: Optional[str] = None,
    palette: str = 'none',
    device: str = 'cuda:0',
    cfg_options: Optional[dict] = None,
) -> nn.Module:
    """Initialize a detector from config file.

    Args:
        config (str, :obj:`Path`, or :obj:`mmengine.Config`): Config file path,
            :obj:`Path`, or the config object.
        checkpoint (str, optional): Checkpoint path. If left as None, the model
            will not load any weights.
        palette (str): Color palette used for visualization. If palette
            is stored in checkpoint, use checkpoint's palette first, otherwise
            use externally passed palette. Currently, supports 'coco', 'voc',
            'citys' and 'random'. Defaults to none.
        device (str): The device where the anchors will be put on.
            Defaults to cuda:0.
        cfg_options (dict, optional): Options to override some settings in
            the used config.

    Returns:
        nn.Module: The constructed detector.
    """
    if isinstance(config, (str, Path)):
        config = Config.fromfile(config)
    elif not isinstance(config, Config):
        raise TypeError('config must be a filename or Config object, '
                        f'but got {type(config)}')
    if cfg_options is not None:
        config.merge_from_dict(cfg_options)
    # NOTE(review): because of the `elif`, the backbone's init_cfg (e.g. a
    # pretrained-weight URL) is only cleared when cfg_options is None --
    # confirm this asymmetry is intended.
    elif 'init_cfg' in config.model.backbone:
        config.model.backbone.init_cfg = None

    # Register mmdet modules under the default scope before building.
    scope = config.get('default_scope', 'mmdet')
    if scope is not None:
        init_default_scope(config.get('default_scope', 'mmdet'))

    model = MODELS.build(config.model)
    # SyncBN requires distributed training; fall back to plain BN here.
    model = revert_sync_batchnorm(model)
    if checkpoint is None:
        warnings.simplefilter('once')
        warnings.warn('checkpoint is None, use COCO classes by default.')
        model.dataset_meta = {'classes': get_classes('coco')}
    else:
        checkpoint = load_checkpoint(model, checkpoint, map_location='cpu')
        # Weights converted from elsewhere may not have meta fields.
        checkpoint_meta = checkpoint.get('meta', {})

        # save the dataset_meta in the model for convenience
        if 'dataset_meta' in checkpoint_meta:
            # mmdet 3.x, all keys should be lowercase
            model.dataset_meta = {
                k.lower(): v
                for k, v in checkpoint_meta['dataset_meta'].items()
            }
        elif 'CLASSES' in checkpoint_meta:
            # < mmdet 3.x
            classes = checkpoint_meta['CLASSES']
            model.dataset_meta = {'classes': classes}
        else:
            warnings.simplefilter('once')
            warnings.warn(
                'dataset_meta or class names are not saved in the '
                'checkpoint\'s meta data, use COCO classes by default.')
            model.dataset_meta = {'classes': get_classes('coco')}

    # Priority: args.palette -> config -> checkpoint
    if palette != 'none':
        model.dataset_meta['palette'] = palette
    else:
        test_dataset_cfg = copy.deepcopy(config.test_dataloader.dataset)
        # lazy init. We only need the metainfo.
        test_dataset_cfg['lazy_init'] = True
        metainfo = DATASETS.build(test_dataset_cfg).metainfo
        cfg_palette = metainfo.get('palette', None)
        if cfg_palette is not None:
            model.dataset_meta['palette'] = cfg_palette
        else:
            if 'palette' not in model.dataset_meta:
                warnings.warn(
                    'palette does not exist, random is used by default. '
                    'You can also set the palette to customize.')
                model.dataset_meta['palette'] = 'random'

    model.cfg = config  # save the config in the model for convenience
    model.to(device)
    model.eval()
    return model
117
+
118
+
119
# Accepted image argument forms for `inference_detector`.
ImagesType = Union[str, np.ndarray, Sequence[str], Sequence[np.ndarray]]


def inference_detector(
    model: nn.Module,
    imgs: ImagesType,
    test_pipeline: Optional[Compose] = None,
    text_prompt: Optional[str] = None,
    custom_entities: bool = False,
) -> Union[DetDataSample, SampleList]:
    """Inference image(s) with the detector.

    Args:
        model (nn.Module): The loaded detector.
        imgs (str, ndarray, Sequence[str/ndarray]):
            Either image files or loaded images.
        test_pipeline (:obj:`Compose`): Test pipeline. Built from
            ``model.cfg`` when not given.
        text_prompt (str, optional): Text prompt for open-vocabulary
            models (e.g. GLIP / Grounding DINO). Defaults to None.
        custom_entities (bool): Whether the prompt uses custom entities.
            Defaults to False.

    Returns:
        :obj:`DetDataSample` or list[:obj:`DetDataSample`]:
        If imgs is a list or tuple, the same length list type results
        will be returned, otherwise return the detection results directly.
    """

    if isinstance(imgs, (list, tuple)):
        is_batch = True
    else:
        imgs = [imgs]
        is_batch = False

    cfg = model.cfg

    if test_pipeline is None:
        cfg = cfg.copy()
        test_pipeline = get_test_pipeline_cfg(cfg)
        if isinstance(imgs[0], np.ndarray):
            # Calling this method across libraries will result
            # in module unregistered error if not prefixed with mmdet.
            test_pipeline[0].type = 'mmdet.LoadImageFromNDArray'

        test_pipeline = Compose(test_pipeline)

    if model.data_preprocessor.device.type == 'cpu':
        for m in model.modules():
            assert not isinstance(
                m, RoIPool
            ), 'CPU inference with RoIPool is not supported currently.'

    result_list = []
    for i, img in enumerate(imgs):
        # prepare data
        if isinstance(img, np.ndarray):
            # TODO: remove img_id.
            data_ = dict(img=img, img_id=0)
        else:
            # TODO: remove img_id.
            data_ = dict(img_path=img, img_id=0)

        if text_prompt:
            data_['text'] = text_prompt
            data_['custom_entities'] = custom_entities

        # build the data pipeline
        data_ = test_pipeline(data_)

        # Wrap as a batch of one, which is what test_step expects.
        data_['inputs'] = [data_['inputs']]
        data_['data_samples'] = [data_['data_samples']]

        # forward the model
        with torch.no_grad():
            results = model.test_step(data_)[0]

        result_list.append(results)

    if not is_batch:
        return result_list[0]
    else:
        return result_list
197
+
198
+
199
# TODO: Awaiting refactoring
async def async_inference_detector(model, imgs):
    """Async inference image(s) with the detector.

    Args:
        model (nn.Module): The loaded detector.
        imgs (str | ndarray | Sequence[str | ndarray]): Either image
            files or loaded images.

    Returns:
        Awaitable detection results for all images.
    """
    if not isinstance(imgs, (list, tuple)):
        imgs = [imgs]

    cfg = model.cfg

    if isinstance(imgs[0], np.ndarray):
        cfg = cfg.copy()
        # set loading pipeline type
        cfg.data.test.pipeline[0].type = 'LoadImageFromNDArray'

    # cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline)
    test_pipeline = Compose(cfg.data.test.pipeline)

    datas = []
    for img in imgs:
        # prepare data
        if isinstance(img, np.ndarray):
            # directly add img
            data = dict(img=img)
        else:
            # add information into dict
            data = dict(img_info=dict(filename=img), img_prefix=None)
        # build the data pipeline
        data = test_pipeline(data)
        datas.append(data)

    for m in model.modules():
        assert not isinstance(
            m,
            RoIPool), 'CPU inference with RoIPool is not supported currently.'

    # We don't restore `torch.is_grad_enabled()` value during concurrent
    # inference since execution can overlap
    torch.set_grad_enabled(False)
    # Fix: forward the full `datas` batch. The original passed the loop
    # leftover `data` (only the last sample), leaving `datas` unused and
    # silently dropping every image but the final one.
    results = await model.aforward_test(datas, rescale=True)
    return results
246
+
247
+
248
def build_test_pipeline(cfg: ConfigType) -> ConfigType:
    """Build test_pipeline for mot/vis demo. In mot/vis infer, original
    test_pipeline should remove the "LoadImageFromFile" and
    "LoadTrackAnnotations".

    Args:
        cfg (ConfigDict): The loaded config.
    Returns:
        ConfigType: new test_pipeline
    """
    # remove the "LoadImageFromFile" and "LoadTrackAnnotations" in pipeline
    # Pipeline[0] is assumed to be a broadcaster wrapping per-frame
    # transforms; pipeline[-1] the pack-inputs step -- TODO confirm for
    # configs other than the mot/vis defaults.
    transform_broadcaster = cfg.test_dataloader.dataset.pipeline[0].copy()
    for transform in transform_broadcaster['transforms']:
        if transform['type'] == 'Resize':
            # NOTE(review): this assigns a single transform dict (not a
            # list) to 'transforms', keeping only Resize -- verify the
            # broadcaster accepts a bare dict here.
            transform_broadcaster['transforms'] = transform
    pack_track_inputs = cfg.test_dataloader.dataset.pipeline[-1].copy()
    test_pipeline = Compose([transform_broadcaster, pack_track_inputs])

    return test_pipeline
267
+
268
+
269
def inference_mot(model: nn.Module, img: np.ndarray, frame_id: int,
                  video_len: int) -> SampleList:
    """Inference image(s) with the mot model.

    Args:
        model (nn.Module): The loaded mot model.
        img (np.ndarray): Loaded image.
        frame_id (int): frame id.
        video_len (int): demo video length
    Returns:
        SampleList: The tracking data samples.
    """
    cfg = model.cfg
    # Single-frame "video" sample; list-valued fields are what the
    # broadcaster pipeline expects.
    data = dict(
        img=[img.astype(np.float32)],
        frame_id=[frame_id],
        ori_shape=[img.shape[:2]],
        img_id=[frame_id + 1],
        ori_video_length=[video_len])

    test_pipeline = build_test_pipeline(cfg)
    data = test_pipeline(data)

    # RoIPool has no CPU implementation; guard CPU-only inference.
    if not next(model.parameters()).is_cuda:
        for m in model.modules():
            assert not isinstance(
                m, RoIPool
            ), 'CPU inference with RoIPool is not supported currently.'

    # forward the model
    with torch.no_grad():
        data = default_collate([data])
        result = model.test_step(data)[0]
    return result
303
+
304
+
305
def init_track_model(config: Union[str, Config],
                     checkpoint: Optional[str] = None,
                     detector: Optional[str] = None,
                     reid: Optional[str] = None,
                     device: str = 'cuda:0',
                     cfg_options: Optional[dict] = None) -> nn.Module:
    """Initialize a model from config file.

    Args:
        config (str or :obj:`mmengine.Config`): Config file path or the config
            object.
        checkpoint (Optional[str], optional): Checkpoint path. Defaults to
            None.
        detector (Optional[str], optional): Detector Checkpoint path, use in
            some tracking algorithms like sort. Defaults to None.
        reid (Optional[str], optional): Reid checkpoint path. use in
            some tracking algorithms like sort. Defaults to None.
        device (str, optional): The device that the model inferences on.
            Defaults to `cuda:0`.
        cfg_options (Optional[dict], optional): Options to override some
            settings in the used config. Defaults to None.

    Returns:
        nn.Module: The constructed model.
    """
    if isinstance(config, str):
        config = Config.fromfile(config)
    elif not isinstance(config, Config):
        raise TypeError('config must be a filename or Config object, '
                        f'but got {type(config)}')
    if cfg_options is not None:
        config.merge_from_dict(cfg_options)

    model = MODELS.build(config.model)

    if checkpoint is not None:
        checkpoint = load_checkpoint(model, checkpoint, map_location='cpu')
        # Weights converted from elsewhere may not have meta fields.
        checkpoint_meta = checkpoint.get('meta', {})
        # save the dataset_meta in the model for convenience
        if 'dataset_meta' in checkpoint_meta:
            # Normalize pre-3.x 'CLASSES' key to the lowercase 'classes'.
            if 'CLASSES' in checkpoint_meta['dataset_meta']:
                value = checkpoint_meta['dataset_meta'].pop('CLASSES')
                checkpoint_meta['dataset_meta']['classes'] = value
            model.dataset_meta = checkpoint_meta['dataset_meta']

    # Separate detector/reid weights are mutually exclusive with a full
    # tracker checkpoint.
    if detector is not None:
        assert not (checkpoint and detector), \
            'Error: checkpoint and detector checkpoint cannot both exist'
        load_checkpoint(model.detector, detector, map_location='cpu')

    if reid is not None:
        assert not (checkpoint and reid), \
            'Error: checkpoint and reid checkpoint cannot both exist'
        load_checkpoint(model.reid, reid, map_location='cpu')

    # Some methods don't load checkpoints or checkpoints don't contain
    # 'dataset_meta'
    # VIS need dataset_meta, MOT don't need dataset_meta
    if not hasattr(model, 'dataset_meta'):
        warnings.warn('dataset_meta or class names are missed, '
                      'use None by default.')
        model.dataset_meta = {'classes': None}

    model.cfg = config  # save the config in the model for convenience
    model.to(device)
    model.eval()
    return model
head_extractor/build/lib/mmdet/configs/_base_/datasets/coco_detection.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright (c) OpenMMLab. All rights reserved.
# Base dataset config for COCO-style bbox detection (new-style config:
# transforms/datasets referenced as classes instead of registry strings).
from mmcv.transforms import LoadImageFromFile
from mmengine.dataset.sampler import DefaultSampler

from mmdet.datasets import AspectRatioBatchSampler, CocoDataset
from mmdet.datasets.transforms import (LoadAnnotations, PackDetInputs,
                                       RandomFlip, Resize)
from mmdet.evaluation import CocoMetric

# dataset settings
dataset_type = CocoDataset
data_root = 'data/coco/'

# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)

# data_root = 's3://openmmlab/datasets/detection/coco/'

# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
backend_args = None

# Training pipeline: load, annotate, resize (keep aspect ratio), flip, pack.
train_pipeline = [
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(type=LoadAnnotations, with_bbox=True),
    dict(type=Resize, scale=(1333, 800), keep_ratio=True),
    dict(type=RandomFlip, prob=0.5),
    dict(type=PackDetInputs)
]
# Test pipeline: no random flip; keeps GT loading for evaluation.
test_pipeline = [
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(type=Resize, scale=(1333, 800), keep_ratio=True),
    # If you don't have a gt annotation, delete the pipeline
    dict(type=LoadAnnotations, with_bbox=True),
    dict(
        type=PackDetInputs,
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type=DefaultSampler, shuffle=True),
    # Groups images of similar aspect ratio to reduce padding waste.
    batch_sampler=dict(type=AspectRatioBatchSampler),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='annotations/instances_train2017.json',
        data_prefix=dict(img='train2017/'),
        filter_cfg=dict(filter_empty_gt=True, min_size=32),
        pipeline=train_pipeline,
        backend_args=backend_args))
val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type=DefaultSampler, shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='annotations/instances_val2017.json',
        data_prefix=dict(img='val2017/'),
        test_mode=True,
        pipeline=test_pipeline,
        backend_args=backend_args))
test_dataloader = val_dataloader

val_evaluator = dict(
    type=CocoMetric,
    ann_file=data_root + 'annotations/instances_val2017.json',
    metric='bbox',
    format_only=False,
    backend_args=backend_args)
test_evaluator = val_evaluator

# inference on test dataset and
# format the output results for submission.
# test_dataloader = dict(
#     batch_size=1,
#     num_workers=2,
#     persistent_workers=True,
#     drop_last=False,
#     sampler=dict(type=DefaultSampler, shuffle=False),
#     dataset=dict(
#         type=dataset_type,
#         data_root=data_root,
#         ann_file=data_root + 'annotations/image_info_test-dev2017.json',
#         data_prefix=dict(img='test2017/'),
#         test_mode=True,
#         pipeline=test_pipeline))
# test_evaluator = dict(
#     type=CocoMetric,
#     metric='bbox',
#     format_only=True,
#     ann_file=data_root + 'annotations/image_info_test-dev2017.json',
#     outfile_prefix='./work_dirs/coco_detection/test')
head_extractor/build/lib/mmdet/configs/_base_/datasets/coco_instance.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.transforms.loading import LoadImageFromFile
3
+ from mmengine.dataset.sampler import DefaultSampler
4
+
5
+ from mmdet.datasets.coco import CocoDataset
6
+ from mmdet.datasets.samplers.batch_sampler import AspectRatioBatchSampler
7
+ from mmdet.datasets.transforms.formatting import PackDetInputs
8
+ from mmdet.datasets.transforms.loading import LoadAnnotations
9
+ from mmdet.datasets.transforms.transforms import RandomFlip, Resize
10
+ from mmdet.evaluation.metrics.coco_metric import CocoMetric
11
+
12
+ # dataset settings
13
+ dataset_type = 'CocoDataset'
14
+ data_root = 'data/coco/'
15
+
16
+ # Example to use different file client
17
+ # Method 1: simply set the data root and let the file I/O module
18
+ # automatically infer from prefix (not support LMDB and Memcache yet)
19
+
20
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
21
+
22
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
23
+ # backend_args = dict(
24
+ # backend='petrel',
25
+ # path_mapping=dict({
26
+ # './data/': 's3://openmmlab/datasets/detection/',
27
+ # 'data/': 's3://openmmlab/datasets/detection/'
28
+ # }))
29
+ backend_args = None
30
+
31
+ train_pipeline = [
32
+ dict(type=LoadImageFromFile, backend_args=backend_args),
33
+ dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
34
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
35
+ dict(type=RandomFlip, prob=0.5),
36
+ dict(type=PackDetInputs)
37
+ ]
38
+ test_pipeline = [
39
+ dict(type=LoadImageFromFile, backend_args=backend_args),
40
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
41
+ # If you don't have a gt annotation, delete the pipeline
42
+ dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
43
+ dict(
44
+ type=PackDetInputs,
45
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
46
+ 'scale_factor'))
47
+ ]
48
+ train_dataloader = dict(
49
+ batch_size=2,
50
+ num_workers=2,
51
+ persistent_workers=True,
52
+ sampler=dict(type=DefaultSampler, shuffle=True),
53
+ batch_sampler=dict(type=AspectRatioBatchSampler),
54
+ dataset=dict(
55
+ type=CocoDataset,
56
+ data_root=data_root,
57
+ ann_file='annotations/instances_train2017.json',
58
+ data_prefix=dict(img='train2017/'),
59
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
60
+ pipeline=train_pipeline,
61
+ backend_args=backend_args))
62
+ val_dataloader = dict(
63
+ batch_size=1,
64
+ num_workers=2,
65
+ persistent_workers=True,
66
+ drop_last=False,
67
+ sampler=dict(type=DefaultSampler, shuffle=False),
68
+ dataset=dict(
69
+ type=CocoDataset,
70
+ data_root=data_root,
71
+ ann_file='annotations/instances_val2017.json',
72
+ data_prefix=dict(img='val2017/'),
73
+ test_mode=True,
74
+ pipeline=test_pipeline,
75
+ backend_args=backend_args))
76
+ test_dataloader = val_dataloader
77
+
78
+ val_evaluator = dict(
79
+ type=CocoMetric,
80
+ ann_file=data_root + 'annotations/instances_val2017.json',
81
+ metric=['bbox', 'segm'],
82
+ format_only=False,
83
+ backend_args=backend_args)
84
+ test_evaluator = val_evaluator
85
+
86
+ # inference on test dataset and
87
+ # format the output results for submission.
88
+ # test_dataloader = dict(
89
+ # batch_size=1,
90
+ # num_workers=2,
91
+ # persistent_workers=True,
92
+ # drop_last=False,
93
+ # sampler=dict(type=DefaultSampler, shuffle=False),
94
+ # dataset=dict(
95
+ # type=CocoDataset,
96
+ # data_root=data_root,
97
+ # ann_file=data_root + 'annotations/image_info_test-dev2017.json',
98
+ # data_prefix=dict(img='test2017/'),
99
+ # test_mode=True,
100
+ # pipeline=test_pipeline))
101
+ # test_evaluator = dict(
102
+ # type=CocoMetric,
103
+ # metric=['bbox', 'segm'],
104
+ # format_only=True,
105
+ # ann_file=data_root + 'annotations/image_info_test-dev2017.json',
106
+ # outfile_prefix='./work_dirs/coco_instance/test')
head_extractor/build/lib/mmdet/configs/_base_/datasets/coco_instance_semantic.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.transforms.loading import LoadImageFromFile
3
+ from mmengine.dataset.sampler import DefaultSampler
4
+
5
+ from mmdet.datasets.coco import CocoDataset
6
+ from mmdet.datasets.samplers.batch_sampler import AspectRatioBatchSampler
7
+ from mmdet.datasets.transforms.formatting import PackDetInputs
8
+ from mmdet.datasets.transforms.loading import LoadAnnotations
9
+ from mmdet.datasets.transforms.transforms import RandomFlip, Resize
10
+ from mmdet.evaluation.metrics.coco_metric import CocoMetric
11
+
12
+ # dataset settings
13
+ dataset_type = 'CocoDataset'
14
+ data_root = 'data/coco/'
15
+
16
+ # Example to use different file client
17
+ # Method 1: simply set the data root and let the file I/O module
18
+ # automatically infer from prefix (not support LMDB and Memcache yet)
19
+
20
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
21
+
22
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
23
+ # backend_args = dict(
24
+ # backend='petrel',
25
+ # path_mapping=dict({
26
+ # './data/': 's3://openmmlab/datasets/detection/',
27
+ # 'data/': 's3://openmmlab/datasets/detection/'
28
+ # }))
29
+ backend_args = None
30
+
31
+ train_pipeline = [
32
+ dict(type=LoadImageFromFile, backend_args=backend_args),
33
+ dict(type=LoadAnnotations, with_bbox=True, with_mask=True, with_seg=True),
34
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
35
+ dict(type=RandomFlip, prob=0.5),
36
+ dict(type=PackDetInputs)
37
+ ]
38
+ test_pipeline = [
39
+ dict(type=LoadImageFromFile, backend_args=backend_args),
40
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
41
+ # If you don't have a gt annotation, delete the pipeline
42
+ dict(type=LoadAnnotations, with_bbox=True, with_mask=True, with_seg=True),
43
+ dict(
44
+ type=PackDetInputs,
45
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
46
+ 'scale_factor'))
47
+ ]
48
+
49
+ train_dataloader = dict(
50
+ batch_size=2,
51
+ num_workers=2,
52
+ persistent_workers=True,
53
+ sampler=dict(type=DefaultSampler, shuffle=True),
54
+ batch_sampler=dict(type=AspectRatioBatchSampler),
55
+ dataset=dict(
56
+ type=CocoDataset,
57
+ data_root=data_root,
58
+ ann_file='annotations/instances_train2017.json',
59
+ data_prefix=dict(img='train2017/', seg='stuffthingmaps/train2017/'),
60
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
61
+ pipeline=train_pipeline,
62
+ backend_args=backend_args))
63
+
64
+ val_dataloader = dict(
65
+ batch_size=1,
66
+ num_workers=2,
67
+ persistent_workers=True,
68
+ drop_last=False,
69
+ sampler=dict(type=DefaultSampler, shuffle=False),
70
+ dataset=dict(
71
+ type=CocoDataset,
72
+ data_root=data_root,
73
+ ann_file='annotations/instances_val2017.json',
74
+ data_prefix=dict(img='val2017/'),
75
+ test_mode=True,
76
+ pipeline=test_pipeline,
77
+ backend_args=backend_args))
78
+
79
+ test_dataloader = val_dataloader
80
+
81
+ val_evaluator = dict(
82
+ type=CocoMetric,
83
+ ann_file=data_root + 'annotations/instances_val2017.json',
84
+ metric=['bbox', 'segm'],
85
+ format_only=False,
86
+ backend_args=backend_args)
87
+ test_evaluator = val_evaluator
head_extractor/build/lib/mmdet/configs/_base_/datasets/coco_panoptic.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.transforms.loading import LoadImageFromFile
3
+ from mmengine.dataset.sampler import DefaultSampler
4
+
5
+ from mmdet.datasets.coco_panoptic import CocoPanopticDataset
6
+ from mmdet.datasets.samplers.batch_sampler import AspectRatioBatchSampler
7
+ from mmdet.datasets.transforms.formatting import PackDetInputs
8
+ from mmdet.datasets.transforms.loading import LoadPanopticAnnotations
9
+ from mmdet.datasets.transforms.transforms import RandomFlip, Resize
10
+ from mmdet.evaluation.metrics.coco_panoptic_metric import CocoPanopticMetric
11
+
12
+ # dataset settings
13
+ dataset_type = 'CocoPanopticDataset'
14
+ data_root = 'data/coco/'
15
+
16
+ # Example to use different file client
17
+ # Method 1: simply set the data root and let the file I/O module
18
+ # automatically infer from prefix (not support LMDB and Memcache yet)
19
+
20
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
21
+
22
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
23
+ # backend_args = dict(
24
+ # backend='petrel',
25
+ # path_mapping=dict({
26
+ # './data/': 's3://openmmlab/datasets/detection/',
27
+ # 'data/': 's3://openmmlab/datasets/detection/'
28
+ # }))
29
+ backend_args = None
30
+
31
+ train_pipeline = [
32
+ dict(type=LoadImageFromFile, backend_args=backend_args),
33
+ dict(type=LoadPanopticAnnotations, backend_args=backend_args),
34
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
35
+ dict(type=RandomFlip, prob=0.5),
36
+ dict(type=PackDetInputs)
37
+ ]
38
+ test_pipeline = [
39
+ dict(type=LoadImageFromFile, backend_args=backend_args),
40
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
41
+ dict(type=LoadPanopticAnnotations, backend_args=backend_args),
42
+ dict(
43
+ type=PackDetInputs,
44
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
45
+ 'scale_factor'))
46
+ ]
47
+
48
+ train_dataloader = dict(
49
+ batch_size=2,
50
+ num_workers=2,
51
+ persistent_workers=True,
52
+ sampler=dict(type=DefaultSampler, shuffle=True),
53
+ batch_sampler=dict(type=AspectRatioBatchSampler),
54
+ dataset=dict(
55
+ type=CocoPanopticDataset,
56
+ data_root=data_root,
57
+ ann_file='annotations/panoptic_train2017.json',
58
+ data_prefix=dict(
59
+ img='train2017/', seg='annotations/panoptic_train2017/'),
60
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
61
+ pipeline=train_pipeline,
62
+ backend_args=backend_args))
63
+ val_dataloader = dict(
64
+ batch_size=1,
65
+ num_workers=2,
66
+ persistent_workers=True,
67
+ drop_last=False,
68
+ sampler=dict(type=DefaultSampler, shuffle=False),
69
+ dataset=dict(
70
+ type=CocoPanopticDataset,
71
+ data_root=data_root,
72
+ ann_file='annotations/panoptic_val2017.json',
73
+ data_prefix=dict(img='val2017/', seg='annotations/panoptic_val2017/'),
74
+ test_mode=True,
75
+ pipeline=test_pipeline,
76
+ backend_args=backend_args))
77
+ test_dataloader = val_dataloader
78
+
79
+ val_evaluator = dict(
80
+ type=CocoPanopticMetric,
81
+ ann_file=data_root + 'annotations/panoptic_val2017.json',
82
+ seg_prefix=data_root + 'annotations/panoptic_val2017/',
83
+ backend_args=backend_args)
84
+ test_evaluator = val_evaluator
85
+
86
+ # inference on test dataset and
87
+ # format the output results for submission.
88
+ # test_dataloader = dict(
89
+ # batch_size=1,
90
+ # num_workers=1,
91
+ # persistent_workers=True,
92
+ # drop_last=False,
93
+ # sampler=dict(type=DefaultSampler, shuffle=False),
94
+ # dataset=dict(
95
+ # type=CocoPanopticDataset,
96
+ # data_root=data_root,
97
+ # ann_file='annotations/panoptic_image_info_test-dev2017.json',
98
+ # data_prefix=dict(img='test2017/'),
99
+ # test_mode=True,
100
+ # pipeline=test_pipeline))
101
+ # test_evaluator = dict(
102
+ # type=CocoPanopticMetric,
103
+ # format_only=True,
104
+ # ann_file=data_root + 'annotations/panoptic_image_info_test-dev2017.json',
105
+ # outfile_prefix='./work_dirs/coco_panoptic/test')
head_extractor/build/lib/mmdet/configs/_base_/datasets/mot_challenge.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.transforms import (LoadImageFromFile, RandomResize,
3
+ TransformBroadcaster)
4
+
5
+ from mmdet.datasets import MOTChallengeDataset
6
+ from mmdet.datasets.samplers import TrackImgSampler
7
+ from mmdet.datasets.transforms import (LoadTrackAnnotations, PackTrackInputs,
8
+ PhotoMetricDistortion, RandomCrop,
9
+ RandomFlip, Resize,
10
+ UniformRefFrameSample)
11
+ from mmdet.evaluation import MOTChallengeMetric
12
+
13
+ # dataset settings
14
+ dataset_type = MOTChallengeDataset
15
+ data_root = 'data/MOT17/'
16
+ img_scale = (1088, 1088)
17
+
18
+ backend_args = None
19
+ # data pipeline
20
+ train_pipeline = [
21
+ dict(
22
+ type=UniformRefFrameSample,
23
+ num_ref_imgs=1,
24
+ frame_range=10,
25
+ filter_key_img=True),
26
+ dict(
27
+ type=TransformBroadcaster,
28
+ share_random_params=True,
29
+ transforms=[
30
+ dict(type=LoadImageFromFile, backend_args=backend_args),
31
+ dict(type=LoadTrackAnnotations),
32
+ dict(
33
+ type=RandomResize,
34
+ scale=img_scale,
35
+ ratio_range=(0.8, 1.2),
36
+ keep_ratio=True,
37
+ clip_object_border=False),
38
+ dict(type=PhotoMetricDistortion)
39
+ ]),
40
+ dict(
41
+ type=TransformBroadcaster,
42
+ # different cropped positions for different frames
43
+ share_random_params=False,
44
+ transforms=[
45
+ dict(type=RandomCrop, crop_size=img_scale, bbox_clip_border=False)
46
+ ]),
47
+ dict(
48
+ type=TransformBroadcaster,
49
+ share_random_params=True,
50
+ transforms=[
51
+ dict(type=RandomFlip, prob=0.5),
52
+ ]),
53
+ dict(type=PackTrackInputs)
54
+ ]
55
+
56
+ test_pipeline = [
57
+ dict(
58
+ type=TransformBroadcaster,
59
+ transforms=[
60
+ dict(type=LoadImageFromFile, backend_args=backend_args),
61
+ dict(type=Resize, scale=img_scale, keep_ratio=True),
62
+ dict(type=LoadTrackAnnotations)
63
+ ]),
64
+ dict(type=PackTrackInputs)
65
+ ]
66
+
67
+ # dataloader
68
+ train_dataloader = dict(
69
+ batch_size=2,
70
+ num_workers=2,
71
+ persistent_workers=True,
72
+ sampler=dict(type=TrackImgSampler), # image-based sampling
73
+ dataset=dict(
74
+ type=dataset_type,
75
+ data_root=data_root,
76
+ visibility_thr=-1,
77
+ ann_file='annotations/half-train_cocoformat.json',
78
+ data_prefix=dict(img_path='train'),
79
+ metainfo=dict(classes=('pedestrian', )),
80
+ pipeline=train_pipeline))
81
+ val_dataloader = dict(
82
+ batch_size=1,
83
+ num_workers=2,
84
+ persistent_workers=True,
85
+ # Now we support two ways to test, image_based and video_based
86
+ # if you want to use video_based sampling, you can use as follows
87
+ # sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
88
+ sampler=dict(type=TrackImgSampler), # image-based sampling
89
+ dataset=dict(
90
+ type=dataset_type,
91
+ data_root=data_root,
92
+ ann_file='annotations/half-val_cocoformat.json',
93
+ data_prefix=dict(img_path='train'),
94
+ test_mode=True,
95
+ pipeline=test_pipeline))
96
+ test_dataloader = val_dataloader
97
+
98
+ # evaluator
99
+ val_evaluator = dict(
100
+ type=MOTChallengeMetric, metric=['HOTA', 'CLEAR', 'Identity'])
101
+ test_evaluator = val_evaluator
head_extractor/build/lib/mmdet/configs/_base_/default_runtime.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook,
3
+ LoggerHook, ParamSchedulerHook)
4
+ from mmengine.runner import LogProcessor
5
+ from mmengine.visualization import LocalVisBackend
6
+
7
+ from mmdet.engine.hooks import DetVisualizationHook
8
+ from mmdet.visualization import DetLocalVisualizer
9
+
10
+ default_scope = None
11
+
12
+ default_hooks = dict(
13
+ timer=dict(type=IterTimerHook),
14
+ logger=dict(type=LoggerHook, interval=50),
15
+ param_scheduler=dict(type=ParamSchedulerHook),
16
+ checkpoint=dict(type=CheckpointHook, interval=1),
17
+ sampler_seed=dict(type=DistSamplerSeedHook),
18
+ visualization=dict(type=DetVisualizationHook))
19
+
20
+ env_cfg = dict(
21
+ cudnn_benchmark=False,
22
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
23
+ dist_cfg=dict(backend='nccl'),
24
+ )
25
+
26
+ vis_backends = [dict(type=LocalVisBackend)]
27
+ visualizer = dict(
28
+ type=DetLocalVisualizer, vis_backends=vis_backends, name='visualizer')
29
+ log_processor = dict(type=LogProcessor, window_size=50, by_epoch=True)
30
+
31
+ log_level = 'INFO'
32
+ load_from = None
33
+ resume = False
head_extractor/build/lib/mmdet/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.ops import RoIAlign, nms
3
+ from torch.nn import BatchNorm2d
4
+
5
+ from mmdet.models.backbones.resnet import ResNet
6
+ from mmdet.models.data_preprocessors.data_preprocessor import \
7
+ DetDataPreprocessor
8
+ from mmdet.models.dense_heads.rpn_head import RPNHead
9
+ from mmdet.models.detectors.cascade_rcnn import CascadeRCNN
10
+ from mmdet.models.losses.cross_entropy_loss import CrossEntropyLoss
11
+ from mmdet.models.losses.smooth_l1_loss import SmoothL1Loss
12
+ from mmdet.models.necks.fpn import FPN
13
+ from mmdet.models.roi_heads.bbox_heads.convfc_bbox_head import \
14
+ Shared2FCBBoxHead
15
+ from mmdet.models.roi_heads.cascade_roi_head import CascadeRoIHead
16
+ from mmdet.models.roi_heads.mask_heads.fcn_mask_head import FCNMaskHead
17
+ from mmdet.models.roi_heads.roi_extractors.single_level_roi_extractor import \
18
+ SingleRoIExtractor
19
+ from mmdet.models.task_modules.assigners.max_iou_assigner import MaxIoUAssigner
20
+ from mmdet.models.task_modules.coders.delta_xywh_bbox_coder import \
21
+ DeltaXYWHBBoxCoder
22
+ from mmdet.models.task_modules.prior_generators.anchor_generator import \
23
+ AnchorGenerator
24
+ from mmdet.models.task_modules.samplers.random_sampler import RandomSampler
25
+
26
+ # model settings
27
+ model = dict(
28
+ type=CascadeRCNN,
29
+ data_preprocessor=dict(
30
+ type=DetDataPreprocessor,
31
+ mean=[123.675, 116.28, 103.53],
32
+ std=[58.395, 57.12, 57.375],
33
+ bgr_to_rgb=True,
34
+ pad_mask=True,
35
+ pad_size_divisor=32),
36
+ backbone=dict(
37
+ type=ResNet,
38
+ depth=50,
39
+ num_stages=4,
40
+ out_indices=(0, 1, 2, 3),
41
+ frozen_stages=1,
42
+ norm_cfg=dict(type=BatchNorm2d, requires_grad=True),
43
+ norm_eval=True,
44
+ style='pytorch',
45
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
46
+ neck=dict(
47
+ type=FPN,
48
+ in_channels=[256, 512, 1024, 2048],
49
+ out_channels=256,
50
+ num_outs=5),
51
+ rpn_head=dict(
52
+ type=RPNHead,
53
+ in_channels=256,
54
+ feat_channels=256,
55
+ anchor_generator=dict(
56
+ type=AnchorGenerator,
57
+ scales=[8],
58
+ ratios=[0.5, 1.0, 2.0],
59
+ strides=[4, 8, 16, 32, 64]),
60
+ bbox_coder=dict(
61
+ type=DeltaXYWHBBoxCoder,
62
+ target_means=[.0, .0, .0, .0],
63
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
64
+ loss_cls=dict(
65
+ type=CrossEntropyLoss, use_sigmoid=True, loss_weight=1.0),
66
+ loss_bbox=dict(type=SmoothL1Loss, beta=1.0 / 9.0, loss_weight=1.0)),
67
+ roi_head=dict(
68
+ type=CascadeRoIHead,
69
+ num_stages=3,
70
+ stage_loss_weights=[1, 0.5, 0.25],
71
+ bbox_roi_extractor=dict(
72
+ type=SingleRoIExtractor,
73
+ roi_layer=dict(type=RoIAlign, output_size=7, sampling_ratio=0),
74
+ out_channels=256,
75
+ featmap_strides=[4, 8, 16, 32]),
76
+ bbox_head=[
77
+ dict(
78
+ type=Shared2FCBBoxHead,
79
+ in_channels=256,
80
+ fc_out_channels=1024,
81
+ roi_feat_size=7,
82
+ num_classes=80,
83
+ bbox_coder=dict(
84
+ type=DeltaXYWHBBoxCoder,
85
+ target_means=[0., 0., 0., 0.],
86
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
87
+ reg_class_agnostic=True,
88
+ loss_cls=dict(
89
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
90
+ loss_bbox=dict(type=SmoothL1Loss, beta=1.0, loss_weight=1.0)),
91
+ dict(
92
+ type=Shared2FCBBoxHead,
93
+ in_channels=256,
94
+ fc_out_channels=1024,
95
+ roi_feat_size=7,
96
+ num_classes=80,
97
+ bbox_coder=dict(
98
+ type=DeltaXYWHBBoxCoder,
99
+ target_means=[0., 0., 0., 0.],
100
+ target_stds=[0.05, 0.05, 0.1, 0.1]),
101
+ reg_class_agnostic=True,
102
+ loss_cls=dict(
103
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
104
+ loss_bbox=dict(type=SmoothL1Loss, beta=1.0, loss_weight=1.0)),
105
+ dict(
106
+ type=Shared2FCBBoxHead,
107
+ in_channels=256,
108
+ fc_out_channels=1024,
109
+ roi_feat_size=7,
110
+ num_classes=80,
111
+ bbox_coder=dict(
112
+ type=DeltaXYWHBBoxCoder,
113
+ target_means=[0., 0., 0., 0.],
114
+ target_stds=[0.033, 0.033, 0.067, 0.067]),
115
+ reg_class_agnostic=True,
116
+ loss_cls=dict(
117
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
118
+ loss_bbox=dict(type=SmoothL1Loss, beta=1.0, loss_weight=1.0))
119
+ ],
120
+ mask_roi_extractor=dict(
121
+ type=SingleRoIExtractor,
122
+ roi_layer=dict(type=RoIAlign, output_size=14, sampling_ratio=0),
123
+ out_channels=256,
124
+ featmap_strides=[4, 8, 16, 32]),
125
+ mask_head=dict(
126
+ type=FCNMaskHead,
127
+ num_convs=4,
128
+ in_channels=256,
129
+ conv_out_channels=256,
130
+ num_classes=80,
131
+ loss_mask=dict(
132
+ type=CrossEntropyLoss, use_mask=True, loss_weight=1.0))),
133
+ # model training and testing settings
134
+ train_cfg=dict(
135
+ rpn=dict(
136
+ assigner=dict(
137
+ type=MaxIoUAssigner,
138
+ pos_iou_thr=0.7,
139
+ neg_iou_thr=0.3,
140
+ min_pos_iou=0.3,
141
+ match_low_quality=True,
142
+ ignore_iof_thr=-1),
143
+ sampler=dict(
144
+ type=RandomSampler,
145
+ num=256,
146
+ pos_fraction=0.5,
147
+ neg_pos_ub=-1,
148
+ add_gt_as_proposals=False),
149
+ allowed_border=0,
150
+ pos_weight=-1,
151
+ debug=False),
152
+ rpn_proposal=dict(
153
+ nms_pre=2000,
154
+ max_per_img=2000,
155
+ nms=dict(type=nms, iou_threshold=0.7),
156
+ min_bbox_size=0),
157
+ rcnn=[
158
+ dict(
159
+ assigner=dict(
160
+ type=MaxIoUAssigner,
161
+ pos_iou_thr=0.5,
162
+ neg_iou_thr=0.5,
163
+ min_pos_iou=0.5,
164
+ match_low_quality=False,
165
+ ignore_iof_thr=-1),
166
+ sampler=dict(
167
+ type=RandomSampler,
168
+ num=512,
169
+ pos_fraction=0.25,
170
+ neg_pos_ub=-1,
171
+ add_gt_as_proposals=True),
172
+ mask_size=28,
173
+ pos_weight=-1,
174
+ debug=False),
175
+ dict(
176
+ assigner=dict(
177
+ type=MaxIoUAssigner,
178
+ pos_iou_thr=0.6,
179
+ neg_iou_thr=0.6,
180
+ min_pos_iou=0.6,
181
+ match_low_quality=False,
182
+ ignore_iof_thr=-1),
183
+ sampler=dict(
184
+ type=RandomSampler,
185
+ num=512,
186
+ pos_fraction=0.25,
187
+ neg_pos_ub=-1,
188
+ add_gt_as_proposals=True),
189
+ mask_size=28,
190
+ pos_weight=-1,
191
+ debug=False),
192
+ dict(
193
+ assigner=dict(
194
+ type=MaxIoUAssigner,
195
+ pos_iou_thr=0.7,
196
+ neg_iou_thr=0.7,
197
+ min_pos_iou=0.7,
198
+ match_low_quality=False,
199
+ ignore_iof_thr=-1),
200
+ sampler=dict(
201
+ type=RandomSampler,
202
+ num=512,
203
+ pos_fraction=0.25,
204
+ neg_pos_ub=-1,
205
+ add_gt_as_proposals=True),
206
+ mask_size=28,
207
+ pos_weight=-1,
208
+ debug=False)
209
+ ]),
210
+ test_cfg=dict(
211
+ rpn=dict(
212
+ nms_pre=1000,
213
+ max_per_img=1000,
214
+ nms=dict(type=nms, iou_threshold=0.7),
215
+ min_bbox_size=0),
216
+ rcnn=dict(
217
+ score_thr=0.05,
218
+ nms=dict(type=nms, iou_threshold=0.5),
219
+ max_per_img=100,
220
+ mask_thr_binary=0.5)))
head_extractor/build/lib/mmdet/configs/_base_/models/cascade_rcnn_r50_fpn.py ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.ops import RoIAlign, nms
3
+ from torch.nn import BatchNorm2d
4
+
5
+ from mmdet.models.backbones.resnet import ResNet
6
+ from mmdet.models.data_preprocessors.data_preprocessor import \
7
+ DetDataPreprocessor
8
+ from mmdet.models.dense_heads.rpn_head import RPNHead
9
+ from mmdet.models.detectors.cascade_rcnn import CascadeRCNN
10
+ from mmdet.models.losses.cross_entropy_loss import CrossEntropyLoss
11
+ from mmdet.models.losses.smooth_l1_loss import SmoothL1Loss
12
+ from mmdet.models.necks.fpn import FPN
13
+ from mmdet.models.roi_heads.bbox_heads.convfc_bbox_head import \
14
+ Shared2FCBBoxHead
15
+ from mmdet.models.roi_heads.cascade_roi_head import CascadeRoIHead
16
+ from mmdet.models.roi_heads.roi_extractors.single_level_roi_extractor import \
17
+ SingleRoIExtractor
18
+ from mmdet.models.task_modules.assigners.max_iou_assigner import MaxIoUAssigner
19
+ from mmdet.models.task_modules.coders.delta_xywh_bbox_coder import \
20
+ DeltaXYWHBBoxCoder
21
+ from mmdet.models.task_modules.prior_generators.anchor_generator import \
22
+ AnchorGenerator
23
+ from mmdet.models.task_modules.samplers.random_sampler import RandomSampler
24
+
25
+ # model settings
26
+ model = dict(
27
+ type=CascadeRCNN,
28
+ data_preprocessor=dict(
29
+ type=DetDataPreprocessor,
30
+ mean=[123.675, 116.28, 103.53],
31
+ std=[58.395, 57.12, 57.375],
32
+ bgr_to_rgb=True,
33
+ pad_size_divisor=32),
34
+ backbone=dict(
35
+ type=ResNet,
36
+ depth=50,
37
+ num_stages=4,
38
+ out_indices=(0, 1, 2, 3),
39
+ frozen_stages=1,
40
+ norm_cfg=dict(type=BatchNorm2d, requires_grad=True),
41
+ norm_eval=True,
42
+ style='pytorch',
43
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
44
+ neck=dict(
45
+ type=FPN,
46
+ in_channels=[256, 512, 1024, 2048],
47
+ out_channels=256,
48
+ num_outs=5),
49
+ rpn_head=dict(
50
+ type=RPNHead,
51
+ in_channels=256,
52
+ feat_channels=256,
53
+ anchor_generator=dict(
54
+ type=AnchorGenerator,
55
+ scales=[8],
56
+ ratios=[0.5, 1.0, 2.0],
57
+ strides=[4, 8, 16, 32, 64]),
58
+ bbox_coder=dict(
59
+ type=DeltaXYWHBBoxCoder,
60
+ target_means=[.0, .0, .0, .0],
61
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
62
+ loss_cls=dict(
63
+ type=CrossEntropyLoss, use_sigmoid=True, loss_weight=1.0),
64
+ loss_bbox=dict(type=SmoothL1Loss, beta=1.0 / 9.0, loss_weight=1.0)),
65
+ roi_head=dict(
66
+ type=CascadeRoIHead,
67
+ num_stages=3,
68
+ stage_loss_weights=[1, 0.5, 0.25],
69
+ bbox_roi_extractor=dict(
70
+ type=SingleRoIExtractor,
71
+ roi_layer=dict(type=RoIAlign, output_size=7, sampling_ratio=0),
72
+ out_channels=256,
73
+ featmap_strides=[4, 8, 16, 32]),
74
+ bbox_head=[
75
+ dict(
76
+ type=Shared2FCBBoxHead,
77
+ in_channels=256,
78
+ fc_out_channels=1024,
79
+ roi_feat_size=7,
80
+ num_classes=80,
81
+ bbox_coder=dict(
82
+ type=DeltaXYWHBBoxCoder,
83
+ target_means=[0., 0., 0., 0.],
84
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
85
+ reg_class_agnostic=True,
86
+ loss_cls=dict(
87
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
88
+ loss_bbox=dict(type=SmoothL1Loss, beta=1.0, loss_weight=1.0)),
89
+ dict(
90
+ type=Shared2FCBBoxHead,
91
+ in_channels=256,
92
+ fc_out_channels=1024,
93
+ roi_feat_size=7,
94
+ num_classes=80,
95
+ bbox_coder=dict(
96
+ type=DeltaXYWHBBoxCoder,
97
+ target_means=[0., 0., 0., 0.],
98
+ target_stds=[0.05, 0.05, 0.1, 0.1]),
99
+ reg_class_agnostic=True,
100
+ loss_cls=dict(
101
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
102
+ loss_bbox=dict(type=SmoothL1Loss, beta=1.0, loss_weight=1.0)),
103
+ dict(
104
+ type=Shared2FCBBoxHead,
105
+ in_channels=256,
106
+ fc_out_channels=1024,
107
+ roi_feat_size=7,
108
+ num_classes=80,
109
+ bbox_coder=dict(
110
+ type=DeltaXYWHBBoxCoder,
111
+ target_means=[0., 0., 0., 0.],
112
+ target_stds=[0.033, 0.033, 0.067, 0.067]),
113
+ reg_class_agnostic=True,
114
+ loss_cls=dict(
115
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
116
+ loss_bbox=dict(type=SmoothL1Loss, beta=1.0, loss_weight=1.0))
117
+ ]),
118
+ # model training and testing settings
119
+ train_cfg=dict(
120
+ rpn=dict(
121
+ assigner=dict(
122
+ type=MaxIoUAssigner,
123
+ pos_iou_thr=0.7,
124
+ neg_iou_thr=0.3,
125
+ min_pos_iou=0.3,
126
+ match_low_quality=True,
127
+ ignore_iof_thr=-1),
128
+ sampler=dict(
129
+ type=RandomSampler,
130
+ num=256,
131
+ pos_fraction=0.5,
132
+ neg_pos_ub=-1,
133
+ add_gt_as_proposals=False),
134
+ allowed_border=0,
135
+ pos_weight=-1,
136
+ debug=False),
137
+ rpn_proposal=dict(
138
+ nms_pre=2000,
139
+ max_per_img=2000,
140
+ nms=dict(type=nms, iou_threshold=0.7),
141
+ min_bbox_size=0),
142
+ rcnn=[
143
+ dict(
144
+ assigner=dict(
145
+ type=MaxIoUAssigner,
146
+ pos_iou_thr=0.5,
147
+ neg_iou_thr=0.5,
148
+ min_pos_iou=0.5,
149
+ match_low_quality=False,
150
+ ignore_iof_thr=-1),
151
+ sampler=dict(
152
+ type=RandomSampler,
153
+ num=512,
154
+ pos_fraction=0.25,
155
+ neg_pos_ub=-1,
156
+ add_gt_as_proposals=True),
157
+ pos_weight=-1,
158
+ debug=False),
159
+ dict(
160
+ assigner=dict(
161
+ type=MaxIoUAssigner,
162
+ pos_iou_thr=0.6,
163
+ neg_iou_thr=0.6,
164
+ min_pos_iou=0.6,
165
+ match_low_quality=False,
166
+ ignore_iof_thr=-1),
167
+ sampler=dict(
168
+ type=RandomSampler,
169
+ num=512,
170
+ pos_fraction=0.25,
171
+ neg_pos_ub=-1,
172
+ add_gt_as_proposals=True),
173
+ pos_weight=-1,
174
+ debug=False),
175
+ dict(
176
+ assigner=dict(
177
+ type=MaxIoUAssigner,
178
+ pos_iou_thr=0.7,
179
+ neg_iou_thr=0.7,
180
+ min_pos_iou=0.7,
181
+ match_low_quality=False,
182
+ ignore_iof_thr=-1),
183
+ sampler=dict(
184
+ type=RandomSampler,
185
+ num=512,
186
+ pos_fraction=0.25,
187
+ neg_pos_ub=-1,
188
+ add_gt_as_proposals=True),
189
+ pos_weight=-1,
190
+ debug=False)
191
+ ]),
192
+ test_cfg=dict(
193
+ rpn=dict(
194
+ nms_pre=1000,
195
+ max_per_img=1000,
196
+ nms=dict(type=nms, iou_threshold=0.7),
197
+ min_bbox_size=0),
198
+ rcnn=dict(
199
+ score_thr=0.05,
200
+ nms=dict(type=nms, iou_threshold=0.5),
201
+ max_per_img=100)))
head_extractor/build/lib/mmdet/configs/_base_/models/faster_rcnn_r50_fpn.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.ops import RoIAlign, nms
3
+ from torch.nn import BatchNorm2d
4
+
5
+ from mmdet.models.backbones.resnet import ResNet
6
+ from mmdet.models.data_preprocessors.data_preprocessor import \
7
+ DetDataPreprocessor
8
+ from mmdet.models.dense_heads.rpn_head import RPNHead
9
+ from mmdet.models.detectors.faster_rcnn import FasterRCNN
10
+ from mmdet.models.losses.cross_entropy_loss import CrossEntropyLoss
11
+ from mmdet.models.losses.smooth_l1_loss import L1Loss
12
+ from mmdet.models.necks.fpn import FPN
13
+ from mmdet.models.roi_heads.bbox_heads.convfc_bbox_head import \
14
+ Shared2FCBBoxHead
15
+ from mmdet.models.roi_heads.roi_extractors.single_level_roi_extractor import \
16
+ SingleRoIExtractor
17
+ from mmdet.models.roi_heads.standard_roi_head import StandardRoIHead
18
+ from mmdet.models.task_modules.assigners.max_iou_assigner import MaxIoUAssigner
19
+ from mmdet.models.task_modules.coders.delta_xywh_bbox_coder import \
20
+ DeltaXYWHBBoxCoder
21
+ from mmdet.models.task_modules.prior_generators.anchor_generator import \
22
+ AnchorGenerator
23
+ from mmdet.models.task_modules.samplers.random_sampler import RandomSampler
24
+
25
+ # model settings
26
+ model = dict(
27
+ type=FasterRCNN,
28
+ data_preprocessor=dict(
29
+ type=DetDataPreprocessor,
30
+ mean=[123.675, 116.28, 103.53],
31
+ std=[58.395, 57.12, 57.375],
32
+ bgr_to_rgb=True,
33
+ pad_size_divisor=32),
34
+ backbone=dict(
35
+ type=ResNet,
36
+ depth=50,
37
+ num_stages=4,
38
+ out_indices=(0, 1, 2, 3),
39
+ frozen_stages=1,
40
+ norm_cfg=dict(type=BatchNorm2d, requires_grad=True),
41
+ norm_eval=True,
42
+ style='pytorch',
43
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
44
+ neck=dict(
45
+ type=FPN,
46
+ in_channels=[256, 512, 1024, 2048],
47
+ out_channels=256,
48
+ num_outs=5),
49
+ rpn_head=dict(
50
+ type=RPNHead,
51
+ in_channels=256,
52
+ feat_channels=256,
53
+ anchor_generator=dict(
54
+ type=AnchorGenerator,
55
+ scales=[8],
56
+ ratios=[0.5, 1.0, 2.0],
57
+ strides=[4, 8, 16, 32, 64]),
58
+ bbox_coder=dict(
59
+ type=DeltaXYWHBBoxCoder,
60
+ target_means=[.0, .0, .0, .0],
61
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
62
+ loss_cls=dict(
63
+ type=CrossEntropyLoss, use_sigmoid=True, loss_weight=1.0),
64
+ loss_bbox=dict(type=L1Loss, loss_weight=1.0)),
65
+ roi_head=dict(
66
+ type=StandardRoIHead,
67
+ bbox_roi_extractor=dict(
68
+ type=SingleRoIExtractor,
69
+ roi_layer=dict(type=RoIAlign, output_size=7, sampling_ratio=0),
70
+ out_channels=256,
71
+ featmap_strides=[4, 8, 16, 32]),
72
+ bbox_head=dict(
73
+ type=Shared2FCBBoxHead,
74
+ in_channels=256,
75
+ fc_out_channels=1024,
76
+ roi_feat_size=7,
77
+ num_classes=80,
78
+ bbox_coder=dict(
79
+ type=DeltaXYWHBBoxCoder,
80
+ target_means=[0., 0., 0., 0.],
81
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
82
+ reg_class_agnostic=False,
83
+ loss_cls=dict(
84
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
85
+ loss_bbox=dict(type=L1Loss, loss_weight=1.0))),
86
+ # model training and testing settings
87
+ train_cfg=dict(
88
+ rpn=dict(
89
+ assigner=dict(
90
+ type=MaxIoUAssigner,
91
+ pos_iou_thr=0.7,
92
+ neg_iou_thr=0.3,
93
+ min_pos_iou=0.3,
94
+ match_low_quality=True,
95
+ ignore_iof_thr=-1),
96
+ sampler=dict(
97
+ type=RandomSampler,
98
+ num=256,
99
+ pos_fraction=0.5,
100
+ neg_pos_ub=-1,
101
+ add_gt_as_proposals=False),
102
+ allowed_border=-1,
103
+ pos_weight=-1,
104
+ debug=False),
105
+ rpn_proposal=dict(
106
+ nms_pre=2000,
107
+ max_per_img=1000,
108
+ nms=dict(type=nms, iou_threshold=0.7),
109
+ min_bbox_size=0),
110
+ rcnn=dict(
111
+ assigner=dict(
112
+ type=MaxIoUAssigner,
113
+ pos_iou_thr=0.5,
114
+ neg_iou_thr=0.5,
115
+ min_pos_iou=0.5,
116
+ match_low_quality=False,
117
+ ignore_iof_thr=-1),
118
+ sampler=dict(
119
+ type=RandomSampler,
120
+ num=512,
121
+ pos_fraction=0.25,
122
+ neg_pos_ub=-1,
123
+ add_gt_as_proposals=True),
124
+ pos_weight=-1,
125
+ debug=False)),
126
+ test_cfg=dict(
127
+ rpn=dict(
128
+ nms_pre=1000,
129
+ max_per_img=1000,
130
+ nms=dict(type=nms, iou_threshold=0.7),
131
+ min_bbox_size=0),
132
+ rcnn=dict(
133
+ score_thr=0.05,
134
+ nms=dict(type=nms, iou_threshold=0.5),
135
+ max_per_img=100)
136
+ # soft-nms is also supported for rcnn testing
137
+ # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
138
+ ))
head_extractor/build/lib/mmdet/configs/_base_/models/mask_rcnn_r50_caffe_c4.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.ops import RoIAlign, nms
3
+ from mmengine.model.weight_init import PretrainedInit
4
+ from torch.nn import BatchNorm2d
5
+
6
+ from mmdet.models.backbones.resnet import ResNet
7
+ from mmdet.models.data_preprocessors.data_preprocessor import \
8
+ DetDataPreprocessor
9
+ from mmdet.models.dense_heads.rpn_head import RPNHead
10
+ from mmdet.models.detectors.mask_rcnn import MaskRCNN
11
+ from mmdet.models.layers import ResLayer
12
+ from mmdet.models.losses.cross_entropy_loss import CrossEntropyLoss
13
+ from mmdet.models.losses.smooth_l1_loss import L1Loss
14
+ from mmdet.models.roi_heads.bbox_heads.bbox_head import BBoxHead
15
+ from mmdet.models.roi_heads.mask_heads.fcn_mask_head import FCNMaskHead
16
+ from mmdet.models.roi_heads.roi_extractors.single_level_roi_extractor import \
17
+ SingleRoIExtractor
18
+ from mmdet.models.roi_heads.standard_roi_head import StandardRoIHead
19
+ from mmdet.models.task_modules.assigners.max_iou_assigner import MaxIoUAssigner
20
+ from mmdet.models.task_modules.coders.delta_xywh_bbox_coder import \
21
+ DeltaXYWHBBoxCoder
22
+ from mmdet.models.task_modules.prior_generators.anchor_generator import \
23
+ AnchorGenerator
24
+ from mmdet.models.task_modules.samplers.random_sampler import RandomSampler
25
+
26
+ # model settings
27
+ norm_cfg = dict(type=BatchNorm2d, requires_grad=False)
28
+ # model settings
29
+ model = dict(
30
+ type=MaskRCNN,
31
+ data_preprocessor=dict(
32
+ type=DetDataPreprocessor,
33
+ mean=[103.530, 116.280, 123.675],
34
+ std=[1.0, 1.0, 1.0],
35
+ bgr_to_rgb=False,
36
+ pad_mask=True,
37
+ pad_size_divisor=32),
38
+ backbone=dict(
39
+ type=ResNet,
40
+ depth=50,
41
+ num_stages=3,
42
+ strides=(1, 2, 2),
43
+ dilations=(1, 1, 1),
44
+ out_indices=(2, ),
45
+ frozen_stages=1,
46
+ norm_cfg=dict(type=BatchNorm2d, requires_grad=True),
47
+ norm_eval=True,
48
+ style='caffe',
49
+ init_cfg=dict(
50
+ type=PretrainedInit,
51
+ checkpoint='open-mmlab://detectron2/resnet50_caffe')),
52
+ rpn_head=dict(
53
+ type=RPNHead,
54
+ in_channels=1024,
55
+ feat_channels=1024,
56
+ anchor_generator=dict(
57
+ type=AnchorGenerator,
58
+ scales=[2, 4, 8, 16, 32],
59
+ ratios=[0.5, 1.0, 2.0],
60
+ strides=[16]),
61
+ bbox_coder=dict(
62
+ type=DeltaXYWHBBoxCoder,
63
+ target_means=[.0, .0, .0, .0],
64
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
65
+ loss_cls=dict(
66
+ type=CrossEntropyLoss, use_sigmoid=True, loss_weight=1.0),
67
+ loss_bbox=dict(type=L1Loss, loss_weight=1.0)),
68
+ roi_head=dict(
69
+ type=StandardRoIHead,
70
+ shared_head=dict(
71
+ type=ResLayer,
72
+ depth=50,
73
+ stage=3,
74
+ stride=2,
75
+ dilation=1,
76
+ style='caffe',
77
+ norm_cfg=norm_cfg,
78
+ norm_eval=True),
79
+ bbox_roi_extractor=dict(
80
+ type=SingleRoIExtractor,
81
+ roi_layer=dict(type=RoIAlign, output_size=14, sampling_ratio=0),
82
+ out_channels=1024,
83
+ featmap_strides=[16]),
84
+ bbox_head=dict(
85
+ type=BBoxHead,
86
+ with_avg_pool=True,
87
+ roi_feat_size=7,
88
+ in_channels=2048,
89
+ num_classes=80,
90
+ bbox_coder=dict(
91
+ type=DeltaXYWHBBoxCoder,
92
+ target_means=[0., 0., 0., 0.],
93
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
94
+ reg_class_agnostic=False,
95
+ loss_cls=dict(
96
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
97
+ loss_bbox=dict(type=L1Loss, loss_weight=1.0)),
98
+ mask_roi_extractor=None,
99
+ mask_head=dict(
100
+ type=FCNMaskHead,
101
+ num_convs=0,
102
+ in_channels=2048,
103
+ conv_out_channels=256,
104
+ num_classes=80,
105
+ loss_mask=dict(
106
+ type=CrossEntropyLoss, use_mask=True, loss_weight=1.0))),
107
+ # model training and testing settings
108
+ train_cfg=dict(
109
+ rpn=dict(
110
+ assigner=dict(
111
+ type=MaxIoUAssigner,
112
+ pos_iou_thr=0.7,
113
+ neg_iou_thr=0.3,
114
+ min_pos_iou=0.3,
115
+ match_low_quality=True,
116
+ ignore_iof_thr=-1),
117
+ sampler=dict(
118
+ type=RandomSampler,
119
+ num=256,
120
+ pos_fraction=0.5,
121
+ neg_pos_ub=-1,
122
+ add_gt_as_proposals=False),
123
+ allowed_border=0,
124
+ pos_weight=-1,
125
+ debug=False),
126
+ rpn_proposal=dict(
127
+ nms_pre=12000,
128
+ max_per_img=2000,
129
+ nms=dict(type=nms, iou_threshold=0.7),
130
+ min_bbox_size=0),
131
+ rcnn=dict(
132
+ assigner=dict(
133
+ type=MaxIoUAssigner,
134
+ pos_iou_thr=0.5,
135
+ neg_iou_thr=0.5,
136
+ min_pos_iou=0.5,
137
+ match_low_quality=False,
138
+ ignore_iof_thr=-1),
139
+ sampler=dict(
140
+ type=RandomSampler,
141
+ num=512,
142
+ pos_fraction=0.25,
143
+ neg_pos_ub=-1,
144
+ add_gt_as_proposals=True),
145
+ mask_size=14,
146
+ pos_weight=-1,
147
+ debug=False)),
148
+ test_cfg=dict(
149
+ rpn=dict(
150
+ nms_pre=6000,
151
+ max_per_img=1000,
152
+ nms=dict(type=nms, iou_threshold=0.7),
153
+ min_bbox_size=0),
154
+ rcnn=dict(
155
+ score_thr=0.05,
156
+ nms=dict(type=nms, iou_threshold=0.5),
157
+ max_per_img=100,
158
+ mask_thr_binary=0.5)))
head_extractor/build/lib/mmdet/configs/_base_/models/mask_rcnn_r50_fpn.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.ops import RoIAlign, nms
3
+ from mmengine.model.weight_init import PretrainedInit
4
+ from torch.nn import BatchNorm2d
5
+
6
+ from mmdet.models.backbones.resnet import ResNet
7
+ from mmdet.models.data_preprocessors.data_preprocessor import \
8
+ DetDataPreprocessor
9
+ from mmdet.models.dense_heads.rpn_head import RPNHead
10
+ from mmdet.models.detectors.mask_rcnn import MaskRCNN
11
+ from mmdet.models.losses.cross_entropy_loss import CrossEntropyLoss
12
+ from mmdet.models.losses.smooth_l1_loss import L1Loss
13
+ from mmdet.models.necks.fpn import FPN
14
+ from mmdet.models.roi_heads.bbox_heads.convfc_bbox_head import \
15
+ Shared2FCBBoxHead
16
+ from mmdet.models.roi_heads.mask_heads.fcn_mask_head import FCNMaskHead
17
+ from mmdet.models.roi_heads.roi_extractors.single_level_roi_extractor import \
18
+ SingleRoIExtractor
19
+ from mmdet.models.roi_heads.standard_roi_head import StandardRoIHead
20
+ from mmdet.models.task_modules.assigners.max_iou_assigner import MaxIoUAssigner
21
+ from mmdet.models.task_modules.coders.delta_xywh_bbox_coder import \
22
+ DeltaXYWHBBoxCoder
23
+ from mmdet.models.task_modules.prior_generators.anchor_generator import \
24
+ AnchorGenerator
25
+ from mmdet.models.task_modules.samplers.random_sampler import RandomSampler
26
+
27
+ # model settings
28
+ model = dict(
29
+ type=MaskRCNN,
30
+ data_preprocessor=dict(
31
+ type=DetDataPreprocessor,
32
+ mean=[123.675, 116.28, 103.53],
33
+ std=[58.395, 57.12, 57.375],
34
+ bgr_to_rgb=True,
35
+ pad_mask=True,
36
+ pad_size_divisor=32),
37
+ backbone=dict(
38
+ type=ResNet,
39
+ depth=50,
40
+ num_stages=4,
41
+ out_indices=(0, 1, 2, 3),
42
+ frozen_stages=1,
43
+ norm_cfg=dict(type=BatchNorm2d, requires_grad=True),
44
+ norm_eval=True,
45
+ style='pytorch',
46
+ init_cfg=dict(
47
+ type=PretrainedInit, checkpoint='torchvision://resnet50')),
48
+ neck=dict(
49
+ type=FPN,
50
+ in_channels=[256, 512, 1024, 2048],
51
+ out_channels=256,
52
+ num_outs=5),
53
+ rpn_head=dict(
54
+ type=RPNHead,
55
+ in_channels=256,
56
+ feat_channels=256,
57
+ anchor_generator=dict(
58
+ type=AnchorGenerator,
59
+ scales=[8],
60
+ ratios=[0.5, 1.0, 2.0],
61
+ strides=[4, 8, 16, 32, 64]),
62
+ bbox_coder=dict(
63
+ type=DeltaXYWHBBoxCoder,
64
+ target_means=[.0, .0, .0, .0],
65
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
66
+ loss_cls=dict(
67
+ type=CrossEntropyLoss, use_sigmoid=True, loss_weight=1.0),
68
+ loss_bbox=dict(type=L1Loss, loss_weight=1.0)),
69
+ roi_head=dict(
70
+ type=StandardRoIHead,
71
+ bbox_roi_extractor=dict(
72
+ type=SingleRoIExtractor,
73
+ roi_layer=dict(type=RoIAlign, output_size=7, sampling_ratio=0),
74
+ out_channels=256,
75
+ featmap_strides=[4, 8, 16, 32]),
76
+ bbox_head=dict(
77
+ type=Shared2FCBBoxHead,
78
+ in_channels=256,
79
+ fc_out_channels=1024,
80
+ roi_feat_size=7,
81
+ num_classes=80,
82
+ bbox_coder=dict(
83
+ type=DeltaXYWHBBoxCoder,
84
+ target_means=[0., 0., 0., 0.],
85
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
86
+ reg_class_agnostic=False,
87
+ loss_cls=dict(
88
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
89
+ loss_bbox=dict(type=L1Loss, loss_weight=1.0)),
90
+ mask_roi_extractor=dict(
91
+ type=SingleRoIExtractor,
92
+ roi_layer=dict(type=RoIAlign, output_size=14, sampling_ratio=0),
93
+ out_channels=256,
94
+ featmap_strides=[4, 8, 16, 32]),
95
+ mask_head=dict(
96
+ type=FCNMaskHead,
97
+ num_convs=4,
98
+ in_channels=256,
99
+ conv_out_channels=256,
100
+ num_classes=80,
101
+ loss_mask=dict(
102
+ type=CrossEntropyLoss, use_mask=True, loss_weight=1.0))),
103
+ # model training and testing settings
104
+ train_cfg=dict(
105
+ rpn=dict(
106
+ assigner=dict(
107
+ type=MaxIoUAssigner,
108
+ pos_iou_thr=0.7,
109
+ neg_iou_thr=0.3,
110
+ min_pos_iou=0.3,
111
+ match_low_quality=True,
112
+ ignore_iof_thr=-1),
113
+ sampler=dict(
114
+ type=RandomSampler,
115
+ num=256,
116
+ pos_fraction=0.5,
117
+ neg_pos_ub=-1,
118
+ add_gt_as_proposals=False),
119
+ allowed_border=-1,
120
+ pos_weight=-1,
121
+ debug=False),
122
+ rpn_proposal=dict(
123
+ nms_pre=2000,
124
+ max_per_img=1000,
125
+ nms=dict(type=nms, iou_threshold=0.7),
126
+ min_bbox_size=0),
127
+ rcnn=dict(
128
+ assigner=dict(
129
+ type=MaxIoUAssigner,
130
+ pos_iou_thr=0.5,
131
+ neg_iou_thr=0.5,
132
+ min_pos_iou=0.5,
133
+ match_low_quality=True,
134
+ ignore_iof_thr=-1),
135
+ sampler=dict(
136
+ type=RandomSampler,
137
+ num=512,
138
+ pos_fraction=0.25,
139
+ neg_pos_ub=-1,
140
+ add_gt_as_proposals=True),
141
+ mask_size=28,
142
+ pos_weight=-1,
143
+ debug=False)),
144
+ test_cfg=dict(
145
+ rpn=dict(
146
+ nms_pre=1000,
147
+ max_per_img=1000,
148
+ nms=dict(type=nms, iou_threshold=0.7),
149
+ min_bbox_size=0),
150
+ rcnn=dict(
151
+ score_thr=0.05,
152
+ nms=dict(type=nms, iou_threshold=0.5),
153
+ max_per_img=100,
154
+ mask_thr_binary=0.5)))
head_extractor/build/lib/mmdet/configs/_base_/models/retinanet_r50_fpn.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.ops import nms
3
+ from torch.nn import BatchNorm2d
4
+
5
+ from mmdet.models import (FPN, DetDataPreprocessor, FocalLoss, L1Loss, ResNet,
6
+ RetinaHead, RetinaNet)
7
+ from mmdet.models.task_modules import (AnchorGenerator, DeltaXYWHBBoxCoder,
8
+ MaxIoUAssigner, PseudoSampler)
9
+
10
+ # model settings
11
+ model = dict(
12
+ type=RetinaNet,
13
+ data_preprocessor=dict(
14
+ type=DetDataPreprocessor,
15
+ mean=[123.675, 116.28, 103.53],
16
+ std=[58.395, 57.12, 57.375],
17
+ bgr_to_rgb=True,
18
+ pad_size_divisor=32),
19
+ backbone=dict(
20
+ type=ResNet,
21
+ depth=50,
22
+ num_stages=4,
23
+ out_indices=(0, 1, 2, 3),
24
+ frozen_stages=1,
25
+ norm_cfg=dict(type=BatchNorm2d, requires_grad=True),
26
+ norm_eval=True,
27
+ style='pytorch',
28
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
29
+ neck=dict(
30
+ type=FPN,
31
+ in_channels=[256, 512, 1024, 2048],
32
+ out_channels=256,
33
+ start_level=1,
34
+ add_extra_convs='on_input',
35
+ num_outs=5),
36
+ bbox_head=dict(
37
+ type=RetinaHead,
38
+ num_classes=80,
39
+ in_channels=256,
40
+ stacked_convs=4,
41
+ feat_channels=256,
42
+ anchor_generator=dict(
43
+ type=AnchorGenerator,
44
+ octave_base_scale=4,
45
+ scales_per_octave=3,
46
+ ratios=[0.5, 1.0, 2.0],
47
+ strides=[8, 16, 32, 64, 128]),
48
+ bbox_coder=dict(
49
+ type=DeltaXYWHBBoxCoder,
50
+ target_means=[.0, .0, .0, .0],
51
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
52
+ loss_cls=dict(
53
+ type=FocalLoss,
54
+ use_sigmoid=True,
55
+ gamma=2.0,
56
+ alpha=0.25,
57
+ loss_weight=1.0),
58
+ loss_bbox=dict(type=L1Loss, loss_weight=1.0)),
59
+ # model training and testing settings
60
+ train_cfg=dict(
61
+ assigner=dict(
62
+ type=MaxIoUAssigner,
63
+ pos_iou_thr=0.5,
64
+ neg_iou_thr=0.4,
65
+ min_pos_iou=0,
66
+ ignore_iof_thr=-1),
67
+ sampler=dict(
68
+ type=PseudoSampler), # Focal loss should use PseudoSampler
69
+ allowed_border=-1,
70
+ pos_weight=-1,
71
+ debug=False),
72
+ test_cfg=dict(
73
+ nms_pre=1000,
74
+ min_bbox_size=0,
75
+ score_thr=0.05,
76
+ nms=dict(type=nms, iou_threshold=0.5),
77
+ max_per_img=100))
head_extractor/build/lib/mmdet/configs/_base_/schedules/schedule_1x.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
3
+ from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
4
+ from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
5
+ from torch.optim.sgd import SGD
6
+
7
+ # training schedule for 1x
8
+ train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=12, val_interval=1)
9
+ val_cfg = dict(type=ValLoop)
10
+ test_cfg = dict(type=TestLoop)
11
+
12
+ # learning rate
13
+ param_scheduler = [
14
+ dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=500),
15
+ dict(
16
+ type=MultiStepLR,
17
+ begin=0,
18
+ end=12,
19
+ by_epoch=True,
20
+ milestones=[8, 11],
21
+ gamma=0.1)
22
+ ]
23
+
24
+ # optimizer
25
+ optim_wrapper = dict(
26
+ type=OptimWrapper,
27
+ optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001))
28
+
29
+ # Default setting for scaling LR automatically
30
+ # - `enable` means enable scaling LR automatically
31
+ # or not by default.
32
+ # - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
33
+ auto_scale_lr = dict(enable=False, base_batch_size=16)
head_extractor/build/lib/mmdet/configs/_base_/schedules/schedule_2x.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
3
+ from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
4
+ from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
5
+ from torch.optim.sgd import SGD
6
+
7
+ # training schedule for 1x
8
+ train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=24, val_interval=1)
9
+ val_cfg = dict(type=ValLoop)
10
+ test_cfg = dict(type=TestLoop)
11
+
12
+ # learning rate
13
+ param_scheduler = [
14
+ dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=500),
15
+ dict(
16
+ type=MultiStepLR,
17
+ begin=0,
18
+ end=24,
19
+ by_epoch=True,
20
+ milestones=[16, 22],
21
+ gamma=0.1)
22
+ ]
23
+
24
+ # optimizer
25
+ optim_wrapper = dict(
26
+ type=OptimWrapper,
27
+ optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001))
28
+
29
+ # Default setting for scaling LR automatically
30
+ # - `enable` means enable scaling LR automatically
31
+ # or not by default.
32
+ # - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
33
+ auto_scale_lr = dict(enable=False, base_batch_size=16)
head_extractor/build/lib/mmdet/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .._base_.datasets.coco_instance import *
11
+ from .._base_.default_runtime import *
12
+ from .._base_.models.cascade_mask_rcnn_r50_fpn import *
13
+ from .._base_.schedules.schedule_1x import *
head_extractor/build/lib/mmdet/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .._base_.datasets.coco_detection import *
11
+ from .._base_.default_runtime import *
12
+ from .._base_.models.cascade_rcnn_r50_fpn import *
13
+ from .._base_.schedules.schedule_1x import *
head_extractor/build/lib/mmdet/configs/common/lsj_100e_coco_detection.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .._base_.default_runtime import *
11
+
12
+ from mmengine.dataset.sampler import DefaultSampler
13
+ from mmengine.optim import OptimWrapper
14
+ from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
15
+ from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
16
+ from torch.optim import SGD
17
+
18
+ from mmdet.datasets import CocoDataset, RepeatDataset
19
+ from mmdet.datasets.transforms.formatting import PackDetInputs
20
+ from mmdet.datasets.transforms.loading import (FilterAnnotations,
21
+ LoadAnnotations,
22
+ LoadImageFromFile)
23
+ from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
24
+ Pad, RandomCrop, RandomFlip,
25
+ RandomResize, Resize)
26
+ from mmdet.evaluation import CocoMetric
27
+
28
+ # dataset settings
29
+ dataset_type = CocoDataset
30
+ data_root = 'data/coco/'
31
+ image_size = (1024, 1024)
32
+
33
+ backend_args = None
34
+
35
+ train_pipeline = [
36
+ dict(type=LoadImageFromFile, backend_args=backend_args),
37
+ dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
38
+ dict(
39
+ type=RandomResize,
40
+ scale=image_size,
41
+ ratio_range=(0.1, 2.0),
42
+ keep_ratio=True),
43
+ dict(
44
+ type=RandomCrop,
45
+ crop_type='absolute_range',
46
+ crop_size=image_size,
47
+ recompute_bbox=True,
48
+ allow_negative_crop=True),
49
+ dict(type=FilterAnnotations, min_gt_bbox_wh=(1e-2, 1e-2)),
50
+ dict(type=RandomFlip, prob=0.5),
51
+ dict(type=PackDetInputs)
52
+ ]
53
+ test_pipeline = [
54
+ dict(type=LoadImageFromFile, backend_args=backend_args),
55
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
56
+ dict(type=LoadAnnotations, with_bbox=True),
57
+ dict(
58
+ type=PackDetInputs,
59
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
60
+ 'scale_factor'))
61
+ ]
62
+
63
+ # Use RepeatDataset to speed up training
64
+ train_dataloader = dict(
65
+ batch_size=2,
66
+ num_workers=2,
67
+ persistent_workers=True,
68
+ sampler=dict(type=DefaultSampler, shuffle=True),
69
+ dataset=dict(
70
+ type=RepeatDataset,
71
+ times=4, # simply change this from 2 to 16 for 50e - 400e training.
72
+ dataset=dict(
73
+ type=dataset_type,
74
+ data_root=data_root,
75
+ ann_file='annotations/instances_train2017.json',
76
+ data_prefix=dict(img='train2017/'),
77
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
78
+ pipeline=train_pipeline,
79
+ backend_args=backend_args)))
80
+ val_dataloader = dict(
81
+ batch_size=1,
82
+ num_workers=2,
83
+ persistent_workers=True,
84
+ drop_last=False,
85
+ sampler=dict(type=DefaultSampler, shuffle=False),
86
+ dataset=dict(
87
+ type=dataset_type,
88
+ data_root=data_root,
89
+ ann_file='annotations/instances_val2017.json',
90
+ data_prefix=dict(img='val2017/'),
91
+ test_mode=True,
92
+ pipeline=test_pipeline,
93
+ backend_args=backend_args))
94
+ test_dataloader = val_dataloader
95
+
96
+ val_evaluator = dict(
97
+ type=CocoMetric,
98
+ ann_file=data_root + 'annotations/instances_val2017.json',
99
+ metric=['bbox', 'segm'],
100
+ format_only=False,
101
+ backend_args=backend_args)
102
+ test_evaluator = val_evaluator
103
+
104
+ max_epochs = 25
105
+
106
+ train_cfg = dict(
107
+ type=EpochBasedTrainLoop, max_epochs=max_epochs, val_interval=5)
108
+ val_cfg = dict(type=ValLoop)
109
+ test_cfg = dict(type=TestLoop)
110
+
111
+ # optimizer assumes bs=64
112
+ optim_wrapper = dict(
113
+ type=OptimWrapper,
114
+ optimizer=dict(type=SGD, lr=0.1, momentum=0.9, weight_decay=0.00004))
115
+
116
+ # learning rate
117
+ param_scheduler = [
118
+ dict(type=LinearLR, start_factor=0.067, by_epoch=False, begin=0, end=500),
119
+ dict(
120
+ type=MultiStepLR,
121
+ begin=0,
122
+ end=max_epochs,
123
+ by_epoch=True,
124
+ milestones=[22, 24],
125
+ gamma=0.1)
126
+ ]
127
+
128
+ # only keep latest 2 checkpoints
129
+ default_hooks.update(dict(checkpoint=dict(max_keep_ckpts=2)))
130
+
131
+ # NOTE: `auto_scale_lr` is for automatically scaling LR,
132
+ # USER SHOULD NOT CHANGE ITS VALUES.
133
+ # base_batch_size = (32 GPUs) x (2 samples per GPU)
134
+ auto_scale_lr = dict(base_batch_size=64)
head_extractor/build/lib/mmdet/configs/common/lsj_100e_coco_instance.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .._base_.default_runtime import *
11
+
12
+ from mmengine.dataset.sampler import DefaultSampler
13
+ from mmengine.optim import OptimWrapper
14
+ from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
15
+ from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
16
+ from torch.optim import SGD
17
+
18
+ from mmdet.datasets import CocoDataset, RepeatDataset
19
+ from mmdet.datasets.transforms.formatting import PackDetInputs
20
+ from mmdet.datasets.transforms.loading import (FilterAnnotations,
21
+ LoadAnnotations,
22
+ LoadImageFromFile)
23
+ from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
24
+ Pad, RandomCrop, RandomFlip,
25
+ RandomResize, Resize)
26
+ from mmdet.evaluation import CocoMetric
27
+
28
+ # dataset settings
29
+ dataset_type = CocoDataset
30
+ data_root = 'data/coco/'
31
+ image_size = (1024, 1024)
32
+
33
+ backend_args = None
34
+
35
+ train_pipeline = [
36
+ dict(type=LoadImageFromFile, backend_args=backend_args),
37
+ dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
38
+ dict(
39
+ type=RandomResize,
40
+ scale=image_size,
41
+ ratio_range=(0.1, 2.0),
42
+ keep_ratio=True),
43
+ dict(
44
+ type=RandomCrop,
45
+ crop_type='absolute_range',
46
+ crop_size=image_size,
47
+ recompute_bbox=True,
48
+ allow_negative_crop=True),
49
+ dict(type=FilterAnnotations, min_gt_bbox_wh=(1e-2, 1e-2)),
50
+ dict(type=RandomFlip, prob=0.5),
51
+ dict(type=PackDetInputs)
52
+ ]
53
+ test_pipeline = [
54
+ dict(type=LoadImageFromFile, backend_args=backend_args),
55
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
56
+ dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
57
+ dict(
58
+ type=PackDetInputs,
59
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
60
+ 'scale_factor'))
61
+ ]
62
+
63
+ # Use RepeatDataset to speed up training
64
+ train_dataloader = dict(
65
+ batch_size=2,
66
+ num_workers=2,
67
+ persistent_workers=True,
68
+ sampler=dict(type=DefaultSampler, shuffle=True),
69
+ dataset=dict(
70
+ type=RepeatDataset,
71
+ times=4, # simply change this from 2 to 16 for 50e - 400e training.
72
+ dataset=dict(
73
+ type=dataset_type,
74
+ data_root=data_root,
75
+ ann_file='annotations/instances_train2017.json',
76
+ data_prefix=dict(img='train2017/'),
77
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
78
+ pipeline=train_pipeline,
79
+ backend_args=backend_args)))
80
+ val_dataloader = dict(
81
+ batch_size=1,
82
+ num_workers=2,
83
+ persistent_workers=True,
84
+ drop_last=False,
85
+ sampler=dict(type=DefaultSampler, shuffle=False),
86
+ dataset=dict(
87
+ type=dataset_type,
88
+ data_root=data_root,
89
+ ann_file='annotations/instances_val2017.json',
90
+ data_prefix=dict(img='val2017/'),
91
+ test_mode=True,
92
+ pipeline=test_pipeline,
93
+ backend_args=backend_args))
94
+ test_dataloader = val_dataloader
95
+
96
+ val_evaluator = dict(
97
+ type=CocoMetric,
98
+ ann_file=data_root + 'annotations/instances_val2017.json',
99
+ metric=['bbox', 'segm'],
100
+ format_only=False,
101
+ backend_args=backend_args)
102
+ test_evaluator = val_evaluator
103
+
104
+ max_epochs = 25
105
+
106
+ train_cfg = dict(
107
+ type=EpochBasedTrainLoop, max_epochs=max_epochs, val_interval=5)
108
+ val_cfg = dict(type=ValLoop)
109
+ test_cfg = dict(type=TestLoop)
110
+
111
+ # optimizer assumes bs=64
112
+ optim_wrapper = dict(
113
+ type=OptimWrapper,
114
+ optimizer=dict(type=SGD, lr=0.1, momentum=0.9, weight_decay=0.00004))
115
+
116
+ # learning rate
117
+ param_scheduler = [
118
+ dict(type=LinearLR, start_factor=0.067, by_epoch=False, begin=0, end=500),
119
+ dict(
120
+ type=MultiStepLR,
121
+ begin=0,
122
+ end=max_epochs,
123
+ by_epoch=True,
124
+ milestones=[22, 24],
125
+ gamma=0.1)
126
+ ]
127
+
128
+ # only keep latest 2 checkpoints
129
+ default_hooks.update(dict(checkpoint=dict(max_keep_ckpts=2)))
130
+
131
+ # NOTE: `auto_scale_lr` is for automatically scaling LR,
132
+ # USER SHOULD NOT CHANGE ITS VALUES.
133
+ # base_batch_size = (32 GPUs) x (2 samples per GPU)
134
+ auto_scale_lr = dict(base_batch_size=64)
head_extractor/build/lib/mmdet/configs/common/lsj_200e_coco_detection.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .lsj_100e_coco_detection import *
11
+
12
+ # 8x25=200e
13
+ train_dataloader.update(dict(dataset=dict(times=8)))
14
+
15
+ # learning rate
16
+ param_scheduler = [
17
+ dict(type=LinearLR, start_factor=0.067, by_epoch=False, begin=0, end=1000),
18
+ dict(
19
+ type=MultiStepLR,
20
+ begin=0,
21
+ end=25,
22
+ by_epoch=True,
23
+ milestones=[22, 24],
24
+ gamma=0.1)
25
+ ]
head_extractor/build/lib/mmdet/configs/common/lsj_200e_coco_instance.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright (c) OpenMMLab. All rights reserved.

# Pure-Python style config; see
# https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta  # noqa
# Requires mmcv >= 2.0.1 and mmengine >= 0.8.0.

from mmengine.config import read_base

with read_base():
    from .lsj_100e_coco_instance import *

# Repeat the dataset 8 times on top of the 25-epoch base: 8 x 25 = 200 epochs.
train_dataloader.update(dict(dataset=dict(times=8)))

# Learning-rate schedule: linear warmup over the first 1000 iterations,
# then a x0.1 step decay at epochs 22 and 24.
param_scheduler = [
    dict(type=LinearLR, start_factor=0.067, by_epoch=False, begin=0, end=1000),
    dict(
        type=MultiStepLR,
        begin=0,
        end=25,
        by_epoch=True,
        milestones=[22, 24],
        gamma=0.1),
]
head_extractor/build/lib/mmdet/configs/common/ms_3x_coco.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright (c) OpenMMLab. All rights reserved.

# Multi-scale 3x training schedule for COCO bbox detection.
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0

from mmengine.config import read_base

with read_base():
    from .._base_.default_runtime import *

from mmcv.transforms import RandomResize
from mmengine.dataset import RepeatDataset
from mmengine.dataset.sampler import DefaultSampler
from mmengine.optim import OptimWrapper
from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
from torch.optim import SGD

from mmdet.datasets import AspectRatioBatchSampler, CocoDataset
from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import (LoadAnnotations,
                                               LoadImageFromFile)
from mmdet.datasets.transforms.transforms import RandomFlip, Resize
from mmdet.evaluation import CocoMetric

# dataset settings
dataset_type = CocoDataset
data_root = 'data/coco/'

# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)

# data_root = 's3://openmmlab/datasets/detection/coco/'

# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
backend_args = None

# In mstrain 3x config, img_scale=[(1333, 640), (1333, 800)],
# multiscale_mode='range'
train_pipeline = [
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(type=LoadAnnotations, with_bbox=True),
    dict(type=RandomResize, scale=[(1333, 640), (1333, 800)], keep_ratio=True),
    dict(type=RandomFlip, prob=0.5),
    dict(type=PackDetInputs)
]
test_pipeline = [
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(type=Resize, scale=(1333, 800), keep_ratio=True),
    dict(type=LoadAnnotations, with_bbox=True),
    dict(
        type=PackDetInputs,
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    pin_memory=True,
    sampler=dict(type=DefaultSampler, shuffle=True),
    batch_sampler=dict(type=AspectRatioBatchSampler),
    dataset=dict(
        type=RepeatDataset,
        times=3,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='annotations/instances_train2017.json',
            data_prefix=dict(img='train2017/'),
            filter_cfg=dict(filter_empty_gt=True, min_size=32),
            pipeline=train_pipeline,
            backend_args=backend_args)))
val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type=DefaultSampler, shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='annotations/instances_val2017.json',
        data_prefix=dict(img='val2017/'),
        test_mode=True,
        pipeline=test_pipeline,
        backend_args=backend_args))
test_dataloader = val_dataloader

val_evaluator = dict(
    type=CocoMetric,
    ann_file=data_root + 'annotations/instances_val2017.json',
    metric='bbox',
    backend_args=backend_args)
test_evaluator = val_evaluator

# training schedule for 3x with `RepeatDataset`
# Fixed: `EpochBasedTrainLoop` takes `max_epochs`, not `max_iters`;
# the original `max_iters=12` would raise a TypeError when the loop is built.
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=12, val_interval=1)
val_cfg = dict(type=ValLoop)
test_cfg = dict(type=TestLoop)

# learning rate: linear warmup for 500 iterations, then x0.1 decay at
# epochs 9 and 11.
# Fixed: the decay stage is epoch-based; with `by_epoch=False` it would have
# stepped at *iterations* 9 and 11, i.e. during warmup.
param_scheduler = [
    dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=500),
    dict(
        type=MultiStepLR,
        begin=0,
        end=12,
        by_epoch=True,
        milestones=[9, 11],
        gamma=0.1)
]

# optimizer
optim_wrapper = dict(
    type=OptimWrapper,
    optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001))
# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
#   or not by default.
# - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=16)
head_extractor/build/lib/mmdet/configs/common/ms_3x_coco_instance.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright (c) OpenMMLab. All rights reserved.

# Multi-scale 3x training schedule for COCO instance segmentation.
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0

from mmengine.config import read_base

with read_base():
    from .._base_.default_runtime import *

from mmcv.transforms import RandomChoiceResize
from mmengine.dataset import RepeatDataset
from mmengine.dataset.sampler import DefaultSampler, InfiniteSampler
from mmengine.optim import OptimWrapper
from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
from mmengine.runner.loops import IterBasedTrainLoop, TestLoop, ValLoop
from torch.optim import SGD

from mmdet.datasets import AspectRatioBatchSampler, CocoDataset
from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import (FilterAnnotations,
                                               LoadAnnotations,
                                               LoadImageFromFile)
from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
                                                  Pad, RandomCrop, RandomFlip,
                                                  RandomResize, Resize)
from mmdet.evaluation import CocoMetric

# dataset settings
dataset_type = CocoDataset
data_root = 'data/coco/'

# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)

# data_root = 's3://openmmlab/datasets/detection/coco/'

# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
backend_args = None

train_pipeline = [
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
    # Fixed: use the imported class instead of the string 'RandomResize' to
    # stay consistent with this pure-Python style config.
    dict(
        type=RandomResize, scale=[(1333, 640), (1333, 800)],
        keep_ratio=True),
    dict(type=RandomFlip, prob=0.5),
    dict(type=PackDetInputs)
]
test_pipeline = [
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(type=Resize, scale=(1333, 800), keep_ratio=True),
    dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
    dict(
        type=PackDetInputs,
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
train_dataloader.update(
    dict(
        batch_size=2,
        num_workers=2,
        persistent_workers=True,
        sampler=dict(type=DefaultSampler, shuffle=True),
        batch_sampler=dict(type=AspectRatioBatchSampler),
        dataset=dict(
            type=RepeatDataset,
            times=3,
            dataset=dict(
                type=dataset_type,
                data_root=data_root,
                ann_file='annotations/instances_train2017.json',
                data_prefix=dict(img='train2017/'),
                filter_cfg=dict(filter_empty_gt=True, min_size=32),
                pipeline=train_pipeline,
                backend_args=backend_args))))
val_dataloader.update(
    dict(
        batch_size=1,
        num_workers=2,
        persistent_workers=True,
        drop_last=False,
        sampler=dict(type=DefaultSampler, shuffle=False),
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='annotations/instances_val2017.json',
            data_prefix=dict(img='val2017/'),
            test_mode=True,
            pipeline=test_pipeline,
            backend_args=backend_args)))
test_dataloader = val_dataloader

val_evaluator.update(
    dict(
        type=CocoMetric,
        ann_file=data_root + 'annotations/instances_val2017.json',
        metric='bbox',
        backend_args=backend_args))
test_evaluator = val_evaluator

# training schedule for 3x with `RepeatDataset`
train_cfg.update(dict(type=EpochBasedTrainLoop, max_epochs=12, val_interval=1))
val_cfg.update(dict(type=ValLoop))
test_cfg.update(dict(type=TestLoop))

# learning rate: linear warmup for 500 iterations, then x0.1 decay at
# epochs 9 and 11.
# Fixed: the decay stage is epoch-based; with `by_epoch=False` it would have
# stepped at *iterations* 9 and 11, i.e. during warmup.
param_scheduler = [
    dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=500),
    dict(
        type=MultiStepLR,
        begin=0,
        end=12,
        by_epoch=True,
        milestones=[9, 11],
        gamma=0.1)
]

# optimizer
optim_wrapper.update(
    dict(
        type=OptimWrapper,
        optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001)))
# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
#   or not by default.
# - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
auto_scale_lr.update(dict(enable=False, base_batch_size=16))
head_extractor/build/lib/mmdet/configs/common/ms_90k_coco.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright (c) OpenMMLab. All rights reserved.

# Multi-scale 90k-iteration schedule for COCO bbox detection.
# Pure-Python style config; see
# https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta  # noqa
# Requires mmcv >= 2.0.1 and mmengine >= 0.8.0.

from mmengine.config import read_base

with read_base():
    from .._base_.default_runtime import *

from mmcv.transforms import RandomChoiceResize
from mmengine.dataset import RepeatDataset
from mmengine.dataset.sampler import DefaultSampler, InfiniteSampler
from mmengine.optim import OptimWrapper
from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
from mmengine.runner.loops import IterBasedTrainLoop, TestLoop, ValLoop
from torch.optim import SGD

from mmdet.datasets import AspectRatioBatchSampler, CocoDataset
from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import (FilterAnnotations,
                                               LoadAnnotations,
                                               LoadImageFromFile)
from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
                                                  Pad, RandomCrop, RandomFlip,
                                                  RandomResize, Resize)
from mmdet.evaluation import CocoMetric

# Dataset settings.
dataset_type = CocoDataset
data_root = 'data/coco/'
# Remote storage: either point ``data_root`` at a URI prefix and let the
# file I/O module infer the backend (LMDB/Memcache not supported), e.g.
# data_root = 's3://openmmlab/datasets/detection/coco/'
# or configure ``backend_args`` explicitly (``file_client_args`` in
# versions before 3.0.0rc6), e.g.
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
backend_args = None

# Decode images with pillow to align with Detectron2.
backend = 'pillow'
train_pipeline = [
    dict(
        type=LoadImageFromFile,
        backend_args=backend_args,
        imdecode_backend=backend),
    dict(type=LoadAnnotations, with_bbox=True),
    dict(
        type=RandomChoiceResize,
        scales=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
                (1333, 768), (1333, 800)],
        keep_ratio=True,
        backend=backend),
    dict(type=RandomFlip, prob=0.5),
    dict(type=PackDetInputs)
]
test_pipeline = [
    dict(
        type=LoadImageFromFile,
        backend_args=backend_args,
        imdecode_backend=backend),
    dict(type=Resize, scale=(1333, 800), keep_ratio=True, backend=backend),
    dict(type=LoadAnnotations, with_bbox=True),
    dict(
        type=PackDetInputs,
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
train_dataloader.update(
    dict(
        batch_size=2,
        num_workers=2,
        persistent_workers=True,
        pin_memory=True,
        sampler=dict(type=InfiniteSampler, shuffle=True),
        batch_sampler=dict(type=AspectRatioBatchSampler),
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='annotations/instances_train2017.json',
            data_prefix=dict(img='train2017/'),
            filter_cfg=dict(filter_empty_gt=True, min_size=32),
            pipeline=train_pipeline,
            backend_args=backend_args)))
val_dataloader.update(
    dict(
        batch_size=1,
        num_workers=2,
        persistent_workers=True,
        drop_last=False,
        pin_memory=True,
        sampler=dict(type=DefaultSampler, shuffle=False),
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='annotations/instances_val2017.json',
            data_prefix=dict(img='val2017/'),
            test_mode=True,
            pipeline=test_pipeline,
            backend_args=backend_args)))
test_dataloader = val_dataloader

val_evaluator.update(
    dict(
        type=CocoMetric,
        ann_file=data_root + 'annotations/instances_val2017.json',
        metric='bbox',
        format_only=False,
        backend_args=backend_args))
test_evaluator = val_evaluator

# Iteration-based schedule: 90k iterations, validating every 10k.
max_iter = 90000
train_cfg.update(
    dict(type=IterBasedTrainLoop, max_iters=max_iter, val_interval=10000))
val_cfg.update(dict(type=ValLoop))
test_cfg.update(dict(type=TestLoop))

# Learning rate: linear warmup over the first 1000 iterations, then a x0.1
# step decay at iterations 60k and 80k.
param_scheduler = [
    dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=1000),
    dict(
        type=MultiStepLR,
        begin=0,
        end=max_iter,
        by_epoch=False,
        milestones=[60000, 80000],
        gamma=0.1)
]

# Optimizer.
optim_wrapper.update(
    dict(
        type=OptimWrapper,
        optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001)))
# Automatic LR scaling (disabled by default);
# ``base_batch_size`` = (8 GPUs) x (2 samples per GPU).
auto_scale_lr.update(dict(enable=False, base_batch_size=16))

# Checkpointing and logging are iteration-based for this schedule.
default_hooks.update(dict(checkpoint=dict(by_epoch=False, interval=10000)))
log_processor.update(dict(by_epoch=False))
head_extractor/build/lib/mmdet/configs/common/ms_poly_3x_coco_instance.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright (c) OpenMMLab. All rights reserved.

# Multi-scale 3x schedule for COCO instance segmentation with polygon masks.
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0

from mmengine.config import read_base

with read_base():
    from .._base_.default_runtime import *

from mmcv.transforms import RandomChoiceResize
from mmengine.dataset import RepeatDataset
from mmengine.dataset.sampler import DefaultSampler, InfiniteSampler
from mmengine.optim import OptimWrapper
from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
from mmengine.runner.loops import IterBasedTrainLoop, TestLoop, ValLoop
from torch.optim import SGD

from mmdet.datasets import AspectRatioBatchSampler, CocoDataset
from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import (FilterAnnotations,
                                               LoadAnnotations,
                                               LoadImageFromFile)
from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
                                                  Pad, RandomCrop, RandomFlip,
                                                  RandomResize, Resize)
from mmdet.evaluation import CocoMetric

# dataset settings
dataset_type = CocoDataset
data_root = 'data/coco/'
# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)

# data_root = 's3://openmmlab/datasets/detection/coco/'

# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
backend_args = None

# In mstrain 3x config, img_scale=[(1333, 640), (1333, 800)],
# multiscale_mode='range'
train_pipeline = [
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(
        type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
    # Fixed: use the imported class instead of the string 'RandomResize' to
    # stay consistent with this pure-Python style config.
    dict(
        type=RandomResize, scale=[(1333, 640), (1333, 800)],
        keep_ratio=True),
    dict(type=RandomFlip, prob=0.5),
    dict(type=PackDetInputs)
]
test_pipeline = [
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(type=Resize, scale=(1333, 800), keep_ratio=True),
    dict(
        type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
    dict(
        type=PackDetInputs,
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
# Fixed: the `RepeatDataset` wrapper was given the COCO-dataset kwargs
# directly and was missing both `times` and the nested `dataset`; it now
# wraps the actual dataset 3 times, matching the other 3x configs.
train_dataloader.update(
    dict(
        batch_size=2,
        num_workers=2,
        persistent_workers=True,
        pin_memory=True,
        sampler=dict(type=DefaultSampler, shuffle=True),
        batch_sampler=dict(type=AspectRatioBatchSampler),
        dataset=dict(
            type=RepeatDataset,
            times=3,
            dataset=dict(
                type=dataset_type,
                data_root=data_root,
                ann_file='annotations/instances_train2017.json',
                data_prefix=dict(img='train2017/'),
                filter_cfg=dict(filter_empty_gt=True, min_size=32),
                pipeline=train_pipeline,
                backend_args=backend_args))))
# NOTE(review): val batch_size=2 differs from the sibling configs (1);
# kept as-is since it only affects evaluation throughput — confirm intent.
val_dataloader.update(
    dict(
        batch_size=2,
        num_workers=2,
        persistent_workers=True,
        drop_last=False,
        pin_memory=True,
        sampler=dict(type=DefaultSampler, shuffle=False),
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='annotations/instances_val2017.json',
            data_prefix=dict(img='val2017/'),
            test_mode=True,
            pipeline=test_pipeline,
            backend_args=backend_args)))
test_dataloader = val_dataloader

val_evaluator.update(
    dict(
        type=CocoMetric,
        ann_file=data_root + 'annotations/instances_val2017.json',
        metric=['bbox', 'segm'],
        backend_args=backend_args))
test_evaluator = val_evaluator

# training schedule for 3x with `RepeatDataset`
# Fixed: `EpochBasedTrainLoop` takes `max_epochs`, not `max_iters`; the
# original `max_iters=12` would raise a TypeError when the loop is built.
train_cfg.update(dict(type=EpochBasedTrainLoop, max_epochs=12, val_interval=1))
val_cfg.update(dict(type=ValLoop))
test_cfg.update(dict(type=TestLoop))

# learning rate: linear warmup for 500 iterations, then x0.1 decay at
# epochs 9 and 11.
# Fixed: the decay stage is epoch-based; with `by_epoch=False` it would have
# stepped at *iterations* 9 and 11, i.e. during warmup.
param_scheduler = [
    dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=500),
    dict(
        type=MultiStepLR,
        begin=0,
        end=12,
        by_epoch=True,
        milestones=[9, 11],
        gamma=0.1)
]

# optimizer
optim_wrapper.update(
    dict(
        type=OptimWrapper,
        optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001)))
# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
#   or not by default.
# - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
auto_scale_lr.update(dict(enable=False, base_batch_size=16))
head_extractor/build/lib/mmdet/configs/common/ms_poly_90k_coco_instance.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright (c) OpenMMLab. All rights reserved.

# Multi-scale 90k-iteration schedule for COCO instance segmentation with
# polygon masks. Pure-Python style config; see
# https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta  # noqa
# Requires mmcv >= 2.0.1 and mmengine >= 0.8.0.

from mmengine.config import read_base

with read_base():
    from .._base_.default_runtime import *

from mmcv.transforms import RandomChoiceResize
from mmengine.dataset import RepeatDataset
from mmengine.dataset.sampler import DefaultSampler, InfiniteSampler
from mmengine.optim import OptimWrapper
from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
from mmengine.runner.loops import IterBasedTrainLoop, TestLoop, ValLoop
from torch.optim import SGD

from mmdet.datasets import AspectRatioBatchSampler, CocoDataset
from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import (FilterAnnotations,
                                               LoadAnnotations,
                                               LoadImageFromFile)
from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
                                                  Pad, RandomCrop, RandomFlip,
                                                  RandomResize, Resize)
from mmdet.evaluation import CocoMetric

# Dataset settings.
dataset_type = CocoDataset
data_root = 'data/coco/'
# Remote storage: either point ``data_root`` at a URI prefix and let the
# file I/O module infer the backend (LMDB/Memcache not supported), e.g.
# data_root = 's3://openmmlab/datasets/detection/coco/'
# or configure ``backend_args`` explicitly (``file_client_args`` in
# versions before 3.0.0rc6), e.g.
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
backend_args = None

# Decode images with pillow to align with Detectron2.
backend = 'pillow'
train_pipeline = [
    dict(
        type=LoadImageFromFile,
        backend_args=backend_args,
        imdecode_backend=backend),
    dict(
        type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
    dict(
        type=RandomChoiceResize,
        scales=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
                (1333, 768), (1333, 800)],
        keep_ratio=True,
        backend=backend),
    dict(type=RandomFlip, prob=0.5),
    dict(type=PackDetInputs)
]
test_pipeline = [
    dict(
        type=LoadImageFromFile,
        backend_args=backend_args,
        imdecode_backend=backend),
    dict(type=Resize, scale=(1333, 800), keep_ratio=True, backend=backend),
    dict(
        type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
    dict(
        type=PackDetInputs,
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
train_dataloader.update(
    dict(
        batch_size=2,
        num_workers=2,
        persistent_workers=True,
        pin_memory=True,
        sampler=dict(type=InfiniteSampler, shuffle=True),
        batch_sampler=dict(type=AspectRatioBatchSampler),
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='annotations/instances_train2017.json',
            data_prefix=dict(img='train2017/'),
            filter_cfg=dict(filter_empty_gt=True, min_size=32),
            pipeline=train_pipeline,
            backend_args=backend_args)))
val_dataloader.update(
    dict(
        batch_size=1,
        num_workers=2,
        persistent_workers=True,
        drop_last=False,
        pin_memory=True,
        sampler=dict(type=DefaultSampler, shuffle=False),
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='annotations/instances_val2017.json',
            data_prefix=dict(img='val2017/'),
            test_mode=True,
            pipeline=test_pipeline,
            backend_args=backend_args)))
test_dataloader = val_dataloader

val_evaluator.update(
    dict(
        type=CocoMetric,
        ann_file=data_root + 'annotations/instances_val2017.json',
        metric=['bbox', 'segm'],
        format_only=False,
        backend_args=backend_args))
test_evaluator = val_evaluator

# Iteration-based schedule: 90k iterations, validating every 10k.
max_iter = 90000
train_cfg.update(
    dict(type=IterBasedTrainLoop, max_iters=max_iter, val_interval=10000))
val_cfg.update(dict(type=ValLoop))
test_cfg.update(dict(type=TestLoop))

# Learning rate: linear warmup over the first 1000 iterations, then a x0.1
# step decay at iterations 60k and 80k.
param_scheduler = [
    dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=1000),
    dict(
        type=MultiStepLR,
        begin=0,
        end=max_iter,
        by_epoch=False,
        milestones=[60000, 80000],
        gamma=0.1)
]

# Optimizer.
optim_wrapper.update(
    dict(
        type=OptimWrapper,
        optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001)))
# Automatic LR scaling (disabled by default);
# ``base_batch_size`` = (8 GPUs) x (2 samples per GPU).
auto_scale_lr.update(dict(enable=False, base_batch_size=16))

# Checkpointing and logging are iteration-based for this schedule.
default_hooks.update(dict(checkpoint=dict(by_epoch=False, interval=10000)))
log_processor.update(dict(by_epoch=False))
head_extractor/build/lib/mmdet/configs/common/ssj_270_coco_instance.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright (c) OpenMMLab. All rights reserved.

# Standard Scale Jittering (SSJ), 270k-iteration schedule for COCO instance
# segmentation.
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0

from mmengine.config import read_base

with read_base():
    from .._base_.default_runtime import *

from mmcv.transforms import RandomChoiceResize
from mmengine.dataset import RepeatDataset
from mmengine.dataset.sampler import DefaultSampler, InfiniteSampler
from mmengine.optim import OptimWrapper
from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
from mmengine.runner.loops import IterBasedTrainLoop, TestLoop, ValLoop
from torch.optim import SGD

from mmdet.datasets import AspectRatioBatchSampler, CocoDataset
from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import (FilterAnnotations,
                                               LoadAnnotations,
                                               LoadImageFromFile)
from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
                                                  Pad, RandomCrop, RandomFlip,
                                                  RandomResize, Resize)
from mmdet.evaluation import CocoMetric

# dataset settings
dataset_type = CocoDataset
data_root = 'data/coco/'
# Fixed: `image_size` was referenced in `train_pipeline` but never defined
# (NameError when parsing the config); defined here as in the SCP variant.
image_size = (1024, 1024)
# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)

# data_root = 's3://openmmlab/datasets/detection/coco/'

# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
backend_args = None

# Standard Scale Jittering (SSJ) resizes and crops an image
# with a resize range of 0.8 to 1.25 of the original image size.
train_pipeline = [
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
    dict(
        type=RandomResize,
        scale=image_size,
        ratio_range=(0.8, 1.25),
        keep_ratio=True),
    # Fixed: use the imported classes instead of the strings 'RandomCrop'
    # and 'FilterAnnotations' for consistency with this pure-Python config.
    dict(
        type=RandomCrop,
        crop_type='absolute_range',
        crop_size=image_size,
        recompute_bbox=True,
        allow_negative_crop=True),
    dict(type=FilterAnnotations, min_gt_bbox_wh=(1e-2, 1e-2)),
    dict(type=RandomFlip, prob=0.5),
    dict(type=PackDetInputs)
]
test_pipeline = [
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(type=Resize, scale=(1333, 800), keep_ratio=True),
    dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
    dict(
        type=PackDetInputs,
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
train_dataloader.update(
    dict(
        batch_size=2,
        num_workers=2,
        persistent_workers=True,
        sampler=dict(type=InfiniteSampler),
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='annotations/instances_train2017.json',
            data_prefix=dict(img='train2017/'),
            filter_cfg=dict(filter_empty_gt=True, min_size=32),
            pipeline=train_pipeline,
            backend_args=backend_args)))
val_dataloader.update(
    dict(
        batch_size=1,
        num_workers=2,
        persistent_workers=True,
        drop_last=False,
        sampler=dict(type=DefaultSampler, shuffle=False),
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='annotations/instances_val2017.json',
            data_prefix=dict(img='val2017/'),
            test_mode=True,
            pipeline=test_pipeline,
            backend_args=backend_args)))
test_dataloader = val_dataloader

# Fixed: removed a duplicated literal re-definition of `val_evaluator` /
# `test_evaluator` that immediately followed (and shadowed) this update.
val_evaluator.update(
    dict(
        type=CocoMetric,
        ann_file=data_root + 'annotations/instances_val2017.json',
        metric=['bbox', 'segm'],
        format_only=False,
        backend_args=backend_args))
test_evaluator = val_evaluator

# The model is trained by 270k iterations with batch_size 64,
# which is roughly equivalent to 144 epochs.

max_iter = 270000
train_cfg.update(
    dict(type=IterBasedTrainLoop, max_iters=max_iter, val_interval=10000))
val_cfg.update(dict(type=ValLoop))
test_cfg.update(dict(type=TestLoop))

# learning rate
param_scheduler = [
    dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=1000),
    dict(
        type=MultiStepLR,
        begin=0,
        end=max_iter,
        by_epoch=False,
        milestones=[243000, 256500, 263250],
        gamma=0.1)
]

# optimizer
optim_wrapper.update(
    dict(
        type=OptimWrapper,
        optimizer=dict(type=SGD, lr=0.1, momentum=0.9, weight_decay=0.00004)))
# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
#   or not by default.
# - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
auto_scale_lr.update(dict(base_batch_size=64))

default_hooks.update(dict(checkpoint=dict(by_epoch=False, interval=10000)))
log_processor.update(dict(by_epoch=False))
head_extractor/build/lib/mmdet/configs/common/ssj_scp_270k_coco_instance.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .ssj_270_coco_instance import *
11
+
12
+ from mmdet.datasets import MultiImageMixDataset
13
+ from mmdet.datasets.transforms import CopyPaste
14
+
15
+ # dataset settings
16
+ dataset_type = CocoDataset
17
+ data_root = 'data/coco/'
18
+ image_size = (1024, 1024)
19
+ # Example to use different file client
20
+ # Method 1: simply set the data root and let the file I/O module
21
+ # automatically infer from prefix (not support LMDB and Memcache yet)
22
+
23
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
24
+
25
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
26
+ # backend_args = dict(
27
+ # backend='petrel',
28
+ # path_mapping=dict({
29
+ # './data/': 's3://openmmlab/datasets/detection/',
30
+ # 'data/': 's3://openmmlab/datasets/detection/'
31
+ # }))
32
+ backend_args = None
33
+
34
+ # Standard Scale Jittering (SSJ) resizes and crops an image
35
+ # with a resize range of 0.8 to 1.25 of the original image size.
36
+ load_pipeline = [
37
+ dict(type=LoadImageFromFile, backend_args=backend_args),
38
+ dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
39
+ dict(
40
+ type=RandomResize,
41
+ scale=image_size,
42
+ ratio_range=(0.8, 1.25),
43
+ keep_ratio=True),
44
+ dict(
45
+ type='RandomCrop',
46
+ crop_type='absolute_range',
47
+ crop_size=image_size,
48
+ recompute_bbox=True,
49
+ allow_negative_crop=True),
50
+ dict(type='FilterAnnotations', min_gt_bbox_wh=(1e-2, 1e-2)),
51
+ dict(type=RandomFlip, prob=0.5),
52
+ dict(type=Pad, size=image_size),
53
+ ]
54
+ train_pipeline = [
55
+ dict(type=CopyPaste, max_num_pasted=100),
56
+ dict(type=PackDetInputs)
57
+ ]
58
+
59
+ train_dataloader.update(
60
+ dict(
61
+ type=MultiImageMixDataset,
62
+ dataset=dict(
63
+ type=dataset_type,
64
+ data_root=data_root,
65
+ ann_file='annotations/instances_train2017.json',
66
+ data_prefix=dict(img='train2017/'),
67
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
68
+ pipeline=load_pipeline,
69
+ backend_args=backend_args),
70
+ pipeline=train_pipeline))
head_extractor/build/lib/mmdet/configs/deformable_detr/deformable_detr_r50_16xb2_50e_coco.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .._base_.datasets.coco_detection import *
11
+ from .._base_.default_runtime import *
12
+
13
+ from mmcv.transforms import LoadImageFromFile, RandomChoice, RandomChoiceResize
14
+ from mmengine.optim.optimizer import OptimWrapper
15
+ from mmengine.optim.scheduler import MultiStepLR
16
+ from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
17
+ from torch.optim.adamw import AdamW
18
+
19
+ from mmdet.datasets.transforms import (LoadAnnotations, PackDetInputs,
20
+ RandomCrop, RandomFlip, Resize)
21
+ from mmdet.models.backbones import ResNet
22
+ from mmdet.models.data_preprocessors import DetDataPreprocessor
23
+ from mmdet.models.dense_heads import DeformableDETRHead
24
+ from mmdet.models.detectors import DeformableDETR
25
+ from mmdet.models.losses import FocalLoss, GIoULoss, L1Loss
26
+ from mmdet.models.necks import ChannelMapper
27
+ from mmdet.models.task_modules import (BBoxL1Cost, FocalLossCost,
28
+ HungarianAssigner, IoUCost)
29
+
30
+ model = dict(
31
+ type=DeformableDETR,
32
+ num_queries=300,
33
+ num_feature_levels=4,
34
+ with_box_refine=False,
35
+ as_two_stage=False,
36
+ data_preprocessor=dict(
37
+ type=DetDataPreprocessor,
38
+ mean=[123.675, 116.28, 103.53],
39
+ std=[58.395, 57.12, 57.375],
40
+ bgr_to_rgb=True,
41
+ pad_size_divisor=1),
42
+ backbone=dict(
43
+ type=ResNet,
44
+ depth=50,
45
+ num_stages=4,
46
+ out_indices=(1, 2, 3),
47
+ frozen_stages=1,
48
+ norm_cfg=dict(type='BN', requires_grad=False),
49
+ norm_eval=True,
50
+ style='pytorch',
51
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
52
+ neck=dict(
53
+ type=ChannelMapper,
54
+ in_channels=[512, 1024, 2048],
55
+ kernel_size=1,
56
+ out_channels=256,
57
+ act_cfg=None,
58
+ norm_cfg=dict(type='GN', num_groups=32),
59
+ num_outs=4),
60
+ encoder=dict( # DeformableDetrTransformerEncoder
61
+ num_layers=6,
62
+ layer_cfg=dict( # DeformableDetrTransformerEncoderLayer
63
+ self_attn_cfg=dict( # MultiScaleDeformableAttention
64
+ embed_dims=256,
65
+ batch_first=True),
66
+ ffn_cfg=dict(
67
+ embed_dims=256, feedforward_channels=1024, ffn_drop=0.1))),
68
+ decoder=dict( # DeformableDetrTransformerDecoder
69
+ num_layers=6,
70
+ return_intermediate=True,
71
+ layer_cfg=dict( # DeformableDetrTransformerDecoderLayer
72
+ self_attn_cfg=dict( # MultiheadAttention
73
+ embed_dims=256,
74
+ num_heads=8,
75
+ dropout=0.1,
76
+ batch_first=True),
77
+ cross_attn_cfg=dict( # MultiScaleDeformableAttention
78
+ embed_dims=256,
79
+ batch_first=True),
80
+ ffn_cfg=dict(
81
+ embed_dims=256, feedforward_channels=1024, ffn_drop=0.1)),
82
+ post_norm_cfg=None),
83
+ positional_encoding=dict(num_feats=128, normalize=True, offset=-0.5),
84
+ bbox_head=dict(
85
+ type=DeformableDETRHead,
86
+ num_classes=80,
87
+ sync_cls_avg_factor=True,
88
+ loss_cls=dict(
89
+ type=FocalLoss,
90
+ use_sigmoid=True,
91
+ gamma=2.0,
92
+ alpha=0.25,
93
+ loss_weight=2.0),
94
+ loss_bbox=dict(type=L1Loss, loss_weight=5.0),
95
+ loss_iou=dict(type=GIoULoss, loss_weight=2.0)),
96
+ # training and testing settings
97
+ train_cfg=dict(
98
+ assigner=dict(
99
+ type=HungarianAssigner,
100
+ match_costs=[
101
+ dict(type=FocalLossCost, weight=2.0),
102
+ dict(type=BBoxL1Cost, weight=5.0, box_format='xywh'),
103
+ dict(type=IoUCost, iou_mode='giou', weight=2.0)
104
+ ])),
105
+ test_cfg=dict(max_per_img=100))
106
+
107
+ # train_pipeline, NOTE the img_scale and the Pad's size_divisor is different
108
+ # from the default setting in mmdet.
109
+ train_pipeline = [
110
+ dict(type=LoadImageFromFile, backend_args=backend_args),
111
+ dict(type=LoadAnnotations, with_bbox=True),
112
+ dict(type=RandomFlip, prob=0.5),
113
+ dict(
114
+ type=RandomChoice,
115
+ transforms=[
116
+ [
117
+ dict(
118
+ type=RandomChoiceResize,
119
+ scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
120
+ (608, 1333), (640, 1333), (672, 1333), (704, 1333),
121
+ (736, 1333), (768, 1333), (800, 1333)],
122
+ resize_type=Resize,
123
+ keep_ratio=True)
124
+ ],
125
+ [
126
+ dict(
127
+ type=RandomChoiceResize,
128
+ # The radio of all image in train dataset < 7
129
+ # follow the original implement
130
+ scales=[(400, 4200), (500, 4200), (600, 4200)],
131
+ resize_type=Resize,
132
+ keep_ratio=True),
133
+ dict(
134
+ type=RandomCrop,
135
+ crop_type='absolute_range',
136
+ crop_size=(384, 600),
137
+ allow_negative_crop=True),
138
+ dict(
139
+ type=RandomChoiceResize,
140
+ scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
141
+ (608, 1333), (640, 1333), (672, 1333), (704, 1333),
142
+ (736, 1333), (768, 1333), (800, 1333)],
143
+ resize_type=Resize,
144
+ keep_ratio=True)
145
+ ]
146
+ ]),
147
+ dict(type=PackDetInputs)
148
+ ]
149
+ train_dataloader.update(
150
+ dict(
151
+ dataset=dict(
152
+ filter_cfg=dict(filter_empty_gt=False), pipeline=train_pipeline)))
153
+
154
+ # optimizer
155
+ optim_wrapper = dict(
156
+ type=OptimWrapper,
157
+ optimizer=dict(type=AdamW, lr=0.0002, weight_decay=0.0001),
158
+ clip_grad=dict(max_norm=0.1, norm_type=2),
159
+ paramwise_cfg=dict(
160
+ custom_keys={
161
+ 'backbone': dict(lr_mult=0.1),
162
+ 'sampling_offsets': dict(lr_mult=0.1),
163
+ 'reference_points': dict(lr_mult=0.1)
164
+ }))
165
+
166
+ # learning policy
167
+ max_epochs = 50
168
+ train_cfg = dict(
169
+ type=EpochBasedTrainLoop, max_epochs=max_epochs, val_interval=1)
170
+ val_cfg = dict(type=ValLoop)
171
+ test_cfg = dict(type=TestLoop)
172
+
173
+ param_scheduler = [
174
+ dict(
175
+ type=MultiStepLR,
176
+ begin=0,
177
+ end=max_epochs,
178
+ by_epoch=True,
179
+ milestones=[40],
180
+ gamma=0.1)
181
+ ]
182
+
183
+ # NOTE: `auto_scale_lr` is for automatically scaling LR,
184
+ # USER SHOULD NOT CHANGE ITS VALUES.
185
+ # base_batch_size = (16 GPUs) x (2 samples per GPU)
186
+ auto_scale_lr = dict(base_batch_size=32)
head_extractor/build/lib/mmdet/configs/deformable_detr/deformable_detr_refine_r50_16xb2_50e_coco.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .deformable_detr_r50_16xb2_50e_coco import *
11
+
12
+ model.update(dict(with_box_refine=True))
head_extractor/build/lib/mmdet/configs/deformable_detr/deformable_detr_refine_twostage_r50_16xb2_50e_coco.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .deformable_detr_refine_r50_16xb2_50e_coco import *
11
+
12
+ model.update(dict(as_two_stage=True))
head_extractor/build/lib/mmdet/configs/detr/detr_r101_8xb2_500e_coco.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmengine.config import read_base
3
+ from mmengine.model.weight_init import PretrainedInit
4
+
5
+ with read_base():
6
+ from .detr_r50_8xb2_500e_coco import *
7
+
8
+ model.update(
9
+ dict(
10
+ backbone=dict(
11
+ depth=101,
12
+ init_cfg=dict(
13
+ type=PretrainedInit, checkpoint='torchvision://resnet101'))))
head_extractor/build/lib/mmdet/configs/detr/detr_r18_8xb2_500e_coco.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmengine.config import read_base
3
+ from mmengine.model.weight_init import PretrainedInit
4
+
5
+ with read_base():
6
+ from .detr_r50_8xb2_500e_coco import *
7
+
8
+ model.update(
9
+ dict(
10
+ backbone=dict(
11
+ depth=18,
12
+ init_cfg=dict(
13
+ type=PretrainedInit, checkpoint='torchvision://resnet18')),
14
+ neck=dict(in_channels=[512])))
head_extractor/build/lib/mmdet/configs/detr/detr_r50_8xb2_150e_coco.py ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.transforms import RandomChoice, RandomChoiceResize
3
+ from mmcv.transforms.loading import LoadImageFromFile
4
+ from mmengine.config import read_base
5
+ from mmengine.model.weight_init import PretrainedInit
6
+ from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
7
+ from mmengine.optim.scheduler.lr_scheduler import MultiStepLR
8
+ from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
9
+ from torch.nn.modules.activation import ReLU
10
+ from torch.nn.modules.batchnorm import BatchNorm2d
11
+ from torch.optim.adamw import AdamW
12
+
13
+ from mmdet.datasets.transforms import (LoadAnnotations, PackDetInputs,
14
+ RandomCrop, RandomFlip, Resize)
15
+ from mmdet.models import (DETR, ChannelMapper, DetDataPreprocessor, DETRHead,
16
+ ResNet)
17
+ from mmdet.models.losses.cross_entropy_loss import CrossEntropyLoss
18
+ from mmdet.models.losses.iou_loss import GIoULoss
19
+ from mmdet.models.losses.smooth_l1_loss import L1Loss
20
+ from mmdet.models.task_modules import (BBoxL1Cost, ClassificationCost,
21
+ HungarianAssigner, IoUCost)
22
+
23
+ with read_base():
24
+ from .._base_.datasets.coco_detection import *
25
+ from .._base_.default_runtime import *
26
+
27
+ model = dict(
28
+ type=DETR,
29
+ num_queries=100,
30
+ data_preprocessor=dict(
31
+ type=DetDataPreprocessor,
32
+ mean=[123.675, 116.28, 103.53],
33
+ std=[58.395, 57.12, 57.375],
34
+ bgr_to_rgb=True,
35
+ pad_size_divisor=1),
36
+ backbone=dict(
37
+ type=ResNet,
38
+ depth=50,
39
+ num_stages=4,
40
+ out_indices=(3, ),
41
+ frozen_stages=1,
42
+ norm_cfg=dict(type=BatchNorm2d, requires_grad=False),
43
+ norm_eval=True,
44
+ style='pytorch',
45
+ init_cfg=dict(
46
+ type=PretrainedInit, checkpoint='torchvision://resnet50')),
47
+ neck=dict(
48
+ type=ChannelMapper,
49
+ in_channels=[2048],
50
+ kernel_size=1,
51
+ out_channels=256,
52
+ act_cfg=None,
53
+ norm_cfg=None,
54
+ num_outs=1),
55
+ encoder=dict( # DetrTransformerEncoder
56
+ num_layers=6,
57
+ layer_cfg=dict( # DetrTransformerEncoderLayer
58
+ self_attn_cfg=dict( # MultiheadAttention
59
+ embed_dims=256,
60
+ num_heads=8,
61
+ dropout=0.1,
62
+ batch_first=True),
63
+ ffn_cfg=dict(
64
+ embed_dims=256,
65
+ feedforward_channels=2048,
66
+ num_fcs=2,
67
+ ffn_drop=0.1,
68
+ act_cfg=dict(type=ReLU, inplace=True)))),
69
+ decoder=dict( # DetrTransformerDecoder
70
+ num_layers=6,
71
+ layer_cfg=dict( # DetrTransformerDecoderLayer
72
+ self_attn_cfg=dict( # MultiheadAttention
73
+ embed_dims=256,
74
+ num_heads=8,
75
+ dropout=0.1,
76
+ batch_first=True),
77
+ cross_attn_cfg=dict( # MultiheadAttention
78
+ embed_dims=256,
79
+ num_heads=8,
80
+ dropout=0.1,
81
+ batch_first=True),
82
+ ffn_cfg=dict(
83
+ embed_dims=256,
84
+ feedforward_channels=2048,
85
+ num_fcs=2,
86
+ ffn_drop=0.1,
87
+ act_cfg=dict(type=ReLU, inplace=True))),
88
+ return_intermediate=True),
89
+ positional_encoding=dict(num_feats=128, normalize=True),
90
+ bbox_head=dict(
91
+ type=DETRHead,
92
+ num_classes=80,
93
+ embed_dims=256,
94
+ loss_cls=dict(
95
+ type=CrossEntropyLoss,
96
+ bg_cls_weight=0.1,
97
+ use_sigmoid=False,
98
+ loss_weight=1.0,
99
+ class_weight=1.0),
100
+ loss_bbox=dict(type=L1Loss, loss_weight=5.0),
101
+ loss_iou=dict(type=GIoULoss, loss_weight=2.0)),
102
+ # training and testing settings
103
+ train_cfg=dict(
104
+ assigner=dict(
105
+ type=HungarianAssigner,
106
+ match_costs=[
107
+ dict(type=ClassificationCost, weight=1.),
108
+ dict(type=BBoxL1Cost, weight=5.0, box_format='xywh'),
109
+ dict(type=IoUCost, iou_mode='giou', weight=2.0)
110
+ ])),
111
+ test_cfg=dict(max_per_img=100))
112
+
113
+ # train_pipeline, NOTE the img_scale and the Pad's size_divisor is different
114
+ # from the default setting in mmdet.
115
+ train_pipeline = [
116
+ dict(type=LoadImageFromFile, backend_args=backend_args),
117
+ dict(type=LoadAnnotations, with_bbox=True),
118
+ dict(type=RandomFlip, prob=0.5),
119
+ dict(
120
+ type=RandomChoice,
121
+ transforms=[[
122
+ dict(
123
+ type=RandomChoiceResize,
124
+ resize_type=Resize,
125
+ scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
126
+ (608, 1333), (640, 1333), (672, 1333), (704, 1333),
127
+ (736, 1333), (768, 1333), (800, 1333)],
128
+ keep_ratio=True)
129
+ ],
130
+ [
131
+ dict(
132
+ type=RandomChoiceResize,
133
+ resize_type=Resize,
134
+ scales=[(400, 1333), (500, 1333), (600, 1333)],
135
+ keep_ratio=True),
136
+ dict(
137
+ type=RandomCrop,
138
+ crop_type='absolute_range',
139
+ crop_size=(384, 600),
140
+ allow_negative_crop=True),
141
+ dict(
142
+ type=RandomChoiceResize,
143
+ resize_type=Resize,
144
+ scales=[(480, 1333), (512, 1333), (544, 1333),
145
+ (576, 1333), (608, 1333), (640, 1333),
146
+ (672, 1333), (704, 1333), (736, 1333),
147
+ (768, 1333), (800, 1333)],
148
+ keep_ratio=True)
149
+ ]]),
150
+ dict(type=PackDetInputs)
151
+ ]
152
+ train_dataloader.update(dataset=dict(pipeline=train_pipeline))
153
+
154
+ # optimizer
155
+ optim_wrapper = dict(
156
+ type=OptimWrapper,
157
+ optimizer=dict(type=AdamW, lr=0.0001, weight_decay=0.0001),
158
+ clip_grad=dict(max_norm=0.1, norm_type=2),
159
+ paramwise_cfg=dict(
160
+ custom_keys={'backbone': dict(lr_mult=0.1, decay_mult=1.0)}))
161
+
162
+ # learning policy
163
+ max_epochs = 150
164
+ train_cfg = dict(
165
+ type=EpochBasedTrainLoop, max_epochs=max_epochs, val_interval=1)
166
+ val_cfg = dict(type=ValLoop)
167
+ test_cfg = dict(type=TestLoop)
168
+
169
+ param_scheduler = [
170
+ dict(
171
+ type=MultiStepLR,
172
+ begin=0,
173
+ end=max_epochs,
174
+ by_epoch=True,
175
+ milestones=[100],
176
+ gamma=0.1)
177
+ ]
178
+
179
+ # NOTE: `auto_scale_lr` is for automatically scaling LR,
180
+ # USER SHOULD NOT CHANGE ITS VALUES.
181
+ # base_batch_size = (8 GPUs) x (2 samples per GPU)
182
+ auto_scale_lr = dict(base_batch_size=16)
head_extractor/build/lib/mmdet/configs/detr/detr_r50_8xb2_500e_coco.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmengine.config import read_base
3
+ from mmengine.optim.scheduler.lr_scheduler import MultiStepLR
4
+ from mmengine.runner.loops import EpochBasedTrainLoop
5
+
6
+ with read_base():
7
+ from .detr_r50_8xb2_150e_coco import *
8
+
9
+ # learning policy
10
+ max_epochs = 500
11
+ train_cfg.update(
12
+ type=EpochBasedTrainLoop, max_epochs=max_epochs, val_interval=10)
13
+
14
+ param_scheduler = [
15
+ dict(
16
+ type=MultiStepLR,
17
+ begin=0,
18
+ end=max_epochs,
19
+ by_epoch=True,
20
+ milestones=[334],
21
+ gamma=0.1)
22
+ ]
23
+
24
+ # only keep latest 2 checkpoints
25
+ default_hooks.update(checkpoint=dict(max_keep_ckpts=2))
head_extractor/build/lib/mmdet/configs/dino/dino_4scale_r50_8xb2_12e_coco.py ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.transforms import RandomChoice, RandomChoiceResize
3
+ from mmcv.transforms.loading import LoadImageFromFile
4
+ from mmengine.config import read_base
5
+ from mmengine.model.weight_init import PretrainedInit
6
+ from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
7
+ from mmengine.optim.scheduler.lr_scheduler import MultiStepLR
8
+ from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
9
+ from torch.nn.modules.batchnorm import BatchNorm2d
10
+ from torch.nn.modules.normalization import GroupNorm
11
+ from torch.optim.adamw import AdamW
12
+
13
+ from mmdet.datasets.transforms import (LoadAnnotations, PackDetInputs,
14
+ RandomCrop, RandomFlip, Resize)
15
+ from mmdet.models import (DINO, ChannelMapper, DetDataPreprocessor, DINOHead,
16
+ ResNet)
17
+ from mmdet.models.losses.focal_loss import FocalLoss
18
+ from mmdet.models.losses.iou_loss import GIoULoss
19
+ from mmdet.models.losses.smooth_l1_loss import L1Loss
20
+ from mmdet.models.task_modules import (BBoxL1Cost, FocalLossCost,
21
+ HungarianAssigner, IoUCost)
22
+
23
+ with read_base():
24
+ from .._base_.datasets.coco_detection import *
25
+ from .._base_.default_runtime import *
26
+
27
+ model = dict(
28
+ type=DINO,
29
+ num_queries=900, # num_matching_queries
30
+ with_box_refine=True,
31
+ as_two_stage=True,
32
+ data_preprocessor=dict(
33
+ type=DetDataPreprocessor,
34
+ mean=[123.675, 116.28, 103.53],
35
+ std=[58.395, 57.12, 57.375],
36
+ bgr_to_rgb=True,
37
+ pad_size_divisor=1),
38
+ backbone=dict(
39
+ type=ResNet,
40
+ depth=50,
41
+ num_stages=4,
42
+ out_indices=(1, 2, 3),
43
+ frozen_stages=1,
44
+ norm_cfg=dict(type=BatchNorm2d, requires_grad=False),
45
+ norm_eval=True,
46
+ style='pytorch',
47
+ init_cfg=dict(
48
+ type=PretrainedInit, checkpoint='torchvision://resnet50')),
49
+ neck=dict(
50
+ type=ChannelMapper,
51
+ in_channels=[512, 1024, 2048],
52
+ kernel_size=1,
53
+ out_channels=256,
54
+ act_cfg=None,
55
+ norm_cfg=dict(type=GroupNorm, num_groups=32),
56
+ num_outs=4),
57
+ encoder=dict(
58
+ num_layers=6,
59
+ layer_cfg=dict(
60
+ self_attn_cfg=dict(embed_dims=256, num_levels=4,
61
+ dropout=0.0), # 0.1 for DeformDETR
62
+ ffn_cfg=dict(
63
+ embed_dims=256,
64
+ feedforward_channels=2048, # 1024 for DeformDETR
65
+ ffn_drop=0.0))), # 0.1 for DeformDETR
66
+ decoder=dict(
67
+ num_layers=6,
68
+ return_intermediate=True,
69
+ layer_cfg=dict(
70
+ self_attn_cfg=dict(embed_dims=256, num_heads=8,
71
+ dropout=0.0), # 0.1 for DeformDETR
72
+ cross_attn_cfg=dict(embed_dims=256, num_levels=4,
73
+ dropout=0.0), # 0.1 for DeformDETR
74
+ ffn_cfg=dict(
75
+ embed_dims=256,
76
+ feedforward_channels=2048, # 1024 for DeformDETR
77
+ ffn_drop=0.0)), # 0.1 for DeformDETR
78
+ post_norm_cfg=None),
79
+ positional_encoding=dict(
80
+ num_feats=128,
81
+ normalize=True,
82
+ offset=0.0, # -0.5 for DeformDETR
83
+ temperature=20), # 10000 for DeformDETR
84
+ bbox_head=dict(
85
+ type=DINOHead,
86
+ num_classes=80,
87
+ sync_cls_avg_factor=True,
88
+ loss_cls=dict(
89
+ type=FocalLoss,
90
+ use_sigmoid=True,
91
+ gamma=2.0,
92
+ alpha=0.25,
93
+ loss_weight=1.0), # 2.0 in DeformDETR
94
+ loss_bbox=dict(type=L1Loss, loss_weight=5.0),
95
+ loss_iou=dict(type=GIoULoss, loss_weight=2.0)),
96
+ dn_cfg=dict( # TODO: Move to model.train_cfg ?
97
+ label_noise_scale=0.5,
98
+ box_noise_scale=1.0, # 0.4 for DN-DETR
99
+ group_cfg=dict(dynamic=True, num_groups=None,
100
+ num_dn_queries=100)), # TODO: half num_dn_queries
101
+ # training and testing settings
102
+ train_cfg=dict(
103
+ assigner=dict(
104
+ type=HungarianAssigner,
105
+ match_costs=[
106
+ dict(type=FocalLossCost, weight=2.0),
107
+ dict(type=BBoxL1Cost, weight=5.0, box_format='xywh'),
108
+ dict(type=IoUCost, iou_mode='giou', weight=2.0)
109
+ ])),
110
+ test_cfg=dict(max_per_img=300)) # 100 for DeformDETR
111
+
112
+ # train_pipeline, NOTE the img_scale and the Pad's size_divisor is different
113
+ # from the default setting in mmdet.
114
+ train_pipeline = [
115
+ dict(type=LoadImageFromFile, backend_args=backend_args),
116
+ dict(type=LoadAnnotations, with_bbox=True),
117
+ dict(type=RandomFlip, prob=0.5),
118
+ dict(
119
+ type=RandomChoice,
120
+ transforms=[
121
+ [
122
+ dict(
123
+ type=RandomChoiceResize,
124
+ resize_type=Resize,
125
+ scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
126
+ (608, 1333), (640, 1333), (672, 1333), (704, 1333),
127
+ (736, 1333), (768, 1333), (800, 1333)],
128
+ keep_ratio=True)
129
+ ],
130
+ [
131
+ dict(
132
+ type=RandomChoiceResize,
133
+ resize_type=Resize,
134
+ # The radio of all image in train dataset < 7
135
+ # follow the original implement
136
+ scales=[(400, 4200), (500, 4200), (600, 4200)],
137
+ keep_ratio=True),
138
+ dict(
139
+ type=RandomCrop,
140
+ crop_type='absolute_range',
141
+ crop_size=(384, 600),
142
+ allow_negative_crop=True),
143
+ dict(
144
+ type=RandomChoiceResize,
145
+ resize_type=Resize,
146
+ scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
147
+ (608, 1333), (640, 1333), (672, 1333), (704, 1333),
148
+ (736, 1333), (768, 1333), (800, 1333)],
149
+ keep_ratio=True)
150
+ ]
151
+ ]),
152
+ dict(type=PackDetInputs)
153
+ ]
154
+ train_dataloader.update(
155
+ dataset=dict(
156
+ filter_cfg=dict(filter_empty_gt=False), pipeline=train_pipeline))
157
+
158
+ # optimizer
159
+ optim_wrapper = dict(
160
+ type=OptimWrapper,
161
+ optimizer=dict(
162
+ type=AdamW,
163
+ lr=0.0001, # 0.0002 for DeformDETR
164
+ weight_decay=0.0001),
165
+ clip_grad=dict(max_norm=0.1, norm_type=2),
166
+ paramwise_cfg=dict(custom_keys={'backbone': dict(lr_mult=0.1)})
167
+ ) # custom_keys contains sampling_offsets and reference_points in DeformDETR # noqa
168
+
169
+ # learning policy
170
+ max_epochs = 12
171
+ train_cfg = dict(
172
+ type=EpochBasedTrainLoop, max_epochs=max_epochs, val_interval=1)
173
+
174
+ val_cfg = dict(type=ValLoop)
175
+ test_cfg = dict(type=TestLoop)
176
+
177
+ param_scheduler = [
178
+ dict(
179
+ type=MultiStepLR,
180
+ begin=0,
181
+ end=max_epochs,
182
+ by_epoch=True,
183
+ milestones=[11],
184
+ gamma=0.1)
185
+ ]
186
+
187
+ # NOTE: `auto_scale_lr` is for automatically scaling LR,
188
+ # USER SHOULD NOT CHANGE ITS VALUES.
189
+ # base_batch_size = (8 GPUs) x (2 samples per GPU)
190
+ auto_scale_lr = dict(base_batch_size=16)
head_extractor/build/lib/mmdet/configs/dino/dino_4scale_r50_8xb2_24e_coco.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmengine.config import read_base
3
+ from mmengine.runner.loops import EpochBasedTrainLoop
4
+
5
+ with read_base():
6
+ from .dino_4scale_r50_8xb2_12e_coco import *
7
+
8
+ max_epochs = 24
9
+ train_cfg.update(
10
+ dict(type=EpochBasedTrainLoop, max_epochs=max_epochs, val_interval=1))
11
+
12
+ param_scheduler[0].update(dict(milestones=[20]))
head_extractor/build/lib/mmdet/configs/dino/dino_4scale_r50_8xb2_36e_coco.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmengine.config import read_base
3
+ from mmengine.runner.loops import EpochBasedTrainLoop
4
+
5
+ with read_base():
6
+ from .dino_4scale_r50_8xb2_12e_coco import *
7
+
8
+ max_epochs = 36
9
+ train_cfg.update(
10
+ dict(type=EpochBasedTrainLoop, max_epochs=max_epochs, val_interval=1))
11
+
12
+ param_scheduler[0].update(dict(milestones=[30]))