codyshen commited on
Commit
6ed4a9c
·
verified ·
1 Parent(s): 3c4d98e

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +70 -0
  2. head_extractor/assets/001.jpg +3 -0
  3. head_extractor/assets/001_head-black-bg.webp +0 -0
  4. head_extractor/assets/001_head-default.webp +0 -0
  5. head_extractor/assets/001_head-pad2square-false.webp +0 -0
  6. head_extractor/build/lib/head_extractor/__init__.py +6 -0
  7. head_extractor/build/lib/head_extractor/models/__init__.py +0 -0
  8. head_extractor/build/lib/head_extractor/models/depth_anything_large_mask2former_16xb1_160k_human_parsing_fashion_1024x1024.py +573 -0
  9. head_extractor/build/lib/head_extractor/processor.py +585 -0
  10. head_extractor/build/lib/mmdet/__init__.py +27 -0
  11. head_extractor/build/lib/mmdet/apis/__init__.py +9 -0
  12. head_extractor/build/lib/mmdet/apis/det_inferencer.py +652 -0
  13. head_extractor/build/lib/mmdet/apis/inference.py +372 -0
  14. head_extractor/build/lib/mmdet/configs/_base_/datasets/coco_detection.py +104 -0
  15. head_extractor/build/lib/mmdet/configs/_base_/datasets/coco_instance.py +106 -0
  16. head_extractor/build/lib/mmdet/configs/_base_/datasets/coco_instance_semantic.py +87 -0
  17. head_extractor/build/lib/mmdet/configs/_base_/datasets/coco_panoptic.py +105 -0
  18. head_extractor/build/lib/mmdet/configs/_base_/datasets/mot_challenge.py +101 -0
  19. head_extractor/build/lib/mmdet/configs/_base_/default_runtime.py +33 -0
  20. head_extractor/build/lib/mmdet/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py +220 -0
  21. head_extractor/build/lib/mmdet/configs/_base_/models/cascade_rcnn_r50_fpn.py +201 -0
  22. head_extractor/build/lib/mmdet/configs/_base_/models/faster_rcnn_r50_fpn.py +138 -0
  23. head_extractor/build/lib/mmdet/configs/_base_/models/mask_rcnn_r50_caffe_c4.py +158 -0
  24. head_extractor/build/lib/mmdet/configs/_base_/models/mask_rcnn_r50_fpn.py +154 -0
  25. head_extractor/build/lib/mmdet/configs/_base_/models/retinanet_r50_fpn.py +77 -0
  26. head_extractor/build/lib/mmdet/configs/_base_/schedules/schedule_1x.py +33 -0
  27. head_extractor/build/lib/mmdet/configs/_base_/schedules/schedule_2x.py +33 -0
  28. head_extractor/build/lib/mmdet/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py +13 -0
  29. head_extractor/build/lib/mmdet/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py +13 -0
  30. head_extractor/build/lib/mmdet/configs/common/lsj_100e_coco_detection.py +134 -0
  31. head_extractor/build/lib/mmdet/configs/common/lsj_100e_coco_instance.py +134 -0
  32. head_extractor/build/lib/mmdet/configs/common/lsj_200e_coco_detection.py +25 -0
  33. head_extractor/build/lib/mmdet/configs/common/lsj_200e_coco_instance.py +25 -0
  34. head_extractor/build/lib/mmdet/configs/common/ms_3x_coco.py +130 -0
  35. head_extractor/build/lib/mmdet/configs/common/ms_3x_coco_instance.py +136 -0
  36. head_extractor/build/lib/mmdet/configs/common/ms_90k_coco.py +151 -0
  37. head_extractor/build/lib/mmdet/configs/common/ms_poly_3x_coco_instance.py +138 -0
  38. head_extractor/build/lib/mmdet/configs/common/ms_poly_90k_coco_instance.py +153 -0
  39. head_extractor/build/lib/mmdet/configs/common/ssj_270_coco_instance.py +158 -0
  40. head_extractor/build/lib/mmdet/configs/common/ssj_scp_270k_coco_instance.py +70 -0
  41. head_extractor/build/lib/mmdet/configs/deformable_detr/deformable_detr_r50_16xb2_50e_coco.py +186 -0
  42. head_extractor/build/lib/mmdet/configs/deformable_detr/deformable_detr_refine_r50_16xb2_50e_coco.py +12 -0
  43. head_extractor/build/lib/mmdet/configs/deformable_detr/deformable_detr_refine_twostage_r50_16xb2_50e_coco.py +12 -0
  44. head_extractor/build/lib/mmdet/configs/detr/detr_r101_8xb2_500e_coco.py +13 -0
  45. head_extractor/build/lib/mmdet/configs/detr/detr_r18_8xb2_500e_coco.py +14 -0
  46. head_extractor/build/lib/mmdet/configs/detr/detr_r50_8xb2_150e_coco.py +182 -0
  47. head_extractor/build/lib/mmdet/configs/detr/detr_r50_8xb2_500e_coco.py +25 -0
  48. head_extractor/build/lib/mmdet/configs/dino/dino_4scale_r50_8xb2_12e_coco.py +190 -0
  49. head_extractor/build/lib/mmdet/configs/dino/dino_4scale_r50_8xb2_24e_coco.py +12 -0
  50. head_extractor/build/lib/mmdet/configs/dino/dino_4scale_r50_8xb2_36e_coco.py +12 -0
.gitattributes CHANGED
@@ -33,3 +33,73 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ head_extractor/assets/001.jpg filter=lfs diff=lfs merge=lfs -text
37
+ head_extractor/mmcv-2.1.0/build/lib.linux-x86_64-cpython-311/mmcv/_ext.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
38
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/.ninja_deps filter=lfs diff=lfs merge=lfs -text
39
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/active_rotated_filter_cuda.o filter=lfs diff=lfs merge=lfs -text
40
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/assign_score_withk_cuda.o filter=lfs diff=lfs merge=lfs -text
41
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/ball_query_cuda.o filter=lfs diff=lfs merge=lfs -text
42
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/bbox_overlaps_cuda.o filter=lfs diff=lfs merge=lfs -text
43
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/bezier_align_cuda.o filter=lfs diff=lfs merge=lfs -text
44
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/bias_act_cuda.o filter=lfs diff=lfs merge=lfs -text
45
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/border_align_cuda.o filter=lfs diff=lfs merge=lfs -text
46
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/box_iou_quadri_cuda.o filter=lfs diff=lfs merge=lfs -text
47
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/box_iou_rotated_cuda.o filter=lfs diff=lfs merge=lfs -text
48
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/carafe_cuda.o filter=lfs diff=lfs merge=lfs -text
49
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/carafe_naive_cuda.o filter=lfs diff=lfs merge=lfs -text
50
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/chamfer_distance_cuda.o filter=lfs diff=lfs merge=lfs -text
51
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/convex_iou.o filter=lfs diff=lfs merge=lfs -text
52
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/correlation_cuda.o filter=lfs diff=lfs merge=lfs -text
53
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/cudabind.o filter=lfs diff=lfs merge=lfs -text
54
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/deform_conv_cuda.o filter=lfs diff=lfs merge=lfs -text
55
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/deform_roi_pool_cuda.o filter=lfs diff=lfs merge=lfs -text
56
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/diff_iou_rotated_cuda.o filter=lfs diff=lfs merge=lfs -text
57
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/filtered_lrelu.o filter=lfs diff=lfs merge=lfs -text
58
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/focal_loss_cuda.o filter=lfs diff=lfs merge=lfs -text
59
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/furthest_point_sample_cuda.o filter=lfs diff=lfs merge=lfs -text
60
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/fused_bias_leakyrelu_cuda.o filter=lfs diff=lfs merge=lfs -text
61
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/fused_spconv_ops_cuda.o filter=lfs diff=lfs merge=lfs -text
62
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/gather_points_cuda.o filter=lfs diff=lfs merge=lfs -text
63
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/group_points_cuda.o filter=lfs diff=lfs merge=lfs -text
64
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/iou3d_cuda.o filter=lfs diff=lfs merge=lfs -text
65
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/knn_cuda.o filter=lfs diff=lfs merge=lfs -text
66
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/masked_conv2d_cuda.o filter=lfs diff=lfs merge=lfs -text
67
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/min_area_polygons.o filter=lfs diff=lfs merge=lfs -text
68
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/modulated_deform_conv_cuda.o filter=lfs diff=lfs merge=lfs -text
69
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/ms_deform_attn_cuda.o filter=lfs diff=lfs merge=lfs -text
70
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/nms_cuda.o filter=lfs diff=lfs merge=lfs -text
71
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/nms_quadri_cuda.o filter=lfs diff=lfs merge=lfs -text
72
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/nms_rotated_cuda.o filter=lfs diff=lfs merge=lfs -text
73
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/points_in_boxes_cuda.o filter=lfs diff=lfs merge=lfs -text
74
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/points_in_polygons_cuda.o filter=lfs diff=lfs merge=lfs -text
75
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/prroi_pool_cuda.o filter=lfs diff=lfs merge=lfs -text
76
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/psamask_cuda.o filter=lfs diff=lfs merge=lfs -text
77
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/riroi_align_rotated_cuda.o filter=lfs diff=lfs merge=lfs -text
78
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/roi_align_cuda.o filter=lfs diff=lfs merge=lfs -text
79
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/roi_align_rotated_cuda.o filter=lfs diff=lfs merge=lfs -text
80
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/roi_pool_cuda.o filter=lfs diff=lfs merge=lfs -text
81
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/roiaware_pool3d_cuda.o filter=lfs diff=lfs merge=lfs -text
82
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/roipoint_pool3d_cuda.o filter=lfs diff=lfs merge=lfs -text
83
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/rotated_feature_align_cuda.o filter=lfs diff=lfs merge=lfs -text
84
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/scatter_points_cuda.o filter=lfs diff=lfs merge=lfs -text
85
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/sparse_indice.o filter=lfs diff=lfs merge=lfs -text
86
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/sparse_maxpool.o filter=lfs diff=lfs merge=lfs -text
87
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/sparse_pool_ops_cuda.o filter=lfs diff=lfs merge=lfs -text
88
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/sparse_reordering.o filter=lfs diff=lfs merge=lfs -text
89
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/spconv_ops_cuda.o filter=lfs diff=lfs merge=lfs -text
90
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/stack_ball_query_cuda.o filter=lfs diff=lfs merge=lfs -text
91
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/stack_group_points_cuda.o filter=lfs diff=lfs merge=lfs -text
92
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/sync_bn_cuda.o filter=lfs diff=lfs merge=lfs -text
93
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/three_interpolate_cuda.o filter=lfs diff=lfs merge=lfs -text
94
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/three_nn_cuda.o filter=lfs diff=lfs merge=lfs -text
95
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/tin_shift_cuda.o filter=lfs diff=lfs merge=lfs -text
96
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/upfirdn2d_kernel.o filter=lfs diff=lfs merge=lfs -text
97
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/cuda/voxelization_cuda.o filter=lfs diff=lfs merge=lfs -text
98
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/deform_conv.o filter=lfs diff=lfs merge=lfs -text
99
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/modulated_deform_conv.o filter=lfs diff=lfs merge=lfs -text
100
+ head_extractor/mmcv-2.1.0/build/temp.linux-x86_64-cpython-311/mmcv/ops/csrc/pytorch/pybind.o filter=lfs diff=lfs merge=lfs -text
101
+ head_extractor/mmcv-2.1.0/docs/en/_static/community/3.png filter=lfs diff=lfs merge=lfs -text
102
+ head_extractor/mmcv-2.1.0/docs/en/_static/flow_raw_images.png filter=lfs diff=lfs merge=lfs -text
103
+ head_extractor/mmcv-2.1.0/docs/en/_static/flow_warp.png filter=lfs diff=lfs merge=lfs -text
104
+ head_extractor/mmcv-2.1.0/docs/en/_static/flow_warp_diff.png filter=lfs diff=lfs merge=lfs -text
105
+ head_extractor/mmcv-2.1.0/docs/en/_static/progress.gif filter=lfs diff=lfs merge=lfs -text
head_extractor/assets/001.jpg ADDED

Git LFS Details

  • SHA256: 33562c08290fdd1576ebfe8da41bae3b8f7e21b7e0971ba0568d1fa259e1f409
  • Pointer size: 131 Bytes
  • Size of remote file: 113 kB
head_extractor/assets/001_head-black-bg.webp ADDED
head_extractor/assets/001_head-default.webp ADDED
head_extractor/assets/001_head-pad2square-false.webp ADDED
head_extractor/build/lib/head_extractor/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ from .processor import ProcessorPipeline, TaskType
2
+
3
+ __version__ = "0.1.0"
4
+
5
+ # 让外部可以直接 from head_extractor import ProcessorPipeline
6
+ __all__ = ['ProcessorPipeline', 'TaskType']
head_extractor/build/lib/head_extractor/models/__init__.py ADDED
File without changes
head_extractor/build/lib/head_extractor/models/depth_anything_large_mask2former_16xb1_160k_human_parsing_fashion_1024x1024.py ADDED
@@ -0,0 +1,573 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ auto_scale_lr = dict(base_batch_size=16, enable=False)
2
+ backbone_embed_multi = dict(decay_mult=0.0, lr_mult=0.1)
3
+ backbone_norm_multi = dict(decay_mult=0.0, lr_mult=0.1)
4
+ crop_size = (
5
+ 896,
6
+ 896,
7
+ )
8
+ custom_keys = dict({
9
+ 'backbone.dinov2':
10
+ dict(decay_mult=1.0, lr_mult=0.1),
11
+ 'backbone.dinov2.blocks.0.norm':
12
+ dict(decay_mult=0.0, lr_mult=0.1),
13
+ 'backbone.dinov2.blocks.1.norm':
14
+ dict(decay_mult=0.0, lr_mult=0.1),
15
+ 'backbone.dinov2.blocks.10.norm':
16
+ dict(decay_mult=0.0, lr_mult=0.1),
17
+ 'backbone.dinov2.blocks.11.norm':
18
+ dict(decay_mult=0.0, lr_mult=0.1),
19
+ 'backbone.dinov2.blocks.12.norm':
20
+ dict(decay_mult=0.0, lr_mult=0.1),
21
+ 'backbone.dinov2.blocks.13.norm':
22
+ dict(decay_mult=0.0, lr_mult=0.1),
23
+ 'backbone.dinov2.blocks.14.norm':
24
+ dict(decay_mult=0.0, lr_mult=0.1),
25
+ 'backbone.dinov2.blocks.15.norm':
26
+ dict(decay_mult=0.0, lr_mult=0.1),
27
+ 'backbone.dinov2.blocks.16.norm':
28
+ dict(decay_mult=0.0, lr_mult=0.1),
29
+ 'backbone.dinov2.blocks.17.norm':
30
+ dict(decay_mult=0.0, lr_mult=0.1),
31
+ 'backbone.dinov2.blocks.18.norm':
32
+ dict(decay_mult=0.0, lr_mult=0.1),
33
+ 'backbone.dinov2.blocks.19.norm':
34
+ dict(decay_mult=0.0, lr_mult=0.1),
35
+ 'backbone.dinov2.blocks.2.norm':
36
+ dict(decay_mult=0.0, lr_mult=0.1),
37
+ 'backbone.dinov2.blocks.20.norm':
38
+ dict(decay_mult=0.0, lr_mult=0.1),
39
+ 'backbone.dinov2.blocks.21.norm':
40
+ dict(decay_mult=0.0, lr_mult=0.1),
41
+ 'backbone.dinov2.blocks.22.norm':
42
+ dict(decay_mult=0.0, lr_mult=0.1),
43
+ 'backbone.dinov2.blocks.23.norm':
44
+ dict(decay_mult=0.0, lr_mult=0.1),
45
+ 'backbone.dinov2.blocks.3.norm':
46
+ dict(decay_mult=0.0, lr_mult=0.1),
47
+ 'backbone.dinov2.blocks.4.norm':
48
+ dict(decay_mult=0.0, lr_mult=0.1),
49
+ 'backbone.dinov2.blocks.5.norm':
50
+ dict(decay_mult=0.0, lr_mult=0.1),
51
+ 'backbone.dinov2.blocks.6.norm':
52
+ dict(decay_mult=0.0, lr_mult=0.1),
53
+ 'backbone.dinov2.blocks.7.norm':
54
+ dict(decay_mult=0.0, lr_mult=0.1),
55
+ 'backbone.dinov2.blocks.8.norm':
56
+ dict(decay_mult=0.0, lr_mult=0.1),
57
+ 'backbone.dinov2.blocks.9.norm':
58
+ dict(decay_mult=0.0, lr_mult=0.1),
59
+ 'backbone.dinov2.norm':
60
+ dict(decay_mult=0.0, lr_mult=0.1),
61
+ 'level_embed':
62
+ dict(decay_mult=0.0, lr_mult=1.0),
63
+ 'pos_embed':
64
+ dict(decay_mult=0.0, lr_mult=0.1),
65
+ 'query_embed':
66
+ dict(decay_mult=0.0, lr_mult=1.0),
67
+ 'query_feat':
68
+ dict(decay_mult=0.0, lr_mult=1.0)
69
+ })
70
+ data_preprocessor = dict(
71
+ bgr_to_rgb=True,
72
+ mean=[
73
+ 123.675,
74
+ 116.28,
75
+ 103.53,
76
+ ],
77
+ pad_val=0,
78
+ seg_pad_val=255,
79
+ size=(
80
+ 896,
81
+ 896,
82
+ ),
83
+ std=[
84
+ 58.395,
85
+ 57.12,
86
+ 57.375,
87
+ ],
88
+ type='SegDataPreProcessor')
89
+ data_root = '/mnt/data_ssd/limaopeng/limaopeng/segmentation/dataset/deep_fashion_10k'
90
+ dataset_type = 'HumanParsingDataset'
91
+ default_hooks = dict(
92
+ checkpoint=dict(
93
+ by_epoch=False,
94
+ interval=2000,
95
+ max_keep_ckpts=50,
96
+ save_best='mIoU',
97
+ type='CheckpointHook'),
98
+ logger=dict(interval=50, log_metric_by_epoch=False, type='LoggerHook'),
99
+ param_scheduler=dict(type='ParamSchedulerHook'),
100
+ sampler_seed=dict(type='DistSamplerSeedHook'),
101
+ timer=dict(type='IterTimerHook'),
102
+ visualization=dict(type='SegVisualizationHook'))
103
+ default_scope = 'mmseg'
104
+ embed_multi = dict(decay_mult=0.0, lr_mult=1.0)
105
+ env_cfg = dict(
106
+ cudnn_benchmark=True,
107
+ dist_cfg=dict(backend='nccl'),
108
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
109
+ find_unused_parameters = True
110
+ img_ratios = [
111
+ 0.5,
112
+ 0.75,
113
+ 1.0,
114
+ 1.25,
115
+ 1.5,
116
+ 1.75,
117
+ ]
118
+ launcher = 'none'
119
+ load_from = '/mnt/data_ssd/limaopeng/limaopeng/segmentation/mmsegmentation/work_dirs/depth_anything_large_mask2former_16xb1_160k_human_parsing_896x896/best_mIoU_iter_110000.pth'
120
+ log_level = 'INFO'
121
+ log_processor = dict(by_epoch=False)
122
+ model = dict(
123
+ backbone=dict(
124
+ freeze=False,
125
+ # load_from='./checkpoints/depth_anything_vitl14.pth',
126
+ type='DINOv2',
127
+ version='large'),
128
+ data_preprocessor=dict(
129
+ bgr_to_rgb=True,
130
+ mean=[
131
+ 123.675,
132
+ 116.28,
133
+ 103.53,
134
+ ],
135
+ pad_val=0,
136
+ seg_pad_val=255,
137
+ size=(
138
+ 896,
139
+ 896,
140
+ ),
141
+ std=[
142
+ 58.395,
143
+ 57.12,
144
+ 57.375,
145
+ ],
146
+ type='SegDataPreProcessor'),
147
+ decode_head=dict(
148
+ align_corners=False,
149
+ enforce_decoder_input_project=False,
150
+ feat_channels=1024,
151
+ in_channels=[
152
+ 1024,
153
+ 1024,
154
+ 1024,
155
+ 1024,
156
+ ],
157
+ loss_boundary=dict(loss_weight=5.0, type='BoundaryLoss'),
158
+ loss_cls=dict(
159
+ class_weight=[
160
+ 1.0,
161
+ 1.0,
162
+ 1.0,
163
+ 1.0,
164
+ 1.0,
165
+ 1.0,
166
+ 1.0,
167
+ 1.0,
168
+ 1.0,
169
+ 1.0,
170
+ 1.0,
171
+ 1.0,
172
+ 1.0,
173
+ 1.0,
174
+ 1.0,
175
+ 1.0,
176
+ 1.0,
177
+ 1.0,
178
+ 1.0,
179
+ 1.0,
180
+ 1.0,
181
+ 1.0,
182
+ 1.0,
183
+ 1.0,
184
+ 1.0,
185
+ 1.0,
186
+ 1.0,
187
+ 1.0,
188
+ 1.0,
189
+ 1.0,
190
+ 1.0,
191
+ 1.0,
192
+ 1.0,
193
+ 1.0,
194
+ 1.0,
195
+ 1.0,
196
+ 1.0,
197
+ 1.0,
198
+ 1.0,
199
+ 1.0,
200
+ 1.0,
201
+ 1.0,
202
+ 1.0,
203
+ 0.1,
204
+ ],
205
+ loss_weight=2.0,
206
+ reduction='mean',
207
+ type='mmdet.CrossEntropyLoss',
208
+ use_sigmoid=False),
209
+ loss_dice=dict(
210
+ activate=True,
211
+ eps=1.0,
212
+ loss_weight=5.0,
213
+ naive_dice=True,
214
+ reduction='mean',
215
+ type='mmdet.DiceLoss',
216
+ use_sigmoid=True),
217
+ loss_mask=dict(
218
+ loss_weight=5.0,
219
+ reduction='mean',
220
+ type='mmdet.CrossEntropyLoss',
221
+ use_sigmoid=True),
222
+ num_classes=43,
223
+ num_queries=200,
224
+ num_transformer_feat_level=3,
225
+ out_channels=1024,
226
+ pixel_decoder=dict(
227
+ act_cfg=dict(type='ReLU'),
228
+ encoder=dict(
229
+ init_cfg=None,
230
+ layer_cfg=dict(
231
+ ffn_cfg=dict(
232
+ act_cfg=dict(inplace=True, type='ReLU'),
233
+ embed_dims=1024,
234
+ feedforward_channels=4096,
235
+ ffn_drop=0.0,
236
+ num_fcs=2),
237
+ self_attn_cfg=dict(
238
+ batch_first=True,
239
+ dropout=0.0,
240
+ embed_dims=1024,
241
+ im2col_step=64,
242
+ init_cfg=None,
243
+ norm_cfg=None,
244
+ num_heads=32,
245
+ num_levels=3,
246
+ num_points=4)),
247
+ num_layers=6),
248
+ init_cfg=None,
249
+ norm_cfg=dict(num_groups=32, type='GN'),
250
+ num_outs=3,
251
+ positional_encoding=dict(normalize=True, num_feats=512),
252
+ type='mmdet.MSDeformAttnPixelDecoder'),
253
+ positional_encoding=dict(normalize=True, num_feats=512),
254
+ train_cfg=dict(
255
+ assigner=dict(
256
+ match_costs=[
257
+ dict(type='mmdet.ClassificationCost', weight=2.0),
258
+ dict(
259
+ type='mmdet.CrossEntropyLossCost',
260
+ use_sigmoid=True,
261
+ weight=5.0),
262
+ dict(
263
+ eps=1.0,
264
+ pred_act=True,
265
+ type='mmdet.DiceCost',
266
+ weight=5.0),
267
+ ],
268
+ type='mmdet.HungarianAssigner'),
269
+ importance_sample_ratio=0.75,
270
+ num_points=12544,
271
+ oversample_ratio=3.0,
272
+ sampler=dict(type='mmdet.MaskPseudoSampler')),
273
+ transformer_decoder=dict(
274
+ init_cfg=None,
275
+ layer_cfg=dict(
276
+ cross_attn_cfg=dict(
277
+ attn_drop=0.0,
278
+ batch_first=True,
279
+ dropout_layer=None,
280
+ embed_dims=1024,
281
+ num_heads=32,
282
+ proj_drop=0.0),
283
+ ffn_cfg=dict(
284
+ act_cfg=dict(inplace=True, type='ReLU'),
285
+ add_identity=True,
286
+ dropout_layer=None,
287
+ embed_dims=1024,
288
+ feedforward_channels=4096,
289
+ ffn_drop=0.0,
290
+ num_fcs=2),
291
+ self_attn_cfg=dict(
292
+ attn_drop=0.0,
293
+ batch_first=True,
294
+ dropout_layer=None,
295
+ embed_dims=1024,
296
+ num_heads=32,
297
+ proj_drop=0.0)),
298
+ num_layers=9,
299
+ return_intermediate=True),
300
+ type='Mask2FormerHead'),
301
+ neck=dict(
302
+ embed_dim=1024, rescales=[
303
+ 4,
304
+ 2,
305
+ 1,
306
+ 0.5,
307
+ ], type='Feature2Pyramid'),
308
+ test_cfg=dict(crop_size=(
309
+ 896,
310
+ 896,
311
+ ), mode='slide', stride=(
312
+ 426,
313
+ 426,
314
+ )),
315
+ train_cfg=dict(),
316
+ type='EncoderDecoder')
317
+ num_classes = 43
318
+ optim_wrapper = dict(
319
+ clip_grad=dict(max_norm=0.01, norm_type=2),
320
+ optimizer=dict(
321
+ betas=(
322
+ 0.9,
323
+ 0.999,
324
+ ),
325
+ eps=1e-08,
326
+ lr=3e-05,
327
+ type='AdamW',
328
+ weight_decay=0.05),
329
+ paramwise_cfg=dict(
330
+ custom_keys=dict({
331
+ 'backbone.dinov2':
332
+ dict(decay_mult=1.0, lr_mult=0.1),
333
+ 'backbone.dinov2.blocks.0.norm':
334
+ dict(decay_mult=0.0, lr_mult=0.1),
335
+ 'backbone.dinov2.blocks.1.norm':
336
+ dict(decay_mult=0.0, lr_mult=0.1),
337
+ 'backbone.dinov2.blocks.10.norm':
338
+ dict(decay_mult=0.0, lr_mult=0.1),
339
+ 'backbone.dinov2.blocks.11.norm':
340
+ dict(decay_mult=0.0, lr_mult=0.1),
341
+ 'backbone.dinov2.blocks.12.norm':
342
+ dict(decay_mult=0.0, lr_mult=0.1),
343
+ 'backbone.dinov2.blocks.13.norm':
344
+ dict(decay_mult=0.0, lr_mult=0.1),
345
+ 'backbone.dinov2.blocks.14.norm':
346
+ dict(decay_mult=0.0, lr_mult=0.1),
347
+ 'backbone.dinov2.blocks.15.norm':
348
+ dict(decay_mult=0.0, lr_mult=0.1),
349
+ 'backbone.dinov2.blocks.16.norm':
350
+ dict(decay_mult=0.0, lr_mult=0.1),
351
+ 'backbone.dinov2.blocks.17.norm':
352
+ dict(decay_mult=0.0, lr_mult=0.1),
353
+ 'backbone.dinov2.blocks.18.norm':
354
+ dict(decay_mult=0.0, lr_mult=0.1),
355
+ 'backbone.dinov2.blocks.19.norm':
356
+ dict(decay_mult=0.0, lr_mult=0.1),
357
+ 'backbone.dinov2.blocks.2.norm':
358
+ dict(decay_mult=0.0, lr_mult=0.1),
359
+ 'backbone.dinov2.blocks.20.norm':
360
+ dict(decay_mult=0.0, lr_mult=0.1),
361
+ 'backbone.dinov2.blocks.21.norm':
362
+ dict(decay_mult=0.0, lr_mult=0.1),
363
+ 'backbone.dinov2.blocks.22.norm':
364
+ dict(decay_mult=0.0, lr_mult=0.1),
365
+ 'backbone.dinov2.blocks.23.norm':
366
+ dict(decay_mult=0.0, lr_mult=0.1),
367
+ 'backbone.dinov2.blocks.3.norm':
368
+ dict(decay_mult=0.0, lr_mult=0.1),
369
+ 'backbone.dinov2.blocks.4.norm':
370
+ dict(decay_mult=0.0, lr_mult=0.1),
371
+ 'backbone.dinov2.blocks.5.norm':
372
+ dict(decay_mult=0.0, lr_mult=0.1),
373
+ 'backbone.dinov2.blocks.6.norm':
374
+ dict(decay_mult=0.0, lr_mult=0.1),
375
+ 'backbone.dinov2.blocks.7.norm':
376
+ dict(decay_mult=0.0, lr_mult=0.1),
377
+ 'backbone.dinov2.blocks.8.norm':
378
+ dict(decay_mult=0.0, lr_mult=0.1),
379
+ 'backbone.dinov2.blocks.9.norm':
380
+ dict(decay_mult=0.0, lr_mult=0.1),
381
+ 'backbone.dinov2.norm':
382
+ dict(decay_mult=0.0, lr_mult=0.1),
383
+ 'level_embed':
384
+ dict(decay_mult=0.0, lr_mult=1.0),
385
+ 'pos_embed':
386
+ dict(decay_mult=0.0, lr_mult=0.1),
387
+ 'query_embed':
388
+ dict(decay_mult=0.0, lr_mult=1.0),
389
+ 'query_feat':
390
+ dict(decay_mult=0.0, lr_mult=1.0)
391
+ }),
392
+ norm_decay_mult=0.0),
393
+ type='OptimWrapper')
394
+ optimizer = dict(
395
+ betas=(
396
+ 0.9,
397
+ 0.999,
398
+ ), eps=1e-08, lr=3e-05, type='AdamW', weight_decay=0.05)
399
+ param_scheduler = [
400
+ dict(
401
+ begin=0, by_epoch=False, end=1500, start_factor=1e-06,
402
+ type='LinearLR'),
403
+ dict(
404
+ begin=1500,
405
+ by_epoch=False,
406
+ end=300000,
407
+ eta_min=0.0,
408
+ power=0.9,
409
+ type='PolyLR'),
410
+ ]
411
+ resume = False
412
+ test_cfg = dict(type='TestLoop')
413
+ test_dataloader = dict(
414
+ batch_size=1,
415
+ dataset=dict(
416
+ data_prefix=dict(
417
+ img_path='val20250512/images', seg_map_path='val20250512/labels'),
418
+ data_root=
419
+ '/mnt/data_ssd/limaopeng/limaopeng/segmentation/dataset/deep_fashion_10k',
420
+ pipeline=[
421
+ dict(type='LoadImageFromFile'),
422
+ dict(keep_ratio=False, scale=(
423
+ 896,
424
+ 896,
425
+ ), type='Resize'),
426
+ dict(reduce_zero_label=False, type='LoadAnnotations'),
427
+ dict(type='PackSegInputs'),
428
+ ],
429
+ type='HumanParsingDataset'),
430
+ num_workers=4,
431
+ persistent_workers=True,
432
+ sampler=dict(shuffle=False, type='DefaultSampler'))
433
+ test_evaluator = dict(
434
+ iou_metrics=[
435
+ 'mIoU',
436
+ ], type='IoUMetric')
437
+ test_pipeline = [
438
+ dict(type='LoadImageFromFile'),
439
+ dict(keep_ratio=False, scale=(
440
+ 896,
441
+ 896,
442
+ ), type='Resize'),
443
+ dict(reduce_zero_label=False, type='LoadAnnotations'),
444
+ dict(type='PackSegInputs'),
445
+ ]
446
+ train_cfg = dict(
447
+ max_iters=300000, type='IterBasedTrainLoop', val_interval=2000)
448
+ train_dataloader = dict(
449
+ batch_size=3,
450
+ dataset=dict(
451
+ data_prefix=dict(
452
+ img_path='train20250512/images',
453
+ seg_map_path='train20250512/labels'),
454
+ data_root=
455
+ '/mnt/data_ssd/limaopeng/limaopeng/segmentation/dataset/deep_fashion_10k',
456
+ pipeline=[
457
+ dict(type='LoadImageFromFile'),
458
+ dict(type='LoadAnnotations'),
459
+ dict(
460
+ keep_ratio=True,
461
+ ratio_range=(
462
+ 0.2,
463
+ 2.0,
464
+ ),
465
+ scale=(
466
+ 896,
467
+ 896,
468
+ ),
469
+ type='RandomResize'),
470
+ dict(
471
+ cat_max_ratio=0.75, crop_size=(
472
+ 896,
473
+ 896,
474
+ ), type='RandomCrop'),
475
+ dict(keep_ratio=True, scale=(
476
+ 896,
477
+ 896,
478
+ ), type='Resize'),
479
+ dict(degree=45, prob=0.5, seg_pad_val=0, type='RandomRotate'),
480
+ dict(type='PhotoMetricDistortion'),
481
+ dict(type='PackSegInputs'),
482
+ ],
483
+ type='HumanParsingDataset'),
484
+ num_workers=4,
485
+ persistent_workers=True,
486
+ sampler=dict(shuffle=True, type='InfiniteSampler'))
487
+ train_pipeline = [
488
+ dict(type='LoadImageFromFile'),
489
+ dict(type='LoadAnnotations'),
490
+ dict(
491
+ keep_ratio=True,
492
+ ratio_range=(
493
+ 0.2,
494
+ 2.0,
495
+ ),
496
+ scale=(
497
+ 896,
498
+ 896,
499
+ ),
500
+ type='RandomResize'),
501
+ dict(cat_max_ratio=0.75, crop_size=(
502
+ 896,
503
+ 896,
504
+ ), type='RandomCrop'),
505
+ dict(keep_ratio=True, scale=(
506
+ 896,
507
+ 896,
508
+ ), type='Resize'),
509
+ dict(degree=45, prob=0.5, seg_pad_val=0, type='RandomRotate'),
510
+ dict(type='PhotoMetricDistortion'),
511
+ dict(type='PackSegInputs'),
512
+ ]
513
+ tta_model = dict(type='SegTTAModel')
514
+ tta_pipeline = [
515
+ dict(backend_args=None, type='LoadImageFromFile'),
516
+ dict(
517
+ transforms=[
518
+ [
519
+ dict(keep_ratio=True, scale_factor=0.5, type='Resize'),
520
+ dict(keep_ratio=True, scale_factor=0.75, type='Resize'),
521
+ dict(keep_ratio=True, scale_factor=1.0, type='Resize'),
522
+ dict(keep_ratio=True, scale_factor=1.25, type='Resize'),
523
+ dict(keep_ratio=True, scale_factor=1.5, type='Resize'),
524
+ dict(keep_ratio=True, scale_factor=1.75, type='Resize'),
525
+ ],
526
+ [
527
+ dict(direction='horizontal', prob=0.0, type='RandomFlip'),
528
+ dict(direction='horizontal', prob=1.0, type='RandomFlip'),
529
+ ],
530
+ [
531
+ dict(type='LoadAnnotations'),
532
+ ],
533
+ [
534
+ dict(type='PackSegInputs'),
535
+ ],
536
+ ],
537
+ type='TestTimeAug'),
538
+ ]
539
+ val_cfg = dict(type='ValLoop')
540
+ val_dataloader = dict(
541
+ batch_size=1,
542
+ dataset=dict(
543
+ data_prefix=dict(
544
+ img_path='val20250512/images', seg_map_path='val20250512/labels'),
545
+ data_root=
546
+ '/mnt/data_ssd/limaopeng/limaopeng/segmentation/dataset/deep_fashion_10k',
547
+ pipeline=[
548
+ dict(type='LoadImageFromFile'),
549
+ dict(keep_ratio=False, scale=(
550
+ 896,
551
+ 896,
552
+ ), type='Resize'),
553
+ dict(reduce_zero_label=False, type='LoadAnnotations'),
554
+ dict(type='PackSegInputs'),
555
+ ],
556
+ type='HumanParsingDataset'),
557
+ num_workers=4,
558
+ persistent_workers=True,
559
+ sampler=dict(shuffle=False, type='DefaultSampler'))
560
+ val_evaluator = dict(
561
+ iou_metrics=[
562
+ 'mIoU',
563
+ ], type='IoUMetric')
564
+ vis_backends = [
565
+ dict(type='LocalVisBackend'),
566
+ ]
567
+ visualizer = dict(
568
+ name='visualizer',
569
+ type='SegLocalVisualizer',
570
+ vis_backends=[
571
+ dict(type='LocalVisBackend'),
572
+ ])
573
+ work_dir = './work_dirs/depth_anything_large_mask2former_16xb1_160k_human_pasing_fasion_1024x1024_boundary_20250521'
head_extractor/build/lib/head_extractor/processor.py ADDED
@@ -0,0 +1,585 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ from mmseg.apis import inference_model, init_model
4
+ from PIL import Image
5
+ import cv2
6
+ from enum import Enum
7
+ import importlib.resources
8
+
9
+
10
+ '''
11
+ Labels:
12
+ 0: 'background' 1: 'top' 2: 'outer' 3: 'skirt'
13
+ 4: 'dress' 5: 'pants' 6: 'leggings' 7: 'headwear'
14
+ 8: 'eyeglass' 9: 'neckwear' 10: 'belt' 11: 'footwear'
15
+ 12: 'bag' 13: 'hair' 14: 'face' 15: 'skin'
16
+ 16: 'ring' 17: 'wrist_wearing' 18: 'socks' 19: 'gloves'
17
+ 20: 'necklace' 21: 'rompers' 22: 'earrings' 23: 'tie'
18
+ 24: Left_Foot
19
+ 25: Left_Hand
20
+ 26: Left_Lower_Arm
21
+ 27: Left_Lower_Leg
22
+ 28: Left_Upper_Arm
23
+ 29: Left_Upper_Leg
24
+ 30: Right_Foot
25
+ 31: Right_Hand
26
+ 32: Right_Lower_Arm
27
+ 33: Right_Lower_Leg
28
+ 34: Right_Upper_Arm
29
+ 35: Right_Upper_Leg
30
+ 36: Torso
31
+ '''
32
+
33
class PersonSeg:
    """Thin wrapper around an mmseg semantic-segmentation model."""

    def __init__(self, config_path, model_path, device='cuda'):
        """Build the segmentation model from a config file and a checkpoint."""
        self.model = init_model(config_path, model_path, device=device)

    def process(self, image):
        """Run inference on *image*.

        Returns:
            The per-pixel label map as a 2-D NumPy array (H, W).
        """
        seg_result = inference_model(self.model, image)
        return seg_result.pred_sem_seg.data.cpu().numpy()[0]
42
+
43
class TaskType(Enum):
    """Content types the pipeline can extract from a person image."""

    # Head-related tasks
    face = "face"
    head = "head"
    head_plus_shoulders = "head_plus_shoulders"

    # Clothing-related tasks
    top_cloth = "top_cloth"
    bottom_cloth = "bottom_cloth"
    full_clothes = "full_clothes"

    # Whole-body task
    full_character = "full_character"
55
+
56
class ProcessorPipeline:
    """Extracts a mask for a requested content type from a single image."""

    def __init__(self, seg_pipe: PersonSeg):
        # The human-parsing segmentor used by all extraction tasks.
        self.seg_pipe = seg_pipe

    @classmethod
    def load(cls, device: str = 'cuda') -> "ProcessorPipeline":
        """Build a pipeline from the config and checkpoint bundled in the package.

        No external paths are required; both files are resolved from the
        ``head_extractor.models`` package resources.

        Args:
            device: Device string forwarded to the segmentor (e.g. 'cuda').

        Returns:
            A ready-to-use ProcessorPipeline instance.
        """
        # ``importlib.resources.path`` is deprecated since Python 3.11;
        # ``files()`` + ``as_file()`` is the supported replacement and also
        # works when the package is shipped inside a zip archive.
        package = importlib.resources.files('head_extractor.models')
        cfg_res = package / 'depth_anything_large_mask2former_16xb1_160k_human_parsing_fashion_1024x1024.py'
        ckpt_res = package / 'ckpt.pth'
        with importlib.resources.as_file(cfg_res) as config_path, \
                importlib.resources.as_file(ckpt_res) as model_path:
            seg_pipe = PersonSeg(str(config_path), str(model_path), device=device)

        return cls(seg_pipe)
75
+
76
    def process(
        self,
        image: Image.Image,
        task_type: TaskType,
        long_edge: int = 1024
    ) -> tuple[np.ndarray, np.ndarray]:
        """Run segmentation and build the mask for ``task_type``.

        Args:
            image: Input image (PIL.Image or NumPy array).
            task_type: Which content to extract (e.g. head, clothes).
            long_edge (int): Long-side size the image is scaled down to
                before inference; smaller values are faster.

        Returns:
            Tuple of (processed image as ndarray, mask as uint8 ndarray
            with values in [0, 255]).
        """
        # 1. Normalize the input to an RGB NumPy array.
        if isinstance(image, Image.Image):
            image_np = np.array(image.convert("RGB"))
        else:  # assumed to already be a NumPy array
            image_np = image

        if len(image_np.shape) == 2:
            image_np = cv2.cvtColor(image_np, cv2.COLOR_GRAY2RGB)
        elif image_np.shape[2] == 4:
            image_np = cv2.cvtColor(image_np, cv2.COLOR_RGBA2RGB)

        processed_image_np = self.resize_long_edge(image_np, long_edge=long_edge)
        ori_h, ori_w = processed_image_np.shape[:2]

        # 2. Run the segmentor to get a per-pixel label map.
        pred_mask_map = self.seg_pipe.process(processed_image_np)

        if task_type == TaskType.head_plus_shoulders:
            # 2.1 Start from the plain "head" mask.
            head_labels = [7, 8, 13, 14]  # headwear, eyeglass, hair, face
            head_mask = np.isin(pred_mask_map, head_labels).astype(np.float32)
            head_mask = cv2.resize(head_mask, (ori_w, ori_h), interpolation=cv2.INTER_NEAREST)

            # 2.2 Compute the head bbox and extend it downward and sideways.
            rows = np.any(head_mask > 0, axis=1)
            cols = np.any(head_mask > 0, axis=0)
            if np.any(rows) and np.any(cols):
                rmin, rmax = np.where(rows)[0][[0, -1]]
                cmin, cmax = np.where(cols)[0][[0, -1]]
                h_box = max(1, rmax - rmin)
                w_box = max(1, cmax - cmin)

                down_ratio = 0.1  # downward extension, relative to head bbox height
                side_ratio = 0.6  # sideways extension, relative to head bbox width

                r2max = min(ori_h, rmax + int(h_box * down_ratio))
                c2min = max(0, cmin - int(w_box * side_ratio))
                c2max = min(ori_w, cmax + int(w_box * side_ratio))

                rect_mask = np.zeros((ori_h, ori_w), dtype=np.float32)
                rect_mask[rmin:r2max, c2min:c2max] = 1.0

                # 2.3 Inside the extended rectangle keep only person pixels
                # (filters out background).
                person_labels = list(range(1, 37))  # labels 1..36 are all person parts
                person_mask = np.isin(pred_mask_map, person_labels).astype(np.float32)
                person_mask = cv2.resize(person_mask, (ori_w, ori_h), interpolation=cv2.INTER_NEAREST)

                initial_mask = np.clip(head_mask + (person_mask * rect_mask), 0, 1)
            else:
                # No head found: fall back to the (empty) head mask.
                initial_mask = head_mask
        else:
            # All other tasks: select the labels mapped to the task.
            labels_map = self._get_labels_for_task(task_type)
            primary_labels = labels_map['primary']
            initial_mask = np.isin(pred_mask_map, primary_labels).astype(np.float32)
            initial_mask = cv2.resize(initial_mask, (ori_w, ori_h), interpolation=cv2.INTER_NEAREST)

        # 3. Post-process (task-specific morphology).
        final_mask_np = self._apply_task_specific_mask_processing(initial_mask, task_type, ori_h, ori_w)

        # 4. Return image and mask scaled to uint8.
        final_mask_uint8 = (final_mask_np * 255).astype(np.uint8)
        return processed_image_np, final_mask_uint8
156
+
157
+ def _get_labels_for_task(self, task_type: TaskType) -> dict:
158
+ """根据任务类型获取对应的标签映射"""
159
+ labels_map = {
160
+ TaskType.face: { 'primary': [8, 14] }, # eyeglass, face
161
+ TaskType.head: { 'primary': [7, 8, 13, 14] }, # headwear, eyeglass, hair, face
162
+ TaskType.top_cloth: { 'primary': [1, 2] }, # top, outer
163
+ TaskType.bottom_cloth: { 'primary': [3, 4, 5, 6] }, # skirt, dress, pants, leggings
164
+ TaskType.full_clothes: { 'primary': [1, 2, 3, 4, 5, 6] }, # all clothes
165
+ TaskType.full_character: { 'primary': list(range(1, 37)) }, # 包含所有人物相关部分
166
+ }
167
+ return labels_map.get(task_type, {'primary': []})
168
+
169
    def _apply_task_specific_mask_processing(self, mask: np.ndarray, task_type: TaskType, ori_h: int, ori_w: int) -> np.ndarray:
        """Apply task-specific morphological post-processing to ``mask``.

        ``mask`` is a float array in [0, 1]; the returned mask is also
        float in [0, 1].
        """
        if task_type == TaskType.face:
            # Face task: a small, plain dilation.
            expand_kernel = 5
            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (expand_kernel, expand_kernel))
            mask = cv2.dilate((mask > 0.5).astype(np.float32), kernel)

        elif task_type == TaskType.head:
            # Head task: erode first, then dilate with a larger kernel.
            kernel = np.ones((7, 7), dtype=np.uint8)
            mask = cv2.erode(mask, kernel, iterations=1)

            expand_kernel = 11
            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (expand_kernel, expand_kernel))
            mask = cv2.dilate((mask > 0.5).astype(np.float32), kernel)

        elif task_type == TaskType.head_plus_shoulders:
            # Dilation biased toward downward/sideways growth compared to
            # the head task (kernel height > kernel width).
            # Light erosion first to remove boundary fuzz.
            erode_k = 5
            kernel = np.ones((erode_k, erode_k), dtype=np.uint8)
            mask = cv2.erode(mask, kernel, iterations=1)

            max_side = max(ori_h, ori_w)
            h_kernel = max(15, int(max_side * 0.05))  # taller
            w_kernel = max(11, int(max_side * 0.03))  # slightly narrower
            # Force odd kernel sizes.
            h_kernel = h_kernel // 2 * 2 + 1
            w_kernel = w_kernel // 2 * 2 + 1

            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (w_kernel, h_kernel))
            mask = cv2.dilate((mask > 0.5).astype(np.float32), kernel)

        if task_type in [TaskType.top_cloth, TaskType.bottom_cloth, TaskType.full_clothes, TaskType.full_character]:
            # Clothing/full-body tasks: dilate then blur for soft edges.
            # NOTE(review): blur_kernel == 1 makes the GaussianBlur a no-op;
            # presumably a tuning leftover — confirm before changing.
            expand_ratio = 0.01
            max_side = max(ori_h, ori_w)
            blur_kernel = 1
            expand_kernel = int(max_side * expand_ratio) // 2 * 2 + 1  # odd size

            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (expand_kernel, expand_kernel))
            expanded = cv2.dilate((mask > 0.5).astype(np.uint8), kernel)

            blurred = cv2.GaussianBlur(
                expanded.astype(np.float32),
                (blur_kernel, blur_kernel),
                sigmaX=0,
            )
            # Renormalize to [0, 1]; epsilon guards against an all-zero mask.
            mask = np.clip(blurred / (blurred.max() + 1e-6), 0, 1)

        return mask
221
+
222
+ @staticmethod
223
+ def resize_long_edge(image_np: np.ndarray, long_edge=1024) -> np.ndarray:
224
+ """将图像等比例缩放到指定长边尺寸 (使用OpenCV)"""
225
+ original_height, original_width = image_np.shape[:2]
226
+
227
+ max_dimension = max(original_width, original_height)
228
+ if max_dimension <= long_edge:
229
+ return image_np
230
+
231
+ ratio = long_edge / max_dimension
232
+ new_width = int(original_width * ratio)
233
+ new_height = int(original_height * ratio)
234
+
235
+ # 使用cv2.INTER_AREA进行缩放,对于缩小图像效果较好且速度快
236
+ return cv2.resize(image_np, (new_width, new_height), interpolation=cv2.INTER_AREA)
237
+
238
+ @staticmethod
239
+ def _pad_to_square_np(image_np: np.ndarray, background_value: tuple) -> np.ndarray:
240
+ """将NumPy图像填充为正方形"""
241
+ height, width = image_np.shape[:2]
242
+ if width == height:
243
+ return image_np
244
+
245
+ max_dim = max(width, height)
246
+
247
+ # 根据通道数确定背景色
248
+ channels = image_np.shape[2] if len(image_np.shape) > 2 else 1
249
+
250
+ # 创建一个正确尺寸的背景板
251
+ padded_image = np.full((max_dim, max_dim, channels), background_value, dtype=image_np.dtype)
252
+
253
+ paste_x = (max_dim - width) // 2
254
+ paste_y = (max_dim - height) // 2
255
+
256
+ padded_image[paste_y:paste_y+height, paste_x:paste_x+width] = image_np
257
+ return padded_image
258
+
259
+ @staticmethod
260
+ def pad_to_square(image: Image.Image, background_color: tuple = (255, 255, 255)) -> Image.Image:
261
+ """
262
+ 将图像填充为正方形
263
+
264
+ Args:
265
+ image: 输入图像
266
+ background_color: 填充的背景颜色
267
+
268
+ Returns:
269
+ 填充为正方形的图像
270
+ """
271
+ width, height = image.size
272
+ if width == height:
273
+ return image
274
+
275
+ max_dim = max(width, height)
276
+ padded_image = Image.new(image.mode, (max_dim, max_dim), background_color)
277
+ paste_x = (max_dim - width) // 2
278
+ paste_y = (max_dim - height) // 2
279
+ padded_image.paste(image, (paste_x, paste_y))
280
+ return padded_image
281
+
282
+ def crop_image_by_mask(self, image: Image.Image, mask: Image.Image, padding: int = 20) -> Image.Image:
283
+ """
284
+ 根据mask裁剪图像,只保留mask覆盖的区域
285
+
286
+ Args:
287
+ image: 原始图像
288
+ mask: 二值mask图像
289
+ padding: 裁剪区域的边距扩展像素数
290
+
291
+ Returns:
292
+ 裁剪后的图像
293
+ """
294
+ # 转换为numpy数组
295
+ mask_np = np.array(mask)
296
+ image_np = np.array(image)
297
+
298
+ # 找到mask中非零像素的边界框
299
+ rows = np.any(mask_np > 0, axis=1)
300
+ cols = np.any(mask_np > 0, axis=0)
301
+
302
+ if not np.any(rows) or not np.any(cols):
303
+ # 如果mask为空,返回原图
304
+ return image
305
+
306
+ # 获取边界框坐标
307
+ rmin, rmax = np.where(rows)[0][[0, -1]]
308
+ cmin, cmax = np.where(cols)[0][[0, -1]]
309
+
310
+ # 添加padding并确保不超出图像边界
311
+ h, w = image_np.shape[:2]
312
+ rmin = max(0, rmin - padding)
313
+ rmax = min(h, rmax + padding + 1)
314
+ cmin = max(0, cmin - padding)
315
+ cmax = min(w, cmax + padding + 1)
316
+
317
+ # 裁剪图像
318
+ cropped_image = image_np[rmin:rmax, cmin:cmax]
319
+
320
+ return Image.fromarray(cropped_image)
321
+
322
+ def _crop_image_and_mask_np(self, image_np: np.ndarray, mask_np: np.ndarray, padding: int = 20) -> tuple[np.ndarray, np.ndarray]:
323
+ """根据mask同时裁剪NumPy图像和mask"""
324
+ rows = np.any(mask_np > 0, axis=1)
325
+ cols = np.any(mask_np > 0, axis=0)
326
+
327
+ if not np.any(rows) or not np.any(cols):
328
+ return image_np, mask_np
329
+
330
+ rmin, rmax = np.where(rows)[0][[0, -1]]
331
+ cmin, cmax = np.where(cols)[0][[0, -1]]
332
+
333
+ h, w = image_np.shape[:2]
334
+ rmin = max(0, rmin - padding)
335
+ rmax = min(h, rmax + padding + 1)
336
+ cmin = max(0, cmin - padding)
337
+ cmax = min(w, cmax + padding + 1)
338
+
339
+ cropped_image_np = image_np[rmin:rmax, cmin:cmax]
340
+ cropped_mask_np = mask_np[rmin:rmax, cmin:cmax]
341
+
342
+ return cropped_image_np, cropped_mask_np
343
+
344
+ def crop_image_and_mask(self, image: Image.Image, mask: Image.Image, padding: int = 20) -> tuple[Image.Image, Image.Image]:
345
+ """根据mask同时裁剪图像和mask,避免重复计算边界框"""
346
+ mask_np = np.array(mask)
347
+ image_np = np.array(image)
348
+
349
+ rows = np.any(mask_np > 0, axis=1)
350
+ cols = np.any(mask_np > 0, axis=0)
351
+
352
+ if not np.any(rows) or not np.any(cols):
353
+ return image, mask
354
+
355
+ rmin, rmax = np.where(rows)[0][[0, -1]]
356
+ cmin, cmax = np.where(cols)[0][[0, -1]]
357
+
358
+ h, w = image_np.shape[:2]
359
+ rmin = max(0, rmin - padding)
360
+ rmax = min(h, rmax + padding + 1)
361
+ cmin = max(0, cmin - padding)
362
+ cmax = min(w, cmax + padding + 1)
363
+
364
+ cropped_image_np = image_np[rmin:rmax, cmin:cmax]
365
+ cropped_mask_np = mask_np[rmin:rmax, cmin:cmax]
366
+
367
+ return Image.fromarray(cropped_image_np), Image.fromarray(cropped_mask_np)
368
+
369
+ def _apply_mask_to_image_np(self, image_np: np.ndarray, mask_np: np.ndarray, background_color: tuple) -> np.ndarray:
370
+ """将NumPy mask应用到NumPy图像上"""
371
+ mask_normalized = mask_np.astype(np.float32) / 255.0
372
+ background = np.full_like(image_np, background_color)
373
+ result = image_np * mask_normalized[..., np.newaxis] + background * (1 - mask_normalized[..., np.newaxis])
374
+ return result.astype(np.uint8)
375
+
376
+ def apply_mask_to_image(self, image: Image.Image, mask: Image.Image, background_color: tuple = (255, 255, 255)) -> Image.Image:
377
+ """
378
+ 将mask应用到图像上,mask外的区域设置为指定背景色
379
+
380
+ Args:
381
+ image: 原始图像
382
+ mask: 二值mask图像
383
+ background_color: 背景颜色 (R, G, B)
384
+
385
+ Returns:
386
+ 应用mask后的图像
387
+ """
388
+ # 转换为numpy数组
389
+ image_np = np.array(image)
390
+ mask_np = np.array(mask)
391
+
392
+ # 将mask归一化到0-1范围
393
+ mask_normalized = mask_np.astype(np.float32) / 255.0
394
+
395
+ # 创建背景
396
+ background = np.full_like(image_np, background_color)
397
+
398
+ # 应用mask:mask区域保持原图,其他区域为背景色
399
+ result = image_np * mask_normalized[..., np.newaxis] + background * (1 - mask_normalized[..., np.newaxis])
400
+
401
+ return Image.fromarray(result.astype(np.uint8))
402
+
403
    def extract_head(
        self,
        image: Image.Image,
        crop_padding: int = 10,
        background_color: tuple = (255, 255, 255),
        pad2square: bool = True,
        output_mode: str = 'RGB',
        long_edge: int = 1024,
        include_shoulders: bool = False
    ) -> Image.Image:
        """Extract the head region from the input image.

        Returns a cropped image, optionally padded to a square.

        Args:
            image: Input image (PIL.Image or np.ndarray).
            crop_padding: Extra margin added around the crop bounding box.
            background_color: Background fill color when ``output_mode`` is 'RGB'.
            pad2square (bool): Pad the final result to a square. Defaults to True.
            output_mode (str): Output image mode, 'RGB' (solid background) or
                'RGBA' (transparent background). Defaults to 'RGB'.
            long_edge (int): Long-side size used to scale the image before it
                is fed to the model; smaller is faster but may reduce accuracy.
                Defaults to 1024.
            include_shoulders (bool): Also include the shoulder area below and
                beside the head. Defaults to False.

        Returns:
            The processed head image (PIL.Image).
        """
        # 1. Pick the segmentation task based on the shoulders option.
        task = TaskType.head_plus_shoulders if include_shoulders else TaskType.head
        processed_image_np, head_mask_np = self.process(
            image=image,
            task_type=task,
            long_edge=long_edge
        )

        # 2. NumPy-based crop around the mask.
        face_cropped_np, mask_cropped_np = self._crop_image_and_mask_np(
            processed_image_np, head_mask_np, padding=crop_padding
        )

        # 3. Apply the mask according to the output mode (RGB/RGBA).
        output_mode = output_mode.upper()
        if output_mode == 'RGBA':
            # Build an image with an alpha channel.
            # First make sure the image has exactly 3 channels.
            if face_cropped_np.shape[2] == 4:
                face_cropped_np = face_cropped_np[:,:,:3]
            # Create the RGBA image.
            result_image_np = cv2.cvtColor(face_cropped_np, cv2.COLOR_RGB2RGBA)
            result_image_np[:, :, 3] = mask_cropped_np  # mask becomes the alpha channel

        elif output_mode == 'RGB':
            # NumPy-based mask application over a solid background.
            result_image_np = self._apply_mask_to_image_np(
                face_cropped_np,
                mask_cropped_np,
                background_color=background_color
            )
        else:
            raise ValueError("output_mode must be 'RGB' or 'RGBA'")

        # 4. Optional NumPy-based square padding.
        if pad2square:
            if output_mode == 'RGBA':
                pad_color = (255, 255, 255, 0)  # transparent background
            else:  # RGB
                pad_color = background_color

            final_image_np = self._pad_to_square_np(
                result_image_np,
                background_value=pad_color
            )
        else:
            final_image_np = result_image_np

        # 5. Convert to a PIL image only at the very end.
        if output_mode == 'RGBA':
            return Image.fromarray(final_image_np, 'RGBA')
        else:
            return Image.fromarray(final_image_np, 'RGB')
480
+
481
+
482
+ def extract(
483
+ self,
484
+ task_type: TaskType.full_character,
485
+ image: Image.Image,
486
+ crop_padding: int = 10,
487
+ background_color: tuple = (255, 255, 255),
488
+ pad2square: bool = True,
489
+ output_mode: str = 'RGB',
490
+ long_edge: int = 1024
491
+ ) -> Image.Image:
492
+ """
493
+ 从输入图像中提取头部区域,并返回一个裁剪、填充为正方形的图像。
494
+
495
+ Args:
496
+ image: 输入图像 (PIL.Image or np.ndarray).
497
+ crop_padding: 裁剪边界框的额外边距.
498
+ background_color: `output_mode` 为 'RGB' 时,用于填充背景的颜色.
499
+ pad2square (bool): 是否将最终结果填充为正方形. 默认为 True.
500
+ output_mode (str): 输出图像模式,可选 'RGB' (纯色背景) 或 'RGBA' (透明背景). 默认为 'RGB'.
501
+ long_edge (int): 送入模型前缩放的长边尺寸,值越小速度越快,但可能影响精度。默认为1024。
502
+
503
+ Returns:
504
+ 处理后的头部图像 (PIL.Image).
505
+ """
506
+ # 1. 运行分割��直接获取 NumPy 结果
507
+ processed_image_np, head_mask_np = self.process(
508
+ image=image,
509
+ task_type=task_type,
510
+ long_edge=long_edge
511
+ )
512
+
513
+ # 2. NumPy-based 裁剪
514
+ face_cropped_np, mask_cropped_np = self._crop_image_and_mask_np(
515
+ processed_image_np, head_mask_np, padding=crop_padding
516
+ )
517
+
518
+ # 3. 根据输出模式(RGB/RGBA)应用蒙版
519
+ output_mode = output_mode.upper()
520
+ if output_mode == 'RGBA':
521
+ # 创建一个带透明通道的RGBA图像
522
+ # 首先确保图像是3通道的
523
+ if face_cropped_np.shape[2] == 4:
524
+ face_cropped_np = face_cropped_np[:,:,:3]
525
+ # 创建RGBA图像
526
+ result_image_np = cv2.cvtColor(face_cropped_np, cv2.COLOR_RGB2RGBA)
527
+ result_image_np[:, :, 3] = mask_cropped_np # 设置alpha通道
528
+
529
+ elif output_mode == 'RGB':
530
+ # NumPy-based 蒙版应用
531
+ result_image_np = self._apply_mask_to_image_np(
532
+ face_cropped_np,
533
+ mask_cropped_np,
534
+ background_color=background_color
535
+ )
536
+ else:
537
+ raise ValueError("output_mode must be 'RGB' or 'RGBA'")
538
+
539
+ # 4. 可选:NumPy-based 填充
540
+ if pad2square:
541
+ if output_mode == 'RGBA':
542
+ pad_color = (255, 255, 255, 0) # 透明背景
543
+ else: # RGB
544
+ pad_color = background_color
545
+
546
+ final_image_np = self._pad_to_square_np(
547
+ result_image_np,
548
+ background_value=pad_color
549
+ )
550
+ else:
551
+ final_image_np = result_image_np
552
+
553
+ # 5. 仅在最后一步转换为 PIL Image
554
+ if output_mode == 'RGBA':
555
+ return Image.fromarray(final_image_np, 'RGBA')
556
+ else:
557
+ return Image.fromarray(final_image_np, 'RGB')
558
+
559
if __name__ == '__main__':
    # Demo: build the pipeline from the packaged model and run head
    # extraction on a sample image.
    print("Initializing pipeline from package resources...")
    pipe = ProcessorPipeline.load()
    print("Pipeline initialized.")

    # Path of the demo image — replace with your own picture.
    sample_path = "001.jpg"

    if not os.path.exists(sample_path):
        print(f"示例图片未找到: {sample_path}")
    else:
        print(f"Processing image: {sample_path}")
        source_image = Image.open(sample_path)

        print("正在提取头部...")
        head_image = pipe.extract_head(source_image)

        # Persist the final result.
        output_path = "output_head_extracted.png"
        head_image.save(output_path)

        print("\n处理完成!")
        print(f"已保存提取的头部图像至 '{output_path}'")
585
+
head_extractor/build/lib/mmdet/__init__.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import mmcv
3
+ import mmengine
4
+ from mmengine.utils import digit_version
5
+
6
+ from .version import __version__, version_info
7
+
8
# Supported dependency window for mmcv: inclusive lower bound,
# exclusive upper bound.
mmcv_minimum_version = '2.0.0rc4'
mmcv_maximum_version = '2.2.0'
mmcv_version = digit_version(mmcv.__version__)

# Supported dependency window for mmengine, same bound semantics.
mmengine_minimum_version = '0.7.1'
mmengine_maximum_version = '1.0.0'
mmengine_version = digit_version(mmengine.__version__)

# Fail fast at import time if an incompatible mmcv is installed.
assert (mmcv_version >= digit_version(mmcv_minimum_version)
        and mmcv_version < digit_version(mmcv_maximum_version)), \
    f'MMCV=={mmcv.__version__} is used but incompatible. ' \
    f'Please install mmcv>={mmcv_minimum_version}, <{mmcv_maximum_version}.'

# Fail fast at import time if an incompatible mmengine is installed.
assert (mmengine_version >= digit_version(mmengine_minimum_version)
        and mmengine_version < digit_version(mmengine_maximum_version)), \
    f'MMEngine=={mmengine.__version__} is used but incompatible. ' \
    f'Please install mmengine>={mmengine_minimum_version}, ' \
    f'<{mmengine_maximum_version}.'

__all__ = ['__version__', 'version_info', 'digit_version']
head_extractor/build/lib/mmdet/apis/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from .det_inferencer import DetInferencer
3
+ from .inference import (async_inference_detector, inference_detector,
4
+ inference_mot, init_detector, init_track_model)
5
+
6
# Public API re-exported by ``mmdet.apis``.
__all__ = [
    'init_detector', 'async_inference_detector', 'inference_detector',
    'DetInferencer', 'inference_mot', 'init_track_model'
]
head_extractor/build/lib/mmdet/apis/det_inferencer.py ADDED
@@ -0,0 +1,652 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import copy
3
+ import os.path as osp
4
+ import warnings
5
+ from typing import Dict, Iterable, List, Optional, Sequence, Tuple, Union
6
+
7
+ import mmcv
8
+ import mmengine
9
+ import numpy as np
10
+ import torch.nn as nn
11
+ from mmcv.transforms import LoadImageFromFile
12
+ from mmengine.dataset import Compose
13
+ from mmengine.fileio import (get_file_backend, isdir, join_path,
14
+ list_dir_or_file)
15
+ from mmengine.infer.infer import BaseInferencer, ModelType
16
+ from mmengine.model.utils import revert_sync_batchnorm
17
+ from mmengine.registry import init_default_scope
18
+ from mmengine.runner.checkpoint import _load_checkpoint_to_model
19
+ from mmengine.visualization import Visualizer
20
+ from rich.progress import track
21
+
22
+ from mmdet.evaluation import INSTANCE_OFFSET
23
+ from mmdet.registry import DATASETS
24
+ from mmdet.structures import DetDataSample
25
+ from mmdet.structures.mask import encode_mask_results, mask2bbox
26
+ from mmdet.utils import ConfigType
27
+ from ..evaluation import get_classes
28
+
29
# panopticapi is an optional dependency, only needed for panoptic
# segmentation output; fall back to None sentinels when it is absent.
try:
    from panopticapi.evaluation import VOID
    from panopticapi.utils import id2rgb
except ImportError:
    id2rgb = None
    VOID = None

# Type aliases used throughout the inferencer.
InputType = Union[str, np.ndarray]
InputsType = Union[InputType, Sequence[InputType]]
PredType = List[DetDataSample]
ImgType = Union[np.ndarray, Sequence[np.ndarray]]

# File suffixes accepted when a directory is given as input.
IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif',
                  '.tiff', '.webp')
43
+
44
+
45
class DetInferencer(BaseInferencer):
    """Object Detection Inferencer.

    Args:
        model (str, optional): Path to the config file or the model name
            defined in metafile. For example, it could be
            "rtmdet-s" or 'rtmdet_s_8xb32-300e_coco' or
            "configs/rtmdet/rtmdet_s_8xb32-300e_coco.py".
            If model is not specified, user must provide the
            `weights` saved by MMEngine which contains the config string.
            Defaults to None.
        weights (str, optional): Path to the checkpoint. If it is not specified
            and model is a model name of metafile, the weights will be loaded
            from metafile. Defaults to None.
        device (str, optional): Device to run inference. If None, the available
            device will be automatically used. Defaults to None.
        scope (str, optional): The scope of the model. Defaults to mmdet.
        palette (str): Color palette used for visualization. The order of
            priority is palette -> config -> checkpoint. Defaults to 'none'.
        show_progress (bool): Control whether to display the progress
            bar during the inference process. Defaults to True.
    """

    # Keyword-argument names accepted by each stage of ``__call__``;
    # BaseInferencer uses these sets to route user kwargs to the stages.
    preprocess_kwargs: set = set()
    forward_kwargs: set = set()
    visualize_kwargs: set = {
        'return_vis',
        'show',
        'wait_time',
        'draw_pred',
        'pred_score_thr',
        'img_out_dir',
        'no_save_vis',
    }
    postprocess_kwargs: set = {
        'print_result',
        'pred_out_dir',
        'return_datasamples',
        'no_save_pred',
    }

    def __init__(self,
                 model: Optional[Union[ModelType, str]] = None,
                 weights: Optional[str] = None,
                 device: Optional[str] = None,
                 scope: Optional[str] = 'mmdet',
                 palette: str = 'none',
                 show_progress: bool = True) -> None:
        # A global counter tracking the number of images processed, for
        # naming of the output images
        self.num_visualized_imgs = 0
        self.num_predicted_imgs = 0
        self.palette = palette
        # Register mmdet modules before the base class builds the model.
        init_default_scope(scope)
        super().__init__(
            model=model, weights=weights, device=device, scope=scope)
        # SyncBN only works in distributed training; convert to plain BN.
        self.model = revert_sync_batchnorm(self.model)
        self.show_progress = show_progress
103
+
104
    def _load_weights_to_model(self, model: nn.Module,
                               checkpoint: Optional[dict],
                               cfg: Optional[ConfigType]) -> None:
        """Loading model weights and meta information from cfg and checkpoint.

        Args:
            model (nn.Module): Model to load weights and meta information.
            checkpoint (dict, optional): The loaded checkpoint.
            cfg (Config or ConfigDict, optional): The loaded config.
        """

        if checkpoint is not None:
            _load_checkpoint_to_model(model, checkpoint)
            checkpoint_meta = checkpoint.get('meta', {})
            # save the dataset_meta in the model for convenience
            if 'dataset_meta' in checkpoint_meta:
                # mmdet 3.x, all keys should be lowercase
                model.dataset_meta = {
                    k.lower(): v
                    for k, v in checkpoint_meta['dataset_meta'].items()
                }
            elif 'CLASSES' in checkpoint_meta:
                # < mmdet 3.x
                classes = checkpoint_meta['CLASSES']
                model.dataset_meta = {'classes': classes}
            else:
                # No class information in the checkpoint at all.
                warnings.warn(
                    'dataset_meta or class names are not saved in the '
                    'checkpoint\'s meta data, use COCO classes by default.')
                model.dataset_meta = {'classes': get_classes('coco')}
        else:
            warnings.warn('Checkpoint is not loaded, and the inference '
                          'result is calculated by the randomly initialized '
                          'model!')
            warnings.warn('weights is None, use COCO classes by default.')
            model.dataset_meta = {'classes': get_classes('coco')}

        # Priority: args.palette -> config -> checkpoint
        if self.palette != 'none':
            model.dataset_meta['palette'] = self.palette
        else:
            test_dataset_cfg = copy.deepcopy(cfg.test_dataloader.dataset)
            # lazy init. We only need the metainfo.
            test_dataset_cfg['lazy_init'] = True
            metainfo = DATASETS.build(test_dataset_cfg).metainfo
            cfg_palette = metainfo.get('palette', None)
            if cfg_palette is not None:
                model.dataset_meta['palette'] = cfg_palette
            else:
                if 'palette' not in model.dataset_meta:
                    warnings.warn(
                        'palette does not exist, random is used by default. '
                        'You can also set the palette to customize.')
                    model.dataset_meta['palette'] = 'random'
158
+
159
    def _init_pipeline(self, cfg: ConfigType) -> Compose:
        """Initialize the test pipeline.

        Builds the data pipeline from the config's test dataloader, with the
        image-loading transform swapped for one that also accepts in-memory
        arrays.
        """
        pipeline_cfg = cfg.test_dataloader.dataset.pipeline

        # For inference, the key of ``img_id`` is not used.
        if 'meta_keys' in pipeline_cfg[-1]:
            pipeline_cfg[-1]['meta_keys'] = tuple(
                meta_key for meta_key in pipeline_cfg[-1]['meta_keys']
                if meta_key != 'img_id')

        load_img_idx = self._get_transform_idx(
            pipeline_cfg, ('LoadImageFromFile', LoadImageFromFile))
        if load_img_idx == -1:
            raise ValueError(
                'LoadImageFromFile is not found in the test pipeline')
        # InferencerLoader accepts paths as well as already-loaded ndarrays.
        pipeline_cfg[load_img_idx]['type'] = 'mmdet.InferencerLoader'
        return Compose(pipeline_cfg)
176
+
177
+ def _get_transform_idx(self, pipeline_cfg: ConfigType,
178
+ name: Union[str, Tuple[str, type]]) -> int:
179
+ """Returns the index of the transform in a pipeline.
180
+
181
+ If the transform is not found, returns -1.
182
+ """
183
+ for i, transform in enumerate(pipeline_cfg):
184
+ if transform['type'] in name:
185
+ return i
186
+ return -1
187
+
188
    def _init_visualizer(self, cfg: ConfigType) -> Optional[Visualizer]:
        """Initialize visualizers.

        Args:
            cfg (ConfigType): Config containing the visualizer information.

        Returns:
            Visualizer or None: Visualizer initialized with config.
        """
        visualizer = super()._init_visualizer(cfg)
        # Give the visualizer access to class names / palette for drawing.
        visualizer.dataset_meta = self.model.dataset_meta
        return visualizer
200
+
201
    def _inputs_to_list(self, inputs: InputsType) -> list:
        """Preprocess the inputs to a list.

        Preprocess inputs to a list according to its type:

        - list or tuple: return inputs
        - str:
            - Directory path: return all files in the directory
            - other cases: return a list containing the string. The string
              could be a path to file, a url or other types of string according
              to the task.

        Args:
            inputs (InputsType): Inputs for the inferencer.

        Returns:
            list: List of input for the :meth:`preprocess`.
        """
        if isinstance(inputs, str):
            backend = get_file_backend(inputs)
            if hasattr(backend, 'isdir') and isdir(inputs):
                # Backends like HttpsBackend do not implement `isdir`, so only
                # those backends that implement `isdir` could accept the inputs
                # as a directory
                filename_list = list_dir_or_file(
                    inputs, list_dir=False, suffix=IMG_EXTENSIONS)
                # Expand the directory into the full paths of its images.
                inputs = [
                    join_path(inputs, filename) for filename in filename_list
                ]

        if not isinstance(inputs, (list, tuple)):
            inputs = [inputs]

        return list(inputs)
235
+
236
def preprocess(self, inputs: InputsType, batch_size: int = 1, **kwargs):
    """Process the inputs into a model-feedable format.

    Chunks ``inputs`` into batches of ``batch_size`` and collates each
    chunk; every yielded item is a valid argument for
    ``model.test_step``. ``BaseInferencer.__call__`` consumes this
    generator like::

        chunked_data = self.preprocess(inputs, batch_size, **kwargs)
        for batch in chunked_data:
            preds = self.forward(batch, **kwargs)

    Args:
        inputs (InputsType): Inputs given by user.
        batch_size (int): batch size. Defaults to 1.

    Yields:
        Any: Data processed by the ``pipeline`` and ``collate_fn``.
    """
    for chunk in self._get_chunk_data(inputs, batch_size):
        yield self.collate_fn(chunk)
262
+
263
def _get_chunk_data(self, inputs: Iterable, chunk_size: int):
    """Get batch data from inputs.

    Each yielded chunk is a list of ``(original_input, pipeline_output)``
    pairs of length ``chunk_size``; the final chunk may be shorter.

    Args:
        inputs (Iterable): An iterable dataset.
        chunk_size (int): Equivalent to batch size.

    Yields:
        list: batch data.
    """
    inputs_iter = iter(inputs)
    while True:
        try:
            # Reset at the top of each outer iteration so a trailing
            # StopIteration only re-yields genuinely unconsumed items.
            chunk_data = []
            for _ in range(chunk_size):
                inputs_ = next(inputs_iter)
                if isinstance(inputs_, dict):
                    # Keep a handle on the raw image (or its path) for
                    # later visualization alongside the pipeline output.
                    if 'img' in inputs_:
                        ori_inputs_ = inputs_['img']
                    else:
                        ori_inputs_ = inputs_['img_path']
                    # deepcopy: the pipeline mutates its input dict.
                    chunk_data.append(
                        (ori_inputs_,
                         self.pipeline(copy.deepcopy(inputs_))))
                else:
                    chunk_data.append((inputs_, self.pipeline(inputs_)))
            yield chunk_data
        except StopIteration:
            # Flush the partially filled last chunk, if any.
            if chunk_data:
                yield chunk_data
            break
294
+
295
# TODO: Video and Webcam are currently not supported and
# may consume too much memory if your input folder has a lot of images.
# We will be optimized later.
def __call__(
        self,
        inputs: InputsType,
        batch_size: int = 1,
        return_vis: bool = False,
        show: bool = False,
        wait_time: int = 0,
        no_save_vis: bool = False,
        draw_pred: bool = True,
        pred_score_thr: float = 0.3,
        return_datasamples: bool = False,
        print_result: bool = False,
        no_save_pred: bool = True,
        out_dir: str = '',
        # by open image task
        texts: Optional[Union[str, list]] = None,
        # by open panoptic task
        stuff_texts: Optional[Union[str, list]] = None,
        # by GLIP and Grounding DINO
        custom_entities: bool = False,
        # by Grounding DINO
        tokens_positive: Optional[Union[int, list]] = None,
        **kwargs) -> dict:
    """Call the inferencer.

    Args:
        inputs (InputsType): Inputs for the inferencer.
        batch_size (int): Inference batch size. Defaults to 1.
        return_vis (bool): Whether to return the visualization result.
            Defaults to False.
        show (bool): Whether to display the visualization results in a
            popup window. Defaults to False.
        wait_time (float): The interval of show (s). Defaults to 0.
        no_save_vis (bool): Whether to force not to save prediction
            vis results. Defaults to False.
        draw_pred (bool): Whether to draw predicted bounding boxes.
            Defaults to True.
        pred_score_thr (float): Minimum score of bboxes to draw.
            Defaults to 0.3.
        return_datasamples (bool): Whether to return results as
            :obj:`DetDataSample`. Defaults to False.
        print_result (bool): Whether to print the inference result w/o
            visualization to the console. Defaults to False.
        no_save_pred (bool): Whether to force not to save prediction
            results. Defaults to True.
        out_dir: Dir to save the inference results or
            visualization. If left as empty, no file will be saved.
            Defaults to ''.
        texts (str | list[str]): Text prompts. Defaults to None.
        stuff_texts (str | list[str]): Stuff text prompts of open
            panoptic task. Defaults to None.
        custom_entities (bool): Whether to use custom entities.
            Defaults to False. Only used in GLIP and Grounding DINO.
        tokens_positive (int | list): Token spans for Grounding DINO.
            Defaults to None.
        **kwargs: Other keyword arguments passed to :meth:`preprocess`,
            :meth:`forward`, :meth:`visualize` and :meth:`postprocess`.
            Each key in kwargs should be in the corresponding set of
            ``preprocess_kwargs``, ``forward_kwargs``, ``visualize_kwargs``
            and ``postprocess_kwargs``.

    Returns:
        dict: Inference and visualization results with keys
        ``predictions`` and ``visualization``.
    """
    # Route **kwargs to the four pipeline stages by their declared
    # keyword sets.
    (
        preprocess_kwargs,
        forward_kwargs,
        visualize_kwargs,
        postprocess_kwargs,
    ) = self._dispatch_kwargs(**kwargs)

    ori_inputs = self._inputs_to_list(inputs)

    # A single text prompt is broadcast to every input image.
    if texts is not None and isinstance(texts, str):
        texts = [texts] * len(ori_inputs)
    if stuff_texts is not None and isinstance(stuff_texts, str):
        stuff_texts = [stuff_texts] * len(ori_inputs)

    # Currently only supports bs=1
    tokens_positive = [tokens_positive] * len(ori_inputs)

    # With text prompts, each input is repackaged as a dict carrying the
    # prompt metadata expected by open-vocabulary pipelines.
    if texts is not None:
        assert len(texts) == len(ori_inputs)
        for i in range(len(texts)):
            if isinstance(ori_inputs[i], str):
                ori_inputs[i] = {
                    'text': texts[i],
                    'img_path': ori_inputs[i],
                    'custom_entities': custom_entities,
                    'tokens_positive': tokens_positive[i]
                }
            else:
                ori_inputs[i] = {
                    'text': texts[i],
                    'img': ori_inputs[i],
                    'custom_entities': custom_entities,
                    'tokens_positive': tokens_positive[i]
                }
    if stuff_texts is not None:
        assert len(stuff_texts) == len(ori_inputs)
        for i in range(len(stuff_texts)):
            ori_inputs[i]['stuff_text'] = stuff_texts[i]

    inputs = self.preprocess(
        ori_inputs, batch_size=batch_size, **preprocess_kwargs)

    results_dict = {'predictions': [], 'visualization': []}
    # Each item from `preprocess` unpacks into the raw images of the
    # batch and the collated model input.
    for ori_imgs, data in (track(inputs, description='Inference')
                           if self.show_progress else inputs):
        preds = self.forward(data, **forward_kwargs)
        visualization = self.visualize(
            ori_imgs,
            preds,
            return_vis=return_vis,
            show=show,
            wait_time=wait_time,
            draw_pred=draw_pred,
            pred_score_thr=pred_score_thr,
            no_save_vis=no_save_vis,
            img_out_dir=out_dir,
            **visualize_kwargs)
        results = self.postprocess(
            preds,
            visualization,
            return_datasamples=return_datasamples,
            print_result=print_result,
            no_save_pred=no_save_pred,
            pred_out_dir=out_dir,
            **postprocess_kwargs)
        results_dict['predictions'].extend(results['predictions'])
        if results['visualization'] is not None:
            results_dict['visualization'].extend(results['visualization'])
    return results_dict
427
+
428
def visualize(self,
              inputs: InputsType,
              preds: PredType,
              return_vis: bool = False,
              show: bool = False,
              wait_time: int = 0,
              draw_pred: bool = True,
              pred_score_thr: float = 0.3,
              no_save_vis: bool = False,
              img_out_dir: str = '',
              **kwargs) -> Union[List[np.ndarray], None]:
    """Visualize predictions.

    Args:
        inputs (List[Union[str, np.ndarray]]): Inputs for the inferencer.
        preds (List[:obj:`DetDataSample`]): Predictions of the model.
        return_vis (bool): Whether to return the visualization result.
            Defaults to False.
        show (bool): Whether to display the image in a popup window.
            Defaults to False.
        wait_time (float): The interval of show (s). Defaults to 0.
        draw_pred (bool): Whether to draw predicted bounding boxes.
            Defaults to True.
        pred_score_thr (float): Minimum score of bboxes to draw.
            Defaults to 0.3.
        no_save_vis (bool): Whether to force not to save prediction
            vis results. Defaults to False.
        img_out_dir (str): Output directory of visualization results.
            If left as empty, no file will be saved. Defaults to ''.

    Returns:
        List[np.ndarray] or None: Returns visualization results only if
        applicable.

    Raises:
        ValueError: If no visualizer is configured, or an input is
            neither a path string nor an ndarray.
    """
    if no_save_vis is True:
        img_out_dir = ''

    # Nothing to show, save, or return: skip visualization entirely.
    if not show and img_out_dir == '' and not return_vis:
        return None

    if self.visualizer is None:
        # NOTE(review): the two string pieces concatenate without a
        # space ('term' + 'defined'); message text left unchanged here.
        raise ValueError('Visualization needs the "visualizer" term'
                         'defined in the config, but got None.')

    results = []

    for single_input, pred in zip(inputs, preds):
        if isinstance(single_input, str):
            img_bytes = mmengine.fileio.get(single_input)
            img = mmcv.imfrombytes(img_bytes)
            # mmcv decodes to BGR; flip channels to RGB for drawing.
            img = img[:, :, ::-1]
            img_name = osp.basename(single_input)
        elif isinstance(single_input, np.ndarray):
            img = single_input.copy()
            # Synthesize a zero-padded name for in-memory images.
            img_num = str(self.num_visualized_imgs).zfill(8)
            img_name = f'{img_num}.jpg'
        else:
            raise ValueError('Unsupported input type: '
                             f'{type(single_input)}')

        out_file = osp.join(img_out_dir, 'vis',
                            img_name) if img_out_dir != '' else None

        self.visualizer.add_datasample(
            img_name,
            img,
            pred,
            show=show,
            wait_time=wait_time,
            draw_gt=False,
            draw_pred=draw_pred,
            pred_score_thr=pred_score_thr,
            out_file=out_file,
        )
        results.append(self.visualizer.get_image())
        self.num_visualized_imgs += 1

    return results
506
+
507
def postprocess(
    self,
    preds: PredType,
    visualization: Optional[List[np.ndarray]] = None,
    return_datasamples: bool = False,
    print_result: bool = False,
    no_save_pred: bool = False,
    pred_out_dir: str = '',
    **kwargs,
) -> Dict:
    """Process the predictions and visualization results from ``forward``
    and ``visualize``.

    Responsibilities:

    1. Convert datasamples into a json-serializable dict if needed.
    2. Pack the predictions and visualization results and return them.
    3. Dump or log the predictions.

    Args:
        preds (List[:obj:`DetDataSample`]): Predictions of the model.
        visualization (Optional[np.ndarray]): Visualized predictions.
        return_datasamples (bool): Whether to use Datasample to store
            inference results. If False, dict will be used.
        print_result (bool): Whether to print the inference result w/o
            visualization to the console. Defaults to False.
        no_save_pred (bool): Whether to force not to save prediction
            results. Defaults to False.
        pred_out_dir: Dir to save the inference results w/o
            visualization. If left as empty, no file will be saved.
            Defaults to ''.

    Returns:
        dict: Inference and visualization results with key ``predictions``
        and ``visualization``.

        - ``visualization`` (Any): Returned by :meth:`visualize`.
        - ``predictions`` (dict or DataSample): Returned by
          :meth:`forward` and processed in :meth:`postprocess`.
          If ``return_datasamples=False``, it usually should be a
          json-serializable dict containing only basic data elements such
          as strings and numbers.
    """
    # Forcing no-save overrides any output directory.
    if no_save_pred is True:
        pred_out_dir = ''

    if not return_datasamples:
        # Serialize every datasample (and optionally dump it to disk).
        results = [self.pred2dict(pred, pred_out_dir) for pred in preds]
    else:
        results = preds
        if pred_out_dir != '':
            warnings.warn('Currently does not support saving datasample '
                          'when return_datasamples is set to True. '
                          'Prediction results are not saved!')

    # Print before attaching the (large) visualization arrays.
    result_dict = {'predictions': results}
    if print_result:
        print(result_dict)
    result_dict['visualization'] = visualization
    return result_dict
570
+
571
# TODO: The data format and fields saved in json need further discussion.
# Maybe should include model name, timestamp, filename, image info etc.
def pred2dict(self,
              data_sample: DetDataSample,
              pred_out_dir: str = '') -> Dict:
    """Extract elements necessary to represent a prediction into a
    dictionary.

    It's better to contain only basic data elements such as strings and
    numbers in order to guarantee it's json-serializable.

    Args:
        data_sample (:obj:`DetDataSample`): Predictions of the model.
        pred_out_dir: Dir to save the inference results w/o
            visualization. If left as empty, no file will be saved.
            Defaults to ''.

    Returns:
        dict: Prediction results.
    """
    is_save_pred = True
    if pred_out_dir == '':
        is_save_pred = False

    # Derive output paths from the source image name when available,
    # otherwise from a running counter.
    if is_save_pred and 'img_path' in data_sample:
        img_path = osp.basename(data_sample.img_path)
        img_path = osp.splitext(img_path)[0]
        out_img_path = osp.join(pred_out_dir, 'preds',
                                img_path + '_panoptic_seg.png')
        out_json_path = osp.join(pred_out_dir, 'preds', img_path + '.json')
    elif is_save_pred:
        out_img_path = osp.join(
            pred_out_dir, 'preds',
            f'{self.num_predicted_imgs}_panoptic_seg.png')
        out_json_path = osp.join(pred_out_dir, 'preds',
                                 f'{self.num_predicted_imgs}.json')
        self.num_predicted_imgs += 1

    result = {}
    if 'pred_instances' in data_sample:
        # Read masks before `.numpy()` so RLE encoding below can still
        # move them to CPU explicitly.
        masks = data_sample.pred_instances.get('masks')
        pred_instances = data_sample.pred_instances.numpy()
        result = {
            'labels': pred_instances.labels.tolist(),
            'scores': pred_instances.scores.tolist()
        }
        if 'bboxes' in pred_instances:
            result['bboxes'] = pred_instances.bboxes.tolist()
        if masks is not None:
            if 'bboxes' not in pred_instances or pred_instances.bboxes.sum(
            ) == 0:
                # Fake bbox, such as the SOLO.
                bboxes = mask2bbox(masks.cpu()).numpy().tolist()
                result['bboxes'] = bboxes
            encode_masks = encode_mask_results(pred_instances.masks)
            for encode_mask in encode_masks:
                # COCO RLE counts are bytes; decode for JSON output.
                if isinstance(encode_mask['counts'], bytes):
                    encode_mask['counts'] = encode_mask['counts'].decode()
            result['masks'] = encode_masks

    if 'pred_panoptic_seg' in data_sample:
        # `VOID` is None when panopticapi is missing (optional dep).
        if VOID is None:
            raise RuntimeError(
                'panopticapi is not installed, please install it by: '
                'pip install git+https://github.com/cocodataset/'
                'panopticapi.git.')

        pan = data_sample.pred_panoptic_seg.sem_seg.cpu().numpy()[0]
        # Map the "unknown class" ids (class index == num classes) to
        # the panopticapi VOID label before colorizing.
        pan[pan % INSTANCE_OFFSET == len(
            self.model.dataset_meta['classes'])] = VOID
        pan = id2rgb(pan).astype(np.uint8)

        if is_save_pred:
            # imwrite expects BGR; id2rgb produced RGB.
            mmcv.imwrite(pan[:, :, ::-1], out_img_path)
            result['panoptic_seg_path'] = out_img_path
        else:
            result['panoptic_seg'] = pan

    if is_save_pred:
        mmengine.dump(result, out_json_path)

    return result
head_extractor/build/lib/mmdet/apis/inference.py ADDED
@@ -0,0 +1,372 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import copy
3
+ import warnings
4
+ from pathlib import Path
5
+ from typing import Optional, Sequence, Union
6
+
7
+ import numpy as np
8
+ import torch
9
+ import torch.nn as nn
10
+ from mmcv.ops import RoIPool
11
+ from mmcv.transforms import Compose
12
+ from mmengine.config import Config
13
+ from mmengine.dataset import default_collate
14
+ from mmengine.model.utils import revert_sync_batchnorm
15
+ from mmengine.registry import init_default_scope
16
+ from mmengine.runner import load_checkpoint
17
+
18
+ from mmdet.registry import DATASETS
19
+ from mmdet.utils import ConfigType
20
+ from ..evaluation import get_classes
21
+ from ..registry import MODELS
22
+ from ..structures import DetDataSample, SampleList
23
+ from ..utils import get_test_pipeline_cfg
24
+
25
+
26
def init_detector(
    config: Union[str, Path, Config],
    checkpoint: Optional[str] = None,
    palette: str = 'none',
    device: str = 'cuda:0',
    cfg_options: Optional[dict] = None,
) -> nn.Module:
    """Initialize a detector from config file.

    Args:
        config (str, :obj:`Path`, or :obj:`mmengine.Config`): Config file path,
            :obj:`Path`, or the config object.
        checkpoint (str, optional): Checkpoint path. If left as None, the model
            will not load any weights.
        palette (str): Color palette used for visualization. If palette
            is stored in checkpoint, use checkpoint's palette first, otherwise
            use externally passed palette. Currently, supports 'coco', 'voc',
            'citys' and 'random'. Defaults to none.
        device (str): The device where the anchors will be put on.
            Defaults to cuda:0.
        cfg_options (dict, optional): Options to override some settings in
            the used config.

    Returns:
        nn.Module: The constructed detector.
    """
    if isinstance(config, (str, Path)):
        config = Config.fromfile(config)
    elif not isinstance(config, Config):
        raise TypeError('config must be a filename or Config object, '
                        f'but got {type(config)}')
    if cfg_options is not None:
        config.merge_from_dict(cfg_options)
    # NOTE(review): because of the `elif`, the backbone's init_cfg (e.g. a
    # pretrained-weight URL) is only cleared when cfg_options is None --
    # confirm this asymmetry is intended.
    elif 'init_cfg' in config.model.backbone:
        config.model.backbone.init_cfg = None

    # Register mmdet modules under the default scope before building.
    scope = config.get('default_scope', 'mmdet')
    if scope is not None:
        init_default_scope(config.get('default_scope', 'mmdet'))

    model = MODELS.build(config.model)
    # SyncBN requires distributed training; fall back to plain BN here.
    model = revert_sync_batchnorm(model)
    if checkpoint is None:
        warnings.simplefilter('once')
        warnings.warn('checkpoint is None, use COCO classes by default.')
        model.dataset_meta = {'classes': get_classes('coco')}
    else:
        checkpoint = load_checkpoint(model, checkpoint, map_location='cpu')
        # Weights converted from elsewhere may not have meta fields.
        checkpoint_meta = checkpoint.get('meta', {})

        # save the dataset_meta in the model for convenience
        if 'dataset_meta' in checkpoint_meta:
            # mmdet 3.x, all keys should be lowercase
            model.dataset_meta = {
                k.lower(): v
                for k, v in checkpoint_meta['dataset_meta'].items()
            }
        elif 'CLASSES' in checkpoint_meta:
            # < mmdet 3.x
            classes = checkpoint_meta['CLASSES']
            model.dataset_meta = {'classes': classes}
        else:
            warnings.simplefilter('once')
            warnings.warn(
                'dataset_meta or class names are not saved in the '
                'checkpoint\'s meta data, use COCO classes by default.')
            model.dataset_meta = {'classes': get_classes('coco')}

    # Priority: args.palette -> config -> checkpoint
    if palette != 'none':
        model.dataset_meta['palette'] = palette
    else:
        test_dataset_cfg = copy.deepcopy(config.test_dataloader.dataset)
        # lazy init. We only need the metainfo.
        test_dataset_cfg['lazy_init'] = True
        metainfo = DATASETS.build(test_dataset_cfg).metainfo
        cfg_palette = metainfo.get('palette', None)
        if cfg_palette is not None:
            model.dataset_meta['palette'] = cfg_palette
        else:
            if 'palette' not in model.dataset_meta:
                warnings.warn(
                    'palette does not exist, random is used by default. '
                    'You can also set the palette to customize.')
                model.dataset_meta['palette'] = 'random'

    model.cfg = config  # save the config in the model for convenience
    model.to(device)
    model.eval()
    return model
117
+
118
+
119
# Accepted image argument forms for `inference_detector`.
ImagesType = Union[str, np.ndarray, Sequence[str], Sequence[np.ndarray]]


def inference_detector(
    model: nn.Module,
    imgs: ImagesType,
    test_pipeline: Optional[Compose] = None,
    text_prompt: Optional[str] = None,
    custom_entities: bool = False,
) -> Union[DetDataSample, SampleList]:
    """Inference image(s) with the detector.

    Args:
        model (nn.Module): The loaded detector.
        imgs (str, ndarray, Sequence[str/ndarray]):
            Either image files or loaded images.
        test_pipeline (:obj:`Compose`): Test pipeline. Built from
            ``model.cfg`` when not given.
        text_prompt (str, optional): Text prompt for open-vocabulary
            models (e.g. GLIP / Grounding DINO). Defaults to None.
        custom_entities (bool): Whether the prompt uses custom entities.
            Defaults to False.

    Returns:
        :obj:`DetDataSample` or list[:obj:`DetDataSample`]:
        If imgs is a list or tuple, the same length list type results
        will be returned, otherwise return the detection results directly.
    """

    if isinstance(imgs, (list, tuple)):
        is_batch = True
    else:
        imgs = [imgs]
        is_batch = False

    cfg = model.cfg

    if test_pipeline is None:
        cfg = cfg.copy()
        test_pipeline = get_test_pipeline_cfg(cfg)
        if isinstance(imgs[0], np.ndarray):
            # Calling this method across libraries will result
            # in module unregistered error if not prefixed with mmdet.
            test_pipeline[0].type = 'mmdet.LoadImageFromNDArray'

        test_pipeline = Compose(test_pipeline)

    if model.data_preprocessor.device.type == 'cpu':
        for m in model.modules():
            assert not isinstance(
                m, RoIPool
            ), 'CPU inference with RoIPool is not supported currently.'

    result_list = []
    for i, img in enumerate(imgs):
        # prepare data
        if isinstance(img, np.ndarray):
            # TODO: remove img_id.
            data_ = dict(img=img, img_id=0)
        else:
            # TODO: remove img_id.
            data_ = dict(img_path=img, img_id=0)

        if text_prompt:
            data_['text'] = text_prompt
            data_['custom_entities'] = custom_entities

        # build the data pipeline
        data_ = test_pipeline(data_)

        # Wrap as a batch of one, which is what test_step expects.
        data_['inputs'] = [data_['inputs']]
        data_['data_samples'] = [data_['data_samples']]

        # forward the model
        with torch.no_grad():
            results = model.test_step(data_)[0]

        result_list.append(results)

    if not is_batch:
        return result_list[0]
    else:
        return result_list
197
+
198
+
199
# TODO: Awaiting refactoring
async def async_inference_detector(model, imgs):
    """Async inference image(s) with the detector.

    Args:
        model (nn.Module): The loaded detector.
        imgs (str | ndarray | Sequence[str | ndarray]): Either image
            files or loaded images.

    Returns:
        Awaitable detection results for all images.
    """
    if not isinstance(imgs, (list, tuple)):
        imgs = [imgs]

    cfg = model.cfg

    if isinstance(imgs[0], np.ndarray):
        cfg = cfg.copy()
        # set loading pipeline type
        cfg.data.test.pipeline[0].type = 'LoadImageFromNDArray'

    # cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline)
    test_pipeline = Compose(cfg.data.test.pipeline)

    datas = []
    for img in imgs:
        # prepare data
        if isinstance(img, np.ndarray):
            # directly add img
            data = dict(img=img)
        else:
            # add information into dict
            data = dict(img_info=dict(filename=img), img_prefix=None)
        # build the data pipeline
        data = test_pipeline(data)
        datas.append(data)

    for m in model.modules():
        assert not isinstance(
            m,
            RoIPool), 'CPU inference with RoIPool is not supported currently.'

    # We don't restore `torch.is_grad_enabled()` value during concurrent
    # inference since execution can overlap
    torch.set_grad_enabled(False)
    # Fix: forward the full `datas` batch. The original passed the loop
    # leftover `data` (only the last sample), leaving `datas` unused and
    # silently dropping every image but the final one.
    results = await model.aforward_test(datas, rescale=True)
    return results
246
+
247
+
248
def build_test_pipeline(cfg: ConfigType) -> ConfigType:
    """Build test_pipeline for mot/vis demo. In mot/vis infer, original
    test_pipeline should remove the "LoadImageFromFile" and
    "LoadTrackAnnotations".

    Args:
        cfg (ConfigDict): The loaded config.
    Returns:
        ConfigType: new test_pipeline
    """
    # remove the "LoadImageFromFile" and "LoadTrackAnnotations" in pipeline
    # Pipeline[0] is assumed to be a broadcaster wrapping per-frame
    # transforms; pipeline[-1] the pack-inputs step -- TODO confirm for
    # configs other than the mot/vis defaults.
    transform_broadcaster = cfg.test_dataloader.dataset.pipeline[0].copy()
    for transform in transform_broadcaster['transforms']:
        if transform['type'] == 'Resize':
            # NOTE(review): this assigns a single transform dict (not a
            # list) to 'transforms', keeping only Resize -- verify the
            # broadcaster accepts a bare dict here.
            transform_broadcaster['transforms'] = transform
    pack_track_inputs = cfg.test_dataloader.dataset.pipeline[-1].copy()
    test_pipeline = Compose([transform_broadcaster, pack_track_inputs])

    return test_pipeline
267
+
268
+
269
def inference_mot(model: nn.Module, img: np.ndarray, frame_id: int,
                  video_len: int) -> SampleList:
    """Inference image(s) with the mot model.

    Args:
        model (nn.Module): The loaded mot model.
        img (np.ndarray): Loaded image.
        frame_id (int): frame id.
        video_len (int): demo video length
    Returns:
        SampleList: The tracking data samples.
    """
    cfg = model.cfg
    # Single-frame "video" sample; list-valued fields are what the
    # broadcaster pipeline expects.
    data = dict(
        img=[img.astype(np.float32)],
        frame_id=[frame_id],
        ori_shape=[img.shape[:2]],
        img_id=[frame_id + 1],
        ori_video_length=[video_len])

    test_pipeline = build_test_pipeline(cfg)
    data = test_pipeline(data)

    # RoIPool has no CPU implementation; guard CPU-only inference.
    if not next(model.parameters()).is_cuda:
        for m in model.modules():
            assert not isinstance(
                m, RoIPool
            ), 'CPU inference with RoIPool is not supported currently.'

    # forward the model
    with torch.no_grad():
        data = default_collate([data])
        result = model.test_step(data)[0]
    return result
303
+
304
+
305
def init_track_model(config: Union[str, Config],
                     checkpoint: Optional[str] = None,
                     detector: Optional[str] = None,
                     reid: Optional[str] = None,
                     device: str = 'cuda:0',
                     cfg_options: Optional[dict] = None) -> nn.Module:
    """Initialize a model from config file.

    Args:
        config (str or :obj:`mmengine.Config`): Config file path or the config
            object.
        checkpoint (Optional[str], optional): Checkpoint path. Defaults to
            None.
        detector (Optional[str], optional): Detector Checkpoint path, use in
            some tracking algorithms like sort. Defaults to None.
        reid (Optional[str], optional): Reid checkpoint path. use in
            some tracking algorithms like sort. Defaults to None.
        device (str, optional): The device that the model inferences on.
            Defaults to `cuda:0`.
        cfg_options (Optional[dict], optional): Options to override some
            settings in the used config. Defaults to None.

    Returns:
        nn.Module: The constructed model.
    """
    if isinstance(config, str):
        config = Config.fromfile(config)
    elif not isinstance(config, Config):
        raise TypeError('config must be a filename or Config object, '
                        f'but got {type(config)}')
    if cfg_options is not None:
        config.merge_from_dict(cfg_options)

    model = MODELS.build(config.model)

    if checkpoint is not None:
        checkpoint = load_checkpoint(model, checkpoint, map_location='cpu')
        # Weights converted from elsewhere may not have meta fields.
        checkpoint_meta = checkpoint.get('meta', {})
        # save the dataset_meta in the model for convenience
        if 'dataset_meta' in checkpoint_meta:
            # Normalize pre-3.x 'CLASSES' key to the lowercase 'classes'.
            if 'CLASSES' in checkpoint_meta['dataset_meta']:
                value = checkpoint_meta['dataset_meta'].pop('CLASSES')
                checkpoint_meta['dataset_meta']['classes'] = value
            model.dataset_meta = checkpoint_meta['dataset_meta']

    # Separate detector/reid weights are mutually exclusive with a full
    # tracker checkpoint.
    if detector is not None:
        assert not (checkpoint and detector), \
            'Error: checkpoint and detector checkpoint cannot both exist'
        load_checkpoint(model.detector, detector, map_location='cpu')

    if reid is not None:
        assert not (checkpoint and reid), \
            'Error: checkpoint and reid checkpoint cannot both exist'
        load_checkpoint(model.reid, reid, map_location='cpu')

    # Some methods don't load checkpoints or checkpoints don't contain
    # 'dataset_meta'
    # VIS need dataset_meta, MOT don't need dataset_meta
    if not hasattr(model, 'dataset_meta'):
        warnings.warn('dataset_meta or class names are missed, '
                      'use None by default.')
        model.dataset_meta = {'classes': None}

    model.cfg = config  # save the config in the model for convenience
    model.to(device)
    model.eval()
    return model
head_extractor/build/lib/mmdet/configs/_base_/datasets/coco_detection.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright (c) OpenMMLab. All rights reserved.
# Base dataset config for COCO-style bbox detection (new-style config:
# transforms/datasets referenced as classes instead of registry strings).
from mmcv.transforms import LoadImageFromFile
from mmengine.dataset.sampler import DefaultSampler

from mmdet.datasets import AspectRatioBatchSampler, CocoDataset
from mmdet.datasets.transforms import (LoadAnnotations, PackDetInputs,
                                       RandomFlip, Resize)
from mmdet.evaluation import CocoMetric

# dataset settings
dataset_type = CocoDataset
data_root = 'data/coco/'

# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)

# data_root = 's3://openmmlab/datasets/detection/coco/'

# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
backend_args = None

# Training pipeline: load, annotate, resize (keep aspect ratio), flip, pack.
train_pipeline = [
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(type=LoadAnnotations, with_bbox=True),
    dict(type=Resize, scale=(1333, 800), keep_ratio=True),
    dict(type=RandomFlip, prob=0.5),
    dict(type=PackDetInputs)
]
# Test pipeline: no random flip; keeps GT loading for evaluation.
test_pipeline = [
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(type=Resize, scale=(1333, 800), keep_ratio=True),
    # If you don't have a gt annotation, delete the pipeline
    dict(type=LoadAnnotations, with_bbox=True),
    dict(
        type=PackDetInputs,
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type=DefaultSampler, shuffle=True),
    # Groups images of similar aspect ratio to reduce padding waste.
    batch_sampler=dict(type=AspectRatioBatchSampler),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='annotations/instances_train2017.json',
        data_prefix=dict(img='train2017/'),
        filter_cfg=dict(filter_empty_gt=True, min_size=32),
        pipeline=train_pipeline,
        backend_args=backend_args))
val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type=DefaultSampler, shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='annotations/instances_val2017.json',
        data_prefix=dict(img='val2017/'),
        test_mode=True,
        pipeline=test_pipeline,
        backend_args=backend_args))
test_dataloader = val_dataloader

val_evaluator = dict(
    type=CocoMetric,
    ann_file=data_root + 'annotations/instances_val2017.json',
    metric='bbox',
    format_only=False,
    backend_args=backend_args)
test_evaluator = val_evaluator

# inference on test dataset and
# format the output results for submission.
# test_dataloader = dict(
#     batch_size=1,
#     num_workers=2,
#     persistent_workers=True,
#     drop_last=False,
#     sampler=dict(type=DefaultSampler, shuffle=False),
#     dataset=dict(
#         type=dataset_type,
#         data_root=data_root,
#         ann_file=data_root + 'annotations/image_info_test-dev2017.json',
#         data_prefix=dict(img='test2017/'),
#         test_mode=True,
#         pipeline=test_pipeline))
# test_evaluator = dict(
#     type=CocoMetric,
#     metric='bbox',
#     format_only=True,
#     ann_file=data_root + 'annotations/image_info_test-dev2017.json',
#     outfile_prefix='./work_dirs/coco_detection/test')
head_extractor/build/lib/mmdet/configs/_base_/datasets/coco_instance.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.transforms.loading import LoadImageFromFile
3
+ from mmengine.dataset.sampler import DefaultSampler
4
+
5
+ from mmdet.datasets.coco import CocoDataset
6
+ from mmdet.datasets.samplers.batch_sampler import AspectRatioBatchSampler
7
+ from mmdet.datasets.transforms.formatting import PackDetInputs
8
+ from mmdet.datasets.transforms.loading import LoadAnnotations
9
+ from mmdet.datasets.transforms.transforms import RandomFlip, Resize
10
+ from mmdet.evaluation.metrics.coco_metric import CocoMetric
11
+
12
+ # dataset settings
13
+ dataset_type = 'CocoDataset'
14
+ data_root = 'data/coco/'
15
+
16
+ # Example to use different file client
17
+ # Method 1: simply set the data root and let the file I/O module
18
+ # automatically infer from prefix (not support LMDB and Memcache yet)
19
+
20
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
21
+
22
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
23
+ # backend_args = dict(
24
+ # backend='petrel',
25
+ # path_mapping=dict({
26
+ # './data/': 's3://openmmlab/datasets/detection/',
27
+ # 'data/': 's3://openmmlab/datasets/detection/'
28
+ # }))
29
+ backend_args = None
30
+
31
+ train_pipeline = [
32
+ dict(type=LoadImageFromFile, backend_args=backend_args),
33
+ dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
34
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
35
+ dict(type=RandomFlip, prob=0.5),
36
+ dict(type=PackDetInputs)
37
+ ]
38
+ test_pipeline = [
39
+ dict(type=LoadImageFromFile, backend_args=backend_args),
40
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
41
+ # If you don't have a gt annotation, delete the pipeline
42
+ dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
43
+ dict(
44
+ type=PackDetInputs,
45
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
46
+ 'scale_factor'))
47
+ ]
48
+ train_dataloader = dict(
49
+ batch_size=2,
50
+ num_workers=2,
51
+ persistent_workers=True,
52
+ sampler=dict(type=DefaultSampler, shuffle=True),
53
+ batch_sampler=dict(type=AspectRatioBatchSampler),
54
+ dataset=dict(
55
+ type=CocoDataset,
56
+ data_root=data_root,
57
+ ann_file='annotations/instances_train2017.json',
58
+ data_prefix=dict(img='train2017/'),
59
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
60
+ pipeline=train_pipeline,
61
+ backend_args=backend_args))
62
+ val_dataloader = dict(
63
+ batch_size=1,
64
+ num_workers=2,
65
+ persistent_workers=True,
66
+ drop_last=False,
67
+ sampler=dict(type=DefaultSampler, shuffle=False),
68
+ dataset=dict(
69
+ type=CocoDataset,
70
+ data_root=data_root,
71
+ ann_file='annotations/instances_val2017.json',
72
+ data_prefix=dict(img='val2017/'),
73
+ test_mode=True,
74
+ pipeline=test_pipeline,
75
+ backend_args=backend_args))
76
+ test_dataloader = val_dataloader
77
+
78
+ val_evaluator = dict(
79
+ type=CocoMetric,
80
+ ann_file=data_root + 'annotations/instances_val2017.json',
81
+ metric=['bbox', 'segm'],
82
+ format_only=False,
83
+ backend_args=backend_args)
84
+ test_evaluator = val_evaluator
85
+
86
+ # inference on test dataset and
87
+ # format the output results for submission.
88
+ # test_dataloader = dict(
89
+ # batch_size=1,
90
+ # num_workers=2,
91
+ # persistent_workers=True,
92
+ # drop_last=False,
93
+ # sampler=dict(type=DefaultSampler, shuffle=False),
94
+ # dataset=dict(
95
+ # type=CocoDataset,
96
+ # data_root=data_root,
97
+ # ann_file=data_root + 'annotations/image_info_test-dev2017.json',
98
+ # data_prefix=dict(img='test2017/'),
99
+ # test_mode=True,
100
+ # pipeline=test_pipeline))
101
+ # test_evaluator = dict(
102
+ # type=CocoMetric,
103
+ # metric=['bbox', 'segm'],
104
+ # format_only=True,
105
+ # ann_file=data_root + 'annotations/image_info_test-dev2017.json',
106
+ # outfile_prefix='./work_dirs/coco_instance/test')
head_extractor/build/lib/mmdet/configs/_base_/datasets/coco_instance_semantic.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.transforms.loading import LoadImageFromFile
3
+ from mmengine.dataset.sampler import DefaultSampler
4
+
5
+ from mmdet.datasets.coco import CocoDataset
6
+ from mmdet.datasets.samplers.batch_sampler import AspectRatioBatchSampler
7
+ from mmdet.datasets.transforms.formatting import PackDetInputs
8
+ from mmdet.datasets.transforms.loading import LoadAnnotations
9
+ from mmdet.datasets.transforms.transforms import RandomFlip, Resize
10
+ from mmdet.evaluation.metrics.coco_metric import CocoMetric
11
+
12
+ # dataset settings
13
+ dataset_type = 'CocoDataset'
14
+ data_root = 'data/coco/'
15
+
16
+ # Example to use different file client
17
+ # Method 1: simply set the data root and let the file I/O module
18
+ # automatically infer from prefix (not support LMDB and Memcache yet)
19
+
20
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
21
+
22
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
23
+ # backend_args = dict(
24
+ # backend='petrel',
25
+ # path_mapping=dict({
26
+ # './data/': 's3://openmmlab/datasets/detection/',
27
+ # 'data/': 's3://openmmlab/datasets/detection/'
28
+ # }))
29
+ backend_args = None
30
+
31
+ train_pipeline = [
32
+ dict(type=LoadImageFromFile, backend_args=backend_args),
33
+ dict(type=LoadAnnotations, with_bbox=True, with_mask=True, with_seg=True),
34
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
35
+ dict(type=RandomFlip, prob=0.5),
36
+ dict(type=PackDetInputs)
37
+ ]
38
+ test_pipeline = [
39
+ dict(type=LoadImageFromFile, backend_args=backend_args),
40
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
41
+ # If you don't have a gt annotation, delete the pipeline
42
+ dict(type=LoadAnnotations, with_bbox=True, with_mask=True, with_seg=True),
43
+ dict(
44
+ type=PackDetInputs,
45
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
46
+ 'scale_factor'))
47
+ ]
48
+
49
+ train_dataloader = dict(
50
+ batch_size=2,
51
+ num_workers=2,
52
+ persistent_workers=True,
53
+ sampler=dict(type=DefaultSampler, shuffle=True),
54
+ batch_sampler=dict(type=AspectRatioBatchSampler),
55
+ dataset=dict(
56
+ type=CocoDataset,
57
+ data_root=data_root,
58
+ ann_file='annotations/instances_train2017.json',
59
+ data_prefix=dict(img='train2017/', seg='stuffthingmaps/train2017/'),
60
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
61
+ pipeline=train_pipeline,
62
+ backend_args=backend_args))
63
+
64
+ val_dataloader = dict(
65
+ batch_size=1,
66
+ num_workers=2,
67
+ persistent_workers=True,
68
+ drop_last=False,
69
+ sampler=dict(type=DefaultSampler, shuffle=False),
70
+ dataset=dict(
71
+ type=CocoDataset,
72
+ data_root=data_root,
73
+ ann_file='annotations/instances_val2017.json',
74
+ data_prefix=dict(img='val2017/'),
75
+ test_mode=True,
76
+ pipeline=test_pipeline,
77
+ backend_args=backend_args))
78
+
79
+ test_dataloader = val_dataloader
80
+
81
+ val_evaluator = dict(
82
+ type=CocoMetric,
83
+ ann_file=data_root + 'annotations/instances_val2017.json',
84
+ metric=['bbox', 'segm'],
85
+ format_only=False,
86
+ backend_args=backend_args)
87
+ test_evaluator = val_evaluator
head_extractor/build/lib/mmdet/configs/_base_/datasets/coco_panoptic.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.transforms.loading import LoadImageFromFile
3
+ from mmengine.dataset.sampler import DefaultSampler
4
+
5
+ from mmdet.datasets.coco_panoptic import CocoPanopticDataset
6
+ from mmdet.datasets.samplers.batch_sampler import AspectRatioBatchSampler
7
+ from mmdet.datasets.transforms.formatting import PackDetInputs
8
+ from mmdet.datasets.transforms.loading import LoadPanopticAnnotations
9
+ from mmdet.datasets.transforms.transforms import RandomFlip, Resize
10
+ from mmdet.evaluation.metrics.coco_panoptic_metric import CocoPanopticMetric
11
+
12
+ # dataset settings
13
+ dataset_type = 'CocoPanopticDataset'
14
+ data_root = 'data/coco/'
15
+
16
+ # Example to use different file client
17
+ # Method 1: simply set the data root and let the file I/O module
18
+ # automatically infer from prefix (not support LMDB and Memcache yet)
19
+
20
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
21
+
22
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
23
+ # backend_args = dict(
24
+ # backend='petrel',
25
+ # path_mapping=dict({
26
+ # './data/': 's3://openmmlab/datasets/detection/',
27
+ # 'data/': 's3://openmmlab/datasets/detection/'
28
+ # }))
29
+ backend_args = None
30
+
31
+ train_pipeline = [
32
+ dict(type=LoadImageFromFile, backend_args=backend_args),
33
+ dict(type=LoadPanopticAnnotations, backend_args=backend_args),
34
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
35
+ dict(type=RandomFlip, prob=0.5),
36
+ dict(type=PackDetInputs)
37
+ ]
38
+ test_pipeline = [
39
+ dict(type=LoadImageFromFile, backend_args=backend_args),
40
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
41
+ dict(type=LoadPanopticAnnotations, backend_args=backend_args),
42
+ dict(
43
+ type=PackDetInputs,
44
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
45
+ 'scale_factor'))
46
+ ]
47
+
48
+ train_dataloader = dict(
49
+ batch_size=2,
50
+ num_workers=2,
51
+ persistent_workers=True,
52
+ sampler=dict(type=DefaultSampler, shuffle=True),
53
+ batch_sampler=dict(type=AspectRatioBatchSampler),
54
+ dataset=dict(
55
+ type=CocoPanopticDataset,
56
+ data_root=data_root,
57
+ ann_file='annotations/panoptic_train2017.json',
58
+ data_prefix=dict(
59
+ img='train2017/', seg='annotations/panoptic_train2017/'),
60
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
61
+ pipeline=train_pipeline,
62
+ backend_args=backend_args))
63
+ val_dataloader = dict(
64
+ batch_size=1,
65
+ num_workers=2,
66
+ persistent_workers=True,
67
+ drop_last=False,
68
+ sampler=dict(type=DefaultSampler, shuffle=False),
69
+ dataset=dict(
70
+ type=CocoPanopticDataset,
71
+ data_root=data_root,
72
+ ann_file='annotations/panoptic_val2017.json',
73
+ data_prefix=dict(img='val2017/', seg='annotations/panoptic_val2017/'),
74
+ test_mode=True,
75
+ pipeline=test_pipeline,
76
+ backend_args=backend_args))
77
+ test_dataloader = val_dataloader
78
+
79
+ val_evaluator = dict(
80
+ type=CocoPanopticMetric,
81
+ ann_file=data_root + 'annotations/panoptic_val2017.json',
82
+ seg_prefix=data_root + 'annotations/panoptic_val2017/',
83
+ backend_args=backend_args)
84
+ test_evaluator = val_evaluator
85
+
86
+ # inference on test dataset and
87
+ # format the output results for submission.
88
+ # test_dataloader = dict(
89
+ # batch_size=1,
90
+ # num_workers=1,
91
+ # persistent_workers=True,
92
+ # drop_last=False,
93
+ # sampler=dict(type=DefaultSampler, shuffle=False),
94
+ # dataset=dict(
95
+ # type=CocoPanopticDataset,
96
+ # data_root=data_root,
97
+ # ann_file='annotations/panoptic_image_info_test-dev2017.json',
98
+ # data_prefix=dict(img='test2017/'),
99
+ # test_mode=True,
100
+ # pipeline=test_pipeline))
101
+ # test_evaluator = dict(
102
+ # type=CocoPanopticMetric,
103
+ # format_only=True,
104
+ # ann_file=data_root + 'annotations/panoptic_image_info_test-dev2017.json',
105
+ # outfile_prefix='./work_dirs/coco_panoptic/test')
head_extractor/build/lib/mmdet/configs/_base_/datasets/mot_challenge.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.transforms import (LoadImageFromFile, RandomResize,
3
+ TransformBroadcaster)
4
+
5
+ from mmdet.datasets import MOTChallengeDataset
6
+ from mmdet.datasets.samplers import TrackImgSampler
7
+ from mmdet.datasets.transforms import (LoadTrackAnnotations, PackTrackInputs,
8
+ PhotoMetricDistortion, RandomCrop,
9
+ RandomFlip, Resize,
10
+ UniformRefFrameSample)
11
+ from mmdet.evaluation import MOTChallengeMetric
12
+
13
+ # dataset settings
14
+ dataset_type = MOTChallengeDataset
15
+ data_root = 'data/MOT17/'
16
+ img_scale = (1088, 1088)
17
+
18
+ backend_args = None
19
+ # data pipeline
20
+ train_pipeline = [
21
+ dict(
22
+ type=UniformRefFrameSample,
23
+ num_ref_imgs=1,
24
+ frame_range=10,
25
+ filter_key_img=True),
26
+ dict(
27
+ type=TransformBroadcaster,
28
+ share_random_params=True,
29
+ transforms=[
30
+ dict(type=LoadImageFromFile, backend_args=backend_args),
31
+ dict(type=LoadTrackAnnotations),
32
+ dict(
33
+ type=RandomResize,
34
+ scale=img_scale,
35
+ ratio_range=(0.8, 1.2),
36
+ keep_ratio=True,
37
+ clip_object_border=False),
38
+ dict(type=PhotoMetricDistortion)
39
+ ]),
40
+ dict(
41
+ type=TransformBroadcaster,
42
+ # different cropped positions for different frames
43
+ share_random_params=False,
44
+ transforms=[
45
+ dict(type=RandomCrop, crop_size=img_scale, bbox_clip_border=False)
46
+ ]),
47
+ dict(
48
+ type=TransformBroadcaster,
49
+ share_random_params=True,
50
+ transforms=[
51
+ dict(type=RandomFlip, prob=0.5),
52
+ ]),
53
+ dict(type=PackTrackInputs)
54
+ ]
55
+
56
+ test_pipeline = [
57
+ dict(
58
+ type=TransformBroadcaster,
59
+ transforms=[
60
+ dict(type=LoadImageFromFile, backend_args=backend_args),
61
+ dict(type=Resize, scale=img_scale, keep_ratio=True),
62
+ dict(type=LoadTrackAnnotations)
63
+ ]),
64
+ dict(type=PackTrackInputs)
65
+ ]
66
+
67
+ # dataloader
68
+ train_dataloader = dict(
69
+ batch_size=2,
70
+ num_workers=2,
71
+ persistent_workers=True,
72
+ sampler=dict(type=TrackImgSampler), # image-based sampling
73
+ dataset=dict(
74
+ type=dataset_type,
75
+ data_root=data_root,
76
+ visibility_thr=-1,
77
+ ann_file='annotations/half-train_cocoformat.json',
78
+ data_prefix=dict(img_path='train'),
79
+ metainfo=dict(classes=('pedestrian', )),
80
+ pipeline=train_pipeline))
81
+ val_dataloader = dict(
82
+ batch_size=1,
83
+ num_workers=2,
84
+ persistent_workers=True,
85
+ # Now we support two ways to test, image_based and video_based
86
+ # if you want to use video_based sampling, you can use as follows
87
+ # sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
88
+ sampler=dict(type=TrackImgSampler), # image-based sampling
89
+ dataset=dict(
90
+ type=dataset_type,
91
+ data_root=data_root,
92
+ ann_file='annotations/half-val_cocoformat.json',
93
+ data_prefix=dict(img_path='train'),
94
+ test_mode=True,
95
+ pipeline=test_pipeline))
96
+ test_dataloader = val_dataloader
97
+
98
+ # evaluator
99
+ val_evaluator = dict(
100
+ type=MOTChallengeMetric, metric=['HOTA', 'CLEAR', 'Identity'])
101
+ test_evaluator = val_evaluator
head_extractor/build/lib/mmdet/configs/_base_/default_runtime.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook,
3
+ LoggerHook, ParamSchedulerHook)
4
+ from mmengine.runner import LogProcessor
5
+ from mmengine.visualization import LocalVisBackend
6
+
7
+ from mmdet.engine.hooks import DetVisualizationHook
8
+ from mmdet.visualization import DetLocalVisualizer
9
+
10
+ default_scope = None
11
+
12
+ default_hooks = dict(
13
+ timer=dict(type=IterTimerHook),
14
+ logger=dict(type=LoggerHook, interval=50),
15
+ param_scheduler=dict(type=ParamSchedulerHook),
16
+ checkpoint=dict(type=CheckpointHook, interval=1),
17
+ sampler_seed=dict(type=DistSamplerSeedHook),
18
+ visualization=dict(type=DetVisualizationHook))
19
+
20
+ env_cfg = dict(
21
+ cudnn_benchmark=False,
22
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
23
+ dist_cfg=dict(backend='nccl'),
24
+ )
25
+
26
+ vis_backends = [dict(type=LocalVisBackend)]
27
+ visualizer = dict(
28
+ type=DetLocalVisualizer, vis_backends=vis_backends, name='visualizer')
29
+ log_processor = dict(type=LogProcessor, window_size=50, by_epoch=True)
30
+
31
+ log_level = 'INFO'
32
+ load_from = None
33
+ resume = False
head_extractor/build/lib/mmdet/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.ops import RoIAlign, nms
3
+ from torch.nn import BatchNorm2d
4
+
5
+ from mmdet.models.backbones.resnet import ResNet
6
+ from mmdet.models.data_preprocessors.data_preprocessor import \
7
+ DetDataPreprocessor
8
+ from mmdet.models.dense_heads.rpn_head import RPNHead
9
+ from mmdet.models.detectors.cascade_rcnn import CascadeRCNN
10
+ from mmdet.models.losses.cross_entropy_loss import CrossEntropyLoss
11
+ from mmdet.models.losses.smooth_l1_loss import SmoothL1Loss
12
+ from mmdet.models.necks.fpn import FPN
13
+ from mmdet.models.roi_heads.bbox_heads.convfc_bbox_head import \
14
+ Shared2FCBBoxHead
15
+ from mmdet.models.roi_heads.cascade_roi_head import CascadeRoIHead
16
+ from mmdet.models.roi_heads.mask_heads.fcn_mask_head import FCNMaskHead
17
+ from mmdet.models.roi_heads.roi_extractors.single_level_roi_extractor import \
18
+ SingleRoIExtractor
19
+ from mmdet.models.task_modules.assigners.max_iou_assigner import MaxIoUAssigner
20
+ from mmdet.models.task_modules.coders.delta_xywh_bbox_coder import \
21
+ DeltaXYWHBBoxCoder
22
+ from mmdet.models.task_modules.prior_generators.anchor_generator import \
23
+ AnchorGenerator
24
+ from mmdet.models.task_modules.samplers.random_sampler import RandomSampler
25
+
26
+ # model settings
27
+ model = dict(
28
+ type=CascadeRCNN,
29
+ data_preprocessor=dict(
30
+ type=DetDataPreprocessor,
31
+ mean=[123.675, 116.28, 103.53],
32
+ std=[58.395, 57.12, 57.375],
33
+ bgr_to_rgb=True,
34
+ pad_mask=True,
35
+ pad_size_divisor=32),
36
+ backbone=dict(
37
+ type=ResNet,
38
+ depth=50,
39
+ num_stages=4,
40
+ out_indices=(0, 1, 2, 3),
41
+ frozen_stages=1,
42
+ norm_cfg=dict(type=BatchNorm2d, requires_grad=True),
43
+ norm_eval=True,
44
+ style='pytorch',
45
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
46
+ neck=dict(
47
+ type=FPN,
48
+ in_channels=[256, 512, 1024, 2048],
49
+ out_channels=256,
50
+ num_outs=5),
51
+ rpn_head=dict(
52
+ type=RPNHead,
53
+ in_channels=256,
54
+ feat_channels=256,
55
+ anchor_generator=dict(
56
+ type=AnchorGenerator,
57
+ scales=[8],
58
+ ratios=[0.5, 1.0, 2.0],
59
+ strides=[4, 8, 16, 32, 64]),
60
+ bbox_coder=dict(
61
+ type=DeltaXYWHBBoxCoder,
62
+ target_means=[.0, .0, .0, .0],
63
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
64
+ loss_cls=dict(
65
+ type=CrossEntropyLoss, use_sigmoid=True, loss_weight=1.0),
66
+ loss_bbox=dict(type=SmoothL1Loss, beta=1.0 / 9.0, loss_weight=1.0)),
67
+ roi_head=dict(
68
+ type=CascadeRoIHead,
69
+ num_stages=3,
70
+ stage_loss_weights=[1, 0.5, 0.25],
71
+ bbox_roi_extractor=dict(
72
+ type=SingleRoIExtractor,
73
+ roi_layer=dict(type=RoIAlign, output_size=7, sampling_ratio=0),
74
+ out_channels=256,
75
+ featmap_strides=[4, 8, 16, 32]),
76
+ bbox_head=[
77
+ dict(
78
+ type=Shared2FCBBoxHead,
79
+ in_channels=256,
80
+ fc_out_channels=1024,
81
+ roi_feat_size=7,
82
+ num_classes=80,
83
+ bbox_coder=dict(
84
+ type=DeltaXYWHBBoxCoder,
85
+ target_means=[0., 0., 0., 0.],
86
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
87
+ reg_class_agnostic=True,
88
+ loss_cls=dict(
89
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
90
+ loss_bbox=dict(type=SmoothL1Loss, beta=1.0, loss_weight=1.0)),
91
+ dict(
92
+ type=Shared2FCBBoxHead,
93
+ in_channels=256,
94
+ fc_out_channels=1024,
95
+ roi_feat_size=7,
96
+ num_classes=80,
97
+ bbox_coder=dict(
98
+ type=DeltaXYWHBBoxCoder,
99
+ target_means=[0., 0., 0., 0.],
100
+ target_stds=[0.05, 0.05, 0.1, 0.1]),
101
+ reg_class_agnostic=True,
102
+ loss_cls=dict(
103
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
104
+ loss_bbox=dict(type=SmoothL1Loss, beta=1.0, loss_weight=1.0)),
105
+ dict(
106
+ type=Shared2FCBBoxHead,
107
+ in_channels=256,
108
+ fc_out_channels=1024,
109
+ roi_feat_size=7,
110
+ num_classes=80,
111
+ bbox_coder=dict(
112
+ type=DeltaXYWHBBoxCoder,
113
+ target_means=[0., 0., 0., 0.],
114
+ target_stds=[0.033, 0.033, 0.067, 0.067]),
115
+ reg_class_agnostic=True,
116
+ loss_cls=dict(
117
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
118
+ loss_bbox=dict(type=SmoothL1Loss, beta=1.0, loss_weight=1.0))
119
+ ],
120
+ mask_roi_extractor=dict(
121
+ type=SingleRoIExtractor,
122
+ roi_layer=dict(type=RoIAlign, output_size=14, sampling_ratio=0),
123
+ out_channels=256,
124
+ featmap_strides=[4, 8, 16, 32]),
125
+ mask_head=dict(
126
+ type=FCNMaskHead,
127
+ num_convs=4,
128
+ in_channels=256,
129
+ conv_out_channels=256,
130
+ num_classes=80,
131
+ loss_mask=dict(
132
+ type=CrossEntropyLoss, use_mask=True, loss_weight=1.0))),
133
+ # model training and testing settings
134
+ train_cfg=dict(
135
+ rpn=dict(
136
+ assigner=dict(
137
+ type=MaxIoUAssigner,
138
+ pos_iou_thr=0.7,
139
+ neg_iou_thr=0.3,
140
+ min_pos_iou=0.3,
141
+ match_low_quality=True,
142
+ ignore_iof_thr=-1),
143
+ sampler=dict(
144
+ type=RandomSampler,
145
+ num=256,
146
+ pos_fraction=0.5,
147
+ neg_pos_ub=-1,
148
+ add_gt_as_proposals=False),
149
+ allowed_border=0,
150
+ pos_weight=-1,
151
+ debug=False),
152
+ rpn_proposal=dict(
153
+ nms_pre=2000,
154
+ max_per_img=2000,
155
+ nms=dict(type=nms, iou_threshold=0.7),
156
+ min_bbox_size=0),
157
+ rcnn=[
158
+ dict(
159
+ assigner=dict(
160
+ type=MaxIoUAssigner,
161
+ pos_iou_thr=0.5,
162
+ neg_iou_thr=0.5,
163
+ min_pos_iou=0.5,
164
+ match_low_quality=False,
165
+ ignore_iof_thr=-1),
166
+ sampler=dict(
167
+ type=RandomSampler,
168
+ num=512,
169
+ pos_fraction=0.25,
170
+ neg_pos_ub=-1,
171
+ add_gt_as_proposals=True),
172
+ mask_size=28,
173
+ pos_weight=-1,
174
+ debug=False),
175
+ dict(
176
+ assigner=dict(
177
+ type=MaxIoUAssigner,
178
+ pos_iou_thr=0.6,
179
+ neg_iou_thr=0.6,
180
+ min_pos_iou=0.6,
181
+ match_low_quality=False,
182
+ ignore_iof_thr=-1),
183
+ sampler=dict(
184
+ type=RandomSampler,
185
+ num=512,
186
+ pos_fraction=0.25,
187
+ neg_pos_ub=-1,
188
+ add_gt_as_proposals=True),
189
+ mask_size=28,
190
+ pos_weight=-1,
191
+ debug=False),
192
+ dict(
193
+ assigner=dict(
194
+ type=MaxIoUAssigner,
195
+ pos_iou_thr=0.7,
196
+ neg_iou_thr=0.7,
197
+ min_pos_iou=0.7,
198
+ match_low_quality=False,
199
+ ignore_iof_thr=-1),
200
+ sampler=dict(
201
+ type=RandomSampler,
202
+ num=512,
203
+ pos_fraction=0.25,
204
+ neg_pos_ub=-1,
205
+ add_gt_as_proposals=True),
206
+ mask_size=28,
207
+ pos_weight=-1,
208
+ debug=False)
209
+ ]),
210
+ test_cfg=dict(
211
+ rpn=dict(
212
+ nms_pre=1000,
213
+ max_per_img=1000,
214
+ nms=dict(type=nms, iou_threshold=0.7),
215
+ min_bbox_size=0),
216
+ rcnn=dict(
217
+ score_thr=0.05,
218
+ nms=dict(type=nms, iou_threshold=0.5),
219
+ max_per_img=100,
220
+ mask_thr_binary=0.5)))
head_extractor/build/lib/mmdet/configs/_base_/models/cascade_rcnn_r50_fpn.py ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.ops import RoIAlign, nms
3
+ from torch.nn import BatchNorm2d
4
+
5
+ from mmdet.models.backbones.resnet import ResNet
6
+ from mmdet.models.data_preprocessors.data_preprocessor import \
7
+ DetDataPreprocessor
8
+ from mmdet.models.dense_heads.rpn_head import RPNHead
9
+ from mmdet.models.detectors.cascade_rcnn import CascadeRCNN
10
+ from mmdet.models.losses.cross_entropy_loss import CrossEntropyLoss
11
+ from mmdet.models.losses.smooth_l1_loss import SmoothL1Loss
12
+ from mmdet.models.necks.fpn import FPN
13
+ from mmdet.models.roi_heads.bbox_heads.convfc_bbox_head import \
14
+ Shared2FCBBoxHead
15
+ from mmdet.models.roi_heads.cascade_roi_head import CascadeRoIHead
16
+ from mmdet.models.roi_heads.roi_extractors.single_level_roi_extractor import \
17
+ SingleRoIExtractor
18
+ from mmdet.models.task_modules.assigners.max_iou_assigner import MaxIoUAssigner
19
+ from mmdet.models.task_modules.coders.delta_xywh_bbox_coder import \
20
+ DeltaXYWHBBoxCoder
21
+ from mmdet.models.task_modules.prior_generators.anchor_generator import \
22
+ AnchorGenerator
23
+ from mmdet.models.task_modules.samplers.random_sampler import RandomSampler
24
+
25
+ # model settings
26
+ model = dict(
27
+ type=CascadeRCNN,
28
+ data_preprocessor=dict(
29
+ type=DetDataPreprocessor,
30
+ mean=[123.675, 116.28, 103.53],
31
+ std=[58.395, 57.12, 57.375],
32
+ bgr_to_rgb=True,
33
+ pad_size_divisor=32),
34
+ backbone=dict(
35
+ type=ResNet,
36
+ depth=50,
37
+ num_stages=4,
38
+ out_indices=(0, 1, 2, 3),
39
+ frozen_stages=1,
40
+ norm_cfg=dict(type=BatchNorm2d, requires_grad=True),
41
+ norm_eval=True,
42
+ style='pytorch',
43
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
44
+ neck=dict(
45
+ type=FPN,
46
+ in_channels=[256, 512, 1024, 2048],
47
+ out_channels=256,
48
+ num_outs=5),
49
+ rpn_head=dict(
50
+ type=RPNHead,
51
+ in_channels=256,
52
+ feat_channels=256,
53
+ anchor_generator=dict(
54
+ type=AnchorGenerator,
55
+ scales=[8],
56
+ ratios=[0.5, 1.0, 2.0],
57
+ strides=[4, 8, 16, 32, 64]),
58
+ bbox_coder=dict(
59
+ type=DeltaXYWHBBoxCoder,
60
+ target_means=[.0, .0, .0, .0],
61
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
62
+ loss_cls=dict(
63
+ type=CrossEntropyLoss, use_sigmoid=True, loss_weight=1.0),
64
+ loss_bbox=dict(type=SmoothL1Loss, beta=1.0 / 9.0, loss_weight=1.0)),
65
+ roi_head=dict(
66
+ type=CascadeRoIHead,
67
+ num_stages=3,
68
+ stage_loss_weights=[1, 0.5, 0.25],
69
+ bbox_roi_extractor=dict(
70
+ type=SingleRoIExtractor,
71
+ roi_layer=dict(type=RoIAlign, output_size=7, sampling_ratio=0),
72
+ out_channels=256,
73
+ featmap_strides=[4, 8, 16, 32]),
74
+ bbox_head=[
75
+ dict(
76
+ type=Shared2FCBBoxHead,
77
+ in_channels=256,
78
+ fc_out_channels=1024,
79
+ roi_feat_size=7,
80
+ num_classes=80,
81
+ bbox_coder=dict(
82
+ type=DeltaXYWHBBoxCoder,
83
+ target_means=[0., 0., 0., 0.],
84
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
85
+ reg_class_agnostic=True,
86
+ loss_cls=dict(
87
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
88
+ loss_bbox=dict(type=SmoothL1Loss, beta=1.0, loss_weight=1.0)),
89
+ dict(
90
+ type=Shared2FCBBoxHead,
91
+ in_channels=256,
92
+ fc_out_channels=1024,
93
+ roi_feat_size=7,
94
+ num_classes=80,
95
+ bbox_coder=dict(
96
+ type=DeltaXYWHBBoxCoder,
97
+ target_means=[0., 0., 0., 0.],
98
+ target_stds=[0.05, 0.05, 0.1, 0.1]),
99
+ reg_class_agnostic=True,
100
+ loss_cls=dict(
101
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
102
+ loss_bbox=dict(type=SmoothL1Loss, beta=1.0, loss_weight=1.0)),
103
+ dict(
104
+ type=Shared2FCBBoxHead,
105
+ in_channels=256,
106
+ fc_out_channels=1024,
107
+ roi_feat_size=7,
108
+ num_classes=80,
109
+ bbox_coder=dict(
110
+ type=DeltaXYWHBBoxCoder,
111
+ target_means=[0., 0., 0., 0.],
112
+ target_stds=[0.033, 0.033, 0.067, 0.067]),
113
+ reg_class_agnostic=True,
114
+ loss_cls=dict(
115
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
116
+ loss_bbox=dict(type=SmoothL1Loss, beta=1.0, loss_weight=1.0))
117
+ ]),
118
+ # model training and testing settings
119
+ train_cfg=dict(
120
+ rpn=dict(
121
+ assigner=dict(
122
+ type=MaxIoUAssigner,
123
+ pos_iou_thr=0.7,
124
+ neg_iou_thr=0.3,
125
+ min_pos_iou=0.3,
126
+ match_low_quality=True,
127
+ ignore_iof_thr=-1),
128
+ sampler=dict(
129
+ type=RandomSampler,
130
+ num=256,
131
+ pos_fraction=0.5,
132
+ neg_pos_ub=-1,
133
+ add_gt_as_proposals=False),
134
+ allowed_border=0,
135
+ pos_weight=-1,
136
+ debug=False),
137
+ rpn_proposal=dict(
138
+ nms_pre=2000,
139
+ max_per_img=2000,
140
+ nms=dict(type=nms, iou_threshold=0.7),
141
+ min_bbox_size=0),
142
+ rcnn=[
143
+ dict(
144
+ assigner=dict(
145
+ type=MaxIoUAssigner,
146
+ pos_iou_thr=0.5,
147
+ neg_iou_thr=0.5,
148
+ min_pos_iou=0.5,
149
+ match_low_quality=False,
150
+ ignore_iof_thr=-1),
151
+ sampler=dict(
152
+ type=RandomSampler,
153
+ num=512,
154
+ pos_fraction=0.25,
155
+ neg_pos_ub=-1,
156
+ add_gt_as_proposals=True),
157
+ pos_weight=-1,
158
+ debug=False),
159
+ dict(
160
+ assigner=dict(
161
+ type=MaxIoUAssigner,
162
+ pos_iou_thr=0.6,
163
+ neg_iou_thr=0.6,
164
+ min_pos_iou=0.6,
165
+ match_low_quality=False,
166
+ ignore_iof_thr=-1),
167
+ sampler=dict(
168
+ type=RandomSampler,
169
+ num=512,
170
+ pos_fraction=0.25,
171
+ neg_pos_ub=-1,
172
+ add_gt_as_proposals=True),
173
+ pos_weight=-1,
174
+ debug=False),
175
+ dict(
176
+ assigner=dict(
177
+ type=MaxIoUAssigner,
178
+ pos_iou_thr=0.7,
179
+ neg_iou_thr=0.7,
180
+ min_pos_iou=0.7,
181
+ match_low_quality=False,
182
+ ignore_iof_thr=-1),
183
+ sampler=dict(
184
+ type=RandomSampler,
185
+ num=512,
186
+ pos_fraction=0.25,
187
+ neg_pos_ub=-1,
188
+ add_gt_as_proposals=True),
189
+ pos_weight=-1,
190
+ debug=False)
191
+ ]),
192
+ test_cfg=dict(
193
+ rpn=dict(
194
+ nms_pre=1000,
195
+ max_per_img=1000,
196
+ nms=dict(type=nms, iou_threshold=0.7),
197
+ min_bbox_size=0),
198
+ rcnn=dict(
199
+ score_thr=0.05,
200
+ nms=dict(type=nms, iou_threshold=0.5),
201
+ max_per_img=100)))
head_extractor/build/lib/mmdet/configs/_base_/models/faster_rcnn_r50_fpn.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.ops import RoIAlign, nms
3
+ from torch.nn import BatchNorm2d
4
+
5
+ from mmdet.models.backbones.resnet import ResNet
6
+ from mmdet.models.data_preprocessors.data_preprocessor import \
7
+ DetDataPreprocessor
8
+ from mmdet.models.dense_heads.rpn_head import RPNHead
9
+ from mmdet.models.detectors.faster_rcnn import FasterRCNN
10
+ from mmdet.models.losses.cross_entropy_loss import CrossEntropyLoss
11
+ from mmdet.models.losses.smooth_l1_loss import L1Loss
12
+ from mmdet.models.necks.fpn import FPN
13
+ from mmdet.models.roi_heads.bbox_heads.convfc_bbox_head import \
14
+ Shared2FCBBoxHead
15
+ from mmdet.models.roi_heads.roi_extractors.single_level_roi_extractor import \
16
+ SingleRoIExtractor
17
+ from mmdet.models.roi_heads.standard_roi_head import StandardRoIHead
18
+ from mmdet.models.task_modules.assigners.max_iou_assigner import MaxIoUAssigner
19
+ from mmdet.models.task_modules.coders.delta_xywh_bbox_coder import \
20
+ DeltaXYWHBBoxCoder
21
+ from mmdet.models.task_modules.prior_generators.anchor_generator import \
22
+ AnchorGenerator
23
+ from mmdet.models.task_modules.samplers.random_sampler import RandomSampler
24
+
25
+ # model settings
26
+ model = dict(
27
+ type=FasterRCNN,
28
+ data_preprocessor=dict(
29
+ type=DetDataPreprocessor,
30
+ mean=[123.675, 116.28, 103.53],
31
+ std=[58.395, 57.12, 57.375],
32
+ bgr_to_rgb=True,
33
+ pad_size_divisor=32),
34
+ backbone=dict(
35
+ type=ResNet,
36
+ depth=50,
37
+ num_stages=4,
38
+ out_indices=(0, 1, 2, 3),
39
+ frozen_stages=1,
40
+ norm_cfg=dict(type=BatchNorm2d, requires_grad=True),
41
+ norm_eval=True,
42
+ style='pytorch',
43
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
44
+ neck=dict(
45
+ type=FPN,
46
+ in_channels=[256, 512, 1024, 2048],
47
+ out_channels=256,
48
+ num_outs=5),
49
+ rpn_head=dict(
50
+ type=RPNHead,
51
+ in_channels=256,
52
+ feat_channels=256,
53
+ anchor_generator=dict(
54
+ type=AnchorGenerator,
55
+ scales=[8],
56
+ ratios=[0.5, 1.0, 2.0],
57
+ strides=[4, 8, 16, 32, 64]),
58
+ bbox_coder=dict(
59
+ type=DeltaXYWHBBoxCoder,
60
+ target_means=[.0, .0, .0, .0],
61
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
62
+ loss_cls=dict(
63
+ type=CrossEntropyLoss, use_sigmoid=True, loss_weight=1.0),
64
+ loss_bbox=dict(type=L1Loss, loss_weight=1.0)),
65
+ roi_head=dict(
66
+ type=StandardRoIHead,
67
+ bbox_roi_extractor=dict(
68
+ type=SingleRoIExtractor,
69
+ roi_layer=dict(type=RoIAlign, output_size=7, sampling_ratio=0),
70
+ out_channels=256,
71
+ featmap_strides=[4, 8, 16, 32]),
72
+ bbox_head=dict(
73
+ type=Shared2FCBBoxHead,
74
+ in_channels=256,
75
+ fc_out_channels=1024,
76
+ roi_feat_size=7,
77
+ num_classes=80,
78
+ bbox_coder=dict(
79
+ type=DeltaXYWHBBoxCoder,
80
+ target_means=[0., 0., 0., 0.],
81
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
82
+ reg_class_agnostic=False,
83
+ loss_cls=dict(
84
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
85
+ loss_bbox=dict(type=L1Loss, loss_weight=1.0))),
86
+ # model training and testing settings
87
+ train_cfg=dict(
88
+ rpn=dict(
89
+ assigner=dict(
90
+ type=MaxIoUAssigner,
91
+ pos_iou_thr=0.7,
92
+ neg_iou_thr=0.3,
93
+ min_pos_iou=0.3,
94
+ match_low_quality=True,
95
+ ignore_iof_thr=-1),
96
+ sampler=dict(
97
+ type=RandomSampler,
98
+ num=256,
99
+ pos_fraction=0.5,
100
+ neg_pos_ub=-1,
101
+ add_gt_as_proposals=False),
102
+ allowed_border=-1,
103
+ pos_weight=-1,
104
+ debug=False),
105
+ rpn_proposal=dict(
106
+ nms_pre=2000,
107
+ max_per_img=1000,
108
+ nms=dict(type=nms, iou_threshold=0.7),
109
+ min_bbox_size=0),
110
+ rcnn=dict(
111
+ assigner=dict(
112
+ type=MaxIoUAssigner,
113
+ pos_iou_thr=0.5,
114
+ neg_iou_thr=0.5,
115
+ min_pos_iou=0.5,
116
+ match_low_quality=False,
117
+ ignore_iof_thr=-1),
118
+ sampler=dict(
119
+ type=RandomSampler,
120
+ num=512,
121
+ pos_fraction=0.25,
122
+ neg_pos_ub=-1,
123
+ add_gt_as_proposals=True),
124
+ pos_weight=-1,
125
+ debug=False)),
126
+ test_cfg=dict(
127
+ rpn=dict(
128
+ nms_pre=1000,
129
+ max_per_img=1000,
130
+ nms=dict(type=nms, iou_threshold=0.7),
131
+ min_bbox_size=0),
132
+ rcnn=dict(
133
+ score_thr=0.05,
134
+ nms=dict(type=nms, iou_threshold=0.5),
135
+ max_per_img=100)
136
+ # soft-nms is also supported for rcnn testing
137
+ # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
138
+ ))
head_extractor/build/lib/mmdet/configs/_base_/models/mask_rcnn_r50_caffe_c4.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.ops import RoIAlign, nms
3
+ from mmengine.model.weight_init import PretrainedInit
4
+ from torch.nn import BatchNorm2d
5
+
6
+ from mmdet.models.backbones.resnet import ResNet
7
+ from mmdet.models.data_preprocessors.data_preprocessor import \
8
+ DetDataPreprocessor
9
+ from mmdet.models.dense_heads.rpn_head import RPNHead
10
+ from mmdet.models.detectors.mask_rcnn import MaskRCNN
11
+ from mmdet.models.layers import ResLayer
12
+ from mmdet.models.losses.cross_entropy_loss import CrossEntropyLoss
13
+ from mmdet.models.losses.smooth_l1_loss import L1Loss
14
+ from mmdet.models.roi_heads.bbox_heads.bbox_head import BBoxHead
15
+ from mmdet.models.roi_heads.mask_heads.fcn_mask_head import FCNMaskHead
16
+ from mmdet.models.roi_heads.roi_extractors.single_level_roi_extractor import \
17
+ SingleRoIExtractor
18
+ from mmdet.models.roi_heads.standard_roi_head import StandardRoIHead
19
+ from mmdet.models.task_modules.assigners.max_iou_assigner import MaxIoUAssigner
20
+ from mmdet.models.task_modules.coders.delta_xywh_bbox_coder import \
21
+ DeltaXYWHBBoxCoder
22
+ from mmdet.models.task_modules.prior_generators.anchor_generator import \
23
+ AnchorGenerator
24
+ from mmdet.models.task_modules.samplers.random_sampler import RandomSampler
25
+
26
+ # model settings
27
+ norm_cfg = dict(type=BatchNorm2d, requires_grad=False)
28
+ # model settings
29
+ model = dict(
30
+ type=MaskRCNN,
31
+ data_preprocessor=dict(
32
+ type=DetDataPreprocessor,
33
+ mean=[103.530, 116.280, 123.675],
34
+ std=[1.0, 1.0, 1.0],
35
+ bgr_to_rgb=False,
36
+ pad_mask=True,
37
+ pad_size_divisor=32),
38
+ backbone=dict(
39
+ type=ResNet,
40
+ depth=50,
41
+ num_stages=3,
42
+ strides=(1, 2, 2),
43
+ dilations=(1, 1, 1),
44
+ out_indices=(2, ),
45
+ frozen_stages=1,
46
+ norm_cfg=dict(type=BatchNorm2d, requires_grad=True),
47
+ norm_eval=True,
48
+ style='caffe',
49
+ init_cfg=dict(
50
+ type=PretrainedInit,
51
+ checkpoint='open-mmlab://detectron2/resnet50_caffe')),
52
+ rpn_head=dict(
53
+ type=RPNHead,
54
+ in_channels=1024,
55
+ feat_channels=1024,
56
+ anchor_generator=dict(
57
+ type=AnchorGenerator,
58
+ scales=[2, 4, 8, 16, 32],
59
+ ratios=[0.5, 1.0, 2.0],
60
+ strides=[16]),
61
+ bbox_coder=dict(
62
+ type=DeltaXYWHBBoxCoder,
63
+ target_means=[.0, .0, .0, .0],
64
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
65
+ loss_cls=dict(
66
+ type=CrossEntropyLoss, use_sigmoid=True, loss_weight=1.0),
67
+ loss_bbox=dict(type=L1Loss, loss_weight=1.0)),
68
+ roi_head=dict(
69
+ type=StandardRoIHead,
70
+ shared_head=dict(
71
+ type=ResLayer,
72
+ depth=50,
73
+ stage=3,
74
+ stride=2,
75
+ dilation=1,
76
+ style='caffe',
77
+ norm_cfg=norm_cfg,
78
+ norm_eval=True),
79
+ bbox_roi_extractor=dict(
80
+ type=SingleRoIExtractor,
81
+ roi_layer=dict(type=RoIAlign, output_size=14, sampling_ratio=0),
82
+ out_channels=1024,
83
+ featmap_strides=[16]),
84
+ bbox_head=dict(
85
+ type=BBoxHead,
86
+ with_avg_pool=True,
87
+ roi_feat_size=7,
88
+ in_channels=2048,
89
+ num_classes=80,
90
+ bbox_coder=dict(
91
+ type=DeltaXYWHBBoxCoder,
92
+ target_means=[0., 0., 0., 0.],
93
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
94
+ reg_class_agnostic=False,
95
+ loss_cls=dict(
96
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
97
+ loss_bbox=dict(type=L1Loss, loss_weight=1.0)),
98
+ mask_roi_extractor=None,
99
+ mask_head=dict(
100
+ type=FCNMaskHead,
101
+ num_convs=0,
102
+ in_channels=2048,
103
+ conv_out_channels=256,
104
+ num_classes=80,
105
+ loss_mask=dict(
106
+ type=CrossEntropyLoss, use_mask=True, loss_weight=1.0))),
107
+ # model training and testing settings
108
+ train_cfg=dict(
109
+ rpn=dict(
110
+ assigner=dict(
111
+ type=MaxIoUAssigner,
112
+ pos_iou_thr=0.7,
113
+ neg_iou_thr=0.3,
114
+ min_pos_iou=0.3,
115
+ match_low_quality=True,
116
+ ignore_iof_thr=-1),
117
+ sampler=dict(
118
+ type=RandomSampler,
119
+ num=256,
120
+ pos_fraction=0.5,
121
+ neg_pos_ub=-1,
122
+ add_gt_as_proposals=False),
123
+ allowed_border=0,
124
+ pos_weight=-1,
125
+ debug=False),
126
+ rpn_proposal=dict(
127
+ nms_pre=12000,
128
+ max_per_img=2000,
129
+ nms=dict(type=nms, iou_threshold=0.7),
130
+ min_bbox_size=0),
131
+ rcnn=dict(
132
+ assigner=dict(
133
+ type=MaxIoUAssigner,
134
+ pos_iou_thr=0.5,
135
+ neg_iou_thr=0.5,
136
+ min_pos_iou=0.5,
137
+ match_low_quality=False,
138
+ ignore_iof_thr=-1),
139
+ sampler=dict(
140
+ type=RandomSampler,
141
+ num=512,
142
+ pos_fraction=0.25,
143
+ neg_pos_ub=-1,
144
+ add_gt_as_proposals=True),
145
+ mask_size=14,
146
+ pos_weight=-1,
147
+ debug=False)),
148
+ test_cfg=dict(
149
+ rpn=dict(
150
+ nms_pre=6000,
151
+ max_per_img=1000,
152
+ nms=dict(type=nms, iou_threshold=0.7),
153
+ min_bbox_size=0),
154
+ rcnn=dict(
155
+ score_thr=0.05,
156
+ nms=dict(type=nms, iou_threshold=0.5),
157
+ max_per_img=100,
158
+ mask_thr_binary=0.5)))
head_extractor/build/lib/mmdet/configs/_base_/models/mask_rcnn_r50_fpn.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.ops import RoIAlign, nms
3
+ from mmengine.model.weight_init import PretrainedInit
4
+ from torch.nn import BatchNorm2d
5
+
6
+ from mmdet.models.backbones.resnet import ResNet
7
+ from mmdet.models.data_preprocessors.data_preprocessor import \
8
+ DetDataPreprocessor
9
+ from mmdet.models.dense_heads.rpn_head import RPNHead
10
+ from mmdet.models.detectors.mask_rcnn import MaskRCNN
11
+ from mmdet.models.losses.cross_entropy_loss import CrossEntropyLoss
12
+ from mmdet.models.losses.smooth_l1_loss import L1Loss
13
+ from mmdet.models.necks.fpn import FPN
14
+ from mmdet.models.roi_heads.bbox_heads.convfc_bbox_head import \
15
+ Shared2FCBBoxHead
16
+ from mmdet.models.roi_heads.mask_heads.fcn_mask_head import FCNMaskHead
17
+ from mmdet.models.roi_heads.roi_extractors.single_level_roi_extractor import \
18
+ SingleRoIExtractor
19
+ from mmdet.models.roi_heads.standard_roi_head import StandardRoIHead
20
+ from mmdet.models.task_modules.assigners.max_iou_assigner import MaxIoUAssigner
21
+ from mmdet.models.task_modules.coders.delta_xywh_bbox_coder import \
22
+ DeltaXYWHBBoxCoder
23
+ from mmdet.models.task_modules.prior_generators.anchor_generator import \
24
+ AnchorGenerator
25
+ from mmdet.models.task_modules.samplers.random_sampler import RandomSampler
26
+
27
+ # model settings
28
+ model = dict(
29
+ type=MaskRCNN,
30
+ data_preprocessor=dict(
31
+ type=DetDataPreprocessor,
32
+ mean=[123.675, 116.28, 103.53],
33
+ std=[58.395, 57.12, 57.375],
34
+ bgr_to_rgb=True,
35
+ pad_mask=True,
36
+ pad_size_divisor=32),
37
+ backbone=dict(
38
+ type=ResNet,
39
+ depth=50,
40
+ num_stages=4,
41
+ out_indices=(0, 1, 2, 3),
42
+ frozen_stages=1,
43
+ norm_cfg=dict(type=BatchNorm2d, requires_grad=True),
44
+ norm_eval=True,
45
+ style='pytorch',
46
+ init_cfg=dict(
47
+ type=PretrainedInit, checkpoint='torchvision://resnet50')),
48
+ neck=dict(
49
+ type=FPN,
50
+ in_channels=[256, 512, 1024, 2048],
51
+ out_channels=256,
52
+ num_outs=5),
53
+ rpn_head=dict(
54
+ type=RPNHead,
55
+ in_channels=256,
56
+ feat_channels=256,
57
+ anchor_generator=dict(
58
+ type=AnchorGenerator,
59
+ scales=[8],
60
+ ratios=[0.5, 1.0, 2.0],
61
+ strides=[4, 8, 16, 32, 64]),
62
+ bbox_coder=dict(
63
+ type=DeltaXYWHBBoxCoder,
64
+ target_means=[.0, .0, .0, .0],
65
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
66
+ loss_cls=dict(
67
+ type=CrossEntropyLoss, use_sigmoid=True, loss_weight=1.0),
68
+ loss_bbox=dict(type=L1Loss, loss_weight=1.0)),
69
+ roi_head=dict(
70
+ type=StandardRoIHead,
71
+ bbox_roi_extractor=dict(
72
+ type=SingleRoIExtractor,
73
+ roi_layer=dict(type=RoIAlign, output_size=7, sampling_ratio=0),
74
+ out_channels=256,
75
+ featmap_strides=[4, 8, 16, 32]),
76
+ bbox_head=dict(
77
+ type=Shared2FCBBoxHead,
78
+ in_channels=256,
79
+ fc_out_channels=1024,
80
+ roi_feat_size=7,
81
+ num_classes=80,
82
+ bbox_coder=dict(
83
+ type=DeltaXYWHBBoxCoder,
84
+ target_means=[0., 0., 0., 0.],
85
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
86
+ reg_class_agnostic=False,
87
+ loss_cls=dict(
88
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
89
+ loss_bbox=dict(type=L1Loss, loss_weight=1.0)),
90
+ mask_roi_extractor=dict(
91
+ type=SingleRoIExtractor,
92
+ roi_layer=dict(type=RoIAlign, output_size=14, sampling_ratio=0),
93
+ out_channels=256,
94
+ featmap_strides=[4, 8, 16, 32]),
95
+ mask_head=dict(
96
+ type=FCNMaskHead,
97
+ num_convs=4,
98
+ in_channels=256,
99
+ conv_out_channels=256,
100
+ num_classes=80,
101
+ loss_mask=dict(
102
+ type=CrossEntropyLoss, use_mask=True, loss_weight=1.0))),
103
+ # model training and testing settings
104
+ train_cfg=dict(
105
+ rpn=dict(
106
+ assigner=dict(
107
+ type=MaxIoUAssigner,
108
+ pos_iou_thr=0.7,
109
+ neg_iou_thr=0.3,
110
+ min_pos_iou=0.3,
111
+ match_low_quality=True,
112
+ ignore_iof_thr=-1),
113
+ sampler=dict(
114
+ type=RandomSampler,
115
+ num=256,
116
+ pos_fraction=0.5,
117
+ neg_pos_ub=-1,
118
+ add_gt_as_proposals=False),
119
+ allowed_border=-1,
120
+ pos_weight=-1,
121
+ debug=False),
122
+ rpn_proposal=dict(
123
+ nms_pre=2000,
124
+ max_per_img=1000,
125
+ nms=dict(type=nms, iou_threshold=0.7),
126
+ min_bbox_size=0),
127
+ rcnn=dict(
128
+ assigner=dict(
129
+ type=MaxIoUAssigner,
130
+ pos_iou_thr=0.5,
131
+ neg_iou_thr=0.5,
132
+ min_pos_iou=0.5,
133
+ match_low_quality=True,
134
+ ignore_iof_thr=-1),
135
+ sampler=dict(
136
+ type=RandomSampler,
137
+ num=512,
138
+ pos_fraction=0.25,
139
+ neg_pos_ub=-1,
140
+ add_gt_as_proposals=True),
141
+ mask_size=28,
142
+ pos_weight=-1,
143
+ debug=False)),
144
+ test_cfg=dict(
145
+ rpn=dict(
146
+ nms_pre=1000,
147
+ max_per_img=1000,
148
+ nms=dict(type=nms, iou_threshold=0.7),
149
+ min_bbox_size=0),
150
+ rcnn=dict(
151
+ score_thr=0.05,
152
+ nms=dict(type=nms, iou_threshold=0.5),
153
+ max_per_img=100,
154
+ mask_thr_binary=0.5)))
head_extractor/build/lib/mmdet/configs/_base_/models/retinanet_r50_fpn.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.ops import nms
3
+ from torch.nn import BatchNorm2d
4
+
5
+ from mmdet.models import (FPN, DetDataPreprocessor, FocalLoss, L1Loss, ResNet,
6
+ RetinaHead, RetinaNet)
7
+ from mmdet.models.task_modules import (AnchorGenerator, DeltaXYWHBBoxCoder,
8
+ MaxIoUAssigner, PseudoSampler)
9
+
10
+ # model settings
11
+ model = dict(
12
+ type=RetinaNet,
13
+ data_preprocessor=dict(
14
+ type=DetDataPreprocessor,
15
+ mean=[123.675, 116.28, 103.53],
16
+ std=[58.395, 57.12, 57.375],
17
+ bgr_to_rgb=True,
18
+ pad_size_divisor=32),
19
+ backbone=dict(
20
+ type=ResNet,
21
+ depth=50,
22
+ num_stages=4,
23
+ out_indices=(0, 1, 2, 3),
24
+ frozen_stages=1,
25
+ norm_cfg=dict(type=BatchNorm2d, requires_grad=True),
26
+ norm_eval=True,
27
+ style='pytorch',
28
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
29
+ neck=dict(
30
+ type=FPN,
31
+ in_channels=[256, 512, 1024, 2048],
32
+ out_channels=256,
33
+ start_level=1,
34
+ add_extra_convs='on_input',
35
+ num_outs=5),
36
+ bbox_head=dict(
37
+ type=RetinaHead,
38
+ num_classes=80,
39
+ in_channels=256,
40
+ stacked_convs=4,
41
+ feat_channels=256,
42
+ anchor_generator=dict(
43
+ type=AnchorGenerator,
44
+ octave_base_scale=4,
45
+ scales_per_octave=3,
46
+ ratios=[0.5, 1.0, 2.0],
47
+ strides=[8, 16, 32, 64, 128]),
48
+ bbox_coder=dict(
49
+ type=DeltaXYWHBBoxCoder,
50
+ target_means=[.0, .0, .0, .0],
51
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
52
+ loss_cls=dict(
53
+ type=FocalLoss,
54
+ use_sigmoid=True,
55
+ gamma=2.0,
56
+ alpha=0.25,
57
+ loss_weight=1.0),
58
+ loss_bbox=dict(type=L1Loss, loss_weight=1.0)),
59
+ # model training and testing settings
60
+ train_cfg=dict(
61
+ assigner=dict(
62
+ type=MaxIoUAssigner,
63
+ pos_iou_thr=0.5,
64
+ neg_iou_thr=0.4,
65
+ min_pos_iou=0,
66
+ ignore_iof_thr=-1),
67
+ sampler=dict(
68
+ type=PseudoSampler), # Focal loss should use PseudoSampler
69
+ allowed_border=-1,
70
+ pos_weight=-1,
71
+ debug=False),
72
+ test_cfg=dict(
73
+ nms_pre=1000,
74
+ min_bbox_size=0,
75
+ score_thr=0.05,
76
+ nms=dict(type=nms, iou_threshold=0.5),
77
+ max_per_img=100))
head_extractor/build/lib/mmdet/configs/_base_/schedules/schedule_1x.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
3
+ from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
4
+ from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
5
+ from torch.optim.sgd import SGD
6
+
7
+ # training schedule for 1x
8
+ train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=12, val_interval=1)
9
+ val_cfg = dict(type=ValLoop)
10
+ test_cfg = dict(type=TestLoop)
11
+
12
+ # learning rate
13
+ param_scheduler = [
14
+ dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=500),
15
+ dict(
16
+ type=MultiStepLR,
17
+ begin=0,
18
+ end=12,
19
+ by_epoch=True,
20
+ milestones=[8, 11],
21
+ gamma=0.1)
22
+ ]
23
+
24
+ # optimizer
25
+ optim_wrapper = dict(
26
+ type=OptimWrapper,
27
+ optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001))
28
+
29
+ # Default setting for scaling LR automatically
30
+ # - `enable` means enable scaling LR automatically
31
+ # or not by default.
32
+ # - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
33
+ auto_scale_lr = dict(enable=False, base_batch_size=16)
head_extractor/build/lib/mmdet/configs/_base_/schedules/schedule_2x.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
3
+ from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
4
+ from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
5
+ from torch.optim.sgd import SGD
6
+
7
+ # training schedule for 1x
8
+ train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=24, val_interval=1)
9
+ val_cfg = dict(type=ValLoop)
10
+ test_cfg = dict(type=TestLoop)
11
+
12
+ # learning rate
13
+ param_scheduler = [
14
+ dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=500),
15
+ dict(
16
+ type=MultiStepLR,
17
+ begin=0,
18
+ end=24,
19
+ by_epoch=True,
20
+ milestones=[16, 22],
21
+ gamma=0.1)
22
+ ]
23
+
24
+ # optimizer
25
+ optim_wrapper = dict(
26
+ type=OptimWrapper,
27
+ optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001))
28
+
29
+ # Default setting for scaling LR automatically
30
+ # - `enable` means enable scaling LR automatically
31
+ # or not by default.
32
+ # - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
33
+ auto_scale_lr = dict(enable=False, base_batch_size=16)
head_extractor/build/lib/mmdet/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .._base_.datasets.coco_instance import *
11
+ from .._base_.default_runtime import *
12
+ from .._base_.models.cascade_mask_rcnn_r50_fpn import *
13
+ from .._base_.schedules.schedule_1x import *
head_extractor/build/lib/mmdet/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .._base_.datasets.coco_detection import *
11
+ from .._base_.default_runtime import *
12
+ from .._base_.models.cascade_rcnn_r50_fpn import *
13
+ from .._base_.schedules.schedule_1x import *
head_extractor/build/lib/mmdet/configs/common/lsj_100e_coco_detection.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .._base_.default_runtime import *
11
+
12
+ from mmengine.dataset.sampler import DefaultSampler
13
+ from mmengine.optim import OptimWrapper
14
+ from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
15
+ from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
16
+ from torch.optim import SGD
17
+
18
+ from mmdet.datasets import CocoDataset, RepeatDataset
19
+ from mmdet.datasets.transforms.formatting import PackDetInputs
20
+ from mmdet.datasets.transforms.loading import (FilterAnnotations,
21
+ LoadAnnotations,
22
+ LoadImageFromFile)
23
+ from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
24
+ Pad, RandomCrop, RandomFlip,
25
+ RandomResize, Resize)
26
+ from mmdet.evaluation import CocoMetric
27
+
28
+ # dataset settings
29
+ dataset_type = CocoDataset
30
+ data_root = 'data/coco/'
31
+ image_size = (1024, 1024)
32
+
33
+ backend_args = None
34
+
35
+ train_pipeline = [
36
+ dict(type=LoadImageFromFile, backend_args=backend_args),
37
+ dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
38
+ dict(
39
+ type=RandomResize,
40
+ scale=image_size,
41
+ ratio_range=(0.1, 2.0),
42
+ keep_ratio=True),
43
+ dict(
44
+ type=RandomCrop,
45
+ crop_type='absolute_range',
46
+ crop_size=image_size,
47
+ recompute_bbox=True,
48
+ allow_negative_crop=True),
49
+ dict(type=FilterAnnotations, min_gt_bbox_wh=(1e-2, 1e-2)),
50
+ dict(type=RandomFlip, prob=0.5),
51
+ dict(type=PackDetInputs)
52
+ ]
53
+ test_pipeline = [
54
+ dict(type=LoadImageFromFile, backend_args=backend_args),
55
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
56
+ dict(type=LoadAnnotations, with_bbox=True),
57
+ dict(
58
+ type=PackDetInputs,
59
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
60
+ 'scale_factor'))
61
+ ]
62
+
63
+ # Use RepeatDataset to speed up training
64
+ train_dataloader = dict(
65
+ batch_size=2,
66
+ num_workers=2,
67
+ persistent_workers=True,
68
+ sampler=dict(type=DefaultSampler, shuffle=True),
69
+ dataset=dict(
70
+ type=RepeatDataset,
71
+ times=4, # simply change this from 2 to 16 for 50e - 400e training.
72
+ dataset=dict(
73
+ type=dataset_type,
74
+ data_root=data_root,
75
+ ann_file='annotations/instances_train2017.json',
76
+ data_prefix=dict(img='train2017/'),
77
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
78
+ pipeline=train_pipeline,
79
+ backend_args=backend_args)))
80
+ val_dataloader = dict(
81
+ batch_size=1,
82
+ num_workers=2,
83
+ persistent_workers=True,
84
+ drop_last=False,
85
+ sampler=dict(type=DefaultSampler, shuffle=False),
86
+ dataset=dict(
87
+ type=dataset_type,
88
+ data_root=data_root,
89
+ ann_file='annotations/instances_val2017.json',
90
+ data_prefix=dict(img='val2017/'),
91
+ test_mode=True,
92
+ pipeline=test_pipeline,
93
+ backend_args=backend_args))
94
+ test_dataloader = val_dataloader
95
+
96
+ val_evaluator = dict(
97
+ type=CocoMetric,
98
+ ann_file=data_root + 'annotations/instances_val2017.json',
99
+ metric=['bbox', 'segm'],
100
+ format_only=False,
101
+ backend_args=backend_args)
102
+ test_evaluator = val_evaluator
103
+
104
+ max_epochs = 25
105
+
106
+ train_cfg = dict(
107
+ type=EpochBasedTrainLoop, max_epochs=max_epochs, val_interval=5)
108
+ val_cfg = dict(type=ValLoop)
109
+ test_cfg = dict(type=TestLoop)
110
+
111
+ # optimizer assumes bs=64
112
+ optim_wrapper = dict(
113
+ type=OptimWrapper,
114
+ optimizer=dict(type=SGD, lr=0.1, momentum=0.9, weight_decay=0.00004))
115
+
116
+ # learning rate
117
+ param_scheduler = [
118
+ dict(type=LinearLR, start_factor=0.067, by_epoch=False, begin=0, end=500),
119
+ dict(
120
+ type=MultiStepLR,
121
+ begin=0,
122
+ end=max_epochs,
123
+ by_epoch=True,
124
+ milestones=[22, 24],
125
+ gamma=0.1)
126
+ ]
127
+
128
+ # only keep latest 2 checkpoints
129
+ default_hooks.update(dict(checkpoint=dict(max_keep_ckpts=2)))
130
+
131
+ # NOTE: `auto_scale_lr` is for automatically scaling LR,
132
+ # USER SHOULD NOT CHANGE ITS VALUES.
133
+ # base_batch_size = (32 GPUs) x (2 samples per GPU)
134
+ auto_scale_lr = dict(base_batch_size=64)
head_extractor/build/lib/mmdet/configs/common/lsj_100e_coco_instance.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .._base_.default_runtime import *
11
+
12
+ from mmengine.dataset.sampler import DefaultSampler
13
+ from mmengine.optim import OptimWrapper
14
+ from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
15
+ from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
16
+ from torch.optim import SGD
17
+
18
+ from mmdet.datasets import CocoDataset, RepeatDataset
19
+ from mmdet.datasets.transforms.formatting import PackDetInputs
20
+ from mmdet.datasets.transforms.loading import (FilterAnnotations,
21
+ LoadAnnotations,
22
+ LoadImageFromFile)
23
+ from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
24
+ Pad, RandomCrop, RandomFlip,
25
+ RandomResize, Resize)
26
+ from mmdet.evaluation import CocoMetric
27
+
28
+ # dataset settings
29
+ dataset_type = CocoDataset
30
+ data_root = 'data/coco/'
31
+ image_size = (1024, 1024)
32
+
33
+ backend_args = None
34
+
35
+ train_pipeline = [
36
+ dict(type=LoadImageFromFile, backend_args=backend_args),
37
+ dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
38
+ dict(
39
+ type=RandomResize,
40
+ scale=image_size,
41
+ ratio_range=(0.1, 2.0),
42
+ keep_ratio=True),
43
+ dict(
44
+ type=RandomCrop,
45
+ crop_type='absolute_range',
46
+ crop_size=image_size,
47
+ recompute_bbox=True,
48
+ allow_negative_crop=True),
49
+ dict(type=FilterAnnotations, min_gt_bbox_wh=(1e-2, 1e-2)),
50
+ dict(type=RandomFlip, prob=0.5),
51
+ dict(type=PackDetInputs)
52
+ ]
53
+ test_pipeline = [
54
+ dict(type=LoadImageFromFile, backend_args=backend_args),
55
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
56
+ dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
57
+ dict(
58
+ type=PackDetInputs,
59
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
60
+ 'scale_factor'))
61
+ ]
62
+
63
+ # Use RepeatDataset to speed up training
64
+ train_dataloader = dict(
65
+ batch_size=2,
66
+ num_workers=2,
67
+ persistent_workers=True,
68
+ sampler=dict(type=DefaultSampler, shuffle=True),
69
+ dataset=dict(
70
+ type=RepeatDataset,
71
+ times=4, # simply change this from 2 to 16 for 50e - 400e training.
72
+ dataset=dict(
73
+ type=dataset_type,
74
+ data_root=data_root,
75
+ ann_file='annotations/instances_train2017.json',
76
+ data_prefix=dict(img='train2017/'),
77
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
78
+ pipeline=train_pipeline,
79
+ backend_args=backend_args)))
80
+ val_dataloader = dict(
81
+ batch_size=1,
82
+ num_workers=2,
83
+ persistent_workers=True,
84
+ drop_last=False,
85
+ sampler=dict(type=DefaultSampler, shuffle=False),
86
+ dataset=dict(
87
+ type=dataset_type,
88
+ data_root=data_root,
89
+ ann_file='annotations/instances_val2017.json',
90
+ data_prefix=dict(img='val2017/'),
91
+ test_mode=True,
92
+ pipeline=test_pipeline,
93
+ backend_args=backend_args))
94
+ test_dataloader = val_dataloader
95
+
96
+ val_evaluator = dict(
97
+ type=CocoMetric,
98
+ ann_file=data_root + 'annotations/instances_val2017.json',
99
+ metric=['bbox', 'segm'],
100
+ format_only=False,
101
+ backend_args=backend_args)
102
+ test_evaluator = val_evaluator
103
+
104
+ max_epochs = 25
105
+
106
+ train_cfg = dict(
107
+ type=EpochBasedTrainLoop, max_epochs=max_epochs, val_interval=5)
108
+ val_cfg = dict(type=ValLoop)
109
+ test_cfg = dict(type=TestLoop)
110
+
111
+ # optimizer assumes bs=64
112
+ optim_wrapper = dict(
113
+ type=OptimWrapper,
114
+ optimizer=dict(type=SGD, lr=0.1, momentum=0.9, weight_decay=0.00004))
115
+
116
+ # learning rate
117
+ param_scheduler = [
118
+ dict(type=LinearLR, start_factor=0.067, by_epoch=False, begin=0, end=500),
119
+ dict(
120
+ type=MultiStepLR,
121
+ begin=0,
122
+ end=max_epochs,
123
+ by_epoch=True,
124
+ milestones=[22, 24],
125
+ gamma=0.1)
126
+ ]
127
+
128
+ # only keep latest 2 checkpoints
129
+ default_hooks.update(dict(checkpoint=dict(max_keep_ckpts=2)))
130
+
131
+ # NOTE: `auto_scale_lr` is for automatically scaling LR,
132
+ # USER SHOULD NOT CHANGE ITS VALUES.
133
+ # base_batch_size = (32 GPUs) x (2 samples per GPU)
134
+ auto_scale_lr = dict(base_batch_size=64)
head_extractor/build/lib/mmdet/configs/common/lsj_200e_coco_detection.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .lsj_100e_coco_detection import *
11
+
12
+ # 8x25=200e
13
+ train_dataloader.update(dict(dataset=dict(times=8)))
14
+
15
+ # learning rate
16
+ param_scheduler = [
17
+ dict(type=LinearLR, start_factor=0.067, by_epoch=False, begin=0, end=1000),
18
+ dict(
19
+ type=MultiStepLR,
20
+ begin=0,
21
+ end=25,
22
+ by_epoch=True,
23
+ milestones=[22, 24],
24
+ gamma=0.1)
25
+ ]
head_extractor/build/lib/mmdet/configs/common/lsj_200e_coco_instance.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright (c) OpenMMLab. All rights reserved.

# Pure-Python style config; see
# https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta  # noqa
# Requires mmcv >= 2.0.1 and mmengine >= 0.8.0.

from mmengine.config import read_base

with read_base():
    from .lsj_100e_coco_instance import *

# Repeat the dataset 8 times on top of the 25-epoch base: 8 x 25 = 200 epochs.
train_dataloader.update(dict(dataset=dict(times=8)))

# Learning-rate schedule: linear warmup over the first 1000 iterations,
# then a x0.1 step decay at epochs 22 and 24.
param_scheduler = [
    dict(type=LinearLR, start_factor=0.067, by_epoch=False, begin=0, end=1000),
    dict(
        type=MultiStepLR,
        begin=0,
        end=25,
        by_epoch=True,
        milestones=[22, 24],
        gamma=0.1),
]
head_extractor/build/lib/mmdet/configs/common/ms_3x_coco.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright (c) OpenMMLab. All rights reserved.

# Multi-scale 3x training schedule for COCO bbox detection.
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0

from mmengine.config import read_base

with read_base():
    from .._base_.default_runtime import *

from mmcv.transforms import RandomResize
from mmengine.dataset import RepeatDataset
from mmengine.dataset.sampler import DefaultSampler
from mmengine.optim import OptimWrapper
from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
from torch.optim import SGD

from mmdet.datasets import AspectRatioBatchSampler, CocoDataset
from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import (LoadAnnotations,
                                               LoadImageFromFile)
from mmdet.datasets.transforms.transforms import RandomFlip, Resize
from mmdet.evaluation import CocoMetric

# dataset settings
dataset_type = CocoDataset
data_root = 'data/coco/'

# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)

# data_root = 's3://openmmlab/datasets/detection/coco/'

# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
backend_args = None

# In mstrain 3x config, img_scale=[(1333, 640), (1333, 800)],
# multiscale_mode='range'
train_pipeline = [
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(type=LoadAnnotations, with_bbox=True),
    dict(type=RandomResize, scale=[(1333, 640), (1333, 800)], keep_ratio=True),
    dict(type=RandomFlip, prob=0.5),
    dict(type=PackDetInputs)
]
test_pipeline = [
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(type=Resize, scale=(1333, 800), keep_ratio=True),
    dict(type=LoadAnnotations, with_bbox=True),
    dict(
        type=PackDetInputs,
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    pin_memory=True,
    sampler=dict(type=DefaultSampler, shuffle=True),
    batch_sampler=dict(type=AspectRatioBatchSampler),
    dataset=dict(
        type=RepeatDataset,
        times=3,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='annotations/instances_train2017.json',
            data_prefix=dict(img='train2017/'),
            filter_cfg=dict(filter_empty_gt=True, min_size=32),
            pipeline=train_pipeline,
            backend_args=backend_args)))
val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type=DefaultSampler, shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='annotations/instances_val2017.json',
        data_prefix=dict(img='val2017/'),
        test_mode=True,
        pipeline=test_pipeline,
        backend_args=backend_args))
test_dataloader = val_dataloader

val_evaluator = dict(
    type=CocoMetric,
    ann_file=data_root + 'annotations/instances_val2017.json',
    metric='bbox',
    backend_args=backend_args)
test_evaluator = val_evaluator

# training schedule for 3x with `RepeatDataset`
# Fixed: `EpochBasedTrainLoop` takes `max_epochs`, not `max_iters`;
# the original `max_iters=12` would raise a TypeError when the loop is built.
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=12, val_interval=1)
val_cfg = dict(type=ValLoop)
test_cfg = dict(type=TestLoop)

# learning rate: linear warmup for 500 iterations, then x0.1 decay at
# epochs 9 and 11.
# Fixed: the decay stage is epoch-based; with `by_epoch=False` it would have
# stepped at *iterations* 9 and 11, i.e. during warmup.
param_scheduler = [
    dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=500),
    dict(
        type=MultiStepLR,
        begin=0,
        end=12,
        by_epoch=True,
        milestones=[9, 11],
        gamma=0.1)
]

# optimizer
optim_wrapper = dict(
    type=OptimWrapper,
    optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001))
# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
#   or not by default.
# - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=16)
head_extractor/build/lib/mmdet/configs/common/ms_3x_coco_instance.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright (c) OpenMMLab. All rights reserved.

# Multi-scale 3x training schedule for COCO instance segmentation.
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0

from mmengine.config import read_base

with read_base():
    from .._base_.default_runtime import *

from mmcv.transforms import RandomChoiceResize
from mmengine.dataset import RepeatDataset
from mmengine.dataset.sampler import DefaultSampler, InfiniteSampler
from mmengine.optim import OptimWrapper
from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
from mmengine.runner.loops import IterBasedTrainLoop, TestLoop, ValLoop
from torch.optim import SGD

from mmdet.datasets import AspectRatioBatchSampler, CocoDataset
from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import (FilterAnnotations,
                                               LoadAnnotations,
                                               LoadImageFromFile)
from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
                                                  Pad, RandomCrop, RandomFlip,
                                                  RandomResize, Resize)
from mmdet.evaluation import CocoMetric

# dataset settings
dataset_type = CocoDataset
data_root = 'data/coco/'

# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)

# data_root = 's3://openmmlab/datasets/detection/coco/'

# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
backend_args = None

train_pipeline = [
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
    # Fixed: use the imported class instead of the string 'RandomResize' to
    # stay consistent with this pure-Python style config.
    dict(
        type=RandomResize, scale=[(1333, 640), (1333, 800)],
        keep_ratio=True),
    dict(type=RandomFlip, prob=0.5),
    dict(type=PackDetInputs)
]
test_pipeline = [
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(type=Resize, scale=(1333, 800), keep_ratio=True),
    dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
    dict(
        type=PackDetInputs,
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
train_dataloader.update(
    dict(
        batch_size=2,
        num_workers=2,
        persistent_workers=True,
        sampler=dict(type=DefaultSampler, shuffle=True),
        batch_sampler=dict(type=AspectRatioBatchSampler),
        dataset=dict(
            type=RepeatDataset,
            times=3,
            dataset=dict(
                type=dataset_type,
                data_root=data_root,
                ann_file='annotations/instances_train2017.json',
                data_prefix=dict(img='train2017/'),
                filter_cfg=dict(filter_empty_gt=True, min_size=32),
                pipeline=train_pipeline,
                backend_args=backend_args))))
val_dataloader.update(
    dict(
        batch_size=1,
        num_workers=2,
        persistent_workers=True,
        drop_last=False,
        sampler=dict(type=DefaultSampler, shuffle=False),
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='annotations/instances_val2017.json',
            data_prefix=dict(img='val2017/'),
            test_mode=True,
            pipeline=test_pipeline,
            backend_args=backend_args)))
test_dataloader = val_dataloader

val_evaluator.update(
    dict(
        type=CocoMetric,
        ann_file=data_root + 'annotations/instances_val2017.json',
        metric='bbox',
        backend_args=backend_args))
test_evaluator = val_evaluator

# training schedule for 3x with `RepeatDataset`
train_cfg.update(dict(type=EpochBasedTrainLoop, max_epochs=12, val_interval=1))
val_cfg.update(dict(type=ValLoop))
test_cfg.update(dict(type=TestLoop))

# learning rate: linear warmup for 500 iterations, then x0.1 decay at
# epochs 9 and 11.
# Fixed: the decay stage is epoch-based; with `by_epoch=False` it would have
# stepped at *iterations* 9 and 11, i.e. during warmup.
param_scheduler = [
    dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=500),
    dict(
        type=MultiStepLR,
        begin=0,
        end=12,
        by_epoch=True,
        milestones=[9, 11],
        gamma=0.1)
]

# optimizer
optim_wrapper.update(
    dict(
        type=OptimWrapper,
        optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001)))
# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
#   or not by default.
# - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
auto_scale_lr.update(dict(enable=False, base_batch_size=16))
head_extractor/build/lib/mmdet/configs/common/ms_90k_coco.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright (c) OpenMMLab. All rights reserved.

# Multi-scale 90k-iteration schedule for COCO bbox detection.
# Pure-Python style config; see
# https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta  # noqa
# Requires mmcv >= 2.0.1 and mmengine >= 0.8.0.

from mmengine.config import read_base

with read_base():
    from .._base_.default_runtime import *

from mmcv.transforms import RandomChoiceResize
from mmengine.dataset import RepeatDataset
from mmengine.dataset.sampler import DefaultSampler, InfiniteSampler
from mmengine.optim import OptimWrapper
from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
from mmengine.runner.loops import IterBasedTrainLoop, TestLoop, ValLoop
from torch.optim import SGD

from mmdet.datasets import AspectRatioBatchSampler, CocoDataset
from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import (FilterAnnotations,
                                               LoadAnnotations,
                                               LoadImageFromFile)
from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
                                                  Pad, RandomCrop, RandomFlip,
                                                  RandomResize, Resize)
from mmdet.evaluation import CocoMetric

# Dataset settings.
dataset_type = CocoDataset
data_root = 'data/coco/'
# Remote storage: either point ``data_root`` at a URI prefix and let the
# file I/O module infer the backend (LMDB/Memcache not supported), e.g.
# data_root = 's3://openmmlab/datasets/detection/coco/'
# or configure ``backend_args`` explicitly (``file_client_args`` in
# versions before 3.0.0rc6), e.g.
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
backend_args = None

# Decode images with pillow to align with Detectron2.
backend = 'pillow'
train_pipeline = [
    dict(
        type=LoadImageFromFile,
        backend_args=backend_args,
        imdecode_backend=backend),
    dict(type=LoadAnnotations, with_bbox=True),
    dict(
        type=RandomChoiceResize,
        scales=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
                (1333, 768), (1333, 800)],
        keep_ratio=True,
        backend=backend),
    dict(type=RandomFlip, prob=0.5),
    dict(type=PackDetInputs)
]
test_pipeline = [
    dict(
        type=LoadImageFromFile,
        backend_args=backend_args,
        imdecode_backend=backend),
    dict(type=Resize, scale=(1333, 800), keep_ratio=True, backend=backend),
    dict(type=LoadAnnotations, with_bbox=True),
    dict(
        type=PackDetInputs,
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
train_dataloader.update(
    dict(
        batch_size=2,
        num_workers=2,
        persistent_workers=True,
        pin_memory=True,
        sampler=dict(type=InfiniteSampler, shuffle=True),
        batch_sampler=dict(type=AspectRatioBatchSampler),
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='annotations/instances_train2017.json',
            data_prefix=dict(img='train2017/'),
            filter_cfg=dict(filter_empty_gt=True, min_size=32),
            pipeline=train_pipeline,
            backend_args=backend_args)))
val_dataloader.update(
    dict(
        batch_size=1,
        num_workers=2,
        persistent_workers=True,
        drop_last=False,
        pin_memory=True,
        sampler=dict(type=DefaultSampler, shuffle=False),
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='annotations/instances_val2017.json',
            data_prefix=dict(img='val2017/'),
            test_mode=True,
            pipeline=test_pipeline,
            backend_args=backend_args)))
test_dataloader = val_dataloader

val_evaluator.update(
    dict(
        type=CocoMetric,
        ann_file=data_root + 'annotations/instances_val2017.json',
        metric='bbox',
        format_only=False,
        backend_args=backend_args))
test_evaluator = val_evaluator

# Iteration-based schedule: 90k iterations, validating every 10k.
max_iter = 90000
train_cfg.update(
    dict(type=IterBasedTrainLoop, max_iters=max_iter, val_interval=10000))
val_cfg.update(dict(type=ValLoop))
test_cfg.update(dict(type=TestLoop))

# Learning rate: linear warmup over the first 1000 iterations, then a x0.1
# step decay at iterations 60k and 80k.
param_scheduler = [
    dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=1000),
    dict(
        type=MultiStepLR,
        begin=0,
        end=max_iter,
        by_epoch=False,
        milestones=[60000, 80000],
        gamma=0.1)
]

# Optimizer.
optim_wrapper.update(
    dict(
        type=OptimWrapper,
        optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001)))
# Automatic LR scaling (disabled by default);
# ``base_batch_size`` = (8 GPUs) x (2 samples per GPU).
auto_scale_lr.update(dict(enable=False, base_batch_size=16))

# Checkpointing and logging are iteration-based for this schedule.
default_hooks.update(dict(checkpoint=dict(by_epoch=False, interval=10000)))
log_processor.update(dict(by_epoch=False))
head_extractor/build/lib/mmdet/configs/common/ms_poly_3x_coco_instance.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright (c) OpenMMLab. All rights reserved.

# Multi-scale 3x schedule for COCO instance segmentation with polygon masks.
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0

from mmengine.config import read_base

with read_base():
    from .._base_.default_runtime import *

from mmcv.transforms import RandomChoiceResize
from mmengine.dataset import RepeatDataset
from mmengine.dataset.sampler import DefaultSampler, InfiniteSampler
from mmengine.optim import OptimWrapper
from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
from mmengine.runner.loops import IterBasedTrainLoop, TestLoop, ValLoop
from torch.optim import SGD

from mmdet.datasets import AspectRatioBatchSampler, CocoDataset
from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import (FilterAnnotations,
                                               LoadAnnotations,
                                               LoadImageFromFile)
from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
                                                  Pad, RandomCrop, RandomFlip,
                                                  RandomResize, Resize)
from mmdet.evaluation import CocoMetric

# dataset settings
dataset_type = CocoDataset
data_root = 'data/coco/'
# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)

# data_root = 's3://openmmlab/datasets/detection/coco/'

# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
backend_args = None

# In mstrain 3x config, img_scale=[(1333, 640), (1333, 800)],
# multiscale_mode='range'
train_pipeline = [
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(
        type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
    # Fixed: use the imported class instead of the string 'RandomResize' to
    # stay consistent with this pure-Python style config.
    dict(
        type=RandomResize, scale=[(1333, 640), (1333, 800)],
        keep_ratio=True),
    dict(type=RandomFlip, prob=0.5),
    dict(type=PackDetInputs)
]
test_pipeline = [
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(type=Resize, scale=(1333, 800), keep_ratio=True),
    dict(
        type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
    dict(
        type=PackDetInputs,
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
# Fixed: the `RepeatDataset` wrapper was given the COCO-dataset kwargs
# directly and was missing both `times` and the nested `dataset`; it now
# wraps the actual dataset 3 times, matching the other 3x configs.
train_dataloader.update(
    dict(
        batch_size=2,
        num_workers=2,
        persistent_workers=True,
        pin_memory=True,
        sampler=dict(type=DefaultSampler, shuffle=True),
        batch_sampler=dict(type=AspectRatioBatchSampler),
        dataset=dict(
            type=RepeatDataset,
            times=3,
            dataset=dict(
                type=dataset_type,
                data_root=data_root,
                ann_file='annotations/instances_train2017.json',
                data_prefix=dict(img='train2017/'),
                filter_cfg=dict(filter_empty_gt=True, min_size=32),
                pipeline=train_pipeline,
                backend_args=backend_args))))
# NOTE(review): val batch_size=2 differs from the sibling configs (1);
# kept as-is since it only affects evaluation throughput — confirm intent.
val_dataloader.update(
    dict(
        batch_size=2,
        num_workers=2,
        persistent_workers=True,
        drop_last=False,
        pin_memory=True,
        sampler=dict(type=DefaultSampler, shuffle=False),
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='annotations/instances_val2017.json',
            data_prefix=dict(img='val2017/'),
            test_mode=True,
            pipeline=test_pipeline,
            backend_args=backend_args)))
test_dataloader = val_dataloader

val_evaluator.update(
    dict(
        type=CocoMetric,
        ann_file=data_root + 'annotations/instances_val2017.json',
        metric=['bbox', 'segm'],
        backend_args=backend_args))
test_evaluator = val_evaluator

# training schedule for 3x with `RepeatDataset`
# Fixed: `EpochBasedTrainLoop` takes `max_epochs`, not `max_iters`; the
# original `max_iters=12` would raise a TypeError when the loop is built.
train_cfg.update(dict(type=EpochBasedTrainLoop, max_epochs=12, val_interval=1))
val_cfg.update(dict(type=ValLoop))
test_cfg.update(dict(type=TestLoop))

# learning rate: linear warmup for 500 iterations, then x0.1 decay at
# epochs 9 and 11.
# Fixed: the decay stage is epoch-based; with `by_epoch=False` it would have
# stepped at *iterations* 9 and 11, i.e. during warmup.
param_scheduler = [
    dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=500),
    dict(
        type=MultiStepLR,
        begin=0,
        end=12,
        by_epoch=True,
        milestones=[9, 11],
        gamma=0.1)
]

# optimizer
optim_wrapper.update(
    dict(
        type=OptimWrapper,
        optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001)))
# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
#   or not by default.
# - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
auto_scale_lr.update(dict(enable=False, base_batch_size=16))
head_extractor/build/lib/mmdet/configs/common/ms_poly_90k_coco_instance.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright (c) OpenMMLab. All rights reserved.

# Multi-scale 90k-iteration schedule for COCO instance segmentation with
# polygon masks. Pure-Python style config; see
# https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta  # noqa
# Requires mmcv >= 2.0.1 and mmengine >= 0.8.0.

from mmengine.config import read_base

with read_base():
    from .._base_.default_runtime import *

from mmcv.transforms import RandomChoiceResize
from mmengine.dataset import RepeatDataset
from mmengine.dataset.sampler import DefaultSampler, InfiniteSampler
from mmengine.optim import OptimWrapper
from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
from mmengine.runner.loops import IterBasedTrainLoop, TestLoop, ValLoop
from torch.optim import SGD

from mmdet.datasets import AspectRatioBatchSampler, CocoDataset
from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import (FilterAnnotations,
                                               LoadAnnotations,
                                               LoadImageFromFile)
from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
                                                  Pad, RandomCrop, RandomFlip,
                                                  RandomResize, Resize)
from mmdet.evaluation import CocoMetric

# Dataset settings.
dataset_type = CocoDataset
data_root = 'data/coco/'
# Remote storage: either point ``data_root`` at a URI prefix and let the
# file I/O module infer the backend (LMDB/Memcache not supported), e.g.
# data_root = 's3://openmmlab/datasets/detection/coco/'
# or configure ``backend_args`` explicitly (``file_client_args`` in
# versions before 3.0.0rc6), e.g.
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
backend_args = None

# Decode images with pillow to align with Detectron2.
backend = 'pillow'
train_pipeline = [
    dict(
        type=LoadImageFromFile,
        backend_args=backend_args,
        imdecode_backend=backend),
    dict(
        type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
    dict(
        type=RandomChoiceResize,
        scales=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
                (1333, 768), (1333, 800)],
        keep_ratio=True,
        backend=backend),
    dict(type=RandomFlip, prob=0.5),
    dict(type=PackDetInputs)
]
test_pipeline = [
    dict(
        type=LoadImageFromFile,
        backend_args=backend_args,
        imdecode_backend=backend),
    dict(type=Resize, scale=(1333, 800), keep_ratio=True, backend=backend),
    dict(
        type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
    dict(
        type=PackDetInputs,
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
train_dataloader.update(
    dict(
        batch_size=2,
        num_workers=2,
        persistent_workers=True,
        pin_memory=True,
        sampler=dict(type=InfiniteSampler, shuffle=True),
        batch_sampler=dict(type=AspectRatioBatchSampler),
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='annotations/instances_train2017.json',
            data_prefix=dict(img='train2017/'),
            filter_cfg=dict(filter_empty_gt=True, min_size=32),
            pipeline=train_pipeline,
            backend_args=backend_args)))
val_dataloader.update(
    dict(
        batch_size=1,
        num_workers=2,
        persistent_workers=True,
        drop_last=False,
        pin_memory=True,
        sampler=dict(type=DefaultSampler, shuffle=False),
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='annotations/instances_val2017.json',
            data_prefix=dict(img='val2017/'),
            test_mode=True,
            pipeline=test_pipeline,
            backend_args=backend_args)))
test_dataloader = val_dataloader

val_evaluator.update(
    dict(
        type=CocoMetric,
        ann_file=data_root + 'annotations/instances_val2017.json',
        metric=['bbox', 'segm'],
        format_only=False,
        backend_args=backend_args))
test_evaluator = val_evaluator

# Iteration-based schedule: 90k iterations, validating every 10k.
max_iter = 90000
train_cfg.update(
    dict(type=IterBasedTrainLoop, max_iters=max_iter, val_interval=10000))
val_cfg.update(dict(type=ValLoop))
test_cfg.update(dict(type=TestLoop))

# Learning rate: linear warmup over the first 1000 iterations, then a x0.1
# step decay at iterations 60k and 80k.
param_scheduler = [
    dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=1000),
    dict(
        type=MultiStepLR,
        begin=0,
        end=max_iter,
        by_epoch=False,
        milestones=[60000, 80000],
        gamma=0.1)
]

# Optimizer.
optim_wrapper.update(
    dict(
        type=OptimWrapper,
        optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001)))
# Automatic LR scaling (disabled by default);
# ``base_batch_size`` = (8 GPUs) x (2 samples per GPU).
auto_scale_lr.update(dict(enable=False, base_batch_size=16))

# Checkpointing and logging are iteration-based for this schedule.
default_hooks.update(dict(checkpoint=dict(by_epoch=False, interval=10000)))
log_processor.update(dict(by_epoch=False))
head_extractor/build/lib/mmdet/configs/common/ssj_270_coco_instance.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright (c) OpenMMLab. All rights reserved.

# Standard Scale Jittering (SSJ), 270k-iteration schedule for COCO instance
# segmentation.
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0

from mmengine.config import read_base

with read_base():
    from .._base_.default_runtime import *

from mmcv.transforms import RandomChoiceResize
from mmengine.dataset import RepeatDataset
from mmengine.dataset.sampler import DefaultSampler, InfiniteSampler
from mmengine.optim import OptimWrapper
from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
from mmengine.runner.loops import IterBasedTrainLoop, TestLoop, ValLoop
from torch.optim import SGD

from mmdet.datasets import AspectRatioBatchSampler, CocoDataset
from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import (FilterAnnotations,
                                               LoadAnnotations,
                                               LoadImageFromFile)
from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
                                                  Pad, RandomCrop, RandomFlip,
                                                  RandomResize, Resize)
from mmdet.evaluation import CocoMetric

# dataset settings
dataset_type = CocoDataset
data_root = 'data/coco/'
# Fixed: `image_size` was referenced in `train_pipeline` but never defined
# (NameError when parsing the config); defined here as in the SCP variant.
image_size = (1024, 1024)
# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)

# data_root = 's3://openmmlab/datasets/detection/coco/'

# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
backend_args = None

# Standard Scale Jittering (SSJ) resizes and crops an image
# with a resize range of 0.8 to 1.25 of the original image size.
train_pipeline = [
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
    dict(
        type=RandomResize,
        scale=image_size,
        ratio_range=(0.8, 1.25),
        keep_ratio=True),
    # Fixed: use the imported classes instead of the strings 'RandomCrop'
    # and 'FilterAnnotations' for consistency with this pure-Python config.
    dict(
        type=RandomCrop,
        crop_type='absolute_range',
        crop_size=image_size,
        recompute_bbox=True,
        allow_negative_crop=True),
    dict(type=FilterAnnotations, min_gt_bbox_wh=(1e-2, 1e-2)),
    dict(type=RandomFlip, prob=0.5),
    dict(type=PackDetInputs)
]
test_pipeline = [
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(type=Resize, scale=(1333, 800), keep_ratio=True),
    dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
    dict(
        type=PackDetInputs,
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
train_dataloader.update(
    dict(
        batch_size=2,
        num_workers=2,
        persistent_workers=True,
        sampler=dict(type=InfiniteSampler),
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='annotations/instances_train2017.json',
            data_prefix=dict(img='train2017/'),
            filter_cfg=dict(filter_empty_gt=True, min_size=32),
            pipeline=train_pipeline,
            backend_args=backend_args)))
val_dataloader.update(
    dict(
        batch_size=1,
        num_workers=2,
        persistent_workers=True,
        drop_last=False,
        sampler=dict(type=DefaultSampler, shuffle=False),
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='annotations/instances_val2017.json',
            data_prefix=dict(img='val2017/'),
            test_mode=True,
            pipeline=test_pipeline,
            backend_args=backend_args)))
test_dataloader = val_dataloader

# Fixed: removed a duplicated literal re-definition of `val_evaluator` /
# `test_evaluator` that immediately followed (and shadowed) this update.
val_evaluator.update(
    dict(
        type=CocoMetric,
        ann_file=data_root + 'annotations/instances_val2017.json',
        metric=['bbox', 'segm'],
        format_only=False,
        backend_args=backend_args))
test_evaluator = val_evaluator

# The model is trained by 270k iterations with batch_size 64,
# which is roughly equivalent to 144 epochs.

max_iter = 270000
train_cfg.update(
    dict(type=IterBasedTrainLoop, max_iters=max_iter, val_interval=10000))
val_cfg.update(dict(type=ValLoop))
test_cfg.update(dict(type=TestLoop))

# learning rate
param_scheduler = [
    dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=1000),
    dict(
        type=MultiStepLR,
        begin=0,
        end=max_iter,
        by_epoch=False,
        milestones=[243000, 256500, 263250],
        gamma=0.1)
]

# optimizer
optim_wrapper.update(
    dict(
        type=OptimWrapper,
        optimizer=dict(type=SGD, lr=0.1, momentum=0.9, weight_decay=0.00004)))
# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
#   or not by default.
# - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
auto_scale_lr.update(dict(base_batch_size=64))

default_hooks.update(dict(checkpoint=dict(by_epoch=False, interval=10000)))
log_processor.update(dict(by_epoch=False))
head_extractor/build/lib/mmdet/configs/common/ssj_scp_270k_coco_instance.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .ssj_270_coco_instance import *
11
+
12
+ from mmdet.datasets import MultiImageMixDataset
13
+ from mmdet.datasets.transforms import CopyPaste
14
+
15
+ # dataset settings
16
+ dataset_type = CocoDataset
17
+ data_root = 'data/coco/'
18
+ image_size = (1024, 1024)
19
+ # Example to use different file client
20
+ # Method 1: simply set the data root and let the file I/O module
21
+ # automatically infer from prefix (not support LMDB and Memcache yet)
22
+
23
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
24
+
25
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
26
+ # backend_args = dict(
27
+ # backend='petrel',
28
+ # path_mapping=dict({
29
+ # './data/': 's3://openmmlab/datasets/detection/',
30
+ # 'data/': 's3://openmmlab/datasets/detection/'
31
+ # }))
32
+ backend_args = None
33
+
34
+ # Standard Scale Jittering (SSJ) resizes and crops an image
35
+ # with a resize range of 0.8 to 1.25 of the original image size.
36
+ load_pipeline = [
37
+ dict(type=LoadImageFromFile, backend_args=backend_args),
38
+ dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
39
+ dict(
40
+ type=RandomResize,
41
+ scale=image_size,
42
+ ratio_range=(0.8, 1.25),
43
+ keep_ratio=True),
44
+ dict(
45
+ type='RandomCrop',
46
+ crop_type='absolute_range',
47
+ crop_size=image_size,
48
+ recompute_bbox=True,
49
+ allow_negative_crop=True),
50
+ dict(type='FilterAnnotations', min_gt_bbox_wh=(1e-2, 1e-2)),
51
+ dict(type=RandomFlip, prob=0.5),
52
+ dict(type=Pad, size=image_size),
53
+ ]
54
+ train_pipeline = [
55
+ dict(type=CopyPaste, max_num_pasted=100),
56
+ dict(type=PackDetInputs)
57
+ ]
58
+
59
+ train_dataloader.update(
60
+ dict(
61
+ type=MultiImageMixDataset,
62
+ dataset=dict(
63
+ type=dataset_type,
64
+ data_root=data_root,
65
+ ann_file='annotations/instances_train2017.json',
66
+ data_prefix=dict(img='train2017/'),
67
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
68
+ pipeline=load_pipeline,
69
+ backend_args=backend_args),
70
+ pipeline=train_pipeline))
head_extractor/build/lib/mmdet/configs/deformable_detr/deformable_detr_r50_16xb2_50e_coco.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .._base_.datasets.coco_detection import *
11
+ from .._base_.default_runtime import *
12
+
13
+ from mmcv.transforms import LoadImageFromFile, RandomChoice, RandomChoiceResize
14
+ from mmengine.optim.optimizer import OptimWrapper
15
+ from mmengine.optim.scheduler import MultiStepLR
16
+ from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
17
+ from torch.optim.adamw import AdamW
18
+
19
+ from mmdet.datasets.transforms import (LoadAnnotations, PackDetInputs,
20
+ RandomCrop, RandomFlip, Resize)
21
+ from mmdet.models.backbones import ResNet
22
+ from mmdet.models.data_preprocessors import DetDataPreprocessor
23
+ from mmdet.models.dense_heads import DeformableDETRHead
24
+ from mmdet.models.detectors import DeformableDETR
25
+ from mmdet.models.losses import FocalLoss, GIoULoss, L1Loss
26
+ from mmdet.models.necks import ChannelMapper
27
+ from mmdet.models.task_modules import (BBoxL1Cost, FocalLossCost,
28
+ HungarianAssigner, IoUCost)
29
+
30
+ model = dict(
31
+ type=DeformableDETR,
32
+ num_queries=300,
33
+ num_feature_levels=4,
34
+ with_box_refine=False,
35
+ as_two_stage=False,
36
+ data_preprocessor=dict(
37
+ type=DetDataPreprocessor,
38
+ mean=[123.675, 116.28, 103.53],
39
+ std=[58.395, 57.12, 57.375],
40
+ bgr_to_rgb=True,
41
+ pad_size_divisor=1),
42
+ backbone=dict(
43
+ type=ResNet,
44
+ depth=50,
45
+ num_stages=4,
46
+ out_indices=(1, 2, 3),
47
+ frozen_stages=1,
48
+ norm_cfg=dict(type='BN', requires_grad=False),
49
+ norm_eval=True,
50
+ style='pytorch',
51
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
52
+ neck=dict(
53
+ type=ChannelMapper,
54
+ in_channels=[512, 1024, 2048],
55
+ kernel_size=1,
56
+ out_channels=256,
57
+ act_cfg=None,
58
+ norm_cfg=dict(type='GN', num_groups=32),
59
+ num_outs=4),
60
+ encoder=dict( # DeformableDetrTransformerEncoder
61
+ num_layers=6,
62
+ layer_cfg=dict( # DeformableDetrTransformerEncoderLayer
63
+ self_attn_cfg=dict( # MultiScaleDeformableAttention
64
+ embed_dims=256,
65
+ batch_first=True),
66
+ ffn_cfg=dict(
67
+ embed_dims=256, feedforward_channels=1024, ffn_drop=0.1))),
68
+ decoder=dict( # DeformableDetrTransformerDecoder
69
+ num_layers=6,
70
+ return_intermediate=True,
71
+ layer_cfg=dict( # DeformableDetrTransformerDecoderLayer
72
+ self_attn_cfg=dict( # MultiheadAttention
73
+ embed_dims=256,
74
+ num_heads=8,
75
+ dropout=0.1,
76
+ batch_first=True),
77
+ cross_attn_cfg=dict( # MultiScaleDeformableAttention
78
+ embed_dims=256,
79
+ batch_first=True),
80
+ ffn_cfg=dict(
81
+ embed_dims=256, feedforward_channels=1024, ffn_drop=0.1)),
82
+ post_norm_cfg=None),
83
+ positional_encoding=dict(num_feats=128, normalize=True, offset=-0.5),
84
+ bbox_head=dict(
85
+ type=DeformableDETRHead,
86
+ num_classes=80,
87
+ sync_cls_avg_factor=True,
88
+ loss_cls=dict(
89
+ type=FocalLoss,
90
+ use_sigmoid=True,
91
+ gamma=2.0,
92
+ alpha=0.25,
93
+ loss_weight=2.0),
94
+ loss_bbox=dict(type=L1Loss, loss_weight=5.0),
95
+ loss_iou=dict(type=GIoULoss, loss_weight=2.0)),
96
+ # training and testing settings
97
+ train_cfg=dict(
98
+ assigner=dict(
99
+ type=HungarianAssigner,
100
+ match_costs=[
101
+ dict(type=FocalLossCost, weight=2.0),
102
+ dict(type=BBoxL1Cost, weight=5.0, box_format='xywh'),
103
+ dict(type=IoUCost, iou_mode='giou', weight=2.0)
104
+ ])),
105
+ test_cfg=dict(max_per_img=100))
106
+
107
+ # train_pipeline, NOTE the img_scale and the Pad's size_divisor is different
108
+ # from the default setting in mmdet.
109
+ train_pipeline = [
110
+ dict(type=LoadImageFromFile, backend_args=backend_args),
111
+ dict(type=LoadAnnotations, with_bbox=True),
112
+ dict(type=RandomFlip, prob=0.5),
113
+ dict(
114
+ type=RandomChoice,
115
+ transforms=[
116
+ [
117
+ dict(
118
+ type=RandomChoiceResize,
119
+ scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
120
+ (608, 1333), (640, 1333), (672, 1333), (704, 1333),
121
+ (736, 1333), (768, 1333), (800, 1333)],
122
+ resize_type=Resize,
123
+ keep_ratio=True)
124
+ ],
125
+ [
126
+ dict(
127
+ type=RandomChoiceResize,
128
+ # The radio of all image in train dataset < 7
129
+ # follow the original implement
130
+ scales=[(400, 4200), (500, 4200), (600, 4200)],
131
+ resize_type=Resize,
132
+ keep_ratio=True),
133
+ dict(
134
+ type=RandomCrop,
135
+ crop_type='absolute_range',
136
+ crop_size=(384, 600),
137
+ allow_negative_crop=True),
138
+ dict(
139
+ type=RandomChoiceResize,
140
+ scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
141
+ (608, 1333), (640, 1333), (672, 1333), (704, 1333),
142
+ (736, 1333), (768, 1333), (800, 1333)],
143
+ resize_type=Resize,
144
+ keep_ratio=True)
145
+ ]
146
+ ]),
147
+ dict(type=PackDetInputs)
148
+ ]
149
+ train_dataloader.update(
150
+ dict(
151
+ dataset=dict(
152
+ filter_cfg=dict(filter_empty_gt=False), pipeline=train_pipeline)))
153
+
154
+ # optimizer
155
+ optim_wrapper = dict(
156
+ type=OptimWrapper,
157
+ optimizer=dict(type=AdamW, lr=0.0002, weight_decay=0.0001),
158
+ clip_grad=dict(max_norm=0.1, norm_type=2),
159
+ paramwise_cfg=dict(
160
+ custom_keys={
161
+ 'backbone': dict(lr_mult=0.1),
162
+ 'sampling_offsets': dict(lr_mult=0.1),
163
+ 'reference_points': dict(lr_mult=0.1)
164
+ }))
165
+
166
+ # learning policy
167
+ max_epochs = 50
168
+ train_cfg = dict(
169
+ type=EpochBasedTrainLoop, max_epochs=max_epochs, val_interval=1)
170
+ val_cfg = dict(type=ValLoop)
171
+ test_cfg = dict(type=TestLoop)
172
+
173
+ param_scheduler = [
174
+ dict(
175
+ type=MultiStepLR,
176
+ begin=0,
177
+ end=max_epochs,
178
+ by_epoch=True,
179
+ milestones=[40],
180
+ gamma=0.1)
181
+ ]
182
+
183
+ # NOTE: `auto_scale_lr` is for automatically scaling LR,
184
+ # USER SHOULD NOT CHANGE ITS VALUES.
185
+ # base_batch_size = (16 GPUs) x (2 samples per GPU)
186
+ auto_scale_lr = dict(base_batch_size=32)
head_extractor/build/lib/mmdet/configs/deformable_detr/deformable_detr_refine_r50_16xb2_50e_coco.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .deformable_detr_r50_16xb2_50e_coco import *
11
+
12
+ model.update(dict(with_box_refine=True))
head_extractor/build/lib/mmdet/configs/deformable_detr/deformable_detr_refine_twostage_r50_16xb2_50e_coco.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .deformable_detr_refine_r50_16xb2_50e_coco import *
11
+
12
+ model.update(dict(as_two_stage=True))
head_extractor/build/lib/mmdet/configs/detr/detr_r101_8xb2_500e_coco.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmengine.config import read_base
3
+ from mmengine.model.weight_init import PretrainedInit
4
+
5
+ with read_base():
6
+ from .detr_r50_8xb2_500e_coco import *
7
+
8
+ model.update(
9
+ dict(
10
+ backbone=dict(
11
+ depth=101,
12
+ init_cfg=dict(
13
+ type=PretrainedInit, checkpoint='torchvision://resnet101'))))
head_extractor/build/lib/mmdet/configs/detr/detr_r18_8xb2_500e_coco.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmengine.config import read_base
3
+ from mmengine.model.weight_init import PretrainedInit
4
+
5
+ with read_base():
6
+ from .detr_r50_8xb2_500e_coco import *
7
+
8
+ model.update(
9
+ dict(
10
+ backbone=dict(
11
+ depth=18,
12
+ init_cfg=dict(
13
+ type=PretrainedInit, checkpoint='torchvision://resnet18')),
14
+ neck=dict(in_channels=[512])))
head_extractor/build/lib/mmdet/configs/detr/detr_r50_8xb2_150e_coco.py ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.transforms import RandomChoice, RandomChoiceResize
3
+ from mmcv.transforms.loading import LoadImageFromFile
4
+ from mmengine.config import read_base
5
+ from mmengine.model.weight_init import PretrainedInit
6
+ from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
7
+ from mmengine.optim.scheduler.lr_scheduler import MultiStepLR
8
+ from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
9
+ from torch.nn.modules.activation import ReLU
10
+ from torch.nn.modules.batchnorm import BatchNorm2d
11
+ from torch.optim.adamw import AdamW
12
+
13
+ from mmdet.datasets.transforms import (LoadAnnotations, PackDetInputs,
14
+ RandomCrop, RandomFlip, Resize)
15
+ from mmdet.models import (DETR, ChannelMapper, DetDataPreprocessor, DETRHead,
16
+ ResNet)
17
+ from mmdet.models.losses.cross_entropy_loss import CrossEntropyLoss
18
+ from mmdet.models.losses.iou_loss import GIoULoss
19
+ from mmdet.models.losses.smooth_l1_loss import L1Loss
20
+ from mmdet.models.task_modules import (BBoxL1Cost, ClassificationCost,
21
+ HungarianAssigner, IoUCost)
22
+
23
+ with read_base():
24
+ from .._base_.datasets.coco_detection import *
25
+ from .._base_.default_runtime import *
26
+
27
+ model = dict(
28
+ type=DETR,
29
+ num_queries=100,
30
+ data_preprocessor=dict(
31
+ type=DetDataPreprocessor,
32
+ mean=[123.675, 116.28, 103.53],
33
+ std=[58.395, 57.12, 57.375],
34
+ bgr_to_rgb=True,
35
+ pad_size_divisor=1),
36
+ backbone=dict(
37
+ type=ResNet,
38
+ depth=50,
39
+ num_stages=4,
40
+ out_indices=(3, ),
41
+ frozen_stages=1,
42
+ norm_cfg=dict(type=BatchNorm2d, requires_grad=False),
43
+ norm_eval=True,
44
+ style='pytorch',
45
+ init_cfg=dict(
46
+ type=PretrainedInit, checkpoint='torchvision://resnet50')),
47
+ neck=dict(
48
+ type=ChannelMapper,
49
+ in_channels=[2048],
50
+ kernel_size=1,
51
+ out_channels=256,
52
+ act_cfg=None,
53
+ norm_cfg=None,
54
+ num_outs=1),
55
+ encoder=dict( # DetrTransformerEncoder
56
+ num_layers=6,
57
+ layer_cfg=dict( # DetrTransformerEncoderLayer
58
+ self_attn_cfg=dict( # MultiheadAttention
59
+ embed_dims=256,
60
+ num_heads=8,
61
+ dropout=0.1,
62
+ batch_first=True),
63
+ ffn_cfg=dict(
64
+ embed_dims=256,
65
+ feedforward_channels=2048,
66
+ num_fcs=2,
67
+ ffn_drop=0.1,
68
+ act_cfg=dict(type=ReLU, inplace=True)))),
69
+ decoder=dict( # DetrTransformerDecoder
70
+ num_layers=6,
71
+ layer_cfg=dict( # DetrTransformerDecoderLayer
72
+ self_attn_cfg=dict( # MultiheadAttention
73
+ embed_dims=256,
74
+ num_heads=8,
75
+ dropout=0.1,
76
+ batch_first=True),
77
+ cross_attn_cfg=dict( # MultiheadAttention
78
+ embed_dims=256,
79
+ num_heads=8,
80
+ dropout=0.1,
81
+ batch_first=True),
82
+ ffn_cfg=dict(
83
+ embed_dims=256,
84
+ feedforward_channels=2048,
85
+ num_fcs=2,
86
+ ffn_drop=0.1,
87
+ act_cfg=dict(type=ReLU, inplace=True))),
88
+ return_intermediate=True),
89
+ positional_encoding=dict(num_feats=128, normalize=True),
90
+ bbox_head=dict(
91
+ type=DETRHead,
92
+ num_classes=80,
93
+ embed_dims=256,
94
+ loss_cls=dict(
95
+ type=CrossEntropyLoss,
96
+ bg_cls_weight=0.1,
97
+ use_sigmoid=False,
98
+ loss_weight=1.0,
99
+ class_weight=1.0),
100
+ loss_bbox=dict(type=L1Loss, loss_weight=5.0),
101
+ loss_iou=dict(type=GIoULoss, loss_weight=2.0)),
102
+ # training and testing settings
103
+ train_cfg=dict(
104
+ assigner=dict(
105
+ type=HungarianAssigner,
106
+ match_costs=[
107
+ dict(type=ClassificationCost, weight=1.),
108
+ dict(type=BBoxL1Cost, weight=5.0, box_format='xywh'),
109
+ dict(type=IoUCost, iou_mode='giou', weight=2.0)
110
+ ])),
111
+ test_cfg=dict(max_per_img=100))
112
+
113
+ # train_pipeline, NOTE the img_scale and the Pad's size_divisor is different
114
+ # from the default setting in mmdet.
115
+ train_pipeline = [
116
+ dict(type=LoadImageFromFile, backend_args=backend_args),
117
+ dict(type=LoadAnnotations, with_bbox=True),
118
+ dict(type=RandomFlip, prob=0.5),
119
+ dict(
120
+ type=RandomChoice,
121
+ transforms=[[
122
+ dict(
123
+ type=RandomChoiceResize,
124
+ resize_type=Resize,
125
+ scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
126
+ (608, 1333), (640, 1333), (672, 1333), (704, 1333),
127
+ (736, 1333), (768, 1333), (800, 1333)],
128
+ keep_ratio=True)
129
+ ],
130
+ [
131
+ dict(
132
+ type=RandomChoiceResize,
133
+ resize_type=Resize,
134
+ scales=[(400, 1333), (500, 1333), (600, 1333)],
135
+ keep_ratio=True),
136
+ dict(
137
+ type=RandomCrop,
138
+ crop_type='absolute_range',
139
+ crop_size=(384, 600),
140
+ allow_negative_crop=True),
141
+ dict(
142
+ type=RandomChoiceResize,
143
+ resize_type=Resize,
144
+ scales=[(480, 1333), (512, 1333), (544, 1333),
145
+ (576, 1333), (608, 1333), (640, 1333),
146
+ (672, 1333), (704, 1333), (736, 1333),
147
+ (768, 1333), (800, 1333)],
148
+ keep_ratio=True)
149
+ ]]),
150
+ dict(type=PackDetInputs)
151
+ ]
152
+ train_dataloader.update(dataset=dict(pipeline=train_pipeline))
153
+
154
+ # optimizer
155
+ optim_wrapper = dict(
156
+ type=OptimWrapper,
157
+ optimizer=dict(type=AdamW, lr=0.0001, weight_decay=0.0001),
158
+ clip_grad=dict(max_norm=0.1, norm_type=2),
159
+ paramwise_cfg=dict(
160
+ custom_keys={'backbone': dict(lr_mult=0.1, decay_mult=1.0)}))
161
+
162
+ # learning policy
163
+ max_epochs = 150
164
+ train_cfg = dict(
165
+ type=EpochBasedTrainLoop, max_epochs=max_epochs, val_interval=1)
166
+ val_cfg = dict(type=ValLoop)
167
+ test_cfg = dict(type=TestLoop)
168
+
169
+ param_scheduler = [
170
+ dict(
171
+ type=MultiStepLR,
172
+ begin=0,
173
+ end=max_epochs,
174
+ by_epoch=True,
175
+ milestones=[100],
176
+ gamma=0.1)
177
+ ]
178
+
179
+ # NOTE: `auto_scale_lr` is for automatically scaling LR,
180
+ # USER SHOULD NOT CHANGE ITS VALUES.
181
+ # base_batch_size = (8 GPUs) x (2 samples per GPU)
182
+ auto_scale_lr = dict(base_batch_size=16)
head_extractor/build/lib/mmdet/configs/detr/detr_r50_8xb2_500e_coco.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmengine.config import read_base
3
+ from mmengine.optim.scheduler.lr_scheduler import MultiStepLR
4
+ from mmengine.runner.loops import EpochBasedTrainLoop
5
+
6
+ with read_base():
7
+ from .detr_r50_8xb2_150e_coco import *
8
+
9
+ # learning policy
10
+ max_epochs = 500
11
+ train_cfg.update(
12
+ type=EpochBasedTrainLoop, max_epochs=max_epochs, val_interval=10)
13
+
14
+ param_scheduler = [
15
+ dict(
16
+ type=MultiStepLR,
17
+ begin=0,
18
+ end=max_epochs,
19
+ by_epoch=True,
20
+ milestones=[334],
21
+ gamma=0.1)
22
+ ]
23
+
24
+ # only keep latest 2 checkpoints
25
+ default_hooks.update(checkpoint=dict(max_keep_ckpts=2))
head_extractor/build/lib/mmdet/configs/dino/dino_4scale_r50_8xb2_12e_coco.py ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.transforms import RandomChoice, RandomChoiceResize
3
+ from mmcv.transforms.loading import LoadImageFromFile
4
+ from mmengine.config import read_base
5
+ from mmengine.model.weight_init import PretrainedInit
6
+ from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
7
+ from mmengine.optim.scheduler.lr_scheduler import MultiStepLR
8
+ from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
9
+ from torch.nn.modules.batchnorm import BatchNorm2d
10
+ from torch.nn.modules.normalization import GroupNorm
11
+ from torch.optim.adamw import AdamW
12
+
13
+ from mmdet.datasets.transforms import (LoadAnnotations, PackDetInputs,
14
+ RandomCrop, RandomFlip, Resize)
15
+ from mmdet.models import (DINO, ChannelMapper, DetDataPreprocessor, DINOHead,
16
+ ResNet)
17
+ from mmdet.models.losses.focal_loss import FocalLoss
18
+ from mmdet.models.losses.iou_loss import GIoULoss
19
+ from mmdet.models.losses.smooth_l1_loss import L1Loss
20
+ from mmdet.models.task_modules import (BBoxL1Cost, FocalLossCost,
21
+ HungarianAssigner, IoUCost)
22
+
23
+ with read_base():
24
+ from .._base_.datasets.coco_detection import *
25
+ from .._base_.default_runtime import *
26
+
27
+ model = dict(
28
+ type=DINO,
29
+ num_queries=900, # num_matching_queries
30
+ with_box_refine=True,
31
+ as_two_stage=True,
32
+ data_preprocessor=dict(
33
+ type=DetDataPreprocessor,
34
+ mean=[123.675, 116.28, 103.53],
35
+ std=[58.395, 57.12, 57.375],
36
+ bgr_to_rgb=True,
37
+ pad_size_divisor=1),
38
+ backbone=dict(
39
+ type=ResNet,
40
+ depth=50,
41
+ num_stages=4,
42
+ out_indices=(1, 2, 3),
43
+ frozen_stages=1,
44
+ norm_cfg=dict(type=BatchNorm2d, requires_grad=False),
45
+ norm_eval=True,
46
+ style='pytorch',
47
+ init_cfg=dict(
48
+ type=PretrainedInit, checkpoint='torchvision://resnet50')),
49
+ neck=dict(
50
+ type=ChannelMapper,
51
+ in_channels=[512, 1024, 2048],
52
+ kernel_size=1,
53
+ out_channels=256,
54
+ act_cfg=None,
55
+ norm_cfg=dict(type=GroupNorm, num_groups=32),
56
+ num_outs=4),
57
+ encoder=dict(
58
+ num_layers=6,
59
+ layer_cfg=dict(
60
+ self_attn_cfg=dict(embed_dims=256, num_levels=4,
61
+ dropout=0.0), # 0.1 for DeformDETR
62
+ ffn_cfg=dict(
63
+ embed_dims=256,
64
+ feedforward_channels=2048, # 1024 for DeformDETR
65
+ ffn_drop=0.0))), # 0.1 for DeformDETR
66
+ decoder=dict(
67
+ num_layers=6,
68
+ return_intermediate=True,
69
+ layer_cfg=dict(
70
+ self_attn_cfg=dict(embed_dims=256, num_heads=8,
71
+ dropout=0.0), # 0.1 for DeformDETR
72
+ cross_attn_cfg=dict(embed_dims=256, num_levels=4,
73
+ dropout=0.0), # 0.1 for DeformDETR
74
+ ffn_cfg=dict(
75
+ embed_dims=256,
76
+ feedforward_channels=2048, # 1024 for DeformDETR
77
+ ffn_drop=0.0)), # 0.1 for DeformDETR
78
+ post_norm_cfg=None),
79
+ positional_encoding=dict(
80
+ num_feats=128,
81
+ normalize=True,
82
+ offset=0.0, # -0.5 for DeformDETR
83
+ temperature=20), # 10000 for DeformDETR
84
+ bbox_head=dict(
85
+ type=DINOHead,
86
+ num_classes=80,
87
+ sync_cls_avg_factor=True,
88
+ loss_cls=dict(
89
+ type=FocalLoss,
90
+ use_sigmoid=True,
91
+ gamma=2.0,
92
+ alpha=0.25,
93
+ loss_weight=1.0), # 2.0 in DeformDETR
94
+ loss_bbox=dict(type=L1Loss, loss_weight=5.0),
95
+ loss_iou=dict(type=GIoULoss, loss_weight=2.0)),
96
+ dn_cfg=dict( # TODO: Move to model.train_cfg ?
97
+ label_noise_scale=0.5,
98
+ box_noise_scale=1.0, # 0.4 for DN-DETR
99
+ group_cfg=dict(dynamic=True, num_groups=None,
100
+ num_dn_queries=100)), # TODO: half num_dn_queries
101
+ # training and testing settings
102
+ train_cfg=dict(
103
+ assigner=dict(
104
+ type=HungarianAssigner,
105
+ match_costs=[
106
+ dict(type=FocalLossCost, weight=2.0),
107
+ dict(type=BBoxL1Cost, weight=5.0, box_format='xywh'),
108
+ dict(type=IoUCost, iou_mode='giou', weight=2.0)
109
+ ])),
110
+ test_cfg=dict(max_per_img=300)) # 100 for DeformDETR
111
+
112
+ # train_pipeline, NOTE the img_scale and the Pad's size_divisor is different
113
+ # from the default setting in mmdet.
114
+ train_pipeline = [
115
+ dict(type=LoadImageFromFile, backend_args=backend_args),
116
+ dict(type=LoadAnnotations, with_bbox=True),
117
+ dict(type=RandomFlip, prob=0.5),
118
+ dict(
119
+ type=RandomChoice,
120
+ transforms=[
121
+ [
122
+ dict(
123
+ type=RandomChoiceResize,
124
+ resize_type=Resize,
125
+ scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
126
+ (608, 1333), (640, 1333), (672, 1333), (704, 1333),
127
+ (736, 1333), (768, 1333), (800, 1333)],
128
+ keep_ratio=True)
129
+ ],
130
+ [
131
+ dict(
132
+ type=RandomChoiceResize,
133
+ resize_type=Resize,
134
+ # The radio of all image in train dataset < 7
135
+ # follow the original implement
136
+ scales=[(400, 4200), (500, 4200), (600, 4200)],
137
+ keep_ratio=True),
138
+ dict(
139
+ type=RandomCrop,
140
+ crop_type='absolute_range',
141
+ crop_size=(384, 600),
142
+ allow_negative_crop=True),
143
+ dict(
144
+ type=RandomChoiceResize,
145
+ resize_type=Resize,
146
+ scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
147
+ (608, 1333), (640, 1333), (672, 1333), (704, 1333),
148
+ (736, 1333), (768, 1333), (800, 1333)],
149
+ keep_ratio=True)
150
+ ]
151
+ ]),
152
+ dict(type=PackDetInputs)
153
+ ]
154
+ train_dataloader.update(
155
+ dataset=dict(
156
+ filter_cfg=dict(filter_empty_gt=False), pipeline=train_pipeline))
157
+
158
+ # optimizer
159
+ optim_wrapper = dict(
160
+ type=OptimWrapper,
161
+ optimizer=dict(
162
+ type=AdamW,
163
+ lr=0.0001, # 0.0002 for DeformDETR
164
+ weight_decay=0.0001),
165
+ clip_grad=dict(max_norm=0.1, norm_type=2),
166
+ paramwise_cfg=dict(custom_keys={'backbone': dict(lr_mult=0.1)})
167
+ ) # custom_keys contains sampling_offsets and reference_points in DeformDETR # noqa
168
+
169
+ # learning policy
170
+ max_epochs = 12
171
+ train_cfg = dict(
172
+ type=EpochBasedTrainLoop, max_epochs=max_epochs, val_interval=1)
173
+
174
+ val_cfg = dict(type=ValLoop)
175
+ test_cfg = dict(type=TestLoop)
176
+
177
+ param_scheduler = [
178
+ dict(
179
+ type=MultiStepLR,
180
+ begin=0,
181
+ end=max_epochs,
182
+ by_epoch=True,
183
+ milestones=[11],
184
+ gamma=0.1)
185
+ ]
186
+
187
+ # NOTE: `auto_scale_lr` is for automatically scaling LR,
188
+ # USER SHOULD NOT CHANGE ITS VALUES.
189
+ # base_batch_size = (8 GPUs) x (2 samples per GPU)
190
+ auto_scale_lr = dict(base_batch_size=16)
head_extractor/build/lib/mmdet/configs/dino/dino_4scale_r50_8xb2_24e_coco.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmengine.config import read_base
3
+ from mmengine.runner.loops import EpochBasedTrainLoop
4
+
5
+ with read_base():
6
+ from .dino_4scale_r50_8xb2_12e_coco import *
7
+
8
+ max_epochs = 24
9
+ train_cfg.update(
10
+ dict(type=EpochBasedTrainLoop, max_epochs=max_epochs, val_interval=1))
11
+
12
+ param_scheduler[0].update(dict(milestones=[20]))
head_extractor/build/lib/mmdet/configs/dino/dino_4scale_r50_8xb2_36e_coco.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmengine.config import read_base
3
+ from mmengine.runner.loops import EpochBasedTrainLoop
4
+
5
+ with read_base():
6
+ from .dino_4scale_r50_8xb2_12e_coco import *
7
+
8
+ max_epochs = 36
9
+ train_cfg.update(
10
+ dict(type=EpochBasedTrainLoop, max_epochs=max_epochs, val_interval=1))
11
+
12
+ param_scheduler[0].update(dict(milestones=[30]))