| | |
| | import torch |
| | import torch.nn as nn |
| | from mmcv.cnn import ConvModule, DepthwiseSeparableConvModule |
| | from mmengine.model import BaseModule |
| | from torch import Tensor |
| |
|
| | from mmdet.utils import ConfigType, OptConfigType, OptMultiConfig |
| | from .se_layer import ChannelAttention |
| |
|
| |
|
class DarknetBottleneck(BaseModule):
    """The basic bottleneck block used in Darknet.

    The block stacks two conv modules: a 1x1 ``ConvModule`` that maps
    ``in_channels`` to the hidden width, followed by a 3x3 conv module
    (``DepthwiseSeparableConvModule`` when ``use_depthwise`` is True,
    otherwise ``ConvModule``) that maps the hidden width to
    ``out_channels``. Convolution, normalization and activation of both
    modules are configured by ``conv_cfg``, ``norm_cfg`` and ``act_cfg``.
    The input is added to the output only when ``add_identity`` is True
    and ``in_channels == out_channels``.

    Args:
        in_channels (int): The input channels of this Module.
        out_channels (int): The output channels of this Module.
        expansion (float): Ratio of the hidden channels to
            ``out_channels``; the hidden width is
            ``int(out_channels * expansion)``. Defaults to 0.5.
        add_identity (bool): Whether to add identity to the out. Only
            takes effect when ``in_channels == out_channels``.
            Defaults to True.
        use_depthwise (bool): Whether to use depthwise separable
            convolution for the second (3x3) convolution.
            Defaults to False.
        conv_cfg (dict, optional): Config dict for convolution layer.
            Defaults to None, which means using conv2d.
        norm_cfg (dict): Config dict for normalization layer.
            Defaults to dict(type='BN', momentum=0.03, eps=0.001).
        act_cfg (dict): Config dict for activation layer.
            Defaults to dict(type='Swish').
        init_cfg (:obj:`ConfigDict` or dict or list[dict] or
            list[:obj:`ConfigDict`], optional): Initialization config dict.
            Defaults to None.
    """

    def __init__(self,
                 in_channels: int,
                 out_channels: int,
                 expansion: float = 0.5,
                 add_identity: bool = True,
                 use_depthwise: bool = False,
                 conv_cfg: OptConfigType = None,
                 norm_cfg: ConfigType = dict(
                     type='BN', momentum=0.03, eps=0.001),
                 act_cfg: ConfigType = dict(type='Swish'),
                 init_cfg: OptMultiConfig = None) -> None:
        super().__init__(init_cfg=init_cfg)
        # The hidden width is derived from the *output* channels.
        hidden_channels = int(out_channels * expansion)
        # Only the 3x3 convolution switches to the depthwise separable
        # variant; the 1x1 projection is always a plain ConvModule.
        conv = DepthwiseSeparableConvModule if use_depthwise else ConvModule
        self.conv1 = ConvModule(
            in_channels,
            hidden_channels,
            1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)
        self.conv2 = conv(
            hidden_channels,
            out_channels,
            3,
            stride=1,
            padding=1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)
        # A residual sum needs matching channel counts, so the shortcut is
        # disabled whenever the block changes the number of channels.
        self.add_identity = \
            add_identity and in_channels == out_channels

    def forward(self, x: Tensor) -> Tensor:
        """Apply ``conv1`` then ``conv2``; add ``x`` back if enabled."""
        out = self.conv2(self.conv1(x))
        return out + x if self.add_identity else out
| |
|
| |
|
class CSPNeXtBlock(BaseModule):
    """The basic bottleneck block used in CSPNeXt.

    The block stacks a 3x3 conv module (``DepthwiseSeparableConvModule``
    when ``use_depthwise`` is True, otherwise ``ConvModule``) and a
    ``DepthwiseSeparableConvModule`` with a ``kernel_size`` x
    ``kernel_size`` kernel padded by ``kernel_size // 2``. The input is
    added to the output only when ``add_identity`` is True and
    ``in_channels == out_channels``.

    Args:
        in_channels (int): The input channels of this Module.
        out_channels (int): The output channels of this Module.
        expansion (float): Expand ratio of the hidden channel; the hidden
            width is ``int(out_channels * expansion)``. Defaults to 0.5.
        add_identity (bool): Whether to add identity to the out. Only works
            when in_channels == out_channels. Defaults to True.
        use_depthwise (bool): Whether to use depthwise separable convolution
            for the first (3x3) convolution. The second convolution is
            always depthwise separable. Defaults to False.
        kernel_size (int): The kernel size of the second convolution layer.
            Defaults to 5.
        conv_cfg (dict, optional): Config dict for convolution layer,
            applied to both convolutions. Defaults to None, which means
            using conv2d.
        norm_cfg (dict): Config dict for normalization layer.
            Defaults to dict(type='BN', momentum=0.03, eps=0.001).
        act_cfg (dict): Config dict for activation layer.
            Defaults to dict(type='SiLU').
        init_cfg (:obj:`ConfigDict` or dict or list[dict] or
            list[:obj:`ConfigDict`], optional): Initialization config dict.
            Defaults to None.
    """

    def __init__(self,
                 in_channels: int,
                 out_channels: int,
                 expansion: float = 0.5,
                 add_identity: bool = True,
                 use_depthwise: bool = False,
                 kernel_size: int = 5,
                 conv_cfg: OptConfigType = None,
                 norm_cfg: ConfigType = dict(
                     type='BN', momentum=0.03, eps=0.001),
                 act_cfg: ConfigType = dict(type='SiLU'),
                 init_cfg: OptMultiConfig = None) -> None:
        super().__init__(init_cfg=init_cfg)
        # The hidden width is derived from the *output* channels.
        hidden_channels = int(out_channels * expansion)
        conv = DepthwiseSeparableConvModule if use_depthwise else ConvModule
        # conv_cfg is forwarded to both convolutions so that a custom
        # convolution type configures the whole block, not just conv2.
        # With the default conv_cfg=None this builds a plain conv2d.
        self.conv1 = conv(
            in_channels,
            hidden_channels,
            3,
            stride=1,
            padding=1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)
        # padding = kernel_size // 2 keeps the spatial size unchanged for
        # odd kernel sizes at stride 1.
        self.conv2 = DepthwiseSeparableConvModule(
            hidden_channels,
            out_channels,
            kernel_size,
            stride=1,
            padding=kernel_size // 2,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)
        # A residual sum needs matching channel counts, so the shortcut is
        # disabled whenever the block changes the number of channels.
        self.add_identity = \
            add_identity and in_channels == out_channels

    def forward(self, x: Tensor) -> Tensor:
        """Apply ``conv1`` then ``conv2``; add ``x`` back if enabled."""
        out = self.conv2(self.conv1(x))
        return out + x if self.add_identity else out
| |
|
| |
|
class CSPLayer(BaseModule):
    """Cross Stage Partial Layer.

    The input goes through two parallel 1x1 convolutions that each produce
    ``int(out_channels * expand_ratio)`` channels. The main branch is then
    passed through ``num_blocks`` bottleneck blocks, the two branches are
    concatenated along the channel dimension (main first, shortcut second),
    optionally re-weighted by ``ChannelAttention``, and fused by a final
    1x1 convolution into ``out_channels`` channels.

    Args:
        in_channels (int): The input channels of the CSP layer.
        out_channels (int): The output channels of the CSP layer.
        expand_ratio (float): Ratio to adjust the number of channels of the
            hidden layer. Defaults to 0.5.
        num_blocks (int): Number of blocks. Defaults to 1.
        add_identity (bool): Whether to add identity in blocks.
            Defaults to True.
        use_depthwise (bool): Whether to use depthwise separable convolution in
            blocks. Defaults to False.
        use_cspnext_block (bool): Whether to use CSPNeXt block instead of
            the Darknet bottleneck. Defaults to False.
        channel_attention (bool): Whether to add channel attention in each
            stage. Defaults to False.
        conv_cfg (dict, optional): Config dict for convolution layer.
            Defaults to None, which means using conv2d.
        norm_cfg (dict): Config dict for normalization layer.
            Defaults to dict(type='BN', momentum=0.03, eps=0.001).
        act_cfg (dict): Config dict for activation layer.
            Defaults to dict(type='Swish').
        init_cfg (:obj:`ConfigDict` or dict or list[dict] or
            list[:obj:`ConfigDict`], optional): Initialization config dict.
            Defaults to None.
    """

    def __init__(self,
                 in_channels: int,
                 out_channels: int,
                 expand_ratio: float = 0.5,
                 num_blocks: int = 1,
                 add_identity: bool = True,
                 use_depthwise: bool = False,
                 use_cspnext_block: bool = False,
                 channel_attention: bool = False,
                 conv_cfg: OptConfigType = None,
                 norm_cfg: ConfigType = dict(
                     type='BN', momentum=0.03, eps=0.001),
                 act_cfg: ConfigType = dict(type='Swish'),
                 init_cfg: OptMultiConfig = None) -> None:
        super().__init__(init_cfg=init_cfg)
        block = CSPNeXtBlock if use_cspnext_block else DarknetBottleneck
        # Width of each of the two parallel branches.
        mid_channels = int(out_channels * expand_ratio)
        self.channel_attention = channel_attention
        self.main_conv = ConvModule(
            in_channels,
            mid_channels,
            1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)
        self.short_conv = ConvModule(
            in_channels,
            mid_channels,
            1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)
        # The fusion conv sees both branches concatenated, hence the
        # doubled input width.
        self.final_conv = ConvModule(
            2 * mid_channels,
            out_channels,
            1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)

        # Blocks keep the channel count (mid -> mid) and use expansion 1.0,
        # so their hidden width equals mid_channels. Arguments are passed
        # by keyword so the call does not rely on both block classes
        # sharing the same positional parameter order.
        self.blocks = nn.Sequential(*[
            block(
                mid_channels,
                mid_channels,
                expansion=1.0,
                add_identity=add_identity,
                use_depthwise=use_depthwise,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg) for _ in range(num_blocks)
        ])
        # The attention module only exists when it is enabled; forward()
        # guards its use with the same flag.
        if channel_attention:
            self.attention = ChannelAttention(2 * mid_channels)

    def forward(self, x: Tensor) -> Tensor:
        """Run both branches, concatenate them and fuse the result."""
        x_short = self.short_conv(x)

        x_main = self.main_conv(x)
        x_main = self.blocks(x_main)

        # Concatenate along dim 1; main branch first, shortcut second.
        x_final = torch.cat((x_main, x_short), dim=1)

        if self.channel_attention:
            x_final = self.attention(x_final)
        return self.final_conv(x_final)
| |
|