import copy
import functools
import warnings
from abc import ABCMeta
from collections import defaultdict
from inspect import getfullargspec
from typing import Callable, Iterable, List, Optional, Union

import torch.nn as nn

from .utils import is_model_wrapper
from .weight_init import PretrainedInit, initialize, update_init_info
from ..utils.activation import build_dropout
from ..utils.registry import MODELS


class BaseModule(nn.Module, metaclass=ABCMeta):
    """Base module for all modules in OpenMMLab. ``BaseModule`` is a wrapper
    of ``torch.nn.Module`` with additional functionality for parameter
    initialization. Compared with ``torch.nn.Module``, ``BaseModule`` mainly
    adds three attributes.

    - ``init_cfg``: the config to control the initialization.
    - ``init_weights``: the function of parameter initialization and
      recording initialization information.
    - ``_params_init_info``: used to track the parameter initialization
      information. This attribute only exists while ``init_weights`` is
      being executed.

    Note:
        :obj:`PretrainedInit` has a higher priority than any other
        initializer. The loaded pretrained weights will overwrite
        the previously initialized weights.

    Args:
        init_cfg (dict or List[dict], optional): Initialization config dict.
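
    Example (an illustrative sketch; ``ToyNet`` and the config values below
    are hypothetical, not part of this module):

        >>> class ToyNet(BaseModule):
        ...     def __init__(self, init_cfg=None):
        ...         super().__init__(init_cfg)
        ...         self.linear = nn.Linear(2, 2)
        >>> net = ToyNet(init_cfg=dict(type='Constant', val=1,
        ...                            layer='Linear'))
        >>> net.init_weights()  # initializes self.linear per init_cfg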
| """

    def __init__(self, init_cfg: Union[dict, List[dict], None] = None):
        """Initialize BaseModule, inherited from `torch.nn.Module`."""
        super().__init__()
        self._is_init = False
        # Deep-copy the config so that later in-place modification by
        # initializers cannot affect the dict the caller passed in.
        self.init_cfg = copy.deepcopy(init_cfg)

    @property
    def is_init(self):
        return self._is_init

    @is_init.setter
    def is_init(self, value):
        self._is_init = value

    def init_weights(self):
        """Initialize the weights."""

        is_top_level_module = False
        # Only the outermost module creates ``_params_init_info``; it is then
        # shared with every submodule so that all initialization records are
        # collected in a single dict.
        if not hasattr(self, '_params_init_info'):
            # ``_params_init_info`` maps each ``nn.Parameter`` of the model
            # to a dict containing
            # - init_info (str): describes how the parameter was initialized.
            # - tmp_mean_value (Tensor): the parameter's mean, used to detect
            #   whether the parameter has been modified since it was last
            #   recorded.
            # The attribute is deleted once all parameters are initialized.
            self._params_init_info = defaultdict(dict)
            is_top_level_module = True

            for name, param in self.named_parameters():
                self._params_init_info[param][
                    'init_info'] = f'The value is the same before and ' \
                                   f'after calling `init_weights` ' \
                                   f'of {self.__class__.__name__} '
                self._params_init_info[param][
                    'tmp_mean_value'] = param.data.mean().cpu()

            # Pass ``_params_init_info`` to all submodules; because they
            # share the same dict, it is updated when parameters are
            # modified at any level of the model.
            for sub_module in self.modules():
                sub_module._params_init_info = self._params_init_info

        module_name = self.__class__.__name__
        if not self._is_init:
            if self.init_cfg:
                init_cfgs = self.init_cfg
                if isinstance(self.init_cfg, dict):
                    init_cfgs = [self.init_cfg]

                # ``PretrainedInit`` has a higher priority than any other
                # initializer, so pretrained configs are applied last (see
                # below) to overwrite weights set by the other configs.
                other_cfgs = []
                pretrained_cfg = []
                for init_cfg in init_cfgs:
                    assert isinstance(init_cfg, dict)
                    if (init_cfg['type'] == 'Pretrained'
                            or init_cfg['type'] is PretrainedInit):
                        pretrained_cfg.append(init_cfg)
                    else:
                        other_cfgs.append(init_cfg)

                initialize(self, other_cfgs)

            for m in self.children():
                # Unwrap wrapped modules that do not define their own
                # ``init_weights``.
                if is_model_wrapper(m) and not hasattr(m, 'init_weights'):
                    m = m.module
                if hasattr(m, 'init_weights') and not getattr(
                        m, 'is_init', False):
                    m.init_weights()
                    # Record that the submodule ran its own ``init_weights``.
                    update_init_info(
                        m,
                        init_info=f'Initialized by '
                        f'user-defined `init_weights`'
                        f' in {m.__class__.__name__} ')
            if self.init_cfg and pretrained_cfg:
                initialize(self, pretrained_cfg)
            self._is_init = True
        else:
            warnings.warn(f'init_weights of {module_name} has '
                          f'been called more than once.')

        if is_top_level_module:
            self._dump_init_info()

            for sub_module in self.modules():
                del sub_module._params_init_info

    def __repr__(self):
        s = super().__repr__()
        if self.init_cfg:
            s += f'\ninit_cfg={self.init_cfg}'
        return s


def deprecated_api_warning(name_dict: dict,
                           cls_name: Optional[str] = None) -> Callable:
    """A decorator to check whether some arguments are deprecated and, if
    so, replace the deprecated ``src_arg_name`` with ``dst_arg_name``.

    Args:
        name_dict (dict):
            key (str): Deprecated argument name.
            val (str): Expected argument name.

    Returns:
        func: New function.
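
    Example (an illustrative sketch; ``old_input`` and ``new_input`` are
    hypothetical argument names, not part of any real API):

        >>> @deprecated_api_warning({'old_input': 'new_input'})
        ... def concat(new_input):
        ...     return new_input
        >>> concat(old_input=[1, 2])  # warns, then forwards to `new_input`
        [1, 2]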
| """

    def api_warning_wrapper(old_func):

        @functools.wraps(old_func)
        def new_func(*args, **kwargs):
            # Get the argument names of the decorated function.
            args_info = getfullargspec(old_func)
            # Qualify the function name for a more specific warning.
            func_name = old_func.__name__
            if cls_name is not None:
                func_name = f'{cls_name}.{func_name}'
            if args:
                arg_names = args_info.args[:len(args)]
                for src_arg_name, dst_arg_name in name_dict.items():
                    if src_arg_name in arg_names:
                        warnings.warn(
                            f'"{src_arg_name}" is deprecated in '
                            f'`{func_name}`, please use "{dst_arg_name}" '
                            'instead', DeprecationWarning)
                        arg_names[arg_names.index(src_arg_name)] = dst_arg_name
            if kwargs:
                for src_arg_name, dst_arg_name in name_dict.items():
                    if src_arg_name in kwargs:
                        assert dst_arg_name not in kwargs, (
                            f'The expected behavior is to replace '
                            f'the deprecated key `{src_arg_name}` with '
                            f'the new key `{dst_arg_name}`, but got them '
                            f'in the arguments at the same time, which '
                            f'is confusing. `{src_arg_name}` will be '
                            f'deprecated in the future, please '
                            f'use `{dst_arg_name}` instead.')

                        warnings.warn(
                            f'"{src_arg_name}" is deprecated in '
                            f'`{func_name}`, please use "{dst_arg_name}" '
                            'instead', DeprecationWarning)
                        kwargs[dst_arg_name] = kwargs.pop(src_arg_name)

            output = old_func(*args, **kwargs)
            return output

        return new_func

    return api_warning_wrapper


@MODELS.register_module()
class MultiheadAttention(BaseModule):
    """A wrapper for ``torch.nn.MultiheadAttention``.

    This module implements MultiheadAttention with an identity connection,
    and positional encoding is also passed as input.

    Args:
        embed_dims (int): The embedding dimension.
        num_heads (int): Parallel attention heads.
        attn_drop (float): A Dropout layer on attn_output_weights.
            Default: 0.0.
        proj_drop (float): A Dropout layer after `nn.MultiheadAttention`.
            Default: 0.0.
        dropout_layer (:obj:`ConfigDict`): The dropout_layer used
            when adding the shortcut.
        init_cfg (:obj:`mmcv.ConfigDict`): The Config for initialization.
            Default: None.
        batch_first (bool): When it is True, Key, Query and Value are in
            shape of (batch, n, embed_dim), otherwise (n, batch, embed_dim).
            Defaults to False.
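
    Example (an illustrative sketch; the sizes below are arbitrary):

        >>> import torch
        >>> attn = MultiheadAttention(embed_dims=16, num_heads=2)
        >>> query = torch.rand(4, 2, 16)  # (num_queries, bs, embed_dims)
        >>> attn(query).shape  # self-attention plus identity shortcut
        torch.Size([4, 2, 16])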
| """

    def __init__(self,
                 embed_dims,
                 num_heads,
                 attn_drop=0.,
                 proj_drop=0.,
                 dropout_layer=dict(type='Dropout', drop_prob=0.),
                 init_cfg=None,
                 batch_first=False,
                 **kwargs):
        super().__init__(init_cfg)
        if 'dropout' in kwargs:
            warnings.warn(
                'The argument `dropout` in MultiheadAttention '
                'has been deprecated; now you can separately '
                'set `attn_drop`(float), `proj_drop`(float), '
                'and `dropout_layer`(dict) ', DeprecationWarning)
            attn_drop = kwargs['dropout']
            dropout_layer['drop_prob'] = kwargs.pop('dropout')

        self.embed_dims = embed_dims
        self.num_heads = num_heads
        self.batch_first = batch_first

        self.attn = nn.MultiheadAttention(embed_dims, num_heads, attn_drop,
                                          **kwargs)

        self.proj_drop = nn.Dropout(proj_drop)
        self.dropout_layer = build_dropout(
            dropout_layer) if dropout_layer else nn.Identity()

    @deprecated_api_warning({'residual': 'identity'},
                            cls_name='MultiheadAttention')
    def forward(self,
                query,
                key=None,
                value=None,
                identity=None,
                query_pos=None,
                key_pos=None,
                attn_mask=None,
                key_padding_mask=None,
                **kwargs):
        """Forward function for `MultiheadAttention`.

        **kwargs allow passing a more general data flow when combining
        with other operations in `transformerlayer`.

        Args:
            query (Tensor): The input query with shape [num_queries, bs,
                embed_dims] if self.batch_first is False, else
                [bs, num_queries, embed_dims].
            key (Tensor): The key tensor with shape [num_keys, bs,
                embed_dims] if self.batch_first is False, else
                [bs, num_keys, embed_dims].
                If None, the ``query`` will be used. Defaults to None.
            value (Tensor): The value tensor with the same shape as `key`.
                Same in `nn.MultiheadAttention.forward`. Defaults to None.
                If None, the `key` will be used.
            identity (Tensor): This tensor, with the same shape as `query`,
                will be used for the identity link.
                If None, `query` will be used. Defaults to None.
            query_pos (Tensor): The positional encoding for `query`, with
                the same shape as `query`. If not None, it will
                be added to `query` before the forward function.
                Defaults to None.
            key_pos (Tensor): The positional encoding for `key`, with the
                same shape as `key`. If not None, it will be added to
                `key` before the forward function. If None, and
                `query_pos` has the same shape as `key`, then `query_pos`
                will be used as `key_pos`. Defaults to None.
            attn_mask (Tensor): ByteTensor mask with shape [num_queries,
                num_keys]. Same in `nn.MultiheadAttention.forward`.
                Defaults to None.
            key_padding_mask (Tensor): ByteTensor with shape [bs, num_keys].
                Defaults to None.

        Returns:
            Tensor: forwarded results with shape
            [num_queries, bs, embed_dims]
            if self.batch_first is False, else
            [bs, num_queries, embed_dims].
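
        Example (an illustrative sketch of the batch-first layout; the
        sizes below are arbitrary):

            >>> import torch
            >>> attn = MultiheadAttention(embed_dims=16, num_heads=2,
            ...                           batch_first=True)
            >>> q = torch.rand(2, 5, 16)  # (bs, num_queries, embed_dims)
            >>> attn(q).shape
            torch.Size([2, 5, 16])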
| """

        if key is None:
            key = query
        if value is None:
            value = key
        if identity is None:
            identity = query
        if key_pos is None:
            if query_pos is not None:
                # use query_pos as key_pos when their shapes match
                if query_pos.shape == key.shape:
                    key_pos = query_pos
        if query_pos is not None:
            query = query + query_pos
        if key_pos is not None:
            key = key + key_pos

        # ``torch.nn.MultiheadAttention`` expects inputs of shape
        # (num_queries, batch, embed_dims), so in batch-first mode the
        # inputs are transposed here and the output is transposed back
        # afterwards.
        if self.batch_first:
            query = query.transpose(0, 1)
            key = key.transpose(0, 1)
            value = value.transpose(0, 1)

        out = self.attn(
            query=query,
            key=key,
            value=value,
            attn_mask=attn_mask,
            key_padding_mask=key_padding_mask)[0]

        if self.batch_first:
            out = out.transpose(0, 1)

        return identity + self.dropout_layer(self.proj_drop(out))


class ModuleList(BaseModule, nn.ModuleList):
    """ModuleList in OpenMMLab.

    Ensures that all modules in ``ModuleList`` have a different
    initialization strategy than the outer model.

    Args:
        modules (iterable, optional): An iterable of modules to add.
        init_cfg (dict, optional): Initialization config dict.
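
    Example (an illustrative sketch; the layers and init type below are
    arbitrary):

        >>> layers = ModuleList([nn.Linear(2, 2) for _ in range(3)],
        ...                     init_cfg=dict(type='Xavier', layer='Linear'))
        >>> len(layers)
        3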
| """

    def __init__(self,
                 modules: Optional[Iterable] = None,
                 init_cfg: Optional[dict] = None):
        BaseModule.__init__(self, init_cfg)
        nn.ModuleList.__init__(self, modules)


class Sequential(BaseModule, nn.Sequential):
    """Sequential module in OpenMMLab.

    Ensures that all modules in ``Sequential`` have a different
    initialization strategy than the outer model.

    Args:
        init_cfg (dict, optional): Initialization config dict.
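
    Example (an illustrative sketch; the layers and init type below are
    arbitrary):

        >>> seq = Sequential(
        ...     nn.Linear(2, 4), nn.ReLU(), nn.Linear(4, 2),
        ...     init_cfg=dict(type='Kaiming', layer='Linear'))
        >>> len(seq)
        3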
| """

    def __init__(self, *args, init_cfg: Optional[dict] = None):
        BaseModule.__init__(self, init_cfg)
        nn.Sequential.__init__(self, *args)