import torch
import torch.nn as nn
import torch.nn.functional as F
import math
from einops import rearrange
import torch.fft as fft


class Linear(torch.nn.Linear):
    def reset_parameters(self):
        # No-op override: skip PyTorch's default weight initialization.
        return None


class Conv2d(torch.nn.Conv2d):
    def reset_parameters(self):
        # No-op override: skip PyTorch's default weight initialization.
        return None


class Attention2D(nn.Module):
    def __init__(self, c, nhead, dropout=0.0):
        super().__init__()
        self.attn = nn.MultiheadAttention(c, nhead, dropout=dropout, bias=True, batch_first=True)

    def forward(self, x, kv, self_attn=False):
        orig_shape = x.shape
        x = x.view(x.size(0), x.size(1), -1).permute(0, 2, 1)  # B x C x H x W -> B x (H*W) x C
        if self_attn:
            # Prepend the image tokens so the layer attends to itself as well as to kv.
            kv = torch.cat([x, kv], dim=1)
        x = self.attn(x, kv, kv, need_weights=False)[0]
        x = x.permute(0, 2, 1).view(*orig_shape)  # back to B x C x H x W
        return x
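
# Usage sketch (illustrative only; the tensor shapes below are assumptions about
# how Attention2D is called, not taken from this file):
#   attn = Attention2D(c=128, nhead=8)
#   x = torch.randn(2, 128, 16, 16)      # NCHW feature map
#   kv = torch.randn(2, 77, 128)         # conditioning sequence, already projected to c
#   out = attn(x, kv, self_attn=True)    # out.shape == x.shape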


class LayerNorm2d(nn.LayerNorm):
    """LayerNorm applied over the channel dimension of NCHW tensors."""
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def forward(self, x):
        # Move channels last for normalization, then restore NCHW.
        return super().forward(x.permute(0, 2, 3, 1)).permute(0, 3, 1, 2)


class GlobalResponseNorm(nn.Module):
    """from https://github.com/facebookresearch/ConvNeXt-V2/blob/3608f67cc1dae164790c5d0aead7bf2d73d9719b/models/utils.py#L105"""

    def __init__(self, dim):
        super().__init__()
        self.gamma = nn.Parameter(torch.zeros(1, 1, 1, dim))
        self.beta = nn.Parameter(torch.zeros(1, 1, 1, dim))

    def forward(self, x):
        # x is channels-last: (B, H, W, C).
        Gx = torch.norm(x, p=2, dim=(1, 2), keepdim=True)   # global L2 norm per channel
        Nx = Gx / (Gx.mean(dim=-1, keepdim=True) + 1e-6)     # divisive normalization across channels
        return self.gamma * (x * Nx) + self.beta + x
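
# Note: unlike the NCHW blocks above, GlobalResponseNorm expects channels-last
# input; ResBlock and FeedForwardBlock permute to (B, H, W, C) before calling it.
# Minimal sketch (the shapes are illustrative assumptions):
#   grn = GlobalResponseNorm(64)
#   y = grn(torch.randn(2, 16, 16, 64))  # y.shape == (2, 16, 16, 64)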


class ResBlock(nn.Module):
    def __init__(self, c, c_skip=0, kernel_size=3, dropout=0.0):
        super().__init__()
        self.depthwise = Conv2d(c, c, kernel_size=kernel_size, padding=kernel_size // 2, groups=c)
        self.norm = LayerNorm2d(c, elementwise_affine=False, eps=1e-6)
        self.channelwise = nn.Sequential(
            Linear(c + c_skip, c * 4),
            nn.GELU(),
            GlobalResponseNorm(c * 4),
            nn.Dropout(dropout),
            Linear(c * 4, c)
        )

    def forward(self, x, x_skip=None):
        x_res = x
        x = self.norm(self.depthwise(x))
        if x_skip is not None:
            x = torch.cat([x, x_skip], dim=1)
        # Channel-wise MLP runs in channels-last layout, then back to NCHW.
        x = self.channelwise(x.permute(0, 2, 3, 1)).permute(0, 3, 1, 2)
        return x + x_res
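
# Usage sketch (ConvNeXt-style block; the channel counts are illustrative assumptions):
#   block = ResBlock(c=128, c_skip=64)
#   x = torch.randn(2, 128, 16, 16)
#   skip = torch.randn(2, 64, 16, 16)    # concatenated on the channel dim after the depthwise conv
#   out = block(x, skip)                 # out.shape == (2, 128, 16, 16)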


class AttnBlock(nn.Module):
    def __init__(self, c, c_cond, nhead, self_attn=True, dropout=0.0):
        super().__init__()
        self.self_attn = self_attn
        self.norm = LayerNorm2d(c, elementwise_affine=False, eps=1e-6)
        self.attention = Attention2D(c, nhead, dropout)
        self.kv_mapper = nn.Sequential(
            nn.SiLU(),
            Linear(c_cond, c)
        )

    def forward(self, x, kv):
        # Project the conditioning sequence to the block's channel width, then
        # add the (cross-)attention output as a residual.
        kv = self.kv_mapper(kv)
        res = self.attention(self.norm(x), kv, self_attn=self.self_attn)
        x = x + res
        return x
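
# Usage sketch (cross-attention on a feature map, conditioned on a text-like
# sequence; the dimensions are illustrative assumptions):
#   blk = AttnBlock(c=128, c_cond=1024, nhead=8, self_attn=True)
#   x = torch.randn(2, 128, 16, 16)
#   cond = torch.randn(2, 77, 1024)
#   out = blk(x, cond)                   # out.shape == x.shape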


class FeedForwardBlock(nn.Module):
    def __init__(self, c, dropout=0.0):
        super().__init__()
        self.norm = LayerNorm2d(c, elementwise_affine=False, eps=1e-6)
        self.channelwise = nn.Sequential(
            Linear(c, c * 4),
            nn.GELU(),
            GlobalResponseNorm(c * 4),
            nn.Dropout(dropout),
            Linear(c * 4, c)
        )

    def forward(self, x):
        x = x + self.channelwise(self.norm(x).permute(0, 2, 3, 1)).permute(0, 3, 1, 2)
        return x


class TimestepBlock(nn.Module):
    def __init__(self, c, c_timestep, conds=['sca']):
        super().__init__()
        self.mapper = Linear(c_timestep, c * 2)
        self.conds = conds
        for cname in conds:
            setattr(self, f"mapper_{cname}", Linear(c_timestep, c * 2))

    def forward(self, x, t):
        # t concatenates the base timestep embedding plus one embedding per extra
        # condition along dim 1; split it back into len(conds) + 1 chunks.
        t = t.chunk(len(self.conds) + 1, dim=1)
        a, b = self.mapper(t[0])[:, :, None, None].chunk(2, dim=1)
        for i, c in enumerate(self.conds):
            ac, bc = getattr(self, f"mapper_{c}")(t[i + 1])[:, :, None, None].chunk(2, dim=1)
            a, b = a + ac, b + bc
        # FiLM-style affine modulation of the feature map.
        return x * (1 + a) + b
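
# Usage sketch (the embedding width and batch size are illustrative assumptions):
#   tb = TimestepBlock(c=128, c_timestep=64, conds=['sca'])
#   x = torch.randn(2, 128, 16, 16)
#   t = torch.randn(2, 64 * 2)           # base timestep embedding + one per extra condition
#   out = tb(x, t)                       # out.shape == x.shape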