Spaces:

PolarisFTL
/

RDFNet

Sleeping

App Files Files Community

PolarisFTL committed 9 days ago

Commit

c79402e

verified ·

1 Parent(s): efb567f

Add nets modules

Browse files

Files changed (5) hide show

nets/Common.py +311 -0
nets/__init__.py +1 -0
nets/backbone.py +105 -0
nets/model.py +121 -0
nets/yolo_training.py +348 -0

nets/Common.py ADDED Viewed

	@@ -0,0 +1,311 @@

+import torch
+import torch.nn as nn
+from thop import profile
class SiLU(nn.Module):
    """Sigmoid-weighted linear unit: ``x * sigmoid(x)``."""

    @staticmethod
    def forward(x):
        """Return ``x`` gated element-wise by its own sigmoid."""
        gate = torch.sigmoid(x)
        return x * gate
def autopad(k, p=None):
    """Return padding ``p``; when it is None derive it as half the kernel size.

    ``k`` may be an int or an iterable of ints (one entry per dimension).
    """
    if p is not None:
        return p
    if isinstance(k, int):
        return k // 2
    return [size // 2 for size in k]
class Conv(nn.Module):
    """Conv2d -> BatchNorm2d -> activation building block.

    ``act`` selects the activation: ``True`` creates a fresh LeakyReLU(0.1),
    an ``nn.Module`` instance is used as given, anything else disables the
    activation (``nn.Identity``). The default value is a single LeakyReLU
    instance created at definition time, so blocks built with the default
    share that one (parameter-free) module.
    """

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=nn.LeakyReLU(0.1, inplace=True)):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        padding = autopad(k, p)
        self.conv = nn.Conv2d(c1, c2, k, s, padding, groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2, eps=0.001, momentum=0.03)
        if act is True:
            self.act = nn.LeakyReLU(0.1, inplace=True)
        elif isinstance(act, nn.Module):
            self.act = act
        else:
            self.act = nn.Identity()

    def forward(self, x):
        """Run convolution, batch norm and activation in sequence."""
        y = self.conv(x)
        y = self.bn(y)
        return self.act(y)

    def fuseforward(self, x):
        """Run convolution and activation only, skipping batch norm."""
        y = self.conv(x)
        return self.act(y)
class BasicConv(nn.Module):
    """Conv2d optionally followed by BatchNorm2d and ReLU.

    ``bn`` and ``relu`` toggle the two optional stages; a disabled stage is
    stored as ``None`` and skipped in ``forward``. ``out_channels`` mirrors
    ``out_planes`` for callers that need the output width.
    """

    def __init__(
        self,
        in_planes,
        out_planes,
        kernel_size,
        stride=1,
        padding=0,
        dilation=1,
        groups=1,
        relu=True,
        bn=True,
        bias=False,
    ):
        super().__init__()
        self.out_channels = out_planes
        conv_kwargs = dict(
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias,
        )
        self.conv = nn.Conv2d(in_planes, out_planes, **conv_kwargs)
        if bn:
            self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True)
        else:
            self.bn = None
        if relu:
            self.relu = nn.ReLU()
        else:
            self.relu = None

    def forward(self, x):
        """Apply the convolution, then whichever optional stages are enabled."""
        out = self.conv(x)
        for stage in (self.bn, self.relu):
            if stage is not None:
                out = stage(out)
        return out
class ChannelPool(nn.Module):
    """Reduce dim 1 to two maps: its maximum and its mean, concatenated."""

    def forward(self, x):
        """Return ``cat([max over dim 1, mean over dim 1], dim=1)``."""
        peak = x.max(dim=1, keepdim=True)[0]
        average = x.mean(dim=1, keepdim=True)
        return torch.cat((peak, average), dim=1)
class SpatialGate(nn.Module):
    """Gate the input with a sigmoid map computed from pooled statistics.

    The input is compressed to two maps by ``ChannelPool``, passed through a
    7x7 ``BasicConv`` (2 -> 1 channels, no ReLU), squashed with a sigmoid and
    multiplied back onto the input.
    """

    def __init__(self):
        super().__init__()
        self.compress = ChannelPool()
        # kernel 7 with padding 3 keeps the spatial size unchanged.
        self.spatial = BasicConv(2, 1, 7, stride=1, padding=3, relu=False)

    def forward(self, x):
        """Return ``x`` scaled by the learned sigmoid gate."""
        gate = self.spatial(self.compress(x))
        gate.sigmoid_()  # in place, as the gate logits are not needed afterwards
        return x * gate
def autopad(k, p=None):
    """Pick the padding for kernel size ``k``.

    An explicit ``p`` is returned unchanged; otherwise the padding is
    ``k // 2`` (per element when ``k`` is a sequence).
    """
    if p is None:
        if isinstance(k, int):
            p = k // 2
        else:
            p = [side // 2 for side in k]
    return p
class Conv(nn.Module):
    """Conv2d -> BatchNorm2d -> activation building block.

    ``act`` selects the activation: ``True`` creates a LeakyReLU(0.1), an
    ``nn.Module`` instance is used as given, anything else disables the
    activation (``nn.Identity``).

    This definition now also provides ``fuseforward`` so that it exposes the
    same interface as the other ``Conv`` definitions in this file.
    """

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=nn.LeakyReLU(0.1, inplace=True)):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Conv, self).__init__()
        self.conv   = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn     = nn.BatchNorm2d(c2, eps=0.001, momentum=0.03)
        self.act    = nn.LeakyReLU(0.1, inplace=True) if act is True else (act if isinstance(act, nn.Module) else nn.Identity())

    def forward(self, x):
        """Run convolution, batch norm and activation."""
        return self.act(self.bn(self.conv(x)))

    def fuseforward(self, x):
        """Run convolution and activation only (for use once BN is folded into the conv)."""
        return self.act(self.conv(x))
+#lighting dehaze network
class LMDNet(nn.Module):
    """Dehazing head that fuses three feature maps into a 3-channel image.

    The layer widths are fixed: ``f1`` must have 64 channels, ``f2`` 128 and
    ``f3`` 256. ``f2`` and ``f3`` are upsampled by 2 and 4 respectively, so
    they must be at 1/2 and 1/4 of ``f1``'s spatial size for the fusion to
    line up. The output is upsampled by 4 relative to ``f1``.
    """

    @staticmethod
    def _branch(c_in, c_out, scale=None):
        """Build [optional bilinear upsample] -> 1x1 conv -> LeakyReLU -> Dropout."""
        layers = []
        if scale is not None:
            layers.append(nn.Upsample(scale_factor=scale, mode='bilinear', align_corners=True))
        layers.append(nn.Conv2d(c_in, c_out, 1, 1))
        layers.append(nn.LeakyReLU(inplace=True))
        layers.append(nn.Dropout(0.5))
        return nn.Sequential(*layers)

    def __init__(self):
        super().__init__()
        # mainNet Architecture
        self.AAM = self._branch(64, 3)
        self.AAM_1 = self._branch(128, 32, scale=2)
        self.AAM_2 = self._branch(256, 32, scale=4)
        self.TA = TripletAttention(64)
        self.conv = Conv(64, 3, 3, 1)
        self.up4 = nn.Upsample(scale_factor=4, mode='bilinear', align_corners=True)
        self.relu = nn.LeakyReLU(0.1, inplace=True)

    def forward(self, f1, f2, f3):
        """Return the restored image computed from the three feature maps."""
        t = self.AAM(f1)  # 3-channel map derived from f1 alone
        # Bring the two deeper maps to f1's resolution (32 channels each) and
        # add their concatenation (64 channels) onto f1.
        deep = torch.cat([self.AAM_1(f2), self.AAM_2(f3)], dim=1)
        fused = f1 + deep
        feat = self.conv(self.TA(fused))
        dehaze = (feat * t) - feat + 1
        return self.relu(self.up4(dehaze))
class TripletAttention(nn.Module):
    """Average of ``SpatialGate`` attention applied along three views of the input.

    Two gates operate on the input with dims (1, 2) and (1, 3) swapped; unless
    ``no_spatial`` is set a third gate operates on the input as given.
    ``in_channels``, ``reduction_ratio`` and ``pool_types`` are accepted but
    not used by this implementation.
    """

    def __init__(
        self,
        in_channels,
        reduction_ratio=16,
        pool_types=["avg", "max"],
        no_spatial=False,
    ):
        super().__init__()
        self.ChannelGateH = SpatialGate()
        self.ChannelGateW = SpatialGate()
        self.no_spatial = no_spatial
        if not no_spatial:
            self.SpatialGate = SpatialGate()

    @staticmethod
    def _gate_swapped(gate, x, dims):
        """Apply ``gate`` on ``x`` permuted by ``dims`` and undo the permutation.

        Both permutations used here swap two axes, so each is its own inverse.
        """
        swapped = x.permute(*dims).contiguous()
        return gate(swapped).permute(*dims).contiguous()

    def forward(self, x):
        """Return the mean of the enabled attention branches."""
        branch_h = self._gate_swapped(self.ChannelGateH, x, (0, 2, 1, 3))
        branch_w = self._gate_swapped(self.ChannelGateW, x, (0, 3, 2, 1))
        if self.no_spatial:
            return (1 / 2) * (branch_h + branch_w)
        branch_s = self.SpatialGate(x)
        return (1 / 3) * (branch_s + branch_h + branch_w)
class SiLU(nn.Module):
    """SiLU activation implemented as ``x * sigmoid(x)``."""

    @staticmethod
    def forward(x):
        """Multiply the input by its sigmoid, element-wise."""
        return torch.sigmoid(x) * x
def autopad(k, p=None):
    """Return ``p`` if given, else half of kernel size ``k`` (element-wise for sequences)."""
    if p is not None:
        return p
    return k // 2 if isinstance(k, int) else [extent // 2 for extent in k]
class Conv(nn.Module):
    """Conv2d -> BatchNorm2d -> activation; the last ``Conv`` defined in this file.

    Arguments are input channels, output channels, kernel, stride, padding
    (``None`` lets ``autopad`` choose), groups and activation. ``act=True``
    builds a LeakyReLU(0.1); an ``nn.Module`` is used directly; any other
    value means no activation.
    """

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=nn.LeakyReLU(0.1, inplace=True)):
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2, eps=0.001, momentum=0.03)
        if act is True:
            activation = nn.LeakyReLU(0.1, inplace=True)
        elif isinstance(act, nn.Module):
            activation = act
        else:
            activation = nn.Identity()
        self.act = activation

    def forward(self, x):
        """Convolve, normalise, activate."""
        normed = self.bn(self.conv(x))
        return self.act(normed)

    def fuseforward(self, x):
        """Convolve and activate, with no batch norm in between."""
        convolved = self.conv(x)
        return self.act(convolved)
class GIE(torch.nn.Module):
    """Re-weight each value by a sigmoid of its normalised squared deviation.

    For every (batch, channel) plane the squared deviation from the plane mean
    is divided by the plane's mean squared deviation plus ``epsilon``; the
    sigmoid of that ratio scales the input.

    NOTE(review): the only constructor argument is ``epsilon``. ``MPIF`` in
    this file builds this module as ``GIE(c1)``, which passes a channel count
    as epsilon - confirm that is intended.
    """

    def __init__(self, epsilon=1e-8):
        super().__init__()
        self.epsilon = epsilon

    def forward(self, x):
        """Return ``x * sigmoid(sq_dev / (mean(sq_dev) + epsilon))`` over dims (2, 3)."""
        # Step 1: squared deviation of every value from its plane mean
        plane_mean = x.mean(dim=(2, 3), keepdim=True)
        sq_dev = (x - plane_mean) ** 2
        # Step 2: per-plane average of the squared deviation, stabilised by epsilon
        plane_var = sq_dev.mean(dim=(2, 3), keepdim=True) + self.epsilon
        # Step 3: normalised deviation -> sigmoid gate
        gate = torch.sigmoid(sq_dev / plane_var)
        return x * gate
+# Multi-branch Pooling Information Fusion Module
class MPIF(nn.Module):
    """Multi-branch pooling fusion: summed max/avg pooling, then a concat block.

    The pooled input feeds two 1x1 convs; the first passes through ``GIE``,
    the second is extended by ``n`` stacked 3x3 convs. The feature maps chosen
    by ``ids`` (indices into [gie_branch, conv_branch, stack_1, ..., stack_n])
    are concatenated and projected to ``c3`` channels.

    NOTE(review): ``GIE(c1)`` passes the channel count as GIE's ``epsilon``
    argument - confirm this is intended before changing it, since it affects
    the numerics of any trained weights.
    """

    def __init__(self, c1, c2, c3, s=2, n=4, e=1, ids=[0]):
        super().__init__()
        c_ = int(c2 * e)
        self.ids = ids
        # Stride 1 uses a padded 3x3 window so the spatial size is unchanged.
        if s == 1:
            pool_kwargs = dict(kernel_size=3, stride=s, padding=1)
        else:
            pool_kwargs = dict(kernel_size=2, stride=s)
        self.m1 = nn.MaxPool2d(**pool_kwargs)
        self.m2 = nn.AvgPool2d(**pool_kwargs)
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        stack_inputs = [c_ if i == 0 else c2 for i in range(n)]
        self.cv3 = nn.ModuleList([Conv(width, c2, 3, 1) for width in stack_inputs])
        self.cv4 = Conv(c_ * 2 + c2 * (len(ids) - 2), c3, 1, 1)
        self.GIE = GIE(c1)

    def forward(self, x):
        """Pool, run both branches and fuse the feature maps selected by ``ids``."""
        pooled = self.m1(x) + self.m2(x)
        gie_branch = self.GIE(self.cv1(pooled))
        feats = [gie_branch, self.cv2(pooled)]
        for conv in self.cv3:
            feats.append(conv(feats[-1]))
        picked = [feats[idx] for idx in self.ids]
        return self.cv4(torch.cat(picked, 1))
class DilatedConvNet(nn.Module):
    """A single stride-1 (optionally dilated) Conv2d followed by ReLU."""

    def __init__(self, in_channels, out_channels, dilation, padding, kernel_size):
        super().__init__()
        self.dilated_conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=1,
            padding=padding,
            dilation=dilation,
        )
        self.relu = nn.ReLU(inplace=False)

    def forward(self, x):
        """Return ``relu(conv(x))``."""
        return self.relu(self.dilated_conv(x))
class SPPELAN(nn.Module):
    """Chain of dilated 3x3 convs whose intermediate outputs are all concatenated.

    A 1x1 ``Conv`` reduces the input to ``c3`` channels; three
    ``DilatedConvNet`` stages (dilation 1, 2, 3) are applied one after another,
    and the four resulting maps are concatenated and projected to ``c2``.
    """

    def __init__(self, c1, c2, c3=16):
        super().__init__()
        self.c = c3
        self.cv1 = Conv(c1, c3, 1, 1)
        # cv2..cv4: padding equals dilation so a 3x3 kernel keeps the spatial size.
        for index, rate in enumerate((1, 2, 3), start=2):
            stage = DilatedConvNet(c3, c3, kernel_size=3, padding=rate, dilation=rate)
            setattr(self, f"cv{index}", stage)
        self.cv5 = Conv(4 * c3, c2, 1, 1)

    def forward(self, x):
        """Run the chain and fuse every stage's output."""
        stages = [self.cv1(x)]
        for layer in (self.cv2, self.cv3, self.cv4):
            stages.append(layer(stages[-1]))
        return self.cv5(torch.cat(stages, 1))
def print_model_flops_and_params(model, inputs):
    """Profile ``model`` on ``inputs`` with ``thop.profile`` and print the totals.

    ``inputs`` is forwarded as the ``inputs=`` keyword of ``profile``. The
    first value is printed in units of 1e9 and the second in units of 1e6.
    NOTE(review): thop's README names its first return value MACs, not FLOPs -
    confirm the label.
    """
    stats = profile(model, inputs=inputs)
    flops, params = stats
    print(f"FLOPs: {flops / 1e9:.2f} GFLOPs")
    print(f"Parameters: {params / 1e6:.2f} M")
if __name__ == "__main__":
    # Smoke test: profile LMDNet on random feature maps with the channel
    # counts (64 / 128 / 256) and 1x, 1/2x, 1/4x resolutions it expects.
    feature_shapes = [(1, 64, 160, 160), (1, 128, 80, 80), (1, 256, 40, 40)]
    features = tuple(torch.randn(*shape) for shape in feature_shapes)
    print_model_flops_and_params(LMDNet(), features)

nets/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ #

nets/backbone.py ADDED Viewed

	@@ -0,0 +1,105 @@

+import torch
+import torch.nn as nn
+from nets.Common import GIE, LMDNet
def autopad(k, p=None):
    """Resolve convolution padding.

    Returns ``p`` when provided; otherwise ``k // 2`` for an int kernel or a
    list of ``x // 2`` for a sequence of kernel sizes.
    """
    if p is not None:
        return p
    if isinstance(k, int):
        return k // 2
    return [length // 2 for length in k]
class Conv(nn.Module):
    """Conv2d -> BatchNorm2d -> activation block local to this module.

    ``act=True`` builds a LeakyReLU(0.1); an ``nn.Module`` is used directly;
    any other value disables the activation via ``nn.Identity``.
    """

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=nn.LeakyReLU(0.1, inplace=True)):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2, eps=0.001, momentum=0.03)
        if act is True:
            self.act = nn.LeakyReLU(0.1, inplace=True)
        else:
            self.act = act if isinstance(act, nn.Module) else nn.Identity()

    def forward(self, x):
        """Convolution, then batch norm, then activation."""
        features = self.conv(x)
        features = self.bn(features)
        return self.act(features)

    def fuseforward(self, x):
        """Convolution then activation, without batch norm."""
        features = self.conv(x)
        return self.act(features)
+# Multi-branch Pooling Information Fusion Module （Multi_Concat_Block + MP）#
+# ------------------------------------------------------------------------- #
class Multi_Concat_Block(nn.Module):
    """Two 1x1 branches plus a stack of 3x3 convs, fused by concatenation.

    The first branch passes through ``GIE``; the second feeds ``n`` stacked
    3x3 convs. ``ids`` indexes into [gie_branch, conv_branch, stack_1, ...,
    stack_n] to choose what is concatenated before the final 1x1 projection
    to ``c3`` channels.

    NOTE(review): ``GIE(c1)`` passes the channel count as GIE's only
    constructor argument - check against the GIE definition that this is
    intended before changing it.
    """

    def __init__(self, c1, c2, c3, n=4, e=1, ids=[0]):
        super().__init__()
        c_ = int(c2 * e)
        self.ids = ids
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        stack_inputs = [c_ if i == 0 else c2 for i in range(n)]
        self.cv3 = nn.ModuleList([Conv(width, c2, 3, 1) for width in stack_inputs])
        self.cv4 = Conv(c_ * 2 + c2 * (len(ids) - 2), c3, 1, 1)
        self.GIE = GIE(c1)

    def forward(self, x):
        """Run both branches and fuse the feature maps selected by ``ids``."""
        gie_branch = self.GIE(self.cv1(x))
        feats = [gie_branch, self.cv2(x)]
        for conv in self.cv3:
            feats.append(conv(feats[-1]))
        selected = [feats[idx] for idx in self.ids]
        return self.cv4(torch.cat(selected, 1))
class MP(nn.Module):
    """Downsample by ``k`` using the sum of max pooling and average pooling."""

    def __init__(self, k=2):
        super().__init__()
        self.m1 = nn.MaxPool2d(kernel_size=k, stride=k)
        self.m2 = nn.AvgPool2d(kernel_size=k, stride=k)

    def forward(self, x):
        """Return ``maxpool(x) + avgpool(x)``."""
        return self.m1(x) + self.m2(x)
+# ------------------------------------------------------------------------- #
class Backbone(nn.Module):
    """Feature extractor with an auxiliary dehazing head used during training.

    Args:
        transition_channels: base width; stage outputs are 4x, 8x, 16x and 32x
            this value. ``LMDNet`` is built with fixed 64/128/256 input
            channels, so the dehazing head only matches when this is 16.
        block_channels: base width of the convs inside each concat block.
        n: number of stacked 3x3 convs per ``Multi_Concat_Block``.

    In training mode ``forward`` expects a batch whose first half is processed
    by the network and whose second half is dropped here (presumably clean
    reference images consumed elsewhere - TODO confirm against the training
    loop). It returns ``(feat1, feat2, feat3, dehazing)``. In eval mode the
    whole batch is processed and ``(feat1, feat2, feat3)`` is returned.
    """

    def __init__(self, transition_channels, block_channels, n):
        super().__init__()
        ids = [-1, -2, -3, -4]
        self.stem = Conv(3, transition_channels * 2, 3, 2)
        self.dehze = LMDNet()
        self.dark2 = nn.Sequential(
            Conv(transition_channels * 2, transition_channels * 4, 3, 2),
            Multi_Concat_Block(transition_channels * 4, block_channels * 2, transition_channels * 4, n=n, ids=ids),
        )
        self.dark3 = nn.Sequential(
            MP(),
            Multi_Concat_Block(transition_channels * 4, block_channels * 4, transition_channels * 8, n=n, ids=ids),
        )
        self.dark4 = nn.Sequential(
            MP(),
            Multi_Concat_Block(transition_channels * 8, block_channels * 8, transition_channels * 16, n=n, ids=ids),
        )
        self.dark5 = nn.Sequential(
            MP(),
            Multi_Concat_Block(transition_channels * 16, block_channels * 16, transition_channels * 32, n=n, ids=ids),
        )

    def forward(self, x):
        """Extract three feature maps (plus the dehazed image when training).

        Raises:
            ValueError: in training mode, if the batch size is zero or odd and
                therefore cannot be split into two equal halves.
        """
        if self.training:
            batch = x.shape[0]
            if batch == 0 or batch % 2:
                raise ValueError(
                    f"training batch must hold an even, non-zero number of images, got {batch}"
                )
            # Keep only the first half. This was previously a hard-coded
            # (8, 8) split, which is the same thing for a batch of 16.
            x = x[: batch // 2]
        x = self.stem(x)
        f1 = self.dark2(x)
        feat1 = self.dark3(f1)
        feat2 = self.dark4(feat1)
        feat3 = self.dark5(feat2)
        if not self.training:
            # The dehazing head's output is only returned in training mode,
            # so skip its computation entirely at inference time.
            return feat1, feat2, feat3
        dehazing = self.dehze(f1, feat1, feat2)
        return feat1, feat2, feat3, dehazing

nets/model.py ADDED Viewed

	@@ -0,0 +1,121 @@

+import torch
+import torch.nn as nn
+from nets.Common import Conv, SPPELAN
+from nets.backbone import Backbone, Multi_Concat_Block
def fuse_conv_and_bn(conv, bn):
    """Fold a BatchNorm2d into the preceding Conv2d and return the fused conv.

    The returned ``nn.Conv2d`` has a bias, does not require gradients, lives
    on ``conv``'s device and reproduces ``bn(conv(x))`` using the batch norm's
    running statistics (i.e. its eval-mode behaviour).

    Args:
        conv: the ``nn.Conv2d`` to fuse (with or without bias).
        bn: the ``nn.BatchNorm2d`` applied to ``conv``'s output.
    """
    fusedconv = nn.Conv2d(conv.in_channels,
                          conv.out_channels,
                          kernel_size=conv.kernel_size,
                          stride=conv.stride,
                          padding=conv.padding,
                          # Carry dilation over too; without it a dilated conv
                          # would be replaced by an undilated one.
                          dilation=conv.dilation,
                          groups=conv.groups,
                          bias=True).requires_grad_(False).to(conv.weight.device)

    # Fused weight: scale every output filter by gamma / sqrt(var + eps).
    w_conv  = conv.weight.clone().view(conv.out_channels, -1)
    w_bn    = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
    fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape).detach())

    # Fused bias: scaled conv bias (zero if absent) plus the batch norm shift.
    b_conv  = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias
    b_bn    = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
    fusedconv.bias.copy_((torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn).detach())
    return fusedconv
class MP(nn.Module):
    """Sum of max and average pooling by ``k``, upsampled back by a factor of 2."""

    def __init__(self, k=2):
        super().__init__()
        self.m1 = nn.MaxPool2d(kernel_size=k, stride=k)
        self.m2 = nn.AvgPool2d(kernel_size=k, stride=k)
        self.up = nn.Upsample(scale_factor=2)

    def forward(self, x):
        """Return ``upsample(maxpool(x) + avgpool(x))``."""
        pooled = self.m1(x) + self.m2(x)
        return self.up(pooled)
class YoloBody(nn.Module):
    """Detector: ``Backbone`` -> SPPELAN -> top-down / bottom-up neck -> three heads.

    Args:
        anchors_mask: three groups of anchor indices; entries [2], [1] and [0]
            size the P3, P4 and P5 heads respectively.
        num_classes: number of object classes; every anchor predicts
            ``5 + num_classes`` values.

    ``forward`` returns ``[out0, out1, out2]`` (P5, P4, P3 head outputs) and,
    in training mode, additionally appends the backbone's dehazing output.
    """

    def __init__(self, anchors_mask, num_classes):
        super(YoloBody, self).__init__()
        transition_channels = 16
        block_channels      = 16
        panet_channels      = 16
        e                   = 1
        n                   = 2
        ids                 = [-1, -2, -3, -4]
        self.backbone   = Backbone(transition_channels, block_channels, n)
        self.upsample   = nn.Upsample(scale_factor=2, mode="nearest")
        self.sppelan                = SPPELAN(transition_channels * 32, transition_channels * 16)
        self.conv_for_P5            = Conv(transition_channels * 16, transition_channels * 8)
        self.conv_for_feat2         = Conv(transition_channels * 16, transition_channels * 8)
        self.conv3_for_upsample1    = Multi_Concat_Block(transition_channels * 16, panet_channels * 4, transition_channels * 8, e=e, n=n, ids=ids)
        self.conv_for_P4            = Conv(transition_channels * 8, transition_channels * 4)
        self.conv_for_feat1         = Conv(transition_channels * 8, transition_channels * 4)
        self.conv3_for_upsample2    = Multi_Concat_Block(transition_channels * 8, panet_channels * 2, transition_channels * 4, e=e, n=n, ids=ids)
        self.down_sample1           = Conv(transition_channels * 4, transition_channels * 8, k=3, s=2)
        self.conv3_for_downsample1  = Multi_Concat_Block(transition_channels * 16, panet_channels * 4, transition_channels * 8, e=e, n=n, ids=ids)
        self.down_sample2           = Conv(transition_channels * 8, transition_channels * 16, k=3, s=2)
        self.conv3_for_downsample2  = Multi_Concat_Block(transition_channels * 32, panet_channels * 8, transition_channels * 16, e=e, n=n, ids=ids)
        self.pf = MP()
        self.rep_conv_1 = Conv(transition_channels * 4, transition_channels * 8, 3, 1)
        self.rep_conv_2 = Conv(transition_channels * 8, transition_channels * 16, 3, 1)
        self.rep_conv_3 = Conv(transition_channels * 16, transition_channels * 32, 3, 1)
        self.yolo_head_P3 = nn.Conv2d(transition_channels * 8, len(anchors_mask[2]) * (5 + num_classes), 1)
        self.yolo_head_P4 = nn.Conv2d(transition_channels * 16, len(anchors_mask[1]) * (5 + num_classes), 1)
        self.yolo_head_P5 = nn.Conv2d(transition_channels * 32, len(anchors_mask[0]) * (5 + num_classes), 1)

    def fuse(self):
        """Fold every Conv+BatchNorm pair into a single conv, for inference.

        Blocks are recognised structurally (a ``conv`` Conv2d, a ``bn``
        BatchNorm2d and a ``fuseforward`` method) rather than by exact class.
        ``nets.backbone`` defines its own ``Conv`` class, distinct from the one
        imported here from ``nets.Common``, so an exact type check would leave
        every backbone block unfused. Calling this twice is harmless: fused
        blocks no longer have a ``bn`` attribute.
        """
        print('Fusing layers... ')
        for m in self.modules():
            conv = getattr(m, 'conv', None)
            bn = getattr(m, 'bn', None)
            fusable = (
                isinstance(conv, nn.Conv2d)
                and isinstance(bn, nn.BatchNorm2d)
                and hasattr(m, 'fuseforward')
            )
            if fusable:
                m.conv = fuse_conv_and_bn(conv, bn)
                delattr(m, 'bn')
                m.forward = m.fuseforward
        return self

    def forward(self, x):
        """Run the detector; see the class docstring for the return layout."""
        # Go through the module call (not .forward) so registered hooks run.
        backbone_out = self.backbone(x)
        if self.training:
            feat1, feat2, feat3, dehazing = backbone_out
        else:
            feat1, feat2, feat3 = backbone_out

        # Top-down path: P5 -> P4 -> P3.
        P5          = self.sppelan(feat3)
        P5_conv     = self.conv_for_P5(P5)
        P5_upsample = self.upsample(P5_conv)
        P4          = torch.cat([self.conv_for_feat2(feat2), P5_upsample], 1)
        P4          = self.conv3_for_upsample1(P4)
        P4_conv     = self.conv_for_P4(P4)
        P4_upsample = self.upsample(P4_conv)
        P3          = torch.cat([self.conv_for_feat1(feat1), P4_upsample], 1)
        P3          = self.conv3_for_upsample2(P3)

        # Bottom-up path: P3 -> P4 -> P5.
        P3_downsample = self.down_sample1(P3)
        P4 = torch.cat([P3_downsample, P4], 1)
        P4 = self.conv3_for_downsample1(P4)
        P4 = self.pf(P4)
        P4_downsample = self.down_sample2(P4)
        P5 = torch.cat([P4_downsample, P5], 1)
        P5 = self.conv3_for_downsample2(P5)

        # Widen each level, then apply the 1x1 prediction heads.
        P3 = self.rep_conv_1(P3)
        P4 = self.rep_conv_2(P4)
        P5 = self.rep_conv_3(P5)
        out2 = self.yolo_head_P3(P3)
        out1 = self.yolo_head_P4(P4)
        out0 = self.yolo_head_P5(P5)
        if self.training:
            return [out0, out1, out2, dehazing]
        else:
            return [out0, out1, out2]

nets/yolo_training.py ADDED Viewed

	@@ -0,0 +1,348 @@

+import math
+from copy import deepcopy
+from functools import partial
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
def smooth_BCE(eps=0.1):
    """Return the (positive, negative) label-smoothed BCE targets for ``eps``.

    The positive target is ``1 - eps / 2`` and the negative target ``eps / 2``.
    """
    negative = 0.5 * eps
    positive = 1.0 - negative
    return positive, negative
+class YOLOLoss(nn.Module):
+    def __init__(self, anchors, num_classes, input_shape, anchors_mask = [[6,7,8], [3,4,5], [0,1,2]], label_smoothing = 0):
+        super(YOLOLoss, self).__init__()
+        self.anchors        = [anchors[mask] for mask in anchors_mask]
+        self.num_classes    = num_classes
+        self.input_shape    = input_shape
+        self.anchors_mask   = anchors_mask
+        self.balance        = [0.4, 1.0, 4]
+        self.stride         = [32, 16, 8]
+        self.box_ratio      = 0.05
+        self.obj_ratio      = 1 * (input_shape[0] * input_shape[1]) / (640 ** 2)
+        self.cls_ratio      = 0.5 * (num_classes / 80)
+        self.threshold      = 4
+        self.cp, self.cn                    = smooth_BCE(eps=label_smoothing)
+        self.BCEcls, self.BCEobj, self.gr   = nn.BCEWithLogitsLoss(), nn.BCEWithLogitsLoss(), 1
+    def bbox_iou(self, box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
+        box2 = box2.T
+        if x1y1x2y2:
+            b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
+            b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
+        else:
+            b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
+            b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
+            b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
+            b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2
+        inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
+                (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
+        w1, h1  = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
+        w2, h2  = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps
+        union   = w1 * h1 + w2 * h2 - inter + eps
+        iou = inter / union
+        if GIoU or DIoU or CIoU:
+            cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)
+            ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)
+            if CIoU or DIoU:
+                c2 = cw ** 2 + ch ** 2 + eps
+                rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 +
+                        (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4
+                if DIoU:
+                    return iou - rho2 / c2
+                elif CIoU:
+                    v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
+                    with torch.no_grad():
+                        alpha = v / (v - iou + (1 + eps))
+                    return iou - (rho2 / c2 + v * alpha)
+            else:
+                c_area = cw * ch + eps
+                return iou - (c_area - union) / c_area
+        else:
+            return iou
    def __call__(self, predictions, targets, imgs):
        """Compute the total detection loss (box + objectness + class).

        ``predictions`` is a list of raw head outputs; each entry is reshaped
        IN PLACE (the caller's list is modified) from ``(bs, A * K, h, w)`` to
        ``(bs, A, h, w, K)``, where ``A = len(self.anchors_mask[i])``.
        ``targets`` and ``imgs`` are passed on to ``build_targets``; target
        column 1 is used as the class index and columns 2:6 as the box.

        Returns a 1-element tensor holding the weighted sum of the three terms.

        NOTE(review): this overrides ``__call__`` on an ``nn.Module`` subclass
        instead of defining ``forward``, so ``nn.Module`` hooks never run.
        NOTE(review): every entry of ``predictions`` is indexed against
        ``self.anchors_mask`` / ``self.balance``, so the caller must pass only
        the detection outputs (no extra auxiliary tensors) - confirm at the
        call site.
        """
        for i in range(len(predictions)):
            bs, _, h, w = predictions[i].size()
            predictions[i] = predictions[i].view(bs, len(self.anchors_mask[i]), -1, h, w).permute(0, 1, 3, 4, 2).contiguous()
        device              = targets.device
        cls_loss, box_loss, obj_loss    = torch.zeros(1, device = device), torch.zeros(1, device = device), torch.zeros(1, device = device)
        # From here on bs/as_/gjs/gis are per-level index lists of the matched
        # positives; targets/anchors are the per-level matched targets/anchors.
        bs, as_, gjs, gis, targets, anchors = self.build_targets(predictions, targets, imgs)
        # Per-level (w, h, w, h) of the feature map, used to scale target boxes.
        feature_map_sizes = [torch.tensor(prediction.shape, device=device)[[3, 2, 3, 2]].type_as(prediction) for prediction in predictions]
        for i, prediction in enumerate(predictions):
            b, a, gj, gi    = bs[i], as_[i], gjs[i], gis[i]
            # Objectness target: zero everywhere except at matched positions.
            tobj            = torch.zeros_like(prediction[..., 0], device=device)
            n = b.shape[0]
            if n:
                prediction_pos = prediction[b, a, gj, gi]
                grid    = torch.stack([gi, gj], dim=1)
                # Decode predicted boxes relative to their grid cell.
                xy      = prediction_pos[:, :2].sigmoid() * 2. - 0.5
                wh      = (prediction_pos[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
                box     = torch.cat((xy, wh), 1)
                # Target boxes scaled to feature-map units, centre made cell-relative.
                selected_tbox           = targets[i][:, 2:6] * feature_map_sizes[i]
                selected_tbox[:, :2]    -= grid.type_as(prediction)
                iou                     = self.bbox_iou(box.T, selected_tbox, x1y1x2y2=False, CIoU=True)
                box_loss                += (1.0 - iou).mean()
                # With self.gr == 1 the objectness target is the (clamped) CIoU itself.
                tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * iou.detach().clamp(0).type(tobj.dtype)
                selected_tcls               = targets[i][:, 1].long()
                # Class targets: smoothed negative everywhere, smoothed positive at the true class.
                t                           = torch.full_like(prediction_pos[:, 5:], self.cn, device=device)
                t[range(n), selected_tcls]  = self.cp
                cls_loss                    += self.BCEcls(prediction_pos[:, 5:], t)
            obj_loss += self.BCEobj(prediction[..., 4], tobj) * self.balance[i]
        box_loss    *= self.box_ratio
        obj_loss    *= self.obj_ratio
        cls_loss    *= self.cls_ratio
        # NOTE(review): bs is reassigned here but not used afterwards.
        bs          = tobj.shape[0]
        loss    = box_loss + obj_loss + cls_loss
        return loss
+    def xywh2xyxy(self, x):
+        y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
+        y[:, 0] = x[:, 0] - x[:, 2] / 2
+        y[:, 1] = x[:, 1] - x[:, 3] / 2
+        y[:, 2] = x[:, 0] + x[:, 2] / 2
+        y[:, 3] = x[:, 1] + x[:, 3] / 2
+        return y
+    def box_iou(self, box1, box2):
+        """
+        Return intersection-over-union (Jaccard index) of boxes.
+        Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
+        Arguments:
+            box1 (Tensor[N, 4])
+            box2 (Tensor[M, 4])
+        Returns:
+            iou (Tensor[N, M]): the NxM matrix containing the pairwise
+                IoU values for every element in boxes1 and boxes2
+        """
+        def box_area(box):
+            return (box[2] - box[0]) * (box[3] - box[1])
+        area1 = box_area(box1.T)
+        area2 = box_area(box2.T)
+        inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)
+        return inter / (area1[:, None] + area2 - inter)
    def build_targets(self, predictions, targets, imgs):
        """Select, per image, which candidate positions are trained against which target.

        Candidates come from ``find_3_positive``. For every image a cost matrix
        (classification loss + 3 * IoU loss) between its targets and its
        candidates is built; each target keeps its ``dynamic_k`` cheapest
        candidates, and a candidate claimed by several targets goes to the
        cheapest one.

        Returns six lists with one entry per prediction level: batch indices,
        anchor indices, grid rows (gj), grid columns (gi), matched target rows
        and matched anchors.
        """
        indices, anch       = self.find_3_positive(predictions, targets)
        matching_bs         = [[] for _ in predictions]
        matching_as         = [[] for _ in predictions]
        matching_gjs        = [[] for _ in predictions]
        matching_gis        = [[] for _ in predictions]
        matching_targets    = [[] for _ in predictions]
        matching_anchs      = [[] for _ in predictions]
        num_layer = len(predictions)
        for batch_idx in range(predictions[0].shape[0]):
            # Targets of this image (column 0 of targets is the batch index).
            b_idx       = targets[:, 0]==batch_idx
            this_target = targets[b_idx]
            if this_target.shape[0] == 0:
                continue
            # NOTE(review): x, y, w and h are all scaled by the same image
            # dimension (shape[1]) - presumably normalised boxes on square
            # inputs; confirm for non-square images.
            txywh = this_target[:, 2:6] * imgs[batch_idx].shape[1]
            txyxy = self.xywh2xyxy(txywh)
            pxyxys      = []
            p_cls       = []
            p_obj       = []
            from_which_layer = []
            all_b       = []
            all_a       = []
            all_gj      = []
            all_gi      = []
            all_anch    = []
            # Gather this image's candidates from every level and decode their boxes.
            for i, prediction in enumerate(predictions):
                b, a, gj, gi    = indices[i]
                idx             = (b == batch_idx)
                b, a, gj, gi    = b[idx], a[idx], gj[idx], gi[idx]
                all_b.append(b)
                all_a.append(a)
                all_gj.append(gj)
                all_gi.append(gi)
                all_anch.append(anch[i][idx])
                # Created without a device argument; moved next to the mask further down.
                from_which_layer.append(torch.ones(size=(len(b),)) * i)
                fg_pred = prediction[b, a, gj, gi]
                p_obj.append(fg_pred[:, 4:5])
                p_cls.append(fg_pred[:, 5:])
                grid    = torch.stack([gi, gj], dim=1).type_as(fg_pred)
                # Decode to input-image scale via the level's stride.
                pxy     = (fg_pred[:, :2].sigmoid() * 2. - 0.5 + grid) * self.stride[i]
                pwh     = (fg_pred[:, 2:4].sigmoid() * 2) ** 2 * anch[i][idx] * self.stride[i]
                pxywh   = torch.cat([pxy, pwh], dim=-1)
                pxyxy   = self.xywh2xyxy(pxywh)
                pxyxys.append(pxyxy)
            pxyxys = torch.cat(pxyxys, dim=0)
            if pxyxys.shape[0] == 0:
                continue
            p_obj       = torch.cat(p_obj, dim=0)
            p_cls       = torch.cat(p_cls, dim=0)
            from_which_layer = torch.cat(from_which_layer, dim=0)
            all_b       = torch.cat(all_b, dim=0)
            all_a       = torch.cat(all_a, dim=0)
            all_gj      = torch.cat(all_gj, dim=0)
            all_gi      = torch.cat(all_gi, dim=0)
            all_anch    = torch.cat(all_anch, dim=0)
            # IoU cost between every target (rows) and every candidate (columns).
            pair_wise_iou       = self.box_iou(txyxy, pxyxys)
            pair_wise_iou_loss  = -torch.log(pair_wise_iou + 1e-8)
            # dynamic_k per target: integer part of the sum of its top-20 IoUs, at least 1.
            top_k, _    = torch.topk(pair_wise_iou, min(20, pair_wise_iou.shape[1]), dim=1)
            dynamic_ks  = torch.clamp(top_k.sum(1).int(), min=1)
            gt_cls_per_image = F.one_hot(this_target[:, 1].to(torch.int64), self.num_classes).float().unsqueeze(1).repeat(1, pxyxys.shape[0], 1)
            num_gt              = this_target.shape[0]
            # Class score = sqrt(sigmoid(cls) * sigmoid(obj)), then turned back
            # into a logit so the with-logits BCE can be used.
            cls_preds_          = p_cls.float().unsqueeze(0).repeat(num_gt, 1, 1).sigmoid_() * p_obj.unsqueeze(0).repeat(num_gt, 1, 1).sigmoid_()
            y                   = cls_preds_.sqrt_()
            pair_wise_cls_loss  = F.binary_cross_entropy_with_logits(torch.log(y / (1 - y)), gt_cls_per_image, reduction="none").sum(-1)
            del cls_preds_
            cost = (
                pair_wise_cls_loss
                + 3.0 * pair_wise_iou_loss
            )
            # matching_matrix[g, c] == 1 means candidate c is assigned to target g.
            matching_matrix = torch.zeros_like(cost)
            for gt_idx in range(num_gt):
                _, pos_idx = torch.topk(cost[gt_idx], k=dynamic_ks[gt_idx].item(), largest=False)
                matching_matrix[gt_idx][pos_idx] = 1.0
            del top_k, dynamic_ks
            # Resolve candidates claimed by more than one target: keep the cheapest target.
            anchor_matching_gt = matching_matrix.sum(0)
            if (anchor_matching_gt > 1).sum() > 0:
                _, cost_argmin = torch.min(cost[:, anchor_matching_gt > 1], dim=0)
                matching_matrix[:, anchor_matching_gt > 1]          *= 0.0
                matching_matrix[cost_argmin, anchor_matching_gt > 1] = 1.0
            fg_mask_inboxes = matching_matrix.sum(0) > 0.0
            matched_gt_inds = matching_matrix[:, fg_mask_inboxes].argmax(0)
            # Keep only the assigned candidates and the target each one maps to.
            from_which_layer    = from_which_layer.to(fg_mask_inboxes.device)[fg_mask_inboxes]
            all_b               = all_b[fg_mask_inboxes]
            all_a               = all_a[fg_mask_inboxes]
            all_gj              = all_gj[fg_mask_inboxes]
            all_gi              = all_gi[fg_mask_inboxes]
            all_anch            = all_anch[fg_mask_inboxes]
            this_target         = this_target[matched_gt_inds]
            # Split the assignments back out per prediction level.
            for i in range(num_layer):
                layer_idx = from_which_layer == i
                matching_bs[i].append(all_b[layer_idx])
                matching_as[i].append(all_a[layer_idx])
                matching_gjs[i].append(all_gj[layer_idx])
                matching_gis[i].append(all_gi[layer_idx])
                matching_targets[i].append(this_target[layer_idx])
                matching_anchs[i].append(all_anch[layer_idx])
        # Concatenate over images.
        # NOTE(review): when a level collected nothing, torch.Tensor([]) yields
        # an empty default-type CPU tensor, unlike the non-empty case - confirm
        # callers cope with that dtype/device difference.
        for i in range(num_layer):
            matching_bs[i]      = torch.cat(matching_bs[i], dim=0) if len(matching_bs[i]) != 0 else torch.Tensor(matching_bs[i])
            matching_as[i]      = torch.cat(matching_as[i], dim=0) if len(matching_as[i]) != 0 else torch.Tensor(matching_as[i])
            matching_gjs[i]     = torch.cat(matching_gjs[i], dim=0) if len(matching_gjs[i]) != 0 else torch.Tensor(matching_gjs[i])
            matching_gis[i]     = torch.cat(matching_gis[i], dim=0) if len(matching_gis[i]) != 0 else torch.Tensor(matching_gis[i])
            matching_targets[i] = torch.cat(matching_targets[i], dim=0) if len(matching_targets[i]) != 0 else torch.Tensor(matching_targets[i])
            matching_anchs[i]   = torch.cat(matching_anchs[i], dim=0) if len(matching_anchs[i]) != 0 else torch.Tensor(matching_anchs[i])
        return matching_bs, matching_as, matching_gjs, matching_gis, matching_targets, matching_anchs
+    def find_3_positive(self, predictions, targets):
+        num_anchor, num_gt  = len(self.anchors_mask[0]), targets.shape[0]
+        indices, anchors    = [], []
+        gain    = torch.ones(7, device=targets.device)
+        ai      = torch.arange(num_anchor, device=targets.device).float().view(num_anchor, 1).repeat(1, num_gt)
+        targets = torch.cat((targets.repeat(num_anchor, 1, 1), ai[:, :, None]), 2)
+        g   = 0.5
+        off = torch.tensor([
+            [0, 0],
+            [1, 0], [0, 1], [-1, 0], [0, -1],
+        ], device=targets.device).float() * g
+        for i in range(len(predictions)):
+            anchors_i = torch.from_numpy(self.anchors[i] / self.stride[i]).type_as(predictions[i])
+            anchors_i, shape = torch.from_numpy(self.anchors[i] / self.stride[i]).type_as(predictions[i]), predictions[i].shape
+            gain[2:6] = torch.tensor(predictions[i].shape)[[3, 2, 3, 2]]
+            t = targets * gain
+            if num_gt:
+                r = t[:, :, 4:6] / anchors_i[:, None]
+                j = torch.max(r, 1. / r).max(2)[0] < self.threshold
+                t = t[j]
+                gxy     = t[:, 2:4]
+                gxi     = gain[[2, 3]] - gxy
+                j, k    = ((gxy % 1. < g) & (gxy > 1.)).T
+                l, m    = ((gxi % 1. < g) & (gxi > 1.)).T
+                j       = torch.stack((torch.ones_like(j), j, k, l, m))
+                t       = t.repeat((5, 1, 1))[j]
+                offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
+            else:
+                t = targets[0]
+                offsets = 0
+            b, c    = t[:, :2].long().T
+            gxy     = t[:, 2:4]
+            gwh     = t[:, 4:6]
+            gij     = (gxy - offsets).long()
+            gi, gj  = gij.T
+            a = t[:, 6].long()
+            indices.append((b, a, gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1)))
+            anchors.append(anchors_i[a])
+        return indices, anchors
+def is_parallel(model):
+    return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)
+def de_parallel(model):
+    return model.module if is_parallel(model) else model
+def copy_attr(a, b, include=(), exclude=()):
+    for k, v in b.__dict__.items():
+        if (len(include) and k not in include) or k.startswith('_') or k in exclude:
+            continue
+        else:
+            setattr(a, k, v)
+class ModelEMA:
+    """ Updated Exponential Moving Average (EMA) from https://github.com/rwightman/pytorch-image-models
+    Keeps a moving average of everything in the model state_dict (parameters and buffers)
+    For EMA details see https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
+    """
+    def __init__(self, model, decay=0.9999, tau=2000, updates=0):
+        self.ema = deepcopy(de_parallel(model)).eval()
+        self.updates = updates
+        self.decay = lambda x: decay * (1 - math.exp(-x / tau))
+        for p in self.ema.parameters():
+            p.requires_grad_(False)
+    def update(self, model):
+        with torch.no_grad():
+            self.updates += 1
+            d = self.decay(self.updates)
+            msd = de_parallel(model).state_dict()
+            for k, v in self.ema.state_dict().items():
+                if v.dtype.is_floating_point:
+                    v *= d
+                    v += (1 - d) * msd[k].detach()
+    def update_attr(self, model, include=(), exclude=('process_group', 'reducer')):
+        copy_attr(self.ema, model, include, exclude)
+def weights_init(net, init_type='normal', init_gain = 0.02):
+    def init_func(m):
+        classname = m.__class__.__name__
+        if hasattr(m, 'weight') and classname.find('Conv') != -1:
+            if init_type == 'normal':
+                torch.nn.init.normal_(m.weight.data, 0.0, init_gain)
+            elif init_type == 'xavier':
+                torch.nn.init.xavier_normal_(m.weight.data, gain=init_gain)
+            elif init_type == 'kaiming':
+                torch.nn.init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
+            elif init_type == 'orthogonal':
+                torch.nn.init.orthogonal_(m.weight.data, gain=init_gain)
+            else:
+                raise NotImplementedError('initialization method [%s] is not implemented' % init_type)
+        elif classname.find('BatchNorm2d') != -1:
+            torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
+            torch.nn.init.constant_(m.bias.data, 0.0)
+    print('initialize network with %s type' % init_type)
+    net.apply(init_func)
+def get_lr_scheduler(lr_decay_type, lr, min_lr, total_iters, warmup_iters_ratio = 0.05, warmup_lr_ratio = 0.1, no_aug_iter_ratio = 0.05, step_num = 10):
+    def yolox_warm_cos_lr(lr, min_lr, total_iters, warmup_total_iters, warmup_lr_start, no_aug_iter, iters):
+        if iters <= warmup_total_iters:
+            lr = (lr - warmup_lr_start) * pow(iters / float(warmup_total_iters), 2
+            ) + warmup_lr_start
+        elif iters >= total_iters - no_aug_iter:
+            lr = min_lr
+        else:
+            lr = min_lr + 0.5 * (lr - min_lr) * (
+                1.0
+                + math.cos(
+                    math.pi
+                    * (iters - warmup_total_iters)
+                    / (total_iters - warmup_total_iters - no_aug_iter)
+                )
+            )
+        return lr
+    def step_lr(lr, decay_rate, step_size, iters):
+        if step_size < 1:
+            raise ValueError("step_size must above 1.")
+        n       = iters // step_size
+        out_lr  = lr * decay_rate ** n
+        return out_lr
+    if lr_decay_type == "cos":
+        warmup_total_iters  = min(max(warmup_iters_ratio * total_iters, 1), 3)
+        warmup_lr_start     = max(warmup_lr_ratio * lr, 1e-6)
+        no_aug_iter         = min(max(no_aug_iter_ratio * total_iters, 1), 15)
+        func = partial(yolox_warm_cos_lr ,lr, min_lr, total_iters, warmup_total_iters, warmup_lr_start, no_aug_iter)
+    else:
+        decay_rate  = (min_lr / lr) ** (1 / (step_num - 1))
+        step_size   = total_iters / step_num
+        func = partial(step_lr, lr, decay_rate, step_size)
+    return func
+def set_optimizer_lr(optimizer, lr_scheduler_func, epoch):
+    lr = lr_scheduler_func(epoch)
+    for param_group in optimizer.param_groups:
+        param_group['lr'] = lr