Reference:
輕量級網(wǎng)絡——MobileNetV2 (Clichong's blog, CSDN)
1、Introduction to MobileNetV2
MobileNetV1 is built around depthwise separable convolutions (DW + PW), which greatly reduce the number of parameters and therefore the amount of computation. However, when used as the backbone of the CenterNet detector it does not perform well: the model converges poorly and the detection accuracy suffers.
MobileNetV2 refines the DW and PW convolutions of MobileNetV1, achieves higher accuracy, and also works acceptably as the backbone of CenterNet. Its two highlights are:
- Inverted Residuals: the inverted residual block
- Linear Bottlenecks: the last layer of each block uses a linear activation
2、The Structure of MobileNetV2
(1)Inverted residual block
In ResNet, the residual bottleneck first uses a 1*1 convolution to reduce the channel dimension and then expands it back, so it is wide at both ends and narrow in the middle.
In MobileNetV2, the inverted residual block first uses a 1*1 convolution to expand the channel dimension and then projects it back down, so it is narrow at both ends and wide in the middle.
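As a rough illustration (the channel counts below are made-up examples, not values from either paper), an inverted residual block with expansion factor t = 6 first expands 24 input channels to 144 with a 1*1 convolution, applies a 3*3 depthwise convolution, and then projects back down to 24 with a linear 1*1 convolution; the full implementation used in this post follows in section 3.

import torch
import torch.nn as nn

# Sketch of one inverted residual block (expansion factor t = 6, ReLU6 as described next).
# The channel counts 24 -> 144 -> 24 are illustrative only.
block = nn.Sequential(
    nn.Conv2d(24, 144, kernel_size=1),                           # 1x1 expansion
    nn.BatchNorm2d(144),
    nn.ReLU6(inplace=True),
    nn.Conv2d(144, 144, kernel_size=3, padding=1, groups=144),   # 3x3 depthwise
    nn.BatchNorm2d(144),
    nn.ReLU6(inplace=True),
    nn.Conv2d(144, 24, kernel_size=1),                           # 1x1 linear projection
    nn.BatchNorm2d(24),
)
print(block(torch.randn(1, 24, 56, 56)).shape)  # torch.Size([1, 24, 56, 56])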
MobileNetV2 also adopts a new activation function, ReLU6, defined as ReLU6(x) = min(max(x, 0), 6), i.e. a ReLU whose output is clipped at 6.
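A quick check of this definition (a minimal snippet, not part of the original post):

import torch
import torch.nn as nn

x = torch.tensor([-3.0, 0.0, 2.0, 8.0])
print(nn.ReLU6()(x))                     # tensor([0., 0., 2., 6.])
print(torch.clamp(x, min=0.0, max=6.0))  # same values: min(max(x, 0), 6)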
(2)Linear bottlenecks
For the last convolution layer of the inverted residual block, a linear activation (f(x) = x, i.e. effectively no activation) is used instead of ReLU6.
Why:
ReLU can destroy a large amount of information when applied to low-dimensional features, whereas the loss for high-dimensional features is small. Because the inverted residual block is narrow at both ends and wide in the middle, its output is a low-dimensional feature, so a linear activation is used on the final layer to avoid this loss.
What "low-dimensional" means here: the dimension in question is the channel dimension, so a low-dimensional feature map is one with few channels; the height and width in [batch, channel, height, width] may still be large.
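The effect can be reproduced with a small toy experiment (a sketch in the spirit of the figure in the MobileNetV2 paper; the data and dimensions below are made up): embed 2-D points into n dimensions with a random matrix, apply ReLU, and project back with the pseudo-inverse. The reconstruction error is large when n is small and shrinks as n grows, which is why ReLU is avoided on the low-dimensional output of the block.

import torch

torch.manual_seed(0)
x = torch.randn(1000, 2)                      # low-dimensional input points
for n in [2, 3, 5, 15, 30]:                   # embedding dimensions to test
    T = torch.randn(2, n)                     # random expansion matrix
    y = torch.relu(x @ T)                     # expand to n dims, then apply ReLU
    x_rec = y @ torch.linalg.pinv(T)          # project back to 2 dims
    print(n, torch.mean((x - x_rec) ** 2).item())  # reconstruction error drops as n grows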
(3)Overall architecture
A shortcut connection is used only when stride = 1 and the input feature map has the same shape as the output feature map.
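In code this rule is a single boolean check (a sketch of the condition from the paper; note that the implementation in the next section deviates slightly and adds a 1*1-convolution shortcut whenever stride = 1, even when the channel counts differ):

# Identity shortcut only when the block neither downsamples nor changes the channel count
def use_shortcut(stride, in_channels, out_channels):
    return stride == 1 and in_channels == out_channels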
3、PyTorch Implementation of MobileNetV2
import torch
import torch.nn as nn
import torchvision
# number of classes
num_class = 5
# 3x3 convolution + BN + ReLU6; acts as a DW (depthwise) convolution when groups == channels
def Conv3x3BNReLU(in_channels, out_channels, stride, groups):
    return nn.Sequential(
        # stride=2 halves the height/width; stride=1 keeps them unchanged
        nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1, groups=groups),
        nn.BatchNorm2d(out_channels),
        nn.ReLU6(inplace=True)
    )

# PW (pointwise) 1x1 convolution + BN + ReLU6
def Conv1x1BNReLU(in_channels, out_channels):
    return nn.Sequential(
        nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU6(inplace=True)
    )

# Linear PW (pointwise) convolution: no activation function
def Conv1x1BN(in_channels, out_channels):
    return nn.Sequential(
        nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1),
        nn.BatchNorm2d(out_channels)
    )
class InvertedResidual(nn.Module):
    # expansion_factor is the expansion factor t
    def __init__(self, in_channels, out_channels, expansion_factor, stride):
        super(InvertedResidual, self).__init__()
        self.stride = stride
        self.in_channels = in_channels
        self.out_channels = out_channels
        mid_channels = (in_channels * expansion_factor)
        # first expand the channels with a 1x1 convolution, then project them back down with a 1x1 convolution
        self.bottleneck = nn.Sequential(
            # 1x1 expansion
            Conv1x1BNReLU(in_channels, mid_channels),
            # DW convolution, keeps the parameter count low
            Conv3x3BNReLU(mid_channels, mid_channels, stride, groups=mid_channels),
            # linear 1x1 projection
            Conv1x1BN(mid_channels, out_channels)
        )
        # shortcut only when stride=1; a 1x1 convolution is used so that differing channel counts can still be added
        # (the original paper instead uses an identity shortcut only when in_channels == out_channels)
        if self.stride == 1:
            self.shortcut = Conv1x1BN(in_channels, out_channels)

    def forward(self, x):
        out = self.bottleneck(x)
        out = (out + self.shortcut(x)) if self.stride == 1 else out
        return out
class MobileNetV2(nn.Module):
    def make_layer(self, in_channels, out_channels, stride, factor, block_num):
        layers = []
        # the first block may downsample (stride), the remaining blocks use stride=1;
        # note that this stacks block_num + 1 blocks per stage in total
        layers.append(InvertedResidual(in_channels, out_channels, factor, stride))
        for i in range(block_num):
            layers.append(InvertedResidual(out_channels, out_channels, factor, 1))
        return nn.Sequential(*layers)

    def init_params(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    def __init__(self, num_classes=num_class, t=6):
        super(MobileNetV2, self).__init__()
        self.first_conv = Conv3x3BNReLU(3, 32, 2, groups=1)
        # 32 -> 16, stride=1, h/w unchanged
        self.layer1 = self.make_layer(in_channels=32, out_channels=16, stride=1, factor=1, block_num=1)
        # 16 -> 24, stride=2, h/w halved
        self.layer2 = self.make_layer(in_channels=16, out_channels=24, stride=2, factor=t, block_num=2)
        # 24 -> 32, stride=2, h/w halved
        self.layer3 = self.make_layer(in_channels=24, out_channels=32, stride=2, factor=t, block_num=3)
        # 32 -> 64, stride=2, h/w halved
        self.layer4 = self.make_layer(in_channels=32, out_channels=64, stride=2, factor=t, block_num=4)
        # 64 -> 96, stride=1, h/w unchanged
        self.layer5 = self.make_layer(in_channels=64, out_channels=96, stride=1, factor=t, block_num=3)
        # 96 -> 160, stride=2, h/w halved
        self.layer6 = self.make_layer(in_channels=96, out_channels=160, stride=2, factor=t, block_num=3)
        # 160 -> 320, stride=1, h/w unchanged
        self.layer7 = self.make_layer(in_channels=160, out_channels=320, stride=1, factor=t, block_num=1)
        # 320 -> 1280, a plain channel expansion
        self.last_conv = Conv1x1BNReLU(320, 1280)
        # for a 224x224 input the feature map is 7x7 here, so average pooling with kernel_size=7 acts as global pooling
        self.avgpool = nn.AvgPool2d(kernel_size=7, stride=1)
        # the features are flattened before dropout, so plain Dropout is used instead of Dropout2d
        self.dropout = nn.Dropout(p=0.2)
        self.linear = nn.Linear(in_features=1280, out_features=num_classes)
        self.init_params()

    def forward(self, x):
        x = self.first_conv(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = self.layer6(x)
        x = self.layer7(x)
        x = self.last_conv(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.dropout(x)
        x = self.linear(x)
        return x
if __name__ == '__main__':
    model = MobileNetV2()
    input = torch.randn(1, 3, 224, 224)
    out = model(input)
    print(out.shape)
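Running this script should print torch.Size([1, 5]): the five stride-2 stages reduce the 224x224 input to a 7x7 feature map, the 7x7 average pooling collapses it to a single spatial position, and the final linear layer maps the 1280 features to num_class = 5 logits.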
4、MobileNetV2 as the Backbone of CenterNet
import torch
import torch.nn as nn
# 3x3 convolution + BN + ReLU6; acts as a DW (depthwise) convolution when groups == channels
def Conv3x3BNReLU(in_channels, out_channels, stride, groups):
    return nn.Sequential(
        # stride=2 halves the height/width; stride=1 keeps them unchanged
        nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1, groups=groups),
        nn.BatchNorm2d(out_channels),
        nn.ReLU6(inplace=True)
    )

# PW (pointwise) 1x1 convolution + BN + ReLU6
def Conv1x1BNReLU(in_channels, out_channels):
    return nn.Sequential(
        nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU6(inplace=True)
    )

# Linear PW (pointwise) convolution: no activation function
def Conv1x1BN(in_channels, out_channels):
    return nn.Sequential(
        nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1),
        nn.BatchNorm2d(out_channels)
    )
class InvertedResidual(nn.Module):
    # expansion_factor is the expansion factor t
    def __init__(self, in_channels, out_channels, expansion_factor, stride):
        super(InvertedResidual, self).__init__()
        self.stride = stride
        self.in_channels = in_channels
        self.out_channels = out_channels
        mid_channels = (in_channels * expansion_factor)
        # first expand the channels with a 1x1 convolution, then project them back down with a 1x1 convolution
        self.bottleneck = nn.Sequential(
            # 1x1 expansion
            Conv1x1BNReLU(in_channels, mid_channels),
            # DW convolution, keeps the parameter count low
            Conv3x3BNReLU(mid_channels, mid_channels, stride, groups=mid_channels),
            # linear 1x1 projection
            Conv1x1BN(mid_channels, out_channels)
        )
        # shortcut only when stride=1; a 1x1 convolution is used so that differing channel counts can still be added
        # (the original paper instead uses an identity shortcut only when in_channels == out_channels)
        if self.stride == 1:
            self.shortcut = Conv1x1BN(in_channels, out_channels)

    def forward(self, x):
        out = self.bottleneck(x)
        out = (out + self.shortcut(x)) if self.stride == 1 else out
        return out
class MobileNetV2(nn.Module):
    def make_layer(self, in_channels, out_channels, stride, factor, block_num):
        layers = []
        # the first block may downsample (stride), the remaining blocks use stride=1;
        # note that this stacks block_num + 1 blocks per stage in total
        layers.append(InvertedResidual(in_channels, out_channels, factor, stride))
        for i in range(block_num):
            layers.append(InvertedResidual(out_channels, out_channels, factor, 1))
        return nn.Sequential(*layers)

    def init_params(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    def __init__(self, heads, t=6):
        super(MobileNetV2, self).__init__()
        num_classes = heads['hm']
        self.first_conv = Conv3x3BNReLU(3, 32, 2, groups=1)
        # 32 -> 16, stride=1, h/w unchanged
        self.layer1 = self.make_layer(in_channels=32, out_channels=16, stride=1, factor=1, block_num=1)
        # 16 -> 24, stride=2, h/w halved
        self.layer2 = self.make_layer(in_channels=16, out_channels=24, stride=2, factor=t, block_num=2)
        # 24 -> 32, stride=2, h/w halved
        self.layer3 = self.make_layer(in_channels=24, out_channels=32, stride=2, factor=t, block_num=3)
        # 32 -> 64, stride=2, h/w halved
        self.layer4 = self.make_layer(in_channels=32, out_channels=64, stride=2, factor=t, block_num=4)
        # 64 -> 96, stride=1, h/w unchanged
        self.layer5 = self.make_layer(in_channels=64, out_channels=96, stride=1, factor=t, block_num=3)
        # 96 -> 160, stride=2, h/w halved
        self.layer6 = self.make_layer(in_channels=96, out_channels=160, stride=2, factor=t, block_num=3)
        # 160 -> 320, stride=1, h/w unchanged
        self.layer7 = self.make_layer(in_channels=160, out_channels=320, stride=1, factor=t, block_num=1)
        # 320 -> 1280, a plain channel expansion
        self.last_conv = Conv1x1BNReLU(320, 1280)
        # CenterNet heads: for a 512x512 input, the 1280x16x16 backbone output is reshaped
        # in forward() to 20x128x128, so each head takes 20 input channels
        self.hm = nn.Conv2d(20, num_classes, kernel_size=1)   # heatmap head
        self.wh = nn.Conv2d(20, 2, kernel_size=1)             # width/height head
        self.reg = nn.Conv2d(20, 2, kernel_size=1)            # center-offset head
        # initialise after all layers (including the heads) have been created
        self.init_params()

    def forward(self, x):
        x = self.first_conv(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = self.layer6(x)
        x = self.layer7(x)
        x = self.last_conv(x)
        # reshape the 1280x16x16 feature map into 20x128x128 (1280*16*16 == 20*128*128)
        # to obtain the 128x128 output resolution that CenterNet expects for a 512x512 input
        y = x.view(x.shape[0], -1, 128, 128)
        z = {}
        z['hm'] = self.hm(y)
        z['wh'] = self.wh(y)
        z['reg'] = self.reg(y)
        return [z]
if __name__ == '__main__':
    heads = {'hm': 10, 'wh': 2, 'reg': 2}
    model = MobileNetV2(heads)
    input = torch.randn(1, 3, 512, 512)
    out = model(input)
    # out is a list containing one dict of head outputs, so print each tensor's shape
    print(out[0]['hm'].shape, out[0]['wh'].shape, out[0]['reg'].shape)
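With heads = {'hm': 10, 'wh': 2, 'reg': 2} and a 512x512 input this should print torch.Size([1, 10, 128, 128]) torch.Size([1, 2, 128, 128]) torch.Size([1, 2, 128, 128]): one 128x128 heatmap channel per class plus the two-channel size and offset maps, i.e. the output stride of 4 that CenterNet expects.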
5、MobileNetV2 as the CenterNet Backbone in Practice
(1)Training
Training loss: with batch_size = 16, MobileNetV1 bottoms out at around 4.0, while MobileNetV2 drops below 0.5, which is roughly on par with DLASeg.
(2)Detection quality
The detection results are also roughly on par with DLASeg.
(3)Number of parameters
DLASeg: about 20 million
MobileNetV1: about 3.2 million
MobileNetV2: about 4.3 million, with a total model size of about 17 MB
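These figures can be checked directly from the model object (a quick sketch using the MobileNetV2 class from section 4; the exact count depends on the head configuration and the block_num settings above):

# count trainable parameters (the post reports roughly 4.3 million for the MobileNetV2-based model)
model = MobileNetV2({'hm': 10, 'wh': 2, 'reg': 2})
print(sum(p.numel() for p in model.parameters() if p.requires_grad))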
(4)CPU inference time
DLASeg: about 1.2 s
MobileNetV1: about 250 ms
MobileNetV2: about 600 ms
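A simple way to take this kind of measurement (a sketch using the class from section 4; absolute times depend heavily on the CPU, thread settings, and input size, which are not specified in the post):

import time
import torch

model = MobileNetV2({'hm': 10, 'wh': 2, 'reg': 2}).eval()
x = torch.randn(1, 3, 512, 512)
with torch.no_grad():
    model(x)                              # warm-up pass
    start = time.perf_counter()
    for _ in range(10):
        model(x)
    print((time.perf_counter() - start) / 10, 'seconds per forward pass')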