選取經典的早期PyTorch官方實現代碼進行分析:
https://github.com/pytorch/vision/blob/9a481d0bec2700763a799ff148fe2e083b575441/torchvision/models/resnet.py
各種ResNet網絡都是由BasicBlock或者Bottleneck構成的,它們是構成深度殘差網絡的基本模塊。
ResNet主體
大部分ResNet結構都是1層conv + 4個stage(layer1~layer4,每個stage由若干個block堆疊而成)+ 1層fc。
class ResNet(nn.Module):
    """ResNet backbone: a 7x7 stem convolution followed by four residual stages.

    The module also registers ``avgpool`` and ``fc`` (the classifier head),
    but ``forward`` returns the feature map produced by ``layer4`` and does
    not apply the head.

    Args:
        block: Residual block class used for every stage. It must expose an
            ``expansion`` class attribute and be constructible as
            ``block(inplanes, planes, stride, downsample)``.
        layers: Sequence of four ints giving the number of blocks in
            ``layer1`` .. ``layer4``.
        zero_init_residual (bool): If True, set the scale of the last
            BatchNorm of every ``Bottleneck`` / ``BasicBlock`` to zero so
            each residual branch initially contributes nothing.
        num_classes (int): Number of outputs of the ``fc`` layer.
    """

    def __init__(self, block, layers, zero_init_residual=False, num_classes=1000):
        super(ResNet, self).__init__()
        # Channel count fed to the next block; updated by _make_layer.
        self.inplanes = 64

        # Stem: 7x7/2 convolution, BN, ReLU, 3x3/2 max pooling.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Four residual stages; every stage after the first uses stride 2.
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        # Classifier head. Registered as attributes but not used by forward().
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch, so that the
        # residual branch starts with zeros and each residual block behaves
        # like an identity. This improves the model by 0.2~0.3% according to
        # https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage made of ``blocks`` consecutive ``block`` modules.

        Args:
            block: Residual block class (see the class docstring).
            planes: Base channel width of the stage; the stage outputs
                ``planes * block.expansion`` channels.
            blocks: Number of blocks in the stage.
            stride: Stride of the first block; the remaining blocks use the
                block's default stride.

        Returns:
            ``nn.Sequential`` holding the blocks of the stage.
        """
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            # The skip path must be projected whenever the first block changes
            # the spatial size (stride != 1) or the channel count.
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            # Only the first block of a stage changes shape, so the remaining
            # blocks take neither a stride nor a downsample module.
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the ``layer4`` feature map computed from ``x``.

        ``avgpool`` and ``fc`` are deliberately not applied: this method
        returns features, not class scores. Apply ``self.avgpool`` and
        ``self.fc`` to the returned tensor if scores are needed.
        """
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        c2 = self.layer1(x)
        c3 = self.layer2(c2)
        c4 = self.layer3(c3)
        c5 = self.layer4(c4)
        return c5
需要注意的是最后的avgpool(AdaptiveAvgPool2d((1, 1)))是全局平均池化。
BasicBlock
class BasicBlock(nn.Module):
    """Residual block with two ``conv3x3`` + BatchNorm layers and a skip path.

    Args:
        inplanes: Number of channels of the block input.
        planes: Number of channels produced by both convolutions.
        stride: Stride handed to the first convolution.
        downsample: Optional module applied to the input before it is added
            to the main branch; ``None`` means the input is added unchanged.
    """

    # The block outputs ``planes * expansion`` channels.
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        # First conv/BN pair carries the stride.
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        # One ReLU module is shared by both activation points in forward().
        self.relu = nn.ReLU(inplace=True)
        # Second conv/BN pair keeps the channel count.
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(main_branch(x) + skip(x))``."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # Skip path: the raw input unless a projection module was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)
ResNet18
對應的各層block數量就是[2,2,2,2]。
def resnet18(pretrained=False, **kwargs):
    """Build a ResNet-18: ``ResNet`` with ``BasicBlock`` and [2, 2, 2, 2] blocks.

    Args:
        pretrained (bool): If True, fetch the checkpoint registered under
            ``model_urls['resnet18']`` (pre-trained on ImageNet) and load it.
        **kwargs: Passed through to the ``ResNet`` constructor.

    Returns:
        The constructed model.
    """
    net = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
    if not pretrained:
        return net

    print('Loading the pretrained model ...')
    # strict=False: an exact key match is not required, because the fc layer
    # parameters are not needed.
    checkpoint = model_zoo.load_url(model_urls['resnet18'])
    net.load_state_dict(checkpoint, strict=False)
    return net
ResNet34
def resnet34(pretrained=False, **kwargs):
    """Build a ResNet-34: ``ResNet`` with ``BasicBlock`` and [3, 4, 6, 3] blocks.

    Args:
        pretrained (bool): If True, fetch the checkpoint registered under
            ``model_urls['resnet34']`` (pre-trained on ImageNet) and load it.
        **kwargs: Passed through to the ``ResNet`` constructor.

    Returns:
        The constructed model.
    """
    net = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
    if not pretrained:
        return net

    print('Loading the pretrained model ...')
    # strict=False: the checkpoint keys do not have to match the model exactly.
    checkpoint = model_zoo.load_url(model_urls['resnet34'])
    net.load_state_dict(checkpoint, strict=False)
    return net
ResNet20
這個需要強調一下:正常的ResNet20應該是論文中提出的、針對cifar數據集設計的結構,層數為6n+2,n=3的時候就是1+6*3+1=20層。
class ResNet4Cifar(nn.Module):
    """Three-stage ResNet with 16/32/64 base channels and a linear classifier.

    Args:
        block: Residual block class. It must expose an ``expansion`` class
            attribute and be constructible as
            ``block(in_channels, out_channels, stride, downsample)``.
        num_block: Sequence of three ints, the number of blocks in
            ``conv2_x``, ``conv3_x`` and ``conv4_x``.
        num_classes (int): Number of outputs of the ``fc`` layer.
    """

    def __init__(self, block, num_block, num_classes=10):
        super().__init__()
        # Channel count fed to the next block; updated by _make_layer.
        self.in_channels = 16

        # Stem: a single 3x3 convolution that keeps the spatial size.
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True))

        # we use a different inputsize than the original paper
        # so conv2_x's stride is 1
        self.conv2_x = self._make_layer(block, 16, num_block[0], 1)
        self.conv3_x = self._make_layer(block, 32, num_block[1], 2)
        self.conv4_x = self._make_layer(block, 64, num_block[2], 2)

        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(64 * block.expansion, num_classes)

    def _make_layer(self, block, out_channels, num_blocks, stride):
        """Build one stage of residual blocks.

        The first block uses ``stride``; every following block uses stride 1.
        A block whose input and output differ in spatial size or channel
        count receives a 1x1 convolution + BatchNorm projection as its
        ``downsample`` argument, so the skip connection can be added to the
        main branch. Blocks that keep the shape receive ``None``.

        Args:
            block: Residual block class (see the class docstring).
            out_channels: Base channel width of the stage; the stage outputs
                ``out_channels * block.expansion`` channels.
            num_blocks: Number of blocks in the stage.
            stride: Stride of the first block.

        Returns:
            ``nn.Sequential`` holding the blocks of the stage.
        """
        strides = [stride] + [1] * (num_blocks - 1)
        expanded = out_channels * block.expansion

        layers = []
        for block_stride in strides:
            downsample = None
            if block_stride != 1 or self.in_channels != expanded:
                # Without this projection the block would add tensors of
                # different shapes on its skip connection.
                downsample = nn.Sequential(
                    nn.Conv2d(self.in_channels, expanded, kernel_size=1,
                              stride=block_stride, bias=False),
                    nn.BatchNorm2d(expanded),
                )
            layers.append(block(self.in_channels, out_channels, block_stride, downsample))
            self.in_channels = expanded

        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the ``fc`` output for the input batch ``x``."""
        output = self.conv1(x)
        output = self.conv2_x(output)
        output = self.conv3_x(output)
        output = self.conv4_x(output)
        output = self.avg_pool(output)
        # Flatten (N, C, 1, 1) to (N, C) for the linear layer.
        output = output.view(output.size(0), -1)
        output = self.fc(output)
        return output
def resnet20(num_classes=10, **kargs):
    """Build a ResNet-20: ``ResNet4Cifar`` with three stages of three ``BasicBlock`` each.

    Args:
        num_classes (int): Forwarded to ``ResNet4Cifar`` as ``num_classes``.
        **kargs: Accepted but not used.

    Returns:
        The constructed ``ResNet4Cifar`` instance.
    """
    blocks_per_stage = [3, 3, 3]
    return ResNet4Cifar(BasicBlock, blocks_per_stage, num_classes=num_classes)
我們通過參數量的計算得到的結果也是0.27M,和論文中的一致;對[1,3,32,32]的輸入,最后一個stage(conv4_x)輸出的特征圖維度為[1,64,8,8]。
但是也有一些文章只換了開頭三層的3x3卷積層,通道數并沒有采用16、32、64,仍是4層的64、128、256、512,這樣下來參數量是11.25M。針對的任務不同,但是如果不關注原始網絡結構,這一點可以忽略。
Bottleneck Block
Bottleneck Block中使用了1×1卷積層。如輸入通道數為256,1×1卷積層會將通道數先降為64,經過3×3卷積層后,再將通道數升為256。1×1卷積層的優勢是在更深的網絡中,用較小的參數量處理通道數很大的輸入。
這種結構用在ResNet50、ResNet101中。
class Bottleneck(nn.Module):
    """Residual block built from ``conv1x1`` -> ``conv3x3`` -> ``conv1x1``, each followed by BatchNorm.

    Args:
        inplanes: Number of channels of the block input.
        planes: Channel width of the first two convolutions; the block
            outputs ``planes * expansion`` channels.
        stride: Stride handed to the middle (``conv3x3``) convolution.
        downsample: Optional module applied to the input before it is added
            to the main branch; ``None`` means the input is added unchanged.
    """

    # The last convolution widens the output to ``planes * expansion`` channels.
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        out_planes = planes * self.expansion

        self.conv1 = conv1x1(inplanes, planes)
        self.bn1 = nn.BatchNorm2d(planes)
        # The stride is applied by the middle convolution.
        self.conv2 = conv3x3(planes, planes, stride)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = conv1x1(planes, out_planes)
        self.bn3 = nn.BatchNorm2d(out_planes)
        # One ReLU module is shared by all activation points in forward().
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(main_branch(x) + skip(x))``."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # Skip path: the raw input unless a projection module was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)
ResNet50
和以上的網絡結構一樣,把Bottleneck按層數堆起來就可以了。
def resnet50(pretrained=False, **kwargs):
    """Build a ResNet-50: ``ResNet`` with ``Bottleneck`` and [3, 4, 6, 3] blocks.

    Args:
        pretrained (bool): If True, fetch the checkpoint registered under
            ``model_urls['resnet50']`` (pre-trained on ImageNet) and load it.
        **kwargs: Passed through to the ``ResNet`` constructor.

    Returns:
        The constructed model.
    """
    net = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
    if not pretrained:
        return net

    print('Loading the pretrained model ...')
    # strict=False: the checkpoint keys do not have to match the model exactly.
    checkpoint = model_zoo.load_url(model_urls['resnet50'])
    net.load_state_dict(checkpoint, strict=False)
    return net
ResNet到底解決了什么問題
推薦看知乎問題「Resnet到底在解決一個什么問題呢?」
貼一些我比較喜歡的回答:
A. 對于 $L$ 層的網絡來說,沒有殘差表示的Plain Net梯度相關性的衰減在 $\frac{1}{2^L}$,而ResNet的衰減卻只有 $\frac{1}{\sqrt{L}}$。即使BN過后梯度的模穩定在了正常范圍內,但梯度的相關性實際上是隨著層數增加持續衰減的。而經過證明,ResNet可以有效減少這種相關性的衰減。
B. 對于“梯度彌散”觀點來說,在輸出引入一個輸入x的恒等映射,則梯度也會對應地引入一個常數1,這樣的網絡的確不容易出現梯度值異常,在某種意義上,起到了穩定梯度的作用。
C. 跳連接相加可以實現不同分辨率特征的組合,因為淺層容易有高分辨率但是低級語義的特征,而深層的特征有高級語義,但分辨率就很低了。引入跳接實際上讓模型自身有了更加“靈活”的結構,即在訓練過程本身,模型可以選擇在每一個部分是“更多進行卷積與非線性變換”還是“更多傾向于什么都不做”,抑或是將兩者結合。模型在訓練時便可以自適應本身的結構。[3]
D. 當使用了殘差網絡時,就是加入了skip connection結構,這時候一個building block的任務由 F(x) := H(x) 變成了 F(x) := H(x)-x。對比這兩個待擬合的函數,擬合殘差映射更容易優化,也就是說:F(x) := H(x)-x 比 F(x) := H(x) 更容易優化。[4] 舉一個差分放大器的例子:F是求和前的網絡映射,H是從輸入到求和后的網絡映射。比如把5映射到5.1,那么引入殘差前是F'(5)=5.1,引入殘差后是H(5)=5.1,H(5)=F(5)+5,F(5)=0.1。這里的F'和F都表示網絡參數映射,引入殘差后的映射對輸出的變化更敏感。比如輸出從5.1變到5.2,映射F'的輸出增加了1/51≈2%,而對于殘差結構,輸出從5.1到5.2,映射F是從0.1到0.2,增加了100%。明顯后者輸出變化對權重的調整作用更大,所以效果更好。殘差的思想都是去掉相同的主體部分,從而突出微小的變化。
說法眾多,好用就完事兒了嗷~
-
【pytorch系列】ResNet中的BasicBlock與bottleneck ?? ??
-
ResNet50網(wǎng)絡(luò)結(jié)構(gòu)圖及結(jié)構(gòu)詳解 ??
-
https://www.zhihu.com/question/64494691/answer/786270699 ??文章來(lái)源:http://www.zghlxwxcb.cn/news/detail-785543.html
-
https://www.zhihu.com/question/64494691/answer/271335912 ??文章來(lái)源地址http://www.zghlxwxcb.cn/news/detail-785543.html
到了這里,關(guān)于【pytorch】ResNet18、ResNet20、ResNet34、ResNet50網(wǎng)絡(luò)結(jié)構(gòu)與實(shí)現(xiàn)的文章就介紹完了。如果您還想了解更多內(nèi)容,請(qǐng)?jiān)谟疑辖撬阉鱐OY模板網(wǎng)以前的文章或繼續(xù)瀏覽下面的相關(guān)文章,希望大家以后多多支持TOY模板網(wǎng)!