1. Dataset Preparation
Create a project folder named ResNet, and inside it create a data_set folder to hold the dataset. Inside data_set, create a new folder "flower_data". Download the flower classification dataset from https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz, extract the downloaded archive into the flower_data folder, and then run the "split_data.py" script to automatically split the dataset into a training set (train) and a validation set (val).
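If you would rather script the download and extraction than click the link, a minimal standard-library sketch along these lines should work (an assumption of this write-up, not part of the original workflow; it expects to be run from the project root):

import os
import tarfile
import urllib.request

url = "https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz"
target_dir = os.path.join("data_set", "flower_data")
os.makedirs(target_dir, exist_ok=True)

archive_path = os.path.join(target_dir, "flower_photos.tgz")
urllib.request.urlretrieve(url, archive_path)
with tarfile.open(archive_path) as tar:
    tar.extractall(target_dir)  # creates flower_data/flower_photos/<class folders>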
split_data.py is as follows:
import os
from shutil import copy, rmtree
import random


def mk_file(file_path: str):
    if os.path.exists(file_path):
        # If the folder already exists, delete it first and then recreate it
        rmtree(file_path)
    os.makedirs(file_path)


def main():
    # Make the random split reproducible
    random.seed(0)

    # Move 10% of the dataset into the validation set
    split_rate = 0.1

    # Points to the flower_photos folder you extracted
    cwd = os.getcwd()
    data_root = os.path.join(cwd, "flower_data")
    origin_flower_path = os.path.join(data_root, "flower_photos")
    assert os.path.exists(origin_flower_path), "path '{}' does not exist.".format(origin_flower_path)

    flower_class = [cla for cla in os.listdir(origin_flower_path)
                    if os.path.isdir(os.path.join(origin_flower_path, cla))]

    # Create the folder that holds the training set
    train_root = os.path.join(data_root, "train")
    mk_file(train_root)
    for cla in flower_class:
        # Create a folder for each class
        mk_file(os.path.join(train_root, cla))

    # Create the folder that holds the validation set
    val_root = os.path.join(data_root, "val")
    mk_file(val_root)
    for cla in flower_class:
        # Create a folder for each class
        mk_file(os.path.join(val_root, cla))

    for cla in flower_class:
        cla_path = os.path.join(origin_flower_path, cla)
        images = os.listdir(cla_path)
        num = len(images)
        # Randomly sample the file names that go into the validation set
        eval_index = random.sample(images, k=int(num * split_rate))
        for index, image in enumerate(images):
            if image in eval_index:
                # Copy files assigned to the validation set into the matching folder
                image_path = os.path.join(cla_path, image)
                new_path = os.path.join(val_root, cla)
                copy(image_path, new_path)
            else:
                # Copy files assigned to the training set into the matching folder
                image_path = os.path.join(cla_path, image)
                new_path = os.path.join(train_root, cla)
                copy(image_path, new_path)
            print("\r[{}] processing [{}/{}]".format(cla, index + 1, num), end="")  # progress bar
        print()

    print("processing done!")


if __name__ == '__main__':
    main()
Afterwards the train and val datasets are generated under the folder. With that, dataset preparation is complete.
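If everything worked, the directory layout should look roughly like this (class folders abbreviated):

data_set/
└── flower_data/
    ├── flower_photos/   # original extracted dataset
    ├── train/
    │   ├── daisy/
    │   ├── dandelion/
    │   └── ...
    └── val/
        ├── daisy/
        ├── dandelion/
        └── ...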
2. Defining the Network
Create model.py. Based on the ResNet architecture and the code released officially by PyTorch, only slight modifications are needed. Two classes are defined first, BasicBlock and Bottleneck, corresponding to ResNet-18/34 and ResNet-50/101/152 respectively; the figure below makes the distinction clear.
As the figure shows, the 18- and 34-layer networks have identical conv2_x, conv3_x, conv4_x, and conv5_x structures and differ only in the number of blocks per stage ([2, 2, 2, 2] vs. [3, 4, 6, 3]). The 50-, 101-, and 152-layer networks add 1x1 convolutions, and their block counts differ as well.
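In code, the difference comes down to which block class and which per-stage block counts are passed to the ResNet constructor defined below; for instance (the 18-layer line is purely for illustration, since the file below only provides factory functions from resnet34 upwards):

resnet18 = ResNet(BasicBlock, [2, 2, 2, 2], num_classes=1000)   # 18-layer
resnet34 = ResNet(BasicBlock, [3, 4, 6, 3], num_classes=1000)   # 34-layer
resnet50 = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=1000)   # 50-layer, expansion=4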
Next the ResNet class is defined, which performs the forward pass. For the 34-layer network (borrowing a figure from 牧醬 on Zhihu; the blocks of ResNet-18 and ResNet-34 are identical, so the 18-layer version is used for the explanation), the residual blocks for conv2_x and conv3_x are shown on the right (note the stride). Pay particular attention when computing feature-map sizes; the size calculations are annotated in the code below.
The official PyTorch ResNet code
The modified model.py:
import torch.nn as nn
import torch


class BasicBlock(nn.Module):  # residual block for the 18- and 34-layer networks
    expansion = 1

    def __init__(self, in_channel, out_channel, stride=1, downsample=None, **kwargs):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)
        self.downsample = downsample

    def forward(self, x):
        identity = x
        if self.downsample is not None:
            # Not None: dashed-line residual branch (a 1x1 conv adjusts the dimensions);
            # None: solid-line residual branch (no 1x1 conv needed)
            identity = self.downsample(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        out += identity
        out = self.relu(out)
        return out


class Bottleneck(nn.Module):  # residual block for the 50-, 101- and 152-layer networks
    """
    Note: in the original paper, on the main branch of the dashed-line residual block,
    the first 1x1 conv layer has stride 2 and the 3x3 conv layer has stride 1.
    The official PyTorch implementation instead uses stride 1 for the first 1x1 conv
    and stride 2 for the 3x3 conv, which improves top-1 accuracy by roughly 0.5%.
    See ResNet v1.5: https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch
    """
    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None, groups=1, width_per_group=64):
        super(Bottleneck, self).__init__()
        width = int(out_channel * (width_per_group / 64.)) * groups

        # squeeze channels
        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=width, kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(width)
        # -----------------------------------------
        self.conv2 = nn.Conv2d(in_channels=width, out_channels=width, groups=groups,
                               kernel_size=3, stride=stride, bias=False, padding=1)
        self.bn2 = nn.BatchNorm2d(width)
        # -----------------------------------------
        # unsqueeze channels
        self.conv3 = nn.Conv2d(in_channels=width, out_channels=out_channel * self.expansion, kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channel * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        identity = x
        if self.downsample is not None:
            # Not None: dashed-line residual branch (a 1x1 conv adjusts the dimensions);
            # None: solid-line residual branch (no 1x1 conv needed)
            identity = self.downsample(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        out += identity
        out = self.relu(out)
        return out


class ResNet(nn.Module):
    def __init__(self, block, blocks_num, num_classes=1000, include_top=True, groups=1, width_per_group=64):
        super(ResNet, self).__init__()
        self.include_top = include_top
        self.in_channel = 64
        self.groups = groups
        self.width_per_group = width_per_group
        # shape comments are (channel, height, width)
        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2, padding=3, bias=False)  # (3, 224, 224) -> (64, 112, 112)
        self.bn1 = nn.BatchNorm2d(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)  # (64, 112, 112) -> (64, 56, 56)
        # In layer1 every block uses stride 1 for both convolutions
        self.layer1 = self._make_layer(block, 64, blocks_num[0])  # (64, 56, 56) -> (64, 56, 56)
        # In layer2-4 the first block uses strides 2 and 1; the remaining blocks use 1 and 1
        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)  # (64, 56, 56) -> (128, 28, 28)
        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)  # (128, 28, 28) -> (256, 14, 14)
        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)  # (256, 14, 14) -> (512, 7, 7)
        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):  # channel: number of kernels used by this stage
        downsample = None
        # For 18/34 this only triggers in layer2-4 (stride=2); for 50/101/152 it also
        # triggers in layer1, because in_channel != channel * expansion there
        if stride != 1 or self.in_channel != channel * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(channel * block.expansion))

        layers = []
        layers.append(block(self.in_channel,
                            channel,
                            downsample=downsample,
                            stride=stride,
                            groups=self.groups,
                            width_per_group=self.width_per_group))
        self.in_channel = channel * block.expansion

        for _ in range(1, block_num):
            layers.append(block(self.in_channel,
                                channel,
                                groups=self.groups,
                                width_per_group=self.width_per_group))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        if self.include_top:
            x = self.avgpool(x)
            x = torch.flatten(x, 1)
            x = self.fc(x)

        return x


def resnet34(num_classes=1000, include_top=True):
    # https://download.pytorch.org/models/resnet34-333f7ec4.pth
    return ResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes, include_top=include_top)


def resnet50(num_classes=1000, include_top=True):
    # https://download.pytorch.org/models/resnet50-19c8e357.pth
    return ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, include_top=include_top)


def resnet101(num_classes=1000, include_top=True):
    # https://download.pytorch.org/models/resnet101-5d3b4d8f.pth
    return ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, include_top=include_top)


def resnext50_32x4d(num_classes=1000, include_top=True):
    # https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth
    groups = 32
    width_per_group = 4
    return ResNet(Bottleneck, [3, 4, 6, 3],
                  num_classes=num_classes,
                  include_top=include_top,
                  groups=groups,
                  width_per_group=width_per_group)


def resnext101_32x8d(num_classes=1000, include_top=True):
    # https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth
    groups = 32
    width_per_group = 8
    return ResNet(Bottleneck, [3, 4, 23, 3],
                  num_classes=num_classes,
                  include_top=include_top,
                  groups=groups,
                  width_per_group=width_per_group)


if __name__ == "__main__":
    resnet = ResNet(BasicBlock, [3, 4, 6, 3], num_classes=5)  # ResNet-34 configuration
    in_data = torch.randn(1, 3, 224, 224)
    out = resnet(in_data)
    print(out)
After finishing the network definition, you can run this file on its own to verify that the network is defined correctly. If it produces output without errors, the definition is fine.
Here the output is
tensor([[-0.4490,  0.5792, -0.5026, -0.6024,  0.1399]],
       grad_fn=<AddmmBackward0>)
which indicates the network is defined correctly.
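As an optional extra sanity check (not in the original), the output shape and parameter count can be inspected as well:

    print(out.shape)  # expect torch.Size([1, 5]): batch of 1, five classes
    n_params = sum(p.numel() for p in resnet.parameters())
    print(f"{n_params:,} parameters")  # roughly 21 million for this ResNet-34 configuration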
3. Training
Loading the dataset
First define a dictionary used to preprocess train and val: cropping to 224x224, random horizontal flipping for the training set (the validation set generally does not need this), conversion to tensors, and image normalization.
Then load the datasets with the DataLoader module, setting batch_size to 16, and set the number of data-loading worker processes nw to speed things up.
import os
import sys
import json

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
from tqdm import tqdm

from model import resnet34


def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"using {device} device.")

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([transforms.Resize(256),
                                   transforms.CenterCrop(224),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    # Dataset root path
    image_path = os.path.join(os.getcwd(), "data_set", "flower_data")
    assert os.path.exists(image_path), f"{image_path} path does not exist."

    # Build the datasets
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"), transform=data_transform["train"])
    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"), transform=data_transform["val"])

    # number of workers; parallel workers speed up image preprocessing (batch size is 16 here)
    nw = min([os.cpu_count(), 16 if 16 > 1 else 0, 8])
    print(f'Using {nw} dataloader workers every process')

    # Wrap the datasets in DataLoaders
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=nw)
    validate_loader = torch.utils.data.DataLoader(validate_dataset, batch_size=16, shuffle=False, num_workers=nw)

    train_num = len(train_dataset)
    val_num = len(validate_dataset)
    print(f"using {train_num} images for training, {val_num} images for validation.")
Generating the JSON file
Convert the training set's class labels into a dictionary and write it to a file named 'class_indices.json':
- Get the class-label-to-index mapping from train_dataset and store it in the flower_list variable.
- Swap the keys and values of flower_list to get a new dictionary cla_dict, whose keys are the indices and whose values are the original class labels.
- Convert cla_dict to a JSON-formatted string with json.dumps(), using an indent of 4 spaces.
- Open a file named 'class_indices.json' in write mode with a with open() statement and write the JSON string to it.
    # {'daisy': 0, 'dandelion': 1, 'roses': 2, 'sunflower': 3, 'tulips': 4}
    # Get the class-label-to-index mapping from the training set
    flower_list = train_dataset.class_to_idx
    # Swap the keys and values of flower_list to get a new dict cla_dict (index -> label)
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)
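After this runs, class_indices.json should contain the inverted mapping, along these lines (the exact names follow the class folder names in your dataset):

{
    "0": "daisy",
    "1": "dandelion",
    "2": "roses",
    "3": "sunflower",
    "4": "tulips"
}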
Loading pretrained weights and training
First create the network object net; transfer learning is used here so the network trains to better effect. net.fc = nn.Linear(in_channel, 5) resets the output layer to the number of classes (5 here). Train for 10 epochs, using train_bar = tqdm(train_loader, file=sys.stdout) to visualize training progress, and run backpropagation and parameter updates for each batch. A learning-rate scheduler can additionally be stepped once per epoch (see the sketch after the listing), although the code below keeps Adam's learning rate fixed. After each epoch, compute the accuracy on the validation set and save the model whenever it improves.
    # load pretrain weights
    # download url: https://download.pytorch.org/models/resnet34-333f7ec4.pth
    net = resnet34()
    model_weight_path = "./resnet34-pre.pth"
    assert os.path.exists(model_weight_path), f"file {model_weight_path} does not exist."
    net.load_state_dict(torch.load(model_weight_path, map_location='cpu'))

    # change fc layer structure
    in_channel = net.fc.in_features
    net.fc = nn.Linear(in_channel, 5)
    net.to(device)

    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam([p for p in net.parameters() if p.requires_grad], lr=0.0001)

    epochs = 10
    best_acc = 0.0
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # train
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader, file=sys.stdout)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            logits = net(images.to(device))
            loss = loss_function(logits, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            train_bar.desc = f"train epoch[{epoch + 1}/{epochs}] loss:{loss:.3f}"

        # validate
        net.eval()
        acc = 0.0  # accumulate the number of correct predictions per epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, file=sys.stdout)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()
                val_bar.desc = f"valid epoch[{epoch + 1}/{epochs}]"

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net, "./resnet34.pth")  # saves the whole model object, which predict.py later loads with torch.load

    print('Finished Training')
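Two optional refinements, sketched here under the assumption that they sit right after the optimizer setup inside main() (they are variants, not part of the training code above). The optimizer already filters on requires_grad, which only matters once some parameters are frozen, so the first part freezes the pretrained backbone and trains only the new fc head; the second part is the per-epoch learning-rate schedule mentioned earlier:

    # Optional: freeze the pretrained backbone and train only the new fc head.
    # The optimizer's requires_grad filter then actually excludes parameters.
    for name, param in net.named_parameters():
        if "fc" not in name:
            param.requires_grad = False
    optimizer = optim.Adam([p for p in net.parameters() if p.requires_grad], lr=0.0001)

    # Optional: decay the learning rate by 10x every 5 epochs;
    # call scheduler.step() once per epoch, after the inner batch loop.
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)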
Finally, putting the pieces together, the complete train.py is as follows:
import os
import sys
import json

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
from tqdm import tqdm

from model import resnet34


def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"using {device} device.")

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([transforms.Resize(256),
                                   transforms.CenterCrop(224),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    # Dataset root path
    image_path = os.path.join(os.getcwd(), "data_set", "flower_data")
    assert os.path.exists(image_path), f"{image_path} path does not exist."

    # Build the datasets
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"), transform=data_transform["train"])
    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"), transform=data_transform["val"])

    # number of workers; parallel workers speed up image preprocessing (batch size is 16 here)
    nw = min([os.cpu_count(), 16 if 16 > 1 else 0, 8])
    print(f'Using {nw} dataloader workers every process')

    # Wrap the datasets in DataLoaders
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=nw)
    validate_loader = torch.utils.data.DataLoader(validate_dataset, batch_size=16, shuffle=False, num_workers=nw)

    train_num = len(train_dataset)
    val_num = len(validate_dataset)
    print(f"using {train_num} images for training, {val_num} images for validation.")

    # {'daisy': 0, 'dandelion': 1, 'roses': 2, 'sunflower': 3, 'tulips': 4}
    # Get the class-label-to-index mapping from the training set
    flower_list = train_dataset.class_to_idx
    # Swap the keys and values of flower_list to get a new dict cla_dict (index -> label)
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    # load pretrain weights
    # download url: https://download.pytorch.org/models/resnet34-333f7ec4.pth
    net = resnet34()
    model_weight_path = "./resnet34-pre.pth"
    assert os.path.exists(model_weight_path), f"file {model_weight_path} does not exist."
    net.load_state_dict(torch.load(model_weight_path, map_location='cpu'))

    # change fc layer structure
    in_channel = net.fc.in_features
    net.fc = nn.Linear(in_channel, 5)
    net.to(device)

    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam([p for p in net.parameters() if p.requires_grad], lr=0.0001)

    epochs = 10
    best_acc = 0.0
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # train
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader, file=sys.stdout)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            logits = net(images.to(device))
            loss = loss_function(logits, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            train_bar.desc = f"train epoch[{epoch + 1}/{epochs}] loss:{loss:.3f}"

        # validate
        net.eval()
        acc = 0.0  # accumulate the number of correct predictions per epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, file=sys.stdout)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()
                val_bar.desc = f"valid epoch[{epoch + 1}/{epochs}]"

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net, "./resnet34.pth")  # saves the whole model object

    print('Finished Training')


if __name__ == '__main__':
    main()
4. Model Prediction
Create a predict.py file for prediction. The input image is preprocessed and converted to tensor format; img = torch.unsqueeze(img, dim=0) adds a size-1 dimension at the first position of the image tensor img, turning its shape from [channels, height, width] into [1, channels, height, width]. Then the model is loaded to make a prediction, and the result is printed and visualized.
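To build intuition for the unsqueeze step before reading the full script, a tiny standalone snippet (illustration only, not part of predict.py):

import torch

img = torch.randn(3, 224, 224)     # [C, H, W], as produced by the transforms
img = torch.unsqueeze(img, dim=0)  # [1, C, H, W]
print(img.shape)                   # torch.Size([1, 3, 224, 224])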
import os
import json

import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt

from model import resnet34


def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    data_transform = transforms.Compose(
        [transforms.Resize(256),
         transforms.CenterCrop(224),
         transforms.ToTensor(),
         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    # load image
    img = Image.open("./2536282942_b5ca27577e.jpg")
    plt.imshow(img)
    # [N, C, H, W]
    img = data_transform(img)
    # expand batch dimension:
    # add a size-1 dimension at position 0, turning [C, H, W] into [1, C, H, W]
    img = torch.unsqueeze(img, dim=0)

    # read class_indict
    with open('./class_indices.json', "r") as f:
        class_indict = json.load(f)

    # create model; torch.load then restores the full model object saved during training
    model = resnet34(num_classes=5).to(device)
    model = torch.load("./resnet34.pth", map_location=device)

    # prediction
    model.eval()
    with torch.no_grad():
        # predict class
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        predict_class = torch.argmax(predict).numpy()

    print_result = f"class: {class_indict[str(predict_class)]} prob: {predict[predict_class].numpy():.3}"
    plt.title(print_result)
    for i in range(len(predict)):
        print(f"class: {class_indict[str(i)]:10} prob: {predict[i].numpy():.3}")
    plt.show()


if __name__ == '__main__':
    main()
Prediction result:
5. Model Visualization
Loading the generated .pth file into the Netron tool produces a visualization that is not very legible, so the model is converted to ONNX, a format widely used for deployment on embedded devices.
Write onnx.py (note that if the onnx Python package is installed, naming the script onnx.py can shadow that package, so a name like export_onnx.py is safer):
import torch

from model import resnet34

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = resnet34(num_classes=5).to(device)
model = torch.load("/home/lm/Resnet/resnet34.pth", map_location=device)
model.eval()

# dummy input matching the training resolution (224x224)
example = torch.ones(1, 3, 224, 224)
example = example.to(device)
torch.onnx.export(model, example, "resnet34.onnx", verbose=True, opset_version=11)
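To confirm the exported graph actually runs, a quick check with onnxruntime can be added (this assumes the onnxruntime package is installed; it is not part of the original workflow):

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("resnet34.onnx")
input_name = sess.get_inputs()[0].name
dummy = np.random.randn(1, 3, 224, 224).astype(np.float32)
logits = sess.run(None, {input_name: dummy})[0]
print(logits.shape)  # expect (1, 5)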
Loading the generated ONNX file instead gives a much clearer visualization.
6. Batch Prediction
Now create a data folder and put samples of the five classes to be predicted inside it, then write code that predicts all samples under the folder in one go, i.e., batch prediction.
batch_predict.py is as follows:
import os
import json

import torch
from PIL import Image
from torchvision import transforms

from model import resnet34


def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    data_transform = transforms.Compose(
        [transforms.Resize(256),
         transforms.CenterCrop(224),
         transforms.ToTensor(),
         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    # Folder whose images will all be predicted
    imgs_root = "./data/imgs"
    # Collect the paths of all jpg images under the folder
    img_path_list = [os.path.join(imgs_root, i) for i in os.listdir(imgs_root) if i.endswith(".jpg")]

    # read class_indict
    with open('./class_indices.json', "r") as json_file:
        class_indict = json.load(json_file)

    # create model; torch.load restores the full model object saved during training
    model = resnet34(num_classes=5).to(device)
    model = torch.load("./resnet34.pth", map_location=device)

    # prediction
    model.eval()
    batch_size = 8  # how many images are packed into one batch per forward pass
    with torch.no_grad():
        # note: integer division drops any leftover images that don't fill a full
        # batch; see the variant after the listing for a version that keeps them
        for ids in range(0, len(img_path_list) // batch_size):
            img_list = []
            for img_path in img_path_list[ids * batch_size: (ids + 1) * batch_size]:
                img = Image.open(img_path)
                img = data_transform(img)
                img_list.append(img)

            # batch img: stack every image in img_list into a single batch tensor
            batch_img = torch.stack(img_list, dim=0)
            # predict class
            output = model(batch_img.to(device)).cpu()
            predict = torch.softmax(output, dim=1)
            probs, classes = torch.max(predict, dim=1)

            for idx, (pro, cla) in enumerate(zip(probs, classes)):
                print(f"image: {img_path_list[ids * batch_size + idx]} class: {class_indict[str(cla.numpy())]} prob: {pro.numpy():.3}")


if __name__ == '__main__':
    main()
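As flagged in the comment above, the loop silently drops any images left over after the last full batch. A small drop-in variant of that loop which also covers the tail, since the final slice may simply be shorter:

    with torch.no_grad():
        for start in range(0, len(img_path_list), batch_size):
            batch_paths = img_path_list[start: start + batch_size]  # last slice may be shorter
            batch_img = torch.stack([data_transform(Image.open(p)) for p in batch_paths], dim=0)
            predict = torch.softmax(model(batch_img.to(device)).cpu(), dim=1)
            probs, classes = torch.max(predict, dim=1)
            for path, pro, cla in zip(batch_paths, probs, classes):
                print(f"image: {path} class: {class_indict[str(cla.numpy())]} prob: {pro.numpy():.3}")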
Running it produces the output:
image: ./data/imgs/455728598_c5f3e7fc71_m.jpg  class: dandelion  prob: 0.989
image: ./data/imgs/3464015936_6845f46f64.jpg  class: dandelion  prob: 0.999
image: ./data/imgs/3461986955_29a1abc621.jpg  class: dandelion  prob: 0.996
image: ./data/imgs/8223949_2928d3f6f6_n.jpg  class: dandelion  prob: 0.991
image: ./data/imgs/10919961_0af657c4e8.jpg  class: dandelion  prob: 1.0
image: ./data/imgs/10443973_aeb97513fc_m.jpg  class: dandelion  prob: 0.906
image: ./data/imgs/8475758_4c861ab268_m.jpg  class: dandelion  prob: 0.805
image: ./data/imgs/3857059749_fe8ca621a9.jpg  class: dandelion  prob: 1.0
image: ./data/imgs/2457473644_5242844e52_m.jpg  class: dandelion  prob: 1.0
image: ./data/imgs/146023167_f905574d97_m.jpg  class: dandelion  prob: 0.998
image: ./data/imgs/2502627784_4486978bcf.jpg  class: dandelion  prob: 0.488
image: ./data/imgs/2481428401_bed64dd043.jpg  class: dandelion  prob: 1.0
image: ./data/imgs/13920113_f03e867ea7_m.jpg  class: dandelion  prob: 1.0
image: ./data/imgs/2535769822_513be6bbe9.jpg  class: dandelion  prob: 0.997
image: ./data/imgs/3954167682_128398bf79_m.jpg  class: dandelion  prob: 1.0
image: ./data/imgs/2516714633_87f28f0314.jpg  class: dandelion  prob: 0.998
image: ./data/imgs/2634665077_597910235f_m.jpg  class: dandelion  prob: 0.996
image: ./data/imgs/3502447188_ab4a5055ac_m.jpg  class: dandelion  prob: 0.999
image: ./data/imgs/425800274_27dba84fac_n.jpg  class: dandelion  prob: 0.422
image: ./data/imgs/3365850019_8158a161a8_n.jpg  class: dandelion  prob: 1.0
image: ./data/imgs/674407101_57676c40fb.jpg  class: dandelion  prob: 1.0
image: ./data/imgs/2628514700_b6d5325797_n.jpg  class: dandelion  prob: 0.999
image: ./data/imgs/3688128868_031e7b53e1_n.jpg  class: dandelion  prob: 0.962
image: ./data/imgs/2502613166_2c231b47cb_n.jpg  class: dandelion  prob: 1.0
The expected behavior is achieved (my samples here are all dandelion, but mixed classes work as well).
7. Model Improvements
Without loading pretrained weights, i.e., training from scratch, 50 epochs reached an accuracy of just over 80% in actual training; with pretrained weights, the accuracy already reached 90% after the first epoch, which nicely demonstrates the benefit of transfer learning.
This article used ResNet-34; the deeper 50-, 101-, and 152-layer networks are also worth trying.
Other improvements will be added in the future.