學習前言
哈哈哈!我再來一次數據增強!
代碼下載
https://github.com/bubbliiiing/object-detection-augmentation
什么是MixUp數據增強方法
MixUp數據增強方法在最新的幾個Yolo算法中得到了廣泛的應用,特別在YoloX中,s、m、l、x四個型號的網絡都使用了MixUp數據增強。nano和tiny由于模型的擬合能力一般,沒有使用MixUp,但也說明了MixUp具有強大的數據增強能力。
MixUp的思路較為簡單,主要是將兩張圖像按比例進行混合,如圖所示:
圖片混合完成后,原來兩幅圖片的真實框此時也位于一幅圖像上。
實現思路
1、每次讀取兩張圖片。
2、分別對兩張圖片進行翻轉、縮放、色域變化等數據增強。
3、將二者的真實框堆疊到一起。
全部代碼
1、數據增強與MixUp
該部分為普通數據增強與MixUp的代碼。文章來源:http://www.zghlxwxcb.cn/news/detail-789294.html
import cv2
import numpy as np
from PIL import Image, ImageDraw
def rand(a=0, b=1):
    """Return a random float drawn uniformly from the interval [a, b).

    Uses NumPy's global random state, so results are reproducible after
    ``np.random.seed``.
    """
    span = b - a
    return a + span * np.random.rand()
def get_random_data(annotation_line, input_shape, jitter=.3, hue=.1, sat=0.7, val=0.4, random=True):
    """Load one annotated image and fit it, with its boxes, onto a fixed-size canvas.

    Args:
        annotation_line: whitespace-separated string. The first token is the
            image path; every following token is one box written as
            comma-separated integers. The first four integers are used as
            x1, y1, x2, y2; any further values (presumably a class id) are
            carried through unchanged.
        input_shape: ``(height, width)`` of the output canvas.
        jitter: half-width of the range used to randomly distort the aspect ratio.
        hue: maximum relative change applied to the hue gain.
        sat: maximum relative change applied to the saturation gain.
        val: maximum relative change applied to the value (brightness) gain.
        random: when False the image is only letterboxed (centred, grey
            padding) and no random augmentation is applied.

    Returns:
        ``(image_data, box)``. ``image_data`` is an RGB array of the canvas
        size: float32 when ``random`` is False, uint8 otherwise. ``box`` holds
        the boxes that are still larger than one pixel in both directions
        after being mapped onto the canvas; when the line has no boxes it is
        an empty 1-D array.
    """
    line = annotation_line.split()
    #------------------------------#
    #   Read the image and convert it to RGB.
    #   The context manager closes the file handle as soon as the
    #   converted copy has been made.
    #------------------------------#
    with Image.open(line[0]) as source:
        image = source.convert('RGB')
    #------------------------------#
    #   Source size and target size
    #------------------------------#
    iw, ih = image.size
    h, w = input_shape
    #------------------------------#
    #   Parse the ground-truth boxes
    #------------------------------#
    box = np.array([np.array(list(map(int, item.split(',')))) for item in line[1:]])

    if not random:
        scale = min(w/iw, h/ih)
        # Pillow rejects a zero-sized resize target, which int() truncation
        # can produce for extreme aspect ratios.
        nw = max(1, int(iw*scale))
        nh = max(1, int(ih*scale))
        dx = (w-nw)//2
        dy = (h-nh)//2

        #---------------------------------#
        #   Pad the unused area with grey bars
        #---------------------------------#
        image = image.resize((nw, nh), Image.BICUBIC)
        new_image = Image.new('RGB', (w, h), (128, 128, 128))
        new_image.paste(image, (dx, dy))
        image_data = np.array(new_image, np.float32)

        #---------------------------------#
        #   Map the boxes onto the canvas
        #---------------------------------#
        box = _fit_boxes_to_canvas(box, nw, nh, iw, ih, dx, dy, w, h, False)
        return image_data, box

    #------------------------------------------#
    #   Rescale the image and distort its aspect ratio
    #------------------------------------------#
    new_ar = iw/ih * rand(1-jitter, 1+jitter) / rand(1-jitter, 1+jitter)
    scale = rand(.25, 2)
    if new_ar < 1:
        nh = int(scale*h)
        nw = int(nh*new_ar)
    else:
        nw = int(scale*w)
        nh = int(nw/new_ar)
    # Same zero-size guard as in the letterbox branch.
    nw = max(1, nw)
    nh = max(1, nh)
    image = image.resize((nw, nh), Image.BICUBIC)

    #------------------------------------------#
    #   Paste at a random offset on a grey canvas
    #------------------------------------------#
    dx = int(rand(0, w-nw))
    dy = int(rand(0, h-nh))
    new_image = Image.new('RGB', (w, h), (128, 128, 128))
    new_image.paste(image, (dx, dy))
    image = new_image

    #------------------------------------------#
    #   Random horizontal flip
    #------------------------------------------#
    flip = rand() < .5
    if flip:
        image = image.transpose(Image.FLIP_LEFT_RIGHT)

    image_data = np.array(image, np.uint8)
    #---------------------------------#
    #   Colour jitter: draw one random gain per HSV channel
    #---------------------------------#
    gains = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
    #---------------------------------#
    #   Convert to HSV. The channel planes get their own names so the
    #   hue/sat/val parameters are not overwritten.
    #---------------------------------#
    hue_plane, sat_plane, val_plane = cv2.split(cv2.cvtColor(image_data, cv2.COLOR_RGB2HSV))
    dtype = image_data.dtype
    #---------------------------------#
    #   Apply the gains through lookup tables
    #---------------------------------#
    x = np.arange(0, 256, dtype=gains.dtype)
    lut_hue = ((x * gains[0]) % 180).astype(dtype)
    lut_sat = np.clip(x * gains[1], 0, 255).astype(dtype)
    lut_val = np.clip(x * gains[2], 0, 255).astype(dtype)

    image_data = cv2.merge((cv2.LUT(hue_plane, lut_hue), cv2.LUT(sat_plane, lut_sat), cv2.LUT(val_plane, lut_val)))
    image_data = cv2.cvtColor(image_data, cv2.COLOR_HSV2RGB)

    #---------------------------------#
    #   Map the boxes onto the canvas
    #---------------------------------#
    box = _fit_boxes_to_canvas(box, nw, nh, iw, ih, dx, dy, w, h, flip)
    return image_data, box


def _fit_boxes_to_canvas(box, nw, nh, iw, ih, dx, dy, w, h, flip):
    """Shuffle boxes, map them from source pixels to canvas pixels and drop degenerate ones."""
    if len(box) == 0:
        return box
    np.random.shuffle(box)
    # Scale by the resize factor, then shift by the paste offset.
    box[:, [0, 2]] = box[:, [0, 2]]*nw/iw + dx
    box[:, [1, 3]] = box[:, [1, 3]]*nh/ih + dy
    if flip:
        # Mirroring swaps the roles of x1 and x2.
        box[:, [0, 2]] = w - box[:, [2, 0]]
    # Clip to the canvas.
    box[:, 0:2][box[:, 0:2] < 0] = 0
    box[:, 2][box[:, 2] > w] = w
    box[:, 3][box[:, 3] > h] = h
    box_w = box[:, 2] - box[:, 0]
    box_h = box[:, 3] - box[:, 1]
    # Discard boxes that are no longer bigger than one pixel in both directions.
    return box[np.logical_and(box_w > 1, box_h > 1)]
def get_random_data_with_MixUp(image_1, box_1, image_2, box_2):
    """Blend two images with equal weights and merge their box lists.

    Args:
        image_1, image_2: array-likes holding the two images; they are
            converted to float32 and averaged, so they must be broadcastable
            against each other.
        box_1, box_2: box arrays belonging to the two images. Either may be
            empty.

    Returns:
        ``(new_image, new_boxes)``: the float32 blended image and the boxes
        of both inputs stacked along axis 0.
    """
    new_image = np.array(image_1, np.float32) * 0.5 + np.array(image_2, np.float32) * 0.5

    # An image without boxes yields a 1-D empty array, which cannot be
    # concatenated with an (N, k) array, so empty box sets are skipped.
    non_empty = [np.asarray(boxes) for boxes in (box_1, box_2) if len(boxes) > 0]
    if non_empty:
        new_boxes = np.concatenate(non_empty, axis=0)
    else:
        new_boxes = np.asarray(box_1)
    return new_image, new_boxes
2、調用代碼
該部分為調用代碼。文章來源地址:http://www.zghlxwxcb.cn/news/detail-789294.html
import os
from random import sample
import numpy as np
from PIL import Image, ImageDraw
from utils.random_data import get_random_data, get_random_data_with_MixUp
from utils.utils import convert_annotation, get_classes
#-----------------------------------------------------------------------------------#
#   Origin_VOCdevkit_path   Path of the original dataset
#-----------------------------------------------------------------------------------#
Origin_VOCdevkit_path   = "VOCdevkit_Origin"
#-----------------------------------------------------------------------------------#
#   input_shape             Size of the generated image
#-----------------------------------------------------------------------------------#
input_shape             = [640, 640]

if __name__ == "__main__":
    Origin_JPEGImages_path  = os.path.join(Origin_VOCdevkit_path, "VOC2007/JPEGImages")
    Origin_Annotations_path = os.path.join(Origin_VOCdevkit_path, "VOC2007/Annotations")

    #---------------------------#
    #   Collect the annotation files. Only .xml entries are kept so that
    #   stray files in the folder are never sampled.
    #---------------------------#
    xml_names = [name for name in os.listdir(Origin_Annotations_path) if name.lower().endswith('.xml')]
    if len(xml_names) < 2:
        raise ValueError(
            "MixUp needs at least two annotation files in {!r}, found {}.".format(
                Origin_Annotations_path, len(xml_names)
            )
        )

    #------------------------------#
    #   Pick two images and their labels
    #------------------------------#
    sample_xmls     = sample(xml_names, 2)
    unique_labels   = get_classes(sample_xmls, Origin_Annotations_path)

    jpg_name_1  = os.path.join(Origin_JPEGImages_path, os.path.splitext(sample_xmls[0])[0] + '.jpg')
    jpg_name_2  = os.path.join(Origin_JPEGImages_path, os.path.splitext(sample_xmls[1])[0] + '.jpg')
    xml_name_1  = os.path.join(Origin_Annotations_path, sample_xmls[0])
    xml_name_2  = os.path.join(Origin_Annotations_path, sample_xmls[1])

    line_1 = convert_annotation(jpg_name_1, xml_name_1, unique_labels)
    line_2 = convert_annotation(jpg_name_2, xml_name_2, unique_labels)

    #------------------------------#
    #   Augment each image on its own
    #------------------------------#
    image_1, box_1 = get_random_data(line_1, input_shape)
    image_2, box_2 = get_random_data(line_2, input_shape)

    #------------------------------#
    #   Merge the pair with MixUp
    #------------------------------#
    image_data, box_data = get_random_data_with_MixUp(image_1, box_1, image_2, box_2)

    #------------------------------#
    #   Draw the merged boxes and display the result
    #------------------------------#
    img         = Image.fromarray(image_data.astype(np.uint8))
    draw        = ImageDraw.Draw(img)
    thickness   = 3
    for box in box_data:
        left, top, right, bottom = box[0:4]
        for i in range(thickness):
            # Stop before the inset rectangle inverts on very small boxes;
            # recent Pillow versions reject x1 < x0 / y1 < y0.
            if right - i < left + i or bottom - i < top + i:
                break
            draw.rectangle([left + i, top + i, right - i, bottom - i], outline=(255, 255, 255))
    img.show()
到了這里,關于睿智的目標檢測64——目標檢測中的MixUp數據增強方法的文章就介紹完了。如果您還想了解更多內容,請在右上角搜索TOY模板網以前的文章或繼續瀏覽下面的相關文章,希望大家以后多多支持TOY模板網!