Implementing Optimization Algorithms and Visualizing Them in 3D
1. 3D visualization of the target functions
Plot the 3D surfaces of f1(x1, x2) = x1^2 + x2^2 + x2^3 + x1*x2 and f2(x1, x2) = x1^2/20 + x2^2.
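For reference, the gradients that the two backward methods in the code below implement are

$$\nabla f_1(x_1, x_2) = \begin{pmatrix} 2x_1 + x_2 \\ 2x_2 + 3x_2^2 + x_1 \end{pmatrix}, \qquad \nabla f_2(x_1, x_2) = \begin{pmatrix} x_1/10 \\ 2x_2 \end{pmatrix}$$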
import numpy as np
from matplotlib import pyplot as plt
import torch

# 畫(huà)出x**2
class Op(object):
    def __init__(self):
        pass

    def __call__(self, inputs):
        return self.forward(inputs)

    def forward(self, inputs):
        raise NotImplementedError

    def backward(self, outputs_grads):
        raise NotImplementedError


class OptimizedFunction3D1(Op):
    def __init__(self):
        super(OptimizedFunction3D1, self).__init__()
        self.params = {'x': 0}
        self.grads = {'x': 0}

    def forward(self, x):
        self.params['x'] = x
        return x[0] ** 2 + x[1] ** 2 + x[1] ** 3 + x[0] * x[1]

    def backward(self):
        x = self.params['x']
        gradient1 = 2 * x[0] + x[1]
        gradient2 = 2 * x[1] + 3 * x[1] ** 2 + x[0]
        grad1 = torch.Tensor([gradient1])
        grad2 = torch.Tensor([gradient2])
        self.grads['x'] = torch.cat([grad1, grad2])


class OptimizedFunction3D2(Op):
    def __init__(self):
        super(OptimizedFunction3D2, self).__init__()
        self.params = {'x': 0}
        self.grads = {'x': 0}

    def forward(self, x):
        self.params['x'] = x
        return x[0] * x[0] / 20 + x[1] * x[1] / 1

    def backward(self):
        x = self.params['x']
        gradient1 = 2 * x[0] / 20
        gradient2 = 2 * x[1] / 1
        grad1 = torch.Tensor([gradient1])
        grad2 = torch.Tensor([gradient2])
        self.grads['x'] = torch.cat([grad1, grad2])


# 使用numpy.meshgrid生成x1,x2矩陣,矩陣的每一行為[-3, 3],以0.1為間隔的數(shù)值
x1 = np.arange(-3, 3, 0.1)
x2 = np.arange(-3, 3, 0.1)
x1, x2 = np.meshgrid(x1, x2)
init_x = torch.Tensor(np.array([x1, x2]))

model1 = OptimizedFunction3D1()
model2 = OptimizedFunction3D2()

# 繪制 f_3d 函數(shù)的三維圖像,分別在兩個(gè)子圖中繪制
fig = plt.figure()

# 繪制第一個(gè)子圖
ax1 = fig.add_subplot(121, projection='3d')
X = init_x[0].numpy()
Y = init_x[1].numpy()
Z1 = model1(init_x).numpy()
ax1.plot_surface(X, Y, Z1, cmap='rainbow')
ax1.set_xlabel('x1')
ax1.set_ylabel('x2')
ax1.set_zlabel('f(x1, x2)')
ax1.set_title('Function 1')

# 繪制第二個(gè)子圖
ax2 = fig.add_subplot(122, projection='3d')
Z2 = model2(init_x).numpy()
ax2.plot_surface(X, Y, Z2, cmap='rainbow')
ax2.set_xlabel('x1')
ax2.set_ylabel('x2')
ax2.set_zlabel('f(x1, x2)')
ax2.set_title('Function 2')
plt.show()
2. Adding the optimizers and plotting the update trajectories
import torch
import numpy as np
import copy
from matplotlib import pyplot as plt
from matplotlib import animation
from itertools import zip_longest
from nndl.op import Op


class Optimizer(object):  # 優(yōu)化器基類(lèi)
    def __init__(self, init_lr, model):
        """
        優(yōu)化器類(lèi)初始化
        """
        # 初始化學(xué)習(xí)率,用于參數(shù)更新的計(jì)算
        self.init_lr = init_lr
        # 指定優(yōu)化器需要優(yōu)化的模型
        self.model = model

    def step(self):
        """
        定義每次迭代如何更新參數(shù)
        """
        pass


class SimpleBatchGD(Optimizer):
    def __init__(self, init_lr, model):
        super(SimpleBatchGD, self).__init__(init_lr=init_lr, model=model)

    def step(self):
        # 參數(shù)更新
        if isinstance(self.model.params, dict):
            for key in self.model.params.keys():
                self.model.params[key] = self.model.params[key] - self.init_lr * self.model.grads[key]


class Adagrad(Optimizer):
    def __init__(self, init_lr, model, epsilon):
        """
        Adagrad 優(yōu)化器初始化
        輸入:
        - init_lr: 初始學(xué)習(xí)率
        - model:模型,model.params存儲(chǔ)模型參數(shù)值
        - epsilon:保持?jǐn)?shù)值穩(wěn)定性而設(shè)置的非常小的常數(shù)
        """
        super(Adagrad, self).__init__(init_lr=init_lr, model=model)
        self.G = {}
        for key in self.model.params.keys():
            self.G[key] = 0
        self.epsilon = epsilon

    def adagrad(self, x, gradient_x, G, init_lr):
        """
        adagrad算法更新參數(shù),G為參數(shù)梯度平方的累計(jì)值。
        """
        G += gradient_x ** 2
        x -= init_lr / torch.sqrt(G + self.epsilon) * gradient_x
        return x, G

    def step(self):
        """
        參數(shù)更新
        """
        for key in self.model.params.keys():
            self.model.params[key], self.G[key] = self.adagrad(self.model.params[key],
                                                               self.model.grads[key],
                                                               self.G[key],
                                                               self.init_lr)


class RMSprop(Optimizer):
    def __init__(self, init_lr, model, beta, epsilon):
        """
        RMSprop優(yōu)化器初始化
        輸入:
        - init_lr:初始學(xué)習(xí)率
        - model:模型,model.params存儲(chǔ)模型參數(shù)值
        - beta:衰減率
        - epsilon:保持?jǐn)?shù)值穩(wěn)定性而設(shè)置的常數(shù)
        """
        super(RMSprop, self).__init__(init_lr=init_lr, model=model)
        self.G = {}
        for key in self.model.params.keys():
            self.G[key] = 0
        self.beta = beta
        self.epsilon = epsilon

    def rmsprop(self, x, gradient_x, G, init_lr):
        """
        rmsprop算法更新參數(shù),G為迭代梯度平方的加權(quán)移動(dòng)平均
        """
        G = self.beta * G + (1 - self.beta) * gradient_x ** 2
        x -= init_lr / torch.sqrt(G + self.epsilon) * gradient_x
        return x, G

    def step(self):
        """參數(shù)更新"""
        for key in self.model.params.keys():
            self.model.params[key], self.G[key] = self.rmsprop(self.model.params[key],
                                                               self.model.grads[key],
                                                               self.G[key],
                                                               self.init_lr)


class Momentum(Optimizer):
    def __init__(self, init_lr, model, rho):
        """
        Momentum優(yōu)化器初始化
        輸入:
        - init_lr:初始學(xué)習(xí)率
        - model:模型,model.params存儲(chǔ)模型參數(shù)值
        - rho:動(dòng)量因子
        """
        super(Momentum, self).__init__(init_lr=init_lr, model=model)
        self.delta_x = {}
        for key in self.model.params.keys():
            self.delta_x[key] = 0
        self.rho = rho

    def momentum(self, x, gradient_x, delta_x, init_lr):
        """
        momentum算法更新參數(shù),delta_x為梯度的加權(quán)移動(dòng)平均
        """
        delta_x = self.rho * delta_x - init_lr * gradient_x
        x += delta_x
        return x, delta_x

    def step(self):
        """參數(shù)更新"""
        for key in self.model.params.keys():
            self.model.params[key], self.delta_x[key] = self.momentum(self.model.params[key],
                                                                      self.model.grads[key],
                                                                      self.delta_x[key],
                                                                      self.init_lr)


class Nesterov(Optimizer):
    def __init__(self, init_lr, model, rho):
        """
        Nesterov優(yōu)化器初始化
        輸入:
        - init_lr:初始學(xué)習(xí)率
        - model:模型,model.params存儲(chǔ)模型參數(shù)值
        - rho:動(dòng)量因子
        """
        super(Nesterov, self).__init__(init_lr=init_lr, model=model)
        self.delta_x = {}
        for key in self.model.params.keys():
            self.delta_x[key] = 0
        self.rho = rho

    def nesterov(self, x, gradient_x, delta_x, init_lr):
        """
        Nesterov算法更新參數(shù),delta_x為梯度的加權(quán)移動(dòng)平均
        """
        delta_x_prev = delta_x
        delta_x = self.rho * delta_x - init_lr * gradient_x
        x += -self.rho * delta_x_prev + (1 + self.rho) * delta_x
        return x, delta_x

    def step(self):
        """參數(shù)更新"""
        for key in self.model.params.keys():
            self.model.params[key], self.delta_x[key] = self.nesterov(self.model.params[key],
                                                                      self.model.grads[key],
                                                                      self.delta_x[key],
                                                                      self.init_lr)


class Adam(Optimizer):
    def __init__(self, init_lr, model, beta1, beta2, epsilon):
        """
        Adam優(yōu)化器初始化
        輸入:
        - init_lr:初始學(xué)習(xí)率
        - model:模型,model.params存儲(chǔ)模型參數(shù)值
        - beta1, beta2:移動(dòng)平均的衰減率
        - epsilon:保持?jǐn)?shù)值穩(wěn)定性而設(shè)置的常數(shù)
        """
        super(Adam, self).__init__(init_lr=init_lr, model=model)
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.M, self.G = {}, {}
        for key in self.model.params.keys():
            self.M[key] = 0
            self.G[key] = 0
        self.t = 1

    def adam(self, x, gradient_x, G, M, t, init_lr):
        """
        adam算法更新參數(shù)
        輸入:
        - x:參數(shù)
        - G:梯度平方的加權(quán)移動(dòng)平均
        - M:梯度的加權(quán)移動(dòng)平均
        - t:迭代次數(shù)
        - init_lr:初始學(xué)習(xí)率
        """
        M = self.beta1 * M + (1 - self.beta1) * gradient_x
        G = self.beta2 * G + (1 - self.beta2) * gradient_x ** 2
        M_hat = M / (1 - self.beta1 ** t)
        G_hat = G / (1 - self.beta2 ** t)
        t += 1
        x -= init_lr / torch.sqrt(G_hat + self.epsilon) * M_hat
        return x, G, M, t

    def step(self):
        """參數(shù)更新"""
        for key in self.model.params.keys():
            self.model.params[key], self.G[key], self.M[key], self.t = self.adam(self.model.params[key],
                                                                                 self.model.grads[key],
                                                                                 self.G[key],
                                                                                 self.M[key],
                                                                                 self.t,
                                                                                 self.init_lr)


class OptimizedFunction3D(Op):
    def __init__(self):
        super(OptimizedFunction3D, self).__init__()
        self.params = {'x': 0}
        self.grads = {'x': 0}

    def forward(self, x):
        self.params['x'] = x
        return x[0] ** 2 + x[1] ** 2 + x[1] ** 3 + x[0] * x[1]

    def backward(self):
        x = self.params['x']
        gradient1 = 2 * x[0] + x[1]
        gradient2 = 2 * x[1] + 3 * x[1] ** 2 + x[0]
        grad1 = torch.Tensor([gradient1])
        grad2 = torch.Tensor([gradient2])
        self.grads['x'] = torch.cat([grad1, grad2])


class Visualization3D(animation.FuncAnimation):
    """ 繪制動(dòng)態(tài)圖像,可視化參數(shù)更新軌跡 """
    def __init__(self, *xy_values, z_values, labels=[], colors=[], fig, ax, interval=600, blit=True, **kwargs):
        """
        初始化3d可視化類(lèi)
        輸入:
        xy_values:三維中x,y維度的值
        z_values:三維中z維度的值
        labels:每個(gè)參數(shù)更新軌跡的標(biāo)簽
        colors:每個(gè)軌跡的顏色
        interval:幀之間的延遲(以毫秒為單位)
        blit:是否優(yōu)化繪圖
        """
        self.fig = fig
        self.ax = ax
        self.xy_values = xy_values
        self.z_values = z_values
        frames = max(xy_value.shape[0] for xy_value in xy_values)
        self.lines = [ax.plot([], [], [], label=label, color=color, lw=2)[0]
                      for _, label, color in zip_longest(xy_values, labels, colors)]
        super(Visualization3D, self).__init__(fig, self.animate, init_func=self.init_animation, frames=frames,
                                              interval=interval, blit=blit, **kwargs)

    def init_animation(self):
        # 數(shù)值初始化
        for line in self.lines:
            line.set_data([], [])
            # line.set_3d_properties(np.asarray([]))  # 源程序中有這一行,加上會(huì)報(bào)錯(cuò)。 Edit by David 2022.12.4
        return self.lines

    def animate(self, i):
        # 將x,y,z三個(gè)數(shù)據(jù)傳入,繪制三維圖像
        for line, xy_value, z_value in zip(self.lines, self.xy_values, self.z_values):
            line.set_data(xy_value[:i, 0], xy_value[:i, 1])
            line.set_3d_properties(z_value[:i])
        return self.lines


def train_f(model, optimizer, x_init, epoch):
    x = x_init
    all_x = []
    losses = []
    for i in range(epoch):
        all_x.append(copy.deepcopy(x.numpy()))  # 淺拷貝 改為 深拷貝, 否則List的原值會(huì)被改變。 Edit by David 2022.12.4.
        loss = model(x)
        losses.append(loss)
        model.backward()
        optimizer.step()
        x = model.params['x']
    return torch.Tensor(np.array(all_x)), losses


# 構(gòu)建6個(gè)模型,分別配備不同的優(yōu)化器
model1 = OptimizedFunction3D()
opt_gd = SimpleBatchGD(init_lr=0.01, model=model1)
model2 = OptimizedFunction3D()
opt_adagrad = Adagrad(init_lr=0.5, model=model2, epsilon=1e-7)
model3 = OptimizedFunction3D()
opt_rmsprop = RMSprop(init_lr=0.1, model=model3, beta=0.9, epsilon=1e-7)
model4 = OptimizedFunction3D()
opt_momentum = Momentum(init_lr=0.01, model=model4, rho=0.9)
model5 = OptimizedFunction3D()
opt_adam = Adam(init_lr=0.1, model=model5, beta1=0.9, beta2=0.99, epsilon=1e-7)
model6 = OptimizedFunction3D()
opt_Nesterov = Nesterov(init_lr=0.1, model=model6, rho=0.9)

models = [model1, model2, model3, model4, model5, model6]
opts = [opt_gd, opt_adagrad, opt_rmsprop, opt_momentum, opt_adam, opt_Nesterov]

x_all_opts = []
z_all_opts = []
# 使用不同優(yōu)化器訓(xùn)練
for model, opt in zip(models, opts):
    x_init = torch.FloatTensor([2, 3])
    x_one_opt, z_one_opt = train_f(model, opt, x_init, 150)  # epoch
    # 保存參數(shù)值
    x_all_opts.append(x_one_opt.numpy())
    z_all_opts.append(np.squeeze(z_one_opt))

# 使用numpy.meshgrid生成x1,x2矩陣,矩陣的每一行為[-3, 3],以0.1為間隔的數(shù)值
x1 = np.arange(-3, 3, 0.1)
x2 = np.arange(-3, 3, 0.1)
x1, x2 = np.meshgrid(x1, x2)
init_x = torch.Tensor(np.array([x1, x2]))

model = OptimizedFunction3D()

# 繪制 f_3d函數(shù) 的 三維圖像
fig = plt.figure()
ax = plt.axes(projection='3d')
X = init_x[0].numpy()
Y = init_x[1].numpy()
Z = model(init_x).numpy()  # 改為 model(init_x).numpy() David 2022.12.4
ax.plot_surface(X, Y, Z, cmap='rainbow')
ax.set_xlabel('x1')
ax.set_ylabel('x2')
ax.set_zlabel('f(x1,x2)')

labels = ['SGD', 'AdaGrad', 'RMSprop', 'Momentum', 'Adam', 'Nesterov']
colors = ['#8B0000', '#0000FF', '#000000', '#008B00', '#FF0000']

animator = Visualization3D(*x_all_opts, z_values=z_all_opts, labels=labels, colors=colors, fig=fig, ax=ax)
ax.legend(loc='upper left')
plt.show()
animator.save('animation.gif')  # 效果不好,估計(jì)被擋住了…… 有待進(jìn)一步提高 Edit by David 2022.12.4
import torch
import numpy as np
import copy
from matplotlib import pyplot as plt
from matplotlib import animation
from itertools import zip_longest
from matplotlib import cm
class Op(object):
def __init__(self):
pass
def __call__(self, inputs):
return self.forward(inputs)
# 輸入:張量inputs
# 輸出:張量outputs
def forward(self, inputs):
# return outputs
raise NotImplementedError
# 輸入:最終輸出對(duì)outputs的梯度outputs_grads
# 輸出:最終輸出對(duì)inputs的梯度inputs_grads
def backward(self, outputs_grads):
# return inputs_grads
raise NotImplementedError
class Optimizer(object): # 優(yōu)化器基類(lèi)
def __init__(self, init_lr, model):
"""
優(yōu)化器類(lèi)初始化
"""
# 初始化學(xué)習(xí)率,用于參數(shù)更新的計(jì)算
self.init_lr = init_lr
# 指定優(yōu)化器需要優(yōu)化的模型
self.model = model
def step(self):
"""
定義每次迭代如何更新參數(shù)
"""
pass
class SimpleBatchGD(Optimizer):
def __init__(self, init_lr, model):
super(SimpleBatchGD, self).__init__(init_lr=init_lr, model=model)
def step(self):
# 參數(shù)更新
if isinstance(self.model.params, dict):
for key in self.model.params.keys():
self.model.params[key] = self.model.params[key] - self.init_lr * self.model.grads[key]
class Adagrad(Optimizer):
def __init__(self, init_lr, model, epsilon):
"""
Adagrad 優(yōu)化器初始化
輸入:
        - init_lr: 初始學(xué)習(xí)率
        - model:模型,model.params存儲(chǔ)模型參數(shù)值
        - epsilon:保持?jǐn)?shù)值穩(wěn)定性而設(shè)置的非常小的常數(shù)
"""
super(Adagrad, self).__init__(init_lr=init_lr, model=model)
self.G = {}
for key in self.model.params.keys():
self.G[key] = 0
self.epsilon = epsilon
def adagrad(self, x, gradient_x, G, init_lr):
"""
adagrad算法更新參數(shù),G為參數(shù)梯度平方的累計(jì)值。
"""
G += gradient_x ** 2
x -= init_lr / torch.sqrt(G + self.epsilon) * gradient_x
return x, G
def step(self):
"""
參數(shù)更新
"""
for key in self.model.params.keys():
self.model.params[key], self.G[key] = self.adagrad(self.model.params[key],
self.model.grads[key],
self.G[key],
self.init_lr)
class RMSprop(Optimizer):
def __init__(self, init_lr, model, beta, epsilon):
"""
RMSprop優(yōu)化器初始化
輸入:
- init_lr:初始學(xué)習(xí)率
- model:模型,model.params存儲(chǔ)模型參數(shù)值
- beta:衰減率
- epsilon:保持?jǐn)?shù)值穩(wěn)定性而設(shè)置的常數(shù)
"""
super(RMSprop, self).__init__(init_lr=init_lr, model=model)
self.G = {}
for key in self.model.params.keys():
self.G[key] = 0
self.beta = beta
self.epsilon = epsilon
def rmsprop(self, x, gradient_x, G, init_lr):
"""
rmsprop算法更新參數(shù),G為迭代梯度平方的加權(quán)移動(dòng)平均
"""
G = self.beta * G + (1 - self.beta) * gradient_x ** 2
x -= init_lr / torch.sqrt(G + self.epsilon) * gradient_x
return x, G
def step(self):
"""參數(shù)更新"""
for key in self.model.params.keys():
self.model.params[key], self.G[key] = self.rmsprop(self.model.params[key],
self.model.grads[key],
self.G[key],
self.init_lr)
class Momentum(Optimizer):
def __init__(self, init_lr, model, rho):
"""
Momentum優(yōu)化器初始化
輸入:
- init_lr:初始學(xué)習(xí)率
- model:模型,model.params存儲(chǔ)模型參數(shù)值
- rho:動(dòng)量因子
"""
super(Momentum, self).__init__(init_lr=init_lr, model=model)
self.delta_x = {}
for key in self.model.params.keys():
self.delta_x[key] = 0
self.rho = rho
def momentum(self, x, gradient_x, delta_x, init_lr):
"""
momentum算法更新參數(shù),delta_x為梯度的加權(quán)移動(dòng)平均
"""
delta_x = self.rho * delta_x - init_lr * gradient_x
x += delta_x
return x, delta_x
def step(self):
"""參數(shù)更新"""
for key in self.model.params.keys():
self.model.params[key], self.delta_x[key] = self.momentum(self.model.params[key],
self.model.grads[key],
self.delta_x[key],
self.init_lr)
class Adam(Optimizer):
def __init__(self, init_lr, model, beta1, beta2, epsilon):
"""
Adam優(yōu)化器初始化
輸入:
- init_lr:初始學(xué)習(xí)率
- model:模型,model.params存儲(chǔ)模型參數(shù)值
- beta1, beta2:移動(dòng)平均的衰減率
- epsilon:保持?jǐn)?shù)值穩(wěn)定性而設(shè)置的常數(shù)
"""
super(Adam, self).__init__(init_lr=init_lr, model=model)
self.beta1 = beta1
self.beta2 = beta2
self.epsilon = epsilon
self.M, self.G = {}, {}
for key in self.model.params.keys():
self.M[key] = 0
self.G[key] = 0
self.t = 1
def adam(self, x, gradient_x, G, M, t, init_lr):
"""
adam算法更新參數(shù)
輸入:
- x:參數(shù)
- G:梯度平方的加權(quán)移動(dòng)平均
- M:梯度的加權(quán)移動(dòng)平均
- t:迭代次數(shù)
- init_lr:初始學(xué)習(xí)率
"""
M = self.beta1 * M + (1 - self.beta1) * gradient_x
G = self.beta2 * G + (1 - self.beta2) * gradient_x ** 2
M_hat = M / (1 - self.beta1 ** t)
G_hat = G / (1 - self.beta2 ** t)
t += 1
x -= init_lr / torch.sqrt(G_hat + self.epsilon) * M_hat
return x, G, M, t
def step(self):
"""參數(shù)更新"""
for key in self.model.params.keys():
self.model.params[key], self.G[key], self.M[key], self.t = self.adam(self.model.params[key],
self.model.grads[key],
self.G[key],
self.M[key],
self.t,
self.init_lr)
class OptimizedFunction3D(Op):
def __init__(self):
super(OptimizedFunction3D, self).__init__()
self.params = {'x': 0}
self.grads = {'x': 0}
def forward(self, x):
self.params['x'] = x
return x[0] * x[0] / 20 + x[1] * x[1] / 1 # x[0] ** 2 + x[1] ** 2 + x[1] ** 3 + x[0] * x[1]
def backward(self):
x = self.params['x']
gradient1 = 2 * x[0] / 20
gradient2 = 2 * x[1] / 1
grad1 = torch.Tensor([gradient1])
grad2 = torch.Tensor([gradient2])
self.grads['x'] = torch.cat([grad1, grad2])
class Visualization3D(animation.FuncAnimation):
""" 繪制動(dòng)態(tài)圖像,可視化參數(shù)更新軌跡 """
def __init__(self, *xy_values, z_values, labels=[], colors=[], fig, ax, interval=100, blit=True, **kwargs):
"""
初始化3d可視化類(lèi)
輸入:
xy_values:三維中x,y維度的值
z_values:三維中z維度的值
labels:每個(gè)參數(shù)更新軌跡的標(biāo)簽
colors:每個(gè)軌跡的顏色
interval:幀之間的延遲(以毫秒為單位)
blit:是否優(yōu)化繪圖
"""
self.fig = fig
self.ax = ax
self.xy_values = xy_values
self.z_values = z_values
frames = max(xy_value.shape[0] for xy_value in xy_values)
self.lines = [ax.plot([], [], [], label=label, color=color, lw=2)[0]
for _, label, color in zip_longest(xy_values, labels, colors)]
self.points = [ax.plot([], [], [], color=color, markeredgewidth=1, markeredgecolor='black', marker='o')[0]
for _, color in zip_longest(xy_values, colors)]
# print(self.lines)
super(Visualization3D, self).__init__(fig, self.animate, init_func=self.init_animation, frames=frames,
interval=interval, blit=blit, **kwargs)
def init_animation(self):
# 數(shù)值初始化
for line in self.lines:
line.set_data_3d([], [], [])
for point in self.points:
point.set_data_3d([], [], [])
return self.points + self.lines
def animate(self, i):
# 將x,y,z三個(gè)數(shù)據(jù)傳入,繪制三維圖像
for line, xy_value, z_value in zip(self.lines, self.xy_values, self.z_values):
line.set_data_3d(xy_value[:i, 0], xy_value[:i, 1], z_value[:i])
for point, xy_value, z_value in zip(self.points, self.xy_values, self.z_values):
point.set_data_3d(xy_value[i, 0], xy_value[i, 1], z_value[i])
return self.points + self.lines
def train_f(model, optimizer, x_init, epoch):
x = x_init
all_x = []
losses = []
for i in range(epoch):
all_x.append(copy.deepcopy(x.numpy())) # 淺拷貝 改為 深拷貝, 否則List的原值會(huì)被改變。 Edit by David 2022.12.4.
loss = model(x)
losses.append(loss)
model.backward()
optimizer.step()
x = model.params['x']
return torch.Tensor(np.array(all_x)), losses
# 構(gòu)建5個(gè)模型,分別配備不同的優(yōu)化器
model1 = OptimizedFunction3D()
opt_gd = SimpleBatchGD(init_lr=0.95, model=model1)
model2 = OptimizedFunction3D()
opt_adagrad = Adagrad(init_lr=1.5, model=model2, epsilon=1e-7)
model3 = OptimizedFunction3D()
opt_rmsprop = RMSprop(init_lr=0.05, model=model3, beta=0.9, epsilon=1e-7)
model4 = OptimizedFunction3D()
opt_momentum = Momentum(init_lr=0.1, model=model4, rho=0.9)
model5 = OptimizedFunction3D()
opt_adam = Adam(init_lr=0.3, model=model5, beta1=0.9, beta2=0.99, epsilon=1e-7)
models = [model1, model2, model3, model4, model5]
opts = [opt_gd, opt_adagrad, opt_rmsprop, opt_momentum, opt_adam]
x_all_opts = []
z_all_opts = []
# 使用不同優(yōu)化器訓(xùn)練
for model, opt in zip(models, opts):
x_init = torch.FloatTensor([-7, 2])
x_one_opt, z_one_opt = train_f(model, opt, x_init, 100) # epoch
# 保存參數(shù)值
x_all_opts.append(x_one_opt.numpy())
z_all_opts.append(np.squeeze(z_one_opt))
# 使用numpy.meshgrid生成x1,x2矩陣,矩陣的每一行為[-3, 3],以0.1為間隔的數(shù)值
x1 = np.arange(-10, 10, 0.01)
x2 = np.arange(-5, 5, 0.01)
x1, x2 = np.meshgrid(x1, x2)
init_x = torch.Tensor(np.array([x1, x2]))
model = OptimizedFunction3D()
# 繪制 f_3d函數(shù) 的 三維圖像
fig = plt.figure()
ax = plt.axes(projection='3d')
X = init_x[0].numpy()
Y = init_x[1].numpy()
Z = model(init_x).numpy() # 改為 model(init_x).numpy() David 2022.12.4
surf = ax.plot_surface(X, Y, Z, edgecolor='grey', cmap=cm.coolwarm)
# fig.colorbar(surf, shrink=0.5, aspect=1)
# ax.set_zlim(-3, 2)
ax.set_xlabel('x1')
ax.set_ylabel('x2')
ax.set_zlabel('f(x1,x2)')
labels = ['SGD', 'AdaGrad', 'RMSprop', 'Momentum', 'Adam']
colors = ['#8B0000', '#0000FF', '#000000', '#008B00', '#FF0000']
animator = Visualization3D(*x_all_opts, z_values=z_all_opts, labels=labels, colors=colors, fig=fig, ax=ax)
ax.legend(loc='upper right')
plt.show()
# animator.save('teaser' + '.gif', writer='imagemagick',fps=10) # 效果不好,估計(jì)被擋住了…… 有待進(jìn)一步提高 Edit by David 2022.12.4
# save不好用,不費(fèi)勁了,安裝個(gè)軟件做gif https://pc.qq.com/detail/13/detail_23913.html
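If saving the GIF directly is the sticking point, one thing that may be worth trying (an untested suggestion on my part, and it assumes the pillow package is installed) is matplotlib's built-in PillowWriter, which avoids the ImageMagick dependency; call it before plt.show() so the figure is still alive:

animator.save('animation.gif', writer=animation.PillowWriter(fps=10))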
I tried the code the teacher gave us and something was off: the figure could not be rotated, no trajectories were drawn, and, worst of all, the window closed itself after a moment, so it still needed more polishing.
After fiddling with it for a whole morning it finally works. What I changed were the Visualization3D(animation.FuncAnimation) class and the part that displays the figure.
Here is my code:
import torch
import numpy as np
import copy
from matplotlib import pyplot as plt
from matplotlib import animation
from itertools import zip_longest
from matplotlib import cm
class Op(object):
def __init__(self):
pass
def __call__(self, inputs):
return self.forward(inputs)
# 輸入:張量inputs
# 輸出:張量outputs
def forward(self, inputs):
# return outputs
raise NotImplementedError
# 輸入:最終輸出對(duì)outputs的梯度outputs_grads
# 輸出:最終輸出對(duì)inputs的梯度inputs_grads
def backward(self, outputs_grads):
# return inputs_grads
raise NotImplementedError
class Optimizer(object): # 優(yōu)化器基類(lèi)
def __init__(self, init_lr, model):
"""
優(yōu)化器類(lèi)初始化
"""
# 初始化學(xué)習(xí)率,用于參數(shù)更新的計(jì)算
self.init_lr = init_lr
# 指定優(yōu)化器需要優(yōu)化的模型
self.model = model
def step(self):
"""
定義每次迭代如何更新參數(shù)
"""
pass
class SimpleBatchGD(Optimizer):
def __init__(self, init_lr, model):
super(SimpleBatchGD, self).__init__(init_lr=init_lr, model=model)
def step(self):
# 參數(shù)更新
if isinstance(self.model.params, dict):
for key in self.model.params.keys():
self.model.params[key] = self.model.params[key] - self.init_lr * self.model.grads[key]
class Adagrad(Optimizer):
def __init__(self, init_lr, model, epsilon):
"""
Adagrad 優(yōu)化器初始化
輸入:
        - init_lr: 初始學(xué)習(xí)率
        - model:模型,model.params存儲(chǔ)模型參數(shù)值
        - epsilon:保持?jǐn)?shù)值穩(wěn)定性而設(shè)置的非常小的常數(shù)
"""
super(Adagrad, self).__init__(init_lr=init_lr, model=model)
self.G = {}
for key in self.model.params.keys():
self.G[key] = 0
self.epsilon = epsilon
def adagrad(self, x, gradient_x, G, init_lr):
"""
adagrad算法更新參數(shù),G為參數(shù)梯度平方的累計(jì)值。
"""
G += gradient_x ** 2
x -= init_lr / torch.sqrt(G + self.epsilon) * gradient_x
return x, G
def step(self):
"""
參數(shù)更新
"""
for key in self.model.params.keys():
self.model.params[key], self.G[key] = self.adagrad(self.model.params[key],
self.model.grads[key],
self.G[key],
self.init_lr)
class RMSprop(Optimizer):
def __init__(self, init_lr, model, beta, epsilon):
"""
RMSprop優(yōu)化器初始化
輸入:
- init_lr:初始學(xué)習(xí)率
- model:模型,model.params存儲(chǔ)模型參數(shù)值
- beta:衰減率
- epsilon:保持?jǐn)?shù)值穩(wěn)定性而設(shè)置的常數(shù)
"""
super(RMSprop, self).__init__(init_lr=init_lr, model=model)
self.G = {}
for key in self.model.params.keys():
self.G[key] = 0
self.beta = beta
self.epsilon = epsilon
def rmsprop(self, x, gradient_x, G, init_lr):
"""
rmsprop算法更新參數(shù),G為迭代梯度平方的加權(quán)移動(dòng)平均
"""
G = self.beta * G + (1 - self.beta) * gradient_x ** 2
x -= init_lr / torch.sqrt(G + self.epsilon) * gradient_x
return x, G
def step(self):
"""參數(shù)更新"""
for key in self.model.params.keys():
self.model.params[key], self.G[key] = self.rmsprop(self.model.params[key],
self.model.grads[key],
self.G[key],
self.init_lr)
class Momentum(Optimizer):
def __init__(self, init_lr, model, rho):
"""
Momentum優(yōu)化器初始化
輸入:
- init_lr:初始學(xué)習(xí)率
- model:模型,model.params存儲(chǔ)模型參數(shù)值
- rho:動(dòng)量因子
"""
super(Momentum, self).__init__(init_lr=init_lr, model=model)
self.delta_x = {}
for key in self.model.params.keys():
self.delta_x[key] = 0
self.rho = rho
def momentum(self, x, gradient_x, delta_x, init_lr):
"""
momentum算法更新參數(shù),delta_x為梯度的加權(quán)移動(dòng)平均
"""
delta_x = self.rho * delta_x - init_lr * gradient_x
x += delta_x
return x, delta_x
def step(self):
"""參數(shù)更新"""
for key in self.model.params.keys():
self.model.params[key], self.delta_x[key] = self.momentum(self.model.params[key],
self.model.grads[key],
self.delta_x[key],
self.init_lr)
class Adam(Optimizer):
def __init__(self, init_lr, model, beta1, beta2, epsilon):
"""
Adam優(yōu)化器初始化
輸入:
- init_lr:初始學(xué)習(xí)率
- model:模型,model.params存儲(chǔ)模型參數(shù)值
- beta1, beta2:移動(dòng)平均的衰減率
- epsilon:保持?jǐn)?shù)值穩(wěn)定性而設(shè)置的常數(shù)
"""
super(Adam, self).__init__(init_lr=init_lr, model=model)
self.beta1 = beta1
self.beta2 = beta2
self.epsilon = epsilon
self.M, self.G = {}, {}
for key in self.model.params.keys():
self.M[key] = 0
self.G[key] = 0
self.t = 1
def adam(self, x, gradient_x, G, M, t, init_lr):
"""
adam算法更新參數(shù)
輸入:
- x:參數(shù)
- G:梯度平方的加權(quán)移動(dòng)平均
- M:梯度的加權(quán)移動(dòng)平均
- t:迭代次數(shù)
- init_lr:初始學(xué)習(xí)率
"""
M = self.beta1 * M + (1 - self.beta1) * gradient_x
G = self.beta2 * G + (1 - self.beta2) * gradient_x ** 2
M_hat = M / (1 - self.beta1 ** t)
G_hat = G / (1 - self.beta2 ** t)
t += 1
x -= init_lr / torch.sqrt(G_hat + self.epsilon) * M_hat
return x, G, M, t
def step(self):
"""參數(shù)更新"""
for key in self.model.params.keys():
self.model.params[key], self.G[key], self.M[key], self.t = self.adam(self.model.params[key],
self.model.grads[key],
self.G[key],
self.M[key],
self.t,
self.init_lr)
class OptimizedFunction3D(Op):
def __init__(self):
super(OptimizedFunction3D, self).__init__()
self.params = {'x': 0}
self.grads = {'x': 0}
def forward(self, x):
self.params['x'] = x
return x[0] * x[0] / 20 + x[1] * x[1] / 1 # x[0] ** 2 + x[1] ** 2 + x[1] ** 3 + x[0] * x[1]
def backward(self):
x = self.params['x']
gradient1 = 2 * x[0] / 20
gradient2 = 2 * x[1] / 1
grad1 = torch.Tensor([gradient1])
grad2 = torch.Tensor([gradient2])
self.grads['x'] = torch.cat([grad1, grad2])
class Visualization3D(animation.FuncAnimation):
""" 繪制動(dòng)態(tài)圖像,可視化參數(shù)更新軌跡 """
def __init__(self, *xy_values, z_values, labels=[], colors=[], fig, ax, interval=100, blit=True, **kwargs):
"""
初始化3d可視化類(lèi)
輸入:
xy_values:三維中x,y維度的值
z_values:三維中z維度的值
labels:每個(gè)參數(shù)更新軌跡的標(biāo)簽
colors:每個(gè)軌跡的顏色
interval:幀之間的延遲(以毫秒為單位)
blit:是否優(yōu)化繪圖
"""
self.fig = fig
self.ax = ax
self.xy_values = xy_values
self.z_values = z_values
frames = max(xy_value.shape[0] for xy_value in xy_values)
self.lines = [ax.plot([], [], [], label=label, color=color, lw=2)[0]
for _, label, color in zip_longest(xy_values, labels, colors)]
self.points = [ax.plot([], [], [], color=color, markeredgewidth=1, markeredgecolor='black', marker='o')[0]
for _, color in zip_longest(xy_values, colors)]
# print(self.lines)
super(Visualization3D, self).__init__(fig, self.animate, init_func=self.init_animation, frames=frames,
interval=interval, blit=blit, **kwargs)
def init_animation(self):
# 數(shù)值初始化
for line in self.lines:
line.set_data([], [])
line.set_3d_properties([])
for point in self.points:
point.set_data([], [])
point.set_3d_properties([])
return self.points + self.lines
def animate(self, i):
# 將x,y,z三個(gè)數(shù)據(jù)傳入,繪制三維圖像
for line, xy_value, z_value, point in zip(self.lines, self.xy_values, self.z_values, self.points):
line.set_data(xy_value[:i, 0], xy_value[:i, 1])
line.set_3d_properties(z_value[:i])
point.set_data(xy_value[i, 0], xy_value[i, 1])
point.set_3d_properties(z_value[i])
return self.points + self.lines
def train_f(model, optimizer, x_init, epoch):
x = x_init
all_x = []
losses = []
for i in range(epoch):
all_x.append(copy.deepcopy(x.numpy())) # 淺拷貝 改為 深拷貝, 否則List的原值會(huì)被改變。 Edit by David 2022.12.4.
loss = model(x)
losses.append(loss)
model.backward()
optimizer.step()
x = model.params['x']
return torch.Tensor(np.array(all_x)), losses
# 構(gòu)建5個(gè)模型,分別配備不同的優(yōu)化器
model1 = OptimizedFunction3D()
opt_gd = SimpleBatchGD(init_lr=0.95, model=model1)
model2 = OptimizedFunction3D()
opt_adagrad = Adagrad(init_lr=1.5, model=model2, epsilon=1e-7)
model3 = OptimizedFunction3D()
opt_rmsprop = RMSprop(init_lr=0.05, model=model3, beta=0.9, epsilon=1e-7)
model4 = OptimizedFunction3D()
opt_momentum = Momentum(init_lr=0.1, model=model4, rho=0.9)
model5 = OptimizedFunction3D()
opt_adam = Adam(init_lr=0.3, model=model5, beta1=0.9, beta2=0.99, epsilon=1e-7)
models = [model1, model2, model3, model4, model5]
opts = [opt_gd, opt_adagrad, opt_rmsprop, opt_momentum, opt_adam]
x_all_opts = []
z_all_opts = []
# 使用不同優(yōu)化器訓(xùn)練
for model, opt in zip(models, opts):
x_init = torch.FloatTensor([-7, 2])
x_one_opt, z_one_opt = train_f(model, opt, x_init, 100) # epoch
# 保存參數(shù)值
x_all_opts.append(x_one_opt.numpy())
z_all_opts.append(np.squeeze(z_one_opt))
# 使用numpy.meshgrid生成x1,x2矩陣,矩陣的每一行為[-10, 10],以0.01為間隔的數(shù)值
x1 = np.arange(-10, 10, 0.01)
x2 = np.arange(-5, 5, 0.01)
x1, x2 = np.meshgrid(x1, x2)
init_x = torch.Tensor(np.array([x1, x2]))
model = OptimizedFunction3D()
fig = plt.figure()
ax = plt.axes(projection='3d')
X = init_x[0].numpy()
Y = init_x[1].numpy()
Z = model(init_x).numpy()
surf = ax.plot_surface(X, Y, Z, edgecolor='grey', cmap=cm.coolwarm)
ax.set_xlabel('x1')
ax.set_ylabel('x2')
ax.set_zlabel('f(x1,x2)')
# 添加軌跡圖
labels = ['SGD', 'AdaGrad', 'RMSprop', 'Momentum', 'Adam']
colors = ['#8B0000', '#0000FF', '#000000', '#008B00', '#FF0000']
for x_opt, z_opt, label, color in zip(x_all_opts, z_all_opts, labels, colors):
ax.plot(x_opt[:, 0], x_opt[:, 1], z_opt, label=label, color=color)
ax.legend(loc='upper right')
# 修改下面這行,將Visualization3D的初始化參數(shù)中的fig和ax改為ax.figure和ax
animator = Visualization3D(*x_all_opts, z_values=z_all_opts, labels=labels, colors=colors, fig=ax.figure, ax=ax)
plt.show()
I made the GIF with an online tool, and it even adds a watermark; recommended only if the watermark doesn't bother you.
3. Reproducing the classic CS231n animation
import torch
import numpy as np
import copy
from matplotlib import pyplot as plt
from matplotlib import animation
from itertools import zip_longest
from matplotlib import cm


class Op(object):
    def __init__(self):
        pass

    def __call__(self, inputs):
        return self.forward(inputs)

    # 輸入:張量inputs
    # 輸出:張量outputs
    def forward(self, inputs):
        # return outputs
        raise NotImplementedError

    # 輸入:最終輸出對(duì)outputs的梯度outputs_grads
    # 輸出:最終輸出對(duì)inputs的梯度inputs_grads
    def backward(self, outputs_grads):
        # return inputs_grads
        raise NotImplementedError


class Optimizer(object):  # 優(yōu)化器基類(lèi)
    def __init__(self, init_lr, model):
        """
        優(yōu)化器類(lèi)初始化
        """
        # 初始化學(xué)習(xí)率,用于參數(shù)更新的計(jì)算
        self.init_lr = init_lr
        # 指定優(yōu)化器需要優(yōu)化的模型
        self.model = model

    def step(self):
        """
        定義每次迭代如何更新參數(shù)
        """
        pass


class SimpleBatchGD(Optimizer):
    def __init__(self, init_lr, model):
        super(SimpleBatchGD, self).__init__(init_lr=init_lr, model=model)

    def step(self):
        # 參數(shù)更新
        if isinstance(self.model.params, dict):
            for key in self.model.params.keys():
                self.model.params[key] = self.model.params[key] - self.init_lr * self.model.grads[key]


class Adagrad(Optimizer):
    def __init__(self, init_lr, model, epsilon):
        """
        Adagrad 優(yōu)化器初始化
        輸入:
        - init_lr: 初始學(xué)習(xí)率
        - model:模型,model.params存儲(chǔ)模型參數(shù)值
        - epsilon:保持?jǐn)?shù)值穩(wěn)定性而設(shè)置的非常小的常數(shù)
        """
        super(Adagrad, self).__init__(init_lr=init_lr, model=model)
        self.G = {}
        for key in self.model.params.keys():
            self.G[key] = 0
        self.epsilon = epsilon

    def adagrad(self, x, gradient_x, G, init_lr):
        """
        adagrad算法更新參數(shù),G為參數(shù)梯度平方的累計(jì)值。
        """
        G += gradient_x ** 2
        x -= init_lr / torch.sqrt(G + self.epsilon) * gradient_x
        return x, G

    def step(self):
        """
        參數(shù)更新
        """
        for key in self.model.params.keys():
            self.model.params[key], self.G[key] = self.adagrad(self.model.params[key],
                                                               self.model.grads[key],
                                                               self.G[key],
                                                               self.init_lr)


class RMSprop(Optimizer):
    def __init__(self, init_lr, model, beta, epsilon):
        """
        RMSprop優(yōu)化器初始化
        輸入:
        - init_lr:初始學(xué)習(xí)率
        - model:模型,model.params存儲(chǔ)模型參數(shù)值
        - beta:衰減率
        - epsilon:保持?jǐn)?shù)值穩(wěn)定性而設(shè)置的常數(shù)
        """
        super(RMSprop, self).__init__(init_lr=init_lr, model=model)
        self.G = {}
        for key in self.model.params.keys():
            self.G[key] = 0
        self.beta = beta
        self.epsilon = epsilon

    def rmsprop(self, x, gradient_x, G, init_lr):
        """
        rmsprop算法更新參數(shù),G為迭代梯度平方的加權(quán)移動(dòng)平均
        """
        G = self.beta * G + (1 - self.beta) * gradient_x ** 2
        x -= init_lr / torch.sqrt(G + self.epsilon) * gradient_x
        return x, G

    def step(self):
        """參數(shù)更新"""
        for key in self.model.params.keys():
            self.model.params[key], self.G[key] = self.rmsprop(self.model.params[key],
                                                               self.model.grads[key],
                                                               self.G[key],
                                                               self.init_lr)


class Momentum(Optimizer):
    def __init__(self, init_lr, model, rho):
        """
        Momentum優(yōu)化器初始化
        輸入:
        - init_lr:初始學(xué)習(xí)率
        - model:模型,model.params存儲(chǔ)模型參數(shù)值
        - rho:動(dòng)量因子
        """
        super(Momentum, self).__init__(init_lr=init_lr, model=model)
        self.delta_x = {}
        for key in self.model.params.keys():
            self.delta_x[key] = 0
        self.rho = rho

    def momentum(self, x, gradient_x, delta_x, init_lr):
        """
        momentum算法更新參數(shù),delta_x為梯度的加權(quán)移動(dòng)平均
        """
        delta_x = self.rho * delta_x - init_lr * gradient_x
        x += delta_x
        return x, delta_x

    def step(self):
        """參數(shù)更新"""
        for key in self.model.params.keys():
            self.model.params[key], self.delta_x[key] = self.momentum(self.model.params[key],
                                                                      self.model.grads[key],
                                                                      self.delta_x[key],
                                                                      self.init_lr)


class Adam(Optimizer):
    def __init__(self, init_lr, model, beta1, beta2, epsilon):
        """
        Adam優(yōu)化器初始化
        輸入:
        - init_lr:初始學(xué)習(xí)率
        - model:模型,model.params存儲(chǔ)模型參數(shù)值
        - beta1, beta2:移動(dòng)平均的衰減率
        - epsilon:保持?jǐn)?shù)值穩(wěn)定性而設(shè)置的常數(shù)
        """
        super(Adam, self).__init__(init_lr=init_lr, model=model)
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.M, self.G = {}, {}
        for key in self.model.params.keys():
            self.M[key] = 0
            self.G[key] = 0
        self.t = 1

    def adam(self, x, gradient_x, G, M, t, init_lr):
        """
        adam算法更新參數(shù)
        輸入:
        - x:參數(shù)
        - G:梯度平方的加權(quán)移動(dòng)平均
        - M:梯度的加權(quán)移動(dòng)平均
        - t:迭代次數(shù)
        - init_lr:初始學(xué)習(xí)率
        """
        M = self.beta1 * M + (1 - self.beta1) * gradient_x
        G = self.beta2 * G + (1 - self.beta2) * gradient_x ** 2
        M_hat = M / (1 - self.beta1 ** t)
        G_hat = G / (1 - self.beta2 ** t)
        t += 1
        x -= init_lr / torch.sqrt(G_hat + self.epsilon) * M_hat
        return x, G, M, t

    def step(self):
        """參數(shù)更新"""
        for key in self.model.params.keys():
            self.model.params[key], self.G[key], self.M[key], self.t = self.adam(self.model.params[key],
                                                                                 self.model.grads[key],
                                                                                 self.G[key],
                                                                                 self.M[key],
                                                                                 self.t,
                                                                                 self.init_lr)


class OptimizedFunction3D(Op):
    def __init__(self):
        super(OptimizedFunction3D, self).__init__()
        self.params = {'x': 0}
        self.grads = {'x': 0}

    def forward(self, x):
        self.params['x'] = x
        return - x[0] * x[0] / 2 + x[1] * x[1] / 1  # x[0] ** 2 + x[1] ** 2 + x[1] ** 3 + x[0] * x[1]

    def backward(self):
        x = self.params['x']
        gradient1 = - 2 * x[0] / 2
        gradient2 = 2 * x[1] / 1
        grad1 = torch.Tensor([gradient1])
        grad2 = torch.Tensor([gradient2])
        self.grads['x'] = torch.cat([grad1, grad2])


class Visualization3D(animation.FuncAnimation):
    """ 繪制動(dòng)態(tài)圖像,可視化參數(shù)更新軌跡 """
    def __init__(self, *xy_values, z_values, labels=[], colors=[], fig, ax, interval=100, blit=True, **kwargs):
        """
        初始化3d可視化類(lèi)
        輸入:
        xy_values:三維中x,y維度的值
        z_values:三維中z維度的值
        labels:每個(gè)參數(shù)更新軌跡的標(biāo)簽
        colors:每個(gè)軌跡的顏色
        interval:幀之間的延遲(以毫秒為單位)
        blit:是否優(yōu)化繪圖
        """
        self.fig = fig
        self.ax = ax
        self.xy_values = xy_values
        self.z_values = z_values
        frames = max(xy_value.shape[0] for xy_value in xy_values)
        # , marker = 'o'
        self.lines = [ax.plot([], [], [], label=label, color=color, lw=2)[0]
                      for _, label, color in zip_longest(xy_values, labels, colors)]
        print(self.lines)
        super(Visualization3D, self).__init__(fig, self.animate, init_func=self.init_animation, frames=frames,
                                              interval=interval, blit=blit, **kwargs)

    def init_animation(self):
        # 數(shù)值初始化
        for line in self.lines:
            line.set_data([], [])
            # line.set_3d_properties(np.asarray([]))  # 源程序中有這一行,加上會(huì)報(bào)錯(cuò)。 Edit by David 2022.12.4
        return self.lines

    def animate(self, i):
        # 將x,y,z三個(gè)數(shù)據(jù)傳入,繪制三維圖像
        for line, xy_value, z_value in zip(self.lines, self.xy_values, self.z_values):
            line.set_data(xy_value[:i, 0], xy_value[:i, 1])
            line.set_3d_properties(z_value[:i])
        return self.lines


def train_f(model, optimizer, x_init, epoch):
    x = x_init
    all_x = []
    losses = []
    for i in range(epoch):
        all_x.append(copy.deepcopy(x.numpy()))  # 淺拷貝 改為 深拷貝, 否則List的原值會(huì)被改變。 Edit by David 2022.12.4.
        loss = model(x)
        losses.append(loss)
        model.backward()
        optimizer.step()
        x = model.params['x']
    return torch.Tensor(np.array(all_x)), losses


# 構(gòu)建5個(gè)模型,分別配備不同的優(yōu)化器
model1 = OptimizedFunction3D()
opt_gd = SimpleBatchGD(init_lr=0.05, model=model1)
model2 = OptimizedFunction3D()
opt_adagrad = Adagrad(init_lr=0.05, model=model2, epsilon=1e-7)
model3 = OptimizedFunction3D()
opt_rmsprop = RMSprop(init_lr=0.05, model=model3, beta=0.9, epsilon=1e-7)
model4 = OptimizedFunction3D()
opt_momentum = Momentum(init_lr=0.05, model=model4, rho=0.9)
model5 = OptimizedFunction3D()
opt_adam = Adam(init_lr=0.05, model=model5, beta1=0.9, beta2=0.99, epsilon=1e-7)

models = [model5, model2, model3, model4, model1]
opts = [opt_adam, opt_adagrad, opt_rmsprop, opt_momentum, opt_gd]

x_all_opts = []
z_all_opts = []
# 使用不同優(yōu)化器訓(xùn)練
for model, opt in zip(models, opts):
    x_init = torch.FloatTensor([0.00001, 0.5])
    x_one_opt, z_one_opt = train_f(model, opt, x_init, 100)  # epoch
    # 保存參數(shù)值
    x_all_opts.append(x_one_opt.numpy())
    z_all_opts.append(np.squeeze(z_one_opt))

# 使用numpy.meshgrid生成x1,x2矩陣,矩陣的每一行為[-3, 3],以0.1為間隔的數(shù)值
x1 = np.arange(-1, 2, 0.01)
x2 = np.arange(-1, 1, 0.05)
x1, x2 = np.meshgrid(x1, x2)
init_x = torch.Tensor(np.array([x1, x2]))

model = OptimizedFunction3D()

# 繪制 f_3d函數(shù) 的 三維圖像
fig = plt.figure()
ax = plt.axes(projection='3d')
X = init_x[0].numpy()
Y = init_x[1].numpy()
Z = model(init_x).numpy()  # 改為 model(init_x).numpy() David 2022.12.4
surf = ax.plot_surface(X, Y, Z, edgecolor='grey', cmap=cm.coolwarm)
# fig.colorbar(surf, shrink=0.5, aspect=1)
ax.set_zlim(-3, 2)
ax.set_xlabel('x1')
ax.set_ylabel('x2')
ax.set_zlabel('f(x1,x2)')

labels = ['Adam', 'AdaGrad', 'RMSprop', 'Momentum', 'SGD']
colors = ['#8B0000', '#0000FF', '#000000', '#008B00', '#FF0000']

animator = Visualization3D(*x_all_opts, z_values=z_all_opts, labels=labels, colors=colors, fig=fig, ax=ax)
ax.legend(loc='upper right')
plt.show()
# animator.save('animation.gif')  # 效果不好,估計(jì)被擋住了…… 有待進(jìn)一步提高 Edit by David 2022.12.4
4. Drawing on the 3D animations, explain in your own words the strengths and weaknesses of each algorithm in terms of trajectory, speed, and other aspects
1. SGD
Judging from the animation, SGD traces a zigzag; the path is not smooth, and in the plot above it falls into a local minimum and cannot get back out. (The update rule is written out below.)
Advantages: 1. For large datasets it is fast, because each step only needs the gradient of a single sample.
2. Its per-step computational cost is low, again because only one sample's gradient is computed.
Disadvantages: 1. It oscillates heavily, producing the zigzag pattern.
2. It easily gets stuck in local minima.
3. It is sensitive to noise: if a noisy sample happens to be chosen for the update, the step goes off in the wrong direction.
4. The learning rate has to be tuned to a suitable value by hand.
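For reference, the update that SimpleBatchGD.step above implements is simply

$$\theta_{t+1} = \theta_t - \eta\, g_t$$

where $g_t$ is the current gradient and $\eta$ is init_lr; there is no state besides the parameters themselves, which is why nothing damps the zigzag.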
2. AdaGrad
In the animation the blue line (AdaGrad) updates quickly at first and then gradually slows down, but it is also clearly the smoothest trajectory. (The update rule is written out below.)
Advantages: 1. AdaGrad's speed benefits from its adaptive learning rate: it adjusts the step of each parameter based on that parameter's gradient history, which makes the updates more effective.
2. For problems with sparse gradients AdaGrad can be especially effective, because the per-parameter learning rate is set from the history of that parameter's gradients (this follows directly from the formula).
Disadvantages: 1. As training goes on, the accumulated squared gradients keep pushing the learning rate down, so late in training the updates may become too small for the model to keep converging.
2. The decay can be too fast, which effectively stops training early.
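The update rule, as implemented in Adagrad.adagrad above, is

$$G_t = G_{t-1} + g_t^2, \qquad \theta_{t+1} = \theta_t - \frac{\eta}{\sqrt{G_t + \epsilon}}\, g_t$$

Since $G_t$ only ever grows, the effective step size can only shrink, which is exactly the decay problem listed in the disadvantages.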
3. RMSprop
Judging from the trajectory, it is not quite as smooth as AdaGrad overall but still smoother than the others, and it is fast; thanks to the adaptive learning rate, the RMSprop path tends to settle down as the optimization proceeds. (The update rule is written out below.)
Advantages: 1. RMSprop also has an adaptive learning rate: it adjusts the step with a moving average of the squared gradients, so each parameter gets its own effective learning rate.
2. Because of that adaptivity the path is smooth; in addition, old gradients are gradually forgotten, so it stays fast and fixes the early-stopping problem of AdaGrad.
Disadvantages: 1. Like AdaGrad, the effective learning rate can still shrink over time, which may make the updates too small to keep converging late in training.
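The update rule, matching RMSprop.rmsprop above, replaces AdaGrad's running sum with an exponentially weighted moving average:

$$G_t = \beta G_{t-1} + (1-\beta)\, g_t^2, \qquad \theta_{t+1} = \theta_t - \frac{\eta}{\sqrt{G_t + \epsilon}}\, g_t$$

With $\beta = 0.9$, the value used in the experiments, old gradients are gradually forgotten, which is what avoids AdaGrad's premature slowdown.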
4. Momentum
Judging from the path it is fast, but it can head off in the wrong direction; of all these methods it keeps pushing in one direction the longest, and the path segments are very straight. (The update rule is written out below.)
Advantages: 1. By accumulating momentum it converges faster, especially along flat or curved valleys, and generally does better than plain SGD.
2. The momentum term smooths the update path and damps oscillations, making the optimization more stable (when it does not take a wrong turn the path is quite smooth; when it does, the zigzag reappears).
Disadvantages: 1. On non-convex problems, momentum can shoot past the global optimum too quickly and then fail to settle down.
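The update rule, as in Momentum.momentum above, is

$$\Delta_t = \rho\, \Delta_{t-1} - \eta\, g_t, \qquad \theta_{t+1} = \theta_t + \Delta_t$$

with $\rho = 0.9$; the accumulated $\Delta_t$ is what keeps the trajectory pushing in one direction for so long, for better (speed) and for worse (overshooting).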
5. Nesterov
Judging from the path it can also take a wrong turn, but it is the first to correct its course, and it is the fastest; it probably corrects so quickly because Nesterov first takes a step with the current velocity v and then computes the gradient at that provisional, looked-ahead position. (The update rule is written out below.)
Advantages: 1. The look-ahead (it changes course the fastest) lets it converge more quickly, especially when the gradient landscape is complicated, and it outperforms standard momentum.
2. The path is smooth, so training is more stable.
3. It is fast.
Disadvantages:
Tuning is more involved, since there are more hyperparameters.
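Written in terms of the current parameters, the update implemented in Nesterov.nesterov above is

$$\Delta_t = \rho\, \Delta_{t-1} - \eta\, g_t, \qquad \theta_{t+1} = \theta_t - \rho\, \Delta_{t-1} + (1+\rho)\, \Delta_t$$

which is the usual reformulation of the look-ahead step: compared with plain momentum, the extra $\rho(\Delta_t - \Delta_{t-1})$ term is what corrects the course earlier.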
6. Adam
Judging from its path, it does not take wrong turns the way momentum does; it is neither wrong nor slow, sits somewhere in between, and is still fairly smooth. (The update rule is written out below.)
Advantages: 1. Its adaptive learning-rate mechanism adjusts each parameter's step from that parameter's gradient history, adapting to the characteristics of each parameter.
2. Its direction is good and its speed is not bad either.
Disadvantages: 1. Adam has to keep the first- and second-moment history for every parameter, so its memory footprint is higher, especially when there are many parameters.
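The update, matching Adam.adam above, combines both ideas with bias correction:

$$M_t = \beta_1 M_{t-1} + (1-\beta_1)\, g_t, \qquad G_t = \beta_2 G_{t-1} + (1-\beta_2)\, g_t^2$$

$$\hat{M}_t = \frac{M_t}{1-\beta_1^t}, \qquad \hat{G}_t = \frac{G_t}{1-\beta_2^t}, \qquad \theta_{t+1} = \theta_t - \frac{\eta}{\sqrt{\hat{G}_t + \epsilon}}\, \hat{M}_t$$

The first moment $\hat{M}_t$ smooths the direction like momentum, and the second moment $\hat{G}_t$ scales the step like RMSprop, which is why the trajectory is both well-directed and reasonably fast.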
Summary:
1. The very first experiment went wrong right away. I tried to reproduce the plots with a classmate's code, but only the first figure showed up and the second never appeared (like the panel on the left). The code looked fine to me, so I rewrote it my own way. Then both surfaces did appear, but on the same plot, and the second function looked a bit odd. The code still looked fine, so I guessed it was a scaling problem, plotted the two functions separately, like the figure at the bottom, and that did it!
The reason was that I had started out with a single canvas; once I added a second one everything was fine (a minimal sketch of the layout follows below).
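A minimal sketch of that fix, assuming Z1 and Z2 are the two surfaces already evaluated on a shared X, Y grid (the names here are just illustrative):

fig = plt.figure()
ax1 = fig.add_subplot(121, projection='3d')  # first canvas: left subplot for function 1
ax1.plot_surface(X, Y, Z1, cmap='rainbow')
ax2 = fig.add_subplot(122, projection='3d')  # second canvas: right subplot for function 2
ax2.plot_surface(X, Y, Z2, cmap='rainbow')
plt.show()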
2. The figure produced by the second piece of code initially could not be rotated, showed no trajectories, and, most annoyingly, closed itself after a while. After modifying part of the code I finally have my own animation like everyone else; it was not easy. Since other classmates seem to have hit the same problem, here is my modification:
class Visualization3D(animation.FuncAnimation):
    # ... (不變)

    def init_animation(self):
        # 數(shù)值初始化
        for line in self.lines:
            line.set_data([], [])
            line.set_3d_properties([])
        for point in self.points:
            point.set_data([], [])
            point.set_3d_properties([])
        return self.points + self.lines

    def animate(self, i):
        # 將x,y,z三個(gè)數(shù)據(jù)傳入,繪制三維圖像
        for line, xy_value, z_value, point in zip(self.lines, self.xy_values, self.z_values, self.points):
            line.set_data(xy_value[:i, 0], xy_value[:i, 1])
            line.set_3d_properties(z_value[:i])
            point.set_data(xy_value[i, 0], xy_value[i, 1])
            point.set_3d_properties(z_value[i])
        return self.points + self.lines

# (后面的代碼不變)
# 構(gòu)建5個(gè)模型,分別配備不同的優(yōu)化器
# ... (不變)
# 使用不同優(yōu)化器訓(xùn)練
# ... (不變)
# 使用numpy.meshgrid生成x1,x2矩陣,矩陣的每一行為[-10, 10],以0.01為間隔的數(shù)值
# ... (不變)

# 繪制 f_3d函數(shù) 的 三維圖像
fig = plt.figure()
ax = plt.axes(projection='3d')
X = init_x[0].numpy()
Y = init_x[1].numpy()
Z = model(init_x).numpy()
surf = ax.plot_surface(X, Y, Z, edgecolor='grey', cmap=cm.coolwarm)
ax.set_xlabel('x1')
ax.set_ylabel('x2')
ax.set_zlabel('f(x1,x2)')

# 添加軌跡圖
labels = ['SGD', 'AdaGrad', 'RMSprop', 'Momentum', 'Adam']
colors = ['#8B0000', '#0000FF', '#000000', '#008B00', '#FF0000']
for x_opt, z_opt, label, color in zip(x_all_opts, z_all_opts, labels, colors):
    ax.plot(x_opt[:, 0], x_opt[:, 1], z_opt, label=label, color=color)
ax.legend(loc='upper right')

# 修改下面這行,將Visualization3D的初始化參數(shù)中的fig和ax改為ax.figure和ax
animator = Visualization3D(*x_all_opts, z_values=z_all_opts, labels=labels, colors=colors, fig=ax.figure, ax=ax)
plt.show()
I made the changes for the following reasons:
In the original code, the implementation of the init_animation and animate methods of the Visualization3D class had some problems, which could prevent the trajectories from being displayed correctly. The original implementation used set_data_3d, but that call may not have been setting the z-axis values correctly, so the trajectories were not shown properly in 3D space.
The Visualization3D class takes fig and ax as initialization arguments, and during the animation ax is the axes actually being animated. In the original code, fig and ax were passed to Visualization3D separately, but while the animation runs it is ax's figure attribute that is the correct Figure object. So I changed the initialization arguments of Visualization3D from fig and ax to ax.figure and ax, to make sure Visualization3D is attached to the existing ax.
I also updated the init_animation and animate methods. In init_animation I replaced the calls to line.set_data_3d and point.set_data_3d with line.set_data and point.set_data, and added set_3d_properties to set the z values. I made the same kind of change in animate, so that the trajectory data is updated correctly while the animation plays.
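Stripped down, the per-frame update pattern I ended up with looks like this (i is the current frame index):

def animate(self, i):
    for line, xy_value, z_value in zip(self.lines, self.xy_values, self.z_values):
        line.set_data(xy_value[:i, 0], xy_value[:i, 1])  # x and y of the first i steps
        line.set_3d_properties(z_value[:i])              # z has to be set separately
    return self.lines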
References:
NNDL 作業(yè)13 優(yōu)化算法3D可視化-CSDN博客
NNDL實(shí)驗(yàn) 優(yōu)化算法3D軌跡 復(fù)現(xiàn)cs231經(jīng)典動(dòng)畫(huà)_深度學(xué)習(xí) 優(yōu)化算法 動(dòng)畫(huà)展示-CSDN博客
【23-24 秋學(xué)期】NNDL 作業(yè)13 優(yōu)化算法3D可視化-CSDN博客
3. Another good day is over; I learned quite a lot and hope I won't have forgotten it all after a night's sleep!
NNDL is finished. Done! Confetti!
And a thumbs-up for the teacher, who really worked hard reading the academic rubbish I churned out all semester.