refer:
- [PyTorch] Default initialization methods of each network layer (【Pytorch】各网络层的默认初始化方法)
  https://blog.csdn.net/guofei_fly/article/details/105109883
20230625
PyTorch's default initialization actually lives in each layer's own `def reset_parameters(self) -> None:` method.
You might ask why calling this method yourself does not reproduce the initial weights PyTorch gives you out of the box. Single-stepping shows that `reset_parameters` runs at least twice, and each run draws different values, so the result also depends on how many times the random number generator has already been consumed before the call. No matter how often it is called, though, the values still follow the intended distribution. See the note on python/pytorch random seeds: https://blog.csdn.net/qq_43369406/article/details/131342983
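A minimal sketch of this behaviour (the layer shape and the seed value 416 are my own choices): re-seeding right before `reset_parameters()` makes the default draw reproducible, while calling it repeatedly without re-seeding gives fresh values each time.

import torch
from torch import nn

layer = nn.Linear(4, 3)

# Without re-seeding, every call to reset_parameters() consumes the RNG again
# and therefore produces different weights.
layer.reset_parameters()
w1 = layer.weight.detach().clone()
layer.reset_parameters()
w2 = layer.weight.detach().clone()
print(torch.equal(w1, w2))  # False

# Re-seeding right before the call makes the default init reproducible.
torch.manual_seed(416)
layer.reset_parameters()
w3 = layer.weight.detach().clone()
torch.manual_seed(416)
layer.reset_parameters()
w4 = layer.weight.detach().clone()
print(torch.equal(w3, w4))  # True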
"""init param"""
# !!write above on the first line!!
import random, numpy as np, torch
# set random seed
seed = 416
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
def _weights_init(m):
    """
    intro:
        weights init.
        finish these:
        - torch.nn.Linear
    >>> version 1.0.0
        if type(m) == nn.Linear:
            print("Init", *[(name, param.shape) for name, param in m.named_parameters()][0])  # linear - param - weight
            nn.init.trunc_normal_(m.weight, std=.01)
            if m.bias is not None:
                print("Init", *[(name, param.shape) for name, param in m.named_parameters()][1])  # linear - param - bias
                nn.init.zeros_(m.bias)
        elif classname.startswith('Conv'):
            m.weight.data.normal_(0.0, 0.02)
    >>> version 1.0.1
        refer https://blog.csdn.net/guofei_fly/article/details/105109883
        finish nn.Linear, nn.Conv

    args:
        :param nn.Module m: submodule passed in by `net.apply(...)`
    """
    classname = m.__class__.__name__
    if type(m) == nn.Linear or classname.startswith("Conv"):
        # Reproduce PyTorch's own default: kaiming_uniform_ with a=sqrt(5) for the
        # weight, and a uniform bias in [-1/sqrt(fan_in), 1/sqrt(fan_in)].
        print("Init", *[(name, param.shape) for name, param in m.named_parameters()][0])  # linear - param - weight
        nn.init.kaiming_uniform_(m.weight, a=math.sqrt(5), nonlinearity='leaky_relu')
        if m.bias is not None:
            print("Init", *[(name, param.shape) for name, param in m.named_parameters()][1])  # linear - param - bias
            fan_in, _ = nn.init._calculate_fan_in_and_fan_out(m.weight)
            bound = 1 / math.sqrt(fan_in)
            nn.init.uniform_(m.bias, -bound, bound)
    elif isinstance(m, nn.Conv2d):
        # Note: never reached for Conv2d, because the branch above already matches
        # any class name starting with "Conv".
        nn.init.kaiming_normal_(m.weight, mode="fan_out")
        if m.bias is not None:
            nn.init.zeros_(m.bias)
    elif isinstance(m, nn.LayerNorm):
        nn.init.zeros_(m.bias)
        nn.init.ones_(m.weight)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)
net = nn.Sequential(
    nn.Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=1),
    nn.LazyLinear(8),
    nn.ReLU(),
    nn.LazyLinear(1),
)
X = torch.rand(size=(1, 3, 224, 224))  # [batch_size, channel, height, width]
Y = net(X)  # forward pass first, so the LazyLinear layers are materialized
net.apply(_weights_init)
# check param
print(net[0].weight, '\n', net[0].bias)
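A quick sanity check I added (run right after the snippet above): with a=sqrt(5), the kaiming-uniform bound works out to 1/sqrt(fan_in), so every re-initialized Conv2d weight should fall inside it.

fan_in, _ = nn.init._calculate_fan_in_and_fan_out(net[0].weight)  # 3 * 3 * 3 = 27 here
bound = 1 / math.sqrt(fan_in)
print((net[0].weight.abs() <= bound).all())  # expected: tensor(True)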
20230622
# !! put this seed-setting block at the very top of your script !!
import random
import numpy as np, torch
from torch import nn
# set random seed; `args` is assumed to come from your own argparse setup
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
torch.cuda.manual_seed(args.seed)
def _weights_init(m):
    """
    intro:
        weights init.
        finish these:
        - torch.nn.Linear
    >>> version 1.0.0
        if type(m) == nn.Linear:
            print("Init", *[(name, param.shape) for name, param in m.named_parameters()][0])  # linear - param - weight
            nn.init.trunc_normal_(m.weight, std=.01)
            if m.bias is not None:
                print("Init", *[(name, param.shape) for name, param in m.named_parameters()][1])  # linear - param - bias
                nn.init.zeros_(m.bias)

    args:
        :param nn.Module m: submodule passed in by `net.apply(...)`
    """
    classname = m.__class__.__name__
    if type(m) == nn.Linear:
        print("Init", *[(name, param.shape) for name, param in m.named_parameters()][0])  # linear - param - weight
        nn.init.trunc_normal_(m.weight, std=.01)
        if m.bias is not None:
            print("Init", *[(name, param.shape) for name, param in m.named_parameters()][1])  # linear - param - bias
            nn.init.zeros_(m.bias)
    elif isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode="fan_out")
        if m.bias is not None:
            nn.init.zeros_(m.bias)
    elif isinstance(m, nn.LayerNorm):
        nn.init.zeros_(m.bias)
        nn.init.ones_(m.weight)
    elif classname.startswith('Conv'):
        # only reached for conv layers other than nn.Conv2d (e.g. Conv1d / Conv3d)
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)
net = nn.Sequential(nn.LazyLinear(8), nn.ReLU(), nn.LazyLinear(1))
X = torch.rand(size=(2, 4))
net(X)  # dry run first, so the LazyLinear layers get real shapes before init
net.apply(_weights_init)
x.1 Fixing the randomness
A normal distribution is still a distribution, so drawing from it means your results cannot be reproduced exactly. To make an experiment fully reproducible you have to fix the seed, as follows:
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
torch.cuda.manual_seed(args.seed)
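For reference, a fuller sketch of the same idea (the helper name is my own; the cuDNN flags are optional extras that trade speed for determinism):

import random
import numpy as np
import torch

def seed_everything(seed: int) -> None:
    """Fix every RNG these notes rely on, so weight init and training are reproducible."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)           # seed all GPUs, not just the current one
    torch.backends.cudnn.deterministic = True  # pick deterministic cuDNN kernels
    torch.backends.cudnn.benchmark = False     # disable nondeterministic autotuning

seed_everything(416)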
x.2 Parameter initialization
Parameter initialization is important work: the data decides what the mountain that gradient descent walks down looks like, while the initialization decides where on that mountain you start. PyTorch provides several initialization methods; combined with `nn.Module.apply` and an init function you write yourself, they let you initialize the parameters of every child module contained in an `nn.Module`, as follows.
Common built-in initialization methods:
"""
6.3.1. Built-in Initialization
using built-in func to init.
- `nn.init.normal_(module.weight, mean=0, std=0.01)`
- `nn.init.zeros_(module.bias)`
- `nn.init.constant_(module.weight, 1)`
- `nn.init.zeros_(module.bias)`
- `nn.init.xavier_uniform_(module.weight)`
- `nn.init.kaiming_uniform_(module.weight)` # what PyTorch's default Linear init is based on (leaky_relu gain)
- `nn.init.uniform_(module.weight, -10, 10)`
"""
Using one of these built-ins to initialize:
def init_normal(module):
    if type(module) == nn.Linear:
        nn.init.normal_(module.weight, mean=0, std=0.01)
        nn.init.zeros_(module.bias)

net.apply(init_normal)
print(net[0].weight.data[0])
print(net[0].bias.data[0])
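Since `apply` can also be called on an individual sub-module, you can mix built-in initializers per layer (a small sketch continuing with the `net` defined above; the helper names are my own):

def init_xavier(module):
    if type(module) == nn.Linear:
        nn.init.xavier_uniform_(module.weight)

def init_constant(module):
    if type(module) == nn.Linear:
        nn.init.constant_(module.weight, 1)

net[0].apply(init_xavier)    # first Linear: Xavier-uniform weights
net[2].apply(init_constant)  # last Linear: constant weights
print(net[0].weight.data[0])
print(net[2].weight.data)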
Initializing with an init function you write yourself:
def _weights_init(m):
    """
    intro:
        weights init.
        finish these:
        - torch.nn.Linear
    >>> version 1.0.0
        if type(m) == nn.Linear:
            print("Init", *[(name, param.shape) for name, param in m.named_parameters()][0])  # linear - param - weight
            nn.init.trunc_normal_(m.weight, std=.01)
            if m.bias is not None:
                print("Init", *[(name, param.shape) for name, param in m.named_parameters()][1])  # linear - param - bias
                nn.init.zeros_(m.bias)

    args:
        :param nn.Module m: submodule passed in by `net.apply(...)`
    """
    classname = m.__class__.__name__
    if type(m) == nn.Linear:
        print("Init", *[(name, param.shape) for name, param in m.named_parameters()][0])  # linear - param - weight
        nn.init.trunc_normal_(m.weight, std=.01)
        if m.bias is not None:
            print("Init", *[(name, param.shape) for name, param in m.named_parameters()][1])  # linear - param - bias
            nn.init.zeros_(m.bias)
    elif isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode="fan_out")
        if m.bias is not None:
            nn.init.zeros_(m.bias)
    elif isinstance(m, nn.LayerNorm):
        nn.init.zeros_(m.bias)
        nn.init.ones_(m.weight)
    elif classname.startswith('Conv'):
        # only reached for conv layers other than nn.Conv2d (e.g. Conv1d / Conv3d)
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)

net = nn.Sequential(nn.LazyLinear(8), nn.ReLU(), nn.LazyLinear(1))
X = torch.rand(size=(2, 4))
net(X)  # dry run first, so the LazyLinear layers get real shapes before init
net.apply(_weights_init)