1. Import libraries
Import the required libraries, including PyTorch, pandas, and scikit-learn.
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.datasets import fetch_california_housing
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import SGD
import torch.utils.data as Data
import matplotlib.pyplot as plt
import seaborn as sns
2. Data preparation
This example uses the California housing dataset bundled with scikit-learn. The first run downloads the dataset, so it is worth saving it to a CSV file afterwards and re-reading it from disk on later runs.
The complete code in Section 9 follows the same download-once, save, then re-read approach.
# Load the data
housedata = fetch_california_housing()  # downloads the dataset on the first run
data_x, data_y = housedata.data, housedata.target  # features and labels
data_df = pd.DataFrame(data=data_x, columns=housedata.feature_names)  # wrap the features in a DataFrame
data_df['target'] = data_y  # append the label column
data_df.to_csv("california_housing.csv", index=False)  # save to CSV; index=False avoids writing an extra index column
housedata_df = pd.read_csv("california_housing.csv")  # re-read the data from disk
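As a quick sanity check (a hypothetical addition, not part of the original post), you can confirm the re-read file has the expected 8 feature columns plus the target:
# Sanity check: 8 feature columns plus 'target' (assumes index=False was used when saving)
print(housedata_df.shape)               # expected: (20640, 9)
print(housedata_df.columns.tolist())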
3. Train/test split
# Split into training and test sets
housedata = housedata_df.values  # convert the DataFrame to a NumPy array first
X_train, X_test, y_train, y_test = train_test_split(housedata[:, :-1], housedata[:, -1], test_size=0.3, random_state=42)
4. Standardization
# Standardize the features
scale = StandardScaler()
x_train_std = scale.fit_transform(X_train)
x_test_std = scale.transform(X_test)
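The scaler is fitted on the training split only and then reused on the test split, so no test-set statistics leak into training. If you want to inspect what was learned (an optional check, not in the original code), StandardScaler exposes the fitted statistics:
# Per-feature statistics estimated from the training split
print(scale.mean_)   # feature means
print(scale.scale_)  # feature standard deviations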
5. Convert the data to tensors
# Convert the datasets to tensors
X_train_t = torch.from_numpy(x_train_std.astype(np.float32))
y_train_t = torch.from_numpy(y_train.astype(np.float32))
X_test_t = torch.from_numpy(x_test_std.astype(np.float32))
y_test_t = torch.from_numpy(y_test.astype(np.float32))
# Wrap the training data in a DataLoader
train_data = Data.TensorDataset(X_train_t, y_train_t)
test_data = Data.TensorDataset(X_test_t, y_test_t)
train_loader = Data.DataLoader(dataset=train_data, batch_size=64, shuffle=True, num_workers=1)
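Before training, it can be worth pulling a single batch to confirm the loader yields the expected shapes (a minimal sketch, not in the original; note that num_workers=1 spawns a worker process, so on Windows the script should be wrapped in an `if __name__ == '__main__':` guard):
# Fetch one batch to verify shapes
b_x, b_y = next(iter(train_loader))
print(b_x.shape)  # torch.Size([64, 8])
print(b_y.shape)  # torch.Size([64])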
6. Build the model
# A fully connected neural network for regression
class FNN_Regression(nn.Module):
    def __init__(self):
        super(FNN_Regression, self).__init__()
        # First hidden layer
        self.hidden1 = nn.Linear(in_features=8, out_features=100, bias=True)
        # Second hidden layer
        self.hidden2 = nn.Linear(100, 100)
        # Third hidden layer
        self.hidden3 = nn.Linear(100, 50)
        # Regression output layer
        self.predict = nn.Linear(50, 1)

    # Forward pass
    def forward(self, x):
        x = F.relu(self.hidden1(x))
        x = F.relu(self.hidden2(x))
        x = F.relu(self.hidden3(x))
        output = self.predict(x)
        # Return a 1-D vector (drop the trailing dimension)
        return output[:, 0]
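Since predict produces shape (batch, 1) and forward returns output[:, 0], the network maps a (batch, 8) input to a 1-D vector of length batch. A quick dry run with random data (a hypothetical check, not in the original) confirms the shapes:
# Dry run: 4 random samples with 8 features each
net = FNN_Regression()
dummy = torch.randn(4, 8)
print(net(dummy).shape)  # torch.Size([4])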
7. Train the model
# Instantiate the network and define the optimizer
testnet = FNN_Regression()
optimizer = torch.optim.SGD(testnet.parameters(), lr=0.01)
loss_func = nn.MSELoss()  # mean squared error loss
train_loss_all = []
# Train the model for a fixed number of epochs
for epoch in range(30):
    train_loss = 0
    train_num = 0
    # Iterate over the training DataLoader
    for step, (b_x, b_y) in enumerate(train_loader):
        output = testnet(b_x)          # MLP output on the current batch
        loss = loss_func(output, b_y)  # mean squared error on the batch
        optimizer.zero_grad()          # reset gradients for this iteration
        loss.backward()                # backpropagate to compute gradients
        optimizer.step()               # update parameters with the gradients
        train_loss += loss.item() * b_x.size(0)
        train_num += b_x.size(0)
    train_loss_all.append(train_loss / train_num)
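To monitor convergence, an optional addition (not in the original) is a progress printout; placed at the end of the epoch loop it reports the running average loss once per epoch:
# Optional: report progress once per epoch (place inside the epoch loop)
print(f'epoch {epoch + 1:2d}: train loss = {train_loss / train_num:.4f}')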
8. Evaluate the model
y_pre = testnet(X_test_t)
y_pre = y_pre.detach().numpy()  # detach from the graph before converting to NumPy
mae = mean_absolute_error(y_test, y_pre)
print('Mean absolute error on the test set:', mae)
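mean_squared_error is imported above but never used; if you also want the root mean squared error on the test set, it is a one-liner:
# RMSE on the test set, reusing the already-imported mean_squared_error
rmse = np.sqrt(mean_squared_error(y_test, y_pre))
print('Root mean squared error on the test set:', rmse)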
9. Complete code
# -*- coding: utf-8 -*-
# @Time : 2023/8/11 15:58
# @Author : huangjian
# @Email : huangjian013@126.com
# @File : FNN_demo.py
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.datasets import fetch_california_housing
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import SGD
import torch.utils.data as Data
from torchsummary import summary
from torchviz import make_dot
import matplotlib.pyplot as plt
import seaborn as sns
# A fully connected neural network for regression
class FNN_Regression(nn.Module):
    def __init__(self):
        super(FNN_Regression, self).__init__()
        # First hidden layer
        self.hidden1 = nn.Linear(in_features=8, out_features=100, bias=True)
        # Second hidden layer
        self.hidden2 = nn.Linear(100, 100)
        # Third hidden layer
        self.hidden3 = nn.Linear(100, 50)
        # Regression output layer
        self.predict = nn.Linear(50, 1)

    # Forward pass
    def forward(self, x):
        x = F.relu(self.hidden1(x))
        x = F.relu(self.hidden2(x))
        x = F.relu(self.hidden3(x))
        output = self.predict(x)
        # Return a 1-D vector (drop the trailing dimension)
        return output[:, 0]
# Load the data (assumes the CSV was saved earlier with index=False, as in Section 2)
housedata_df = pd.read_csv("california_housing.csv")
housedata = housedata_df.values
# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(housedata[:, :-1], housedata[:, -1], test_size=0.3, random_state=42)
# Standardize the features
scale = StandardScaler()
x_train_std = scale.fit_transform(X_train)
x_test_std = scale.transform(X_test)
# Correlation heatmap of all variables
datacor = np.corrcoef(housedata_df.values, rowvar=False)
datacor = pd.DataFrame(data=datacor, columns=housedata_df.columns, index=housedata_df.columns)
plt.figure(figsize=(8, 6))
ax = sns.heatmap(datacor, square=True, annot=True, fmt='.3f', linewidths=.5, cmap='YlGnBu',
cbar_kws={'fraction': 0.046, 'pad': 0.03})
plt.show()
# Convert the datasets to tensors
X_train_t = torch.from_numpy(x_train_std.astype(np.float32))
y_train_t = torch.from_numpy(y_train.astype(np.float32))
X_test_t = torch.from_numpy(x_test_std.astype(np.float32))
y_test_t = torch.from_numpy(y_test.astype(np.float32))
# Wrap the training data in a DataLoader
train_data = Data.TensorDataset(X_train_t, y_train_t)
test_data = Data.TensorDataset(X_test_t, y_test_t)
train_loader = Data.DataLoader(dataset=train_data, batch_size=64, shuffle=True, num_workers=1)
# Print the network structure
testnet = FNN_Regression()
summary(testnet, input_size=(8,))  # each sample has 8 features; torchsummary adds the batch dimension itself
# Visualize the computation graph with torchviz
x = torch.randn(1, 8).requires_grad_(True)
y = testnet(x)
myMLP_vis = make_dot(y, params=dict(list(testnet.named_parameters()) + [('x', x)]))
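# Optional (assumes the graphviz binaries are installed): write the graph to disk
# myMLP_vis.render('FNN_Regression_graph', format='png')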
# Define the optimizer
optimizer = torch.optim.SGD(testnet.parameters(), lr=0.01)
loss_func = nn.MSELoss()  # mean squared error loss
train_loss_all = []
# Train the model for a fixed number of epochs
for epoch in range(30):
    train_loss = 0
    train_num = 0
    # Iterate over the training DataLoader
    for step, (b_x, b_y) in enumerate(train_loader):
        output = testnet(b_x)          # MLP output on the current batch
        loss = loss_func(output, b_y)  # mean squared error on the batch
        optimizer.zero_grad()          # reset gradients for this iteration
        loss.backward()                # backpropagate to compute gradients
        optimizer.step()               # update parameters with the gradients
        train_loss += loss.item() * b_x.size(0)
        train_num += b_x.size(0)
    train_loss_all.append(train_loss / train_num)
# Plot the training loss curve
plt.figure(figsize=(8, 6))
plt.plot(train_loss_all, 'ro-', label='Train loss')
plt.legend()
plt.grid()
plt.xlabel('epoch')
plt.ylabel('Loss')
plt.show()
y_pre = testnet(X_test_t)
y_pre = y_pre.detach().numpy()  # detach from the graph before converting to NumPy
mae = mean_absolute_error(y_test, y_pre)
print('Mean absolute error on the test set:', mae)
# Visualize the fit on the test data
index = np.argsort(y_test)
plt.figure(figsize=(8, 6))
plt.plot(np.arange(len(y_test)), y_test[index], 'r', label='Original Y')
plt.scatter(np.arange(len(y_pre)), y_pre[index], s=3, c='b', label='Prediction')
plt.legend(loc='upper left')
plt.grid()
plt.xlabel('Index')
plt.ylabel('Y')
plt.show()
This concludes the walkthrough of using your own data to build a fully connected neural network for regression with PyTorch.