The dataset comes from the
UC Irvine Machine Learning Repository
a site that hosts datasets of many kinds (662 of them at the time of writing).
Today we will use the Sonar dataset, which was collected by bouncing sonar signals off metal and rock and recording the returned echoes.
The file "sonar.mines" contains 111 patterns obtained by bouncing sonar signals off a metal cylinder at various angles and under various conditions. The file "sonar.rocks" contains 97 patterns obtained from rocks under similar conditions. The transmitted sonar signal is a frequency-modulated chirp, rising in frequency. The data set contains signals obtained from a variety of different aspect angles, spanning 90 degrees for the cylinder and 180 degrees for the rock.
Each pattern is a set of 60 numbers in the range 0.0 to 1.0. Each number represents the energy within a particular frequency band, integrated over a certain period of time. The integration aperture for higher frequencies occurs later in time, since these frequencies are transmitted later during the chirp.
The label associated with each record contains the letter "R" if the object is a rock and "M" if it is a mine (metal cylinder). The numbers in the labels are in increasing order of aspect angle, but they do not encode the angle directly.
The above is the official description of the dataset.
The Sonar dataset contains 208 samples, each with 60 features; the last column is the class label, M or R, where M stands for mine (metal cylinder) and R for rock.
Next we write the code for a simple binary-classification neural network.
First, the imports:
import copy
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import tqdm
from sklearn.metrics import roc_curve
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.preprocessing import LabelEncoder
Load the data:
# load the data
data = pd.read_csv("sonar.csv", header=None)
X = data.iloc[:, 0:60]  # the 60 feature columns
y = data.iloc[:, 60]    # the label column (M / R)
# encode the string labels as integers: M -> 0, R -> 1
encoder = LabelEncoder()
encoder.fit(y)
y = encoder.transform(y)
# convert to tensors; y becomes a column vector to match the model output
X = torch.tensor(X.values, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.float32).reshape(-1, 1)
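As an optional sanity check, we can confirm the tensor shapes and see how LabelEncoder mapped the labels; it sorts the classes alphabetically, so M becomes 0 and R becomes 1:
# optional sanity check on shapes and label encoding
print(X.shape)           # torch.Size([208, 60])
print(y.shape)           # torch.Size([208, 1])
print(encoder.classes_)  # ['M' 'R'], i.e. M -> 0, R -> 1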
Now build the models. This time we will build two: a deeper network called Deep, with three hidden layers, and a shallow network called Wide, with a single (but wide) hidden layer.
class Wide(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(60, 180)  # one wide hidden layer
        self.relu = nn.ReLU()
        self.output = nn.Linear(180, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.relu(self.hidden(x))
        x = self.sigmoid(self.output(x))  # probability of class 1 (R)
        return x

class Deep(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(60, 60)  # three narrow hidden layers
        self.act1 = nn.ReLU()
        self.layer2 = nn.Linear(60, 60)
        self.act2 = nn.ReLU()
        self.layer3 = nn.Linear(60, 60)
        self.act3 = nn.ReLU()
        self.output = nn.Linear(60, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.act1(self.layer1(x))
        x = self.act2(self.layer2(x))
        x = self.act3(self.layer3(x))
        x = self.sigmoid(self.output(x))
        return x
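As an aside, the same two architectures can be written more compactly with nn.Sequential; the following sketch is equivalent to the classes above:
# equivalent compact definitions using nn.Sequential
wide = nn.Sequential(
    nn.Linear(60, 180), nn.ReLU(),
    nn.Linear(180, 1), nn.Sigmoid(),
)
deep = nn.Sequential(
    nn.Linear(60, 60), nn.ReLU(),
    nn.Linear(60, 60), nn.ReLU(),
    nn.Linear(60, 60), nn.ReLU(),
    nn.Linear(60, 1), nn.Sigmoid(),
)
The class-based style is more flexible when the forward pass needs custom logic; for plain feed-forward stacks the two are interchangeable.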
Then print the number of parameters in each network to compare their sizes:
model1=Wide()
model2=Deep()
print(sum([x.reshape(-1).shape[0] for x in model1.parameters()]))
print(sum([x.reshape(-1).shape[0] for x in model2.parameters()]))
# 11161
# 11041
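The counts are easy to verify by hand: nn.Linear(in_features, out_features) holds in_features * out_features weights plus out_features biases. As a quick check:
# Wide: hidden layer (60*180 + 180) + output layer (180*1 + 1)
print(60*180 + 180 + 180*1 + 1)   # 11161
# Deep: three hidden layers of (60*60 + 60) + output layer (60*1 + 1)
print(3*(60*60 + 60) + 60*1 + 1)  # 11041
So the two networks are sized to have nearly the same number of parameters, which keeps the wide-vs-deep comparison fair.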
Model training. The function below runs mini-batch gradient descent, evaluates validation accuracy after every epoch, and keeps a copy of the best weights seen, restoring them at the end:
def model_train(model, X_train, y_train, X_val, y_val):
    loss_fn = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.0001)
    n_epochs = 300
    batch_size = 10
    batch_start = torch.arange(0, len(X_train), batch_size)
    best_acc = -np.inf
    best_weights = None
    for epoch in range(n_epochs):
        model.train()
        with tqdm.tqdm(batch_start, unit="batch", mininterval=0, disable=False) as bar:
            bar.set_description(f"epoch {epoch}")
            for start in bar:
                X_batch = X_train[start:start+batch_size]
                y_batch = y_train[start:start+batch_size]
                # forward pass
                y_pred = model(X_batch)
                loss = loss_fn(y_pred, y_batch)
                # backward pass
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                # running metrics shown in the progress bar
                acc = (y_pred.round() == y_batch).float().mean()
                bar.set_postfix(
                    loss=float(loss),
                    acc=float(acc)
                )
        # evaluate on the validation set at the end of each epoch
        model.eval()
        with torch.no_grad():
            y_pred = model(X_val)
        acc = float((y_pred.round() == y_val).float().mean())
        # keep a copy of the best weights seen so far
        if acc > best_acc:
            best_acc = acc
            best_weights = copy.deepcopy(model.state_dict())
    # restore the best weights before returning
    model.load_state_dict(best_weights)
    return best_acc
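One detail worth noting: nn.BCELoss expects probabilities in [0, 1], which is why both models end with a Sigmoid. A common, numerically more stable variation (optional, and it would require dropping the final Sigmoid from the models) is nn.BCEWithLogitsLoss, which applies the sigmoid inside the loss:
# optional variation: train on raw logits with BCEWithLogitsLoss
# (the models would then omit nn.Sigmoid in forward)
loss_fn = nn.BCEWithLogitsLoss()
logits = torch.randn(10, 1)                     # stand-in for raw model output
targets = torch.randint(0, 2, (10, 1)).float()  # stand-in labels
loss = loss_fn(logits, targets)
# at inference time, probabilities are recovered with torch.sigmoid(logits)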
Next, hold out a test set, then estimate each model's accuracy with 5-fold cross-validation on the training portion:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, shuffle=True)
kfold = StratifiedKFold(n_splits=5, shuffle=True)
cv_scores_wide = []
for train, test in kfold.split(X_train, y_train):
    model = Wide()
    acc = model_train(model, X_train[train], y_train[train], X_train[test], y_train[test])
    print("accuracy (wide): %.2f" % acc)
    cv_scores_wide.append(acc)
cv_scores_deep = []
for train, test in kfold.split(X_train, y_train):
    model = Deep()
    acc = model_train(model, X_train[train], y_train[train], X_train[test], y_train[test])
    print("accuracy (deep): %.2f" % acc)
    cv_scores_deep.append(acc)
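StratifiedKFold keeps the class ratio roughly constant across folds, which matters on a small dataset like this one. An optional way to see that:
# optional: check that each fold preserves the fraction of class 1 (R)
for train, test in kfold.split(X_train, y_train):
    print("train: %.2f, test: %.2f" % (float(y_train[train].mean()),
                                       float(y_train[test].mean())))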
Finally come the usual statistics and plots: compare the two cross-validated accuracies, retrain the better architecture, and evaluate it on the held-out test set:
wide_acc = np.mean(cv_scores_wide)
wide_std = np.std(cv_scores_wide)
deep_acc = np.mean(cv_scores_deep)
deep_std = np.std(cv_scores_deep)
print("wide: %.2f%% (+/- %.2f%%)" % (wide_acc*100, wide_std*100))
print("deep: %.2f%% (+/- %.2f%%)" % (deep_acc*100, deep_std*100))
if wide_acc > deep_acc:
    print("retrain a wide model")
    model = Wide()
else:
    print("retrain a deep model")
    model = Deep()
acc = model_train(model, X_train, y_train, X_test, y_test)
print(f"final model accuracy: {acc*100:.2f}%")
model.eval()
with torch.no_grad():
    # show a few individual predictions next to the true labels
    for i in range(5):
        y_pred = model(X_test[i:i+1])
        print(f"{X_test[i].numpy()} -> {y_pred[0].numpy()} "
              f"(expected {y_test[i].numpy()})")
    # predictions for the whole test set, used for the ROC curve
    y_pred = model(X_test)
fpr, tpr, thresholds = roc_curve(y_test, y_pred)
plt.plot(fpr, tpr)
plt.show()
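Since we already have the test-set predictions, it is a small extra step (optional) to summarize the ROC curve with a single number via sklearn's roc_auc_score:
# optional: report the area under the ROC curve
from sklearn.metrics import roc_auc_score
print("AUC: %.4f" % roc_auc_score(y_test, y_pred))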
The ROC curve plot:
And the program's output:
Done.