import matplotlib.pylab as plt
from scipy import interp
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_curve,auc
from sklearn.model_selection import StratifiedKFold
import matplotlib.patches as patches
import numpy as np
import pandas as pd
# Load the data set, then show the feature names, the per-label sample
# counts and the frame summary.
data = pd.read_csv('./voice.csv')
print(data.columns)
label_value_count = data['label'].value_counts()
print(label_value_count)
# DataFrame.info() prints its report itself and returns None, so the
# surrounding print() additionally emits a trailing "None" line.
print(data.info())
Index(['meanfreq', 'sd', 'median', 'Q25', 'Q75', 'IQR', 'skew', 'kurt',
'sp.ent', 'sfm', 'mode', 'centroid', 'meanfun', 'minfun', 'maxfun',
'meandom', 'mindom', 'maxdom', 'dfrange', 'modindx', 'label'],
dtype='object')
male      1584
female    1584
Name: label, dtype: int64
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3168 entries, 0 to 3167
Data columns (total 21 columns):
meanfreq    3168 non-null float64
sd          3168 non-null float64
median      3168 non-null float64
Q25         3168 non-null float64
Q75         3168 non-null float64
IQR         3168 non-null float64
skew        3168 non-null float64
kurt        3168 non-null float64
sp.ent      3168 non-null float64
sfm         3168 non-null float64
mode        3168 non-null float64
centroid    3168 non-null float64
meanfun     3168 non-null float64
minfun      3168 non-null float64
maxfun      3168 non-null float64
meandom     3168 non-null float64
mindom      3168 non-null float64
maxdom      3168 non-null float64
dfrange     3168 non-null float64
modindx     3168 non-null float64
label       3168 non-null object
dtypes: float64(20), object(1)
memory usage: 519.9+ KB
None
可以看到 label 列的类型是 object(字符串),因此需要先将它转换为数值型。
# Encode the string labels as integers: male = 1, female = 0.
# A dedicated name is used so the builtin `dict` is not shadowed.
label_mapping = {'label': {'male': 1, 'female': 0}}
data.replace(label_mapping, inplace=True)
# Features are every column except the target; the target is `label`.
x = data.loc[:, data.columns != 'label']
y = data.loc[:, 'label']
# Seeded generator so that the whole run is reproducible.
# NOTE(review): the seed and the fold count were missing from the listing;
# 0 and 5 are assumed here -- confirm against the original experiment.
random_state = np.random.RandomState(0)
clf = RandomForestClassifier(random_state=random_state)
# Stratified, unshuffled K-fold splitter used for the cross-validated ROC.
cv = StratifiedKFold(n_splits=5, shuffle=False)
# Single-axes figure for the ROC curves; the equal aspect ratio keeps the
# chance diagonal at 45 degrees.
# NOTE(review): the figure size was missing from the listing; 12x12 inches
# is assumed.
fig1 = plt.figure(figsize=[12, 12])
ax1 = fig1.add_subplot(111, aspect='equal')
# Green arrow: from the centre toward the top-left corner (low FPR, high TPR).
ax1.add_patch(
    patches.Arrow(0.45, 0.5, -0.25, 0.25, width=0.3, color='green', alpha=0.5)
)
# Red arrow: from the centre toward the bottom-right corner (high FPR, low TPR).
ax1.add_patch(
    patches.Arrow(0.5, 0.45, 0.25, -0.25, width=0.3, color='red', alpha=0.5)
)
tprs = []  # per-fold TPR values resampled onto `mean_fpr`
aucs = []  # per-fold AUC values
# Common FPR grid so that the per-fold curves can be averaged point by point.
# NOTE(review): the grid arguments were missing from the listing; 100 points
# on [0, 1] are assumed.
mean_fpr = np.linspace(0, 1, 100)
for fold, (train, test) in enumerate(cv.split(x, y), start=1):
    clf.fit(x.iloc[train], y.iloc[train])
    prediction = clf.predict_proba(x.iloc[test])
    # cv.split yields integer positions, so the target is selected with
    # .iloc as well. Column 1 of predict_proba is taken as the score of the
    # positive class (label 1, assuming the 0/1 encoding of `label`).
    fpr, tpr, _ = roc_curve(y.iloc[test], prediction[:, 1])
    # np.interp replaces the deprecated scipy.interp alias.
    interp_tpr = np.interp(mean_fpr, fpr, tpr)
    # Pin the resampled curve to the origin: several ROC points can share
    # fpr == 0, which makes the interpolated value there ambiguous.
    interp_tpr[0] = 0.0
    tprs.append(interp_tpr)
    roc_auc = auc(fpr, tpr)
    aucs.append(roc_auc)
    # NOTE(review): the line width was missing from the listing; 2 is assumed.
    plt.plot(fpr, tpr, lw=2, alpha=0.3,
             label='ROC fold %d (AUC = %0.2f)' % (fold, roc_auc))
# Dashed diagonal from (0, 0) to (1, 1) as the chance-level reference.
# NOTE(review): line widths, the mean-curve alpha and the font sizes were
# missing from the listing; 2, 1 and 12 are assumed.
plt.plot([0, 1], [0, 1], linestyle='--', lw=2, color='black')
# Average the resampled per-fold curves across folds (axis 0) and compute
# the AUC of that mean curve.
mean_tpr = np.mean(tprs, axis=0)
mean_auc = auc(mean_fpr, mean_tpr)
plt.plot(mean_fpr, mean_tpr, color='blue',
         label=r'Mean ROC (AUC = %0.2f )' % (mean_auc), lw=2, alpha=1)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC')
plt.legend(loc="lower right")
# Captions placed next to the green and red arrows.
plt.text(0.32, 0.7, 'More accurate area', fontsize=12)
plt.text(0.63, 0.4, 'Less accurate area', fontsize=12)
plt.show()
在本例中,ROC 用于二分类。ROC 主要用于二分类问题,但实际上也可以扩展到多分类。
扫码关注腾讯云开发者
领取腾讯云代金券
Copyright © 2013 - 2025 Tencent Cloud. All Rights Reserved. 腾讯云 版权所有
深圳市腾讯计算机系统有限公司 ICP备案/许可证号:粤B2-20090059 深公网安备号 44030502008569
腾讯云计算(北京)有限责任公司 京ICP证150476号 | 京ICP备11018762号 | 京公网安备号11010802020287
Copyright © 2013 - 2025 Tencent Cloud.
All Rights Reserved. 腾讯云 版权所有