# In [2]:
# `target` holds the label values, i.e. the class of each sample.
# load_iris() returns a dict-like container rather than a plain dict
# (the original note called it "an ordinary hash type").
from sklearn import datasets

iris = datasets.load_iris()
# In [3]:
# DESCR holds the dataset description in English; print() it so the many \n and \t escapes are rendered.
# print(iris.DESCR)
# In [4]:
# Pull the feature array, the label array and the label names off the
# loaded dataset object so later cells can use short names.
data = iris.data
target = iris.target
target_names = iris.target_names
# In [5]:
from sklearn.model_selection import train_test_split
# In [26]:
# Hold out 10% of the samples for testing. random_state pins the shuffle so a
# fresh "Restart & Run All" reproduces the same split; the scores recorded in
# this export came from an unseeded run and may differ from a re-run.
X_train, X_test, y_train, y_test = train_test_split(
    data, target, test_size=0.1, random_state=42
)
# In [27]:
from sklearn.neighbors import KNeighborsClassifier

# Baseline model: k-nearest-neighbours with k=5, fitted on the training split.
knn = KNeighborsClassifier(n_neighbors=5, n_jobs=2)
knn.fit(X_train, y_train)
# Out[27]:
# KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
# metric_params=None, n_jobs=2, n_neighbors=5, p=2,
# weights='uniform')
# In [28]:
# Score the baseline model on the same split it was fitted on.
knn.score(X_train, y_train)
# Out[28]:
# 0.9703703703703703
# In [29]:
y_pred = knn.predict(X_test)
# In [30]:
import pandas as pd
import numpy as np

# Test accuracy: fraction of held-out labels that were predicted correctly.
# The mean of the boolean match array equals matches / len(y_test).
np.mean(y_test == y_pred)
# Out[30]:
# 0.9333333333333333
# In [15]:
import matplotlib.pyplot as plt

# Scatter of the first two feature columns, coloured by class label.
fig, ax = plt.subplots()
ax.scatter(data[:, 0], data[:, 1], c=target)
ax.set(
    xlabel=iris.feature_names[0],
    ylabel=iris.feature_names[1],
    title="Iris: first two features, coloured by class",
)
# plt.show() renders the figure without echoing the PathCollection repr
# as the cell's output.
plt.show()
# In [83]:
from sklearn.model_selection import GridSearchCV
# In [84]:
# Rebinds `knn` to a fresh, unfitted estimator with default settings; the
# fitted k=5 model from In [27] is no longer reachable under this name.
knn = KNeighborsClassifier()
# In [85]:
# Candidate neighbourhood sizes: k = 1 .. 25.
param_grid = {"n_neighbors": list(range(1, 26))}
# In [86]:
# Search over param_grid with cv=3 and n_jobs=2.
# NOTE(review): `gc` would shadow the stdlib module of the same name if that
# module is ever imported in this notebook; the name is kept because later
# cells read it.
gc = GridSearchCV(estimator=knn, param_grid=param_grid, cv=3, n_jobs=2)
# In [87]:
# Run the grid search on the training split only; the test split stays unseen.
gc.fit(X_train, y_train)
# Out[87]:
# GridSearchCV(cv=3, error_score='raise',
# estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
# metric_params=None, n_jobs=1, n_neighbors=5, p=2,
# weights='uniform'),
# fit_params=None, iid=True, n_jobs=2,
# param_grid={'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]},
# pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
# scoring=None, verbose=0)
# In [88]:
# Winning parameter setting found by the search.
best_pa = gc.best_params_
# In [89]:
# Estimator exposed by the fitted search (the recorded repr shows refit=True).
best_knn = gc.best_estimator_
# In [90]:
# Best score reported by the search.
gc.best_score_
# Out[90]:
# 0.9703703703703703
# In [91]:
# Score the selected estimator on the training split ...
best_knn.score(X_train, y_train)
# Out[91]:
# 0.9703703703703703
# In [92]:
# ... and on the held-out test split.
best_knn.score(X_test, y_test)
# Out[92]:
# 0.9333333333333333
# In [ ]: