import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed
# in 0.23; fall back to the standalone joblib package on modern versions.
try:
    from sklearn.externals import joblib  # scikit-learn < 0.23
except ImportError:
    import joblib

# Load the pickled iris data (150 samples) and wrap it in a DataFrame:
# columns 'a'..'d' are the four features, 'e' is the class label.
iris = joblib.load('later.pkl')
data = pd.DataFrame(iris, index=range(0, 150), columns=['a', 'b', 'c', 'd', 'e'])
X = data.loc[:, 'a':'d']  # feature matrix
y = data['e']             # target labels
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier

# Candidate neighbour counts. This must be an ordered list — the original
# iterated a set, whose iteration order is not guaranteed, while the plot
# below pairs mean/std with the ordered x-axis [1, 4, 7, 10, 13, 16].
k_candidates = [1, 4, 7, 10, 13, 16]
mean = []  # mean 5-fold CV accuracy for each k
std = []   # standard deviation of the CV accuracy for each k
for k in k_candidates:
    # Distance-weighted voting (closer neighbours count more);
    # p=2 selects the Euclidean metric.  (Alternative: weights="uniform".)
    knn_clf = KNeighborsClassifier(weights="distance", n_neighbors=k, p=2)
    scores = cross_val_score(knn_clf, X, y, cv=5)
    mean.append(np.mean(scores))
    std.append(np.std(scores))
# Global grid styling: dashed grid lines.  A dash spec must be of the form
# (offset, (on, off)) — the original bare tuple (5, 9) is not a valid
# linestyle value.
plt.rcParams["axes.grid"] = True
plt.rcParams["grid.linestyle"] = (0, (5, 9))

fig = plt.figure(figsize=(8, 6), dpi=100)
ax = plt.axes()
k_candidates = [1, 4, 7, 10, 13, 16]  # must match the k values used for CV
# Mean CV accuracy vs. k, with error bars of one standard deviation.
ax.plot(k_candidates, mean, color='#3a3afa', linewidth=2.0, linestyle='-')
ax.errorbar(k_candidates, mean, yerr=std, color='#3a3afa')
plt.show()
Created
April 12, 2020 07:18
-
-
Save feiji110/67ceac54665fe620e8f8f749b824fd06 to your computer and use it in GitHub Desktop.
雪梨k近邻作业
参数:
- n_neighbors:int,指定k值
- weights:一字符串或者可调用对象,指定投票权重类型。即这些邻居投票权可以为相同或者不同。
- 'uniform':本节点的所有邻居节点的投票权重都相等。
- 'distance':本节点的所有邻居节点的投票权重与距离成反比。即越近的节点,其投票权重越大。
- [callable]:一个可调用对象。它传入距离的数组,返回同样形状的权重数组。
- algorithm:一字符串,指定计算最近邻的算法,可以为如下:
- 'ball_tree':使用BallTree算法。
- 'kd_tree'：使用KDTree算法。
- 'brute':使用暴力搜索法。
- 'auto':自动决定最合适的算法。
- leaf_size：指定BallTree或者KDTree叶节点规模。它影响树的构建和查询速度
- metric:str,指定距离度量,default'minkowski'距离
- p:int,指定在Minkowski度量上的指数。如果p=1,对应曼哈顿距离;如果p=2,对应欧氏距离
- n_jobs:并行性。取-1时表示派发任务到所有CPU核上
Methods:
- fit(X,y):训练模型
- predict(X)使用model来预测,返回带预测Sample的标记
- score(X,y)返回在(X,y)上预测的准确率accuracy
- predict_proba(X)返回Sample为每种标记的概率
- kneighbors([X,n_neighbors,return_distance])返回Sample point 的k近邻点。如果return_distance=True,同时返回到这些紧邻点的距离。
- kneighbors_graph([X,n_neighbors,mode])返回样本点的连接图
def test_KNeighborsClassifier(*data):
    """Fit a default KNeighborsClassifier and print train/test accuracy.

    Parameters
    ----------
    data : (X_train, X_test, y_train, y_test) tuple of arrays.
    """
    X_train, X_test, y_train, y_test = data
    # The original referenced `neighbors.KNeighborsClassifier`, but
    # `neighbors` is never imported in this file — use the class that is
    # imported directly from sklearn.neighbors above.
    clf = KNeighborsClassifier()
    clf.fit(X_train, y_train)
    print("Train Score:%f" % clf.score(X_train, y_train))
    print("Testing Score:%f" % clf.score(X_test, y_test))
# load_classification_data() is not defined in this file; it is expected to
# return a (X_train, X_test, y_train, y_test) split — TODO confirm against
# the loader referenced at the end of the gist.
X_train,X_test,y_train,y_test=load_classification_data()
test_KNeighborsClassifier(X_train,X_test,y_train,y_test) #Train Score:0.991091 #Testing Score:0.980000
def test_KNeighborsClassifier_k_w(*data):
    """Plot KNN train/test accuracy over k for both weighting schemes.

    Sweeps 100 integer k values from 1 up to (but excluding) the training-set
    size, once with 'uniform' and once with 'distance' voting weights, and
    plots the resulting accuracy curves.

    Parameters
    ----------
    data : (X_train, X_test, y_train, y_test) tuple of arrays.
    """
    X_train, X_test, y_train, y_test = data
    Ks = np.linspace(1, y_train.size, num=100, endpoint=False, dtype='int')
    weights = ['uniform', 'distance']
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    for weight in weights:
        training_scores = []
        testing_scores = []
        for K in Ks:
            # The original used `neighbors.KNeighborsClassifier`, but
            # `neighbors` is never imported — use the directly imported class.
            clf = KNeighborsClassifier(weights=weight, n_neighbors=K)
            clf.fit(X_train, y_train)
            testing_scores.append(clf.score(X_test, y_test))
            training_scores.append(clf.score(X_train, y_train))
        ax.plot(Ks, testing_scores, label="testing score:weight=%s" % weight)
        ax.plot(Ks, training_scores, label="training score:weight=%s" % weight)
    ax.legend(loc='best')
    ax.set_xlabel('K')
    ax.set_ylabel("score")
    ax.set_ylim(0, 1.05)
    ax.set_title("KNeighborsClassifier")
    plt.show()
test_KNeighborsClassifier_k_w(X_train,X_test,y_train,y_test)
````
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
加载数据函数