Skip to content

Instantly share code, notes, and snippets.

@feiji110
Created April 12, 2020 07:18
Show Gist options
  • Save feiji110/67ceac54665fe620e8f8f749b824fd06 to your computer and use it in GitHub Desktop.
Save feiji110/67ceac54665fe620e8f8f749b824fd06 to your computer and use it in GitHub Desktop.
雪梨k近邻作业
# k-NN homework script: 5-fold cross-validated accuracy of a distance-weighted
# KNeighborsClassifier for several neighbour counts, plotted with error bars.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

try:
    import joblib
except ImportError:
    # Older scikit-learn releases (< 0.23) vendored joblib under
    # sklearn.externals; newer releases removed that alias.
    from sklearn.externals import joblib
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier

# NOTE: joblib.load unpickles the file, so only load a 'later.pkl' you trust.
iris = joblib.load('later.pkl')
# Assumes the pickle holds 150 rows x 5 columns (four features followed by
# the label) -- TODO confirm against how 'later.pkl' was produced.
data = pd.DataFrame(iris, index=range(0, 150), columns=['a', 'b', 'c', 'd', 'e'])
X = data.loc[:, 'a':'d']
y = data['e']

# One ordered list drives both the evaluation loop and the x-axis of the
# plot. Iterating a set here would not guarantee that the collected scores
# line up with the plotted k values.
k_values = [1, 4, 7, 10, 13, 16]

mean = []  # mean CV accuracy for each entry of k_values
std = []   # standard deviation of the CV accuracy for each entry of k_values

for k in k_values:
    # Distance-weighted voting; p=2 selects the Euclidean (L2) Minkowski metric.
    knn_clf = KNeighborsClassifier(weights="distance", n_neighbors=k, p=2)
    scores = cross_val_score(knn_clf, X, y, cv=5)
    mean.append(np.mean(scores))
    std.append(np.std(scores))

plt.rcParams["axes.grid"] = True
plt.rcParams["grid.linestyle"] = (5, 9)  # grid dash pattern given as an on/off sequence
fig = plt.figure(figsize=(8, 6), dpi=100)
ax = plt.axes()

# Solid line through the mean accuracies, then the same means again with
# +/- one standard deviation error bars in the same colour.
plt.plot(k_values, mean, color='#3a3afa', linewidth=2.0, linestyle='-')
plt.errorbar(k_values, mean, yerr=std, color='#3a3afa')
plt.show()
@feiji110
Copy link
Author

feiji110 commented Apr 12, 2020

def test_KNeighborsClassifier(*data):
    """Fit a default KNeighborsClassifier and print its train/test scores.

    Args:
        *data: Exactly four positional values, in the order
            ``X_train, X_test, y_train, y_test``.
    """
    train_features, test_features, train_labels, test_labels = data
    model = neighbors.KNeighborsClassifier()
    model.fit(train_features, train_labels)

    # Score on the data the model was fitted on first, then on the held-out split.
    train_score = model.score(train_features, train_labels)
    print("Train Score:%f" % train_score)
    test_score = model.score(test_features, test_labels)
    print("Testing Score:%f" % test_score)

# Obtain the four splits from load_classification_data() (defined outside this
# snippet) and print the default classifier's score on each split.
X_train,X_test,y_train,y_test=load_classification_data()
test_KNeighborsClassifier(X_train,X_test,y_train,y_test) # output recorded by the author: Train Score:0.991091, Testing Score:0.980000

@feiji110
Copy link
Author

def test_KNeighborsClassifier_k_w(*data):
    """Plot train/test scores of KNeighborsClassifier against K per weighting.

    For each of the ``'uniform'`` and ``'distance'`` weighting schemes, a
    classifier is fitted for every candidate K and its score on the test and
    training splits is recorded; all four curves are drawn on one axes.

    Args:
        *data: Exactly four positional values, in the order
            ``X_train, X_test, y_train, y_test``.
    """
    X_train, X_test, y_train, y_test = data

    # 100 integer candidates spread from 1 up to (but excluding) the number
    # of training labels.
    k_grid = np.linspace(1, y_train.size, num=100, endpoint=False, dtype='int')

    fig, ax = plt.subplots()
    for weighting in ('uniform', 'distance'):
        test_curve = []
        train_curve = []
        for k in k_grid:
            model = neighbors.KNeighborsClassifier(weights=weighting, n_neighbors=k)
            model.fit(X_train, y_train)
            test_curve.append(model.score(X_test, y_test))
            train_curve.append(model.score(X_train, y_train))

        ax.plot(k_grid, test_curve, label="testing score:weight=%s" % weighting)
        ax.plot(k_grid, train_curve, label="training score:weight=%s" % weighting)

    ax.set_title("KNeighborsClassifier")
    ax.set_xlabel('K')
    ax.set_ylabel("score")
    ax.set_ylim(0, 1.05)
    ax.legend(loc='best')
    plt.show()

# Draw the score-versus-K curves for both weighting schemes on the same four splits.
test_KNeighborsClassifier_k_w(X_train,X_test,y_train,y_test)
    ````

@feiji110
Copy link
Author

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment