Last active
May 20, 2016 14:18
-
-
Save sunil-sangwan/5fbc855bd7a6231874500c4f4b3c00cd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#learned from online resource | |
import csv | |
import random,math,operator | |
def loadDataset(filename, split, trainingset=None, testset=None, num_features=4):
    """Read a CSV file and randomly partition its rows into two lists.

    The first ``num_features`` columns of every row are converted to
    ``float``; any remaining columns (e.g. a class label) are left as the
    strings produced by ``csv.reader``. Blank lines are skipped, so a
    trailing empty line no longer costs a real data row.

    Args:
        filename: Path of the CSV file, opened as UTF-8 text.
        split: Threshold compared against ``random.random()``; a row goes to
            ``trainingset`` when the random draw is below it, otherwise to
            ``testset``.
        trainingset: List that training rows are appended to. A new list is
            created when omitted.
        testset: List that test rows are appended to. A new list is created
            when omitted.
        num_features: Number of leading columns to convert to ``float``.

    Returns:
        A ``(trainingset, testset)`` tuple holding the same list objects
        that were passed in (or the freshly created ones).

    Raises:
        ValueError: If one of the leading columns is not a valid float.
        IndexError: If a non-blank row has fewer than ``num_features`` columns.
    """
    # Fresh lists per call: a mutable default would be shared between calls.
    if trainingset is None:
        trainingset = []
    if testset is None:
        testset = []

    # newline="" lets the csv module do its own line-ending handling.
    with open(filename, "rt", encoding="utf8", newline="") as csvfile:
        for row in csv.reader(csvfile):
            if not row:
                # csv.reader yields [] for blank lines.
                continue
            for col in range(num_features):
                row[col] = float(row[col])
            if random.random() < split:
                trainingset.append(row)
            else:
                testset.append(row)

    return trainingset, testset
def euclideanDistance(instance1, instance2, length):
    """Return the Euclidean distance over the first ``length`` components.

    Args:
        instance1: Indexable sequence of numbers.
        instance2: Indexable sequence of numbers.
        length: How many leading positions of each sequence to compare;
            anything at index ``length`` or beyond is ignored.

    Returns:
        The square root of the summed squared differences, as a float.
    """
    squared_total = 0
    for idx in range(length):
        diff = instance1[idx] - instance2[idx]
        squared_total = squared_total + diff ** 2
    return math.sqrt(squared_total)
def getNeighbors(trainingset, testInstance, k):
    """Return the ``k`` rows of ``trainingset`` nearest to ``testInstance``.

    Distance is measured with ``euclideanDistance`` over every column of
    ``testInstance`` except the last one. Rows at equal distance keep their
    relative order from ``trainingset`` because the sort is stable.

    Args:
        trainingset: Sequence of rows to search.
        testInstance: The row to find neighbours for.
        k: Number of neighbours to return.

    Returns:
        A list of ``k`` rows ordered from nearest to farthest.

    Raises:
        IndexError: If ``k`` is larger than ``len(trainingset)``.
    """
    feature_count = len(testInstance) - 1
    scored = [
        (candidate, euclideanDistance(testInstance, candidate, feature_count))
        for candidate in trainingset
    ]
    ranked = sorted(scored, key=lambda pair: pair[1])
    # Index explicitly (rather than slice) so an oversized k still raises.
    return [ranked[position][0] for position in range(k)]
def getResponse(neighbors):
    """Return the most common class label among ``neighbors``.

    The label of each neighbour is its last element. When several labels
    share the highest count, the one that appears first in ``neighbors``
    wins (dicts preserve insertion order and the sort is stable).

    Args:
        neighbors: Sequence of rows whose last element is the class label.

    Returns:
        The label with the most votes.

    Raises:
        IndexError: If ``neighbors`` is empty, since there is nothing to
            vote on.
    """
    classVotes = {}
    for neighbor in neighbors:
        label = neighbor[-1]
        classVotes[label] = classVotes.get(label, 0) + 1
    # dict.items() replaces iteritems(), which only exists in Python 2 and
    # raises AttributeError on the Python 3 this file otherwise targets.
    sortedVotes = sorted(
        classVotes.items(), key=operator.itemgetter(1), reverse=True
    )
    return sortedVotes[0][0]
def getAccuracy(testset, predictions):
    """Return the percentage of test rows whose label was predicted correctly.

    Row ``i`` counts as correct when its last element equals
    ``predictions[i]``. Predictions beyond ``len(testset)`` are ignored.

    Args:
        testset: Sequence of rows whose last element is the true label.
        predictions: Sequence of predicted labels, aligned with ``testset``.

    Returns:
        Accuracy as a float between 0.0 and 100.0. An empty ``testset``
        yields 0.0 instead of raising ``ZeroDivisionError``.

    Raises:
        IndexError: If ``predictions`` is shorter than ``testset``.
    """
    total = len(testset)
    if total == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0
    correct = sum(
        1 for idx, row in enumerate(testset) if row[-1] == predictions[idx]
    )
    return (correct / float(total)) * 100.0
def main(filename='iris.data'):
    """Load a dataset, then report k-NN accuracy for k = 1 .. 9.

    For each k a fresh prediction list is built over the whole test set, so
    the accuracy printed for one k is not polluted by predictions made with
    an earlier k.

    Args:
        filename: CSV file handed to ``loadDataset``. The original code
            referenced an undefined global ``filename``; the default
            'iris.data' is an assumption about the intended dataset --
            TODO confirm.
    """
    trainingset = []
    testset = []
    split = 0.67
    loadDataset(filename, split, trainingset, testset)
    print('Train set:' + repr(len(trainingset)))
    print('test set:' + repr(len(testset)))

    # Start at k=1: with k=0 there are no neighbours to vote. Cap k at the
    # training-set size because getNeighbors cannot return more rows than
    # it was given.
    largest_k = min(9, len(trainingset))
    for k in range(1, largest_k + 1):
        print('k=' + repr(k))
        predictions = []
        for x in range(len(testset)):
            neighbors = getNeighbors(trainingset, testset[x], k)
            result = getResponse(neighbors)
            predictions.append(result)
            print('predicted=' + repr(result) + ',actua=' + repr(testset[x][-1]))
        accuracy = getAccuracy(testset, predictions)
        print('Accuracy:' + repr(accuracy) + '%')
# Run the experiment only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment