Skip to content

Instantly share code, notes, and snippets.

@sunil-sangwan
Last active May 20, 2016 14:18
Show Gist options
  • Save sunil-sangwan/5fbc855bd7a6231874500c4f4b3c00cd to your computer and use it in GitHub Desktop.
Save sunil-sangwan/5fbc855bd7a6231874500c4f4b3c00cd to your computer and use it in GitHub Desktop.
# k-nearest-neighbours classifier; learned from an online resource.
import csv
import random,math,operator
def loadDataset(filename, split, trainingset=None, testset=None):
    """Read a CSV file and randomly partition its rows into two lists.

    Every column except the last is converted to ``float``; the last column
    is left exactly as the CSV reader produced it. Blank lines are skipped,
    so a file with or without a trailing empty line loads all of its rows.

    Args:
        filename: Path of the CSV file, opened as UTF-8 text.
        split: Threshold compared against ``random.random()``; a row goes to
            ``trainingset`` when the draw is below it, otherwise to
            ``testset``.
        trainingset: List that training rows are appended to. A new list is
            created when omitted.
        testset: List that test rows are appended to. A new list is created
            when omitted.

    Returns:
        The ``(trainingset, testset)`` tuple; these are the same list
        objects that were passed in, when they were passed in.

    Raises:
        ValueError: If a non-final column cannot be parsed as a float.
    """
    # Create fresh lists per call: a mutable default would be shared by
    # every call that relies on it.
    if trainingset is None:
        trainingset = []
    if testset is None:
        testset = []

    # newline='' lets the csv module handle line endings itself.
    with open(filename, "rt", encoding='utf8', newline='') as csvfile:
        for row in csv.reader(csvfile):
            if not row:
                # csv.reader yields [] for an empty line; nothing to load.
                continue
            row[:-1] = [float(value) for value in row[:-1]]
            if random.random() < split:
                trainingset.append(row)
            else:
                testset.append(row)
    return trainingset, testset
def euclideanDistance(instance1, instance2, length):
    """Return the Euclidean distance over the first ``length`` components.

    Args:
        instance1: Indexable sequence of numbers (extra trailing items are
            ignored).
        instance2: Indexable sequence of numbers (extra trailing items are
            ignored).
        length: Number of leading components to include in the distance.

    Returns:
        The square root of the summed squared component differences, as a
        float.

    Raises:
        IndexError: If either sequence has fewer than ``length`` items.
    """
    squared_sum = sum(
        (instance1[i] - instance2[i]) ** 2 for i in range(length)
    )
    return math.sqrt(squared_sum)
def getNeighbors(trainingset, testInstance, k):
    """Return the ``k`` training rows closest to ``testInstance``.

    Distance is computed with ``euclideanDistance`` over every component of
    ``testInstance`` except the last one.

    Args:
        trainingset: Sequence of rows to search.
        testInstance: Row to find neighbours for; its last item is excluded
            from the distance.
        k: Number of neighbours wanted. Values larger than the training set
            return every training row; values below 1 return an empty list.

    Returns:
        A list of rows from ``trainingset`` ordered from nearest to
        farthest. Rows at equal distance keep their ``trainingset`` order.
    """
    length = len(testInstance) - 1
    scored = [
        (candidate, euclideanDistance(testInstance, candidate, length))
        for candidate in trainingset
    ]
    scored.sort(key=operator.itemgetter(1))
    # Slicing clamps k to the available rows instead of raising IndexError;
    # max() keeps a negative k from being read as "all but the last".
    return [candidate for candidate, _ in scored[:max(k, 0)]]
def getResponse(neighbors):
    """Return the most frequent class label among ``neighbors``.

    The label of a neighbour is its last item. When several labels share the
    highest count, the one that appears first in ``neighbors`` is returned.

    Args:
        neighbors: Sequence of rows whose last item is the class label.

    Returns:
        The winning label, or ``None`` when ``neighbors`` is empty.
    """
    if not neighbors:
        # No votes were cast, so there is no label to return.
        return None

    classVotes = {}
    for neighbor in neighbors:
        label = neighbor[-1]
        classVotes[label] = classVotes.get(label, 0) + 1

    # dict.items() replaces the Python 2-only iteritems(). max() returns the
    # first maximal entry in insertion order, the same winner a stable
    # descending sort would put first.
    return max(classVotes.items(), key=operator.itemgetter(1))[0]
def getAccuracy(testset, predictions):
    """Return the percentage of test rows whose label was predicted correctly.

    Row ``i`` of ``testset`` is compared with ``predictions[i]``; the label
    of a row is its last item. Predictions beyond ``len(testset)`` are
    ignored.

    Args:
        testset: Sequence of rows whose last item is the true label.
        predictions: Indexable sequence of predicted labels.

    Returns:
        A float between 0.0 and 100.0; 0.0 when ``testset`` is empty.

    Raises:
        IndexError: If ``predictions`` is shorter than ``testset``.
    """
    if not testset:
        # Avoid dividing by zero when there is nothing to score.
        return 0.0
    correct = sum(
        1 for i, row in enumerate(testset) if row[-1] == predictions[i]
    )
    return (correct / float(len(testset))) * 100.0
def main(filename='iris.data'):
    """Load a dataset, classify the test rows for several k, print accuracy.

    The data is split with a 0.67 threshold via ``loadDataset``. For each
    ``k`` from 1 to 9 every test row is classified with ``getNeighbors`` and
    ``getResponse``, each prediction is printed, and the accuracy for that
    ``k`` is printed afterwards.

    Args:
        filename: Path of the CSV file to load.
            NOTE(review): the original code referenced an undefined
            ``filename``; the default here is an assumption based on the
            four-numeric-columns-plus-label layout the loader expected.
            Confirm the intended file.
    """
    trainingset = []
    testset = []
    split = 0.67
    loadDataset(filename, split, trainingset, testset)
    print('Train set:' + repr(len(trainingset)))
    print('test set:' + repr(len(testset)))

    # Start at 1: k = 0 selects no neighbours, so there is nothing to vote on.
    for k in range(1, 10):
        print('k=' + repr(k))
        # Fresh list per k so accuracy is scored against this k's
        # predictions, not leftovers from a previous iteration.
        predictions = []
        for instance in testset:
            neighbors = getNeighbors(trainingset, instance, k)
            result = getResponse(neighbors)
            predictions.append(result)
            print('predicted=' + repr(result) + ',actua=' + repr(instance[-1]))
        accuracy = getAccuracy(testset, predictions)
        print('Accuracy:' + repr(accuracy) + '%')
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment