Skip to content

Instantly share code, notes, and snippets.

@MSDarshan91
Created May 6, 2016 14:43
Show Gist options
  • Save MSDarshan91/eefdae667e77afc7760fb683cd07284b to your computer and use it in GitHub Desktop.
Save MSDarshan91/eefdae667e77afc7760fb683cd07284b to your computer and use it in GitHub Desktop.
import csv
import random
import math
import operator
def loadDataset(filename, split, trainingSet=None, testSet=None, numFeatures=4):
    """Load a CSV file and randomly partition its rows into two lists.

    The first ``numFeatures`` fields of every non-empty row are converted to
    ``float`` in place; any remaining fields (for example a trailing class
    label) stay as the strings produced by ``csv.reader``. A row is appended
    to ``trainingSet`` when ``random.random() < split`` and to ``testSet``
    otherwise.

    Args:
        filename: Path of the CSV file to read.
        split: Threshold compared against ``random.random()``; roughly the
            fraction of rows that end up in ``trainingSet``.
        trainingSet: List that receives the training rows. A fresh list is
            created when omitted.
        testSet: List that receives the remaining rows. A fresh list is
            created when omitted.
        numFeatures: Number of leading fields converted to ``float``.

    Returns:
        The ``(trainingSet, testSet)`` pair. These are the same list objects
        the caller passed in, when any were passed.

    Raises:
        ValueError: If one of the first ``numFeatures`` fields is not numeric.
        IndexError: If a non-empty row has fewer than ``numFeatures`` fields.
    """
    import sys  # local import: only needed to pick the file mode below

    # Fresh lists per call; list defaults in the signature would be shared
    # between calls and keep accumulating rows.
    if trainingSet is None:
        trainingSet = []
    if testSet is None:
        testSet = []

    # The csv module wants a binary file on Python 2 but a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] < 3:
        csvfile = open(filename, 'rb')
    else:
        csvfile = open(filename, 'r', newline='')

    with csvfile:
        for row in csv.reader(csvfile):
            # Blank lines (typically a trailing one) come back as empty rows;
            # skip them instead of unconditionally dropping the last row.
            if not row:
                continue
            for col in range(numFeatures):
                row[col] = float(row[col])
            if random.random() < split:
                trainingSet.append(row)
            else:
                testSet.append(row)
    return trainingSet, testSet
def euclideanDistance(instance1, instance2, length):
    """Return the Euclidean distance between two instances.

    Only positions ``0 .. length - 1`` of each instance take part in the
    computation; anything stored after that is ignored.

    Args:
        instance1: Indexable sequence of numbers.
        instance2: Indexable sequence of numbers.
        length: Number of leading positions to compare.

    Returns:
        The square root of the summed squared differences, as a float.
    """
    squaredSum = 0
    for idx in range(length):
        delta = instance1[idx] - instance2[idx]
        squaredSum += delta ** 2
    return math.sqrt(squaredSum)
def getNeighbors(trainingSet, testInstance, k):
    """Return the labels of the ``k`` training rows nearest to ``testInstance``.

    Distances are computed with ``euclideanDistance``. Labels are not read
    from the training rows themselves: the label of ``trainingSet[x]`` is
    looked up as ``output_labels[train_apps[x]]``, so both of those
    module-level names must be defined before this function is called and
    ``train_apps`` must be index-aligned with ``trainingSet``.

    Args:
        trainingSet: Sequence of training instances.
        testInstance: The instance to classify.
        k: Maximum number of neighbours to return.

    Returns:
        A list of labels ordered from nearest to farthest. It holds fewer
        than ``k`` entries when ``trainingSet`` has fewer than ``k`` rows,
        and is empty when ``k`` is zero or negative.
    """
    # NOTE(review): the last position of each instance is excluded from the
    # distance. The rows built by the script in this file contain only
    # feature values, so this may be dropping a real feature - confirm
    # before changing, since it alters every prediction.
    length = len(testInstance) - 1

    distances = []
    for idx, row in enumerate(trainingSet):
        dist = euclideanDistance(testInstance, row, length)
        distances.append((output_labels[train_apps[idx]], dist))
    distances.sort(key=operator.itemgetter(1))

    # Slicing (rather than indexing 0..k-1) tolerates k larger than the
    # training set; max() keeps a negative k from slicing off the tail.
    return [label for label, _ in distances[:max(k, 0)]]
import operator
def getResponse(neighbors):
    """Return the label that occurs most often in ``neighbors``.

    When several labels share the highest count, the one that appears first
    in ``neighbors`` wins, so the result does not depend on dictionary
    ordering.

    Args:
        neighbors: Non-empty sequence of hashable labels.

    Returns:
        The majority label.

    Raises:
        IndexError: If ``neighbors`` is empty.
    """
    if not neighbors:
        # IndexError is what an empty input has always produced here; keep
        # the type but say what actually went wrong.
        raise IndexError("getResponse() needs at least one neighbor to vote")

    classVotes = {}
    for response in neighbors:
        classVotes[response] = classVotes.get(response, 0) + 1

    # max() returns the first maximal item, which gives the first-seen
    # tie-break described above without relying on dict.iteritems().
    return max(neighbors, key=classVotes.get)
def getAccuracy(testSet, predictions):
    """Return the percentage of test rows whose label was predicted correctly.

    The true label of each row is taken from its last element
    (``row[-1]``) and compared with the prediction at the same index.

    Args:
        testSet: Sequence of rows whose last element is the true label.
        predictions: Sequence of predicted labels, index-aligned with
            ``testSet``.

    Returns:
        A float between 0.0 and 100.0; 0.0 when ``testSet`` is empty.

    Raises:
        IndexError: If ``predictions`` is shorter than ``testSet``.
    """
    if not testSet:
        return 0.0

    correct = 0
    for idx, row in enumerate(testSet):
        # Compare by value: equal labels read from different places are
        # usually distinct objects, so an identity test would miss them.
        if row[-1] == predictions[idx]:
            correct += 1
    return (correct / float(len(testSet))) * 100.0
# Driver script: reads the feature and label files, holds out the last 10% of
# the rows, classifies each held-out row with a 10-nearest-neighbour vote and
# prints the resulting accuracy.
import sys  # only needed to pick a csv-compatible file mode


def _readCsvRows(path):
    """Return every row of the CSV file at ``path`` as a list of string lists."""
    # The csv module wants a binary file on Python 2 but a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] < 3:
        csvfile = open(path, 'rb')
    else:
        csvfile = open(path, 'r', newline='')
    with csvfile:
        return list(csv.reader(csvfile))


filename = 'training_data_sorted.csv'
dataset = _readCsvRows(filename)
y = _readCsvRows('training_labels_sorted.csv')

# Column 0 of each data row is the key used to look the row up in the labels
# file; every remaining column is parsed as a numeric feature.
dataset_labels = []
training_set = []
for data in dataset:
    dataset_labels.append(data[0])
    training_set.append([float(i) for i in data[1:]])

# First 90% of the rows for training, the remainder for evaluation.
split_at = int(len(training_set) * 0.9)
train = training_set[:split_at]
test = training_set[split_at:]
train_apps = dataset_labels[:split_at]
test_apps = dataset_labels[split_at:]

# Each row of the labels file is a (key, label) pair.
output_labels = {k: v for k, v in y}

print(len(train))
print(len(test))

predicted = []
actual = []
for i, t in enumerate(test):
    print(i)
    neighbors = getNeighbors(train, t, 10)
    prediction = getResponse(neighbors)  # vote once, reuse for print and record
    truth = output_labels[test_apps[i]]
    print("%s %s" % (prediction, truth))
    predicted.append(prediction)
    actual.append(truth)

# getAccuracy reads the true label from the last element of each row, so wrap
# every label in a one-element row; passing the bare strings would compare
# only the last character of each label. Print the result, which was
# previously computed and thrown away.
print(getAccuracy([[label] for label in actual], predicted))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment