Created
January 21, 2011 23:25
-
-
Save lynaghk/790631 to your computer and use it in GitHub Desktop.
R k-nearest neighbors example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# k-nearest-neighbours classification example.
#
# Two non-standard packages are used: FNN for nearest neighbours and plyr for
# split-apply-combine (think map-reduce) style operations.
# Install them once with: install.packages(c('FNN', 'plyr'))
# Documentation is available in R via the question mark, e.g. run: ?knn
library(FNN)
library(plyr)

# Some random points on the plane to show the interface. Each call to the
# anonymous function returns one named (x, y) pair; ldply stacks the pairs
# into a data frame with columns x and y.
# Call set.seed() before this point if you want identical points on every run.
neartop = ldply(1:50, function(i) {
  c(x = rnorm(1),
    y = 1 + rnorm(1, sd = 0.5))
})
nearbottom = ldply(1:50, function(i) {
  c(x = rnorm(1),
    y = -1 + rnorm(1, sd = 0.5))
})

# The first rows are the points near the top, the rest are near the bottom.
points = rbind(neartop, nearbottom)

# The vector of classification labels is easy to construct. Since the labels
# are TRUE/FALSE, the question being asked is "is this point in the set near
# the top?". Deriving the cut-off from nrow(neartop) keeps the labels correct
# if the group sizes above are changed.
points[, 'label'] = seq_len(nrow(points)) <= nrow(neartop)
plot(points$x, points$y)

# Split into training/testing sets by sampling 60 row indices without
# replacement; those rows are used for training and the rest for testing.
train_i = sample(nrow(points), 60)
train = points[train_i, c('x', 'y')]
test = points[-train_i, c('x', 'y')]

# Classify every test point from the labelled training points.
# k = 1 just assigns a point the label of its single nearest neighbour.
# (The original call also passed an undefined object, `cpt_vecs`, as an extra
# positional argument; it has been removed so the call can be evaluated.)
results = knn(
  train,
  test,
  cl = points[train_i, 'label'],
  k = 1)

# Fraction of test points classified correctly (a value between 0 and 1).
mean(points[-train_i, 'label'] == results)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment