Skip to content

Instantly share code, notes, and snippets.

@satojkovic
Last active April 15, 2016 12:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save satojkovic/4539818 to your computer and use it in GitHub Desktop.
Save satojkovic/4539818 to your computer and use it in GitHub Desktop.
k nearest neighbor from Machine Learning for Hackers
#
# Load the example data set from CSV into `df`.
# The functions below read its X, Y and Label columns.
#
df <- read.csv(file = "data/example_data.csv", header = TRUE)
#
# produce distance matrix
#
# Builds the full matrix of pairwise Euclidean distances between the rows
# of `df`, using its `X` and `Y` columns.
#
# Args:
#   df: data frame (or matrix with column names) holding numeric columns
#       `X` and `Y`.
#
# Returns:
#   An nrow(df) x nrow(df) numeric matrix whose [i, j] entry is the
#   distance between row i and row j (0 on the diagonal). Input with zero
#   rows yields a 0 x 0 matrix instead of an error.
#
distance.matrix <- function(df) {
  xs <- df[, "X"]
  ys <- df[, "Y"]
  # outer() forms every pairwise coordinate difference in one vectorised
  # call. This replaces the element-by-element double loop and, unlike
  # `1:nrow(df)`, behaves correctly when there are no rows at all.
  dx <- outer(xs, xs, "-")
  dy <- outer(ys, ys, "-")
  sqrt(dx^2 + dy^2)
}
#
# k nearest neighbors
#
# Returns the indices of the (up to) `k` points closest to point `i`,
# nearest first, never including `i` itself.
#
# Args:
#   i:        index of the query point (a row of `distance`).
#   distance: square matrix of pairwise distances, e.g. the result of
#             distance.matrix().
#   k:        number of neighbours wanted (default 5).
#
# Returns:
#   Integer vector of at most `k` indices. It is shorter than `k` when
#   fewer than `k` other points exist.
#
k.nearest.neighbors <- function(i, distance, k = 5) {
  ranked <- order(distance[i, ])
  # Drop the query point by index rather than assuming it sorts first:
  # when another row is at distance 0 (a duplicate point) it can tie with
  # `i` and take position 1, which would leave `i` among its own neighbours.
  others <- ranked[ranked != i]
  # Clamp the count so that a `k` larger than the number of other points
  # does not pad the result with NA indices (and k = 0 yields nothing).
  others[seq_len(min(k, length(others)))]
}
#
# knn for data frame
#
# Predicts a binary label for every row of `df` from its `k` nearest
# neighbours: the neighbours are found with k.nearest.neighbors() on the
# matrix from distance.matrix(), and their `Label` values are averaged.
#
# Args:
#   df: data frame with the coordinate columns used by distance.matrix()
#       and a `Label` column (presumably coded 0/1 -- confirm against the
#       data file).
#   k:  number of neighbours that vote (default 5).
#
# Returns:
#   Numeric vector with one entry per row of `df`: 1 when the mean
#   neighbour label is greater than 0.5, otherwise 0 (NA when that mean is
#   not available).
#
knn <- function(df, k = 5) {
  distance <- distance.matrix(df)
  # Pull the label column out once instead of indexing the data frame on
  # every iteration.
  labels <- df[, "Label"]
  # seq_len() gives an empty sequence for zero rows, where 1:nrow(df)
  # would iterate over c(1, 0).
  votes <- vapply(
    seq_len(nrow(df)),
    function(i) mean(labels[k.nearest.neighbors(i, distance, k = k)]),
    numeric(1)
  )
  # Vectorised threshold; replaces the per-row scalar ifelse().
  as.numeric(votes > 0.5)
}
#
# Attach the kNN predictions to the data frame
#
# knn() is called with its default k; the result is stored per row in
# the kNNPredictions column.
df$kNNPredictions <- knn(df)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment