Created
December 14, 2017 20:00
-
-
Save mkocabas/feb9c4431e552aa33ffe2dc95d58c76c to your computer and use it in GitHub Desktop.
K Nearest Neighbor Implementation in Matlab
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | |
% In this tutorial, we are going to implement knn algorithm. % | |
% Our aim is to see the most efficient implementation of knn. % | |
% You have to implement knn in two different ways:                         %
% 1) with two loops % | |
% 2) without any loop % | |
% % | |
% you have to report the computation times of both pathways. % | |
% Note: the distance metric is 'Euclidean'.                                %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | |
clear; close all; clc;
%% Section I: Forming and plotting the dataset
% Two Gaussian classes with the same covariance and different means.
sampleNo = 3000;                 % samples generated per class
testNo   = 1000;                 % first testNo samples of each class -> test set
sigma = [1, 1.5; 1.5, 3];
mu = [1.5, 1.5];
C1X = (mvnrnd(mu, sigma, sampleNo))';   % class 1 features, 2 x sampleNo
mu = [4, 1]; sigma = [1, 1.5; 1.5, 3];
C2X = (mvnrnd(mu, sigma, sampleNo))';   % class 2 features, 2 x sampleNo
% Labels: class 1 -> 0, class 2 -> 1
y1 = zeros(1, sampleNo);
y2 = ones(1, sampleNo);
x = [C1X, C2X];
y = [y1, y2];
% Train/test split (samples are columns)
test_x  = [C1X(:, 1:testNo),          C2X(:, 1:testNo)];
train_x = [C1X(:, testNo+1:sampleNo), C2X(:, testNo+1:sampleNo)];
test_y  = [y1(:, 1:testNo),           y2(:, 1:testNo)];
train_y = [y1(:, testNo+1:sampleNo),  y2(:, testNo+1:sampleNo)];
% Dataset description:
%   train_x / test_x : 2 x N feature matrices (samples as columns)
%   train_y / test_y : matching 0/1 class labels
% Visualize the test portion, then the training portion, of both classes.
plot(C1X(1, 1:testNo), C1X(2, 1:testNo), '+');
hold on; grid on;
plot(C2X(1, 1:testNo), C2X(2, 1:testNo), 'ko');
plot(C1X(1, testNo+1:sampleNo), C1X(2, testNo+1:sampleNo), '+');
hold on; grid on;
plot(C2X(1, testNo+1:sampleNo), C2X(2, testNo+1:sampleNo), 'o');
%%
% Section II: Implementing KNN using 2 loops.
% Here you should:
% 1) compute distance matrix with the use of loop, such as (for loop)
% 2) record the amount of computation time for (1)
% 3) make prediction by the use of different k values
% NOTE(review): knn_loop treats ROWS as samples and column 2 as the label,
% but test_x/train_x are 2 x N feature matrices with samples as COLUMNS —
% this orientation mismatch looks unintended; confirm before trusting output.
knn_loop(test_x, train_x, 2);
function test_data = knn_loop(test_data, tr_data, k)
% KNN_LOOP  k-nearest-neighbour classification, loop implementation.
%   test_data : numTest x 2 matrix; column 1 is the scalar feature,
%               column 2 is overwritten with the predicted label.
%   tr_data   : numTrain x 2 matrix; column 1 feature, column 2 label.
%   k         : number of neighbours that vote.
% Distance is squared Euclidean on the feature column only; squaring
% preserves the distance ranking, so sorting it is equivalent.
numoftestdata = size(test_data, 1);
numoftrainingdata = size(tr_data, 1);
for sample = 1:numoftestdata
    % Step 1: squared distance from this test sample to every training
    % sample. Scalar expansion replaces the original full-row repmat,
    % which copied the (unused) label column as well.
    euclideandistance = (test_data(sample, 1) - tr_data(:, 1)).^2;
    % Step 2: indices of the k nearest training samples.
    [~, position] = sort(euclideandistance, 'ascend');
    knearestneighbors = position(1:k);
    % Step 3: majority vote over the neighbours' labels (vectorized;
    % avoids growing A element-by-element inside a loop).
    A = tr_data(knearestneighbors, 2);
    M = mode(A);
    if M ~= 1
        test_data(sample, 2) = M;
    else
        % Quirk preserved from the original logic: when the vote says 1,
        % fall back to the single nearest neighbour's label instead.
        test_data(sample, 2) = tr_data(knearestneighbors(1), 2);
    end
end
end
%% Section III: Ok, it is time to implement a more efficient version of knn
% Implementing KNN without any loop
% Here you should:
% 1) compute the distance matrix in a vectorized way
% 2) record the amount of computation time for (1)
% 3) make prediction by the use of different k values
% Your code for section III goes here ...
function out_data = knn_vector(test_data,tr_data,k)
% KNN_VECTOR  Vectorized (loop-free) k-nearest-neighbour classification.
%   Assumes test_data is an n x 1 column of scalar features and tr_data is
%   an m x 2 matrix with features in column 1, labels in column 2
%   — TODO confirm: Section I builds 2 x N matrices, which does not match.
%   Returns out_data, an n x 1 column of predicted labels.
test_data_n = size(test_data,1);
tr_data_n = size(tr_data,1);
% absolute distance between all test and training data (n x m matrix);
% |a-b| ranks pairs identically to (a-b)^2 for scalar features, so it is
% a valid stand-in for Euclidean distance here
dist = abs(repmat(test_data,1,tr_data_n) - repmat(tr_data(:,1)',test_data_n,1));
% indices of nearest neighbors: each row sorted ascending along dim 2
[~,nearest] = sort(dist,2);
% keep only the k nearest per test sample (n x k index matrix)
nearest = nearest(:,1:k);
% mode of k nearest: tr_data(nearest,2) flattens the subscript matrix
% column-major into (n*k) x 1 labels; reshape restores the n x k layout
val = reshape(tr_data(nearest,2),[],k);
out_data = mode(val,2);
% if mode is 1, output the single nearest neighbour's label instead
% (mirrors the tie-break quirk in knn_loop)
out_data(out_data==1) = val(out_data==1,1);
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment