Skip to content

Instantly share code, notes, and snippets.

@xypaul
Created June 5, 2015 05:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save xypaul/e6cf7abd00d2e0739196 to your computer and use it in GitHub Desktop.
Recreating the dataset from the MFS Paper by Bayne
function main()
    % Reproduces the experiment table from the MFS paper: average accuracy
    % of plain (k)NN and of MFS1/MFS2 voting on the Iris and Sonar datasets.
    % All output labels and their order are identical to the original
    % line-per-call version; the repetition is just folded into loops.

    % LOAD IRIS
    % dataset was modified by changing the class name from string to a
    % number
    iris = load('iris.csv', ',');
    % LOAD SONAR
    sonar = load('sonar.csv', ',');

    % NN & KNN TESTING (k = 1 is plain NN), 100 random splits each
    datasets = {iris, sonar};
    names = {'Iris', 'Sonar'};
    for d = 1:2
        for k = 1:5
            if k == 1
                label = 'NN';
            else
                label = ['kNN ' num2str(k)];
            end
            display([names{d} ' ' label ' ==> ' ...
                     num2str(averageTestKNN(100, datasets{d}, 80, k))]);
        end
    end

    % MFS IRIS: 1-4 features, with (mfs1) and without (mfs2) replacement
    for type = {'mfs1', 'mfs2'}
        for f = 1:4
            display(['Iris ' type{1} ' ' num2str(f) ' ==> ' ...
                     num2str(averageTestMFS(10, iris, 80, f, type{1}))]);
        end
    end

    % MFS SONAR: 6, 12, ..., 60 features, both sampling schemes
    for type = {'mfs1', 'mfs2'}
        for f = 6:6:60
            display(['Sonar ' type{1} ' ' num2str(f) ' ==> ' ...
                     num2str(averageTestMFS(10, sonar, 80, f, type{1}))]);
        end
    end
end
% Randomly partition a dataset (rows = samples) into a training part and a
% testing part. `percentage` is the share (0-100) of rows assigned to train.
function [train, test] = randomizeDataset(data, percentage)
    rows = size(data, 1);
    shuffled = data(randperm(rows), :);
    cut = round(rows * percentage / 100);
    train = shuffled(1:cut, :);
    test = shuffled(cut+1:end, :);
end
% Average the kNN result over numberOfTests fresh random train/test splits.
% Each trial stores 100 - knn(...), i.e. the complement of what knn returns,
% so the mean reported here is the average of those complements.
function average = averageTestKNN(numberOfTests, data, percentage, n)
    scores = zeros(numberOfTests, 1);
    for trial = 1:numberOfTests
        [trainPart, testPart] = randomizeDataset(data, percentage);
        scores(trial) = 100 - knn(testPart, trainPart, n);
    end
    average = mean(scores);
end
% Average the MFS result over numberOfTests fresh random train/test splits.
% `type` selects the scheme ('mfs1' or 'mfs2'); `features` is the subset
% size. Each trial stores 100 - mfs(...), mirroring averageTestKNN.
function average = averageTestMFS(numberOfTests, data, percentage, features, type)
    scores = zeros(numberOfTests, 1);
    for trial = 1:numberOfTests
        [trainPart, testPart] = randomizeDataset(data, percentage);
        scores(trial) = 100 - mfs(testPart, trainPart, features, type);
    end
    average = mean(scores);
end
% Classify every row of testingData with n-nearest-neighbours against
% trainData and return the percentage classified correctly (0-100).
% NOTE: the original output variable was named `error`, which both shadowed
% the builtin error() and misdescribed the value — it is the ACCURACY;
% callers compute the error rate as 100 - knn(...).
function accuracy = knn(testingData, trainData, n)
    nTests = size(testingData, 1);
    correct = zeros(nTests, 1);
    for i = 1:nTests
        % Last column of each row holds the true class label.
        predicted = identify(trainData, testingData(i, :), n);
        if testingData(i, end) == predicted
            correct(i) = 100;
        else
            correct(i) = 0;
        end
    end
    % Mean of 100/0 indicators = percentage correct.
    accuracy = mean(correct);
end
% Evaluate MFS on testingData against trainData and return the percentage
% classified correctly (0-100). `features` is the subset size; `type` must
% be 'mfs1' (with replacement) or 'mfs2' (without replacement).
% Fixes: an invalid `type` previously only printed a message and then used
% the undefined variable `identification`, producing a cryptic crash — it
% now raises a proper error. Output renamed from the misleading `error`
% (it holds accuracy; callers compute 100 - mfs(...)).
function accuracy = mfs(testingData, trainData, features, type)
    nTests = size(testingData, 1);
    correct = zeros(nTests, 1);
    for i = 1:nTests
        if strcmp(type, 'mfs1')
            identification = identifymfs1(trainData, testingData(i, :), features);
        elseif strcmp(type, 'mfs2')
            identification = identifymfs2(trainData, testingData(i, :), features);
        else
            error('Type has to be either mfs1 or mfs2');
        end
        % Last column of each row holds the true class label.
        if testingData(i, end) == identification
            correct(i) = 100;
        else
            correct(i) = 0;
        end
    end
    % Mean of 100/0 indicators = percentage correct.
    accuracy = mean(correct);
end
% MFS1: vote over feature subsets drawn WITH replacement.
function class = identifymfs1(trainData, sample, nFeatures)
    class = identifymfs(trainData, sample, true, nFeatures);
end
% MFS2: vote over feature subsets drawn WITHOUT replacement.
function class = identifymfs2(trainData, sample, nFeatures)
    class = identifymfs(trainData, sample, false, nFeatures);
end
% Multiple-feature-subset classifier: run ten independent voting rounds,
% each classifying `item` with 1-NN on a freshly sampled feature subset,
% and return the majority (modal) class across the rounds.
function class = identifymfs(training, item, replacement, features)
    rounds = 10;  % number of voting rounds
    ballots = zeros(rounds, 1);
    for r = 1:rounds
        [subsetData, subsetItem] = pickFeature(training, item, features, replacement);
        ballots(r) = identify(subsetData, subsetItem, 1);  % 1-NN vote
    end
    class = mode(ballots);
end
function [dataPicked, itemPicked] = pickFeature(data, item, n, replacement)
    % Select n random feature columns from data/item while always keeping
    % the identification (class-label) column, which is the last one.
    % replacement = true  -> columns may repeat (MFS1)
    % replacement = false -> columns are distinct (MFS2)
    numberOfColumns = size(data, 2);
    % Seed the selection with the label column so it survives filtering.
    pickedColumns = numberOfColumns;
    for i = 1:n
        if replacement
            % randi is boundary-safe; the original ceil(k*rand()) could
            % return 0 (rand() is in [0,1)), crashing on index 0.
            % The -1 excludes the label column from the draw.
            value = randi(numberOfColumns - 1);
        else
            % Only draw from columns not yet picked; the label column is
            % already in pickedColumns, so it can never be drawn again.
            chooseFrom = setdiff(1:numberOfColumns, pickedColumns);
            value = chooseFrom(randi(length(chooseFrom)));
        end
        % Growing the array is fine here: it stays tiny (<= n+1 entries).
        pickedColumns(end + 1) = value;
    end
    % Sorting keeps the label column (largest index) at the end.
    pickedColumns = sort(pickedColumns);
    dataPicked = data(:, pickedColumns);
    itemPicked = item(:, pickedColumns);
end
% k-nearest-neighbour classification: compute the distance from `item` to
% every training row, order rows by distance, and return the modal class
% among the k closest.
function class = identify(training, item, k)
    ranked = sortrows(dist(training, item), 1);
    class = mode(ranked(1:k, 2));
end
function d = dist(data, item)
    % Euclidean distance from `item` to every row of `data`, over all
    % columns except the last (which is the class label).
    % Returns an N x 2 matrix: column 1 = distance, column 2 = the class
    % label taken from the last column of the corresponding data row.
    % Vectorized replacement for the original doubly-nested scalar loop.
    features = data(:, 1:end-1);
    % bsxfun keeps this compatible with pre-R2016b MATLAB and old Octave,
    % which lack implicit broadcasting for the subtraction.
    diffs = bsxfun(@minus, features, item(1, 1:end-1));
    d = [sqrt(sum(diffs .^ 2, 2)), data(:, end)];
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment