Entropy in decision tree
# The main difference between the entropy() function bundled with MATLAB
# and this custom function is that the bundled one converts its input to
# uint8 and is aimed mostly at signal processing, while this is a
# straightforward implementation useful e.g. for learning decision trees.
function f = my_entropy(data, weighted, information_gain)
# @data must be a cell array; each cell holds a vector of class counts.
# Vectors are the only representation of 'buckets' here; in other words,
# vector = bucket (leaf of the decision tree).
# @weighted tells whether to return one weighted average entropy per row
# (each bucket weighted by its sample count) or a vector of entropies
# (one entropy per bucket).
# @information_gain tells whether to calculate the Kullback–Leibler
# divergence and treat rows as single states after a transformation;
# the flag is accepted but not used in the body below.
if nargin < 2
  weighted = false;
end;
if nargin < 3
  information_gain = false;
end;
rows = @(x) size(x,1);
cols = @(x) size(x,2);
result = [];
for r = 1:rows(data)
  row_entropy = 0; # accumulators for the weighted (per-row) mode
  row_total = 0;
  for c = 1:cols(data) # in most cases this will be 1:1
    data{r,c}(data{r,c} == 0) = []; # drop empty classes: 0 * log2(0) is taken as 0
    omega = sum(data{r,c});
    epsilon = 0;
    for b = 1:cols(data{r,c})
      p = data{r,c}(b) / omega;
      epsilon = epsilon + p * log2(p);
    end;
    if epsilon == 0
      entropy = 0; # avoid returning -0 for pure buckets
    else
      entropy = -epsilon;
    end;
    if weighted
      row_entropy = row_entropy + omega * entropy;
      row_total = row_total + omega;
    else
      result = [result entropy];
    end;
  end;
  if weighted
    result = [result (row_entropy / row_total)];
  end;
end;
f = result;
end;
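# Example of the two modes on a single row (values worked out by hand):
# unweighted, each bucket reports its own entropy; weighted, the row
# collapses to an average of its buckets, weighted by sample counts
# as implemented above.
# my_entropy({[2 2 2 2 2 2 2 2], [8 8]}, false) # expected: [3 1]
# my_entropy({[2 2 2 2 2 2 2 2], [8 8]}, true)  # expected: (16*3 + 16*1)/32 = 2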
# test cases
cell1 = { [16];[16];[2 2 2 2 2 2 2 2];[12];[16] }
cell2 = { [16],[12];[16],[2];[2 2 2 2 2 2 2 2],[8 8];[12],[8 8];[16],[8 8] }
cell3 = { [16],[3 3];[16],[2];[2 2 2 2 2 2 2 2],[2 2];[12],[2];[16],[2] }
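# e.g. my_entropy(cell1) should return [0 0 3 0 0]: pure buckets carry
# 0 bits, and eight equally likely classes carry log2(8) = 3 bits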
a1 = [ 100 60 ]
a2 = [ 20 60 ]
a3 = [ 80 0 ]
entr = my_entropy({a1;a2;a3}, false)
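# expected: entr ≈ [0.9544 0.8113 0]
# As a worked example, a2 and a3 sum elementwise to a1, so they can be
# read as a binary split of a1; the textbook ID3 information gain of that
# split then follows directly (this bypasses the unused @information_gain
# flag and just applies H(parent) - sum_k (n_k/n) * H(child_k)):
n2 = sum(a2); n3 = sum(a3); n = n2 + n3;
gain = my_entropy({a1}) - (n2/n) * my_entropy({a2}) - (n3/n) * my_entropy({a3})
# expected: 0.9544 - 0.5 * 0.8113 - 0.5 * 0 ≈ 0.5488 bits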
# end