-
-
Save oskar-j/83a29cd48516dbee0c7c to your computer and use it in GitHub Desktop.
Entropy in decision tree
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The main difference between MATLAB's bundled entropy() function
# and this custom function is that the bundled one first converts its input to uint8
# and is used mostly for signal processing,
# while this one is a straightforward solution, useful e.g. for learning decision trees
function f = my_entropy(data, weighted, information_gain)
  # Shannon entropy (in bits) of the count vectors stored in a cell array.
  #
  # Every cell data{r,c} is one "bucket" (e.g. a leaf of a decision tree):
  # a numeric vector of class counts.  Zero counts are removed before the
  # computation, so 0*log2(0) never occurs; a bucket without any non-zero
  # count has entropy 0.
  #
  # Arguments:
  #   data              cell array (rows x cols) of numeric count vectors;
  #                     row and column vectors are both accepted.
  #   weighted          optional, default false.
  #                     false: one entropy per bucket, returned as a
  #                            1-by-(rows*cols) row vector ordered row by row.
  #                     true:  one value per row of DATA (1-by-rows): the mean
  #                            of that row's bucket entropies, each weighted by
  #                            the bucket's share of the row's total count.
  #   information_gain  optional, default false.  Kept for interface
  #                     compatibility only: no Kullback-Leibler computation is
  #                     implemented, so a true value raises a warning and is
  #                     otherwise ignored.
  #
  # Returns:
  #   f                 row vector as described under WEIGHTED ([] if DATA is
  #                     an empty cell array).
  if nargin < 2
    weighted = false;
  end
  if nargin < 3
    information_gain = false;
  end

  if ~iscell(data)
    error('my_entropy:invalidInput', ...
          'my_entropy: DATA must be a cell array of count vectors');
  end
  if information_gain
    warning('my_entropy:notImplemented', ...
            'my_entropy: INFORMATION_GAIN is not implemented and is ignored');
  end

  n_rows = size(data, 1);
  n_cols = size(data, 2);
  if n_rows == 0 || n_cols == 0
    f = [];
    return;
  end

  bucket_entropy = zeros(n_rows, n_cols);  # entropy of every bucket
  bucket_total   = zeros(n_rows, n_cols);  # number of samples in every bucket

  for r = 1:n_rows
    for c = 1:n_cols  # in most cases this will be 1:1
      counts = data{r,c}(:);            # flatten: orientation must not matter
      counts = counts(counts ~= 0);     # drop empty classes
      total = sum(counts);
      p = counts / total;               # empty when the bucket has no samples
      h = -sum(p .* log2(p));
      if h == 0
        h = 0;                          # replace IEEE negative zero by +0
      end
      bucket_entropy(r, c) = h;
      bucket_total(r, c) = total;
    end
  end

  if weighted
    # Size-weighted average of the bucket entropies within each row.
    row_total = sum(bucket_total, 2);
    has_samples = (row_total ~= 0);     # rows without samples keep entropy 0
    f = zeros(1, n_rows);
    weighted_sum = sum(bucket_entropy .* bucket_total, 2);
    f(has_samples) = (weighted_sum(has_samples) ./ row_total(has_samples)).';
  else
    # Row-major flattening: (1,1), (1,2), ..., (2,1), (2,2), ...
    f = reshape(bucket_entropy.', 1, []);
  end
end
# test cases
# Fixtures: every cell is one bucket of class counts; ';' starts a new row.
# Statements are left without a trailing ';' on purpose so values are printed.
cell1 = { [16];[16];[2 2 2 2 2 2 2 2];[12];[16] }
cell2 = { [16],[12];[16],[2];[2 2 2 2 2 2 2 2],[8 8];[12],[8 8];[16],[8 8] }
cell3 = { [16],[3 3];[16],[2];[2 2 2 2 2 2 2 2],[2 2];[12],[2];[16],[2] }
a1 = [ 100 60 ]
a2 = [ 20 60 ]
a3 = [ 80 0 ]
entr = my_entropy({a1;a2;a3}, false)
# Per-bucket entropies of the cell fixtures (one value per bucket, row by row).
entr1 = my_entropy(cell1)
entr2 = my_entropy(cell2)
entr3 = my_entropy(cell3)
# end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment