Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
IE381 - Calculating GINI and Entropy
import math
def cal_entropy(lst):
    """Return the Shannon entropy (base 2) of a list of class counts.

    Each element of ``lst`` is treated as the number of samples in one
    class; the counts are normalised by their sum to obtain proportions.

    Args:
        lst: iterable-safe sequence of non-negative numeric counts.

    Returns:
        The entropy in bits as a float. Returns 0.0 when ``lst`` is empty
        or when all counts are zero (there is nothing to be uncertain about).
    """
    total = float(sum(lst))
    if total == 0:
        # No samples at all: avoid dividing by zero below.
        return 0.0

    entropy = 0.0
    for count in lst:
        if count == 0:
            # math.log(0) raises ValueError; by convention the term
            # p * log2(p) is taken as 0 when p == 0 (its limit as p -> 0).
            continue
        proportion = count / total
        entropy -= proportion * math.log(proportion, 2)
    return entropy
def cal_gini(lst):
    """Return the Gini impurity of a list of class counts.

    Computed as ``1 - sum(p_i ** 2)`` where ``p_i`` is each count divided
    by the sum of all counts.

    Args:
        lst: sequence of non-negative numeric counts, one per class.

    Returns:
        The Gini impurity as a float. Returns 0.0 when ``lst`` is empty or
        when all counts are zero, since a node without samples has no
        impurity (and the proportions would otherwise be undefined).
    """
    total = float(sum(lst))
    if total == 0:
        # No samples: avoid ZeroDivisionError and report "no impurity".
        return 0.0

    impurity = 1.0
    for count in lst:
        # Subtract term by term so results for valid input match the
        # previous implementation exactly.
        impurity -= (count / total) ** 2
    return impurity
def entropy_average(attr1, attr2):
    """Return the size-weighted average entropy of two groups of counts.

    Each group's entropy (from ``cal_entropy``) is weighted by that group's
    share of the combined total of both groups.

    Args:
        attr1: class counts of the first group.
        attr2: class counts of the second group.

    Returns:
        The weighted average entropy as a float.
    """
    entropy_first = cal_entropy(attr1)
    entropy_second = cal_entropy(attr2)

    size_first = sum(attr1)
    size_second = sum(attr2)
    # Float denominator keeps the weights fractional under Python 2 as well.
    combined = float(size_first + size_second)

    weighted_first = (size_first / combined) * entropy_first
    weighted_second = (size_second / combined) * entropy_second
    return weighted_first + weighted_second
def gini_average(attr1, attr2):
    """Return the size-weighted average Gini impurity of two groups of counts.

    Each group's impurity (from ``cal_gini``) is weighted by that group's
    share of the combined total of both groups.

    Args:
        attr1: class counts of the first group.
        attr2: class counts of the second group.

    Returns:
        The weighted average Gini impurity as a float.
    """
    gini_first = cal_gini(attr1)
    gini_second = cal_gini(attr2)

    size_first = sum(attr1)
    size_second = sum(attr2)
    # Float denominator keeps the weights fractional under Python 2 as well.
    combined = float(size_first + size_second)

    weighted_first = (size_first / combined) * gini_first
    weighted_second = (size_second / combined) * gini_second
    return weighted_first + weighted_second
# Example input: two groups of counts.
# NOTE(review): presumably the class counts in the "no" and "yes" branches
# of a candidate split -- confirm against the course exercise.
no = [2,3]
yes = [2,3,4]

# Each print takes a single pre-formatted string, so the statements parse
# and produce the same text under both Python 2 (print statement with a
# parenthesised expression) and Python 3 (print function).
print("no: %s yes: %s" % (cal_gini(no), cal_gini(yes)))
print("gini: %s" % gini_average(no, yes))
print("-")
print("no: %s yes: %s" % (cal_entropy(no), cal_entropy(yes)))
print("entropy: %s" % entropy_average(no, yes))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.