Skip to content

Instantly share code, notes, and snippets.

@ShigekiKarita
Created November 4, 2014 18:40
Show Gist options
  • Save ShigekiKarita/d192772e1373d60eada3 to your computer and use it in GitHub Desktop.
Save ShigekiKarita/d192772e1373d60eada3 to your computer and use it in GitHub Desktop.
descision_tree.py
from math import log2
from csv import reader
from collections import Counter
from sys import argv
def transpose(xx):
return map(list, zip(*xx))
def self_entropy(x):
return x * log2(x)
def entropy(xs):
ps = [xs.count(k) / len(xs) for k in set(xs)]
return - sum(map(self_entropy, ps))
def qualified_entropy(labels, xs):
def dict_ps(xs):
return {k: xs.count(k) for k in set(xs)}
qualifiers = dict_ps(xs)
pairs = dict_ps(list(zip(xs, labels))) # (attr, class): P
ret = 0.0
for k, v in pairs.items():
q = qualifiers[k[0]]
ret += q / len(xs) * self_entropy(v / q)
return ret
def gain(values, x):
labels = values["class"]
xs = values[x]
return entropy(labels) + qualified_entropy(labels, xs)
def heads(xs):
return [x for x in xs if x[1] == xs[0][1]]
def deeper(data):
values = {xs[0]: xs[1:] for xs in transpose(data)}
attributes = [x for x in data[0] if x != "class"]
gains = {x: gain(values, x) for x in attributes}
gains = sorted(gains.items(), key=lambda x: -x[1])
print("gain:", gains)
max_attr = heads(gains)[0][0]
max_values = heads(Counter(values[max_attr]).most_common())
print("next:", max_attr, "/", max_values[0][0])
next_attrs = [x for x in data[0] if x != max_attr]
next_values = [xs[1:] for xs in data if max_values[0][0] in xs]
return [next_attrs] + next_values
if __name__ == '__main__':
if len(argv) == 1:
argv.append('table.csv')
data = list(reader(open(argv[1], 'r')))
while len(data[0]) != 1:
data = deeper(data)
Outlook Temperature Humidity Windy class
sunny hot high FALSE N
sunny hot high TRUE N
overcast hot high FALSE P
rain mild high FALSE P
rain cool normal FALSE P
rain cool normal TRUE N
overcast cool normal TRUE P
sunny mild high FALSE N
sunny cool normal FALSE P
rain mild normal FALSE P
sunny mild normal TRUE P
overcast mild high TRUE P
overcast hot normal FALSE P
rain mild high TRUE N
age of the patient spectacle prescription astigmatic tear production rate class
young myope no normal soft
young myope yes abnormal hard
young hypermetrope yes normal soft
pre-presbyopic hypermetrope yes normal hard
pre-presbyopic myope no abnormal soft
pre-presbyopic myope no normal hard
pre-presbyopic hypermetrope no normal soft
presbyopic myope yes abnormal hard
presbyopic hypermetrope yes normal soft
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment