Skip to content

Instantly share code, notes, and snippets.

@danielmoralesp
Created July 1, 2019 23:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save danielmoralesp/22305592536e50f2b280b25b98e4ccb5 to your computer and use it in GitHub Desktop.
Save danielmoralesp/22305592536e50f2b280b25b98e4ccb5 to your computer and use it in GitHub Desktop.
tree.py
from collections import Counter
def split(dataset, labels, column):
    """Partition rows and their labels by the value held in one column.

    Args:
        dataset: Sequence of rows; each row is indexed with ``column``.
        labels: Sequence parallel to ``dataset`` (``labels[i]`` belongs to
            ``dataset[i]``).
        column: Index (or key) of the feature to split on.

    Returns:
        A ``(data_subsets, label_subsets)`` pair of lists. Entry ``j`` of
        each list holds the rows / labels whose ``column`` value equals the
        ``j``-th distinct value, with distinct values in ascending order.
    """
    # Distinct values must be hashable (set) and mutually orderable (sort).
    distinct_values = sorted({row[column] for row in dataset})

    data_subsets = []
    label_subsets = []
    for value in distinct_values:
        # Positions of every row carrying this value, in original order.
        matching = [
            idx for idx, row in enumerate(dataset) if row[column] == value
        ]
        data_subsets.append([dataset[idx] for idx in matching])
        label_subsets.append([labels[idx] for idx in matching])
    return data_subsets, label_subsets
def gini(dataset):
    """Return the Gini impurity of a collection of labels.

    The impurity starts at 1 and the squared relative frequency of every
    distinct label is subtracted from it. A collection with a single
    distinct label therefore yields 0; an empty collection yields the
    untouched starting value 1.

    Args:
        dataset: Sized collection of hashable labels.

    Returns:
        The impurity as a number (``int`` 1 for empty input, else ``float``).
    """
    impurity = 1
    for occurrences in Counter(dataset).values():
        share = occurrences / len(dataset)
        impurity -= share ** 2
    return impurity
def information_gain(starting_labels, split_labels):
    """Return the impurity reduction achieved by a split.

    Computes ``gini(starting_labels)`` minus the Gini impurity of every
    subset, each weighted by that subset's share of ``starting_labels``.

    Args:
        starting_labels: Sized collection of labels before the split.
        split_labels: Iterable of label subsets produced by the split.

    Returns:
        The unsplit impurity minus the weighted impurity of the subsets.

    Raises:
        ZeroDivisionError: If ``starting_labels`` is empty while
            ``split_labels`` contains at least one subset.
    """
    remaining = gini(starting_labels)
    for part in split_labels:
        # Weight each subset's impurity by the fraction of labels it holds.
        weighted = gini(part) * len(part) / len(starting_labels)
        remaining -= weighted
    return remaining
class Leaf:
    """Terminal tree node that records how often each label occurred."""

    def __init__(self, labels):
        """Tally ``labels`` and keep the result on the instance.

        Args:
            labels: Iterable of hashable labels that reached this leaf.
        """
        # predictions: Counter mapping each label to its occurrence count.
        label_tally = Counter(labels)
        self.predictions = label_tally
class Internal_Node:
    """Non-terminal tree node: a split feature plus its child branches."""

    def __init__(self, feature, branches):
        """Store both constructor arguments unchanged on the instance.

        Args:
            feature: Identifier of the feature this node splits on
                (presumably a column index -- confirm against the caller).
            branches: Children reached from this node (presumably one per
                distinct feature value -- confirm against the caller).
        """
        self.feature, self.branches = feature, branches
def print_tree(node, spacing=""):
    """Print a nested-sequence decision tree to stdout.

    A ``Counter`` is treated as a leaf and printed with ``str``. Anything
    else is treated as a sequence of child nodes: a ``Splitting`` line is
    printed, then each child is printed under a ``--> Branch <i>:`` header
    with one extra space of indentation.

    NOTE: only ``Counter`` leaves and iterable internal nodes are handled;
    ``Leaf`` / ``Internal_Node`` instances are not unpacked here.

    Args:
        node: A ``Counter`` leaf or an iterable of child nodes.
        spacing: Prefix prepended to every line printed for ``node``.
    """
    # Base case: a Counter is a leaf holding label tallies.
    if isinstance(node, Counter):
        print(spacing + str(node))
        return

    print(spacing + "Splitting")

    # Recurse into every child branch, labelled by its position.
    for index, child in enumerate(node):
        print(spacing + "--> Branch " + str(index) + ":")
        print_tree(child, spacing + " ")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment