Rahul J (lethalbrains)
decision_tree.py

import numpy as np
import pandas as pd


class DecisionTree:
    def __init__(self, max_depth=6, depth=1):
        self.max_depth = max_depth
        self.depth = depth
        self.left = None
        self.right = None
predict.py

def predict(self, data):
    return np.array([self.__flow_data_thru_tree(row) for _, row in data.iterrows()])

def __flow_data_thru_tree(self, row):
    if self.is_leaf_node: return self.probability
    tree = self.left if row[self.split_feature] <= self.criteria else self.right
    return tree.__flow_data_thru_tree(row)
is_leaf_node.py

@property
def is_leaf_node(self): return self.left is None

@property
def probability(self):
    return self.data[self.target].value_counts().apply(lambda x: x / len(self.data)).tolist()
__depth.py

def __init__(self, max_depth=4, depth=1):
    self.max_depth = max_depth
    self.depth = depth
    self.left = None
    self.right = None

def __create_branches(self):
    self.left = DecisionTree(max_depth=self.max_depth,
                             depth=self.depth + 1)
    self.right = DecisionTree(max_depth=self.max_depth,
                              depth=self.depth + 1)
__create_branches.py

def __create_branches(self):
    self.left = DecisionTree()
    self.right = DecisionTree()
    left_rows = self.data[self.data[self.split_feature] <= self.criteria]
    right_rows = self.data[self.data[self.split_feature] > self.criteria]
    self.left.fit(data=left_rows, target=self.target)
    self.right.fit(data=right_rows, target=self.target)
__find_best_split.py

def __find_best_split(self):
    best_split = {}
    for col in self.independent:
        information_gain, split = self.__find_best_split_for_column(col)
        if split is None: continue
        if not best_split or best_split["information_gain"] < information_gain:
            best_split = {"split": split,
                          "col": col,
                          "information_gain": information_gain}
    return best_split["split"], best_split["col"]
__find_best_split_for_column.py

def __find_best_split_for_column(self, col):
    x = self.data[col]
    unique_values = x.unique()
    if len(unique_values) == 1: return None, None
    information_gain = None
    split = None
    for val in unique_values:
        left = x <= val
        right = x > val
        left_data = self.data[left]
__calculate_impurity_score.py

def __calculate_impurity_score(self, data):
    # Gini impurity for a binary target (assumes exactly two classes): 2 * p * (1 - p)
    if data is None or data.empty: return 0
    p_i, _ = data.value_counts().apply(lambda x: x / len(data)).tolist()
    return p_i * (1 - p_i) * 2
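
A quick way to sanity-check that expression (a hypothetical example, not part of the original gist): for a binary target, Gini impurity is 1 - p^2 - (1 - p)^2, which simplifies to 2 * p * (1 - p), the value returned above.

import pandas as pd

# hypothetical check: 6 positives and 4 negatives -> p = 0.6
data = pd.Series([1] * 6 + [0] * 4)
p_i, _ = data.value_counts().apply(lambda x: x / len(data)).tolist()
print(p_i * (1 - p_i) * 2)  # 0.48, same as 1 - 0.6**2 - 0.4**2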
crude_init_dtree.py

def __init__(self):
    self.left = None
    self.right = None
lethalbrains / decision_tree_crude_v1.py (last active Aug 8, 2018)
Gist for the "Learn ML Algorithms by coding" blog.

decision_tree_crude_v1.py
import numpy as np


class DecisionTree:
    def fit(self, data, target):
        self.data = data
        self.target = target
        self.independent = self.data.columns.tolist()
        self.independent.remove(target)

    def predict(self, data):
        return np.array([self.__flow_data_thru_tree(row) for row in data.values])
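
To see how the crude version's fit behaves, here is a hypothetical usage sketch (the DataFrame and column names are made up for illustration and are not part of the original gist):

import pandas as pd

# hypothetical toy dataset: two feature columns and a binary target
df = pd.DataFrame({"age": [22, 47, 30, 56],
                   "income": [30, 90, 45, 80],
                   "bought": [0, 1, 0, 1]})

tree = DecisionTree()
tree.fit(data=df, target="bought")
print(tree.independent)  # ['age', 'income'] -- every column except the target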