Instantly share code, notes, and snippets.

# Rahul J (lethalbrains)

• Sort options
Last active Aug 14, 2018
View decision_tree.py
 import numpy as np
 import pandas as pd


 class DecisionTree:
     """A single node of a binary decision tree.

     Each node records the depth limit it was given, its own depth, and
     references to a left and a right child, both initially absent.
     """

     def __init__(self, max_depth=6, depth=1):
         """Store the depth settings and start with no children.

         Args:
             max_depth: Depth limit stored on the node (default 6).
             depth: Depth recorded for this node (default 1).
         """
         self.max_depth, self.depth = max_depth, depth
         self.left = self.right = None
Created Aug 8, 2018
View predict.py
 def predict(self, data):
     """Run every row of ``data`` through the tree.

     Args:
         data: Object exposing ``iterrows()``; each yielded row is routed
             through the tree.

     Returns:
         A numpy array holding one routing result per row, in row order.
     """
     outputs = []
     for _, record in data.iterrows():
         outputs.append(self.__flow_data_thru_tree(record))
     return np.array(outputs)

 def __flow_data_thru_tree(self, row):
     """Route one row down the tree and return the reached leaf's probability.

     A leaf answers with ``self.probability``. Any other node sends the row
     to the left child when ``row[self.split_feature] <= self.criteria`` and
     to the right child otherwise, then lets that child continue the routing.
     """
     if self.is_leaf_node:
         return self.probability

     if row[self.split_feature] <= self.criteria:
         branch = self.left
     else:
         branch = self.right
     return branch.__flow_data_thru_tree(row)
Created Aug 8, 2018
View is_leaf_node.py
 @property
 def is_leaf_node(self):
     """Whether this node has no left child."""
     has_no_left_child = self.left is None
     return has_no_left_child

 @property
 def probability(self):
     """Class shares of the target column among this node's rows.

     Returns:
         A list of floats: each distinct target value's count divided by
         the number of rows in ``self.data``. The entries follow the order
         produced by ``value_counts()`` (most frequent value first), not a
         fixed class order.
     """
     class_counts = self.data[self.target].value_counts()
     row_total = len(self.data)
     return (class_counts / row_total).tolist()
Created Aug 8, 2018
View __depth.py
 def __init__(self, max_depth = 4, depth = 1): self.max_depth = max_depth self.depth = depth self.left = None self.right = None def __create_branches(self): self.left = DecisionTree(max_depth = self.max_depth, depth = self.depth + 1) self.right = DecisionTree(max_depth = self.max_depth,
Last active Aug 8, 2018
View __create_branches.py
 def __create_branches(self):
     """Create both children and fit each one on its share of the rows.

     Rows whose ``split_feature`` value is ``<= self.criteria`` go to the
     left child; rows whose value is ``> self.criteria`` go to the right
     child. Both children are fitted against the same target.
     """
     self.left, self.right = DecisionTree(), DecisionTree()

     feature_values = self.data[self.split_feature]
     assignments = (
         (self.left, self.data[feature_values <= self.criteria]),
         (self.right, self.data[feature_values > self.criteria]),
     )
     for child, subset in assignments:
         child.fit(data=subset, target=self.target)
Last active Aug 8, 2018
View __find_best_split.py
 def __find_best_split(self):
     """Pick the split with the highest information gain over all features.

     Every column in ``self.independent`` is evaluated with
     ``__find_best_split_for_column``; columns that report no split
     (``split is None``) are ignored. On equal gains the column seen first
     wins, because a later candidate must be strictly better to replace it.

     Returns:
         A ``(split, col)`` tuple for the winning candidate, or
         ``(None, None)`` when no column offers a split. Previously that
         situation raised ``KeyError`` from indexing an empty dict.
     """
     best = None  # (information_gain, split, col) of the leading candidate
     for col in self.independent:
         information_gain, split = self.__find_best_split_for_column(col)
         if split is None:
             continue
         if best is None or best[0] < information_gain:
             best = (information_gain, split, col)

     if best is None:
         # Same "nothing found" convention the per-column helper uses.
         return None, None

     _, best_value, best_column = best
     return best_value, best_column
Created Aug 8, 2018
View __find_best_split_for_column.py
 def __find_best_split_for_column(self, col): x = self.data[col] unique_values = x.unique() if len(unique_values) == 1: return None, None information_gain = None split = None for val in unique_values: left = x <= val right = x > val left_data = self.data[left]
Last active Aug 8, 2018
View __calculate_impurity_score.py
 def __calculate_impurity_score(self, data):
     """Return the Gini impurity of the labels in ``data``.

     Args:
         data: Label values exposing ``empty`` and ``value_counts()``
             (e.g. a pandas Series), or ``None``.

     Returns:
         ``0`` for ``None``, empty, or uncountable input; otherwise the Gini
         impurity computed from each distinct value's share of ``len(data)``.
         A single-class input scores ``0`` and any number of classes is
         accepted. Previously anything other than exactly two classes
         failed while unpacking the share list.
     """
     if data is None or data.empty:
         return 0

     proportions = (data.value_counts() / len(data)).tolist()
     if not proportions:
         # value_counts() drops missing values by default, so an all-missing
         # input has nothing to score.
         return 0

     if len(proportions) == 2:
         # Binary case: keep the original closed form 2 * p * (1 - p) so
         # scores (and therefore split choices) stay bit-for-bit the same.
         p_i = proportions[0]
         return p_i * (1 - p_i) * 2

     # General Gini impurity: 1 - sum(p_k ** 2).
     return 1 - sum(share * share for share in proportions)
Created Aug 7, 2018
View crude_init_dtree.py
 def __init__(self):
     """Start the node without a left or a right child."""
     self.left = self.right = None
Last active Aug 8, 2018
Gist for "Learn ML Algorithms by coding" Blog
View decision_tree_crude_v1.py
 class DecisionTree:
     """First-cut decision tree: remembers training data and predicts row by row."""

     def fit(self, data, target):
         """Remember the training frame and work out the feature columns.

         Args:
             data: Training frame; its column labels define the features.
             target: Label of the column to predict. It is removed from the
                 feature list, so it must be one of ``data``'s columns.
         """
         self.data, self.target = data, target
         self.independent = self.data.columns.tolist()
         self.independent.remove(target)

     def predict(self, data):
         """Return a numpy array with one routing result per row of ``data.values``."""
         results = list(map(self.__flow_data_thru_tree, data.values))
         return np.array(results)
You can’t perform that action at this time.