# All snippets below are methods excerpted from larger classes and assume numpy imported as np.

def __init_leaf(self, y):
    '''Return the value the model is initialized to predict.'''
    y = y.astype(int)
    class_one_count = np.bincount(y.flatten())[1]
    proba = float(class_one_count) / y.shape[0]
    # Guard: the log odds are undefined when proba is exactly 0 or 1.
    proba = np.clip(proba, 1e-10, 1 - 1e-10)
    log_odds = np.log(proba / (1 - proba))
    # Nudge an exactly-zero initialisation (perfectly balanced classes) off zero.
    if log_odds == 0:
        log_odds = 0.01
    return log_odds
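
For intuition, the initial prediction is just the log odds of the positive class over the training labels. A minimal standalone check on toy data (not part of the gist):

import numpy as np

y = np.array([1, 1, 1, 0])          # three positives, one negative
proba = y.mean()                    # P(class 1) = 0.75
print(np.log(proba / (1 - proba)))  # ~1.0986: the value the ensemble starts from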
def __predict(self, subtree, val):
    '''Predict the class probabilities of an instance.'''
    if val.ndim == 0:
        val = np.array([val])
    if subtree.decision is None:
        # Leaf node: return the stored class probabilities.
        return subtree.prediction
    elif val[int(subtree.decision[1])] > subtree.decision[0]:
        # decision = (threshold, feature index); above the threshold goes left.
        return self.__predict(subtree.left, val)
    else:
        return self.__predict(subtree.right, val)
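
A minimal sketch of that traversal convention, assuming each internal node stores decision = (threshold, feature_index) and leaves store class probabilities; Node here is a hypothetical stand-in for the gist's tree-node class:

from types import SimpleNamespace as Node  # hypothetical stand-in, not the gist's class
import numpy as np

leaf_left = Node(decision=None, prediction=np.array([0.9, 0.1]))
leaf_right = Node(decision=None, prediction=np.array([0.2, 0.8]))
root = Node(decision=(2.5, 0), left=leaf_left, right=leaf_right)

val = np.array([3.0])
node = root
while node.decision is not None:
    # Feature value above the threshold goes left, matching __predict.
    node = node.left if val[int(node.decision[1])] > node.decision[0] else node.right
print(node.prediction)  # [0.9 0.1] since 3.0 > 2.5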
def __fit(self, subtree, X, y, curr_depth):
    '''
    If the dataset does not already have a squared error of zero and the
    regularization parameters have not been satisfied, create and add a node
    to the decision tree that predicts a more accurate target value for the
    instances in the given dataset, then recursively call __fit to create
    the child nodes.
    '''
    # The stopping condition is truncated in the gist preview; the clauses
    # after max_depth are assumptions based on the docstring.
    if (
        curr_depth > self.max_depth or
        X.shape[0] < self.min_samples_split or
        self.MSE(y[:, 0]) == 0
    ):
        return
def CART(self, X, y):
    '''The CART algorithm for building decision trees.'''
    splits, cols = self.get_split_points(X)
    splits = np.c_[splits, cols]
    best = np.inf
    best_split = None
    for split_pt in splits:
        lower, upper, y_lower, y_upper = self.split(split_pt[0], int(split_pt[1]), X, y)
        mse_lower = self.MSE(y_lower[:, 0])
        # The rest of the loop is truncated in the gist preview; the completion
        # below follows the standard CART recipe (minimise the summed child error).
        mse_upper = self.MSE(y_upper[:, 0])
        if mse_lower + mse_upper < best:
            best = mse_lower + mse_upper
            best_split = split_pt
    return best_split
def MSE(self, y):
    '''
    Calculate the squared error from predicting the mean target value of the
    instances at the node. Note: despite the name, this is the *sum* of
    squared errors, not the mean, so the errors of sibling nodes can be added
    directly when comparing candidate splits.
    '''
    y_hat = np.mean(y)
    mse = ((y_hat - y) ** 2).sum()
    return mse
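
A worked toy example of why the summed child errors score a split (not from the gist):

import numpy as np

def sse(y):
    return ((y - y.mean()) ** 2).sum()

y_lower = np.array([1.0, 1.2, 0.9])
y_upper = np.array([4.8, 5.1])
print(sse(y_lower) + sse(y_upper))  # ~0.09: a near-pure split scores close to zero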
def split(self, split_val, col, X, y):
    '''Split the given dataset based on the given feature index and threshold value.'''
    lower = []
    upper = []
    y_lower = []
    y_upper = []
    for i in range(X.shape[0]):
        if X[i, col] < split_val:
            lower.append(X[i])
            y_lower.append(y[i])
        else:
            upper.append(X[i])
            y_upper.append(y[i])
    return np.array(lower), np.array(upper), np.array(y_lower), np.array(y_upper)
def get_split_points(self, X):
    '''Calculate the candidate splitting points in the data.'''
    cols = []
    split_pts = []
    for i, x in enumerate(np.sort(X.T)):
        x = np.unique(x)
        for j in range(x.shape[0] - 1):
            # Candidate thresholds are the midpoints between consecutive unique values.
            split = (x[j] + x[j + 1]) / 2.0
            split_pts.append(split)
            cols.append(i)
    return np.array(split_pts), np.array(cols)
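
A quick standalone check of the midpoint rule on a toy feature column:

import numpy as np

x = np.unique(np.array([3.0, 1.0, 2.0, 2.0]))  # sorted unique values: [1. 2. 3.]
midpoints = (x[:-1] + x[1:]) / 2.0
print(midpoints)                               # [1.5 2.5]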
def predict(self, X):
    '''Predict the class of an instance.'''
    try:
        # Two vote accumulators, as AdaBoost is a binary classifier.
        # (Floats, so fractional influence values are not truncated.)
        preds = np.zeros(2)
        for estimator, say in zip(self.estimators, self.influence):
            pred = estimator.predict(X)
            if pred:
                preds[0] += say
            else:
                preds[1] += say
        # Return the class with the larger weighted vote.
        return 1 if preds[0] > preds[1] else 0
    # The except clause is truncated in the gist preview; assumed to guard
    # against calling predict before fit.
    except AttributeError:
        raise RuntimeError('fit must be called before predict.')
def fit(self, X, y):
    '''Fit model to the training set.'''
    # Append sample weights as last column, initialised uniformly to 1/n.
    X = np.c_[X, np.full((X.shape[0], 1), 1 / float(X.shape[0]))]
    self.estimators = []
    self.influence = []
    params = self.__get_estimator_params()
    for i in range(self.n_estimators):
        # The loop body is truncated in the gist preview. A standard AdaBoost
        # round fits a weak learner, records its influence ("say"), and calls
        # __mod_datasets to resample towards misclassified instances.
        ...
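
The gist does not show how the influence values are computed; the standard AdaBoost weight is alpha = 0.5 * ln((1 - err) / err), sketched below as an assumption:

import numpy as np

def influence(error, eps=1e-10):
    # Sketch of the usual AdaBoost "say"; an assumption, since the gist's
    # computation is not shown.
    error = np.clip(error, eps, 1 - eps)   # guard against log(0) and division by zero
    return 0.5 * np.log((1 - error) / error)

print(influence(0.1))   # ~1.10: an accurate stump gets a large say
print(influence(0.5))   # 0.0: a coin-flip stump gets no say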
def __mod_datasets(self, X, y):
    '''Resample the dataset to place more emphasis on misclassified instances.'''
    temp_X = np.zeros(X.shape)
    temp_y = np.zeros(y.shape)
    # __order_weights is assumed to turn the weight column into a cumulative
    # distribution, so rows can be drawn by inverse-transform sampling.
    X = self.__order_weights(X)
    for i in range(X.shape[0]):
        val = np.random.rand()
        idx = 0
        # Walk the cumulative weights until the random draw is covered.
        while val > X[idx, -1]:
            idx += 1
        temp_X[i] = X[idx]
        temp_y[i] = y[idx]
    return temp_X, temp_y
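
A self-contained sketch of the same resampling idea, again assuming __order_weights yields a cumulative weight column; np.searchsorted replaces the explicit while loop:

import numpy as np

rng = np.random.default_rng(0)
weights = np.array([0.1, 0.6, 0.3])    # normalised sample weights
cumulative = np.cumsum(weights)        # [0.1, 0.7, 1.0]
draws = rng.random(8)
indices = np.searchsorted(cumulative, draws)
print(indices)                         # heavier rows (index 1) appear most often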