Skip to content

Instantly share code, notes, and snippets.

@fuwiak
Created June 12, 2021 13:36
Show Gist options
  • Save fuwiak/89fc885a0a78ac348322b962b6fefac6 to your computer and use it in GitHub Desktop.
Save fuwiak/89fc885a0a78ac348322b962b6fefac6 to your computer and use it in GitHub Desktop.
from __future__ import division, print_function
import numpy as np
import progressbar
from mlfromscratch.utils import train_test_split, standardize, to_categorical
from mlfromscratch.utils import mean_squared_error, accuracy_score
from mlfromscratch.deep_learning.loss_functions import SquareLoss, CrossEntropy
from mlfromscratch.supervised_learning.decision_tree import RegressionTree
from mlfromscratch.utils.misc import bar_widgets
class GradientBoosting(object):
def __init__(self, n_estimators, learning_rate, min_samples_split,
min_impurity, max_depth, regression):
self.n_estimators = n_estimators
self.learning_rate = learning_rate
self.min_samples_split = min_samples_split
self.min_impurity = min_impurity
self.max_depth = max_depth
self.regression = regression
self.bar = progressbar.ProgressBar(widgets=bar_widgets)
# Square loss for regression
# Log loss for classification
self.loss = SquareLoss()
if not self.regression:
self.loss = CrossEntropy()
# Initialize regression trees
self.trees = []
for _ in range(n_estimators):
tree = RegressionTree(
min_samples_split=self.min_samples_split,
min_impurity=min_impurity,
max_depth=self.max_depth)
self.trees.append(tree)
def fit(self, X, y):
y_pred = np.full(np.shape(y), np.mean(y, axis=0))
for i in self.bar(range(self.n_estimators)):
gradient = self.loss.gradient(y, y_pred)
self.trees[i].fit(X, gradient)
update = self.trees[i].predict(X)
# Update y prediction
y_pred -= np.multiply(self.learning_rate, update)
def predict(self, X):
y_pred = np.array([])
# Make predictions
for tree in self.trees:
update = tree.predict(X)
update = np.multiply(self.learning_rate, update)
y_pred = -update if not y_pred.any() else y_pred - update
if not self.regression:
# Turn into probability distribution
y_pred = np.exp(y_pred) / np.expand_dims(np.sum(np.exp(y_pred), axis=1), axis=1)
# Set label to the value that maximizes probability
y_pred = np.argmax(y_pred, axis=1)
return y_pred
class GradientBoostingRegressor(GradientBoosting):
def __init__(self, n_estimators=200, learning_rate=0.5, min_samples_split=2,
min_var_red=1e-7, max_depth=4, debug=False):
super(GradientBoostingRegressor, self).__init__(n_estimators=n_estimators,
learning_rate=learning_rate,
min_samples_split=min_samples_split,
min_impurity=min_var_red,
max_depth=max_depth,
regression=True)
class GradientBoostingClassifier(GradientBoosting):
def __init__(self, n_estimators=200, learning_rate=.5, min_samples_split=2,
min_info_gain=1e-7, max_depth=2, debug=False):
super(GradientBoostingClassifier, self).__init__(n_estimators=n_estimators,
learning_rate=learning_rate,
min_samples_split=min_samples_split,
min_impurity=min_info_gain,
max_depth=max_depth,
regression=False)
def fit(self, X, y):
y = to_categorical(y)
super(GradientBoostingClassifier, self).fit(X, y)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment