Created
June 12, 2021 13:36
-
-
Save fuwiak/89fc885a0a78ac348322b962b6fefac6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import division, print_function | |
import numpy as np | |
import progressbar | |
from mlfromscratch.utils import train_test_split, standardize, to_categorical | |
from mlfromscratch.utils import mean_squared_error, accuracy_score | |
from mlfromscratch.deep_learning.loss_functions import SquareLoss, CrossEntropy | |
from mlfromscratch.supervised_learning.decision_tree import RegressionTree | |
from mlfromscratch.utils.misc import bar_widgets | |
class GradientBoosting(object):
    """Super class of GradientBoostingClassifier and GradientBoostingRegressor.

    Fits an additive ensemble of regression trees: each tree is trained on the
    gradient of the loss with respect to the current prediction, and the
    running prediction is stepped by `learning_rate` times each tree's output.

    Parameters:
    -----------
    n_estimators: int
        The number of regression trees (boosting stages) in the ensemble.
    learning_rate: float
        Step size applied to each tree's contribution.
    min_samples_split: int
        Minimum number of samples required to split a tree node.
    min_impurity: float
        Minimum impurity reduction required to split a tree node further.
    max_depth: int
        Maximum depth of each regression tree.
    regression: bool
        True for regression (square loss); False for classification
        (cross-entropy over one-hot targets).
    """
    def __init__(self, n_estimators, learning_rate, min_samples_split,
                 min_impurity, max_depth, regression):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.min_samples_split = min_samples_split
        self.min_impurity = min_impurity
        self.max_depth = max_depth
        self.regression = regression
        self.bar = progressbar.ProgressBar(widgets=bar_widgets)

        # Square loss for regression, log (cross-entropy) loss for classification.
        self.loss = SquareLoss() if self.regression else CrossEntropy()

        # One regression tree per boosting stage; each is later fit to the
        # loss gradient in fit().
        self.trees = [
            RegressionTree(
                min_samples_split=self.min_samples_split,
                min_impurity=min_impurity,
                max_depth=self.max_depth)
            for _ in range(n_estimators)]

    def fit(self, X, y):
        """Fit the ensemble to training data X with targets y."""
        # Initial prediction: the per-output mean of y, broadcast to y's shape.
        y_pred = np.full(np.shape(y), np.mean(y, axis=0))
        for i in self.bar(range(self.n_estimators)):
            gradient = self.loss.gradient(y, y_pred)
            # Fit the next tree to the gradient and step the prediction
            # against it (gradient descent in function space).
            self.trees[i].fit(X, gradient)
            update = self.trees[i].predict(X)
            y_pred -= np.multiply(self.learning_rate, update)

    def predict(self, X):
        """Predict targets (regression) or class labels (classification) for X."""
        # Accumulate the scaled contribution of every tree.
        # FIX: the original used `if not y_pred.any()` as a "first iteration"
        # test, which conflates "accumulator not started" with "accumulated
        # prediction happens to be all zeros"; a None sentinel is unambiguous.
        y_pred = None
        for tree in self.trees:
            update = np.multiply(self.learning_rate, tree.predict(X))
            y_pred = -update if y_pred is None else y_pred - update
        if not self.regression:
            # Numerically stable softmax: subtracting the row-wise max keeps
            # np.exp from overflowing to inf/nan for large scores (the
            # original form could); the resulting argmax is unchanged.
            shifted = y_pred - np.max(y_pred, axis=1, keepdims=True)
            exp_scores = np.exp(shifted)
            y_pred = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
            # Set label to the value that maximizes probability
            y_pred = np.argmax(y_pred, axis=1)
        return y_pred
class GradientBoostingRegressor(GradientBoosting):
    """Gradient-boosted regression model.

    Thin convenience wrapper around GradientBoosting that selects square loss
    (regression=True) and maps `min_var_red` onto the base class's
    `min_impurity` threshold.
    """
    def __init__(self, n_estimators=200, learning_rate=0.5, min_samples_split=2,
                 min_var_red=1e-7, max_depth=4, debug=False):
        # NOTE(review): `debug` is accepted for API compatibility but is not
        # forwarded anywhere by the base class.
        super(GradientBoostingRegressor, self).__init__(
            n_estimators=n_estimators,
            learning_rate=learning_rate,
            min_samples_split=min_samples_split,
            min_impurity=min_var_red,
            max_depth=max_depth,
            regression=True)
class GradientBoostingClassifier(GradientBoosting):
    """Gradient-boosted classification model.

    Thin convenience wrapper around GradientBoosting that selects
    cross-entropy loss (regression=False) and maps `min_info_gain` onto the
    base class's `min_impurity` threshold. Labels are one-hot encoded before
    training so the ensemble regresses one score column per class.
    """
    def __init__(self, n_estimators=200, learning_rate=.5, min_samples_split=2,
                 min_info_gain=1e-7, max_depth=2, debug=False):
        # NOTE(review): `debug` is accepted for API compatibility but is not
        # forwarded anywhere by the base class.
        super(GradientBoostingClassifier, self).__init__(
            n_estimators=n_estimators,
            learning_rate=learning_rate,
            min_samples_split=min_samples_split,
            min_impurity=min_info_gain,
            max_depth=max_depth,
            regression=False)

    def fit(self, X, y):
        """One-hot encode the class labels, then fit the base ensemble."""
        y_onehot = to_categorical(y)
        super(GradientBoostingClassifier, self).fit(X, y_onehot)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment