Created
June 12, 2021 13:36
-
-
Save fuwiak/89fc885a0a78ac348322b962b6fefac6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import division, print_function | |
import numpy as np | |
import progressbar | |
from mlfromscratch.utils import train_test_split, standardize, to_categorical | |
from mlfromscratch.utils import mean_squared_error, accuracy_score | |
from mlfromscratch.deep_learning.loss_functions import SquareLoss, CrossEntropy | |
from mlfromscratch.supervised_learning.decision_tree import RegressionTree | |
from mlfromscratch.utils.misc import bar_widgets | |
class GradientBoosting(object):
    """Super class of GradientBoostingClassifier and GradientBoostingRegressor.

    Fits an additive ensemble of regression trees: each tree is trained on the
    gradient of the loss with respect to the current prediction, and the
    running prediction is stepped by `learning_rate` times each tree's output.

    Parameters:
    -----------
    n_estimators: int
        The number of regression trees (boosting stages) in the ensemble.
    learning_rate: float
        Step size applied to each tree's contribution.
    min_samples_split: int
        Minimum number of samples required to split a tree node.
    min_impurity: float
        Minimum impurity reduction required to split a tree node further.
    max_depth: int
        Maximum depth of each regression tree.
    regression: bool
        True for regression (square loss); False for classification
        (cross-entropy over one-hot targets).
    """
    def __init__(self, n_estimators, learning_rate, min_samples_split,
                 min_impurity, max_depth, regression):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.min_samples_split = min_samples_split
        self.min_impurity = min_impurity
        self.max_depth = max_depth
        self.regression = regression
        self.bar = progressbar.ProgressBar(widgets=bar_widgets)

        # Square loss for regression, log (cross-entropy) loss for classification.
        self.loss = SquareLoss() if self.regression else CrossEntropy()

        # One regression tree per boosting stage; each is later fit to the
        # loss gradient in fit().
        self.trees = [
            RegressionTree(
                min_samples_split=self.min_samples_split,
                min_impurity=min_impurity,
                max_depth=self.max_depth)
            for _ in range(n_estimators)]

    def fit(self, X, y):
        """Fit the ensemble to training data X with targets y."""
        # Initial prediction: the per-output mean of y, broadcast to y's shape.
        y_pred = np.full(np.shape(y), np.mean(y, axis=0))
        for i in self.bar(range(self.n_estimators)):
            gradient = self.loss.gradient(y, y_pred)
            # Fit the next tree to the gradient and step the prediction
            # against it (gradient descent in function space).
            self.trees[i].fit(X, gradient)
            update = self.trees[i].predict(X)
            y_pred -= np.multiply(self.learning_rate, update)

    def predict(self, X):
        """Predict targets (regression) or class labels (classification) for X."""
        # Accumulate the scaled contribution of every tree.
        # FIX: the original used `if not y_pred.any()` as a "first iteration"
        # test, which conflates "accumulator not started" with "accumulated
        # prediction happens to be all zeros"; a None sentinel is unambiguous.
        y_pred = None
        for tree in self.trees:
            update = np.multiply(self.learning_rate, tree.predict(X))
            y_pred = -update if y_pred is None else y_pred - update
        if not self.regression:
            # Numerically stable softmax: subtracting the row-wise max keeps
            # np.exp from overflowing to inf/nan for large scores (the
            # original form could); the resulting argmax is unchanged.
            shifted = y_pred - np.max(y_pred, axis=1, keepdims=True)
            exp_scores = np.exp(shifted)
            y_pred = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
            # Set label to the value that maximizes probability
            y_pred = np.argmax(y_pred, axis=1)
        return y_pred
class GradientBoostingRegressor(GradientBoosting):
    """Gradient-boosted regression model.

    Thin convenience wrapper around GradientBoosting that selects square loss
    (regression=True) and maps `min_var_red` onto the base class's
    `min_impurity` threshold.
    """
    def __init__(self, n_estimators=200, learning_rate=0.5, min_samples_split=2,
                 min_var_red=1e-7, max_depth=4, debug=False):
        # NOTE(review): `debug` is accepted for API compatibility but is not
        # forwarded anywhere by the base class.
        super(GradientBoostingRegressor, self).__init__(
            n_estimators=n_estimators,
            learning_rate=learning_rate,
            min_samples_split=min_samples_split,
            min_impurity=min_var_red,
            max_depth=max_depth,
            regression=True)
class GradientBoostingClassifier(GradientBoosting):
    """Gradient-boosted classification model.

    Thin convenience wrapper around GradientBoosting that selects
    cross-entropy loss (regression=False) and maps `min_info_gain` onto the
    base class's `min_impurity` threshold. Labels are one-hot encoded before
    training so the ensemble regresses one score column per class.
    """
    def __init__(self, n_estimators=200, learning_rate=.5, min_samples_split=2,
                 min_info_gain=1e-7, max_depth=2, debug=False):
        # NOTE(review): `debug` is accepted for API compatibility but is not
        # forwarded anywhere by the base class.
        super(GradientBoostingClassifier, self).__init__(
            n_estimators=n_estimators,
            learning_rate=learning_rate,
            min_samples_split=min_samples_split,
            min_impurity=min_info_gain,
            max_depth=max_depth,
            regression=False)

    def fit(self, X, y):
        """One-hot encode the class labels, then fit the base ensemble."""
        y_onehot = to_categorical(y)
        super(GradientBoostingClassifier, self).fit(X, y_onehot)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment