# All snippets below are methods excerpted from larger classes and assume numpy imported as np.

def __init_leaf(self, y):
    '''Return the value the model is initialized to predict.'''
    y = y.astype(int)
    class_one_count = np.bincount(y.flatten())[1]
    proba = float(class_one_count) / y.shape[0]
    # Guard: the log odds are undefined when proba is exactly 0 or 1.
    proba = np.clip(proba, 1e-10, 1 - 1e-10)
    log_odds = np.log(proba / (1 - proba))
    # Nudge an exactly-zero initialisation (perfectly balanced classes) off zero.
    if log_odds == 0:
        log_odds = 0.01
    return log_odds
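
For intuition, the initial prediction is just the log odds of the positive class over the training labels. A minimal standalone check on toy data (not part of the gist):

import numpy as np

y = np.array([1, 1, 1, 0])          # three positives, one negative
proba = y.mean()                    # P(class 1) = 0.75
print(np.log(proba / (1 - proba)))  # ~1.0986: the value the ensemble starts from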
def __predict(self, subtree, val):
    '''Predict the class probabilities of an instance.'''
    if val.ndim == 0:
        val = np.array([val])
    if subtree.decision is None:
        # Leaf node: return the stored class probabilities.
        return subtree.prediction
    elif val[int(subtree.decision[1])] > subtree.decision[0]:
        # decision = (threshold, feature index); above the threshold goes left.
        return self.__predict(subtree.left, val)
    else:
        return self.__predict(subtree.right, val)
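
A minimal sketch of that traversal convention, assuming each internal node stores decision = (threshold, feature_index) and leaves store class probabilities; Node here is a hypothetical stand-in for the gist's tree-node class:

from types import SimpleNamespace as Node  # hypothetical stand-in, not the gist's class
import numpy as np

leaf_left = Node(decision=None, prediction=np.array([0.9, 0.1]))
leaf_right = Node(decision=None, prediction=np.array([0.2, 0.8]))
root = Node(decision=(2.5, 0), left=leaf_left, right=leaf_right)

val = np.array([3.0])
node = root
while node.decision is not None:
    # Feature value above the threshold goes left, matching __predict.
    node = node.left if val[int(node.decision[1])] > node.decision[0] else node.right
print(node.prediction)  # [0.9 0.1] since 3.0 > 2.5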
def __fit(self, subtree, X, y, curr_depth):
    '''
    If the dataset does not already have a squared error of zero and the
    regularization parameters have not been satisfied, create and add a node
    to the decision tree that predicts a more accurate target value for the
    instances in the given dataset, then recursively call __fit to create
    the child nodes.
    '''
    # The stopping condition is truncated in the gist preview; the clauses
    # after max_depth are assumptions based on the docstring.
    if (
        curr_depth > self.max_depth or
        X.shape[0] < self.min_samples_split or
        self.MSE(y[:, 0]) == 0
    ):
        return
def CART(self, X, y):
    '''The CART algorithm for building decision trees.'''
    splits, cols = self.get_split_points(X)
    splits = np.c_[splits, cols]
    best = np.inf
    best_split = None
    for split_pt in splits:
        lower, upper, y_lower, y_upper = self.split(split_pt[0], int(split_pt[1]), X, y)
        mse_lower = self.MSE(y_lower[:, 0])
        # The rest of the loop is truncated in the gist preview; the completion
        # below follows the standard CART recipe (minimise the summed child error).
        mse_upper = self.MSE(y_upper[:, 0])
        if mse_lower + mse_upper < best:
            best = mse_lower + mse_upper
            best_split = split_pt
    return best_split
def MSE(self, y):
    '''
    Calculate the squared error from predicting the mean target value of the
    instances at the node. Note: despite the name, this is the *sum* of
    squared errors, not the mean, so the errors of sibling nodes can be added
    directly when comparing candidate splits.
    '''
    y_hat = np.mean(y)
    mse = ((y_hat - y) ** 2).sum()
    return mse
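
A worked toy example of why the summed child errors score a split (not from the gist):

import numpy as np

def sse(y):
    return ((y - y.mean()) ** 2).sum()

y_lower = np.array([1.0, 1.2, 0.9])
y_upper = np.array([4.8, 5.1])
print(sse(y_lower) + sse(y_upper))  # ~0.09: a near-pure split scores close to zero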
def split(self, split_val, col, X, y):
    '''Split the given dataset based on the given feature index and threshold value.'''
    lower = []
    upper = []
    y_lower = []
    y_upper = []
    for i in range(X.shape[0]):
        if X[i, col] < split_val:
            lower.append(X[i])
            y_lower.append(y[i])
        else:
            upper.append(X[i])
            y_upper.append(y[i])
    return np.array(lower), np.array(upper), np.array(y_lower), np.array(y_upper)
def get_split_points(self, X):
    '''Calculate the candidate splitting points in the data.'''
    cols = []
    split_pts = []
    for i, x in enumerate(np.sort(X.T)):
        x = np.unique(x)
        for j in range(x.shape[0] - 1):
            # Candidate thresholds are the midpoints between consecutive unique values.
            split = (x[j] + x[j + 1]) / 2.0
            split_pts.append(split)
            cols.append(i)
    return np.array(split_pts), np.array(cols)
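
A quick standalone check of the midpoint rule on a toy feature column:

import numpy as np

x = np.unique(np.array([3.0, 1.0, 2.0, 2.0]))  # sorted unique values: [1. 2. 3.]
midpoints = (x[:-1] + x[1:]) / 2.0
print(midpoints)                               # [1.5 2.5]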
def predict(self, X):
    '''Predict the class of an instance.'''
    try:
        # Two vote accumulators, as AdaBoost is a binary classifier.
        # (Floats, so fractional influence values are not truncated.)
        preds = np.zeros(2)
        for estimator, say in zip(self.estimators, self.influence):
            pred = estimator.predict(X)
            if pred:
                preds[0] += say
            else:
                preds[1] += say
        # Return the class with the larger weighted vote.
        return 1 if preds[0] > preds[1] else 0
    # The except clause is truncated in the gist preview; assumed to guard
    # against calling predict before fit.
    except AttributeError:
        raise RuntimeError('fit must be called before predict.')
def fit(self, X, y):
    '''Fit model to the training set.'''
    # Append sample weights as last column, initialised uniformly to 1/n.
    X = np.c_[X, np.full((X.shape[0], 1), 1 / float(X.shape[0]))]
    self.estimators = []
    self.influence = []
    params = self.__get_estimator_params()
    for i in range(self.n_estimators):
        # The loop body is truncated in the gist preview. A standard AdaBoost
        # round fits a weak learner, records its influence ("say"), and calls
        # __mod_datasets to resample towards misclassified instances.
        ...
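
The gist does not show how the influence values are computed; the standard AdaBoost weight is alpha = 0.5 * ln((1 - err) / err), sketched below as an assumption:

import numpy as np

def influence(error, eps=1e-10):
    # Sketch of the usual AdaBoost "say"; an assumption, since the gist's
    # computation is not shown.
    error = np.clip(error, eps, 1 - eps)   # guard against log(0) and division by zero
    return 0.5 * np.log((1 - error) / error)

print(influence(0.1))   # ~1.10: an accurate stump gets a large say
print(influence(0.5))   # 0.0: a coin-flip stump gets no say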
def __mod_datasets(self, X, y):
    '''Resample the dataset to place more emphasis on misclassified instances.'''
    temp_X = np.zeros(X.shape)
    temp_y = np.zeros(y.shape)
    # __order_weights is assumed to turn the weight column into a cumulative
    # distribution, so rows can be drawn by inverse-transform sampling.
    X = self.__order_weights(X)
    for i in range(X.shape[0]):
        val = np.random.rand()
        idx = 0
        # Walk the cumulative weights until the random draw is covered.
        while val > X[idx, -1]:
            idx += 1
        temp_X[i] = X[idx]
        temp_y[i] = y[idx]
    return temp_X, temp_y
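
A self-contained sketch of the same resampling idea, again assuming __order_weights yields a cumulative weight column; np.searchsorted replaces the explicit while loop:

import numpy as np

rng = np.random.default_rng(0)
weights = np.array([0.1, 0.6, 0.3])    # normalised sample weights
cumulative = np.cumsum(weights)        # [0.1, 0.7, 1.0]
draws = rng.random(8)
indices = np.searchsorted(cumulative, draws)
print(indices)                         # heavier rows (index 1) appear most often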