Tristan Bester TristanBester

## k_nearest_neighbours.py
 def __euclidean_distance(self, x1, x2):
     '''Return the straight-line (L2) distance between x1 and x2.

     The element-wise differences are squared, added up and the square root
     of that total is returned.
     '''
     squared_gaps = np.square(x1 - x2)
     return np.sqrt(np.sum(squared_gaps))

## principal_component_analysis.py
def fit_transform(self, X):
      '''Calculate the principal components of the data and transform the dataset.

      The visible part of this method centers X with self.center, runs a
      singular value decomposition on the centered data and stores the third
      SVD factor on self.components.

      NOTE(review): this excerpt stops after the "Extract ..." comment at the
      end; the component selection and the returned projection are not
      visible here.
      '''
      X_centered = self.center(X)

      # Use Singular Value Decomposition to calculate the principal components
      # of the data.
      # NOTE(review): numpy documents the third value returned by
      # np.linalg.svd as Vh (the transposed right singular vectors), so the
      # rows of V would be the principal axes -- confirm that downstream code
      # indexes rows rather than columns.
      U, sigma, V = np.linalg.svd(X_centered)
      self.components = V

      # Extract the selected number of principal components.

## principal_component_analysis.py
def center(self, X):
    '''Subtract the mean taken along axis 0 from X and return the result.

    X itself is left untouched; a new, shifted array is returned.
    '''
    return X - X.mean(axis=0)

## ensemble.py
def predict(self, X):
    '''Predict the target value of the given instance.

    Every first-layer estimator predicts on X; the predictions are placed
    side by side as columns and the first row of that stack is handed to the
    final estimator (the blender), whose prediction is returned.
    '''
    members = self.estimators
    stacked = members[0].predict(X)
    for member in members[1:]:
        # np.c_ appends the new predictions as an extra column.
        stacked = np.c_[stacked, member.predict(X)]
    first_row = stacked[0]
    return self.final_estimator.predict(first_row)

## ensemble.py
def fit(self, X, y):
      '''Fit the model to the training set.

      X and y are split with train_test_split (test_size=0.3, fixed
      random_state=42). Every estimator in self.estimators is fitted on the
      first part; the held-out part is used to build the training set for
      the blender.

      NOTE(review): this excerpt ends right after train_set is created; the
      predictions of the remaining estimators and the fitting of the blender
      are not visible here.
      '''
      base_set, hold_out, base_y, hold_y = train_test_split(X, y, test_size=0.3, random_state=42)

      # Train the first layer estimators.
      for estimator in self.estimators:
          estimator.fit(base_set, base_y)

      # Create training set for the blender.
      # Each held-out row is passed to the first estimator on its own.
      train_set = [self.estimators[0].predict(x) for x in hold_out]

## gradient_boosting.py
def predict(self, X):
    '''Predict the target value of the given instance.

    The result is the initial value stored in self.estimators[0] plus the
    predictions of all later estimators, each scaled by self.learning_rate.
    '''
    scaled_steps = [self.learning_rate * tree.predict(X) for tree in self.estimators[1:]]
    total = self.estimators[0]
    # The scaled corrections are summed first and then added to the start value.
    total += sum(scaled_steps)
    return total

## gradient_boosting.py
def fit(self, X, y):
    '''Fit the model to the given training set.

    The ensemble starts from a constant prediction, the mean of y, stored as
    the first entry of self.estimators. Each loop iteration then computes
    the current residuals and fits a DecisionTreeRegressor (configured by
    self.param_dict) to them.

    NOTE(review): this excerpt ends after tree.fit; the step that adds the
    fitted tree to self.estimators is not visible here.
    '''
    # Calculate the value the model is initialized to predict.
    leaf = y.mean()
    self.estimators = [leaf]

    # One tree fewer than n_estimators is trained -- presumably because the
    # initial leaf is counted as an estimator; confirm.
    for i in range(self.n_estimators-1):
        residuals = self.__calculate_residuals(X,y)
        tree = DecisionTreeRegressor(**self.param_dict)
        tree.fit(X,residuals)

## gradient_boosting.py
def __calculate_residuals(self,X,y):
    '''Return, for every row of X, the target minus the current prediction.

    The result has the shape of y and starts out as zeros; entry i is set to
    y[i] - self.predict(row i of X) while iterating over X.
    '''
    errors = np.zeros(y.shape)
    for row_idx, sample in enumerate(X):
        errors[row_idx] = y[row_idx] - self.predict(sample)
    return errors

## gradient_boosting.py
def fit(self, X, y):
    '''Fit the model to the given training set.

    The initial leaf value comes from self.__init_leaf(y) and becomes the
    first entry of self.estimators; the initial residuals are y - leaf.

    NOTE(review): this excerpt ends after y2 is built; the tree-fitting
    steps that consume y2 are not visible here.
    '''
    leaf = self.__init_leaf(y)
    self.estimators = [leaf]
    residuals = y - leaf

    # Create a matrix containing the residuals and the associated probabilities
    # to be used to train a decision tree.
    # Column 0 holds the residuals, column 1 repeats leaf once per residual.
    y2 = np.c_[residuals, [leaf] * len(residuals)]

## gradient_boosting.py
def predict_proba(self, X):
    '''Predict the class probabilities of the given instance.

    The log-odds start at the initial value stored in self.estimators[0];
    every later estimator adds its prediction scaled by self.learning_rate.
    The total is mapped to a probability with the logistic (sigmoid)
    function.

    Parameters
    ----------
    X : input forwarded unchanged to each estimator's predict method.

    Returns
    -------
    The probability exp(z) / (1 + exp(z)) for the accumulated log-odds z;
    a scalar or an array, depending on what the estimators return.
    '''
    # Build a new value instead of using += so that an array-valued entry
    # in self.estimators[0] is never modified in place.
    log_odds = self.estimators[0]
    for tree in self.estimators[1:]:
        log_odds = log_odds + self.learning_rate * tree.predict(X)

    # exp(z) / (1 + exp(z)) overflows to inf / inf = nan for large z.
    # The identity sigmoid(z) = exp(-log(1 + exp(-z))) is evaluated with
    # np.logaddexp, which stays finite for any z.
    return np.exp(-np.logaddexp(0.0, -log_odds))


def predict(self, X):
	def __euclidean_distance(self, x1, x2):
		'''Calculate the Euclidean distance between the given vectors.

		x1 and x2 are subtracted element-wise; the squared differences are
		summed and the square root of that sum is returned.
		'''
		diff = np.sum((x1-x2)**2)
		diff = np.sqrt(diff)
		return diff
	def fit_transform(self, X):
		'''Calculate the principal components of the data and transform the dataset.

		The visible part of this method centers X with self.center, runs a
		singular value decomposition on the centered data and stores the
		third SVD factor on self.components.

		NOTE(review): this excerpt stops after the "Extract ..." comment at
		the end; the component selection and the returned projection are not
		visible here and have not been reconstructed.
		'''
		X_centered = self.center(X)

		# Use Singular Value Decomposition to calculate the principal components
		# of the data.
		# NOTE(review): numpy documents the third value returned by
		# np.linalg.svd as Vh, so the rows of V would be the principal axes --
		# confirm that downstream code indexes rows rather than columns.
		U, sigma, V = np.linalg.svd(X_centered)
		self.components = V

		# Extract the selected number of principal components.
	def center(self, X):
		'''Center the data around the origin.

		The mean taken along axis 0 is subtracted from X; X itself is not
		modified and the shifted data is returned.
		'''
		mean = X.mean(axis=0)
		return X - mean
	def predict(self, X):
		'''Predict the target value of the given instance.

		Every estimator in self.estimators predicts on X; the predictions
		are stacked as columns and the first row of that stack is passed to
		self.final_estimator, whose prediction is returned.
		'''
		base_set = self.estimators[0].predict(X)
		for estimator in self.estimators[1:]:
			# np.c_ appends the new predictions as an extra column.
			base_set = np.c_[base_set, estimator.predict(X)]
		return self.final_estimator.predict(base_set[0])
	def fit(self, X, y):
		'''Fit the model to the training set.

		X and y are split with train_test_split (test_size=0.3, fixed
		random_state=42). Every estimator in self.estimators is fitted on
		the first part; the held-out part is used to build the training set
		for the blender.

		NOTE(review): this excerpt ends right after train_set is created;
		the predictions of the remaining estimators and the fitting of the
		blender are not visible here and have not been reconstructed.
		'''
		base_set, hold_out, base_y, hold_y = train_test_split(X, y, test_size=0.3, random_state=42)

		# Train the first layer estimators.
		for estimator in self.estimators:
			estimator.fit(base_set, base_y)

		# Create training set for the blender.
		# Each held-out row is passed to the first estimator on its own.
		train_set = [self.estimators[0].predict(x) for x in hold_out]
	def predict(self, X):
		'''Predict the target value of the given instance.

		The result is the initial value stored in self.estimators[0] plus
		the predictions of all later estimators, each scaled by
		self.learning_rate.
		'''
		pred = self.estimators[0]
		pred += sum([self.learning_rate * estimator.predict(X) for estimator in self.estimators[1:]])
		return pred
	def fit(self, X, y):
		'''Fit the model to the given training set.

		The ensemble starts from a constant prediction, the mean of y,
		stored as the first entry of self.estimators. Each loop iteration
		then computes the current residuals and fits a DecisionTreeRegressor
		(configured by self.param_dict) to them.

		NOTE(review): this excerpt ends after tree.fit; the step that adds
		the fitted tree to self.estimators is not visible here and has not
		been reconstructed.
		'''
		# Calculate the value the model is initialized to predict.
		leaf = y.mean()
		self.estimators = [leaf]

		# One tree fewer than n_estimators is trained -- presumably because
		# the initial leaf is counted as an estimator; confirm.
		for i in range(self.n_estimators-1):
			residuals = self.__calculate_residuals(X,y)
			tree = DecisionTreeRegressor(**self.param_dict)
			tree.fit(X,residuals)
	def __calculate_residuals(self,X,y):
		'''Calculate the pseudo-residuals.

		Returns an array shaped like y whose entry i is
		y[i] - self.predict(row i of X); entries without a matching row in X
		stay at zero.
		'''
		residuals = np.zeros(y.shape)
		for i,x in enumerate(X):
			pred = self.predict(x)
			residuals[i] = y[i] - pred
		return residuals
	def fit(self, X, y):
		'''Fit the model to the given training set.

		The initial leaf value comes from self.__init_leaf(y) and becomes
		the first entry of self.estimators; the initial residuals are
		y - leaf.

		NOTE(review): this excerpt ends after y2 is built; the tree-fitting
		steps that consume y2 are not visible here and have not been
		reconstructed.
		'''
		leaf = self.__init_leaf(y)
		self.estimators = [leaf]
		residuals = y - leaf

		# Create a matrix containing the residuals and the associated probabilities
		# to be used to train a decision tree.
		# Column 0 holds the residuals, column 1 repeats leaf once per residual.
		y2 = np.c_[residuals, [leaf] * len(residuals)]
	def predict_proba(self, X):
		'''Predict the class probabilities of the given instance.

		The log-odds start at the initial value stored in
		self.estimators[0]; every later estimator adds its prediction scaled
		by self.learning_rate. The total is mapped to a probability with the
		logistic (sigmoid) function.

		Parameters
		----------
		X : input forwarded unchanged to each estimator's predict method.

		Returns
		-------
		The probability exp(z) / (1 + exp(z)) for the accumulated log-odds
		z; a scalar or an array, depending on what the estimators return.
		'''
		# Build a new value instead of using += so that an array-valued entry
		# in self.estimators[0] is never modified in place.
		log_odds = self.estimators[0]
		for tree in self.estimators[1:]:
			log_odds = log_odds + self.learning_rate * tree.predict(X)

		# exp(z) / (1 + exp(z)) overflows to inf / inf = nan for large z.
		# The identity sigmoid(z) = exp(-log(1 + exp(-z))) is evaluated with
		# np.logaddexp, which stays finite for any z.
		return np.exp(-np.logaddexp(0.0, -log_odds))


	def predict(self, X):