This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def __init_leaf(self, y): | |
| '''Return the value the model is initialized to predict.''' | |
| y = y.astype(int) | |
| class_one_count = np.bincount(y.flatten())[1] | |
| proba = float(class_one_count)/y.shape[0] | |
| log_odds = np.log((proba)/(1-proba)) | |
| # Prevent math error - Undefined. | |
| if log_odds == 0: | |
| log_odds = 0.01 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def __predict(self, subtree, val): | |
| '''Predict the class probabilities of an instance.''' | |
| if val.ndim == 0: | |
| val = np.array([val]) | |
| if subtree.decision is None: | |
| return subtree.prediction | |
| elif val[int(subtree.decision[1])] > subtree.decision[0]: | |
| return self.__predict(subtree.left, val) | |
| else: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def __fit(self, subtree, X, y, curr_depth): | |
| ''' | |
| If the dataset does not already have a mean squared error of zero and the | |
| regularization parameters have not been satisfied, create and add a node | |
| to the decision tree that predicts a more accurate target value for the | |
| instances in the given dataset. Then recursively call the __fit method to | |
| create the child nodes. | |
| ''' | |
| if ( | |
| curr_depth > self.max_depth or |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def CART(self, X, y): | |
| '''The CART algorithm for building decision trees.''' | |
| splits, cols = self.get_split_points(X) | |
| splits = np.c_[splits,cols] | |
| best = np.inf | |
| for split_pt in splits: | |
| lower, upper, y_lower, y_upper = self.split(split_pt[0], int(split_pt[1]), X, y) | |
| mse_lower = self.MSE(y_lower[:, 0]) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def MSE(self, y):
    """Return the sum of squared deviations of ``y`` from its mean.

    Despite the name, the value is NOT divided by the number of
    instances: it is the total squared error incurred by predicting the
    mean target value for every instance at a node. Keeping the sum
    (rather than the mean) lets a caller add the scores of two child
    nodes directly without re-weighting them by node size.

    Args:
        y: Array-like of target values for the instances at the node.

    Returns:
        The summed squared error as a float; ``0.0`` for empty input.
    """
    y = np.asarray(y)
    # np.mean of an empty array emits a RuntimeWarning and yields NaN;
    # an empty node has no error, so short-circuit to the same 0.0 the
    # summation would produce.
    if y.size == 0:
        return 0.0
    residuals = y - y.mean()
    return (residuals ** 2).sum()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def split(self, split_val, col, X,y): | |
| '''Split the given dataset based on the given feature index and threshold value.''' | |
| lower = [] | |
| upper = [] | |
| y_lower = [] | |
| y_upper = [] | |
| for i in range(X.shape[0]): | |
| if X[i, col] < split_val: | |
| lower.append(X[i]) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def get_split_points(self, X): | |
| '''Calculate the splitting points in the data.''' | |
| cols = [] | |
| split_pts = [] | |
| for i,x in enumerate(np.sort(X.T)): | |
| x = np.unique(x) | |
| for j in range(x.shape[0]-1): | |
| split = (x[j] + x[j+1])/2.0 | |
| split_pts.append(split) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def predict(self, X): | |
| '''Predict the class of an instance.''' | |
| try: | |
| # As AdaBoost is a binary classifier. | |
| preds = np.array([0,0]) | |
| for estimator, say in zip(self.estimators, self.influence): | |
| pred = estimator.predict(X) | |
| if pred: | |
| preds[0] += say | |
| else: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def fit(self, X, y): | |
| '''Fit model to the training set.''' | |
| # Append sample weights as last column. | |
| X = np.c_[X, np.full((X.shape[0], 1), (1/float(X.shape[0])))] | |
| self.estimators = [] | |
| self.influence = [] | |
| params = self.__get_estimator_params() | |
| for i in range(self.n_estimators): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def __mod_datasets(self,X,y): | |
| '''Resample the dataset to place more emphasis on misclassified instances.''' | |
| temp_X = np.zeros(X.shape) | |
| temp_y = np.zeros(y.shape) | |
| X = self.__order_weights(X) | |
| for i in range(X.shape[0]): | |
| val = np.random.rand() | |
| idx = 0 | |
| while val > X[idx, -1]: |