Skip to content

Instantly share code, notes, and snippets.

@regonn
Created August 19, 2017 03:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save regonn/3433b32158b48cc481d61add5069b2e5 to your computer and use it in GitHub Desktop.
Save regonn/3433b32158b48cc481d61add5069b2e5 to your computer and use it in GitHub Desktop.
random forest
from sklearn.base import ClassifierMixin
from collections import Counter
import numpy as np
class RandomForest():
    """Bagging ensemble of ``DecisionTree`` classifiers.

    Every tree is trained on a bootstrap resample of the rows and on a
    random subset of ``floor(sqrt(n_features))`` columns.  Predictions are
    made by majority vote over the trees.

    NOTE(review): ``DecisionTree`` is not defined in this file; it is
    expected to be in scope and to provide ``create_tree(X, y)`` and
    ``predict(X)``.
    """

    def __init__(self, n_trees=10):
        """Create an unfitted forest of *n_trees* trees."""
        self._n_trees = n_trees
        # One slot per tree; stays ``None`` until ``create_forest`` runs.
        self._forest = [None] * self._n_trees
        # Column indices each tree was trained on (same order as _forest).
        self._using_data = [None] * self._n_trees

    def _bootstrap_sample(self, X, y):
        """Draw one bootstrap sample per tree.

        Rows are drawn with replacement (as many as there are labels);
        columns are drawn without replacement.  The chosen columns of
        tree ``i`` are recorded in ``self._using_data[i]``.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: 1-D array-like of labels, one per row of ``X``.

        Returns:
            Tuple ``(bootstrapped_X, bootstrapped_y)`` of two lists with
            one array per tree.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        n_features = X.shape[1]
        # np.random.choice needs an integer size; np.floor yields a float.
        n_features_per_tree = int(np.floor(np.sqrt(n_features)))
        bootstrapped_X = []
        bootstrapped_y = []
        for i in range(self._n_trees):
            index = np.random.choice(len(y), size=len(y))
            col = np.random.choice(
                n_features, size=n_features_per_tree, replace=False)
            bootstrapped_X.append(X[np.ix_(index, col)])
            bootstrapped_y.append(y[index])
            self._using_data[i] = col
        return bootstrapped_X, bootstrapped_y

    def create_forest(self, X, y):
        """Fit every tree of the forest on its own bootstrap sample.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: 1-D array-like of labels, one per row of ``X``.
        """
        # Sorted unique labels; column order of the probability matrix.
        self._targets = np.unique(y)
        bootstrapped_X, bootstrapped_y = self._bootstrap_sample(X, y)
        samples = zip(bootstrapped_X, bootstrapped_y)
        for i, (i_bootstrapped_X, i_bootstrapped_y) in enumerate(samples):
            tree = DecisionTree()
            tree.create_tree(i_bootstrapped_X, i_bootstrapped_y)
            self._forest[i] = tree

    def predict(self, X):
        """Return the label with the most votes for every row of *X*.

        Raises:
            ValueError: if the forest has not been fitted yet.
        """
        proba = self._predict_proba(X)
        return self._targets[np.argmax(proba, axis=1)]

    def _predict_proba(self, X):
        """Return the fraction of trees voting for each label.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).

        Returns:
            Array of shape (n_samples, n_targets); column ``j`` holds the
            share of trees that predicted ``self._targets[j]``.

        Raises:
            ValueError: if the forest has not been fitted yet.
        """
        if self._forest[0] is None:
            raise ValueError('fitしてね')
        X = np.asarray(X)
        # Each tree only sees the columns it was trained on.
        votes = np.array([
            tree.predict(X[:, using_data])
            for tree, using_data in zip(self._forest, self._using_data)
        ])
        n_trees, n_samples = votes.shape
        proba = np.zeros((n_samples, len(self._targets)))
        # votes is (n_trees, n_samples); transpose to walk sample by sample.
        for row, sample_votes in enumerate(votes.transpose()):
            counts = Counter(sample_votes)
            for column, target in enumerate(self._targets):
                proba[row, column] = counts[target] / n_trees
        return proba
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment