Skip to content

Instantly share code, notes, and snippets.

@theSage21
Created July 23, 2016 03:54
Show Gist options
  • Save theSage21/daf833e3d3499dc287b7e144cfd9b111 to your computer and use it in GitHub Desktop.
Save theSage21/daf833e3d3499dc287b7e144cfd9b111 to your computer and use it in GitHub Desktop.
Random Rotation Forest implementation
# Based on
# https://www.packtpub.com/books/content/rotation-forest-classifier-ensemble-based-feature-extraction
from sklearn.datasets import make_classification
from sklearn.metrics import classification_report
from sklearn.cross_validation import train_test_split
from sklearn.decomposition import PCA
from sklearn.tree import DecisionTreeClassifier
import numpy as np
def get_data():
    """
    Make a sample classification dataset.

    The dataset has 500 samples and 50 features; 60% of the features are
    requested as informative, 10% as redundant and 10% as repeated, with
    flip_y=0.03.  The generator is seeded (random_state=7).

    Returns : feature matrix x (independent variables),
              label vector y (dependent variable)
    """
    n_total = 50
    # Feature-type counts are expressed as fractions of the total width.
    n_informative = int(0.6*n_total)
    n_redundant = int(0.1*n_total)
    n_repeated = int(0.1*n_total)
    features, labels = make_classification(
        n_samples=500,
        n_features=n_total,
        n_informative=n_informative,
        n_redundant=n_redundant,
        n_repeated=n_repeated,
        flip_y=0.03,
        random_state=7,
    )
    return features, labels
def get_random_subset(iterable, k):
    """
    Randomly partition the items of `iterable` into groups of size k.

    The items are copied into a new list, shuffled with np.random.shuffle
    and then handed out k at a time from the end of the shuffled list.  When
    the number of items is not a multiple of k, the last group holds the
    leftover (fewer than k) items.  Every item ends up in exactly one group.

    The input itself is never modified.

    iterable : finite iterable of items (list, tuple, range, ...)
    k        : group size, expected to be a positive integer.  k == 0 raises
               ZeroDivisionError; a negative k yields an empty result.
    Returns  : list of lists, the groups in the order they were drawn
    """
    # Work on a private copy: shuffling and draining the caller's own list
    # would be a hidden side effect, and would fail for immutable inputs.
    pool = list(iterable)
    np.random.shuffle(pool)
    # Integer ceiling division, so the leftover group is counted even where
    # `/` performs floor division on ints.
    n_groups = -(-len(pool) // k)
    subsets = []
    for _ in range(n_groups):
        take = min(k, len(pool))
        subsets.append(pool[-take:])
        del pool[-take:]
    return subsets
def build_rotationtree_model(x_train, y_train, d, k):
    """
    Train a rotation forest of d decision trees.

    For every tree:
      1. draw a 70% subsample of x_train,
      2. randomly partition the feature ids into groups of at most k,
      3. fit a PCA on each group's columns of the subsample,
      4. write each group's principal axes into that group's block of an
         otherwise-zero (n_features x n_features) rotation matrix,
      5. fit a DecisionTreeClassifier on x_train.dot(rotation matrix).

    x_train : 2-D numpy array, one row per sample
    y_train : labels, one per row of x_train
    d       : number of trees to build
    k       : number of features per group
    Returns : (models, r_matrices, feature_subsets) -- three lists of
              length d; entry i of each list belongs to tree i.  New data
              must be multiplied by r_matrices[i] before models[i].predict.
    """
    n_features = x_train.shape[1]
    models = []
    r_matrices = []
    feature_subsets = []
    for i in range(d):
        # A constant seed would hand every tree the identical subsample.
        # Offsetting it by the tree index keeps runs reproducible while
        # giving each tree its own sample.
        x, _, _, _ = train_test_split(
            x_train, y_train, test_size=0.3, random_state=7 + i)
        # Feature ids, split into random groups
        feature_index = list(range(n_features))
        random_k_subset = get_random_subset(feature_index, k)
        feature_subsets.append(random_k_subset)
        # Rotation matrix
        R_matrix = np.zeros((n_features, n_features), dtype=float)
        for each_subset in random_k_subset:
            pca = PCA()
            pca.fit(x[:, each_subset])
            # components_ has one ROW per principal axis, so projecting the
            # group's columns onto the axes is x_group.dot(components_.T).
            # The axes therefore go into the COLUMNS of the group's block.
            # PCA may return fewer axes than group features (few samples);
            # the unused columns then simply stay zero.
            n_components = pca.components_.shape[0]
            block = np.ix_(each_subset, each_subset[:n_components])
            R_matrix[block] = pca.components_.T
        x_transformed = x_train.dot(R_matrix)
        model = DecisionTreeClassifier()
        model.fit(x_transformed, y_train)
        models.append(model)
        r_matrices.append(R_matrix)
    return models, r_matrices, feature_subsets
def model_worth(models, r_matrices, x, y):
    """
    Print a classification report for the ensemble's majority vote.

    Each model predicts on x multiplied by the matrix stored at the same
    position in r_matrices.  A sample is predicted True when strictly more
    than half of the models give it a non-zero prediction, False otherwise.

    models     : fitted classifiers exposing predict()
    r_matrices : matrices, r_matrices[i] is applied before models[i]
    x          : feature matrix to evaluate (must support .dot)
    y          : true labels, one per row of x
    Returns    : None; the report is printed
    """
    # One row of predictions per model, one column per sample.
    votes = np.asarray([
        clf.predict(x.dot(r_matrices[pos]))
        for pos, clf in enumerate(models)
    ])
    majority = len(models)/2
    final_prediction = [
        np.count_nonzero(votes[:, sample]) > majority
        for sample in range(len(y))
    ]
    print(classification_report(y, final_prediction))
if __name__ == "__main__":
    x, y = get_data()

    # First split: 70% train, 30% held out.
    x_train, x_test_all, y_train, y_test_all = train_test_split(
        x, y, test_size=0.3, random_state=9)
    # Second split: the held-out part becomes 70% dev, 30% test.
    x_dev, x_test, y_dev, y_test = train_test_split(
        x_test_all, y_test_all, test_size=0.3, random_state=9)

    # Build a bag of 25 trees using feature groups of size 5.
    models, r_matrices, features = build_rotationtree_model(
        x_train, y_train, 25, 5)

    # Report on the training data first, then on the dev data.
    for eval_x, eval_y in ((x_train, y_train), (x_dev, y_dev)):
        model_worth(models, r_matrices, eval_x, eval_y)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment