Skip to content

Instantly share code, notes, and snippets.

@olinguyen
Created July 26, 2017 19:34
Show Gist options
  • Save olinguyen/1d7f33487ebfb4cdd33d3432f58a14b0 to your computer and use it in GitHub Desktop.
Save olinguyen/1d7f33487ebfb4cdd33d3432f58a14b0 to your computer and use it in GitHub Desktop.
RandomForest doesn't give the same outputs when comparing shogun & sklearn
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, datasets
from sklearn.ensemble import RandomForestClassifier
from modshogun import *
# Reproduction script: train a random forest on the same four-sample toy
# dataset with Shogun and with scikit-learn, then print each library's
# predicted labels and scores so the outputs can be compared side by side.

# Seeding is left disabled, so results may differ from run to run.
#np.random.seed(42)

# Four samples with two features each; the first two samples belong to
# class 0 and the last two to class 1.
X = np.array([[-1.0, -1.0], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]])
# `np.float` was only an alias of the builtin `float` and was removed in
# NumPy 1.24; the builtin yields the same float64 dtype.
y = np.array([0, 0, 1, 1]).astype(float)

# --- Shogun ---
# The matrix is transposed before being handed to Shogun
# (presumably Shogun expects one sample per column -- confirm).
features = RealFeatures(X.T)
labels = MulticlassLabels(y)

rand_forest = RandomForest(features, labels, num_rand_bags = 10)
rand_forest.set_combination_rule(MajorityVote())

# One boolean flag per feature, both set to True.
# NOTE(review): in Shogun's tree API True appears to mark a feature as
# nominal (categorical); these features are continuous, which may explain
# part of the mismatch with sklearn -- confirm against the Shogun docs.
ft = np.ones(2).astype('bool')
rand_forest.set_feature_types(ft)
rand_forest.train()

# NOTE(review): the forest is trained with MulticlassLabels (0/1) but
# queried through apply_binary -- verify this is the intended call.
y_pred = rand_forest.apply_binary(features)
print("Shogun results")
print("predicted labels:", y_pred.get_labels())
print("probabilities:", y_pred.get_values())

# --- sklearn ---
# Same number of trees (10) as the Shogun forest; evaluated on the
# training samples themselves, as above.
rf = RandomForestClassifier(n_estimators=10).fit(X, y)
y_pred = rf.predict(X)
y_prob = rf.predict_proba(X)
print("sklearn results")
print("predicted labels:", y_pred)
print("probabilities:",y_prob)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment