Skip to content

Instantly share code, notes, and snippets.

@vappiah
Last active March 4, 2022 19:24
Show Gist options
  • Save vappiah/8e2190611f600854489b6c7539e7b8d0 to your computer and use it in GitHub Desktop.
Save vappiah/8e2190611f600854489b6c7539e7b8d0 to your computer and use it in GitHub Desktop.
Random Forest unexpected behaviour
from itertools import cycle

import matplotlib.pyplot as plt
import numpy as np
from sklearn import svm, datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import label_binarize
# Load the iris data set.
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Binarize the labels into an indicator matrix with one column per class.
y = label_binarize(y, classes=[0, 1, 2])
n_classes = y.shape[1]

# Add noisy features to make the problem harder.
random_state = np.random.RandomState(0)
n_samples, n_features = X.shape
X = np.c_[X, random_state.randn(n_samples, 200 * n_features)]

# Shuffle and split into training and test sets.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=0
)

# Learn to predict each class against the others. The forest is seeded with
# the same RandomState used for the noise so repeated runs are reproducible.
classifier = OneVsRestClassifier(
    RandomForestClassifier(random_state=random_state)
)

# Ask for per-class probabilities rather than hard predictions.
# With a binarized target, predict() returns a 0/1 indicator matrix: any
# sample for which no per-class forest votes "positive" is an all-zero row,
# and argmax() silently maps such rows (and ties) to class 0. That is why a
# class could vanish from the result below. predict_proba() yields continuous
# scores, so argmax() selects the class each sample is most likely to be.
y_score = classifier.fit(X_train, y_train).predict_proba(X_test)

# Distinct class indices that were ranked highest for at least one sample.
classes = np.unique(y_score.argmax(axis=1))
print(classes)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment