Skip to content

Instantly share code, notes, and snippets.

@sazio
Created August 17, 2020 22:25
Show Gist options
  • Save sazio/070eddd88187e7ef8308feec54d1c24a to your computer and use it in GitHub Desktop.
Save sazio/070eddd88187e7ef8308feec54d1c24a to your computer and use it in GitHub Desktop.
# Evaluate a logistic regression model trained on at most 5 features selected by random forest importance
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectFromModel
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
# feature selection
def select_features(X_train, y_train, X_test, *, max_features=5,
                    n_estimators=1000, threshold=None, random_state=None):
    """Select features by random forest importance, fitted on training data only.

    A ``RandomForestClassifier`` is wrapped in ``SelectFromModel`` and fitted
    on the training split; the fitted selector is then applied to both the
    training and the test inputs so that no information from the test split
    influences which features are kept.

    Args:
        X_train: Training inputs used to fit the selector.
        y_train: Training targets used to fit the selector.
        X_test: Test inputs, only transformed (never fitted on).
        max_features: Upper bound on the number of features to keep.
        n_estimators: Number of trees in the random forest.
        threshold: Importance threshold forwarded to ``SelectFromModel``.
            ``None`` keeps scikit-learn's default; per the scikit-learn docs
            that default still applies a threshold, so fewer than
            ``max_features`` features may be kept. Pass ``-float("inf")`` to
            select purely by ``max_features``.
        random_state: Seed forwarded to the random forest. ``None`` (the
            default) leaves the forest unseeded, so the selected features
            may differ between runs.

    Returns:
        A tuple ``(X_train_fs, X_test_fs, fs)``: the reduced training inputs,
        the reduced test inputs, and the fitted ``SelectFromModel`` instance.
    """
    # configure to select a subset of features
    forest = RandomForestClassifier(
        n_estimators=n_estimators,
        random_state=random_state,
    )
    fs = SelectFromModel(forest, threshold=threshold, max_features=max_features)
    # learn relationship from training data only
    fs.fit(X_train, y_train)
    # apply the same fitted selection to both splits
    X_train_fs = fs.transform(X_train)
    X_test_fs = fs.transform(X_test)
    return X_train_fs, X_test_fs, fs
# Hold out 33% of the samples for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_clf,
    y_clf,
    test_size=0.33,
    random_state=1,
)

# Reduce both splits to the features chosen by the selector fitted on the training split.
X_train_fs, X_test_fs, fs = select_features(X_train, y_train, X_test)

# Train a logistic regression classifier on the reduced training inputs
# (fit() returns the estimator itself, so `model` is the fitted classifier).
model = LogisticRegression(solver='liblinear').fit(X_train_fs, y_train)

# Predict labels for the held-out samples and score them against the true labels.
yhat = model.predict(X_test_fs)
accuracy = accuracy_score(y_true=y_test, y_pred=yhat)

# Report the accuracy as a percentage with two decimals.
print('Accuracy: %.2f' % (accuracy*100))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment