Created
December 15, 2019 09:58
-
-
Save egemenzeytinci/36a960c884d4509037d54ba5449a327f to your computer and use it in GitHub Desktop.
Feature importances in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from rfpimp import permutation_importances | |
from sklearn.base import clone | |
from sklearn.ensemble import RandomForestRegressor | |
from sklearn.metrics import r2_score | |
from sklearn.model_selection import train_test_split | |
import pandas as pd | |
def imp_df(column_names, importances):
    """Build a feature-importance table ordered from most to least important.

    Args:
        column_names: Feature names, one per importance value.
        importances: Importance scores aligned with ``column_names``.

    Returns:
        A ``pandas.DataFrame`` indexed by ``'Feature'`` with a single
        ``'Importance'`` column, sorted by importance in descending order.
    """
    table = pd.DataFrame({'Feature': column_names, 'Importance': importances})
    indexed = table.set_index('Feature')
    return indexed.sort_values('Importance', ascending=False)
def r2(rf, X_train, y_train):
    """Return the R^2 score of ``rf``'s predictions on the given data.

    The signature ``(model, X, y)`` lets this function be passed as the
    metric callback to ``permutation_importances``.

    Args:
        rf: Fitted model exposing ``predict``.
        X_train: Feature matrix to predict on.
        y_train: True target values matching ``X_train``.

    Returns:
        The value of ``r2_score(y_train, rf.predict(X_train))``.
    """
    predictions = rf.predict(X_train)
    return r2_score(y_train, predictions)
def drop_col_feat_imp(model, X_train, y_train, random_state=42):
    """Compute drop-column feature importances for ``model``.

    A clone of ``model`` is fitted on every column to obtain a benchmark
    score. Then, for each column in turn, another clone is fitted on the
    data with that column removed. A feature's importance is the benchmark
    score minus the score obtained without the feature.

    Every score is computed on the same data the clone was fitted on
    (``X_train`` / ``y_train``); no held-out data is involved.

    Args:
        model: Estimator accepted by ``sklearn.base.clone`` that exposes
            ``fit`` and ``score``.
        X_train: Feature DataFrame; its ``columns`` and ``drop`` are used.
        y_train: Target values aligned with ``X_train``.
        random_state: Value assigned to each clone's ``random_state``
            attribute before fitting, so every fit uses the same seed.

    Returns:
        The DataFrame produced by ``imp_df``: one row per column of
        ``X_train``, sorted by importance in descending order.
    """
    def fit_and_score(features):
        # Work on a clone so the estimator passed in by the caller is
        # neither refitted nor mutated.
        estimator = clone(model)
        estimator.random_state = random_state
        estimator.fit(features, y_train)
        return estimator.score(features, y_train)

    benchmark_score = fit_and_score(X_train)

    importances = []
    for col in X_train.columns:
        # Build the reduced frame once and reuse it for both fit and score
        # instead of dropping the column twice.
        reduced = X_train.drop(col, axis=1)
        importances.append(benchmark_score - fit_and_score(reduced))

    return imp_df(X_train.columns, importances)
# Features are every column except the target ('average_rating') and the
# 'title' / 'genres' columns.
X = dataset.drop(columns=['average_rating', 'title', 'genres'])
y = dataset['average_rating']

# Hold out 33% of the rows; only the training portion is used below.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=0,
)

rf = RandomForestRegressor(n_estimators=10)
rf.fit(X_train, y_train)

# Report the three importance measures one after another, separated by a
# blank line.
print('Random Forest Feature Importances:')
print(imp_df(X.columns, rf.feature_importances_), end='\n\n')

print('Permutation Feature Importance:')
print(permutation_importances(rf, X_train, y_train, r2), end='\n\n')

print('Drop-Col Feature Importance:')
print(drop_col_feat_imp(rf, X_train, y_train))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment