Skip to content

Instantly share code, notes, and snippets.

@egemenzeytinci
Created December 15, 2019 09:58
Show Gist options
  • Save egemenzeytinci/36a960c884d4509037d54ba5449a327f to your computer and use it in GitHub Desktop.
Save egemenzeytinci/36a960c884d4509037d54ba5449a327f to your computer and use it in GitHub Desktop.
Feature importances in Python
from rfpimp import permutation_importances
from sklearn.base import clone
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import pandas as pd
def imp_df(column_names, importances):
    """Build a feature-importance table sorted from most to least important.

    Args:
        column_names: Feature names, one per importance value.
        importances: Importance values, aligned position-by-position with
            ``column_names``.

    Returns:
        A ``pandas.DataFrame`` indexed by ``'Feature'`` with a single
        ``'Importance'`` column, sorted by importance in descending order.
        Features with equal importance keep their input order.
    """
    data = {
        'Feature': column_names,
        'Importance': importances,
    }
    df = pd.DataFrame(data).set_index('Feature')
    # A stable sort keeps tied features in their original order, so the
    # table is reproducible run-to-run instead of depending on the
    # default (non-stable) sort algorithm.
    return df.sort_values('Importance', ascending=False, kind='mergesort')
def r2(rf, X_train, y_train):
    """Return ``r2_score`` of the model's predictions on the given data.

    Args:
        rf: Fitted estimator exposing ``predict``.
        X_train: Feature matrix passed to ``rf.predict``.
        y_train: True target values compared against the predictions.

    Returns:
        The value of ``r2_score(y_train, rf.predict(X_train))``.
    """
    predictions = rf.predict(X_train)
    return r2_score(y_train, predictions)
def drop_col_feat_imp(model, X_train, y_train, random_state=42):
    """Compute drop-column feature importances for ``model``.

    A fresh clone of ``model`` is fitted on all columns to obtain a benchmark
    score; then, for every column, another fresh clone is fitted with that
    column removed. A column's importance is the benchmark score minus the
    score obtained without it. All scores come from the estimator's own
    ``score`` method, evaluated on the same data the clone was fitted on.

    Args:
        model: Estimator to clone; the passed instance itself is not fitted
            or modified.
        X_train: Feature table; must expose ``columns`` and
            ``drop(col, axis=1)`` (presumably a pandas DataFrame -- confirm
            against callers).
        y_train: Target values used for every fit and score call.
        random_state: Value assigned to each clone's ``random_state``
            attribute before fitting, so that all fits use the same seed.

    Returns:
        The table produced by ``imp_df(X_train.columns, importances)``.
    """

    def fit_and_score(features):
        # Each measurement uses its own unfitted copy with the same seed, so
        # score differences come from the feature set rather than from
        # leftover state or different randomness.
        estimator = clone(model)
        estimator.random_state = random_state
        estimator.fit(features, y_train)
        return estimator.score(features, y_train)

    benchmark_score = fit_and_score(X_train)

    # Drop each column exactly once and reuse the reduced table for both
    # fitting and scoring.
    importances = [
        benchmark_score - fit_and_score(X_train.drop(col, axis=1))
        for col in X_train.columns
    ]
    return imp_df(X_train.columns, importances)
# NOTE(review): `dataset` is not defined in the visible part of this file;
# presumably a pandas DataFrame loaded beforehand -- confirm.

# Features are every column except the target and the two dropped columns.
X = dataset.drop(['average_rating', 'title', 'genres'], axis=1)
y = dataset['average_rating']

# Split with a fixed seed; X_test / y_test are not used below.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=0,
)

rf = RandomForestRegressor(n_estimators=10)
rf.fit(X_train, y_train)

# 1) Importances reported by the fitted forest itself.
print('Random Forest Feature Importances:')
print(imp_df(X.columns, rf.feature_importances_))
print()

# 2) Permutation importances from rfpimp, scored with the local `r2` helper.
print('Permutation Feature Importance:')
print(permutation_importances(rf, X_train, y_train, r2))
print()

# 3) Drop-column importances (refits a clone of the model per column).
print('Drop-Col Feature Importance:')
print(drop_col_feat_imp(rf, X_train, y_train))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment