Skip to content

Instantly share code, notes, and snippets.

@escuccim
Created August 3, 2018 09:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save escuccim/f358688d9412d4bd423275b11382d263 to your computer and use it in GitHub Desktop.
Save escuccim/f358688d9412d4bd423275b11382d263 to your computer and use it in GitHub Desktop.
Code to find which features are not used by any of the models in an sklearn VotingClassifier
# rank (and optionally print) feature importances for sklearn tree-based models
def feature_importance(forest, X_train, display_results=True):
    """Rank the columns of *X_train* by the importances stored on *forest*.

    Args:
        forest: Fitted estimator exposing a ``feature_importances_`` array.
        X_train: Table the estimator was trained on; only ``shape[1]`` and
            ``columns`` are read (presumably a pandas DataFrame -- confirm
            against the caller).
        display_results: When true, print a "Feature ranking:" header followed
            by one line per feature.

    Returns:
        A ``(ranked_list, zero_features)`` tuple: the column labels ordered
        from most to least important, and the labels whose importance is
        exactly ``0.0``.
    """
    ranked_list = []
    zero_features = []

    importances = forest.feature_importances_
    # argsort is ascending; reversing puts the most important feature first.
    indices = np.argsort(importances)[::-1]

    if display_results:
        # Print the feature ranking
        print("Feature ranking:")

    for position in range(X_train.shape[1]):
        col_index = indices[position]
        score = importances[col_index]
        label = X_train.columns[col_index]

        if display_results:
            # %s formats any label type; concatenating the label directly
            # raised TypeError for non-string (e.g. integer) column labels.
            print("%d. feature %d (%f) - %s" % (position + 1, col_index, score, label))

        ranked_list.append(label)

        if score == 0.0:
            zero_features.append(label)

    return ranked_list, zero_features
# Collect the features that every fitted sub-estimator of the voting
# classifier leaves unused (importance exactly 0.0 in each of them).
counter = 0
useless_features = []  # per-estimator lists of zero-importance features
drop_features = set()  # stays empty when there are no estimators
for counter, est in enumerate(vc.estimators_, start=1):
    ranked_features, unused_features = feature_importance(est, X_train, display_results=False)
    useless_features.append(unused_features)
    zero_here = set(unused_features)
    # The first estimator seeds the candidate set; each later one narrows it
    # to the features that it also ignores.
    drop_features = zero_here if counter == 1 else drop_features.intersection(zero_here)
drop_features
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment