Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Code to see what features are not used by any models in sklearn VotingClassifier
# rank (and optionally print) feature importances for an sklearn tree-based model
def feature_importance(forest, X_train, display_results=True):
    """Rank the columns of ``X_train`` by ``forest.feature_importances_``.

    Args:
        forest: Estimator exposing a ``feature_importances_`` array with one
            entry per column of ``X_train``.
        X_train: Table providing ``shape`` and ``columns`` (e.g. a pandas
            DataFrame). Only the column labels are read, never the data.
        display_results: When true, print a header followed by one line per
            feature giving its rank, positional index, importance and label.

    Returns:
        A ``(ranked_list, zero_features)`` tuple. ``ranked_list`` holds every
        column label ordered from most to least important; ``zero_features``
        holds, in that same order, the labels whose importance is exactly 0.0.
    """
    importances = forest.feature_importances_
    # argsort is ascending; reverse it so the most important feature is first.
    indices = np.argsort(importances)[::-1]

    if display_results:
        print("Feature ranking:")

    ranked_list = []
    zero_features = []
    for f in range(X_train.shape[1]):
        idx = indices[f]
        name = X_train.columns[idx]
        score = importances[idx]

        if display_results:
            # Format the label with %s instead of string concatenation so
            # non-string column labels (e.g. integers) do not raise TypeError.
            print("%d. feature %d (%f) - %s" % (f + 1, idx, score, name))

        ranked_list.append(name)
        if score == 0.0:
            zero_features.append(name)

    return ranked_list, zero_features
# Find the features that NO estimator in the voting ensemble uses, i.e. the
# intersection of every estimator's zero-importance feature list.
useless_features = []  # per-estimator lists of zero-importance features
drop_features = set()  # stays empty when the ensemble has no estimators
counter = 0  # ends up equal to the number of estimators processed
for counter, est in enumerate(vc.estimators_, start=1):
    ranked_features, unused_features = feature_importance(est, X_train, display_results=False)
    useless_features.append(unused_features)
    unused = set(unused_features)
    # The first estimator seeds the set; every later one narrows it down.
    drop_features = unused if counter == 1 else drop_features & unused

# Bare expression: shows the result when run in a notebook / REPL cell.
drop_features
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.