Created
August 3, 2018 09:41
-
-
Save escuccim/f358688d9412d4bd423275b11382d263 to your computer and use it in GitHub Desktop.
Code to find which features are not used by any of the models in an sklearn VotingClassifier
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Rank (and optionally print) feature importances for sklearn decision-tree models.
def feature_importance(forest, X_train, display_results=True):
    """Rank the columns of ``X_train`` by the importances stored on ``forest``.

    Args:
        forest: Fitted estimator exposing a ``feature_importances_`` sequence
            (one value per feature).
        X_train: Table whose ``columns`` attribute supplies the feature labels
            and whose ``shape[1]`` gives the number of features to rank
            (presumably a pandas DataFrame -- confirm against the caller).
        display_results: When true, print a ranked line per feature.

    Returns:
        A ``(ranked_list, zero_features)`` tuple: the column labels ordered
        from most to least important, and the labels whose importance is
        exactly ``0.0`` (in that same order).
    """
    importances = forest.feature_importances_
    # argsort is ascending; reverse it so the most important feature is first.
    order = np.argsort(importances)[::-1]
    columns = X_train.columns

    if display_results:
        print("Feature ranking:")

    ranked_list = []
    zero_features = []
    for rank, idx in enumerate(order[: X_train.shape[1]], start=1):
        label = columns[idx]
        score = importances[idx]

        if display_results:
            # Format the label with %s instead of string concatenation so
            # non-string column labels (e.g. integers) do not raise TypeError.
            print("%d. feature %d (%f) - %s" % (rank, idx, score, label))

        ranked_list.append(label)
        if score == 0.0:
            zero_features.append(label)

    return ranked_list, zero_features
# See which features are not used by ANY of the fitted models in the ensemble.
# `useless_features` keeps one list per estimator: the columns that estimator
# assigned zero importance to.
useless_features = [
    feature_importance(est, X_train, display_results=False)[1]
    for est in vc.estimators_
]

# A feature can be dropped only if every estimator ignores it, i.e. it is in
# the intersection of all the per-estimator zero-importance lists. With no
# estimators there is nothing to intersect, so the result is an empty set.
if useless_features:
    drop_features = set.intersection(*map(set, useless_features))
else:
    drop_features = set()

# Bare expression: shows the result when run as a notebook/REPL cell.
drop_features
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment