Skip to content

Instantly share code, notes, and snippets.

@3catz
Last active October 28, 2020 20:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save 3catz/a81f1659ad82fa20331289b739216596 to your computer and use it in GitHub Desktop.
Save 3catz/a81f1659ad82fa20331289b739216596 to your computer and use it in GitHub Desktop.
select_from_model_repeated
from sklearn.feature_selection import *
# Repeat model-based feature selection ten times, each under a different
# NumPy seed, score every selected subset with repeated stratified k-fold
# cross-validation (ROC AUC), and report the best-scoring subset(s).
#
# Relies on names defined outside this snippet: `clf` (the estimator), `D`
# (the data frame), `allfeats` (candidate column names), `y` (the target),
# plus `np`, `RepeatedStratifiedKFold` and `cross_val_score`.
#
# NOTE(review): the features are selected on the full data set and then
# cross-validated on that same data, so the printed AUC is probably
# optimistic. Consider doing the selection inside a Pipeline -- confirm
# with the author before changing, since it alters the reported numbers.
feat_list = []   # selected column names, one list per run
all_scores = []  # mean cross-validated ROC AUC, one value per run

# The candidate feature matrix does not change between runs; slice it once.
X_all = D[allfeats]

for i in range(10):
    # Re-seed the global NumPy RNG for this run. The CV splitter below is
    # given no random_state, so it presumably draws from this global state
    # (as would `clf` if it has no fixed random_state -- TODO confirm).
    np.random.seed(i)

    sfm = SelectFromModel(
        estimator=clf,
        threshold=None,
        prefit=False,
        norm_order=1,
        max_features=12,
    )
    sfm.fit(X_all, y)

    # Boolean mask over the columns of X_all marking the selected features.
    modfeats = sfm.get_support()
    Xred = X_all.iloc[:, modfeats]
    cols = list(Xred.columns)
    feat_list.append(cols)

    rskf = RepeatedStratifiedKFold(n_splits=10, n_repeats=100)
    this_score = np.mean(
        cross_val_score(clf, Xred, y, cv=rskf, scoring="roc_auc")
    )
    print(this_score)
    all_scores.append(this_score)

# Compute the maximum once, then report every run that attains it (ties are
# all printed, exactly as the per-index comparison did).
best_score = np.max(all_scores)
for cols, this_score in zip(feat_list, all_scores):
    if this_score == best_score:
        print("These are the Best Features", cols)
        print("They scored: ", this_score)

# Uncomment to also report the mean score across all runs:
# print("Overall Average", np.mean(all_scores))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment