Skip to content

Instantly share code, notes, and snippets.

@dhesse
Created July 27, 2015 11:31
Show Gist options
  • Save dhesse/6fc0d54304c248f05daf to your computer and use it in GitHub Desktop.
Save dhesse/6fc0d54304c248f05daf to your computer and use it in GitHub Desktop.
from sklearn.cross_validation import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
def goodFFS(X, y, nFeatures):
    """Greedy forward feature selection scored on a held-out split.

    Every round draws a new train/test split (test_size=0.2), fits one
    LogisticRegression per not-yet-selected column (combined with the
    columns selected so far) on the training part, and keeps the column
    whose model reaches the highest F1 score on the test part.

    Arguments:
    X -- matrix containing feature vectors, one column per feature; it
         must support ``X[:, [i, j]]`` column indexing and ``.shape``
         (presumably a NumPy array -- confirm against callers)
    y -- label data
    nFeatures -- maximum number of features to select

    Returns:
    selectedFeatures -- list of selected column indices, in the order
                        in which they were picked
    scores -- scores[n] is the best F1 score obtained in the round that
              added the n-th feature

    Selection stops early, returning fewer than nFeatures entries, as
    soon as no remaining candidate achieves an F1 score strictly above
    zero (this also covers running out of columns).
    """
    selected, scores = [], []
    while len(selected) < nFeatures:
        # A new split is drawn for every round, so each added feature is
        # judged on a different held-out sample.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2)
        # Starting at 0 with a strict '>' below means a candidate has to
        # score above zero to be selected; ties keep the lowest index.
        best_score, best_k = 0, None
        for k in range(X_train.shape[1]):
            if k in selected:
                continue
            columns = selected + [k]
            model = LogisticRegression().fit(X_train[:, columns], y_train)
            predictions = model.predict(X_test[:, columns])
            # f1_score's documented signature is (y_true, y_pred).
            score = f1_score(y_test, predictions)
            if score > best_score:
                best_score, best_k = score, k
        if best_k is None:
            # No candidate improved on a zero score: nothing left to add.
            break
        selected.append(best_k)
        scores.append(best_score)
    return selected, scores
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment