Skip to content

Instantly share code, notes, and snippets.

@pritul2
Created March 1, 2022 13:40
Show Gist options
  • Save pritul2/f5df4a543b329b387b0a10cda59d1e26 to your computer and use it in GitHub Desktop.
Save pritul2/f5df4a543b329b387b0a10cda59d1e26 to your computer and use it in GitHub Desktop.
# compare different numbers of features selected using anova f-test
from numpy import mean
from numpy import std
from pandas import read_csv
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from matplotlib import pyplot
# load the dataset
def load_dataset(filename):
    """Load a headerless CSV file and split it into inputs and target.

    Every column except the last is returned as the input matrix; the last
    column is returned as the output vector.

    Args:
        filename: Location of the CSV file, passed straight to
            ``pandas.read_csv``. The file must not contain a header row.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays, where ``X`` holds all columns
        but the last and ``y`` holds the last column.
    """
    # header=None: the first row is data, not column names
    data = read_csv(filename, header=None)
    # work on the underlying numpy array rather than the DataFrame
    dataset = data.values
    # split into input (X) and output (y) variables
    X = dataset[:, :-1]
    y = dataset[:, -1]
    return X, y
# evaluate a given model using cross-validation
def evaluate_model(model, X, y, n_splits=10, n_repeats=3, random_state=1):
    """Score *model* by accuracy using repeated stratified k-fold CV.

    Args:
        model: Estimator (or pipeline) handed to ``cross_val_score``.
        X: Input features.
        y: Target labels, used for stratifying the folds.
        n_splits: Number of folds per repetition (default 10).
        n_repeats: Number of times the k-fold split is repeated (default 3).
        random_state: Seed for the fold shuffling, so that runs are
            reproducible (default 1).

    Returns:
        The array of accuracy scores returned by ``cross_val_score``.

    Raises:
        Any exception raised while fitting or scoring the model, because
        ``error_score='raise'`` is passed to ``cross_val_score``.
    """
    cv = RepeatedStratifiedKFold(
        n_splits=n_splits, n_repeats=n_repeats, random_state=random_state
    )
    # n_jobs=-1 asks scikit-learn to parallelise across the available cores;
    # error_score='raise' surfaces fit failures instead of recording a
    # placeholder score.
    scores = cross_val_score(
        model, X, y, scoring='accuracy', cv=cv, n_jobs=-1, error_score='raise'
    )
    return scores
# define dataset
X, y = load_dataset('pima-indians-diabetes.csv')
# candidate feature counts to evaluate: 1 up to the number of input columns
num_features = list(range(1, X.shape[1] + 1))
# cross-validation scores for each k, in the same order as num_features
results = []
for k in num_features:
    # create pipeline: ANOVA F-test selection of the k best features,
    # followed by logistic regression. Selection is a pipeline step so that
    # it is evaluated together with the classifier during cross-validation.
    model = LogisticRegression(solver='liblinear')
    fs = SelectKBest(score_func=f_classif, k=k)
    pipeline = Pipeline(steps=[('anova', fs), ('lr', model)])
    # evaluate the model
    scores = evaluate_model(pipeline, X, y)
    results.append(scores)
    # summarize the results: k, mean accuracy, (standard deviation)
    print('>%d %.3f (%.3f)' % (k, mean(scores), std(scores)))
# plot model performance for comparison: one box per feature count
# NOTE(review): newer Matplotlib releases rename `labels` to `tick_labels`
# for boxplot; confirm against the installed version before changing it.
pyplot.boxplot(results, labels=num_features, showmeans=True)
pyplot.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment