Skip to content

Instantly share code, notes, and snippets.

@nmayorov
Created December 1, 2015 23:09
Show Gist options
  • Save nmayorov/eb5c091b6756fab86144 to your computer and use it in GitHub Desktop.
Save nmayorov/eb5c091b6756fab86144 to your computer and use it in GitHub Desktop.
Feature selection on Communities and Crime dataset
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.preprocessing import Imputer
from sklearn.feature_selection import (
SelectKBest, MutualInfoSelector, f_regression)
from sklearn.linear_model import RidgeCV
from sklearn.model_selection import cross_val_score
# Read the raw table (the file has no header row) and convert every '?'
# cell to NaN so that the feature columns can be cast to float.
data = pd.read_csv('communities.data', header=None).replace('?', np.nan)

# Regression target: the last column of the table.
y = data.iloc[:, -1]

# Feature matrix: columns 5 up to (but excluding) the last one, as floats,
# with the NaN entries filled in by median imputation.
# NOTE(review): the first five columns are skipped - presumably they are
# non-predictive identifiers; confirm against the dataset description.
X = Imputer(strategy='median').fit_transform(
    data.iloc[:, 5:-1].astype(float).values)
# Fit each selector once on the full feature matrix. The mutual-information
# selectors are asked for every feature up front; the code further down then
# changes the number of kept features through set_params() without refitting.
n_total = X.shape[1]

# Mutual-information selector with its default settings (plotted as 'mRMR').
mrmr = MutualInfoSelector(n_features_to_select=n_total).fit(X, y)

# Same selector with the redundancy term switched off (plotted as 'MaxRel').
maxrel = MutualInfoSelector(
    n_features_to_select=n_total, use_redundancy=False).fit(X, y)

# Univariate F-test baseline (plotted as 'F-test').
ftest = SelectKBest(score_func=f_regression).fit(X, y)
# Evaluate every selector for every possible number of kept features by
# cross-validating a ridge regressor on the selected columns.
#
# NOTE(review): the selectors were fitted on the full data set before the
# cross-validation below, so the selection step has seen the held-out folds
# and the reported scores are likely optimistic. Wrapping selector + ridge
# in a Pipeline would avoid that, at the cost of refitting per fold.
ridge = RidgeCV()

mrmr_scores = []
ftest_scores = []
maxrel_scores = []

# Candidate subset sizes: 1, 2, ..., number of features.
k_all = np.arange(X.shape[1]) + 1

for k in k_all:
    # Only the requested subset size changes here; nothing is refitted.
    # This relies on get_support() honouring the updated parameter on an
    # already-fitted selector.
    mrmr.set_params(n_features_to_select=k)
    maxrel.set_params(n_features_to_select=k)
    ftest.set_params(k=k)

    # Same scoring procedure for each selector: keep the supported columns
    # and record the mean 5-fold cross-validation score of the ridge model.
    for selector, scores in ((mrmr, mrmr_scores),
                             (maxrel, maxrel_scores),
                             (ftest, ftest_scores)):
        X_selected = X[:, selector.get_support()]
        scores.append(np.mean(cross_val_score(ridge, X_selected, y, cv=5)))
# Draw one score-versus-subset-size curve per selection method on a single
# figure, then display it.
plt.figure(figsize=(10, 6))

for curve, name in ((maxrel_scores, 'MaxRel'),
                    (mrmr_scores, 'mRMR'),
                    (ftest_scores, 'F-test')):
    plt.plot(k_all, curve, label=name)

plt.xlabel("Number of kept features")
plt.ylabel("5-fold average R^2 score")

title = ("Comparison of feature selection methods on "
         "Communities and Crime dataset ")
plt.suptitle(title, fontsize=16)

plt.legend(loc='lower right')
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment