Skip to content

Instantly share code, notes, and snippets.

@Ayushijain09
Last active July 17, 2020 12:55
Show Gist options
  • Save Ayushijain09/79ef8c3ed897e5a86fa9322b5c05ae08 to your computer and use it in GitHub Desktop.
Save Ayushijain09/79ef8c3ed897e5a86fa9322b5c05ae08 to your computer and use it in GitHub Desktop.
Feature Selection Techniques
from sklearn.feature_selection import SelectKBest, f_regression

# Univariate filter: score every feature against the target with f_regression
# and keep only the 20 features with the highest F-values.
fvalue_selector = SelectKBest(score_func=f_regression, k=20)
fvalue_selector.fit(X_train, y_train)
X_train_new = fvalue_selector.transform(X_train)

# Compare the shape before and after selection.
print(X_train.shape, X_train_new.shape)  # output (143, 59) (143, 20)
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
from sklearn.feature_selection import SelectKBest, chi2

# Cast the training features to integers before scoring them.
# NOTE(review): chi2 expects non-negative features (counts / booleans);
# presumably that already holds for this data -- confirm.
X_train = X_train.astype(int)

# Keep the 12 features with the highest chi-squared statistic w.r.t. y_train.
chi2_features = SelectKBest(score_func=chi2, k=12)
chi2_features.fit(X_train, y_train)
X_kbest_features = chi2_features.transform(X_train)
1. Mutual Information
2. Chi Square
3. ANOVA
4. Pearson, Spearman, Kendall Correlation
5. Tree Model
6. Sequential Feature Selection
7. Variance Threshold
8. Recursive Feature Elimination
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV

# Candidate regularisation strengths for the grid search.
parameters = {'alpha': [1e-15, 1e-10, 1e-8, 1e-4, 1e-3, 1e-2, 1, 3, 5]}

# 5-fold cross-validated search over alpha, ranked by R^2.
lasso = Lasso()
lasso_model = GridSearchCV(
    estimator=lasso,
    param_grid=parameters,
    scoring='r2',
    cv=5,
)
lasso_model.fit(X_train, y_train)

# Predict on the held-out set with the fitted search object.
pred = lasso_model.predict(X_test)

print(lasso_model.best_params_)  # output {'alpha': 0.001}
print(lasso_model.best_score_)  # output 0.8630550401365724
# Percentage of missing values per column of df_train.
# The original line had unbalanced parentheses (a SyntaxError) and tried to
# call .nlargest() on the result of print(); compute the Series first, then
# print its largest entries.
missing_pct = df_train.isnull().sum() / len(df_train) * 100

# nlargest() with no argument returns the 5 largest values of the Series.
print(missing_pct.nlargest())

# Reported output -- no missing values in the automobile dataset, so every
# column shows 0%:
#   symboling     0.0
#   doornumber    0.0
#   wheelbase     0.0
#   carlength     0.0
#   carwidth      0.0
#   dtype: float64
# USING SCIPY
# Each function returns a (coefficient, p-value) pair; `coef` and `p` are
# overwritten by each successive call.
from scipy.stats import kendalltau, pearsonr, spearmanr

coef, p = pearsonr(x, y)  # Pearson's r (was misspelled `pearsonrr` -> NameError)
coef, p = spearmanr(x, y)  # Spearman's rho
coef, p = kendalltau(x, y)  # Kendall's tau

# USING PANDAS
# assumes x and y are pandas Series -- Series.corr defaults to Pearson.
# The results are not stored; in a notebook only the last one is displayed.
x.corr(y)  # Pearson's r
x.corr(y, method='spearman')  # Spearman's rho
x.corr(y, method='kendall')  # Kendall's tau
import numpy as np
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Recursive Feature Elimination around a linear regression, keeping 20 features.
# n_features_to_select must be passed by keyword: recent scikit-learn releases
# reject it as a positional argument.
lm = LinearRegression()
rfe1 = RFE(lm, n_features_to_select=20)

# Fit the selector on the training data only, then apply the same column
# selection to the test data.
X_train_new = rfe1.fit_transform(X_train, y_train)
X_test_new = rfe1.transform(X_test)

# Boolean mask of the kept features and the elimination ranking (1 = kept).
print(rfe1.support_)  # Output [False False False False True False False False True False False...]
print(rfe1.ranking_)  # Output [36 34 23 26 1 21 12 27 1 13 28 1 18 19 32 25 1 11 9 7 8 10 30 35...]

# Refit the regression on the reduced feature set and evaluate on the test set.
lm.fit(X_train_new, y_train)
predictions_rfe = lm.predict(X_test_new)
RMSE = np.sqrt(mean_squared_error(y_test, predictions_rfe))
# Score the same predictions as RMSE (the original referenced an undefined
# `predictions` variable here).
R2 = r2_score(y_test, predictions_rfe)
print('R2:', R2, 'RMSE:', RMSE)  # Output R2: 0.88 RMSE: 0.33
from mlxtend.feature_selection import SequentialFeatureSelector
from sklearn.linear_model import LinearRegression

# Forward sequential feature selection down to 10 features.
# cv = number of cross-validation folds; floating is an extension of SFS that
# is not used here.
# scoring must be a regression metric: 'accuracy' is a classification metric
# and is not meaningful for LinearRegression on a continuous target.
sfs = SequentialFeatureSelector(
    LinearRegression(),
    k_features=10,
    forward=True,
    floating=False,
    scoring='r2',
    cv=2,
)
sfs = sfs.fit(X_train, y_train)

# Print the selected features (the original used a lowercase `x_train` that
# does not match the fitted `X_train`, and repeated this step twice).
# assumes X_train is a pandas DataFrame so that .columns is available.
selected_features = X_train.columns[list(sfs.k_feature_idx_)]
print(selected_features)

# Cross-validation score of the selected feature subset.
print(sfs.k_score_)

# Transform to the newly selected features.
x_train_new = sfs.transform(X_train)
from sklearn.feature_selection import VarianceThreshold

print(df_train.shape)  # output (143, 59)

# threshold=0.0 drops only the features whose variance is zero (constants).
var_filter = VarianceThreshold(threshold=0.0)
train = var_filter.fit_transform(df_train)

# Count of features that are not constant.
# `shape` is a tuple attribute, not a method: calling it raised TypeError.
print(train.shape)  # output (143, 56)
# or, using the boolean support mask over the original columns
print(len(df_train.columns[var_filter.get_support()]))  # output 56
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment