Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# USING SCIPY
# Computes three correlation measures between x and y; both must already be
# defined before this snippet runs. Each call is unpacked into a
# (coefficient, p-value) pair, and every assignment overwrites the previous
# `coef` and `p`.
from scipy.stats import spearmanr
from scipy.stats import pearsonr
from scipy.stats import kendalltau

coef, p = pearsonr(x, y)  # Pearson's r
coef, p = spearmanr(x, y)  # Spearman's rho
coef, p = kendalltau(x, y)  # Kendall's tau
# USING PANDAS
# NOTE(review): the pandas variant is missing from this snippet.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.feature_selection import VarianceThreshold

print(df_train.shape)  # output (143, 59)

# Fit the variance filter with a threshold of 0.0 and keep the reduced matrix.
var_filter = VarianceThreshold(threshold=0.0)
train = var_filter.fit_transform(df_train)

# To get the count of features that are not constant. `shape` is an
# attribute holding a tuple, so it is read, not called.
print(train.shape)  # output (143, 56)
# or
print(len(df_train.columns[var_filter.get_support()]))  # output 56
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.feature_selection import SelectKBest, chi2

# Cast the training features to integers before scoring them.
X_train = X_train.astype(int)

# Score every feature against y_train with chi2 and keep the best 12.
chi2_features = SelectKBest(score_func=chi2, k=12)
X_kbest_features = chi2_features.fit_transform(X_train, y_train)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Percentage of missing values per column. The whole expression is wrapped in
# parentheses so that nlargest() is applied to the series, not to print().
print((df_train.isnull().sum() / len(df_train) * 100).nlargest())
# output => Returns the 5 largest values from the series.
# No missing values in automobile dataset, so all shows 0%.
# symboling     0.0
# doornumber    0.0
# wheelbase     0.0
# carlength     0.0
# carwidth      0.0
# dtype: float64
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.feature_selection import SelectKBest, mutual_info_regression

# Score features with mutual_info_regression and keep the best 10.
selector = SelectKBest(score_func=mutual_info_regression, k=10)
# Fit the selector on the training set and reduce it in one step.
X_train_new = selector.fit_transform(X_train, y_train)

# Boolean mask over the input columns, used below to recover the names of
# the selected features.
mask = selector.get_support()  # Output array([False, False, True, True, True, False ....])
print(selector.scores_)  # Output array([0.16978127, 0.01829886, 0.45461366, 0.55126343, 0.66081217, 0.27715287 ....])
new_features = X_train.columns[mask]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.feature_selection import SelectKBest, f_regression

# Keep the 20 features with the best F-values against y_train.
fvalue_selector = SelectKBest(score_func=f_regression, k=20)
X_train_new = fvalue_selector.fit_transform(X_train, y_train)

# Compare the shapes before and after selection.
print(X_train.shape, X_train_new.shape)  # output (143, 59) (143, 20)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from mlxtend.feature_selection import SequentialFeatureSelector
from sklearn.linear_model import LinearRegression

# Forward sequential feature selection down to 10 features.
# cv = k-fold cross validation; floating is another extension of SFS,
# not used here.
sfs = SequentialFeatureSelector(
    LinearRegression(),
    k_features=10,
    forward=True,
    floating=False,
    # The estimator is a regressor, so score it with a regression metric;
    # 'accuracy' is a classification metric and does not apply to
    # continuous predictions.
    scoring='r2',
    cv=2,
)
sfs = sfs.fit(X_train, y_train)

# k_feature_idx_ is used to index the training columns and recover the
# names of the selected features.
selected_features = X_train.columns[list(sfs.k_feature_idx_)]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression

lm = LinearRegression()
# n_features_to_select is passed by keyword: current scikit-learn releases
# reject it as a positional argument.
rfe1 = RFE(lm, n_features_to_select=20)  # RFE with 20 features

# Fit on the training data only, then apply the same 20-feature selection
# to the test data.
X_train_new = rfe1.fit_transform(X_train, y_train)
X_test_new = rfe1.transform(X_test)

# Print the boolean results
print(rfe1.support_)  # Output [False False False False True False False False True False False...]
OlderNewer