Ayushi Jain (Ayushijain09)
#USING SCIPY
from scipy.stats import spearmanr
from scipy.stats import pearsonr
from scipy.stats import kendalltau
coef, p = pearsonr(x, y)  # Pearson's r
coef, p = spearmanr(x, y) # Spearman's rho
coef, p = kendalltau(x, y) # Kendall's tau
#USING PANDAS
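# (a minimal sketch, assumed rather than taken from the original gist: pandas
# computes the same coefficients directly on a DataFrame via DataFrame.corr)
corr_matrix = df_train.corr(method='pearson')  # also accepts 'spearman' or 'kendall'
print(corr_matrix['price'].nlargest(10))  # 'price' is a hypothetical target column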
#VARIANCE THRESHOLD
from sklearn.feature_selection import VarianceThreshold
print(df_train.shape) #output (143, 59)
var_filter = VarianceThreshold(threshold=0.0)  # removes constant (zero-variance) features
train = var_filter.fit_transform(df_train)
#to get the count of features that are not constant
print(train.shape)  # output (143, 56)
#or
print(len(df_train.columns[var_filter.get_support()])) #output 56
#CHI-SQUARE TEST
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
X_train = X_train.astype(int)  # chi2 requires non-negative feature values
chi2_features = SelectKBest(chi2, k=12)  # keep the 12 best-scoring features
X_kbest_features = chi2_features.fit_transform(X_train, y_train)
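# A possible follow-up (assumed): map the boolean support mask back to column
# names, assuming X_train is a pandas DataFrame
print(X_train.columns[chi2_features.get_support()])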
#MISSING VALUE RATIO
print((df_train.isnull().sum()/len(df_train)*100).nlargest())
#output => returns the 5 largest values from the series. No missing values in the automobile dataset, so all show 0%.
"""symboling 0.0
doornumber 0.0
wheelbase 0.0
carlength 0.0
carwidth 0.0
dtype: float64"""
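# A possible follow-up (assumed, not in the original gist): drop columns whose
# missing-value ratio exceeds a chosen threshold, e.g. 30%
missing_ratio = df_train.isnull().sum()/len(df_train)*100
df_train = df_train.drop(columns=missing_ratio[missing_ratio > 30].index)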
Ayushijain09 / mutual_info_regression.py
Mutual Information for Regression
from sklearn.feature_selection import mutual_info_regression
from sklearn.feature_selection import SelectKBest
selector = SelectKBest(mutual_info_regression, k=10)
X_train_new = selector.fit_transform(X_train, y_train) #Applying transformation to the training set
#to get names of the selected features
mask = selector.get_support() # Output array([False, False, True, True, True, False ....])
print(selector.scores_) #Output array([0.16978127, 0.01829886, 0.45461366, 0.55126343, 0.66081217, 0.27715287 ....])
new_features = X_train.columns[mask]
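# A possible follow-up (assumed): reuse the fitted selector on held-out data
X_test_new = selector.transform(X_test)  # assumes X_test has the same columns as X_train
print(new_features)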
Ayushijain09 / ANOVA.py
ANOVA feature selection
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_regression
fvalue_selector = SelectKBest(f_regression, k=20)  # select the 20 features with the best ANOVA F-values
X_train_new = fvalue_selector.fit_transform(X_train, y_train)
print(X_train.shape, X_train_new.shape) #output (143, 59) (143, 20)
Ayushijain09 / Sequential_Forward_Selection.py
Sequential Forward Selection
from mlxtend.feature_selection import SequentialFeatureSelector
from sklearn.linear_model import LinearRegression
sfs = SequentialFeatureSelector(LinearRegression(),
                                k_features=10,
                                forward=True,    # forward selection; floating SFS is a variant not used here
                                floating=False,
                                scoring='r2',    # regression metric ('accuracy' only applies to classifiers)
                                cv=2)            # cv = k-fold cross-validation
sfs = sfs.fit(X_train, y_train)
selected_features = X_train.columns[list(sfs.k_feature_idx_)]
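# A possible follow-up (assumed): inspect the selected subset and its mean
# cross-validated score
print(selected_features)
print(sfs.k_score_)  # mean CV r2 of the chosen 10-feature subset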
Ayushijain09 / RFE_Regression.py
Regression with RFE
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression
lm = LinearRegression()
rfe1 = RFE(lm, n_features_to_select=20)  # RFE keeping 20 features
# Fit on train data, then reduce train and test to those 20 features
X_train_new = rfe1.fit_transform(X_train, y_train)
X_test_new = rfe1.transform(X_test)
# Print the boolean support mask
print(rfe1.support_)  # Output [False False False False True False False False True False False...]
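# A possible follow-up (assumed): recover the selected column names and the
# elimination ranking (1 = selected), assuming X_train is a DataFrame
print(X_train.columns[rfe1.support_])
print(rfe1.ranking_)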