Created
October 2, 2020 18:16
-
-
Save finlytics-hub/7291fad4ffc1ba27bb88f60947af3bbb to your computer and use it in GitHub Desktop.
ODI: Feature Selection
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Player names are not useful for the feature-selection and model-training
# steps, so drop every column in the label range 'Country_Player_1' through
# 'Opposition_Player_12' (a .loc label slice includes both end columns).
# NOTE(review): the drop mutates final_data in place. The original comment says
# a backup of the DF is created first; that copy is not made here -- confirm it
# is taken earlier in the file.
player_columns = final_data.loc[:, 'Country_Player_1':'Opposition_Player_12'].columns
final_data.drop(columns=player_columns, inplace=True)
# Categorical Feature Selection
# Split the frame into predictors (everything except 'Result') and the target.
X = final_data.drop(columns=['Result'])
y = final_data['Result']
# chi-square test results, kept as two parallel lists (one entry per feature);
# pre-creating both keys keeps the DataFrame conversion below valid even if
# no column is evaluated
chi2_check = {'Feature': [], 'p-value': []}
# categorical columns to be evaluated
categorical_columns = ['Country', 'Opposition', 'Home/Away', 'Ground', 'Toss Won?']
# test each categorical column for independence from the target variable
for column in categorical_columns:
    # crosstab builds the observed-frequency table (target x feature);
    # chi2_contingency returns the statistic, p-value, degrees of freedom and
    # expected frequencies -- only the p-value is recorded
    chi, p, dof, ex = chi2_contingency(pd.crosstab(y, X[column]))
    chi2_check['Feature'].append(column)
    chi2_check['p-value'].append(np.round(p, 4))
# convert the dictionary to a DF, smallest p-value first
chi2_result = pd.DataFrame(chi2_check).sort_values('p-value')
# Numerical Feature Selection
# Names of the numerical feature columns. 'Result' is the target: if it is
# stored as a number it must not be scored against itself, so it is removed
# here (errors='ignore' makes this a no-op when 'Result' is not numeric).
numerical_columns = (
    final_data.select_dtypes('number')
    .columns.drop('Result', errors='ignore')
    .values
)
# keep only the numerical feature columns
X = final_data[numerical_columns]
# calculate the F-Statistic and corresponding p-value of each numerical column
# against the target
F, pval = f_classif(X, final_data['Result'])
# store the results in a DF: one row per feature, smallest p-value first
F_Values = pd.DataFrame(
    {'F-Statistic': np.round(F, 2), 'p-value': np.round(pval, 4)},
    index=numerical_columns,
    dtype=float,
).sort_values('p-value')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment