This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
import numpy as np | |
import pandas as pd | |
from bokeh.embed import components | |
from bokeh.models import ColumnDataSource, HoverTool, PrintfTickFormatter | |
from bokeh.plotting import figure | |
from bokeh.transform import factor_cmap | |
from flask import Flask, render_template, request |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%matplotlib inline | |
import numpy as np | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
import warnings | |
# Notebook-wide setup: silence every warning category and lift the cap on
# how many DataFrame columns pandas prints.
warnings.simplefilter('ignore')
pd.set_option('display.max_columns', None)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Preprocess the passenger table: drop identifier columns, encode Sex as 0/1,
# and replace Name with a flag marking uncommon titles.
data.drop(columns=['Ticket', 'PassengerId'], inplace=True)

# Assign the encoded column back rather than calling replace(inplace=True) on
# data['Sex']: the in-place form operates on an intermediate Series and is not
# guaranteed to write through to `data` under pandas copy-on-write.
gender_mapper = {'male': 0, 'female': 1}
data['Sex'] = data['Sex'].replace(gender_mapper)

# The title is the first space-delimited token after the first comma in Name.
# presumably names look like 'Surname, Mr. Given' — verify against the data
titles = data['Name'].apply(lambda x: x.split(',')[1].strip().split(' ')[0])

# 0 for the three common titles, 1 for anything else.
common_titles = ['Mr.', 'Miss.', 'Mrs.']
data['Title_Unusual'] = (~titles.isin(common_titles)).astype(int)

data.drop(columns='Name', inplace=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Report how many features the fitted RFECV selector kept.
print('Optimal number of features:', rfecv.n_features_)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Positions of the columns RFECV rejected (entries of support_ that are False).
# Computed once and reused for both the printout and the drop.
rejected_idx = np.flatnonzero(~np.asarray(rfecv.support_, dtype=bool))
print(rejected_idx)

# Remove the rejected columns from X in place.
X.drop(columns=X.columns[rejected_idx], inplace=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pair each column of X with the fitted estimator's importance and sort so the
# largest importance comes first.
# assumes X holds exactly the columns the final estimator was fit on, so the
# two sequences have equal length — TODO confirm
dset = pd.DataFrame({
    'attr': X.columns,
    'importance': rfecv.estimator_.feature_importances_,
}).sort_values(by='importance', ascending=False)

# Horizontal bar chart of importance per attribute.
plt.figure(figsize=(16, 14))
plt.barh(y=dset['attr'], width=dset['importance'], color='#1976D2')
plt.title('RFECV - Feature Importances', fontsize=20, fontweight='bold', pad=20)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Plot the cross-validated score for each number of features RFECV evaluated.
# `grid_scores_` was deprecated in scikit-learn 1.0 and removed in 1.2, so
# prefer the mean test score from `cv_results_` and fall back only on older
# releases that do not have it.
if hasattr(rfecv, 'cv_results_'):
    cv_scores = rfecv.cv_results_['mean_test_score']
else:
    cv_scores = rfecv.grid_scores_

plt.figure(figsize=(16, 9))
plt.title('Recursive Feature Elimination with Cross-Validation', fontsize=18, fontweight='bold', pad=20)
plt.xlabel('Number of features selected', fontsize=14, labelpad=20)
plt.ylabel('% Correct Classification', fontsize=14, labelpad=20)
# NOTE(review): the x axis counts 1..len(scores), which matches the feature
# counts only if RFECV ran with step=1 and min_features_to_select=1 — confirm.
plt.plot(range(1, len(cv_scores) + 1), cv_scores, color='#303F9F', linewidth=3)
plt.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Drop identifier-like columns, then bucket continuous features into
# quantile-based bins with hand-written labels.
data.drop(columns=['RowNumber', 'CustomerId', 'Surname'], inplace=True)

# NOTE(review): the first two labels disagree on the boundary ('566' vs '556');
# one looks like a typo, but the strings are left untouched because later code
# may match on them.
data['CreditScore_Bins'] = pd.qcut(
    data['CreditScore'], 5,
    labels=['CS_lt_566', 'CS_556_to_627', 'CS_627_to_678', 'CS_678_to_735', 'CS_gt_735'],
)

data['Age_Bins'] = pd.qcut(
    data['Age'], 5,
    labels=['Age_lt_31', 'Age_31_to_35', 'Age_35_to_40', 'Age_40_to_46', 'Age_gt_46'],
)

# Only four labels are supplied for q=5: this works only when
# duplicates='drop' collapses exactly one repeated quantile edge.
# presumably many rows share the same Balance value — verify against the data
data['Balance_Bins'] = pd.qcut(
    data['Balance'], 5,
    labels=['Bal_lt_73080', 'Bal_73080_to_110138', 'Bal_110138_to_133710', 'Bal_gt_133710'],
    duplicates='drop',
)
data['Salary_Bins'] = pd.qcut(data['EstimatedSalary'], 5, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Print the WoE table and IV score for every column of `data` except the target.
for col in data.columns:
    # 'Exited' is passed as the target argument below, so it is skipped here.
    if col == 'Exited':
        continue

    print('WoE and IV for column: {}'.format(col))
    df, iv = calculate_woe_iv(data, col, 'Exited')
    print(df)
    print('IV score: {:.2f}'.format(iv))
    print('\n')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def coarse_classer(df, indexloc_1, indexloc_2):
    """Merge two rows of a table into a single averaged row.

    The two rows at the given integer positions are removed and replaced by
    one row holding the column-wise mean of their numeric columns. The result
    is sorted by the 'WoE' column in descending order and re-indexed from 0.

    Args:
        df: DataFrame containing at least a numeric 'WoE' column.
        indexloc_1: Integer position of the first row to merge.
        indexloc_2: Integer position of the second row to merge.

    Returns:
        A new DataFrame with one row fewer than ``df`` (when the two positions
        differ). Non-numeric columns are NaN in the merged row. ``df`` itself
        is not modified.

    Raises:
        IndexError: If either position is out of bounds.
        KeyError: If ``df`` has no 'WoE' column.
    """
    positions = [indexloc_1, indexloc_2]

    # Average explicitly along axis 0. np.mean(DataFrame) forwards axis=None,
    # which pandas >= 2.0 treats as "reduce over both axes" and returns a
    # scalar. numeric_only skips label columns that cannot be averaged.
    mean_val = df.iloc[positions].mean(axis=0, numeric_only=True).to_frame().T

    # Translate positions to index labels before dropping, so the rows that
    # were averaged are the rows removed even when the index is not 0..n-1.
    original = df.drop(df.index[positions])

    coarsed_df = pd.concat([original, mean_val])
    return coarsed_df.sort_values(by='WoE', ascending=False).reset_index(drop=True)