Skip to content

Instantly share code, notes, and snippets.

View dradecic's full-sized avatar

Dario Radečić dradecic

View GitHub Profile
@dradecic
dradecic / py-bokeh_1_imports.py
Last active August 25, 2019 10:39
Python-Bokeh - Gist 1: Imports
import math
import numpy as np
import pandas as pd
from bokeh.embed import components
from bokeh.models import ColumnDataSource, HoverTool, PrintfTickFormatter
from bokeh.plotting import figure
from bokeh.transform import factor_cmap
from flask import Flask, render_template, request
@dradecic
dradecic / rfecv_1_imports.py
Created September 1, 2019 15:44
rfecv_1_imports
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
# Notebook-wide settings: install an 'ignore' filter for all warnings and
# remove the limit on how many DataFrame columns pandas displays.
warnings.filterwarnings(action='ignore')
pd.set_option('display.max_columns', None)
@dradecic
dradecic / rfecv_2_cleaning.py
Created September 1, 2019 15:59
rfecv_2_cleaning
# Clean the table held in `data` in place: drop two columns that are not
# used as features, encode `Sex` numerically and reduce `Name` to a binary
# "unusual title" flag.
data.drop(columns=['Ticket', 'PassengerId'], inplace=True)

# Assign the result back instead of calling `replace(..., inplace=True)` on
# the selected column: that form operates on an intermediate object and is
# not guaranteed to update `data` (it does not under pandas copy-on-write).
gender_mapper = {'male': 0, 'female': 1}
data['Sex'] = data['Sex'].replace(gender_mapper)

# The title is the first space-delimited token after the first comma in
# `Name` (assumes names look like "Surname, Title. Given names" - TODO confirm).
titles = data['Name'].apply(lambda name: name.split(',')[1].strip().split(' ')[0])

# 0 for the three listed titles, 1 for anything else.
data['Title_Unusual'] = (~titles.isin(['Mr.', 'Miss.', 'Mrs.'])).astype(int)
data.drop(columns='Name', inplace=True)
@dradecic
dradecic / rfecv_5_num_feats.py
Created September 1, 2019 16:28
rfecv_5_num_feats
# Report the feature count stored on the fitted selector (`rfecv.n_features_`).
summary = 'Optimal number of features: {}'.format(rfecv.n_features_)
print(summary)
@dradecic
dradecic / rfecv_6_low_importance_features.py
Created September 1, 2019 16:35
rfecv_6_low_importance_features
# Positions of the columns RFECV did not select, i.e. the False entries of
# `rfecv.support_`. Computed once and reused for both the report and the
# drop, and negated directly instead of comparing the mask with `== False`.
rejected_positions = np.flatnonzero(~np.asarray(rfecv.support_, dtype=bool))
print(rejected_positions)

# Remove those columns from X in place.
X.drop(columns=X.columns[rejected_positions], inplace=True)
@dradecic
dradecic / rfecv_7_imporance_plotting.py
Created September 1, 2019 16:41
rfecv_7_imporance_plotting
# Pair every remaining column of X with the importance reported by the
# estimator fitted inside RFECV, ordered from most to least important.
dset = pd.DataFrame({
    'attr': X.columns,
    'importance': rfecv.estimator_.feature_importances_,
}).sort_values(by='importance', ascending=False)

# Horizontal bar chart: one bar per attribute, bar length = importance.
plt.figure(figsize=(16, 14))
plt.barh(y=dset['attr'], width=dset['importance'], color='#1976D2')
plt.title('RFECV - Feature Importances', fontsize=20, fontweight='bold', pad=20)
@dradecic
dradecic / rfecv_plotting_accuracy.py
Created September 1, 2019 16:47
rfecv_plotting_accuracy
# Plot the cross-validated score against the number of features kept.
#
# `grid_scores_` was deprecated in scikit-learn 1.0 and removed in 1.2; the
# per-step mean scores now live in `cv_results_['mean_test_score']`. Prefer
# the new attribute and fall back to the legacy one on old installations.
if hasattr(rfecv, 'cv_results_'):
    mean_scores = rfecv.cv_results_['mean_test_score']
    # NOTE(review): recent scikit-learn releases also report the feature
    # count of each step under 'n_features'; use it when present so the
    # x-axis stays correct for step != 1 - confirm against the installed version.
    feature_counts = rfecv.cv_results_.get('n_features')
else:
    mean_scores = rfecv.grid_scores_
    feature_counts = None

if feature_counts is None:
    # Original assumption: one score per feature count, starting at 1.
    feature_counts = range(1, len(mean_scores) + 1)

plt.figure(figsize=(16, 9))
plt.title('Recursive Feature Elimination with Cross-Validation', fontsize=18, fontweight='bold', pad=20)
plt.xlabel('Number of features selected', fontsize=14, labelpad=20)
plt.ylabel('% Correct Classification', fontsize=14, labelpad=20)
plt.plot(feature_counts, mean_scores, color='#303F9F', linewidth=3)
plt.show()
@dradecic
dradecic / ara_1_cleaning.py
Created September 9, 2019 12:44
ara_1_cleaning
# Drop three columns from `data` in place, then add labelled quantile-bin
# columns (pd.qcut with 5 requested quantiles) for several numeric columns.
data.drop(['RowNumber', 'CustomerId', 'Surname'], axis=1, inplace=True)
# NOTE(review): the first two labels disagree on the shared boundary
# ('566' vs '556'); one of them is presumably a typo - confirm against the
# actual quantile edge before relying on these names.
data['CreditScore_Bins'] = pd.qcut(data['CreditScore'], 5,
labels=['CS_lt_566', 'CS_556_to_627', 'CS_627_to_678', 'CS_678_to_735', 'CS_gt_735'])
data['Age_Bins'] = pd.qcut(data['Age'], 5,
labels=['Age_lt_31', 'Age_31_to_35', 'Age_35_to_40', 'Age_40_to_46', 'Age_gt_46'])
# Only four labels are supplied for five requested quantiles. With
# duplicates='drop' repeated bin edges are merged, so this presumably relies
# on exactly one edge being dropped for `Balance` - verify against the data.
data['Balance_Bins'] = pd.qcut(data['Balance'], 5,
labels=['Bal_lt_73080', 'Bal_73080_to_110138', 'Bal_110138_to_133710', 'Bal_gt_133710'],
duplicates='drop')
data['Salary_Bins'] = pd.qcut(data['EstimatedSalary'], 5,
@dradecic
dradecic / ara_3_print_loop.py
Created September 9, 2019 12:54
ara_3_print_loop
# Print the table and IV score returned by calculate_woe_iv for every
# column of `data` except 'Exited'.
for col in data.columns:
    # 'Exited' is the column name passed as the third argument below, so it
    # is skipped rather than analysed against itself.
    if col == 'Exited':
        continue

    print('WoE and IV for column: {}'.format(col))
    df, iv = calculate_woe_iv(data, col, 'Exited')
    print(df)
    print('IV score: {:.2f}'.format(iv))
    print('\n')
@dradecic
dradecic / ara_4_coarse_classing.py
Created September 9, 2019 13:46
ara_4_coarse_classing
def coarse_classer(df, indexloc_1, indexloc_2):
    """Merge two rows of a table into one row holding their column means.

    Args:
        df: DataFrame containing at least a ``WoE`` column.
        indexloc_1: Integer position (as used by ``iloc``) of the first row.
        indexloc_2: Integer position (as used by ``iloc``) of the second row.

    Returns:
        A new DataFrame in which the two rows are replaced by a single row
        with the mean of every numeric column. Non-numeric columns are NaN
        in the merged row. The result is sorted by ``WoE`` in descending
        order and carries a fresh 0..n-1 index. ``df`` is not modified.

    Raises:
        IndexError: If either position is out of bounds.
        KeyError: If ``df`` has no ``WoE`` column.
    """
    positions = [indexloc_1, indexloc_2]

    # Take the column-wise mean explicitly. np.mean(DataFrame) reduces over
    # both axes on pandas >= 2.0, and non-numeric columns would raise.
    merged_row = df.iloc[positions].mean(numeric_only=True).to_frame().T

    # Translate positions to labels before dropping, so the rows removed are
    # the same ones that were averaged even when the index is not 0..n-1.
    remaining = df.drop(df.index[positions])

    coarsed_df = pd.concat([remaining, merged_row])
    return coarsed_df.sort_values(by='WoE', ascending=False).reset_index(drop=True)