Skip to content

Instantly share code, notes, and snippets.

View dradecic's full-sized avatar

Dario Radečić dradecic

View GitHub Profile
@dradecic
dradecic / py-bokeh_11_redraw.py
Created August 25, 2019 09:20
Python-Bokeh - Gist 11 - Redraw
def redraw(p_class):
    """Rebuild the three charts for ``p_class``.

    Args:
        p_class: Value forwarded unchanged to each chart helper together
            with the module-level ``df`` (presumably the passenger class
            to filter on -- confirm against the helpers).

    Returns:
        A 3-tuple ``(survived_chart, title_chart, hist_age)`` holding the
        results of ``survived_bar_chart``, ``class_titles_bar_chart`` and
        ``age_hist``, in that order.
    """
    # Tuple elements are evaluated left to right, so the helpers run in
    # the same order as the tuple they populate.
    return (
        survived_bar_chart(df, p_class),
        class_titles_bar_chart(df, p_class),
        age_hist(df, p_class),
    )
@dradecic
dradecic / py-bokeh_12_route_final.py
Created August 25, 2019 09:21
Python-Bokeh - Gist 12: Route final
@app.route('/', methods=['GET', 'POST'])
def chart():
    """Handle the index route and rebuild the charts for the selected class.

    Reads the ``dropdown-select`` form field, falls back to class ``1``
    when nothing usable was submitted, and regenerates the charts through
    ``redraw``.
    """
    # Form values arrive as strings, so comparing the raw value with the
    # integer 0 could never match. ``type=int`` converts the field and
    # yields None when it is missing (e.g. on a plain GET) or not an
    # integer, so ``redraw`` always receives an int, as in the fallback.
    selected_class = request.form.get('dropdown-select', type=int)

    # None (nothing submitted) and 0 both select the default class 1.
    survived_chart, title_chart, hist_age = redraw(selected_class or 1)

    script_survived_chart, div_survived_chart = components(survived_chart)
    # NOTE(review): the visible snippet ends here; the handling of the
    # remaining charts and the response are not shown in this listing.
@dradecic
dradecic / rfecv_1_imports.py
Created September 1, 2019 15:44
rfecv_1_imports
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
# Install a catch-all filter so that every warning is ignored.
warnings.filterwarnings(action='ignore')
# None removes the cap on how many columns pandas shows when displaying a frame.
pd.set_option('display.max_columns', None)
@dradecic
dradecic / rfecv_2_cleaning.py
Created September 1, 2019 15:59
rfecv_2_cleaning
# Remove the 'Ticket' and 'PassengerId' columns from the frame.
data.drop(columns=['Ticket', 'PassengerId'], inplace=True)

# Encode 'Sex' as 0/1. The result is assigned back to the frame rather
# than calling replace(inplace=True) on the column selection: that chained
# in-place form mutates a temporary and leaves `data` untouched when pandas
# Copy-on-Write is active.
gender_mapper = {'male': 0, 'female': 1}
data['Sex'] = data['Sex'].replace(gender_mapper)

# The title is the first space-delimited token that follows the first
# comma in 'Name' (after stripping surrounding whitespace).
titles = data['Name'].apply(lambda x: x.split(',')[1].strip().split(' ')[0])

# Flag every title other than 'Mr.', 'Miss.' and 'Mrs.' with 1, the rest
# with 0, and store the flag directly under its final column name.
common_titles = ['Mr.', 'Miss.', 'Mrs.']
data['Title_Unusual'] = [0 if title in common_titles else 1 for title in titles]

# 'Name' is no longer needed once the flag has been derived from it.
data.drop(columns='Name', inplace=True)
@dradecic
dradecic / rfecv_3_correlations.py
Created September 1, 2019 16:06
rfecv_3_correlations
# Pairwise correlations between all columns except the 'Survived' target.
correlation_matrix = data.drop('Survived', axis=1).corr()

# Collect every column whose absolute correlation with at least one column
# positioned before it exceeds 0.8. Slicing row `pos` up to (not including)
# position `pos` visits only the strictly lower triangle of the matrix, so
# each pair is checked once and the diagonal is skipped.
correlated_features = {
    colname
    for pos, colname in enumerate(correlation_matrix.columns)
    if (correlation_matrix.iloc[pos, :pos].abs() > 0.8).any()
}
@dradecic
dradecic / rfecv_4_rfecv.py
Created September 1, 2019 16:24
rfecv_4_rfecv
# Separate the 'Survived' column (the target) from the remaining columns.
target = data['Survived']
X = data.drop(columns='Survived')

# Recursive feature elimination with cross-validation around a random
# forest: one feature is removed per step, and each candidate subset is
# scored by accuracy over 10 stratified folds.
rfc = RandomForestClassifier(random_state=101)
rfecv = RFECV(
    estimator=rfc,
    step=1,
    cv=StratifiedKFold(n_splits=10),
    scoring='accuracy',
)
rfecv.fit(X, target)
@dradecic
dradecic / rfecv_5_num_feats.py
Created September 1, 2019 16:28
rfecv_5_num_feats
# Report the number of features the fitted RFECV selected.
optimal_feature_count = rfecv.n_features_
print('Optimal number of features: {}'.format(optimal_feature_count))
@dradecic
dradecic / rfecv_6_low_importance_features.py
Created September 1, 2019 16:35
rfecv_6_low_importance_features
# Positions of the columns RFECV did not select, i.e. where the support_
# mask is False. The mask is negated with logical_not instead of being
# compared to False, and the positions are computed once and reused.
rejected_positions = np.flatnonzero(np.logical_not(rfecv.support_))
print(rejected_positions)

# Drop the rejected columns from X in place.
X.drop(columns=X.columns[rejected_positions], inplace=True)
@dradecic
dradecic / rfecv_7_imporance_plotting.py
Created September 1, 2019 16:41
rfecv_7_imporance_plotting
# Pair each column name of X with the importance reported by the estimator
# that RFECV fitted, ordered from most to least important.
# NOTE(review): assumes X already contains only the selected features, so
# that both sequences have the same length -- confirm against the caller.
dset = pd.DataFrame({
    'attr': X.columns,
    'importance': rfecv.estimator_.feature_importances_,
}).sort_values(by='importance', ascending=False)

# Horizontal bar chart: one bar per feature, bar length = importance.
plt.figure(figsize=(16, 14))
plt.barh(y=dset['attr'], width=dset['importance'], color='#1976D2')
plt.title('RFECV - Feature Importances', fontsize=20, fontweight='bold', pad=20)
@dradecic
dradecic / rfecv_plotting_accuracy.py
Created September 1, 2019 16:47
rfecv_plotting_accuracy
# Mean cross-validated score per candidate feature count.
# `grid_scores_` was deprecated in scikit-learn 1.0 and removed in 1.2, so
# prefer `cv_results_['mean_test_score']` when the fitted selector provides
# it and fall back to `grid_scores_` on older releases.
cv_results = getattr(rfecv, 'cv_results_', None)
if cv_results is not None:
    mean_scores = cv_results['mean_test_score']
else:
    mean_scores = rfecv.grid_scores_

plt.figure(figsize=(16, 9))
plt.title('Recursive Feature Elimination with Cross-Validation', fontsize=18, fontweight='bold', pad=20)
plt.xlabel('Number of features selected', fontsize=14, labelpad=20)
plt.ylabel('% Correct Classification', fontsize=14, labelpad=20)
# The x axis counts scores starting from 1, one point per entry.
plt.plot(range(1, len(mean_scores) + 1), mean_scores, color='#303F9F', linewidth=3)
plt.show()