Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Echo the value bound to `test` (defined outside this snippet).
print(test)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%matplotlib inline | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
from IPython.display import display | |
pd.options.display.max_columns = None | |
import missingno as msno | |
msno.bar(df) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Separate the feature columns into continuous and categorical groups.
X_cont = [
    'age', 'campaign', 'pdays', 'previous',
    'emp.var.rate', 'cons.price.idx', 'euribor3m', 'nr.employed',
]
X_cat = [
    'job', 'marital', 'education', 'default', 'housing',
    'loan', 'contact', 'month', 'day_of_week', 'poutcome',
]

# Column subsets of X_df (defined outside this snippet) for each group.
cont_df = X_df[X_cont]
cat_df = X_df[X_cat]
# Creating dummy variable dataframe from categorical variables. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Label Encode instead of dummy variables | |
mappings = [] | |
from sklearn.preprocessing import LabelEncoder | |
label_encoder = LabelEncoder() | |
label_df = df.drop('y', axis=1) | |
for i, col in enumerate(label_df): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Label encode categorical variables.
# LabelEncoder must already be imported (sklearn.preprocessing) before this cell runs.
label_encoder = LabelEncoder()
mappings = []

# Desired label orders for categorical columns.
educ_order = [
    'unknown', 'illiterate', 'basic.4y', 'basic.6y', 'basic.9y',
    'high.school', 'professional.course', 'university.degree',
]
# NOTE(review): 'jan' and 'feb' are absent here -- confirm the data contains no such months.
month_order = ['mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
day_order = ['mon', 'tue', 'wed', 'thu', 'fri']
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Min-max scale the features into the [0, 1] range.
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler(feature_range=(0, 1))

# Learn the scaling parameters from the training split only, then apply the
# same fitted scaler to the test split so both share one transformation.
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from imblearn.over_sampling import SMOTE

# Hold out 20% of the rows for testing; fixed seed for reproducibility.
# train_test_split, LogisticRegression, X_df_2 and y come from earlier cells.
X_train, X_test, y_train, y_test = train_test_split(
    X_df_2, y, test_size=0.2, random_state=10
)

# Oversample the training split only, leaving the test split untouched.
# `fit_resample` replaces `fit_sample`, which was deprecated and later removed
# from imbalanced-learn.
sm = SMOTE(random_state=2)
X_train_res, y_train_res = sm.fit_resample(X_train, y_train.ravel())

# Fit a logistic regression on the resampled training data.
clf = LogisticRegression()
model_res = clf.fit(X_train_res, y_train_res)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create first pipeline for base without reducing features. | |
pipe = Pipeline([('classifier' , RandomForestClassifier())]) | |
# pipe = Pipeline([('classifier', RandomForestClassifier())]) | |
# Create param grid. | |
param_grid = [ | |
{'classifier' : [LogisticRegression()], | |
'classifier__penalty' : ['l1', 'l2'], |
OlderNewer