Skip to content

Instantly share code, notes, and snippets.

Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# Print the object bound to `test` (it is defined outside this excerpt).
print(test)
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display
# Show every column when a DataFrame is displayed (None removes the column limit).
pd.options.display.max_columns = None
import missingno as msno
# Plot missingno's per-column bar chart for `df` as a quick missing-data check.
# NOTE(review): `df` is loaded outside this excerpt.
msno.bar(df)
# Split the feature columns of X_df into a continuous group and a categorical group.
X_cont = [
    'age',
    'campaign',
    'pdays',
    'previous',
    'emp.var.rate',
    'cons.price.idx',
    'euribor3m',
    'nr.employed',
]
X_cat = [
    'job',
    'marital',
    'education',
    'default',
    'housing',
    'loan',
    'contact',
    'month',
    'day_of_week',
    'poutcome',
]
# One sub-frame per group, selected by column name.
cont_df, cat_df = X_df[X_cont], X_df[X_cat]
# Encode the categorical columns as integer labels (LabelEncoder) instead of
# expanding them into dummy variables.
# NOTE(review): `mappings` presumably collects the per-column label mappings;
# the loop that would fill it is cut off in this excerpt — confirm.
mappings = []
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
# Work on a copy of df without the 'y' column.
label_df = df.drop('y', axis=1)
for i, col in enumerate(label_df):
# Label-encoding setup for the categorical variables.
# Desired label order for each categorical column that needs one.
educ_order = [
    'unknown',
    'illiterate',
    'basic.4y',
    'basic.6y',
    'basic.9y',
    'high.school',
    'professional.course',
    'university.degree',
]
month_order = [
    'mar', 'apr', 'may', 'jun', 'jul',
    'aug', 'sep', 'oct', 'nov', 'dec',
]
day_order = ['mon', 'tue', 'wed', 'thu', 'fri']
# Start from an empty mapping list and a fresh encoder.
mappings = []
label_encoder = LabelEncoder()
# Min-max scale the features into [0, 1]. The scaler is fitted on the training
# split only, and the fitted scaler is then applied to both splits.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1)).fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)
from imblearn.over_sampling import SMOTE
# Hold out 20% of the rows for testing; random_state pins the split.
X_train, X_test, y_train, y_test = train_test_split(X_df_2, y, test_size=0.2, random_state=10)
# Oversample the training split only; the test split is left unresampled.
sm = SMOTE(random_state=2)
# `fit_resample` is the supported sampler API; the legacy `fit_sample` name no
# longer exists in current imbalanced-learn releases. np.ravel yields a 1-D
# label array for both ndarray and pandas Series inputs, which avoids the
# deprecated Series.ravel().
X_train_res, y_train_res = sm.fit_resample(X_train, np.ravel(y_train))
# Fit a logistic regression on the resampled training data.
clf = LogisticRegression()
model_res = clf.fit(X_train_res, y_train_res)
# Baseline pipeline: a single 'classifier' step, with no feature-reduction step.
pipe = Pipeline(steps=[('classifier', RandomForestClassifier())])
# Create param grid.
param_grid = [
{'classifier' : [LogisticRegression()],
'classifier__penalty' : ['l1', 'l2'],