Skip to content

Instantly share code, notes, and snippets.

@finnqiao
finnqiao / data.csv
Last active January 31, 2019 00:40
d3_scatter_with_materialize
date count
12/2018 233
11/2018 228
10/2018 262
09/2018 293
08/2018 350
07/2018 400
06/2018 225
05/2018 243
04/2018 221
# Baseline pipeline: a single estimator step and no feature-reduction step.
# The step name 'classifier' is the prefix used by the parameter-grid keys
# (e.g. 'classifier__penalty').
pipe = Pipeline(steps=[('classifier', RandomForestClassifier())])
# Create param grid.
param_grid = [
{'classifier' : [LogisticRegression()],
'classifier__penalty' : ['l1', 'l2'],
# Resample the training split with SMOTE, then fit a baseline logistic
# regression on the resampled data. The test split is left untouched.
from imblearn.over_sampling import SMOTE

# Hold out 20% of the rows for testing; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X_df_2, y, test_size=0.2, random_state=10)

sm = SMOTE(random_state=2)
# `fit_sample` was deprecated in imbalanced-learn 0.4 and removed in 0.8;
# `fit_resample` is the supported name and returns the same (X, y) pair.
X_train_res, y_train_res = sm.fit_resample(X_train, y_train.ravel())

clf = LogisticRegression()
# `fit` returns the estimator itself, so `model_res` and `clf` are the same object.
model_res = clf.fit(X_train_res, y_train_res)
# Min-max scale the feature matrices into the [0, 1] range.
from sklearn.preprocessing import MinMaxScaler

# The scaler is fitted on the training split only; the same fitted scaler
# is then applied to both the training and the test split.
scaler = MinMaxScaler(feature_range=(0, 1)).fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)
# Set-up for label encoding the categorical variables: one encoder instance
# and an initially empty list (presumably filled with per-column mappings
# later -- confirm against the code that uses it).
label_encoder = LabelEncoder()
mappings = []

# Desired label orders for categorical columns.
educ_order = [
    'unknown',
    'illiterate',
    'basic.4y',
    'basic.6y',
    'basic.9y',
    'high.school',
    'professional.course',
    'university.degree',
]
month_order = [
    'mar',
    'apr',
    'may',
    'jun',
    'jul',
    'aug',
    'sep',
    'oct',
    'nov',
    'dec',
]
day_order = [
    'mon',
    'tue',
    'wed',
    'thu',
    'fri',
]
# Label-encode the features instead of expanding them into dummy variables.
from sklearn.preprocessing import LabelEncoder

mappings = []
label_encoder = LabelEncoder()
# Work on every column except the target column 'y'.
label_df = df.drop(labels='y', axis=1)
for i, col in enumerate(label_df):
# Column names grouped by variable type: continuous first, then categorical.
X_cont = [
    'age',
    'campaign',
    'pdays',
    'previous',
    'emp.var.rate',
    'cons.price.idx',
    'euribor3m',
    'nr.employed',
]
X_cat = [
    'job',
    'marital',
    'education',
    'default',
    'housing',
    'loan',
    'contact',
    'month',
    'day_of_week',
    'poutcome',
]

# Select each group of columns from the feature frame.
cont_df, cat_df = X_df[X_cont], X_df[X_cat]
# Creating dummy variable dataframe from categorical variables.
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display
pd.options.display.max_columns = None
import missingno as msno
# Draw missingno's bar chart for `df` (presumably a per-column view of how
# complete the data is -- confirm against the missingno documentation).
msno.bar(df)
print(test)
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.