This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This code helps you create movie genre profiles for three made-up customers on the same radar chart in Python.
import plotly.graph_objects as go | |
import plotly.offline as pyo | |
# Genre labels for the radar chart; presumably each customer list below holds
# one score per genre, in the same order -- confirm against the plotting code.
genre = ['Action', 'Comedy', 'Drama', 'Horror', 'Mystery', 'Romance']
c_1 = [22, 98, 8, 109, 111, 29]
# NOTE(review): c_2 has only 5 values while there are 6 genres, so one genre
# has no score for this customer -- confirm the missing value in the source data.
c_2 = [49, 67, 140, 13, 24]
c_3 = [34, 45, 57, 34, 77, 25]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This code helps you create movie genre profiles for two made-up customers on separate radar charts in Python.
from plotly.subplots import make_subplots | |
import plotly.graph_objects as go | |
import plotly.offline as pyo | |
# Genre labels for the radar charts; presumably each customer list below holds
# one score per genre, in the same order -- confirm against the plotting code.
genre = ['Action', 'Comedy', 'Drama', 'Horror', 'Mystery', 'Romance']
c_1 = [22, 98, 8, 109, 111, 29]
# NOTE(review): c_2 has only 5 values while there are 6 genres, so one genre
# has no score for this customer -- confirm the missing value in the source data.
c_2 = [49, 67, 140, 13, 24]
def customer_profile_data(result_name): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# import the packages | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
import sklearn.preprocessing as preprocessing | |
from sklearn import linear_model | |
from sklearn.cross_validation import cross_validation | |
import sklearn.model_selection | |
from sklearn import svm |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Model 1: Simple logistic regression with L1 regularization
# 1. Model parameters
# The solver is named explicitly because the L1 penalty is not supported by
# scikit-learn's current default solver (lbfgs). liblinear was the default in
# older releases, so results on those versions are unchanged.
model1 = linear_model.LogisticRegression(
    C=1.0, penalty='l1', tol=1e-6, solver='liblinear'
)
model1.fit(X_train_vec, y_train_vec)

# 2. Accuracy score on the testing set
y_pred = model1.predict(X_test_vec)
accuracy = accuracy_score(y_test_vec, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100.0))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Model 2: Simple logistic regression with L1 regularization on under-sampled data
# 1. Get the under-sampled training set and testing set
df_u = df_test_under.drop(['Visitor_Identifier'], axis=1)
df_u_target = df_u['Lead _Form_submission']

# NOTE(review): df_u is passed as the feature frame but still contains the
# target column 'Lead _Form_submission'. Unless that column is dropped further
# downstream, the label leaks into the features -- confirm against the code
# that builds the *_vec arrays.
X_train_under, X_test_under, y_train_under, y_test_under = train_test_split(
    df_u, df_u_target, test_size=0.2
)

# Sanity-check the sizes of the four splits.
print(X_train_under.shape)
print(X_test_under.shape)
print(y_train_under.shape)
print(y_test_under.shape)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Model 3: Feature selection based on model2
# 1. Find the best number of variables
var_selected = [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
for var in var_selected:
    print("Number of Variables: ", var)
    # n_features_to_select is passed by keyword: current scikit-learn releases
    # no longer accept it positionally.
    rfe = RFE(model2, n_features_to_select=var)
    rfe = rfe.fit(X_train_under_vec, y_train_under_vec)
    y_pred = rfe.predict(X_test_under_vec)
    # NOTE(review): accuracy is computed here, but the visible snippet ends
    # before it is reported -- the original loop presumably prints it.
    accuracy = accuracy_score(y_test_under_vec, y_pred)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 2. Get the coefficients of the feature-selection model
# n_features_to_select is passed by keyword: current scikit-learn releases no
# longer accept it positionally.
rfe = RFE(model2, n_features_to_select=7)
model3 = rfe.fit(X_train_under_vec, y_train_under_vec)

# Pair each column picked out by the support_ mask with the coefficient of the
# estimator refit on those columns. The value is not assigned, so it is only
# shown when run interactively (e.g. as the last expression of a notebook cell).
pd.DataFrame({
    "columns": list(X_train_frame.columns[model3.support_]),
    "coef": list(model3.estimator_.coef_.T),
})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#3. Report the learning curve | |
import matplotlib.pyplot as plt | |
import numpy as np | |
from sklearn.model_selection import learning_curve | |
def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None, n_jobs=1, train_sizes=np.linspace(.05, 1., 20), verbose=0, plot=True): | |
# Compute learning curve | |
train_sizes, train_scores, test_scores = learning_curve(estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes, verbose=verbose) | |
# Compute mean and standard deviation of training and test scores |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import the dataset and take a look at it
df_all = pd.read_csv('creditcard.csv', header=0)
# Preview the first 10 rows (displayed only when run interactively).
df_all.head(10)
#df_all.columns
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Understand the numeric variables
# Continuous variables: 'V1' through 'V28', followed by 'Amount'.
featureConCols = ['V%d' % i for i in range(1, 29)] + ['Amount']
# Report the correlation table of the continuous variables
corr = df_all[featureConCols].corr()
# Bare expression: shows the table only when run interactively.
corr
OlderNewer