# Hehehe421

## Radar_chart_syntax_1.py
# This code helps you plot the movie-genre profiles of three made-up customers on the same radar chart in Python.
import plotly.graph_objects as go
import plotly.offline as pyo


# Genre labels used for the radar chart.
genre = [
    'Action',
    'Comedy',
    'Drama',
    'Horror',
    'Mystery',
    'Romance',
]

# Profile values of the three made-up customers
# (presumably one value per genre, in `genre` order -- TODO confirm).
c_1 = [22, 98, 8, 109, 111, 29]
# NOTE(review): c_2 holds five values while `genre` has six labels -- confirm the missing entry.
c_2 = [49, 67, 140, 13, 24]
c_3 = [34, 45, 57, 34, 77, 25]

## Radar_chart_syntax_2.py
# This code helps you plot the movie-genre profiles of two made-up customers on separate radar charts in Python.
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.offline as pyo

# Genre labels used for the radar charts.
genre = [
    'Action',
    'Comedy',
    'Drama',
    'Horror',
    'Mystery',
    'Romance',
]

# Profile values of the two made-up customers
# (presumably one value per genre, in `genre` order -- TODO confirm).
c_1 = [22, 98, 8, 109, 111, 29]
# NOTE(review): c_2 holds five values while `genre` has six labels -- confirm the missing entry.
c_2 = [49, 67, 140, 13, 24]

def customer_profile_data(result_name):

## LogisticRegression_loadpackage.py
# import the packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn.preprocessing as preprocessing
from sklearn import linear_model
from sklearn.cross_validation import cross_validation
import sklearn.model_selection
from sklearn import svm

## LogisticRegression_model1.py
# Model 1: Simple logistic regression with l1 regularization
# 1. Model parameters
# The solver is named explicitly: scikit-learn's default solver ('lbfgs' since
# release 0.22) rejects penalty='l1', while 'liblinear' supports it and was the
# default in earlier releases.
model1 = linear_model.LogisticRegression(
    C=1.0,
    penalty='l1',
    tol=1e-6,
    solver='liblinear',
)
# X_train_vec / y_train_vec are defined outside this snippet.
model1.fit(X_train_vec, y_train_vec)

# 2. Accuracy score on testing set
y_pred = model1.predict(X_test_vec)
accuracy = accuracy_score(y_test_vec, y_pred)
# Printed as a percentage with two decimals.
print("Accuracy: %.2f%%" % (accuracy * 100.0))

## LogisticRegression_model2.py
# Model 2: Simple logistic regression with l1 regularization on under sampling data
# 1. Build the under-sampled training and testing sets
# The visitor identifier column is dropped before splitting.
df_u = df_test_under.drop(['Visitor_Identifier'], axis=1)
df_u_target = df_u['Lead _Form_submission']
# NOTE(review): df_u still contains the 'Lead _Form_submission' column when it is
# passed as the feature frame below -- confirm it is removed before fitting.
(
    X_train_under,
    X_test_under,
    y_train_under,
    y_test_under,
) = train_test_split(df_u, df_u_target, test_size=0.2)

# Print the shape of each split, one per line, in the order listed above.
print(
    X_train_under.shape,
    X_test_under.shape,
    y_train_under.shape,
    y_test_under.shape,
    sep="\n",
)

## LogisticRegression_model3.py
# Model 3: Feature selection based on model2
# 1. Find the best number of variables
# Candidate feature counts: 4 through 14 inclusive.
var_selected = list(range(4, 15))

for var in var_selected:
    print("Number of Variables: ", var)
    # The feature count is passed by keyword: current scikit-learn releases make
    # every RFE argument after the estimator keyword-only.
    rfe = RFE(model2, n_features_to_select=var)
    rfe = rfe.fit(X_train_under_vec, y_train_under_vec)
    y_pred = rfe.predict(X_test_under_vec)
    accuracy = accuracy_score(y_test_under_vec, y_pred)
    # Report the score so the candidates can be compared; it was previously
    # computed and then overwritten by the next iteration.
    print("Accuracy: %.2f%%" % (accuracy * 100.0))

## get_coef.py
# 2. Get coef of feature selection model
# Refit RFE keeping 7 features. The count is passed by keyword because current
# scikit-learn releases make RFE's n_features_to_select keyword-only.
rfe = RFE(model2, n_features_to_select=7)
model3 = rfe.fit(X_train_under_vec, y_train_under_vec)
# Table pairing each selected column name with the fitted estimator's coefficient.
pd.DataFrame({
    "columns": list(X_train_frame.columns[model3.support_]),
    "coef": list(model3.estimator_.coef_.T),
})

## learning_curve.py
#3. Report the learning curve
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import learning_curve

def plot_learning_curve(
    estimator,
    title,
    X,
    y,
    ylim=None,
    cv=None,
    n_jobs=1,
    train_sizes=np.linspace(.05, 1., 20),
    verbose=0,
    plot=True,
):
    """Run ``learning_curve`` for ``estimator`` on ``X`` and ``y``.

    ``cv``, ``n_jobs``, ``train_sizes`` and ``verbose`` are forwarded to
    ``learning_curve`` unchanged.  ``title``, ``ylim`` and ``plot`` are
    accepted but not used by the code present here, and nothing is returned.
    """
    # Compute the learning curve and unpack its three result arrays.
    curve = learning_curve(
        estimator,
        X,
        y,
        cv=cv,
        n_jobs=n_jobs,
        train_sizes=train_sizes,
        verbose=verbose,
    )
    train_sizes, train_scores, test_scores = curve

    # NOTE(review): the mean / standard-deviation computation of the training and
    # test scores announced at this point is not present in this file.

## read_data.py
# Load the dataset and preview its first ten rows
df_all = pd.read_csv(
    'creditcard.csv',
    header=0,
)
df_all.head(n=10)
# (df_all.columns lists the column names)

## continuous_heatmap.py
# Understand the numeric variables
# Continuous variables: columns 'V1' through 'V28' plus 'Amount'
featureConCols = ['V%d' % idx for idx in range(1, 29)] + ['Amount']

# Report the correlation table of those columns
corr = df_all[featureConCols].corr()
corr
	# This code helps you plot the movie-genre profiles of three made-up customers on the same radar chart in Python.
	import plotly.graph_objects as go
	import plotly.offline as pyo


	# Genre labels used for the radar chart.
	genre = [
		'Action',
		'Comedy',
		'Drama',
		'Horror',
		'Mystery',
		'Romance',
	]

	# Profile values of the three made-up customers
	# (presumably one value per genre, in `genre` order -- TODO confirm).
	c_1 = [22, 98, 8, 109, 111, 29]
	# NOTE(review): c_2 holds five values while `genre` has six labels -- confirm the missing entry.
	c_2 = [49, 67, 140, 13, 24]
	c_3 = [34, 45, 57, 34, 77, 25]
	# This code helps you plot the movie-genre profiles of two made-up customers on separate radar charts in Python.
	from plotly.subplots import make_subplots
	import plotly.graph_objects as go
	import plotly.offline as pyo

	# Genre labels used for the radar charts.
	genre = [
		'Action',
		'Comedy',
		'Drama',
		'Horror',
		'Mystery',
		'Romance',
	]

	# Profile values of the two made-up customers
	# (presumably one value per genre, in `genre` order -- TODO confirm).
	c_1 = [22, 98, 8, 109, 111, 29]
	# NOTE(review): c_2 holds five values while `genre` has six labels -- confirm the missing entry.
	c_2 = [49, 67, 140, 13, 24]

	def customer_profile_data(result_name):
	# import the packages
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	import seaborn as sns
	import sklearn.preprocessing as preprocessing
	from sklearn import linear_model
	from sklearn.cross_validation import cross_validation
	import sklearn.model_selection
	from sklearn import svm
	# Model 1: Simple logistic regression with l1 regularization
	# 1. Model parameters
	# The solver is named explicitly: scikit-learn's default solver ('lbfgs' since
	# release 0.22) rejects penalty='l1', while 'liblinear' supports it and was the
	# default in earlier releases.
	model1 = linear_model.LogisticRegression(
		C=1.0,
		penalty='l1',
		tol=1e-6,
		solver='liblinear',
	)
	# X_train_vec / y_train_vec are defined outside this snippet.
	model1.fit(X_train_vec, y_train_vec)

	# 2. Accuracy score on testing set
	y_pred = model1.predict(X_test_vec)
	accuracy = accuracy_score(y_test_vec, y_pred)
	# Printed as a percentage with two decimals.
	print("Accuracy: %.2f%%" % (accuracy * 100.0))
	# Model 2: Simple logistic regression with l1 regularization on under sampling data
	# 1. Build the under-sampled training and testing sets
	# The visitor identifier column is dropped before splitting.
	df_u = df_test_under.drop(['Visitor_Identifier'], axis=1)
	df_u_target = df_u['Lead _Form_submission']
	# NOTE(review): df_u still contains the 'Lead _Form_submission' column when it is
	# passed as the feature frame below -- confirm it is removed before fitting.
	(
		X_train_under,
		X_test_under,
		y_train_under,
		y_test_under,
	) = train_test_split(df_u, df_u_target, test_size=0.2)

	# Print the shape of each split, one per line, in the order listed above.
	print(
		X_train_under.shape,
		X_test_under.shape,
		y_train_under.shape,
		y_test_under.shape,
		sep="\n",
	)
	# Model 3: Feature selection based on model2
	# 1. Find the best number of variables
	# Candidate feature counts: 4 through 14 inclusive.
	var_selected = list(range(4, 15))

	for var in var_selected:
		print("Number of Variables: ", var)
		# The feature count is passed by keyword: current scikit-learn releases
		# make every RFE argument after the estimator keyword-only.
		rfe = RFE(model2, n_features_to_select=var)
		rfe = rfe.fit(X_train_under_vec, y_train_under_vec)
		y_pred = rfe.predict(X_test_under_vec)
		accuracy = accuracy_score(y_test_under_vec, y_pred)
		# Report the score so the candidates can be compared; it was previously
		# computed and then overwritten by the next iteration.
		print("Accuracy: %.2f%%" % (accuracy * 100.0))
	# 2. Get coef of feature selection model
	# Refit RFE keeping 7 features. The count is passed by keyword because current
	# scikit-learn releases make RFE's n_features_to_select keyword-only.
	rfe = RFE(model2, n_features_to_select=7)
	model3 = rfe.fit(X_train_under_vec, y_train_under_vec)
	# Table pairing each selected column name with the fitted estimator's coefficient.
	pd.DataFrame({
		"columns": list(X_train_frame.columns[model3.support_]),
		"coef": list(model3.estimator_.coef_.T),
	})
	#3. Report the learning curve
	import matplotlib.pyplot as plt
	import numpy as np
	from sklearn.model_selection import learning_curve

	def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None, n_jobs=1, train_sizes=np.linspace(.05, 1., 20), verbose=0, plot=True):
		"""Run ``learning_curve`` for ``estimator`` on ``X`` and ``y``.

		``cv``, ``n_jobs``, ``train_sizes`` and ``verbose`` are forwarded to
		``learning_curve`` unchanged.  ``title``, ``ylim`` and ``plot`` are
		accepted but not used by the code present here, and nothing is returned.
		"""
		# Compute learning curve (the body is indented under the def; it had been
		# flattened to the def's own level, leaving the function without a body).
		train_sizes, train_scores, test_scores = learning_curve(
			estimator,
			X,
			y,
			cv=cv,
			n_jobs=n_jobs,
			train_sizes=train_sizes,
			verbose=verbose,
		)

		# NOTE(review): the mean / standard-deviation computation of the training
		# and test scores announced at this point is not present in this file.
	# Load the dataset and preview its first ten rows
	df_all = pd.read_csv(
		'creditcard.csv',
		header=0,
	)
	df_all.head(n=10)
	# (df_all.columns lists the column names)
	# Understand the numeric variables
	# Continuous variables: columns 'V1' through 'V28' plus 'Amount'
	featureConCols = ['V%d' % idx for idx in range(1, 29)] + ['Amount']

	# Report the correlation table of those columns
	corr = df_all[featureConCols].corr()
	corr