Skip to content

Instantly share code, notes, and snippets.

@Hehehe421
Hehehe421 / Radar_chart_syntax_1.py
Last active January 18, 2022 02:51
Medium-Radar Charts-Syntax 1
# This code helps you create three made-up customers' movie-genre profiles on the same radar chart in Python.
import plotly.graph_objects as go
import plotly.offline as pyo
# Axis labels for the radar chart, followed by one list of values per customer.
genre = ['Action', 'Comedy', 'Drama', 'Horror', 'Mystery', 'Romance']
c_1 = [22, 98, 8, 109, 111, 29]
# NOTE(review): c_2 holds 5 values while genre has 6 labels (c_1 and c_3 have 6);
# one value looks to be missing -- confirm the intended data before plotting.
c_2 = [49, 67, 140, 13, 24]
c_3 = [34, 45, 57, 34, 77, 25]
@Hehehe421
Hehehe421 / Radar_chart_syntax_2.py
Created January 18, 2022 02:52
Medium - Radar Chart - Syntax 2
# This code helps you create two made-up customers' movie-genre profiles on separate radar charts in Python.
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.offline as pyo
# Axis labels for the radar charts, followed by one list of values per customer.
genre = ['Action', 'Comedy', 'Drama', 'Horror', 'Mystery', 'Romance']
c_1 = [22, 98, 8, 109, 111, 29]
# NOTE(review): c_2 holds 5 values while genre has 6 labels (c_1 has 6);
# one value looks to be missing -- confirm the intended data before plotting.
c_2 = [49, 67, 140, 13, 24]
def customer_profile_data(result_name):
# import the packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn.preprocessing as preprocessing
from sklearn import linear_model
from sklearn.cross_validation import cross_validation
import sklearn.model_selection
from sklearn import svm
# Model 1: simple logistic regression with L1 regularization.
# 1. Model parameters.
# The solver is named explicitly: scikit-learn's default solver has been
# 'lbfgs' since 0.22, and 'lbfgs' rejects penalty='l1' with a ValueError.
# 'liblinear' supports L1 and was the default in older releases, so results
# on those releases are unchanged.
model1 = linear_model.LogisticRegression(
    C=1.0, penalty='l1', solver='liblinear', tol=1e-6
)
model1.fit(X_train_vec, y_train_vec)

# 2. Accuracy score on the testing set, printed as a percentage.
y_pred = model1.predict(X_test_vec)
accuracy = accuracy_score(y_test_vec, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100.0))
# Model 2: simple logistic regression with L1 regularization on the
# under-sampled data.
# 1. Build the under-sampled training and testing sets.
df_u = df_test_under.drop(['Visitor_Identifier'], axis=1)
df_u_target = df_u['Lead _Form_submission']
# NOTE(review): df_u is passed as the feature frame while still containing the
# 'Lead _Form_submission' column used as the target -- confirm that column is
# removed before any model is fitted on these splits.
X_train_under, X_test_under, y_train_under, y_test_under = train_test_split(
    df_u, df_u_target, test_size=0.2
)
# Show the shape of each split, one per line, in train/test X then y order.
print(
    X_train_under.shape,
    X_test_under.shape,
    y_train_under.shape,
    y_test_under.shape,
    sep="\n",
)
# Model 3: feature selection based on model2.
# 1. Find the best number of variables by fitting recursive feature
#    elimination (RFE) once per candidate count and scoring it on the
#    under-sampled test set.
var_selected = [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
for var in var_selected:
    print("Number of Variables: ", var)
    # n_features_to_select is passed by keyword: current scikit-learn releases
    # make it keyword-only, so the positional form raises a TypeError.
    rfe = RFE(model2, n_features_to_select=var)
    rfe = rfe.fit(X_train_under_vec, y_train_under_vec)
    y_pred = rfe.predict(X_test_under_vec)
    accuracy = accuracy_score(y_test_under_vec, y_pred)
    # Report the score; without this the per-candidate accuracy was computed
    # and discarded, so the loop could not inform the choice below.
    print("Accuracy: %.2f%%" % (accuracy * 100.0))

# 2. Get the coefficients of the feature selection model (7 features kept).
rfe = RFE(model2, n_features_to_select=7)
model3 = rfe.fit(X_train_under_vec, y_train_under_vec)
# Pair each retained column name with its fitted coefficient.
pd.DataFrame({"columns":list(X_train_frame.columns[model3.support_]), "coef":list(model3.estimator_.coef_.T)})
#3. Report the learning curve
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import learning_curve
# plot_learning_curve: passes estimator, X, y, cv, n_jobs, train_sizes and
# verbose to sklearn's learning_curve and unpacks the three returned arrays.
# NOTE(review): the lines below have lost their indentation and the body stops
# after the learning_curve call; `title`, `ylim` and `plot` are not used in the
# visible lines, so the rest of the function (presumably the mean/std
# computation and the plotting) appears to be cut off -- restore it from the
# original before use.
def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None, n_jobs=1, train_sizes=np.linspace(.05, 1., 20), verbose=0, plot=True):
# Compute learning curve
train_sizes, train_scores, test_scores = learning_curve(estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes, verbose=verbose)
# Compute mean and standard deviation of training and test scores
# Load the dataset (first row is the header) and preview the first 10 rows.
df_all = pd.read_csv('creditcard.csv', header=0)
df_all.head(10)
#df_all.columns

# Understand the numeric variables.
# Continuous variables: 'V1' through 'V28', followed by 'Amount'.
featureConCols = list(map('V{}'.format, range(1, 29))) + ['Amount']

# Report the correlation table of the continuous variables.
corr = df_all[featureConCols].corr()
corr