Hehehe421/LogisticRegression_model2.py

## LogisticRegression_model2.py
# Model 2: Simple logistic regression with L1 regularization on the under-sampled data
# 1. Build the under-sampled training and testing sets
df_u = df_test_under.drop(['Visitor_Identifier'], axis=1)
df_u_target = df_u['Lead _Form_submission']
# NOTE(review): df_u still contains the 'Lead _Form_submission' column when it is
# passed as the feature frame below. Confirm that Data_preprocessing removes it;
# otherwise the target leaks into the features.
# NOTE(review): the split is unseeded, so every run yields a different partition
# and different metrics; consider passing random_state for reproducibility.
X_train_under, X_test_under, y_train_under, y_test_under = train_test_split(
    df_u, df_u_target, test_size=0.2
)
print(X_train_under.shape)
print(X_test_under.shape)
print(y_train_under.shape)
print(y_test_under.shape)

# NOTE(review): the train and test partitions are preprocessed by two separate
# calls. If Data_preprocessing fits encoders/scalers internally, the two calls
# may produce inconsistent feature spaces - verify against its definition.
X_train_under_vec, y_train_under_vec, X_train_under_frame = Data_preprocessing(X_train_under, y_train_under)
X_test_under_vec, y_test_under_vec, X_test_under_frame = Data_preprocessing(X_test_under, y_test_under)

print('X Train shape is : ', X_train_under_vec.shape)
print('Y Train shape is : ', y_train_under_vec.shape)
print('X Test shape is: ', X_test_under_vec.shape)
print('Y Test shape is: ', y_test_under_vec.shape)

# 2. Model parameters
# The solver is pinned to 'liblinear' because the L1 penalty is not supported by
# 'lbfgs' (the default solver since scikit-learn 0.22); without it the
# constructor/fit raises ValueError on current versions. 'liblinear' was the
# previous default, so results on older versions are unchanged.
model2 = linear_model.LogisticRegression(C=1.0, penalty='l1', solver='liblinear', tol=1e-6)
model2.fit(X_train_under_vec, y_train_under_vec)

# 3. Accuracy score on the testing set
y_pred = model2.predict(X_test_under_vec)
accuracy = accuracy_score(y_test_under_vec, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

# 4. Confusion matrix
conf_mat = confusion_matrix(y_true=y_test_under_vec, y_pred=y_pred)
print('Confusion matrix:\n', conf_mat)

labels = ['Class 0', 'Class 1']
fig = plt.figure()
ax = fig.add_subplot(111)
cax = ax.matshow(conf_mat, cmap=plt.cm.Blues)
fig.colorbar(cax)
# Fix the tick positions before labelling them: set_*ticklabels on automatically
# located ticks can attach the labels to the wrong cells.
tick_positions = list(range(len(labels)))
ax.set_xticks(tick_positions)
ax.set_yticks(tick_positions)
ax.set_xticklabels(labels)
ax.set_yticklabels(labels)
plt.xlabel('Predicted')
plt.ylabel('Expected')
plt.show()

# 5. Classification report
report = classification_report(y_test_under_vec, y_pred)
print(report)
	# Model 2: Simple logistic regression with L1 regularization on the under-sampled data
	# NOTE(review): this tab-indented section repeats the column-0 statements that
	# precede it in this file; confirm whether it belongs inside an enclosing block
	# or is an accidental duplicate that should be removed.
	# 1. Build the under-sampled training and testing sets
	df_u = df_test_under.drop(['Visitor_Identifier'], axis=1)
	df_u_target = df_u['Lead _Form_submission']
	# NOTE(review): df_u still contains the 'Lead _Form_submission' column when it is
	# passed as the feature frame below. Confirm that Data_preprocessing removes it;
	# otherwise the target leaks into the features.
	# NOTE(review): the split is unseeded, so every run yields a different partition
	# and different metrics; consider passing random_state for reproducibility.
	X_train_under, X_test_under, y_train_under, y_test_under = train_test_split(
		df_u, df_u_target, test_size=0.2
	)
	print(X_train_under.shape)
	print(X_test_under.shape)
	print(y_train_under.shape)
	print(y_test_under.shape)

	# NOTE(review): the train and test partitions are preprocessed by two separate
	# calls. If Data_preprocessing fits encoders/scalers internally, the two calls
	# may produce inconsistent feature spaces - verify against its definition.
	X_train_under_vec, y_train_under_vec, X_train_under_frame = Data_preprocessing(X_train_under, y_train_under)
	X_test_under_vec, y_test_under_vec, X_test_under_frame = Data_preprocessing(X_test_under, y_test_under)

	print('X Train shape is : ', X_train_under_vec.shape)
	print('Y Train shape is : ', y_train_under_vec.shape)
	print('X Test shape is: ', X_test_under_vec.shape)
	print('Y Test shape is: ', y_test_under_vec.shape)

	# 2. Model parameters
	# The solver is pinned to 'liblinear' because the L1 penalty is not supported by
	# 'lbfgs' (the default solver since scikit-learn 0.22); without it the
	# constructor/fit raises ValueError on current versions. 'liblinear' was the
	# previous default, so results on older versions are unchanged.
	model2 = linear_model.LogisticRegression(C=1.0, penalty='l1', solver='liblinear', tol=1e-6)
	model2.fit(X_train_under_vec, y_train_under_vec)

	# 3. Accuracy score on the testing set
	y_pred = model2.predict(X_test_under_vec)
	accuracy = accuracy_score(y_test_under_vec, y_pred)
	print("Accuracy: %.2f%%" % (accuracy * 100.0))

	# 4. Confusion matrix
	conf_mat = confusion_matrix(y_true=y_test_under_vec, y_pred=y_pred)
	print('Confusion matrix:\n', conf_mat)

	labels = ['Class 0', 'Class 1']
	fig = plt.figure()
	ax = fig.add_subplot(111)
	cax = ax.matshow(conf_mat, cmap=plt.cm.Blues)
	fig.colorbar(cax)
	# Fix the tick positions before labelling them: set_*ticklabels on automatically
	# located ticks can attach the labels to the wrong cells.
	tick_positions = list(range(len(labels)))
	ax.set_xticks(tick_positions)
	ax.set_yticks(tick_positions)
	ax.set_xticklabels(labels)
	ax.set_yticklabels(labels)
	plt.xlabel('Predicted')
	plt.ylabel('Expected')
	plt.show()

	# 5. Classification report
	report = classification_report(y_test_under_vec, y_pred)
	print(report)