# amitrani6/naive_bayes_classifier.py

## naive_bayes_classifier.py
# The following code is adapted from Learn.Co's
# Naive Bayes Classifier lessons and labs

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

# Feature matrix: every row, columns from position 3 up to (but not
# including) the last column of the 'countries_classify' data frame.
# NOTE(review): 'countries_classify' is not defined in this file; it is
# presumably a pandas DataFrame built earlier -- confirm against the caller.
X = countries_classify.iloc[:, 3:-1].values

# Target vector: the column at position 1. A scalar column index (instead of
# the slice 1:2) yields a 1-D array, which is the shape LabelEncoder expects;
# an (n, 1) column vector makes scikit-learn emit a DataConversionWarning.
Y = countries_classify.iloc[:, 1].values

# Encode the target labels (the continents) as integers.
labelencoder_Y = LabelEncoder()
Y = labelencoder_Y.fit_transform(Y)

# 80/20 train/test split; the fixed random_state keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0
)

# Standardize the features. The scaler is fitted on the training set only and
# then reused on the test set, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit a Gaussian Naive Bayes classifier on the scaled training data.
classifier = GaussianNB()
classifier.fit(X_train_scaled, Y_train)

# Predict the encoded class for every sample in the test set.
Y_pred = classifier.predict(X_test_scaled)

# Accuracy of the predictions on the test set. The value is bound to a name
# and printed, because a bare expression is discarded when run as a script.
accuracy = accuracy_score(Y_test, Y_pred)
print(f"Accuracy: {accuracy:.4f}")
	# The following code is adapted from Learn.Co's
	# Naive Bayes Classifier lessons and labs

	from sklearn.preprocessing import LabelEncoder
	from sklearn.model_selection import train_test_split
	from sklearn.preprocessing import StandardScaler
	from sklearn.naive_bayes import GaussianNB

	# Feature matrix: every row, columns from position 3 up to (but not
	# including) the last column of the 'countries_classify' data frame.
	# NOTE(review): 'countries_classify' is not defined in this file; it is
	# presumably a pandas DataFrame built earlier -- confirm against the caller.
	X = countries_classify.iloc[:, 3:-1].values

	# Target vector: the column at position 1. A scalar column index (instead of
	# the slice 1:2) yields a 1-D array, which is the shape LabelEncoder expects;
	# an (n, 1) column vector makes scikit-learn emit a DataConversionWarning.
	Y = countries_classify.iloc[:, 1].values

	# Encode the target labels (the continents) as integers.
	labelencoder_Y = LabelEncoder()
	Y = labelencoder_Y.fit_transform(Y)

	# 80/20 train/test split; the fixed random_state keeps the split reproducible.
	X_train, X_test, Y_train, Y_test = train_test_split(
		X, Y, test_size=0.2, random_state=0
	)

	# Standardize the features. The scaler is fitted on the training set only and
	# then reused on the test set, so no test-set statistics leak into training.
	scaler = StandardScaler()
	X_train_scaled = scaler.fit_transform(X_train)
	X_test_scaled = scaler.transform(X_test)

	# Fit a Gaussian Naive Bayes classifier on the scaled training data.
	classifier = GaussianNB()
	classifier.fit(X_train_scaled, Y_train)

	# Predict the encoded class for every sample in the test set.
	Y_pred = classifier.predict(X_test_scaled)

	# Accuracy of the predictions on the test set. The value is bound to a name
	# and printed, because a bare expression is discarded when run as a script.
	accuracy = accuracy_score(Y_test, Y_pred)
	print(f"Accuracy: {accuracy:.4f}")