# PallawiSinghal/part_3_catagorical.py

## part_3_catagorical.py
# set the matplotlib backend so figures can be saved in the background
import matplotlib
matplotlib.use("Agg")

# import the necessary packages
from sklearn.preprocessing import LabelBinarizer
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from keras.models import Sequential
from keras.layers.core import Dense
from keras.optimizers import SGD
from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import argparse
import random
import keras
import pickle
import cv2
import os

# Binary (dog vs. not-dog) image classifier: loads every image found under
# `image_data_folder_path`, trains a small fully-connected network on the
# flattened 32x32x3 pixels, plots the training curves and saves the model.

# Path of the image data folder. The class of an image is taken from the name
# of the folder it lives in (e.g. <folder>/dog/xxx.jpg, <folder>/cat/yyy.jpg).
image_data_folder_path = "/images/"

# `data` collects one flattened pixel vector per image and `labels` the
# matching integer class (1 for "dog", 0 for everything else).
data = []
labels = []

# grab the image paths in a stable (sorted) order before shuffling them
imagePaths = sorted(paths.list_images(image_data_folder_path))
total_number_of_images = len(imagePaths)
print("\n")
print("Total number of images----->",total_number_of_images)

# randomly shuffle all the image file names
random.shuffle(imagePaths)

# loop over the shuffled input images
for imagePath in imagePaths:
    # read the image into a numpy array; the images come in different shapes
    image = cv2.imread(imagePath)

    # cv2.imread does not raise on a corrupt/unsupported file, it returns
    # None - skip such files instead of crashing inside cv2.resize
    if image is None:
        print("[WARNING] skipping unreadable image:", imagePath)
        continue

    # resize to 32x32 pixels (ignoring aspect ratio) so every image ends up
    # with 32 * 32 * 3 = 3072 values, then flatten it to shape (3072,)
    image = cv2.resize(image, (32, 32))
    data.append(image.flatten())

    # the class label is the name of the folder that contains the image;
    # encode "dog" as 1 and anything else as 0
    label = imagePath.split(os.path.sep)[-2]
    labels.append(1 if label == "dog" else 0)

# nothing to train on: stop with a clear message instead of an obscure
# error from train_test_split further down
if not data:
    raise SystemExit("No readable images found in " + image_data_folder_path)

# convert both lists to numpy arrays and scale the raw pixel intensities
# to the range [0, 1]
data = np.array(data, dtype="float") / 255.0
labels = np.array(labels)

# partition the data into training and testing splits using 75% of
# the data for training and the remaining 25% for testing
(trainX, testX, trainY, testY) = train_test_split(
    data, labels, test_size=0.25, random_state=42)

print("Number of training images--->",len(trainX),",","Number of training labels--->",len(trainY))
print("Number of testing images--->",len(testX),",","Number of testing labels--->",len(testY))

# one-hot encode the integer labels (0 -> [1, 0], 1 -> [0, 1])
trainY = keras.utils.to_categorical(trainY, num_classes=2)
testY = keras.utils.to_categorical(testY, num_classes=2)

# define the 3072-1024-512-2 feedforward architecture using Keras:
# 3072 inputs (one per raw pixel intensity of the flattened image),
# two sigmoid hidden layers and a 2-unit softmax output (one unit per class)
model = Sequential()
model.add(Dense(1024, input_shape=(3072,), activation="sigmoid"))
model.add(Dense(512, activation="sigmoid"))
model.add(Dense(2, activation="softmax"))

print ("Printing the summary of model")
model.summary()

# initial learning rate and number of epochs to train for
INIT_LR = 0.01
EPOCHS = 75

# compile the model using SGD as the optimizer; categorical cross-entropy
# matches the one-hot encoded labels and the 2-unit softmax output
print("[INFO] training network...")
opt = SGD(lr=INIT_LR)
model.compile(loss="categorical_crossentropy", optimizer=opt,
    metrics=["accuracy"])

# train the neural network, validating on the held-out test split
H = model.fit(trainX, trainY, validation_data=(testX, testY),
    epochs=EPOCHS, batch_size=32)

# evaluate the network: predict class probabilities for the test split and
# report per-class precision/recall against the true labels
print("[INFO] evaluating network...")
predictions = model.predict(testX, batch_size=32)
print(classification_report(testY.argmax(axis=1),
    predictions.argmax(axis=1), labels=[0, 1],
    target_names=["not_dog", "dog"]))

#Uncomment to see the predicted probability for each class in every test image
# print ("predictions---------------->",predictions)
#Uncomment to print the predicted label of each test image
# print("predictions.argmax(axis=1)",predictions.argmax(axis=1))

# plot the training loss and accuracy for each epoch
history = H.history
# the accuracy metric is recorded as "acc" by older Keras releases and as
# "accuracy" by newer ones - use whichever key this run produced
acc_key = "acc" if "acc" in history else "accuracy"
# x-axis built from the recorded history so it always matches the curves
N = np.arange(0, len(history["loss"]))
plt.style.use("ggplot")
plt.figure()
plt.plot(N, history["loss"], label="train_loss")
plt.plot(N, history["val_loss"], label="val_loss")
plt.plot(N, history[acc_key], label="train_acc")
plt.plot(N, history["val_" + acc_key], label="val_acc")
plt.title("Training Loss and Accuracy (simple_multiclass_classifcation)")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend()
plt.savefig("training_performance_binary.png")

# save the trained model to disk
print("[INFO] serializing network")
model.save("simple_binary_classifcation_model.model")