Skip to content

Instantly share code, notes, and snippets.

@Thimira
Last active April 13, 2021 05:52
Show Gist options
  • Star 22 You must be signed in to star a gist
  • Fork 14 You must be signed in to fork a gist
  • Save Thimira/354b90d59faf8b0d758f74eae3a511e2 to your computer and use it in GitHub Desktop.
Save Thimira/354b90d59faf8b0d758f74eae3a511e2 to your computer and use it in GitHub Desktop.
Learn how to build a multi-class image classification system using bottleneck features from a pre-trained model in Keras to achieve transfer learning. Tutorial: https://www.codesofinterest.com/2017/08/bottleneck-features-multi-class-classification-keras.html
'''
Using Bottleneck Features for Multi-Class Classification in Keras
We use this technique to build powerful (high accuracy without overfitting) Image Classification systems with small
amount of training data.
The full tutorial to get this code working can be found at the "Codes of Interest" Blog at the following link,
https://www.codesofinterest.com/2017/08/bottleneck-features-multi-class-classification-keras.html
Please go through the tutorial before attempting to run this code, as it explains how to setup your training data.
The code was tested on Python 3.5, with the following library versions,
Keras 2.0.6
TensorFlow 1.2.1
OpenCV 3.2.0
This should work with Theano as well, but untested.
'''
import numpy as np
from keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras import applications
from keras.utils.np_utils import to_categorical
import matplotlib.pyplot as plt
import math
import cv2
# dimensions of our images.
img_width, img_height = 224, 224
top_model_weights_path = 'bottleneck_fc_model.h5'
train_data_dir = 'data/train'
validation_data_dir = 'data/validation'
# number of epochs to train top model
epochs = 50
# batch size used by flow_from_directory and predict_generator
batch_size = 16
def save_bottlebeck_features():
# build the VGG16 network
model = applications.VGG16(include_top=False, weights='imagenet')
datagen = ImageDataGenerator(rescale=1. / 255)
generator = datagen.flow_from_directory(
train_data_dir,
target_size=(img_width, img_height),
batch_size=batch_size,
class_mode=None,
shuffle=False)
print(len(generator.filenames))
print(generator.class_indices)
print(len(generator.class_indices))
nb_train_samples = len(generator.filenames)
num_classes = len(generator.class_indices)
predict_size_train = int(math.ceil(nb_train_samples / batch_size))
bottleneck_features_train = model.predict_generator(
generator, predict_size_train)
np.save('bottleneck_features_train.npy', bottleneck_features_train)
generator = datagen.flow_from_directory(
validation_data_dir,
target_size=(img_width, img_height),
batch_size=batch_size,
class_mode=None,
shuffle=False)
nb_validation_samples = len(generator.filenames)
predict_size_validation = int(
math.ceil(nb_validation_samples / batch_size))
bottleneck_features_validation = model.predict_generator(
generator, predict_size_validation)
np.save('bottleneck_features_validation.npy',
bottleneck_features_validation)
def train_top_model():
datagen_top = ImageDataGenerator(rescale=1. / 255)
generator_top = datagen_top.flow_from_directory(
train_data_dir,
target_size=(img_width, img_height),
batch_size=batch_size,
class_mode='categorical',
shuffle=False)
nb_train_samples = len(generator_top.filenames)
num_classes = len(generator_top.class_indices)
# save the class indices to use use later in predictions
np.save('class_indices.npy', generator_top.class_indices)
# load the bottleneck features saved earlier
train_data = np.load('bottleneck_features_train.npy')
# get the class lebels for the training data, in the original order
train_labels = generator_top.classes
# https://github.com/fchollet/keras/issues/3467
# convert the training labels to categorical vectors
train_labels = to_categorical(train_labels, num_classes=num_classes)
generator_top = datagen_top.flow_from_directory(
validation_data_dir,
target_size=(img_width, img_height),
batch_size=batch_size,
class_mode=None,
shuffle=False)
nb_validation_samples = len(generator_top.filenames)
validation_data = np.load('bottleneck_features_validation.npy')
validation_labels = generator_top.classes
validation_labels = to_categorical(
validation_labels, num_classes=num_classes)
model = Sequential()
model.add(Flatten(input_shape=train_data.shape[1:]))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='sigmoid'))
model.compile(optimizer='rmsprop',
loss='categorical_crossentropy', metrics=['accuracy'])
history = model.fit(train_data, train_labels,
epochs=epochs,
batch_size=batch_size,
validation_data=(validation_data, validation_labels))
model.save_weights(top_model_weights_path)
(eval_loss, eval_accuracy) = model.evaluate(
validation_data, validation_labels, batch_size=batch_size, verbose=1)
print("[INFO] accuracy: {:.2f}%".format(eval_accuracy * 100))
print("[INFO] Loss: {}".format(eval_loss))
plt.figure(1)
# summarize history for accuracy
plt.subplot(211)
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
# summarize history for loss
plt.subplot(212)
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()
def predict():
# load the class_indices saved in the earlier step
class_dictionary = np.load('class_indices.npy').item()
num_classes = len(class_dictionary)
# add the path to your test image below
image_path = 'path/to/your/test_image'
orig = cv2.imread(image_path)
print("[INFO] loading and preprocessing image...")
image = load_img(image_path, target_size=(224, 224))
image = img_to_array(image)
# important! otherwise the predictions will be '0'
image = image / 255
image = np.expand_dims(image, axis=0)
# build the VGG16 network
model = applications.VGG16(include_top=False, weights='imagenet')
# get the bottleneck prediction from the pre-trained VGG16 model
bottleneck_prediction = model.predict(image)
# build top model
model = Sequential()
model.add(Flatten(input_shape=bottleneck_prediction.shape[1:]))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='sigmoid'))
model.load_weights(top_model_weights_path)
# use the bottleneck prediction on the top model to get the final
# classification
class_predicted = model.predict_classes(bottleneck_prediction)
probabilities = model.predict_proba(bottleneck_prediction)
inID = class_predicted[0]
inv_map = {v: k for k, v in class_dictionary.items()}
label = inv_map[inID]
# get the prediction label
print("Image ID: {}, Label: {}".format(inID, label))
# display the predictions with the image
cv2.putText(orig, "Predicted: {}".format(label), (10, 30),
cv2.FONT_HERSHEY_PLAIN, 1.5, (43, 99, 255), 2)
cv2.imshow("Classification", orig)
cv2.waitKey(0)
cv2.destroyAllWindows()
save_bottlebeck_features()
train_top_model()
predict()
cv2.destroyAllWindows()
@MSPallavi
Copy link

Hi, Thanks for building an application for multi label classification.
Would you please help me out to have 2 different files for training and testing, so that we don't need to train again and again whenever the application runs.

thanks

@NaeemKhanNiazi
Copy link

@Thimira
Can anybody help me ,how we can use Live webcam for the prediction for this blog....thank you

@mehki
Copy link

mehki commented Apr 14, 2020

hi,
i have used this code for training 50,000 training images and 30,000 validation. while training it train only using 2 epochs,after 2 epochs it decrease the accuracy. so kindly tell me im going in a right direction or not?
on 2 epochs a model can train perfectly?

@mehki
Copy link

mehki commented Apr 14, 2020

hi,
can anybody tells , only using 2 epochs a model can be trained or not?
as im using 70,000 training images and 30,000 validation images.
it gives 94% accuracy on 2 epochs only ,after 2 it decreases the accuracy

@Thimira
Copy link
Author

Thimira commented Apr 14, 2020

@mehki 2 epochs are not normally enough for a dataset of that size. How is the validation accuracy behaving?
This code gist is a little bit old. I have a more improved version of multiclass classification in keras with bottleneck and fine tuning in by Bird Watch project. You can check the project at https://github.com/Thimira/bird_watch. See the 'bird_watch_train.py' and 'bird_watch_train_optimized.py' files.

@mehki
Copy link

mehki commented Apr 14, 2020 via email

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment