# yudai09/keras-vis-stanford40action.py

## keras-vis-stanford40action.py
import os
import glob
import numpy as np
import keras
from keras.callbacks import ModelCheckpoint
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Input

from vis.visualization import visualize_cam, overlay
from matplotlib import pyplot as plt

def main(img_width=224, img_height=224, data_dir='./data/Stanford40/JPEGImages/',
         model_name='InceptionV4'):
    """Classify Stanford40 images and show a Grad-CAM overlay for each one.

    Builds the selected network, loads its weights from ``./weight/`` when the
    file exists, then for every path matching ``<data_dir>/*/*`` prints the
    prediction and displays the class-activation map blended over the image.

    Args:
        img_width: Width of the network input, in pixels.
        img_height: Height of the network input, in pixels.
        data_dir: Directory whose sub-directories contain the images.
        model_name: ``'VGG19'`` selects the VGG19-based classifier; any other
            value builds ``InceptionResNetV2`` (the weight file for that model
            is named ``inceptionV4_full.hdf5``).
    """
    nb_classes = 40
    # Keras' channels-last layout: (height, width, channels).
    input_shape = (img_height, img_width, 3)
    input_tensor = Input(shape=input_shape)

    if model_name == 'VGG19':
        # Only this branch needs these names, so import them here rather than
        # relying on file-level imports that the script does not have.
        from keras.layers import BatchNormalization, Dense, Dropout, Flatten
        from keras.models import Sequential

        model_path = './weight/vgg19_full.hdf5'

        # Convolutional base pretrained on ImageNet, without its classifier.
        base_model = keras.applications.vgg19.VGG19(
            include_top=False, weights='imagenet', input_tensor=input_tensor)

        # Batch normalization in front of the base model; its input shape
        # must agree with the Input tensor above.
        model = Sequential()
        model.add(BatchNormalization(input_shape=input_shape))
        model.add(base_model)

        # Fully connected head for the designated number of classes.
        fc_model = Sequential()
        fc_model.add(Flatten(input_shape=base_model.output_shape[1:]))
        fc_model.add(Dense(256, activation='relu'))
        fc_model.add(Dropout(0.5))
        fc_model.add(Dense(nb_classes, activation='softmax'))

        model.add(fc_model)
    else:
        # Labelled "InceptionV4" in this script, but the network actually
        # built is InceptionResNetV2 with randomly initialised weights.
        model_path = './weight/inceptionV4_full.hdf5'
        model = keras.applications.inception_resnet_v2.InceptionResNetV2(
            include_top=True, weights=None, input_tensor=input_tensor,
            classes=nb_classes)

    model.compile(loss='categorical_crossentropy',
                  optimizer=keras.optimizers.SGD(lr=1e-3, momentum=0.90, decay=1e-8, nesterov=True),
                  metrics=['accuracy'])

    model.summary()
    print(model.layers)
    print('{} layers'.format(len(model.layers)))

    if os.path.exists(model_path):
        model.load_weights(model_path)
    else:
        # Keep going (best effort), but make it obvious that the predictions
        # below do not come from the trained weights.
        print('WARNING: weight file {} not found; using the weights the model '
              'was built with'.format(model_path))

    classes = [
        'applauding', 'blowing_bubbles', 'brushing_teeth',
        'cleaning_the_floor', 'climbing', 'cooking', 'cutting_trees',
        'cutting_vegetables', 'drinking', 'feeding_a_horse', 'fishing',
        'fixing_a_bike', 'fixing_a_car', 'gardening', 'holding_an_umbrella',
        'jumping', 'looking_through_a_microscope',
        'looking_through_a_telescope', 'phoning', 'playing_guitar',
        'playing_violin', 'pouring_liquid', 'pushing_a_cart', 'reading',
        'riding_a_bike', 'riding_a_horse', 'rowing_a_boat', 'running',
        'shooting_an_arrow', 'smoking', 'taking_photos', 'texting_message',
        'throwing_frisby', 'using_a_computer', 'walking_the_dog',
        'washing_dishes', 'watching_TV', 'waving_hands',
        'writing_on_a_board', 'writing_on_a_book',
    ]

    # Grad-CAM is computed with respect to the last layer of the model.
    # NOTE(review): the keras-vis examples swap a final softmax for a linear
    # activation before calling visualize_cam -- confirm whether that is
    # wanted here.
    output_layer_idx = len(model.layers) - 1

    # Honour the caller-supplied data_dir instead of overwriting it.
    for image_path in glob.glob(os.path.join(data_dir, '*', '*')):
        original_image, preprocessed_input = load_image(image_path)
        predictions = model.predict(preprocessed_input)
        print(predictions)
        class_idx = np.argmax(predictions)
        print('{} {} {}'.format(image_path, class_idx, classes[class_idx]))
        gradcam = visualize_cam(model, output_layer_idx, class_idx, preprocessed_input)
        print(preprocessed_input.shape)
        print(original_image.shape)
        print(gradcam.shape)

        plt.imshow(overlay(gradcam, original_image))
        plt.show()
    # No trailing busy-wait: once every image has been shown the function
    # simply returns instead of spinning a CPU core forever.


def load_image(path, target_size=(224, 224)):
    """Load an image file and prepare it as network input.

    Args:
        path: Path of the image file to read.
        target_size: Size passed to ``image.load_img`` for resizing, as
            ``(height, width)``. Defaults to ``(224, 224)``, the size this
            function previously hard-coded.

    Returns:
        A tuple ``(img, batch)``. ``img`` is the resized image converted with
        ``image.img_to_array`` (pixel values left unscaled). ``batch`` is the
        same data with a leading axis of length 1 and every value divided
        by 255.
    """
    img = image.img_to_array(image.load_img(path, target_size=target_size))
    # The model predicts on batches, so add the batch axis before scaling.
    batch = np.expand_dims(img, axis=0) / 255
    return img, batch


if __name__ == '__main__':
    # Run the Grad-CAM visualisation only when executed as a script.
    main()