CNN using a pretrained VGG16 model with a new classification layer. This script reads a csv file with image paths and labels and fine-tunes (or retrains) the whole network on the new images and labels. Batch size and number of epochs can also be customized.
from keras.applications import VGG16
from keras.layers import Dropout, Flatten, Dense
from keras.applications.vgg16 import preprocess_input
from keras.preprocessing import image as kimage
from keras.models import Model
from keras.utils import to_categorical
import numpy as np
import pandas as pd
''' This file reads the paths of the images and their corresponding labels from a csv file
(in my case the labels are integers starting at 0, so they must be converted to one hot encoding).

input.csv:
       path            labels
    0  path/file1.jpg  0
    1  path/file2.jpg  1
    ...
'''
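# Not part of the original gist: a minimal sketch of how such an input.csv could be
# produced with pandas (the file names and labels below are only placeholders):
#
#   import pandas as pd
#   df = pd.DataFrame({'path': ['path/file1.jpg', 'path/file2.jpg'],
#                      'labels': [0, 1]})
#   df.to_csv('input.csv', index=False)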
def read_image(path):
    '''
    Load an image from the provided path, resize it to the 224x224 input size
    expected by VGG16 and apply the VGG16 preprocessing.
    '''
    img = kimage.load_img(path, target_size=(224, 224))
    tmp = kimage.img_to_array(img)
    tmp = np.expand_dims(tmp, axis=0)
    tmp = preprocess_input(tmp)
    return tmp
def data_generator(file_paths, labels, batch_size):
    '''
    Generator used by Keras to create batches of images and labels.
    '''
    i = 0
    n = len(file_paths)
    while True:
        if i + batch_size > n:
            # Last (possibly smaller) batch, then wrap around
            batch_x = file_paths[i:]
            batch_y = labels[i:]
            i = 0
        else:
            batch_x = file_paths[i:i + batch_size]
            batch_y = labels[i:i + batch_size]
            i += batch_size
        batch_holder = np.zeros((len(batch_x), 224, 224, 3))
        for j, path in enumerate(batch_x):
            batch_holder[j, :] = read_image(path)
        yield batch_holder, batch_y
# Read the csv, shuffle the rows and one hot encode the labels
data = pd.read_csv('input.csv')
data = data.sample(frac=1).reset_index(drop=True)
n_labels = len(data['labels'].unique())
labels = to_categorical(data['labels'], num_classes=n_labels)
file_paths = data['path']
# Generate a model with all layers (with top)
vgg16 = VGG16(weights='imagenet', include_top=True)
# Add a layer where input is the output of the second last layer
x = Dense(n_labels, activation='softmax', name='predictions')(vgg16.layers[-2].output)
# Then create the corresponding model
model = Model(inputs=vgg16.input, outputs=x)
model.summary()
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
batch_size = 32
epochs = 2
gen = data_generator(file_paths, labels, batch_size)
model.fit_generator(gen, steps_per_epoch=len(data)//batch_size, epochs=epochs)
# serialize model to JSON
model_json = model.to_json()
with open("model.json", "w") as json_file:
json_file.write(model_json)
# serialize weights to HDF5
model.save_weights("model.h5")
print("Saved model to disk")