Skip to content

Instantly share code, notes, and snippets.

@ikki407
Last active August 4, 2018 01:05
Show Gist options
  • Save ikki407/771426542eb36afeb56ce8b163560a4c to your computer and use it in GitHub Desktop.
Save ikki407/771426542eb36afeb56ce8b163560a4c to your computer and use it in GitHub Desktop.
Customized image generator for Keras. This script assumes that the training dataset differs from the test dataset (e.g., the objects in the test set look different from the ones in the training set).
import numpy as np
import os
import glob
import cv2
import pandas as pd
import random
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Flatten, Activation
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Convolution2D, MaxPooling2D, \
ZeroPadding2D
from keras.layers.advanced_activations import LeakyReLU, PReLU
from keras.regularizers import l2, activity_l2
from keras.callbacks import EarlyStopping
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD, Adam
from keras.utils import np_utils
# ---------------------------------------------------------------------------
# Global settings
# ---------------------------------------------------------------------------
# Seed NumPy's global random number generator.
np.random.seed(2016)

# Number of image channels used throughout: 1 - grey, 3 - colour.
color_type_global = 3

# Height and width every image is resized to.
img_rows, img_cols = 224, 224

# Number of samples per mini-batch produced by the training generator.
batch_size = 32

# Shared Keras image data generator. All normalisation switches and both
# flips are disabled; only small random rotations, shifts, shear and zoom are
# enabled. 'th' selects channels-first (N, C, H, W) array ordering.
datagen = ImageDataGenerator(
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    rotation_range=13.,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    channel_shift_range=0.,
    fill_mode='nearest',
    cval=0.,
    horizontal_flip=False,
    vertical_flip=False,
    dim_ordering='th',
)
# Remove stale top-model weights left over from a previous run, if a path has
# been configured.
# NOTE(review): `top_model_weights_path` is not assigned anywhere in the
# visible file, so the original unguarded check raised NameError as soon as
# the module was imported. The name is therefore looked up defensively;
# assign `top_model_weights_path` above this point to enable the cleanup.
stale_weights_path = globals().get('top_model_weights_path')
if stale_weights_path is not None and os.path.exists(stale_weights_path):
    os.remove(stale_weights_path)
def show_image(im, name='image'):
    """Display an image in an OpenCV window and block until a key is pressed.

    Args:
        im: Image array handed to ``cv2.imshow``.
        name: Title of the window the image is shown in.

    All OpenCV windows are destroyed once a key has been pressed.
    """
    cv2.imshow(name, im)
    # A delay of 0 waits for a key press without timing out.
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def get_im(path, img_rows, img_cols, color_type=1):
    """Read an image from disk and resize it to ``img_rows`` x ``img_cols``.

    Args:
        path: Path of the image file to read.
        img_rows: Target height in pixels.
        img_cols: Target width in pixels.
        color_type: 1 to load the image as greyscale, 3 to load it as a
            3-channel colour image.

    Returns:
        The resized image as returned by ``cv2.resize``.

    Raises:
        ValueError: If ``color_type`` is neither 1 nor 3.
        IOError: If OpenCV could not read ``path``.
    """
    # Validate up front: previously an unsupported value left `img` unbound
    # and surfaced as an UnboundLocalError further down.
    if color_type not in (1, 3):
        raise ValueError(
            'color_type must be 1 (grey) or 3 (colour), got {!r}'.format(
                color_type))

    if color_type == 1:
        # Flag 0 makes OpenCV load the file as a single-channel image.
        img = cv2.imread(path, 0)
    else:
        img = cv2.imread(path)

    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise IOError('could not read image: {}'.format(path))

    # cv2.resize takes the target size as (width, height).
    return cv2.resize(img, (img_cols, img_rows))
def process_line(img_list):
    """Load the training image and integer label described by one CSV row.

    Args:
        img_list: Parsed row. Element 1 is the class folder name (its first
            character is dropped and the rest is parsed as the integer
            label); element 2 is the image file name inside that folder.

    Returns:
        Tuple ``(image, label)`` where ``image`` comes from ``get_im`` using
        the module-level ``img_rows``, ``img_cols`` and ``color_type_global``.
    """
    class_dir = img_list[1]
    label = int(class_dir[1:])
    image_file = '../input/imgs/train/' + class_dir + '/' + img_list[2]
    image = get_im(image_file, img_rows, img_cols,
                   color_type=color_type_global)
    return image, label
def image_augmentation(X_train, Y_train, batch_size, datagen=datagen):
    """Return one randomly augmented batch drawn from the given samples.

    Args:
        X_train: Image array passed to ``datagen.fit`` and ``datagen.flow``.
        Y_train: Labels passed alongside ``X_train`` to ``datagen.flow``.
        batch_size: Batch size requested from ``datagen.flow``.
        datagen: Generator to use; defaults to the module-level ``datagen``.

    Returns:
        The first batch produced by ``datagen.flow``.
    """
    datagen.fit(X_train)
    # Only the first batch of the augmentation stream is consumed.
    augmented_batches = datagen.flow(X_train, Y_train, batch_size=batch_size)
    return augmented_batches.next()
def _finalize_train_batch(images, labels, color_type, augment):
    """Convert accumulated images and labels into one network-ready batch.

    Images become a float32 array in (N, C, H, W) order: greyscale input is
    scaled by 1/255, colour input has the per-channel ``mean_pixel`` values
    subtracted. Labels are one-hot encoded over 10 classes. When ``augment``
    is true the batch is passed through ``image_augmentation``.
    """
    X = np.array(images, dtype=np.uint8)
    y = np.array(labels, dtype=np.uint8)
    if color_type == 1:
        X = X.reshape(X.shape[0], color_type, img_rows, img_cols)
    else:
        # (N, H, W, C) -> (N, C, H, W)
        X = X.transpose((0, 3, 1, 2))
    y = np_utils.to_categorical(y, 10)
    X = X.astype('float32')
    if color_type == 1:
        X /= 255
    else:
        mean_pixel = [103.939, 116.779, 123.68]
        for c in range(3):
            X[:, c, :, :] -= mean_pixel[c]
    if augment:
        X, y = image_augmentation(X, y, len(images))
    return X, y


def generate_arrays_from_file(path, drivers_list=None,
                              color_type=color_type_global,
                              isvalidation=False, isfinetuning=False,
                              finetuning_name=None, usingalldata=True):
    """Endlessly yield ``(X, y)`` training batches described by a CSV file.

    The file is re-read on every pass. Its first line is skipped as a header;
    each remaining line is split on commas and handed to ``process_line``.
    Batches hold the module-level ``batch_size`` samples, except for a
    possibly smaller final batch at the end of each pass.

    Args:
        path: Path of the CSV file listing the images.
        drivers_list: Container of first-column values to keep. Rows whose
            first field is not in it are skipped. ``None`` disables the
            filter.
        color_type: 1 for greyscale batches, anything else for 3-channel
            batches. NOTE(review): images are loaded by ``process_line``,
            which uses ``color_type_global`` - confirm callers keep the two
            in sync.
        isvalidation: When true, rows are not shuffled and no augmentation
            is applied.
        isfinetuning: When true, the one-hot labels of every row used in a
            pass are saved to ``target_<finetuning_name>.npy`` at the end of
            that pass.
        finetuning_name: Name inserted into the saved labels file name.
        usingalldata: The ``drivers_list`` filter is only applied while this
            is true.

    Yields:
        Tuples ``(X, y)`` built by ``_finalize_train_batch``.

    Raises:
        ValueError: If a whole pass over the file produced no samples.
    """
    augment = not isvalidation
    # Guarding on None fixes the default call, which used to fail with
    # "argument of type 'NoneType' is not iterable".
    filter_rows = bool(usingalldata) and drivers_list is not None
    while 1:
        with open(path) as csv_file:
            next(csv_file)  # skip the header row
            rows = list(csv_file)
        if not isvalidation:
            random.shuffle(rows)

        target_id = []
        X_train, y_train = [], []
        produced_batch = False
        for row in rows:
            fields = row.replace('\n', '').split(',')
            if filter_rows and fields[0] not in drivers_list:
                continue
            x, y = process_line(fields)
            if isfinetuning:
                target_id.append(y)
            X_train.append(x)
            y_train.append(y)
            if len(X_train) == batch_size:
                produced_batch = True
                yield _finalize_train_batch(X_train, y_train, color_type,
                                            augment)
                X_train, y_train = [], []

        if isfinetuning:
            # Binary mode plus a context manager: the original text-mode
            # handle was never closed and is rejected by np.save on Python 3.
            labels_file = 'target_{}.npy'.format(finetuning_name)
            with open(labels_file, 'wb') as out:
                np.save(out, np_utils.to_categorical(target_id, 10))

        if X_train:
            # Emit the leftover partial batch only when there is one.
            # Previously this step also ran after an exactly full last batch
            # and re-processed (and so corrupted) the batch already yielded.
            yield _finalize_train_batch(X_train, y_train, color_type, augment)
        elif not produced_batch:
            # Without this the generator would spin forever yielding nothing.
            raise ValueError(
                'no usable rows found in {!r}'.format(path))
def _build_test_batch(images, color_type):
    """Convert a list of resized uint8 images into one float32 input batch.

    The result is in (N, C, H, W) order. Greyscale input is scaled by 1/255;
    colour input has the per-channel ``mean_pixel`` values subtracted.
    """
    batch = np.array(images, dtype=np.uint8)
    if color_type == 1:
        batch = batch.reshape(batch.shape[0], color_type, img_rows, img_cols)
        batch = batch.astype('float32')
        batch /= 255
    else:
        # (N, H, W, C) -> (N, C, H, W)
        batch = batch.transpose((0, 3, 1, 2)).astype('float32')
        mean_pixel = [103.939, 116.779, 123.68]
        for c in range(3):
            batch[:, c, :, :] -= mean_pixel[c]
    return batch


def test_prediction(data_path, color_type=color_type_global, batch_size = 64):
    """Endlessly yield batches of preprocessed test images.

    Example:
        test_data_generator = test_prediction('../input/imgs/test/*.jpg')

    Args:
        data_path: Glob pattern matching the test image files.
        color_type: 1 for greyscale batches, anything else for 3-channel
            batches.
        batch_size: Number of images per batch; the last batch of each pass
            may be smaller.

    Yields:
        float32 arrays built by ``_build_test_batch``.

    Raises:
        ValueError: If ``data_path`` matches no files.
    """
    print('Read test images')
    while 1:
        # NOTE(review): files are used in the order glob returns them.
        # save_pred globs the same pattern independently, so the two must
        # keep agreeing on that order - do not sort here without sorting
        # there as well.
        files = glob.glob(data_path)
        if not files:
            raise ValueError(
                'no test images match {!r}'.format(data_path))

        X_test = []
        for file_ in files:
            X_test.append(get_im(file_, img_rows, img_cols, color_type))
            if len(X_test) == batch_size:
                yield _build_test_batch(X_test, color_type)
                X_test = []

        if X_test:
            # Emit the leftover partial batch only when there is one.
            # Previously this step also ran when the file count was a
            # multiple of batch_size and yielded a corrupted copy of the
            # last batch.
            yield _build_test_batch(X_test, color_type)
def save_pred(preds, data_path, submission_name='submission'):
    """Write predictions to ``submission/<submission_name>.csv``.

    Args:
        preds: Per-image predictions with 10 values per row; they become the
            columns ``c0`` .. ``c9``.
        data_path: Glob pattern matching the test image files. The base name
            of every match is written to the ``img`` column, in the order
            glob returns the files.
        submission_name: File name (without extension) of the CSV to write.

    Returns:
        None.
    """
    print('Read test images name for submission file')
    X_test_id = [os.path.basename(file_) for file_ in glob.glob(data_path)]

    preds_df = pd.DataFrame(
        preds, columns=['c{}'.format(i) for i in range(10)])
    preds_df['img'] = X_test_id

    # Function-call form of print, consistent with the call above and valid
    # on both Python 2 and Python 3.
    print('Saving predictions')
    # to_csv does not create missing directories.
    if not os.path.isdir('submission'):
        os.makedirs('submission')
    preds_df.to_csv('submission/' + submission_name + '.csv', index=False)
    return
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment