Last active
August 4, 2018 01:05
-
-
Save ikki407/771426542eb36afeb56ce8b163560a4c to your computer and use it in GitHub Desktop.
Customized image generator for Keras. This script assumes that the training dataset differs from the test dataset (e.g., the objects in the test images look different from the ones in the training images).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import os | |
import glob | |
import cv2 | |
import pandas as pd | |
import random | |
from keras.models import Sequential | |
from keras.layers.core import Dense, Dropout, Flatten, Activation | |
from keras.layers.normalization import BatchNormalization | |
from keras.layers.convolutional import Convolution2D, MaxPooling2D, \ | |
ZeroPadding2D | |
from keras.layers.advanced_activations import LeakyReLU, PReLU | |
from keras.regularizers import l2, activity_l2 | |
from keras.callbacks import EarlyStopping | |
from keras.preprocessing.image import ImageDataGenerator | |
from keras.optimizers import SGD, Adam | |
from keras.utils import np_utils | |
# ---------------------------------------------------------------------------
# Global settings shared by the generators defined in this module.
# ---------------------------------------------------------------------------

# Seed NumPy's global RNG. Note that this does not seed the standard-library
# ``random`` module, which is what shuffles the training rows.
np.random.seed(2016)

# Number of channels to load per image: 1 - grey, 3 - rgb.
color_type_global = 3

# Every image is resized to (img_rows, img_cols) before it is batched.
img_rows, img_cols = 224, 224

# Mini-batch size used by the training/validation generator.
batch_size = 32

# Keras image augmentation: small random rotations, shifts, shears and zooms.
# All normalisation options are off because the generators do their own
# scaling / mean subtraction. 'th' selects the channels-first layout, which
# matches the (N, C, H, W) arrays the generators build.
datagen = ImageDataGenerator(featurewise_center=False,
                             samplewise_center=False,
                             featurewise_std_normalization=False,
                             samplewise_std_normalization=False,
                             zca_whitening=False,
                             rotation_range=13.,
                             width_shift_range=0.1,
                             height_shift_range=0.1,
                             shear_range=0.1,
                             zoom_range=0.1,
                             channel_shift_range=0.,
                             fill_mode='nearest',
                             cval=0.,
                             horizontal_flip=False,
                             vertical_flip=False,
                             dim_ordering='th')

# Remove a stale top-model weights file, if one is configured.
# NOTE(review): ``top_model_weights_path`` is not assigned anywhere in this
# file, so the unguarded check raised NameError as soon as the module was
# loaded. The name is now picked up only if the executing namespace already
# provides it (e.g. the script is exec'd from a session that defines it);
# otherwise the clean-up is skipped. Confirm the intended path and assign it
# explicitly.
top_model_weights_path = globals().get('top_model_weights_path')
if top_model_weights_path and os.path.exists(top_model_weights_path):
    os.remove(top_model_weights_path)
def show_image(im, name='image'):
    """Show ``im`` in an OpenCV window and block until a key is pressed.

    Args:
        im: image array to display.
        name: title of the window the image is shown in.

    Every open OpenCV window (not only ``name``) is destroyed on return.
    """
    cv2.imshow(name, im)
    # A delay of 0 makes waitKey block until a key press arrives.
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def get_im(path, img_rows, img_cols, color_type=1):
    """Load the image at ``path`` and resize it to ``img_rows`` x ``img_cols``.

    Args:
        path: location of the image file.
        img_rows: height of the returned image, in pixels.
        img_cols: width of the returned image, in pixels.
        color_type: 1 to load a single-channel grey image, 3 to load a
            3-channel colour image.

    Returns:
        The resized image as returned by ``cv2.resize``. No scaling or mean
        subtraction is applied here.

    Raises:
        ValueError: if ``color_type`` is neither 1 nor 3.
        IOError: if OpenCV cannot read an image from ``path``.
    """
    if color_type == 1:
        # Flag 0 asks OpenCV for a greyscale read.
        img = cv2.imread(path, 0)
    elif color_type == 3:
        img = cv2.imread(path)
    else:
        # Previously fell through and failed later with an unbound ``img``.
        raise ValueError(
            'color_type must be 1 (grey) or 3 (colour), got {!r}'.format(
                color_type))

    # cv2.imread signals a missing/corrupt file by returning None rather than
    # raising, which would otherwise surface as an obscure resize error.
    if img is None:
        raise IOError('could not read image: {}'.format(path))

    # cv2.resize takes the target size as (width, height).
    return cv2.resize(img, (img_cols, img_rows))
def process_line(img_list):
    """Turn one parsed CSV row into an ``(image, class_index)`` pair.

    Args:
        img_list: the split fields of one row. Field 1 is the class folder
            name; its first character is dropped and the remainder is parsed
            as the integer class index. Field 2 is the image file name inside
            that folder.

    Returns:
        Tuple of the image loaded with ``get_im`` (using the module-level
        size and colour settings) and the integer class index.
    """
    class_dir = img_list[1]
    label = int(class_dir[1:])
    image_path = '../input/imgs/train/' + class_dir + '/' + img_list[2]
    image = get_im(image_path, img_rows, img_cols,
                   color_type=color_type_global)
    return image, label
def image_augmentation(X_train, Y_train, batch_size, datagen=datagen):
    """Return one augmented batch drawn from ``(X_train, Y_train)``.

    Args:
        X_train: array of images to augment.
        Y_train: labels matching ``X_train``.
        batch_size: number of samples requested from the Keras iterator.
        datagen: the ``ImageDataGenerator`` to use. The default is the
            module-level ``datagen`` as it existed when this function was
            defined.

    Returns:
        The first ``(X, Y)`` batch produced by ``datagen.flow``.
    """
    datagen.fit(X_train)
    batches = datagen.flow(X_train, Y_train, batch_size=batch_size)
    # Builtin next() works on Python 2 and 3; the ``.next()`` method is the
    # Python 2-only spelling of the iterator protocol.
    return next(batches)
def _build_train_batch(images, labels, color_type, augment):
    """Convert raw images and integer labels into one network-ready batch."""
    X = np.array(images, dtype=np.uint8)
    y = np.array(labels, dtype=np.uint8)

    # Bring the images into channels-first (N, C, H, W) layout.
    if color_type == 1:
        X = X.reshape(X.shape[0], color_type, img_rows, img_cols)
    else:
        X = X.transpose((0, 3, 1, 2))

    y = np_utils.to_categorical(y, 10)
    X = X.astype('float32')

    if color_type == 1:
        X /= 255
    else:
        # Per-channel mean subtraction instead of scaling to [0, 1].
        mean_pixel = [103.939, 116.779, 123.68]
        X -= np.array(mean_pixel, dtype=np.float32).reshape(1, 3, 1, 1)

    if augment:
        X, y = image_augmentation(X, y, len(images))
    return X, y


def generate_arrays_from_file(path, drivers_list=None,
                              color_type=color_type_global,
                              isvalidation=False, isfinetuning=False,
                              finetuning_name=None, usingalldata=True):
    """Endlessly yield ``(X, y)`` batches built from the rows of a CSV file.

    The first line of ``path`` is treated as a header and skipped. Each
    remaining row is split on commas: field 0 is compared against
    ``drivers_list`` and the whole row is handed to ``process_line`` to load
    the image and its class index. Batches hold the module-level
    ``batch_size`` samples; the last batch of a pass may be smaller.

    Args:
        path: CSV file listing the images.
        drivers_list: values of field 0 to keep. Only consulted when
            ``usingalldata`` is true; ``None`` disables the filter (it used
            to raise TypeError).
        color_type: 1 for grey images (scaled to [0, 1]); any other value is
            treated as 3-channel input (per-channel mean subtracted).
        isvalidation: when true, rows are not shuffled and batches are not
            augmented.
        isfinetuning: when true, the one-hot targets of every row used in a
            pass are written to ``target_<finetuning_name>.npy`` in the
            current directory at the end of that pass.
        finetuning_name: suffix for the targets file name.
        usingalldata: when true, rows whose field 0 is not in
            ``drivers_list`` are skipped.

    Yields:
        ``(X, y)`` where ``X`` is a float32 array in (N, C, rows, cols)
        layout and ``y`` is the one-hot label array over 10 classes.

    Raises:
        ValueError: if a pass over the file produces no samples at all
            (otherwise the generator would spin forever without yielding).
    """
    augment = not isvalidation
    while 1:
        with open(path) as f:
            next(f, None)  # skip the column-header line
            rows = list(f)

        if not isvalidation:
            random.shuffle(rows)

        target_id = []
        images, labels = [], []
        emitted_any = False

        for row in rows:
            row = row.rstrip('\r\n')
            if not row:
                continue  # tolerate blank lines, e.g. at the end of the file
            fields = row.split(',')

            if (usingalldata and drivers_list is not None
                    and fields[0] not in drivers_list):
                continue

            x, y = process_line(fields)
            if isfinetuning:
                target_id.append(y)
            images.append(x)
            labels.append(y)

            if len(images) == batch_size:
                yield _build_train_batch(images, labels, color_type, augment)
                emitted_any = True
                images, labels = [], []

        if not emitted_any and not images:
            raise ValueError(
                'no usable samples found in {!r}'.format(path))

        if isfinetuning:
            target_id = np_utils.to_categorical(target_id, 10)
            # Pass the file name so NumPy opens it in binary mode and closes
            # it; the old text-mode handle was never closed.
            np.save('target_{}.npy'.format(finetuning_name), target_id)

        # Emit the leftover samples only when there are some. The former
        # for/else tail also ran when the sample count was an exact multiple
        # of batch_size and re-processed the batch it had just yielded.
        if images:
            yield _build_train_batch(images, labels, color_type, augment)
def _build_test_batch(images, color_type):
    """Convert a list of raw test images into one network-ready array."""
    X = np.array(images, dtype=np.uint8)

    # Bring the images into channels-first (N, C, H, W) layout.
    if color_type == 1:
        X = X.reshape(X.shape[0], color_type, img_rows, img_cols)
    else:
        X = X.transpose((0, 3, 1, 2))

    X = X.astype('float32')
    if color_type == 1:
        X /= 255
    else:
        # Per-channel mean subtraction instead of scaling to [0, 1].
        mean_pixel = [103.939, 116.779, 123.68]
        X -= np.array(mean_pixel, dtype=np.float32).reshape(1, 3, 1, 1)
    return X


def test_prediction(data_path, color_type=color_type_global, batch_size=64):
    """Endlessly yield batches of preprocessed test images.

    Example:
        test_data_generator = test_prediction('../input/imgs/test/*.jpg')

    Args:
        data_path: glob pattern selecting the test images.
        color_type: 1 for grey images (scaled to [0, 1]); any other value is
            treated as 3-channel input (per-channel mean subtracted).
        batch_size: number of images per yielded batch; the last batch of a
            pass may be smaller.

    Yields:
        float32 arrays in (N, C, rows, cols) layout. Files are visited in the
        order ``glob.glob`` returns them, which is the same call
        ``save_pred`` uses to recover the file names.

    Raises:
        ValueError: if ``data_path`` matches no files (otherwise the
            generator would spin forever without yielding).
    """
    print('Read test images')
    while 1:
        files = glob.glob(data_path)
        if not files:
            raise ValueError('no files match {!r}'.format(data_path))

        pending = []
        for file_ in files:
            pending.append(get_im(file_, img_rows, img_cols, color_type))
            if len(pending) == batch_size:
                yield _build_test_batch(pending, color_type)
                pending = []

        # Emit the leftover images only when there are some. The former
        # for/else tail also ran when the file count was an exact multiple
        # of batch_size and re-processed the batch it had just yielded.
        if pending:
            yield _build_test_batch(pending, color_type)
def save_pred(preds, data_path, submission_name='submission'):
    """Write predictions to ``submission/<submission_name>.csv``.

    Args:
        preds: 2-D predictions with one row per image and 10 columns, which
            are written as ``c0`` .. ``c9``.
        data_path: glob pattern selecting the test images. The base name of
            every match fills the ``img`` column, in ``glob.glob`` order, so
            ``preds`` must have been produced in that same order.
        submission_name: output file name without the ``.csv`` extension.

    Raises:
        ValueError: raised by pandas when the number of matched files differs
            from the number of prediction rows.
    """
    print('Read test images name for submission file')
    image_names = [os.path.basename(file_) for file_ in glob.glob(data_path)]

    preds_df = pd.DataFrame(preds,
                            columns=['c{}'.format(i) for i in range(10)])
    preds_df['img'] = image_names

    # print() with a single argument behaves the same on Python 2 and 3; the
    # bare ``print '...'`` statement is a syntax error on Python 3.
    print('Saving predictions')

    # to_csv does not create missing directories.
    if not os.path.isdir('submission'):
        os.makedirs('submission')
    preds_df.to_csv('submission/' + submission_name + '.csv', index=False)
    return
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment