munyari/demo.py

## demo.py
#!/usr/bin/env python3
import os
from glob import glob
from shutil import copyfile, copytree, rmtree
import numpy as np

def mkdir(path):
    """Create directory *path*, including any missing parent directories.

    An already existing directory is not an error, so the call can be
    repeated safely.
    """
    os.makedirs(name=path, exist_ok=True)

# Short alias so the script reads like a shell session.
cd = os.chdir

# Root of the working copy of the dataset.
DATA_DIR = '/data'

mkdir(DATA_DIR)

# Copy the provided input sets into the working directory.
for subset in ('train', 'test'):
    copytree('/input/' + subset, DATA_DIR + '/' + subset)

# Create the remaining working/output directories. 'test/unknown' is made
# after the copy above because copytree needs to create DATA_DIR/test itself.
for new_dir in (
        DATA_DIR + '/valid',
        '/output/results',
        DATA_DIR + '/sample/train',
        DATA_DIR + '/sample/test',
        DATA_DIR + '/sample/valid',
        '/output/sample/results',
        DATA_DIR + '/test/unknown',
):
    mkdir(new_dir)

# Carve the validation set and the small "sample" sets out of the training
# images. File names are relative to the current working directory.
cd(DATA_DIR+'/train')

# Move up to 2000 randomly chosen training images into the validation set.
# Slicing the permutation (instead of indexing range(2000)) keeps the script
# from raising IndexError when fewer images are available.
for fname in np.random.permutation(glob('*.jpg'))[:2000]:
    os.rename(fname, DATA_DIR+'/valid/'+fname)

# Copy up to 200 of the remaining training images into the sample training
# set. They are copied rather than moved, consistent with the sample
# validation set below, so the full training set is not depleted.
for fname in np.random.permutation(glob('*.jpg'))[:200]:
    copyfile(fname, DATA_DIR+'/sample/train/'+fname)

cd(DATA_DIR+'/valid')

# Copy up to 50 random validation images into the sample validation set.
for fname in np.random.permutation(glob('*.jpg'))[:50]:
    copyfile(fname, DATA_DIR+'/sample/valid/'+fname)

def move_stuff():
    """Sort the JPEGs of the current working directory into class folders.

    Files matching 'cat.*.jpg' are moved into 'cats/' and files matching
    'dog.*.jpg' into 'dogs/'; both folders are created if missing.
    """
    labels = (('cat', 'cats'), ('dog', 'dogs'))

    for _, folder in labels:
        mkdir(folder)

    # Collect every match first, then move, mirroring the original ordering.
    matches = [(folder, glob(prefix + '.*.jpg')) for prefix, folder in labels]
    for folder, images in matches:
        for image in images:
            os.rename(image, folder + '/' + image)

# Sort every labelled set into cats/ and dogs/ sub-directories.
for labelled_dir in ('/sample/train', '/sample/valid', '/valid', '/train'):
    cd(DATA_DIR + labelled_dir)
    move_stuff()

# The test images carry no label, so they all go into a single 'unknown'
# sub-directory.
cd(DATA_DIR + '/test')
for test_image in glob('*.jpg'):
    os.rename(test_image, 'unknown/' + test_image)

from keras.layers import Activation, Dense, Flatten
from keras.models import Sequential
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator

# Small fully connected classifier for the two-class cats/dogs images.
#
# The image generator delivers channels-last batches: the recorded run failed
# with "expected ... (None, 3, 224, 224) but got array with shape
# (64, 224, 224, 3)". The input shape therefore has to be (224, 224, 3), and
# the image is flattened before the first Dense layer so the network ends in a
# 2-D (batch, classes) output.
model = Sequential([
    Flatten(input_shape=(224, 224, 3)),
    Dense(32),
    Activation('relu'),
    Dense(10),
    Activation('relu'),
    Dense(20),
    Activation('relu'),
    # One unit per class with softmax: the model is compiled with
    # categorical_crossentropy on 2 classes, and column 1 of the predictions
    # is later read as the dog score.
    Dense(2),
    Activation('softmax'),
    ])

# Directory layout used for training, validation and prediction.
# TODO: fit documentation
path = DATA_DIR + '/sample'  # train and validate on the small sample set
test_path = DATA_DIR + '/test' # we use all the test data
# Previously built as '/output/' + '/results', which produced '/output//results';
# this is the directory created earlier in the script.
results_path = '/output/results'
train_path = path + '/train'
valid_path = path + '/valid'

# also, try binary
def get_batches(path, shuffle=True, class_mode='categorical'):
    """Return a batch iterator over the images stored under *path*.

    Args:
        path: directory handed to ``flow_from_directory``.
        shuffle: forwarded unchanged to ``flow_from_directory``.
        class_mode: forwarded unchanged to ``flow_from_directory``.

    Returns:
        The iterator produced by ``ImageDataGenerator().flow_from_directory``,
        configured with a batch size of 64.
    """
    # target_size sets the dimensions every image is resized to.
    image_size = (224, 224)
    generator = ImageDataGenerator()
    return generator.flow_from_directory(
        path,
        target_size=image_size,
        class_mode=class_mode,
        batch_size=64,
        shuffle=shuffle,
    )

# Batch iterators over the sample training and validation sets.
train_batches = get_batches(train_path)
valid_batches = get_batches(valid_path)

# Alternative still to try: optimizer='sgd' with loss='binary_crossentropy'.
model.compile(
    optimizer=Adam(lr=0.01),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# fit_generator trains batch by batch from a Python generator (the iterators
# built above) instead of an in-memory array; per the original note, the
# generator runs in parallel to the model for efficiency.
# One epoch covers every training sample once, and validation uses every
# validation sample.
model.fit_generator(
    train_batches,
    samples_per_epoch=train_batches.nb_sample,
    nb_epoch=1,
    validation_data=valid_batches,
    nb_val_samples=valid_batches.nb_sample,
)
model.save_weights('/output/model_weights.hdf5')

# Predict on the whole test set. shuffle=False so that row i of preds should
# correspond to filenames[i]; class_mode=None because the test set is unlabelled.
test_batches = get_batches(test_path, shuffle=False, class_mode=None)
preds = model.predict_generator(test_batches, test_batches.nb_sample)

filenames = test_batches.filenames

# Column 1 is taken as the dog score and clipped to [0.05, 0.95]
# (presumably to bound the log-loss penalty of confident mistakes -- confirm).
isdog = preds[:,1].clip(min=0.05,max=0.95)

# The numeric id is the file name without directory and extension, e.g.
# 'unknown/123.jpg' -> 123. Using basename/splitext avoids depending on the
# directory prefix being exactly 8 characters long.
ids = np.array([int(os.path.splitext(os.path.basename(f))[0]) for f in filenames])

# Two columns: image id and clipped dog score.
subm = np.stack([ids, isdog], axis=1)
print("Just some of our submissions")
print(subm[:5])

np.savetxt('/output/submision1.csv', subm, fmt='%d,%.5f', header='id,label', comments='')

## floydhub.log
2017-03-14 12:40:23,622 INFO - Preparing to run TaskInstance <TaskInstance: munyari/cats-dogs:39 (id: SB36w4qBTYLNZAMpHPwDNW) (checksum: 08eaf1315c8fd6449ffc48ec61673967) (last update: 2017-03-14 12:40:23.600813) [queued]>
2017-03-14 12:40:23,643 INFO - Starting attempt 1 at 2017-03-14 12:40:23.626258
2017-03-14 12:40:23,850 INFO - Executing command in container: stdbuf -o0 sh command.sh
2017-03-14 12:40:23,851 INFO - Pulling Docker image: floydhub/theano:latest-gpu-py3
2017-03-14 12:40:25,576 INFO - Starting container...
2017-03-14 12:40:25,889 INFO -
################################################################################

2017-03-14 12:40:25,889 INFO - Run Output:
2017-03-14 12:53:40,217 INFO - Found 200 images belonging to 2 classes.
2017-03-14 12:53:40,219 INFO - Found 50 images belonging to 2 classes.
2017-03-14 12:53:40,219 INFO - Epoch 1/1
2017-03-14 12:53:40,219 INFO - Using Theano backend.
2017-03-14 12:53:40,220 INFO - Using gpu device 0: Tesla K80 (CNMeM is enabled with initial size: 95.0% of memory, cuDNN 5110)
2017-03-14 12:53:40,220 INFO - /usr/local/lib/python3.5/site-packages/theano/sandbox/cuda/__init__.py:600: UserWarning: Your cuDNN version is more recent than the one Theano officially supports. If you see any problems, try updating Theano or downgrading cuDNN to version 5.
2017-03-14 12:53:40,221 INFO - warnings.warn(warn)
2017-03-14 12:53:40,289 INFO - Traceback (most recent call last):
2017-03-14 12:53:40,289 INFO - File "demo.py", line 110, in <module>
2017-03-14 12:53:40,290 INFO - nb_epoch=1, validation_data=valid_batches, nb_val_samples=valid_batches.nb_sample)
2017-03-14 12:53:40,290 INFO - File "/usr/local/lib/python3.5/site-packages/keras/models.py", line 935, in fit_generator
2017-03-14 12:53:40,290 INFO - initial_epoch=initial_epoch)
2017-03-14 12:53:40,291 INFO - File "/usr/local/lib/python3.5/site-packages/keras/engine/training.py", line 1557, in fit_generator
2017-03-14 12:53:40,291 INFO - class_weight=class_weight)
2017-03-14 12:53:40,291 INFO - File "/usr/local/lib/python3.5/site-packages/keras/engine/training.py", line 1314, in train_on_batch
2017-03-14 12:53:40,292 INFO - check_batch_axis=True)
2017-03-14 12:53:40,292 INFO - File "/usr/local/lib/python3.5/site-packages/keras/engine/training.py", line 1029, in _standardize_user_data
2017-03-14 12:53:40,292 INFO - exception_prefix='model input')
2017-03-14 12:53:40,293 INFO - File "/usr/local/lib/python3.5/site-packages/keras/engine/training.py", line 124, in standardize_input_data
2017-03-14 12:53:40,293 INFO - str(array.shape))
2017-03-14 12:53:40,293 INFO - ValueError: Error when checking model input: expected dense_input_1 to have shape (None, 3, 224, 224) but got array with shape (64, 224, 224, 3)
2017-03-14 12:53:40,579 INFO -
################################################################################

2017-03-14 12:53:40,579 INFO - Waiting for container to complete...
2017-03-14 12:53:41,030 ERROR - [failed] Execution failed in 797 seconds for TaskInstance <TaskInstance: munyari/cats-dogs:39 (id: SB36w4qBTYLNZAMpHPwDNW) (checksum: 08eaf1315c8fd6449ffc48ec61673967) (last update: 2017-03-14 12:53:41.021630) [failed]>
2017-03-14 12:53:41,031 ERROR - Docker container id cb243b80eb3fd11a6aa590829b548104fb1cb2992ee7ac2dcf7c697c7730d16c failed
Traceback (most recent call last):
  File "/app/floyd/models/task_instance.py", line 614, in run
    self.execute(module, context)
  File "/app/floyd/models/task_instance.py", line 795, in execute
    self.container.execute(module, context)
  File "/app/floyd/containers/docker_container.py", line 500, in execute
    self.wait()
  File "/app/floyd/containers/docker_container.py", line 372, in wait
    raise FloydException("Docker container id {} failed".format(self.docker_uuid))
floyd.exceptions.FloydException: Docker container id cb243b80eb3fd11a6aa590829b548104fb1cb2992ee7ac2dcf7c697c7730d16c failed
	#!/usr/bin/env python3
	import os
	from glob import glob
	from shutil import copyfile, copytree, rmtree
	import numpy as np

	def mkdir(path):
	os.makedirs(path, exist_ok=True)

	cd = os.chdir

	DATA_DIR='/data'

	mkdir(DATA_DIR)
	copytree('/input/train', DATA_DIR+'/train')
	copytree('/input/test', DATA_DIR+'/test')
	mkdir(DATA_DIR+'/valid')
	mkdir('/output/results')
	mkdir(DATA_DIR+'/sample/train')
	mkdir(DATA_DIR+'/sample/test')
	mkdir(DATA_DIR+'/sample/valid')
	mkdir('/output/sample/results')
	mkdir(DATA_DIR+'/test/unknown')

	cd(DATA_DIR+'/train')

	g = glob('*.jpg')
	shuf = np.random.permutation(g)
	for i in range(2000):
	os.rename(shuf[i], DATA_DIR+'/valid/'+shuf[i])


	g = glob('*.jpg')
	shuf = np.random.permutation(g)
	for i in range(200):
	os.rename(shuf[i], DATA_DIR+'/sample/train/'+shuf[i])

	cd(DATA_DIR+'/valid')

	g = glob('*.jpg')
	shuf = np.random.permutation(g)
	for i in range(50):
	copyfile(shuf[i], DATA_DIR+'/sample/valid/'+shuf[i])

	def move_stuff():
	mkdir('cats')
	mkdir('dogs')
	cats = glob('cat.*.jpg')
	dogs = glob('dog.*.jpg')
	for cat in cats:
	os.rename(cat, 'cats/'+cat)
	for dog in dogs:
	os.rename(dog, 'dogs/'+dog)

	cd(DATA_DIR+'/sample/train')
	move_stuff()

	cd(DATA_DIR+'/sample/valid')
	move_stuff()

	cd(DATA_DIR+'/valid')
	move_stuff()

	cd(DATA_DIR+'/train')
	move_stuff()

	cd(DATA_DIR+'/test')
	for img in glob('*.jpg'):
	os.rename(img, 'unknown/'+img)

	from keras.models import Sequential
	from keras.layers import Dense, Activation
	from keras.preprocessing.image import ImageDataGenerator
	from keras.optimizers import Adam

	model = Sequential([
	Dense(32,input_shape=(3,224,224)),
	Activation('relu'),
	Dense(10),
	Activation('relu'),
	Dense(20),
	Activation('relu')
	])

	# TODO: fit documentation
	path = DATA_DIR + '/sample'
	test_path = DATA_DIR + '/test' # we use all the test data
	results_path = '/output/' + '/results'
	train_path = path + '/train'
	valid_path = path + '/valid'

	# also, try binary
	def get_batches(path, shuffle=True, class_mode='categorical'):
	# target_size is where you set the dimensions that you want all images to
	# be resized to
	return ImageDataGenerator().flow_from_directory(path, target_size=(224,224),
	class_mode=class_mode, batch_size=64, shuffle=shuffle)

	train_batches = get_batches(train_path)
	valid_batches = get_batches(valid_path)

	# model.compile(optimizer='sgd', loss='binary_crosentropy', metrics=['accuracy'])
	model.compile(optimizer=Adam(lr=0.01), loss='categorical_crossentropy', metrics=['accuracy'])

	# they use a fit generator. What does that do?
	# fit generator allows training batch by batch from a Python generator. Generator
	# run in parallel to the model, for efficiency.
	# recall that ImageDataGenerator returns a generator
	model.fit_generator(train_batches, samples_per_epoch=train_batches.nb_sample,
	nb_epoch=1, validation_data=valid_batches, nb_val_samples=valid_batches.nb_sample)
	model.save_weights('/output/model_weights.hdf5')

	test_batches = get_batches(test_path, shuffle=False, class_mode=None)
	preds = model.predict_generator(test_batches, test_batches.nb_sample)

	filenames = test_batches.filenames

	isdog = preds[:,1].clip(min=0.05,max=0.95)

	ids = np.array([int(f[8:f.find('.')]) for f in filenames])

	subm = np.stack([ids, isdog], axis=1)
	print("Just some of our submissions")
	print(subm[:5])

	np.savetxt('/output/submision1.csv', subm, fmt='%d,%.5f', header='id,label', comments='')
	2017-03-14 12:40:23,622 INFO - Preparing to run TaskInstance <TaskInstance: munyari/cats-dogs:39 (id: SB36w4qBTYLNZAMpHPwDNW) (checksum: 08eaf1315c8fd6449ffc48ec61673967) (last update: 2017-03-14 12:40:23.600813) [queued]>
	2017-03-14 12:40:23,643 INFO - Starting attempt 1 at 2017-03-14 12:40:23.626258
	2017-03-14 12:40:23,850 INFO - Executing command in container: stdbuf -o0 sh command.sh
	2017-03-14 12:40:23,851 INFO - Pulling Docker image: floydhub/theano:latest-gpu-py3
	2017-03-14 12:40:25,576 INFO - Starting container...
	2017-03-14 12:40:25,889 INFO -
	################################################################################

	2017-03-14 12:40:25,889 INFO - Run Output:
	2017-03-14 12:53:40,217 INFO - Found 200 images belonging to 2 classes.
	2017-03-14 12:53:40,219 INFO - Found 50 images belonging to 2 classes.
	2017-03-14 12:53:40,219 INFO - Epoch 1/1
	2017-03-14 12:53:40,219 INFO - Using Theano backend.
	2017-03-14 12:53:40,220 INFO - Using gpu device 0: Tesla K80 (CNMeM is enabled with initial size: 95.0% of memory, cuDNN 5110)
	2017-03-14 12:53:40,220 INFO - /usr/local/lib/python3.5/site-packages/theano/sandbox/cuda/__init__.py:600: UserWarning: Your cuDNN version is more recent than the one Theano officially supports. If you see any problems, try updating Theano or downgrading cuDNN to version 5.
	2017-03-14 12:53:40,221 INFO - warnings.warn(warn)
	2017-03-14 12:53:40,289 INFO - Traceback (most recent call last):
	2017-03-14 12:53:40,289 INFO - File "demo.py", line 110, in <module>
	2017-03-14 12:53:40,290 INFO - nb_epoch=1, validation_data=valid_batches, nb_val_samples=valid_batches.nb_sample)
	2017-03-14 12:53:40,290 INFO - File "/usr/local/lib/python3.5/site-packages/keras/models.py", line 935, in fit_generator
	2017-03-14 12:53:40,290 INFO - initial_epoch=initial_epoch)
	2017-03-14 12:53:40,291 INFO - File "/usr/local/lib/python3.5/site-packages/keras/engine/training.py", line 1557, in fit_generator
	2017-03-14 12:53:40,291 INFO - class_weight=class_weight)
	2017-03-14 12:53:40,291 INFO - File "/usr/local/lib/python3.5/site-packages/keras/engine/training.py", line 1314, in train_on_batch
	2017-03-14 12:53:40,292 INFO - check_batch_axis=True)
	2017-03-14 12:53:40,292 INFO - File "/usr/local/lib/python3.5/site-packages/keras/engine/training.py", line 1029, in _standardize_user_data
	2017-03-14 12:53:40,292 INFO - exception_prefix='model input')
	2017-03-14 12:53:40,293 INFO - File "/usr/local/lib/python3.5/site-packages/keras/engine/training.py", line 124, in standardize_input_data
	2017-03-14 12:53:40,293 INFO - str(array.shape))
	2017-03-14 12:53:40,293 INFO - ValueError: Error when checking model input: expected dense_input_1 to have shape (None, 3, 224, 224) but got array with shape (64, 224, 224, 3)
	2017-03-14 12:53:40,579 INFO -
	################################################################################

	2017-03-14 12:53:40,579 INFO - Waiting for container to complete...
	2017-03-14 12:53:41,030 ERROR - [failed] Execution failed in 797 seconds for TaskInstance <TaskInstance: munyari/cats-dogs:39 (id: SB36w4qBTYLNZAMpHPwDNW) (checksum: 08eaf1315c8fd6449ffc48ec61673967) (last update: 2017-03-14 12:53:41.021630) [failed]>
	2017-03-14 12:53:41,031 ERROR - Docker container id cb243b80eb3fd11a6aa590829b548104fb1cb2992ee7ac2dcf7c697c7730d16c failed
	Traceback (most recent call last):
	File "/app/floyd/models/task_instance.py", line 614, in run
	self.execute(module, context)
	File "/app/floyd/models/task_instance.py", line 795, in execute
	self.container.execute(module, context)
	File "/app/floyd/containers/docker_container.py", line 500, in execute
	self.wait()
	File "/app/floyd/containers/docker_container.py", line 372, in wait
	raise FloydException("Docker container id {} failed".format(self.docker_uuid))
	floyd.exceptions.FloydException: Docker container id cb243b80eb3fd11a6aa590829b548104fb1cb2992ee7ac2dcf7c697c7730d16c failed