Created
December 22, 2018 20:02
-
-
Save JosephCatrambone/81a331bbe2769c2256e7e2e7aff26fb2 to your computer and use it in GitHub Desktop.
A quick and dirty Python file for loading and training on the NYU depth dataset.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from PIL import Image | |
from glob import iglob | |
import h5py | |
import tensorflow as tf | |
import numpy | |
import logging | |
import itertools | |
log = logging.getLogger(__name__) | |
# Load training data from the NYU v2 Labelled Training Data | |
def depth_generator(
    path="G:\\nyu_depth_v2_labeled.mat",
    nearest=0.7132995128631592,
    farthest=9.99547004699707,
):
    """Yield ``(image, scaled_depth)`` float32 pairs from an HDF5 file, forever.

    The file is expected to expose an ``images`` and a ``depths`` dataset
    indexed by sample along the first axis.  Samples are read one at a time
    so the whole dataset never has to fit in memory.  The generator cycles
    endlessly: it starts at sample 1 and wraps around to sample 0 after the
    last one.

    Args:
        path: Location of the HDF5 / MATLAB v7.3 file to read.
        nearest: Value subtracted from every depth map before scaling.
            Presumably the smallest depth found in the dataset -- confirm
            if a different file is used.
        farthest: Divisor applied after the subtraction.  Presumably the
            largest depth found in the dataset -- confirm likewise.

    Yields:
        A tuple ``(image, depth)`` of float32 arrays.  An image whose first
        axis has length 3 is treated as channels-first and transposed to
        channels-last; the depth map is ``(raw_depth - nearest) / farthest``.

    Raises:
        ValueError: If the ``images`` dataset contains no samples.
    """
    with h5py.File(path, 'r') as fin:
        images = fin['images']
        depths = fin['depths']

        sample_count = images.shape[0]
        if sample_count == 0:
            # Without this guard the modulo below would raise an opaque
            # ZeroDivisionError on the first iteration.
            raise ValueError("No images found in {!r}".format(path))

        for i in itertools.count(1):
            index = i % sample_count
            im = numpy.asarray(images[index, :, :, :], dtype=numpy.float32)
            if im.shape[0] == 3:  # If channels are first...
                im = numpy.transpose(im, (1, 2, 0))  # ...make channels last.
            dep = numpy.asarray(depths[index, :, :], dtype=numpy.float32)
            # NOTE(review): dividing by `farthest` rather than
            # `farthest - nearest` means the result tops out slightly below
            # 1.0.  Kept as-is so existing checkpoints see the same labels.
            yield (im, (dep - nearest) / farthest)
def load_training_data(batch_size=100, shuffle_buffer_size=10):
    """Build the ``tf.data`` input pipeline used as the Estimator ``input_fn``.

    Samples are streamed from ``depth_generator`` rather than loaded into one
    large array, then lightly shuffled and batched.  Because the generator
    never terminates, no ``repeat()`` is needed; training length is bounded
    by the ``steps`` argument given to the Estimator.

    Args:
        batch_size: Number of (image, depth) pairs per batch.
        shuffle_buffer_size: Size of the shuffle buffer.  Small values only
            shuffle locally, since samples arrive in file order.

    Returns:
        A ``tf.data.Dataset`` yielding ``(images, depths)`` float32 batches
        with per-sample shapes ``[640, 480, 3]`` and ``[640, 480]``.
    """
    dataset = tf.data.Dataset.from_generator(
        depth_generator,
        (tf.float32, tf.float32),
        (tf.TensorShape([640, 480, 3]), tf.TensorShape([640, 480])),
    )
    dataset = dataset.shuffle(shuffle_buffer_size)
    dataset = dataset.batch(batch_size)
    return dataset
# Define the model.
# NOTE: this message is emitted when the module is executed; the network
# itself is only constructed later, when the Estimator calls model_fn.
log.info("Building model")
def _conv3x3_relu(inputs, filters):
    """Apply a 3x3, same-padded convolution followed by ReLU."""
    return tf.layers.conv2d(
        inputs=inputs,
        filters=filters,
        kernel_size=[3, 3],
        padding="same",
        activation=tf.nn.relu,
    )


def _max_pool_2x(inputs):
    """Halve the spatial dimensions with 2x2, stride-2 max pooling."""
    return tf.layers.max_pooling2d(inputs=inputs, pool_size=[2, 2], strides=2)


def _dense_relu(inputs, units=1024):
    """Apply a fully connected layer followed by ReLU."""
    return tf.layers.dense(inputs=inputs, units=units, activation=tf.nn.relu)


def _upsample_2x_relu(inputs, filters):
    """Double the spatial dimensions with a stride-2 transposed convolution."""
    return tf.layers.conv2d_transpose(
        inputs=inputs,
        filters=filters,
        kernel_size=[2, 2],
        strides=(2, 2),  # Stride two so we upscale.
        padding="same",
        activation=tf.nn.relu,
    )


def model_fn(features, labels, mode):
    """Estimator ``model_fn``: regress a 640x480 depth map from an RGB image.

    The network is a convolutional encoder (three 2x poolings), a fully
    connected bottleneck, and a transposed-convolution decoder that restores
    the input resolution.  Layers are created in a fixed order so that the
    automatically generated variable names stay stable across runs.

    Args:
        features: Either a float tensor reshapeable to ``[-1, 640, 480, 3]``
            or a dict holding such a tensor under the key ``'image'`` (the
            form produced by a parsing serving input receiver).
        labels: Target depth maps, ``[-1, 640, 480]``; unused in PREDICT mode.
        mode: One of the ``tf.estimator.ModeKeys`` values.

    Returns:
        A ``tf.estimator.EstimatorSpec`` appropriate for ``mode``.  The
        predictions dict exposes the depth map under the key ``"depths"``.
    """
    # Serving input receivers built from a feature spec deliver a dict;
    # the training pipeline delivers the bare image tensor.  Accept both.
    if isinstance(features, dict):
        features = features["image"]

    # Input layer: 640x480 images with 3 colour channels.
    input_layer = tf.reshape(features, [-1, 640, 480, 3])

    # Encoder.
    conv1 = _conv3x3_relu(input_layer, 32)
    conv2 = _conv3x3_relu(conv1, 32)
    pool1 = _max_pool_2x(conv2)  # 640x480 -> 320x240
    conv3 = _conv3x3_relu(pool1, 64)
    conv4 = _conv3x3_relu(conv3, 64)
    pool2 = _max_pool_2x(conv4)  # 320x240 -> 160x120
    conv5 = _conv3x3_relu(pool2, 128)
    pool3 = _max_pool_2x(conv5)  # 160x120 -> 80x60

    # Flatten.
    pool3_flat = tf.reshape(pool3, [-1, 80 * 60 * 128])

    # Dense section.  Dropout is only active while training.
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    dense1 = _dense_relu(pool3_flat)
    dropout1 = tf.layers.dropout(inputs=dense1, rate=0.4, training=is_training)
    dense2 = _dense_relu(dropout1)
    dropout2 = tf.layers.dropout(inputs=dense2, rate=0.4, training=is_training)
    dense3 = _dense_relu(dropout2)

    # Start deconvolution to restore size: project back to the encoder's
    # final feature-map volume (no activation), then upsample three times.
    dense4 = tf.layers.dense(inputs=dense3, units=80 * 60 * 128)
    unpool1 = tf.reshape(dense4, [-1, 80, 60, 128])
    deconv1 = _upsample_2x_relu(unpool1, 64)  # 80x60x128 -> 160x120x64
    deconv2 = _upsample_2x_relu(deconv1, 32)  # 160x120x64 -> 320x240x32
    deconv3 = _upsample_2x_relu(deconv2, 1)  # 320x240x32 -> 640x480x1

    # Cut off the trailing channel dimension of size 1.
    output = tf.reshape(deconv3, [-1, 640, 480])

    predictions = {"depths": output}
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Loss for both TRAIN and EVAL modes.
    loss = tf.losses.mean_squared_error(
        labels=labels,
        predictions=output,
        weights=1.0,
    )

    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer()
        train_op = optimizer.minimize(
            loss=loss,
            global_step=tf.train.get_global_step(),
        )
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    # EVAL mode.  Classification accuracy (exact equality) is meaningless for
    # continuous depth values, so report regression error metrics instead.
    eval_metric_ops = {
        "mean_absolute_error": tf.metrics.mean_absolute_error(
            labels=labels,
            predictions=predictions["depths"],
        ),
        "root_mean_squared_error": tf.metrics.root_mean_squared_error(
            labels=labels,
            predictions=predictions["depths"],
        ),
    }
    return tf.estimator.EstimatorSpec(
        mode=mode,
        loss=loss,
        eval_metric_ops=eval_metric_ops,
    )
# Build the estimator; checkpoints are written to ./depth_model.
log.info("Building estimator")
est = tf.estimator.Estimator(model_fn=model_fn, model_dir="./depth_model")

log.info("Training model")
# NOTE: steps=1 performs a single optimisation step (a smoke test);
# raise it for real training.
est.train(input_fn=load_training_data, steps=1)

# Save results.
log.info("Saving model")
# FixedLenFeature expects a TensorFlow dtype, not a NumPy one.
feature_spec = {'image': tf.FixedLenFeature(shape=[640, 480, 3], dtype=tf.float32)}


def _serving_input_receiver_fn():
    """Parse serialized ``tf.Example`` protos and hand the image tensor to the model.

    The receiver accepts the same input as the stock parsing receiver (a batch
    of serialized examples under the key ``'examples'``) but forwards the
    parsed ``'image'`` tensor directly instead of a features dict, because
    ``model_fn`` reshapes its ``features`` argument as a single tensor.
    """
    serialized = tf.placeholder(
        dtype=tf.string,
        shape=[None],
        name='input_example_tensor',
    )
    parsed = tf.parse_example(serialized, feature_spec)
    return tf.estimator.export.TensorServingInputReceiver(
        features=parsed['image'],
        receiver_tensors={'examples': serialized},
    )


# The SavedModel is exported into a timestamped sub-directory of the same
# directory that holds the checkpoints.
est.export_savedmodel(
    export_dir_base="./depth_model",
    serving_input_receiver_fn=_serving_input_receiver_fn,
)

# To inspect results, call est.predict(input_fn=...) with an input_fn that
# yields images; each prediction is a dict with the key "depths".
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment