Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
A quick and dirty Python script that loads the NYU Depth v2 dataset and trains a depth-estimation model on it.
#!/usr/bin/env python
from PIL import Image
from glob import iglob
import h5py
import tensorflow as tf
import numpy
import logging
import itertools
log = logging.getLogger(__name__)
# Load training data from the NYU v2 Labelled Training Data
def depth_generator(path="G:\\nyu_depth_v2_labeled.mat",
                    nearest=0.7132995128631592,
                    farthest=9.99547004699707):
    """Yield (image, normalised depth) pairs from the labelled .mat file, forever.

    The file is opened once and samples are read one at a time, so the whole
    dataset is never held in memory. Iteration never terminates: the sample
    index starts at 1 and wraps around modulo the number of images.

    Args:
        path: Location of the HDF5-readable file containing the 'images' and
            'depths' datasets.
        nearest: Offset subtracted from every depth value.
        farthest: Divisor applied after the offset. Note the result is
            (depth - nearest) / farthest, not a strict min-max scaling; with
            the default constants the largest value maps to roughly 0.93.

    Yields:
        Tuples (image, depth) of float32 arrays. Images whose first axis has
        length 3 are transposed so that the channel axis comes last.

    Raises:
        ValueError: If the 'images' dataset contains no samples.
    """
    with h5py.File(path, 'r') as fin:
        images = fin['images']
        depths = fin['depths']
        sample_count = images.shape[0]
        if sample_count == 0:
            # Without this guard the modulo below fails with a bare
            # ZeroDivisionError that does not say which file was empty.
            raise ValueError("No images found in {}".format(path))
        for i in itertools.count(1):
            index = i % sample_count
            im = numpy.asarray(images[index, :, :, :], dtype=numpy.float32)
            if im.shape[0] == 3:  # If channels are first...
                im = numpy.transpose(im, (1, 2, 0))  # ...make channels last.
            dep = numpy.asarray(depths[index, :, :], dtype=numpy.float32)
            yield (im, (dep - nearest) / farthest)
def load_training_data(batch_size=100, shuffle_buffer=10):
    """Build the tf.data pipeline used as the Estimator's input_fn.

    Samples are streamed from depth_generator rather than loaded into one big
    array. The generator never ends, so no repeat() is applied; the number of
    training steps is bounded by the caller.

    Args:
        batch_size: Number of (image, depth) pairs per batch.
        shuffle_buffer: Size of the shuffle buffer. Small values only shuffle
            locally within the stream.

    Returns:
        A tf.data.Dataset yielding float32 batches of images shaped
        [batch, 640, 480, 3] and depth maps shaped [batch, 640, 480].
    """
    dataset = tf.data.Dataset.from_generator(
        depth_generator,
        output_types=(tf.float32, tf.float32),
        output_shapes=(
            tf.TensorShape([640, 480, 3]),
            tf.TensorShape([640, 480]),
        ),
    )
    dataset = dataset.shuffle(shuffle_buffer)
    dataset = dataset.batch(batch_size)
    return dataset
# Define the model.
# This message is emitted when the script reaches this point, i.e. just before
# model_fn is defined; model_fn itself is only handed to the Estimator later.
log.info("Building model")
def model_fn(features, labels, mode):
    """Estimator model function: conv encoder, dense bottleneck, deconv decoder.

    Args:
        features: Batch of RGB images, reshaped here to [-1, 640, 480, 3].
            May also be a dict holding that tensor under the key "image",
            which is the form produced by a parsing serving input receiver
            built from a feature spec keyed by 'image'.
        labels: Depth maps compared against the [-1, 640, 480] prediction
            with a mean squared error. Not used in PREDICT mode.
        mode: One of the tf.estimator.ModeKeys values.

    Returns:
        A tf.estimator.EstimatorSpec: predictions only for PREDICT, loss and
        train op for TRAIN, loss and regression metrics for EVAL.
    """
    # tf.reshape cannot take a dict, so unwrap the image tensor first.
    if isinstance(features, dict):
        features = features["image"]
    training = mode == tf.estimator.ModeKeys.TRAIN

    def conv3x3(inputs, filters):
        # 3x3 same-padded convolution with ReLU; keeps the spatial size.
        return tf.layers.conv2d(
            inputs=inputs,
            filters=filters,
            kernel_size=[3, 3],
            padding="same",
            activation=tf.nn.relu,
        )

    def halve(inputs):
        # 2x2 max pooling with stride 2; halves both spatial dimensions.
        return tf.layers.max_pooling2d(inputs=inputs, pool_size=[2, 2], strides=2)

    def dense_relu(inputs):
        # Fully connected layer of 1024 units with ReLU.
        return tf.layers.dense(inputs=inputs, units=1024, activation=tf.nn.relu)

    def drop(inputs):
        # Dropout that is only active while training.
        return tf.layers.dropout(inputs=inputs, rate=0.4, training=training)

    def double(inputs, filters):
        # Stride-2 transposed convolution; doubles both spatial dimensions.
        return tf.layers.conv2d_transpose(
            inputs=inputs,
            filters=filters,
            kernel_size=[2, 2],
            strides=(2, 2),
            padding="same",
            activation=tf.nn.relu,
        )

    # Input layer: full-resolution 640x480 colour images.
    input_layer = tf.reshape(features, [-1, 640, 480, 3])

    # Encoder.
    conv1 = conv3x3(input_layer, 32)
    conv2 = conv3x3(conv1, 32)
    pool1 = halve(conv2)  # 640x480 -> 320x240
    conv3 = conv3x3(pool1, 64)
    conv4 = conv3x3(conv3, 64)
    pool2 = halve(conv4)  # 320x240 -> 160x120
    conv5 = conv3x3(pool2, 128)
    pool3 = halve(conv5)  # 160x120 -> 80x60

    # Flatten.
    pool3_flat = tf.reshape(pool3, [-1, 80 * 60 * 128])

    # Dense section.
    dense1 = dense_relu(pool3_flat)
    dropout1 = drop(dense1)
    dense2 = dense_relu(dropout1)
    dropout2 = drop(dense2)
    dense3 = dense_relu(dropout2)

    # Start deconvolution to restore size. This projection has no activation.
    dense4 = tf.layers.dense(inputs=dense3, units=80 * 60 * 128)
    unpool1 = tf.reshape(dense4, [-1, 80, 60, 128])
    deconv1 = double(unpool1, 64)  # 80x60x128 -> 160x120x64
    deconv2 = double(deconv1, 32)  # 160x120x64 -> 320x240x32
    deconv3 = double(deconv2, 1)  # 320x240x32 -> 640x480x1

    # Cut off the trailing channel axis of size 1.
    output = tf.reshape(deconv3, [-1, 640, 480])

    # Predictions, returned as-is in PREDICT mode.
    predictions = {"depths": output}
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate loss (for both TRAIN and EVAL modes).
    loss = tf.losses.mean_squared_error(
        labels=labels,
        predictions=output,
        weights=1.0,
    )

    # Configure the training op (for TRAIN mode).
    if training:
        optimizer = tf.train.AdamOptimizer()
        train_op = optimizer.minimize(
            loss=loss,
            global_step=tf.train.get_global_step(),
        )
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    # Evaluation metrics (for EVAL mode). Depth is a continuous target, so
    # error metrics are reported; exact-match accuracy on floats carries no
    # information.
    eval_metric_ops = {
        "rmse": tf.metrics.root_mean_squared_error(
            labels=labels,
            predictions=predictions["depths"],
        ),
        "mae": tf.metrics.mean_absolute_error(
            labels=labels,
            predictions=predictions["depths"],
        ),
    }
    return tf.estimator.EstimatorSpec(
        mode=mode,
        loss=loss,
        eval_metric_ops=eval_metric_ops,
    )
# Build the Estimator around model_fn; checkpoints go to ./depth_model.
log.info("Building estimator")
est = tf.estimator.Estimator(model_fn=model_fn, model_dir="./depth_model")
# Train. steps=1 runs a single batch only; raise it for a real training run.
log.info("Training model")
est.train(input_fn=load_training_data, steps=1)
# Save results as a SavedModel whose serving input is parsed from serialized
# tf.Example protos carrying a float 'image' feature of shape 640x480x3.
log.info("Saving model")
# Use the TensorFlow dtype here, matching the dtypes used by the input pipeline.
feature_spec = {'image': tf.FixedLenFeature(shape=[640, 480, 3], dtype=tf.float32)}
est.export_savedmodel(
    export_dir_base="./depth_model",
    serving_input_receiver_fn=tf.estimator.export.build_parsing_serving_input_receiver_fn(
        feature_spec=feature_spec,
        default_batch_size=None,
    ),
)
#with tf.train.MonitoredTrainingSession() as session:
# while not session.should_stop():
# session.run(training_op)
#predict_input_fn = lambda: csv_input_fn(files_name_pattern= TEST_DATA_FILES_PATTERN, mode= tf.estimator.ModeKeys.PREDICT, batch_size= 5)
#predictions = estimator.predict(input_fn=predict_input_fn)
#values = list(map(lambda item: item["predictions"][0],list(itertools.islice(predictions, 5))))
#print()
#print("Predicted Values: {}".format(values))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.