JosephCatrambone/depth_guess.py

## depth_guess.py
#!/usr/bin/env python

from PIL import Image
from glob import iglob
import h5py
import tensorflow as tf
import numpy
import logging
import itertools

# Module-level logger used for the progress messages in this script.
# NOTE(review): nothing visible in this file configures a logging handler or
# level, so by default the INFO messages emitted here are dropped unless the
# caller configures logging first.
log = logging.getLogger(__name__)

# Load training data from the NYU v2 Labelled Training Data
def depth_generator(path="G:\\nyu_depth_v2_labeled.mat", nearest=0.7132995128631592, farthest=9.99547004699707):
	"""Endlessly yield (image, scaled_depth) training pairs from an HDF5 .mat file.

	The file is opened once and kept open for as long as the generator is
	being consumed; samples are read one at a time rather than loading the
	whole 'images' / 'depths' datasets into memory.

	Iteration starts at index 1 and wraps around with a modulo, so sample 0
	is first produced only after the rest of the dataset has been visited.
	The generator never terminates on its own.

	Args:
		path: Location of the HDF5 file holding the 'images' and 'depths'
			datasets. Defaults to the path this script was written for.
		nearest: Offset subtracted from every depth value. Presumably the
			smallest depth in the dataset -- TODO confirm.
		farthest: Divisor applied after the offset. Presumably the largest
			depth in the dataset -- TODO confirm.

	Yields:
		Tuples of two float32 arrays: the image (moved to channels-last when
		its leading axis has length 3) and the depth map scaled as
		(depth - nearest) / farthest.

	Raises:
		ValueError: If the 'images' dataset contains no samples.
	"""
	with h5py.File(path, 'r') as fin:
		images = fin['images']
		depths = fin['depths']
		# Hoisted: the dataset length does not change while we iterate.
		sample_count = images.shape[0]
		if sample_count == 0:
			# Fail with a clear message instead of a modulo-by-zero error below.
			raise ValueError("No samples found in 'images' dataset of {}".format(path))
		for i in itertools.count(1):
			index = i % sample_count
			im = numpy.asarray(images[index, :, :, :], dtype=numpy.float32)
			if im.shape[0] == 3: # If channels are first...
				im = numpy.transpose(im, (1, 2, 0)) # Make channels last.
			dep = numpy.asarray(depths[index, :, :], dtype=numpy.float32)
			# NOTE: dividing by `farthest` (not farthest - nearest) is kept
			# on purpose so the label scale matches previously trained models.
			yield (im, (dep - nearest) / farthest)

def load_training_data(batch_size=100, shuffle_buffer=10):
	"""Build the tf.data pipeline that streams training pairs from depth_generator.

	Samples are pulled lazily from the generator instead of loading the whole
	dataset up front. No `repeat()` is applied here; the generator itself
	loops over the data indefinitely.

	Args:
		batch_size: Number of (image, depth) pairs per batch. Defaults to the
			original hard-coded value of 100.
		shuffle_buffer: Size of the shuffle buffer. Defaults to the original
			hard-coded value of 10.

	Returns:
		A tf.data.Dataset yielding (images, depths) batches declared as
		float32 tensors of shape [640, 480, 3] and [640, 480] per sample.
	"""
	dataset = tf.data.Dataset.from_generator(
		depth_generator,
		output_types=(tf.float32, tf.float32),
		output_shapes=(tf.TensorShape([640, 480, 3]), tf.TensorShape([640, 480])),
	)
	dataset = dataset.shuffle(shuffle_buffer)
	dataset = dataset.batch(batch_size)
	return dataset

# Define the model.
# NOTE: this message is logged when the module is executed (i.e. when model_fn
# is defined), not when model_fn is later invoked to build the graph.
log.info("Building model")
def _conv3x3_relu(inputs, filters):
	"""Return a 3x3, same-padded convolution of `inputs` followed by ReLU."""
	return tf.layers.conv2d(
		inputs=inputs,
		filters=filters,
		kernel_size=[3, 3],
		padding="same",
		activation=tf.nn.relu
	)

def _downsample(inputs):
	"""Return `inputs` after 2x2 max pooling with stride 2 (halves height and width)."""
	return tf.layers.max_pooling2d(inputs=inputs, pool_size=[2, 2], strides=2)

def _dense_relu(inputs):
	"""Return a 1024-unit fully connected layer over `inputs` followed by ReLU."""
	return tf.layers.dense(inputs=inputs, units=1024, activation=tf.nn.relu)

def _upsample_relu(inputs, filters):
	"""Return a 2x2, stride-2 transposed convolution + ReLU (doubles height and width)."""
	return tf.layers.conv2d_transpose(
		inputs=inputs,
		filters=filters,
		kernel_size=[2, 2],
		strides=(2, 2), # Stride two so we upscale
		padding="same",
		activation=tf.nn.relu,
	)

def model_fn(features, labels, mode):
	"""Estimator model function: convolutional encoder, dense bottleneck, deconvolutional decoder.

	Layers are created in exactly the same order as before the helpers were
	introduced, so the automatically generated variable names (and therefore
	existing checkpoints in the model directory) are unaffected.

	Args:
		features: Image batch that is reshaped to [-1, 640, 480, 3], or a dict
			holding that tensor under the key 'image' (the form produced by
			the parsing serving-input receiver used when exporting).
		labels: Target depth maps compared against the [-1, 640, 480] output
			with mean squared error. Unused in PREDICT mode.
		mode: One of the tf.estimator.ModeKeys values.

	Returns:
		A tf.estimator.EstimatorSpec for the requested mode.
	"""
	training = (mode == tf.estimator.ModeKeys.TRAIN)

	# The export path hands us a dict of parsed features rather than a bare
	# tensor; tf.reshape cannot consume a dict, so unwrap it first.
	if isinstance(features, dict):
		features = features['image']

	# Input Layer
	net = tf.reshape(features, [-1, 640, 480, 3])

	# Encoder
	net = _conv3x3_relu(net, 32)
	net = _conv3x3_relu(net, 32)
	net = _downsample(net)  # 640x480 -> 320x240

	net = _conv3x3_relu(net, 64)
	net = _conv3x3_relu(net, 64)
	net = _downsample(net)  # 320x240 -> 160x120

	net = _conv3x3_relu(net, 128)
	net = _downsample(net)  # 160x120 -> 80x60

	# Flatten
	net = tf.reshape(net, [-1, 80 * 60 * 128])

	# Dense section
	net = _dense_relu(net)
	net = tf.layers.dropout(inputs=net, rate=0.4, training=training)
	net = _dense_relu(net)
	net = tf.layers.dropout(inputs=net, rate=0.4, training=training)
	net = _dense_relu(net)

	# Start deconvolution to restore size: project back up (no activation)
	# and reshape to the spatial layout the encoder ended with.
	net = tf.layers.dense(inputs=net, units=80 * 60 * 128)
	net = tf.reshape(net, [-1, 80, 60, 128])

	net = _upsample_relu(net, 64)  # 80x60x128 -> 160x120x64
	net = _upsample_relu(net, 32)  # 160x120x64 -> 320x240x32
	net = _upsample_relu(net, 1)   # 320x240x32 -> 640x480x1

	# Cut off trailing '1'.
	output = tf.reshape(net, [-1, 640, 480])

	predictions = {
		"depths": output
	}

	if mode == tf.estimator.ModeKeys.PREDICT:
		# Spell out the serving signature so exporting does not depend on the
		# installed TensorFlow version synthesising a default one.
		export_outputs = {
			tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
				tf.estimator.export.PredictOutput(predictions)
		}
		return tf.estimator.EstimatorSpec(
			mode=mode,
			predictions=predictions,
			export_outputs=export_outputs
		)

	# Calculate Loss (for both TRAIN and EVAL modes)
	loss = tf.losses.mean_squared_error(
		labels=labels,
		predictions=output,
		weights=1.0
	)

	# Configure the Training Op (for TRAIN mode)
	if training:
		optimizer = tf.train.AdamOptimizer()
		train_op = optimizer.minimize(
			loss=loss,
			global_step=tf.train.get_global_step())
		return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

	# Add evaluation metrics (for EVAL mode)
	eval_metric_ops = {
		# Exact-equality accuracy says little about a regression output; the
		# key is kept only so existing consumers of the eval results still
		# find it. Use the error metrics below to judge the model.
		"accuracy": tf.metrics.accuracy(
			labels=labels,
			predictions=predictions["depths"]
		),
		"rmse": tf.metrics.root_mean_squared_error(
			labels=labels,
			predictions=predictions["depths"]
		),
		"mae": tf.metrics.mean_absolute_error(
			labels=labels,
			predictions=predictions["depths"]
		),
	}

	return tf.estimator.EstimatorSpec(
		mode=mode,
		loss=loss,
		eval_metric_ops=eval_metric_ops
	)

# Create the estimator; its checkpoints are kept under ./depth_model.
log.info("Building estimator")
est = tf.estimator.Estimator(model_fn=model_fn, model_dir="./depth_model")

# Run training for a single step on the streaming input pipeline.
log.info("Training model")
est.train(input_fn=load_training_data, steps=1)

# Export the trained model as a SavedModel under the same base directory.
log.info("Saving model")
feature_spec = {'image': tf.FixedLenFeature(shape=[640, 480, 3], dtype=numpy.float32)}
# NOTE(review): a parsing receiver feeds model_fn a dict of parsed features
# keyed by 'image' rather than a bare tensor -- confirm model_fn accepts that.
serving_input_receiver_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(
	feature_spec=feature_spec,
	default_batch_size=None
)
est.export_savedmodel(
	export_dir_base="./depth_model",
	serving_input_receiver_fn=serving_input_receiver_fn,
)

#with tf.train.MonitoredTrainingSession() as session:
#	while not session.should_stop():
#		session.run(training_op)

#predict_input_fn = lambda: csv_input_fn(files_name_pattern= TEST_DATA_FILES_PATTERN, mode= tf.estimator.ModeKeys.PREDICT, batch_size= 5)
#predictions = estimator.predict(input_fn=predict_input_fn)
#values = list(map(lambda item: item["predictions"][0],list(itertools.islice(predictions, 5))))
#print()
#print("Predicted Values: {}".format(values))
	#!/usr/bin/env python

	from PIL import Image
	from glob import iglob
	import h5py
	import tensorflow as tf
	import numpy
	import logging
	import itertools

	# Module-level logger used for the progress messages in this script.
	# NOTE(review): nothing visible in this file configures a logging handler or
	# level, so by default the INFO messages emitted here are dropped unless the
	# caller configures logging first.
	log = logging.getLogger(__name__)

	# Load training data from the NYU v2 Labelled Training Data
	def depth_generator(path="G:\\nyu_depth_v2_labeled.mat", nearest=0.7132995128631592, farthest=9.99547004699707):
		"""Endlessly yield (image, scaled_depth) training pairs from an HDF5 .mat file.

		The file is opened once and kept open for as long as the generator is
		being consumed; samples are read one at a time rather than loading the
		whole 'images' / 'depths' datasets into memory.

		Iteration starts at index 1 and wraps around with a modulo, so sample 0
		is first produced only after the rest of the dataset has been visited.
		The generator never terminates on its own.

		Args:
			path: Location of the HDF5 file holding the 'images' and 'depths'
				datasets. Defaults to the path this script was written for.
			nearest: Offset subtracted from every depth value. Presumably the
				smallest depth in the dataset -- TODO confirm.
			farthest: Divisor applied after the offset. Presumably the largest
				depth in the dataset -- TODO confirm.

		Yields:
			Tuples of two float32 arrays: the image (moved to channels-last when
			its leading axis has length 3) and the depth map scaled as
			(depth - nearest) / farthest.

		Raises:
			ValueError: If the 'images' dataset contains no samples.
		"""
		with h5py.File(path, 'r') as fin:
			images = fin['images']
			depths = fin['depths']
			# Hoisted: the dataset length does not change while we iterate.
			sample_count = images.shape[0]
			if sample_count == 0:
				# Fail with a clear message instead of a modulo-by-zero error below.
				raise ValueError("No samples found in 'images' dataset of {}".format(path))
			for i in itertools.count(1):
				index = i % sample_count
				im = numpy.asarray(images[index, :, :, :], dtype=numpy.float32)
				if im.shape[0] == 3: # If channels are first...
					im = numpy.transpose(im, (1, 2, 0)) # Make channels last.
				dep = numpy.asarray(depths[index, :, :], dtype=numpy.float32)
				# NOTE: dividing by `farthest` (not farthest - nearest) is kept
				# on purpose so the label scale matches previously trained models.
				yield (im, (dep - nearest) / farthest)

	def load_training_data(batch_size=100, shuffle_buffer=10):
		"""Build the tf.data pipeline that streams training pairs from depth_generator.

		Samples are pulled lazily from the generator instead of loading the whole
		dataset up front. No `repeat()` is applied here; the generator itself
		loops over the data indefinitely.

		Args:
			batch_size: Number of (image, depth) pairs per batch. Defaults to the
				original hard-coded value of 100.
			shuffle_buffer: Size of the shuffle buffer. Defaults to the original
				hard-coded value of 10.

		Returns:
			A tf.data.Dataset yielding (images, depths) batches declared as
			float32 tensors of shape [640, 480, 3] and [640, 480] per sample.
		"""
		dataset = tf.data.Dataset.from_generator(
			depth_generator,
			output_types=(tf.float32, tf.float32),
			output_shapes=(tf.TensorShape([640, 480, 3]), tf.TensorShape([640, 480])),
		)
		dataset = dataset.shuffle(shuffle_buffer)
		dataset = dataset.batch(batch_size)
		return dataset

	# Define the model.
	# NOTE: this message is logged when the module is executed (i.e. when model_fn
	# is defined), not when model_fn is later invoked to build the graph.
	log.info("Building model")
	def _conv3x3_relu(inputs, filters):
		"""Return a 3x3, same-padded convolution of `inputs` followed by ReLU."""
		return tf.layers.conv2d(
			inputs=inputs,
			filters=filters,
			kernel_size=[3, 3],
			padding="same",
			activation=tf.nn.relu
		)

	def _downsample(inputs):
		"""Return `inputs` after 2x2 max pooling with stride 2 (halves height and width)."""
		return tf.layers.max_pooling2d(inputs=inputs, pool_size=[2, 2], strides=2)

	def _dense_relu(inputs):
		"""Return a 1024-unit fully connected layer over `inputs` followed by ReLU."""
		return tf.layers.dense(inputs=inputs, units=1024, activation=tf.nn.relu)

	def _upsample_relu(inputs, filters):
		"""Return a 2x2, stride-2 transposed convolution + ReLU (doubles height and width)."""
		return tf.layers.conv2d_transpose(
			inputs=inputs,
			filters=filters,
			kernel_size=[2, 2],
			strides=(2, 2), # Stride two so we upscale
			padding="same",
			activation=tf.nn.relu,
		)

	def model_fn(features, labels, mode):
		"""Estimator model function: convolutional encoder, dense bottleneck, deconvolutional decoder.

		Layers are created in exactly the same order as before the helpers were
		introduced, so the automatically generated variable names (and therefore
		existing checkpoints in the model directory) are unaffected.

		Args:
			features: Image batch that is reshaped to [-1, 640, 480, 3], or a dict
				holding that tensor under the key 'image' (the form produced by
				the parsing serving-input receiver used when exporting).
			labels: Target depth maps compared against the [-1, 640, 480] output
				with mean squared error. Unused in PREDICT mode.
			mode: One of the tf.estimator.ModeKeys values.

		Returns:
			A tf.estimator.EstimatorSpec for the requested mode.
		"""
		training = (mode == tf.estimator.ModeKeys.TRAIN)

		# The export path hands us a dict of parsed features rather than a bare
		# tensor; tf.reshape cannot consume a dict, so unwrap it first.
		if isinstance(features, dict):
			features = features['image']

		# Input Layer
		net = tf.reshape(features, [-1, 640, 480, 3])

		# Encoder
		net = _conv3x3_relu(net, 32)
		net = _conv3x3_relu(net, 32)
		net = _downsample(net)  # 640x480 -> 320x240

		net = _conv3x3_relu(net, 64)
		net = _conv3x3_relu(net, 64)
		net = _downsample(net)  # 320x240 -> 160x120

		net = _conv3x3_relu(net, 128)
		net = _downsample(net)  # 160x120 -> 80x60

		# Flatten
		net = tf.reshape(net, [-1, 80 * 60 * 128])

		# Dense section
		net = _dense_relu(net)
		net = tf.layers.dropout(inputs=net, rate=0.4, training=training)
		net = _dense_relu(net)
		net = tf.layers.dropout(inputs=net, rate=0.4, training=training)
		net = _dense_relu(net)

		# Start deconvolution to restore size: project back up (no activation)
		# and reshape to the spatial layout the encoder ended with.
		net = tf.layers.dense(inputs=net, units=80 * 60 * 128)
		net = tf.reshape(net, [-1, 80, 60, 128])

		net = _upsample_relu(net, 64)  # 80x60x128 -> 160x120x64
		net = _upsample_relu(net, 32)  # 160x120x64 -> 320x240x32
		net = _upsample_relu(net, 1)   # 320x240x32 -> 640x480x1

		# Cut off trailing '1'.
		output = tf.reshape(net, [-1, 640, 480])

		predictions = {
			"depths": output
		}

		if mode == tf.estimator.ModeKeys.PREDICT:
			# Spell out the serving signature so exporting does not depend on the
			# installed TensorFlow version synthesising a default one.
			export_outputs = {
				tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
					tf.estimator.export.PredictOutput(predictions)
			}
			return tf.estimator.EstimatorSpec(
				mode=mode,
				predictions=predictions,
				export_outputs=export_outputs
			)

		# Calculate Loss (for both TRAIN and EVAL modes)
		loss = tf.losses.mean_squared_error(
			labels=labels,
			predictions=output,
			weights=1.0
		)

		# Configure the Training Op (for TRAIN mode)
		if training:
			optimizer = tf.train.AdamOptimizer()
			train_op = optimizer.minimize(
				loss=loss,
				global_step=tf.train.get_global_step())
			return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

		# Add evaluation metrics (for EVAL mode)
		eval_metric_ops = {
			# Exact-equality accuracy says little about a regression output; the
			# key is kept only so existing consumers of the eval results still
			# find it. Use the error metrics below to judge the model.
			"accuracy": tf.metrics.accuracy(
				labels=labels,
				predictions=predictions["depths"]
			),
			"rmse": tf.metrics.root_mean_squared_error(
				labels=labels,
				predictions=predictions["depths"]
			),
			"mae": tf.metrics.mean_absolute_error(
				labels=labels,
				predictions=predictions["depths"]
			),
		}

		return tf.estimator.EstimatorSpec(
			mode=mode,
			loss=loss,
			eval_metric_ops=eval_metric_ops
		)

	# Create the estimator; its checkpoints are kept under ./depth_model.
	log.info("Building estimator")
	est = tf.estimator.Estimator(model_fn=model_fn, model_dir="./depth_model")

	# Run training for a single step on the streaming input pipeline.
	log.info("Training model")
	est.train(input_fn=load_training_data, steps=1)

	# Export the trained model as a SavedModel under the same base directory.
	log.info("Saving model")
	feature_spec = {'image': tf.FixedLenFeature(shape=[640, 480, 3], dtype=numpy.float32)}
	# NOTE(review): a parsing receiver feeds model_fn a dict of parsed features
	# keyed by 'image' rather than a bare tensor -- confirm model_fn accepts that.
	serving_input_receiver_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(
		feature_spec=feature_spec,
		default_batch_size=None
	)
	est.export_savedmodel(
		export_dir_base="./depth_model",
		serving_input_receiver_fn=serving_input_receiver_fn,
	)

	#with tf.train.MonitoredTrainingSession() as session:
	# while not session.should_stop():
	# session.run(training_op)

	#predict_input_fn = lambda: csv_input_fn(files_name_pattern= TEST_DATA_FILES_PATTERN, mode= tf.estimator.ModeKeys.PREDICT, batch_size= 5)
	#predictions = estimator.predict(input_fn=predict_input_fn)
	#values = list(map(lambda item: item["predictions"][0],list(itertools.islice(predictions, 5))))
	#print()
	#print("Predicted Values: {}".format(values))