# [TF-KERAS] Simple object localization
import numpy as np
import tensorflow as tf

# Detect hardware
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
except ValueError:
    tpu = None
gpus = tf.config.experimental.list_logical_devices("GPU")

# Select appropriate distribution strategy
if tpu:
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    # Going back and forth between TPU and host is expensive.
    # Better to run 128 batches on the TPU before reporting back.
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
    print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])
elif len(gpus) > 1:
    strategy = tf.distribute.MirroredStrategy([gpu.name for gpu in gpus])
    print('Running on multiple GPUs ', [gpu.name for gpu in gpus])
elif len(gpus) == 1:
    strategy = tf.distribute.get_strategy()  # default strategy that works on CPU and single GPU
    print('Running on single GPU ', gpus[0].name)
else:
    strategy = tf.distribute.get_strategy()  # default strategy that works on CPU and single GPU
    print('Running on CPU')

print("Number of accelerators: ", strategy.num_replicas_in_sync)
'''
Feature extractor is the CNN that is made up of convolution and pooling layers.
'''
def feature_extractor(inputs):
    x = tf.keras.layers.Conv2D(16, kernel_size=3, activation='relu', input_shape=(75, 75, 1))(inputs)
    x = tf.keras.layers.AveragePooling2D((2, 2))(x)
    x = tf.keras.layers.Conv2D(32, kernel_size=3, activation='relu')(x)
    x = tf.keras.layers.AveragePooling2D((2, 2))(x)
    x = tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu')(x)
    x = tf.keras.layers.AveragePooling2D((2, 2))(x)
    return x
'''
dense_layers adds a flatten and a dense layer.
This follows the feature extraction layers.
'''
def dense_layers(inputs):
    x = tf.keras.layers.Flatten()(inputs)
    x = tf.keras.layers.Dense(128, activation='relu')(x)
    return x
'''
Classifier defines the classification output.
This is a fully connected layer with a softmax activation over the 10 classes.
'''
def classifier(inputs):
    classification_output = tf.keras.layers.Dense(10, activation='softmax', name='classification')(inputs)
    return classification_output
'''
This function defines the regression output for bounding box prediction.
Note that we have four outputs corresponding to (xmin, ymin, xmax, ymax).
'''
def bounding_box_regression(inputs):
    bounding_box_regression_output = tf.keras.layers.Dense(units=4, name='bounding_box')(inputs)
    return bounding_box_regression_output
def final_model(inputs):
    feature_cnn = feature_extractor(inputs)
    dense_output = dense_layers(feature_cnn)

    '''
    The model branches here.
    The dense layer's output gets fed into two branches:
    classification_output and bounding_box_output.
    '''
    classification_output = classifier(dense_output)
    bounding_box_output = bounding_box_regression(dense_output)

    model = tf.keras.Model(inputs=inputs, outputs=[classification_output, bounding_box_output])
    return model
def define_and_compile_model(inputs):
    model = final_model(inputs)
    model.compile(optimizer='adam',
                  loss={'classification': 'categorical_crossentropy',
                        'bounding_box': 'mse'},
                  metrics={'classification': 'accuracy',
                           'bounding_box': 'mse'})
    return model
with strategy.scope():
    inputs = tf.keras.layers.Input(shape=(75, 75, 1))
    model = define_and_compile_model(inputs)

# print model layers
model.summary()
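
# --- Hedged usage sketch (not part of the original gist) ---
# The snippet below only illustrates how this two-headed model could be fit.
# The random dummy_images, dummy_labels and dummy_boxes arrays are assumptions
# standing in for a real dataset of 75x75 grayscale images with one-hot class
# labels and (xmin, ymin, xmax, ymax) box targets.
dummy_images = np.random.rand(32, 75, 75, 1).astype('float32')
dummy_labels = tf.keras.utils.to_categorical(np.random.randint(0, 10, size=32), num_classes=10)
dummy_boxes = np.random.rand(32, 4).astype('float32')
history = model.fit(dummy_images,
                    {'classification': dummy_labels, 'bounding_box': dummy_boxes},
                    epochs=1, batch_size=8)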
# IoU metric
def intersection_over_union(pred_box, true_box):
    xmin_pred, ymin_pred, xmax_pred, ymax_pred = np.split(pred_box, 4, axis=1)
    xmin_true, ymin_true, xmax_true, ymax_true = np.split(true_box, 4, axis=1)

    smoothing_factor = 1e-10

    # Coordinates of the intersection rectangle
    xmin_overlap = np.maximum(xmin_pred, xmin_true)
    xmax_overlap = np.minimum(xmax_pred, xmax_true)
    ymin_overlap = np.maximum(ymin_pred, ymin_true)
    ymax_overlap = np.minimum(ymax_pred, ymax_true)

    pred_box_area = (xmax_pred - xmin_pred) * (ymax_pred - ymin_pred)
    true_box_area = (xmax_true - xmin_true) * (ymax_true - ymin_true)

    overlap_area = np.maximum((xmax_overlap - xmin_overlap), 0) * np.maximum((ymax_overlap - ymin_overlap), 0)
    union_area = (pred_box_area + true_box_area) - overlap_area

    iou = (overlap_area + smoothing_factor) / (union_area + smoothing_factor)
    return iou
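
# --- Hedged usage sketch (not part of the original gist) ---
# The boxes below are made-up values just to show the metric's behaviour:
# the first pair overlaps heavily (IoU around 0.8), the second pair does not
# overlap at all (IoU near 0). Each row is (xmin, ymin, xmax, ymax).
example_pred_boxes = np.array([[10., 10., 50., 50.], [0., 0., 20., 20.]])
example_true_boxes = np.array([[12., 12., 48., 48.], [30., 30., 60., 60.]])
print(intersection_over_union(example_pred_boxes, example_true_boxes))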