jperl/residual_network.py

## residual_network.py
"""
Clean and simple Keras implementation of network architectures described in:
    - (ResNet-50) [Deep Residual Learning for Image Recognition](https://arxiv.org/pdf/1512.03385.pdf).
    - (ResNeXt-50 32x4d) [Aggregated Residual Transformations for Deep Neural Networks](https://arxiv.org/pdf/1611.05431.pdf).

Python 3.
"""

from keras import layers
from keras import models


#
# image dimensions
#

img_height = 224
img_width = 224
img_channels = 3

#
# network params
#

cardinality = 32


def residual_network(x):
    """
    ResNeXt by default. For ResNet set `cardinality` = 1 above.

    """
    def add_common_layers(y):
        y = layers.BatchNormalization()(y)
        y = layers.LeakyReLU()(y)

        return y

    def grouped_convolution(y, nb_channels, _strides):
        # when `cardinality` == 1 this is just a standard convolution
        if cardinality == 1:
            return layers.Conv2D(nb_channels, kernel_size=(3, 3), strides=_strides, padding='same')(y)

        assert not nb_channels % cardinality
        _d = nb_channels // cardinality

        # in a grouped convolution layer, input and output channels are divided into `cardinality` groups,
        # and convolutions are separately performed within each group
        groups = []
        for j in range(cardinality):
            group = layers.Lambda(lambda z: z[:, :, :, j * _d:j * _d + _d])(y)
            groups.append(layers.Conv2D(_d, kernel_size=(3, 3), strides=_strides, padding='same')(group))

        # the grouped convolutional layer concatenates them as the outputs of the layer
        y = layers.concatenate(groups)

        return y

    def residual_block(y, nb_channels_in, nb_channels_out, _strides=(1, 1), _project_shortcut=False):
        """
        Our network consists of a stack of residual blocks. These blocks have the same topology,
        and are subject to two simple rules:

        - If producing spatial maps of the same size, the blocks share the same hyper-parameters (width and filter sizes).
        - Each time the spatial map is down-sampled by a factor of 2, the width of the blocks is multiplied by a factor of 2.
        """
        shortcut = y

        # we modify the residual building block as a bottleneck design to make the network more economical
        y = layers.Conv2D(nb_channels_in, kernel_size=(1, 1), strides=(1, 1), padding='same')(y)
        y = add_common_layers(y)

        # ResNeXt (identical to ResNet when `cardinality` == 1)
        y = grouped_convolution(y, nb_channels_in, _strides=_strides)
        y = add_common_layers(y)

        y = layers.Conv2D(nb_channels_out, kernel_size=(1, 1), strides=(1, 1), padding='same')(y)
        # batch normalization is employed after aggregating the transformations and before adding to the shortcut
        y = layers.BatchNormalization()(y)

        # identity shortcuts used directly when the input and output are of the same dimensions
        if _project_shortcut or _strides != (1, 1):
            # when the dimensions increase projection shortcut is used to match dimensions (done by 1×1 convolutions)
            # when the shortcuts go across feature maps of two sizes, they are performed with a stride of 2
            shortcut = layers.Conv2D(nb_channels_out, kernel_size=(1, 1), strides=_strides, padding='same')(shortcut)
            shortcut = layers.BatchNormalization()(shortcut)

        y = layers.add([shortcut, y])

        # relu is performed right after each batch normalization,
        # expect for the output of the block where relu is performed after the adding to the shortcut
        y = layers.LeakyReLU()(y)

        return y

    # conv1
    x = layers.Conv2D(64, kernel_size=(7, 7), strides=(2, 2), padding='same')(x)
    x = add_common_layers(x)

    # conv2
    x = layers.MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)
    for i in range(3):
        project_shortcut = True if i == 0 else False
        x = residual_block(x, 128, 256, _project_shortcut=project_shortcut)

    # conv3
    for i in range(4):
        # down-sampling is performed by conv3_1, conv4_1, and conv5_1 with a stride of 2
        strides = (2, 2) if i == 0 else (1, 1)
        x = residual_block(x, 256, 512, _strides=strides)

    # conv4
    for i in range(6):
        strides = (2, 2) if i == 0 else (1, 1)
        x = residual_block(x, 512, 1024, _strides=strides)

    # conv5
    for i in range(3):
        strides = (2, 2) if i == 0 else (1, 1)
        x = residual_block(x, 1024, 2048, _strides=strides)

    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(1)(x)

    return x


image_tensor = layers.Input(shape=(img_height, img_width, img_channels))
network_output = residual_network(image_tensor)

model = models.Model(inputs=[image_tensor], outputs=[network_output])
print(model.summary())
	"""
	Clean and simple Keras implementation of network architectures described in:
	- (ResNet-50) [Deep Residual Learning for Image Recognition](https://arxiv.org/pdf/1512.03385.pdf).
	- (ResNeXt-50 32x4d) [Aggregated Residual Transformations for Deep Neural Networks](https://arxiv.org/pdf/1611.05431.pdf).

	Python 3.
	"""

	from keras import layers
	from keras import models


	#
	# image dimensions
	#

	img_height = 224
	img_width = 224
	img_channels = 3

	#
	# network params
	#

	cardinality = 32


	def residual_network(x):
	"""
	ResNeXt by default. For ResNet set `cardinality` = 1 above.

	"""
	def add_common_layers(y):
	y = layers.BatchNormalization()(y)
	y = layers.LeakyReLU()(y)

	return y

	def grouped_convolution(y, nb_channels, _strides):
	# when `cardinality` == 1 this is just a standard convolution
	if cardinality == 1:
	return layers.Conv2D(nb_channels, kernel_size=(3, 3), strides=_strides, padding='same')(y)

	assert not nb_channels % cardinality
	_d = nb_channels // cardinality

	# in a grouped convolution layer, input and output channels are divided into `cardinality` groups,
	# and convolutions are separately performed within each group
	groups = []
	for j in range(cardinality):
	group = layers.Lambda(lambda z: z[:, :, :, j * _d:j * _d + _d])(y)
	groups.append(layers.Conv2D(_d, kernel_size=(3, 3), strides=_strides, padding='same')(group))

	# the grouped convolutional layer concatenates them as the outputs of the layer
	y = layers.concatenate(groups)

	return y

	def residual_block(y, nb_channels_in, nb_channels_out, _strides=(1, 1), _project_shortcut=False):
	"""
	Our network consists of a stack of residual blocks. These blocks have the same topology,
	and are subject to two simple rules:

	- If producing spatial maps of the same size, the blocks share the same hyper-parameters (width and filter sizes).
	- Each time the spatial map is down-sampled by a factor of 2, the width of the blocks is multiplied by a factor of 2.
	"""
	shortcut = y

	# we modify the residual building block as a bottleneck design to make the network more economical
	y = layers.Conv2D(nb_channels_in, kernel_size=(1, 1), strides=(1, 1), padding='same')(y)
	y = add_common_layers(y)

	# ResNeXt (identical to ResNet when `cardinality` == 1)
	y = grouped_convolution(y, nb_channels_in, _strides=_strides)
	y = add_common_layers(y)

	y = layers.Conv2D(nb_channels_out, kernel_size=(1, 1), strides=(1, 1), padding='same')(y)
	# batch normalization is employed after aggregating the transformations and before adding to the shortcut
	y = layers.BatchNormalization()(y)

	# identity shortcuts used directly when the input and output are of the same dimensions
	if _project_shortcut or _strides != (1, 1):
	# when the dimensions increase projection shortcut is used to match dimensions (done by 1×1 convolutions)
	# when the shortcuts go across feature maps of two sizes, they are performed with a stride of 2
	shortcut = layers.Conv2D(nb_channels_out, kernel_size=(1, 1), strides=_strides, padding='same')(shortcut)
	shortcut = layers.BatchNormalization()(shortcut)

	y = layers.add([shortcut, y])

	# relu is performed right after each batch normalization,
	# expect for the output of the block where relu is performed after the adding to the shortcut
	y = layers.LeakyReLU()(y)

	return y

	# conv1
	x = layers.Conv2D(64, kernel_size=(7, 7), strides=(2, 2), padding='same')(x)
	x = add_common_layers(x)

	# conv2
	x = layers.MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)
	for i in range(3):
	project_shortcut = True if i == 0 else False
	x = residual_block(x, 128, 256, _project_shortcut=project_shortcut)

	# conv3
	for i in range(4):
	# down-sampling is performed by conv3_1, conv4_1, and conv5_1 with a stride of 2
	strides = (2, 2) if i == 0 else (1, 1)
	x = residual_block(x, 256, 512, _strides=strides)

	# conv4
	for i in range(6):
	strides = (2, 2) if i == 0 else (1, 1)
	x = residual_block(x, 512, 1024, _strides=strides)

	# conv5
	for i in range(3):
	strides = (2, 2) if i == 0 else (1, 1)
	x = residual_block(x, 1024, 2048, _strides=strides)

	x = layers.GlobalAveragePooling2D()(x)
	x = layers.Dense(1)(x)

	return x


	image_tensor = layers.Input(shape=(img_height, img_width, img_channels))
	network_output = residual_network(image_tensor)

	model = models.Model(inputs=[image_tensor], outputs=[network_output])
	print(model.summary())