# yardstick17/spp_deep_network.py

## spp_deep_network.py
# Number of units in the final Dense layer built by Spp(), i.e. the number of
# classes the softmax output distinguishes.
CUSTOM_OUTPUT_CATEGORIES = 2
import keras.backend as K
from keras.engine.topology import Layer
from keras.layers import (Activation, Convolution2D, Dense, Dropout,
                          MaxPooling2D)
from keras.models import Sequential


class SpatialPyramidPooling(Layer):
    '''Spatial pyramid pooling layer for 2D inputs.

    See "Spatial Pyramid Pooling in Deep Convolutional Networks for Visual
    Recognition", K. He, X. Zhang, S. Ren, J. Sun.

    Each entry ``n`` of ``pool_list`` splits the feature map into an ``n x n``
    grid and max-pools every cell, so the output length does not depend on the
    spatial size of the input.

    # Arguments
        pool_list: list of int
            List of pooling regions to use. The length of the list is the
            number of pyramid levels; each int is the number of bins per side
            at that level. For example [1, 2, 4] gives 1 + 2x2 + 4x4 = 21
            outputs per feature map.

    # Input shape
        4D tensor with shape:
        `(samples, channels, rows, cols)` if dim_ordering='th'
        or 4D tensor with shape:
        `(samples, rows, cols, channels)` if dim_ordering='tf'.

    # Output shape
        2D tensor with shape:
        `(samples, channels * sum([i * i for i in pool_list]))`
    '''

    def __init__(self, pool_list, **kwargs):
        # The layout is read once from the backend configuration and then
        # drives every axis choice below.
        self.dim_ordering = K.image_dim_ordering()
        assert self.dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'

        self.pool_list = pool_list
        # One pooled value per grid cell, summed over all pyramid levels.
        self.num_outputs_per_channel = sum(i * i for i in pool_list)

        super(SpatialPyramidPooling, self).__init__(**kwargs)

    def _channel_axis(self):
        '''Return the index of the channel axis for the active dim ordering.'''
        return 1 if self.dim_ordering == 'th' else 3

    def build(self, input_shape):
        '''Record the number of input channels; the layer has no weights.'''
        self.nb_channels = input_shape[self._channel_axis()]
        super(SpatialPyramidPooling, self).build(input_shape)

    def get_output_shape_for(self, input_shape):
        '''Return `(samples, channels * num_outputs_per_channel)`.

        The channel count is taken from `input_shape` itself so the result
        does not depend on `build` having been called first.
        '''
        nb_channels = input_shape[self._channel_axis()]
        return (input_shape[0], nb_channels * self.num_outputs_per_channel)

    def compute_output_shape(self, input_shape):
        '''Keras 2 name of the shape-inference hook; same result as
        `get_output_shape_for`.'''
        return self.get_output_shape_for(input_shape)

    def get_config(self):
        '''Return the base layer config extended with `pool_list`.'''
        config = {'pool_list': self.pool_list}
        base_config = super(SpatialPyramidPooling, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    @staticmethod
    def _bin_bounds(index, length, size):
        '''Return the integer `(start, stop)` slice bounds of one bin.

        `length` is the fractional bin size along one axis, `index` the bin
        position and `size` the (symbolic) extent of that axis.
        '''
        start = K.cast(K.round(index * length), 'int32')
        stop = K.cast(K.round(index * length + length), 'int32')
        # When the axis is shorter than the number of bins, rounding can make
        # start == stop (an empty crop) or push start past the last cell.
        # Clamp so every bin covers at least one cell; for axes at least as
        # long as the bin count this leaves the bounds unchanged.
        start = K.minimum(start, K.cast(size, 'int32') - 1)
        stop = K.maximum(stop, start + 1)
        return start, stop

    def _pool_region(self, x, input_shape, y1, y2, x1, x2):
        '''Max-pool the crop rows `y1:y2`, cols `x1:x2` over its spatial axes.

        Returns a `(samples, channels)` tensor.
        '''
        if self.dim_ordering == 'th':
            new_shape = [input_shape[0], input_shape[1], y2 - y1, x2 - x1]
            x_crop = x[:, :, y1:y2, x1:x2]
            spatial_axes = (2, 3)
        else:
            new_shape = [input_shape[0], y2 - y1, x2 - x1, input_shape[3]]
            x_crop = x[:, y1:y2, x1:x2, :]
            spatial_axes = (1, 2)
        # Reshape to the explicitly computed crop shape before reducing
        # (kept from the original implementation).
        xm = K.reshape(x_crop, new_shape)
        return K.max(xm, axis=spatial_axes)

    def call(self, x, mask=None):
        '''Pool `x` over every bin of every pyramid level and concatenate.

        Bins are emitted level by level, row-major within a level, and joined
        along the last axis.
        '''
        input_shape = K.shape(x)

        if self.dim_ordering == 'th':
            num_rows, num_cols = input_shape[2], input_shape[3]
        else:
            num_rows, num_cols = input_shape[1], input_shape[2]

        rows_f = K.cast(num_rows, 'float32')
        cols_f = K.cast(num_cols, 'float32')

        outputs = []
        for num_pool_regions in self.pool_list:
            row_length = rows_f / num_pool_regions
            col_length = cols_f / num_pool_regions
            # Column bounds are the same for every row of bins at this level.
            col_bounds = [self._bin_bounds(ix, col_length, num_cols)
                          for ix in range(num_pool_regions)]
            for jy in range(num_pool_regions):
                y1, y2 = self._bin_bounds(jy, row_length, num_rows)
                for x1, x2 in col_bounds:
                    outputs.append(
                        self._pool_region(x, input_shape, y1, y2, x1, x2))

        return K.concatenate(outputs)

def Spp():
    '''Build the spatial-pyramid-pooling classification network.

    The model stacks four Convolution2D + 2x2 MaxPooling2D stages, a
    SpatialPyramidPooling([1, 2, 4]) layer, two 4096-unit ReLU Dense layers
    each followed by Dropout(0.5), and a final Dense layer of
    CUSTOM_OUTPUT_CATEGORIES units with a softmax activation.

    # Returns
        An uncompiled `Sequential` model.
    '''
    # The spatial dimensions are left as None to allow multiple image sizes.
    # The position of the 3-channel axis follows the backend dim ordering, so
    # the channel count is defined under both 'th' and 'tf'.
    if K.image_dim_ordering() == 'th':
        input_shape = (3, None, None)
    else:
        input_shape = (None, None, 3)

    model = Sequential()

    model.add(Convolution2D(96, 11, 11, border_mode='same', input_shape=input_shape, activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Convolution2D(32, 3, 3, activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Convolution2D(64, 3, 3, border_mode='same', activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Convolution2D(64, 3, 3, activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # Collapses the variable-size feature map to a fixed-length vector so the
    # Dense layers below get a known input dimension.
    model.add(SpatialPyramidPooling([1, 2, 4]))

    model.add(Dense(4096, activation='relu', name='dense_1'))
    model.add(Dropout(0.5))
    model.add(Dense(4096, activation='relu', name='dense_2'))
    model.add(Dropout(0.5))
    model.add(Dense(CUSTOM_OUTPUT_CATEGORIES, name='dense_3'))
    model.add(Activation('softmax'))
    return model