@JefferyRPrice
Last active Sep 16, 2021
ResNet-like network using full-preactivation, bottleneck only layers, and all convolution (no max pooling).
from keras.models import Model
from keras.layers import Input, merge
from keras.layers import Dense, Activation, Flatten, BatchNormalization
from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D, AveragePooling2D
from keras.regularizers import l2


def rnpa_bottleneck_layer(input_tensor, nb_filters, filter_sz, stage,
                          init='glorot_normal', reg=0.0, use_shortcuts=True):

    nb_in_filters, nb_bottleneck_filters = nb_filters

    bn_name = 'bn' + str(stage)
    conv_name = 'conv' + str(stage)
    relu_name = 'relu' + str(stage)
    merge_name = '+' + str(stage)

    # batchnorm-relu-conv, from nb_in_filters to nb_bottleneck_filters via 1x1 conv
    if stage > 1:  # first activation is just after conv1
        x = BatchNormalization(axis=1, name=bn_name+'a')(input_tensor)
        x = Activation('relu', name=relu_name+'a')(x)
    else:
        x = input_tensor

    x = Convolution2D(
            nb_bottleneck_filters, 1, 1,
            init=init,
            W_regularizer=l2(reg),
            bias=False,
            name=conv_name+'a'
        )(x)

    # batchnorm-relu-conv, from nb_bottleneck_filters to nb_bottleneck_filters via FxF conv
    x = BatchNormalization(axis=1, name=bn_name+'b')(x)
    x = Activation('relu', name=relu_name+'b')(x)
    x = Convolution2D(
            nb_bottleneck_filters, filter_sz, filter_sz,
            border_mode='same',
            init=init,
            W_regularizer=l2(reg),
            bias=False,
            name=conv_name+'b'
        )(x)

    # batchnorm-relu-conv, from nb_bottleneck_filters back to nb_in_filters via 1x1 conv
    x = BatchNormalization(axis=1, name=bn_name+'c')(x)
    x = Activation('relu', name=relu_name+'c')(x)
    x = Convolution2D(
            nb_in_filters, 1, 1,
            init=init, W_regularizer=l2(reg),
            name=conv_name+'c'
        )(x)

    # merge the bottleneck output with the block input (identity shortcut)
    if use_shortcuts:
        x = merge([x, input_tensor], mode='sum', name=merge_name)

    return x


def ResNetPreAct(input_shape=(3, 32, 32), nb_classes=10,
                 layer1_params=(5, 64, 2),
                 res_layer_params=(3, 16, 3),
                 final_layer_params=None,
                 init='glorot_normal', reg=0.0, use_shortcuts=True):
    """
    Return a new Residual Network using full pre-activation based on the work in
    "Identity Mappings in Deep Residual Networks" by He et al.
    http://arxiv.org/abs/1603.05027

    The following network definition achieves 92.0% accuracy on the CIFAR-10 test set using
    the `adam` optimizer, 100 epochs, and a learning rate schedule of 1.e-3 / 1.e-4 / 1.e-5 with
    transitions at 50 and 75 epochs:
        ResNetPreAct(layer1_params=(3,128,2), res_layer_params=(3,32,25), reg=reg)

    Max pooling is removed in favor of a strided first convolutional layer. Motivated by
    "Striving for Simplicity: The All Convolutional Net" by Springenberg et al.
    (https://arxiv.org/abs/1412.6806) and my own experiments, where I observed about a 0.5%
    improvement by replacing the max pool operations in the VGG-like cifar10_cnn.py example
    in the Keras distribution.

    Parameters
    ----------
    input_shape : tuple of (C, H, W)
    nb_classes: number of scores to produce from final affine layer (input to softmax)
    layer1_params: tuple of (filter size, num filters, stride for conv)
    res_layer_params: tuple of (filter size, num res layer filters, num res stages)
    final_layer_params: None or tuple of (filter size, num filters, stride for conv)
    init: type of weight initialization to use
    reg: L2 weight regularization (or weight decay)
    use_shortcuts: to evaluate difference between residual and non-residual network
    """

    sz_L1_filters, nb_L1_filters, stride_L1 = layer1_params
    sz_res_filters, nb_res_filters, nb_res_stages = res_layer_params

    use_final_conv = (final_layer_params is not None)
    if use_final_conv:
        sz_fin_filters, nb_fin_filters, stride_fin = final_layer_params
        sz_pool_fin = input_shape[1] // (stride_L1 * stride_fin)  # integer division: pool size must be an int
    else:
        sz_pool_fin = input_shape[1] // stride_L1

    img_input = Input(shape=input_shape, name='cifar')

    x = Convolution2D(
            nb_L1_filters, sz_L1_filters, sz_L1_filters,
            border_mode='same',
            subsample=(stride_L1, stride_L1),
            init=init,
            W_regularizer=l2(reg),
            bias=False,
            name='conv0'
        )(img_input)
    x = BatchNormalization(axis=1, name='bn0')(x)
    x = Activation('relu', name='relu0')(x)

    for stage in range(1, nb_res_stages+1):
        x = rnpa_bottleneck_layer(
                x,
                (nb_L1_filters, nb_res_filters),
                sz_res_filters,
                stage,
                init=init,
                reg=reg,
                use_shortcuts=use_shortcuts
            )

    x = BatchNormalization(axis=1, name='bnF')(x)
    x = Activation('relu', name='reluF')(x)

    if use_final_conv:
        x = Convolution2D(
                nb_fin_filters, sz_fin_filters, sz_fin_filters,
                border_mode='same',
                subsample=(stride_fin, stride_fin),
                init=init,
                W_regularizer=l2(reg),
                name='convF'
            )(x)

    x = AveragePooling2D((sz_pool_fin, sz_pool_fin), name='avg_pool')(x)

    x = Flatten(name='flat')(x)
    x = Dense(nb_classes, activation='softmax', name='fc10')(x)

    return Model(img_input, x, name='rnpa')
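
A minimal training sketch for the configuration quoted in the docstring, assuming the
Keras 1 API and Theano-style (C, H, W) image ordering; the batch size and the
regularization strength are illustrative choices, not values stated in the gist:

from keras.callbacks import LearningRateScheduler
from keras.datasets import cifar10
from keras.utils.np_utils import to_categorical

def lr_schedule(epoch):
    # 1.e-3 / 1.e-4 / 1.e-5 with transitions at epochs 50 and 75
    if epoch < 50:
        return 1.e-3
    elif epoch < 75:
        return 1.e-4
    return 1.e-5

(X_train, y_train), (X_test, y_test) = cifar10.load_data()
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0
Y_train = to_categorical(y_train, 10)
Y_test = to_categorical(y_test, 10)

model = ResNetPreAct(layer1_params=(3, 128, 2), res_layer_params=(3, 32, 25), reg=1.e-4)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, Y_train,
          batch_size=64, nb_epoch=100,
          validation_data=(X_test, Y_test),
          callbacks=[LearningRateScheduler(lr_schedule)])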
@JefferyRPrice (owner) commented Jan 12, 2017

Plot the 91.3% CIFAR-10 network:
[image: rnpa10_model]

@xxlxx1 commented Mar 1, 2017

File "/home/yanyao/xxl/cifar_code/keras-master/keras/backend/tensorflow_backend.py", line 862, in batch_flatten
x = tf.reshape(x, tf.pack([-1, prod(shape(x)[1:])]))
AttributeError: 'module' object has no attribute 'pack'

I have this error but I don't know why.
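
This AttributeError typically indicates a Keras/TensorFlow version mismatch: tf.pack was
renamed to tf.stack in TensorFlow 1.0, and older Keras releases still call tf.pack in
their TensorFlow backend. A quick version check, assuming both packages import cleanly:

# Upgrading Keras (or pinning a TensorFlow release that still has tf.pack)
# resolves the mismatch.
import keras
import tensorflow as tf
print(keras.__version__, tf.__version__)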

@MartinThoma commented Apr 9, 2017

Could you please put this in a repository and share weights / the trained Keras model?

@agoeroeg commented Jan 15, 2018

I made it Keras 2 compatible:

from keras.models import Model
from keras.layers import Input, add
from keras.layers import Dense, Activation, Flatten, BatchNormalization
from keras.layers import Conv2D, AveragePooling2D
from keras.regularizers import l2


def rnpa_bottleneck_layer(input_tensor, nb_filters, filter_sz, stage, init='glorot_normal', reg=0.0, use_shortcuts=True):

    nb_in_filters, nb_bottleneck_filters = nb_filters

    bn_name = 'bn' + str(stage)
    conv_name = 'conv' + str(stage)
    relu_name = 'relu' + str(stage)
    merge_name = 'add' + str(stage)  # avoid '+', which the TensorFlow backend rejects in layer/scope names

    # batchnorm-relu-conv, from nb_in_filters to nb_bottleneck_filters via 1x1 conv
    if stage>1: # first activation is just after conv1
        x = BatchNormalization(axis=1, name=bn_name+'a')(input_tensor)
        x = Activation('relu', name=relu_name+'a')(x)
    else:
        x = input_tensor
    
    x = Conv2D(
            filters=nb_bottleneck_filters, 
            kernel_size=(1,1),
            kernel_initializer=init,
            kernel_regularizer=l2(reg),
            use_bias=False,
            name=conv_name+'a'
        )(x)

    # batchnorm-relu-conv, from nb_bottleneck_filters to nb_bottleneck_filters via FxF conv
    x = BatchNormalization(axis=1, name=bn_name+'b')(x)
    x = Activation('relu', name=relu_name+'b')(x)
    x = Conv2D(
            filters=nb_bottleneck_filters, 
            kernel_size=(filter_sz,filter_sz),
            padding='same',
            kernel_initializer=init,
            kernel_regularizer=l2(reg),
            use_bias = False,
            name=conv_name+'b'
        )(x)


    # batchnorm-relu-conv, from nb_bottleneck_filters back to nb_in_filters via 1x1 conv
    x = BatchNormalization(axis=1, name=bn_name+'c')(x)
    x = Activation('relu', name=relu_name+'c')(x)
    x = Conv2D(
            filters=nb_in_filters, 
            kernel_size=(1,1),
            kernel_initializer=init, 
            kernel_regularizer=l2(reg),
            name=conv_name+'c'
        )(x)

    # merge
    if use_shortcuts:
        x = add([x, input_tensor], name=merge_name)

    return x




def ResNetPreAct(input_shape=(3,32,32), nb_classes=10, layer1_params=(5,64,2), res_layer_params=(3,16,3),
        final_layer_params=None, init='glorot_normal', reg=0.0, use_shortcuts=True):
    
    """
    Return a new Residual Network using full pre-activation based on the work in
    "Identity Mappings in Deep Residual Networks"  by He et al
    http://arxiv.org/abs/1603.05027

    The following network definition achieves 92.0% accuracy on the CIFAR-10 test set using
    the `adam` optimizer, 100 epochs, and a learning rate schedule of 1.e-3 / 1.e-4 / 1.e-5 with
    transitions at 50 and 75 epochs:
        ResNetPreAct(layer1_params=(3,128,2), res_layer_params=(3,32,25), reg=reg)

    Max pooling is removed in favor of a strided first convolutional layer. Motivated by
    "Striving for Simplicity: The All Convolutional Net" by Springenberg et al.
    (https://arxiv.org/abs/1412.6806) and my own experiments, where I observed about a 0.5%
    improvement by replacing the max pool operations in the VGG-like cifar10_cnn.py example
    in the Keras distribution.
    
    Parameters
    ----------
    input_shape : tuple of (C, H, W)
    nb_classes: number of scores to produce from final affine layer (input to softmax)
    layer1_params: tuple of (filter size, num filters, stride for conv)
    res_layer_params: tuple of (filter size, num res layer filters, num res stages)
    final_layer_params: None or tuple of (filter size, num filters, stride for conv)
    init: type of weight initialization to use
    reg: L2 weight regularization (or weight decay)
    use_shortcuts: to evaluate difference between residual and non-residual network
    """

    sz_L1_filters, nb_L1_filters, stride_L1 = layer1_params
    sz_res_filters, nb_res_filters, nb_res_stages = res_layer_params
    
    use_final_conv = (final_layer_params is not None)
    if use_final_conv:
        sz_fin_filters, nb_fin_filters, stride_fin = final_layer_params
        sz_pool_fin = input_shape[1] // (stride_L1 * stride_fin)  # integer division: pool size must be an int
    else:
        sz_pool_fin = input_shape[1] // stride_L1


    from keras import backend as K
    # Permute the (C, H, W) input shape to (H, W, C) if the backend uses
    # TensorFlow dimension ordering. Note that the BatchNormalization layers in
    # this model use axis=1, which assumes channels-first data; with a
    # channels-last backend they normalize along the wrong axis.
    if K.image_dim_ordering() == 'tf':
        input_shape = (input_shape[1], input_shape[2], input_shape[0])


    img_input = Input(shape=input_shape, name='cifar')

    x = Conv2D(
            filters=nb_L1_filters, 
            kernel_size=(sz_L1_filters,sz_L1_filters),
            padding='same',
            strides=(stride_L1, stride_L1),
            kernel_initializer=init,
            kernel_regularizer=l2(reg),
            use_bias=False,
            name='conv0'
        )(img_input)
    
    x = BatchNormalization(axis=1, name='bn0')(x)
    x = Activation('relu', name='relu0')(x)

    for stage in range(1,nb_res_stages+1):
        x = rnpa_bottleneck_layer(
                x,
                (nb_L1_filters, nb_res_filters),
                sz_res_filters, 
                stage,
                init=init, 
                reg=reg, 
                use_shortcuts=use_shortcuts
            )


    x = BatchNormalization(axis=1, name='bnF')(x)
    x = Activation('relu', name='reluF')(x)

    if use_final_conv:
        x = Conv2D(
                filters=nb_fin_filters,
                kernel_size=(sz_fin_filters, sz_fin_filters),
                padding='same',
                strides=(stride_fin, stride_fin),
                kernel_initializer=init,
                kernel_regularizer=l2(reg),
                name='convF'
            )(x)

    x = AveragePooling2D((sz_pool_fin,sz_pool_fin), name='avg_pool')(x)

    # x = Flatten(name='flat')(x)
    x = Flatten()(x)
    x = Dense(nb_classes, activation='softmax', name='fc10')(x)

    return Model(img_input, x, name='rnpa')
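
A quick smoke test of this port, assuming a working Keras 2 installation (the reg value
here is illustrative):

# Build the model and print the layer/parameter summary as a sanity check.
model = ResNetPreAct(layer1_params=(3, 128, 2), res_layer_params=(3, 32, 25), reg=1.e-4)
model.summary()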
@suvojit-0x55aa commented Jun 26, 2018

What are the hyperparameters used to obtain 92% accuracy? I'm using this network but the accuracy is stuck at 74%. Thanks in advance.
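
For reference, the configuration quoted in the docstring for the 92.0% CIFAR-10 result
is adam, 100 epochs, and a 1.e-3 / 1.e-4 / 1.e-5 learning rate schedule with drops at
epochs 50 and 75, applied to:

# The exact regularization strength (reg) is not stated in the gist.
model = ResNetPreAct(layer1_params=(3, 128, 2), res_layer_params=(3, 32, 25), reg=reg)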
