# Gist by @FlorianMuellerklein, created December 18, 2016.
# https://gist.github.com/FlorianMuellerklein/933c95e7114a8f588911fe18bc1b2005
from keras.layers import merge, Input, Dropout
from keras.layers import Dense, Activation
from keras.layers import Convolution2D, MaxPooling2D
from keras.layers.pooling import GlobalAveragePooling2D
from keras.layers import BatchNormalization
from keras.models import Model
'''
Adapted from https://arxiv.org/pdf/1611.10080v1.pdf.
It's a wide and deep residual network designed for optimal feature extraction and gradient flow.
'''
# Number of filters per stage: 64, 128, 256, 512, 1024 (doubles each stage).
n_filters = {stage: 64 * (2 ** stage) for stage in range(5)}
# set up the three types of residual blocks
def residual_block_type_1(l, increase_dim=False, first=False, filters=16):
    '''
    Standard stacked 3x3 pre-activation residual block.

    Parameters
    ----------
    l : tensor
        Input tensor.
    increase_dim : bool
        If True, the first 3x3 conv uses stride 2 (spatial downsampling)
        and the shortcut becomes a strided 1x1 projection to match shapes.
    first : bool
        If True, skip the pre-activation BN -> ReLU (the stem below already
        applies BN -> ReLU before the first block).
    filters : int
        Number of filters for both 3x3 convolutions.

    Returns
    -------
    tensor
        Sum-merge of the residual branch and the shortcut.
    '''
    # Downsample on the first conv only when increasing dimensions.
    if increase_dim:
        first_stride = (2, 2)
    else:
        first_stride = (1, 1)

    # Pre-activation: BN -> ReLU, skipped for the very first block.
    # NOTE(review): axis=1 assumes channels-first data, but the model input
    # elsewhere in this file is (224, 224, 3) channels-last — confirm the
    # intended image_dim_ordering / BN axis.
    if first:
        pre_act = l
    else:
        bn = BatchNormalization(axis=1)(l)
        pre_act = Activation('relu')(bn)

    conv_1 = Convolution2D(filters, 3, 3, init='he_normal', border_mode='same',
                           subsample=first_stride, activation='linear')(pre_act)
    bn_1 = BatchNormalization(axis=1)(conv_1)
    relu_1 = Activation('relu')(bn_1)
    conv_2 = Convolution2D(filters, 3, 3, init='he_normal', border_mode='same',
                           activation='linear')(relu_1)

    # Add shortcut.
    if increase_dim:
        # Projection shortcut: strided 1x1 conv matches the downsampled shape.
        projection = Convolution2D(filters, 1, 1, subsample=(2, 2), border_mode='same',
                                   activation='linear')(pre_act)
        block = merge([conv_2, projection], mode='sum')
    else:
        # Identity shortcut: same shape in and out.
        block = merge([conv_2, pre_act], mode='sum')
    return block
def residual_block_type_2(l, increase_dim=False, first=False, filters=16):
    '''
    Stacked 3x3 pre-activation residual block where the second conv
    doubles the number of filters.

    Because the output always has 2 * filters channels, the shortcut is a
    1x1 projection on BOTH branches (identity can never match shapes here).

    Parameters
    ----------
    l : tensor
        Input tensor.
    increase_dim : bool
        If True, the first 3x3 conv and the projection shortcut use stride 2.
    first : bool
        If True, skip the pre-activation BN -> ReLU.
    filters : int
        Filters for the first conv; the second conv uses 2 * filters.

    Returns
    -------
    tensor
        Sum-merge of the residual branch and the projection shortcut.
    '''
    # Downsample on the first conv only when increasing dimensions.
    if increase_dim:
        first_stride = (2, 2)
    else:
        first_stride = (1, 1)

    # Pre-activation: BN -> ReLU, skipped for the very first block.
    if first:
        pre_act = l
    else:
        bn = BatchNormalization(axis=1)(l)
        pre_act = Activation('relu')(bn)

    conv_1 = Convolution2D(filters, 3, 3, init='he_normal', border_mode='same',
                           subsample=first_stride, activation='linear')(pre_act)
    bn_1 = BatchNormalization(axis=1)(conv_1)
    relu_1 = Activation('relu')(bn_1)
    conv_2 = Convolution2D(int(filters * 2), 3, 3, init='he_normal', border_mode='same',
                           activation='linear')(relu_1)

    # Add shortcut (always a projection: channel count doubled above).
    if increase_dim:
        # Strided projection matches the downsampled, widened shape.
        projection = Convolution2D(int(filters * 2), 1, 1, subsample=(2, 2), border_mode='same',
                                   activation='linear')(pre_act)
        block = merge([conv_2, projection], mode='sum')
    else:
        # Unstrided projection only widens the channel dimension.
        projection = Convolution2D(int(filters * 2), 1, 1, subsample=(1, 1), border_mode='same',
                                   activation='linear')(pre_act)
        block = merge([conv_2, projection], mode='sum')
    return block
def residual_block_type_3(l, increase_dim=False, first=False, filters=16):
    '''
    Bottleneck pre-activation residual block with an increasing number of
    filters: 1x1 (filters) -> 3x3 (2 * filters) -> 1x1 (4 * filters).

    The output always has 4 * filters channels, so the shortcut is always
    a 1x1 projection (an identity shortcut could never match shapes; the
    original's commented-out identity branch was dead code and is removed).

    Parameters
    ----------
    l : tensor
        Input tensor.
    increase_dim : bool
        If True, the first 1x1 conv uses stride 2.
    first : bool
        If True, skip the pre-activation BN -> ReLU.
    filters : int
        Base filter count for the bottleneck (expanded 4x at the output).

    Returns
    -------
    tensor
        Sum-merge of the bottleneck branch and the projection shortcut.
    '''
    # Downsample on the first conv only when increasing dimensions.
    if increase_dim:
        first_stride = (2, 2)
    else:
        first_stride = (1, 1)

    # Pre-activation: BN -> ReLU, skipped for the very first block.
    if first:
        pre_act = l
    else:
        bn = BatchNormalization(axis=1)(l)
        pre_act = Activation('relu')(bn)

    conv_1 = Convolution2D(filters, 1, 1, init='he_normal', border_mode='same',
                           subsample=first_stride, activation='linear')(pre_act)
    bn_1 = BatchNormalization(axis=1)(conv_1)
    relu_1 = Activation('relu')(bn_1)
    conv_2 = Convolution2D(int(filters * 2), 3, 3, init='he_normal', border_mode='same',
                           activation='linear')(relu_1)
    bn_2 = BatchNormalization(axis=1)(conv_2)
    relu_2 = Activation('relu')(bn_2)
    conv_3 = Convolution2D(int(filters * 4), 1, 1, init='he_normal', border_mode='same',
                           activation='linear')(relu_2)

    # Add shortcut (always a projection: output has 4x the input's base width).
    # NOTE(review): the projection is unstrided even when increase_dim=True,
    # which would mismatch the strided conv_1 branch — as written this block
    # is only ever called with increase_dim=False below; confirm before reuse.
    projection = Convolution2D(int(filters * 4), 1, 1, subsample=(1, 1), border_mode='same',
                               activation='linear')(pre_act)
    block = merge([conv_3, projection], mode='sum')
    return block
def build_model():
    '''
    Build the wide/deep pre-activation residual network.

    Architecture (per the stage comments in the original):
    stem conv -> 3x (type-1, 128f) -> 3x (type-1, 256f) -> 6x (type-1, 512f)
    -> 3x (type-2, 512f base) -> two bottleneck (type-3) blocks
    -> BN -> ReLU -> global average pool -> 8-way softmax.

    The original source ended this top-level code with a bare
    `return model` (a SyntaxError at module level), so it was clearly a
    function body; it is restored as one here.

    Returns
    -------
    keras.models.Model
        Compiled-graph functional model with input (224, 224, 3) and an
        8-class softmax output.
    '''
    cnn_input = Input(shape=(224, 224, 3), name='Input', dtype='float32')

    # Stem: 3x3 conv -> BN -> ReLU -> 2x2 max pool.
    l = Convolution2D(n_filters[0], 3, 3, subsample=(1, 1), init='he_normal',
                      border_mode='same', activation='linear')(cnn_input)
    l = BatchNormalization(axis=1)(l)
    l = Activation('relu')(l)
    l = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), border_mode='same')(l)

    # Stage 1: stacked 3x3 with 128 filters, 3 blocks (first downsamples).
    l = residual_block_type_1(l, first=True, increase_dim=True, filters=n_filters[1])
    for _ in range(1, 3):
        l = residual_block_type_1(l, filters=n_filters[1])

    # Stage 2: stacked 3x3 with 256 filters, 3 blocks.
    l = residual_block_type_1(l, increase_dim=True, filters=n_filters[2])
    for _ in range(1, 3):
        l = residual_block_type_1(l, filters=n_filters[2])

    # Stage 3: stacked 3x3 with 512 filters, 6 blocks.
    l = residual_block_type_1(l, increase_dim=True, filters=n_filters[3])
    for _ in range(1, 5):
        l = residual_block_type_1(l, filters=n_filters[3])

    # Stage 4: stacked 3x3 with 512 -> 1024 filters (type-2), 3 blocks.
    l = residual_block_type_2(l, increase_dim=True, filters=n_filters[3])
    for _ in range(1, 3):
        l = residual_block_type_2(l, filters=n_filters[3])

    # Bottleneck (type-3) blocks, once each.
    l = residual_block_type_3(l, filters=n_filters[3])
    l = residual_block_type_3(l, filters=n_filters[4])

    # Head: BN -> ReLU -> global average pooling -> 8-way softmax.
    l = BatchNormalization(axis=1)(l)
    l = Activation('relu')(l)
    avg_pool = GlobalAveragePooling2D()(l)
    pred = Dense(8, activation='softmax')(avg_pool)

    model = Model(input=cnn_input, output=pred)
    return model
# (GitHub gist page footer removed.)