Skip to content

Instantly share code, notes, and snippets.

@asanakoy
Last active July 24, 2018 15:29
Show Gist options
  • Save asanakoy/4c50bf8f39f4ceac9337 to your computer and use it in GitHub Desktop.
Generate caffe netspec file for Alexnet model
# 23.03.16, Artsiom Sanakoyeu
import caffe
from caffe import layers as L, params as P
def alexnet(train_data_source, test_data_source, mean_file, train_batch_size=128, test_batch_size=50, database_backend=P.Data.LEVELDB):
    """
    Generate the caffe's network specification train_val.prototxt file for Alexnet model,
    described in the [AlexNet](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks) publication.

    Args:
        train_data_source: path to the TRAIN-phase database (LEVELDB/LMDB).
        test_data_source: path to the TEST-phase database (LEVELDB/LMDB).
        mean_file: path to the mean-image .binaryproto for the data transformer.
        train_batch_size: batch size of the TRAIN-phase data layer.
        test_batch_size: batch size of the TEST-phase data layer.
        database_backend: P.Data.LEVELDB or P.Data.LMDB.

    Returns:
        str: the complete train_val.prototxt text (train data layer, test
        data layer, and all model layers concatenated).
    """
    n = caffe.NetSpec()
    # 227x227 crops; random mirroring is switched off below for the TEST phase.
    data_transform_param = dict(mirror=True, crop_size=227, mean_file=mean_file)
    n.data, n.label = L.Data(include=dict(phase=caffe.TRAIN),
                             transform_param=data_transform_param,
                             batch_size=train_batch_size,
                             backend=database_backend,
                             source=train_data_source,
                             ntop=2)
    # Serialize the TRAIN data layer NOW: reassigning n.data below for the
    # TEST phase replaces it in the NetSpec, so it must be captured first.
    train_data_layer_str = str(n.to_proto())
    # Reuse the same transform params for TEST, but without mirroring.
    data_transform_param['mirror'] = False
    n.data, n.label = L.Data(include=dict(phase=caffe.TEST),
                             transform_param=data_transform_param,
                             batch_size=test_batch_size,
                             backend=database_backend,
                             source=test_data_source,
                             ntop=2)
    # conv1: 96 maps, 11x11 kernels, stride 4.
    # param: [weights, biases] with the usual 2x learning rate / no decay on biases.
    n.conv1 = L.Convolution(n.data, name='conv1', num_output=96, kernel_size=11, stride=4,
                            weight_filler=dict(type='gaussian', std=0.01),
                            bias_filler=dict(type='constant', value=0),
                            param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    n.relu1 = L.ReLU(n.conv1, in_place=True)
    # Local response normalization after conv1 and conv2, as in the paper.
    n.norm1 = L.LRN(n.relu1, local_size=5, alpha=0.0001, beta=0.75)
    n.pool1 = L.Pooling(n.norm1, kernel_size=3, stride=2, pool=P.Pooling.MAX)
    # conv2: 256 maps, 5x5 kernels; group=2 splits the layer across the
    # two-GPU topology of the original AlexNet.
    n.conv2 = L.Convolution(n.pool1, name='conv2', num_output=256, pad=2, kernel_size=5,
                            group=2,
                            weight_filler=dict(type='gaussian', std=0.01),
                            bias_filler=dict(type='constant', value=0.1),
                            param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    n.relu2 = L.ReLU(n.conv2, in_place=True)
    n.norm2 = L.LRN(n.relu2, local_size=5, alpha=0.0001, beta=0.75)
    n.pool2 = L.Pooling(n.norm2, kernel_size=3, stride=2, pool=P.Pooling.MAX)
    # conv3-conv5: 3x3 kernels, pad 1; conv4/conv5 are grouped like conv2.
    n.conv3 = L.Convolution(n.pool2, name='conv3', num_output=384, pad=1, kernel_size=3,
                            weight_filler=dict(type='gaussian', std=0.01),
                            bias_filler=dict(type='constant', value=0),
                            param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    n.relu3 = L.ReLU(n.conv3, in_place=True)
    n.conv4 = L.Convolution(n.relu3, name='conv4', num_output=384, pad=1, kernel_size=3,
                            group=2,
                            weight_filler=dict(type='gaussian', std=0.01),
                            bias_filler=dict(type='constant', value=0.1),
                            param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    n.relu4 = L.ReLU(n.conv4, in_place=True)
    n.conv5 = L.Convolution(n.relu4, name='conv5', num_output=256, pad=1, kernel_size=3,
                            group=2,
                            weight_filler=dict(type='gaussian', std=0.01),
                            bias_filler=dict(type='constant', value=0.1),
                            param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    n.relu5 = L.ReLU(n.conv5, in_place=True)
    n.pool5 = L.Pooling(n.relu5, kernel_size=3, stride=2, pool=P.Pooling.MAX)
    # Fully connected classifier head: fc6/fc7 (4096 units, dropout 0.5)
    # and fc8 (1000-way ImageNet logits).
    n.fc6 = L.InnerProduct(n.pool5, name='fc6', num_output=4096,
                           weight_filler=dict(type='gaussian', std=0.005),
                           bias_filler=dict(type='constant', value=0.1),
                           param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    n.relu6 = L.ReLU(n.fc6, in_place=True)
    n.drop6 = L.Dropout(n.relu6, in_place=True, dropout_ratio=0.5)
    n.fc7 = L.InnerProduct(n.drop6, name='fc7', num_output=4096,
                           weight_filler=dict(type='gaussian', std=0.005),
                           bias_filler=dict(type='constant', value=0.1),
                           param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    n.relu7 = L.ReLU(n.fc7, in_place=True)
    n.drop7 = L.Dropout(n.relu7, in_place=True, dropout_ratio=0.5)
    n.fc8 = L.InnerProduct(n.drop7, name='fc8', num_output=1000,
                           weight_filler=dict(type='gaussian', std=0.01),
                           bias_filler=dict(type='constant', value=0),
                           param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    # Accuracy is evaluated only in the TEST phase; loss is shared by both.
    n.accuracy = L.Accuracy(n.fc8, n.label, include=dict(phase=caffe.TEST))
    n.loss = L.SoftmaxWithLoss(n.fc8, n.label)
    # Prepend the separately-captured TRAIN data layer to the rest of the net.
    return 'name: "AlexNet"\n' + train_data_layer_str + str(n.to_proto())
if __name__ == '__main__':
    # Build the AlexNet train/val spec from LMDB inputs and write it to
    # train_val.prototxt in the current directory.
    net_spec_str = alexnet('train.lmdb', 'test.lmdb', 'imagenet_mean.binaryproto',
                           train_batch_size=256, test_batch_size=50,
                           database_backend=P.Data.LMDB)
    with open('train_val.prototxt', 'w') as f:
        f.write(net_spec_str)
    # print() with parentheses is valid in both Python 2 and 3; the bare
    # `print f.name` statement was Python-2-only syntax.
    print(f.name)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment