# This script generates a MobileNetV1 train/val prototxt for Caffe,
# with a configurable width multiplier (--shrink) and input crop size.
#
################################################################################
from caffe.proto import caffe_pb2
from google.protobuf import text_format
import argparse
def Conv2dWithoutBias(net, bottom, top, dim_out, stride=1, kernel=1, pad=0):
    """ A convolution block consisting of 2d-convolution, batchnorm, scale and
    relu layers, without bias
    Args:
        net: the caffe net to extend
        bottom: input blob name for the conv2d block
        top: output blob name (also used as the layer name)
        dim_out: the number of filters
        stride: the stride for the convolution layer
        kernel: the kernel size for the convolution layer
        pad: the padding for the convolution layer
    Return:
        the output blob name of the conv2d block
    """
    conv_layer = caffe_pb2.LayerParameter()
    conv_layer.type = "Convolution"
    conv_layer.name = top
    conv_layer.bottom.extend([bottom])
    conv_layer.top.extend([top])
    conv_layer_param = caffe_pb2.ParamSpec()
    conv_layer_param.lr_mult = 1.0
    conv_layer_param.decay_mult = 1.0
    conv_layer.param.extend([conv_layer_param])
    conv_param = conv_layer.convolution_param
    conv_param.num_output = dim_out
    conv_param.bias_term = False
    conv_param.kernel_size.append(kernel)
    conv_param.stride.append(stride)
    conv_param.pad.append(pad)
    conv_param.group = 1
    conv_param.weight_filler.type = "msra"

    ret = "{}/bn".format(top)
    bn_layer = caffe_pb2.LayerParameter()
    bn_layer.name = "{}/bn".format(top)
    bn_layer.type = "BatchNorm"
    bn_layer_param = caffe_pb2.ParamSpec()
    bn_layer_param.lr_mult = 0.
    bn_layer_param.decay_mult = 0.
    bn_layer.param.extend([bn_layer_param] * 3)
    bn_layer.bottom.extend([top])
    bn_layer.top.extend([ret])

    scale_layer = caffe_pb2.LayerParameter()
    scale_layer.name = "{}/scale".format(top)
    scale_layer.type = "Scale"
    scale_layer.scale_param.bias_term = True
    scale_layer.bottom.extend([ret])
    scale_layer.top.extend([ret])

    relu_layer = caffe_pb2.LayerParameter()
    relu_layer.name = "{}/relu".format(top)
    relu_layer.type = "ReLU"
    relu_layer.bottom.extend([ret])
    relu_layer.top.extend([ret])

    net.layer.extend([conv_layer, bn_layer, scale_layer, relu_layer])
    return ret
def DepthwiseConvWithoutBias(net, bottom, top, dim_out, kernel=1, stride=1, pad=0):
    """ A depthwise convolution block consisting of depthwise convolution,
    batchnorm, scale and relu layers, without bias
    Args:
        net: the caffe net to extend
        bottom: input blob name for the depthwise conv block
        top: output blob name (also used as the layer name)
        dim_out: the number of filters (equal to the number of groups)
        kernel, stride, pad: the convolution layer parameters
    Return:
        the output blob name of the depthwise conv block
    """
    conv_layer = caffe_pb2.LayerParameter()
    # conv_layer.type = "DepthwiseConvolution"
    conv_layer.type = "Convolution"
    conv_layer.name = top
    conv_layer.bottom.extend([bottom])
    conv_layer.top.extend([top])
    conv_layer_param = caffe_pb2.ParamSpec()
    conv_layer_param.lr_mult = 1.0
    conv_layer_param.decay_mult = 1.0
    conv_layer.param.extend([conv_layer_param])
    conv_param = conv_layer.convolution_param
    conv_param.num_output = dim_out
    conv_param.bias_term = False
    conv_param.kernel_size.append(kernel)
    conv_param.stride.append(stride)
    conv_param.pad.append(pad)
    # group == num_output makes this a depthwise convolution
    conv_param.group = dim_out
    conv_param.weight_filler.type = "msra"

    ret = "{}/bn".format(top)
    bn_layer = caffe_pb2.LayerParameter()
    bn_layer.name = "{}/bn".format(top)
    bn_layer.type = "BatchNorm"
    bn_layer_param = caffe_pb2.ParamSpec()
    bn_layer_param.lr_mult = 0.
    bn_layer_param.decay_mult = 0.
    bn_layer.param.extend([bn_layer_param] * 3)
    bn_layer.bottom.extend([top])
    bn_layer.top.extend([ret])

    scale_layer = caffe_pb2.LayerParameter()
    scale_layer.name = "{}/scale".format(top)
    scale_layer.type = "Scale"
    scale_layer.scale_param.bias_term = True
    scale_layer.bottom.extend([ret])
    scale_layer.top.extend([ret])

    relu_layer = caffe_pb2.LayerParameter()
    relu_layer.name = "{}/relu".format(top)
    relu_layer.type = "ReLU"
    relu_layer.bottom.extend([ret])
    relu_layer.top.extend([ret])

    net.layer.extend([conv_layer, bn_layer, scale_layer, relu_layer])
    return ret
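
# Note (added, illustrative arithmetic not from the original script): each
# "dw + 1x1" pair built from the two blocks above implements a depthwise
# separable convolution. For example, with 512 input and 512 output channels
# and a 3x3 kernel, a standard convolution needs 3*3*512*512 ≈ 2.36M weights,
# while the depthwise part (3*3*512 ≈ 4.6k) plus the pointwise 1x1 part
# (512*512 ≈ 262k) needs roughly 267k, i.e. about 8-9x fewer parameters.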
def DataLayer(net, train_data, val_data,
              crop_size=224,
              scale=0.017,
              mean_value=[104.0, 117.0, 123.0],
              train_batch_size=32, val_batch_size=32):
    """ Generate the input data layers for training and validation
    Args:
        net: the caffe net to extend
        train_data: path to the training database (LMDB or LevelDB)
        val_data: path to the validation database (LMDB or LevelDB)
        crop_size: the input crop size, e.g. 224 or 299
        scale: factor applied to the input after mean subtraction
        mean_value: per-channel mean values subtracted from the input
        train_batch_size: the batch size for training
        val_batch_size: the batch size for validation
    Return:
        the output blob names of the data layer
    """
    # train layer
    train_layer = caffe_pb2.LayerParameter()
    train_layer.name = "data"
    train_layer.type = "Data"
    train_layer.top.extend(["data", "label"])
    train_state_rule = caffe_pb2.NetStateRule()
    train_state_rule.phase = caffe_pb2.TRAIN
    train_layer.include.extend([train_state_rule])
    train_trans_param = train_layer.transform_param
    train_trans_param.mirror = True
    train_trans_param.crop_size = crop_size
    train_trans_param.scale = scale
    train_trans_param.mean_value.extend(mean_value)
    train_data_param = train_layer.data_param
    train_data_param.batch_size = train_batch_size
    train_data_param.source = train_data
    train_data_param.backend = 1  # LMDB

    # validation layer
    val_layer = caffe_pb2.LayerParameter()
    val_layer.name = "data"
    val_layer.type = "Data"
    val_layer.top.extend(["data", "label"])
    val_state_rule = caffe_pb2.NetStateRule()
    val_state_rule.phase = caffe_pb2.TEST
    val_layer.include.extend([val_state_rule])
    val_trans_param = val_layer.transform_param
    val_trans_param.mirror = False
    val_trans_param.crop_size = crop_size
    val_trans_param.scale = scale
    val_trans_param.mean_value.extend(mean_value)
    val_data_param = val_layer.data_param
    val_data_param.batch_size = val_batch_size
    val_data_param.source = val_data
    val_data_param.backend = 1  # LMDB

    net.layer.extend([train_layer, val_layer])
    return "data", "label"
def PoolLayer(net, bottom, top,
              global_pooling=False,
              kernel=1, pool_method=0, stride=1, pad=0):
    """ Add a pooling layer, either MAX pooling or AVE pooling
    Args:
        net: caffe net
        bottom: the input blob
        top: the output blob
        global_pooling: pool over the whole spatial extent if True
        pool_method: the pooling method (0 = MAX, 1 = AVE)
        kernel, stride, pad: the pooling layer parameters
    Return:
        output blob of the pooling layer
    """
    pool_layer = caffe_pb2.LayerParameter()
    pool_layer.name = top
    pool_layer.type = "Pooling"
    pool_layer.bottom.extend([bottom])
    pool_layer.top.extend([top])
    pool_param = pool_layer.pooling_param
    pool_param.pool = pool_method
    if global_pooling:
        pool_param.global_pooling = True
    else:
        pool_param.kernel_size = kernel
        pool_param.stride = stride
        pool_param.pad = pad
    net.layer.extend([pool_layer])
    return top
def FCLayer(net, bottom, top, dim_out):
    """ Add a fully-connected (InnerProduct) layer
    Args:
        net: caffe net
        bottom: the input blob for the fc layer
        top: the output blob for the fc layer
        dim_out: the number of output neurons
    Return:
        output blob
    """
    fc_layer = caffe_pb2.LayerParameter()
    fc_layer.name = top
    fc_layer.type = "InnerProduct"
    fc_layer.bottom.extend([bottom])
    fc_layer.top.extend([top])
    fc_param = fc_layer.inner_product_param
    fc_param.num_output = dim_out
    net.layer.extend([fc_layer])
    return top
def SoftmaxWithLossLayer(net, input_data, input_label):
    """ Add a SoftmaxWithLoss layer to compute the loss
    Args:
        net: caffe net
        input_data: string, data blob
        input_label: string, label blob
    """
    softmax_layer = caffe_pb2.LayerParameter()
    softmax_layer.name = "loss"
    softmax_layer.type = "SoftmaxWithLoss"
    softmax_layer.bottom.extend([input_data, input_label])
    softmax_layer.top.extend(["loss/loss"])
    net.layer.extend([softmax_layer])
def AccuracyLayer(net, input_data, input_label):
    """ Add accuracy layers (top-1 and top-5) for training and validation on
    the ImageNet dataset
    Args:
        net: caffe net
        input_data: data blob for accuracy
        input_label: label blob for accuracy
    """
    # top1 layer
    top1_layer = caffe_pb2.LayerParameter()
    top1_layer.name = "accuracy/top1"
    top1_layer.type = "Accuracy"
    top1_layer.bottom.extend([input_data, input_label])
    top1_layer.top.extend(["acc@1"])
    top1_acc_param = top1_layer.accuracy_param
    top1_acc_param.top_k = 1

    # top5 layer
    top5_layer = caffe_pb2.LayerParameter()
    top5_layer.name = "accuracy/top5"
    top5_layer.type = "Accuracy"
    top5_layer.bottom.extend([input_data, input_label])
    top5_layer.top.extend(["acc@5"])
    top5_acc_param = top5_layer.accuracy_param
    top5_acc_param.top_k = 5

    net.layer.extend([top1_layer, top5_layer])
def generate_mobilenetv1(args):
    net = caffe_pb2.NetParameter()
    net.name = "mobilenetv1_{}_{}".format(args.crop_size, args.shrink)
    # add data layer
    data, label = DataLayer(net, args.train_data, args.val_data,
                            crop_size=args.crop_size,
                            train_batch_size=args.train_batch_size,
                            val_batch_size=args.val_batch_size)
    # first conv layer
    conv1 = Conv2dWithoutBias(net, data, "conv1", int(32*args.shrink),
                              kernel=3, stride=2, pad=1)
    # depthwise conv layer and 1x1 conv layer (conv2)
    depconv2 = DepthwiseConvWithoutBias(net, conv1, "conv2/dw", int(32*args.shrink),
                                        kernel=3, stride=1, pad=1)
    conv2 = Conv2dWithoutBias(net, depconv2, "conv2", int(64*args.shrink),
                              kernel=1, stride=1, pad=0)
    # depthwise conv layer and 1x1 conv layer (conv3)
    depconv3 = DepthwiseConvWithoutBias(net, conv2, "conv3/dw", int(64*args.shrink),
                                        kernel=3, stride=2, pad=1)
    conv3 = Conv2dWithoutBias(net, depconv3, "conv3", int(128*args.shrink),
                              kernel=1, stride=1, pad=0)
    # depthwise conv layer and 1x1 conv layer (conv4)
    depconv4 = DepthwiseConvWithoutBias(net, conv3, "conv4/dw", int(128*args.shrink),
                                        kernel=3, stride=1, pad=1)
    conv4 = Conv2dWithoutBias(net, depconv4, "conv4", int(128*args.shrink),
                              kernel=1, stride=1, pad=0)
    # depthwise conv layer and 1x1 conv layer (conv5)
    depconv5 = DepthwiseConvWithoutBias(net, conv4, "conv5/dw", int(128*args.shrink),
                                        kernel=3, stride=2, pad=1)
    conv5 = Conv2dWithoutBias(net, depconv5, "conv5", int(256*args.shrink),
                              kernel=1, stride=1, pad=0)
    # depthwise conv layer and 1x1 conv layer (conv6)
    depconv6 = DepthwiseConvWithoutBias(net, conv5, "conv6/dw", int(256*args.shrink),
                                        kernel=3, stride=1, pad=1)
    conv6 = Conv2dWithoutBias(net, depconv6, "conv6", int(256*args.shrink),
                              kernel=1, stride=1, pad=0)
    # depthwise conv layer and 1x1 conv layer (conv7)
    depconv7 = DepthwiseConvWithoutBias(net, conv6, "conv7/dw", int(256*args.shrink),
                                        kernel=3, stride=2, pad=1)
    conv7 = Conv2dWithoutBias(net, depconv7, "conv7", int(512*args.shrink),
                              kernel=1, stride=1, pad=0)
    # five repeated depthwise conv + 1x1 conv blocks (conv8)
    conv8 = conv7
    for idx in range(5):
        depconv8 = DepthwiseConvWithoutBias(net, conv8, "conv8/{}/dw".format(idx),
                                            int(512*args.shrink), kernel=3, stride=1, pad=1)
        conv8 = Conv2dWithoutBias(net, depconv8, "conv8/{}".format(idx),
                                  int(512*args.shrink), kernel=1, stride=1, pad=0)
    # depthwise conv layer and 1x1 conv layer (conv9)
    depconv9 = DepthwiseConvWithoutBias(net, conv8, "conv9/dw", int(512*args.shrink),
                                        kernel=3, stride=2, pad=1)
    conv9 = Conv2dWithoutBias(net, depconv9, "conv9", int(1024*args.shrink),
                              kernel=1, stride=1, pad=0)
    # depthwise conv layer and 1x1 conv layer (conv10)
    depconv10 = DepthwiseConvWithoutBias(net, conv9, "conv10/dw", int(1024*args.shrink),
                                         kernel=3, stride=1, pad=1)
    conv10 = Conv2dWithoutBias(net, depconv10, "conv10", int(1024*args.shrink),
                               kernel=1, stride=1, pad=0)
    # global average pooling
    pool11 = PoolLayer(net, conv10, "pool11", global_pooling=True, pool_method=1)
    # fc layer
    fc12 = FCLayer(net, pool11, "fc12", 1000)
    # softmax and accuracy layers
    SoftmaxWithLossLayer(net, fc12, label)
    AccuracyLayer(net, fc12, label)

    with open(args.output_net, "w") as net_stream:
        net_stream.write(text_format.MessageToString(net))
def parse_args():
    parser = argparse.ArgumentParser(description="generate mobilenetv1")
    parser.add_argument("--output_net", type=str, required=True,
                        help="output file for the generated network prototxt")
    parser.add_argument("--train_data", type=str,
                        default="/mnt/disk1/zhibin/experiment_data/imagenet/caffe_lmdb/ilsvrc12_encoded_train_lmdb",
                        help="training data (LMDB) path for imagenet")
    parser.add_argument("--val_data", type=str,
                        default="/mnt/disk1/zhibin/experiment_data/imagenet/caffe_lmdb/ilsvrc12_encoded_val_lmdb",
                        help="validation data (LMDB) path for imagenet")
    parser.add_argument("--crop_size", type=int, default=224,
                        help="crop size of the input image")
    parser.add_argument("--shrink", type=float, default=1.0,
                        help="width multiplier for shrinking the network")
    parser.add_argument("--train_batch_size", type=int, default=32,
                        help="mini-batch size for training")
    parser.add_argument("--val_batch_size", type=int, default=32,
                        help="mini-batch size for validation")
    args = parser.parse_args()
    return args
if __name__ == "__main__":
    args = parse_args()
    generate_mobilenetv1(args)
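
# Example usage (added; the script and output file names below are arbitrary):
#   python gen_mobilenetv1.py --output_net mobilenetv1_train_val.prototxt \
#       --crop_size 224 --shrink 1.0
#
# The generated prototxt can then be loaded with pycaffe, e.g. (sketch):
#   import caffe
#   net = caffe.Net("mobilenetv1_train_val.prototxt", caffe.TEST)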