# Generate a MobileNetV2 prototxt for Caffe.
#
################################################################################
from caffe.proto import caffe_pb2
from google.protobuf import text_format
import argparse
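# Example invocation (a sketch; "this_script.py" and the data paths are
# placeholders, see the --train_data/--val_data defaults below):
#   python this_script.py --output_net mobilenetv2.prototxt \
#       --train_data /path/to/train_lmdb --val_data /path/to/val_lmdb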
def Conv2dWithoutBias(net, bottom, top, dim_out, stride=1, kernel=1, pad=0):
    """ A convolution block consisting of bias-free 2d-convolution, batchnorm,
    scale and relu layers
    Args:
        bottom: input blob name for the conv2d block
        top: base name for the layers and output blobs of the block
        dim_out: the number of filters
        stride: the stride for the convolution layer
        kernel: the kernel size for the convolution layer
        pad: the padding for the convolution layer
    Return:
        the output blob name for the conv2d block
    """
    conv_layer = caffe_pb2.LayerParameter()
    conv_layer.type = "Convolution"
    conv_layer.name = top
    conv_layer.bottom.extend([bottom])
    conv_layer.top.extend([top])
    conv_layer_param = caffe_pb2.ParamSpec()
    conv_layer_param.lr_mult = 1.0
    conv_layer_param.decay_mult = 1.0
    conv_layer.param.extend([conv_layer_param])
    conv_param = conv_layer.convolution_param
    conv_param.num_output = dim_out
    conv_param.bias_term = False
    conv_param.kernel_size.append(kernel)
    conv_param.stride.append(stride)
    conv_param.pad.append(pad)
    conv_param.group = 1
    conv_param.weight_filler.type = "msra"
    # batchnorm: its three internal blobs (mean, variance, moving-average
    # factor) are maintained by the layer itself, so lr_mult/decay_mult are
    # zero to keep the solver from updating them
    ret = "{}/bn".format(top)
    bn_layer = caffe_pb2.LayerParameter()
    bn_layer.name = "{}/bn".format(top)
    bn_layer.type = "BatchNorm"
    bn_layer_param = caffe_pb2.ParamSpec()
    bn_layer_param.lr_mult = 0.
    bn_layer_param.decay_mult = 0.
    bn_layer.param.extend([bn_layer_param] * 3)
    bn_layer.bottom.extend([top])
    bn_layer.top.extend([ret])
    # the scale layer supplies the learnable affine transform after batchnorm;
    # scale and relu both operate in place on the batchnorm output blob
    scale_layer = caffe_pb2.LayerParameter()
    scale_layer.name = "{}/scale".format(top)
    scale_layer.type = "Scale"
    scale_layer.scale_param.bias_term = True
    scale_layer.bottom.extend([ret])
    scale_layer.top.extend([ret])
    relu_layer = caffe_pb2.LayerParameter()
    relu_layer.name = "{}/relu".format(top)
    relu_layer.type = "ReLU"
    relu_layer.bottom.extend([ret])
    relu_layer.top.extend([ret])
    net.layer.extend([conv_layer, bn_layer, scale_layer, relu_layer])
    return ret
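# For illustration, a call such as
#   Conv2dWithoutBias(net, "data", "conv1", 32, kernel=3, stride=2, pad=1)
# emits four layers named conv1, conv1/bn, conv1/scale and conv1/relu, and
# returns "conv1/bn", the blob that the scale and relu layers update in place.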
def Conv2dWithoutBiasLinear(net, bottom, top, dim_out, stride=1, kernel=1, pad=0):
    """ A convolution block consisting of bias-free 2d-convolution, batchnorm
    and scale layers; no relu follows, making this the linear projection of a
    bottleneck
    Args:
        bottom: input blob name for the conv2d block
        top: base name for the layers and output blobs of the block
        dim_out: the number of filters
        stride: the stride for the convolution layer
        kernel: the kernel size for the convolution layer
        pad: the padding for the convolution layer
    Return:
        the output blob name for the conv2d block
    """
    conv_layer = caffe_pb2.LayerParameter()
    conv_layer.type = "Convolution"
    conv_layer.name = top
    conv_layer.bottom.extend([bottom])
    conv_layer.top.extend([top])
    conv_layer_param = caffe_pb2.ParamSpec()
    conv_layer_param.lr_mult = 1.0
    conv_layer_param.decay_mult = 1.0
    conv_layer.param.extend([conv_layer_param])
    conv_param = conv_layer.convolution_param
    conv_param.num_output = dim_out
    conv_param.bias_term = False
    conv_param.kernel_size.append(kernel)
    conv_param.stride.append(stride)
    conv_param.pad.append(pad)
    conv_param.group = 1
    conv_param.weight_filler.type = "msra"
    ret = "{}/bn".format(top)
    bn_layer = caffe_pb2.LayerParameter()
    bn_layer.name = "{}/bn".format(top)
    bn_layer.type = "BatchNorm"
    bn_layer_param = caffe_pb2.ParamSpec()
    bn_layer_param.lr_mult = 0.
    bn_layer_param.decay_mult = 0.
    bn_layer.param.extend([bn_layer_param] * 3)
    bn_layer.bottom.extend([top])
    bn_layer.top.extend([ret])
    scale_layer = caffe_pb2.LayerParameter()
    scale_layer.name = "{}/scale".format(top)
    scale_layer.type = "Scale"
    scale_layer.scale_param.bias_term = True
    scale_layer.bottom.extend([ret])
    scale_layer.top.extend([ret])
    net.layer.extend([conv_layer, bn_layer, scale_layer])
    return ret
def DepthwiseConvWithoutBias(net, bottom, top, dim_out, kernel=1, stride=1, pad=0):
    """ A depthwise convolution block consisting of bias-free depthwise
    convolution, batchnorm, scale and relu layers
    Args:
        bottom: string, input blob name for the block
        top: string, base name for the layers and output blobs of the block
        dim_out: the number of filters
        kernel, stride, pad: the window parameters for the convolution layer
    Return:
        the output blob name for the depthwiseconv block
    """
    conv_layer = caffe_pb2.LayerParameter()
    # conv_layer.type = "DepthwiseConvolution"  # dedicated type in some forks
    conv_layer.type = "Convolution"
    conv_layer.name = top
    conv_layer.bottom.extend([bottom])
    conv_layer.top.extend([top])
    conv_layer_param = caffe_pb2.ParamSpec()
    conv_layer_param.lr_mult = 1.0
    conv_layer_param.decay_mult = 1.0
    conv_layer.param.extend([conv_layer_param])
    conv_param = conv_layer.convolution_param
    conv_param.num_output = dim_out
    conv_param.bias_term = False
    conv_param.kernel_size.append(kernel)
    conv_param.stride.append(stride)
    conv_param.pad.append(pad)
    conv_param.group = dim_out  # group == num_output makes this depthwise
    conv_param.weight_filler.type = "msra"
    ret = "{}/bn".format(top)
    bn_layer = caffe_pb2.LayerParameter()
    bn_layer.name = "{}/bn".format(top)
    bn_layer.type = "BatchNorm"
    bn_layer_param = caffe_pb2.ParamSpec()
    bn_layer_param.lr_mult = 0.
    bn_layer_param.decay_mult = 0.
    bn_layer.param.extend([bn_layer_param] * 3)
    bn_layer.bottom.extend([top])
    bn_layer.top.extend([ret])
    scale_layer = caffe_pb2.LayerParameter()
    scale_layer.name = "{}/scale".format(top)
    scale_layer.type = "Scale"
    scale_layer.scale_param.bias_term = True
    scale_layer.bottom.extend([ret])
    scale_layer.top.extend([ret])
    relu_layer = caffe_pb2.LayerParameter()
    relu_layer.name = "{}/relu".format(top)
    relu_layer.type = "ReLU"
    relu_layer.bottom.extend([ret])
    relu_layer.top.extend([ret])
    net.layer.extend([conv_layer, bn_layer, scale_layer, relu_layer])
    return ret
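# Note: with group == num_output, each filter convolves a single input
# channel, which is exactly a depthwise convolution. Stock Caffe computes
# this with its generic grouped Convolution; some Caffe forks provide the
# dedicated "DepthwiseConvolution" layer type commented out above, which
# implements the same operation more efficiently on GPU.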
def SumLayer(net, bottom_left, bottom_right, top):
    """ An element-wise layer for the sum operation
    Return:
        the output blob name for the SUM layer
    """
    sum_layer = caffe_pb2.LayerParameter()
    sum_layer.name = top
    sum_layer.type = "Eltwise"
    sum_layer.bottom.extend([bottom_left, bottom_right])
    sum_layer.top.extend([top])
    sum_param = sum_layer.eltwise_param
    sum_param.operation = 1  # EltwiseParameter.SUM (PROD = 0, SUM = 1, MAX = 2)
    net.layer.extend([sum_layer])
    return top
def PoolLayer(net, bottom, top,
              global_pooling=False,
              kernel=1, pool_method=0, stride=1, pad=0):
    """ Add a pooling layer, either MAX pooling or AVE pooling
    Args:
        net: caffe net
        bottom: the input blob
        top: the output blob
        pool_method: the pooling method, MAX (0) or AVE (1)
        kernel, stride, pad: the window parameters for the pooling layer,
            ignored when global_pooling is set
    Return:
        the output blob name for the pooling layer
    """
    pool_layer = caffe_pb2.LayerParameter()
    pool_layer.name = top
    pool_layer.type = "Pooling"
    pool_layer.bottom.extend([bottom])
    pool_layer.top.extend([top])
    pool_param = pool_layer.pooling_param
    pool_param.pool = pool_method
    if global_pooling:
        pool_param.global_pooling = True
    else:
        pool_param.kernel_size = kernel
        pool_param.stride = stride
        pool_param.pad = pad
    net.layer.extend([pool_layer])
    return top
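# The pool_method values follow caffe.proto's PoolingParameter.PoolMethod
# enum: MAX = 0, AVE = 1, STOCHASTIC = 2. For example, a 3x3/2 max pooling
# (blob names here are illustrative):
#   PoolLayer(net, "conv1/bn", "pool1", kernel=3, stride=2, pool_method=0)
# and the global average pooling used by generate_mobilenetv2 below:
#   PoolLayer(net, blob, "pool", pool_method=1, global_pooling=True)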
def FCLayer(net, bottom, top, dim_out):
    """ Add a fully-connected layer
    Args:
        bottom: the input blob for the fc layer
        top: the output blob for the fc layer
        dim_out: the number of output units
    Return:
        the output blob name
    """
    fc_layer = caffe_pb2.LayerParameter()
    fc_layer.name = top
    fc_layer.type = "InnerProduct"
    fc_layer.bottom.extend([bottom])
    fc_layer.top.extend([top])
    fc_param = fc_layer.inner_product_param
    fc_param.num_output = dim_out
    net.layer.extend([fc_layer])
    return top
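# Note that no weight_filler is set here, unlike the conv blocks above. To
# train from scratch one would typically add fillers (an optional addition,
# not in the original script), e.g.:
#   fc_param.weight_filler.type = "msra"
#   fc_param.bias_filler.type = "constant"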
def SoftmaxWithLossLayer(net, input_data, input_label):
    """ Add a softmaxwithloss layer computing the training loss
    Args:
        input_data: string, data blob
        input_label: string, label blob
    """
    softmax_layer = caffe_pb2.LayerParameter()
    softmax_layer.name = "loss"
    softmax_layer.type = "SoftmaxWithLoss"
    softmax_layer.bottom.extend([input_data, input_label])
    softmax_layer.top.extend(["loss/loss"])
    net.layer.extend([softmax_layer])
def AccuracyLayer(net, input_data, input_label):
    """ Add accuracy layers for training and validation on the imagenet
    dataset, reporting both top-1 and top-5 accuracy
    Args:
        net: caffe net
        input_data: data blob for accuracy
        input_label: label blob for accuracy
    """
    # top1 layer
    top1_layer = caffe_pb2.LayerParameter()
    top1_layer.name = "accuracy/top1"
    top1_layer.type = "Accuracy"
    top1_layer.bottom.extend([input_data, input_label])
    top1_layer.top.extend(["acc@1"])
    top1_acc_param = top1_layer.accuracy_param
    top1_acc_param.top_k = 1
    # top5 layer
    top5_layer = caffe_pb2.LayerParameter()
    top5_layer.name = "accuracy/top5"
    top5_layer.type = "Accuracy"
    top5_layer.bottom.extend([input_data, input_label])
    top5_layer.top.extend(["acc@5"])
    top5_acc_param = top5_layer.accuracy_param
    top5_acc_param.top_k = 5
    net.layer.extend([top1_layer, top5_layer])
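# As written, both accuracy layers run in the TRAIN and TEST phases. To
# restrict them to validation only, one could attach an include rule, in the
# same way DataLayer below does (an optional tweak, not in the original):
#   state_rule = caffe_pb2.NetStateRule()
#   state_rule.phase = caffe_pb2.TEST
#   top1_layer.include.extend([state_rule])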
def BottleNeckS1(net, name, bottom, dim_in, dim_out, expand):
    """ A stride-1 bottleneck block: 1x1 expansion convolution, 3x3 depthwise
    convolution, 1x1 linear projection without relu, and a residual sum
    Args:
        name: root name for this bottleneck layer
        bottom: string, input blob for the bottleneck layer
        dim_in: channel dimension of the input blob
        dim_out: channel dimension of the output blob
        expand: expansion factor for the bottleneck layer
    Return:
        the output blob name for the bottleneck layer
    """
    expand_out = Conv2dWithoutBias(net, bottom, "{}/expand".format(name),
                                   dim_in*expand, stride=1, kernel=1, pad=0)
    depth_out = DepthwiseConvWithoutBias(net, expand_out, "{}/dw".format(name),
                                         dim_in*expand, kernel=3, stride=1, pad=1)
    linear_out = Conv2dWithoutBiasLinear(net, depth_out, "{}/linear".format(name),
                                         dim_out, kernel=1, stride=1, pad=0)
    sum_out = SumLayer(net, bottom, linear_out, "{}/sum".format(name))
    return sum_out
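# Because the block ends with an element-wise SUM of its input and its
# projection output, BottleNeckS1 requires dim_in == dim_out (and stride 1)
# so that the two blobs have matching shapes. The S2 and PseudoS2 variants
# below drop the residual sum and are used wherever the stride or the
# channel count changes.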
def BottleNeckS2(net, name, bottom, dim_in, dim_out, expand):
    """ A stride-2 bottleneck block: 1x1 expansion convolution, 3x3 depthwise
    convolution with stride 2, and a 1x1 linear projection without relu;
    no residual sum, since the spatial size changes
    Args:
        name: root name for this bottleneck layer
        bottom: string, input blob for the bottleneck layer
        dim_in: channel dimension of the input blob
        dim_out: channel dimension of the output blob
        expand: expansion factor for the bottleneck layer
    Return:
        the output blob name for the bottleneck layer
    """
    expand_out = Conv2dWithoutBias(net, bottom, "{}/expand".format(name),
                                   dim_in*expand, stride=1, kernel=1, pad=0)
    depth_out = DepthwiseConvWithoutBias(net, expand_out, "{}/dw".format(name),
                                         dim_in*expand, kernel=3, stride=2, pad=1)
    linear_out = Conv2dWithoutBiasLinear(net, depth_out, "{}/linear".format(name),
                                         dim_out, kernel=1, stride=1, pad=0)
    return linear_out
def BottleNeckPseudoS2(net, name, bottom, dim_in, dim_out, expand):
    """ A bottleneck block shaped like the stride-2 bottleneck but with
    stride = 1; it omits the residual sum, so it can change the channel count
    Args:
        name: root name for this bottleneck layer
        bottom: string, input blob for the bottleneck layer
        dim_in: channel dimension of the input blob
        dim_out: channel dimension of the output blob
        expand: expansion factor for the bottleneck layer
    Return:
        the output blob name for the bottleneck layer
    """
    expand_out = Conv2dWithoutBias(net, bottom, "{}/expand".format(name),
                                   dim_in*expand, stride=1, kernel=1, pad=0)
    depth_out = DepthwiseConvWithoutBias(net, expand_out, "{}/dw".format(name),
                                         dim_in*expand, stride=1, kernel=3, pad=1)
    linear_out = Conv2dWithoutBiasLinear(net, depth_out, "{}/linear".format(name),
                                         dim_out, kernel=1, stride=1, pad=0)
    return linear_out
def DataLayer(net, train_data, val_data,
              crop_size=224,
              scale=0.017,
              mean_value=[104.0, 117.0, 123.0],
              train_batch_size=32, val_batch_size=32):
    """ Generate the input data layers
    Args:
        train_data: database source (lmdb or leveldb) for training
        val_data: database source (lmdb or leveldb) for validation
        crop_size: the input crop size, such as 224, 299 etc.
        scale: multiplier applied to the input after mean subtraction
        mean_value: per-channel mean subtracted from the input
        train_batch_size: the batch size for the training phase
        val_batch_size: the batch size for the validation phase
    Return:
        the output blob names of the data layer
    """
    # train layer
    train_layer = caffe_pb2.LayerParameter()
    train_layer.name = "data"
    train_layer.type = "Data"
    train_layer.top.extend(["data", "label"])
    train_state_rule = caffe_pb2.NetStateRule()
    train_state_rule.phase = caffe_pb2.TRAIN
    train_layer.include.extend([train_state_rule])
    train_trans_param = train_layer.transform_param
    train_trans_param.mirror = True
    train_trans_param.crop_size = crop_size
    train_trans_param.scale = scale
    train_trans_param.mean_value.extend(mean_value)
    train_data_param = train_layer.data_param
    train_data_param.batch_size = train_batch_size
    train_data_param.source = train_data
    train_data_param.backend = 1  # lmdb
    # validation layer
    val_layer = caffe_pb2.LayerParameter()
    val_layer.name = "data"
    val_layer.type = "Data"
    val_layer.top.extend(["data", "label"])
    val_state_rule = caffe_pb2.NetStateRule()
    val_state_rule.phase = caffe_pb2.TEST
    val_layer.include.extend([val_state_rule])
    val_trans_param = val_layer.transform_param
    val_trans_param.mirror = False
    val_trans_param.crop_size = crop_size
    val_trans_param.scale = scale
    val_trans_param.mean_value.extend(mean_value)
    val_data_param = val_layer.data_param
    val_data_param.batch_size = val_batch_size
    val_data_param.source = val_data
    val_data_param.backend = 1  # lmdb
    net.layer.extend([train_layer, val_layer])
    return "data", "label"
def generate_mobilenetv2(args):
    net = caffe_pb2.NetParameter()
    net.name = "mobilenetv2_{}".format(args.crop_size)
    data, label = DataLayer(net, args.train_data, args.val_data,
                            crop_size=args.crop_size)
    # first conv layer
    conv1 = Conv2dWithoutBias(net, data, "conv1", 32, kernel=3, stride=2, pad=1)
    # bottleneck1
    bottleneck_out1 = BottleNeckPseudoS2(net, "bottleneck1", conv1, 32, 16, 1)
    # bottleneck2 sequence
    bottleneck_out2_1 = BottleNeckS2(net, "bottleneck2/1", bottleneck_out1, 16, 24, 6)
    bottleneck_out2_2 = BottleNeckS1(net, "bottleneck2/2", bottleneck_out2_1, 24, 24, 6)
    # bottleneck3 sequence
    bottleneck_out3 = BottleNeckS2(net, "bottleneck3/1", bottleneck_out2_2, 24, 32, 6)
    for idx in range(2):
        bottleneck_out3 = BottleNeckS1(net, "bottleneck3/{}".format(idx + 2),
                                       bottleneck_out3, 32, 32, 6)
    # bottleneck4 sequence
    bottleneck_out4 = BottleNeckS2(net, "bottleneck4/1", bottleneck_out3, 32, 64, 6)
    for idx in range(3):
        bottleneck_out4 = BottleNeckS1(net, "bottleneck4/{}".format(idx + 2),
                                       bottleneck_out4, 64, 64, 6)
    # bottleneck5 sequence (the 96-channel stage has n = 3 blocks in the
    # MobileNetV2 paper: one stride-1 entry block plus two residual blocks)
    bottleneck_out5 = BottleNeckPseudoS2(net, "bottleneck5/1", bottleneck_out4, 64, 96, 6)
    for idx in range(2):
        bottleneck_out5 = BottleNeckS1(net, "bottleneck5/{}".format(idx + 2),
                                       bottleneck_out5, 96, 96, 6)
    # bottleneck6 sequence
    bottleneck_out6 = BottleNeckS2(net, "bottleneck6/1", bottleneck_out5, 96, 160, 6)
    for idx in range(2):
        bottleneck_out6 = BottleNeckS1(net, "bottleneck6/{}".format(idx + 2),
                                       bottleneck_out6, 160, 160, 6)
    # bottleneck7
    bottleneck_out7 = BottleNeckPseudoS2(net, "bottleneck7", bottleneck_out6, 160, 320, 6)
    # conv2d 1x1
    conv8 = Conv2dWithoutBias(net, bottleneck_out7, "conv8", 1280, kernel=1, stride=1, pad=0)
    # global avg pool
    pool9 = PoolLayer(net, conv8, "pool9", pool_method=1, global_pooling=True)
    # fc layer
    fc10 = FCLayer(net, pool9, "fc10", 1000)
    # softmax and accuracy
    SoftmaxWithLossLayer(net, fc10, label)
    AccuracyLayer(net, fc10, label)
    with open(args.output_net, "w") as net_stream:
        net_stream.write(text_format.MessageToString(net))
def parse_args():
    parser = argparse.ArgumentParser(description="generate mobilenetv2")
    parser.add_argument("--output_net", type=str, required=True,
                        help="output net file containing the model architecture")
    parser.add_argument("--train_data", type=str,
                        default="/mnt/disk1/zhibin/experiment_data/imagenet/caffe_lmdb/ilsvrc12_encoded_train_lmdb",
                        help="train data file path for imagenet")
    parser.add_argument("--val_data", type=str,
                        default="/mnt/disk1/zhibin/experiment_data/imagenet/caffe_lmdb/ilsvrc12_encoded_val_lmdb",
                        help="validation data file path for imagenet")
    parser.add_argument("--crop_size", type=int, default=224,
                        help="input crop size for the network")
    args = parser.parse_args()
    return args
if __name__ == "__main__":
    args = parse_args()
    generate_mobilenetv2(args)
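# The resulting prototxt is a train/val net; to train it, reference it from
# a solver file and run the standard Caffe tool, e.g.:
#   caffe train --solver=solver.prototxt
# (solver.prototxt is a placeholder for your own solver configuration.)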