# Generate a MobileNetV2 prototxt for Caffe.
#
################################################################################
from caffe.proto import caffe_pb2
from google.protobuf import text_format
import argparse
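# Example invocation (a sketch; "this_script.py" and the data paths are
# placeholders, see the --train_data/--val_data defaults below):
#   python this_script.py --output_net mobilenetv2.prototxt \
#       --train_data /path/to/train_lmdb --val_data /path/to/val_lmdb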
def Conv2dWithoutBias(net, bottom, top, dim_out, stride=1, kernel=1, pad=0):
    """ A convolution block consisting of bias-free 2d-convolution, batchnorm,
    scale and relu layers
    Args:
        bottom: input blob name for the conv2d block
        top: base name for the layers and output blobs of the block
        dim_out: the number of filters
        stride: the stride for the convolution layer
        kernel: the kernel size for the convolution layer
        pad: the padding for the convolution layer
    Return:
        the output blob name for the conv2d block
    """
    conv_layer = caffe_pb2.LayerParameter()
    conv_layer.type = "Convolution"
    conv_layer.name = top
    conv_layer.bottom.extend([bottom])
    conv_layer.top.extend([top])
    conv_layer_param = caffe_pb2.ParamSpec()
    conv_layer_param.lr_mult = 1.0
    conv_layer_param.decay_mult = 1.0
    conv_layer.param.extend([conv_layer_param])
    conv_param = conv_layer.convolution_param
    conv_param.num_output = dim_out
    conv_param.bias_term = False
    conv_param.kernel_size.append(kernel)
    conv_param.stride.append(stride)
    conv_param.pad.append(pad)
    conv_param.group = 1
    conv_param.weight_filler.type = "msra"
    # batchnorm: its three internal blobs (mean, variance, moving-average
    # factor) are maintained by the layer itself, so lr_mult/decay_mult are
    # zero to keep the solver from updating them
    ret = "{}/bn".format(top)
    bn_layer = caffe_pb2.LayerParameter()
    bn_layer.name = "{}/bn".format(top)
    bn_layer.type = "BatchNorm"
    bn_layer_param = caffe_pb2.ParamSpec()
    bn_layer_param.lr_mult = 0.
    bn_layer_param.decay_mult = 0.
    bn_layer.param.extend([bn_layer_param] * 3)
    bn_layer.bottom.extend([top])
    bn_layer.top.extend([ret])
    # the scale layer supplies the learnable affine transform after batchnorm;
    # scale and relu both operate in place on the batchnorm output blob
    scale_layer = caffe_pb2.LayerParameter()
    scale_layer.name = "{}/scale".format(top)
    scale_layer.type = "Scale"
    scale_layer.scale_param.bias_term = True
    scale_layer.bottom.extend([ret])
    scale_layer.top.extend([ret])
    relu_layer = caffe_pb2.LayerParameter()
    relu_layer.name = "{}/relu".format(top)
    relu_layer.type = "ReLU"
    relu_layer.bottom.extend([ret])
    relu_layer.top.extend([ret])
    net.layer.extend([conv_layer, bn_layer, scale_layer, relu_layer])
    return ret
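# For illustration, a call such as
#   Conv2dWithoutBias(net, "data", "conv1", 32, kernel=3, stride=2, pad=1)
# emits four layers named conv1, conv1/bn, conv1/scale and conv1/relu, and
# returns "conv1/bn", the blob that the scale and relu layers update in place.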
def Conv2dWithoutBiasLinear(net, bottom, top, dim_out, stride=1, kernel=1, pad=0):
    """ A convolution block consisting of bias-free 2d-convolution, batchnorm
    and scale layers; no relu follows, making this the linear projection of a
    bottleneck
    Args:
        bottom: input blob name for the conv2d block
        top: base name for the layers and output blobs of the block
        dim_out: the number of filters
        stride: the stride for the convolution layer
        kernel: the kernel size for the convolution layer
        pad: the padding for the convolution layer
    Return:
        the output blob name for the conv2d block
    """
    conv_layer = caffe_pb2.LayerParameter()
    conv_layer.type = "Convolution"
    conv_layer.name = top
    conv_layer.bottom.extend([bottom])
    conv_layer.top.extend([top])
    conv_layer_param = caffe_pb2.ParamSpec()
    conv_layer_param.lr_mult = 1.0
    conv_layer_param.decay_mult = 1.0
    conv_layer.param.extend([conv_layer_param])
    conv_param = conv_layer.convolution_param
    conv_param.num_output = dim_out
    conv_param.bias_term = False
    conv_param.kernel_size.append(kernel)
    conv_param.stride.append(stride)
    conv_param.pad.append(pad)
    conv_param.group = 1
    conv_param.weight_filler.type = "msra"
    ret = "{}/bn".format(top)
    bn_layer = caffe_pb2.LayerParameter()
    bn_layer.name = "{}/bn".format(top)
    bn_layer.type = "BatchNorm"
    bn_layer_param = caffe_pb2.ParamSpec()
    bn_layer_param.lr_mult = 0.
    bn_layer_param.decay_mult = 0.
    bn_layer.param.extend([bn_layer_param] * 3)
    bn_layer.bottom.extend([top])
    bn_layer.top.extend([ret])
    scale_layer = caffe_pb2.LayerParameter()
    scale_layer.name = "{}/scale".format(top)
    scale_layer.type = "Scale"
    scale_layer.scale_param.bias_term = True
    scale_layer.bottom.extend([ret])
    scale_layer.top.extend([ret])
    net.layer.extend([conv_layer, bn_layer, scale_layer])
    return ret
def DepthwiseConvWithoutBias(net, bottom, top, dim_out, kernel=1, stride=1, pad=0):
    """ A depthwise convolution block consisting of bias-free depthwise
    convolution, batchnorm, scale and relu layers
    Args:
        bottom: string, input blob name for the block
        top: string, base name for the layers and output blobs of the block
        dim_out: the number of filters
        kernel, stride, pad: the window parameters for the convolution layer
    Return:
        the output blob name for the depthwiseconv block
    """
    conv_layer = caffe_pb2.LayerParameter()
    # conv_layer.type = "DepthwiseConvolution"  # dedicated type in some forks
    conv_layer.type = "Convolution"
    conv_layer.name = top
    conv_layer.bottom.extend([bottom])
    conv_layer.top.extend([top])
    conv_layer_param = caffe_pb2.ParamSpec()
    conv_layer_param.lr_mult = 1.0
    conv_layer_param.decay_mult = 1.0
    conv_layer.param.extend([conv_layer_param])
    conv_param = conv_layer.convolution_param
    conv_param.num_output = dim_out
    conv_param.bias_term = False
    conv_param.kernel_size.append(kernel)
    conv_param.stride.append(stride)
    conv_param.pad.append(pad)
    conv_param.group = dim_out  # group == num_output makes this depthwise
    conv_param.weight_filler.type = "msra"
    ret = "{}/bn".format(top)
    bn_layer = caffe_pb2.LayerParameter()
    bn_layer.name = "{}/bn".format(top)
    bn_layer.type = "BatchNorm"
    bn_layer_param = caffe_pb2.ParamSpec()
    bn_layer_param.lr_mult = 0.
    bn_layer_param.decay_mult = 0.
    bn_layer.param.extend([bn_layer_param] * 3)
    bn_layer.bottom.extend([top])
    bn_layer.top.extend([ret])
    scale_layer = caffe_pb2.LayerParameter()
    scale_layer.name = "{}/scale".format(top)
    scale_layer.type = "Scale"
    scale_layer.scale_param.bias_term = True
    scale_layer.bottom.extend([ret])
    scale_layer.top.extend([ret])
    relu_layer = caffe_pb2.LayerParameter()
    relu_layer.name = "{}/relu".format(top)
    relu_layer.type = "ReLU"
    relu_layer.bottom.extend([ret])
    relu_layer.top.extend([ret])
    net.layer.extend([conv_layer, bn_layer, scale_layer, relu_layer])
    return ret
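# Note: with group == num_output, each filter convolves a single input
# channel, which is exactly a depthwise convolution. Stock Caffe computes
# this with its generic grouped Convolution; some Caffe forks provide the
# dedicated "DepthwiseConvolution" layer type commented out above, which
# implements the same operation more efficiently on GPU.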
def SumLayer(net, bottom_left, bottom_right, top):
    """ An element-wise layer for the sum operation
    Return:
        the output blob name for the SUM layer
    """
    sum_layer = caffe_pb2.LayerParameter()
    sum_layer.name = top
    sum_layer.type = "Eltwise"
    sum_layer.bottom.extend([bottom_left, bottom_right])
    sum_layer.top.extend([top])
    sum_param = sum_layer.eltwise_param
    sum_param.operation = 1  # EltwiseParameter.SUM (PROD = 0, SUM = 1, MAX = 2)
    net.layer.extend([sum_layer])
    return top
def PoolLayer(net, bottom, top,
              global_pooling=False,
              kernel=1, pool_method=0, stride=1, pad=0):
    """ Add a pooling layer, either MAX pooling or AVE pooling
    Args:
        net: caffe net
        bottom: the input blob
        top: the output blob
        pool_method: the pooling method, MAX (0) or AVE (1)
        kernel, stride, pad: the window parameters for the pooling layer,
            ignored when global_pooling is set
    Return:
        the output blob name for the pooling layer
    """
    pool_layer = caffe_pb2.LayerParameter()
    pool_layer.name = top
    pool_layer.type = "Pooling"
    pool_layer.bottom.extend([bottom])
    pool_layer.top.extend([top])
    pool_param = pool_layer.pooling_param
    pool_param.pool = pool_method
    if global_pooling:
        pool_param.global_pooling = True
    else:
        pool_param.kernel_size = kernel
        pool_param.stride = stride
        pool_param.pad = pad
    net.layer.extend([pool_layer])
    return top
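# The pool_method values follow caffe.proto's PoolingParameter.PoolMethod
# enum: MAX = 0, AVE = 1, STOCHASTIC = 2. For example, a 3x3/2 max pooling
# (blob names here are illustrative):
#   PoolLayer(net, "conv1/bn", "pool1", kernel=3, stride=2, pool_method=0)
# and the global average pooling used by generate_mobilenetv2 below:
#   PoolLayer(net, blob, "pool", pool_method=1, global_pooling=True)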
def FCLayer(net, bottom, top, dim_out):
    """ Add a fully-connected layer
    Args:
        bottom: the input blob for the fc layer
        top: the output blob for the fc layer
        dim_out: the number of output units
    Return:
        the output blob name
    """
    fc_layer = caffe_pb2.LayerParameter()
    fc_layer.name = top
    fc_layer.type = "InnerProduct"
    fc_layer.bottom.extend([bottom])
    fc_layer.top.extend([top])
    fc_param = fc_layer.inner_product_param
    fc_param.num_output = dim_out
    net.layer.extend([fc_layer])
    return top
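# Note that no weight_filler is set here, unlike the conv blocks above. To
# train from scratch one would typically add fillers (an optional addition,
# not in the original script), e.g.:
#   fc_param.weight_filler.type = "msra"
#   fc_param.bias_filler.type = "constant"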
def SoftmaxWithLossLayer(net, input_data, input_label):
    """ Add a softmaxwithloss layer computing the training loss
    Args:
        input_data: string, data blob
        input_label: string, label blob
    """
    softmax_layer = caffe_pb2.LayerParameter()
    softmax_layer.name = "loss"
    softmax_layer.type = "SoftmaxWithLoss"
    softmax_layer.bottom.extend([input_data, input_label])
    softmax_layer.top.extend(["loss/loss"])
    net.layer.extend([softmax_layer])
def AccuracyLayer(net, input_data, input_label):
    """ Add accuracy layers for training and validation on the imagenet
    dataset, reporting both top-1 and top-5 accuracy
    Args:
        net: caffe net
        input_data: data blob for accuracy
        input_label: label blob for accuracy
    """
    # top1 layer
    top1_layer = caffe_pb2.LayerParameter()
    top1_layer.name = "accuracy/top1"
    top1_layer.type = "Accuracy"
    top1_layer.bottom.extend([input_data, input_label])
    top1_layer.top.extend(["acc@1"])
    top1_acc_param = top1_layer.accuracy_param
    top1_acc_param.top_k = 1
    # top5 layer
    top5_layer = caffe_pb2.LayerParameter()
    top5_layer.name = "accuracy/top5"
    top5_layer.type = "Accuracy"
    top5_layer.bottom.extend([input_data, input_label])
    top5_layer.top.extend(["acc@5"])
    top5_acc_param = top5_layer.accuracy_param
    top5_acc_param.top_k = 5
    net.layer.extend([top1_layer, top5_layer])
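# As written, both accuracy layers run in the TRAIN and TEST phases. To
# restrict them to validation only, one could attach an include rule, in the
# same way DataLayer below does (an optional tweak, not in the original):
#   state_rule = caffe_pb2.NetStateRule()
#   state_rule.phase = caffe_pb2.TEST
#   top1_layer.include.extend([state_rule])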
def BottleNeckS1(net, name, bottom, dim_in, dim_out, expand):
    """ A stride-1 bottleneck block: 1x1 expansion convolution, 3x3 depthwise
    convolution, 1x1 linear projection without relu, and a residual sum
    Args:
        name: root name for this bottleneck layer
        bottom: string, input blob for the bottleneck layer
        dim_in: channel dimension of the input blob
        dim_out: channel dimension of the output blob
        expand: expansion factor for the bottleneck layer
    Return:
        the output blob name for the bottleneck layer
    """
    expand_out = Conv2dWithoutBias(net, bottom, "{}/expand".format(name),
                                   dim_in*expand, stride=1, kernel=1, pad=0)
    depth_out = DepthwiseConvWithoutBias(net, expand_out, "{}/dw".format(name),
                                         dim_in*expand, kernel=3, stride=1, pad=1)
    linear_out = Conv2dWithoutBiasLinear(net, depth_out, "{}/linear".format(name),
                                         dim_out, kernel=1, stride=1, pad=0)
    sum_out = SumLayer(net, bottom, linear_out, "{}/sum".format(name))
    return sum_out
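# Because the block ends with an element-wise SUM of its input and its
# projection output, BottleNeckS1 requires dim_in == dim_out (and stride 1)
# so that the two blobs have matching shapes. The S2 and PseudoS2 variants
# below drop the residual sum and are used wherever the stride or the
# channel count changes.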
def BottleNeckS2(net, name, bottom, dim_in, dim_out, expand):
    """ A stride-2 bottleneck block: 1x1 expansion convolution, 3x3 depthwise
    convolution with stride 2, and a 1x1 linear projection without relu;
    no residual sum, since the spatial size changes
    Args:
        name: root name for this bottleneck layer
        bottom: string, input blob for the bottleneck layer
        dim_in: channel dimension of the input blob
        dim_out: channel dimension of the output blob
        expand: expansion factor for the bottleneck layer
    Return:
        the output blob name for the bottleneck layer
    """
    expand_out = Conv2dWithoutBias(net, bottom, "{}/expand".format(name),
                                   dim_in*expand, stride=1, kernel=1, pad=0)
    depth_out = DepthwiseConvWithoutBias(net, expand_out, "{}/dw".format(name),
                                         dim_in*expand, kernel=3, stride=2, pad=1)
    linear_out = Conv2dWithoutBiasLinear(net, depth_out, "{}/linear".format(name),
                                         dim_out, kernel=1, stride=1, pad=0)
    return linear_out
def BottleNeckPseudoS2(net, name, bottom, dim_in, dim_out, expand):
    """ A bottleneck block shaped like the stride-2 bottleneck but with
    stride = 1; it omits the residual sum, so it can change the channel count
    Args:
        name: root name for this bottleneck layer
        bottom: string, input blob for the bottleneck layer
        dim_in: channel dimension of the input blob
        dim_out: channel dimension of the output blob
        expand: expansion factor for the bottleneck layer
    Return:
        the output blob name for the bottleneck layer
    """
    expand_out = Conv2dWithoutBias(net, bottom, "{}/expand".format(name),
                                   dim_in*expand, stride=1, kernel=1, pad=0)
    depth_out = DepthwiseConvWithoutBias(net, expand_out, "{}/dw".format(name),
                                         dim_in*expand, stride=1, kernel=3, pad=1)
    linear_out = Conv2dWithoutBiasLinear(net, depth_out, "{}/linear".format(name),
                                         dim_out, kernel=1, stride=1, pad=0)
    return linear_out
def DataLayer(net, train_data, val_data,
              crop_size=224,
              scale=0.017,
              mean_value=[104.0, 117.0, 123.0],
              train_batch_size=32, val_batch_size=32):
    """ Generate the input data layers
    Args:
        train_data: database source (lmdb or leveldb) for training
        val_data: database source (lmdb or leveldb) for validation
        crop_size: the input crop size, such as 224, 299 etc.
        scale: multiplier applied to the input after mean subtraction
        mean_value: per-channel mean subtracted from the input
        train_batch_size: the batch size for the training phase
        val_batch_size: the batch size for the validation phase
    Return:
        the output blob names of the data layer
    """
    # train layer
    train_layer = caffe_pb2.LayerParameter()
    train_layer.name = "data"
    train_layer.type = "Data"
    train_layer.top.extend(["data", "label"])
    train_state_rule = caffe_pb2.NetStateRule()
    train_state_rule.phase = caffe_pb2.TRAIN
    train_layer.include.extend([train_state_rule])
    train_trans_param = train_layer.transform_param
    train_trans_param.mirror = True
    train_trans_param.crop_size = crop_size
    train_trans_param.scale = scale
    train_trans_param.mean_value.extend(mean_value)
    train_data_param = train_layer.data_param
    train_data_param.batch_size = train_batch_size
    train_data_param.source = train_data
    train_data_param.backend = 1  # lmdb
    # validation layer
    val_layer = caffe_pb2.LayerParameter()
    val_layer.name = "data"
    val_layer.type = "Data"
    val_layer.top.extend(["data", "label"])
    val_state_rule = caffe_pb2.NetStateRule()
    val_state_rule.phase = caffe_pb2.TEST
    val_layer.include.extend([val_state_rule])
    val_trans_param = val_layer.transform_param
    val_trans_param.mirror = False
    val_trans_param.crop_size = crop_size
    val_trans_param.scale = scale
    val_trans_param.mean_value.extend(mean_value)
    val_data_param = val_layer.data_param
    val_data_param.batch_size = val_batch_size
    val_data_param.source = val_data
    val_data_param.backend = 1  # lmdb
    net.layer.extend([train_layer, val_layer])
    return "data", "label"
def generate_mobilenetv2(args):
    net = caffe_pb2.NetParameter()
    net.name = "mobilenetv2_{}".format(args.crop_size)
    data, label = DataLayer(net, args.train_data, args.val_data,
                            crop_size=args.crop_size)
    # first conv layer
    conv1 = Conv2dWithoutBias(net, data, "conv1", 32, kernel=3, stride=2, pad=1)
    # bottleneck1
    bottleneck_out1 = BottleNeckPseudoS2(net, "bottleneck1", conv1, 32, 16, 1)
    # bottleneck2 sequence
    bottleneck_out2_1 = BottleNeckS2(net, "bottleneck2/1", bottleneck_out1, 16, 24, 6)
    bottleneck_out2_2 = BottleNeckS1(net, "bottleneck2/2", bottleneck_out2_1, 24, 24, 6)
    # bottleneck3 sequence
    bottleneck_out3 = BottleNeckS2(net, "bottleneck3/1", bottleneck_out2_2, 24, 32, 6)
    for idx in range(2):
        bottleneck_out3 = BottleNeckS1(net, "bottleneck3/{}".format(idx + 2),
                                       bottleneck_out3, 32, 32, 6)
    # bottleneck4 sequence
    bottleneck_out4 = BottleNeckS2(net, "bottleneck4/1", bottleneck_out3, 32, 64, 6)
    for idx in range(3):
        bottleneck_out4 = BottleNeckS1(net, "bottleneck4/{}".format(idx + 2),
                                       bottleneck_out4, 64, 64, 6)
    # bottleneck5 sequence (the 96-channel stage has n = 3 blocks in the
    # MobileNetV2 paper: one stride-1 entry block plus two residual blocks)
    bottleneck_out5 = BottleNeckPseudoS2(net, "bottleneck5/1", bottleneck_out4, 64, 96, 6)
    for idx in range(2):
        bottleneck_out5 = BottleNeckS1(net, "bottleneck5/{}".format(idx + 2),
                                       bottleneck_out5, 96, 96, 6)
    # bottleneck6 sequence
    bottleneck_out6 = BottleNeckS2(net, "bottleneck6/1", bottleneck_out5, 96, 160, 6)
    for idx in range(2):
        bottleneck_out6 = BottleNeckS1(net, "bottleneck6/{}".format(idx + 2),
                                       bottleneck_out6, 160, 160, 6)
    # bottleneck7
    bottleneck_out7 = BottleNeckPseudoS2(net, "bottleneck7", bottleneck_out6, 160, 320, 6)
    # conv2d 1x1
    conv8 = Conv2dWithoutBias(net, bottleneck_out7, "conv8", 1280, kernel=1, stride=1, pad=0)
    # global avg pool
    pool9 = PoolLayer(net, conv8, "pool9", pool_method=1, global_pooling=True)
    # fc layer
    fc10 = FCLayer(net, pool9, "fc10", 1000)
    # softmax and accuracy
    SoftmaxWithLossLayer(net, fc10, label)
    AccuracyLayer(net, fc10, label)
    with open(args.output_net, "w") as net_stream:
        net_stream.write(text_format.MessageToString(net))
def parse_args():
    parser = argparse.ArgumentParser(description="generate mobilenetv2")
    parser.add_argument("--output_net", type=str, required=True,
                        help="output net file containing the model architecture")
    parser.add_argument("--train_data", type=str,
                        default="/mnt/disk1/zhibin/experiment_data/imagenet/caffe_lmdb/ilsvrc12_encoded_train_lmdb",
                        help="train data file path for imagenet")
    parser.add_argument("--val_data", type=str,
                        default="/mnt/disk1/zhibin/experiment_data/imagenet/caffe_lmdb/ilsvrc12_encoded_val_lmdb",
                        help="validation data file path for imagenet")
    parser.add_argument("--crop_size", type=int, default=224,
                        help="input crop size for the network")
    args = parser.parse_args()
    return args
if __name__ == "__main__":
    args = parse_args()
    generate_mobilenetv2(args)
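# The resulting prototxt is a train/val net; to train it, reference it from
# a solver file and run the standard Caffe tool, e.g.:
#   caffe train --solver=solver.prototxt
# (solver.prototxt is a placeholder for your own solver configuration.)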