Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@cysin
Created November 20, 2018 00:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cysin/f8c89d6eb1f66fb50c3ba3447b3f4466 to your computer and use it in GitHub Desktop.
Save cysin/f8c89d6eb1f66fb50c3ba3447b3f4466 to your computer and use it in GitHub Desktop.
mbn.py
import numpy as np
import sys,os
import argparse
caffe_root = '/root/work/caffe/'
sys.path.insert(0, caffe_root + 'python')
os.environ['GLOG_minloglevel'] = '3'
import caffe
#import google.protobuf as pb
import google.protobuf.text_format
import pprint as pp
def make_parser():
    """Build the CLI parser for the BatchNorm-merge tool.

    Four required options name the source files (prototxt/caffemodel
    that still contain BatchNorm layers) and the destination files the
    merged network is written to.
    """
    p = argparse.ArgumentParser(
        description='Merge bn layer to conv layer, and automatically convert layer names.')
    for flag, help_text in (
            ('--tn', '.prototxt with bn'),
            ('--tm', '.caffemodel with bn'),
            ('--dn', '.prototxt to save'),
            ('--dm', '.caffemodel to save')):
        p.add_argument(flag, type=str, required=True, help=help_text)
    return p
if __name__ == '__main__':
    parser = make_parser()
    args = parser.parse_args()

    # ------------------------------------------------------------------
    # Pass 1: rewrite the net definition (.prototxt).
    # Every (Depthwise)Convolution immediately followed by BatchNorm and
    # Scale is collapsed into a single biased Convolution; the BN/Scale
    # layers are dropped, and any blob the dropped Scale layer renamed is
    # mapped back to the conv's own top so later layers still connect.
    # ------------------------------------------------------------------
    bn_eps = []  # eps of each folded BatchNorm, consumed in order in pass 2
    with open(args.tn) as f:
        net = caffe.proto.caffe_pb2.NetParameter()
        google.protobuf.text_format.Parse(f.read(), net)

    num_layers = len(net.layer)
    layers_to_remove = []
    name_maps = {}  # removed Scale top blob name -> conv top blob name
    for i in range(num_layers):
        l = net.layer[i]
        # Bounds guard (i + 2 < num_layers) prevents IndexError when a
        # Convolution sits at the tail of the net without BN/Scale.
        if (l.type in ['Convolution', 'DepthwiseConvolution']
                and i + 2 < num_layers
                and net.layer[i + 1].type == 'BatchNorm'
                and net.layer[i + 2].type == 'Scale'):
            bn_eps.append(net.layer[i + 1].batch_norm_param.eps)
            # If the Scale layer emitted a differently named top blob,
            # remember the rename so downstream bottoms can be patched.
            if i + 3 < num_layers and net.layer[i + 2].top != net.layer[i].top:
                name_maps[net.layer[i + 2].top[0]] = net.layer[i].top[0]
            # The merged conv absorbs the BN shift into a bias term.
            l.convolution_param.bias_term = True
            l.convolution_param.bias_filler.type = 'constant'
            l.convolution_param.bias_filler.value = 0
            layers_to_remove.append(net.layer[i + 1])
            layers_to_remove.append(net.layer[i + 2])
        # Patch bottoms that still reference a removed Scale layer's top.
        for j, v in enumerate(l.bottom):
            if v in name_maps:
                l.bottom[j] = name_maps[v]
    for l in layers_to_remove:
        net.layer.remove(l)

    print("Saving new net models to '%s'" % args.dn)
    with open(args.dn, 'w') as f:
        f.write(str(net))

    # ------------------------------------------------------------------
    # Pass 2: fold the BN/Scale statistics into the conv parameters.
    #   var  = var / scaleFactor ; mean = mean / scaleFactor
    #   rstd = 1 / sqrt(var + eps)
    #   w'   = w * rstd * scale
    #   b'   = (b - mean) * rstd * scale + shift
    # ------------------------------------------------------------------
    dst_net = caffe.Net(args.dn, caffe.TEST)
    net = caffe.Net(args.tn, args.tm, caffe.TEST)
    num_layers = len(net.layers)
    i = 0
    dst_i = 0
    bn_i = 0
    while i < num_layers:
        ltype = net.layers[i].type
        # Same bounds guard as pass 1 so a trailing conv cannot raise.
        if (ltype in ['Convolution', 'DepthwiseConvolution']
                and i + 2 < num_layers
                and net.layers[i + 1].type == 'BatchNorm'
                and net.layers[i + 2].type == 'Scale'):
            conv = net.layers[i].blobs
            bn = net.layers[i + 1].blobs
            scale = net.layers[i + 2].blobs

            weight = conv[0].data
            channels = weight.shape[0]
            # The source conv may be bias-less; the merged one has a bias.
            bias = conv[1].data if len(conv) > 1 else np.zeros(channels)

            mean = bn[0].data
            var = bn[1].data
            scalef = bn[2].data  # BN moving-average scale factor
            scales = scale[0].data
            shift = scale[1].data
            if scalef != 0:
                scalef = 1. / scalef
            mean = mean * scalef
            var = var * scalef
            eps = bn_eps[bn_i]
            bn_i += 1
            rstd = 1 / np.sqrt(var + eps)
            # Broadcast per-channel factors over the (C, ...) weight tensor.
            weight = (weight * rstd.reshape((channels, 1, 1, 1))
                             * scales.reshape((channels, 1, 1, 1)))
            bias = (bias - mean) * rstd * scales + shift
            dst_conv = dst_net.layers[dst_i].blobs
            dst_conv[0].data[...] = weight
            dst_conv[1].data[...] = bias
            i += 3  # skip the folded BatchNorm and Scale layers
        else:
            # Any other layer: straight parameter copy.
            for j, w in enumerate(net.layers[i].blobs):
                dst_net.layers[dst_i].blobs[j].data[...] = w.data
            i += 1
        dst_i += 1

    print("Saving new net weights to '%s'" % args.dm)
    dst_net.save(args.dm)

    # ------------------------------------------------------------------
    # Sanity check: the original and merged nets must agree (to numeric
    # precision) on random input.
    # ------------------------------------------------------------------
    print("Now testing new model and weights:")
    caffe.set_mode_gpu()
    net1 = caffe.Net(args.tn, args.tm, caffe.TEST)
    net2 = caffe.Net(args.dn, args.dm, caffe.TEST)
    for i in range(100):
        # randint's upper bound is exclusive: 256 reproduces the inclusive
        # [0, 255] range of the deprecated np.random.random_integers.
        input_data = np.random.randint(0, 256, net1.blobs['data'].data.shape)
        net1.blobs['data'].data[...] = input_data
        net2.blobs['data'].data[...] = input_data
        # list(...) makes dict-view indexing valid on both Python 2 and 3.
        d1 = list(net1.forward().values())[0]
        d2 = list(net2.forward().values())[0]
        # Vectorized mean squared difference (replaces the element-wise
        # np.ndenumerate Python loop).
        variance = np.mean((np.asarray(d1, dtype=np.float64)
                            - np.asarray(d2, dtype=np.float64)) ** 2)
        if variance < 0.0000001:
            print("iter %d diff variance: %f OK!" % (i, variance))
        else:
            print("iter %d diff variance: %f ERR!" % (i, variance))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment