[TOC]
pytorch_to_caffe模型转换工具依赖:
pytorch-1.1.0 (mask-rcnn 限定为pytorch0.4.0)
caffe-ssd版本:地址
需要在caffe-ssd中添加一些不支持的算子及操作
(1)interpolate算子
(2)permute算子
(3)roi_align_pool算子
(4)roi_pooling算子
(5)upsample算子
(6)reorg算子
(7)在pooling层中添加ceil_mode参数
numpy
opencv
├─README.md //工具说明
├─Caffe //caffe相关依赖
├─convert //转换工具
└─example //转换示例
├─1-fpn
├─2-fcn
├─3-yolov2
├─4-yolov3
├─5-fasterrcnn
├─6-deeplabv3
├─7-deeplabv3+
├─8-maskrcnn
├─9-ssd
├─10-mtcnn
└─11-openpose
├─Caffe
├─__init__.py
├─caffe.proto //caffe编译生成文件
├─caffe_net.py
├─caffe_pb2.py //caffe编译生成文件
└─layer_param.py
(1)caffe.proto及caffe_pb2.py从编译后的caffe中获取。
/xxx/caffe-master/src/caffe/proto/caffe.proto
/xxx/caffe-master/python/caffe/proto/caffe_pb2.py
/xxx/表示caffe的安装路径
(2)caffe_net.py及layer_param.py文件从以下地址获取:
https://github.com/xiaoranchenwai/re-id_mgn/tree/master/pytorch2caffe_MGN/Caffe
对layer_param.py进行修改:
修改前:
if self.type!='Convolution':
修改后:
if self.type not in ['Convolution','Deconvolution']:
修改前:
# Original upstream pool_param (shown as the 修改前 reference; no ceil_mode support).
def pool_param (self ,type = 'MAX' ,kernel_size = 2 ,stride = 2 ,pad = None ):
pool_param = pb .PoolingParameter ()
pool_param .pool = pool_param .PoolMethod .Value (type )
# kernel_size/stride may be a single int or an (h, w) pair — pair_process normalizes
if len (pair_process (kernel_size )) > 1 :
pool_param .kernel_h = kernel_size [0 ]
pool_param .kernel_w = kernel_size [1 ]
else :
pool_param .kernel_size = kernel_size
if len (pair_process (stride )) > 1 :
pool_param .stride_h = stride [0 ]
pool_param .stride_w = stride [1 ]
else :
pool_param .stride = stride
if pad :
pool_param .pad = pad
self .param .pooling_param .CopyFrom (pool_param )
修改后:
# Modified pool_param (修改后): adds ceil_mode so pytorch floor-mode pooling can be expressed.
def pool_param (self ,type = 'MAX' ,kernel_size = 2 ,stride = 2 ,pad = None ,ceil_mode = False ):
pool_param = pb .PoolingParameter ()
pool_param .pool = pool_param .PoolMethod .Value (type )
if len (pair_process (kernel_size )) > 1 :
pool_param .kernel_h = kernel_size [0 ]
pool_param .kernel_w = kernel_size [1 ]
else :
pool_param .kernel_size = kernel_size
if len (pair_process (stride )) > 1 :
pool_param .stride_h = stride [0 ]
pool_param .stride_w = stride [1 ]
else :
pool_param .stride = stride
if pad :
pool_param .pad = pad
# ceil_mode is written only when False — presumably the patched caffe-ssd proto
# defaults ceil_mode to true, so only floor mode needs an explicit flag (TODO confirm)
if not ceil_mode :
pool_param .ceil_mode = ceil_mode
self .param .pooling_param .CopyFrom (pool_param )
pytorch_to_caffe转换工具调用pytorch_to_caffe.py模块中的trans_net函数,在pytorch模型的推理过程中完成对应pytorch操作到caffe操作的转换。
函数接口如下:
trans_net(net,input_var,name="model_name")
net:表示pytorch模型。
input_var:表示pytorch模型的输入,以列表的形式输入。
name:表示待转换pytorch模型的名称。
save_prototxt(save_name):
save_caffemodel(save_name):
save_name:表示待保存的prototxt或caffemodel文件路径
在pytorch_to_caffe转换过程中主要包含以下四种情况:
(1)对torch.nn.functional函数中算子的转换
(2)对torch operation操作的转换
(3)对Tensor/Variable operation操作的转换
(4)对pytorch自定义算子的转换
对torch.nn.functional函数的转换
torch.nn.functional函数包含了pytorch中的标准算子,对pytorch标准算子的转换过程如下所示,以对convolution算子的转换为例。
(1) 在pytorch_to_caffe.py模块中添加convolution算子对应的替换函数。
def _conv2d (raw , input , weight , bias = None , stride = 1 , padding = 0 , dilation = 1 , groups = 1 ):
"""
2D convolution operator of torch to caffe
:param raw: torch operator
:param input: operator input
:param weight: operator weight
:param bias: operator bias
:param stride: stride value
:param padding: padding value
:param dilation: dilation value
:param groups: groups value
:return: the result of torch operator
"""
# run the original torch op first so its output tensor can be registered as a caffe blob
x = raw (input , weight , bias , stride , padding , dilation , groups )
name = log .add_layer (name = 'conv' )
log .add_blobs ([x ], name = 'conv' )
layer = caffe_net .Layer_param (name = name , type = 'Convolution' ,
bottom = [log .blobs (input )], top = [log .blobs (x )])
# out channels = x.size()[1]; kernel size = weight.size()[2:]
layer .conv_param (x .size ()[1 ], weight .size ()[2 :], stride = _pair (stride ),
pad = _pair (padding ), dilation = _pair (dilation ),
bias_term = bias is not None , groups = groups )
# copy the trained weights (and bias, when present) into the caffe layer
if bias is not None :
layer .add_data (weight .cpu ().data .numpy (), bias .cpu ().data .numpy ())
else :
layer .param .convolution_param .bias_term = False
layer .add_data (weight .cpu ().data .numpy ())
# attach the converted layer to the caffe net being built
log .cnet .add_layer (layer )
return x
代码说明:
执行pytorch推理过程:
x = raw (input , weight , bias , stride , padding , dilation , groups )
其中,raw表示pytorch操作;weight,bias,stride,padding,dilation,groups表示convolution算子的参数。
添加pytorch中convolution算子对应的caffe层:
name = log .add_layer (name = 'conv' )
log .add_blobs ([x ], name = 'conv' )
layer = caffe_net .Layer_param (name = name , type = 'Convolution' ,
bottom = [log .blobs (input )], top = [log .blobs (x )])
添加caffe层对应的参数及权重:
layer .conv_param (x .size ()[1 ], weight .size ()[2 :], stride = _pair (stride ),
pad = _pair (padding ), dilation = _pair (dilation ),
bias_term = bias is not None , groups = groups )
if bias is not None :
layer .add_data (weight .cpu ().data .numpy (), bias .cpu ().data .numpy ())
else :
layer .param .convolution_param .bias_term = False
layer .add_data (weight .cpu ().data .numpy ())
将替换后的caffe层连接到caffe网络中:
log .cnet .add_layer (layer )
返回pytorch算子推理的结果:
(2)添加对pytorch算子的替换
F .conv2d = Rp (F .conv2d , _conv2d )
其中,F.conv2d表示pytorch算子操作,_conv2d表示对应的替换函数。
对pytorch中torch对应操作的转换过程如下所示,以torch.cat转换为例。
(1)在pytorch_to_caffe.py模块中添加torch.cat操作对应的替换函数。
def _cat (raw , inputs , dim = 0 ):
"""
cat of torch to caffe
:param raw: torch operator
:param inputs: inputs
:param dim: dimension
:return: the result of torch operator
"""
# run the original torch op first so its output tensor can be registered as a caffe blob
x = raw (inputs , dim )
# collect the caffe blob name of every concat input
bottom_blobs = []
for input in inputs :
bottom_blobs .append (log .blobs (input ))
layer_name = log .add_layer (name = 'cat' )
top_blobs = log .add_blobs ([x ], name = 'cat' )
layer = caffe_net .Layer_param (name = layer_name , type = 'Concat' ,
bottom = bottom_blobs , top = top_blobs )
# torch cat dim maps directly to caffe Concat axis
layer .param .concat_param .axis = dim
# attach the converted layer to the caffe net being built
log .cnet .add_layer (layer )
return x
代码说明:
执行pytorch推理过程:
添加torch.cat操作对应的caffe层:
bottom_blobs = []
for input in inputs :
bottom_blobs .append (log .blobs (input ))
layer_name = log .add_layer (name = 'cat' )
top_blobs = log .add_blobs ([x ], name = 'cat' )
layer = caffe_net .Layer_param (name = layer_name , type = 'Concat' ,
bottom = bottom_blobs , top = top_blobs )
添加caffe层对应的相关参数:
layer .param .concat_param .axis = dim
将替换后的caffe层连接到caffe网络中:
log .cnet .add_layer (layer )
返回pytorch算子推理的结果:
(2)添加对pytorch操作的替换
torch .cat = Rp (torch .cat , _cat )
其中,torch.cat表示pytorch操作,_cat表示对应的替换函数。
对Tensor/Variable operation操作的转换
对pytorch中Tensor/Variable对应操作的转换过程如下所示,以torch.Tensor.view转换为例。
(1)在pytorch_to_caffe.py模块中添加view操作对应的替换函数。
def _view (input , * args ):
    """
    view of torch to caffe
    :param input: input tensor
    :param args: target shape dimensions
    :return: the result of torch operator
    """
    # run the original torch op first so its output tensor can be registered as a caffe blob
    x = raw_view (input , * args )
    # skip tracing when conversion is not active (e.g. inside a custom op)
    if not NET_INITTED :
        return x
    layer_name = log .add_layer (name = 'view' )
    top_blobs = log .add_blobs ([x ], name = 'view' )
    layer = caffe_net .Layer_param (name = layer_name , type = 'Reshape' ,
                                   bottom = [log .blobs (input )], top = top_blobs )
    dims = list (args )
    dims [0 ] = 0  # the first dim should be batch_size (0 = keep bottom dim in caffe Reshape)
    layer .param .reshape_param .shape .CopyFrom (caffe_net .pb .BlobShape (dim = dims ))
    log .cnet .add_layer (layer )
    return x
代码说明:
执行pytorch推理过程:
x = raw_view (input , * args )
判断该操作是否需要转换,如果不要转换则直接返回pytorch推理结果。
if not NET_INITTED :
return x
添加view操作对应的caffe层:
layer_name = log .add_layer (name = 'view' )
top_blobs = log .add_blobs ([x ], name = 'view' )
layer = caffe_net .Layer_param (name = layer_name , type = 'Reshape' ,
bottom = [log .blobs (input )], top = top_blobs )
添加caffe层对应的相关参数:
dims = list (args )
dims [0 ] = 0 # the first dim should be batch_size
layer .param .reshape_param .shape .CopyFrom (caffe_net .pb .BlobShape (dim = dims ))
将替换后的caffe层连接到caffe网络中:
log .cnet .add_layer (layer )
返回pytorch推理的结果:
(2)添加对pytorch操作的替换
raw_view = t .view
t .view = _view
其中raw_view及t.view表示pytorch操作,_view表示替换函数。
对pytorch中自定义算子的转换过程如下所示,以fasterrcnn模型中的proposal算子转换为例。
(1)在pytorch_to_caffe.py模块中导入pytorch自定义算子模块。
from model .rpn .proposal_layer import _ProposalLayer
(2)在pytorch_to_caffe.py模块中添加自定义算子对应的替换函数。
def _proposal (self ,input ):
"""
custom proposal operator of torch to caffe (mapped to a caffe Python layer)
:param self: the _ProposalLayer instance
:param input: layer inputs; input[0], input[1], input[2] become the caffe bottom blobs
:return: the result of torch operator
"""
global NET_INITTED
# disable tracing so torch/Tensor operations inside the custom op are not converted
NET_INITTED = False
x = raw_proposal_forward (self ,input )
name = log .add_layer (name = 'proposal' )
log .add_blobs ([x ],name = 'proposal' )
layer = caffe_net .Layer_param (name = name ,type = 'Python' ,
bottom = [log .blobs (input [0 ]),log .blobs (input [1 ]),
log .blobs (input [2 ])],top = [log .blobs (x )])
# the caffe Python layer loads class ProposalLayer from module rpn.proposal_layer at runtime
layer .param .python_param .module = 'rpn.proposal_layer'
layer .param .python_param .layer = 'ProposalLayer'
log .cnet .add_layer (layer )
# re-enable tracing once the custom op has been converted
NET_INITTED = True
return x
代码说明:
设置全局变量NET_INITTED为False,避免替换自定义算子内部的torch operation及Tensor/Variable operation操作:
global NET_INITTED
NET_INITTED = False
执行pytorch推理过程:
x = raw_proposal_forward (self ,input )
添加自定义算子对应的caffe层,将pytorch自定义算子转换为caffe框架下的python层自定义算子。
name = log .add_layer (name = 'proposal' )
log .add_blobs ([x ],name = 'proposal' )
layer = caffe_net .Layer_param (name = name ,type = 'Python' ,
bottom = [log .blobs (input [0 ]),log .blobs (input [1 ]),
log .blobs (input [2 ])],top = [log .blobs (x )])
添加caffe层对应的相关参数
layer .param .python_param .module = 'rpn.proposal_layer'
layer .param .python_param .layer = 'ProposalLayer'
将替换后的caffe层连接到caffe网络中,并将NET_INITTED变量置为True:
log .cnet .add_layer (layer )
NET_INITTED = True
返回pytorch推理的结果:
(3)添加自定义算子对应的替换
raw_proposal_forward = _ProposalLayer .forward
_ProposalLayer .forward = _proposal
其中_ProposalLayer.forward及raw_proposal_forward表示pytorch自定义算子,_proposal表示替换函数。
(1)将转换工具中的Caffe及convert文件夹拷贝到模型工程目录下
例如:
└─2-fcn //pytorch模型工程目录
├─assets
├─bag_data
├─bag_data_msk
├─Caffe //转换工具
├─checkpoints
├─convert //转换工具
├─BagData.py
├─FCN.py
├─onehot.py
├─README.md
└─train.py
修改convert目录下的pytorch_to_caffe.py模块,添加下列代码:
from torchvision import models
转换代码:
# NOTE(review): Python 2 print syntax below — this toolchain presumably runs under
# Python 2 (as is common for caffe); confirm the target interpreter.
from convert import pytorch_to_caffe
import torch
from torch .autograd import Variable
# load the trained pytorch model and switch it to inference mode
net = torch .load ('./checkpoints/fcn_model_0.pt' )
net .eval ()
# dummy input: batch 1, 3 channels, 160x160
input_var = Variable (torch .randn (1 ,3 ,160 ,160 ))
print '****************start pytorch2caffe *******************'
# trace the forward pass and convert each op to its caffe layer
pytorch_to_caffe .trans_net (net ,[input_var ],'FCN' )
pytorch_to_caffe .save_prototxt ('./fcn.prototxt' )
pytorch_to_caffe .save_caffemodel ('./fcn.caffemodel' )
print '******************done*********************************'
代码说明:
导入相关依赖
from convert import pytorch_to_caffe
import torch
from torch .autograd import Variable
导入训练好的模型,并设置为推理模式
net = torch .load ('./checkpoints/fcn_model_0.pt' )
net .eval ()
构造模型输入
input_var = Variable (torch .randn (1 ,3 ,160 ,160 ))
进行pytorch模型到caffe模型的转换
pytorch_to_caffe .trans_net (net ,[input_var ],'FCN' )
保存转换后的caffe模型
pytorch_to_caffe .save_prototxt ('./fcn.prototxt' )
pytorch_to_caffe .save_caffemodel ('./fcn.caffemodel' )
完成模型的转换后,如果需要可以对转换后的模型进行精度验证分析。最终输出的是pytorch推理结果向量与caffe推理结果向量之间的数据误差的最大值、均值、最小值、中值。
验证代码:
import torch
import caffe
from torch .autograd import Variable
import numpy as np
# construct a random image-like input with values in [0, 255)
input_size = [1 ,3 ,160 ,160 ]
image = np .random .randint (0 ,255 ,input_size )
input_data = image .astype (np .float32 )
# do inference with the pytorch model
net = torch .load ('./checkpoints/fcn_model_0.pt' )
net .eval ()
input_var = Variable (torch .from_numpy (input_data ))
pytorch_out = net .forward (input_var )
# do inference with the converted caffe model
model = './fcn.prototxt'
weights = './fcn.caffemodel'
caffe_net = caffe .Net (model ,weights ,caffe .TEST )
# NOTE(review): 'blob1' / 'conv14' are presumably the input/output blob names the
# converter generated for this FCN model — confirm against fcn.prototxt
caffe_net .blobs ['blob1' ].data [...] = input_data
caffe_out = caffe_net .forward ()
# difference between pytorch model result and caffe model result
print '******diff******'
diff_conv14 = np .abs (pytorch_out .data .cpu ().numpy ().flatten () - caffe_out ['conv14' ].flatten ())
print 'diff:'
print 'diff_max:' ,diff_conv14 .max (),' ' ,'diff_mean:' ,diff_conv14 .mean (),' ' ,\
'diff_min:' ,diff_conv14 .min (),' ' ,'diff_median:' ,np .median (diff_conv14 )
在pytorch模型转caffe模型的过程中,如果遇到不支持的算子,则按照"工具原理"中对应的情况添加该算子的转换。