see http://takatakamanbou.hatenablog.com/
- ex160328_Caffe.py
- ex160328_Theano.py
- convnet160327.py is in https://gist.github.com/takatakamanbou/b4eafa6a0d8c1a47857e
- ILSVRC2012util20151204.py is not publicly available
see http://takatakamanbou.hatenablog.com/
from __future__ import print_function | |
import numpy as np | |
import caffe | |
import cv2 | |
import ILSVRC2012util20151204 as ILSVRC2012util | |
import time | |
# Caffe network definition (deploy prototxt) and the matching trained weights.
fnModel = '../160307-caffe/VGG_ILSVRC/VGG_ILSVRC_16_layers_deploy_upgraded.prototxt'
fnTrained = '../160307-caffe/VGG_ILSVRC/VGG_ILSVRC_16_layers_upgraded.caffemodel'

# Root directory handed to ILSVRC2012util.ILSVRC2012.
dirILSVRC = '/data/ImageNet/ILSVRC2012'

# Per-channel mean subtracted from every image in getData(); the order
# matches the channel order of the arrays returned by cv2.imread (B, G, R).
bgr_mean = np.array([103.939, 116.779, 123.68])
def getData(fnImage):
    """Load one image file and return it preprocessed for the network.

    The image is read with OpenCV, resized so that its shorter side becomes
    256 pixels (aspect ratio kept), center-cropped to 256 x 256, cropped
    again to the central 224 x 224 region, mean-subtracted per channel with
    the module-level ``bgr_mean`` and finally transposed to channel-first
    layout.

    Args:
        fnImage: path of the image file to read.

    Returns:
        A float32 array of shape (3, 224, 224).

    Raises:
        IOError: if OpenCV cannot read ``fnImage``.
    """
    imgRaw = cv2.imread(fnImage)
    if imgRaw is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise IOError('cannot read image: %s' % fnImage)

    h, w = imgRaw.shape[0], imgRaw.shape[1]

    # NOTE: the scaled side uses true division so that np.round really
    # rounds; under Python 2 the plain ``/`` silently floored instead, so
    # results can differ by one pixel from old Python 2 runs.
    # The crop offset uses ``//`` because it is used as a slice index and
    # must stay an int under Python 3.
    if w < h:
        wnew, hnew = 256, int(np.round(h * 256.0 / w))
        o = (hnew - 256) // 2
        img256x256 = cv2.resize(imgRaw, (wnew, hnew))[o:o + 256, :, :]
    else:
        wnew, hnew = int(np.round(w * 256.0 / h)), 256
        o = (wnew - 256) // 2
        img256x256 = cv2.resize(imgRaw, (wnew, hnew))[:, o:o + 256, :]

    img = img256x256[16:16 + 224, 16:16 + 224, :]      # 224 x 224 x 3
    x1 = img - bgr_mean[np.newaxis, np.newaxis, :]
    x2 = np.asarray(np.transpose(x1, axes=[2, 0, 1]), dtype=np.float32)

    return x2                                          # 3 x 224 x 224
if __name__ == "__main__":
    # Evaluate the pre-trained Caffe network on the ILSVRC2012 validation
    # images and report top-1 / top-5 error rates.

    print('# fnModel = ', fnModel)
    print('# fnTrained = ', fnTrained)
    cnnC = caffe.Net(fnModel, fnTrained, caffe.TEST)

    ilsvrc = ILSVRC2012util.ILSVRC2012(dirILSVRC)

    # VGG net uses the class labels sorted by WNID
    index = np.argsort([x['WNID'] for x in ilsvrc.meta])

    useGPU = True
    if useGPU:
        caffe.set_mode_gpu()
    else:
        caffe.set_mode_cpu()
    print('# useGPU = ', useGPU)

    ndat = ilsvrc.V.ndat
    #ndat = 100
    top5 = np.empty((ndat, 5), dtype=int)

    batchsize = 10
    # Integer division: range() needs an int under Python 3.
    nbatch = ndat // batchsize
    # Only whole batches are processed; rows of top5 beyond nproc stay
    # uninitialised and must not be counted in the error rates.
    nproc = nbatch * batchsize
    X = np.empty((batchsize, 3, 224, 224))
    c = 0       # running count of top-1 hits, printed as progress

    ts = time.time()

    for ib in range(nbatch):
        ii = np.arange(ib * batchsize, (ib + 1) * batchsize)
        print(ii[0], c)
        for j in range(batchsize):
            X[j, :, :, :] = getData(ilsvrc.V.fnList[ii[j]])
        rv = cnnC.forward_all(**{cnnC.inputs[0]: X})
        Z = rv[cnnC.outputs[0]]
        # Indices of the five largest outputs per image, best first.
        rank5 = np.argsort(-Z, axis=1)[:, :5]
        for j in range(batchsize):
            top5[ii[j], :] = index[rank5[j, :]]
            c += top5[ii[j], 0] == ilsvrc.V.label[ii[j]]

    print('# time:', time.time() - ts)

    cr1 = np.sum(top5[:nproc, 0] == ilsvrc.V.label[:nproc])
    cr5 = np.sum(top5[:nproc] == ilsvrc.V.label[:nproc, np.newaxis])
    print('# top-1 error rate: ', float(nproc - cr1) / nproc * 100.0)
    print('# top-5 error rate: ', float(nproc - cr5) / nproc * 100.0)
from __future__ import print_function | |
import numpy as np | |
import caffe | |
import theano | |
import theano.tensor as T | |
import convnet160327 as convnet | |
import ILSVRC2012util20151204 as ILSVRC2012util | |
import ex160328_Caffe | |
import time | |
def conv(paramsC, Xdim):
    """Create a convnet.ConvLayer initialised from a Caffe conv layer.

    Args:
        paramsC: parameter blobs of the Caffe layer; element 0 holds the
            filter weights and, when there are exactly two elements,
            element 1 holds the bias.
        Xdim: input shape passed through to convnet.ConvLayer.

    Returns:
        The ConvLayer (ReLu activation, border_mode 1, no filter flipping)
        with the Caffe weights, and the bias if present, loaded into it.
    """
    has_bias = len(paramsC) == 2

    # Arguments for setWeight(): the weights, followed by the bias if any.
    weight_args = [paramsC[0].data]
    if has_bias:
        weight_args.append(paramsC[1].data)

    filters = weight_args[0]
    # Axes 0, 2 and 3 of the weight array give the filter dimensions.
    filter_dim = (filters.shape[0], filters.shape[2], filters.shape[3])

    layer = convnet.ConvLayer(Xdim, filter_dim, afunc='ReLu',
                              withBias=has_bias,
                              border_mode=1, filter_flip=False)
    layer.setWeight(*weight_args)
    return layer
def pool(Xdim):
    """Return a convnet.PoolLayer for input shape ``Xdim`` with ds = (2, 2)."""
    return convnet.PoolLayer(Xdim, ds=(2, 2))
def fc(paramsC, Xdim, afunc, dropout=1.0, T4toMat=False):
    """Create a convnet.FullLayer initialised from a Caffe fully connected layer.

    Args:
        paramsC: parameter blobs of the Caffe layer; element 0 holds the
            weights and, when there are exactly two elements, element 1
            holds the bias.
        Xdim: input dimension passed through to convnet.FullLayer.
        afunc: activation function name passed through to FullLayer.
        dropout: dropout setting passed through to FullLayer.
        T4toMat: passed through to FullLayer.

    Returns:
        The FullLayer with the Caffe weights, and the bias if present,
        loaded into it.
    """
    has_bias = len(paramsC) == 2

    # Arguments for setWeight(): the weights, followed by the bias if any.
    weight_args = [paramsC[0].data]
    if has_bias:
        weight_args.append(paramsC[1].data)

    # The first axis of the weight array is the number of output units.
    n_units = weight_args[0].shape[0]

    layer = convnet.FullLayer(Xdim, n_units, afunc=afunc, withBias=has_bias,
                              dropout=dropout, T4toMat=T4toMat)
    layer.setWeight(*weight_args)
    return layer
def caffe2theano(cnnC):
    """Build a convnet.CNN that mirrors a loaded Caffe network.

    The Caffe network is expected to provide the parameter layers
    conv1_1 .. conv5_3 and fc6 .. fc8. Five stages of convolution layers
    (2, 2, 3, 3 and 3 layers), each followed by a 2 x 2 pooling layer, are
    created, followed by three fully connected layers. Every layer is
    initialised with the corresponding Caffe parameters. A summary of both
    networks is printed along the way.

    Args:
        cnnC: the loaded Caffe network (``caffe.Net``).

    Returns:
        The assembled ``convnet.CNN``.
    """
    p = cnnC.params
    b = cnnC.blobs

    print()
    print('### Caffe-CNN ###')
    print('# params')
    for ln, params in p.items():
        # Print the shape of every parameter blob; a layer without a bias
        # has only one, which conv() and fc() already allow for.
        print('#', ln, *[blob.data.shape for blob in params])
    print('### blobs')
    for bn, blobs in b.items():
        print('#', bn, tuple(blobs.shape))

    print()
    print('### Theano-CNN ###')

    # data
    print('# data')
    batchshape = tuple(b[cnnC.inputs[0]].shape)
    Xdim = batchshape[1:]          # drop the leading batch axis
    L0 = convnet.T4InputLayer(Xdim)
    print(L0.Xshape)
    layersT = [L0]

    # (stage number, number of conv layers); each stage ends with a pool layer
    stages = [(1, 2), (2, 2), (3, 3), (4, 3), (5, 3)]
    shape = L0.Xshape
    Lpool = None
    for stage, nconv in stages:
        suffixes = ['%d_%d' % (stage, k) for k in range(1, nconv + 1)]
        # e.g. '# conv3_1, 3_2, 3_3, pool3'
        print('# conv' + ', '.join(suffixes) + ', pool%d' % stage)
        for suffix in suffixes:
            Lconv = conv(p['conv' + suffix], shape)
            print(Lconv.Yshape, Lconv.kwargs4conv2d)
            layersT.append(Lconv)
            shape = Lconv.Yshape
        Lpool = pool(shape)
        print(Lpool.Yshape, Lpool.kwargs4pool_2d)
        layersT.append(Lpool)
        shape = Lpool.Yshape

    # fc6, 7, 8
    print('# fc6, 7, 8')
    # (Caffe layer name, activation, whether the input is still a 4D tensor)
    fc_specs = [('fc6', 'ReLu', True),
                ('fc7', 'ReLu', False),
                ('fc8', 'linear', False)]
    Din = Lpool.Dout               # input size of fc6 comes from the last pool layer
    for name, afunc, T4toMat in fc_specs:
        Lfc = fc(p[name], Din, afunc=afunc, dropout=1.0, T4toMat=T4toMat)
        print(Lfc.Nunit, Lfc.afunc, Lfc.dropout)
        layersT.append(Lfc)
        Din = Lfc.Nunit

    return convnet.CNN(layersT)
if __name__ == "__main__":
    # Convert the pre-trained Caffe network to a Theano network, evaluate it
    # on the ILSVRC2012 validation images and report top-1 / top-5 error
    # rates.

    theano.config.floatX = 'float32'
    caffe.set_mode_cpu()

    ### reading the pre-trained CNN
    #
    fnModel = ex160328_Caffe.fnModel
    fnTrained = ex160328_Caffe.fnTrained
    print('# fnModel = ', fnModel)
    print('# fnTrained = ', fnTrained)
    cnnC = caffe.Net(fnModel, fnTrained, caffe.TEST)

    ### converting the Caffe-CNN to Theano-CNN
    #
    cnnT = caffe2theano(cnnC)

    ilsvrc = ILSVRC2012util.ILSVRC2012(ex160328_Caffe.dirILSVRC)

    # VGG net uses the class labels sorted by WNID
    index = np.argsort([x['WNID'] for x in ilsvrc.meta])

    ndat = ilsvrc.V.ndat
    #ndat = 100
    top5 = np.empty((ndat, 5), dtype=int)

    batchsize = 10
    # Integer division: range() needs an int under Python 3.
    nbatch = ndat // batchsize
    # Only whole batches are processed; rows of top5 beyond nproc stay
    # uninitialised and must not be counted in the error rates.
    nproc = nbatch * batchsize
    X = np.empty((batchsize, 3, 224, 224), dtype=theano.config.floatX)
    c = 0       # running count of top-1 hits, printed as progress

    ts = time.time()

    for ib in range(nbatch):
        ii = np.arange(ib * batchsize, (ib + 1) * batchsize)
        print(ii[0], c)
        for j in range(batchsize):
            X[j, :, :, :] = ex160328_Caffe.getData(ilsvrc.V.fnList[ii[j]])
        Z = cnnT.output(X)
        # Indices of the five largest outputs per image, best first.
        rank5 = np.argsort(-Z, axis=1)[:, :5]
        for j in range(batchsize):
            top5[ii[j], :] = index[rank5[j, :]]
            c += top5[ii[j], 0] == ilsvrc.V.label[ii[j]]

    print('# time:', time.time() - ts)

    cr1 = np.sum(top5[:nproc, 0] == ilsvrc.V.label[:nproc])
    cr5 = np.sum(top5[:nproc] == ilsvrc.V.label[:nproc, np.newaxis])
    print('# top-1 error rate: ', float(nproc - cr1) / nproc * 100.0)
    print('# top-5 error rate: ', float(nproc - cr5) / nproc * 100.0)