belltailjp/prepare.sh

## prepare.sh
#!/bin/sh
# Download and unpack the data used by the evaluation into the current
# directory:
#   * ILSVRC2012_img_val.tar - the ILSVRC2012 validation images
#   * caffe_ilsvrc12.tar.gz  - Caffe's ILSVRC12 auxiliary files
#     (presumably the source of the val.txt label list; verify after unpacking)
#
# Usage: sh prepare.sh

# Stop at the first failing command so tar never runs on a missing or
# partially downloaded archive, and treat unset variables as errors.
set -eu

# -c resumes an interrupted download and is a no-op when the file is already
# complete; without it a re-run would save to "<name>.1" and leave the stale
# (possibly truncated) archive in place to be extracted.
wget -c http://www.image-net.org/challenges/LSVRC/2012/nnoupb/ILSVRC2012_img_val.tar
tar xf ILSVRC2012_img_val.tar

wget -c http://dl.caffe.berkeleyvision.org/caffe_ilsvrc12.tar.gz
tar xf caffe_ilsvrc12.tar.gz

## result.md

      
    Raw
  

              result.md
            
          
Single image inference Top-5 accuracy (with 2000 samples)

| CNN | `__call__` | `predict` | `predict(oversample)` |
|---|---|---|---|
| GoogLeNet | 88.7% | 74.1% | 75.5% |
| VGG16Layers | 86.85% | 74.0% | 74.95% |
| ResNet50Layers | 89.6% | 78.6% | 79.95% |
| ResNet101Layers | 90.05% | 78.75% | 80.5% |
| ResNet152Layers | 90.8% | 80.65% | 81.4% |


## run.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Measure single-image top-1 / top-5 accuracy of a Chainer vision model.

Reads ``<image path> <label>`` pairs from ``val.txt`` in the current
directory, classifies the first 2000 images on the GPU and prints both
accuracies as percentages.

Switches:
    use_predict -- True: classify through ``net.predict``;
                   False: resize to 224x224, subtract the mean and call the
                   network directly, reading its ``prob`` layer.
    oversample  -- forwarded to ``net.predict`` (unused when ``use_predict``
                   is False).
"""

import os, chainer, cupy, numpy, cv2, tqdm

use_predict = True
oversample = True

net = chainer.links.GoogLeNet()
#net = chainer.links.VGG16Layers()
#net = chainer.links.ResNet50Layers()
#net = chainer.links.ResNet101Layers()
#net = chainer.links.ResNet152Layers()

net.to_gpu()
print(type(net).__name__, use_predict, oversample)

n_tp_top1 = n_tp_top5 = 0
# Close the label file deterministically and skip blank lines, which would
# otherwise break the (path, label) unpacking in the loop below.
with open('val.txt') as label_file:
    images = [s.split() for s in label_file if s.strip()]
images = images[:2000]      # because evaluating the entire images is time consuming...

# Per-channel mean in BGR order, shaped (3, 1, 1) to broadcast over a CHW image.
mean = numpy.array([103.939, 116.779, 123.68], dtype=numpy.float32).reshape((3, 1, 1))
with chainer.function.no_backprop_mode(), chainer.using_config('train', False):
    for p, label in tqdm.tqdm(images):
        x = cv2.imread(p, cv2.IMREAD_COLOR)
        if x is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError('cannot read image: ' + p)
        if use_predict:
            # cv2.imread returns BGR, while predict() documents RGB input and
            # does the RGB->BGR flip itself; feeding BGR would leave the
            # channels swapped when they reach the network.
            x = cv2.cvtColor(x, cv2.COLOR_BGR2RGB)
            y = net.predict([x], oversample=oversample)
        else:
            # Manual preprocessing: keep OpenCV's BGR order, HWC -> CHW,
            # subtract the mean, then add a batch axis.
            x = cv2.resize(x, (224, 224)).transpose(2, 0, 1).astype(numpy.float32) - mean
            y = net(cupy.array(x[None, :]), layers=['prob'])['prob']

        # Five highest-scoring class indices, copied to the host as plain ints
        # so the comparisons below are ordinary Python operations.
        top5 = cupy.asnumpy(cupy.argsort(y.data.flatten())[::-1][:5]).tolist()
        label = int(label)
        n_tp_top1 += (1 if label == top5[0] else 0)
        n_tp_top5 += (1 if label in top5 else 0)

print(f'Top1: {100.0*n_tp_top1/len(images)}%')
print(f'Top5: {100.0*n_tp_top5/len(images)}%')
	#!/bin/sh
	# Download and unpack the data used by the evaluation into the current
	# directory:
	#   * ILSVRC2012_img_val.tar - the ILSVRC2012 validation images
	#   * caffe_ilsvrc12.tar.gz  - Caffe's ILSVRC12 auxiliary files
	#     (presumably the source of the val.txt label list; verify after unpacking)
	#
	# Usage: sh prepare.sh

	# Stop at the first failing command so tar never runs on a missing or
	# partially downloaded archive, and treat unset variables as errors.
	set -eu

	# -c resumes an interrupted download and is a no-op when the file is already
	# complete; without it a re-run would save to "<name>.1" and leave the stale
	# (possibly truncated) archive in place to be extracted.
	wget -c http://www.image-net.org/challenges/LSVRC/2012/nnoupb/ILSVRC2012_img_val.tar
	tar xf ILSVRC2012_img_val.tar

	wget -c http://dl.caffe.berkeleyvision.org/caffe_ilsvrc12.tar.gz
	tar xf caffe_ilsvrc12.tar.gz
CNN	__call__	predict	predict(oversample)
GoogLeNet	88.7%	74.1%	75.5%
VGG16Layers	86.85%	74.0%	74.95%
ResNet50Layers	89.6%	78.6%	79.95%
ResNet101Layers	90.05%	78.75%	80.5%
ResNet152Layers	90.8%	80.65%	81.4%
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-
	"""Measure single-image top-1 / top-5 accuracy of a Chainer vision model.

	Reads ``<image path> <label>`` pairs from ``val.txt`` in the current
	directory, classifies the first 2000 images on the GPU and prints both
	accuracies as percentages.

	Switches:
	    use_predict -- True: classify through ``net.predict``;
	                   False: resize to 224x224, subtract the mean and call the
	                   network directly, reading its ``prob`` layer.
	    oversample  -- forwarded to ``net.predict`` (unused when ``use_predict``
	                   is False).
	"""

	import os, chainer, cupy, numpy, cv2, tqdm

	use_predict = True
	oversample = True

	net = chainer.links.GoogLeNet()
	#net = chainer.links.VGG16Layers()
	#net = chainer.links.ResNet50Layers()
	#net = chainer.links.ResNet101Layers()
	#net = chainer.links.ResNet152Layers()

	net.to_gpu()
	print(type(net).__name__, use_predict, oversample)

	n_tp_top1 = n_tp_top5 = 0
	# Close the label file deterministically and skip blank lines, which would
	# otherwise break the (path, label) unpacking in the loop below.
	with open('val.txt') as label_file:
	    images = [s.split() for s in label_file if s.strip()]
	images = images[:2000] # because evaluating the entire images is time consuming...

	# Per-channel mean in BGR order, shaped (3, 1, 1) to broadcast over a CHW image.
	mean = numpy.array([103.939, 116.779, 123.68], dtype=numpy.float32).reshape((3, 1, 1))
	with chainer.function.no_backprop_mode(), chainer.using_config('train', False):
	    for p, label in tqdm.tqdm(images):
	        x = cv2.imread(p, cv2.IMREAD_COLOR)
	        if x is None:
	            # cv2.imread signals failure by returning None instead of raising.
	            raise IOError('cannot read image: ' + p)
	        if use_predict:
	            # cv2.imread returns BGR, while predict() documents RGB input and
	            # does the RGB->BGR flip itself; feeding BGR would leave the
	            # channels swapped when they reach the network.
	            x = cv2.cvtColor(x, cv2.COLOR_BGR2RGB)
	            y = net.predict([x], oversample=oversample)
	        else:
	            # Manual preprocessing: keep OpenCV's BGR order, HWC -> CHW,
	            # subtract the mean, then add a batch axis.
	            x = cv2.resize(x, (224, 224)).transpose(2, 0, 1).astype(numpy.float32) - mean
	            y = net(cupy.array(x[None, :]), layers=['prob'])['prob']

	        # Five highest-scoring class indices, copied to the host as plain ints
	        # so the comparisons below are ordinary Python operations.
	        top5 = cupy.asnumpy(cupy.argsort(y.data.flatten())[::-1][:5]).tolist()
	        label = int(label)
	        n_tp_top1 += (1 if label == top5[0] else 0)
	        n_tp_top5 += (1 if label in top5 else 0)

	print(f'Top1: {100.0*n_tp_top1/len(images)}%')
	print(f'Top5: {100.0*n_tp_top5/len(images)}%')