Create a gist now

Instantly share code, notes, and snippets.

@mavenlin /readme.md Secret
Last active Jun 7, 2018

Embed
Network in Network Imagenet Model

Info

name: Network in Network Imagenet Model

caffemodel: nin_imagenet.caffemodel

caffemodel_url: https://www.dropbox.com/s/0cidxafrb2wuwxw/nin_imagenet.caffemodel?dl=1

license: BSD

sha1: 8e89c8fcd46e02780e16c867a5308e7bb7af0803

caffe_commit: pull request yet to be merged

gist_id: d802a5849de39225bcc6

Descriptions

This model is a 4-layer Network in Network (NIN) model trained on the ImageNet dataset.

Thanks to the replacement of the fully connected layers with a global average pooling layer, this model has far fewer parameters. The resulting snapshot is 29MB, about one eighth the size of the AlexNet snapshot, which is about 230MB.

The top-1 accuracy of this model on the validation set is 59.36%, which is slightly better than AlexNet. (Averaging over 10 crops, i.e. (4 corners + 1 center) * 2 mirrors, should give slightly higher accuracy.)

The training time of the model is also greatly reduced compared to AlexNet because of the faster convergence. It takes 4-5 days to train on a GTX Titan.

License

BSD

# Solver configuration for training the Network-in-Network ImageNet model.
# Network definition used for both the TRAIN and TEST phases.
net: "models/nin_imagenet/train_val.prototxt"
# One test pass evaluates test_iter * (TEST batch_size) images. The TEST data
# layer uses batch_size 89 and the author's validation LMDB holds
# 48238 = 89 * 542 images, so 542 iterations cover the set exactly once.
# (The previous value of 1000 evaluated 89000 images, more than the set holds.)
# If your validation LMDB or TEST batch_size differs, pick values whose product
# equals your validation set size (e.g. batch_size 50 * test_iter 1000 = 50000).
test_iter: 542
# Run a test pass every 1000 training iterations.
test_interval: 1000
# Step learning-rate schedule: start at 0.01 and multiply by gamma (0.1) every
# 200000 iterations, i.e. 0.01 -> 0.001 at 200k -> 0.0001 at 400k.
base_lr: 0.01
lr_policy: "step"
gamma: 0.1
stepsize: 200000
# Log training progress every 20 iterations.
display: 20
# Total number of training iterations.
max_iter: 450000
# SGD momentum and global weight decay (scaled per blob by the weight_decay
# multipliers in the network definition).
momentum: 0.9
weight_decay: 0.0005
# Write a snapshot every 10000 iterations using the prefix below.
snapshot: 10000
snapshot_prefix: "models/nin_imagenet/nin_imagenet_train"
# Train on the GPU.
solver_mode: GPU
# Network definition (train_val.prototxt) for the Network-in-Network ImageNet
# model, written in Caffe's legacy `layers { type: ENUM }` syntax.
name: "nin_imagenet"
# TRAIN-phase input: reads image/label pairs from an LMDB in batches of 64,
# takes 224x224 crops with mirroring enabled and applies the mean file.
# NOTE: the absolute paths below are specific to the author's machine; point
# them at your own LMDB and mean file.
layers {
top: "data"
top: "label"
name: "data"
type: DATA
data_param {
source: "/home/linmin/IMAGENET-LMDB/imagenet-train-lmdb"
backend: LMDB
batch_size: 64
}
transform_param {
crop_size: 224
mirror: true
mean_file: "/home/linmin/IMAGENET-LMDB/imagenet-train-mean"
}
include: { phase: TRAIN }
}
# TEST-phase input: same 224 crop and mean file, mirroring disabled.
# batch_size 89 was chosen by the author because their validation LMDB (hard
# samples removed) holds 48238 = 89 * 542 images; the number of images
# evaluated per test pass is this batch_size times the solver's test_iter.
layers {
top: "data"
top: "label"
name: "data"
type: DATA
data_param {
source: "/home/linmin/IMAGENET-LMDB/imagenet-val-lmdb"
backend: LMDB
batch_size: 89
}
transform_param {
crop_size: 224
mirror: false
mean_file: "/home/linmin/IMAGENET-LMDB/imagenet-train-mean"
}
include: { phase: TEST }
}
# mlpconv block 1: an 11x11 stride-4 convolution followed by two 1x1 ("cccp")
# convolutions, all with 96 outputs and an in-place ReLU, then 3x3 stride-2
# max pooling.
# In every convolution layer the two blobs_lr / weight_decay entries are
# per-blob multipliers on the solver's base_lr / weight_decay; by Caffe
# convention the first entry applies to the weights and the second to the bias
# (so the bias learns at 2x the rate and is not decayed).
layers {
bottom: "data"
top: "conv1"
name: "conv1"
type: CONVOLUTION
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 96
kernel_size: 11
stride: 4
weight_filler {
type: "gaussian"
mean: 0
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
# In-place ReLU on conv1 (bottom and top are the same blob).
layers {
bottom: "conv1"
top: "conv1"
name: "relu0"
type: RELU
}
# First 1x1 convolution of the block.
layers {
bottom: "conv1"
top: "cccp1"
name: "cccp1"
type: CONVOLUTION
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 96
kernel_size: 1
stride: 1
weight_filler {
type: "gaussian"
mean: 0
std: 0.05
}
bias_filler {
type: "constant"
value: 0
}
}
}
layers {
bottom: "cccp1"
top: "cccp1"
name: "relu1"
type: RELU
}
# Second 1x1 convolution of the block.
layers {
bottom: "cccp1"
top: "cccp2"
name: "cccp2"
type: CONVOLUTION
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 96
kernel_size: 1
stride: 1
weight_filler {
type: "gaussian"
mean: 0
std: 0.05
}
bias_filler {
type: "constant"
value: 0
}
}
}
layers {
bottom: "cccp2"
top: "cccp2"
name: "relu2"
type: RELU
}
# 3x3 max pooling with stride 2. Per the size trace posted in the discussion
# (224, 54, 27, 13, 6), the spatial size goes from 54 to 27 here.
layers {
bottom: "cccp2"
top: "pool0"
name: "pool0"
type: POOLING
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
# mlpconv block 2: a 5x5 convolution (pad 2, stride 1) followed by two 1x1
# ("cccp") convolutions, all with 256 outputs and an in-place ReLU, then 3x3
# stride-2 max pooling.
# The two blobs_lr / weight_decay entries per layer are multipliers on the
# solver's base_lr / weight_decay; by Caffe convention the first applies to
# the weights and the second to the bias.
layers {
bottom: "pool0"
top: "conv2"
name: "conv2"
type: CONVOLUTION
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 256
pad: 2
kernel_size: 5
stride: 1
weight_filler {
type: "gaussian"
mean: 0
std: 0.05
}
bias_filler {
type: "constant"
value: 0
}
}
}
# In-place ReLU on conv2.
layers {
bottom: "conv2"
top: "conv2"
name: "relu3"
type: RELU
}
# First 1x1 convolution of the block.
layers {
bottom: "conv2"
top: "cccp3"
name: "cccp3"
type: CONVOLUTION
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 256
kernel_size: 1
stride: 1
weight_filler {
type: "gaussian"
mean: 0
std: 0.05
}
bias_filler {
type: "constant"
value: 0
}
}
}
# NOTE: the ReLU numbering skips "relu4"; the name is kept as published.
layers {
bottom: "cccp3"
top: "cccp3"
name: "relu5"
type: RELU
}
# Second 1x1 convolution of the block.
layers {
bottom: "cccp3"
top: "cccp4"
name: "cccp4"
type: CONVOLUTION
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 256
kernel_size: 1
stride: 1
weight_filler {
type: "gaussian"
mean: 0
std: 0.05
}
bias_filler {
type: "constant"
value: 0
}
}
}
layers {
bottom: "cccp4"
top: "cccp4"
name: "relu6"
type: RELU
}
# 3x3 max pooling with stride 2. Per the size trace posted in the discussion
# (224, 54, 27, 13, 6), the spatial size goes from 27 to 13 here.
layers {
bottom: "cccp4"
top: "pool2"
name: "pool2"
type: POOLING
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
# mlpconv block 3: a 3x3 convolution (pad 1, stride 1) followed by two 1x1
# ("cccp") convolutions, all with 384 outputs and an in-place ReLU, then 3x3
# stride-2 max pooling and in-place dropout.
# The two blobs_lr / weight_decay entries per layer are multipliers on the
# solver's base_lr / weight_decay; by Caffe convention the first applies to
# the weights and the second to the bias.
layers {
bottom: "pool2"
top: "conv3"
name: "conv3"
type: CONVOLUTION
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 384
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "gaussian"
mean: 0
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
# In-place ReLU on conv3.
layers {
bottom: "conv3"
top: "conv3"
name: "relu7"
type: RELU
}
# First 1x1 convolution of the block.
layers {
bottom: "conv3"
top: "cccp5"
name: "cccp5"
type: CONVOLUTION
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 384
kernel_size: 1
stride: 1
weight_filler {
type: "gaussian"
mean: 0
std: 0.05
}
bias_filler {
type: "constant"
value: 0
}
}
}
layers {
bottom: "cccp5"
top: "cccp5"
name: "relu8"
type: RELU
}
# Second 1x1 convolution of the block.
layers {
bottom: "cccp5"
top: "cccp6"
name: "cccp6"
type: CONVOLUTION
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 384
kernel_size: 1
stride: 1
weight_filler {
type: "gaussian"
mean: 0
std: 0.05
}
bias_filler {
type: "constant"
value: 0
}
}
}
layers {
bottom: "cccp6"
top: "cccp6"
name: "relu9"
type: RELU
}
# 3x3 max pooling with stride 2. Per the size trace posted in the discussion
# (224, 54, 27, 13, 6), the spatial size goes from 13 to 6 here.
layers {
bottom: "cccp6"
top: "pool3"
name: "pool3"
type: POOLING
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
# Dropout with ratio 0.5 applied in place to the pooled features; this is the
# only dropout layer in the network.
layers {
bottom: "pool3"
top: "pool3"
name: "drop"
type: DROPOUT
dropout_param {
dropout_ratio: 0.5
}
}
# mlpconv block 4 (classifier): a 3x3 convolution (pad 1) and a 1x1
# convolution with 1024 outputs each, then a 1x1 convolution with 1000
# outputs (one per class), each followed by an in-place ReLU, and finally
# average pooling over the whole feature map.
# Layer names ("conv4-1024", ...) differ from their top blob names
# ("conv4", ...); the names are kept exactly as published.
# The two blobs_lr / weight_decay entries per layer are multipliers on the
# solver's base_lr / weight_decay; by Caffe convention the first applies to
# the weights and the second to the bias.
layers {
bottom: "pool3"
top: "conv4"
name: "conv4-1024"
type: CONVOLUTION
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 1024
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "gaussian"
mean: 0
std: 0.05
}
bias_filler {
type: "constant"
value: 0
}
}
}
# In-place ReLU on conv4.
layers {
bottom: "conv4"
top: "conv4"
name: "relu10"
type: RELU
}
layers {
bottom: "conv4"
top: "cccp7"
name: "cccp7-1024"
type: CONVOLUTION
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 1024
kernel_size: 1
stride: 1
weight_filler {
type: "gaussian"
mean: 0
std: 0.05
}
bias_filler {
type: "constant"
value: 0
}
}
}
layers {
bottom: "cccp7"
top: "cccp7"
name: "relu11"
type: RELU
}
# Class-score layer: despite the "-1024" suffix in its name, num_output is
# 1000, the number of classes. When fine-tuning to a different number of
# classes, change num_output AND rename this layer (as advised in the
# discussion) so the pretrained blobs are not copied into it.
layers {
bottom: "cccp7"
top: "cccp8"
name: "cccp8-1024"
type: CONVOLUTION
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 1000
kernel_size: 1
stride: 1
weight_filler {
type: "gaussian"
mean: 0
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
# ReLU is applied to the class scores before they are averaged.
layers {
bottom: "cccp8"
top: "cccp8"
name: "relu12"
type: RELU
}
# Average pooling with a 6x6 kernel. Per the size trace posted in the
# discussion (224, 54, 27, 13, 6), the incoming map is 6x6 for the 224 crop,
# so this acts as the global average pooling described in the README.
# NOTE(review): the kernel size is hard-coded, so a different input crop size
# would no longer pool over the whole map - confirm before changing crop_size.
layers {
bottom: "cccp8"
top: "pool4"
name: "pool4"
type: POOLING
pooling_param {
pool: AVE
kernel_size: 6
stride: 1
}
}
# Classification accuracy of the pooled class scores ("pool4") against the
# labels; included only in the TEST phase.
layers {
name: "accuracy"
type: ACCURACY
bottom: "pool4"
bottom: "label"
top: "accuracy"
include: { phase: TEST }
}
# Softmax loss on the pooled class scores ("pool4") against the labels;
# included only in the TRAIN phase, so no loss is computed during test passes.
# No explicit top blob is declared for this layer.
layers {
bottom: "pool4"
bottom: "label"
name: "loss"
type: SOFTMAX_LOSS
include: { phase: TRAIN }
}
@ducha-aiki

This comment has been minimized.

Show comment
Hide comment
@ducha-aiki

ducha-aiki Oct 3, 2014

Have you checked this model? I have tried to finetune it to PASCAL and got an error

1003 13:05:46.623013 31678 caffe.cpp:115] Finetuning from nin_imagenet.caffemodel
...
F1003 13:05:46.656553 31678 net.cpp:713] Check failed: target_blobs[j]->channels() == source_layer.blobs(j).channels() (1 vs. 96)

Have you checked this model? I have tried to finetune it to PASCAL and got an error

1003 13:05:46.623013 31678 caffe.cpp:115] Finetuning from nin_imagenet.caffemodel
...
F1003 13:05:46.656553 31678 net.cpp:713] Check failed: target_blobs[j]->channels() == source_layer.blobs(j).channels() (1 vs. 96)

@seanbell

This comment has been minimized.

Show comment
Hide comment
@seanbell

seanbell Oct 13, 2014

I get the same problem, trying to fine-tune to my own dataset (with 20 labels):

F1013 16:13:58.640352 1556 net.cpp:712] Check failed: target_blobs[j]->channels() == source_layer.blobs(j).channels() (1 vs. 96)

I get the same problem, trying to fine-tune to my own dataset (with 20 labels):

F1013 16:13:58.640352 1556 net.cpp:712] Check failed: target_blobs[j]->channels() == source_layer.blobs(j).channels() (1 vs. 96)

@mavenlin

This comment has been minimized.

Show comment
Hide comment
@mavenlin

mavenlin Oct 14, 2014

@ducha-aiki
@seanbell
Sorry for the inconvenience, it is because in caffe the bias has dimension {1,1,1,n} while I think it is more reasonable to put n in the channel dim, thus setting it as {1,n,1,1}. As the CCCP layers are finally moved to CONVOLUTION Layers, I simply changed the type from CCCP to CONVOLUTION but forgot to update the dimension of the blobs.

Owner

mavenlin commented Oct 14, 2014

@ducha-aiki
@seanbell
Sorry for the inconvenience, it is because in caffe the bias has dimension {1,1,1,n} while I think it is more reasonable to put n in the channel dim, thus setting it as {1,n,1,1}. As the CCCP layers are finally moved to CONVOLUTION Layers, I simply changed the type from CCCP to CONVOLUTION but forgot to update the dimension of the blobs.

@zarchary

This comment has been minimized.

Show comment
Hide comment
@zarchary

zarchary Oct 26, 2014

I have a problem downloading the caffemodel .will you help me out??

I have a problem downloading the caffemodel .will you help me out??

@moskewcz

This comment has been minimized.

Show comment
Hide comment
@moskewcz

moskewcz Oct 29, 2014

sorry if this should be obvious and/or i'm confused, but exactly what datasets are you using for 'imagenet' train and val lmdbs above? it seems that you expect 89000 images in /home/linmin/IMAGENET-LMDB/imagenet-val-lmdb based on the batch_size=89 and test_iters=1000?

sorry if this should be obvious and/or i'm confused, but exactly what datasets are you using for 'imagenet' train and val lmdbs above? it seems that you expect 89000 images in /home/linmin/IMAGENET-LMDB/imagenet-val-lmdb based on the batch_size=89 and test_iters=1000?

@mavenlin

This comment has been minimized.

Show comment
Hide comment
@mavenlin

mavenlin Nov 11, 2014

@moskewcz
I removed the hard samples from the validation set, the resulting number of images is 48238 = 89 * 542.
I tried to be exact when doing validation. you can reset the batch size anyways.

Owner

mavenlin commented Nov 11, 2014

@moskewcz
I removed the hard samples from the validation set, the resulting number of images is 48238 = 89 * 542.
I tried to be exact when doing validation. you can reset the batch size anyways.

@erogol

This comment has been minimized.

Show comment
Hide comment
@erogol

erogol Dec 3, 2014

how to load it to python interface, I tried following but it raises error

net = caffe.Classifier(caffe_root + 'models/network_in_network/net_in_net.prototxt',
caffe_root + 'models/network_in_network/nin_imagenet.caffemodel')

IndexError Traceback (most recent call last)
in ()
1 net = caffe.Classifier(caffe_root + 'models/network_in_network/net_in_net.prototxt',
----> 2 caffe_root + 'models/network_in_network/nin_imagenet.caffemodel')

/home/retina18/Downloads/caffe/python/caffe/classifier.pyc in init(self, model_file, pretrained_file, image_dims, gpu, mean, input_scale, raw_scale, channel_swap)
41 self.set_channel_swap(self.inputs[0], channel_swap)
42
---> 43 self.crop_dims = np.array(self.blobs[self.inputs[0]].data.shape[2:])
44 if not image_dims:
45 image_dims = self.crop_dims

IndexError: list index out of range

erogol commented Dec 3, 2014

how to load it to python interface, I tried following but it raises error

net = caffe.Classifier(caffe_root + 'models/network_in_network/net_in_net.prototxt',
caffe_root + 'models/network_in_network/nin_imagenet.caffemodel')

IndexError Traceback (most recent call last)
in ()
1 net = caffe.Classifier(caffe_root + 'models/network_in_network/net_in_net.prototxt',
----> 2 caffe_root + 'models/network_in_network/nin_imagenet.caffemodel')

/home/retina18/Downloads/caffe/python/caffe/classifier.pyc in init(self, model_file, pretrained_file, image_dims, gpu, mean, input_scale, raw_scale, channel_swap)
41 self.set_channel_swap(self.inputs[0], channel_swap)
42
---> 43 self.crop_dims = np.array(self.blobs[self.inputs[0]].data.shape[2:])
44 if not image_dims:
45 image_dims = self.crop_dims

IndexError: list index out of range

@zhushun0008

This comment has been minimized.

Show comment
Hide comment
@zhushun0008

zhushun0008 Dec 26, 2014

Is this implementation the same as NIN?
I think mlpconv should do before conv, but now I am confused. Did I miss something important?

Is this implementation the same as NIN?
I think mlpconv should do before conv, but now I am confused. Did I miss something important?

@AnanS

This comment has been minimized.

Show comment
Hide comment
@AnanS

AnanS Mar 21, 2015

Hi,

Has anyone managed to successfully train this model?

Isn't the input to the layer 'pool4' 5 x 5 (that is, we start with 224, then 54, 26, 12 and 5)? However, the kernel_size of 'pool4' is 6 x 6. Am I missing something? @mavenlin ?

Thanks!

AnanS commented Mar 21, 2015

Hi,

Has anyone managed to successfully train this model?

Isn't the input to the layer 'pool4' 5 x 5 (that is, we start with 224, then 54, 26, 12 and 5)? However, the kernel_size of 'pool4' is 6 x 6. Am I missing something? @mavenlin ?

Thanks!

@wuxinhong

This comment has been minimized.

Show comment
Hide comment
@wuxinhong

wuxinhong Apr 10, 2015

could you tell you how to set the number of classes in the train_val.prototxt?because i want to use you model to train on my own dataset. i am not found any Parameter to set the number of classes.

could you tell you how to set the number of classes in the train_val.prototxt?because i want to use you model to train on my own dataset. i am not found any Parameter to set the number of classes.

@4fur4

This comment has been minimized.

Show comment
Hide comment
@4fur4

4fur4 Apr 12, 2015

Shouldnt the crop size be = 227? This could also answer @AnanS question.

4fur4 commented Apr 12, 2015

Shouldnt the crop size be = 227? This could also answer @AnanS question.

@sbrugman

This comment has been minimized.

Show comment
Hide comment
@sbrugman

sbrugman Jun 2, 2015

For anyone wondering what to change for fine-tuning: the number of classes is set in "cccp8-1024" (num_output: 1000). When changing this, do not forget to rename the layer (e.g. to "cccp8-1024-finetune"); otherwise Caffe will produce an error.

sbrugman commented Jun 2, 2015

For anyone wondering what to change for fine-tuning: the number of classes is set in "cccp8-1024" (num_output: 1000). When changing this, do not forget to rename the layer (e.g. to "cccp8-1024-finetune"); otherwise Caffe will produce an error.

@swamiviv

This comment has been minimized.

Show comment
Hide comment
@swamiviv

swamiviv Jul 23, 2015

I am having trouble calculating the number of parameters (weights) for the mlpconv layers. Can anyone specify how many parameters each mlpconv layer has ? Assuming the model is unchanged as given in this page.

I am having trouble calculating the number of parameters (weights) for the mlpconv layers. Can anyone specify how many parameters each mlpconv layer has ? Assuming the model is unchanged as given in this page.

@mtngld

This comment has been minimized.

Show comment
Hide comment
@mtngld

mtngld Aug 5, 2015

Please see a related post on caffe-users group.

mtngld commented Aug 5, 2015

Please see a related post on caffe-users group.

@rewonc

This comment has been minimized.

Show comment
Hide comment
@rewonc

rewonc Aug 11, 2015

Does anyone have a deploy.prototxt for this model?

rewonc commented Aug 11, 2015

Does anyone have a deploy.prototxt for this model?

@tzutalin

This comment has been minimized.

Show comment
Hide comment
@tzutalin

tzutalin Aug 31, 2015

Hi @rewonc ,
You can refer to my deploy.prototxt as bellow link:
https://gist.github.com/tzutalin/0e3fd793a5b13dd7f647

Hi @rewonc ,
You can refer to my deploy.prototxt as bellow link:
https://gist.github.com/tzutalin/0e3fd793a5b13dd7f647

@ronentk

This comment has been minimized.

Show comment
Hide comment
@ronentk

ronentk Sep 8, 2015

Hi @mavenlin, what training error should i be expecting to achieve? 0?

Also, I was having trouble getting the training to converge, raising batch size from 64 to 256 helped in my case.

Thanks

ronentk commented Sep 8, 2015

Hi @mavenlin, what training error should i be expecting to achieve? 0?

Also, I was having trouble getting the training to converge, raising batch size from 64 to 256 helped in my case.

Thanks

@taoari

This comment has been minimized.

Show comment
Hide comment
@taoari

taoari Sep 14, 2015

@AnanS @4fur4 ,

for 224, it would be 224, 54, 27, 13, 6
for 227, it would be 227, 55, 27, 13, 6

So it does not really matters.

I have also evaluated this model on the ILSVRC 2012 val 50000 dataset on K80 GPU.

forward-backward time: caffenet 5.71ms/image, nin 8.125ms/image
top-1 accuracy (only single center crop): caffenet 57.4%, nin 56.3240% (227 version), 56.3279% (224 version).

The 59.36% should be the effects of eliminating of the hard examples as stated by @mavenlin .

taoari commented Sep 14, 2015

@AnanS @4fur4 ,

for 224, it would be 224, 54, 27, 13, 6
for 227, it would be 227, 55, 27, 13, 6

So it does not really matters.

I have also evaluated this model on the ILSVRC 2012 val 50000 dataset on K80 GPU.

forward-backward time: caffenet 5.71ms/image, nin 8.125ms/image
top-1 accuracy (only single center crop): caffenet 57.4%, nin 56.3240% (227 version), 56.3279% (224 version).

The 59.36% should be the effects of eliminating of the hard examples as stated by @mavenlin .

@krishkoushik

This comment has been minimized.

Show comment
Hide comment
@krishkoushik

krishkoushik Oct 24, 2015

Hi, when I try to load the model in lua using

model = loadcaffe.load('deploy.prototxt', 'nin_imagenet.caffemodel', 'ccn2')

I get the following error

Successfully loaded nin_imagenet.caffemodel
MODULE data UNDEFINED
warning: module 'data [type 5]' not found
.../torch/install/share/lua/5.1/ccn2/SpatialConvolution.lua:16: Assertion failed: [math.fmod(nOutputPlane, 16) == 0]. Number of output planes has to be a multiple of 16.
stack traceback:
[C]: in function 'error'
.../torch/install/share/lua/5.1/ccn2/SpatialConvolution.lua:16: in function '__init'
/home/krishnan/torch/install/share/lua/5.1/torch/init.lua:54: in function </home/krishnan/torch/install/share/lua/5.1/torch/init.lua:50>
[C]: in function 'SpatialConvolution'
deploy.prototxt.lua:31: in main chunk
[C]: in function 'dofile'
...hnan/torch/install/share/lua/5.1/loadcaffe/loadcaffe.lua:24: in function 'load'
[string "model = loadcaffe.load('deploy.prototxt', 'ni..."]:1: in main chunk
[C]: at 0x7f13f591ce10

I tried changing the last layer's output to 1024 instead of 1000. Still the deploy.prototxt.lua file generated is the same - it has 1000 and not 1024. I can't quite understand what's happening here. Can anyone please help me?

Thanks

Hi, when I try to load the model in lua using

model = loadcaffe.load('deploy.prototxt', 'nin_imagenet.caffemodel', 'ccn2')

I get the following error

Successfully loaded nin_imagenet.caffemodel
MODULE data UNDEFINED
warning: module 'data [type 5]' not found
.../torch/install/share/lua/5.1/ccn2/SpatialConvolution.lua:16: Assertion failed: [math.fmod(nOutputPlane, 16) == 0]. Number of output planes has to be a multiple of 16.
stack traceback:
[C]: in function 'error'
.../torch/install/share/lua/5.1/ccn2/SpatialConvolution.lua:16: in function '__init'
/home/krishnan/torch/install/share/lua/5.1/torch/init.lua:54: in function </home/krishnan/torch/install/share/lua/5.1/torch/init.lua:50>
[C]: in function 'SpatialConvolution'
deploy.prototxt.lua:31: in main chunk
[C]: in function 'dofile'
...hnan/torch/install/share/lua/5.1/loadcaffe/loadcaffe.lua:24: in function 'load'
[string "model = loadcaffe.load('deploy.prototxt', 'ni..."]:1: in main chunk
[C]: at 0x7f13f591ce10

I tried changing the last layer's output to 1024 instead of 1000. Still the deploy.prototxt.lua file generated is the same - it has 1000 and not 1024. I can't quite understand what's happening here. Can anyone please help me?

Thanks

@Seinzhu

This comment has been minimized.

Show comment
Hide comment
@Seinzhu

Seinzhu Dec 29, 2015

@taoari
Hi, I also evaluated this on ILSVRC2012 with generated data source ilsvrc2012_train_lmdb and ilsvrc2012_val_lmdb, while I can only get 21.369%(224 version). Any suggestions would be appreciated :) Thanks!

Seinzhu commented Dec 29, 2015

@taoari
Hi, I also evaluated this on ILSVRC2012 with generated data source ilsvrc2012_train_lmdb and ilsvrc2012_val_lmdb, while I can only get 21.369%(224 version). Any suggestions would be appreciated :) Thanks!

@bhargavaurala

This comment has been minimized.

Show comment
Hide comment
@bhargavaurala

bhargavaurala Jan 8, 2016

Hi @mavenlin. I am using the NiN architecture to train ImageNet 2012. After about 50k iterations, the validation accuracy is around 0.1% which corresponds to random chance. I am using the same structure and initialization as you have. Can you please let me know when (iteration number) the validation accuracy starts to increase? This will help me decide if the network is learning anything useful and if I should restart with different hyperparameters.

Thanks.

Hi @mavenlin. I am using the NiN architecture to train ImageNet 2012. After about 50k iterations, the validation accuracy is around 0.1% which corresponds to random chance. I am using the same structure and initialization as you have. Can you please let me know when (iteration number) the validation accuracy starts to increase? This will help me decide if the network is learning anything useful and if I should restart with different hyperparameters.

Thanks.

@ProGamerGov

This comment has been minimized.

Show comment
Hide comment
@ProGamerGov

ProGamerGov Jul 23, 2016

Has anyone else trained any other Network In Network (NIN) models? Or is this the only one?

Has anyone else trained any other Network In Network (NIN) models? Or is this the only one?

@mrgloom

This comment has been minimized.

Show comment
Hide comment
@mrgloom

mrgloom Oct 15, 2016

layers {
  bottom: "cccp8"
  top: "pool4"
  name: "pool4"
  type: POOLING
  pooling_param {
    pool: AVE
    kernel_size: 6
    stride: 1
  }
}

Seems this is old Caffe .prototxt, do we need now specify global_pooling: true?
As far as I can see, NIN uses a global average pooling layer, not just average pooling (see the discussion of global average pooling in the NIN paper).

layer {
  name: "pool4"
  type: "Pooling"
  bottom: "cccp8"
  top: "pool4"
  pooling_param {
    pool: AVE
    global_pooling: true
  }
}

mrgloom commented Oct 15, 2016

layers {
  bottom: "cccp8"
  top: "pool4"
  name: "pool4"
  type: POOLING
  pooling_param {
    pool: AVE
    kernel_size: 6
    stride: 1
  }
}

Seems this is old Caffe .prototxt, do we need now specify global_pooling: true?
As far as I can see, NIN uses a global average pooling layer, not just average pooling (see the discussion of global average pooling in the NIN paper).

layer {
  name: "pool4"
  type: "Pooling"
  bottom: "cccp8"
  top: "pool4"
  pooling_param {
    pool: AVE
    global_pooling: true
  }
}
@moyix

This comment has been minimized.

Show comment
Hide comment
@moyix

moyix Oct 9, 2017

Hi @mavenlin,

I noticed that the SHA1 of the caffe model does not match what's listed here (the SHA1 listed here is 8e89c8fcd46e02780e16c867a5308e7bb7af0803 but the SHA1 of the downloaded model is 2794deb2aada04f667894b7d6d929371b4689ea9). Maybe this should be fixed so that people can be sure their download was successful and they're getting the correct model?

moyix commented Oct 9, 2017

Hi @mavenlin,

I noticed that the SHA1 of the caffe model does not match what's listed here (the SHA1 listed here is 8e89c8fcd46e02780e16c867a5308e7bb7af0803 but the SHA1 of the downloaded model is 2794deb2aada04f667894b7d6d929371b4689ea9). Maybe this should be fixed so that people can be sure their download was successful and they're getting the correct model?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment