Network in Network CIFAR10

Info

name: Network in Network CIFAR10 Model

caffemodel: cifar10_nin.caffemodel

caffemodel_url: https://www.dropbox.com/s/blrajqirr1p31v0/cifar10_nin.caffemodel?dl=1

license: BSD

sha1: 8e89c8fcd46e02780e16c867a5308e7bb7af0803

caffe_commit: c69b3b49084b503e23b95dc387329975245949c2

gist_id: e56253735ef32c3c296d

Description

This model is a 3-layer Network in Network model trained on the CIFAR10 dataset. It reaches 89.6% accuracy on the validation set. Detailed descriptions are in the paper Network in Network.

The preprocessed CIFAR10 data is downloadable in LevelDB format here:

License

The data used to train this model comes from http://www.cs.toronto.edu/~kriz/cifar.html. Please follow the license there when using the data.

solver.prototxt

net: "train_test.prototxt"
test_iter: 100
test_interval: 500
base_lr: 0.1
momentum: 0.9
weight_decay: 0.0001
lr_policy: "step"
gamma: 0.1
stepsize: 100000
display: 100
max_iter: 120000
snapshot: 10000
snapshot_prefix: "cifar10_nin"
solver_mode: GPU
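
For reference, training can be launched either with the caffe command-line tool (caffe train -solver solver.prototxt) or from Python. A minimal pycaffe sketch, assuming pycaffe is built and the two prototxt files and the LevelDB folders sit in the working directory:

import caffe

caffe.set_mode_gpu()  # matches solver_mode: GPU above

# SGDSolver picks up every setting above, including net: "train_test.prototxt"
solver = caffe.SGDSolver('solver.prototxt')
solver.solve()  # runs the full max_iter: 120000 schedule, snapshotting every 10000 iterations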
train_test.prototxt

name: "CIFAR10_full"
layers {
  name: "cifar"
  type: DATA
  top: "data"
  top: "label"
  data_param {
    source: "cifar-train-leveldb"
    batch_size: 128
  }
  include: { phase: TRAIN }
}
layers {
  name: "cifar"
  type: DATA
  top: "data"
  top: "label"
  data_param {
    source: "cifar-test-leveldb"
    batch_size: 100
  }
  include: { phase: TEST }
}
layers {
  name: "conv1"
  type: CONVOLUTION
  bottom: "data"
  top: "conv1"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1.
  weight_decay: 0.
  convolution_param {
    num_output: 192
    pad: 2
    kernel_size: 5
    weight_filler {
      type: "gaussian"
      std: 0.05
    }
    bias_filler {
      type: "constant"
    }
  }
}
layers {
  name: "relu1"
  type: RELU
  bottom: "conv1"
  top: "conv1"
}
layers {
  name: "cccp1"
  type: CONVOLUTION
  bottom: "conv1"
  top: "cccp1"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 160
    group: 1
    kernel_size: 1
    weight_filler {
      type: "gaussian"
      std: 0.05
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layers {
  name: "relu_cccp1"
  type: RELU
  bottom: "cccp1"
  top: "cccp1"
}
layers {
  name: "cccp2"
  type: CONVOLUTION
  bottom: "cccp1"
  top: "cccp2"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 96
    group: 1
    kernel_size: 1
    weight_filler {
      type: "gaussian"
      std: 0.05
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layers {
  name: "relu_cccp2"
  type: RELU
  bottom: "cccp2"
  top: "cccp2"
}
layers {
  name: "pool1"
  type: POOLING
  bottom: "cccp2"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layers {
  name: "drop3"
  type: DROPOUT
  bottom: "pool1"
  top: "pool1"
  dropout_param {
    dropout_ratio: 0.5
  }
}
layers {
  name: "conv2"
  type: CONVOLUTION
  bottom: "pool1"
  top: "conv2"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1.
  weight_decay: 0.
  convolution_param {
    num_output: 192
    pad: 2
    kernel_size: 5
    weight_filler {
      type: "gaussian"
      std: 0.05
    }
    bias_filler {
      type: "constant"
    }
  }
}
layers {
  name: "relu2"
  type: RELU
  bottom: "conv2"
  top: "conv2"
}
layers {
  name: "cccp3"
  type: CONVOLUTION
  bottom: "conv2"
  top: "cccp3"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 192
    group: 1
    kernel_size: 1
    weight_filler {
      type: "gaussian"
      std: 0.05
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layers {
  name: "relu_cccp3"
  type: RELU
  bottom: "cccp3"
  top: "cccp3"
}
layers {
  name: "cccp4"
  type: CONVOLUTION
  bottom: "cccp3"
  top: "cccp4"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 192
    group: 1
    kernel_size: 1
    weight_filler {
      type: "gaussian"
      std: 0.05
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layers {
  name: "relu_cccp4"
  type: RELU
  bottom: "cccp4"
  top: "cccp4"
}
layers {
  name: "pool2"
  type: POOLING
  bottom: "cccp4"
  top: "pool2"
  pooling_param {
    pool: AVE
    kernel_size: 3
    stride: 2
  }
}
layers {
  name: "drop6"
  type: DROPOUT
  bottom: "pool2"
  top: "pool2"
  dropout_param {
    dropout_ratio: 0.5
  }
}
layers {
  name: "conv3"
  type: CONVOLUTION
  bottom: "pool2"
  top: "conv3"
  blobs_lr: 1.
  blobs_lr: 2.
  weight_decay: 1.
  weight_decay: 0.
  convolution_param {
    num_output: 192
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "gaussian"
      std: 0.05
    }
    bias_filler {
      type: "constant"
    }
  }
}
layers {
  name: "relu3"
  type: RELU
  bottom: "conv3"
  top: "conv3"
}
layers {
  name: "cccp5"
  type: CONVOLUTION
  bottom: "conv3"
  top: "cccp5"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 192
    group: 1
    kernel_size: 1
    weight_filler {
      type: "gaussian"
      std: 0.05
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layers {
  name: "relu_cccp5"
  type: RELU
  bottom: "cccp5"
  top: "cccp5"
}
layers {
  name: "cccp6"
  type: CONVOLUTION
  bottom: "cccp5"
  top: "cccp6"
  blobs_lr: 0.1
  blobs_lr: 0.1
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 10
    group: 1
    kernel_size: 1
    weight_filler {
      type: "gaussian"
      std: 0.05
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layers {
  name: "relu_cccp6"
  type: RELU
  bottom: "cccp6"
  top: "cccp6"
}
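# pool3 below is global average pooling: each of the 10 cccp6 maps (8x8) is
# averaged down to a single value, giving one confidence score per class that
# feeds the softmax loss directly, with no fully connected layer.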
layers {
  name: "pool3"
  type: POOLING
  bottom: "cccp6"
  top: "pool3"
  pooling_param {
    pool: AVE
    kernel_size: 8
    stride: 1
  }
}
layers {
  name: "accuracy"
  type: ACCURACY
  bottom: "pool3"
  bottom: "label"
  top: "accuracy"
  include: { phase: TEST }
}
layers {
  name: "loss"
  type: SOFTMAX_LOSS
  bottom: "pool3"
  bottom: "label"
  top: "loss"
}
@RyanLiuNtust commented Nov 8, 2014

@mavenlin
Thanks for sharing! I have a question: do you preprocess the data using local contrast normalization and ZCA whitening, or something else?

@mavenlin (owner) commented Nov 11, 2014

@RyanLiuNtust
The preprocessing is exactly the same as described in the maxout paper; the preprocessing code from pylearn2 is used.
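
For readers trying to replicate this: pylearn2 ships a script for this GCN + ZCA whitening pipeline (make_cifar10_gcn_whitened.py under scripts/datasets/ in the pylearn2 repository). For illustration, a rough numpy equivalent is sketched below; the parameter values are typical defaults and assumptions, not necessarily the exact settings used for this model.

import numpy as np

def gcn(X, scale=55.0, eps=1e-8):
    # Global contrast normalization: per image, subtract the mean and
    # rescale to a fixed norm. X has shape (num_images, 3072).
    X = X - X.mean(axis=1, keepdims=True)
    norms = np.sqrt((X ** 2).sum(axis=1, keepdims=True))
    return scale * X / np.maximum(norms, eps)

def zca_fit(X, eps=0.1):
    # Fit the ZCA whitening transform on the GCN-normalized training set.
    mean = X.mean(axis=0)
    Xc = X - mean
    cov = np.dot(Xc.T, Xc) / Xc.shape[0]
    U, S, _ = np.linalg.svd(cov)
    W = np.dot(U * (1.0 / np.sqrt(S + eps)), U.T)
    return mean, W

def zca_apply(X, mean, W):
    return np.dot(X - mean, W)

# train, test: float arrays of shape (N, 3072) from the CIFAR10 batches.
# The whitening transform is fit on the training set only:
#   train_n = gcn(train); mean, W = zca_fit(train_n)
#   train_w = zca_apply(train_n, mean, W)
#   test_w  = zca_apply(gcn(test), mean, W)  # reuse the fitted mean and W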

@zashani commented Dec 3, 2014

@mavenlin
I've downloaded the preprocessed files and the two prototxt files, but training does not behave well. After about 7000 iterations the loss starts to increase and finally settles around 2.30258 (which is ln 10, i.e. the network is predicting all 10 classes uniformly). I'm using the latest version from the master branch of Caffe.
Any ideas why this may be so?

@RyanLiuNtust commented Dec 6, 2014

@zashani
I had the same problem before... However, I changed base_lr to 0.01 and it worked!
I hope it also works for you ^^

@DannaShavit commented Dec 24, 2014

@mavenlin
Thanks for sharing this.
Is there any chance you could post a link to the preprocessed CIFAR-100 data? I am trying to reproduce the NIN paper results for that dataset and getting only around 58% accuracy with the given CIFAR-10 parameters.

@qinhongwei commented Jan 7, 2015

@RyanLiuNtust
I have the same problem now... After setting base_lr as you did, it still does not work.

@JacobianTang commented Mar 14, 2015

Hello. Your paper says: "we extract and directly visualize the feature maps from the last mlpconv layer of the trained model for CIFAR-10." I think the feature maps from the last mlpconv layer are smaller than the original image, but the pictures in your paper show them at the same size. How is this done?

@hiwonjoon commented May 8, 2015

@mavenlin
Can anyone give some tips for generating Local Contrast Normalization and ZCA whitening datasets suitable for Caffe? I am trying to convert the pylearn2 result to LevelDB, but the file I generate seems different from the given data.

The reason I want to convert is that I would like to train on the CIFAR-100 dataset, not CIFAR-10. Any help will be greatly appreciated. Thanks!

@hiwonjoon commented May 9, 2015

Self-resolved: I wrote the converter. To use it, you need a dataset file (a cPickle file) generated by pylearn2. My code depends on Caffe's protobuf definitions and py-leveldb.

https://gist.github.com/hiwonjoon/8f91034cc1168f2d2dd5
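
For anyone who does not want to dig through the linked gist, the core of such a converter is small. A hedged sketch (Python 2 era, matching the tooling above; the pickle layout and file names below are assumptions, and you may need to reorder axes to C x H x W depending on how the pylearn2 data was saved):

import cPickle

import leveldb
from caffe.proto import caffe_pb2

# Assumed: a pylearn2 DenseDesignMatrix pickled to disk, with .X of shape
# (N, 3072) and .y of shape (N, 1). Adjust to your actual dataset object.
dataset = cPickle.load(open('cifar10_train_preprocessed.pkl', 'rb'))

db = leveldb.LevelDB('cifar-train-leveldb')
batch = leveldb.WriteBatch()
for i, (x, y) in enumerate(zip(dataset.X, dataset.y)):
    datum = caffe_pb2.Datum()
    datum.channels, datum.height, datum.width = 3, 32, 32
    # The preprocessed values are real-valued, so they go into float_data
    # rather than the uint8 'data' field.
    datum.float_data.extend(float(v) for v in x)
    datum.label = int(y)
    batch.Put('%08d' % i, datum.SerializeToString())
db.Write(batch)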

@jasonustc commented Jun 2, 2015

@zashani I downloaded the pre-processed data; I believe it is in LevelDB format. The problem is that after I extracted the files to a folder and ran this model, the model could not read any data from the folder (it appears empty). Has anybody run into this problem?
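
A quick sanity check for this situation is to read one record directly with the py-leveldb bindings and Caffe's protobuf (a minimal sketch; the folder name is the one from the download):

import leveldb
from caffe.proto import caffe_pb2

db = leveldb.LevelDB('cifar-test-leveldb')  # path to the extracted folder
key, value = db.RangeIter().next()          # first key/value pair
datum = caffe_pb2.Datum.FromString(value)
print('%d x %d x %d, label %d' % (datum.channels, datum.height, datum.width, datum.label))

If even this fails, the problem is the LevelDB library itself rather than Caffe (see the comments further down about swapping the LevelDB version on Windows).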

@wwnigel commented Aug 5, 2015

@jasonustc
Have you solved the problem? I am facing the same one: the LevelDB data in the extracted folder cannot be read via train_test.prototxt.

@PeterPan1990 commented Aug 28, 2015

@DannaShavit, I also got only 50% accuracy with the pretrained CIFAR-10 data. Did you find a solution?

@PeterPan1990 commented Aug 28, 2015

@RyanLiuNtust, would you like to share your experience of reproducing the results on CIFAR-10? I got stuck at 50% accuracy. I have sent you an email, thanks!

@mfigurnov commented Sep 9, 2015

@PeterPan1990 maybe your problems with reproducing the results are caused by issue BVLC/caffe#2688.

Here is a model definition with a workaround for the issue: the pool1 layer is forced to use the Caffe engine. https://gist.github.com/mfigurnov/4736f2f4a6e1676d074d
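
Concretely (per BVLC/caffe#2688, which affects the CuDNN max-pooling path), the workaround amounts to one extra line in the pool1 definition:

layers {
  name: "pool1"
  type: POOLING
  bottom: "cccp2"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
    engine: CAFFE  # force the native Caffe pooling engine instead of CuDNN
  }
}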

@diaomin commented Sep 10, 2015

@jasonustc Hello, have you solved the problem? I am facing the same one: the model cannot read the given pre-processed data.

@Coldmooon commented Sep 16, 2015

@diaomin Have you changed net: "train_test.prototxt" to net: "train_val.prototxt" in the solver.prototxt file? The solver.prototxt points to train_test.prototxt, but the file is actually named train_val.prototxt.

@hengck23 commented Sep 19, 2015

@diaomin, @PeterPan1990
I would like to share my experience repeating the experimental results of Network in Network on CIFAR-10 [1]. In summary, the paper's results are correct and can largely be reproduced.

There are two open-source implementations:

1) Caffe implementation

  • The dataset (Caffe LevelDB format) can be downloaded from https://gist.github.com/mavenlin/e56253735ef32c3c296d
  • The files are "cifar-test-leveldb" and "cifar-train-leveldb".
  • My initial version of Windows Caffe (I forget the version) could not read them.
  • I had to change my LevelDB version to https://github.com/maxd/leveldbwin
  • Then I verified that testing "cifar10_nin.caffemodel" on "cifar-test-leveldb" does indeed produce an accuracy of 89.4%.
  • However, I could not get the same results when training my own model with the provided network and solver prototxt files (I suspect the issue may be the version of Caffe used).
  • I tried changing the base learning rate from 0.1 to 0.01.
  • I also tried renaming the "top" and "bottom" blob names of the layers to prevent "in-place" operations like ReLU and dropout.
  • I also tried switching to the Caffe engine for max pooling.
  • I got test accuracy stuck at 10% (loss 2.30).
  • I can get test accuracy near 60% after playing with the solver parameters.

2) cuda-convnet implementation

  • There is a cuda-convnet implementation by the author at https://github.com/mavenlin/cuda-convnet
  • The dataset can be downloaded too; it is in Python format, in the file "cifar-10-py-colmajor".
  • I verified that this Python version is the same as the Caffe LevelDB version (up to small differences of about 1e-7).
  • I compiled the convnet code (on Windows) and got the same results as claimed, near 89%.

3) My implementation

  • I wrote my own C++ code (mainly by hacking into Caffe and copying its layer source code into mine).
  • I set up the same architecture using the Caffe network and solver prototxt files.
  • I get around 88% accuracy.

4) Others

Some references that may be useful:

[1] "Network In Network" - M. Lin, Q. Chen, S. Yan, ICLR 2014.
[2] "Empirical Evaluation of Rectified Activations in Convolutional Network" - B. Xu, N. Wang, T. Chen, M. Li, arXiv 2015.

@Coldmooon commented Oct 13, 2015

@mfigurnov Great, thanks! This solved my problem.

I've reproduced the paper's result twice, getting 89.47% accuracy (learning rate 0.1) and 87.2% (learning rate lowered to 0.01).

@kgl-prml commented Nov 24, 2015

@hengck23 Using the current Caffe version, I also cannot reproduce the results. After reading about your experience, I am quite curious why the current Caffe implementation cannot do this. Does that mean there may be some bug in Caffe?

@kgl-prml commented Nov 24, 2015

@Coldmooon How did you reproduce the paper's result? Did you just use the current Caffe implementation, or did you do additional work?

@mollahosseini commented Dec 10, 2015

@kgl-prml, we followed the paper's outline, applied global contrast normalization and ZCA whitening, and were able to reproduce the 10.4% error rate on CIFAR-10.

@mollahosseini commented Dec 10, 2015

Has anybody been able to reproduce the paper's results on CIFAR-100? We can get the paper's result on CIFAR-10 (10% error rate), but the same network does not converge below a 75% error rate on CIFAR-100! Do you have any suggestions?

@rockstone533 commented Dec 30, 2015

Hi, @hengck23. I tried to reproduce NIN with Caffe and got nearly 88% accuracy following your advice. Now I have some questions. I want to use the network to test a new image in Python:
1. How should I implement GCN? I guess subtract the mean and divide by the std; is that right?
2. Is the dropout layer needed when testing a new image?
By the way, do you know how to visualize the feature maps as shown in the paper? The patch size seems smaller than the input.
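
On 1: the pylearn2-style GCN subtracts the per-image mean and rescales by the per-image norm (or std, depending on its settings); see the numpy sketch earlier in this thread. On 2: no change is needed, since Caffe's dropout layers are already pass-through at test time. On the visualization: the usual trick is simply to upsample each last-mlpconv feature map back to the input resolution for display. A plausible pycaffe sketch (the paper ships no visualization code, and 'deploy.prototxt' is assumed; see further down the thread):

import caffe
import numpy as np
from scipy.ndimage import zoom

net = caffe.Net('deploy.prototxt', 'cifar10_nin.caffemodel', caffe.TEST)
image = np.zeros((1, 3, 32, 32), dtype=np.float32)  # stand-in: put a GCN+ZCA-preprocessed image here
net.blobs['data'].data[...] = image
net.forward()
maps = net.blobs['cccp6'].data[0]  # 10 maps of 8x8, one per class
full = zoom(maps, (1, 32 / 8.0, 32 / 8.0), order=1)  # upsample to 10x32x32 for display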

@tingtinglu commented Jun 27, 2016

I also want to implement this paper (NIN), but I am new to Caffe and deep learning. I downloaded the pre-trained model and the cifar-test-leveldb file from the author's gist (https://gist.github.com/mavenlin/e56253735ef32c3c296d), but my Caffe cannot read cifar-test-leveldb. What is wrong, and how can I run this experiment easily? Thanks so much!

@Perseus14 commented Jul 13, 2016

@mavenlin Can you upload the deploy.prototxt, or show how to convert train_val.prototxt to deploy.prototxt? I don't know the input params. I want to use the model weights and biases without downloading the entire dataset.
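
Until the author posts one, a deploy version can be written by hand from train_test.prototxt: drop the two DATA layers, declare the input shape instead, drop the ACCURACY and SOFTMAX_LOSS layers, and finish with a plain softmax. An untested sketch in the same legacy syntax as the model above:

name: "CIFAR10_full_deploy"
input: "data"
input_dim: 1   # batch size; increase for batched inference
input_dim: 3
input_dim: 32
input_dim: 32
# ... conv1 through pool3 copied verbatim from train_test.prototxt (the DROPOUT
# layers may stay; they are pass-through at test time) ...
layers {
  name: "prob"
  type: SOFTMAX
  bottom: "pool3"
  top: "prob"
}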

@happyzhouch commented Oct 12, 2016

@hengck23
Hello, I also have the problem that I can't read the downloaded data. I see you changed your LevelDB version to https://github.com/maxd/leveldbwin. Can you tell me how to make that change? I don't understand it.

@happyzhouch commented Oct 12, 2016

@hengck23
How do I use leveldbwin? I can't figure it out. I am new to Caffe and sincerely hope to get a solution from you.

@ectg commented Feb 13, 2017

@Perseus14, did you find the deploy.prototxt?

@sayadyaghoobi commented Apr 30, 2017

I want to fine-tune the NIN model for just 3 classes, but I don't know which layer must be changed. I tried renaming cccp6, but it didn't work. Does anyone have an idea? Please share.
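
For reference, the standard Caffe fine-tuning recipe here would be: copy train_test.prototxt, rename the final CONVOLUTION layer (cccp6) and its top blob so its weights are freshly initialized instead of loaded from the snapshot, set num_output to 3, and remember to update the bottoms of relu_cccp6 and pool3 to the new blob name (forgetting this is a common reason a bare rename "doesn't work"). A hedged sketch, with an arbitrary new layer name:

layers {
  name: "cccp6-3class"  # new name, so weights are re-initialized rather than copied
  type: CONVOLUTION
  bottom: "cccp5"
  top: "cccp6-3class"
  blobs_lr: 10  # optionally learn the new layer faster than the pretrained ones
  blobs_lr: 10
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 3  # 3 classes instead of 10
    kernel_size: 1
    weight_filler {
      type: "gaussian"
      std: 0.05
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

Then launch with the pretrained weights: caffe train -solver solver.prototxt -weights cifar10_nin.caffemodel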

@sayadyaghoobi commented Oct 31, 2017

I'm trying to train NIN on my own data without pretrained weights, i.e. with random initialization of the NIN network. My data has 3 classes, and I changed the output of cccp6 from 10 to 3. When I run it, I get a wrong, constant accuracy of 0.3368 at every testing pass. Does anyone have an idea what causes this?
I didn't change anything except cccp6's num_output from 10 to 3. Thanks very much.
