Fully convolutional reduced VGGNet
name: Fully convolutional reduced VGGNet
caffemodel: VGG_ILSVRC_16_layers_fc_reduced.caffemodel
sha1: 97eb7c469c5097f51a0f9a944f4a5731f470eee2

This is a model used in the paper:

ParseNet: Looking Wider to See Better
Wei Liu, Andrew Rabinovich, Alexander C. Berg
arXiv:1506.04579

This network is a modified VGGNet: it is made fully convolutional, and the parameters of the fc6 and fc7 layers are subsampled. This makes it a useful initialization for fine-tuning on dense-prediction tasks; for example, ParseNet fine-tunes it for the semantic segmentation task.
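As an illustration, here is a minimal NumPy sketch of that subsampling. The arrays are zero-filled stand-ins for the real VGG-16 weights, and the particular sampling pattern (every 4th output, every 3rd spatial tap) is an assumption for illustration, not something this gist specifies.

import numpy as np

# Stand-ins for the original VGG-16 fully connected weights.
fc6_w = np.zeros((4096, 25088), dtype=np.float32)  # fc6: 4096 x (512*7*7)
fc7_w = np.zeros((4096, 4096), dtype=np.float32)   # fc7: 4096 x 4096

# 1) Make fc6 convolutional: reinterpret each row as a 7x7 filter
#    over the 512-channel pool5 feature map.
conv6_w = fc6_w.reshape(4096, 512, 7, 7)

# 2) Subsample: keep every 4th output (4096 -> 1024) and every 3rd
#    spatial tap (7x7 -> 3x3); dilation 3 later restores the 7x7 extent.
conv6_w = conv6_w[::4, :, ::3, ::3]                # -> (1024, 512, 3, 3)

# 3) fc7 becomes a 1x1 convolution, subsampled on both axes.
conv7_w = fc7_w[::4, ::4].reshape(1024, 1024, 1, 1)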

name: "VGG_ILSVRC_16_layers_fc_reduced"
input: "data"
input_dim: 10
input_dim: 3
input_dim: 500
input_dim: 500
layers {
bottom: "data"
top: "conv1_1"
name: "conv1_1"
type: CONVOLUTION
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
}
}
layers {
bottom: "conv1_1"
top: "conv1_1"
name: "relu1_1"
type: RELU
}
layers {
bottom: "conv1_1"
top: "conv1_2"
name: "conv1_2"
type: CONVOLUTION
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
}
}
layers {
bottom: "conv1_2"
top: "conv1_2"
name: "relu1_2"
type: RELU
}
layers {
bottom: "conv1_2"
top: "pool1"
name: "pool1"
type: POOLING
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layers {
bottom: "pool1"
top: "conv2_1"
name: "conv2_1"
type: CONVOLUTION
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
}
}
layers {
bottom: "conv2_1"
top: "conv2_1"
name: "relu2_1"
type: RELU
}
layers {
bottom: "conv2_1"
top: "conv2_2"
name: "conv2_2"
type: CONVOLUTION
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
}
}
layers {
bottom: "conv2_2"
top: "conv2_2"
name: "relu2_2"
type: RELU
}
layers {
bottom: "conv2_2"
top: "pool2"
name: "pool2"
type: POOLING
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layers {
bottom: "pool2"
top: "conv3_1"
name: "conv3_1"
type: CONVOLUTION
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
}
}
layers {
bottom: "conv3_1"
top: "conv3_1"
name: "relu3_1"
type: RELU
}
layers {
bottom: "conv3_1"
top: "conv3_2"
name: "conv3_2"
type: CONVOLUTION
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
}
}
layers {
bottom: "conv3_2"
top: "conv3_2"
name: "relu3_2"
type: RELU
}
layers {
bottom: "conv3_2"
top: "conv3_3"
name: "conv3_3"
type: CONVOLUTION
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
}
}
layers {
bottom: "conv3_3"
top: "conv3_3"
name: "relu3_3"
type: RELU
}
layers {
bottom: "conv3_3"
top: "pool3"
name: "pool3"
type: POOLING
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layers {
bottom: "pool3"
top: "conv4_1"
name: "conv4_1"
type: CONVOLUTION
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
}
}
layers {
bottom: "conv4_1"
top: "conv4_1"
name: "relu4_1"
type: RELU
}
layers {
bottom: "conv4_1"
top: "conv4_2"
name: "conv4_2"
type: CONVOLUTION
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
}
}
layers {
bottom: "conv4_2"
top: "conv4_2"
name: "relu4_2"
type: RELU
}
layers {
bottom: "conv4_2"
top: "conv4_3"
name: "conv4_3"
type: CONVOLUTION
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
}
}
layers {
bottom: "conv4_3"
top: "conv4_3"
name: "relu4_3"
type: RELU
}
layers {
bottom: "conv4_3"
top: "pool4"
name: "pool4"
type: POOLING
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layers {
bottom: "pool4"
top: "conv5_1"
name: "conv5_1"
type: CONVOLUTION
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
}
}
layers {
bottom: "conv5_1"
top: "conv5_1"
name: "relu5_1"
type: RELU
}
layers {
bottom: "conv5_1"
top: "conv5_2"
name: "conv5_2"
type: CONVOLUTION
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
}
}
layers {
bottom: "conv5_2"
top: "conv5_2"
name: "relu5_2"
type: RELU
}
layers {
bottom: "conv5_2"
top: "conv5_3"
name: "conv5_3"
type: CONVOLUTION
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
}
}
layers {
bottom: "conv5_3"
top: "conv5_3"
name: "relu5_3"
type: RELU
}
layers {
bottom: "conv5_3"
top: "pool5"
name: "pool5"
type: POOLING
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layers {
bottom: "pool5"
top: "fc6"
name: "fc6"
type: CONVOLUTION
convolution_param {
num_output: 1024
kernel_size: 3
dilation: 3
pad: 3
}
}
layers {
bottom: "fc6"
top: "fc6"
name: "relu6"
type: RELU
}
layers {
bottom: "fc6"
top: "fc6"
name: "drop6"
type: DROPOUT
dropout_param {
dropout_ratio: 0.5
}
}
layers {
bottom: "fc6"
top: "fc7"
name: "fc7"
type: CONVOLUTION
convolution_param {
num_output: 1024
kernel_size: 1
}
}
layers {
bottom: "fc7"
top: "fc7"
name: "relu7"
type: RELU
}
layers {
bottom: "fc7"
top: "fc7"
name: "drop7"
type: DROPOUT
dropout_param {
dropout_ratio: 0.5
}
}
layers {
bottom: "fc7"
top: "fc8"
name: "fc8"
type: CONVOLUTION
convolution_param {
num_output: 1000
kernel_size: 1
}
}
layers {
bottom: "fc8"
top: "prob"
name: "prob"
type: SOFTMAX
}
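For reference, here is a minimal pycaffe sketch for running this deploy net. The local file names are assumptions based on the model name above; download the caffemodel first.

import numpy as np
import caffe

# Assumed local file names for the deploy definition and the weights.
net = caffe.Net('VGG_ILSVRC_16_layers_fc_reduced_deploy.prototxt',
                'VGG_ILSVRC_16_layers_fc_reduced.caffemodel',
                caffe.TEST)

# The deploy input is 10 x 3 x 500 x 500 (BGR, mean-subtracted in real use);
# zeros serve as a placeholder batch here.
net.blobs['data'].data[...] = np.zeros((10, 3, 500, 500), dtype=np.float32)
out = net.forward()
print(out['prob'].shape)  # (10, 1000, 16, 16): one softmax per position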
koki0702 commented Oct 9, 2015

@weiliu89
Why did you change the parameters of the fc6 and fc7 layers from the original [1]?
In the original VGG-16, fc6's kernel_size is 7 and its num_output is 4096.

[1] https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-md

weiliu89 commented Oct 13, 2015

This is to make VGG faster; it also implements the "atrous" algorithm described in the DeepLab paper. Here is how the reduced VGG is configured for that: https://gist.github.com/weiliu89/45e9e8de2c13af6476ca#file-vgg_voc2012ext-prototxt-L385

koki0702 commented Oct 21, 2015

I missed the DeepLab paper -- sorry! (The "atrous" algorithm is great!!)

xiaohaoChen commented May 13, 2016

@weiliu89
How can I train this VGG_ILSVRC_16_layers_fc_reduced model?
Are there any examples or source code?
Thank you!

ck196 commented May 25, 2016

Could you provide the SSD train.prototxt, deploy.prototxt, and solver.txt for InceptionV3?
Thank you.

shesung commented Oct 10, 2016

Why is dilation = 3 in fc6?

nian-liu commented Nov 1, 2016

@shesung, with dilation = 3 you get an effective convolution kernel of size dilation*(kernel_size-1)+1 = 3*(3-1)+1 = 7, which is the same as the kernel_size of fc6 in the original VGGNet.
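A quick check of that arithmetic, in plain Python:

# Effective kernel size of a dilated convolution.
def effective_kernel_size(kernel_size, dilation):
    return dilation * (kernel_size - 1) + 1

# fc6 here: 3x3 kernel with dilation 3 -> effective 7x7, matching
# the 7x7 fc6 kernel of the original VGG-16.
assert effective_kernel_size(3, 3) == 7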

tron19920125 commented Nov 7, 2016

Can I treat dilation as equivalent to the filter_stride in your caffe-fcn code?

dongzhuoyao commented Jan 14, 2017

@tron19920125, dilation is not filter_stride. Dilation is the spacing inserted between the taps of a single convolution kernel.
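To make the distinction concrete, here is a small sketch of the 1-D output-size formula (a hypothetical helper, plain Python): stride subsamples the output positions, while dilation only widens the kernel's extent.

# Output length of a 1-D convolution:
#   floor((L + 2*pad - k_eff) / stride) + 1, where k_eff = dilation*(k-1) + 1.
def out_len(L, k, stride=1, pad=0, dilation=1):
    k_eff = dilation * (k - 1) + 1
    return (L + 2 * pad - k_eff) // stride + 1

print(out_len(16, 3, stride=2, pad=1))              # 8: stride halves the map
print(out_len(16, 3, stride=1, pad=3, dilation=3))  # 16: fc6 keeps the map size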

mjssat7 commented Apr 6, 2017

@weiliu89, what dataset did you use to train this VGG_ILSVRC_16_layers_fc_reduced model? I don't see any data description in VGG_ILSVRC_16_layers_fc_reduced_deploy.prototxt. Thank you!

cooliscool commented Jun 15, 2017

@mjssat7, it was trained on the ILSVRC dataset, which has 1000 classes.

idanusher commented Oct 13, 2017

@weiliu89, I'm trying to understand how to parse the network output. I'm using Python and get a dict with "prob" inside; it is a (10, 1000, 16, 16) array. Any clue, please? :)
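Since the net is fully convolutional, prob holds one 1000-way distribution per spatial position. One simple way to reduce it to whole-image predictions, as a NumPy sketch with a random stand-in for the real output:

import numpy as np

prob = np.random.rand(10, 1000, 16, 16)  # stand-in for net.forward()['prob']
avg = prob.mean(axis=(2, 3))             # average over positions -> (10, 1000)
top1 = avg.argmax(axis=1)                # predicted class index per image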

BOBrown commented Mar 12, 2018

Was the basic reduced VGG fine-tuned on the ImageNet-1000 dataset or the VOC dataset? If ImageNet-1000, could you report the result of the reduced VGG on ImageNet?
