Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Fully convolutional reduced VGGNet
name caffemodel caffemodel_url sha1 gist_id
Fully convolutional reduced VGGNet
VGG_ILSVRC_16_layers_fc_reduced.caffemodel
97eb7c469c5097f51a0f9a944f4a5731f470eee2

This is a model used in the paper

ParseNet: Looking Wider to See Better
Wei Liu, Andrew Rabinovich, Alexander C. Berg
arXiv:1506.04579

This is a network modified from VGGNet by making it fully convolutional and also by subsampling parameters from fc6 and fc7 layers. This is useful when using it to finetune for segmentation. For example, ParseNet shows how to use it to finetune for semantic segmentation task.

# Network name and the shape of the single input blob "data".
name: "VGG_ILSVRC_16_layers_fc_reduced"
input: "data"
# The four input_dim entries are order-sensitive; in Caffe's legacy input
# declaration they are read as (num, channels, height, width).
input_dim: 10
input_dim: 3
input_dim: 500
input_dim: 500
# ---- conv1 stage: two 3x3 convolutions (64 outputs each), then 2x2 max pooling ----
layers {
  name: "conv1_1"
  type: CONVOLUTION
  bottom: "data"
  top: "conv1_1"
  convolution_param {
    num_output: 64
    kernel_size: 3
    pad: 1
  }
}
# ReLU is applied in place: bottom and top name the same blob.
layers {
  name: "relu1_1"
  type: RELU
  bottom: "conv1_1"
  top: "conv1_1"
}
layers {
  name: "conv1_2"
  type: CONVOLUTION
  bottom: "conv1_1"
  top: "conv1_2"
  convolution_param {
    num_output: 64
    kernel_size: 3
    pad: 1
  }
}
layers {
  name: "relu1_2"
  type: RELU
  bottom: "conv1_2"
  top: "conv1_2"
}
# 2x2 max pooling with stride 2.
layers {
  name: "pool1"
  type: POOLING
  bottom: "conv1_2"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
# ---- conv2 stage: two 3x3 convolutions (128 outputs each), then 2x2 max pooling ----
layers {
  name: "conv2_1"
  type: CONVOLUTION
  bottom: "pool1"
  top: "conv2_1"
  convolution_param {
    num_output: 128
    kernel_size: 3
    pad: 1
  }
}
# ReLU is applied in place: bottom and top name the same blob.
layers {
  name: "relu2_1"
  type: RELU
  bottom: "conv2_1"
  top: "conv2_1"
}
layers {
  name: "conv2_2"
  type: CONVOLUTION
  bottom: "conv2_1"
  top: "conv2_2"
  convolution_param {
    num_output: 128
    kernel_size: 3
    pad: 1
  }
}
layers {
  name: "relu2_2"
  type: RELU
  bottom: "conv2_2"
  top: "conv2_2"
}
# 2x2 max pooling with stride 2.
layers {
  name: "pool2"
  type: POOLING
  bottom: "conv2_2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
# ---- conv3 stage: three 3x3 convolutions (256 outputs each), then 2x2 max pooling ----
layers {
  name: "conv3_1"
  type: CONVOLUTION
  bottom: "pool2"
  top: "conv3_1"
  convolution_param {
    num_output: 256
    kernel_size: 3
    pad: 1
  }
}
# ReLU is applied in place: bottom and top name the same blob.
layers {
  name: "relu3_1"
  type: RELU
  bottom: "conv3_1"
  top: "conv3_1"
}
layers {
  name: "conv3_2"
  type: CONVOLUTION
  bottom: "conv3_1"
  top: "conv3_2"
  convolution_param {
    num_output: 256
    kernel_size: 3
    pad: 1
  }
}
layers {
  name: "relu3_2"
  type: RELU
  bottom: "conv3_2"
  top: "conv3_2"
}
layers {
  name: "conv3_3"
  type: CONVOLUTION
  bottom: "conv3_2"
  top: "conv3_3"
  convolution_param {
    num_output: 256
    kernel_size: 3
    pad: 1
  }
}
layers {
  name: "relu3_3"
  type: RELU
  bottom: "conv3_3"
  top: "conv3_3"
}
# 2x2 max pooling with stride 2.
layers {
  name: "pool3"
  type: POOLING
  bottom: "conv3_3"
  top: "pool3"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
# ---- conv4 stage: three 3x3 convolutions (512 outputs each), then 2x2 max pooling ----
layers {
  name: "conv4_1"
  type: CONVOLUTION
  bottom: "pool3"
  top: "conv4_1"
  convolution_param {
    num_output: 512
    kernel_size: 3
    pad: 1
  }
}
# ReLU is applied in place: bottom and top name the same blob.
layers {
  name: "relu4_1"
  type: RELU
  bottom: "conv4_1"
  top: "conv4_1"
}
layers {
  name: "conv4_2"
  type: CONVOLUTION
  bottom: "conv4_1"
  top: "conv4_2"
  convolution_param {
    num_output: 512
    kernel_size: 3
    pad: 1
  }
}
layers {
  name: "relu4_2"
  type: RELU
  bottom: "conv4_2"
  top: "conv4_2"
}
layers {
  name: "conv4_3"
  type: CONVOLUTION
  bottom: "conv4_2"
  top: "conv4_3"
  convolution_param {
    num_output: 512
    kernel_size: 3
    pad: 1
  }
}
layers {
  name: "relu4_3"
  type: RELU
  bottom: "conv4_3"
  top: "conv4_3"
}
# 2x2 max pooling with stride 2.
layers {
  name: "pool4"
  type: POOLING
  bottom: "conv4_3"
  top: "pool4"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
# ---- conv5 stage: three 3x3 convolutions (512 outputs each), then 2x2 max pooling ----
layers {
  name: "conv5_1"
  type: CONVOLUTION
  bottom: "pool4"
  top: "conv5_1"
  convolution_param {
    num_output: 512
    kernel_size: 3
    pad: 1
  }
}
# ReLU is applied in place: bottom and top name the same blob.
layers {
  name: "relu5_1"
  type: RELU
  bottom: "conv5_1"
  top: "conv5_1"
}
layers {
  name: "conv5_2"
  type: CONVOLUTION
  bottom: "conv5_1"
  top: "conv5_2"
  convolution_param {
    num_output: 512
    kernel_size: 3
    pad: 1
  }
}
layers {
  name: "relu5_2"
  type: RELU
  bottom: "conv5_2"
  top: "conv5_2"
}
layers {
  name: "conv5_3"
  type: CONVOLUTION
  bottom: "conv5_2"
  top: "conv5_3"
  convolution_param {
    num_output: 512
    kernel_size: 3
    pad: 1
  }
}
layers {
  name: "relu5_3"
  type: RELU
  bottom: "conv5_3"
  top: "conv5_3"
}
# 2x2 max pooling with stride 2.
layers {
  name: "pool5"
  type: POOLING
  bottom: "conv5_3"
  top: "pool5"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
# ---- fc6: convolutional layer with 1024 outputs ----
# A 3x3 kernel with dilation 3 spans dilation*(kernel_size-1)+1 = 3*(3-1)+1 = 7
# input positions per axis; pad 3 is half of that extent, rounded down.
layers {
  name: "fc6"
  type: CONVOLUTION
  bottom: "pool5"
  top: "fc6"
  convolution_param {
    num_output: 1024
    kernel_size: 3
    pad: 3
    dilation: 3
  }
}
# ReLU is applied in place: bottom and top name the same blob.
layers {
  name: "relu6"
  type: RELU
  bottom: "fc6"
  top: "fc6"
}
# Dropout with ratio 0.5, also applied in place on fc6.
layers {
  name: "drop6"
  type: DROPOUT
  bottom: "fc6"
  top: "fc6"
  dropout_param {
    dropout_ratio: 0.5
  }
}
# ---- fc7: 1x1 convolution with 1024 outputs ----
layers {
  name: "fc7"
  type: CONVOLUTION
  bottom: "fc6"
  top: "fc7"
  convolution_param {
    num_output: 1024
    kernel_size: 1
  }
}
# ReLU is applied in place: bottom and top name the same blob.
layers {
  name: "relu7"
  type: RELU
  bottom: "fc7"
  top: "fc7"
}
# Dropout with ratio 0.5, also applied in place on fc7.
layers {
  name: "drop7"
  type: DROPOUT
  bottom: "fc7"
  top: "fc7"
  dropout_param {
    dropout_ratio: 0.5
  }
}
# ---- fc8: 1x1 convolution with 1000 outputs ----
layers {
  name: "fc8"
  type: CONVOLUTION
  bottom: "fc7"
  top: "fc8"
  convolution_param {
    num_output: 1000
    kernel_size: 1
  }
}
# Softmax over fc8, written to the "prob" output blob.
# NOTE(review): no softmax_param is given, so Caffe's default axis applies;
# presumably that normalizes over the 1000 channels at each spatial location -- confirm.
layers {
  name: "prob"
  type: SOFTMAX
  bottom: "fc8"
  top: "prob"
}

koki0702 commented Oct 9, 2015

@weiliu89
Why did you change parameters at fc6 and fc7 layers from the original[1]?
For the original VGG-16, fc6's kernel_size should be 7 and the num_output is 4096.

[1]https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-md

Owner

weiliu89 commented Oct 13, 2015

This is to make VGG faster and also implements the "atrous" algorithm described in DeepLab paper. Here is how the reduced VGG is configured for that: https://gist.github.com/weiliu89/45e9e8de2c13af6476ca#file-vgg_voc2012ext-prototxt-L385

I missed the DeepLab paper -- sorry! (The "atrous" algorithm is great!!)

@weiliu89
How can I train this VGG_ILSVRC_16_layers_fc_reduced model?
Any examples or source codes?
Thank you!

ck196 commented May 25, 2016

Could you provide SSD train.prototxt, deploy.prototxt and solver.txt of InceptionV3?
Thank you.

shesung commented Oct 10, 2016

why the dilation = 3 in fc6?

nian-liu commented Nov 1, 2016

@shesung, with dilation=3 you get an effective convolution kernel of size dilation*(kernel_size-1)+1 = 3*(3-1)+1 = 7, which is the same as the kernel_size of fc6 in the original VGGNet.

Can i treat dilation equal to the filter_stride in your code of caffe-fcn?

@tron19920125, dilation is not filter_stride. Dilation is the hole between the elements of a single convolution kernel.

mjssat7 commented Apr 6, 2017

@weiliu89, what dataset did you use to train and obtain this VGG_ILSVRC_16_layers_fc_reduced.model? I don't see any data description in VGG_ILSVRC_16_layers_fc_reduced_deploy.prototxt. Thank you!

@mjssat7 , using ILSVRC dataset which has 1000 classes.

@weiliu89, I'm trying to understand how do I parse the network output?
I'm using python and get dict with the prob inside. there is a (10,1000,16,16) array.
please any clue.. :)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment