Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
ParseNet on PASCAL 2012 dataset
name caffemodel caffemodel_url sha1 gist_id
ParseNet on PASCAL
VGG_VOC2012ext.caffemodel
99cc76c373dc522fd70f80208b30a43ab2fba2f6

This is a model presented in the paper

ParseNet: Looking Wider to See Better
Wei Liu, Andrew Rabinovich, Alexander C. Berg
arXiv:1506.04579

This is the ParseNet model trained on PASCAL (using extra data from Hariharan et al. and finetuned from the fully convolutional reduced VGGNet).

You should be able to train/eval this model with http://github.com/weiliu89/caffe/tree/fcn. This branch introduces filter_stride (used for 'atrous' algorithm as described in Deeplab), L2-norm layer, evaluation code on the fly, etc.

The model should obtain 69.55 mean IoU on the PASCAL 2012 segmentation val dataset. Please feel free to send me email (wliu@cs.unc.edu) if you have any questions.

name: "VGG_VOC2012ext"
layer {
name: "data"
type: "Data"
top: "data"
include {
phase: TRAIN
}
transform_param {
mean_value: 104.00699
mean_value: 116.66877
mean_value: 122.67892
}
data_param {
source: "examples/VOC2012ext/VOC2012ext_train_aug_img_lmdb"
batch_size: 1
backend: LMDB
}
}
layer {
name: "label"
type: "Data"
top: "label"
include {
phase: TRAIN
}
data_param {
source: "examples/VOC2012ext/VOC2012ext_train_aug_label_lmdb"
batch_size: 1
backend: LMDB
}
}
layer {
name: "data"
type: "Data"
top: "data"
include {
phase: TEST
}
transform_param {
mean_value: 104.00699
mean_value: 116.66877
mean_value: 122.67892
}
data_param {
source: "examples/VOC2012ext/VOC2012ext_val_img_lmdb"
batch_size: 1
backend: LMDB
}
}
layer {
name: "label"
type: "Data"
top: "label"
include {
phase: TEST
}
data_param {
source: "examples/VOC2012ext/VOC2012ext_val_label_lmdb"
batch_size: 1
backend: LMDB
}
}
layer {
name: "conv1_1"
type: "Convolution"
bottom: "data"
top: "conv1_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
engine: CAFFE
}
}
layer {
name: "relu1_1"
type: "ReLU"
bottom: "conv1_1"
top: "conv1_1"
}
layer {
name: "conv1_2"
type: "Convolution"
bottom: "conv1_1"
top: "conv1_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
engine: CAFFE
}
}
layer {
name: "relu1_2"
type: "ReLU"
bottom: "conv1_2"
top: "conv1_2"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1_2"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv2_1"
type: "Convolution"
bottom: "pool1"
top: "conv2_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
engine: CAFFE
}
}
layer {
name: "relu2_1"
type: "ReLU"
bottom: "conv2_1"
top: "conv2_1"
}
layer {
name: "conv2_2"
type: "Convolution"
bottom: "conv2_1"
top: "conv2_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
engine: CAFFE
}
}
layer {
name: "relu2_2"
type: "ReLU"
bottom: "conv2_2"
top: "conv2_2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2_2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv3_1"
type: "Convolution"
bottom: "pool2"
top: "conv3_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
engine: CAFFE
}
}
layer {
name: "relu3_1"
type: "ReLU"
bottom: "conv3_1"
top: "conv3_1"
}
layer {
name: "conv3_2"
type: "Convolution"
bottom: "conv3_1"
top: "conv3_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
engine: CAFFE
}
}
layer {
name: "relu3_2"
type: "ReLU"
bottom: "conv3_2"
top: "conv3_2"
}
layer {
name: "conv3_3"
type: "Convolution"
bottom: "conv3_2"
top: "conv3_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
engine: CAFFE
}
}
layer {
name: "relu3_3"
type: "ReLU"
bottom: "conv3_3"
top: "conv3_3"
}
layer {
name: "pool3"
type: "Pooling"
bottom: "conv3_3"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv4_1"
type: "Convolution"
bottom: "pool3"
top: "conv4_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
engine: CAFFE
}
}
layer {
name: "relu4_1"
type: "ReLU"
bottom: "conv4_1"
top: "conv4_1"
}
layer {
name: "conv4_2"
type: "Convolution"
bottom: "conv4_1"
top: "conv4_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
engine: CAFFE
}
}
layer {
name: "relu4_2"
type: "ReLU"
bottom: "conv4_2"
top: "conv4_2"
}
layer {
name: "conv4_3"
type: "Convolution"
bottom: "conv4_2"
top: "conv4_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
engine: CAFFE
}
}
layer {
name: "relu4_3"
type: "ReLU"
bottom: "conv4_3"
top: "conv4_3"
}
layer {
name: "pool4"
type: "Pooling"
bottom: "conv4_3"
top: "pool4"
pooling_param {
pool: MAX
kernel_size: 3
stride: 1
pad: 1
}
}
layer {
name: "conv5_1"
type: "Convolution"
bottom: "pool4"
top: "conv5_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 2
filter_stride: 2
kernel_size: 3
engine: CAFFE
}
}
layer {
name: "relu5_1"
type: "ReLU"
bottom: "conv5_1"
top: "conv5_1"
}
layer {
name: "conv5_2"
type: "Convolution"
bottom: "conv5_1"
top: "conv5_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 2
filter_stride: 2
kernel_size: 3
engine: CAFFE
}
}
layer {
name: "relu5_2"
type: "ReLU"
bottom: "conv5_2"
top: "conv5_2"
}
layer {
name: "conv5_3"
type: "Convolution"
bottom: "conv5_2"
top: "conv5_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 2
filter_stride: 2
kernel_size: 3
engine: CAFFE
}
}
layer {
name: "relu5_3"
type: "ReLU"
bottom: "conv5_3"
top: "conv5_3"
}
layer {
name: "pool5"
type: "Pooling"
bottom: "conv5_3"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 3
stride: 1
pad: 1
}
}
layer {
name: "fc6"
type: "Convolution"
bottom: "pool5"
top: "fc6"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 1024
kernel_size: 3
filter_stride: 12
pad: 12
engine: CAFFE
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "fc6"
top: "fc6"
}
layer {
name: "drop6"
type: "Dropout"
bottom: "fc6"
top: "fc6"
dropout_param {
dropout_ratio: 0.5
}
}
layer {
name: "fc7"
type: "Convolution"
bottom: "fc6"
top: "fc7"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 1024
kernel_size: 1
engine: CAFFE
}
}
layer {
name: "relu7"
type: "ReLU"
bottom: "fc7"
top: "fc7"
}
layer {
name: "drop7"
type: "Dropout"
bottom: "fc7"
top: "fc7"
dropout_param {
dropout_ratio: 0.5
}
}
### ParseNet global-context branch: L2-normalize fc7, global-average-pool it, score both paths, then fuse ###
layer {
name: "fc7_norm"
type: "Normalize"
bottom: "fc7"
top: "fc7_norm"
norm_param {
scale_filler {
type: "constant"
value: 10
}
across_spatial: false
channel_shared: false
fix_scale: false
}
}
layer {
name: "pool6_1x1"
type: "Pooling"
bottom: "fc7"
top: "pool6_1x1"
pooling_param {
pool: AVE
bin_size: 1
}
}
layer {
name: "pool6_1x1_norm"
type: "Normalize"
bottom: "pool6_1x1"
top: "pool6_1x1_norm"
norm_param {
scale_filler {
type: "constant"
value: 10
}
across_spatial: false
channel_shared: false
fix_scale: false
}
}
layer {
name: "pool6_1x1_norm_drop"
type: "Dropout"
bottom: "pool6_1x1_norm"
top: "pool6_1x1_norm"
dropout_param {
dropout_ratio: 0.3
}
}
layer {
name: "fc7_norm_score21"
type: "Convolution"
bottom: "fc7_norm"
top: "fc7_norm_score21"
param {
lr_mult: 10
decay_mult: 1
}
param {
lr_mult: 20
decay_mult: 0
}
convolution_param {
num_output: 21
kernel_size: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
engine: CAFFE
}
}
layer {
name: "pool6_1x1_norm_score21"
type: "Convolution"
bottom: "pool6_1x1_norm"
top: "pool6_1x1_norm_score21"
param {
lr_mult: 10
decay_mult: 1
}
param {
lr_mult: 20
decay_mult: 0
}
convolution_param {
num_output: 21
kernel_size: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
engine: CAFFE
}
}
layer {
name: "pool6_1x1_norm_upscore21"
type: "UnPooling"
bottom: "pool6_1x1_norm_score21"
bottom: "fc7_norm_score21"
top: "pool6_1x1_norm_upscore21"
unpooling_param {
unpool: REP
out_kernel_size: 0
out_stride: 0
}
}
layer {
name: "score21"
type: "Eltwise"
bottom: "pool6_1x1_norm_upscore21"
bottom: "fc7_norm_score21"
top: "score21"
eltwise_param {
operation: SUM
}
}
layer {
name: "upscore21"
type: "Deconvolution"
bottom: "score21"
top: "upscore21"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 21
kernel_size: 16
stride: 8
pad: 4
group: 21
weight_filler {
type: "bilinear_upsampling"
}
}
}
layer {
type: "Crop"
name: "score"
bottom: "upscore21"
bottom: "data"
top: "score"
}
layer {
type: 'SoftmaxWithLoss'
name: 'loss'
bottom: 'score'
bottom: 'label'
top: 'loss'
loss_param {
normalize: false
ignore_label: 255
}
include {
phase: TRAIN
}
}
layer {
type: "ParseOutput"
name: "predlabel"
bottom: "score"
top: "predlabel"
include {
phase: TEST
}
}
layer {
type: "ParseEvaluate"
name: "evaluation"
bottom: "predlabel"
bottom: "label"
top: "evaluation"
parse_evaluate_param {
num_labels: 21
ignore_label: 255
}
include {
phase: TEST
}
}
# Train ParseNet (VGG_VOC2012ext) starting from the reduced VGG-16 weights.
# You should change $CAFFE_ROOT to the caffe root directory (e.g. ~/project/caffe)
# or make it an environment variable (e.g. put it in ~/.bashrc).
# Quote the expansion and abort on failure: otherwise a bad/unset CAFFE_ROOT
# would silently run caffe from the current directory.
cd "$CAFFE_ROOT" || exit 1
# mkdir -p is a no-op when the directory already exists, so no existence test is needed.
mkdir -p jobs/VGGNet/VOC2012ext
./build/tools/caffe train \
  --solver="models/VGGNet/VOC2012ext/VGG_VOC2012ext_solver.prototxt" \
  --weights="models/VGGNet/VGG_ILSVRC_16_layers_fc_reduced.caffemodel" \
  --gpu 0 2>&1 | tee jobs/VGGNet/VOC2012ext/VGG_VOC2012ext.log
# Solver settings for fine-tuning ParseNet (VGG_VOC2012ext) on PASCAL VOC 2012.
net: "models/VGGNet/VOC2012ext/VGG_VOC2012ext.prototxt"
# 1449 test iterations at batch_size 1 (see the TEST data layers in the net),
# i.e. one full pass over the val LMDB per evaluation.
test_iter: 1449
test_interval: 2000          # evaluate every 2000 training iterations
eval_type: "segmentation"    # report segmentation metrics (mean IoU) rather than classification accuracy
display: 20                  # log training status every 20 iterations
average_loss: 20             # displayed loss is a running average over the last 20 iterations
lr_policy: "poly"            # Caffe poly policy: lr = base_lr * (1 - iter/max_iter)^power
power: 0.9
# NOTE(review): very small base_lr — presumably because the loss is summed
# over all pixels without normalization (normalize: false in the net's
# SoftmaxWithLoss layer), which inflates gradient magnitudes; confirm.
base_lr: 1e-8
momentum: 0.9
iter_size: 8                 # accumulate gradients over 8 passes (effective batch = 8 with batch_size 1)
max_iter: 20000
weight_decay: 0.0005
snapshot: 10000              # save weights every 10000 iterations
snapshot_prefix: "models/VGGNet/VOC2012ext/VGG_VOC2012ext"
test_initialization: false   # skip the evaluation pass before training starts
debug_info: false
@MalteOeljeklaus
Copy link

From the paper [1] I understood, that the pooled global feature is concatenated to the feature map. However, the above prototxt seems to use an element wise summation for this purpose. Am I missing something?

[1] http://arxiv.org/abs/1506.04579

@likesiwell
Copy link

Hello, I want to train your ParseNet from scratch on the PASCAL VOC 2012 segmentation task. Following the experiment in your paper, I use the augmented dataset and the train_val.prototxt and solver.prototxt you provided above.
But I always get the all zero output which stand for background label for all pixels in the image. I also tried other learning rate and trained several times, but the results are all zero.
I think it's because the network is stuck in a local minimum, since the training set is imbalanced: the background label occupies nearly 75% of all pixels.
So, Have you ever meet this problem during your experiments?
If not, do you know what causes this problem ?
I have been bothered by this problem for two weeks, I also trained FCNs, DeconvNet and my segmentation networks, but the results are all the same, all zero.
Looking forward to your reply, thank you

@huangh12
Copy link

Same question as @MalteOeljeklaus

@matteosal
Copy link

Hello, what is the licensing for this model? Thanks

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment