Skip to content

Instantly share code, notes, and snippets.

@sergeyk
Last active April 14, 2018 21:56
Show Gist options
  • Save sergeyk/034c6ac3865563b69e60 to your computer and use it in GitHub Desktop.
Save sergeyk/034c6ac3865563b69e60 to your computer and use it in GitHub Desktop.
Finetuning CaffeNet on Flickr Style
name caffemodel caffemodel_url license sha1 caffe_commit gist_id
Finetuning CaffeNet on Flickr Style
finetune_flickr_style.caffemodel
non-commercial
b61b5cef7d771b53b0c488e78d35ccadc073e9cf
737ea5e936821b5c69f9c3952d72693ae5843370
034c6ac3865563b69e60

This model was trained exactly as described in docs/finetune_flickr_style/readme.md, using all 80000 images. The final performance was:

I1017 07:36:17.370688 31333 solver.cpp:228] Iteration 100000, loss = 0.757952
I1017 07:36:17.370730 31333 solver.cpp:247] Iteration 100000, Testing net (#0)
I1017 07:36:34.248730 31333 solver.cpp:298]     Test net output #0: accuracy = 0.3916

License

The Flickr Style dataset contains only URLs to images. Some of the images may be copyrighted. Training a category-recognition model for research/non-commercial use may constitute fair use of this data, but the resulting model should not be used for commercial purposes.

# Solver settings for fine-tuning on Flickr Style.

# Network definition that this solver optimizes.
net: "models/finetune_flickr_style/train_val.prototxt"

# Evaluation: every 1000 training iterations, run 100 test batches.
test_interval: 1000
test_iter: 100

# Learning-rate schedule: start at base_lr and multiply by gamma every
# `stepsize` iterations. The base rate for fine-tuning should be lower
# than when starting from scratch.
lr_policy: "step"
base_lr: 0.001
gamma: 0.1
# The step size should also be lower, as we're closer to being done.
stepsize: 20000
max_iter: 100000

# Momentum and global weight decay.
momentum: 0.9
weight_decay: 0.0005

# Print progress every 20 iterations; write a snapshot every 10000.
display: 20
snapshot: 10000
snapshot_prefix: "models/finetune_flickr_style/finetune_flickr_style"

# Uncomment the following line to default to CPU mode solving.
# solver_mode: CPU
# Network used for fine-tuning; the final classifier layer is "fc8_flickr".
name: "FlickrStyleCaffeNet"

# Training input (TRAIN phase only): reads the image list in train.txt,
# resizes each image to 256x256, then applies the transform below
# (227 crop, mean file, mirroring enabled). Emits "data" and "label".
layers {
  name: "data"
  type: IMAGE_DATA
  include: { phase: TRAIN }
  top: "data"
  top: "label"
  image_data_param {
    source: "data/flickr_style/train.txt"
    new_height: 256
    new_width: 256
    batch_size: 50
  }
  transform_param {
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
    crop_size: 227
    mirror: true
  }
}
# Evaluation input (TEST phase only): same resizing, crop size and mean
# file as the TRAIN-phase data layer, but with mirroring disabled.
# It reads the test split list (test.txt) rather than the training list,
# so the reported accuracy is not measured on the training images.
layers {
  name: "data"
  type: IMAGE_DATA
  top: "data"
  top: "label"
  image_data_param {
    source: "data/flickr_style/test.txt"
    batch_size: 50
    new_height: 256
    new_width: 256
  }
  transform_param {
    crop_size: 227
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
    mirror: false
  }
  include: { phase: TEST }
}
# Stage 1: convolution -> ReLU -> max pooling -> local response normalization.

# 96 filters of size 11x11 applied with stride 4 to the input images.
layers {
  name: "conv1"
  type: CONVOLUTION
  bottom: "data"
  top: "conv1"
  # Per-blob multipliers: first entry is for the weights, second for the biases.
  blobs_lr: 1
  weight_decay: 1
  blobs_lr: 2
  weight_decay: 0
  convolution_param {
    kernel_size: 11
    stride: 4
    num_output: 96
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

# ReLU with identical bottom and top, so it operates on "conv1" in place.
layers {
  name: "relu1"
  type: RELU
  bottom: "conv1"
  top: "conv1"
}

# 3x3 max pooling with stride 2.
layers {
  name: "pool1"
  type: POOLING
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    stride: 2
    kernel_size: 3
  }
}

# Local response normalization over a window of 5.
layers {
  name: "norm1"
  type: LRN
  bottom: "pool1"
  top: "norm1"
  lrn_param {
    local_size: 5
    beta: 0.75
    alpha: 0.0001
  }
}
# Stage 2: convolution -> ReLU -> max pooling -> local response normalization.

# 256 filters of size 5x5, padding 2, split into 2 groups.
layers {
  name: "conv2"
  type: CONVOLUTION
  bottom: "norm1"
  top: "conv2"
  # Per-blob multipliers: first entry is for the weights, second for the biases.
  blobs_lr: 1
  weight_decay: 1
  blobs_lr: 2
  weight_decay: 0
  convolution_param {
    kernel_size: 5
    pad: 2
    group: 2
    num_output: 256
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}

# ReLU with identical bottom and top, so it operates on "conv2" in place.
layers {
  name: "relu2"
  type: RELU
  bottom: "conv2"
  top: "conv2"
}

# 3x3 max pooling with stride 2.
layers {
  name: "pool2"
  type: POOLING
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    stride: 2
    kernel_size: 3
  }
}

# Local response normalization over a window of 5.
layers {
  name: "norm2"
  type: LRN
  bottom: "pool2"
  top: "norm2"
  lrn_param {
    local_size: 5
    beta: 0.75
    alpha: 0.0001
  }
}
# Stage 3: convolution -> ReLU (no pooling or normalization).

# 384 filters of size 3x3, padding 1, no grouping.
layers {
  name: "conv3"
  type: CONVOLUTION
  bottom: "norm2"
  top: "conv3"
  # Per-blob multipliers: first entry is for the weights, second for the biases.
  blobs_lr: 1
  weight_decay: 1
  blobs_lr: 2
  weight_decay: 0
  convolution_param {
    kernel_size: 3
    pad: 1
    num_output: 384
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

# ReLU with identical bottom and top, so it operates on "conv3" in place.
layers {
  name: "relu3"
  type: RELU
  bottom: "conv3"
  top: "conv3"
}
# Stage 4: convolution -> ReLU (no pooling or normalization).

# 384 filters of size 3x3, padding 1, split into 2 groups.
layers {
  name: "conv4"
  type: CONVOLUTION
  bottom: "conv3"
  top: "conv4"
  # Per-blob multipliers: first entry is for the weights, second for the biases.
  blobs_lr: 1
  weight_decay: 1
  blobs_lr: 2
  weight_decay: 0
  convolution_param {
    kernel_size: 3
    pad: 1
    group: 2
    num_output: 384
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}

# ReLU with identical bottom and top, so it operates on "conv4" in place.
layers {
  name: "relu4"
  type: RELU
  bottom: "conv4"
  top: "conv4"
}
# Stage 5: convolution -> ReLU -> max pooling.

# 256 filters of size 3x3, padding 1, split into 2 groups.
layers {
  name: "conv5"
  type: CONVOLUTION
  bottom: "conv4"
  top: "conv5"
  # Per-blob multipliers: first entry is for the weights, second for the biases.
  blobs_lr: 1
  weight_decay: 1
  blobs_lr: 2
  weight_decay: 0
  convolution_param {
    kernel_size: 3
    pad: 1
    group: 2
    num_output: 256
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}

# ReLU with identical bottom and top, so it operates on "conv5" in place.
layers {
  name: "relu5"
  type: RELU
  bottom: "conv5"
  top: "conv5"
}

# 3x3 max pooling with stride 2; its output feeds the fully connected layers.
layers {
  name: "pool5"
  type: POOLING
  bottom: "conv5"
  top: "pool5"
  pooling_param {
    pool: MAX
    stride: 2
    kernel_size: 3
  }
}
# fc6: fully connected layer -> ReLU -> dropout.

# Inner product from "pool5" to 4096 outputs.
layers {
  name: "fc6"
  type: INNER_PRODUCT
  bottom: "pool5"
  top: "fc6"
  # Per-blob multipliers: first entry is for the weights, second for the biases.
  blobs_lr: 1
  weight_decay: 1
  blobs_lr: 2
  weight_decay: 0
  inner_product_param {
    num_output: 4096
    bias_filler {
      type: "constant"
      value: 1
    }
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
  }
}

# ReLU with identical bottom and top, so it operates on "fc6" in place.
layers {
  name: "relu6"
  type: RELU
  bottom: "fc6"
  top: "fc6"
}

# Dropout with ratio 0.5, also applied to "fc6" in place.
layers {
  name: "drop6"
  type: DROPOUT
  bottom: "fc6"
  top: "fc6"
  dropout_param {
    dropout_ratio: 0.5
  }
}
# fc7: fully connected layer -> ReLU -> dropout.

# Inner product from "fc6" to 4096 outputs.
layers {
  name: "fc7"
  type: INNER_PRODUCT
  bottom: "fc6"
  top: "fc7"
  # Setting blobs_lr to 0 disables fine-tuning of this layer; the same
  # applies to any other layer.
  # First entry of each pair is for the weights, second for the biases.
  blobs_lr: 1
  weight_decay: 1
  blobs_lr: 2
  weight_decay: 0
  inner_product_param {
    num_output: 4096
    bias_filler {
      type: "constant"
      value: 1
    }
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
  }
}

# ReLU with identical bottom and top, so it operates on "fc7" in place.
layers {
  name: "relu7"
  type: RELU
  bottom: "fc7"
  top: "fc7"
}

# Dropout with ratio 0.5, also applied to "fc7" in place.
layers {
  name: "drop7"
  type: DROPOUT
  bottom: "fc7"
  top: "fc7"
  dropout_param {
    dropout_ratio: 0.5
  }
}
# Final classifier: inner product from "fc7" to 20 outputs.
layers {
  name: "fc8_flickr"
  type: INNER_PRODUCT
  bottom: "fc7"
  top: "fc8_flickr"
  # The learning-rate multipliers are higher here than in the other layers
  # (10/20 versus 1/2) because this layer starts from random weights while
  # the others are already trained.
  # First entry of each pair is for the weights, second for the biases.
  blobs_lr: 10
  weight_decay: 1
  blobs_lr: 20
  weight_decay: 0
  inner_product_param {
    num_output: 20
    bias_filler {
      type: "constant"
      value: 0
    }
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
  }
}
# Softmax loss computed from the "fc8_flickr" outputs and the labels.
# No top blob is declared for this layer.
layers {
  name: "loss"
  type: SOFTMAX_LOSS
  bottom: "fc8_flickr"
  bottom: "label"
}

# Classification accuracy, included only in the TEST phase.
layers {
  name: "accuracy"
  type: ACCURACY
  include: { phase: TEST }
  bottom: "fc8_flickr"
  bottom: "label"
  top: "accuracy"
}
@SalemAmeen
Copy link

Is the accuracy in this pretrained model 0.3916 only?

@aceimnorstuvwxz
Copy link

how about top5 error rate?

@Gil-Mor
Copy link

Gil-Mor commented Mar 23, 2017

Why do you have 'source: "data/flickr_style/train.txt"' in both TRAIN phase and TEST phase in val.prototxt?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment