jimgoo/readme.md

## readme.md

      
    Raw
  

              readme.md
            
          
    name: CaffeNet fine-tuned on the Oxford 102 category flower dataset
caffemodel: oxford102.caffemodel
caffemodel_url: https://s3.amazonaws.com/jgoode/oxford102.caffemodel
gist_id: 0179e52305ca768a601f
license: BSD-3
See https://github.com/jimgoo/caffe-oxford102 for full code.
The CNN is a BVLC reference CaffeNet fine-tuned for the Oxford 102 category flower dataset. The number of outputs in the inner product layer has been set to 102 to reflect the number of flower categories. Hyperparameter choices reflect those in Fine-tuning CaffeNet for Style Recognition on “Flickr Style” Data. The global learning rate is reduced while the learning rate for the final fully connected is increased relative to the other layers.
The split file (setid.mat) lists 6,149 images in the test set and 1,020 images in the training set. We have instead trained this model on the larger set of 6,149 images and tested against the smaller set of 1,020 images.
After 50,000 iterations, the top-1 error is 7% on the test set of 1,020 images:
I0215 15:28:06.417726  6585 solver.cpp:246] Iteration 50000, loss = 0.000120038
I0215 15:28:06.417789  6585 solver.cpp:264] Iteration 50000, Testing net (#0)
I0215 15:28:30.834987  6585 solver.cpp:315]     Test net output #0: accuracy = 0.9326
I0215 15:28:30.835072  6585 solver.cpp:251] Optimization Done.
I0215 15:28:30.835083  6585 caffe.cpp:121] Optimization Done.

Note that this uses the mean file for ILSVRC 2012 instead of the mean for the actual Oxford dataset.

  
## solver.prototxt
net: "models/oxford102/train_val.prototxt"
test_iter: 100
test_interval: 1000
# lr for fine-tuning should be lower than when starting from scratch
base_lr: 0.001
lr_policy: "step"
gamma: 0.1
# stepsize should also be lower, as we're closer to being done
stepsize: 20000
display: 20
max_iter: 50000
momentum: 0.9
weight_decay: 0.0005
snapshot: 10000
snapshot_prefix: "models/oxford102/oxford102"
# uncomment the following to default to CPU mode solving
solver_mode: GPU

## train_val.prototxt
name: "Oxford102CaffeNet"
layers {
  name: "data"
  type: IMAGE_DATA
  top: "data"
  top: "label"
  image_data_param {
    source: "data/oxford102/test.txt"
    batch_size: 50
    new_height: 256
    new_width: 256
  }
  transform_param {
    crop_size: 227
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
    mirror: true
  }
  include: { phase: TRAIN }
}
layers {
  name: "data"
  type: IMAGE_DATA
  top: "data"
  top: "label"
  image_data_param {
    source: "data/oxford102/train.txt"
    batch_size: 50
    new_height: 256
    new_width: 256
  }
  transform_param {
    crop_size: 227
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
    mirror: false
  }
  include: { phase: TEST }
}
layers {
  name: "conv1"
  type: CONVOLUTION
  bottom: "data"
  top: "conv1"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 96
    kernel_size: 11
    stride: 4
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layers {
  name: "relu1"
  type: RELU
  bottom: "conv1"
  top: "conv1"
}
layers {
  name: "pool1"
  type: POOLING
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layers {
  name: "norm1"
  type: LRN
  bottom: "pool1"
  top: "norm1"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
layers {
  name: "conv2"
  type: CONVOLUTION
  bottom: "norm1"
  top: "conv2"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 256
    pad: 2
    kernel_size: 5
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layers {
  name: "relu2"
  type: RELU
  bottom: "conv2"
  top: "conv2"
}
layers {
  name: "pool2"
  type: POOLING
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layers {
  name: "norm2"
  type: LRN
  bottom: "pool2"
  top: "norm2"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
layers {
  name: "conv3"
  type: CONVOLUTION
  bottom: "norm2"
  top: "conv3"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layers {
  name: "relu3"
  type: RELU
  bottom: "conv3"
  top: "conv3"
}
layers {
  name: "conv4"
  type: CONVOLUTION
  bottom: "conv3"
  top: "conv4"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layers {
  name: "relu4"
  type: RELU
  bottom: "conv4"
  top: "conv4"
}
layers {
  name: "conv5"
  type: CONVOLUTION
  bottom: "conv4"
  top: "conv5"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layers {
  name: "relu5"
  type: RELU
  bottom: "conv5"
  top: "conv5"
}
layers {
  name: "pool5"
  type: POOLING
  bottom: "conv5"
  top: "pool5"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layers {
  name: "fc6"
  type: INNER_PRODUCT
  bottom: "pool5"
  top: "fc6"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layers {
  name: "relu6"
  type: RELU
  bottom: "fc6"
  top: "fc6"
}
layers {
  name: "drop6"
  type: DROPOUT
  bottom: "fc6"
  top: "fc6"
  dropout_param {
    dropout_ratio: 0.5
  }
}
layers {
  name: "fc7"
  type: INNER_PRODUCT
  bottom: "fc6"
  top: "fc7"
  # Note that blobs_lr can be set to 0 to disable any fine-tuning of this, and any other, layer
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layers {
  name: "relu7"
  type: RELU
  bottom: "fc7"
  top: "fc7"
}
layers {
  name: "drop7"
  type: DROPOUT
  bottom: "fc7"
  top: "fc7"
  dropout_param {
    dropout_ratio: 0.5
  }
}
layers {
  name: "fc8_oxford_102"
  type: INNER_PRODUCT
  bottom: "fc7"
  top: "fc8_oxford_102"
  # blobs_lr is set to higher than for other layers, because this layer is starting from random while the others are already trained
  blobs_lr: 10
  blobs_lr: 20
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 102
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layers {
  name: "loss"
  type: SOFTMAX_LOSS
  bottom: "fc8_oxford_102"
  bottom: "label"
}
layers {
  name: "accuracy"
  type: ACCURACY
  bottom: "fc8_oxford_102"
  bottom: "label"
  top: "accuracy"
  include: { phase: TEST }
}
	net: "models/oxford102/train_val.prototxt"
	test_iter: 100
	test_interval: 1000
	# lr for fine-tuning should be lower than when starting from scratch
	base_lr: 0.001
	lr_policy: "step"
	gamma: 0.1
	# stepsize should also be lower, as we're closer to being done
	stepsize: 20000
	display: 20
	max_iter: 50000
	momentum: 0.9
	weight_decay: 0.0005
	snapshot: 10000
	snapshot_prefix: "models/oxford102/oxford102"
	# uncomment the following to default to CPU mode solving
	solver_mode: GPU
	name: "Oxford102CaffeNet"
	layers {
	name: "data"
	type: IMAGE_DATA
	top: "data"
	top: "label"
	image_data_param {
	source: "data/oxford102/test.txt"
	batch_size: 50
	new_height: 256
	new_width: 256
	}
	transform_param {
	crop_size: 227
	mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
	mirror: true
	}
	include: { phase: TRAIN }
	}
	layers {
	name: "data"
	type: IMAGE_DATA
	top: "data"
	top: "label"
	image_data_param {
	source: "data/oxford102/train.txt"
	batch_size: 50
	new_height: 256
	new_width: 256
	}
	transform_param {
	crop_size: 227
	mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
	mirror: false
	}
	include: { phase: TEST }
	}
	layers {
	name: "conv1"
	type: CONVOLUTION
	bottom: "data"
	top: "conv1"
	blobs_lr: 1
	blobs_lr: 2
	weight_decay: 1
	weight_decay: 0
	convolution_param {
	num_output: 96
	kernel_size: 11
	stride: 4
	weight_filler {
	type: "gaussian"
	std: 0.01
	}
	bias_filler {
	type: "constant"
	value: 0
	}
	}
	}
	layers {
	name: "relu1"
	type: RELU
	bottom: "conv1"
	top: "conv1"
	}
	layers {
	name: "pool1"
	type: POOLING
	bottom: "conv1"
	top: "pool1"
	pooling_param {
	pool: MAX
	kernel_size: 3
	stride: 2
	}
	}
	layers {
	name: "norm1"
	type: LRN
	bottom: "pool1"
	top: "norm1"
	lrn_param {
	local_size: 5
	alpha: 0.0001
	beta: 0.75
	}
	}
	layers {
	name: "conv2"
	type: CONVOLUTION
	bottom: "norm1"
	top: "conv2"
	blobs_lr: 1
	blobs_lr: 2
	weight_decay: 1
	weight_decay: 0
	convolution_param {
	num_output: 256
	pad: 2
	kernel_size: 5
	group: 2
	weight_filler {
	type: "gaussian"
	std: 0.01
	}
	bias_filler {
	type: "constant"
	value: 1
	}
	}
	}
	layers {
	name: "relu2"
	type: RELU
	bottom: "conv2"
	top: "conv2"
	}
	layers {
	name: "pool2"
	type: POOLING
	bottom: "conv2"
	top: "pool2"
	pooling_param {
	pool: MAX
	kernel_size: 3
	stride: 2
	}
	}
	layers {
	name: "norm2"
	type: LRN
	bottom: "pool2"
	top: "norm2"
	lrn_param {
	local_size: 5
	alpha: 0.0001
	beta: 0.75
	}
	}
	layers {
	name: "conv3"
	type: CONVOLUTION
	bottom: "norm2"
	top: "conv3"
	blobs_lr: 1
	blobs_lr: 2
	weight_decay: 1
	weight_decay: 0
	convolution_param {
	num_output: 384
	pad: 1
	kernel_size: 3
	weight_filler {
	type: "gaussian"
	std: 0.01
	}
	bias_filler {
	type: "constant"
	value: 0
	}
	}
	}
	layers {
	name: "relu3"
	type: RELU
	bottom: "conv3"
	top: "conv3"
	}
	layers {
	name: "conv4"
	type: CONVOLUTION
	bottom: "conv3"
	top: "conv4"
	blobs_lr: 1
	blobs_lr: 2
	weight_decay: 1
	weight_decay: 0
	convolution_param {
	num_output: 384
	pad: 1
	kernel_size: 3
	group: 2
	weight_filler {
	type: "gaussian"
	std: 0.01
	}
	bias_filler {
	type: "constant"
	value: 1
	}
	}
	}
	layers {
	name: "relu4"
	type: RELU
	bottom: "conv4"
	top: "conv4"
	}
	layers {
	name: "conv5"
	type: CONVOLUTION
	bottom: "conv4"
	top: "conv5"
	blobs_lr: 1
	blobs_lr: 2
	weight_decay: 1
	weight_decay: 0
	convolution_param {
	num_output: 256
	pad: 1
	kernel_size: 3
	group: 2
	weight_filler {
	type: "gaussian"
	std: 0.01
	}
	bias_filler {
	type: "constant"
	value: 1
	}
	}
	}
	layers {
	name: "relu5"
	type: RELU
	bottom: "conv5"
	top: "conv5"
	}
	layers {
	name: "pool5"
	type: POOLING
	bottom: "conv5"
	top: "pool5"
	pooling_param {
	pool: MAX
	kernel_size: 3
	stride: 2
	}
	}
	layers {
	name: "fc6"
	type: INNER_PRODUCT
	bottom: "pool5"
	top: "fc6"
	blobs_lr: 1
	blobs_lr: 2
	weight_decay: 1
	weight_decay: 0
	inner_product_param {
	num_output: 4096
	weight_filler {
	type: "gaussian"
	std: 0.005
	}
	bias_filler {
	type: "constant"
	value: 1
	}
	}
	}
	layers {
	name: "relu6"
	type: RELU
	bottom: "fc6"
	top: "fc6"
	}
	layers {
	name: "drop6"
	type: DROPOUT
	bottom: "fc6"
	top: "fc6"
	dropout_param {
	dropout_ratio: 0.5
	}
	}
	layers {
	name: "fc7"
	type: INNER_PRODUCT
	bottom: "fc6"
	top: "fc7"
	# Note that blobs_lr can be set to 0 to disable any fine-tuning of this, and any other, layer
	blobs_lr: 1
	blobs_lr: 2
	weight_decay: 1
	weight_decay: 0
	inner_product_param {
	num_output: 4096
	weight_filler {
	type: "gaussian"
	std: 0.005
	}
	bias_filler {
	type: "constant"
	value: 1
	}
	}
	}
	layers {
	name: "relu7"
	type: RELU
	bottom: "fc7"
	top: "fc7"
	}
	layers {
	name: "drop7"
	type: DROPOUT
	bottom: "fc7"
	top: "fc7"
	dropout_param {
	dropout_ratio: 0.5
	}
	}
	layers {
	name: "fc8_oxford_102"
	type: INNER_PRODUCT
	bottom: "fc7"
	top: "fc8_oxford_102"
	# blobs_lr is set to higher than for other layers, because this layer is starting from random while the others are already trained
	blobs_lr: 10
	blobs_lr: 20
	weight_decay: 1
	weight_decay: 0
	inner_product_param {
	num_output: 102
	weight_filler {
	type: "gaussian"
	std: 0.01
	}
	bias_filler {
	type: "constant"
	value: 0
	}
	}
	}
	layers {
	name: "loss"
	type: SOFTMAX_LOSS
	bottom: "fc8_oxford_102"
	bottom: "label"
	}
	layers {
	name: "accuracy"
	type: ACCURACY
	bottom: "fc8_oxford_102"
	bottom: "label"
	top: "accuracy"
	include: { phase: TEST }
	}