Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Deep Scene

A Deep Siamese Network for Scene Detection

This is a model from the paper:

A Deep Siamese Network for Scene Detection in Broadcast Videos
Lorenzo Baraldi, Costantino Grana, Rita Cucchiara
Proceedings of the 23rd ACM International Conference on Multimedia, 2015

Please cite the paper if you use the models.

Contacts

# Solver settings for the DeepScene siamese network.
# Network definition trained by this solver.
net: "examples/DeepScene/train_val.prototxt"
solver_mode: GPU

# Update rule hyper-parameters (no solver type is set, so Caffe's default applies).
base_lr: 0.001
momentum: 0.9
weight_decay: 0.0005

# Learning-rate schedule: the rate is multiplied by gamma every `stepsize` iterations.
# NOTE(review): stepsize (500) is larger than max_iter (100), so with these values
# the rate never drops during a run - confirm this is intended.
lr_policy: "step"
gamma: 0.1
stepsize: 500
max_iter: 100

# Evaluation: run test_iter test batches every test_interval training iterations.
# NOTE(review): 1000 test batches every 10 iterations is a very heavy test cadence;
# confirm these are the intended values and not leftovers from an experiment.
test_iter: 1000
test_interval: 10

# Logging and checkpointing intervals (in iterations).
display: 1
snapshot: 50
snapshot_prefix: "examples/DeepScene/snapshots/snapshot_"
############################################################
# BEGIN
############################################################
# Name of the network defined by the layers in this file.
name: "ScenesSiamese"
############################################################
# DATA LAYERS
############################################################
# Train data layers
############################################################
# TRAIN-phase image inputs, one ImageData layer per siamese branch.
# Both layers use the same batch size and the same transformation settings
# (mirroring enabled, 227x227 crop, ImageNet mean subtraction).
layer {
  name: "data_image"
  type: "ImageData"
  include { phase: TRAIN }
  top: "image"
  top: "label_image"
  image_data_param {
    source: "/raid/lbaraldi/scene/bbc_train_image.txt"
    batch_size: 100
  }
  transform_param {
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
    crop_size: 227
    mirror: true
  }
}
layer {
  name: "data_image_p"
  type: "ImageData"
  include { phase: TRAIN }
  top: "image_p"
  top: "label_image_p"
  image_data_param {
    source: "/raid/lbaraldi/scene/bbc_train_image_p.txt"
    batch_size: 100
  }
  transform_param {
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
    crop_size: 227
    mirror: true
  }
}
# TRAIN-phase HDF5 input. It provides the non-image blobs for both branches
# (time, shot_id, histograms and their *_p counterparts) plus the pair label
# consumed by the loss layer. Batch size matches the image data layers.
layer {
  name: "data_multimodal"
  type: "HDF5Data"
  include { phase: TRAIN }
  top: "time"
  top: "time_p"
  top: "shot_id"
  top: "shot_id_p"
  top: "histograms"
  top: "histograms_p"
  top: "label"
  hdf5_data_param {
    source: "/raid/lbaraldi/scene/bbc_train_h5.txt"
    batch_size: 100
  }
}
############################################################
# Test data layers
############################################################
# TEST-phase image inputs, one ImageData layer per siamese branch.
# Mirroring is disabled here: when `mirror` is enabled Caffe flips each image at
# random in every phase, which would make the test loss non-deterministic and
# would flip the two images of a pair independently of each other.
layer {
  name: "data_image"
  type: "ImageData"
  top: "image"
  top: "label_image"
  include { phase: TEST }
  image_data_param {
    source: "/raid/lbaraldi/scene/bbc_test_image.txt"
    batch_size: 100
  }
  transform_param {
    mirror: false
    crop_size: 227
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
  }
}
layer {
  name: "data_image_p"
  type: "ImageData"
  top: "image_p"
  top: "label_image_p"
  include { phase: TEST }
  image_data_param {
    source: "/raid/lbaraldi/scene/bbc_test_image_p.txt"
    batch_size: 100
  }
  transform_param {
    mirror: false
    crop_size: 227
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
  }
}
# TEST-phase HDF5 input. It exposes the same tops as the TRAIN-phase layer of the
# same name, read from the test list file.
layer {
  name: "data_multimodal"
  type: "HDF5Data"
  include { phase: TEST }
  top: "time"
  top: "time_p"
  top: "shot_id"
  top: "shot_id_p"
  top: "histograms"
  top: "histograms_p"
  top: "label"
  hdf5_data_param {
    source: "/raid/lbaraldi/scene/bbc_test_h5.txt"
    batch_size: 100
  }
}
############################################################
# FIRST CONVOLUTIONAL NEURAL NETWORK
############################################################
# First branch, stage 1: 11x11 stride-4 convolution -> ReLU -> 3x3 stride-2 max pool -> LRN.
# The named params (conv1_w / conv1_b) are also used by conv1_p, so both branches
# share the same weights.
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "image"
  top: "conv1"
  param { name: "conv1_w" lr_mult: 1 decay_mult: 1 }
  param { name: "conv1_b" lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 96
    kernel_size: 11
    stride: 4
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
# In-place activation.
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "conv1"
  top: "conv1"
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
layer {
  name: "norm1"
  type: "LRN"
  bottom: "pool1"
  top: "norm1"
  lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 }
}
# First branch, stage 2: grouped 5x5 convolution (pad 2) -> ReLU -> 3x3 stride-2 max pool -> LRN.
# conv2_w / conv2_b are shared with conv2_p through the param names.
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "norm1"
  top: "conv2"
  param { name: "conv2_w" lr_mult: 1 decay_mult: 1 }
  param { name: "conv2_b" lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256
    kernel_size: 5
    pad: 2
    group: 2
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 1 }
  }
}
# In-place activation.
layer {
  name: "relu2"
  type: "ReLU"
  bottom: "conv2"
  top: "conv2"
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
layer {
  name: "norm2"
  type: "LRN"
  bottom: "pool2"
  top: "norm2"
  lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 }
}
# First branch, stages 3-5: three 3x3 convolutions (pad 1), each followed by an
# in-place ReLU, then a 3x3 stride-2 max pool. conv4 and conv5 are grouped
# (group: 2); conv3 is not. All weights are shared with the *_p branch via the
# param names.
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "norm2"
  top: "conv3"
  param { name: "conv3_w" lr_mult: 1 decay_mult: 1 }
  param { name: "conv3_b" lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 384
    kernel_size: 3
    pad: 1
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "relu3"
  type: "ReLU"
  bottom: "conv3"
  top: "conv3"
}
layer {
  name: "conv4"
  type: "Convolution"
  bottom: "conv3"
  top: "conv4"
  param { name: "conv4_w" lr_mult: 1 decay_mult: 1 }
  param { name: "conv4_b" lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 384
    kernel_size: 3
    pad: 1
    group: 2
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 1 }
  }
}
layer {
  name: "relu4"
  type: "ReLU"
  bottom: "conv4"
  top: "conv4"
}
layer {
  name: "conv5"
  type: "Convolution"
  bottom: "conv4"
  top: "conv5"
  param { name: "conv5_w" lr_mult: 1 decay_mult: 1 }
  param { name: "conv5_b" lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256
    kernel_size: 3
    pad: 1
    group: 2
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 1 }
  }
}
layer {
  name: "relu5"
  type: "ReLU"
  bottom: "conv5"
  top: "conv5"
}
layer {
  name: "pool5"
  type: "Pooling"
  bottom: "conv5"
  top: "pool5"
  pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
# First branch, fully connected layers fc6 and fc7: each is a 4096-output
# InnerProduct followed by an in-place ReLU and in-place dropout (ratio 0.5).
# Weights are shared with fc6_p / fc7_p via the param names.
layer {
  name: "fc6"
  type: "InnerProduct"
  bottom: "pool5"
  top: "fc6"
  param { name: "fc6_w" lr_mult: 1 decay_mult: 1 }
  param { name: "fc6_b" lr_mult: 2 decay_mult: 0 }
  inner_product_param {
    num_output: 4096
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 1 }
  }
}
layer {
  name: "relu6"
  type: "ReLU"
  bottom: "fc6"
  top: "fc6"
}
layer {
  name: "drop6"
  type: "Dropout"
  bottom: "fc6"
  top: "fc6"
  dropout_param { dropout_ratio: 0.5 }
}
layer {
  name: "fc7"
  type: "InnerProduct"
  bottom: "fc6"
  top: "fc7"
  param { name: "fc7_w" lr_mult: 1 decay_mult: 1 }
  param { name: "fc7_b" lr_mult: 2 decay_mult: 0 }
  inner_product_param {
    num_output: 4096
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 1 }
  }
}
layer {
  name: "relu7"
  type: "ReLU"
  bottom: "fc7"
  top: "fc7"
}
layer {
  name: "drop7"
  type: "Dropout"
  bottom: "fc7"
  top: "fc7"
  dropout_param { dropout_ratio: 0.5 }
}
# First branch, fc8: 1183-output InnerProduct followed by an in-place ReLU.
# Its output is concatenated with "time" and "histograms" by the "merge" layer.
layer {
  name: "fc8"
  type: "InnerProduct"
  bottom: "fc7"
  top: "fc8"
  param { name: "fc8_w" lr_mult: 1 decay_mult: 1 }
  param { name: "fc8_b" lr_mult: 2 decay_mult: 0 }
  inner_product_param {
    num_output: 1183
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "fc8_relu"
  type: "ReLU"
  bottom: "fc8"
  top: "fc8"
}
############################################################
# SECOND CONVOLUTIONAL NEURAL NETWORK
############################################################
# Second branch, stage 1: same structure as conv1/relu1/pool1/norm1, fed by "image_p".
# It reuses the conv1_w / conv1_b params, so the weights are shared with the first branch.
layer {
  name: "conv1_p"
  type: "Convolution"
  bottom: "image_p"
  top: "conv1_p"
  param { name: "conv1_w" lr_mult: 1 decay_mult: 1 }
  param { name: "conv1_b" lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 96
    kernel_size: 11
    stride: 4
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
# In-place activation.
layer {
  name: "relu1_p"
  type: "ReLU"
  bottom: "conv1_p"
  top: "conv1_p"
}
layer {
  name: "pool1_p"
  type: "Pooling"
  bottom: "conv1_p"
  top: "pool1_p"
  pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
layer {
  name: "norm1_p"
  type: "LRN"
  bottom: "pool1_p"
  top: "norm1_p"
  lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 }
}
# Second branch, stage 2: same structure as conv2/relu2/pool2/norm2.
# It reuses the conv2_w / conv2_b params, so the weights are shared with the first branch.
layer {
  name: "conv2_p"
  type: "Convolution"
  bottom: "norm1_p"
  top: "conv2_p"
  param { name: "conv2_w" lr_mult: 1 decay_mult: 1 }
  param { name: "conv2_b" lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256
    kernel_size: 5
    pad: 2
    group: 2
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 1 }
  }
}
# In-place activation.
layer {
  name: "relu2_p"
  type: "ReLU"
  bottom: "conv2_p"
  top: "conv2_p"
}
layer {
  name: "pool2_p"
  type: "Pooling"
  bottom: "conv2_p"
  top: "pool2_p"
  pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
layer {
  name: "norm2_p"
  type: "LRN"
  bottom: "pool2_p"
  top: "norm2_p"
  lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 }
}
# Second branch, stages 3-5: same structure as conv3..pool5 of the first branch.
# The conv3/4/5 params are referenced by name, so the weights are shared.
layer {
  name: "conv3_p"
  type: "Convolution"
  bottom: "norm2_p"
  top: "conv3_p"
  param { name: "conv3_w" lr_mult: 1 decay_mult: 1 }
  param { name: "conv3_b" lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 384
    kernel_size: 3
    pad: 1
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "relu3_p"
  type: "ReLU"
  bottom: "conv3_p"
  top: "conv3_p"
}
layer {
  name: "conv4_p"
  type: "Convolution"
  bottom: "conv3_p"
  top: "conv4_p"
  param { name: "conv4_w" lr_mult: 1 decay_mult: 1 }
  param { name: "conv4_b" lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 384
    kernel_size: 3
    pad: 1
    group: 2
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 1 }
  }
}
layer {
  name: "relu4_p"
  type: "ReLU"
  bottom: "conv4_p"
  top: "conv4_p"
}
layer {
  name: "conv5_p"
  type: "Convolution"
  bottom: "conv4_p"
  top: "conv5_p"
  param { name: "conv5_w" lr_mult: 1 decay_mult: 1 }
  param { name: "conv5_b" lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256
    kernel_size: 3
    pad: 1
    group: 2
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 1 }
  }
}
layer {
  name: "relu5_p"
  type: "ReLU"
  bottom: "conv5_p"
  top: "conv5_p"
}
layer {
  name: "pool5_p"
  type: "Pooling"
  bottom: "conv5_p"
  top: "pool5_p"
  pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
# Second branch, fully connected layers fc6_p and fc7_p: same structure as fc6/fc7
# (4096 outputs, in-place ReLU, in-place dropout with ratio 0.5).
# The fc6/fc7 params are referenced by name, so the weights are shared.
layer {
  name: "fc6_p"
  type: "InnerProduct"
  bottom: "pool5_p"
  top: "fc6_p"
  param { name: "fc6_w" lr_mult: 1 decay_mult: 1 }
  param { name: "fc6_b" lr_mult: 2 decay_mult: 0 }
  inner_product_param {
    num_output: 4096
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 1 }
  }
}
layer {
  name: "relu6_p"
  type: "ReLU"
  bottom: "fc6_p"
  top: "fc6_p"
}
layer {
  name: "drop6_p"
  type: "Dropout"
  bottom: "fc6_p"
  top: "fc6_p"
  dropout_param { dropout_ratio: 0.5 }
}
layer {
  name: "fc7_p"
  type: "InnerProduct"
  bottom: "fc6_p"
  top: "fc7_p"
  param { name: "fc7_w" lr_mult: 1 decay_mult: 1 }
  param { name: "fc7_b" lr_mult: 2 decay_mult: 0 }
  inner_product_param {
    num_output: 4096
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 1 }
  }
}
layer {
  name: "relu7_p"
  type: "ReLU"
  bottom: "fc7_p"
  top: "fc7_p"
}
layer {
  name: "drop7_p"
  type: "Dropout"
  bottom: "fc7_p"
  top: "fc7_p"
  dropout_param { dropout_ratio: 0.5 }
}
# Second branch, fc8_p: 1183-output InnerProduct followed by an in-place ReLU.
# It reuses the fc8_w / fc8_b params, so the weights are shared with fc8.
# Its output is concatenated with "time_p" and "histograms_p" by "merge_p".
layer {
  name: "fc8_p"
  type: "InnerProduct"
  bottom: "fc7_p"
  top: "fc8_p"
  param { name: "fc8_w" lr_mult: 1 decay_mult: 1 }
  param { name: "fc8_b" lr_mult: 2 decay_mult: 0 }
  inner_product_param {
    num_output: 1183
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "fc8_p_relu"
  type: "ReLU"
  bottom: "fc8_p"
  top: "fc8_p"
}
############################################################
# MERGE + LAST IP LAYERS
############################################################
# Per-branch fusion: concatenate the CNN output (fc8 / fc8_p) with the "time" and
# "histograms" blobs read from the HDF5 data layer. No concat axis is set, so
# Caffe's default applies. The order of the bottoms determines the layout of the
# merged blob and must be the same in both branches.
layer {
  name: "merge"
  type: "Concat"
  top: "merge"
  bottom: "fc8"
  bottom: "time"
  bottom: "histograms"
}
layer {
  name: "merge_p"
  type: "Concat"
  top: "merge_p"
  bottom: "fc8_p"
  bottom: "time_p"
  bottom: "histograms_p"
}
# Final 200-output embedding layer, applied to each branch's merged blob.
# Both layers reference fc_final_w / fc_final_b, so the weights are shared.
# lr_mult is 4 on weights and biases: a higher learning rate, since this layer
# starts from random initialisation.
layer {
  name: "fc_final"
  type: "InnerProduct"
  bottom: "merge"
  top: "fc_final"
  param { name: "fc_final_w" lr_mult: 4 decay_mult: 1 }
  param { name: "fc_final_b" lr_mult: 4 decay_mult: 0 }
  inner_product_param {
    num_output: 200
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0.01 }
  }
}
layer {
  name: "fc_final_p"
  type: "InnerProduct"
  bottom: "merge_p"
  top: "fc_final_p"
  param { name: "fc_final_w" lr_mult: 4 decay_mult: 1 }
  param { name: "fc_final_b" lr_mult: 4 decay_mult: 0 }
  inner_product_param {
    num_output: 200
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0.01 }
  }
}
############################################################
# LOSS/ACCURACY LAYERS
############################################################
# Contrastive loss over the two branch embeddings, using the pair label supplied
# by the HDF5 data layer. Bottom order is: first embedding, second embedding, label.
layer {
  name: "loss"
  type: "ContrastiveLoss"
  top: "loss"
  bottom: "fc_final"
  bottom: "fc_final_p"
  bottom: "label"
  contrastive_loss_param { margin: 1.0 }
}
############################################################
# END
############################################################
# Silence layers consume the data-layer tops that no other layer in this network
# reads (shot ids and the labels emitted by the ImageData layers).
layer { name: "shot_id_silence" type: "Silence" bottom: "shot_id" }
layer { name: "shot_id_silence_p" type: "Silence" bottom: "shot_id_p" }
layer { name: "label_image_silence" type: "Silence" bottom: "label_image" }
layer { name: "label_image_silence_p" type: "Silence" bottom: "label_image_p" }
@meisa233

This comment has been minimized.

Copy link

meisa233 commented Aug 9, 2018

I am sorry to disturb you.

I have downloaded your scene detection model from the paper presented at the 23rd ACM International Conference on Multimedia (2015).

However, I can't find the following files: bbc_train_image.txt, bbc_train_image_p.txt, bbc_train_h5.txt, bbc_test_image.txt, bbc_test_image_p.txt, and bbc_test_h5.txt.

Where can I find them or download them?

@superpunny

This comment has been minimized.

Copy link

superpunny commented Jan 15, 2019

I can't find the following files: bbc_train_image.txt, bbc_train_image_p.txt, bbc_train_h5.txt, bbc_test_image.txt, bbc_test_image_p.txt, and bbc_test_h5.txt.

Where can I find them or download them?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.