Skip to content

Instantly share code, notes, and snippets.

@baraldilorenzo
Last active August 20, 2023 09:39
Show Gist options
  • Star 9 You must be signed in to star a gist
  • Fork 8 You must be signed in to fork a gist
  • Save baraldilorenzo/05b742d47220b487c8bd to your computer and use it in GitHub Desktop.
Save baraldilorenzo/05b742d47220b487c8bd to your computer and use it in GitHub Desktop.
Deep Scene

A Deep Siamese Network for Scene Detection

This gist contains the Caffe solver settings and the train/test network definition for the model from the paper:

A Deep Siamese Network for Scene Detection in Broadcast Videos
Lorenzo Baraldi, Costantino Grana, Rita Cucchiara
Proceedings of the 23rd ACM International Conference on Multimedia, 2015

Please cite the paper if you use the models.

Contacts

# Caffe solver settings for the "ScenesSiamese" network.

# Network definition and compute device.
net: "examples/DeepScene/train_val.prototxt"
solver_mode: GPU

# Optimisation hyper-parameters (no solver type is given, so Caffe's default applies).
base_lr: 0.001
momentum: 0.9
weight_decay: 0.0005

# Learning-rate schedule: "step" multiplies the rate by gamma every stepsize iterations.
# NOTE(review): stepsize (500) is larger than max_iter (100), so the rate never
# drops within a single run of this solver -- confirm this is intended.
lr_policy: "step"
gamma: 0.1
stepsize: 500
max_iter: 100

# Evaluation: every test_interval training iterations, run test_iter TEST batches.
# NOTE(review): 1000 test batches every 10 iterations is a heavy test load -- confirm.
test_interval: 10
test_iter: 1000

# Logging and checkpointing (in iterations).
display: 1
snapshot: 50
snapshot_prefix: "examples/DeepScene/snapshots/snapshot_"
############################################################
# BEGIN
############################################################
name: "ScenesSiamese"
############################################################
# DATA LAYERS
############################################################
# Train data layers
############################################################
# TRAIN-phase image input for the first branch (blob "image").
# batch_size is 100, the same as data_image_p and data_multimodal.
# presumably the three sources list samples in the same order so that rows stay paired -- verify.
layer {
  name: "data_image"
  type: "ImageData"
  include { phase: TRAIN }
  top: "image"
  top: "label_image"  # not fed to the loss; consumed by the label_image_silence layer
  image_data_param {
    source: "/raid/lbaraldi/scene/bbc_train_image.txt"
    batch_size: 100
  }
  # Pre-processing: mean-file subtraction, 227-pixel crop, mirroring enabled.
  transform_param {
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
    crop_size: 227
    mirror: true
  }
}
# TRAIN-phase image input for the second ("_p") branch (blob "image_p").
# batch_size is 100, the same as data_image and data_multimodal.
layer {
  name: "data_image_p"
  type: "ImageData"
  include { phase: TRAIN }
  top: "image_p"
  top: "label_image_p"  # not fed to the loss; consumed by the label_image_silence_p layer
  image_data_param {
    source: "/raid/lbaraldi/scene/bbc_train_image_p.txt"
    batch_size: 100
  }
  # Pre-processing: mean-file subtraction, 227-pixel crop, mirroring enabled.
  transform_param {
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
    crop_size: 227
    mirror: true
  }
}
# TRAIN-phase HDF5 input: the non-image blobs for both branches plus the pair label.
layer {
  name: "data_multimodal"
  type: "HDF5Data"
  include { phase: TRAIN }
  top: "time"          # concatenated into "merge"
  top: "time_p"        # concatenated into "merge_p"
  top: "shot_id"       # unused by the net; silenced
  top: "shot_id_p"     # unused by the net; silenced
  top: "histograms"    # concatenated into "merge"
  top: "histograms_p"  # concatenated into "merge_p"
  top: "label"         # third input of the ContrastiveLoss layer
  hdf5_data_param {
    source: "/raid/lbaraldi/scene/bbc_train_h5.txt"
    batch_size: 100
  }
}
############################################################
# Test data layers
############################################################
# TEST-phase image input for the first branch (blob "image").
# batch_size is 100, the same as the other TEST data layers.
layer {
  name: "data_image"
  type: "ImageData"
  include { phase: TEST }
  top: "image"
  top: "label_image"  # not fed to the loss; consumed by the label_image_silence layer
  image_data_param {
    source: "/raid/lbaraldi/scene/bbc_test_image.txt"
    batch_size: 100
  }
  transform_param {
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
    crop_size: 227
    # Mirroring is off for evaluation: Caffe flips images at random whenever
    # mirror is true, in any phase, which would make test results non-repeatable.
    mirror: false
  }
}
# TEST-phase image input for the second ("_p") branch (blob "image_p").
# batch_size is 100, the same as the other TEST data layers.
layer {
  name: "data_image_p"
  type: "ImageData"
  include { phase: TEST }
  top: "image_p"
  top: "label_image_p"  # not fed to the loss; consumed by the label_image_silence_p layer
  image_data_param {
    source: "/raid/lbaraldi/scene/bbc_test_image_p.txt"
    batch_size: 100
  }
  transform_param {
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
    crop_size: 227
    # Mirroring is off for evaluation: Caffe flips images at random whenever
    # mirror is true, in any phase, which would make test results non-repeatable.
    mirror: false
  }
}
# TEST-phase HDF5 input: the non-image blobs for both branches plus the pair label.
layer {
  name: "data_multimodal"
  type: "HDF5Data"
  include { phase: TEST }
  top: "time"          # concatenated into "merge"
  top: "time_p"        # concatenated into "merge_p"
  top: "shot_id"       # unused by the net; silenced
  top: "shot_id_p"     # unused by the net; silenced
  top: "histograms"    # concatenated into "merge"
  top: "histograms_p"  # concatenated into "merge_p"
  top: "label"         # third input of the ContrastiveLoss layer
  hdf5_data_param {
    source: "/raid/lbaraldi/scene/bbc_test_h5.txt"
    batch_size: 100
  }
}
############################################################
# FIRST CONVOLUTIONAL NEURAL NETWORK
############################################################
# First branch, stage 1: conv1 -> ReLU (in place) -> max-pool -> LRN.
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "image"
  top: "conv1"
  # Named parameters: conv1_p uses the same names, so both branches share them.
  param { name: "conv1_w" lr_mult: 1 decay_mult: 1 }  # weights
  param { name: "conv1_b" lr_mult: 2 decay_mult: 0 }  # bias: 2x learning rate, no decay
  convolution_param {
    num_output: 96
    kernel_size: 11
    stride: 4
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "conv1"
  top: "conv1"
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
layer {
  name: "norm1"
  type: "LRN"
  bottom: "pool1"
  top: "norm1"
  lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 }
}
# First branch, stage 2: grouped conv2 -> ReLU (in place) -> max-pool -> LRN.
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "norm1"
  top: "conv2"
  # Named parameters: conv2_p uses the same names, so both branches share them.
  param { name: "conv2_w" lr_mult: 1 decay_mult: 1 }  # weights
  param { name: "conv2_b" lr_mult: 2 decay_mult: 0 }  # bias: 2x learning rate, no decay
  convolution_param {
    num_output: 256
    kernel_size: 5
    pad: 2
    group: 2
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 1 }
  }
}
layer {
  name: "relu2"
  type: "ReLU"
  bottom: "conv2"
  top: "conv2"
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
layer {
  name: "norm2"
  type: "LRN"
  bottom: "pool2"
  top: "norm2"
  lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 }
}
# First branch, stage 3: conv3 (no grouping) -> ReLU (in place).
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "norm2"
  top: "conv3"
  # Named parameters: conv3_p uses the same names, so both branches share them.
  param { name: "conv3_w" lr_mult: 1 decay_mult: 1 }  # weights
  param { name: "conv3_b" lr_mult: 2 decay_mult: 0 }  # bias: 2x learning rate, no decay
  convolution_param {
    num_output: 384
    kernel_size: 3
    pad: 1
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "relu3"
  type: "ReLU"
  bottom: "conv3"
  top: "conv3"
}
# First branch, stage 4: grouped conv4 -> ReLU (in place).
layer {
  name: "conv4"
  type: "Convolution"
  bottom: "conv3"
  top: "conv4"
  # Named parameters: conv4_p uses the same names, so both branches share them.
  param { name: "conv4_w" lr_mult: 1 decay_mult: 1 }  # weights
  param { name: "conv4_b" lr_mult: 2 decay_mult: 0 }  # bias: 2x learning rate, no decay
  convolution_param {
    num_output: 384
    kernel_size: 3
    pad: 1
    group: 2
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 1 }
  }
}
layer {
  name: "relu4"
  type: "ReLU"
  bottom: "conv4"
  top: "conv4"
}
# First branch, stage 5: grouped conv5 -> ReLU (in place) -> max-pool.
layer {
  name: "conv5"
  type: "Convolution"
  bottom: "conv4"
  top: "conv5"
  # Named parameters: conv5_p uses the same names, so both branches share them.
  param { name: "conv5_w" lr_mult: 1 decay_mult: 1 }  # weights
  param { name: "conv5_b" lr_mult: 2 decay_mult: 0 }  # bias: 2x learning rate, no decay
  convolution_param {
    num_output: 256
    kernel_size: 3
    pad: 1
    group: 2
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 1 }
  }
}
layer {
  name: "relu5"
  type: "ReLU"
  bottom: "conv5"
  top: "conv5"
}
layer {
  name: "pool5"
  type: "Pooling"
  bottom: "conv5"
  top: "pool5"
  pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
# First branch, fc6: 4096-unit inner product -> ReLU (in place) -> dropout (in place).
layer {
  name: "fc6"
  type: "InnerProduct"
  bottom: "pool5"
  top: "fc6"
  # Named parameters: fc6_p uses the same names, so both branches share them.
  param { name: "fc6_w" lr_mult: 1 decay_mult: 1 }  # weights
  param { name: "fc6_b" lr_mult: 2 decay_mult: 0 }  # bias: 2x learning rate, no decay
  inner_product_param {
    num_output: 4096
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 1 }
  }
}
layer {
  name: "relu6"
  type: "ReLU"
  bottom: "fc6"
  top: "fc6"
}
layer {
  name: "drop6"
  type: "Dropout"
  bottom: "fc6"
  top: "fc6"
  dropout_param { dropout_ratio: 0.5 }
}
# First branch, fc7: 4096-unit inner product -> ReLU (in place) -> dropout (in place).
layer {
  name: "fc7"
  type: "InnerProduct"
  bottom: "fc6"
  top: "fc7"
  # Named parameters: fc7_p uses the same names, so both branches share them.
  param { name: "fc7_w" lr_mult: 1 decay_mult: 1 }  # weights
  param { name: "fc7_b" lr_mult: 2 decay_mult: 0 }  # bias: 2x learning rate, no decay
  inner_product_param {
    num_output: 4096
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 1 }
  }
}
layer {
  name: "relu7"
  type: "ReLU"
  bottom: "fc7"
  top: "fc7"
}
layer {
  name: "drop7"
  type: "Dropout"
  bottom: "fc7"
  top: "fc7"
  dropout_param { dropout_ratio: 0.5 }
}
# First branch, fc8: 1183-unit inner product -> ReLU (in place).
# The rectified "fc8" blob is the image feature concatenated by the "merge" layer.
layer {
  name: "fc8"
  type: "InnerProduct"
  bottom: "fc7"
  top: "fc8"
  # Named parameters: fc8_p uses the same names, so both branches share them.
  param { name: "fc8_w" lr_mult: 1 decay_mult: 1 }  # weights
  param { name: "fc8_b" lr_mult: 2 decay_mult: 0 }  # bias: 2x learning rate, no decay
  inner_product_param {
    num_output: 1183
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "fc8_relu"
  type: "ReLU"
  bottom: "fc8"
  top: "fc8"
}
############################################################
# SECOND CONVOLUTIONAL NEURAL NETWORK
############################################################
# Second ("_p") branch, stage 1: conv1_p -> ReLU (in place) -> max-pool -> LRN.
layer {
  name: "conv1_p"
  type: "Convolution"
  bottom: "image_p"
  top: "conv1_p"
  # Same parameter names as conv1, so the two branches share these parameters.
  param { name: "conv1_w" lr_mult: 1 decay_mult: 1 }  # weights
  param { name: "conv1_b" lr_mult: 2 decay_mult: 0 }  # bias: 2x learning rate, no decay
  convolution_param {
    num_output: 96
    kernel_size: 11
    stride: 4
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "relu1_p"
  type: "ReLU"
  bottom: "conv1_p"
  top: "conv1_p"
}
layer {
  name: "pool1_p"
  type: "Pooling"
  bottom: "conv1_p"
  top: "pool1_p"
  pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
layer {
  name: "norm1_p"
  type: "LRN"
  bottom: "pool1_p"
  top: "norm1_p"
  lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 }
}
# Second ("_p") branch, stage 2: grouped conv2_p -> ReLU (in place) -> max-pool -> LRN.
layer {
  name: "conv2_p"
  type: "Convolution"
  bottom: "norm1_p"
  top: "conv2_p"
  # Same parameter names as conv2, so the two branches share these parameters.
  param { name: "conv2_w" lr_mult: 1 decay_mult: 1 }  # weights
  param { name: "conv2_b" lr_mult: 2 decay_mult: 0 }  # bias: 2x learning rate, no decay
  convolution_param {
    num_output: 256
    kernel_size: 5
    pad: 2
    group: 2
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 1 }
  }
}
layer {
  name: "relu2_p"
  type: "ReLU"
  bottom: "conv2_p"
  top: "conv2_p"
}
layer {
  name: "pool2_p"
  type: "Pooling"
  bottom: "conv2_p"
  top: "pool2_p"
  pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
layer {
  name: "norm2_p"
  type: "LRN"
  bottom: "pool2_p"
  top: "norm2_p"
  lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 }
}
# Second ("_p") branch, stage 3: conv3_p (no grouping) -> ReLU (in place).
layer {
  name: "conv3_p"
  type: "Convolution"
  bottom: "norm2_p"
  top: "conv3_p"
  # Same parameter names as conv3, so the two branches share these parameters.
  param { name: "conv3_w" lr_mult: 1 decay_mult: 1 }  # weights
  param { name: "conv3_b" lr_mult: 2 decay_mult: 0 }  # bias: 2x learning rate, no decay
  convolution_param {
    num_output: 384
    kernel_size: 3
    pad: 1
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "relu3_p"
  type: "ReLU"
  bottom: "conv3_p"
  top: "conv3_p"
}
# Second ("_p") branch, stage 4: grouped conv4_p -> ReLU (in place).
layer {
  name: "conv4_p"
  type: "Convolution"
  bottom: "conv3_p"
  top: "conv4_p"
  # Same parameter names as conv4, so the two branches share these parameters.
  param { name: "conv4_w" lr_mult: 1 decay_mult: 1 }  # weights
  param { name: "conv4_b" lr_mult: 2 decay_mult: 0 }  # bias: 2x learning rate, no decay
  convolution_param {
    num_output: 384
    kernel_size: 3
    pad: 1
    group: 2
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 1 }
  }
}
layer {
  name: "relu4_p"
  type: "ReLU"
  bottom: "conv4_p"
  top: "conv4_p"
}
# Second ("_p") branch, stage 5: grouped conv5_p -> ReLU (in place) -> max-pool.
layer {
  name: "conv5_p"
  type: "Convolution"
  bottom: "conv4_p"
  top: "conv5_p"
  # Same parameter names as conv5, so the two branches share these parameters.
  param { name: "conv5_w" lr_mult: 1 decay_mult: 1 }  # weights
  param { name: "conv5_b" lr_mult: 2 decay_mult: 0 }  # bias: 2x learning rate, no decay
  convolution_param {
    num_output: 256
    kernel_size: 3
    pad: 1
    group: 2
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 1 }
  }
}
layer {
  name: "relu5_p"
  type: "ReLU"
  bottom: "conv5_p"
  top: "conv5_p"
}
layer {
  name: "pool5_p"
  type: "Pooling"
  bottom: "conv5_p"
  top: "pool5_p"
  pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
# Second ("_p") branch, fc6_p: 4096-unit inner product -> ReLU (in place) -> dropout (in place).
layer {
  name: "fc6_p"
  type: "InnerProduct"
  bottom: "pool5_p"
  top: "fc6_p"
  # Same parameter names as fc6, so the two branches share these parameters.
  param { name: "fc6_w" lr_mult: 1 decay_mult: 1 }  # weights
  param { name: "fc6_b" lr_mult: 2 decay_mult: 0 }  # bias: 2x learning rate, no decay
  inner_product_param {
    num_output: 4096
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 1 }
  }
}
layer {
  name: "relu6_p"
  type: "ReLU"
  bottom: "fc6_p"
  top: "fc6_p"
}
layer {
  name: "drop6_p"
  type: "Dropout"
  bottom: "fc6_p"
  top: "fc6_p"
  dropout_param { dropout_ratio: 0.5 }
}
# Second ("_p") branch, fc7_p: 4096-unit inner product -> ReLU (in place) -> dropout (in place).
layer {
  name: "fc7_p"
  type: "InnerProduct"
  bottom: "fc6_p"
  top: "fc7_p"
  # Same parameter names as fc7, so the two branches share these parameters.
  param { name: "fc7_w" lr_mult: 1 decay_mult: 1 }  # weights
  param { name: "fc7_b" lr_mult: 2 decay_mult: 0 }  # bias: 2x learning rate, no decay
  inner_product_param {
    num_output: 4096
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 1 }
  }
}
layer {
  name: "relu7_p"
  type: "ReLU"
  bottom: "fc7_p"
  top: "fc7_p"
}
layer {
  name: "drop7_p"
  type: "Dropout"
  bottom: "fc7_p"
  top: "fc7_p"
  dropout_param { dropout_ratio: 0.5 }
}
# Second ("_p") branch, fc8_p: 1183-unit inner product -> ReLU (in place).
# The rectified "fc8_p" blob is the image feature concatenated by the "merge_p" layer.
layer {
  name: "fc8_p"
  type: "InnerProduct"
  bottom: "fc7_p"
  top: "fc8_p"
  # Same parameter names as fc8, so the two branches share these parameters.
  param { name: "fc8_w" lr_mult: 1 decay_mult: 1 }  # weights
  param { name: "fc8_b" lr_mult: 2 decay_mult: 0 }  # bias: 2x learning rate, no decay
  inner_product_param {
    num_output: 1183
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "fc8_p_relu"
  type: "ReLU"
  bottom: "fc8_p"
  top: "fc8_p"
}
############################################################
# MERGE + LAST IP LAYERS
############################################################
# Joins the first branch's image feature with its "time" and "histograms" blobs.
# No axis is set, so Caffe's default concatenation axis applies.
# assumes "time" and "histograms" match fc8 on every other axis -- TODO confirm against the HDF5 files.
layer {
  name: "merge"
  type: "Concat"
  bottom: "fc8"
  bottom: "time"
  bottom: "histograms"
  top: "merge"
}
# Joins the second branch's image feature with its "time_p" and "histograms_p" blobs.
# No axis is set, so Caffe's default concatenation axis applies.
# assumes "time_p" and "histograms_p" match fc8_p on every other axis -- TODO confirm against the HDF5 files.
layer {
  name: "merge_p"
  type: "Concat"
  bottom: "fc8_p"
  bottom: "time_p"
  bottom: "histograms_p"
  top: "merge_p"
}
# Final 200-unit embedding of the first branch, computed from the merged blob.
layer {
  name: "fc_final"
  type: "InnerProduct"
  bottom: "merge"
  top: "fc_final"
  # lr_mult is 4 on both parameters: this layer starts from random
  # initialisation, so it is given a higher learning rate.
  # fc_final_p uses the same parameter names, so both branches share them.
  param { name: "fc_final_w" lr_mult: 4 decay_mult: 1 }  # weights
  param { name: "fc_final_b" lr_mult: 4 decay_mult: 0 }  # bias: no decay
  inner_product_param {
    num_output: 200
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0.01 }
  }
}
# Final 200-unit embedding of the second ("_p") branch, computed from the merged blob.
layer {
  name: "fc_final_p"
  type: "InnerProduct"
  bottom: "merge_p"
  top: "fc_final_p"
  # lr_mult is 4 on both parameters: this layer starts from random
  # initialisation, so it is given a higher learning rate.
  # Same parameter names as fc_final, so the two branches share these parameters.
  param { name: "fc_final_w" lr_mult: 4 decay_mult: 1 }  # weights
  param { name: "fc_final_b" lr_mult: 4 decay_mult: 0 }  # bias: no decay
  inner_product_param {
    num_output: 200
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0.01 }
  }
}
############################################################
# LOSS/ACCURACY LAYERS
############################################################
# Contrastive loss over the two branch embeddings, with margin 1.0.
# "label" comes from the data_multimodal HDF5 layer.
# NOTE(review): Caffe's ContrastiveLoss treats label 1 as a similar pair -- confirm
# that the HDF5 labels follow that convention.
layer {
  name: "loss"
  type: "ContrastiveLoss"
  bottom: "fc_final"
  bottom: "fc_final_p"
  bottom: "label"
  top: "loss"
  contrastive_loss_param { margin: 1.0 }
}
############################################################
# END OF MAIN NETWORK -- Silence layers for unused tops follow
############################################################
# Silence layers: each takes one data-layer top that no other layer reads
# and produces no output.

# Shot identifiers from the HDF5 data layer.
layer {
  name: "shot_id_silence"
  type: "Silence"
  bottom: "shot_id"
}
layer {
  name: "shot_id_silence_p"
  type: "Silence"
  bottom: "shot_id_p"
}

# Labels emitted by the two ImageData layers.
layer {
  name: "label_image_silence"
  type: "Silence"
  bottom: "label_image"
}
layer {
  name: "label_image_silence_p"
  type: "Silence"
  bottom: "label_image_p"
}
@meisa233
Copy link

meisa233 commented Aug 9, 2018

I am sorry to disturb you.

I have downloaded your model of scene detection in 23rd ACM International Conference on Multimedia, 2015.

However, I can't find the bbc_train_image.txt, bbc_train_image_p.txt, bbc_train_h5.txt, bbc_test_image.txt, bbc_test_image_p.txt, bbc_test_h5.txt.

Where can I find them or download them?

@superpunny
Copy link

I can't find the bbc_train_image.txt, bbc_train_image_p.txt, bbc_train_h5.txt, bbc_test_image.txt, bbc_test_image_p.txt, bbc_test_h5.txt.

Where can I find them or download them?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment