Skip to content

Instantly share code, notes, and snippets.

Last active August 20, 2023 09:39
Show Gist options
  • Star 9 You must be signed in to star a gist
  • Fork 8 You must be signed in to fork a gist
  • Save baraldilorenzo/05b742d47220b487c8bd to your computer and use it in GitHub Desktop.
Save baraldilorenzo/05b742d47220b487c8bd to your computer and use it in GitHub Desktop.
Deep Scene

A Deep Siamese Network for Scene Detection

This is a model from the paper:

A Deep Siamese Network for Scene Detection in Broadcast Videos
Lorenzo Baraldi, Costantino Grana, Rita Cucchiara
Proceedings of the 23rd ACM International Conference on Multimedia, 2015

Please cite the paper if you use the models.


# Solver configuration for the network defined in the file named by `net`.
net: "examples/DeepScene/train_val.prototxt"
solver_mode: GPU

# Optimisation hyper-parameters.
base_lr: 0.001
momentum: 0.9
weight_decay: 0.0005

# Learning-rate schedule: the rate is multiplied by `gamma` every `stepsize` iterations.
lr_policy: "step"
gamma: 0.1
stepsize: 500

# Run length and console logging.
# NOTE(review): max_iter (100) is smaller than stepsize (500), so with these
# values the step decay is never reached within one run — confirm this is intended.
max_iter: 100
display: 1

# Evaluation: every `test_interval` training iterations, run `test_iter` test batches.
test_interval: 10
test_iter: 1000

# Checkpointing: write a snapshot every `snapshot` iterations under `snapshot_prefix`.
snapshot: 50
snapshot_prefix: "examples/DeepScene/snapshots/snapshot_"
# Network definition starts here (presumably the train_val.prototxt referenced by
# the solver's `net:` field — confirm; the original file boundary is not visible).
name: "ScenesSiamese"
# Train data layers
# Training images for the first branch. The per-image label top ("label_image")
# is not used by the loss; it is consumed by a Silence layer further down.
layer {
  name: "data_image"
  type: "ImageData"
  top: "image"
  top: "label_image"
  include { phase: TRAIN }
  image_data_param {
    source: "/raid/lbaraldi/scene/bbc_train_image.txt"
    batch_size: 100
  }
  # transform_param is a layer-level message, so it sits beside image_data_param.
  transform_param {
    mirror: true
    crop_size: 227
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
  }
}
# Training images for the second ("_p") branch. The per-image label top
# ("label_image_p") is consumed by a Silence layer further down.
# NOTE(review): presumably row i of this list is the pair partner of row i in
# bbc_train_image.txt — confirm against the data preparation code.
layer {
  name: "data_image_p"
  type: "ImageData"
  top: "image_p"
  top: "label_image_p"
  include { phase: TRAIN }
  image_data_param {
    source: "/raid/lbaraldi/scene/bbc_train_image_p.txt"
    batch_size: 100
  }
  # transform_param is a layer-level message, so it sits beside image_data_param.
  transform_param {
    mirror: true
    crop_size: 227
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
  }
}
# Training side-information for both branches plus the pair label, read from HDF5.
# "time"/"histograms" (and their "_p" twins) are concatenated with the image
# features later; "shot_id"/"shot_id_p" are silenced; "label" feeds the loss.
# NOTE(review): this layer and the two ImageData layers use the same batch_size
# and presumably rely on identical, unshuffled ordering to stay aligned — confirm.
layer {
  name: "data_multimodal"
  type: "HDF5Data"
  top: "time"
  top: "time_p"
  top: "shot_id"
  top: "shot_id_p"
  top: "histograms"
  top: "histograms_p"
  top: "label"
  include { phase: TRAIN }
  hdf5_data_param {
    source: "/raid/lbaraldi/scene/bbc_train_h5.txt"
    batch_size: 100
  }
}
# Test data layers
# Test images for the first branch. The per-image label top ("label_image")
# is consumed by a Silence layer further down.
layer {
  name: "data_image"
  type: "ImageData"
  top: "image"
  top: "label_image"
  include { phase: TEST }
  image_data_param {
    source: "/raid/lbaraldi/scene/bbc_test_image.txt"
    batch_size: 100
  }
  # transform_param is a layer-level message, so it sits beside image_data_param.
  transform_param {
    # Random mirroring is a training-time augmentation; it is disabled here so
    # that repeated test passes see identical inputs.
    mirror: false
    crop_size: 227
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
  }
}
# Test images for the second ("_p") branch. The per-image label top
# ("label_image_p") is consumed by a Silence layer further down.
layer {
  name: "data_image_p"
  type: "ImageData"
  top: "image_p"
  top: "label_image_p"
  include { phase: TEST }
  image_data_param {
    source: "/raid/lbaraldi/scene/bbc_test_image_p.txt"
    batch_size: 100
  }
  # transform_param is a layer-level message, so it sits beside image_data_param.
  transform_param {
    # Random mirroring is a training-time augmentation; it is disabled here so
    # that repeated test passes see identical inputs.
    mirror: false
    crop_size: 227
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
  }
}
# Test side-information for both branches plus the pair label, read from HDF5.
# Same tops as the TRAIN-phase "data_multimodal" layer.
# NOTE(review): presumably relies on the same row ordering as the two TEST
# ImageData layers — confirm.
layer {
  name: "data_multimodal"
  type: "HDF5Data"
  top: "time"
  top: "time_p"
  top: "shot_id"
  top: "shot_id_p"
  top: "histograms"
  top: "histograms_p"
  top: "label"
  include { phase: TEST }
  hdf5_data_param {
    source: "/raid/lbaraldi/scene/bbc_test_h5.txt"
    batch_size: 100
  }
}
# First-branch stage 1: convolution -> in-place ReLU -> max pooling -> LRN.
# The param names ("conv1_w", "conv1_b") are reused by "conv1_p", which makes
# Caffe share these weights between the two branches.
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "image"
  top: "conv1"
  param {
    name: "conv1_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "conv1_b"
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 96
    kernel_size: 11
    stride: 4
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# In-place activation (bottom and top are the same blob).
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "conv1"
  top: "conv1"
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "norm1"
  type: "LRN"
  bottom: "pool1"
  top: "norm1"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
# First-branch stage 2: grouped convolution -> in-place ReLU -> max pooling -> LRN.
# Param names are shared with "conv2_p".
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "norm1"
  top: "conv2"
  param {
    name: "conv2_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "conv2_b"
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 2
    kernel_size: 5
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place activation (bottom and top are the same blob).
layer {
  name: "relu2"
  type: "ReLU"
  bottom: "conv2"
  top: "conv2"
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "norm2"
  type: "LRN"
  bottom: "pool2"
  top: "norm2"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
# First-branch stage 3: convolution -> in-place ReLU.
# Param names are shared with "conv3_p".
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "norm2"
  top: "conv3"
  param {
    name: "conv3_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "conv3_b"
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# In-place activation (bottom and top are the same blob).
layer {
  name: "relu3"
  type: "ReLU"
  bottom: "conv3"
  top: "conv3"
}
# First-branch stage 4: grouped convolution -> in-place ReLU.
# Param names are shared with "conv4_p".
layer {
  name: "conv4"
  type: "Convolution"
  bottom: "conv3"
  top: "conv4"
  param {
    name: "conv4_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "conv4_b"
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place activation (bottom and top are the same blob).
layer {
  name: "relu4"
  type: "ReLU"
  bottom: "conv4"
  top: "conv4"
}
# First-branch stage 5: grouped convolution -> in-place ReLU -> max pooling.
# Param names are shared with "conv5_p".
layer {
  name: "conv5"
  type: "Convolution"
  bottom: "conv4"
  top: "conv5"
  param {
    name: "conv5_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "conv5_b"
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place activation (bottom and top are the same blob).
layer {
  name: "relu5"
  type: "ReLU"
  bottom: "conv5"
  top: "conv5"
}
layer {
  name: "pool5"
  type: "Pooling"
  bottom: "conv5"
  top: "pool5"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
# First-branch fully connected stage 6: inner product -> in-place ReLU -> in-place dropout.
# Param names are shared with "fc6_p".
layer {
  name: "fc6"
  type: "InnerProduct"
  bottom: "pool5"
  top: "fc6"
  param {
    name: "fc6_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "fc6_b"
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place activation (bottom and top are the same blob).
layer {
  name: "relu6"
  type: "ReLU"
  bottom: "fc6"
  top: "fc6"
}
layer {
  name: "drop6"
  type: "Dropout"
  bottom: "fc6"
  top: "fc6"
  dropout_param {
    dropout_ratio: 0.5
  }
}
# First-branch fully connected stage 7: inner product -> in-place ReLU -> in-place dropout.
# Param names are shared with "fc7_p".
layer {
  name: "fc7"
  type: "InnerProduct"
  bottom: "fc6"
  top: "fc7"
  param {
    name: "fc7_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "fc7_b"
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place activation (bottom and top are the same blob).
layer {
  name: "relu7"
  type: "ReLU"
  bottom: "fc7"
  top: "fc7"
}
layer {
  name: "drop7"
  type: "Dropout"
  bottom: "fc7"
  top: "fc7"
  dropout_param {
    dropout_ratio: 0.5
  }
}
# First-branch fully connected stage 8: inner product -> in-place ReLU.
# Its output "fc8" is the image feature concatenated in the "merge" layer.
# Param names are shared with "fc8_p".
layer {
  name: "fc8"
  type: "InnerProduct"
  bottom: "fc7"
  top: "fc8"
  param {
    name: "fc8_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "fc8_b"
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 1183
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# In-place activation (bottom and top are the same blob).
layer {
  name: "fc8_relu"
  type: "ReLU"
  bottom: "fc8"
  top: "fc8"
}
# Second-branch ("_p") stage 1: convolution -> in-place ReLU -> max pooling -> LRN.
# Uses the same param names as "conv1" ("conv1_w", "conv1_b"), so the weights
# are shared with the first branch.
layer {
  name: "conv1_p"
  type: "Convolution"
  bottom: "image_p"
  top: "conv1_p"
  param {
    name: "conv1_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "conv1_b"
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 96
    kernel_size: 11
    stride: 4
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# In-place activation (bottom and top are the same blob).
layer {
  name: "relu1_p"
  type: "ReLU"
  bottom: "conv1_p"
  top: "conv1_p"
}
layer {
  name: "pool1_p"
  type: "Pooling"
  bottom: "conv1_p"
  top: "pool1_p"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "norm1_p"
  type: "LRN"
  bottom: "pool1_p"
  top: "norm1_p"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
# Second-branch ("_p") stage 2: grouped convolution -> in-place ReLU -> max pooling -> LRN.
# Uses the same param names as "conv2", so the weights are shared.
layer {
  name: "conv2_p"
  type: "Convolution"
  bottom: "norm1_p"
  top: "conv2_p"
  param {
    name: "conv2_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "conv2_b"
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 2
    kernel_size: 5
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place activation (bottom and top are the same blob).
layer {
  name: "relu2_p"
  type: "ReLU"
  bottom: "conv2_p"
  top: "conv2_p"
}
layer {
  name: "pool2_p"
  type: "Pooling"
  bottom: "conv2_p"
  top: "pool2_p"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "norm2_p"
  type: "LRN"
  bottom: "pool2_p"
  top: "norm2_p"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
# Second-branch ("_p") stage 3: convolution -> in-place ReLU.
# Uses the same param names as "conv3", so the weights are shared.
layer {
  name: "conv3_p"
  type: "Convolution"
  bottom: "norm2_p"
  top: "conv3_p"
  param {
    name: "conv3_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "conv3_b"
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# In-place activation (bottom and top are the same blob).
layer {
  name: "relu3_p"
  type: "ReLU"
  bottom: "conv3_p"
  top: "conv3_p"
}
# Second-branch ("_p") stage 4: grouped convolution -> in-place ReLU.
# Uses the same param names as "conv4", so the weights are shared.
layer {
  name: "conv4_p"
  type: "Convolution"
  bottom: "conv3_p"
  top: "conv4_p"
  param {
    name: "conv4_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "conv4_b"
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place activation (bottom and top are the same blob).
layer {
  name: "relu4_p"
  type: "ReLU"
  bottom: "conv4_p"
  top: "conv4_p"
}
# Second-branch ("_p") stage 5: grouped convolution -> in-place ReLU -> max pooling.
# Uses the same param names as "conv5", so the weights are shared.
layer {
  name: "conv5_p"
  type: "Convolution"
  bottom: "conv4_p"
  top: "conv5_p"
  param {
    name: "conv5_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "conv5_b"
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place activation (bottom and top are the same blob).
layer {
  name: "relu5_p"
  type: "ReLU"
  bottom: "conv5_p"
  top: "conv5_p"
}
layer {
  name: "pool5_p"
  type: "Pooling"
  bottom: "conv5_p"
  top: "pool5_p"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
# Second-branch ("_p") fully connected stage 6: inner product -> in-place ReLU -> in-place dropout.
# Uses the same param names as "fc6", so the weights are shared.
layer {
  name: "fc6_p"
  type: "InnerProduct"
  bottom: "pool5_p"
  top: "fc6_p"
  param {
    name: "fc6_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "fc6_b"
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place activation (bottom and top are the same blob).
layer {
  name: "relu6_p"
  type: "ReLU"
  bottom: "fc6_p"
  top: "fc6_p"
}
layer {
  name: "drop6_p"
  type: "Dropout"
  bottom: "fc6_p"
  top: "fc6_p"
  dropout_param {
    dropout_ratio: 0.5
  }
}
# Second-branch ("_p") fully connected stage 7: inner product -> in-place ReLU -> in-place dropout.
# Uses the same param names as "fc7", so the weights are shared.
layer {
  name: "fc7_p"
  type: "InnerProduct"
  bottom: "fc6_p"
  top: "fc7_p"
  param {
    name: "fc7_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "fc7_b"
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place activation (bottom and top are the same blob).
layer {
  name: "relu7_p"
  type: "ReLU"
  bottom: "fc7_p"
  top: "fc7_p"
}
layer {
  name: "drop7_p"
  type: "Dropout"
  bottom: "fc7_p"
  top: "fc7_p"
  dropout_param {
    dropout_ratio: 0.5
  }
}
# Second-branch ("_p") fully connected stage 8: inner product -> in-place ReLU.
# Its output "fc8_p" is the image feature concatenated in the "merge_p" layer.
# Uses the same param names as "fc8", so the weights are shared.
layer {
  name: "fc8_p"
  type: "InnerProduct"
  bottom: "fc7_p"
  top: "fc8_p"
  param {
    name: "fc8_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "fc8_b"
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 1183
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# In-place activation (bottom and top are the same blob).
layer {
  name: "fc8_p_relu"
  type: "ReLU"
  bottom: "fc8_p"
  top: "fc8_p"
}
# Joins the first branch's image feature with its "time" and "histograms" inputs
# into a single blob. No concat_param is given, so Caffe's default axis applies.
# NOTE(review): assumes "time" and "histograms" are stored as (batch, features)
# in the HDF5 file so they can be concatenated with "fc8" — confirm.
layer {
  name: "merge"
  type: "Concat"
  bottom: "fc8"
  bottom: "time"
  bottom: "histograms"
  top: "merge"
}
# Joins the second branch's image feature with its "time_p" and "histograms_p"
# inputs into a single blob. No concat_param is given, so Caffe's default axis applies.
# NOTE(review): assumes "time_p" and "histograms_p" are stored as (batch, features)
# in the HDF5 file so they can be concatenated with "fc8_p" — confirm.
layer {
  name: "merge_p"
  type: "Concat"
  bottom: "fc8_p"
  bottom: "time_p"
  bottom: "histograms_p"
  top: "merge_p"
}
# Final embedding for the first branch: projects the merged features to 200
# dimensions. Param names are reused by "fc_final_p", so the projection is shared.
layer {
  name: "fc_final"
  type: "InnerProduct"
  bottom: "merge"
  top: "fc_final"
  param {
    name: "fc_final_w"
    lr_mult: 4 # Higher learning rate since this layer is starting from random
    decay_mult: 1
  }
  param {
    name: "fc_final_b"
    lr_mult: 4 # Higher learning rate since this layer is starting from random
    decay_mult: 0
  }
  inner_product_param {
    num_output: 200
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0.01
    }
  }
}
# Final embedding for the second ("_p") branch: projects the merged features to
# 200 dimensions. Uses the same param names as "fc_final", so the projection is shared.
layer {
  name: "fc_final_p"
  type: "InnerProduct"
  bottom: "merge_p"
  top: "fc_final_p"
  param {
    name: "fc_final_w"
    lr_mult: 4 # Higher learning rate since this layer is starting from random
    decay_mult: 1
  }
  param {
    name: "fc_final_b"
    lr_mult: 4 # Higher learning rate since this layer is starting from random
    decay_mult: 0
  }
  inner_product_param {
    num_output: 200
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0.01
    }
  }
}
# Contrastive loss over the two branch embeddings, using the pair label produced
# by the HDF5 data layer.
# NOTE(review): Caffe's ContrastiveLoss treats label 1 as "similar" and 0 as
# "dissimilar" — confirm the HDF5 labels follow that convention.
layer {
  name: "loss"
  type: "ContrastiveLoss"
  bottom: "fc_final"
  bottom: "fc_final_p"
  bottom: "label"
  top: "loss"
  contrastive_loss_param {
    margin: 1.0
  }
}
# Silence layers consume the data-layer tops that no other layer reads
# (shot ids and per-image labels), so they are not left as dangling net outputs.
layer {
  name: "shot_id_silence"
  type: "Silence"
  bottom: "shot_id"
}
layer {
  name: "shot_id_silence_p"
  type: "Silence"
  bottom: "shot_id_p"
}
layer {
  name: "label_image_silence"
  type: "Silence"
  bottom: "label_image"
}
layer {
  name: "label_image_silence_p"
  type: "Silence"
  bottom: "label_image_p"
}
Copy link

meisa233 commented Aug 9, 2018

I am sorry to disturb you.

I have downloaded your scene detection model from the paper presented at the 23rd ACM International Conference on Multimedia, 2015.

However, I can't find the files bbc_train_image.txt, bbc_train_image_p.txt, bbc_train_h5.txt, bbc_test_image.txt, bbc_test_image_p.txt, or bbc_test_h5.txt.

Where can I find them or download them?

Copy link

I can't find the bbc_train_image.txt, bbc_train_image_p.txt, bbc_train_h5.txt, bbc_test_image.txt, bbc_test_image_p.txt, bbc_test_h5.txt.

Where can I find them or download them?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment