# Caffe network definition (protobuf text format, legacy V1 `layers` syntax).
# Two parallel convolutional streams, one per input blob declared below;
# their score maps are combined by the 'fuse' layer later in this file.
name: 'FCN'

# First input blob: 1 x 3 x 500 x 500.
# Legacy `input_dim` entries are given in (num, channels, height, width) order.
input: 'bgr_data'
input_dim: 1
input_dim: 3
input_dim: 500
input_dim: 500

# Second input blob, declared with the same 1 x 3 x 500 x 500 shape.
input: 'hha_data'
input_dim: 1
input_dim: 3
input_dim: 500
input_dim: 500

# bgr network
# Stream fed by 'bgr_data': five convolution stages (2, 2, 3, 3, 3 conv layers,
# all 3x3) each closed by a 2x2 / stride-2 max pool, then fc6/fc7 expressed as
# convolutions, then a 1x1 'score' convolution with 40 outputs.
# Every RELU and DROPOUT layer uses the same blob as bottom and top, i.e. it
# runs in place.

# Stage 1: 64 channels. conv1_1 is the only layer with pad: 100
# (NOTE(review): presumably so the upsampled output is large enough to be
# cropped back to the input size; confirm).
layers { bottom: 'bgr_data' top: 'conv1_1' name: 'conv1_1' type: CONVOLUTION
  convolution_param { engine: CAFFE num_output: 64 pad: 100 kernel_size: 3 } }
layers { bottom: 'conv1_1' top: 'conv1_1' name: 'relu1_1' type: RELU }
layers { bottom: 'conv1_1' top: 'conv1_2' name: 'conv1_2' type: CONVOLUTION
  convolution_param { engine: CAFFE num_output: 64 pad: 1 kernel_size: 3 } }
layers { bottom: 'conv1_2' top: 'conv1_2' name: 'relu1_2' type: RELU }
layers { name: 'pool1' bottom: 'conv1_2' top: 'pool1' type: POOLING
  pooling_param { pool: MAX kernel_size: 2 stride: 2 } }

# Stage 2: 128 channels.
layers { name: 'conv2_1' bottom: 'pool1' top: 'conv2_1' type: CONVOLUTION
  convolution_param { engine: CAFFE num_output: 128 pad: 1 kernel_size: 3 } }
layers { bottom: 'conv2_1' top: 'conv2_1' name: 'relu2_1' type: RELU }
layers { bottom: 'conv2_1' top: 'conv2_2' name: 'conv2_2' type: CONVOLUTION
  convolution_param { engine: CAFFE num_output: 128 pad: 1 kernel_size: 3 } }
layers { bottom: 'conv2_2' top: 'conv2_2' name: 'relu2_2' type: RELU }
layers { bottom: 'conv2_2' top: 'pool2' name: 'pool2' type: POOLING
  pooling_param { pool: MAX kernel_size: 2 stride: 2 } }

# Stage 3: 256 channels.
layers { bottom: 'pool2' top: 'conv3_1' name: 'conv3_1' type: CONVOLUTION
  convolution_param { engine: CAFFE num_output: 256 pad: 1 kernel_size: 3 } }
layers { bottom: 'conv3_1' top: 'conv3_1' name: 'relu3_1' type: RELU }
layers { bottom: 'conv3_1' top: 'conv3_2' name: 'conv3_2' type: CONVOLUTION
  convolution_param { engine: CAFFE num_output: 256 pad: 1 kernel_size: 3 } }
layers { bottom: 'conv3_2' top: 'conv3_2' name: 'relu3_2' type: RELU }
layers { bottom: 'conv3_2' top: 'conv3_3' name: 'conv3_3' type: CONVOLUTION
  convolution_param { engine: CAFFE num_output: 256 pad: 1 kernel_size: 3 } }
layers { bottom: 'conv3_3' top: 'conv3_3' name: 'relu3_3' type: RELU }
layers { bottom: 'conv3_3' top: 'pool3' name: 'pool3' type: POOLING
  pooling_param { pool: MAX kernel_size: 2 stride: 2 } }

# Stage 4: 512 channels.
layers { bottom: 'pool3' top: 'conv4_1' name: 'conv4_1' type: CONVOLUTION
  convolution_param { engine: CAFFE num_output: 512 pad: 1 kernel_size: 3 } }
layers { bottom: 'conv4_1' top: 'conv4_1' name: 'relu4_1' type: RELU }
layers { bottom: 'conv4_1' top: 'conv4_2' name: 'conv4_2' type: CONVOLUTION
  convolution_param { engine: CAFFE num_output: 512 pad: 1 kernel_size: 3 } }
layers { bottom: 'conv4_2' top: 'conv4_2' name: 'relu4_2' type: RELU }
layers { bottom: 'conv4_2' top: 'conv4_3' name: 'conv4_3' type: CONVOLUTION
  convolution_param { engine: CAFFE num_output: 512 pad: 1 kernel_size: 3 } }
layers { bottom: 'conv4_3' top: 'conv4_3' name: 'relu4_3' type: RELU }
layers { bottom: 'conv4_3' top: 'pool4' name: 'pool4' type: POOLING
  pooling_param { pool: MAX kernel_size: 2 stride: 2 } }

# Stage 5: 512 channels.
layers { bottom: 'pool4' top: 'conv5_1' name: 'conv5_1' type: CONVOLUTION
  convolution_param { engine: CAFFE num_output: 512 pad: 1 kernel_size: 3 } }
layers { bottom: 'conv5_1' top: 'conv5_1' name: 'relu5_1' type: RELU }
layers { bottom: 'conv5_1' top: 'conv5_2' name: 'conv5_2' type: CONVOLUTION
  convolution_param { engine: CAFFE num_output: 512 pad: 1 kernel_size: 3 } }
layers { bottom: 'conv5_2' top: 'conv5_2' name: 'relu5_2' type: RELU }
layers { bottom: 'conv5_2' top: 'conv5_3' name: 'conv5_3' type: CONVOLUTION
  convolution_param { engine: CAFFE num_output: 512 pad: 1 kernel_size: 3 } }
layers { bottom: 'conv5_3' top: 'conv5_3' name: 'relu5_3' type: RELU }
layers { bottom: 'conv5_3' top: 'pool5' name: 'pool5' type: POOLING
  pooling_param { pool: MAX kernel_size: 2 stride: 2 } }

# fc6 / fc7 as convolutions (7x7 and 1x1 kernels, 4096 outputs each, no
# padding), each followed by in-place ReLU and dropout with ratio 0.5.
layers { bottom: 'pool5' top: 'fc6' name: 'fc6' type: CONVOLUTION
  convolution_param { engine: CAFFE kernel_size: 7 num_output: 4096 } }
layers { bottom: 'fc6' top: 'fc6' name: 'relu6' type: RELU }
layers { bottom: 'fc6' top: 'fc6' name: 'drop6' type: DROPOUT
  dropout_param { dropout_ratio: 0.5 } }
layers { bottom: 'fc6' top: 'fc7' name: 'fc7' type: CONVOLUTION
  convolution_param { engine: CAFFE kernel_size: 1 num_output: 4096 } }
layers { bottom: 'fc7' top: 'fc7' name: 'relu7' type: RELU }
layers { bottom: 'fc7' top: 'fc7' name: 'drop7' type: DROPOUT
  dropout_param { dropout_ratio: 0.5 } }

# 1x1 convolution producing the 40-channel 'score' blob
# (presumably one channel per class; confirm against the label set).
layers { name: 'score' type: CONVOLUTION bottom: 'fc7' top: 'score'
  convolution_param { engine: CAFFE num_output: 40 kernel_size: 1 } }

# hha network
# Stream fed by 'hha_data'. Layer for layer it has the same types and
# parameters as the bgr stream, with every layer and blob name carrying an
# '_hha' suffix: five 3x3 convolution stages (2, 2, 3, 3, 3 conv layers) each
# closed by a 2x2 / stride-2 max pool, then fc6_hha/fc7_hha as convolutions,
# then a 1x1 'score_hha' convolution with 40 outputs.
# RELU and DROPOUT layers share bottom and top, i.e. they run in place.

# Stage 1: 64 channels. conv1_1_hha is the only layer here with pad: 100.
layers { bottom: 'hha_data' top: 'conv1_1_hha' name: 'conv1_1_hha' type: CONVOLUTION
  convolution_param { engine: CAFFE num_output: 64 pad: 100 kernel_size: 3 } }
layers { bottom: 'conv1_1_hha' top: 'conv1_1_hha' name: 'relu1_1_hha' type: RELU }
layers { bottom: 'conv1_1_hha' top: 'conv1_2_hha' name: 'conv1_2_hha' type: CONVOLUTION
  convolution_param { engine: CAFFE num_output: 64 pad: 1 kernel_size: 3 } }
layers { bottom: 'conv1_2_hha' top: 'conv1_2_hha' name: 'relu1_2_hha' type: RELU }
layers { name: 'pool1_hha' bottom: 'conv1_2_hha' top: 'pool1_hha' type: POOLING
  pooling_param { pool: MAX kernel_size: 2 stride: 2 } }

# Stage 2: 128 channels.
layers { name: 'conv2_1_hha' bottom: 'pool1_hha' top: 'conv2_1_hha' type: CONVOLUTION
  convolution_param { engine: CAFFE num_output: 128 pad: 1 kernel_size: 3 } }
layers { bottom: 'conv2_1_hha' top: 'conv2_1_hha' name: 'relu2_1_hha' type: RELU }
layers { bottom: 'conv2_1_hha' top: 'conv2_2_hha' name: 'conv2_2_hha' type: CONVOLUTION
  convolution_param { engine: CAFFE num_output: 128 pad: 1 kernel_size: 3 } }
layers { bottom: 'conv2_2_hha' top: 'conv2_2_hha' name: 'relu2_2_hha' type: RELU }
layers { bottom: 'conv2_2_hha' top: 'pool2_hha' name: 'pool2_hha' type: POOLING
  pooling_param { pool: MAX kernel_size: 2 stride: 2 } }

# Stage 3: 256 channels.
layers { bottom: 'pool2_hha' top: 'conv3_1_hha' name: 'conv3_1_hha' type: CONVOLUTION
  convolution_param { engine: CAFFE num_output: 256 pad: 1 kernel_size: 3 } }
layers { bottom: 'conv3_1_hha' top: 'conv3_1_hha' name: 'relu3_1_hha' type: RELU }
layers { bottom: 'conv3_1_hha' top: 'conv3_2_hha' name: 'conv3_2_hha' type: CONVOLUTION
  convolution_param { engine: CAFFE num_output: 256 pad: 1 kernel_size: 3 } }
layers { bottom: 'conv3_2_hha' top: 'conv3_2_hha' name: 'relu3_2_hha' type: RELU }
layers { bottom: 'conv3_2_hha' top: 'conv3_3_hha' name: 'conv3_3_hha' type: CONVOLUTION
  convolution_param { engine: CAFFE num_output: 256 pad: 1 kernel_size: 3 } }
layers { bottom: 'conv3_3_hha' top: 'conv3_3_hha' name: 'relu3_3_hha' type: RELU }
layers { bottom: 'conv3_3_hha' top: 'pool3_hha' name: 'pool3_hha' type: POOLING
  pooling_param { pool: MAX kernel_size: 2 stride: 2 } }

# Stage 4: 512 channels.
layers { bottom: 'pool3_hha' top: 'conv4_1_hha' name: 'conv4_1_hha' type: CONVOLUTION
  convolution_param { engine: CAFFE num_output: 512 pad: 1 kernel_size: 3 } }
layers { bottom: 'conv4_1_hha' top: 'conv4_1_hha' name: 'relu4_1_hha' type: RELU }
layers { bottom: 'conv4_1_hha' top: 'conv4_2_hha' name: 'conv4_2_hha' type: CONVOLUTION
  convolution_param { engine: CAFFE num_output: 512 pad: 1 kernel_size: 3 } }
layers { bottom: 'conv4_2_hha' top: 'conv4_2_hha' name: 'relu4_2_hha' type: RELU }
layers { bottom: 'conv4_2_hha' top: 'conv4_3_hha' name: 'conv4_3_hha' type: CONVOLUTION
  convolution_param { engine: CAFFE num_output: 512 pad: 1 kernel_size: 3 } }
layers { bottom: 'conv4_3_hha' top: 'conv4_3_hha' name: 'relu4_3_hha' type: RELU }
layers { bottom: 'conv4_3_hha' top: 'pool4_hha' name: 'pool4_hha' type: POOLING
  pooling_param { pool: MAX kernel_size: 2 stride: 2 } }

# Stage 5: 512 channels.
layers { bottom: 'pool4_hha' top: 'conv5_1_hha' name: 'conv5_1_hha' type: CONVOLUTION
  convolution_param { engine: CAFFE num_output: 512 pad: 1 kernel_size: 3 } }
layers { bottom: 'conv5_1_hha' top: 'conv5_1_hha' name: 'relu5_1_hha' type: RELU }
layers { bottom: 'conv5_1_hha' top: 'conv5_2_hha' name: 'conv5_2_hha' type: CONVOLUTION
  convolution_param { engine: CAFFE num_output: 512 pad: 1 kernel_size: 3 } }
layers { bottom: 'conv5_2_hha' top: 'conv5_2_hha' name: 'relu5_2_hha' type: RELU }
layers { bottom: 'conv5_2_hha' top: 'conv5_3_hha' name: 'conv5_3_hha' type: CONVOLUTION
  convolution_param { engine: CAFFE num_output: 512 pad: 1 kernel_size: 3 } }
layers { bottom: 'conv5_3_hha' top: 'conv5_3_hha' name: 'relu5_3_hha' type: RELU }
layers { bottom: 'conv5_3_hha' top: 'pool5_hha' name: 'pool5_hha' type: POOLING
  pooling_param { pool: MAX kernel_size: 2 stride: 2 } }

# fc6_hha / fc7_hha as convolutions (7x7 and 1x1 kernels, 4096 outputs each,
# no padding), each followed by in-place ReLU and dropout with ratio 0.5.
layers { bottom: 'pool5_hha' top: 'fc6_hha' name: 'fc6_hha' type: CONVOLUTION
  convolution_param { engine: CAFFE kernel_size: 7 num_output: 4096 } }
layers { bottom: 'fc6_hha' top: 'fc6_hha' name: 'relu6_hha' type: RELU }
layers { bottom: 'fc6_hha' top: 'fc6_hha' name: 'drop6_hha' type: DROPOUT
  dropout_param { dropout_ratio: 0.5 } }
layers { bottom: 'fc6_hha' top: 'fc7_hha' name: 'fc7_hha' type: CONVOLUTION
  convolution_param { engine: CAFFE kernel_size: 1 num_output: 4096 } }
layers { bottom: 'fc7_hha' top: 'fc7_hha' name: 'relu7_hha' type: RELU }
layers { bottom: 'fc7_hha' top: 'fc7_hha' name: 'drop7_hha' type: DROPOUT
  dropout_param { dropout_ratio: 0.5 } }

# 1x1 convolution producing the 40-channel 'score_hha' blob.
layers { name: 'score_hha' type: CONVOLUTION bottom: 'fc7_hha' top: 'score_hha'
  convolution_param { engine: CAFFE num_output: 40 kernel_size: 1 } }

# fuse
# Element-wise weighted sum of the two score blobs. With both coefficients
# set to 0.5 the result is the average of 'score' and 'score_hha'.
layers { type: ELTWISE name: 'fuse' bottom: 'score' bottom: 'score_hha' top: 'score-fused'
  eltwise_param { operation: SUM coeff: 0.5 coeff: 0.5 } }

# Deconvolution (kernel 64, stride 32, 40 outputs, no bias term) that
# upsamples the fused score map into 'bigscore'.
layers { type: DECONVOLUTION name: 'upsample-new'
  bottom: 'score-fused' top: 'bigscore'
  convolution_param { num_output: 40 kernel_size: 64 stride: 32 bias_term: false } }

# Crop 'bigscore' with 'bgr_data' as the second bottom.
# NOTE(review): presumably this crops to the spatial size of 'bgr_data';
# the CROP layer's exact offset behaviour depends on the Caffe branch in use.
layers { type: CROP name: 'crop' bottom: 'bigscore' bottom: 'bgr_data' top: 'upscore' }
Hello Jon,
I am currently reproducing your results on the NYUD dataset and am wondering why you chose 500x500 as the input dimensions. The raw images are roughly 640x480, so you must even have interpolated (upsampled) the height?!
I'd appreciate any details on the preprocessing!
Thank you very much!