Last active
January 10, 2019 17:21
-
-
Save crazysal/edc52a3554d5f1e8ee6a24d774de2bd0 to your computer and use it in GitHub Desktop.
Decoder training model for end to end textspotter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Decoder sub-network definition (Caffe NetParameter, protobuf text format).
name: "DECODER"
# Net-level inputs. Each `input` is paired, in order, with the `input_shape`
# that follows it.
input: "sample_gt_label_input"
input_shape { dim: 1 dim: 1 }
# A second input is declared by repeating the `input` field; NetParameter has
# no `input2` field.
input: "sample_gt_cont"
input_shape { dim: 1 dim: 1 }
# NOTE(review): "sample_gt_label_output" is consumed by the "pred" and
# "accuracy" layers but is not declared as an input in this file -- confirm it
# is supplied by the part of the network this fragment is merged into.
# Embeds each input label index (vocabulary size 69) as a 128-d vector.
layer {
  name: "embedding_letter"
  type: "Embed"
  bottom: "sample_gt_label_input"
  top: "embedding_letter"
  param {
    lr_mult: 1
  }
  embed_param {
    bias_term: false  # weights only, no bias blob
    input_dim: 69     # same value as num_output of "predictforward"
    num_output: 128
    weight_filler { type: "uniform" min: -0.08 max: 0.08 }
  }
  # The bottom holds label indices, so no gradient is sent back to it.
  propagate_down: false
}
########################################## | |
# LSTM over "reshape_feature_transpose" in its original order.
# NOTE(review): "Lstm" is not a stock BVLC Caffe layer type; it presumably
# comes from the fork this model targets -- confirm the meaning of the three
# param blobs against that implementation.
layer {
  name: "llstmoutput"
  type: "Lstm"
  bottom: "reshape_feature_transpose"  # produced outside this file
  top: "llstm"
  param { lr_mult: 1.0 decay_mult: 1.0 }
  param { lr_mult: 2.0 decay_mult: 0.0 }
  param { lr_mult: 1.0 decay_mult: 1.0 }
  lstm_param {
    num_output: 256
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" }
    clipping_threshold: 1
  }
}
# ===================== rlstm =================== | |
# Reverses "reshape_feature_transpose" along axis 0 to feed the second LSTM.
# NOTE(review): axis 0 is presumably the time axis -- confirm.
layer {
  name: "rlstm_input"
  type: "ReverseAxis"
  bottom: "reshape_feature_transpose"
  top: "rlstm_input"
  reverse_axis_param {
    axis: 0
  }
}
# LSTM over the axis-0-reversed sequence; same hyper-parameters as
# "llstmoutput".
layer {
  name: "rlstm_output"
  type: "Lstm"
  bottom: "rlstm_input"
  top: "rlstm_output"
  param { lr_mult: 1.0 decay_mult: 1.0 }
  param { lr_mult: 2.0 decay_mult: 0.0 }
  param { lr_mult: 1.0 decay_mult: 1.0 }
  lstm_param {
    num_output: 256
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" }
    clipping_threshold: 1
  }
}
# Reverses the second LSTM's output along axis 0 again, so that "rlstm" is
# index-aligned with "llstm" before the two are summed.
layer {
  name: "lstm_reverse2"
  type: "ReverseAxis"
  bottom: "rlstm_output"
  top: "rlstm"
  reverse_axis_param {
    axis: 0
  }
}
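# Merge llstm and rlstm by element-wise sum. Shape noted by the author: 64 * N * 128 (both LSTMs in this file set num_output: 256, so the last dimension may be 256 -- TODO confirm).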
# Element-wise sum of the two LSTM outputs, each weighted by 1.
layer {
  name: "lstm_encoders"
  type: "Eltwise"
  bottom: "llstm"
  bottom: "rlstm"
  top: "lstm_encoders"
  eltwise_param { operation: SUM coeff: 1 coeff: 1 }
}
############################################################# | |
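### Append a trailing singleton axis: 64*N*128 ==> 64*N*128*1 (author's shape note; the LSTMs in this file set num_output: 256 -- TODO confirm the channel count).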
# Adds a fourth axis of size 1 to the summed encoder output.
layer {
  name: "reshape_lstm_encoders"
  type: "Reshape"
  bottom: "lstm_encoders"
  top: "reshape_lstm_encoders"
  reshape_param {
    # dim: 0 copies the corresponding bottom dimension; the final dim: 1 is
    # the newly added axis.
    shape { dim: 0 dim: 0 dim: 0 dim: 1 }
  }
}
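### Swap axes 0 and 3: 64*N*128*1 ==> 1*N*128*64 (author's shape note; the LSTMs in this file set num_output: 256 -- TODO confirm the channel count).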
# Permutes the axes to order (3, 1, 2, 0), i.e. exchanges the first and last
# axes while leaving the middle two in place.
# NOTE(review): "Transpose" is not a stock BVLC Caffe layer type -- confirm
# that `dim` lists source axes in output order.
layer {
  name: "reshape_lstm_encoders_transpose"
  type: "Transpose"
  bottom: "reshape_lstm_encoders"
  top: "reshape_lstm_encoders_transpose"
  transpose_param { dim: 3 dim: 1 dim: 2 dim: 0 }
}
# Repeats the transposed encoder blob 25 times along axis 0.
# NOTE(review): 25 is presumably the number of decoder time steps -- confirm
# against the shape fed to "sample_gt_cont".
layer {
  name: "tile_encoder"
  type: "Tile"
  bottom: "reshape_lstm_encoders_transpose"
  top: "tile_encoder"
  tile_param {
    axis: 0
    tiles: 25
  }
}
# Attention LSTM decoder over the tiled encoder features. Emits the decoder
# output and a second top named "att_weights".
# NOTE(review): "AttLstm" is not a stock BVLC Caffe layer type -- confirm the
# bottom/top contract against the fork's implementation.
layer {
  name: "decoder"
  type: "AttLstm"
  bottom: "tile_encoder"
  bottom: "sample_gt_cont"
  top: "decoder"
  top: "att_weights"
  # All three parameter blobs train at 10x the base learning rate.
  param { lr_mult: 10 decay_mult: 1 }
  param { lr_mult: 10 decay_mult: 1 }
  param { lr_mult: 10 decay_mult: 1 }
  recurrent_param {
    num_output: 256
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
  # One flag per bottom, in order: back-propagate into "tile_encoder" but not
  # into "sample_gt_cont".
  propagate_down: true
  propagate_down: false
}
# Concatenates the decoder output with the letter embedding along axis 2.
layer {
  name: "concat_embedding"
  type: "Concat"
  bottom: "decoder"
  bottom: "embedding_letter"
  top: "concat_embedding"
  concat_param { axis: 2 }
}
# Recurrent layer over the concatenated decoder/embedding features.
# NOTE(review): "LSTMNew" is not a stock BVLC Caffe layer type -- confirm how
# it uses its second bottom, "sample_gt_cont".
layer {
  name: "pred_lstm"
  type: "LSTMNew"
  bottom: "concat_embedding"
  bottom: "sample_gt_cont"
  top: "pred_lstm"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 1 decay_mult: 1 }
  recurrent_param {
    num_output: 256
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
  # One flag per bottom, in order: back-propagate into "concat_embedding" but
  # not into "sample_gt_cont".
  propagate_down: true
  propagate_down: false
}
# Fully connected layer (256 outputs) applied to "pred_lstm".
layer {
  name: "predictip"
  type: "InnerProduct"
  bottom: "pred_lstm"
  top: "predict_ip"
  # Weights.
  param {
    lr_mult: 1
    decay_mult: 1
  }
  # Bias: doubled learning rate, no weight decay.
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 256
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
    # Axes before 2 are kept as-is; the product is taken over axis 2 onward.
    axis: 2
  }
}
# Fully connected layer producing 69 scores, the same size as the embedding
# layer's input_dim.
layer {
  name: "predictforward"
  type: "InnerProduct"
  bottom: "predict_ip"
  top: "predict_forward"
  # Weights.
  param {
    lr_mult: 1
    decay_mult: 1
  }
  # Bias: doubled learning rate, no weight decay.
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 69
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
    # Axes before 2 are kept as-is; the product is taken over axis 2 onward.
    axis: 2
  }
}
# Softmax cross-entropy loss of the scores against "sample_gt_label_output".
layer {
  name: "pred"
  type: "SoftmaxWithLoss"
  bottom: "predict_forward"
  bottom: "sample_gt_label_output"  # not declared in this file
  top: "pred"
  loss_param {
    ignore_label: -1  # positions labelled -1 do not contribute to the loss
  }
  softmax_param {
    axis: 2  # softmax is taken over axis 2
  }
  loss_weight: 1
  # One flag per bottom, in order: gradients flow to the scores, not to the
  # labels.
  propagate_down: true
  propagate_down: false
}
# Softmax over axis 2 of the scores; excluded from back-propagation, so it
# does not affect training gradients.
layer {
  name: "pred_soft"
  type: "Softmax"
  bottom: "predict_forward"
  top: "pred_soft"
  softmax_param {
    axis: 2
  }
  propagate_down: false
}
# Accuracy of "pred_soft" against the ground-truth labels; instantiated only
# in the TEST phase.
layer {
  name: "accuracy"
  type: "Accuracy"
  bottom: "pred_soft"
  bottom: "sample_gt_label_output"
  top: "accuracy"
  # Same ignore label and class axis as the "pred" loss layer.
  accuracy_param { ignore_label: -1 axis: 2 }
  include {
    phase: TEST
  }
}
# 1x1 convolution mapping "conv_final" to 2 score channels.
layer {
  name: "score_4s"
  type: "Convolution"
  bottom: "conv_final"  # produced outside this file
  top: "score_4s"
  # Weights.
  param {
    lr_mult: 1
    decay_mult: 1
  }
  # Bias: doubled learning rate, no weight decay.
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 2
    kernel_size: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# Softmax cross-entropy loss of the 2-channel scores against "mask_gt".
layer {
  name: "loss_4s"
  type: "SoftmaxWithLoss"
  bottom: "score_4s"
  bottom: "mask_gt"  # produced outside this file
  top: "loss_4s"
  loss_weight: 1
  # One flag per bottom, in order: gradients flow to the scores, not to the
  # mask.
  propagate_down: true
  propagate_down: false
  loss_param {
    ignore_label: 255  # positions labelled 255 do not contribute to the loss
    # NOTE(review): upstream Caffe marks `normalize` as deprecated in favour
    # of `normalization`; kept as-is in case the target fork predates that.
    normalize: true
  }
}
# 3x3 convolution (pad 1) producing 128 feature channels from "conv_final".
# Its output feeds both "conv_maps" and "conv_orient".
layer {
  name: "conv_feature_prior"
  type: "Convolution"
  bottom: "conv_final"  # produced outside this file
  top: "conv_feature_prior"
  # Weights.
  param {
    lr_mult: 1
    decay_mult: 1
  }
  # Bias: doubled learning rate, no weight decay.
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    pad: 1
    kernel_size: 3
    weight_filler {
      # No `std` here: the xavier filler derives its scale from the blob's
      # fan-in/fan-out and does not read `std`.
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# In-place ReLU on "conv_feature_prior" (bottom and top are the same blob).
layer {
  name: "conv_feature_prior_relu"
  type: "ReLU"
  bottom: "conv_feature_prior"
  top: "conv_feature_prior"
}
# 1x1 convolution producing 4 channels from the shared feature map.
# NOTE(review): presumably the four bounding-box regression maps -- confirm.
layer {
  name: "conv_maps"
  type: "Convolution"
  bottom: "conv_feature_prior"
  top: "conv_maps"
  # Weights.
  param {
    lr_mult: 1
    decay_mult: 1
  }
  # Bias: doubled learning rate, no weight decay.
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 4
    pad: 0
    kernel_size: 1
    weight_filler {
      # No `std` here: the xavier filler derives its scale from the blob's
      # fan-in/fan-out and does not read `std`.
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# In-place ReLU on "conv_maps", so the 4 channels are non-negative.
layer {
  name: "conv_maps/relu"
  type: "ReLU"
  bottom: "conv_maps"
  top: "conv_maps"
}
# 1x1 convolution producing a single channel from the shared feature map; no
# activation follows it in this file.
# NOTE(review): presumably the orientation map -- confirm.
layer {
  name: "conv_orient"
  type: "Convolution"
  bottom: "conv_feature_prior"
  top: "conv_orient"
  # Weights.
  param {
    lr_mult: 1
    decay_mult: 1
  }
  # Bias: doubled learning rate, no weight decay.
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 1
    pad: 0
    kernel_size: 1
    weight_filler {
      # No `std` here: the xavier filler derives its scale from the blob's
      # fan-in/fan-out and does not read `std`.
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# Stacks the 4 "conv_maps" channels and the 1 "conv_orient" channel into one
# blob.
layer {
  name: "concat_bbox_orient"
  type: "Concat"
  bottom: "conv_maps"
  bottom: "conv_orient"
  top: "pre_bbox_orient"
  # Channel axis; this is Concat's default, written out for clarity.
  concat_param { axis: 1 }
}
# Placeholder for an IoU loss on the concatenated box/orientation prediction.
# Replace both `name` and `type` with the registered name of your own loss
# layer before loading this net.
# NOTE(review): only the prediction is wired in as a bottom; a loss layer will
# presumably also need a ground-truth bottom -- confirm against your
# implementation.
layer {
  name: "YOUR IOULOSS LAYER"
  type: "YOUR IOULOSS LAYER"
  bottom: "pre_bbox_orient"
  top: "iouloss"  # string values must be quoted in protobuf text format
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment