Instantly share code, notes, and snippets.

@ErikGartner /train.prototxt Secret
Last active May 30, 2018

Embed
What would you like to do?
Policy Gradients in Caffe
# Vanilla Policy Gradient (REINFORCE-style) network definition.
name: "VPGNet"
layer {
# State input: batch of 20 samples, 1 feature each.
name: "data"
type: "Input"
top: "data"
input_param { shape: {dim: 20 dim: 1} }
}
layer {
# Per-action reward input: 20 samples x 2 actions (matches the fc1/softmax
# output shape so it can be multiplied element-wise with the log-probs).
# Presumably zero for actions not taken (mask times return) -- TODO confirm
# against the training driver that fills this blob.
name: "reward"
type: "Input"
# Consistency fix: "dim: 20" spacing normalized to match the data layer.
input_param { shape: {dim: 20 dim: 2} }
top: "reward"
}
layer {
# This layer is used for numerical stability in the log layer.
# Emits a constant 20x2 blob filled with 1e-7; it is summed with the
# softmax output (see the "eps_softmax" Eltwise layer) so the Log layer
# never evaluates log(0).
type: "DummyData"
name: "epsilon"
top: "epsilon"
dummy_data_param {
shape: { dim: 20 dim: 2 }
data_filler: { type: "constant" value: 0.0000001 }
}
}
layer {
# Single fully-connected layer mapping the 1-d state input to 2 action
# scores. Xavier-initialized weights, constant-initialized (zero) bias
# per Caffe's ConstantFiller default.
name: "fc1"
type: "InnerProduct"
bottom: "data"
top: "fc1"
inner_product_param {
num_output: 2
weight_filler { type: "xavier" }
bias_filler { type: "constant" }
}
}
layer {
# Turns the two fc1 action scores into a probability distribution
# (the policy pi(a|s)).
name: "softmax"
type: "Softmax"
bottom: "fc1"
top: "softmax"
}
layer {
# Adds the constant epsilon blob to the action probabilities so that the
# downstream Log layer never receives an exact zero.
name: "eps_softmax"
type: "Eltwise"
bottom: "epsilon"
bottom: "softmax"
top: "eps_softmax"
eltwise_param { operation: SUM }
}
layer {
# Element-wise natural log of the epsilon-shifted action probabilities.
name: "log"
type: "Log"
bottom: "eps_softmax"
top: "log"
}
layer {
# Policy-gradient core term: element-wise product of the reward blob and
# the log action probabilities, reward * log(pi).
name: "rew_prod_log"
type: "Eltwise"
bottom: "log"
bottom: "reward"
top: "rew_prod_log"
eltwise_param { operation: PROD }
}
layer {
# Sums the per-action terms along axis 1, yielding one scalar per sample
# in the batch (shape 20x2 -> 20).
name: "action_sum"
type: "Reduction"
bottom: "rew_prod_log"
top: "action_sum"
reduction_param { operation: SUM axis: 1 }
}
layer {
# Averages the per-sample terms over the batch, producing the scalar loss.
# NOTE(review): as written this is +mean(reward * log pi); plain gradient
# descent on this loss would *minimize* expected reward. REINFORCE usually
# requires a negation here (e.g. Reduction coeff: -1) or rewards fed in
# already negated -- confirm the sign convention used by the solver/driver.
name: "loss"
type: "Reduction"
reduction_param: {
operation: MEAN
axis: 0
}
bottom: "action_sum"
top: "loss"
loss_weight: 1
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment