Created
June 25, 2018 07:36
-
-
Save simoninithomas/df21ba9d370c76482aa3a8c62e6b7af7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class DDDQNNet:
    """Dueling Double Deep Q-Network graph builder (TensorFlow 1.x).

    Builds, inside a variable scope called ``name``, a three-layer
    convolutional feature extractor followed by two dueling streams:
    a state-value stream V(s) and an advantage stream A(s, a).  They
    are aggregated into Q(s, a) = V(s) + (A(s, a) - mean_a' A(s, a')),
    and an MSE loss with an RMSProp training op is attached.

    The variable scope matters: the caller creates two instances of this
    class (the online DQN and the target network) and uses the scope
    name to locate each network's variables when copying the online
    weights into the target network.

    Parameters
    ----------
    state_size : sequence of int
        Shape of one state observation, e.g. (100, 120, 4); it is
        unpacked into the input placeholder shape [None, *state_size].
    action_size : int
        Number of discrete actions (width of the advantage stream).
    learning_rate : float
        Learning rate for the RMSProp optimizer.
    name : str
        Variable-scope name for this network ("DQNetwork", "TargetNetwork", ...).
    """

    def __init__(self, state_size, action_size, learning_rate, name):
        self.state_size = state_size
        self.action_size = action_size
        self.learning_rate = learning_rate
        self.name = name

        # The scope tags every variable with this network's name so we can
        # later fetch/copy the weights of the online DQN vs. the target net.
        with tf.variable_scope(self.name):
            # Placeholders.
            # *state_size unpacks each element of the state_size tuple, so
            # for state_size == (100, 120, 4) the shape is [None, 100, 120, 4].
            self.inputs_ = tf.placeholder(tf.float32, [None, *self.state_size], name="inputs")
            # One-hot (or masked) action selection used to pick Q(s, a) below.
            self.actions_ = tf.placeholder(tf.float32, [None, self.action_size], name="actions_")

            # target_Q holds R(s, a) + gamma * max_a' Q_target(s', a'),
            # computed outside the graph by the training loop.
            self.target_Q = tf.placeholder(tf.float32, [None], name="target")

            """
            First convnet:
            CNN
            ELU
            """
            # Input is 100x120x4
            self.conv1 = tf.layers.conv2d(inputs=self.inputs_,
                                          filters=32,
                                          kernel_size=[8, 8],
                                          strides=[4, 4],
                                          padding="VALID",
                                          kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
                                          name="conv1")
            self.conv1_out = tf.nn.elu(self.conv1, name="conv1_out")

            """
            Second convnet:
            CNN
            ELU
            """
            self.conv2 = tf.layers.conv2d(inputs=self.conv1_out,
                                          filters=64,
                                          kernel_size=[4, 4],
                                          strides=[2, 2],
                                          padding="VALID",
                                          kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
                                          name="conv2")
            self.conv2_out = tf.nn.elu(self.conv2, name="conv2_out")

            """
            Third convnet:
            CNN
            ELU
            """
            self.conv3 = tf.layers.conv2d(inputs=self.conv2_out,
                                          filters=128,
                                          kernel_size=[4, 4],
                                          strides=[2, 2],
                                          padding="VALID",
                                          kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
                                          name="conv3")
            self.conv3_out = tf.nn.elu(self.conv3, name="conv3_out")

            self.flatten = tf.layers.flatten(self.conv3_out)

            ## Here we separate into two streams (dueling architecture).
            # The stream that estimates the state value V(s).
            self.value_fc = tf.layers.dense(inputs=self.flatten,
                                            units=512,
                                            activation=tf.nn.elu,
                                            kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                            name="value_fc")
            self.value = tf.layers.dense(inputs=self.value_fc,
                                         units=1,
                                         activation=None,
                                         kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                         name="value")

            # The stream that estimates the advantage A(s, a) per action.
            self.advantage_fc = tf.layers.dense(inputs=self.flatten,
                                                units=512,
                                                activation=tf.nn.elu,
                                                kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                                name="advantage_fc")
            self.advantage = tf.layers.dense(inputs=self.advantage_fc,
                                             units=self.action_size,
                                             activation=None,
                                             kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                             name="advantages")

            # Aggregating layer:
            # Q(s,a) = V(s) + (A(s,a) - 1/|A| * sum_a' A(s,a'))
            # Subtracting the mean advantage keeps V and A identifiable.
            self.output = self.value + tf.subtract(self.advantage,
                                                   tf.reduce_mean(self.advantage, axis=1, keepdims=True))

            # Q is the predicted Q value of the action actually taken:
            # the element-wise product with the one-hot actions_ mask keeps
            # only that action's Q, and reduce_sum collapses the axis.
            self.Q = tf.reduce_sum(tf.multiply(self.output, self.actions_), axis=1)

            # Loss: mean squared TD error, mean((Qtarget - Q)^2).
            self.loss = tf.reduce_mean(tf.square(self.target_Q - self.Q))

            self.optimizer = tf.train.RMSPropOptimizer(self.learning_rate).minimize(self.loss)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment