Skip to content

Instantly share code, notes, and snippets.

@AurelianTactics
Created October 26, 2018 14:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save AurelianTactics/49b6c17f652ad2988dc84f9f9307c1de to your computer and use it in GitHub Desktop.
Save AurelianTactics/49b6c17f652ad2988dc84f9f9307c1de to your computer and use it in GitHub Desktop.
Twin (double) critic network for TD3
class Critic(Model):
    """Q-value critic over (observation, action), optionally with twin heads.

    With ``td3_variant=False`` a call returns a single tensor with a last
    dimension of 1.  With ``td3_variant=True`` a call returns a pair of such
    tensors produced by two separately parameterised critics, as used by
    TD3's clipped double-Q target (https://arxiv.org/abs/1802.09477): the
    caller is expected to take the minimum of the two target outputs.

    NOTE(review): ``self.network_builder`` and ``self.name`` are not set in
    this class; presumably ``Model.__init__`` provides them -- confirm.
    """

    def __init__(self, name='critic', td3_variant=False, network='mlp', **network_kwargs):
        """Store the configuration and let ``Model`` set up the network builder.

        Args:
            name: Variable scope under which all critic variables are created.
            td3_variant: If true, ``__call__`` builds two critics and returns
                both outputs.
            network: Forwarded unchanged to ``Model.__init__``.
            **network_kwargs: Forwarded unchanged to ``Model.__init__``.
        """
        super().__init__(name=name, network=network, **network_kwargs)
        # Not read anywhere in this class; kept for code that inspects it.
        self.layer_norm = True
        self.td3_variant = td3_variant

    def _q_head(self, obs, action):
        """Build one critic in the current variable scope: concat -> body -> 1 unit."""
        # This assumes observation and action can be concatenated on the last axis.
        joint = tf.concat([obs, action], axis=-1)
        hidden = self.network_builder(joint)
        # The explicit name matters: an unnamed tf.layers.dense gets a fresh
        # auto-numbered scope ("dense", "dense_1", ...) on every call, so a
        # second call of the critic would NOT reuse the trained output weights.
        return tf.layers.dense(
            hidden,
            1,
            kernel_initializer=tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3),
            name='output',
        )

    def __call__(self, obs, action, reuse=False):
        """Build (or re-enter) the critic graph for the given inputs.

        Args:
            obs: Observation tensor.
            action: Action tensor; concatenated with ``obs`` on the last axis.
            reuse: Unused. Kept for interface compatibility; variables are
                always created or reused via ``tf.AUTO_REUSE``.

        Returns:
            A single Q tensor, or a ``(q1, q2)`` tuple when ``td3_variant``
            is true.
        """
        with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
            if not self.td3_variant:
                return self._q_head(obs, action)

            # Each critic needs its own sub-scope.  Under AUTO_REUSE, building
            # both in the same scope lets any fixed-name variables created by
            # network_builder be shared, which would tie the two critics
            # together and make min(Q1, Q2) pointless.  Nested scopes inherit
            # the AUTO_REUSE flag, so repeated calls still share weights
            # per critic.
            with tf.variable_scope('q1'):
                q1 = self._q_head(obs, action)
            with tf.variable_scope('q2'):
                q2 = self._q_head(obs, action)
            return q1, q2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment