import tensorflow as tf


class TNET():
    """
    Target network for computing the maximum estimated Q-value over actions.
    """
    def __init__(self, in_units, out_units, hidden_units=250):
        self.in_units = in_units
        self.out_units = out_units
        self.hidden_units = hidden_units
        self._model()

    def _model(self):
        with tf.variable_scope('tnet'):
            # input layer: a batch of states
            self.x = tf.placeholder(tf.float32, shape=(None, self.in_units))
            # weights from input layer to hidden layer 1
            W1 = tf.get_variable('W1', shape=(self.in_units, self.hidden_units), initializer=tf.random_normal_initializer())
            # weights from hidden layer 1 to hidden layer 2
            W2 = tf.get_variable('W2', shape=(self.hidden_units, self.hidden_units), initializer=tf.random_normal_initializer())
            # weights from hidden layer 2 to output layer
            W3 = tf.get_variable('W3', shape=(self.hidden_units, self.out_units), initializer=tf.random_normal_initializer())
            # bias of hidden layer 1
            b1 = tf.get_variable('b1', shape=(self.hidden_units,), initializer=tf.zeros_initializer())
            # bias of hidden layer 2
            b2 = tf.get_variable('b2', shape=(self.hidden_units,), initializer=tf.zeros_initializer())
            # output of hidden layer 1
            h1 = tf.nn.tanh(tf.matmul(self.x, W1) + b1)
            # output of hidden layer 2
            h2 = tf.nn.tanh(tf.matmul(h1, W2) + b2)
            # output layer: one estimated Q-value per action
            self.q = tf.matmul(h2, W3)

        # collect the target network's variables so they can be
        # overwritten with the main network's weights when syncing
        self.params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='tnet')
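
# For context, a minimal sketch of how a target network like this is typically
# used in DQN-style training: build it alongside a main Q-network, periodically
# copy the main network's weights into tnet.params via tf.assign, and use the
# max over tnet.q when forming the TD target. The 'qnet' scope and the state /
# action sizes below are illustrative assumptions, not part of this gist.

import numpy as np

tnet = TNET(in_units=4, out_units=2)  # e.g. 4 state dims, 2 actions (assumed)

# hypothetical main-network variables, assumed to be built under a 'qnet'
# scope with the same layer layout as the target network
main_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='qnet')

# ops that overwrite the target network's weights with the main network's
sync_ops = [tf.assign(t, m) for t, m in zip(tnet.params, main_params)]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(sync_ops)  # re-run every N training steps to re-sync

    # max estimated Q-value per state, as used in the TD target
    states = np.zeros((1, 4), dtype=np.float32)
    q_max = sess.run(tf.reduce_max(tnet.q, axis=1), feed_dict={tnet.x: states})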