@horoiwa
Created May 2, 2023 09:13
import tensorflow as tf


def update_q(self, states, actions, rewards, dones, next_states):
    # Clip rewards to [-1, 1] and reshape rewards/dones into column vectors.
    rewards = tf.clip_by_value(tf.reshape(rewards, (-1, 1)), -1.0, 1.0)
    dones = tf.reshape(dones, (-1, 1))

    # One-step bootstrap target from the state-value network;
    # (1 - dones) zeroes out the bootstrap term on terminal transitions.
    target_q = rewards + self.gamma * (1.0 - dones) * self.valuenet(next_states)

    with tf.GradientTape() as tape:
        # Twin critics; both heads are regressed toward the same target.
        q1, q2 = self.qnet(states, actions)
        loss = tf.reduce_mean(
            tf.square(target_q - q1) + tf.square(target_q - q2)
        )

    # Update only the Q-network parameters.
    variables = self.qnet.trainable_variables
    grads = tape.gradient(loss, variables)
    self.q_optimizer.apply_gradients(zip(grads, variables))

    return loss
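
For context, this is a critic update in the style of soft actor-critic: a twin Q-network is regressed toward a one-step target built from a separate state-value network. Below is a minimal, hypothetical sketch of the agent state the method relies on; the class name, constructor arguments, and optimizer settings are assumptions, not part of the gist, and it reuses the `import tensorflow as tf` above.

class Agent:
    """Hypothetical scaffold showing the attributes update_q expects."""

    def __init__(self, qnet, valuenet, gamma=0.99):
        self.qnet = qnet          # twin critic: qnet(states, actions) -> (q1, q2)
        self.valuenet = valuenet  # state-value network used for the bootstrap target
        self.gamma = gamma        # discount factor
        self.q_optimizer = tf.keras.optimizers.Adam(learning_rate=3e-4)

With that scaffold, a training loop would call it on a sampled batch, e.g. loss = agent.update_q(states, actions, rewards, dones, next_states), where each argument is a batched tensor from the replay buffer.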