horoiwa/update_v.py

## update_v.py

  def update_value(self, states, actions):
      """Run one expectile-regression step on the value network.

      The regression target is the element-wise minimum of the two outputs
      of ``self.target_qnet``. Each squared residual (target - value) is
      weighted by ``self.tau`` when the residual is positive and by
      ``1 - self.tau`` otherwise, and the mean of the weighted terms is
      minimised with ``self.v_optimizer``.

      Args:
          states: Input passed to ``self.valuenet`` and ``self.target_qnet``
              (presumably a batch of states -- confirm against the caller).
          actions: Second input passed to ``self.target_qnet``.

      Returns:
          The loss (result of ``tf.reduce_mean``) evaluated before the
          gradient step is applied.
      """
      # The target is evaluated outside the tape, so it is not differentiated.
      target_q1, target_q2 = self.target_qnet(states, actions)
      min_target_q = tf.minimum(target_q1, target_q2)

      with tf.GradientTape() as tape:
          residual = min_target_q - self.valuenet(states)
          # Asymmetric weighting: tau for positive residuals, 1 - tau otherwise.
          expectile_weight = tf.where(residual > 0, self.tau, 1. - self.tau)
          weighted_sq_residual = expectile_weight * tf.square(residual)
          loss = tf.reduce_mean(weighted_sq_residual)

      # Read the variable list after the forward pass, as the original does.
      value_params = self.valuenet.trainable_variables
      value_grads = tape.gradient(loss, value_params)
      self.v_optimizer.apply_gradients(zip(value_grads, value_params))

      return loss

	def update_value(self, states, actions):
		"""Run one expectile-regression step on the value network.

		The regression target is the element-wise minimum of the two outputs
		of ``self.target_qnet``. Each squared error (target - value) is
		weighted by ``self.tau`` when the error is positive and by
		``1 - self.tau`` otherwise; the mean of the weighted terms is the
		loss minimised with ``self.v_optimizer``.

		Args:
			states: Input passed to ``self.valuenet`` and ``self.target_qnet``
				(presumably a batch of states -- confirm against the caller).
			actions: Second input passed to ``self.target_qnet``.

		Returns:
			The loss (result of ``tf.reduce_mean``) evaluated before the
			gradient step is applied.
		"""
		# The target is evaluated outside the tape, so it is not differentiated.
		q1, q2 = self.target_qnet(states, actions)
		target_values = tf.minimum(q1, q2)

		with tf.GradientTape() as tape:
			values = self.valuenet(states)
			error = (target_values - values)
			# Asymmetric weighting: tau for positive errors, 1 - tau otherwise.
			weights = tf.where(error > 0, self.tau, 1. - self.tau)
			loss = tf.reduce_mean(weights * tf.square(error))

		# Only the value network's variables are updated here.
		variables = self.valuenet.trainable_variables
		grads = tape.gradient(loss, variables)
		self.v_optimizer.apply_gradients(zip(grads, variables))

		return loss