def store_rollout(self, state, reward):
    """Record one rollout sample.

    Appends the first element of *state* to the state buffer and *reward*
    to the reward buffer; later consumed by ``train_step``.
    """
    # The two appends are independent; only state[0] is kept
    # (presumably the observation part of a (state, ...) tuple — TODO confirm).
    self.state_buffer.append(state[0])
    self.reward_buffer.append(reward)
def train_step(self, steps_count):
    """Run one optimization step over the last *steps_count* stored rollouts.

    Slices the most recent ``steps_count`` entries from the state and reward
    buffers, scales the states by ``self.division_rate``, feeds them to the
    session, and returns the loss reported by ``self.sess.run``.
    """
    recent_states = np.array(self.state_buffer[-steps_count:]) / self.division_rate
    recent_rewards = self.reward_buffer[-steps_count:]
    feed_dict = {
        self.states: recent_states,
        self.discounted_rewards: recent_rewards,
    }
    # sess.run returns (train_op result, loss); only the loss is of interest.
    _, loss_value = self.sess.run([self.train_op, self.loss], feed_dict)
    return loss_value
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment