Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# Training driver: alternate between collecting data, training the agent on a
# sampled batch, and periodically evaluating the current policy.
#
# NOTE(review): the nesting below was reconstructed from a paste that had lost
# all indentation; it follows the usual collect -> sample -> train -> evaluate
# layout. Confirm against the original script if available.

# Restart the agent's training-step counter so the evaluation cadence below
# starts from zero.
agent.train_step_counter.assign(0)

# Baseline evaluation before any training; it seeds the history of returns.
avg_return = get_average_return(evaluation_env, agent.policy, EVAL_EPISODES)
returns = [avg_return]

for _ in range(NUMBER_ITERATION):
    # Run the collect policy COLLECTION_STEPS times per iteration
    # (presumably each call records one environment step into the replay
    # storage -- verify against `experience_replay.timestamp_data`).
    for _ in range(COLLECTION_STEPS):
        experience_replay.timestamp_data(train_env, agent.collect_policy)

    # Pull the next item from the replay iterator and train on it. The second
    # element of the pair is not used here.
    experience, _ = next(experience_replay.iterator)
    train_loss = agent.train(experience).loss

    # Read the counter once; assumes evaluation does not advance it.
    step = agent.train_step_counter.numpy()
    if step % EVAL_INTERVAL == 0:
        avg_return = get_average_return(
            evaluation_env, agent.policy, EVAL_EPISODES
        )
        print(
            f'Iteration {step} - Average Return = {avg_return}, '
            f'Loss = {train_loss}.'
        )
        returns.append(avg_return)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment