# Created December 23, 2019 12:06
# Training driver: alternate between collecting experience with the agent's
# collect policy and training on a batch drawn from the replay iterator,
# evaluating the greedy policy every EVAL_INTERVAL training steps.
#
# NOTE(review): the original indentation was lost; the nesting below follows
# the usual collect -> sample -> train -> periodic-eval layout. Confirm
# against the intended script.

# Baseline evaluation before any training; seeds the history of returns.
avg_return = get_average_return(evaluation_env, agent.policy, EVAL_EPISODES)
returns = [avg_return]

for _ in range(NUMBER_ITERATION):
    # Gather COLLECTION_STEPS environment steps into the replay storage.
    for _ in range(COLLECTION_STEPS):
        experience_replay.timestamp_data(train_env, agent.collect_policy)

    # Draw the next batch from the replay iterator and run one training update.
    experience, info = next(experience_replay.iterator)
    train_loss = agent.train(experience).loss

    # Read the step counter once; it is used for both the check and the log.
    step = agent.train_step_counter.numpy()
    if step % EVAL_INTERVAL == 0:
        avg_return = get_average_return(evaluation_env, agent.policy, EVAL_EPISODES)
        print('Iteration {0} - Average Return = {1}, Loss = {2}.'.format(step, avg_return, train_loss))
        # Record the evaluation so `returns` holds the full history, not only
        # the pre-training baseline.
        returns.append(avg_return)
