@piEsposito
Last active January 21, 2020 17:20
# REINFORCE training loop for VizDoom (reward shaped with the agent's health).
# Assumes game, stacker, policy_net, doom_actions, writer, update_policy,
# EPISODES, the bookkeeping lists (num_steps, avg_numsteps, all_rewards)
# and numpy (as np) are defined earlier in the script.
for episode in range(EPISODES):
    game.new_episode()
    curr_health = game.get_state().game_variables[0]

    # Initial observation: stack the first frame to build the network input.
    state = game.get_state().screen_buffer
    state = stacker.stack(state)

    log_probs = []
    rewards = []
    done = False
    steps = 0

    while True:
        # Sample an action from the policy and keep its log-probability
        # for the policy-gradient update at the end of the episode.
        action_idx, log_prob = policy_net.get_action(state)
        action = doom_actions[action_idx]
        reward = game.make_action(action)

        # game.get_state() returns None once the episode has finished.
        g_state = game.get_state()
        if g_state is None:
            health = 0
        else:
            health = g_state.game_variables[0]

        # Reward shaping: give a bonus whenever health increases
        # (i.e. the agent picked up a medkit).
        if health > curr_health:
            reward = 20
        curr_health = health

        done = game.is_episode_finished()
        rewards.append(reward)
        log_probs.append(log_prob)
        steps += 1

        if done:
            stacker.reset()
            break

        # Stack the next frame and continue the episode.
        new_state = game.get_state().screen_buffer
        state = stacker.stack(new_state)

    # Episode finished: log metrics and run the policy-gradient update.
    writer.add_scalar("steps", steps, episode)
    update_policy(policy_net, rewards, log_probs)

    num_steps.append(steps)
    writer.add_scalar("avg_steps", np.mean(num_steps[-10:]), episode)
    avg_numsteps.append(np.mean(num_steps[-10:]))
    all_rewards.append(np.sum(rewards))
    print("Episode: {}, total_reward: {}, length: {}".format(episode + 1, np.sum(rewards), steps))