Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# TRAINING PHASE
# Run n_episodes of epsilon-greedy Q-learning, recording each episode's
# cumulative reward so the learning curve can be plotted afterwards.
# (Indentation was lost in the original paste; structure reconstructed:
# rewards.append must run once per episode so len(rewards) == n_episodes,
# matching the x-axis used by the plotting section.)
rewards = []
for episode in range(n_episodes):
    # Start a fresh episode; map the continuous observation onto the
    # discrete state index used by the Q-table.
    current_state = discretize(env.reset())
    # Learning rate and exploration rate both decay with the episode index
    # (schedules defined by get_alpha / get_epsilon elsewhere in the file).
    alpha = get_alpha(episode)
    epsilon = get_epsilon(episode)
    episode_rewards = 0
    for t in range(n_steps):
        # env.render()  # uncomment to visualize training (slows it down)
        action = epsilon_policy(current_state, epsilon)
        new_state, reward, done, _ = env.step(action)
        new_state = discretize(new_state)
        # presumably the TD/Q-learning table update — see update_q's definition
        update_q(current_state, action, reward, new_state, alpha)
        current_state = new_state
        # increment the cumulative reward
        episode_rewards += reward
        # at the end of the episode
        if done:
            print('Episode:{}/{} finished with a total reward of: {}'.format(episode, n_episodes, episode_rewards))
            break
    # append the episode cumulative reward to the reward list
    rewards.append(episode_rewards)
# PLOT RESULTS
# Plot the per-episode cumulative training reward, save the figure to
# disk at print resolution, then display it.
episode_axis = list(range(n_episodes))
plt.plot(episode_axis, rewards)
plt.xlabel('episode')
plt.ylabel('Training cumulative reward')
plt.savefig('Q_learning_CART.png', dpi=300)
plt.show()
# TEST PHASE
# Run a single evaluation episode with the purely greedy policy,
# rendering each step, and report the total reward.
# FIX: the original also called update_q() here, using the stale `alpha`
# left over from the last training episode — i.e. it kept learning during
# evaluation. The Q-table is frozen here so the test measures the learned
# policy, not a moving target.
current_state = discretize(env.reset())
episode_rewards = 0
for t in range(n_steps):
    env.render()
    action = greedy_policy(current_state)
    new_state, reward, done, _ = env.step(action)
    current_state = discretize(new_state)
    episode_rewards += reward
    # at the end of the episode
    if done:
        print('Test episode finished with a total reward of: {}'.format(episode_rewards))
        break
env.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.