@TomLin
Created February 20, 2019 14:29
Training Process for A2C model.
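The loop below repeatedly collects short n-step rollouts with collect_trajectories, performs one A2C update per rollout with learn, and tracks a 100-episode moving average of the per-agent score. Training stops once that average reaches 34, after which the model weights and the reward history are saved. A minimal, hypothetical sketch of the two helper functions (which are defined elsewhere in the accompanying notebook, not in this gist) follows the training code.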
# Set up training process.
# Assumes env, brain_name, num_agents, the A2CModel class, and the
# collect_trajectories/learn helpers are already defined earlier in the notebook.
from collections import deque

import numpy as np
import torch
import torch.optim as optim

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
agent_a2c = A2CModel().to(device)
optimizer = optim.Adam(agent_a2c.parameters(), lr=0.00015)
env_info = env.reset(train_mode=True)[brain_name]
states = env_info.vector_observations
init_states = states

n_episodes = 1
n_steps = 10                               # rollout length per update
episode_end = False
a2c_ep_rewards_list = []
ep_rewards_deque = deque([0], maxlen=100)  # 100-episode moving window, initialized with 0
ep_rewards = 0
while True:
    # Collect an n-step rollout, then do one A2C update.
    batch_s, batch_a, batch_v_t, accu_rewards, init_states, episode_end = collect_trajectories(
        agent_a2c, env, brain_name, init_states, episode_end, n_steps)
    loss, mus, stds = learn(batch_s, batch_a, batch_v_t, agent_a2c, optimizer)
    ep_rewards += accu_rewards

    print('\rEpisode {:>4}\tEpisodic Score {:>7.3f}\tLoss {:>12.6f}'.format(
        n_episodes, np.mean(ep_rewards_deque), float(loss)), end="")

    if episode_end:
        if n_episodes % 100 == 0:
            print('\rEpisode {:>4}\tEpisodic Score {:>7.3f}\tLoss {:>12.6f}'.format(
                n_episodes, np.mean(ep_rewards_deque), float(loss)))
        if np.mean(ep_rewards_deque) >= 34:
            break
        # Record the average score across agents, then reset for the next episode.
        a2c_ep_rewards_list.append(ep_rewards/num_agents)
        ep_rewards_deque.append(ep_rewards/num_agents)
        ep_rewards = 0
        n_episodes += 1
        episode_end = False
# Save the trained A2C model.
pth = './checkpoint/a2c_checkpoint.pth'
torch.save(agent_a2c.state_dict(), pth)

# Save the per-episode reward history for later plotting.
a2c_ep_rewards_list = np.array(a2c_ep_rewards_list)
np.save('./data/a2c_ep_rewards_list.npy', a2c_ep_rewards_list)
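For reference, the sketch below is a hypothetical reconstruction of collect_trajectories and learn, inferred only from the call sites above: collect_trajectories rolls the current policy forward for n_steps across all agents and builds bootstrapped n-step value targets, and learn performs a standard A2C update (advantage-weighted policy gradient, value regression, and an entropy bonus). The assumed network interface (returning action means, standard deviations, and state values), the discount factor GAMMA, and the [-1, 1] action range are illustrative guesses, not the author's actual implementation.

# Hypothetical sketch only -- the real helpers live elsewhere in the notebook.
# Assumes agent(states) returns (mus, stds, values) for a Gaussian policy.
import numpy as np
import torch
import torch.nn.functional as F
from torch.distributions import Normal

GAMMA = 0.99  # discount factor (assumed)

def collect_trajectories(agent, env, brain_name, init_states, episode_end, n_steps):
    """Roll the policy forward for n_steps and build n-step bootstrapped value targets."""
    device = next(agent.parameters()).device
    states = init_states
    buf_s, buf_a, buf_r, buf_d = [], [], [], []
    accu_rewards = 0.0
    for _ in range(n_steps):
        s = torch.from_numpy(states).float().to(device)
        with torch.no_grad():
            mus, stds, values = agent(s)
            actions = torch.clamp(Normal(mus, stds).sample(), -1, 1)  # assumed action range
        env_info = env.step(actions.cpu().numpy())[brain_name]
        rewards = np.array(env_info.rewards)
        dones = np.array(env_info.local_done, dtype=np.float32)
        buf_s.append(s); buf_a.append(actions)
        buf_r.append(rewards); buf_d.append(dones)
        accu_rewards += rewards.sum()
        states = env_info.vector_observations
        if dones.any():
            episode_end = True
            states = env.reset(train_mode=True)[brain_name].vector_observations
            break
    # Bootstrap from the last state's value, then accumulate discounted n-step returns.
    # Terminal steps are masked by (1 - done), so no bootstrap leaks across episodes.
    with torch.no_grad():
        _, _, last_values = agent(torch.from_numpy(states).float().to(device))
    returns = last_values.squeeze(-1).cpu().numpy()
    batch_v_t = []
    for r, d in zip(reversed(buf_r), reversed(buf_d)):
        returns = r + GAMMA * returns * (1.0 - d)
        batch_v_t.append(returns.copy())
    batch_v_t.reverse()
    batch_s = torch.cat(buf_s)
    batch_a = torch.cat(buf_a)
    batch_v_t = torch.from_numpy(np.concatenate(batch_v_t)).float().to(device)
    return batch_s, batch_a, batch_v_t, accu_rewards, states, episode_end

def learn(batch_s, batch_a, batch_v_t, agent, optimizer, beta=0.01):
    """One A2C update: advantage-weighted policy gradient + value loss + entropy bonus."""
    mus, stds, values = agent(batch_s)
    dist = Normal(mus, stds)
    log_probs = dist.log_prob(batch_a).sum(dim=-1)
    advantages = batch_v_t - values.squeeze(-1).detach()
    policy_loss = -(log_probs * advantages).mean()
    value_loss = F.mse_loss(values.squeeze(-1), batch_v_t)
    entropy = dist.entropy().sum(dim=-1).mean()
    loss = policy_loss + 0.5 * value_loss - beta * entropy
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.detach(), mus.detach(), stds.detach()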